commit 28bcf19c30547c8845118ab985dbe25f3e860c0e Author: Jim Miller Date: Wed Dec 10 12:12:06 2014 -0600 Fix lower ad placements. diff --git a/allrecent.html b/allrecent.html new file mode 100644 index 00000000..477b17b7 --- /dev/null +++ b/allrecent.html @@ -0,0 +1,78 @@ + + + + + FanFictionDownLoader (fanfiction.net, fanficauthors, fictionalley, ficwad to epub and HTML) + + + + +
+

+ FanFictionDownLoader +

+ + + + + {{yourfile}} + + +
+ {% for fic in fics %} +

+ {{ fic.title }} + by {{ fic.author }} Download Count: {{ fic.count }}
+ Word Count: {{ fic.numWords }} Chapter Count: {{ fic.numChapters }}
+ {% if fic.category %} Categories: {{ fic.category }}
{% endif %} + {% if fic.genre %} Genres: {{ fic.genre }}
{% endif %} + {% if fic.language %} Language: {{ fic.language }}
{% endif %} + {% if fic.series %} Series: {{ fic.series }}
{% endif %} + {% if fic.characters %} Characters: {{ fic.characters }}
{% endif %} + {% if fic.status %} Status: {{ fic.status }}
{% endif %} + {% if fic.datePublished %} Published: {{ fic.datePublished }}
{% endif %} + {% if fic.dateUpdated %} Last Updated: {{ fic.dateUpdated }}
{% endif %} + {% if fic.dateCreated %} Last Downloaded: {{ fic.dateCreated }}
{% endif %} + {% if fic.rating %} Rating: {{ fic.rating }}
{% endif %} + {% if fic.warnings %} Warnings: {{ fic.warnings }}
{% endif %} + {% if fic.description %} Summary: {{ fic.description }}
{% endif %} +

+ {% endfor %} +
+ + + + +
+ + diff --git a/app.yaml b/app.yaml new file mode 100644 index 00000000..ccd790a9 --- /dev/null +++ b/app.yaml @@ -0,0 +1,46 @@ +# ffd-retief-hrd fanfictiondownloader +application: fanfictiondownloader +version: 2-0-10 +runtime: python27 +api_version: 1 +threadsafe: true + +handlers: + +- url: /r3m0v3r.* + script: utils.remover.app + login: admin + +- url: /tally.* + script: utils.tally.app + login: admin + +- url: /fdownloadtask + script: main.app + login: admin + +- url: /css + static_dir: css + +- url: /js + static_dir: js + +- url: /static + static_dir: static + +- url: /favicon\.ico + static_files: static/favicon.ico + upload: static/favicon\.ico + +- url: /.* + script: main.app + +#builtins: +#- datastore_admin: on + +libraries: +- name: django + version: "1.2" + +- name: PIL + version: "1.1.7" diff --git a/calibre-plugin/__init__.py b/calibre-plugin/__init__.py new file mode 100644 index 00000000..83741b49 --- /dev/null +++ b/calibre-plugin/__init__.py @@ -0,0 +1,123 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +from __future__ import (unicode_literals, division, absolute_import, + print_function) + +__license__ = 'GPL v3' +__copyright__ = '2013, Jim Miller' +__docformat__ = 'restructuredtext en' + +import sys +if sys.version_info >= (2, 7): + import logging + logger = logging.getLogger(__name__) + loghandler=logging.StreamHandler() + loghandler.setFormatter(logging.Formatter("FFDL:%(levelname)s:%(filename)s(%(lineno)d):%(message)s")) + logger.addHandler(loghandler) + loghandler.setLevel(logging.DEBUG) + logger.setLevel(logging.DEBUG) + +# pulls in translation files for _() strings +try: + load_translations() +except NameError: + pass # load_translations() added in calibre 1.9 + +# The class that all Interface Action plugin wrappers must inherit from +from calibre.customize import InterfaceActionBase + +## Apparently the name for this class doesn't matter--it was still +## 'demo' for the first few versions. 
+class FanFictionDownLoaderBase(InterfaceActionBase): + ''' + This class is a simple wrapper that provides information about the + actual plugin class. The actual interface plugin class is called + InterfacePlugin and is defined in the ffdl_plugin.py file, as + specified in the actual_plugin field below. + + The reason for having two classes is that it allows the command line + calibre utilities to run without needing to load the GUI libraries. + ''' + name = 'FanFictionDownLoader' + description = _('UI plugin to download FanFiction stories from various sites.') + supported_platforms = ['windows', 'osx', 'linux'] + author = 'Jim Miller' + version = (2, 0, 10) + minimum_calibre_version = (1, 48, 0) + + #: This field defines the GUI plugin class that contains all the code + #: that actually does something. Its format is module_path:class_name + #: The specified class must be defined in the specified module. + actual_plugin = 'calibre_plugins.fanfictiondownloader_plugin.ffdl_plugin:FanFictionDownLoaderPlugin' + + def is_customizable(self): + ''' + This method must return True to enable customization via + Preferences->Plugins + ''' + return True + + def config_widget(self): + ''' + Implement this method and :meth:`save_settings` in your plugin to + use a custom configuration dialog. + + This method, if implemented, must return a QWidget. The widget can have + an optional method validate() that takes no arguments and is called + immediately after the user clicks OK. Changes are applied if and only + if the method returns True. + + If for some reason you cannot perform the configuration at this time, + return a tuple of two strings (message, details), these will be + displayed as a warning dialog to the user and the process will be + aborted. + + The base class implementation of this method raises NotImplementedError + so by default no user configuration is possible. 
+ ''' + # It is important to put this import statement here rather than at the + # top of the module as importing the config class will also cause the + # GUI libraries to be loaded, which we do not want when using calibre + # from the command line + from calibre_plugins.fanfictiondownloader_plugin.config import ConfigWidget + return ConfigWidget(self.actual_plugin_) + + def save_settings(self, config_widget): + ''' + Save the settings specified by the user with config_widget. + + :param config_widget: The widget returned by :meth:`config_widget`. + ''' + config_widget.save_settings() + + # Apply the changes + ac = self.actual_plugin_ + if ac is not None: + ac.apply_settings() + + def cli_main(self,argv): + # I believe there's no performance hit loading these here when + # CLI--it would load everytime anyway. + from StringIO import StringIO + from calibre.library import db + from calibre_plugins.fanfictiondownloader_plugin.downloader import main as ffdl_main + from calibre_plugins.fanfictiondownloader_plugin.prefs import PrefsFacade + from calibre.utils.config import prefs as calibre_prefs + from optparse import OptionParser + + parser = OptionParser('%prog --run-plugin '+self.name+' -- [options] ') + parser.add_option('--library-path', '--with-library', default=None, help=_('Path to the calibre library. Default is to use the path stored in the settings.')) + # parser.add_option('--dont-notify-gui', default=False, action='store_true', + # help=_('Do not notify the running calibre GUI (if any) that the database has' + # ' changed. 
Use with care, as it can lead to database corruption!')) + + pargs = [x for x in argv if x.startswith('--with-library') or x.startswith('--library-path') + or not x.startswith('-')] + opts, args = parser.parse_args(pargs) + + ffdl_prefs = PrefsFacade(db(path=opts.library_path, + read_only=True)) + ffdl_main(argv[1:], + parser=parser, + passed_defaultsini=StringIO(get_resources("defaults.ini")), + passed_personalini=StringIO(ffdl_prefs["personal.ini"])) diff --git a/calibre-plugin/about.txt b/calibre-plugin/about.txt new file mode 100644 index 00000000..9ea9cd05 --- /dev/null +++ b/calibre-plugin/about.txt @@ -0,0 +1,28 @@ +
+ +

Plugin created by Jim Miller, borrowing heavily from Grant Drake's +'Reading List', +'Extract ISBN' and +'Count Pages' +plugins.

+ +

+Calibre officially distributes plugins from the mobileread.com forum site. +The official distro channel for this plugin is there: FanFictionDownLoader +

+ +

I also monitor the +general users +group for the downloader. That covers the web application and CLI, too. +

+ +The source for this plugin is available at it's +project home. +
+ +

+See the list of supported sites. +

+

+Read the FAQs. +

diff --git a/calibre-plugin/common_utils.py b/calibre-plugin/common_utils.py new file mode 100644 index 00000000..73ac823c --- /dev/null +++ b/calibre-plugin/common_utils.py @@ -0,0 +1,553 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai +from __future__ import (unicode_literals, division, absolute_import, + print_function) + +__license__ = 'GPL v3' +__copyright__ = '2011, Grant Drake ' +__docformat__ = 'restructuredtext en' + +import os +try: + from PyQt5 import QtWidgets as QtGui + from PyQt5.Qt import (Qt, QIcon, QPixmap, QLabel, QDialog, QHBoxLayout, + QTableWidgetItem, QFont, QLineEdit, QComboBox, + QVBoxLayout, QDialogButtonBox, QStyledItemDelegate, QDateTime, + QTextEdit, QListWidget, QAbstractItemView) +except ImportError as e: + from PyQt4 import QtGui + from PyQt4.Qt import (Qt, QIcon, QPixmap, QLabel, QDialog, QHBoxLayout, + QTableWidgetItem, QFont, QLineEdit, QComboBox, + QVBoxLayout, QDialogButtonBox, QStyledItemDelegate, QDateTime, + QTextEdit, QListWidget, QAbstractItemView) + +from calibre.constants import iswindows +from calibre.gui2 import gprefs, error_dialog, UNDEFINED_QDATETIME, info_dialog +from calibre.gui2.actions import menu_action_unique_name +from calibre.gui2.keyboard import ShortcutConfig +from calibre.utils.config import config_dir +from calibre.utils.date import now, format_date, qt_to_dt, UNDEFINED_DATE + +# Global definition of our plugin name. Used for common functions that require this. +plugin_name = None +# Global definition of our plugin resources. Used to share between the xxxAction and xxxBase +# classes if you need any zip images to be displayed on the configuration dialog. +plugin_icon_resources = {} + + +def set_plugin_icon_resources(name, resources): + ''' + Set our global store of plugin name and icon resources for sharing between + the InterfaceAction class which reads them and the ConfigWidget + if needed for use on the customization dialog for this plugin. 
+ ''' + global plugin_icon_resources, plugin_name + plugin_name = name + plugin_icon_resources = resources + + +def get_icon(icon_name): + ''' + Retrieve a QIcon for the named image from the zip file if it exists, + or if not then from Calibre's image cache. + ''' + if icon_name: + pixmap = get_pixmap(icon_name) + if pixmap is None: + # Look in Calibre's cache for the icon + return QIcon(I(icon_name)) + else: + return QIcon(pixmap) + return QIcon() + + +def get_pixmap(icon_name): + ''' + Retrieve a QPixmap for the named image + Any icons belonging to the plugin must be prefixed with 'images/' + ''' + global plugin_icon_resources, plugin_name + + if not icon_name.startswith('images/'): + # We know this is definitely not an icon belonging to this plugin + pixmap = QPixmap() + pixmap.load(I(icon_name)) + return pixmap + + # Check to see whether the icon exists as a Calibre resource + # This will enable skinning if the user stores icons within a folder like: + # ...\AppData\Roaming\calibre\resources\images\Plugin Name\ + if plugin_name: + local_images_dir = get_local_images_dir(plugin_name) + local_image_path = os.path.join(local_images_dir, icon_name.replace('images/', '')) + if os.path.exists(local_image_path): + pixmap = QPixmap() + pixmap.load(local_image_path) + return pixmap + + # As we did not find an icon elsewhere, look within our zip resources + if icon_name in plugin_icon_resources: + pixmap = QPixmap() + pixmap.loadFromData(plugin_icon_resources[icon_name]) + return pixmap + return None + + +def get_local_images_dir(subfolder=None): + ''' + Returns a path to the user's local resources/images folder + If a subfolder name parameter is specified, appends this to the path + ''' + images_dir = os.path.join(config_dir, 'resources/images') + if subfolder: + images_dir = os.path.join(images_dir, subfolder) + if iswindows: + images_dir = os.path.normpath(images_dir) + return images_dir + + +def create_menu_item(ia, parent_menu, menu_text, image=None, tooltip=None, + 
shortcut=(), triggered=None, is_checked=None): + ''' + Create a menu action with the specified criteria and action + Note that if no shortcut is specified, will not appear in Preferences->Keyboard + This method should only be used for actions which either have no shortcuts, + or register their menus only once. Use create_menu_action_unique for all else. + ''' + if shortcut is not None: + if len(shortcut) == 0: + shortcut = () + else: + shortcut = _(shortcut) + ac = ia.create_action(spec=(menu_text, None, tooltip, shortcut), + attr=menu_text) + if image: + ac.setIcon(get_icon(image)) + if triggered is not None: + ac.triggered.connect(triggered) + if is_checked is not None: + ac.setCheckable(True) + if is_checked: + ac.setChecked(True) + + parent_menu.addAction(ac) + return ac + + +def create_menu_action_unique(ia, parent_menu, menu_text, image=None, tooltip=None, + shortcut=None, triggered=None, is_checked=None, shortcut_name=None, + unique_name=None): + ''' + Create a menu action with the specified criteria and action, using the new + InterfaceAction.create_menu_action() function which ensures that regardless of + whether a shortcut is specified it will appear in Preferences->Keyboard + ''' + orig_shortcut = shortcut + kb = ia.gui.keyboard + if unique_name is None: + unique_name = menu_text + if not shortcut == False: + full_unique_name = menu_action_unique_name(ia, unique_name) + if full_unique_name in kb.shortcuts: + shortcut = False + else: + if shortcut is not None and not shortcut == False: + if len(shortcut) == 0: + shortcut = None + else: + shortcut = _(shortcut) + + if shortcut_name is None: + shortcut_name = menu_text.replace('&','') + + ac = ia.create_menu_action(parent_menu, unique_name, menu_text, icon=None, shortcut=shortcut, + description=tooltip, triggered=triggered, shortcut_name=shortcut_name) + if shortcut == False and not orig_shortcut == False: + if ac.calibre_shortcut_unique_name in ia.gui.keyboard.shortcuts: + 
kb.replace_action(ac.calibre_shortcut_unique_name, ac) + if image: + ac.setIcon(get_icon(image)) + if is_checked is not None: + ac.setCheckable(True) + if is_checked: + ac.setChecked(True) + return ac + + +def swap_author_names(author): + if author.find(',') == -1: + return author + name_parts = author.strip().partition(',') + return name_parts[2].strip() + ' ' + name_parts[0] + + +def get_library_uuid(db): + try: + library_uuid = db.library_id + except: + library_uuid = '' + return library_uuid + + +class ImageLabel(QLabel): + + def __init__(self, parent, icon_name, size=16): + QLabel.__init__(self, parent) + pixmap = get_pixmap(icon_name) + self.setPixmap(pixmap) + self.setMaximumSize(size, size) + self.setScaledContents(True) + + +class ImageTitleLayout(QHBoxLayout): + ''' + A reusable layout widget displaying an image followed by a title + ''' + def __init__(self, parent, icon_name, title, tooltip=None): + QHBoxLayout.__init__(self) + title_image_label = QLabel(parent) + pixmap = get_pixmap(icon_name) + if pixmap is None: + pixmap = get_pixmap('library.png') + # error_dialog(parent, _('Restart required'), + # _('You must restart Calibre before using this plugin!'), show=True) + else: + title_image_label.setPixmap(pixmap) + title_image_label.setMaximumSize(32, 32) + title_image_label.setScaledContents(True) + self.addWidget(title_image_label) + + title_font = QFont() + title_font.setPointSize(16) + shelf_label = QLabel(title, parent) + shelf_label.setFont(title_font) + self.addWidget(shelf_label) + self.insertStretch(-1) + + if tooltip: + title_image_label.setToolTip(tooltip) + shelf_label.setToolTip(tooltip) + +class SizePersistedDialog(QDialog): + ''' + This dialog is a base class for any dialogs that want their size/position + restored when they are next opened. 
+ ''' + def __init__(self, parent, unique_pref_name): + QDialog.__init__(self, parent) + self.unique_pref_name = unique_pref_name + self.geom = gprefs.get(unique_pref_name, None) + self.finished.connect(self.dialog_closing) + + def resize_dialog(self): + if self.geom is None: + self.resize(self.sizeHint()) + else: + self.restoreGeometry(self.geom) + + def dialog_closing(self, result): + self.geom = bytearray(self.saveGeometry()) + gprefs[self.unique_pref_name] = self.geom + + +class ReadOnlyTableWidgetItem(QTableWidgetItem): + + def __init__(self, text): + if text is None: + text = '' + QTableWidgetItem.__init__(self, text, QtGui.QTableWidgetItem.UserType) + self.setFlags(Qt.ItemIsSelectable|Qt.ItemIsEnabled) + + +class RatingTableWidgetItem(QTableWidgetItem): + + def __init__(self, rating, is_read_only=False): + QTableWidgetItem.__init__(self, '', QtGui.QTableWidgetItem.UserType) + self.setData(Qt.DisplayRole, rating) + if is_read_only: + self.setFlags(Qt.ItemIsSelectable|Qt.ItemIsEnabled) + + +class DateTableWidgetItem(QTableWidgetItem): + + def __init__(self, date_read, is_read_only=False, default_to_today=False): + if date_read == UNDEFINED_DATE and default_to_today: + date_read = now() + if is_read_only: + QTableWidgetItem.__init__(self, format_date(date_read, None), QtGui.QTableWidgetItem.UserType) + self.setFlags(Qt.ItemIsSelectable|Qt.ItemIsEnabled) + else: + QTableWidgetItem.__init__(self, '', QtGui.QTableWidgetItem.UserType) + self.setData(Qt.DisplayRole, QDateTime(date_read)) + + +class NoWheelComboBox(QComboBox): + + def wheelEvent (self, event): + # Disable the mouse wheel on top of the combo box changing selection as plays havoc in a grid + event.ignore() + + +class CheckableTableWidgetItem(QTableWidgetItem): + + def __init__(self, checked=False, is_tristate=False): + QTableWidgetItem.__init__(self, '') + self.setFlags(Qt.ItemFlags(Qt.ItemIsSelectable | Qt.ItemIsUserCheckable | Qt.ItemIsEnabled )) + if is_tristate: + self.setFlags(self.flags() | 
Qt.ItemIsTristate) + if checked: + self.setCheckState(Qt.Checked) + else: + if is_tristate and checked is None: + self.setCheckState(Qt.PartiallyChecked) + else: + self.setCheckState(Qt.Unchecked) + + def get_boolean_value(self): + ''' + Return a boolean value indicating whether checkbox is checked + If this is a tristate checkbox, a partially checked value is returned as None + ''' + if self.checkState() == Qt.PartiallyChecked: + return None + else: + return self.checkState() == Qt.Checked + + +class TextIconWidgetItem(QTableWidgetItem): + + def __init__(self, text, icon): + QTableWidgetItem.__init__(self, text) + if icon: + self.setIcon(icon) + + +class ReadOnlyTextIconWidgetItem(ReadOnlyTableWidgetItem): + + def __init__(self, text, icon): + ReadOnlyTableWidgetItem.__init__(self, text) + if icon: + self.setIcon(icon) + + +class ReadOnlyLineEdit(QLineEdit): + + def __init__(self, text, parent): + if text is None: + text = '' + QLineEdit.__init__(self, text, parent) + self.setEnabled(False) + + +class KeyValueComboBox(QComboBox): + + def __init__(self, parent, values, selected_key): + QComboBox.__init__(self, parent) + self.values = values + self.populate_combo(selected_key) + + def populate_combo(self, selected_key): + self.clear() + selected_idx = idx = -1 + for key, value in self.values.iteritems(): + idx = idx + 1 + self.addItem(value) + if key == selected_key: + selected_idx = idx + self.setCurrentIndex(selected_idx) + + def selected_key(self): + for key, value in self.values.iteritems(): + if value == unicode(self.currentText()).strip(): + return key + + +class CustomColumnComboBox(QComboBox): + + def __init__(self, parent, custom_columns, selected_column, initial_items=['']): + QComboBox.__init__(self, parent) + self.populate_combo(custom_columns, selected_column, initial_items) + + def populate_combo(self, custom_columns, selected_column, initial_items=['']): + self.clear() + self.column_names = initial_items + if len(initial_items) > 0: + 
self.addItems(initial_items) + selected_idx = 0 + for idx, value in enumerate(initial_items): + if value == selected_column: + selected_idx = idx + for key in sorted(custom_columns.keys()): + self.column_names.append(key) + self.addItem('%s (%s)'%(key, custom_columns[key]['name'])) + if key == selected_column: + selected_idx = len(self.column_names) - 1 + self.setCurrentIndex(selected_idx) + + def get_selected_column(self): + return self.column_names[self.currentIndex()] + + +class KeyboardConfigDialog(SizePersistedDialog): + ''' + This dialog is used to allow editing of keyboard shortcuts. + ''' + def __init__(self, gui, group_name): + SizePersistedDialog.__init__(self, gui, 'Keyboard shortcut dialog') + self.gui = gui + self.setWindowTitle('Keyboard shortcuts') + layout = QVBoxLayout(self) + self.setLayout(layout) + + self.keyboard_widget = ShortcutConfig(self) + layout.addWidget(self.keyboard_widget) + self.group_name = group_name + + button_box = QDialogButtonBox(QDialogButtonBox.Ok | QDialogButtonBox.Cancel) + button_box.accepted.connect(self.commit) + button_box.rejected.connect(self.reject) + layout.addWidget(button_box) + + # Cause our dialog size to be restored from prefs or created on first usage + self.resize_dialog() + self.initialize() + + def initialize(self): + self.keyboard_widget.initialize(self.gui.keyboard) + self.keyboard_widget.highlight_group(self.group_name) + + def commit(self): + self.keyboard_widget.commit() + self.accept() + + +class DateDelegate(QStyledItemDelegate): + ''' + Delegate for dates. Because this delegate stores the + format as an instance variable, a new instance must be created for each + column. This differs from all the other delegates. 
+ ''' + def __init__(self, parent): + QStyledItemDelegate.__init__(self, parent) + self.format = 'dd MMM yyyy' + + def displayText(self, val, locale): + d = val.toDateTime() + if d <= UNDEFINED_QDATETIME: + return '' + return format_date(qt_to_dt(d, as_utc=False), self.format) + + def createEditor(self, parent, option, index): + qde = QStyledItemDelegate.createEditor(self, parent, option, index) + qde.setDisplayFormat(self.format) + qde.setMinimumDateTime(UNDEFINED_QDATETIME) + qde.setSpecialValueText(_('Undefined')) + qde.setCalendarPopup(True) + return qde + + def setEditorData(self, editor, index): + val = index.model().data(index, Qt.DisplayRole).toDateTime() + if val is None or val == UNDEFINED_QDATETIME: + val = now() + editor.setDateTime(val) + + def setModelData(self, editor, model, index): + val = editor.dateTime() + if val <= UNDEFINED_QDATETIME: + model.setData(index, UNDEFINED_QDATETIME, Qt.EditRole) + else: + model.setData(index, QDateTime(val), Qt.EditRole) + +class PrefsViewerDialog(SizePersistedDialog): + + def __init__(self, gui, namespace): + SizePersistedDialog.__init__(self, gui, 'Prefs Viewer dialog') + self.setWindowTitle('Preferences for: '+namespace) + + self.gui = gui + self.db = gui.current_db + self.namespace = namespace + self._init_controls() + self.resize_dialog() + + self._populate_settings() + + if self.keys_list.count(): + self.keys_list.setCurrentRow(0) + + def _init_controls(self): + layout = QVBoxLayout(self) + self.setLayout(layout) + + ml = QHBoxLayout() + layout.addLayout(ml, 1) + + self.keys_list = QListWidget(self) + self.keys_list.setSelectionMode(QAbstractItemView.SingleSelection) + self.keys_list.setFixedWidth(150) + self.keys_list.setAlternatingRowColors(True) + ml.addWidget(self.keys_list) + self.value_text = QTextEdit(self) + self.value_text.setTabStopWidth(24) + self.value_text.setReadOnly(True) + ml.addWidget(self.value_text, 1) + + button_box = QDialogButtonBox(QDialogButtonBox.Ok) + 
button_box.accepted.connect(self.accept) + self.clear_button = button_box.addButton('Clear', QDialogButtonBox.ResetRole) + self.clear_button.setIcon(get_icon('trash.png')) + self.clear_button.setToolTip('Clear all settings for this plugin') + self.clear_button.clicked.connect(self._clear_settings) + layout.addWidget(button_box) + + def _populate_settings(self): + self.keys_list.clear() + ns_prefix = self._get_ns_prefix() + keys = sorted([k[len(ns_prefix):] for k in self.db.prefs.iterkeys() + if k.startswith(ns_prefix)]) + for key in keys: + self.keys_list.addItem(key) + self.keys_list.setMinimumWidth(self.keys_list.sizeHintForColumn(0)) + self.keys_list.currentRowChanged[int].connect(self._current_row_changed) + + def _current_row_changed(self, new_row): + if new_row < 0: + self.value_text.clear() + return + key = unicode(self.keys_list.currentItem().text()) + val = self.db.prefs.get_namespaced(self.namespace, key, '') + self.value_text.setPlainText(self.db.prefs.to_raw(val)) + + def _get_ns_prefix(self): + return 'namespaced:%s:'% self.namespace + + def _clear_settings(self): + from calibre.gui2.dialogs.confirm_delete import confirm + message = '

Are you sure you want to clear your settings in this library for this plugin?

' \ + '

Any settings in other libraries or stored in a JSON file in your calibre plugins ' \ + 'folder will not be touched.

' \ + '

You must restart calibre afterwards.

' + if not confirm(message, self.namespace+'_clear_settings', self): + return + ns_prefix = self._get_ns_prefix() + keys = [k for k in self.db.prefs.iterkeys() if k.startswith(ns_prefix)] + for k in keys: + del self.db.prefs[k] + self._populate_settings() + d = info_dialog(self, 'Settings deleted', + '

All settings for this plugin in this library have been cleared.

' + '

Please restart calibre now.

', + show_copy_button=False) + b = d.bb.addButton(_('Restart calibre now'), d.bb.AcceptRole) + b.setIcon(QIcon(I('lt.png'))) + d.do_restart = False + def rf(): + d.do_restart = True + b.clicked.connect(rf) + d.set_details('') + d.exec_() + b.clicked.disconnect() + self.close() + if d.do_restart: + self.gui.quit(restart=True) + diff --git a/calibre-plugin/config.py b/calibre-plugin/config.py new file mode 100644 index 00000000..5b1b6586 --- /dev/null +++ b/calibre-plugin/config.py @@ -0,0 +1,1094 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai +from __future__ import (unicode_literals, division, absolute_import, + print_function) + +__license__ = 'GPL v3' +__copyright__ = '2014, Jim Miller' +__docformat__ = 'restructuredtext en' + +import logging +logger = logging.getLogger(__name__) + +import traceback, copy, threading +from collections import OrderedDict + +try: + from PyQt5.Qt import (QDialog, QWidget, QVBoxLayout, QHBoxLayout, QLabel, + QLineEdit, QFont, QWidget, QTextEdit, QComboBox, + QCheckBox, QPushButton, QTabWidget, QScrollArea, + QDialogButtonBox, QGroupBox ) +except ImportError as e: + from PyQt4.Qt import (QDialog, QWidget, QVBoxLayout, QHBoxLayout, QLabel, + QLineEdit, QFont, QWidget, QTextEdit, QComboBox, + QCheckBox, QPushButton, QTabWidget, QScrollArea, + QDialogButtonBox, QGroupBox ) +try: + from calibre.gui2 import QVariant + del QVariant +except ImportError: + is_qt4 = False + convert_qvariant = lambda x: x +else: + is_qt4 = True + def convert_qvariant(x): + vt = x.type() + if vt == x.String: + return unicode(x.toString()) + if vt == x.List: + return [convert_qvariant(i) for i in x.toList()] + return x.toPyObject() + +from calibre.gui2.ui import get_gui +from calibre.gui2 import dynamic, info_dialog +from calibre.constants import numeric_version as calibre_version + +# pulls in translation files for _() strings +try: + load_translations() +except NameError: + pass # load_translations() added in calibre 1.9 + +# There 
are a number of things used several times that shouldn't be +# translated. This is just a way to make that easier by keeping them +# out of the _() strings. +# I'm tempted to override _() to include them... +no_trans = { 'pini':'personal.ini', + 'imgset':'\n\n[epub]\ninclude_images:true\nkeep_summary_html:true\nmake_firstimage_cover:true\n\n', + 'gcset':'generate_cover_settings', + 'ccset':'custom_columns_settings', + 'gc':'Generate Cover', + 'rl':'Reading List', + 'cp':'Count Pages', + 'cmplt':'Completed', + 'inprog':'In-Progress', + 'lul':'Last Updated', + 'lus':'lastupdate', + 'is':'include_subject', + 'isa':'is_adult', + 'u':'username', + 'p':'password', + } + +from calibre_plugins.fanfictiondownloader_plugin.prefs import prefs, PREFS_NAMESPACE +from calibre_plugins.fanfictiondownloader_plugin.dialogs \ + import (UPDATE, UPDATEALWAYS, collision_order, save_collisions, RejectListDialog, + EditTextDialog, RejectUrlEntry) + +from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader.adapters \ + import getConfigSections + +from calibre_plugins.fanfictiondownloader_plugin.common_utils \ + import ( KeyboardConfigDialog, PrefsViewerDialog ) + +from calibre.gui2.complete2 import EditWithComplete #MultiCompleteLineEdit + +class RejectURLList: + def __init__(self,prefs): + self.prefs = prefs + self.sync_lock = threading.RLock() + self.listcache = None + + def _read_list_from_text(self,text,addreasontext=''): + cache = OrderedDict() + + #print("_read_list_from_text") + for line in text.splitlines(): + rue = RejectUrlEntry(line,addreasontext=addreasontext,fromline=True) + #print("rue.url:%s"%rue.url) + if rue.valid: + cache[rue.url] = rue + return cache + + def _get_listcache(self): + if self.listcache == None: + self.listcache = self._read_list_from_text(prefs['rejecturls']) + return self.listcache + + def _save_list(self,listcache): + #print("_save_list") + self.prefs['rejecturls'] = '\n'.join([x.to_line() for x in listcache.values()]) + self.prefs.save_to_db() + 
self.listcache = None + + def clear_cache(self): + self.listcache = None + + # true if url is in list. + def check(self,url): + with self.sync_lock: + listcache = self._get_listcache() + return url in listcache + + def get_note(self,url): + with self.sync_lock: + listcache = self._get_listcache() + if url in listcache: + return listcache[url].note + # not found + return '' + + def get_full_note(self,url): + with self.sync_lock: + listcache = self._get_listcache() + if url in listcache: + return listcache[url].fullnote() + # not found + return '' + + def remove(self,url): + with self.sync_lock: + listcache = self._get_listcache() + if url in listcache: + del listcache[url] + self._save_list(listcache) + + def add_text(self,rejecttext,addreasontext): + self.add(self._read_list_from_text(rejecttext,addreasontext).values()) + + def add(self,rejectlist,clear=False): + with self.sync_lock: + if clear: + listcache=OrderedDict() + else: + listcache = self._get_listcache() + for l in rejectlist: + listcache[l.url]=l + self._save_list(listcache) + + def get_list(self): + return self._get_listcache().values() + + def get_reject_reasons(self): + return self.prefs['rejectreasons'].splitlines() + +rejecturllist = RejectURLList(prefs) + +class ConfigWidget(QWidget): + + def __init__(self, plugin_action): + QWidget.__init__(self) + self.plugin_action = plugin_action + + self.l = QVBoxLayout() + self.setLayout(self.l) + + label = QLabel(''+_('List of Supported Sites')+' -- '+_('FAQs')+'') + label.setOpenExternalLinks(True) + self.l.addWidget(label) + + + self.scroll_area = QScrollArea(self) + self.scroll_area.setFrameShape(QScrollArea.NoFrame) + self.scroll_area.setWidgetResizable(True) + self.l.addWidget(self.scroll_area) + + tab_widget = QTabWidget(self) + self.scroll_area.setWidget(tab_widget) + + self.basic_tab = BasicTab(self, plugin_action) + tab_widget.addTab(self.basic_tab, _('Basic')) + + self.personalini_tab = PersonalIniTab(self, plugin_action) + 
tab_widget.addTab(self.personalini_tab, 'personal.ini') + + self.readinglist_tab = ReadingListTab(self, plugin_action) + tab_widget.addTab(self.readinglist_tab, 'Reading Lists') + if 'Reading List' not in plugin_action.gui.iactions: + self.readinglist_tab.setEnabled(False) + + self.generatecover_tab = GenerateCoverTab(self, plugin_action) + tab_widget.addTab(self.generatecover_tab, 'Generate Cover') + if 'Generate Cover' not in plugin_action.gui.iactions: + self.generatecover_tab.setEnabled(False) + + self.countpages_tab = CountPagesTab(self, plugin_action) + tab_widget.addTab(self.countpages_tab, 'Count Pages') + if 'Count Pages' not in plugin_action.gui.iactions: + self.countpages_tab.setEnabled(False) + + self.std_columns_tab = StandardColumnsTab(self, plugin_action) + tab_widget.addTab(self.std_columns_tab, _('Standard Columns')) + + self.cust_columns_tab = CustomColumnsTab(self, plugin_action) + tab_widget.addTab(self.cust_columns_tab, _('Custom Columns')) + + self.other_tab = OtherTab(self, plugin_action) + tab_widget.addTab(self.other_tab, _('Other')) + + + def save_settings(self): + + # basic + prefs['fileform'] = unicode(self.basic_tab.fileform.currentText()) + prefs['collision'] = save_collisions[unicode(self.basic_tab.collision.currentText())] + prefs['updatemeta'] = self.basic_tab.updatemeta.isChecked() + prefs['updatecover'] = self.basic_tab.updatecover.isChecked() + prefs['updateepubcover'] = self.basic_tab.updateepubcover.isChecked() + prefs['keeptags'] = self.basic_tab.keeptags.isChecked() + prefs['suppressauthorsort'] = self.basic_tab.suppressauthorsort.isChecked() + prefs['suppresstitlesort'] = self.basic_tab.suppresstitlesort.isChecked() + prefs['mark'] = self.basic_tab.mark.isChecked() + prefs['showmarked'] = self.basic_tab.showmarked.isChecked() + prefs['autoconvert'] = self.basic_tab.autoconvert.isChecked() + prefs['urlsfromclip'] = self.basic_tab.urlsfromclip.isChecked() + prefs['updatedefault'] = self.basic_tab.updatedefault.isChecked() + 
prefs['deleteotherforms'] = self.basic_tab.deleteotherforms.isChecked() + prefs['adddialogstaysontop'] = self.basic_tab.adddialogstaysontop.isChecked() + prefs['includeimages'] = self.basic_tab.includeimages.isChecked() + prefs['lookforurlinhtml'] = self.basic_tab.lookforurlinhtml.isChecked() + prefs['checkforseriesurlid'] = self.basic_tab.checkforseriesurlid.isChecked() + prefs['checkforurlchange'] = self.basic_tab.checkforurlchange.isChecked() + prefs['injectseries'] = self.basic_tab.injectseries.isChecked() + prefs['smarten_punctuation'] = self.basic_tab.smarten_punctuation.isChecked() + prefs['reject_always'] = self.basic_tab.reject_always.isChecked() + + if self.readinglist_tab: + # lists + prefs['send_lists'] = ', '.join(map( lambda x : x.strip(), filter( lambda x : x.strip() != '', unicode(self.readinglist_tab.send_lists_box.text()).split(',')))) + prefs['read_lists'] = ', '.join(map( lambda x : x.strip(), filter( lambda x : x.strip() != '', unicode(self.readinglist_tab.read_lists_box.text()).split(',')))) + # print("send_lists: %s"%prefs['send_lists']) + # print("read_lists: %s"%prefs['read_lists']) + prefs['addtolists'] = self.readinglist_tab.addtolists.isChecked() + prefs['addtoreadlists'] = self.readinglist_tab.addtoreadlists.isChecked() + prefs['addtolistsonread'] = self.readinglist_tab.addtolistsonread.isChecked() + + # personal.ini + ini = unicode(self.personalini_tab.ini.toPlainText()) + if ini: + prefs['personal.ini'] = ini + else: + # if they've removed everything, reset to default. 
+ prefs['personal.ini'] = get_resources('plugin-example.ini') + + # Generate Covers tab + prefs['gcnewonly'] = self.generatecover_tab.gcnewonly.isChecked() + gc_site_settings = {} + for (site,combo) in self.generatecover_tab.gc_dropdowns.iteritems(): + val = unicode(convert_qvariant(combo.itemData(combo.currentIndex()))) + if val != 'none': + gc_site_settings[site] = val + #print("gc_site_settings[%s]:%s"%(site,gc_site_settings[site])) + prefs['gc_site_settings'] = gc_site_settings + prefs['allow_gc_from_ini'] = self.generatecover_tab.allow_gc_from_ini.isChecked() + prefs['gc_polish_cover'] = self.generatecover_tab.gc_polish_cover.isChecked() + + # Count Pages tab + countpagesstats = [] + + if self.countpages_tab.pagecount.isChecked(): + countpagesstats.append('PageCount') + if self.countpages_tab.wordcount.isChecked(): + countpagesstats.append('WordCount') + if self.countpages_tab.fleschreading.isChecked(): + countpagesstats.append('FleschReading') + if self.countpages_tab.fleschgrade.isChecked(): + countpagesstats.append('FleschGrade') + if self.countpages_tab.gunningfog.isChecked(): + countpagesstats.append('GunningFog') + + prefs['countpagesstats'] = countpagesstats + + # Standard Columns tab + colsnewonly = {} + for (col,checkbox) in self.std_columns_tab.stdcol_newonlycheck.iteritems(): + colsnewonly[col] = checkbox.isChecked() + prefs['std_cols_newonly'] = colsnewonly + + # Custom Columns tab + # error column + prefs['errorcol'] = unicode(convert_qvariant(self.cust_columns_tab.errorcol.itemData(self.cust_columns_tab.errorcol.currentIndex()))) + + # cust cols tab + colsmap = {} + for (col,combo) in self.cust_columns_tab.custcol_dropdowns.iteritems(): + val = unicode(convert_qvariant(combo.itemData(combo.currentIndex()))) + if val != 'none': + colsmap[col] = val + #print("colsmap[%s]:%s"%(col,colsmap[col])) + prefs['custom_cols'] = colsmap + + colsnewonly = {} + for (col,checkbox) in self.cust_columns_tab.custcol_newonlycheck.iteritems(): + colsnewonly[col] = 
checkbox.isChecked() + prefs['custom_cols_newonly'] = colsnewonly + + prefs['allow_custcol_from_ini'] = self.cust_columns_tab.allow_custcol_from_ini.isChecked() + + prefs.save_to_db() + + def edit_shortcuts(self): + self.save_settings() + # Force the menus to be rebuilt immediately, so we have all our actions registered + self.plugin_action.rebuild_menus() + d = KeyboardConfigDialog(self.plugin_action.gui, self.plugin_action.action_spec[0]) + if d.exec_() == d.Accepted: + self.plugin_action.gui.keyboard.finalize() + +class BasicTab(QWidget): + + def __init__(self, parent_dialog, plugin_action): + self.parent_dialog = parent_dialog + self.plugin_action = plugin_action + QWidget.__init__(self) + + topl = QVBoxLayout() + self.setLayout(topl) + + label = QLabel(_('These settings control the basic features of the plugin--downloading FanFiction.')) + label.setWordWrap(True) + topl.addWidget(label) + + defs_gb = groupbox = QGroupBox(_("Defaults Options on Download")) + self.l = QVBoxLayout() + groupbox.setLayout(self.l) + + tooltip = _("On each download, FFDL offers an option to select the output format.
This sets what that option will default to.") + horz = QHBoxLayout() + label = QLabel(_('Default Output &Format:')) + label.setToolTip(tooltip) + horz.addWidget(label) + self.fileform = QComboBox(self) + self.fileform.addItem('epub') + self.fileform.addItem('mobi') + self.fileform.addItem('html') + self.fileform.addItem('txt') + self.fileform.setCurrentIndex(self.fileform.findText(prefs['fileform'])) + self.fileform.setToolTip(tooltip) + self.fileform.activated.connect(self.set_collisions) + label.setBuddy(self.fileform) + horz.addWidget(self.fileform) + self.l.addLayout(horz) + + tooltip = _("On each download, FFDL offers an option of what happens if that story already exists.
This sets what that option will default to.") + horz = QHBoxLayout() + label = QLabel(_('Default If Story Already Exists?')) + label.setToolTip(tooltip) + horz.addWidget(label) + self.collision = QComboBox(self) + # add collision options + self.set_collisions() + i = self.collision.findText(save_collisions[prefs['collision']]) + if i > -1: + self.collision.setCurrentIndex(i) + self.collision.setToolTip(tooltip) + label.setBuddy(self.collision) + horz.addWidget(self.collision) + self.l.addLayout(horz) + + self.updatemeta = QCheckBox(_('Default Update Calibre &Metadata?'),self) + self.updatemeta.setToolTip(_("On each download, FFDL offers an option to update Calibre's metadata (title, author, URL, tags, custom columns, etc) from the web site.
This sets whether that will default to on or off.
Columns set to 'New Only' in the column tabs will only be set for new books.")) + self.updatemeta.setChecked(prefs['updatemeta']) + self.l.addWidget(self.updatemeta) + + self.updateepubcover = QCheckBox(_('Default Update EPUB Cover when Updating EPUB?'),self) + self.updateepubcover.setToolTip(_("On each download, FFDL offers an option to update the book cover image inside the EPUB from the web site when the EPUB is updated.
This sets whether that will default to on or off.")) + self.updateepubcover.setChecked(prefs['updateepubcover']) + self.l.addWidget(self.updateepubcover) + + self.smarten_punctuation = QCheckBox(_('Smarten Punctuation (EPUB only)'),self) + self.smarten_punctuation.setToolTip(_("Run Smarten Punctuation from Calibre's Polish Book feature on each EPUB download and update.")) + self.smarten_punctuation.setChecked(prefs['smarten_punctuation']) + if calibre_version >= (0, 9, 39): + self.l.addWidget(self.smarten_punctuation) + + cali_gb = groupbox = QGroupBox(_("Updating Calibre Options")) + self.l = QVBoxLayout() + groupbox.setLayout(self.l) + + self.deleteotherforms = QCheckBox(_('Delete other existing formats?'),self) + self.deleteotherforms.setToolTip(_('Check this to automatically delete all other ebook formats when updating an existing book.\nHandy if you have both a Nook(epub) and Kindle(mobi), for example.')) + self.deleteotherforms.setChecked(prefs['deleteotherforms']) + self.l.addWidget(self.deleteotherforms) + + self.updatecover = QCheckBox(_('Update Calibre Cover when Updating Metadata?'),self) + self.updatecover.setToolTip(_("Update calibre book cover image from EPUB when metadata is updated. 
(EPUB only.)\nDoesn't go looking for new images on 'Update Calibre Metadata Only'.")) + self.updatecover.setChecked(prefs['updatecover']) + self.l.addWidget(self.updatecover) + + self.keeptags = QCheckBox(_('Keep Existing Tags when Updating Metadata?'),self) + self.keeptags.setToolTip(_("Existing tags will be kept and any new tags added.\n%(cmplt)s and %(inprog)s tags will be still be updated, if known.\n%(lul)s tags will be updated if %(lus)s in %(is)s.\n(If Tags is set to 'New Only' in the Standard Columns tab, this has no effect.)")%no_trans) + self.keeptags.setChecked(prefs['keeptags']) + self.l.addWidget(self.keeptags) + + self.suppressauthorsort = QCheckBox(_('Force Author into Author Sort?'),self) + self.suppressauthorsort.setToolTip(_("If checked, the author(s) as given will be used for the Author Sort, too.\nIf not checked, calibre will apply it's built in algorithm which makes 'Bob Smith' sort as 'Smith, Bob', etc.")) + self.suppressauthorsort.setChecked(prefs['suppressauthorsort']) + self.l.addWidget(self.suppressauthorsort) + + self.suppresstitlesort = QCheckBox(_('Force Title into Title Sort?'),self) + self.suppresstitlesort.setToolTip(_("If checked, the title as given will be used for the Title Sort, too.\nIf not checked, calibre will apply it's built in algorithm which makes 'The Title' sort as 'Title, The', etc.")) + self.suppresstitlesort.setChecked(prefs['suppresstitlesort']) + self.l.addWidget(self.suppresstitlesort) + + self.checkforseriesurlid = QCheckBox(_("Check for existing Series Anthology books?"),self) + self.checkforseriesurlid.setToolTip(_("Check for existings Series Anthology books using each new story's series URL before downloading.\nOffer to skip downloading if a Series Anthology is found.")) + self.checkforseriesurlid.setChecked(prefs['checkforseriesurlid']) + self.l.addWidget(self.checkforseriesurlid) + + self.checkforurlchange = QCheckBox(_("Check for changed Story URL?"),self) + self.checkforurlchange.setToolTip(_("Warn you if 
an update will change the URL of an existing book.\nfanfiction.net URLs will change from http to https silently.")) + self.checkforurlchange.setChecked(prefs['checkforurlchange']) + self.l.addWidget(self.checkforurlchange) + + self.lookforurlinhtml = QCheckBox(_("Search EPUB text for Story URL?"),self) + self.lookforurlinhtml.setToolTip(_("Look for first valid story URL inside EPUB text if not found in metadata.\nSomewhat risky, could find wrong URL depending on EPUB content.\nAlso finds and corrects bad ffnet URLs from ficsaver.com files.")) + self.lookforurlinhtml.setChecked(prefs['lookforurlinhtml']) + self.l.addWidget(self.lookforurlinhtml) + + self.mark = QCheckBox(_("Mark added/updated books when finished?"),self) + self.mark.setToolTip(_("Mark added/updated books when finished. Use with option below.\nYou can also manually search for 'marked:ffdl_success'.\n'marked:ffdl_failed' is also available, or search 'marked:ffdl' for both.")) + self.mark.setChecked(prefs['mark']) + self.l.addWidget(self.mark) + + self.showmarked = QCheckBox(_("Show Marked books when finished?"),self) + self.showmarked.setToolTip(_("Show Marked added/updated books only when finished.\nYou can also manually search for 'marked:ffdl_success'.\n'marked:ffdl_failed' is also available, or search 'marked:ffdl' for both.")) + self.showmarked.setChecked(prefs['showmarked']) + self.l.addWidget(self.showmarked) + + self.autoconvert = QCheckBox(_("Automatically Convert new/update books?"),self) + self.autoconvert.setToolTip(_("Automatically call calibre's Convert for new/update books.\nConverts to the current output format as chosen in calibre's\nPreferences->Behavior settings.")) + self.autoconvert.setChecked(prefs['autoconvert']) + self.l.addWidget(self.autoconvert) + + gui_gb = groupbox = QGroupBox(_("GUI Options")) + self.l = QVBoxLayout() + groupbox.setLayout(self.l) + + self.urlsfromclip = QCheckBox(_('Take URLs from Clipboard?'),self) + self.urlsfromclip.setToolTip(_('Prefill URLs from 
valid URLs in Clipboard when Adding New.')) + self.urlsfromclip.setChecked(prefs['urlsfromclip']) + self.l.addWidget(self.urlsfromclip) + + self.updatedefault = QCheckBox(_('Default to Update when books selected?'),self) + self.updatedefault.setToolTip(_('The top FanFictionDownLoader plugin button will start Update if\nbooks are selected. If unchecked, it will always bring up \'Add New\'.')) + self.updatedefault.setChecked(prefs['updatedefault']) + self.l.addWidget(self.updatedefault) + + self.adddialogstaysontop = QCheckBox(_("Keep 'Add New from URL(s)' dialog on top?"),self) + self.adddialogstaysontop.setToolTip(_("Instructs the OS and Window Manager to keep the 'Add New from URL(s)'\ndialog on top of all other windows. Useful for dragging URLs onto it.")) + self.adddialogstaysontop.setChecked(prefs['adddialogstaysontop']) + self.l.addWidget(self.adddialogstaysontop) + + misc_gb = groupbox = QGroupBox(_("Misc Options")) + self.l = QVBoxLayout() + groupbox.setLayout(self.l) + + # this is a cheat to make it easier for users to realize there's a new include_images features. + self.includeimages = QCheckBox(_("Include images in EPUBs?"),self) + self.includeimages.setToolTip(_("Download and include images in EPUB stories. This is equivalent to adding:%(imgset)s ...to the top of %(pini)s. 
Your settings in %(pini)s will override this.")%no_trans) + self.includeimages.setChecked(prefs['includeimages']) + self.l.addWidget(self.includeimages) + + self.injectseries = QCheckBox(_("Inject calibre Series when none found?"),self) + self.injectseries.setToolTip(_("If no series is found, inject the calibre series (if there is one) so it appears on the FFDL title page(not cover).")) + self.injectseries.setChecked(prefs['injectseries']) + self.l.addWidget(self.injectseries) + + rej_gb = groupbox = QGroupBox(_("Reject List")) + self.l = QVBoxLayout() + groupbox.setLayout(self.l) + + self.rejectlist = QPushButton(_('Edit Reject URL List'), self) + self.rejectlist.setToolTip(_("Edit list of URLs FFDL will automatically Reject.")) + self.rejectlist.clicked.connect(self.show_rejectlist) + self.l.addWidget(self.rejectlist) + + self.reject_urls = QPushButton(_('Add Reject URLs'), self) + self.reject_urls.setToolTip(_("Add additional URLs to Reject as text.")) + self.reject_urls.clicked.connect(self.add_reject_urls) + self.l.addWidget(self.reject_urls) + + self.reject_reasons = QPushButton(_('Edit Reject Reasons List'), self) + self.reject_reasons.setToolTip(_("Customize the Reasons presented when Rejecting URLs")) + self.reject_reasons.clicked.connect(self.show_reject_reasons) + self.l.addWidget(self.reject_reasons) + + self.reject_always = QCheckBox(_('Reject Without Confirmation?'),self) + self.reject_always.setToolTip(_("Always reject URLs on the Reject List without stopping and asking.")) + self.reject_always.setChecked(prefs['reject_always']) + self.l.addWidget(self.reject_always) + + topl.addWidget(defs_gb) + + horz = QHBoxLayout() + + horz.addWidget(cali_gb) + + vert = QVBoxLayout() + vert.addWidget(gui_gb) + vert.addWidget(misc_gb) + vert.addWidget(rej_gb) + + horz.addLayout(vert) + + topl.addLayout(horz) + topl.insertStretch(-1) + + def set_collisions(self): + prev=self.collision.currentText() + self.collision.clear() + for o in collision_order: + if 
self.fileform.currentText() == 'epub' or o not in [UPDATE,UPDATEALWAYS]: + self.collision.addItem(o) + i = self.collision.findText(prev) + if i > -1: + self.collision.setCurrentIndex(i) + + def show_defaults(self): + text = get_resources('plugin-defaults.ini') + ShowDefaultsIniDialog(self.windowIcon(),text,self).exec_() + + def show_rejectlist(self): + d = RejectListDialog(self, + rejecturllist.get_list(), + rejectreasons=rejecturllist.get_reject_reasons(), + header=_("Edit Reject URLs List"), + show_delete=False, + show_all_reasons=False) + d.exec_() + + if d.result() != d.Accepted: + return + + rejecturllist.add(d.get_reject_list(),clear=True) + + def show_reject_reasons(self): + d = EditTextDialog(self, + prefs['rejectreasons'], + icon=self.windowIcon(), + title=_("Reject Reasons"), + label=_("Customize Reject List Reasons"), + tooltip=_("Customize the Reasons presented when Rejecting URLs")) + d.exec_() + if d.result() == d.Accepted: + prefs['rejectreasons'] = d.get_plain_text() + + def add_reject_urls(self): + d = EditTextDialog(self, + "http://example.com/story.php?sid=5,"+_("Reason why I rejected it")+"\nhttp://example.com/story.php?sid=6,"+_("Title by Author")+" - "+_("Reason why I rejected it"), + icon=self.windowIcon(), + title=_("Add Reject URLs"), + label=_("Add Reject URLs. Use: http://...,note or http://...,title by author - note
Invalid story URLs will be ignored."), + tooltip=_("One URL per line:\nhttp://...,note\nhttp://...,title by author - note"), + rejectreasons=rejecturllist.get_reject_reasons(), + reasonslabel=_('Add this reason to all URLs added:')) + d.exec_() + if d.result() == d.Accepted: + rejecturllist.add_text(d.get_plain_text(),d.get_reason_text()) + +class PersonalIniTab(QWidget): + + def __init__(self, parent_dialog, plugin_action): + self.parent_dialog = parent_dialog + self.plugin_action = plugin_action + QWidget.__init__(self) + + self.l = QVBoxLayout() + self.setLayout(self.l) + + label = QLabel(_('These settings provide more detailed control over what metadata will be displayed inside the ebook as well as let you set %(isa)s and %(u)s/%(p)s for different sites.')%no_trans) + label.setWordWrap(True) + self.l.addWidget(label) + self.l.addSpacing(5) + + self.label = QLabel('personal.ini:') + self.l.addWidget(self.label) + + self.ini = QTextEdit(self) + try: + self.ini.setFont(QFont("Courier", + self.plugin_action.gui.font().pointSize()+1)) + except Exception as e: + logger.error("Couldn't get font: %s"%e) + self.ini.setLineWrapMode(QTextEdit.NoWrap) + self.ini.setText(prefs['personal.ini']) + self.l.addWidget(self.ini) + + self.defaults = QPushButton(_('View Defaults')+' (plugin-defaults.ini)', self) + self.defaults.setToolTip(_("View all of the plugin's configurable settings\nand their default settings.")) + self.defaults.clicked.connect(self.show_defaults) + self.l.addWidget(self.defaults) + + # self.l.insertStretch(-1) + # let edit box fill the space. 
+ + def show_defaults(self): + text = get_resources('plugin-defaults.ini') + ShowDefaultsIniDialog(self.windowIcon(),text,self).exec_() + +class ShowDefaultsIniDialog(QDialog): + + def __init__(self, icon, text, parent=None): + QDialog.__init__(self, parent) + self.resize(600, 500) + self.l = QVBoxLayout() + self.setLayout(self.l) + self.label = QLabel(_("Plugin Defaults (%s) (Read-Only)")%'plugin-defaults.ini') + self.label.setToolTip(_("These are all of the plugin's configurable options\nand their default settings.")) + self.setWindowTitle(_('Plugin Defaults')) + self.setWindowIcon(icon) + self.l.addWidget(self.label) + + self.ini = QTextEdit(self) + self.ini.setToolTip(_("These are all of the plugin's configurable options\nand their default settings.")) + try: + self.ini.setFont(QFont("Courier", + get_gui().font().pointSize()+1)) + except Exception as e: + logger.error("Couldn't get font: %s"%e) + self.ini.setLineWrapMode(QTextEdit.NoWrap) + self.ini.setText(text) + self.ini.setReadOnly(True) + self.l.addWidget(self.ini) + + self.ok_button = QPushButton(_('OK'), self) + self.ok_button.clicked.connect(self.hide) + self.l.addWidget(self.ok_button) + +class ReadingListTab(QWidget): + + def __init__(self, parent_dialog, plugin_action): + self.parent_dialog = parent_dialog + self.plugin_action = plugin_action + QWidget.__init__(self) + + self.l = QVBoxLayout() + self.setLayout(self.l) + + try: + rl_plugin = plugin_action.gui.iactions['Reading List'] + reading_lists = rl_plugin.get_list_names() + except KeyError: + reading_lists= [] + + label = QLabel(_('These settings provide integration with the %(rl)s Plugin. %(rl)s can automatically send to devices and change custom columns. 
You have to create and configure the lists in %(rl)s to be useful.')%no_trans) + label.setWordWrap(True) + self.l.addWidget(label) + self.l.addSpacing(5) + + self.addtolists = QCheckBox(_('Add new/updated stories to "Send to Device" Reading List(s).'),self) + self.addtolists.setToolTip(_('Automatically add new/updated stories to these lists in the %(rl)s plugin.')%no_trans) + self.addtolists.setChecked(prefs['addtolists']) + self.l.addWidget(self.addtolists) + + horz = QHBoxLayout() + label = QLabel(_('"Send to Device" Reading Lists')) + label.setToolTip(_("When enabled, new/updated stories will be automatically added to these lists.")) + horz.addWidget(label) + self.send_lists_box = EditWithComplete(self) + self.send_lists_box.setToolTip(_("When enabled, new/updated stories will be automatically added to these lists.")) + self.send_lists_box.update_items_cache(reading_lists) + self.send_lists_box.setText(prefs['send_lists']) + horz.addWidget(self.send_lists_box) + self.l.addLayout(horz) + + self.addtoreadlists = QCheckBox(_('Add new/updated stories to "To Read" Reading List(s).'),self) + self.addtoreadlists.setToolTip(_('Automatically add new/updated stories to these lists in the %(rl)s plugin.\nAlso offers menu option to remove stories from the "To Read" lists.')%no_trans) + self.addtoreadlists.setChecked(prefs['addtoreadlists']) + self.l.addWidget(self.addtoreadlists) + + horz = QHBoxLayout() + label = QLabel(_('"To Read" Reading Lists')) + label.setToolTip(_("When enabled, new/updated stories will be automatically added to these lists.")) + horz.addWidget(label) + self.read_lists_box = EditWithComplete(self) + self.read_lists_box.setToolTip(_("When enabled, new/updated stories will be automatically added to these lists.")) + self.read_lists_box.update_items_cache(reading_lists) + self.read_lists_box.setText(prefs['read_lists']) + horz.addWidget(self.read_lists_box) + self.l.addLayout(horz) + + self.addtolistsonread = QCheckBox(_('Add stories back to "Send to 
Device" Reading List(s) when marked "Read".'),self) + self.addtolistsonread.setToolTip(_('Menu option to remove from "To Read" lists will also add stories back to "Send to Device" Reading List(s)')) + self.addtolistsonread.setChecked(prefs['addtolistsonread']) + self.l.addWidget(self.addtolistsonread) + + self.l.insertStretch(-1) + +class GenerateCoverTab(QWidget): + + def __init__(self, parent_dialog, plugin_action): + self.parent_dialog = parent_dialog + self.plugin_action = plugin_action + QWidget.__init__(self) + + self.l = QVBoxLayout() + self.setLayout(self.l) + + try: + gc_plugin = plugin_action.gui.iactions['Generate Cover'] + gc_settings = gc_plugin.get_saved_setting_names() + except KeyError: + gc_settings= [] + + label = QLabel(_('The %(gc)s plugin can create cover images for books using various metadata and configurations. If you have GC installed, FFDL can run GC on new downloads and metadata updates. Pick a GC setting by site or Default.')%no_trans) + label.setWordWrap(True) + self.l.addWidget(label) + self.l.addSpacing(5) + + scrollable = QScrollArea() + scrollcontent = QWidget() + scrollable.setWidget(scrollcontent) + scrollable.setWidgetResizable(True) + self.l.addWidget(scrollable) + + self.sl = QVBoxLayout() + scrollcontent.setLayout(self.sl) + + self.gc_dropdowns = {} + + sitelist = getConfigSections() + sitelist.sort() + sitelist.insert(0,_("Default")) + for site in sitelist: + horz = QHBoxLayout() + label = QLabel(site) + if site == _("Default"): + s = _("On Metadata update, run %(gc)s with this setting, if not selected for specific site.")%no_trans + else: + no_trans['site']=site # not ideal, but, meh. 
+ s = _("On Metadata update, run %(gc)s with this setting for %(site)s stories.")%no_trans + + label.setToolTip(s) + horz.addWidget(label) + dropdown = QComboBox(self) + dropdown.setToolTip(s) + dropdown.addItem('','none') + for setting in gc_settings: + dropdown.addItem(setting,setting) + if site == _("Default"): + self.gc_dropdowns["Default"] = dropdown + if 'Default' in prefs['gc_site_settings']: + dropdown.setCurrentIndex(dropdown.findData(prefs['gc_site_settings']['Default'])) + else: + self.gc_dropdowns[site] = dropdown + if site in prefs['gc_site_settings']: + dropdown.setCurrentIndex(dropdown.findData(prefs['gc_site_settings'][site])) + + horz.addWidget(dropdown) + self.sl.addLayout(horz) + + self.sl.insertStretch(-1) + + self.gcnewonly = QCheckBox(_("Run %(gc)s Only on New Books")%no_trans,self) + self.gcnewonly.setToolTip(_("Default is to run GC any time the calibre metadata is updated.")) + self.gcnewonly.setChecked(prefs['gcnewonly']) + self.l.addWidget(self.gcnewonly) + + self.allow_gc_from_ini = QCheckBox(_('Allow %(gcset)s from %(pini)s to override')%no_trans,self) + self.allow_gc_from_ini.setToolTip(_("The %(pini)s parameter %(gcset)s allows you to choose a GC setting based on metadata rather than site, but it's much more complex.
%(gcset)s is ignored when this is off.")%no_trans) + self.allow_gc_from_ini.setChecked(prefs['allow_gc_from_ini']) + self.l.addWidget(self.allow_gc_from_ini) + + self.gc_polish_cover = QCheckBox(_("Use calibre's Polish feature to inject/update the cover"),self) + self.gc_polish_cover.setToolTip(_("Calibre's Polish feature will be used to inject or update the generated cover into the ebook, EPUB only.")) + self.gc_polish_cover.setChecked(prefs['gc_polish_cover']) + self.l.addWidget(self.gc_polish_cover) + +class CountPagesTab(QWidget): + + def __init__(self, parent_dialog, plugin_action): + self.parent_dialog = parent_dialog + self.plugin_action = plugin_action + QWidget.__init__(self) + + self.l = QVBoxLayout() + self.setLayout(self.l) + + label = QLabel(_('These settings provide integration with the %(cp)s Plugin. %(cp)s can automatically update custom columns with page, word and reading level statistics. You have to create and configure the columns in %(cp)s first.')%no_trans) + label.setWordWrap(True) + self.l.addWidget(label) + self.l.addSpacing(5) + + label = QLabel(_('If any of the settings below are checked, when stories are added or updated, the %(cp)s Plugin will be called to update the checked statistics.')%no_trans) + label.setWordWrap(True) + self.l.addWidget(label) + self.l.addSpacing(5) + + # the same for all settings. Mostly. 
+ tooltip = _('Which column and algorithm to use are configured in %(cp)s.')%no_trans + # 'PageCount', 'WordCount', 'FleschReading', 'FleschGrade', 'GunningFog' + self.pagecount = QCheckBox('Page Count',self) + self.pagecount.setToolTip(tooltip) + self.pagecount.setChecked('PageCount' in prefs['countpagesstats']) + self.l.addWidget(self.pagecount) + + self.wordcount = QCheckBox('Word Count',self) + self.wordcount.setToolTip(tooltip+"\n"+_('Will overwrite word count from FFDL metadata if set to update the same custom column.')) + self.wordcount.setChecked('WordCount' in prefs['countpagesstats']) + self.l.addWidget(self.wordcount) + + self.fleschreading = QCheckBox('Flesch Reading Ease',self) + self.fleschreading.setToolTip(tooltip) + self.fleschreading.setChecked('FleschReading' in prefs['countpagesstats']) + self.l.addWidget(self.fleschreading) + + self.fleschgrade = QCheckBox('Flesch-Kincaid Grade Level',self) + self.fleschgrade.setToolTip(tooltip) + self.fleschgrade.setChecked('FleschGrade' in prefs['countpagesstats']) + self.l.addWidget(self.fleschgrade) + + self.gunningfog = QCheckBox('Gunning Fog Index',self) + self.gunningfog.setToolTip(tooltip) + self.gunningfog.setChecked('GunningFog' in prefs['countpagesstats']) + self.l.addWidget(self.gunningfog) + + self.l.insertStretch(-1) + +class OtherTab(QWidget): + + def __init__(self, parent_dialog, plugin_action): + self.parent_dialog = parent_dialog + self.plugin_action = plugin_action + QWidget.__init__(self) + + self.l = QVBoxLayout() + self.setLayout(self.l) + + label = QLabel(_("These controls aren't plugin settings as such, but convenience buttons for setting Keyboard shortcuts and getting all the FanFictionDownLoader confirmation dialogs back again.")) + label.setWordWrap(True) + self.l.addWidget(label) + self.l.addSpacing(5) + + keyboard_shortcuts_button = QPushButton(_('Keyboard shortcuts...'), self) + keyboard_shortcuts_button.setToolTip(_('Edit the keyboard shortcuts associated with this plugin')) + 
keyboard_shortcuts_button.clicked.connect(parent_dialog.edit_shortcuts) + self.l.addWidget(keyboard_shortcuts_button) + + reset_confirmation_button = QPushButton(_('Reset disabled &confirmation dialogs'), self) + reset_confirmation_button.setToolTip(_('Reset all show me again dialogs for the FanFictionDownLoader plugin')) + reset_confirmation_button.clicked.connect(self.reset_dialogs) + self.l.addWidget(reset_confirmation_button) + + view_prefs_button = QPushButton(_('&View library preferences...'), self) + view_prefs_button.setToolTip(_('View data stored in the library database for this plugin')) + view_prefs_button.clicked.connect(self.view_prefs) + self.l.addWidget(view_prefs_button) + + self.l.insertStretch(-1) + + def reset_dialogs(self): + for key in dynamic.keys(): + if key.startswith('fanfictiondownloader_') and key.endswith('_again') \ + and dynamic[key] is False: + dynamic[key] = True + info_dialog(self, _('Done'), + _('Confirmation dialogs have all been reset'), + show=True, + show_copy_button=False) + + def view_prefs(self): + d = PrefsViewerDialog(self.plugin_action.gui, PREFS_NAMESPACE) + d.exec_() + +permitted_values = { + 'int' : ['numWords','numChapters'], + 'float' : ['numWords','numChapters'], + 'bool' : ['status-C','status-I'], + 'datetime' : ['datePublished', 'dateUpdated', 'dateCreated'], + 'series' : ['series'], + 'enumeration' : ['category', + 'genre', + 'language', + 'series', + 'characters', + 'ships', + 'status', + 'datePublished', + 'dateUpdated', + 'dateCreated', + 'rating', + 'warnings', + 'numChapters', + 'numWords', + 'site', + 'storyId', + 'authorId', + 'extratags', + 'title', + 'storyUrl', + 'description', + 'author', + 'authorUrl', + 'formatname', + 'version' + #,'formatext' # not useful information. + #,'siteabbrev' + ] + } +# no point copying the whole list. 
+permitted_values['text'] = permitted_values['enumeration'] +permitted_values['comments'] = permitted_values['enumeration'] + +titleLabels = { + 'category':_('Category'), + 'genre':_('Genre'), + 'language':_('Language'), + 'status':_('Status'), + 'status-C':_('Status:%(cmplt)s')%no_trans, + 'status-I':_('Status:%(inprog)s')%no_trans, + 'series':_('Series'), + 'characters':_('Characters'), + 'ships':_('Relationships'), + 'datePublished':_('Published'), + 'dateUpdated':_('Updated'), + 'dateCreated':_('Created'), + 'rating':_('Rating'), + 'warnings':_('Warnings'), + 'numChapters':_('Chapters'), + 'numWords':_('Words'), + 'site':_('Site'), + 'storyId':_('Story ID'), + 'authorId':_('Author ID'), + 'extratags':_('Extra Tags'), + 'title':_('Title'), + 'storyUrl':_('Story URL'), + 'description':_('Description'), + 'author':_('Author'), + 'authorUrl':_('Author URL'), + 'formatname':_('File Format'), + 'formatext':_('File Extension'), + 'siteabbrev':_('Site Abbrev'), + 'version':_('FFDL Version') + } + +class CustomColumnsTab(QWidget): + + def __init__(self, parent_dialog, plugin_action): + self.parent_dialog = parent_dialog + self.plugin_action = plugin_action + QWidget.__init__(self) + + custom_columns = self.plugin_action.gui.library_view.model().custom_columns + + self.l = QVBoxLayout() + self.setLayout(self.l) + + label = QLabel(_("If you have custom columns defined, they will be listed below. 
Choose a metadata value type to fill your columns automatically.")) + label.setWordWrap(True) + self.l.addWidget(label) + self.l.addSpacing(5) + + self.custcol_dropdowns = {} + self.custcol_newonlycheck = {} + + scrollable = QScrollArea() + scrollcontent = QWidget() + scrollable.setWidget(scrollcontent) + scrollable.setWidgetResizable(True) + self.l.addWidget(scrollable) + + self.sl = QVBoxLayout() + scrollcontent.setLayout(self.sl) + + for key, column in custom_columns.iteritems(): + + if column['datatype'] in permitted_values: + # print("\n============== %s ===========\n"%key) + # for (k,v) in column.iteritems(): + # print("column['%s'] => %s"%(k,v)) + horz = QHBoxLayout() + label = QLabel(column['name']) + label.setToolTip(_("Update this %s column(%s) with...")%(key,column['datatype'])) + horz.addWidget(label) + dropdown = QComboBox(self) + dropdown.addItem('','none') + for md in permitted_values[column['datatype']]: + dropdown.addItem(titleLabels[md],md) + self.custcol_dropdowns[key] = dropdown + if key in prefs['custom_cols']: + dropdown.setCurrentIndex(dropdown.findData(prefs['custom_cols'][key])) + if column['datatype'] == 'enumeration': + dropdown.setToolTip(_("Metadata values valid for this type of column.")+"\n"+_("Values that aren't valid for this enumeration column will be ignored.")) + else: + dropdown.setToolTip(_("Metadata values valid for this type of column.")) + horz.addWidget(dropdown) + + newonlycheck = QCheckBox(_("New Only"),self) + newonlycheck.setToolTip(_("Write to %s(%s) only for new\nbooks, not updates to existing books.")%(column['name'],key)) + self.custcol_newonlycheck[key] = newonlycheck + if key in prefs['custom_cols_newonly']: + newonlycheck.setChecked(prefs['custom_cols_newonly'][key]) + horz.addWidget(newonlycheck) + + self.sl.addLayout(horz) + + self.sl.insertStretch(-1) + + self.l.addSpacing(5) + self.allow_custcol_from_ini = QCheckBox(_('Allow %(ccset)s from %(pini)s to override')%no_trans,self) + 
self.allow_custcol_from_ini.setToolTip(_("The %(pini)s parameter %(ccset)s allows you to set custom columns to site specific values that aren't common to all sites.
%(ccset)s is ignored when this is off.")%no_trans) + self.allow_custcol_from_ini.setChecked(prefs['allow_custcol_from_ini']) + self.l.addWidget(self.allow_custcol_from_ini) + + self.l.addSpacing(5) + label = QLabel(_("Special column:")) + label.setWordWrap(True) + self.l.addWidget(label) + + horz = QHBoxLayout() + label = QLabel(_("Update/Overwrite Error Column:")) + tooltip=_("When an update or overwrite of an existing story fails, record the reason in this column.\n(Text and Long Text columns only.)") + label.setToolTip(tooltip) + horz.addWidget(label) + self.errorcol = QComboBox(self) + self.errorcol.setToolTip(tooltip) + self.errorcol.addItem('','none') + for key, column in custom_columns.iteritems(): + if column['datatype'] in ('text','comments'): + self.errorcol.addItem(column['name'],key) + self.errorcol.setCurrentIndex(self.errorcol.findData(prefs['errorcol'])) + horz.addWidget(self.errorcol) + self.l.addLayout(horz) + + #print("prefs['custom_cols'] %s"%prefs['custom_cols']) + + +class StandardColumnsTab(QWidget): + + def __init__(self, parent_dialog, plugin_action): + self.parent_dialog = parent_dialog + self.plugin_action = plugin_action + QWidget.__init__(self) + + columns=OrderedDict() + + columns["title"]=_("Title") + columns["authors"]=_("Author(s)") + columns["publisher"]=_("Publisher") + columns["tags"]=_("Tags") + columns["languages"]=_("Languages") + columns["pubdate"]=_("Published Date") + columns["timestamp"]=_("Date") + columns["comments"]=_("Comments") + columns["series"]=_("Series") + columns["identifiers"]=_("Ids(url id only)") + + self.l = QVBoxLayout() + self.setLayout(self.l) + + label = QLabel(_("The standard calibre metadata columns are listed below. 
You may choose whether FFDL will fill each column automatically on updates or only for new books.")) + label.setWordWrap(True) + self.l.addWidget(label) + self.l.addSpacing(5) + + self.stdcol_newonlycheck = {} + + for key, column in columns.iteritems(): + horz = QHBoxLayout() + label = QLabel(column) + #label.setToolTip("Update this %s column(%s) with..."%(key,column['datatype'])) + horz.addWidget(label) + + newonlycheck = QCheckBox(_("New Only"),self) + newonlycheck.setToolTip(_("Write to %s only for new\nbooks, not updates to existing books.")%column) + self.stdcol_newonlycheck[key] = newonlycheck + if key in prefs['std_cols_newonly']: + newonlycheck.setChecked(prefs['std_cols_newonly'][key]) + horz.addWidget(newonlycheck) + + self.l.addLayout(horz) + + self.l.insertStretch(-1) + diff --git a/calibre-plugin/dialogs.py b/calibre-plugin/dialogs.py new file mode 100644 index 00000000..19095e5a --- /dev/null +++ b/calibre-plugin/dialogs.py @@ -0,0 +1,1125 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai +from __future__ import (unicode_literals, division, + print_function) + +__license__ = 'GPL v3' +__copyright__ = '2011, Jim Miller' +__docformat__ = 'restructuredtext en' + +import logging +logger = logging.getLogger(__name__) + +import traceback, re +from functools import partial + +import logging +logger = logging.getLogger(__name__) + +import urllib +import email + +try: + from PyQt5 import QtWidgets as QtGui + from PyQt5.Qt import (QDialog, QTableWidget, QVBoxLayout, QHBoxLayout, QGridLayout, + QPushButton, QLabel, QCheckBox, QIcon, QLineEdit, + QComboBox, QProgressDialog, QTimer, QDialogButtonBox, + QPixmap, Qt, QAbstractItemView, QTextEdit, pyqtSignal, + QGroupBox, QFrame) +except ImportError as e: + from PyQt4 import QtGui + from PyQt4.Qt import (QDialog, QTableWidget, QVBoxLayout, QHBoxLayout, QGridLayout, + QPushButton, QLabel, QCheckBox, QIcon, QLineEdit, + QComboBox, QProgressDialog, QTimer, QDialogButtonBox, + QPixmap, Qt, 
QAbstractItemView, QTextEdit, pyqtSignal, + QGroupBox, QFrame) + +try: + from calibre.gui2 import QVariant + del QVariant +except ImportError: + is_qt4 = False + convert_qvariant = lambda x: x +else: + is_qt4 = True + def convert_qvariant(x): + vt = x.type() + if vt == x.String: + return unicode(x.toString()) + if vt == x.List: + return [convert_qvariant(i) for i in x.toList()] + return x.toPyObject() + +from calibre.gui2.dialogs.confirm_delete import confirm +from calibre.gui2.complete2 import EditWithComplete + +# pulls in translation files for _() strings +try: + load_translations() +except NameError: + pass # load_translations() added in calibre 1.9 + +from calibre_plugins.fanfictiondownloader_plugin.common_utils \ + import (ReadOnlyTableWidgetItem, ReadOnlyTextIconWidgetItem, SizePersistedDialog, + ImageTitleLayout, get_icon) + +from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader.geturls import get_urls_from_html, get_urls_from_text +from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader.adapters import getNormalStoryURL + +SKIP=_('Skip') +ADDNEW=_('Add New Book') +UPDATE=_('Update EPUB if New Chapters') +UPDATEALWAYS=_('Update EPUB Always') +OVERWRITE=_('Overwrite if Newer') +OVERWRITEALWAYS=_('Overwrite Always') +CALIBREONLY=_('Update Calibre Metadata Only') +collision_order=[SKIP, + ADDNEW, + UPDATE, + UPDATEALWAYS, + OVERWRITE, + OVERWRITEALWAYS, + CALIBREONLY,] + +# best idea I've had for how to deal with config/pref saving the +# collision name in english. 
+SAVE_SKIP='Skip' +SAVE_ADDNEW='Add New Book' +SAVE_UPDATE='Update EPUB if New Chapters' +SAVE_UPDATEALWAYS='Update EPUB Always' +SAVE_OVERWRITE='Overwrite if Newer' +SAVE_OVERWRITEALWAYS='Overwrite Always' +SAVE_CALIBREONLY='Update Calibre Metadata Only' +save_collisions={ + SKIP:SAVE_SKIP, + ADDNEW:SAVE_ADDNEW, + UPDATE:SAVE_UPDATE, + UPDATEALWAYS:SAVE_UPDATEALWAYS, + OVERWRITE:SAVE_OVERWRITE, + OVERWRITEALWAYS:SAVE_OVERWRITEALWAYS, + CALIBREONLY:SAVE_CALIBREONLY, + SAVE_SKIP:SKIP, + SAVE_ADDNEW:ADDNEW, + SAVE_UPDATE:UPDATE, + SAVE_UPDATEALWAYS:UPDATEALWAYS, + SAVE_OVERWRITE:OVERWRITE, + SAVE_OVERWRITEALWAYS:OVERWRITEALWAYS, + SAVE_CALIBREONLY:CALIBREONLY, + } + +anthology_collision_order=[UPDATE, + UPDATEALWAYS, + OVERWRITEALWAYS] + +gpstyle='QGroupBox {border:0; padding-top:10px; padding-bottom:0px; margin-bottom:0px;}' # background-color:red; + +class RejectUrlEntry: + + matchpat=re.compile(r"^(?P[^,]+)(,(?P(((?P.+) by (?P<auth>.+?)( - (?P<note>.+))?)|.*)))?$") + + def __init__(self,url_or_line,note=None,title=None,auth=None, + addreasontext=None,fromline=False,book_id=None): + + self.url=url_or_line + self.note=note + self.title=title + self.auth=auth + self.valid=False + self.book_id=book_id + + if fromline: + mc = re.match(self.matchpat,url_or_line) + if mc: + #print("mc:%s"%mc.groupdict()) + (url,title,auth,note) = mc.group('url','title','auth','note') + if not mc.group('title'): + title='' + auth='' + note=mc.group('fullnote') + self.url=url + self.note=note + self.title=title + self.auth=auth + + if not self.note: + if addreasontext: + self.note = addreasontext + else: + self.note = '' + else: + if addreasontext: + self.note = self.note + ' - ' + addreasontext + + self.url = getNormalStoryURL(self.url) + self.valid = self.url != None + + def to_line(self): + # always 'url,' + return self.url+","+self.fullnote() + + def fullnote(self): + retval = "" + if self.title and self.auth: + # don't translate--ends up being saved and confuses regex above. 
+ retval = retval + "%s by %s"%(self.title,self.auth) + if self.note: + retval = retval + " - " + + if self.note: + retval = retval + self.note + + return retval + +class NotGoingToDownload(Exception): + def __init__(self,error,icon='dialog_error.png'): + self.error=error + self.icon=icon + + def __str__(self): + return self.error + +class DroppableQTextEdit(QTextEdit): + def __init__(self,parent): + QTextEdit.__init__(self,parent) + + def dropEvent(self,event): + # print("event:%s"%event) + + mimetype='text/uri-list' + + urllist=[] + filelist="%s"%event.mimeData().data(mimetype) + for f in filelist.splitlines(): + #print("filename:%s"%f) + if f.endswith(".eml"): + fhandle = urllib.urlopen(f) + #print("file:\n%s\n\n"%fhandle.read()) + msg = email.message_from_file(fhandle) + if msg.is_multipart(): + for part in msg.walk(): + #print("part type:%s"%part.get_content_type()) + if part.get_content_type() == "text/html": + #print("URL list:%s"%get_urls_from_data(part.get_payload(decode=True))) + urllist.extend(get_urls_from_html(part.get_payload(decode=True))) + if part.get_content_type() == "text/plain": + #print("part content:text/plain") + # print("part content:%s"%part.get_payload(decode=True)) + urllist.extend(get_urls_from_text(part.get_payload(decode=True))) + else: + urllist.extend(get_urls_from_text("%s"%msg)) + if urllist: + self.append("\n".join(urllist)) + return None + return QTextEdit.dropEvent(self,event) + + def canInsertFromMimeData(self, source): + if source.hasUrls(): + return True + else: + return QTextEdit.canInsertFromMimeData(self,source) + + def insertFromMimeData(self, source): + if source.hasText(): + self.append(source.text()) + else: + return QTextEdit.insertFromMimeData(self, source) + +class AddNewDialog(SizePersistedDialog): + + go_signal = pyqtSignal(object, object, object, object) + + def __init__(self, gui, prefs, icon): + SizePersistedDialog.__init__(self, gui, 'FanFictionDownLoader plugin:add new dialog') + self.prefs = prefs + + 
self.setMinimumWidth(300) + self.l = QVBoxLayout() + self.setLayout(self.l) + + self.setWindowTitle(_('FanFictionDownLoader')) + self.setWindowIcon(icon) + + self.toplabel=QLabel("Toplabel") + self.l.addWidget(self.toplabel) + self.url = DroppableQTextEdit(self) + self.url.setToolTip("UrlTooltip") + self.url.setLineWrapMode(QTextEdit.NoWrap) + self.l.addWidget(self.url) + + self.merge = self.newmerge = False + + # elements to hide when doing merge. + self.mergehide = [] + # elements to show again when doing *update* merge + self.mergeupdateshow = [] + + self.groupbox = QGroupBox(_("Show Download Options")) + self.groupbox.setCheckable(True) + self.groupbox.setChecked(False) + self.groupbox.setFlat(True) + #print("style:%s"%self.groupbox.styleSheet()) + self.groupbox.setStyleSheet(gpstyle) + + self.gbf = QFrame() + self.gbl = QVBoxLayout() + self.gbl.addWidget(self.gbf) + self.groupbox.setLayout(self.gbl) + self.gbl = QVBoxLayout() + self.gbf.setLayout(self.gbl) + self.l.addWidget(self.groupbox) + + self.gbf.setVisible(False) + self.groupbox.toggled.connect(self.gbf.setVisible) + + horz = QHBoxLayout() + label = QLabel(_('Output &Format:')) + self.mergehide.append(label) + + self.fileform = QComboBox(self) + self.fileform.addItem('epub') + self.fileform.addItem('mobi') + self.fileform.addItem('html') + self.fileform.addItem('txt') + self.fileform.setToolTip(_('Choose output format to create. 
May set default from plugin configuration.')) + self.fileform.activated.connect(self.set_collisions) + + horz.addWidget(label) + label.setBuddy(self.fileform) + horz.addWidget(self.fileform) + self.gbl.addLayout(horz) + self.mergehide.append(self.fileform) + + horz = QHBoxLayout() + self.collisionlabel = QLabel("CollisionLabel") + horz.addWidget(self.collisionlabel) + self.collision = QComboBox(self) + self.collision.setToolTip("CollisionToolTip") + # add collision options + self.set_collisions() + i = self.collision.findText(save_collisions[prefs['collision']]) + if i > -1: + self.collision.setCurrentIndex(i) + self.collisionlabel.setBuddy(self.collision) + horz.addWidget(self.collision) + self.gbl.addLayout(horz) + self.mergehide.append(self.collisionlabel) + self.mergehide.append(self.collision) + self.mergeupdateshow.append(self.collisionlabel) + self.mergeupdateshow.append(self.collision) + + horz = QHBoxLayout() + self.updatemeta = QCheckBox(_('Update Calibre &Metadata?'),self) + self.updatemeta.setToolTip(_("Update metadata for existing stories in Calibre from web site?\n(Columns set to 'New Only' in the column tabs will only be set for new books.)")) + self.updatemeta.setChecked(prefs['updatemeta']) + horz.addWidget(self.updatemeta) + self.mergehide.append(self.updatemeta) + self.mergeupdateshow.append(self.updatemeta) + + self.updateepubcover = QCheckBox(_('Update EPUB Cover?'),self) + self.updateepubcover.setToolTip(_('Update book cover image from site or defaults (if found) <i>inside</i> the EPUB when EPUB is updated.')) + self.updateepubcover.setChecked(prefs['updateepubcover']) + horz.addWidget(self.updateepubcover) + self.mergehide.append(self.updateepubcover) + + self.gbl.addLayout(horz) + + self.button_box = QDialogButtonBox(QDialogButtonBox.Ok | QDialogButtonBox.Cancel) + self.button_box.accepted.connect(self.ok_clicked) + self.button_box.rejected.connect(self.reject) + self.l.addWidget(self.button_box) + + # invoke the + def ok_clicked(self): + 
self.dialog_closing(None) # save persistent size. + self.hide() + self.go_signal.emit( self.get_ffdl_options(), + self.get_urlstext(), + self.merge, + self.extrapayload ) + + def show_dialog(self, + url_list_text, + callback, + show=True, + merge=False, + newmerge=True, + extraoptions={}, + extrapayload=None): + # rather than mutex in ffdl_plugin, just bail here if it's + # already in use. + if self.isVisible(): return + + try: + self.go_signal.disconnect() + except: + pass # if not already connected. + self.go_signal.connect(callback) + + self.merge = merge + self.newmerge = newmerge + self.extraoptions = extraoptions + self.extrapayload = extrapayload + + self.groupbox.setVisible(not(self.merge and self.newmerge)) + + if self.merge: + self.toplabel.setText(_('Story URL(s) for anthology, one per line:')) + self.url.setToolTip(_('URLs for stories to include in the anthology, one per line.\nWill take URLs from clipboard, but only valid URLs.')) + self.collisionlabel.setText(_('If Story Already Exists in Anthology?')) + self.collision.setToolTip(_("What to do if there's already an existing story with the same URL in the anthology.")) + for widget in self.mergehide: + widget.setVisible(False) + if not self.newmerge: + for widget in self.mergeupdateshow: + widget.setVisible(True) + else: + for widget in self.mergehide: + widget.setVisible(True) + self.toplabel.setText(_('Story URL(s), one per line:')) + self.url.setToolTip(_('URLs for stories, one per line.\nWill take URLs from clipboard, but only valid URLs.\nAdd [1,5] after the URL to limit the download to chapters 1-5.')) + self.collisionlabel.setText(_('If Story Already Exists?')) + self.collision.setToolTip(_("What to do if there's already an existing story with the same URL or title and author.")) + + # Need to re-able after hiding/showing + self.setAcceptDrops(True) + self.url.setFocus() + + if self.prefs['adddialogstaysontop']: + QDialog.setWindowFlags ( self, Qt.Dialog | Qt.WindowStaysOnTopHint ) + else: + 
QDialog.setWindowFlags ( self, Qt.Dialog ) + + if not self.merge: + self.fileform.setCurrentIndex(self.fileform.findText(self.prefs['fileform'])) + + # add collision options + self.set_collisions() + i = self.collision.findText(save_collisions[self.prefs['collision']]) + if i > -1: + self.collision.setCurrentIndex(i) + + self.updatemeta.setChecked(self.prefs['updatemeta']) + + if not self.merge: + self.updateepubcover.setChecked(self.prefs['updateepubcover']) + + self.url.setText(url_list_text) + if url_list_text: + self.button_box.button(QDialogButtonBox.Ok).setFocus() + # restore saved size. + self.resize_dialog() + + if show: # so anthology update can be modal still. + self.show() + #self.resize(self.sizeHint()) + + def set_collisions(self): + prev=self.collision.currentText() + self.collision.clear() + if self.merge: + order = anthology_collision_order + else: + order = collision_order + for o in order: + if self.merge or self.fileform.currentText() == 'epub' or o not in [UPDATE,UPDATEALWAYS]: + self.collision.addItem(o) + i = self.collision.findText(prev) + if i > -1: + self.collision.setCurrentIndex(i) + + def get_ffdl_options(self): + retval = { + 'fileform': unicode(self.fileform.currentText()), + 'collision': unicode(self.collision.currentText()), + 'updatemeta': self.updatemeta.isChecked(), + 'updateepubcover': self.updateepubcover.isChecked(), + 'smarten_punctuation':self.prefs['smarten_punctuation'] + } + + if self.merge: + retval['fileform']=='epub' + retval['updateepubcover']=True + if self.newmerge: + retval['updatemeta']=True + retval['collision']=ADDNEW + + return dict(retval.items() + self.extraoptions.items() ) + + def get_urlstext(self): + return unicode(self.url.toPlainText()) + + +class FakeLineEdit(): + def __init__(self): + pass + + def text(self): + pass + +class CollectURLDialog(SizePersistedDialog): + ''' + Collect single url for get urls. 
+ ''' + def __init__(self, gui, title, url_text, epubmerge_plugin=None): + SizePersistedDialog.__init__(self, gui, 'FanFictionDownLoader plugin:get story urls') + self.status=False + self.anthology=False + + self.setMinimumWidth(300) + + self.l = QGridLayout() + self.setLayout(self.l) + + self.setWindowTitle(title) + self.l.addWidget(QLabel(title),0,0,1,3) + + self.l.addWidget(QLabel("URL:"),1,0) + self.url = QLineEdit(self) + self.url.setText(url_text) + self.l.addWidget(self.url,1,1,1,2) + + self.indiv_button = QPushButton(_('For Individual Books'), self) + self.indiv_button.setToolTip(_('Get URLs and go to dialog for individual story downloads.')) + self.indiv_button.clicked.connect(self.indiv) + self.l.addWidget(self.indiv_button,2,0) + + self.merge_button = QPushButton(_('For Anthology Epub'), self) + self.merge_button.setToolTip(_('Get URLs and go to dialog for Anthology download.\nRequires %s plugin.')%'EpubMerge 1.3.1+') + self.merge_button.clicked.connect(self.merge) + self.l.addWidget(self.merge_button,2,1) + self.merge_button.setEnabled(epubmerge_plugin!=None) + + self.cancel_button = QPushButton(_('Cancel'), self) + self.cancel_button.clicked.connect(self.cancel) + self.l.addWidget(self.cancel_button,2,2) + + # restore saved size. + self.resize_dialog() + + def indiv(self): + self.status=True + self.accept() + + def merge(self): + self.status=True + self.anthology=True + self.accept() + + def cancel(self): + self.status=False + self.reject() + +class UserPassDialog(QDialog): + ''' + Need to collect User/Pass for some sites. + ''' + def __init__(self, gui, site, exception=None): + QDialog.__init__(self, gui) + self.status=False + + self.l = QGridLayout() + self.setLayout(self.l) + + if exception and exception.passwdonly: + self.setWindowTitle(_('Password')) + self.l.addWidget(QLabel(_("Author requires a password for this story(%s).")%exception.url),0,0,1,2) + # user isn't used, but it's easier to still have it for + # post processing. 
+ self.user = FakeLineEdit() + else: + self.setWindowTitle(_('User/Password')) + self.l.addWidget(QLabel(_("%s requires you to login to download this story.")%site),0,0,1,2) + + self.l.addWidget(QLabel(_("User:")),1,0) + self.user = QLineEdit(self) + self.l.addWidget(self.user,1,1) + + self.l.addWidget(QLabel(_("Password:")),2,0) + self.passwd = QLineEdit(self) + self.passwd.setEchoMode(QLineEdit.Password) + self.l.addWidget(self.passwd,2,1) + + self.ok_button = QPushButton(_('OK'), self) + self.ok_button.clicked.connect(self.ok) + self.l.addWidget(self.ok_button,3,0) + + self.cancel_button = QPushButton(_('Cancel'), self) + self.cancel_button.clicked.connect(self.cancel) + self.l.addWidget(self.cancel_button,3,1) + + self.resize(self.sizeHint()) + + def ok(self): + self.status=True + self.hide() + + def cancel(self): + self.status=False + self.hide() + +class LoopProgressDialog(QProgressDialog): + ''' + ProgressDialog displayed while fetching metadata for each story. + ''' + def __init__(self, gui, + book_list, + foreach_function, + finish_function, + init_label=_("Fetching metadata for stories..."), + win_title=_("Downloading metadata for stories"), + status_prefix=_("Fetched metadata for")): + QProgressDialog.__init__(self, + init_label, + _('Cancel'), 0, len(book_list), gui) + self.setWindowTitle(win_title) + self.setMinimumWidth(500) + self.book_list = book_list + self.foreach_function = foreach_function + self.finish_function = finish_function + self.status_prefix = status_prefix + self.i = 0 + + ## self.do_loop does QTimer.singleShot on self.do_loop also. + ## A weird way to do a loop, but that was the example I had. 
+ QTimer.singleShot(0, self.do_loop) + self.exec_() + + def updateStatus(self): + self.setLabelText("%s %d / %d"%(self.status_prefix,self.i+1,len(self.book_list))) + self.setValue(self.i+1) + #print(self.labelText()) + + def do_loop(self): + + if self.i == 0: + self.setValue(0) + + book = self.book_list[self.i] + try: + ## collision spec passed into getadapter by partial from ffdl_plugin + ## no retval only if it exists, but collision is SKIP + self.foreach_function(book) + + except NotGoingToDownload as d: + book['good']=False + book['comment']=unicode(d) + book['icon'] = d.icon + + except Exception as e: + book['good']=False + book['comment']=unicode(e) + logger.error("Exception: %s:%s"%(book,unicode(e))) + traceback.print_exc() + + self.updateStatus() + self.i += 1 + + if self.i >= len(self.book_list) or self.wasCanceled(): + return self.do_when_finished() + else: + QTimer.singleShot(0, self.do_loop) + + def do_when_finished(self): + self.hide() + # Queues a job to process these books in the background. 
+ self.finish_function(self.book_list) + +class AboutDialog(QDialog): + + def __init__(self, parent, icon, text): + QDialog.__init__(self, parent) + self.resize(400, 250) + self.l = QGridLayout() + self.setLayout(self.l) + self.logo = QLabel() + self.logo.setMaximumWidth(110) + self.logo.setPixmap(QPixmap(icon.pixmap(100,100))) + self.label = QLabel(text) + self.label.setOpenExternalLinks(True) + self.label.setWordWrap(True) + self.setWindowTitle(_('About FanFictionDownLoader')) + self.setWindowIcon(icon) + self.l.addWidget(self.logo, 0, 0) + self.l.addWidget(self.label, 0, 1) + self.bb = QDialogButtonBox(self) + b = self.bb.addButton(_('OK'), self.bb.AcceptRole) + b.setDefault(True) + self.l.addWidget(self.bb, 2, 0, 1, -1) + self.bb.accepted.connect(self.accept) + +class IconWidgetItem(ReadOnlyTextIconWidgetItem): + def __init__(self, text, icon, sort_key): + ReadOnlyTextIconWidgetItem.__init__(self, text, icon) + self.sort_key = sort_key + + #Qt uses a simple < check for sorting items, override this to use the sortKey + def __lt__(self, other): + return self.sort_key < other.sort_key + +class AuthorTableWidgetItem(ReadOnlyTableWidgetItem): + def __init__(self, text, sort_key): + ReadOnlyTableWidgetItem.__init__(self, text) + self.sort_key = sort_key + + #Qt uses a simple < check for sorting items, override this to use the sortKey + def __lt__(self, other): + return self.sort_key.lower() < other.sort_key.lower() + +class UpdateExistingDialog(SizePersistedDialog): + def __init__(self, gui, header, prefs, icon, books, + save_size_name='fanfictiondownloader_plugin:update list dialog'): + SizePersistedDialog.__init__(self, gui, save_size_name) + + self.prefs = prefs + self.setWindowTitle(header) + self.setWindowIcon(icon) + + layout = QVBoxLayout(self) + self.setLayout(layout) + title_layout = ImageTitleLayout(self, 'images/icon.png', + header) + layout.addLayout(title_layout) + books_layout = QHBoxLayout() + layout.addLayout(books_layout) + + self.books_table = 
StoryListTableWidget(self) + books_layout.addWidget(self.books_table) + + button_layout = QVBoxLayout() + books_layout.addLayout(button_layout) + + spacerItem = QtGui.QSpacerItem(20, 40, QtGui.QSizePolicy.Minimum, QtGui.QSizePolicy.Expanding) + button_layout.addItem(spacerItem) + self.remove_button = QtGui.QToolButton(self) + self.remove_button.setToolTip(_('Remove selected books from the list')) + self.remove_button.setIcon(get_icon('list_remove.png')) + self.remove_button.clicked.connect(self.remove_from_list) + button_layout.addWidget(self.remove_button) + spacerItem1 = QtGui.QSpacerItem(20, 40, QtGui.QSizePolicy.Minimum, QtGui.QSizePolicy.Expanding) + button_layout.addItem(spacerItem1) + + options_layout = QHBoxLayout() + + groupbox = QGroupBox(_("Show Download Options")) + groupbox.setCheckable(True) + groupbox.setChecked(False) + groupbox.setFlat(True) + groupbox.setStyleSheet(gpstyle) + + gbf = QFrame() + gbl = QVBoxLayout() + gbl.addWidget(gbf) + groupbox.setLayout(gbl) + gbl = QHBoxLayout() + gbf.setLayout(gbl) + options_layout.addWidget(groupbox) + + gbf.setVisible(False) + groupbox.toggled.connect(gbf.setVisible) + + label = QLabel(_('Output &Format:')) + gbl.addWidget(label) + self.fileform = QComboBox(self) + self.fileform.addItem('epub') + self.fileform.addItem('mobi') + self.fileform.addItem('html') + self.fileform.addItem('txt') + self.fileform.setCurrentIndex(self.fileform.findText(prefs['fileform'])) + self.fileform.setToolTip(_('Choose output format to create. May set default from plugin configuration.')) + self.fileform.activated.connect(self.set_collisions) + label.setBuddy(self.fileform) + gbl.addWidget(self.fileform) + + label = QLabel(_('Update Mode:')) + gbl.addWidget(label) + self.collision = QComboBox(self) + self.collision.setToolTip(_("What sort of update to perform. 
May set default from plugin configuration.")) + # add collision options + self.set_collisions() + i = self.collision.findText(save_collisions[prefs['collision']]) + if i > -1: + self.collision.setCurrentIndex(i) + label.setBuddy(self.collision) + gbl.addWidget(self.collision) + + self.updatemeta = QCheckBox(_('Update Calibre &Metadata?'),self) + self.updatemeta.setToolTip(_("Update metadata for existing stories in Calibre from web site?\n(Columns set to 'New Only' in the column tabs will only be set for new books.)")) + self.updatemeta.setChecked(prefs['updatemeta']) + gbl.addWidget(self.updatemeta) + + self.updateepubcover = QCheckBox(_('Update EPUB Cover?'),self) + self.updateepubcover.setToolTip(_('Update book cover image from site or defaults (if found) <i>inside</i> the EPUB when EPUB is updated.')) + self.updateepubcover.setChecked(prefs['updateepubcover']) + gbl.addWidget(self.updateepubcover) + + + + button_box = QDialogButtonBox(QDialogButtonBox.Ok | QDialogButtonBox.Cancel) + button_box.accepted.connect(self.accept) + button_box.rejected.connect(self.reject) + options_layout.addWidget(button_box) + + layout.addLayout(options_layout) + + # Cause our dialog size to be restored from prefs or created on first usage + self.resize_dialog() + self.books_table.populate_table(books) + + def set_collisions(self): + prev=self.collision.currentText() + self.collision.clear() + for o in collision_order: + if o not in [ADDNEW,SKIP] and \ + (self.fileform.currentText() == 'epub' or o not in [UPDATE,UPDATEALWAYS]): + self.collision.addItem(o) + i = self.collision.findText(prev) + if i > -1: + self.collision.setCurrentIndex(i) + + def remove_from_list(self): + self.books_table.remove_selected_rows() + + def get_books(self): + return self.books_table.get_books() + + def get_ffdl_options(self): + return { + 'fileform': unicode(self.fileform.currentText()), + 'collision': unicode(self.collision.currentText()), + 'updatemeta': self.updatemeta.isChecked(), + 'updateepubcover': 
self.updateepubcover.isChecked(), + 'smarten_punctuation':self.prefs['smarten_punctuation'] + } + +class StoryListTableWidget(QTableWidget): + + def __init__(self, parent): + QTableWidget.__init__(self, parent) + self.setSelectionBehavior(QAbstractItemView.SelectRows) + + def populate_table(self, books): + self.clear() + self.setAlternatingRowColors(True) + self.setRowCount(len(books)) + header_labels = ['',_('Title'), _('Author'), 'URL', _('Comment')] + self.setColumnCount(len(header_labels)) + self.setHorizontalHeaderLabels(header_labels) + self.horizontalHeader().setStretchLastSection(True) + #self.verticalHeader().setDefaultSectionSize(24) + self.verticalHeader().hide() + + self.books={} + for row, book in enumerate(books): + self.populate_table_row(row, book) + self.books[row] = book + + # turning True breaks up/down. Do we need either sorting or up/down? + self.setSortingEnabled(True) + self.resizeColumnsToContents() + self.setMinimumColumnWidth(1, 100) + self.setMinimumColumnWidth(2, 100) + self.setMinimumColumnWidth(3, 100) + self.setMinimumSize(300, 0) + # if len(books) > 0: + # self.selectRow(0) + self.sortItems(1) + self.sortItems(0) + + def setMinimumColumnWidth(self, col, minimum): + if self.columnWidth(col) < minimum: + self.setColumnWidth(col, minimum) + + def populate_table_row(self, row, book): + if book['good']: + icon = get_icon('ok.png') + val = 0 + else: + icon = get_icon('minus.png') + val = 1 + if 'icon' in book: + icon = get_icon(book['icon']) + + status_cell = IconWidgetItem(None,icon,val) + status_cell.setData(Qt.UserRole, val) + self.setItem(row, 0, status_cell) + + title_cell = ReadOnlyTableWidgetItem(book['title']) + title_cell.setData(Qt.UserRole, row) + self.setItem(row, 1, title_cell) + + self.setItem(row, 2, AuthorTableWidgetItem(", ".join(book['author']), ", ".join(book['author_sort']))) + + url_cell = ReadOnlyTableWidgetItem(book['url']) + self.setItem(row, 3, url_cell) + + comment_cell = ReadOnlyTableWidgetItem(book['comment']) + 
self.setItem(row, 4, comment_cell) + + def get_books(self): + books = [] + #print("=========================\nbooks:%s"%self.books) + for row in range(self.rowCount()): + rnum = convert_qvariant(self.item(row, 1).data(Qt.UserRole)) + book = self.books[rnum] + books.append(book) + return books + + def remove_selected_rows(self): + self.setFocus() + rows = self.selectionModel().selectedRows() + if len(rows) == 0: + return + message = '<p>'+_('Are you sure you want to remove this book from the list?') + if len(rows) > 1: + message = '<p>'+_('Are you sure you want to remove the selected %d books from the list?')%len(rows) + if not confirm(message,'fanfictiondownloader_delete_item', self): + return + first_sel_row = self.currentRow() + for selrow in reversed(rows): + self.removeRow(selrow.row()) + if first_sel_row < self.rowCount(): + self.select_and_scroll_to_row(first_sel_row) + elif self.rowCount() > 0: + self.select_and_scroll_to_row(first_sel_row - 1) + + def select_and_scroll_to_row(self, row): + self.selectRow(row) + self.scrollToItem(self.currentItem()) + +class RejectListTableWidget(QTableWidget): + + def __init__(self, parent,rejectreasons=[]): + QTableWidget.__init__(self, parent) + self.setSelectionBehavior(QAbstractItemView.SelectRows) + self.rejectreasons = rejectreasons + + def populate_table(self, reject_list): + self.clear() + self.setAlternatingRowColors(True) + self.setRowCount(len(reject_list)) + header_labels = ['URL', _('Title'), _('Author'), _('Note')] + self.setColumnCount(len(header_labels)) + self.setHorizontalHeaderLabels(header_labels) + self.horizontalHeader().setStretchLastSection(True) + #self.verticalHeader().setDefaultSectionSize(24) + self.verticalHeader().hide() + + # it's generally recommended to enable sort after pop, not + # before. But then it needs to be sorted on a column and I'd + # rather keep the order given. + self.setSortingEnabled(True) + # row is just row number. 
+ for row, rejectrow in enumerate(reject_list): + #print("populating table:%s"%rejectrow.to_line()) + self.populate_table_row(row,rejectrow) + + self.resizeColumnsToContents() + self.setMinimumColumnWidth(0, 100) + self.setMinimumColumnWidth(3, 100) + self.setMinimumSize(300, 0) + + def setMinimumColumnWidth(self, col, minimum): + if self.columnWidth(col) < minimum: + self.setColumnWidth(col, minimum) + + def populate_table_row(self, row, rej): + + url_cell = ReadOnlyTableWidgetItem(rej.url) + url_cell.setData(Qt.UserRole, rej.book_id) + self.setItem(row, 0, url_cell) + self.setItem(row, 1, ReadOnlyTableWidgetItem(rej.title)) + self.setItem(row, 2, ReadOnlyTableWidgetItem(rej.auth)) + + note_cell = EditWithComplete(self,sort_func=lambda x:1) + + items = [rej.note]+self.rejectreasons + note_cell.update_items_cache(items) + note_cell.show_initial_value(rej.note) + note_cell.set_separator(None) + note_cell.setToolTip(_('Select or Edit Reject Note.')) + self.setCellWidget(row, 3, note_cell) + + def remove_selected_rows(self): + self.setFocus() + rows = self.selectionModel().selectedRows() + if len(rows) == 0: + return + message = '<p>'+_('Are you sure you want to remove this URL from the list?') + if len(rows) > 1: + message = '<p>'+_('Are you sure you want to remove the %d selected URLs from the list?')%len(rows) + if not confirm(message,'ffdl_rejectlist_delete_item_again', self): + return + first_sel_row = self.currentRow() + for selrow in reversed(rows): + self.removeRow(selrow.row()) + if first_sel_row < self.rowCount(): + self.select_and_scroll_to_row(first_sel_row) + elif self.rowCount() > 0: + self.select_and_scroll_to_row(first_sel_row - 1) + + def select_and_scroll_to_row(self, row): + self.selectRow(row) + self.scrollToItem(self.currentItem()) + +class RejectListDialog(SizePersistedDialog): + def __init__(self, gui, reject_list, + rejectreasons=[], + header=_("List of Books to Reject"), + icon='rotate-right.png', + show_delete=True, + show_all_reasons=True, + 
                 save_size_name='ffdl:reject list dialog'):
        SizePersistedDialog.__init__(self, gui, save_size_name)

        self.setWindowTitle(header)
        self.setWindowIcon(get_icon(icon))

        layout = QVBoxLayout(self)
        self.setLayout(layout)
        title_layout = ImageTitleLayout(self, icon, header,
                                        '<i></i>'+_('FFDL will remember these URLs and display the note and offer to reject them if you try to download them again later.'))
        layout.addLayout(title_layout)
        rejects_layout = QHBoxLayout()
        layout.addLayout(rejects_layout)

        self.rejects_table = RejectListTableWidget(self,rejectreasons=rejectreasons)
        rejects_layout.addWidget(self.rejects_table)

        button_layout = QVBoxLayout()
        rejects_layout.addLayout(button_layout)
        # Expanding spacers above and below keep the remove button
        # vertically centered beside the table.
        spacerItem = QtGui.QSpacerItem(20, 40, QtGui.QSizePolicy.Minimum, QtGui.QSizePolicy.Expanding)
        button_layout.addItem(spacerItem)

        self.remove_button = QtGui.QToolButton(self)
        self.remove_button.setToolTip(_('Remove selected URL(s) from the list'))
        self.remove_button.setIcon(get_icon('list_remove.png'))
        self.remove_button.clicked.connect(self.remove_from_list)
        button_layout.addWidget(self.remove_button)

        spacerItem1 = QtGui.QSpacerItem(20, 40, QtGui.QSizePolicy.Minimum, QtGui.QSizePolicy.Expanding)
        button_layout.addItem(spacerItem1)

        if show_all_reasons:
            # Optional extra reason appended to *every* URL's note, in
            # addition to the per-row notes edited in the table.
            self.reason_edit = EditWithComplete(self,sort_func=lambda x:1)

            items = ['']+rejectreasons
            self.reason_edit.update_items_cache(items)
            self.reason_edit.show_initial_value('')
            self.reason_edit.set_separator(None)
            self.reason_edit.setToolTip(_("This will be added to whatever note you've set for each URL above."))

            horz = QHBoxLayout()
            label = QLabel(_("Add this reason to all URLs added:"))
            label.setToolTip(_("This will be added to whatever note you've set for each URL above."))
            horz.addWidget(label)
            horz.addWidget(self.reason_edit)
            horz.insertStretch(-1)
            layout.addLayout(horz)

        options_layout = QHBoxLayout()

        if show_delete:
            self.deletebooks = 
QCheckBox(_('Delete Books (including books without FanFiction URLs)?'),self) + self.deletebooks.setToolTip(_("Delete the selected books after adding them to the Rejected URLs list.")) + self.deletebooks.setChecked(True) + options_layout.addWidget(self.deletebooks) + + button_box = QDialogButtonBox(QDialogButtonBox.Ok | QDialogButtonBox.Cancel) + button_box.accepted.connect(self.accept) + button_box.rejected.connect(self.reject) + options_layout.addWidget(button_box) + + layout.addLayout(options_layout) + + # Cause our dialog size to be restored from prefs or created on first usage + self.resize_dialog() + self.rejects_table.populate_table(reject_list) + + def remove_from_list(self): + self.rejects_table.remove_selected_rows() + + def get_reject_list(self): + rejectrows = [] + for row in range(self.rejects_table.rowCount()): + url = unicode(self.rejects_table.item(row, 0).text()).strip() + book_id =convert_qvariant(self.rejects_table.item(row, 0).data(Qt.UserRole)) + title = unicode(self.rejects_table.item(row, 1).text()).strip() + auth = unicode(self.rejects_table.item(row, 2).text()).strip() + note = unicode(self.rejects_table.cellWidget(row, 3).currentText()).strip() + rejectrows.append(RejectUrlEntry(url,note,title,auth,self.get_reason_text(),book_id=book_id)) + return rejectrows + + def get_reject_list_ids(self): + rejectrows = [] + for row in range(self.rejects_table.rowCount()): + book_id = convert_qvariant(self.rejects_table.item(row, 0).data(Qt.UserRole)) + if book_id: + rejectrows.append(book_id) + return rejectrows + + def get_reason_text(self): + try: + return unicode(self.reason_edit.currentText()).strip() + except: + # doesn't have self.reason_edit when editing existing list. 
+ return None + + def get_deletebooks(self): + return self.deletebooks.isChecked() + +class EditTextDialog(QDialog): + + def __init__(self, parent, text, + icon=None, title=None, label=None, tooltip=None, + rejectreasons=[],reasonslabel=None + ): + QDialog.__init__(self, parent) + self.resize(600, 500) + self.l = QVBoxLayout() + self.setLayout(self.l) + self.label = QLabel(label) + if title: + self.setWindowTitle(title) + if icon: + self.setWindowIcon(icon) + self.l.addWidget(self.label) + + self.textedit = QTextEdit(self) + self.textedit.setLineWrapMode(QTextEdit.NoWrap) + self.textedit.setText(text) + self.l.addWidget(self.textedit) + + if tooltip: + self.label.setToolTip(tooltip) + self.textedit.setToolTip(tooltip) + + if rejectreasons or reasonslabel: + self.reason_edit = EditWithComplete(self,sort_func=lambda x:1) + + items = ['']+rejectreasons + self.reason_edit.update_items_cache(items) + self.reason_edit.show_initial_value('') + self.reason_edit.set_separator(None) + self.reason_edit.setToolTip(reasonslabel) + + if reasonslabel: + horz = QHBoxLayout() + label = QLabel(reasonslabel) + label.setToolTip(reasonslabel) + horz.addWidget(label) + horz.addWidget(self.reason_edit) + self.l.addLayout(horz) + else: + self.l.addWidget(self.reason_edit) + + button_box = QDialogButtonBox(QDialogButtonBox.Ok | QDialogButtonBox.Cancel) + button_box.accepted.connect(self.accept) + button_box.rejected.connect(self.reject) + self.l.addWidget(button_box) + + def get_plain_text(self): + return unicode(self.textedit.toPlainText()) + + def get_reason_text(self): + return unicode(self.reason_edit.currentText()).strip() + diff --git a/calibre-plugin/ffdl_plugin.py b/calibre-plugin/ffdl_plugin.py new file mode 100644 index 00000000..294b7fb5 --- /dev/null +++ b/calibre-plugin/ffdl_plugin.py @@ -0,0 +1,2121 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai +from __future__ import (unicode_literals, division, absolute_import, + print_function) + 
__license__   = 'GPL v3'
__copyright__ = '2014, Jim Miller'
__docformat__ = 'restructuredtext en'

import logging
logger = logging.getLogger(__name__)

import time, os, copy, threading, re, platform, sys
from StringIO import StringIO
from functools import partial
# NOTE(review): importing `time` from datetime SHADOWS the `time` module
# imported just above -- any later module-level `time.time()`/`time.sleep()`
# call would hit datetime.time and fail.  Consider importing only
# `datetime` here and using `datetime.time` qualified -- TODO confirm no
# code below relies on the bare `time` name meaning datetime.time.
from datetime import datetime, time
from string import Template
import urllib
import email
import traceback

# Prefer Qt5 bindings (newer calibre), fall back to Qt4.
try:
    from PyQt5.Qt import (QApplication, QMenu, QTimer)
    from PyQt5.QtCore import QBuffer
except ImportError as e:
    from PyQt4.Qt import (QApplication, QMenu, QTimer)
    from PyQt4.QtCore import QBuffer

from calibre.constants import numeric_version as calibre_version

from calibre.ptempfile import PersistentTemporaryFile, PersistentTemporaryDirectory, remove_dir
from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.metadata.meta import get_metadata
from calibre.gui2 import error_dialog, warning_dialog, question_dialog, info_dialog
from calibre.gui2.dialogs.message_box import ViewLog
from calibre.gui2.dialogs.confirm_delete import confirm
from calibre.utils.config import prefs as calibre_prefs
from calibre.utils.date import local_tz
from calibre.library.comments import sanitize_comments_html
from calibre.constants import config_dir as calibre_config_dir

# The class that all interface action plugins must inherit from
from calibre.gui2.actions import InterfaceAction

# pulls in translation files for _() strings
try:
    load_translations()
except NameError:
    pass # load_translations() added in calibre 1.9

from calibre_plugins.fanfictiondownloader_plugin.common_utils import (set_plugin_icon_resources, get_icon,
                                                                      create_menu_action_unique, get_library_uuid)

from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader import adapters, exceptions
from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader.epubutils import get_dcsource, get_dcsource_chaptercount, get_story_url_from_html
from 
calibre_plugins.fanfictiondownloader_plugin.fanficdownloader.geturls import get_urls_from_page, get_urls_from_html, get_urls_from_text

from calibre_plugins.fanfictiondownloader_plugin.ffdl_util import (get_ffdl_adapter, get_ffdl_config, get_ffdl_personalini)
from calibre_plugins.fanfictiondownloader_plugin.config import (permitted_values, rejecturllist)
from calibre_plugins.fanfictiondownloader_plugin.prefs import prefs
from calibre_plugins.fanfictiondownloader_plugin.dialogs import (
    AddNewDialog, UpdateExistingDialog,
    LoopProgressDialog, UserPassDialog, AboutDialog, CollectURLDialog, RejectListDialog,
    OVERWRITE, OVERWRITEALWAYS, UPDATE, UPDATEALWAYS, ADDNEW, SKIP, CALIBREONLY,
    NotGoingToDownload, RejectUrlEntry )

# because calibre immediately transforms html into zip and don't want
# to have an 'if html'.  db.has_format is cool with the case mismatch,
# but if I'm doing it anyway...
# Maps FFDL output format -> calibre format name used by the db layer.
formmapping = {
    'epub':'EPUB',
    'mobi':'MOBI',
    'html':'ZIP',
    'txt':'TXT'
    }

PLUGIN_ICONS = ['images/icon.png']

class FanFictionDownLoaderPlugin(InterfaceAction):

    name = 'FanFictionDownLoader'

    # Declare the main action associated with this plugin
    # The keyboard shortcut can be None if you dont want to use a keyboard
    # shortcut.  Remember that currently calibre has no central management for
    # keyboard shortcuts, so try to use an unusual/unused shortcut.
    # (text, icon_path, tooltip, keyboard shortcut)
    # icon_path isn't in the zip--icon loaded below.
    action_spec = (_('FanFictionDownLoader'), None,
                   _('Download FanFiction stories from various web sites'), ())
    # None for keyboard shortcut doesn't allow shortcut.
    # () does, there just isn't one yet

    action_type = 'global'
    # make button menu drop down only
    #popup_type = QToolButton.InstantPopup

    def genesis(self):

        # This method is called once per plugin, do initial setup here

        # Read the plugin icons and store for potential sharing with the config widget
        icon_resources = self.load_resources(PLUGIN_ICONS)
        set_plugin_icon_resources(self.name, icon_resources)

        base = self.interface_action_base_plugin
        # e.g. "FanFictionDownLoader v2.0.10" -- reused in About and job options.
        self.version = base.name+" v%d.%d.%d"%base.version

        # Set the icon for this interface action
        # The get_icons function is a builtin function defined for all your
        # plugin code. It loads icons from the plugin zip file. It returns
        # QIcon objects, if you want the actual data, use the analogous
        # get_resources builtin function.

        # Note that if you are loading more than one icon, for performance, you
        # should pass a list of names to get_icons. In this case, get_icons
        # will return a dictionary mapping names to QIcons. Names that
        # are not found in the zip file will result in null QIcons.
        icon = get_icon('images/icon.png')

        self.qaction.setText(_('FanFictionDL'))

        # The qaction is automatically created from the action_spec defined
        # above
        self.qaction.setIcon(icon)

        # Call function when plugin triggered.
        self.qaction.triggered.connect(self.plugin_button)

        # Assign our menu to this action
        self.menu = QMenu(self.gui)
        # menu_actions is to keep a live reference to the menu items
        # to prevent GC removing it and so rebuild_menus has a list
        self.menu_actions = []
        self.qaction.setMenu(self.menu)
        # rebuild_menus can be triggered from several paths; serialize it.
        self.menus_lock = threading.RLock()
        self.menu.aboutToShow.connect(self.about_to_show_menu)

    def initialization_complete(self):
        # otherwise configured hot keys won't work until the menu's
        # been displayed once.
+ self.rebuild_menus() + + self.add_new_dialog = AddNewDialog(self.gui, + prefs, + self.qaction.icon()) + + ## Kludgey, yes, but with the real configuration inside the + ## library now, how else would a user be able to change this + ## setting if it's crashing calibre? + def check_macmenuhack(self): + try: + return self.macmenuhack + except: + file_path = os.path.join(calibre_config_dir, + *("plugins/fanfictiondownloader_macmenuhack.txt".split('/'))) + file_path = os.path.abspath(file_path) + logger.debug("Plugin %s macmenuhack file_path:%s"%(self.name,file_path)) + self.macmenuhack = os.access(file_path, os.F_OK) + return self.macmenuhack + + accepts_drops = True + + def accept_enter_event(self, event, mime_data): + if mime_data.hasFormat("application/calibre+from_library") or \ + mime_data.hasFormat("text/plain") or \ + mime_data.hasFormat("text/uri-list"): + return True + + return False + + def accept_drag_move_event(self, event, mime_data): + return self.accept_enter_event(event, mime_data) + + def drop_event(self, event, mime_data): + + dropped_ids=None + urllist=[] + + mime = 'application/calibre+from_library' + if mime_data.hasFormat(mime): + dropped_ids = tuple(map(int, str(mime_data.data(mime)).split())) + + mimetype='text/uri-list' + filelist="%s"%event.mimeData().data(mimetype) + if filelist: + for f in filelist.splitlines(): + #print("filename:%s"%f) + if f.endswith(".eml"): + fhandle = urllib.urlopen(f) + msg = email.message_from_file(fhandle) + if msg.is_multipart(): + for part in msg.walk(): + #print("part type:%s"%part.get_content_type()) + if part.get_content_type() == "text/html": + #print("URL list:%s"%get_urls_from_data(part.get_payload(decode=True))) + urllist.extend(get_urls_from_html(part.get_payload(decode=True))) + if part.get_content_type() == "text/plain": + #print("part content:text/plain") + #print("part content:%s"%part.get_payload(decode=True)) + urllist.extend(get_urls_from_text(part.get_payload(decode=True))) + else: + 
                        urllist.extend(get_urls_from_text("%s"%msg))
                else:
                    # Non-.eml file URL: scan the URL string itself.
                    urllist.extend(get_urls_from_text(f))
        else:
            mimetype='text/plain'
            if mime_data.hasFormat(mimetype):
                #print("text/plain:%s"%event.mimeData().data(mimetype))
                urllist.extend(get_urls_from_text(event.mimeData().data(mimetype)))

        # print("urllist:%s\ndropped_ids:%s"%(urllist,dropped_ids))
        if urllist or dropped_ids:
            # Defer so the drop event returns immediately; the dialog is
            # raised from the event loop instead of inside the DnD handler.
            QTimer.singleShot(1, partial(self.do_drop,
                                         dropped_ids=dropped_ids,
                                         urllist=urllist))
            return True

        return False

    def do_drop(self,dropped_ids=None,urllist=None):
        # shouldn't ever be both.
        if dropped_ids:
            self.update_dialog(dropped_ids)
        elif urllist:
            self.add_dialog("\n".join(urllist))

    def about_to_show_menu(self):
        self.rebuild_menus()

    def library_changed(self, db):
        # We need to reset our menus after switching libraries
        self.rebuild_menus()
        rejecturllist.clear_cache()

    def rebuild_menus(self):
        # Tear down and re-register every menu action; run under the lock
        # because this is triggered from menu-show, library switches, etc.
        with self.menus_lock:
            #self.qaction.setText("FFDL")
            do_user_config = self.interface_action_base_plugin.do_user_config
            self.menu.clear()

            for action in self.menu_actions:
                self.gui.keyboard.unregister_shortcut(action.calibre_shortcut_unique_name)
                # starting in calibre 2.10.0, actions are registers at
                # the top gui level for OSX' benefit.
                if calibre_version >= (2,10,0):
                    self.gui.removeAction(action)
            self.menu_actions = []

            self.add_action = self.create_menu_item_ex(self.menu, _('&Add New from URL(s)'), image='plus.png',
                                                       unique_name='Add New FanFiction Book(s) from URL(s)',
                                                       shortcut_name=_('Add New FanFiction Book(s) from URL(s)'),
                                                       triggered=self.add_dialog )

            self.update_action = self.create_menu_item_ex(self.menu, _('&Update Existing FanFiction Book(s)'), image='plusplus.png',
                                                          unique_name='&Update Existing FanFiction Book(s)',
                                                          triggered=self.update_dialog)

            # Anthology entries need EpubMerge 1.3.1+ to do the merging.
            if self.get_epubmerge_plugin():
                self.menu.addSeparator()
                self.get_list_url_action = self.create_menu_item_ex(self.menu, _('Get Story URLs to Download from Web Page'), image='view.png',
                                                                    unique_name='Get Story URLs from Web Page',
                                                                    triggered=self.get_urls_from_page_menu)

                self.makeanth_action = self.create_menu_item_ex(self.menu, _('&Make Anthology Epub Manually from URL(s)'), image='plusplus.png',
                                                                unique_name='Make FanFiction Anthology Epub Manually from URL(s)',
                                                                shortcut_name=_('Make FanFiction Anthology Epub Manually from URL(s)'),
                                                                triggered=partial(self.add_dialog,merge=True) )

                self.updateanth_action = self.create_menu_item_ex(self.menu, _('&Update Anthology Epub'), image='plusplus.png',
                                                                  unique_name='Update FanFiction Anthology Epub',
                                                                  shortcut_name=_('Update FanFiction Anthology Epub'),
                                                                  triggered=self.update_anthology)

            # Reading-list integration only when that plugin is installed
            # and at least one of the list prefs is enabled.
            if 'Reading List' in self.gui.iactions and (prefs['addtolists'] or prefs['addtoreadlists']) :

                self.menu.addSeparator()
                addmenutxt, rmmenutxt = None, None
                if prefs['addtolists'] and prefs['addtoreadlists'] :
                    addmenutxt = _('Add to "To Read" and "Send to Device" Lists')
                    if prefs['addtolistsonread']:
                        rmmenutxt = _('Remove from "To Read" and add to "Send to Device" Lists')
                    else:
                        rmmenutxt = _('Remove from "To Read" Lists')
                elif prefs['addtolists'] :
                    addmenutxt = _('Add Selected to "Send to Device" Lists')
                elif prefs['addtoreadlists']:
                    addmenutxt = _('Add to "To Read" Lists')
                    rmmenutxt = _('Remove from "To Read" Lists')

                if addmenutxt:
                    self.add_send_action = self.create_menu_item_ex(self.menu, addmenutxt,
                                                                    unique_name='Add to "To Read" and "Send to Device" Lists',
                                                                    image='plusplus.png',
                                                                    triggered=partial(self.update_lists,add=True))

                if rmmenutxt:
                    self.add_remove_action = self.create_menu_item_ex(self.menu, rmmenutxt,
                                                                      unique_name='Remove from "To Read" and add to "Send to Device" Lists',
                                                                      image='minusminus.png',
                                                                      triggered=partial(self.update_lists,add=False))

            self.menu.addSeparator()
            self.get_list_action = self.create_menu_item_ex(self.menu, _('Get URLs from Selected Books'),
                                                            unique_name='Get URLs from Selected Books',
                                                            image='bookmarks.png',
                                                            triggered=self.list_story_urls)

            # Without EpubMerge the page-scrape entry lives down here instead.
            if not self.get_epubmerge_plugin():
                self.get_list_url_action = self.create_menu_item_ex(self.menu, _('Get Story URLs from Web Page'),
                                                                    unique_name='Get Story URLs from Web Page',
                                                                    image='view.png',
                                                                    triggered=self.get_urls_from_page_menu)

            self.reject_list_action = self.create_menu_item_ex(self.menu, _('Reject Selected Books'),
                                                               unique_name='Reject Selected Books', image='rotate-right.png',
                                                               triggered=self.reject_list_urls)

            # print("platform.system():%s"%platform.system())
            # print("platform.mac_ver()[0]:%s"%platform.mac_ver()[0])
            if not self.check_macmenuhack(): # not platform.mac_ver()[0]: # Some macs crash on these menu items for unknown reasons.
                self.menu.addSeparator()
                self.config_action = self.create_menu_item_ex(self.menu, _('&Configure Plugin'),
                                                              image= 'config.png',
                                                              unique_name='Configure FanFictionDownLoader',
                                                              shortcut_name=_('Configure FanFictionDownLoader'),
                                                              triggered=partial(do_user_config,parent=self.gui))

                self.about_action = self.create_menu_item_ex(self.menu, _('About Plugin'),
                                                             image= 'images/icon.png',
                                                             unique_name='About FanFictionDownLoader',
                                                             shortcut_name=_('About FanFictionDownLoader'),
                                                             triggered=self.about)

            self.gui.keyboard.finalize()

    def about(self):
        # Get the about text from a file inside the plugin zip file
        # The get_resources function is a builtin function defined for all your
        # plugin code. It loads files from the plugin zip file. It returns
        # the bytes from the specified file.
        #
        # Note that if you are loading more than one file, for performance, you
        # should pass a list of names to get_resources. In this case,
        # get_resources will return a dictionary mapping names to bytes. Names that
        # are not found in the zip file will not be in the returned dictionary.
+ + text = get_resources('about.txt') + AboutDialog(self.gui,self.qaction.icon(),self.version + text).exec_() + + def create_menu_item_ex(self, parent_menu, menu_text, image=None, tooltip=None, + shortcut=None, triggered=None, is_checked=None, shortcut_name=None, + unique_name=None): + #print("create_menu_item_ex before %s"%menu_text) + ac = create_menu_action_unique(self, parent_menu, menu_text, image, tooltip, + shortcut, triggered, is_checked, shortcut_name, unique_name) + self.menu_actions.append(ac) + #print("create_menu_item_ex after %s"%menu_text) + return ac + + def is_library_view(self): + # 0 = library, 1 = main, 2 = card_a, 3 = card_b + return self.gui.stack.currentIndex() == 0 + + def plugin_button(self): + if self.is_library_view() and \ + len(self.gui.library_view.get_selected_ids()) > 0 and \ + prefs['updatedefault']: + self.update_dialog() + else: + self.add_dialog() + + def get_epubmerge_plugin(self): + if 'EpubMerge' in self.gui.iactions and self.gui.iactions['EpubMerge'].interface_action_base_plugin.version >= (1,3,1): + return self.gui.iactions['EpubMerge'] + + def update_lists(self,add=True): + if prefs['addtolists'] or prefs['addtoreadlists']: + if not self.is_library_view(): + self.gui.status_bar.show_message(_('Cannot Update Reading Lists from Device View'), 3000) + return + + if len(self.gui.library_view.get_selected_ids()) == 0: + self.gui.status_bar.show_message(_('No Selected Books to Update Reading Lists'), 3000) + return + + self.update_reading_lists(self.gui.library_view.get_selected_ids(),add) + + def get_urls_from_page_menu(self): + + urltxt = "" + if prefs['urlsfromclip']: + try: + urltxt = self.get_urls_clip(storyurls=False)[0] + except: + urltxt = "" + + d = CollectURLDialog(self.gui,_("Get Story URLs from Web Page"),urltxt,self.get_epubmerge_plugin()) + d.exec_() + if not d.status: + return + url = u"%s"%d.url.text() + + url_list = self.get_urls_from_page(url) + + if url_list: + 
self.add_dialog("\n".join(url_list),merge=d.anthology,anthology_url=url) + else: + info_dialog(self.gui, _('List of Story URLs'), + _('No Valid Story URLs found on given page.'), + show=True, + show_copy_button=False) + + def get_urls_from_page(self,url): + logger.debug("get_urls_from_page URL:%s"%url) + if 'archiveofourown.org' in url: + configuration = get_ffdl_config(url) + else: + configuration = None + return get_urls_from_page(url,configuration) + + def list_story_urls(self): + '''Get list of URLs from existing books.''' + if not self.gui.current_view().selectionModel().selectedRows() : + self.gui.status_bar.show_message(_('No Selected Books to Get URLs From'), + 3000) + return + + if self.is_library_view(): + book_list = map( partial(self.make_book_id_only), + self.gui.library_view.get_selected_ids() ) + + else: # device view, get from epubs on device. + view = self.gui.current_view() + rows = view.selectionModel().selectedRows() + # paths = view.model().paths(rows) + book_list = map( partial(self.make_book_from_device_row), rows ) + + LoopProgressDialog(self.gui, + book_list, + partial(self.get_list_story_urls_loop, db=self.gui.current_db), + self.get_list_story_urls_finish, + init_label=_("Collecting URLs for stories..."), + win_title=_("Get URLs for stories"), + status_prefix=_("URL retrieved")) + + def get_list_story_urls_loop(self,book,db=None): + if book['calibre_id']: + book['url'] = self.get_story_url(db,book_id=book['calibre_id']) + elif book['path']: + book['url'] = self.get_story_url(db,path=book['path']) + + if book['url'] == None: + book['good']=False + else: + book['good']=True + + def get_list_story_urls_finish(self, book_list): + url_list = [ x['url'] for x in book_list if x['good'] ] + if url_list: + d = ViewLog(_("List of Story URLs"),"\n".join(url_list),parent=self.gui) + d.setWindowIcon(get_icon('bookmarks.png')) + d.exec_() + else: + info_dialog(self.gui, _('List of URLs'), + _('No Story URLs found in selected books.'), + show=True, + 
                        show_copy_button=False)

    def reject_list_urls(self):
        # Collect selected books (library or device view), then gather
        # their URLs under a progress dialog before showing the reject UI.
        if self.is_library_view():
            book_list = map( partial(self.make_book_id_only),
                             self.gui.library_view.get_selected_ids() )

        else: # device view, get from epubs on device.
            view = self.gui.current_view()
            rows = view.selectionModel().selectedRows()
            #paths = view.model().paths(rows)
            book_list = map( partial(self.make_book_from_device_row), rows )

        # NOTE(review): len(map(...)) relies on Python 2 map() returning a
        # list; under Python 3 this would raise TypeError.
        if len(book_list) == 0 :
            self.gui.status_bar.show_message(_('No Selected Books have URLs to Reject'), 3000)
            return

        # Progbar because fetching urls from device epubs can be slow.
        LoopProgressDialog(self.gui,
                           book_list,
                           partial(self.reject_list_urls_loop, db=self.gui.current_db),
                           self.reject_list_urls_finish,
                           init_label=_("Collecting URLs for Reject List..."),
                           win_title=_("Get URLs for Reject List"),
                           status_prefix=_("URL retrieved"))

    def reject_list_urls_loop(self,book,db=None):
        self.get_list_story_urls_loop(book,db) # common with get_list_story_urls_loop
        if book['calibre_id']:
            # want title/author, too, for rejects.
            self.populate_book_from_calibre_id(book,db)
        if book['url']:
            # get existing note, if on rejected list.
            book['oldrejnote']=rejecturllist.get_note(book['url'])

    def reject_list_urls_finish(self, book_list):
        # Show the reject dialog for books with URLs; offer plain delete
        # when none of the selection had FanFiction URLs.

        # construct reject list of objects
        reject_list = [ RejectUrlEntry(x['url'],
                                       x['oldrejnote'],
                                       x['title'],
                                       ', '.join(x['author']),
                                       book_id=x['calibre_id'])
                        for x in book_list if x['good'] ]
        if reject_list:
            d = RejectListDialog(self.gui,reject_list,
                                 rejectreasons=rejecturllist.get_reject_reasons())
            d.exec_()

            if d.result() != d.Accepted:
                return

            rejecturllist.add(d.get_reject_list())

            if d.get_deletebooks():
                self.gui.iactions['Remove Books'].do_library_delete(d.get_reject_list_ids())

        else:
            message="<p>"+_("Rejecting FFDL URLs: None of the books selected have FanFiction URLs.")+"</p><p>"+_("Proceed to Remove?")+"</p>"
            if confirm(message,'fanfictiondownloader_reject_non_fanfiction', self.gui):
                self.gui.iactions['Remove Books'].delete_books()

    def add_dialog(self,url_list_text=None,merge=False,anthology_url=None):
        'Both new individual stories and new anthologies are created here.'

        # No explicit URLs given: seed from clipboard (filtered to story URLs).
        if not url_list_text:
            url_list = self.get_urls_clip()
            url_list_text = "\n".join(url_list)

        # AddNewDialog collects URLs, format and presents buttons.
        # add_new_dialog is modeless and reused, both for new stories
        # and anthologies, and for updating existing anthologies.
        self.add_new_dialog.show_dialog(url_list_text,
                                        self.prep_downloads,
                                        merge=merge,
                                        newmerge=True,
                                        extraoptions={'anthology_url':anthology_url})

    def update_anthology(self):
        # Update an existing FFDL anthology epub: unmerge it, refresh the
        # contained stories, and hand off to the anthology download path.
        if not self.get_epubmerge_plugin():
            # NOTE(review): "Anthologys" is a typo ("Anthologies"), but it
            # is a translatable UI string; changing it would orphan
            # existing translation-catalog entries, so it is left as-is.
            self.gui.status_bar.show_message(_('Cannot Make Anthologys without %s')%'EpubMerge 1.3.1+', 3000)
            return

        if not self.is_library_view():
            self.gui.status_bar.show_message(_('Cannot Update Books from Device View'), 3000)
            return

        if len(self.gui.library_view.get_selected_ids()) != 1:
            self.gui.status_bar.show_message(_('Can only update 1 anthology at a time'), 3000)
            return

        db = self.gui.current_db
        book_id = self.gui.library_view.get_selected_ids()[0]
        mergebook = self.make_book_id_only(book_id)
        self.populate_book_from_calibre_id(mergebook, db)

        if not db.has_format(book_id,'EPUB',index_is_id=True):
            self.gui.status_bar.show_message(_('Can only Update Epub Anthologies'), 3000)
            return

        tdir = PersistentTemporaryDirectory(prefix='ffdl_anthology_')
        logger.debug("tdir:\n%s"%tdir)

        bookepubio = StringIO(db.format(book_id,'EPUB',index_is_id=True))

        # Split the anthology back into per-story epubs; map story URL ->
        # unmerged file for re-use during the update.
        filenames = self.get_epubmerge_plugin().do_unmerge(bookepubio,tdir)
        urlmapfile = {}
        url_list = []
        for f in filenames:
            url = get_dcsource(f)
            if url:
                urlmapfile[url]=f
                url_list.append(url)

        # Any contained epub without a dc:source URL disqualifies the book.
        if not filenames or len(filenames) != len (url_list):
            info_dialog(self.gui, _("Cannot Update Anthology"),
                        "<p>"+_("Cannot Update Anthology")+"</p><p>"+_("Book isn't an FFDL Anthology or contains book(s) without valid FFDL URLs."),
                        show=True,
                        show_copy_button=False)
            remove_dir(tdir)
            return

        # get list from identifiers:url/uri if present, but only if
        # it's *not* a valid story URL.
        mergeurl = self.get_story_url(db,book_id)
        if mergeurl and not self.is_good_downloader_url(mergeurl):
            url_list = self.get_urls_from_page(mergeurl)

        url_list_text = "\n".join(url_list)

        #print("urlmapfile:%s"%urlmapfile)

        # AddNewDialog collects URLs, format and presents buttons.
        # add_new_dialog is modeless and reused, both for new stories
        # and anthologies, and for updating existing anthologies.
        self.add_new_dialog.show_dialog(url_list_text,
                                        self.prep_anthology_downloads,
                                        show=False,
                                        merge=True,
                                        newmerge=False,
                                        extrapayload=urlmapfile,
                                        extraoptions={'tdir':tdir,
                                                      'mergebook':mergebook})
        # Need to use AddNewDialog modal here because it's an update
        # of an existing book.  Don't want the user deleting it or
        # switching libraries on us.
        self.add_new_dialog.exec_()


    def prep_anthology_downloads(self, options, update_books,
                                 merge=False, urlmapfile=None):
        # Match each requested story URL to its unmerged epub; warn about
        # stories in the old anthology that are being dropped.

        if isinstance(update_books,basestring):
            url_list = split_text_to_urls(update_books)
            update_books = self.convert_urls_to_books(url_list)

        for j, book in enumerate(update_books):
            url = book['url']
            book['listorder'] = j
            if url in urlmapfile:
                #print("found epub for %s"%url)
                book['epub_for_update']=urlmapfile[url]
                del urlmapfile[url]
            #else:
                #print("didn't found epub for %s"%url)

        # Anything left in urlmapfile is a story the user is about to drop.
        if urlmapfile:
            text = '''
                 <p>%s</p>
                 <p>%s</p>
                 <ul>
                 <li>%s</li>
                 </ul>
                 <p>%s</p>'''%(
                _('There are %d stories in the current anthology that are <b>not</b> going to be kept if you go ahead.')%len(urlmapfile),
                _('Story URLs that will be removed:'),
                "</li><li>".join(urlmapfile.keys()),
                _('Update anyway?'))
            if not question_dialog(self.gui, _('Stories Removed'),
                                   text, show_copy_button=False):
                logger.debug("Canceling anthology update due to removed stories.")
                return

        # Now that we've confirmed (or had nothing to confirm), hand off to
        # the common metadata-fetch path as an anthology merge.
        self.prep_downloads( options, update_books, merge=True )

    def update_dialog(self, id_list=None):
        if not self.is_library_view():
            self.gui.status_bar.show_message(_('Cannot Update Books from Device View'), 3000)
            return

        if not id_list:
            id_list = self.gui.library_view.get_selected_ids()

        if len(id_list) == 0:
            self.gui.status_bar.show_message(_('No Selected Books to Update'), 3000)
            return
        #print("update_dialog()")

        db = self.gui.current_db
        books = map( self.make_book_id_only, id_list )

        # listorder preserves the user's selection order through the
        # progress dialog and update list.
        for j, book in enumerate(books):
            book['listorder'] = j

        LoopProgressDialog(self.gui,
                           books,
                           partial(self.populate_book_from_calibre_id, db=self.gui.current_db),
                           self.update_dialog_finish,
                           init_label=_("Collecting stories for update..."),
                           win_title=_("Get stories for updates"),
                           status_prefix=_("URL retrieved"))

        #books = self.convert_calibre_ids_to_books(db, book_ids)
        #print("update books:%s"%books)

    def update_dialog_finish(self,book_list):
        '''Present list to update and head to prep when done.'''

        d = UpdateExistingDialog(self.gui,
                                 _('Update Existing List'),
                                 prefs,
                                 self.qaction.icon(),
                                 book_list,
                                 )
        d.exec_()
        if d.result() != d.Accepted:
            return

        update_books = d.get_books()

        #print("update_books:%s"%update_books)
        #print("options:%s"%d.get_ffdl_options())
        # only if there's some good ones.
        # NOTE(review): len(filter(...)) relies on Python 2 filter()
        # returning a list; any(x['good'] for x in update_books) would be
        # the version-neutral form.
        if 0 < len(filter(lambda x : x['good'], update_books)):
            options = d.get_ffdl_options()
            self.prep_downloads( options, update_books )

    def get_urls_clip(self,storyurls=True):
        # Pull whitespace-separated URLs off the clipboard; by default only
        # keep ones a downloader adapter recognizes (storyurls=True).
        url_list = []
        if prefs['urlsfromclip']:
            for url in unicode(QApplication.instance().clipboard().text()).split():
                if not storyurls or self.is_good_downloader_url(url):
                    url_list.append(url)

        return url_list

    def apply_settings(self):
        # No need to do anything with perfs here, but we could.
        # NOTE(review): the bare `prefs` expression below is a no-op kept
        # as a placeholder; `pass` would be equivalent.
        prefs

    def make_id_searchstr(self,url):
        # older idents can be uri vs url and have | instead of : after
        # http, plus many sites are now switching to https.
+ return 'identifiers:"~ur(i|l):~^%s$"'%re.sub(r'https?\\\:','https?(\:|\|)',re.escape(url)) + + def prep_downloads(self, options, books, merge=False, extrapayload=None): + '''Fetch metadata for stories from servers, launch BG job when done.''' + + if isinstance(books,basestring): + url_list = split_text_to_urls(books) + books = self.convert_urls_to_books(url_list) + + ## for tweak_fg_sleep + options['ffnetcount']=len(filter(lambda x : x['site']=='www.fanfiction.net', books)) + + options['version'] = self.version + logger.debug(self.version) + options['personal.ini'] = get_ffdl_personalini() + + #print("prep_downloads:%s"%books) + + if 'tdir' not in options: # if merging an anthology, there's alread a tdir. + # create and pass temp dir. + tdir = PersistentTemporaryDirectory(prefix='fanfictiondownloader_') + options['tdir']=tdir + + if 0 < len(filter(lambda x : x['good'], books)): + self.gui.status_bar.show_message(_('Started fetching metadata for %s stories.')%len(books), 3000) + LoopProgressDialog(self.gui, + books, + partial(self.prep_download_loop, options = options, merge=merge), + partial(self.start_download_job, options = options, merge=merge)) + else: + self.gui.status_bar.show_message(_('No valid story URLs entered.'), 3000) + # LoopProgressDialog calls prep_download_loop for each 'good' story, + # prep_download_loop updates book object for each with metadata from site, + # LoopProgressDialog calls start_download_job at the end which goes + # into the BG, or shows list if no 'good' books. + + def prep_download_loop(self,book, + options={'fileform':'epub', + 'collision':ADDNEW, + 'updatemeta':True, + 'updateepubcover':True}, + merge=False): + ''' + Update passed in book dict with metadata from website and + necessary data. To be called from LoopProgressDialog + 'loop'. Also pops dialogs for is adult, user/pass. + ''' + + url = book['url'] + logger.debug("url:%s"%url) + mi = None + + if not merge: # skip reject list when merging. 
+ if rejecturllist.check(url): + rejnote = rejecturllist.get_full_note(url) + if prefs['reject_always'] or question_dialog(self.gui, _('Reject URL?'),''' + <h3>%s</h3> + <p>%s</p> + <p>"<b>%s</b>"</p> + <p>%s</p> + <p>%s</p>'''%( + _('Reject URL?'), + _('<b>%s</b> is on your Reject URL list:')%url, + rejnote, + _("Click '<b>Yes</b>' to Reject."), + _("Click '<b>No</b>' to download anyway.")), + show_copy_button=False): + book['comment'] = _("Story on Reject URLs list (%s).")%rejnote + book['good']=False + book['icon']='rotate-right.png' + book['status'] = _('Rejected') + return + else: + if question_dialog(self.gui, _('Remove Reject URL?'),''' + <h3>%s</h3> + <p>%s</p> + <p>"<b>%s</b>"</p> + <p>%s</p> + <p>%s</p>'''%( + _("Remove URL from Reject List?"), + _('<b>%s</b> is on your Reject URL list:')%url, + rejnote, + _("Click '<b>Yes</b>' to remove it from the list,"), + _("Click '<b>No</b>' to leave it on the list.")), + show_copy_button=False): + rejecturllist.remove(url) + + # The current database shown in the GUI + # db is an instance of the class LibraryDatabase2 from database.py + # This class has many, many methods that allow you to do a lot of + # things. + db = self.gui.current_db + + fileform = options['fileform'] + collision = options['collision'] + updatemeta= options['updatemeta'] + updateepubcover= options['updateepubcover'] + + # Dialogs should prevent this case now. + if collision in (UPDATE,UPDATEALWAYS) and fileform != 'epub': + raise NotGoingToDownload(_("Cannot update non-epub format.")) + + if not book['good']: + # book has already been flagged bad for whatever reason. + return + + skip_date_update = False + + adapter = get_ffdl_adapter(url,fileform) + ## save and share cookiejar and pagecache between all + ## downloads. 
        # Share one page cache and one cookie jar across every download in
        # this batch: created lazily on the first book, stored in options
        # so each subsequent adapter reuses them.
        if 'pagecache' not in options:
            options['pagecache'] = adapter.get_empty_pagecache()
        adapter.set_pagecache(options['pagecache'])
        if 'cookiejar' not in options:
            options['cookiejar'] = adapter.get_empty_cookiejar()
        adapter.set_cookiejar(options['cookiejar'])

        # reduce foreground sleep time for ffnet when few books.
        # Linear interpolation: min_fg_sleep at 1 download up to
        # max_fg_sleep at max_fg_sleep_at_downloads downloads, capped at
        # max_fg_sleep.
        if 'ffnetcount' in options and \
                adapter.getConfig('tweak_fg_sleep') and \
                adapter.getSiteDomain() == 'www.fanfiction.net':
            minslp = float(adapter.getConfig('min_fg_sleep'))
            maxslp = float(adapter.getConfig('max_fg_sleep'))
            dwnlds = float(adapter.getConfig('max_fg_sleep_at_downloads'))
            m = (maxslp-minslp) / (dwnlds-1)
            b = minslp - m
            slp = min(maxslp,m*float(options['ffnetcount'])+b)
            #print("m:%s b:%s = %s"%(m,b,slp))
            adapter.set_sleep(slp)

        ## three tries, that's enough if both user/pass & is_adult needed,
        ## or a couple tries of one or the other
        # NOTE: the loop body runs twice; the getStoryMetadataOnly() call
        # after the loop is the third and final attempt, where any
        # remaining exception propagates to the caller.
        for x in range(0,2):
            try:
                adapter.getStoryMetadataOnly(get_cover=False)
            except exceptions.FailedToLogin, f:
                # Site demanded credentials--prompt the user, then retry.
                logger.warn("Login Failed, Need Username/Password.")
                userpass = UserPassDialog(self.gui,url,f)
                userpass.exec_() # exec_ will make it act modal
                if userpass.status:
                    adapter.username = userpass.user.text()
                    adapter.password = userpass.passwd.text()

            except exceptions.AdultCheckRequired:
                # Site demanded an adult check--confirm with user, retry.
                if question_dialog(self.gui, _('Are You an Adult?'), '<p>'+
                                   _("%s requires that you be an adult. Please confirm you are an adult in your locale:")%url,
                                   show_copy_button=False):
                    adapter.is_adult=True

        # let other exceptions percolate up.
        story = adapter.getStoryMetadataOnly(get_cover=False)

        series = story.getMetadata('series')
        if not merge and series and prefs['checkforseriesurlid']:
            # try to find *series anthology* by *seriesUrl* identifier url or uri first.
+ searchstr = self.make_id_searchstr(story.getMetadata('seriesUrl')) + identicalbooks = db.search_getting_ids(searchstr, None) + # print("searchstr:%s"%searchstr) + # print("identicalbooks:%s"%identicalbooks) + if len(identicalbooks) > 0 and question_dialog(self.gui, _('Skip Story?'),''' + <h3>%s</h3> + <p>%s</p> + <p>%s</p> + <p>%s</p> + '''%( + _('Skip Anthology Story?'), + _('"<b>%s</b>" is in series "<b><a href="%s">%s</a></b>" that you have an anthology book for.')%(story.getMetadata('title'),story.getMetadata('seriesUrl'),series[:series.index(' [')]), + _("Click '<b>Yes</b>' to Skip."), + _("Click '<b>No</b>' to download anyway.")), + show_copy_button=False): + book['comment'] = _("Story in Series Anthology(%s).")%series + book['title'] = story.getMetadata('title') + book['author'] = [story.getMetadata('author')] + book['good']=False + book['icon']='rotate-right.png' + book['status'] = _('Skipped') + return + + + ################################################################################################################################################33 + + # set PI version instead of default. + if 'version' in options: + story.setMetadata('version',options['version']) + + # all_metadata duplicates some data, but also includes extra_entries, etc. 
+ book['all_metadata'] = story.getAllMetadata(removeallentities=True) + + book['title'] = story.getMetadata("title", removeallentities=True) + book['author_sort'] = book['author'] = story.getList("author", removeallentities=True) + book['publisher'] = story.getMetadata("site") + book['tags'] = story.getSubjectTags(removeallentities=True) + if story.getMetadata("description"): + book['comments'] = sanitize_comments_html(story.getMetadata("description")) + else: + book['comments']='' + book['series'] = story.getMetadata("series", removeallentities=True) + + book['is_adult'] = adapter.is_adult + book['username'] = adapter.username + book['password'] = adapter.password + + book['icon'] = 'plus.png' + book['status'] = _('Add') + if story.getMetadataRaw('datePublished'): + book['pubdate'] = story.getMetadataRaw('datePublished').replace(tzinfo=local_tz) + if story.getMetadataRaw('dateUpdated'): + book['updatedate'] = story.getMetadataRaw('dateUpdated').replace(tzinfo=local_tz) + if story.getMetadataRaw('dateCreated'): + book['timestamp'] = story.getMetadataRaw('dateCreated').replace(tzinfo=local_tz) + else: + book['timestamp'] = None # need *something* there for calibre. + + if not merge:# skip all the collision code when d/ling for merging. + if collision in (CALIBREONLY): + book['icon'] = 'metadata.png' + book['status'] = _('Meta') + + book_id = None + + if book['calibre_id'] != None: + # updating an existing book. Update mode applies. + logger.debug("update existing id:%s"%book['calibre_id']) + book_id = book['calibre_id'] + # No handling needed: OVERWRITEALWAYS,CALIBREONLY + + # only care about collisions when not ADDNEW + elif collision != ADDNEW: + # 'new' book from URL. collision handling applies. + logger.debug("from URL(%s)"%url) + + # try to find by identifier url or uri first. 
+ searchstr = self.make_id_searchstr(url) + identicalbooks = db.search_getting_ids(searchstr, None) + # print("searchstr:%s"%searchstr) + # print("identicalbooks:%s"%identicalbooks) + if len(identicalbooks) < 1: + # find dups + authlist = story.getList("author", removeallentities=True) + mi = MetaInformation(story.getMetadata("title", removeallentities=True), + authlist) + identicalbooks = db.find_identical_books(mi) + if len(identicalbooks) > 0: + logger.debug("existing found by title/author(s)") + + else: + logger.debug("existing found by identifier URL") + + if collision == SKIP and identicalbooks: + raise NotGoingToDownload(_("Skipping duplicate story."),"list_remove.png") + + if len(identicalbooks) > 1: + raise NotGoingToDownload(_("More than one identical book by Identifer URL or title/author(s)--can't tell which book to update/overwrite."),"minusminus.png") + + ## changed: add new book when CALIBREONLY if none found. + if collision == CALIBREONLY and not identicalbooks: + collision = ADDNEW + options['collision'] = ADDNEW + + if len(identicalbooks)>0: + book_id = identicalbooks.pop() + book['calibre_id'] = book_id + book['icon'] = 'edit-redo.png' + book['status'] = _('Update') + + if book_id and mi: # book_id and mi only set if matched by title/author. + liburl = self.get_story_url(db,book_id) + if book['url'] != liburl and prefs['checkforurlchange'] and \ + not (book['url'].replace('https','http') == liburl): # several sites have been changing to + # https now. Don't flag when that's the only change. + # special case for ffnet urls change to https. 
+ if not question_dialog(self.gui, _('Change Story URL?'),''' + <h3>%s</h3> + <p>%s</p> + <p>%s</p> + <p>%s</p> + <p>%s</p> + <p>%s</p>'''%( + _('Change Story URL?'), + _('<b>%s</b> by <b>%s</b> is already in your library with a different source URL:')%(mi.title,', '.join(mi.author)), + _('In library: <a href="%(liburl)s">%(liburl)s</a>')%{'liburl':liburl}, + _('New URL: <a href="%(newurl)s">%(newurl)s</a>')%{'newurl':book['url']}, + _("Click '<b>Yes</b>' to update/overwrite book with new URL."), + _("Click '<b>No</b>' to skip updating/overwriting this book.")), + show_copy_button=False): + if question_dialog(self.gui, _('Download as New Book?'),''' + <h3>%s</h3> + <p>%s</p> + <p>%s</p> + <p>%s</p> + <p>%s</p> + <p>%s</p>'''%( + _('Download as New Book?'), + _('<b>%s</b> by <b>%s</b> is already in your library with a different source URL.')%(mi.title,', '.join(mi.author)), + _('You chose not to update the existing book. Do you want to add a new book for this URL?'), + _('New URL: <a href="%(newurl)s">%(newurl)s</a>')%{'newurl':book['url']}, + _("Click '<b>Yes</b>' to a new book with new URL."), + _("Click '<b>No</b>' to skip URL.")), + show_copy_button=False): + book_id = None + mi = None + book['calibre_id'] = None + else: + book['comment'] = _("Update declined by user due to differing story URL(%s)")%liburl + book['good']=False + book['icon']='rotate-right.png' + book['status'] = _('Different URL') + return + + if book_id != None and collision != ADDNEW: + if collision in (CALIBREONLY): + book['comment'] = _('Metadata collected.') + # don't need temp file created below. + return + + ## newer/chaptercount checks are the same for both: + # Update epub, but only if more chapters. + if collision in (UPDATE,UPDATEALWAYS): # collision == UPDATE + # 'book' can exist without epub. If there's no existing epub, + # let it go and it will download it. 
                    if db.has_format(book_id,fileform,index_is_id=True):
                        # Compare chapter count in the existing epub
                        # against the web site to decide what to do.
                        (epuburl,chaptercount) = \
                            get_dcsource_chaptercount(StringIO(db.format(book_id,'EPUB',
                                                                         index_is_id=True)))
                        urlchaptercount = int(story.getMetadata('numChapters').replace(',',''))
                        if chaptercount == urlchaptercount:
                            if collision == UPDATE:
                                raise NotGoingToDownload(_("Already contains %d chapters.")%chaptercount,'edit-undo.png')
                            else:
                                # UPDATEALWAYS
                                # Same chapter count: still proceed, but flag
                                # set here is presumably consumed later
                                # (outside this view) to keep existing dates.
                                skip_date_update = True
                        elif chaptercount > urlchaptercount:
                            raise NotGoingToDownload(_("Existing epub contains %d chapters, web site only has %d. Use Overwrite to force update.") % (chaptercount,urlchaptercount),'dialog_error.png')
                        elif chaptercount == 0:
                            raise NotGoingToDownload(_("FFDL doesn't recognize chapters in existing epub, epub is probably from a different source. Use Overwrite to force update."),'dialog_error.png')

                if collision == OVERWRITE and \
                        db.has_format(book_id,formmapping[fileform],index_is_id=True):
                    # check make sure incoming is newer.
                    lastupdated=story.getMetadataRaw('dateUpdated')
                    # os.stat()[8] is st_mtime of the existing format file.
                    fileupdated=datetime.fromtimestamp(os.stat(db.format_abspath(book_id, formmapping[fileform], index_is_id=True))[8])

                    # updated doesn't have time (or is midnight), use dates only.
                    # updated does have time, use full timestamps.
                    if (lastupdated.time() == time.min and fileupdated.date() > lastupdated.date()) or \
                            (lastupdated.time() != time.min and fileupdated > lastupdated):
                        raise NotGoingToDownload(_("Not Overwriting, web site is not newer."),'edit-undo.png')



        # For update, provide a tmp file copy of the existing epub so
        # it can't change underneath us. Now also overwrite for logpage preserve.
+ if collision in (UPDATE,UPDATEALWAYS,OVERWRITE,OVERWRITEALWAYS) and \ + fileform == 'epub' and \ + db.has_format(book['calibre_id'],'EPUB',index_is_id=True): + tmp = PersistentTemporaryFile(prefix='old-%s-'%book['calibre_id'], + suffix='.epub', + dir=options['tdir']) + db.copy_format_to(book_id,fileform,tmp,index_is_id=True) + logger.debug("existing epub tmp:"+tmp.name) + book['epub_for_update'] = tmp.name + + if book_id != None and prefs['injectseries']: + mi = db.get_metadata(book_id,index_is_id=True) + if not book['series'] and mi.series != None: + book['calibre_series'] = (mi.series,mi.series_index) + #print("calibre_series:%s [%s]"%book['calibre_series']) + + if book['good']: # there shouldn't be any !'good' books at this point. + # if still 'good', make a temp file to write the output to. + # For HTML format users, make the filename inside the zip something reasonable. + # For crazy long titles/authors, limit it to 200chars. + # For weird/OS-unsafe characters, use file safe only. 
+ tmp = PersistentTemporaryFile(prefix=story.formatFileName("${title}-${author}-",allowunsafefilename=False)[:100], + suffix='.'+options['fileform'], + dir=options['tdir']) + logger.debug("title:"+book['title']) + logger.debug("outfile:"+tmp.name) + book['outfile'] = tmp.name + + # cookiejar = PersistentTemporaryFile(prefix=story.formatFileName("${title}-${author}-",allowunsafefilename=False)[:100], + # suffix='.cookiejar', + # dir=options['tdir']) + # adapter.save_cookiejar(cookiejar.name) + # book['cookiejar'] = cookiejar.name + # pagecache = PersistentTemporaryFile(prefix=story.formatFileName("${title}-${author}-",allowunsafefilename=False)[:100], + # suffix='.pagecache', + # dir=options['tdir']) + # adapter.save_pagecache(pagecache.name) + # book['pagecache'] = pagecache.name + + return + + def start_download_job(self,book_list, + options={'fileform':'epub', + 'collision':ADDNEW, + 'updatemeta':True, + 'updateepubcover':True}, + merge=False): + ''' + Called by LoopProgressDialog to start story downloads BG processing. + adapter_list is a list of tuples of (url,adapter) + ''' + #print("start_download_job:book_list:%s"%book_list) + + ## No need to BG process when CALIBREONLY! Fake it. + #print("options:%s"%options) + if options['collision'] == CALIBREONLY: + class NotJob(object): + def __init__(self,result): + self.failed=False + self.result=result + notjob = NotJob(book_list) + self.download_list_completed(notjob,options=options) + return + + for book in book_list: + if book['good']: + break + else: + ## No good stories to try to download, go straight to + ## updating error col. 
+ msg = ''' + <p>%s</p> + <p>%s</p> + <p>%s</p>'''%( + _('None of the <b>%d</b> URLs/stories given can be/need to be downloaded.')%len(book_list), + _('See log for details.'), + _('Proceed with updating your library(Error Column, if configured)?')) + + htmllog='<html><body><table border="1"><tr><th>'+_('Status')+'</th><th>'+_('Title')+'</th><th>'+_('Author')+'</th><th>'+_('Comment')+'</th><th>URL</th></tr>' + for book in book_list: + if 'status' in book: + status = book['status'] + else: + status = _('Bad') + htmllog = htmllog + '<tr><td>' + '</td><td>'.join([escapehtml(status),escapehtml(book['title']),escapehtml(", ".join(book['author'])),escapehtml(book['comment']),book['url']]) + '</td></tr>' + + htmllog = htmllog + '</table></body></html>' + + payload = ([], book_list, options) + self.gui.proceed_question(self.update_error_column, + payload, htmllog, + _('FFDL log'), _('FFDL download ended'), msg, + show_copy_button=False) + return + + cookiejarfile = PersistentTemporaryFile(suffix='.cookiejar', + dir=options['tdir']) + options['cookiejar'].save(cookiejarfile.name, + ignore_discard=True, + ignore_expires=True) + options['cookiejarfile']=cookiejarfile.name + del options['cookiejar'] ## can't be pickled. 

        # Hand the downloads off to calibre's background job system:
        # do_download_worker in this plugin's jobs module runs them on
        # 'cpus' parallel workers; download_list_completed is the callback.
        func = 'arbitrary_n'
        cpus = self.gui.job_manager.server.pool_size
        args = ['calibre_plugins.fanfictiondownloader_plugin.jobs', 'do_download_worker',
                (book_list, options, cpus)]
        desc = _('Download FanFiction Book')
        job = self.gui.job_manager.run_job(
            self.Dispatcher(partial(self.download_list_completed,options=options,merge=merge)),
            func, args=args,
            description=desc)

        self.gui.jobs_pointer.start()
        self.gui.status_bar.show_message(_('Starting %d FanFictionDownLoads')%len(book_list),3000)

    def update_books_loop(self,book,db=None,
                          options={'fileform':'epub',
                                   'collision':ADDNEW,
                                   'updatemeta':True,
                                   'updateepubcover':True}):
        '''
        Record one book's success/failure in the configured custom error
        column, then (for good books) add/update its format and metadata
        in calibre.  Called once per book from LoopProgressDialog.
        '''
        # NOTE(review): mutable dict default for options is shared across
        # calls.  Callers pass options explicitly via partial and this
        # method does not mutate it, so it is benign here--but prefer the
        # options=None + per-call dict idiom.
        custom_columns = self.gui.library_view.model().custom_columns
        if book['calibre_id'] and prefs['errorcol'] != '' and prefs['errorcol'] in custom_columns:
            # Error column configured: record the failure comment, or
            # clear any stale error text on success.
            label = custom_columns[prefs['errorcol']]['label']
            if not book['good']:
                logger.debug("record/update error message column %s %s"%(book['title'],book['url']))
                db.set_custom(book['calibre_id'], book['comment'], label=label, commit=True) # book['comment']
            else:
                db.set_custom(book['calibre_id'], '', label=label, commit=True) # book['comment']

        if not book['good']:
            return # only update errorcol on error.
+ + logger.debug("add/update %s %s"%(book['title'],book['url'])) + mi = self.make_mi_from_book(book) + + if options['collision'] != CALIBREONLY: + self.add_book_or_update_format(book,options,prefs,mi) + + if options['collision'] == CALIBREONLY or \ + ( (options['updatemeta'] or book['added']) and book['good'] ): + try: + self.update_metadata(db, book['calibre_id'], book, mi, options) + except: + det_msg = "".join(traceback.format_exception(*sys.exc_info()))+"\n"+_("Story Details:")+pretty_book(book) + logger.error("Error Updating Metadata:\n%s"%det_msg) + error_dialog(self.gui, + _("Error Updating Metadata"), + "<p>"+_("An error has occurred while FFDL was updating calibre's metadata for <a href='%s'>%s</a>.")%(book['url'],book['title'])+"</p>"+ + _("The ebook has been updated, but the metadata has not."), + det_msg=det_msg, + show=True) + + def update_books_finish(self, book_list, options={}, showlist=True): + '''Notify calibre about updated rows, update external plugins + (Reading Lists & Count Pages) as configured''' + + add_list = filter(lambda x : x['good'] and x['added'], book_list) + add_ids = [ x['calibre_id'] for x in add_list ] + update_list = filter(lambda x : x['good'] and not x['added'], book_list) + update_ids = [ x['calibre_id'] for x in update_list ] + all_ids = add_ids + update_ids + + failed_list = filter(lambda x : not x['good'] , book_list) + failed_ids = [ x['calibre_id'] for x in failed_list ] + + if options['collision'] != CALIBREONLY and \ + (prefs['addtolists'] or prefs['addtoreadlists']): + self.update_reading_lists(all_ids,add=True) + + if len(add_list): + self.gui.library_view.model().books_added(len(add_list)) + self.gui.library_view.model().refresh_ids(add_ids) + + if len(update_list): + self.gui.library_view.model().refresh_ids(update_ids) + + current = self.gui.library_view.currentIndex() + self.gui.library_view.model().current_changed(current, self.previous) + self.gui.tags_view.recount() + + if self.gui.cover_flow: + 
self.gui.cover_flow.dataChanged() + + if showlist and prefs['mark']: # don't use with anthology + db = self.gui.current_db + marked_ids = dict() + marked_text = "ffdl_success" + for index, book_id in enumerate(all_ids): + marked_ids[book_id] = '%s_%04d' % (marked_text, index) + for index, book_id in enumerate(failed_ids): + marked_ids[book_id] = 'ffdl_failed_%04d' % index + # Mark the results in our database + db.set_marked_ids(marked_ids) + + if prefs['showmarked']: # show add/update + # Search to display the list contents + self.gui.search.set_search_string('marked:' + marked_text) + # Sort by our marked column to display the books in order + self.gui.library_view.sort_by_named_field('marked', True) + + self.gui.status_bar.show_message(_('Finished Adding/Updating %d books.')%(len(update_list) + len(add_list)), 3000) + remove_dir(options['tdir']) + + if 'Count Pages' in self.gui.iactions and len(prefs['countpagesstats']) and len(all_ids): + cp_plugin = self.gui.iactions['Count Pages'] + cp_plugin.count_statistics(all_ids,prefs['countpagesstats']) + + if prefs['autoconvert'] and options['collision'] != CALIBREONLY: + self.gui.status_bar.show_message(_('Starting auto conversion of %d books.')%(len(all_ids)), 3000) + self.gui.iactions['Convert Books'].auto_convert_auto_add(all_ids) + + def download_list_completed(self, job, options={},merge=False): + if job.failed: + self.gui.job_exception(job, dialog_title='Failed to Download Stories') + return + + self.previous = self.gui.library_view.currentIndex() + db = self.gui.current_db + + book_list = job.result + good_list = filter(lambda x : x['good'], book_list) + bad_list = filter(lambda x : not x['good'], book_list) + good_list = sorted(good_list,key=lambda x : x['listorder']) + bad_list = sorted(bad_list,key=lambda x : x['listorder']) + #print("book_list:%s"%book_list) + payload = (good_list, bad_list, options) + + if merge: + if len(good_list) < 1: + info_dialog(self.gui, _('No Good Stories for Anthology'), + _('No 
good stories/updates where downloaded, Anthology creation/update aborted.'), + show=True, + show_copy_button=False) + return + + msg = '<p>'+_('FFDL found <b>%s</b> good and <b>%s</b> bad updates.')%(len(good_list),len(bad_list))+'</p>' + if len(bad_list) > 0: + msg = msg + ''' + <p>%s</p> + <p>%s</p> + <p>%s</p> + <p>%s</p>'''%( + _('Are you sure you want to continue with creating/updating this Anthology?'), + _('Any updates that failed will <b>not</b> be included in the Anthology.'), + _("However, if there's an older version, it will still be included."), + _('See log for details.')) + + msg = msg + '<p>'+_('Proceed with updating this anthology and your library?')+ '</p>' + + htmllog='<html><body><table border="1"><tr><th>'+_('Status')+'</th><th>'+_('Title')+'</th><th>'+_('Author')+'</th><th>'+_('Comment')+'</th><th>URL</th></tr>' + for book in sorted(good_list+bad_list,key=lambda x : x['listorder']): + if 'status' in book: + status = book['status'] + else: + if book in good_list: + status = _('Good') + else: + status = _('Bad') + htmllog = htmllog + '<tr><td>' + '</td><td>'.join([escapehtml(status),escapehtml(book['title']),escapehtml(", ".join(book['author'])),escapehtml(book['comment']),book['url']]) + '</td></tr>' + + htmllog = htmllog + '</table></body></html>' + + for book in bad_list: + if 'epub_for_update' in book: + book['good']=True + book['outfile'] = book['epub_for_update'] + good_list.append(book) + + do_update_func = self.do_download_merge_update + else: + msg = ''' + <p>%s</p> + <p>%s</p> + <p>%s</p>'''%( + _('FFDL found <b>%s</b> good and <b>%s</b> bad updates.')%(len(good_list),len(bad_list)), + _('See log for details.'), + _('Proceed with updating your library?') + ) + + htmllog='<html><body><table border="1"><tr><th>'+_('Status')+'</th><th>'+_('Title')+'</th><th>'+_('Author')+'</th><th>'+_('Comment')+'</th><th>URL</th></tr>' + for book in good_list: + if 'status' in book: + status = book['status'] + else: + status = 'Good' + htmllog = htmllog + 
'<tr><td>' + '</td><td>'.join([escapehtml(status),escapehtml(book['title']),escapehtml(", ".join(book['author'])),escapehtml(book['comment']),book['url']]) + '</td></tr>' + + for book in bad_list: + if 'status' in book: + status = book['status'] + else: + status = 'Bad' + htmllog = htmllog + '<tr><td>' + '</td><td>'.join([escapehtml(status),escapehtml(book['title']),escapehtml(", ".join(book['author'])),escapehtml(book['comment']),book['url']]) + '</td></tr>' + + htmllog = htmllog + '</table></body></html>' + + do_update_func = self.do_download_list_update + + self.gui.proceed_question(do_update_func, + payload, htmllog, + _('FFDL log'), _('FFDL download complete'), msg, + show_copy_button=False) + + def do_download_merge_update(self, payload): + + (good_list,bad_list,options) = payload + total_good = len(good_list) + + logger.debug("merge titles:\n%s"%"\n".join([ "%s %s"%(x['title'],x['listorder']) for x in good_list ])) + + good_list = sorted(good_list,key=lambda x : x['listorder']) + bad_list = sorted(bad_list,key=lambda x : x['listorder']) + + self.gui.status_bar.show_message(_('Merging %s books.')%total_good) + + existingbook = None + if 'mergebook' in options: + existingbook = options['mergebook'] + #print("existingbook:\n%s"%existingbook) + mergebook = self.merge_meta_books(existingbook,good_list,options['fileform']) + + if 'mergebook' in options: + mergebook['calibre_id'] = options['mergebook']['calibre_id'] + + if 'anthology_url' in options: + mergebook['url'] = options['anthology_url'] + + #print("mergebook:\n%s"%mergebook) + + if mergebook['good']: # there shouldn't be any !'good' books at this point. + # if still 'good', make a temp file to write the output to. 
+ tmp = PersistentTemporaryFile(suffix='.'+options['fileform'], + dir=options['tdir']) + logger.debug("title:"+mergebook['title']) + logger.debug("outfile:"+tmp.name) + mergebook['outfile'] = tmp.name + + self.get_epubmerge_plugin().do_merge(tmp.name, + [ x['outfile'] for x in good_list ], + titleopt=mergebook['title'], + keepmetadatafiles=True, + source=mergebook['url']) + + options['collision']=OVERWRITEALWAYS + self.update_books_loop(mergebook,self.gui.current_db,options) + self.update_books_finish([mergebook], options=options, showlist=False) + + def do_download_list_update(self, payload): + + (good_list,bad_list,options) = payload + good_list = sorted(good_list,key=lambda x : x['listorder']) + bad_list = sorted(bad_list,key=lambda x : x['listorder']) + + self.gui.status_bar.show_message(_('FFDL Adding/Updating books.')) + + if good_list or prefs['mark'] or (bad_list and prefs['errorcol'] != '' and prefs['errorcol'] in self.gui.library_view.model().custom_columns): + LoopProgressDialog(self.gui, + good_list+bad_list, + partial(self.update_books_loop, options=options, db=self.gui.current_db), + partial(self.update_books_finish, options=options), + init_label=_("Updating calibre for FanFiction stories..."), + win_title=_("Update calibre for FanFiction stories"), + status_prefix=_("Updated")) + + def update_error_column(self,payload): + '''Update custom error column if configured.''' + (empty_list,book_list,options)=payload + custom_columns = self.gui.library_view.model().custom_columns + if prefs['mark'] or (prefs['errorcol'] != '' and prefs['errorcol'] in custom_columns): + self.previous = self.gui.library_view.currentIndex() # used by update_books_finish. 
            self.gui.status_bar.show_message(_('Adding/Updating %s BAD books.')%len(book_list))
            # Resolve the error column's label once up front; label=None
            # makes update_error_column_loop a no-op per book.
            if (prefs['errorcol'] != '' and prefs['errorcol'] in custom_columns):
                label = custom_columns[prefs['errorcol']]['label']
            else:
                label = None
            LoopProgressDialog(self.gui,
                               book_list,
                               partial(self.update_error_column_loop, db=self.gui.current_db, label=label),
                               partial(self.update_books_finish, options=options),
                               init_label=_("Updating calibre for BAD FanFiction stories..."),
                               win_title=_("Update calibre for BAD FanFiction stories"),
                               status_prefix=_("Updated"))

    def update_error_column_loop(self,book,db=None,label=None):
        '''
        Write one failed book's comment into the custom error column.
        No-op when the book has no calibre_id or no label is configured.
        '''
        if book['calibre_id'] and label:
            logger.debug("add/update bad %s %s %s"%(book['title'],book['url'],book['comment']))
            db.set_custom(book['calibre_id'], book['comment'], label=label, commit=True)

    def add_book_or_update_format(self,book,options,prefs,mi=None):
        '''
        Create a new calibre book entry (marking book['added']) or attach
        the downloaded file to the existing entry, then prune other
        formats according to the deleteotherforms/autoconvert prefs.
        Returns the calibre book id.
        '''
        db = self.gui.current_db

        if mi == None:
            mi = self.make_mi_from_book(book)

        book_id = book['calibre_id']
        if book_id == None:
            # New book: create the entry first so there's an id to
            # attach the format to.
            book_id = db.create_book_entry(mi,
                                           add_duplicates=True)
            book['calibre_id'] = book_id
            book['added'] = True
        else:
            book['added'] = False

        # Attach the downloaded file; flag the book bad if calibre refuses.
        if not db.add_format_with_hooks(book_id,
                                        options['fileform'],
                                        book['outfile'], index_is_id=True):
            book['comment'] = _("Adding format to book failed for some reason...")
            book['good']=False
            book['icon']='dialog_error.png'
            book['status'] = _('Error')

        if prefs['deleteotherforms']:
            # Drop every format except the one just downloaded.
            fmts = db.formats(book['calibre_id'], index_is_id=True).split(',')
            for fmt in fmts:
                if fmt != formmapping[options['fileform']]:
                    logger.debug("deleteotherforms remove f:"+fmt)
                    db.remove_format(book['calibre_id'], fmt, index_is_id=True)#, notify=False
        elif prefs['autoconvert']:
            ## 'Convert Book'.auto_convert_auto_add doesn't convert if
            ## the format is already there.
            fmt = calibre_prefs['output_format']
            # delete if there, but not if the format we just made.
+ if fmt != formmapping[options['fileform']] and \ + db.has_format(book_id,fmt,index_is_id=True): + logger.debug("autoconvert remove f:"+fmt) + db.remove_format(book['calibre_id'], fmt, index_is_id=True)#, notify=False + + + return book_id + + def update_metadata(self, db, book_id, book, mi, options): + oldmi = db.get_metadata(book_id,index_is_id=True) + if prefs['keeptags']: + old_tags = db.get_tags(book_id) + #print("old_tags:%s"%old_tags) + #print("mi.tags:%s"%mi.tags) + # remove old Completed/In-Progress only if there's a new one. + if 'Completed' in mi.tags or 'In-Progress' in mi.tags: + old_tags = filter( lambda x : x not in ('Completed', 'In-Progress'), old_tags) + # remove old Last Update tags if there are new ones. + if len(filter( lambda x : not x.startswith("Last Update"), mi.tags)) > 0: + old_tags = filter( lambda x : not x.startswith("Last Update"), old_tags) + + # mi.tags needs to be list, but set kills dups. + # this way also removes case-mismatched dups, keeping old_tags version. + foldedcase_tags = dict() + for t in list(mi.tags) + list(old_tags): + foldedcase_tags[t.lower()] = t + + mi.tags = foldedcase_tags.values() + #print("mi.tags:%s"%mi.tags) + + if book['all_metadata']['langcode']: + mi.languages=[book['all_metadata']['langcode']] + else: + # Set language english, but only if not already set. + if not oldmi.languages: + mi.languages=['en'] + + if options['fileform'] == 'epub' and prefs['updatecover']: + existingepub = db.format(book_id,'EPUB',index_is_id=True, as_file=True) + epubmi = get_metadata(existingepub,'EPUB') + if epubmi.cover_data[1] is not None: + try: + db.set_cover(book_id, epubmi.cover_data[1]) + except: + logger.info("Failed to set_cover, skipping") + + # implement 'newonly' flags here by setting to the current + # value again. 
+ if not book['added']: + for (col,newonly) in prefs['std_cols_newonly'].iteritems(): + if newonly: + if col == "identifiers": + mi.set_identifiers(oldmi.get_identifiers()) + else: + try: + mi.__setattr__(col,oldmi.__getattribute__(col)) + except AttributeError: + logger.warn("AttributeError? %s"%col) + pass + + db.set_metadata(book_id,mi) + # mi.authors gets run through the string_to_authors and split on '&' ',' 'and' and 'with' + db.set_authors(book_id,book['author']) # author is a list. + + # do configured column updates here. + #print("all_metadata: %s"%book['all_metadata']) + custom_columns = self.gui.library_view.model().custom_columns + + #print("prefs['custom_cols'] %s"%prefs['custom_cols']) + for col, meta in prefs['custom_cols'].iteritems(): + #print("setting %s to %s"%(col,meta)) + if col not in custom_columns: + logger.debug("%s not an existing column, skipping."%col) + continue + coldef = custom_columns[col] + if col in prefs['custom_cols_newonly'] and prefs['custom_cols_newonly'][col] and not book['added']: + logger.debug("Skipping custom column(%s) update, set to New Books Only"%coldef['name']) + continue + if not meta.startswith('status-') and meta not in book['all_metadata'] or \ + meta.startswith('status-') and 'status' not in book['all_metadata']: + logger.debug("No value for %s, skipping custom column(%s) update."%(meta,coldef['name'])) + continue + if meta not in permitted_values[coldef['datatype']]: + logger.debug("%s not a valid column type for %s, skipping."%(col,meta)) + continue + label = coldef['label'] + if coldef['datatype'] in ('enumeration','text','comments','datetime','series'): + db.set_custom(book_id, book['all_metadata'][meta], label, commit=False) + elif coldef['datatype'] in ('int','float'): + num = unicode(book['all_metadata'][meta]).replace(",","") + if num != '': + db.set_custom(book_id, num, label=label, commit=False) + elif coldef['datatype'] == 'bool' and meta.startswith('status-'): + if meta == 'status-C': + val = 
book['all_metadata']['status'] == 'Completed' + if meta == 'status-I': + val = book['all_metadata']['status'] == 'In-Progress' + db.set_custom(book_id, val, label=label, commit=False) + + configuration = None + if prefs['allow_custcol_from_ini']: + configuration = get_ffdl_config(book['url'],options['fileform']) + # meta => custcol[,a|n|r] + # cliches=>\#acolumn,r + for line in configuration.getConfig('custom_columns_settings').splitlines(): + if "=>" in line: + (meta,custcol) = map( lambda x: x.strip(), line.split("=>") ) + flag='r' + if "," in custcol: + (custcol,flag) = map( lambda x: x.strip(), custcol.split(",") ) + + if meta not in book['all_metadata']: + # if double quoted, use as a literal value. + if meta[0] == '"' and meta[-1] == '"': + val = meta[1:-1] + logger.debug("No metadata value for %s, setting custom column(%s) literally to %s."%(meta,custcol,val)) + else: + logger.debug("No value for %s, skipping custom column(%s) update."%(meta,custcol)) + continue + else: + val = book['all_metadata'][meta] + + if custcol not in custom_columns: + continue + else: + coldef = custom_columns[custcol] + label = coldef['label'] + + if flag == 'r' or (flag == 'n' and book['added']): + if coldef['datatype'] in ('int','float'): # for favs, etc--site specific metadata. + if 'anthology_meta_list' in book and meta in book['anthology_meta_list']: + # re-split list, strip commas, convert to floats, sum up. + val = sum([ float(x.replace(",","")) for x in val.split(", ") ]) + else: + val = unicode(val).replace(",","") + else: + val = val + if val != '': + if coldef['datatype'] == 'bool': + if val.lower() in ('t','true','1','yes','y'): + val = True + elif val.lower() in ('f','false','0','no','n'): + val = False + else: + val = None # for tri-state 'booleans'. 
Yes/No/Null + #print("setting 'r' or 'added':%s"%val) + db.set_custom(book_id, val, label=label, commit=False) + + if flag == 'a': + vallist = [] + try: + existing=db.get_custom(book_id,label=label,index_is_id=True) + #print("existing:%s"%existing) + if isinstance(existing,list): + vallist = existing + elif existing: + vallist = [existing] + except: + pass + + #print("vallist:%s"%vallist) + if val: + vallist.append(val) + + db.set_custom(book_id, ", ".join(vallist), label=label, commit=False) + + # set author link if found. All current adapters have authorUrl, except anonymous on AO3. + # Moved down so author's already in the DB. + if 'authorUrl' in book['all_metadata']: + authurls = book['all_metadata']['authorUrl'].split(", ") + authorlist = [ a.replace('&',';') for a in book['author'] ] + authorids = db.new_api.get_item_ids('authors',authorlist) + authordata = db.new_api.author_data(authorids.values()) + # print("\n\nauthorids:%s"%authorids) + # print("authordata:%s"%authordata) + + author_id_to_link_map = dict() + for i, author in enumerate(authorlist): + author_id_to_link_map[authorids[author]] = authurls[i] + + # print("author_id_to_link_map:%s\n\n"%author_id_to_link_map) + db.new_api.set_link_for_authors(author_id_to_link_map) + + db.commit() + + if 'Generate Cover' in self.gui.iactions and (book['added'] or not prefs['gcnewonly']): + + #logger.debug("Do Generate Cover added:%s gcnewonly:%s"%(book['added'],prefs['gcnewonly'])) + + # force a refresh if generating cover so complex composite + # custom columns are current and correct + db.refresh_ids([book_id]) + + gc_plugin = self.gui.iactions['Generate Cover'] + setting_name = None + if prefs['allow_gc_from_ini']: + if not configuration: # might already have it from allow_custcol_from_ini + configuration = get_ffdl_config(book['url'],options['fileform']) + + # template => regexp to match => GC Setting to use. + # generate_cover_settings: + # ${category} => Buffy:? 
the Vampire Slayer => Buffy + for line in configuration.getConfig('generate_cover_settings').splitlines(): + if "=>" in line: + (template,regexp,setting) = map( lambda x: x.strip(), line.split("=>") ) + value = Template(template).safe_substitute(book['all_metadata']).encode('utf8') + # print("%s(%s) => %s => %s"%(template,value,regexp,setting)) + if re.search(regexp,value): + setting_name = setting + break + + if setting_name: + logger.debug("Generate Cover Setting from generate_cover_settings(%s)"%line) + if setting_name not in gc_plugin.get_saved_setting_names(): + logger.info("GC Name %s not found, discarding! (check personal.ini for typos)"%setting_name) + setting_name = None + + if not setting_name and book['all_metadata']['site'] in prefs['gc_site_settings']: + setting_name = prefs['gc_site_settings'][book['all_metadata']['site']] + logger.debug("Generate Cover Setting from site(%s)"%setting_name) + + if not setting_name and 'Default' in prefs['gc_site_settings']: + setting_name = prefs['gc_site_settings']['Default'] + logger.debug("Generate Cover Setting from Default(%s)"%setting_name) + + if setting_name: + logger.debug("Running Generate Cover with settings %s."%setting_name) + realmi = db.get_metadata(book_id, index_is_id=True) + gc_plugin.generate_cover_for_book(realmi,saved_setting_name=setting_name) + + if prefs['gc_polish_cover'] and \ + options['fileform'] == "epub": + # set cover inside epub from calibre's polish feature + from calibre.ebooks.oeb.polish.main import polish, ALL_OPTS + from calibre.utils.logging import Log + from collections import namedtuple + + # Couldn't find a better way to get the cover path. 
+ cover_path = os.path.join(db.library_path, db.path(book_id, index_is_id=True), 'cover.jpg') + data = {'cover':cover_path} + #print("cover_path:%s"%cover_path) + opts = ALL_OPTS.copy() + opts.update(data) + O = namedtuple('Options', ' '.join(ALL_OPTS.iterkeys())) + opts = O(**opts) + + log = Log(level=Log.DEBUG) + outfile = db.format_abspath(book_id, formmapping[options['fileform']], index_is_id=True) + #print("polish cover outfile:%s"%outfile) + polish({outfile:outfile}, opts, log, logger.info) + + + def get_clean_reading_lists(self,lists): + if lists == None or lists.strip() == "" : + return [] + else: + return filter( lambda x : x, map( lambda x : x.strip(), lists.split(',') ) ) + + def update_reading_lists(self,book_ids,add=True): + try: + rl_plugin = self.gui.iactions['Reading List'] + except: + if prefs['addtolists'] or prefs['addtoreadlists']: + message="<p>"+_("You configured FanFictionDownLoader to automatically update Reading Lists, but you don't have the %s plugin installed anymore?")%'Reading List'+"</p>" + confirm(message,'fanfictiondownloader_no_reading_list_plugin', self.gui) + return + + if prefs['addtoreadlists']: + if add: + addremovefunc = rl_plugin.add_books_to_list + else: + addremovefunc = rl_plugin.remove_books_from_list + + lists = self.get_clean_reading_lists(prefs['read_lists']) + if len(lists) < 1 : + message="<p>"+_("You configured FanFictionDownLoader to automatically update \"To Read\" Reading Lists, but you don't have any lists set?")+"</p>" + confirm(message,'fanfictiondownloader_no_read_lists', self.gui) + for l in lists: + if l in rl_plugin.get_list_names(): + #print("add good read l:(%s)"%l) + addremovefunc(l, + book_ids, + display_warnings=False) + else: + if l != '': + message="<p>"+_("You configured FanFictionDownLoader to automatically update Reading List '%s', but you don't have a list of that name?")%l+"</p>" + confirm(message,'fanfictiondownloader_no_reading_list_%s'%l, self.gui) + + if prefs['addtolists'] and (add or 
(prefs['addtolistsonread'] and prefs['addtoreadlists']) ): + lists = self.get_clean_reading_lists(prefs['send_lists']) + if len(lists) < 1 : + message="<p>"+_("You configured FanFictionDownLoader to automatically update \"Send to Device\" Reading Lists, but you don't have any lists set?")+"</p>" + confirm(message,'fanfictiondownloader_no_send_lists', self.gui) + + for l in lists: + if l in rl_plugin.get_list_names(): + #print("good send l:(%s)"%l) + rl_plugin.add_books_to_list(l, + #add_book_ids, + book_ids, + display_warnings=False) + else: + if l != '': + message="<p>"+_("You configured FanFictionDownLoader to automatically update Reading List '%s', but you don't have a list of that name?")%l+"</p>" + confirm(message,'fanfictiondownloader_no_reading_list_%s'%l, self.gui) + + def make_mi_from_book(self,book): + mi = MetaInformation(book['title'],book['author']) # author is a list. + if prefs['suppressauthorsort']: + # otherwise author names will have calibre's sort algs + # applied automatically. + mi.author_sort = ' & '.join(book['author']) + if prefs['suppresstitlesort']: + # otherwise titles will have calibre's sort algs applied + # automatically. + mi.title_sort = book['title'] + mi.set_identifiers({'url':book['url']}) + mi.publisher = book['publisher'] + mi.tags = book['tags'] + #mi.languages = ['en'] # handled in update_metadata so it can check for existing lang. + mi.pubdate = book['pubdate'] + mi.timestamp = book['timestamp'] + mi.comments = book['comments'] + mi.series = book['series'] + return mi + + # Can't make book a class because it needs to be passed into the + # bg jobs and only serializable things can be. + def make_book(self): + book = {} + book['title'] = 'Unknown' + book['author_sort'] = book['author'] = ['Unknown'] # list + book['comments'] = '' # note this is the book comments. + + book['good'] = True + book['calibre_id'] = None + book['begin'] = None + book['end'] = None + book['comment'] = '' # note this is a comment on the d/l or update. 
+ book['url'] = '' + book['site'] = '' + book['added'] = False + book['pubdate'] = None + return book + + def convert_urls_to_books(self, urls): + books = [] + uniqueurls = set() + for i, url in enumerate(urls): + book = self.convert_url_to_book(url) + if book['url'] in uniqueurls: + book['good'] = False + book['comment'] = "Same story already included." + uniqueurls.add(book['url']) + book['listorder']=i # BG d/l jobs don't come back in order. + # Didn't matter until anthologies & 'marked' successes + books.append(book) + return books + + def convert_url_to_book(self, url): + book = self.make_book() + # look here for [\d,\d] at end of url, and remove? + mc = re.match(r"^(?P<url>.*?)(?:\[(?P<begin>\d+)?(?P<comma>[,-])?(?P<end>\d+)?\])?$",url) + #print("url:(%s) begin:(%s) end:(%s)"%(mc.group('url'),mc.group('begin'),mc.group('end'))) + url = mc.group('url') + book['begin'] = mc.group('begin') + book['end'] = mc.group('end') + if book['begin'] and not mc.group('comma'): + book['end'] = book['begin'] + + self.set_book_url_and_comment(book,url) + return book + + # basic book, plus calibre_id. Assumed bad until proven + # otherwise. + def make_book_id_only(self, idval): + book = self.make_book() + book['good'] = False + book['calibre_id'] = idval + return book + + def populate_book_from_mi(self,book,mi): + book['title'] = mi.title + book['author'] = mi.authors + book['author_sort'] = mi.author_sort + if hasattr(mi,'publisher'): + book['publisher'] = mi.publisher + if hasattr(mi,'path'): + book['path'] = mi.path + if hasattr(mi,'id'): + book['calibre_id'] = mi.id + + # book data from device. Assumed bad until proven otherwise. 
+ def make_book_from_device_row(self, row): + book = self.make_book() + mi = self.gui.current_view().model().get_book_display_info(row.row()) + self.populate_book_from_mi(book,mi) + book['good'] = False + return book + + def populate_book_from_calibre_id(self, book, db=None): + mi = db.get_metadata(book['calibre_id'], index_is_id=True) + #book = {} + book['good'] = True + self.populate_book_from_mi(book,mi) + + url = self.get_story_url(db,book['calibre_id']) + self.set_book_url_and_comment(book,url) + #return book - populated passed in book. + + def set_book_url_and_comment(self,book,url): + if not url: + book['comment'] = _("No story URL found.") + book['good'] = False + book['icon'] = 'search_delete_saved.png' + book['status'] = _('Not Found') + else: + # get normalized url or None. + urlsitetuple = adapters.getNormalStoryURLSite(url) + if urlsitetuple == None: + book['url'] = url + book['comment'] = _("URL is not a valid story URL.") + book['good'] = False + book['icon']='dialog_error.png' + book['status'] = _('Bad URL') + else: + (book['url'],book['site'])=urlsitetuple + + def get_story_url(self, db, book_id=None, path=None): + if book_id == None: + identifiers={} + else: + identifiers = db.get_identifiers(book_id,index_is_id=True) + if 'url' in identifiers: + # identifiers have :->| in url. + # print("url from ident url:%s"%identifiers['url'].replace('|',':')) + return identifiers['url'].replace('|',':') + elif 'uri' in identifiers: + # identifiers have :->| in uri. 
+ # print("uri from ident uri:%s"%identifiers['uri'].replace('|',':')) + return identifiers['uri'].replace('|',':') + else: + existingepub = None + if path == None and db.has_format(book_id,'EPUB',index_is_id=True): + existingepub = db.format(book_id,'EPUB',index_is_id=True, as_file=True) + mi = get_metadata(existingepub,'EPUB') + identifiers = mi.get_identifiers() + if 'url' in identifiers: + # print("url from get_metadata:%s"%identifiers['url'].replace('|',':')) + return identifiers['url'].replace('|',':') + elif 'uri' in identifiers: + # identifiers have :->| in uri. + # print("uri from ident uri:%s"%identifiers['uri'].replace('|',':')) + return identifiers['uri'].replace('|',':') + elif path and path.lower().endswith('.epub'): + existingepub = path + + ## only epub has URL in it--at least where I can easily find it. + if existingepub: + # look for dc:source first, then scan HTML if lookforurlinhtml + link = get_dcsource(existingepub) + if link: + # print("url from get_dcsource:%s"%link) + return link + elif prefs['lookforurlinhtml']: + link = get_story_url_from_html(existingepub,self.is_good_downloader_url) + # print("url from get_story_url_from_html:%s"%link) + return link + return None + + def is_good_downloader_url(self,url): + return adapters.getNormalStoryURL(url) + + def merge_meta_books(self,existingbook,book_list,fileform): + book = self.make_book() + book['author'] = [] + book['tags'] = [] + book['url'] = '' + book['all_metadata'] = {} + book['anthology_meta_list'] = {} + book['comment'] = '' + book['added'] = True + book['good'] = True + book['calibre_id'] = None + book['series'] = None + + serieslist=[] + + # copy list top level + for b in book_list: + if b['series']: + serieslist.append(b['series'][:b['series'].index(" [")]) + #print("book series:%s"%serieslist[-1]) + + if b['publisher']: + if 'publisher' not in book: + book['publisher']=b['publisher'] + elif book['publisher']!=b['publisher']: + book['publisher']=None # if any are different, don't 
use. + + # copy authors & tags. + for k in ('author','tags'): + for v in b[k]: + if v not in book[k]: + book[k].append(v) + + # fill from first of each if not already present: + for k in ('pubdate', 'timestamp', 'updatedate'): + if k not in b or not b[k]: # not in this book? Skip it. + continue + if k not in book or not book[k]: # first is good enough for publisher. + book[k]=b[k] + + # Do these even on first to get the all_metadata settings. + # pubdate should be earliest date. + if k == 'pubdate' and book[k] >= b[k]: + book[k]=b[k] + book['all_metadata']['datePublished'] = b['all_metadata']['datePublished'] + # timestamp should be latest date. + if k == 'timestamp' and book[k] <= b[k]: + book[k]=b[k] + book['all_metadata']['dateCreated'] = b['all_metadata']['dateCreated'] + # updated should be latest date. + if k == 'updatedate' and book[k] <= b[k]: + book[k]=b[k] + book['all_metadata']['dateUpdated'] = b['all_metadata']['dateUpdated'] + + # copy list all_metadata + for (k,v) in b['all_metadata'].iteritems(): + #print("merge_meta_books v:%s k:%s"%(v,k)) + if k in ('numChapters','numWords'): + if k in b['all_metadata'] and b['all_metadata'][k]: + if k not in book['all_metadata']: + book['all_metadata'][k] = b['all_metadata'][k] + else: + # lot of work for a simple add. + book['all_metadata'][k] = unicode(int(book['all_metadata'][k].replace(',',''))+int(b['all_metadata'][k].replace(',',''))) + elif k in ('dateUpdated','datePublished','dateCreated', + 'series','status','title'): + pass # handled above, below or skip these for now, not going to do anything with them. + elif k not in book['all_metadata'] or not book['all_metadata'][k]: + book['all_metadata'][k]=v + elif v: + if k == 'description': + book['all_metadata'][k]=book['all_metadata'][k]+"\n\n"+v + else: + book['all_metadata'][k]=book['all_metadata'][k]+", "+v + # flag psuedo list element. Used so numeric + # cust cols can convert back to numbers and + # add. 
+ book['anthology_meta_list'][k]=True + + print("book['url']:%s"%book['url']) + configuration = get_ffdl_config(book['url'],fileform) + if existingbook: + book['title'] = deftitle = existingbook['title'] + book['comments'] = existingbook['comments'] + else: + book['title'] = deftitle = book_list[0]['title'] + if len(book['author']) > 1: + book['comments'] = _("Anthology containing:")+"\n" + \ + "\n".join([ _("%s by %s")%(b['title'],', '.join(b['author'])) for b in book_list ]) + else: + book['comments'] = _("Anthology containing:")+"\n" + \ + "\n".join([ b['title'] for b in book_list ]) + # book['all_metadata']['description'] + + # if all same series, use series for name. But only if all and not previous named + if len(serieslist) == len(book_list): + series = serieslist[0] + book['title'] = series + for sr in serieslist: + if series != sr: + book['title'] = deftitle + break + + logger.debug("anthology_title_pattern:%s"%configuration.getConfig('anthology_title_pattern')) + if configuration.getConfig('anthology_title_pattern'): + tmplt = Template(configuration.getConfig('anthology_title_pattern')) + book['title'] = tmplt.safe_substitute({'title':book['title']}).encode('utf8') + else: + # No setting, do fall back default. Shouldn't happen, + # should always have a version in defaults. + book['title'] = book['title']+_(" Anthology") + + book['all_metadata']['title'] = book['title'] # because custom columns are set from all_metadata + book['all_metadata']['author'] = ", ".join(book['author']) + book['author_sort']=book['author'] + for v in ['Completed','In-Progress']: + if v in book['tags']: + book['tags'].remove(v) + book['tags'].extend(configuration.getConfigList('anthology_tags')) + book['all_metadata']['anthology'] = "true" + + return book + +def split_text_to_urls(urls): + # remove dups while preserving order. 
+ dups=set() + def f(x): + x=x.strip() + if x and x not in dups: + dups.add(x) + return True + else: + return False + return filter(f,urls.strip().splitlines()) + +def escapehtml(txt): + return txt.replace("&","&").replace(">",">").replace("<","<") + +def pretty_book(d, indent=0, spacer=' '): + kindent = spacer * indent + + # if isinstance(d, list): + # return '\n'.join([(pretty_book(v, indent, spacer)) for v in d]) + + if isinstance(d, dict): + for k in ('password','username'): + if k in d and d[k]: + d[k]=_('(was set, removed for security)') + return '\n'.join(['%s%s:\n%s' % (kindent, k, pretty_book(v, indent + 1, spacer)) + for k, v in d.items()]) + return "%s%s"%(kindent, d) + diff --git a/calibre-plugin/ffdl_util.py b/calibre-plugin/ffdl_util.py new file mode 100644 index 00000000..c169d80c --- /dev/null +++ b/calibre-plugin/ffdl_util.py @@ -0,0 +1,43 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai +from __future__ import (unicode_literals, division, absolute_import, + print_function) + +__license__ = 'GPL v3' +__copyright__ = '2013, Jim Miller' +__docformat__ = 'restructuredtext en' + +from StringIO import StringIO + +from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader import adapters, exceptions +from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader.configurable import Configuration +from calibre_plugins.fanfictiondownloader_plugin.prefs import (prefs) + +def get_ffdl_personalini(): + if prefs['includeimages']: + # this is a cheat to make it easier for users. 
+ return '''[epub] +include_images:true +keep_summary_html:true +make_firstimage_cover:true +''' + prefs['personal.ini'] + else: + return prefs['personal.ini'] + +def get_ffdl_config(url,fileform="epub",personalini=None): + if not personalini: + personalini = get_ffdl_personalini() + site='unknown' + try: + site = adapters.getConfigSectionFor(url) + except Exception as e: + print("Failed trying to get ini config for url(%s): %s, using section [%s] instead"%(url,e,site)) + configuration = Configuration(site,fileform) + configuration.readfp(StringIO(get_resources("plugin-defaults.ini"))) + configuration.readfp(StringIO(personalini)) + + return configuration + +def get_ffdl_adapter(url,fileform="epub",personalini=None): + return adapters.getAdapter(get_ffdl_config(url,fileform,personalini),url) + diff --git a/calibre-plugin/images/icon.png b/calibre-plugin/images/icon.png new file mode 100644 index 00000000..e9715307 Binary files /dev/null and b/calibre-plugin/images/icon.png differ diff --git a/calibre-plugin/images/icon.xcf b/calibre-plugin/images/icon.xcf new file mode 100644 index 00000000..76d7c0c9 Binary files /dev/null and b/calibre-plugin/images/icon.xcf differ diff --git a/calibre-plugin/jobs.py b/calibre-plugin/jobs.py new file mode 100644 index 00000000..fbae9c6c --- /dev/null +++ b/calibre-plugin/jobs.py @@ -0,0 +1,240 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai +from __future__ import (unicode_literals, division, absolute_import, + print_function) + +__license__ = 'GPL v3' +__copyright__ = '2014, Jim Miller' +__copyright__ = '2011, Grant Drake <grant.drake@gmail.com>' +__docformat__ = 'restructuredtext en' + +import logging +logger = logging.getLogger(__name__) + +import time, os, traceback + +from StringIO import StringIO + +from calibre.utils.ipc.server import Server +from calibre.utils.ipc.job import ParallelJob +from calibre.constants import numeric_version as calibre_version + +# for smarten punc +from 
calibre.ebooks.oeb.polish.main import polish, ALL_OPTS +from calibre.utils.logging import Log +from collections import namedtuple + +from calibre_plugins.fanfictiondownloader_plugin.dialogs import (NotGoingToDownload, + OVERWRITE, OVERWRITEALWAYS, UPDATE, UPDATEALWAYS, ADDNEW, SKIP, CALIBREONLY) +from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader import adapters, writers, exceptions +from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader.epubutils import get_update_data + +from calibre_plugins.fanfictiondownloader_plugin.ffdl_util import (get_ffdl_adapter, get_ffdl_config) +# ------------------------------------------------------------------------------ +# +# Functions to perform downloads using worker jobs +# +# ------------------------------------------------------------------------------ + +def do_download_worker(book_list, options, + cpus, notification=lambda x,y:x): + ''' + Master job, to launch child jobs to extract ISBN for a set of books + This is run as a worker job in the background to keep the UI more + responsive and get around the memory leak issues as it will launch + a child job for each book as a worker process + ''' + server = Server(pool_size=cpus) + + logger.info(options['version']) + total = 0 + alreadybad = [] + # Queue all the jobs + logger.info("Adding jobs for URLs:") + for book in book_list: + logger.info("%s"%book['url']) + if book['good']: + total += 1 + args = ['calibre_plugins.fanfictiondownloader_plugin.jobs', + 'do_download_for_worker', + (book,options)] + job = ParallelJob('arbitrary_n', + "url:(%s) id:(%s)"%(book['url'],book['calibre_id']), + done=None, + args=args) + job._book = book + server.add_job(job) + else: + # was already bad before the subprocess ever started. + alreadybad.append(book) + + # This server is an arbitrary_n job, so there is a notifier available. 
+ # Set the % complete to a small number to avoid the 'unavailable' indicator + notification(0.01, _('Downloading FanFiction Stories')) + + # dequeue the job results as they arrive, saving the results + count = 0 + while True: + job = server.changed_jobs_queue.get() + # A job can 'change' when it is not finished, for example if it + # produces a notification. Ignore these. + job.update() + if not job.is_finished: + continue + # A job really finished. Get the information. + book_list.remove(job._book) + book_list.append(job.result) + book_id = job._book['calibre_id'] + count = count + 1 + notification(float(count)/total, '%d of %d stories finished downloading'%(count,total)) + # Add this job's output to the current log + logger.info('Logfile for book ID %s (%s)'%(book_id, job._book['title'])) + logger.info(job.details) + + if count >= total: + logger.info("\n"+_("Successful:")+"\n%s\n"%("\n".join([book['url'] for book in + filter(lambda x: x['good'], book_list) ] ) ) ) + logger.info("\n"+_("Unsuccessful:")+"\n%s\n"%("\n".join([book['url'] for book in + filter(lambda x: not x['good'], book_list) ] ) ) ) + break + + server.close() + + # return the book list as the job result + return book_list + +def do_download_for_worker(book,options,notification=lambda x,y:x): + ''' + Child job, to download story when run as a worker job + ''' + try: + book['comment'] = _('Download started...') + + configuration = get_ffdl_config(book['url'], + options['fileform'], + options['personal.ini']) + + if not options['updateepubcover'] and 'epub_for_update' in book and options['collision'] in (UPDATE, UPDATEALWAYS): + configuration.set("overrides","never_make_cover","true") + + # images only for epub, html, even if the user mistakenly + # turned it on else where. 
+ if options['fileform'] not in ("epub","html"): + configuration.set("overrides","include_images","false") + + adapter = adapters.getAdapter(configuration,book['url']) + adapter.is_adult = book['is_adult'] + adapter.username = book['username'] + adapter.password = book['password'] + adapter.setChaptersRange(book['begin'],book['end']) + + adapter.load_cookiejar(options['cookiejarfile']) + logger.debug("cookiejar:%s"%adapter.cookiejar) + adapter.set_pagecache(options['pagecache']) + + story = adapter.getStoryMetadataOnly() + if 'calibre_series' in book: + adapter.setSeries(book['calibre_series'][0],book['calibre_series'][1]) + + # set PI version instead of default. + if 'version' in options: + story.setMetadata('version',options['version']) + + writer = writers.getWriter(options['fileform'],configuration,adapter) + + outfile = book['outfile'] + + ## No need to download at all. Shouldn't ever get down here. + if options['collision'] in (CALIBREONLY): + logger.info("Skipping CALIBREONLY 'update' down inside worker--this shouldn't be happening...") + book['comment'] = 'Metadata collected.' + + ## checks were done earlier, it's new or not dup or newer--just write it. + elif options['collision'] in (ADDNEW, SKIP, OVERWRITE, OVERWRITEALWAYS) or \ + ('epub_for_update' not in book and options['collision'] in (UPDATE, UPDATEALWAYS)): + + # preserve logfile even on overwrite. + if 'epub_for_update' in book: + (urlignore, + chaptercountignore, + oldchaptersignore, + oldimgsignore, + oldcoverignore, + calibrebookmarkignore, + # only logfile set in adapter, so others aren't used. + adapter.logfile) = get_update_data(book['epub_for_update']) + + # change the existing entries id to notid so + # write_epub writes a whole new set to indicate overwrite. 
+ if adapter.logfile: + adapter.logfile = adapter.logfile.replace("span id","span notid") + + logger.info("write to %s"%outfile) + writer.writeStory(outfilename=outfile, forceOverwrite=True) + book['comment'] = 'Download %s completed, %s chapters.'%(options['fileform'],story.getMetadata("numChapters")) + + ## checks were done earlier, just update it. + elif 'epub_for_update' in book and options['collision'] in (UPDATE, UPDATEALWAYS): + + # update now handled by pre-populating the old images and + # chapters in the adapter rather than merging epubs. + urlchaptercount = int(story.getMetadata('numChapters').replace(',','')) + (url, + chaptercount, + adapter.oldchapters, + adapter.oldimgs, + adapter.oldcover, + adapter.calibrebookmark, + adapter.logfile) = get_update_data(book['epub_for_update']) + + # dup handling from ffdl_plugin needed for anthology updates. + if options['collision'] == UPDATE: + if chaptercount == urlchaptercount: + book['comment']=_("Already contains %d chapters. Reuse as is.")%chaptercount + book['outfile'] = book['epub_for_update'] # for anthology merge ops. + return book + + # dup handling from ffdl_plugin needed for anthology updates. + if chaptercount > urlchaptercount: + raise NotGoingToDownload(_("Existing epub contains %d chapters, web site only has %d. 
Use Overwrite to force update.") % (chaptercount,urlchaptercount),'dialog_error.png') + + if not (options['collision'] == UPDATEALWAYS and chaptercount == urlchaptercount) \ + and adapter.getConfig("do_update_hook"): + chaptercount = adapter.hookForUpdates(chaptercount) + + logger.info("Do update - epub(%d) vs url(%d)" % (chaptercount, urlchaptercount)) + logger.info("write to %s"%outfile) + + writer.writeStory(outfilename=outfile, forceOverwrite=True) + + book['comment'] = _('Update %s completed, added %s chapters for %s total.')%\ + (options['fileform'],(urlchaptercount-chaptercount),urlchaptercount) + + if options['smarten_punctuation'] and options['fileform'] == "epub" \ + and calibre_version >= (0, 9, 39): + # do smarten_punctuation from calibre's polish feature + data = {'smarten_punctuation':True} + opts = ALL_OPTS.copy() + opts.update(data) + O = namedtuple('Options', ' '.join(ALL_OPTS.iterkeys())) + opts = O(**opts) + + log = Log(level=Log.DEBUG) + # report = [] + polish({outfile:outfile}, opts, log, logger.info) # report.append + + except NotGoingToDownload as d: + book['good']=False + book['comment']=unicode(d) + book['icon'] = d.icon + + except Exception as e: + book['good']=False + book['comment']=unicode(e) + book['icon']='dialog_error.png' + book['status'] = 'Error' + logger.info("Exception: %s:%s"%(book,unicode(e))) + traceback.print_exc() + + #time.sleep(10) + return book diff --git a/calibre-plugin/plugin-import-name-fanfictiondownloader_plugin.txt b/calibre-plugin/plugin-import-name-fanfictiondownloader_plugin.txt new file mode 100644 index 00000000..e69de29b diff --git a/calibre-plugin/prefs.py b/calibre-plugin/prefs.py new file mode 100644 index 00000000..90af87bf --- /dev/null +++ b/calibre-plugin/prefs.py @@ -0,0 +1,150 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai +from __future__ import (unicode_literals, division, absolute_import, + print_function) + +__license__ = 'GPL v3' +__copyright__ = '2013, Jim 
Miller' +__docformat__ = 'restructuredtext en' + +import copy + +from calibre.utils.config import JSONConfig +from calibre.gui2.ui import get_gui + +from calibre_plugins.fanfictiondownloader_plugin.dialogs import OVERWRITE +from calibre_plugins.fanfictiondownloader_plugin.common_utils import get_library_uuid +PREFS_NAMESPACE = 'FanFictionDownLoaderPlugin' +PREFS_KEY_SETTINGS = 'settings' + +# Set defaults used by all. Library specific settings continue to +# take from here. +default_prefs = {} +default_prefs['personal.ini'] = get_resources('plugin-example.ini') +default_prefs['rejecturls'] = '' +default_prefs['rejectreasons'] = '''Sucked +Boring +Dup from another site''' +default_prefs['reject_always'] = False + +default_prefs['updatemeta'] = True +default_prefs['updatecover'] = False +default_prefs['updateepubcover'] = False +default_prefs['keeptags'] = False +default_prefs['suppressauthorsort'] = False +default_prefs['suppresstitlesort'] = False +default_prefs['mark'] = False +default_prefs['showmarked'] = False +default_prefs['autoconvert'] = False +default_prefs['urlsfromclip'] = True +default_prefs['updatedefault'] = True +default_prefs['fileform'] = 'epub' +default_prefs['collision'] = OVERWRITE +default_prefs['deleteotherforms'] = False +default_prefs['adddialogstaysontop'] = False +default_prefs['includeimages'] = False +default_prefs['lookforurlinhtml'] = False +default_prefs['checkforseriesurlid'] = True +default_prefs['checkforurlchange'] = True +default_prefs['injectseries'] = False +default_prefs['smarten_punctuation'] = False + +default_prefs['send_lists'] = '' +default_prefs['read_lists'] = '' +default_prefs['addtolists'] = False +default_prefs['addtoreadlists'] = False +default_prefs['addtolistsonread'] = False + +default_prefs['gcnewonly'] = False +default_prefs['gc_site_settings'] = {} +default_prefs['allow_gc_from_ini'] = True +default_prefs['gc_polish_cover'] = False + +default_prefs['countpagesstats'] = [] + +default_prefs['errorcol'] = '' 
default_prefs['custom_cols'] = {}
default_prefs['custom_cols_newonly'] = {}
default_prefs['allow_custcol_from_ini'] = True

default_prefs['std_cols_newonly'] = {}

# This is where all preferences for this plugin *were* stored.
# Remember that this name (i.e. plugins/fanfictiondownloader_plugin) is also
# in a global namespace, so make it as unique as possible.
# You should always prefix your config file name with plugins/,
# so as to ensure you don't accidentally clobber a calibre config file.
old_prefs = JSONConfig('plugins/fanfictiondownloader_plugin')

def set_library_config(library_config, db):
    """Persist *library_config* into the calibre library database under
    this plugin's namespace/key."""
    db.prefs.set_namespaced(PREFS_NAMESPACE,
                            PREFS_KEY_SETTINGS,
                            library_config)

def get_library_config(db):
    """Return this plugin's per-library settings dict for *db*.

    If the settings are still stored in the old JSON config file, migrate
    them into the library database first; the old JSON entry is renamed
    (kept under a "migrated..." key) rather than silently discarded.
    Falls back to a deep copy of default_prefs when the library has no
    stored settings yet.
    """
    library_id = get_library_uuid(db)
    library_config = None
    # Check whether this is a configuration needing to be migrated
    # from json into database. If so: get it, set it, rename it in json.
    if library_id in old_prefs:
        library_config = old_prefs[library_id]
        set_library_config(library_config, db)
        old_prefs["migrated to library db %s"%library_id] = old_prefs[library_id]
        del old_prefs[library_id]

    if library_config is None:
        library_config = db.prefs.get_namespaced(PREFS_NAMESPACE, PREFS_KEY_SETTINGS,
                                                 copy.deepcopy(default_prefs))
    return library_config

# fake out so I don't have to change the prefs calls anywhere. The
# Java programmer in me is offended by op-overloading, but it's very
# tidy.
class PrefsFacade():
    """Dict-like facade over the per-library plugin settings.

    Re-reads the stored settings whenever the current library changes,
    and falls back to default_prefs for keys that were never set.
    Mutations are held in memory until save_to_db() is called.
    """

    def _get_db(self):
        # In the GUI plugin we want the current db so we detect when
        # it's changed. CLI plugin calls need to pass db in.
        if self.passed_db:
            return self.passed_db
        else:
            return get_gui().current_db

    def __init__(self, passed_db=None):
        self.default_prefs = default_prefs
        self.libraryid = None
        self.current_prefs = None
        self.passed_db = passed_db

    def _get_prefs(self):
        # Reload settings on first use or when the current library changed.
        libraryid = get_library_uuid(self._get_db())
        if self.current_prefs is None or self.libraryid != libraryid:
            self.libraryid = libraryid
            self.current_prefs = get_library_config(self._get_db())
        return self.current_prefs

    def __getitem__(self, k):
        prefs = self._get_prefs()
        if k not in prefs:
            # pulls from default_prefs automatically if the key was
            # never set in this library's stored settings
            return self.default_prefs[k]
        return prefs[k]

    def __setitem__(self, k, v):
        # NOTE: change is in-memory only until save_to_db() is called.
        prefs = self._get_prefs()
        prefs[k] = v

    def __delitem__(self, k):
        prefs = self._get_prefs()
        if k in prefs:
            del prefs[k]

    def save_to_db(self):
        """Write the (possibly modified) settings back to the library db."""
        set_library_config(self._get_prefs(), self._get_db())

prefs = PrefsFacade()
+msgid "UI plugin to download FanFiction stories from various sites."
+msgstr "UI Plugin um FanFiction-Stories von verschiedenen Seiten herunterzuladen."
<br " +"/>This sets what that option will default to." +msgstr "Bei jedem Download bietet FFDL die Option, das Ausgabeformat auszuwählen. <br />Dies legt fest, welche Standardeinstellung gesetzt werden." + +#: config.py:348 +msgid "Default Output &Format:" +msgstr "Standardeinstellung Ausgabe-Format:" + +#: config.py:363 +msgid "" +"On each download, FFDL offers an option of what happens if that story " +"already exists. <br />This sets what that option will default to." +msgstr "Bei jedem Download bietet FFDL die Option, was geschehen soll, wenn diese Story bereits vorhanden ist. <br />Dies legt fest, welche Standardeinstellung gesetzt werden." + +#: config.py:365 +msgid "Default If Story Already Exists?" +msgstr "Standardeinstellung, wenn die Story bereits vorhanden ist?" + +#: config.py:379 +msgid "Default Update Calibre &Metadata?" +msgstr "Standardeinstellung für die Aktualisierung der Calibre-Metadaten?" + +#: config.py:380 +msgid "" +"On each download, FFDL offers an option to update Calibre's metadata (title," +" author, URL, tags, custom columns, etc) from the web site. <br />This sets " +"whether that will default to on or off. <br />Columns set to 'New Only' in " +"the column tabs will only be set for new books." +msgstr "Bei jedem Download bietet FFDL die Option, die Calibre-Metadaten (Titel, Autor, URL, Schlagworte, benutzerdefinierte Spalten usw.) von der Web-Seite zu aktualisieren.<br />Dies legt fest, ob die Standardeinstellung auf an oder aus gesetzt ist.<br />\nSpalten, die mit \"Nur neue\" in den benutzerdefinierten Spalten gesetzt sind, werden nur bei neuen Büchern gefüllt." + +#: config.py:384 +msgid "Default Update EPUB Cover when Updating EPUB?" +msgstr "Als Standardeinstellung das EPUB-Cover aktualisieren, wenn das EPUB aktualisiert wird?" 
+ +#: config.py:385 +msgid "" +"On each download, FFDL offers an option to update the book cover image " +"<i>inside</i> the EPUB from the web site when the EPUB is updated.<br />This" +" sets whether that will default to on or off." +msgstr "Bei jedem Download bietet FFDL die Option, das Buch-Cover-Image <i>im</i> EPUB mit den Daten der Web-Seite zu aktualisieren, wenn ein EPUB aktualisiert wird.<br />Dies legt fest, ob die Standardeinstellung auf an oder aus gesetzt ist." + +#: config.py:389 +msgid "Smarten Punctuation (EPUB only)" +msgstr "Intelligente Zeichensetzung (nur EPUB)" + +#: config.py:390 +msgid "" +"Run Smarten Punctuation from Calibre's Polish Book feature on each EPUB " +"download and update." +msgstr "Ausführen der Zeichensetzung von Calibre´s Polish Book feature (eBook-Feinabstimmung) bei jedem EPUB Download und Aktualisierung." + +#: config.py:395 +msgid "Updating Calibre Options" +msgstr "Calibre Optionen beim Aktualisieren" + +#: config.py:399 +msgid "Delete other existing formats?" +msgstr "Andere vorhandene Formate löschen?" + +#: config.py:400 +msgid "" +"Check this to automatically delete all other ebook formats when updating an existing book.\n" +"Handy if you have both a Nook(epub) and Kindle(mobi), for example." +msgstr "Markieren sie dies um automatisch alle anderen eBook-Formate zu löschen, wenn ein vorhandenes Buch aktualisiert wird.<br />Praktisch, wenn sie zum Beispiel sowohl ein Nook (epub) und ein Kindle (mobi) haben." + +#: config.py:404 +msgid "Update Calibre Cover when Updating Metadata?" +msgstr "Calibre Cover aktualisieren, wenn die Metadaten aktualiert werden?" + +#: config.py:405 +msgid "" +"Update calibre book cover image from EPUB when metadata is updated. (EPUB only.)\n" +"Doesn't go looking for new images on 'Update Calibre Metadata Only'." 
+msgstr "Aktualisiert das Calibre-Buch-Cover des EPUB´s, wenn die Metadaten aktualisiert werden (nur EPUB).\nBei \"Nur Calibre-Medaten akualisieren\" werden keine neuen Bilder gesucht." + +#: config.py:409 +msgid "Keep Existing Tags when Updating Metadata?" +msgstr "Vorhandene Schlagworte behalten, wenn die Metadaten aktualisiert werden?" + +#: config.py:410 +msgid "" +"Existing tags will be kept and any new tags added.\n" +"%(cmplt)s and %(inprog)s tags will be still be updated, if known.\n" +"%(lul)s tags will be updated if %(lus)s in %(is)s.\n" +"(If Tags is set to 'New Only' in the Standard Columns tab, this has no effect.)" +msgstr "Vorhanden Schlagworte werden beibehalten und alle neuen hinzugefügt.\n%(cmplt)s (fertiggestellt) und %(inprog)s (in Arbeit) werden trotzdem aktualisiert, wenn bekannt.\n%(lul)s (zuletzt aktualisiert) wird aktualisiert, wenn %(lus)s in %(is)s enthalten ist.\n(Wenn die Spalte mit \"Nur neue\" in den benutzerdefinierten Spalten gesetzt sind, hat dies keinen Effekt.)" + +#: config.py:414 +msgid "Force Author into Author Sort?" +msgstr "Autor in Autoren-Sortierung übernehmen?" + +#: config.py:415 +msgid "" +"If checked, the author(s) as given will be used for the Author Sort, too.\n" +"If not checked, calibre will apply it's built in algorithm which makes 'Bob Smith' sort as 'Smith, Bob', etc." +msgstr "Wenn markiert, wird der Autor (die Autoren) wie vorgegeben auch in die Autoren-Sortierung übernommen.\nWenn nicht markiert, wird Calibre den eingebauten Algorithmus verwenden, was 'Bob Smith' in 'Smith, Bob' usw. umwandelt." + +#: config.py:419 +msgid "Force Title into Title Sort?" +msgstr "Titel in Titel-Sortierung übernehmen?" + +#: config.py:420 +msgid "" +"If checked, the title as given will be used for the Title Sort, too.\n" +"If not checked, calibre will apply it's built in algorithm which makes 'The Title' sort as 'Title, The', etc." 
+msgstr "Wenn markiert, wird der Titel wie vorgegeben auch in die Titel-Sortierung übernommen.\n\nWenn nicht markiert, wird Calibre den eingebauten Algorithmus verwenden, was 'Der Titel' in 'Titel, Der' usw. umwandelt." + +#: config.py:424 +msgid "Check for existing Series Anthology books?" +msgstr "Auf vorhandene Serien-Sammelband-Bücher prüfen?" + +#: config.py:425 +msgid "" +"Check for existings Series Anthology books using each new story's series URL before downloading.\n" +"Offer to skip downloading if a Series Anthology is found." +msgstr "Unter Verwendung jeder neuen Stories-Serien-URL vor dem Herunterladen auf vorhandene Serien-Sammelband-Bücher prüfen.\nVorschlag, das Herunterladen zu überspringen, wenn ein Serien-Sammelband gefunden wird." + +#: config.py:429 +msgid "Check for changed Story URL?" +msgstr "Auf geänderte Story-URL prüfen?" + +#: config.py:430 +msgid "" +"Warn you if an update will change the URL of an existing book.\n" +"fanfiction.net URLs will change from http to https silently." +msgstr "Sie werden gewarnt, wenn eine Aktualisierung die URL eines vorhandenen Buches ändern wird.\n\nfanfiction.net URL´s werden von http auf https ohne Hinweis geändert." + +#: config.py:434 +msgid "Search EPUB text for Story URL?" +msgstr "Durchsuche den EPUB-Text nach einer Story-URL?" + +#: config.py:435 +msgid "" +"Look for first valid story URL inside EPUB text if not found in metadata.\n" +"Somewhat risky, could find wrong URL depending on EPUB content.\n" +"Also finds and corrects bad ffnet URLs from ficsaver.com files." +msgstr "Wenn in den Metadaten keine Story-URL gefunden wird, wird nach der ersten gültigen gesucht.\n\nEtwas riskant, könnte - abhängig vom EPUB-Inhalt - die falsche URL finden.\n\nFindet und korrigiert ebenfalls ungeeignete ffnet URL´s von ficsaver.com-Datein." + +#: config.py:439 +msgid "Mark added/updated books when finished?" +msgstr "Hinzugefügte/aktualisierte Bücher markieren, wenn fertiggestellt?" 
+ +#: config.py:440 +msgid "" +"Mark added/updated books when finished. Use with option below.\n" +"You can also manually search for 'marked:ffdl_success'.\n" +"'marked:ffdl_failed' is also available, or search 'marked:ffdl' for both." +msgstr "Hinzugefügte/aktualisierte Bücher markieren, wenn fertiggestellt. Mit nachfolgender Option nutzen.\nEs kann ebenso manuell für 'marked:ffdl_success' (als \"erfolgreich\" markierte) gesucht werden.\n\nmarked:ffdl_failed' (als \"fehlgeschlagen\" markierte) ist ebenfalls verfügbar oder die Suche 'marked:ffdl' für beides." + +#: config.py:444 +msgid "Show Marked books when finished?" +msgstr "Hinzugefügte/aktualisierte Bücher anzeigen, wenn fertiggestellt?" + +#: config.py:445 +msgid "" +"Show Marked added/updated books only when finished.\n" +"You can also manually search for 'marked:ffdl_success'.\n" +"'marked:ffdl_failed' is also available, or search 'marked:ffdl' for both." +msgstr "Hinzugefügte/aktualisierte Bücher nur anzeigen, wenn erledigt.\nEs kann ebenso manuell für 'marked:ffdl_success' (als \"erfolgreich\" markierte) gesucht werden.\nmarked:ffdl_failed' (als \"fehlgeschlagen\" markierte) ist ebenfalls verfügbar oder die Suche 'marked:ffdl' für beides." + +#: config.py:449 +msgid "Automatically Convert new/update books?" +msgstr "Neue/aktualisierte Bücher automatisch konvertieren?" + +#: config.py:450 +msgid "" +"Automatically call calibre's Convert for new/update books.\n" +"Converts to the current output format as chosen in calibre's\n" +"Preferences->Behavior settings." +msgstr "Automatisches Aufrufen von Calibre´s Konvertierung von neuen/aktualisierten Büchern.\nWandelt in das bevorzugte Ausgabeformat um, das aktuell in den Calibre Einstellungen-->Verhalten gewählt ist." + +#: config.py:454 +msgid "GUI Options" +msgstr "GUI Optionen:" + +#: config.py:458 +msgid "Take URLs from Clipboard?" +msgstr "URL´s aus der Zwischenablage nehmen?" 
+ +#: config.py:459 +msgid "Prefill URLs from valid URLs in Clipboard when Adding New." +msgstr "Bei \"Neu hinzufügen\" werden automatisch die gültigen URL´s aus der Zwischenablage eingefügt." + +#: config.py:463 +msgid "Default to Update when books selected?" +msgstr "Standardmäßig aktualisieren, wenn Bücher ausgewählt sind?" + +#: config.py:464 +msgid "" +"The top FanFictionDownLoader plugin button will start Update if\n" +"books are selected. If unchecked, it will always bring up 'Add New'." +msgstr "Die oberste Taste im FFDL-Plugin wird, wenn Bücher ausgewählt sind, diese aktualisieren. Wenn nicht markiert, kommt immer zuerst \"Neu hinzufügen\"." + +#: config.py:468 +msgid "Keep 'Add New from URL(s)' dialog on top?" +msgstr "\"Neu von URL´s hinzufügen\" immer an erster Stelle behalten?" + +#: config.py:469 +msgid "" +"Instructs the OS and Window Manager to keep the 'Add New from URL(s)'\n" +"dialog on top of all other windows. Useful for dragging URLs onto it." +msgstr "Weist das Betriebssystem und den Fenstermanager an \"Neu von URL´s hinzufügen\" immer an erster Stelle zu halten. Nützlich um URL´s darauf zu ziehen." + +#: config.py:473 +msgid "Misc Options" +msgstr "Verschiedene Optionen" + +#: config.py:478 +msgid "Include images in EPUBs?" +msgstr "Bilder in EPUB´s einfügen?" + +#: config.py:479 +msgid "" +"Download and include images in EPUB stories. This is equivalent to " +"adding:%(imgset)s ...to the top of %(pini)s. Your settings in %(pini)s will" +" override this." +msgstr "Herunterladen und einfügen von Bildern im EPUB-Stories. Dies entspricht dem hinzufügen von: %(imgset)s... an den Anfang von %(pini)s. Ihre Einstellungen in %(pini)s werden dies überschreiben." + +#: config.py:483 +msgid "Inject calibre Series when none found?" +msgstr "Calibre-Serie einfügen, wenn keine gefunden wurde?" + +#: config.py:484 +msgid "" +"If no series is found, inject the calibre series (if there is one) so it " +"appears on the FFDL title page(not cover)." 
+msgstr "Wenn keine Serie gefunden wurde, die Calibre-Serie einfügen, damit sie auf der FFDL-Titelseite erscheint (nicht auf dem Cover)." + +#: config.py:488 +msgid "Reject List" +msgstr "Ablehnungsliste" + +#: config.py:492 +msgid "Edit Reject URL List" +msgstr "Ablehnungsliste bearbeiten" + +#: config.py:493 +msgid "Edit list of URLs FFDL will automatically Reject." +msgstr "Liste von URL´s bearbeiten, die FFDL automatisch ablehnen wird." + +#: config.py:497 config.py:571 +msgid "Add Reject URLs" +msgstr "Abzulehnende URL´s hinzufügen" + +#: config.py:498 +msgid "Add additional URLs to Reject as text." +msgstr "Zusätzliche URL´s als Text zur Ablehnungsliste hinzufügen." + +#: config.py:502 +msgid "Edit Reject Reasons List" +msgstr "Ablehnungsgründeliste bearbeiten" + +#: config.py:503 config.py:562 +msgid "Customize the Reasons presented when Rejecting URLs" +msgstr "Gründe für die Ablehnung von URL´s benutzerdefinieren" + +#: config.py:507 +msgid "Reject Without Confirmation?" +msgstr "Ohne Bestätigung zurückweisen?" + +#: config.py:508 +msgid "Always reject URLs on the Reject List without stopping and asking." +msgstr "Generell URL´s die auf Ablehnungsliste stehen zurückweisen, ohne nachzufragen." + +#: config.py:546 +msgid "Edit Reject URLs List" +msgstr "Ablehnungsliste bearbeiten" + +#: config.py:560 +msgid "Reject Reasons" +msgstr "Ablehnungsgründe" + +#: config.py:561 +msgid "Customize Reject List Reasons" +msgstr "Ablehnungsgründeliste benutzerdefinieren" + +#: config.py:569 +msgid "Reason why I rejected it" +msgstr "Grund der Ablehnung" + +#: config.py:569 +msgid "Title by Author" +msgstr "Titel von Autor" + +#: config.py:572 +msgid "" +"Add Reject URLs. Use: <b>http://...,note</b> or <b>http://...,title by " +"author - note</b><br>Invalid story URLs will be ignored." +msgstr "Abzulehnende URL´s hinzufügen. Verwende: <b>http://...,Notiz</b> oder <b>http://...,Titel von Autor - Notiz</b><br>\nUngültige URL´s werden ignoriert." 
+ +#: config.py:573 +msgid "" +"One URL per line:\n" +"<b>http://...,note</b>\n" +"<b>http://...,title by author - note</b>" +msgstr "Eine URL pro Zeile\n<b>http://...,Notiz</b>\n<b>http://...,Titel von Autor - Notiz</b>" + +#: config.py:575 dialogs.py:1012 +msgid "Add this reason to all URLs added:" +msgstr "Bei allen oben angegebenen URL`s diesen Grund anfügen:" + +#: config.py:590 +msgid "" +"These settings provide more detailed control over what metadata will be " +"displayed inside the ebook as well as let you set %(isa)s and %(u)s/%(p)s " +"for different sites." +msgstr "Diese Einstellungen bieten eine detailliertere Kontrolle darüber, welche Metadaten im eBook angezeigt werden und es kann auch die Altersverifikation (\"%(isa)s\") und Benutzer/Passwort für verschiedene Seiten gesetzt werden." + +#: config.py:608 +msgid "View Defaults" +msgstr "Ansicht der Standardeinstellungen" + +#: config.py:609 +msgid "" +"View all of the plugin's configurable settings\n" +"and their default settings." +msgstr "Anzeige aller konfigurierbaren Einstellungen des Plugins und deren Standardeinstellung." + +#: config.py:627 +msgid "Plugin Defaults (%s) (Read-Only)" +msgstr "Plugin Standardeinstellungen (%s) (nur lesen)" + +#: config.py:628 config.py:634 +msgid "" +"These are all of the plugin's configurable options\n" +"and their default settings." +msgstr "Dies ist die Anzeige aller konfigurierbaren Einstellungen des Plugins und deren Standardeinstellung." + +#: config.py:629 +msgid "Plugin Defaults" +msgstr "Plugin Standardeinstellungen" + +#: config.py:645 dialogs.py:542 dialogs.py:645 +msgid "OK" +msgstr "OK" + +#: config.py:665 +msgid "" +"These settings provide integration with the %(rl)s Plugin. %(rl)s can " +"automatically send to devices and change custom columns. You have to create" +" and configure the lists in %(rl)s to be useful." +msgstr "Diese Einstellungen bieten die Integration mit dem %(rl)s Plugin. 
Die %(rl)s kann automatisch an das Gerät senden und benutzerdefinierte Spalten ändern. Sie müssen die Listen im %(rl)s Plugin erstellen und konfigurieren, um sie verwenden zu können." + +#: config.py:670 +msgid "Add new/updated stories to \"Send to Device\" Reading List(s)." +msgstr "Neue/aktualiserte Stories auf \"ans Gerät senden\" Leseliste(n) hinzufügen." + +#: config.py:671 +msgid "" +"Automatically add new/updated stories to these lists in the %(rl)s plugin." +msgstr "Automatisch neue/aktualisierte Stories zu diesen Listen im %(rl)s Plugin hinzufügen." + +#: config.py:676 +msgid "\"Send to Device\" Reading Lists" +msgstr "\"ans Gerät senden\" Leselisten" + +#: config.py:677 config.py:680 config.py:693 config.py:696 +msgid "" +"When enabled, new/updated stories will be automatically added to these " +"lists." +msgstr "Wenn markiert, werden neue/aktualisierte Stories automatisch zu diesen Listen hinzugefügt." + +#: config.py:686 +msgid "Add new/updated stories to \"To Read\" Reading List(s)." +msgstr "Neue/aktualiserte Stories auf \"zu lesen\" Leseliste(n) hinzufügen." + +#: config.py:687 +msgid "" +"Automatically add new/updated stories to these lists in the %(rl)s plugin.\n" +"Also offers menu option to remove stories from the \"To Read\" lists." +msgstr "Automatisch neue/aktualisierte Stories zu diesen Listen im %(rl)s Plugin hinzufügen.\nBietet auch die Menü-Option, Stories von den \"zu lesen\" Leseliste(n) zu entfernen." + +#: config.py:692 +msgid "\"To Read\" Reading Lists" +msgstr "\"zu lesen\" Leseliste(n)" + +#: config.py:702 +msgid "Add stories back to \"Send to Device\" Reading List(s) when marked \"Read\"." +msgstr "Stories wieder auf \"ans Gerät senden\" Leseliste(n) hinzufügen, wenn mit \"gelesen\" markiert." 
+ +#: config.py:703 +msgid "" +"Menu option to remove from \"To Read\" lists will also add stories back to " +"\"Send to Device\" Reading List(s)" +msgstr "Die Menü-Option, Bücher von der \"zu lesen\" Liste zu entfernen, wird gleichzeitig die Stories zurück auf die \"ans Gerät senden\" Leseliste(n) fügen." + +#: config.py:725 +msgid "" +"The %(gc)s plugin can create cover images for books using various metadata " +"and configurations. If you have GC installed, FFDL can run GC on new " +"downloads and metadata updates. Pick a GC setting by site or Default." +msgstr "Das %(gc)s Plugin kann Cover-Bilder erzeugen, mit Hilfe verschiedener Metadaten und Konfigurationen. Wenn sie GC installiert haben, kann FFDL dies auf neuen Downloads und Metadaten-Aktualisierungen anwenden. Wählen Sie eine GC-Einstellung pro Seite oder als Standardeinstellung." + +#: config.py:743 config.py:747 config.py:760 +msgid "Default" +msgstr "Standardeinstellung" + +#: config.py:748 +msgid "" +"On Metadata update, run %(gc)s with this setting, if not selected for " +"specific site." +msgstr "Beim Aktualisieren der Metadaten soll %(gc)s mit dieser Einstellung verwendet werden, wenn für diese bestimmte Seite keine Auswahl festgelegt wurde." + +#: config.py:751 +msgid "On Metadata update, run %(gc)s with this setting for %(site)s stories." +msgstr "Beim Aktualisieren der Metadaten soll %(gc)s für die %(site)s Stories diese Einstellung verwenden." + +#: config.py:774 +msgid "Run %(gc)s Only on New Books" +msgstr "%(gc)s nur bei neuen Bücher anwenden" + +#: config.py:775 +msgid "Default is to run GC any time the calibre metadata is updated." +msgstr "Als Standardeinstellung GC jedesmal anwenden, wenn die Calibre-Metadaten aktualisiert werden." + +#: config.py:779 +msgid "Allow %(gcset)s from %(pini)s to override" +msgstr "Ermöglicht es %(gcset)s von %(pini)s diese Einstellungen zu überschreiben." 
+msgstr "Calibre´s \"Bücher Perfektionieren\" Funktion wird verwendet, um das erstellte Cover zu aktualisieren oder in das Buch einzufügen, nur EPUB."
+msgstr "Welche Spalte und Algorithmus verwendet werden, ist in %(cp)s festgelegt." + +#: config.py:818 +msgid "" +"Will overwrite word count from FFDL metadata if set to update the same " +"custom column." +msgstr "Überschreibt die Wortanzahl von den FFDL-Metadaten, wenn die Aktualisierung für die gleiche benutzerdefinierte Spalte gesetzt ist." + +#: config.py:849 +msgid "" +"These controls aren't plugin settings as such, but convenience buttons for " +"setting Keyboard shortcuts and getting all the FanFictionDownLoader " +"confirmation dialogs back again." +msgstr "Diese Werte sind nicht Plugin-Einstellungen als solche, sondern Komfort-Schaltflächen, um Tastenkombination festzulegen und alle FFDL-Bestätigungsdialoge wieder zurück zu setzen." + +#: config.py:854 +msgid "Keyboard shortcuts..." +msgstr "Tastenkombinationen..." + +#: config.py:855 +msgid "Edit the keyboard shortcuts associated with this plugin" +msgstr "Bearbeiten Sie die Tastenkombinationen, die mit diesem Plugin verbunden sind." + +#: config.py:859 +msgid "Reset disabled &confirmation dialogs" +msgstr "Setzt alle deaktivierten Bestätigungsdialoge zurück." + +#: config.py:860 +msgid "Reset all show me again dialogs for the FanFictionDownLoader plugin" +msgstr "Setzt alle Dialoge, bei denen der Haken für \"Diese Meldung nicht wieder anzeigen\" gesetzt wurde, wieder auf den Standard zurück." + +#: config.py:864 +msgid "&View library preferences..." 
+#: config.py:929
+msgid "Status:%(cmplt)s"
+msgstr "Status: %(cmplt)s"
+
+#: config.py:930
+msgid "Status:%(inprog)s"
+msgstr "Status: %(inprog)s"
+msgid ""
+"Write to %s(%s) only for new\n"
+"books, not updates to existing books."
+msgstr "%s (%s) werden nur für neue Bücher gefüllt, nicht bei Aktualisierungen vorhandener Bücher."
+msgstr "Die %(pini)s-Parameter %(ccset)s erlaubt es ihnen, benutzerdefinierte Spalten festzulegen, die mit seitenspezifischen Werten gefüllt werden, die nicht auf allen Web-Seiten gleich sind.<br />%(ccset)s wird ignoriert, wenn dies deaktiviert ist." + +#: config.py:1026 +msgid "Special column:" +msgstr "Sonder-Spalte:" + +#: config.py:1031 +msgid "Update/Overwrite Error Column:" +msgstr "Aktualisieren/Überschreiben der Fehlerspalte:" + +#: config.py:1032 +msgid "" +"When an update or overwrite of an existing story fails, record the reason in this column.\n" +"(Text and Long Text columns only.)" +msgstr "Wenn eine Aktualisierung oder Überschreibung einer existierenden Story fehlschlägt, erfasse den Grund in dieser Spalte.\n(Nur Text-und Langtext-Spalten.)" + +#: config.py:1058 +msgid "Author(s)" +msgstr "Autor(en)" + +#: config.py:1059 +msgid "Publisher" +msgstr "Herausgeber" + +#: config.py:1060 +msgid "Tags" +msgstr "Schlagworte" + +#: config.py:1061 +msgid "Languages" +msgstr "Sprachen" + +#: config.py:1062 +msgid "Published Date" +msgstr "Veröffentlichungsdatum" + +#: config.py:1063 +msgid "Date" +msgstr "Datum" + +#: config.py:1064 +msgid "Comments" +msgstr "Kommentare" + +#: config.py:1066 +msgid "Ids(url id only)" +msgstr "Ids(nur url id)" + +#: config.py:1071 +msgid "" +"The standard calibre metadata columns are listed below. You may choose " +"whether FFDL will fill each column automatically on updates or only for new " +"books." +msgstr "Die Standard-Calibre-Metadatenspalten sind unten aufgeführt. Sie können wählen, ob FFDL jede Spalte automatisch bei Aktualisierungen oder nur bei neuen Büchern füllen soll." + +#: config.py:1085 +msgid "" +"Write to %s only for new\n" +"books, not updates to existing books." +msgstr "Fülle %s nur bei neuen Bücher, nicht bei Aktualisierungen vorhandener Bücher." 
+ +#: dialogs.py:69 +msgid "Skip" +msgstr "Überspringen" + +#: dialogs.py:70 +msgid "Add New Book" +msgstr "Neues Buch hinzufügen" + +#: dialogs.py:71 +msgid "Update EPUB if New Chapters" +msgstr "EPUB aktualisieren, wenn neue Kapitel vorhanden sind" + +#: dialogs.py:72 +msgid "Update EPUB Always" +msgstr "EPUB immer aktualisieren" + +#: dialogs.py:73 +msgid "Overwrite if Newer" +msgstr "Überschreiben, wenn neuer" + +#: dialogs.py:74 +msgid "Overwrite Always" +msgstr "Immer überschreiben" + +#: dialogs.py:75 +msgid "Update Calibre Metadata Only" +msgstr "Nur Calibre-Metadaten aktualisieren" + +#: dialogs.py:239 ffdl_plugin.py:89 +msgid "FanFictionDownLoader" +msgstr "FanFictionDownLoader" + +#: dialogs.py:256 dialogs.py:703 +msgid "Show Download Options" +msgstr "Optionen zum Herunterladen anzeigen" + +#: dialogs.py:275 dialogs.py:720 +msgid "Output &Format:" +msgstr "Ausgabe-&Format:" + +#: dialogs.py:283 dialogs.py:728 +msgid "" +"Choose output format to create. May set default from plugin configuration." +msgstr "Wählen Sie das zu erstellende Ausgabeformat. Kann als Voreinstellung in der Plugin-Konfiguration gesetzt werden." + +#: dialogs.py:311 dialogs.py:745 +msgid "Update Calibre &Metadata?" +msgstr "Calibre-&Metadaten aktualisieren?" + +#: dialogs.py:312 dialogs.py:746 +msgid "" +"Update metadata for existing stories in Calibre from web site?\n" +"(Columns set to 'New Only' in the column tabs will only be set for new books.)" +msgstr "Metadaten für vorhandene Stories in Calibre von der Web-Seite aktualisieren?\n(Gesetzte Spalten für \"Nur neue\" in der Spalte Registerkarte wird nur für neue Bücher berücksichtigt.)" + +#: dialogs.py:318 dialogs.py:750 +msgid "Update EPUB Cover?" +msgstr "EPUB Cover aktualisieren?" + +#: dialogs.py:319 dialogs.py:751 +msgid "" +"Update book cover image from site or defaults (if found) <i>inside</i> the " +"EPUB when EPUB is updated." 
+msgstr "Aktualisiert das Buch-Cover-Image von der Seite oder den Standardeinstellungen (wenn vorhanden) <i>im</i> EPUB, wenn das EPUB aktualisiert wird." + +#: dialogs.py:366 +msgid "Story URL(s) for anthology, one per line:" +msgstr "Story URL(´s) für Sammelbände, eine pro Zeile:" + +#: dialogs.py:367 +msgid "" +"URLs for stories to include in the anthology, one per line.\n" +"Will take URLs from clipboard, but only valid URLs." +msgstr "URL´s für Stories, die im Sammelband enthalten sein sollen, eine pro Zeile.\nURL´s werden aus der Zwischenablage genommen, aber nur gültige URL´s." + +#: dialogs.py:368 +msgid "If Story Already Exists in Anthology?" +msgstr "Wenn die Story bereits in einem Sammelband enthalten ist?" + +#: dialogs.py:369 +msgid "" +"What to do if there's already an existing story with the same URL in the " +"anthology." +msgstr "Was soll geschehen, wenn es bereits eine vorhandene Story mit der gleichen URL in dem Sammelband gibt." + +#: dialogs.py:378 +msgid "Story URL(s), one per line:" +msgstr "Story URL(´s), eine pro Zeile:" + +#: dialogs.py:379 +msgid "" +"URLs for stories, one per line.\n" +"Will take URLs from clipboard, but only valid URLs.\n" +"Add [1,5] after the URL to limit the download to chapters 1-5." +msgstr "URL´s für Stories, eine pro Zeile.\nURL´s werden aus der Zwischenablage genommen, aber nur gültige URL´s.\nFügen Sie [1,5] nach der URL hinzu, um nur die Kapitel 1-5 herunterzuladen." + +#: dialogs.py:380 +msgid "If Story Already Exists?" +msgstr "Wenn die Story bereits vorhanden ist?" + +#: dialogs.py:381 +msgid "" +"What to do if there's already an existing story with the same URL or title " +"and author." +msgstr "Was soll geschehen, wenn es bereits eine vorhandene Story mit der gleichen URL oder Titel und Autor gibt." + +#: dialogs.py:481 +msgid "For Individual Books" +msgstr "Für einzelne Bücher" + +#: dialogs.py:482 +msgid "Get URLs and go to dialog for individual story downloads." 
+msgstr "URL´s holen und zum Download-Dialog für einzelne Stories gehen." + +#: dialogs.py:486 +msgid "For Anthology Epub" +msgstr "Für Sammelband-EPUB" + +#: dialogs.py:487 +msgid "" +"Get URLs and go to dialog for Anthology download.\n" +"Requires %s plugin." +msgstr "URL´s holen und zum Download-Dialog für Sammelbände gehen.\nErfordert %s Plugin." + +#: dialogs.py:492 dialogs.py:546 dialogs.py:573 +msgid "Cancel" +msgstr "Abbrechen" + +#: dialogs.py:524 +msgid "Password" +msgstr "Passwort" + +#: dialogs.py:525 +msgid "Author requires a password for this story(%s)." +msgstr "Der Autor benötigt ein Passwort für diese Story(%s)." + +#: dialogs.py:530 +msgid "User/Password" +msgstr "Benutzer/Passwort" + +#: dialogs.py:531 +msgid "%s requires you to login to download this story." +msgstr "%s erfordert, dass sie sich einloggen, um diese Geschichte herunterzuladen." + +#: dialogs.py:533 +msgid "User:" +msgstr "Benutzer:" + +#: dialogs.py:537 +msgid "Password:" +msgstr "Passwort:" + +#: dialogs.py:568 +msgid "Fetching metadata for stories..." +msgstr "Metadaten für folgende Stories abrufen..." + +#: dialogs.py:569 +msgid "Downloading metadata for stories" +msgstr "Metadaten für folgende Stories herunterladen" + +#: dialogs.py:570 +msgid "Fetched metadata for" +msgstr "Metadaten abgerufen für" + +#: dialogs.py:640 ffdl_plugin.py:325 +msgid "About FanFictionDownLoader" +msgstr "Über FanFictionDownLoader" + +#: dialogs.py:694 +msgid "Remove selected books from the list" +msgstr "Ausgewählte Bücher von der Liste löschen" + +#: dialogs.py:733 +msgid "Update Mode:" +msgstr "Aktualisierungsmodus:" + +#: dialogs.py:736 +msgid "" +"What sort of update to perform. May set default from plugin configuration." +msgstr "Welche Art von Aktualisierung ausgeführt werden soll. Eine Standardeinstellung kann in den Plugin Konfigurationen festgelegt werden." 
+ +#: dialogs.py:804 ffdl_plugin.py:1152 ffdl_plugin.py:1324 ffdl_plugin.py:1354 +msgid "Comment" +msgstr "Kommentar" + +#: dialogs.py:872 +msgid "Are you sure you want to remove this book from the list?" +msgstr "Sind sie sicher, dass sie dieses Buch von der Liste löschen wollen?" + +#: dialogs.py:874 +msgid "Are you sure you want to remove the selected %d books from the list?" +msgstr "Sind sie sicher, dass sie die ausgewählten Bücher von der Liste löschen wollen?" + +#: dialogs.py:900 +msgid "Note" +msgstr "Notiz" + +#: dialogs.py:939 +msgid "Select or Edit Reject Note." +msgstr "Ablehnungsnotiz auswählen oder bearbeiten." + +#: dialogs.py:947 +msgid "Are you sure you want to remove this URL from the list?" +msgstr "Sind sie sicher, dass sie diese URL von der Liste löschen möchten?" + +#: dialogs.py:949 +msgid "Are you sure you want to remove the %d selected URLs from the list?" +msgstr "Sind sie sicher, dass sie die %d ausgewählten URL´s von der Liste löschen möchten?" + +#: dialogs.py:967 +msgid "List of Books to Reject" +msgstr "Liste der Bücher zur Ablehnung" + +#: dialogs.py:980 +msgid "" +"FFDL will remember these URLs and display the note and offer to reject them " +"if you try to download them again later." +msgstr "FFDL merkt sich diese URL´s und gibt die Notiz wieder und bietet an abzulehnen, wenn sie nochmal versuchen, diese herunterzuladen." + +#: dialogs.py:994 +msgid "Remove selected URL(s) from the list" +msgstr "Ausgewählte URL(´s) von der Liste löschen" + +#: dialogs.py:1009 dialogs.py:1013 +msgid "This will be added to whatever note you've set for each URL above." +msgstr "Dies wird für jede der oben angegebenen URL´s zusätzlich an die Notiz angefügt, die sie oben angegeben haben." + +#: dialogs.py:1022 +msgid "Delete Books (including books without FanFiction URLs)?" +msgstr "Bücher löschen (inklusive Bücher ohne FanFiction-URL´s)?" + +#: dialogs.py:1023 +msgid "Delete the selected books after adding them to the Rejected URLs list." 
+msgstr "Die ausgewählten Bücher werden zur URL-Ablehnungsliste hinzugefügt und danach gelöscht." + +#: ffdl_plugin.py:90 +msgid "Download FanFiction stories from various web sites" +msgstr "FanFiction-Stories von verschiedenen Web-Seiten herunterladen" + +#: ffdl_plugin.py:120 +msgid "FanFictionDL" +msgstr "FanFictionDL" + +#: ffdl_plugin.py:243 +msgid "&Add New from URL(s)" +msgstr "Neu von URL hinzufügen" + +#: ffdl_plugin.py:245 +msgid "Add New FanFiction Book(s) from URL(s)" +msgstr "Neue FanFiction-Bücher von URL´s herunterladen" + +#: ffdl_plugin.py:248 +msgid "&Update Existing FanFiction Book(s)" +msgstr "Markierte FanFiction-Bücher aktualisieren" + +#: ffdl_plugin.py:254 +msgid "Get Story URLs to Download from Web Page" +msgstr "Story-URL´s von einer Web-Seite erfassen" + +#: ffdl_plugin.py:258 +msgid "&Make Anthology Epub Manually from URL(s)" +msgstr "Sammelband-EPUB manuell aus URL(´s) erstellen" + +#: ffdl_plugin.py:260 +msgid "Make FanFiction Anthology Epub Manually from URL(s)" +msgstr "FanFiction-Sammelband-EPUB manuell aus URL(´s) erstellen" + +#: ffdl_plugin.py:263 +msgid "&Update Anthology Epub" +msgstr "Sammelband-EPUB aktualisieren" + +#: ffdl_plugin.py:265 +msgid "Update FanFiction Anthology Epub" +msgstr "FanFicition-Sammelband-EPUB aktualisieren" + +#: ffdl_plugin.py:273 +msgid "Add to \"To Read\" and \"Send to Device\" Lists" +msgstr "Zu \"zu lesen\" und \"ans Gerät senden\" Liste hinzufügen" + +#: ffdl_plugin.py:275 +msgid "Remove from \"To Read\" and add to \"Send to Device\" Lists" +msgstr "Von \"zu lesen\" entfernen und zu \"ans Gerät senden\" Liste hinzufügen" + +#: ffdl_plugin.py:277 ffdl_plugin.py:282 +msgid "Remove from \"To Read\" Lists" +msgstr "Von \"zu lesen\" Liste entfernen" + +#: ffdl_plugin.py:279 +msgid "Add Selected to \"Send to Device\" Lists" +msgstr "Zu \"ans Gerät senden\" Liste hinzufügen" + +#: ffdl_plugin.py:281 +msgid "Add to \"To Read\" Lists" +msgstr "Zu \"zu lesen\" Liste hinzufügen" + +#: ffdl_plugin.py:297 
+msgid "Get URLs from Selected Books" +msgstr "Die URL´s der ausgewählten Bücher holen" + +#: ffdl_plugin.py:303 ffdl_plugin.py:397 +msgid "Get Story URLs from Web Page" +msgstr "Story-URL´s von der Web-Seite holen" + +#: ffdl_plugin.py:308 +msgid "Reject Selected Books" +msgstr "Ausgewählte Bücher zurückweisen" + +#: ffdl_plugin.py:316 +msgid "&Configure Plugin" +msgstr "Plugin konfigurieren" + +#: ffdl_plugin.py:319 +msgid "Configure FanFictionDownLoader" +msgstr "FanFictionDownLoader konfigurieren" + +#: ffdl_plugin.py:322 +msgid "About Plugin" +msgstr "Über das Plugin" + +#: ffdl_plugin.py:379 +msgid "Cannot Update Reading Lists from Device View" +msgstr "Leseliste kann vom Gerät nicht aktualisiert werden" + +#: ffdl_plugin.py:383 +msgid "No Selected Books to Update Reading Lists" +msgstr "Mit den ausgewählten Bücher konnten keine Leselisten aktualisiert werden." + +#: ffdl_plugin.py:408 ffdl_plugin.py:460 +msgid "List of Story URLs" +msgstr "Liste der Story-URL´s" + +#: ffdl_plugin.py:409 +msgid "No Valid Story URLs found on given page." +msgstr "Auf der angegebene Seite wurde keine gültige URL gefunden." + +#: ffdl_plugin.py:424 +msgid "No Selected Books to Get URLs From" +msgstr "Es wurden keine Bücher ausgewählt um eine URL zu holen" + +#: ffdl_plugin.py:442 +msgid "Collecting URLs for stories..." +msgstr "Stories für ... werden gesammelt" + +#: ffdl_plugin.py:443 +msgid "Get URLs for stories" +msgstr "URL für Stories holen" + +#: ffdl_plugin.py:444 ffdl_plugin.py:491 ffdl_plugin.py:678 +msgid "URL retrieved" +msgstr "URL abgerufen" + +#: ffdl_plugin.py:464 +msgid "List of URLs" +msgstr "Liste der URL´s" + +#: ffdl_plugin.py:465 +msgid "No Story URLs found in selected books." +msgstr "In den ausgewählten Büchern wurden keine Story-URL´s gefunden." + +#: ffdl_plugin.py:481 +msgid "No Selected Books have URLs to Reject" +msgstr "Keine der gewählten Bücher haben URL´s zum Ablehnen" + +#: ffdl_plugin.py:489 +msgid "Collecting URLs for Reject List..." 
+msgstr "URL´s für die Ablehnungsliste werden gesammelt..." + +#: ffdl_plugin.py:490 +msgid "Get URLs for Reject List" +msgstr "URL´s für die Ablehnungsliste holen" + +#: ffdl_plugin.py:525 +msgid "Proceed to Remove?" +msgstr "Fortfahren mit der Entfernung?" + +#: ffdl_plugin.py:525 +msgid "Rejecting FFDL URLs: None of the books selected have FanFiction URLs." +msgstr "Ablehnung der FFDL URL´s: Keines der ausgewählten Bücher hat eine FanFiction-URL." + +#: ffdl_plugin.py:547 +msgid "Cannot Make Anthologys without %s" +msgstr "Sammelbände können nicht ohne %s erstellt werden." + +#: ffdl_plugin.py:551 ffdl_plugin.py:655 +msgid "Cannot Update Books from Device View" +msgstr "Bücher können nicht von der Geräte-Sicht aktualisiert werden" + +#: ffdl_plugin.py:555 +msgid "Can only update 1 anthology at a time" +msgstr "Kann nur einen Sammelband auf einmal aktualisieren" + +#: ffdl_plugin.py:564 +msgid "Can only Update Epub Anthologies" +msgstr "Es können nur EPUB-Anthologien aktualisiert werden" + +#: ffdl_plugin.py:582 ffdl_plugin.py:583 +msgid "Cannot Update Anthology" +msgstr "Sammelband kann nicht aktualisiert werden" + +#: ffdl_plugin.py:583 +msgid "" +"Book isn't an FFDL Anthology or contains book(s) without valid FFDL URLs." +msgstr "Das Buch ist kein FFDL-Sammelband oder enhält ein Buch (mehrere Bücher) ohne gültige FFDL-URL´s." + +#: ffdl_plugin.py:641 +msgid "" +"There are %d stories in the current anthology that are <b>not</b> going to " +"be kept if you go ahead." +msgstr "Es gibt %d Stories im aktuellen Sammelband, die <b>nicht</b> behalten werden, wenn sie fortfahren." + +#: ffdl_plugin.py:642 +msgid "Story URLs that will be removed:" +msgstr "Story URL´s, die entfernt werden:" + +#: ffdl_plugin.py:644 +msgid "Update anyway?" +msgstr "Trotzdem aktualisieren?" 
+ +#: ffdl_plugin.py:645 +msgid "Stories Removed" +msgstr "Stories entfernt" + +#: ffdl_plugin.py:662 +msgid "No Selected Books to Update" +msgstr "Keines der gewählten Bücher wird aktualisiert" + +#: ffdl_plugin.py:676 +msgid "Collecting stories for update..." +msgstr "Stories für die Aktualisierung sammlen..." + +#: ffdl_plugin.py:677 +msgid "Get stories for updates" +msgstr "Stories für die Aktualisierung werden gesammelt..." + +#: ffdl_plugin.py:687 +msgid "Update Existing List" +msgstr "Vorhandene Liste aktualisieren" + +#: ffdl_plugin.py:745 +msgid "Started fetching metadata for %s stories." +msgstr "Hole die Metadaten für %s-Stories." + +#: ffdl_plugin.py:751 +msgid "No valid story URLs entered." +msgstr "Es wurde keine gültige URL eingegeben." + +#: ffdl_plugin.py:776 ffdl_plugin.py:782 +msgid "Reject URL?" +msgstr "URL ablehnen?" + +#: ffdl_plugin.py:783 ffdl_plugin.py:801 +msgid "<b>%s</b> is on your Reject URL list:" +msgstr "<b>%s</b> ist auf ihrer URL-Ablehnungsliste:" + +#: ffdl_plugin.py:785 +msgid "Click '<b>Yes</b>' to Reject." +msgstr "Klicken Sie '<b>Yes</b>' um abzulehnen." + +#: ffdl_plugin.py:786 ffdl_plugin.py:890 +msgid "Click '<b>No</b>' to download anyway." +msgstr "Klicken Sie '<b>No</b>' um trotzdem herunterzuladen." + +#: ffdl_plugin.py:788 +msgid "Story on Reject URLs list (%s)." +msgstr "Die Story ist auf der URL-Ablehnungsliste (%s)." + +#: ffdl_plugin.py:791 +msgid "Rejected" +msgstr "Abgelehnt" + +#: ffdl_plugin.py:794 +msgid "Remove Reject URL?" +msgstr "Entferne die abgelehnte URL?" + +#: ffdl_plugin.py:800 +msgid "Remove URL from Reject List?" +msgstr "Entferne die URL von der Ablehnungsliste?" + +#: ffdl_plugin.py:803 +msgid "Click '<b>Yes</b>' to remove it from the list," +msgstr "Klicken Sie '<b>Yes</b>' um es von der Liste zu entfernen," + +#: ffdl_plugin.py:804 +msgid "Click '<b>No</b>' to leave it on the list." +msgstr "Klicken Sie '<b>No</b>' um es auf der Liste zu lassen." 
+ +#: ffdl_plugin.py:821 +msgid "Cannot update non-epub format." +msgstr "Nicht-EPUB-Format kann nicht aktualisiert werden." + +#: ffdl_plugin.py:866 +msgid "Are You an Adult?" +msgstr "Sind sie volljährig?" + +#: ffdl_plugin.py:867 +msgid "" +"%s requires that you be an adult. Please confirm you are an adult in your " +"locale:" +msgstr "%s erfordert, dass sie volljährig sind. Bitte bestätigen Sie, dass Sie ein Erwachsener in ihrem Land sind:" + +#: ffdl_plugin.py:881 +msgid "Skip Story?" +msgstr "Story überspringen?" + +#: ffdl_plugin.py:887 +msgid "Skip Anthology Story?" +msgstr "Sammelband-Story überspringen?" + +#: ffdl_plugin.py:888 +msgid "" +"\"<b>%s</b>\" is in series \"<b><a href=\"%s\">%s</a></b>\" that you have an" +" anthology book for." +msgstr "\"<b>%s</b>\" ist in der Serie \"<b><a href=\"%s\">%s</a></b>\" enthalten, die sie in einem Sammelband haben." + +#: ffdl_plugin.py:889 +msgid "Click '<b>Yes</b>' to Skip." +msgstr "Klicken Sie '<b>Yes</b>' um zu überspringen." + +#: ffdl_plugin.py:892 +msgid "Story in Series Anthology(%s)." +msgstr "Story in Serien-Sammelband (%s)." + +#: ffdl_plugin.py:897 +msgid "Skipped" +msgstr "Übersprungen" + +#: ffdl_plugin.py:925 +msgid "Add" +msgstr "Hinzufügen" + +#: ffdl_plugin.py:938 +msgid "Meta" +msgstr "Meta" + +#: ffdl_plugin.py:971 +msgid "Skipping duplicate story." +msgstr "Doppelte Story überspringen." + +#: ffdl_plugin.py:974 +msgid "" +"More than one identical book by Identifer URL or title/author(s)--can't tell" +" which book to update/overwrite." +msgstr "Mehr als ein identisches Buch mit der gleichen URL oder Titel/Autor(en) - es kann nicht festgestellt werden, welches Buch aktualisiert/überschrieben werden soll." + +#: ffdl_plugin.py:985 +msgid "Update" +msgstr "Aktualisieren" + +#: ffdl_plugin.py:993 ffdl_plugin.py:1000 +msgid "Change Story URL?" +msgstr "Story-URL ändern?" 
+ +#: ffdl_plugin.py:1001 +msgid "" +"<b>%s</b> by <b>%s</b> is already in your library with a different source " +"URL:" +msgstr "<b>%s</b> von <b>%s</b> ist bereits in ihrer Bibliothek mit einer anderen URL:" + +#: ffdl_plugin.py:1002 +msgid "In library: <a href=\"%(liburl)s\">%(liburl)s</a>" +msgstr "In der Bibliothek: <a href=\"%(liburl)s\">%(liburl)s</a>" + +#: ffdl_plugin.py:1003 ffdl_plugin.py:1017 +msgid "New URL: <a href=\"%(newurl)s\">%(newurl)s</a>" +msgstr "Neue URL: <a href=\"%(newurl)s\">%(newurl)s</a>" + +#: ffdl_plugin.py:1004 +msgid "Click '<b>Yes</b>' to update/overwrite book with new URL." +msgstr "Klicken Sie '<b>Yes</b>' um das Buch mit der neuen URL zu aktualisieren/überschreiben." + +#: ffdl_plugin.py:1005 +msgid "Click '<b>No</b>' to skip updating/overwriting this book." +msgstr "Klicken Sie '<b>No</b>' um das Aktualisieren/Überschreiben dieses Buches abzubrechen." + +#: ffdl_plugin.py:1007 ffdl_plugin.py:1014 +msgid "Download as New Book?" +msgstr "Als neues Buch herunterladen?" + +#: ffdl_plugin.py:1015 +msgid "" +"<b>%s</b> by <b>%s</b> is already in your library with a different source " +"URL." +msgstr "<b>%s</b> von <b>%s</b> ist bereits in ihrer Bibliothek mit einer anderen URL:" + +#: ffdl_plugin.py:1016 +msgid "" +"You chose not to update the existing book. Do you want to add a new book " +"for this URL?" +msgstr "Sie haben sich entschieden, das vorhandene Buch nicht zu aktualisieren. Wollen Sie ein neues Buch mit diese URL hinzufügen?" + +#: ffdl_plugin.py:1018 +msgid "Click '<b>Yes</b>' to a new book with new URL." +msgstr "Klicken Sie '<b>Yes</b>' um eine neues Buch mit dieser neuen URL hinzuzufügen." + +#: ffdl_plugin.py:1019 +msgid "Click '<b>No</b>' to skip URL." +msgstr "Klicken Sie '<b>No</b>' um die URL überspringen." 
+ +#: ffdl_plugin.py:1025 +msgid "Update declined by user due to differing story URL(%s)" +msgstr "Aktualisierung wurde vom Benutzer abgelehnt aufgrund unterschiedlicher Story-URL (%s)" + +#: ffdl_plugin.py:1028 +msgid "Different URL" +msgstr "Andere URL" + +#: ffdl_plugin.py:1033 +msgid "Metadata collected." +msgstr "Metadaten gesammelt." + +#: ffdl_plugin.py:1049 +msgid "Already contains %d chapters." +msgstr "Enthält bereits %d Kapitel." + +#: ffdl_plugin.py:1054 jobs.py:199 +msgid "" +"Existing epub contains %d chapters, web site only has %d. Use Overwrite to " +"force update." +msgstr "Das existierende EPUB hat %d Kapitel, die Web-Seite nur %d. Benutzen Sie überschreiben, um eine Aktualisierung zu erzwingen." + +#: ffdl_plugin.py:1056 +msgid "" +"FFDL doesn't recognize chapters in existing epub, epub is probably from a " +"different source. Use Overwrite to force update." +msgstr "FFDL kann im existierenden EPUT die Kapitel nicht erkennen, das EPUB ist vermutlich aus einer anderen Quelle. Benutzen Sie überschreiben, um eine Aktualisierung zu erzwingen." + +#: ffdl_plugin.py:1068 +msgid "Not Overwriting, web site is not newer." +msgstr "Keine Überschreibung, die Web-Seite ist nicht aktueller." + +#: ffdl_plugin.py:1148 +msgid "None of the <b>%d</b> URLs/stories given can be/need to be downloaded." +msgstr "Keine der angegebenen <b>%d</b> URL´s kann/muss heruntergeladen werden." + +#: ffdl_plugin.py:1149 ffdl_plugin.py:1320 ffdl_plugin.py:1350 +msgid "See log for details." +msgstr "Siehe Protokoll." + +#: ffdl_plugin.py:1150 +msgid "Proceed with updating your library(Error Column, if configured)?" +msgstr "Fortfahren mit der Aktualisierung ihrer Bibliothek (Fehler-Spalte, wenn konfiguriert)?" 
+ +#: ffdl_plugin.py:1157 ffdl_plugin.py:1332 +msgid "Bad" +msgstr "Ungeeignet" + +#: ffdl_plugin.py:1165 +msgid "FFDL download ended" +msgstr "FFDL herunterladen beendet" + +#: ffdl_plugin.py:1165 ffdl_plugin.py:1375 +msgid "FFDL log" +msgstr "FFDL Protokoll" + +#: ffdl_plugin.py:1181 +msgid "Download FanFiction Book" +msgstr "FanFiction-Buch herunterladen" + +#: ffdl_plugin.py:1188 +msgid "Starting %d FanFictionDownLoads" +msgstr "%d FanFictionDownLoads starten" + +#: ffdl_plugin.py:1218 +msgid "Story Details:" +msgstr "Story-Einzelheiten:" + +#: ffdl_plugin.py:1221 +msgid "Error Updating Metadata" +msgstr "Fehler beim Herunterladen der Metadaten" + +#: ffdl_plugin.py:1222 +msgid "" +"An error has occurred while FFDL was updating calibre's metadata for <a " +"href='%s'>%s</a>." +msgstr "Während FFDL die Calibre-Metadaten für <a href='%s'>%s</a> aktualisierte, trat ein Fehler auf." + +#: ffdl_plugin.py:1223 +msgid "The ebook has been updated, but the metadata has not." +msgstr "Das eBook wurde aktualisiert, aber die Metadaten nicht." + +#: ffdl_plugin.py:1275 +msgid "Finished Adding/Updating %d books." +msgstr "Hinzufügen/Aktualisierung von %d Büchern abgeschlossen." + +#: ffdl_plugin.py:1283 +msgid "Starting auto conversion of %d books." +msgstr "Starte automatische Konvertierung von %d Büchern." + +#: ffdl_plugin.py:1304 +msgid "No Good Stories for Anthology" +msgstr "Ungültige Story für einen Sammelband" + +#: ffdl_plugin.py:1305 +msgid "" +"No good stories/updates where downloaded, Anthology creation/update aborted." +msgstr "Ungültige Stories/Aktualisierungen wurden heruntergeladen, Sammelband-Erstellung/Aktualisierung wurde abgebrochen." + +#: ffdl_plugin.py:1310 ffdl_plugin.py:1349 +msgid "FFDL found <b>%s</b> good and <b>%s</b> bad updates." +msgstr "FFDL hat <b>%s</b> gute und <b>%s</b> ungeeignete Updates gefunden." + +#: ffdl_plugin.py:1317 +msgid "" +"Are you sure you want to continue with creating/updating this Anthology?" 
+msgstr "Sind sie sicher, dass sie fortfahren wollen, diesen Sammelband zu erstellen/aktualisieren?" + +#: ffdl_plugin.py:1318 +msgid "Any updates that failed will <b>not</b> be included in the Anthology." +msgstr "Jede Aktualisierung, die fehlgeschlagen ist, wird <b>nicht</b> in den Sammelband eingefügt." + +#: ffdl_plugin.py:1319 +msgid "However, if there's an older version, it will still be included." +msgstr "Allerdings, wenn es eine ältere Version gibt, wird es dennoch aufgenommen werden." + +#: ffdl_plugin.py:1322 +msgid "Proceed with updating this anthology and your library?" +msgstr "Mit der Aktualiserung dieses Sammelbandes und ihrer Bibliothek fortfahren?" + +#: ffdl_plugin.py:1330 +msgid "Good" +msgstr "Geeignet" + +#: ffdl_plugin.py:1351 +msgid "Proceed with updating your library?" +msgstr "Mit der Aktualisierung der Bibliothek fortfahren?" + +#: ffdl_plugin.py:1375 +msgid "FFDL download complete" +msgstr "FFDL herunterladen abgeschlossen" + +#: ffdl_plugin.py:1388 +msgid "Merging %s books." +msgstr "Zusammenführung von %s Büchern." + +#: ffdl_plugin.py:1428 +msgid "FFDL Adding/Updating books." +msgstr "FFDL hinzufügen/aktualisieren der Bücher." + +#: ffdl_plugin.py:1435 +msgid "Updating calibre for FanFiction stories..." +msgstr "Calibre mit FanFiction-Stories aktualisieren..." + +#: ffdl_plugin.py:1436 +msgid "Update calibre for FanFiction stories" +msgstr "Calibre mit FanFiction-Stories aktualisieren." + +#: ffdl_plugin.py:1445 +msgid "Adding/Updating %s BAD books." +msgstr "Hinzufügen/aktualisieren %s ungeeigneter Bücher." + +#: ffdl_plugin.py:1454 +msgid "Updating calibre for BAD FanFiction stories..." +msgstr "Calibre mit ungeeigneten FanFiction-Stories aktualisieren..." + +#: ffdl_plugin.py:1455 +msgid "Update calibre for BAD FanFiction stories" +msgstr "Calibre mit ungeeigneten FanFiction-Stories aktualisieren." + +#: ffdl_plugin.py:1481 +msgid "Adding format to book failed for some reason..." 
+msgstr "Das Hinzufügen eines Formates zum Buch ist aus irgendeinem Grund fehlgeschlagen..." + +#: ffdl_plugin.py:1484 +msgid "Error" +msgstr "Fehler" + +#: ffdl_plugin.py:1757 +msgid "" +"You configured FanFictionDownLoader to automatically update Reading Lists, " +"but you don't have the %s plugin installed anymore?" +msgstr "Sie haben FFDL konfiguriert, die Leselisten automatisch zu aktualisieren, aber sie haben das %s-Plugin nicht mehr installiert?" + +#: ffdl_plugin.py:1769 +msgid "" +"You configured FanFictionDownLoader to automatically update \"To Read\" " +"Reading Lists, but you don't have any lists set?" +msgstr "Sie haben FFDL konfiguriert, um die \"zu lesen\" Leselisten automatisch zu aktualisieren, aber sie haben keinen Listen gesetzt?" + +#: ffdl_plugin.py:1779 ffdl_plugin.py:1797 +msgid "" +"You configured FanFictionDownLoader to automatically update Reading List " +"'%s', but you don't have a list of that name?" +msgstr "Sie haben FFDL konfiguriert, um die Leseliste '%s' automatisch zu aktualisieren, aber sie haben keine Liste dieses Namens?" + +#: ffdl_plugin.py:1785 +msgid "" +"You configured FanFictionDownLoader to automatically update \"Send to " +"Device\" Reading Lists, but you don't have any lists set?" +msgstr "Sie haben FFDL konfiguriert, um die \"ans Gerät senden\"-Leselisten automatisch zu aktualisieren, aber sie haben keinen Listen gesetzt?" + +#: ffdl_plugin.py:1906 +msgid "No story URL found." +msgstr "Keine URL wurde gefunden." + +#: ffdl_plugin.py:1909 +msgid "Not Found" +msgstr "Nicht gefunden" + +#: ffdl_plugin.py:1915 +msgid "URL is not a valid story URL." +msgstr "URL ist keine gültige Story-URL." 
+ +#: ffdl_plugin.py:1918 +msgid "Bad URL" +msgstr "Bad URL" + +#: ffdl_plugin.py:2054 ffdl_plugin.py:2057 +msgid "Anthology containing:" +msgstr "Sammelband enthält:" + +#: ffdl_plugin.py:2055 +msgid "%s by %s" +msgstr "%s von %s" + +#: ffdl_plugin.py:2077 +msgid " Anthology" +msgstr "Sammelband" + +#: ffdl_plugin.py:2114 +msgid "(was set, removed for security)" +msgstr "(wurde eingestellt, aus Sicherheitsgründen entfernt)" + +#: jobs.py:73 +msgid "Downloading FanFiction Stories" +msgstr "FanFiction-Geschichten herunterladen" + +#: jobs.py:95 +msgid "Successful:" +msgstr "Erfolgreich:" + +#: jobs.py:97 +msgid "Unsuccessful:" +msgstr "Erfolglos:" + +#: jobs.py:111 +msgid "Download started..." +msgstr "Herunterladen gestartet..." + +#: jobs.py:193 +msgid "Already contains %d chapters. Reuse as is." +msgstr "Enthält bereits %d Kapitel. Erhalten wie es ist." + +#: jobs.py:210 +msgid "Update %s completed, added %s chapters for %s total." +msgstr "Aktualisierung %s abgeschlossen, %s hinzugefügte Kapitel zu insgesamt %s." diff --git a/calibre-plugin/translations/es.po b/calibre-plugin/translations/es.po new file mode 100644 index 00000000..0f3b8dfc --- /dev/null +++ b/calibre-plugin/translations/es.po @@ -0,0 +1,1625 @@ +# SOME DESCRIPTIVE TITLE. 
+# Copyright (C) YEAR ORGANIZATION +# +# Translators: +# Adolfo Jayme Barrientos <fito@libreoffice.org>, 2014 +# Jellby <jellby@yahoo.com>, 2014 +msgid "" +msgstr "" +"Project-Id-Version: calibre-plugins\n" +"POT-Creation-Date: 2014-09-09 15:54+Central Daylight Time\n" +"PO-Revision-Date: 2014-09-02 09:55+0000\n" +"Last-Translator: Jellby <jellby@yahoo.com>\n" +"Language-Team: Spanish (http://www.transifex.com/projects/p/calibre-plugins/language/es/)\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=UTF-8\n" +"Content-Transfer-Encoding: ENCODING\n" +"Generated-By: pygettext.py 1.5\n" +"Language: es\n" +"Plural-Forms: nplurals=2; plural=(n != 1);\n" + +#: __init__.py:42 +msgid "UI plugin to download FanFiction stories from various sites." +msgstr "Complemento de interfaz de usuario para descargar historias de «fanfiction» desde distintos sitios web." + +#: __init__.py:109 +msgid "" +"Path to the calibre library. Default is to use the path stored in the " +"settings." +msgstr "Ruta de acceso a la biblioteca de calibre. De manera predeterminada se utiliza la ruta definida en la configuración." + +#: config.py:176 +msgid "FAQs" +msgstr "Preguntas frecuentes" + +#: config.py:176 +msgid "List of Supported Sites" +msgstr "Lista de sitios compatibles" + +#: config.py:190 +msgid "Basic" +msgstr "Básico" + +#: config.py:211 +msgid "Standard Columns" +msgstr "Columnas estándar" + +#: config.py:214 +msgid "Custom Columns" +msgstr "Columnas personalizadas" + +#: config.py:217 +msgid "Other" +msgstr "Otros" + +#: config.py:338 +msgid "" +"These settings control the basic features of the plugin--downloading " +"FanFiction." +msgstr "Estas opciones controlan las funciones básicas del complemento, como descargar «fanfiction»." + +#: config.py:342 +msgid "Defaults Options on Download" +msgstr "Opciones predeterminadas al descargar" + +#: config.py:346 +msgid "" +"On each download, FFDL offers an option to select the output format. 
<br " +"/>This sets what that option will default to." +msgstr "En cada descarga, FFDL ofrece la opción de seleccionar el formato de salida.<br/>Esto establece el valor predeterminado para esta opción." + +#: config.py:348 +msgid "Default Output &Format:" +msgstr "&Formato de salida predeterminado:" + +#: config.py:363 +msgid "" +"On each download, FFDL offers an option of what happens if that story " +"already exists. <br />This sets what that option will default to." +msgstr "En cada descarga, FFDL ofrece la opción de elegir qué hacer si la historia ya existe.<br/>Esto establece el valor predeterminado para esta opción." + +#: config.py:365 +msgid "Default If Story Already Exists?" +msgstr "Acción predeterminada si la historia ya existe" + +#: config.py:379 +msgid "Default Update Calibre &Metadata?" +msgstr "Actualización predeterminada de metadatos" + +#: config.py:380 +msgid "" +"On each download, FFDL offers an option to update Calibre's metadata (title," +" author, URL, tags, custom columns, etc) from the web site. <br />This sets " +"whether that will default to on or off. <br />Columns set to 'New Only' in " +"the column tabs will only be set for new books." +msgstr "En cada descarga, FFDL ofrece la opción de actualizar los metadatos de calibre (título, autor, URL, etiquetas, columnas personalizadas, etc.) desde un sitio web.<br/>Esto establece si la opción está activada o desactivada de manera predeterminada.<br/>Las columnas establecidas en «Sólo nuevo» en la pestaña de columnas sólo se rellenan para los libros nuevos." + +#: config.py:384 +msgid "Default Update EPUB Cover when Updating EPUB?" +msgstr "Actualización predeterminada de portada de EPUB al actualizar EPUB" + +#: config.py:385 +msgid "" +"On each download, FFDL offers an option to update the book cover image " +"<i>inside</i> the EPUB from the web site when the EPUB is updated.<br />This" +" sets whether that will default to on or off." 
+msgstr "En cada descarga, FFDL ofrece la opción de actualizar la imagen de portada <i>del archivo</i> EPUB a partir del sitio web cuando se actualiza el archivo EPUB.<br/>Esto establece si la opción está activada o desactivada de manera predeterminada." + +#: config.py:389 +msgid "Smarten Punctuation (EPUB only)" +msgstr "Corregir puntuación (sólo EPUB)" + +#: config.py:390 +msgid "" +"Run Smarten Punctuation from Calibre's Polish Book feature on each EPUB " +"download and update." +msgstr "Ejecutar la corrección de puntuación de la función para pulir libros de calibre en cada descarga y actualización de archivos EPUB." + +#: config.py:395 +msgid "Updating Calibre Options" +msgstr "Actualizando opciones de calibre" + +#: config.py:399 +msgid "Delete other existing formats?" +msgstr "¿Borrar otros formatos existentes?" + +#: config.py:400 +msgid "" +"Check this to automatically delete all other ebook formats when updating an existing book.\n" +"Handy if you have both a Nook(epub) and Kindle(mobi), for example." +msgstr "Marque esta opción para borrar automáticamente todos los otros formatos al actualizar un libro existente.\nEs útil si tiene un Nook (epub) y un Kindle (mobi), por ejemplo." + +#: config.py:404 +msgid "Update Calibre Cover when Updating Metadata?" +msgstr "¿Actualizar la portada de calibre al actualizar metadatos?" + +#: config.py:405 +msgid "" +"Update calibre book cover image from EPUB when metadata is updated. (EPUB only.)\n" +"Doesn't go looking for new images on 'Update Calibre Metadata Only'." +msgstr "Actualizar la imagen de portada del libro desde el archivo EPUB al actualizar los metadatos. (Sólo EPUB).\nNo busca nuevas imágenes si se usa «Actualizar sólo los metadatos de calibre»." + +#: config.py:409 +msgid "Keep Existing Tags when Updating Metadata?" +msgstr "¿Mantener las etiquetas existentes al actualizar metadatos?" 
+
+#: config.py:410
+msgid ""
+"Existing tags will be kept and any new tags added.\n"
+"%(cmplt)s and %(inprog)s tags will be still be updated, if known.\n"
+"%(lul)s tags will be updated if %(lus)s in %(is)s.\n"
+"(If Tags is set to 'New Only' in the Standard Columns tab, this has no effect.)"
+msgstr "Las etiquetas existentes se mantendrán y se añadirán las etiquetas nuevas.\nLas etiquetas %(cmplt)s y %(inprog)s se actualizarán en todo caso, si se conocen.\nLas etiquetas %(lul)s se actualizarán si %(lus)s en %(is)s.\n(Si las etiquetas se establecen en «Sólo nuevo» en la pestaña de columnas estándar, esto no tiene efecto)."
+
+#: config.py:414
+msgid "Force Author into Author Sort?"
+msgstr "¿Forzar autor en orden de autor?"
+
+#: config.py:415
+msgid ""
+"If checked, the author(s) as given will be used for the Author Sort, too.\n"
+"If not checked, calibre will apply it's built in algorithm which makes 'Bob Smith' sort as 'Smith, Bob', etc."
+msgstr "Si se activa, el campo de autor(es), tal como esté dado, se usa para orden de autor también.\nSi no se activa, calibre aplicará su algoritmo predefinido, que hace que «Juan Pérez» se ordene como «Pérez, Juan», etc."
+
+#: config.py:419
+msgid "Force Title into Title Sort?"
+msgstr "¿Forzar título en orden de título?"
+
+#: config.py:420
+msgid ""
+"If checked, the title as given will be used for the Title Sort, too.\n"
+"If not checked, calibre will apply it's built in algorithm which makes 'The Title' sort as 'Title, The', etc."
+msgstr "Si se activa, el campo de título, tal como esté dado, se usa para orden de título también.\nSi no se activa, calibre aplicará su algoritmo predefinido, que hace que «El título» se ordene como «título, El», etc."
+
+#: config.py:424
+msgid "Check for existing Series Anthology books?"
+msgstr "¿Comprobar si existen antologías de serie?"
+ +#: config.py:425 +msgid "" +"Check for existings Series Anthology books using each new story's series URL before downloading.\n" +"Offer to skip downloading if a Series Anthology is found." +msgstr "Comprobar si existen antologías de serie usando el URL de la serie de cada nueva historia antes de descargar.\nOfrece la posibilidad de no descargar si se encuentra una antología de serie." + +#: config.py:429 +msgid "Check for changed Story URL?" +msgstr "¿Comprobar cambio de URL de historia?" + +#: config.py:430 +msgid "" +"Warn you if an update will change the URL of an existing book.\n" +"fanfiction.net URLs will change from http to https silently." +msgstr "Avisar si una actualización cambiará el URL de un libro existente.\nLos URL de fanfiction.net cambiarán de http a https sin avisar." + +#: config.py:434 +msgid "Search EPUB text for Story URL?" +msgstr "¿Buscar el URL de historia en el texto del archivo EPUB?" + +#: config.py:435 +msgid "" +"Look for first valid story URL inside EPUB text if not found in metadata.\n" +"Somewhat risky, could find wrong URL depending on EPUB content.\n" +"Also finds and corrects bad ffnet URLs from ficsaver.com files." +msgstr "Buscar el primer URL de historia válido en el texto del archivo EPUB si no se encuentra en los metadatos.\nEsto es algo arriesgado, ya que según el contenido del archivo puede encontrarse un URL incorrecto.\nTambién encuentra y corrige URL de ffnet erróneos en archivos de ficsaver.com." + +#: config.py:439 +msgid "Mark added/updated books when finished?" +msgstr "¿Marcar los libros añadidos o actualizados al terminar?" + +#: config.py:440 +msgid "" +"Mark added/updated books when finished. Use with option below.\n" +"You can also manually search for 'marked:ffdl_success'.\n" +"'marked:ffdl_failed' is also available, or search 'marked:ffdl' for both." +msgstr "Marcar los libros añadidos o actualizados al terminar. 
Usar con la siguiente opción.\nTambién puede buscar manualmente: «marked:ffdl_success» (éxito) o «marked:ffdl_failed» (fallo). «marked:ffdl» incluye ambos casos." + +#: config.py:444 +msgid "Show Marked books when finished?" +msgstr "¿Mostrar los libros marcados al terminar?" + +#: config.py:445 +msgid "" +"Show Marked added/updated books only when finished.\n" +"You can also manually search for 'marked:ffdl_success'.\n" +"'marked:ffdl_failed' is also available, or search 'marked:ffdl' for both." +msgstr "Mostrar los libros añadidos o actualizados marcados únicamente al terminar.\nTambién puede buscar manualmente: «marked:ffdl_success» (éxito) o «marked:ffdl_failed» (fallo). «marked:ffdl» incluye ambos casos." + +#: config.py:449 +msgid "Automatically Convert new/update books?" +msgstr "¿Convertir automáticamente los libros nuevos o actualizados?" + +#: config.py:450 +msgid "" +"Automatically call calibre's Convert for new/update books.\n" +"Converts to the current output format as chosen in calibre's\n" +"Preferences->Behavior settings." +msgstr "Utilizar automáticamente la función de conversión de calibre para los libros nuevos o actualizados.\nConvertir al formato de salida actual, definido en Preferencias->Comportamiento" + +#: config.py:454 +msgid "GUI Options" +msgstr "Opciones de la interfaz gráfica" + +#: config.py:458 +msgid "Take URLs from Clipboard?" +msgstr "¿Tomar los URL del portapapeles?" + +#: config.py:459 +msgid "Prefill URLs from valid URLs in Clipboard when Adding New." +msgstr "Rellenar los URL con URL válidos del portapapeles al añadir nuevo." + +#: config.py:463 +msgid "Default to Update when books selected?" +msgstr "¿Actualizar de manera predeterminada al seleccionar libros?" + +#: config.py:464 +msgid "" +"The top FanFictionDownLoader plugin button will start Update if\n" +"books are selected. If unchecked, it will always bring up 'Add New'." 
+msgstr "El botón principal del complemento FanFictionDownLoader efectuará una actualización si hay libros seleccionados. Si se desactiva, siempre funcionará como «Añadir nuevo»."
+
+#: config.py:468
+msgid "Keep 'Add New from URL(s)' dialog on top?"
+msgstr "¿Mantener la ventana «Añadir nuevo de URL» en primer plano?"
+
+#: config.py:469
+msgid ""
+"Instructs the OS and Window Manager to keep the 'Add New from URL(s)'\n"
+"dialog on top of all other windows. Useful for dragging URLs onto it."
+msgstr "Solicitar al sistema operativo y al gestor de ventanas que mantenga la ventana «Añadir nuevo de URL» por encima de todas las otras ventanas. Es útil para arrastrar URL sobre ella."
+
+#: config.py:473
+msgid "Misc Options"
+msgstr "Opciones varias"
+
+#: config.py:478
+msgid "Include images in EPUBs?"
+msgstr "¿Incluir imágenes en archivos EPUB?"
+
+#: config.py:479
+msgid ""
+"Download and include images in EPUB stories. This is equivalent to "
+"adding:%(imgset)s ...to the top of %(pini)s. Your settings in %(pini)s will"
+" override this."
+msgstr "Descargar e incluir imágenes en historias en formato EPUB. Esto es equivalente a añadir:%(imgset)s ... al principio de %(pini)s. La configuración de %(pini)s tiene prioridad sobre esta opción."
+
+#: config.py:483
+msgid "Inject calibre Series when none found?"
+msgstr "¿Incluir la serie de calibre si no se encuentra ninguna?"
+
+#: config.py:484
+msgid ""
+"If no series is found, inject the calibre series (if there is one) so it "
+"appears on the FFDL title page(not cover)."
+msgstr "Si no se encuentra ninguna serie, incluir la de calibre (si hay alguna) para que aparezca en la página de título de FFDL (no en la portada)."
+
+#: config.py:488
+msgid "Reject List"
+msgstr "Lista de rechazos"
+
+#: config.py:492
+msgid "Edit Reject URL List"
+msgstr "Modificar la lista de URL rechazados"
+
+#: config.py:493
+msgid "Edit list of URLs FFDL will automatically Reject."
+msgstr "Modificar la lista de URL que FFDL rechazará automáticamente" + +#: config.py:497 config.py:571 +msgid "Add Reject URLs" +msgstr "Añadir URL rechazados" + +#: config.py:498 +msgid "Add additional URLs to Reject as text." +msgstr "Añadir URL adicionales para rechazar como texto." + +#: config.py:502 +msgid "Edit Reject Reasons List" +msgstr "Modificar la lista de motivos de rechazo" + +#: config.py:503 config.py:562 +msgid "Customize the Reasons presented when Rejecting URLs" +msgstr "Personalizar los motivos mostrados al rechazar URL" + +#: config.py:507 +msgid "Reject Without Confirmation?" +msgstr "¿Rechazar sin confirmación?" + +#: config.py:508 +msgid "Always reject URLs on the Reject List without stopping and asking." +msgstr "Rechazar siempre los URL en la lista de rechazos sin parar y preguntar." + +#: config.py:546 +msgid "Edit Reject URLs List" +msgstr "Modificar la lista de URL rechazados" + +#: config.py:560 +msgid "Reject Reasons" +msgstr "Motivos de rechazo" + +#: config.py:561 +msgid "Customize Reject List Reasons" +msgstr "Personalizar la lista de motivos de rechazo" + +#: config.py:569 +msgid "Reason why I rejected it" +msgstr "Motivo por el que se rechaza" + +#: config.py:569 +msgid "Title by Author" +msgstr "Título por autor" + +#: config.py:572 +msgid "" +"Add Reject URLs. Use: <b>http://...,note</b> or <b>http://...,title by " +"author - note</b><br>Invalid story URLs will be ignored." +msgstr "Añadir URL rechazados. <b>http://...,nota</b> o <b>http://...,título por autor - nota</b><br>No se tendrán en cuenta los URL de historia no válidos." 
+ +#: config.py:573 +msgid "" +"One URL per line:\n" +"<b>http://...,note</b>\n" +"<b>http://...,title by author - note</b>" +msgstr "Un URL por línea.\n<b>http://...,nota</b>\n<b>http://...,título por autor - nota</b>" + +#: config.py:575 dialogs.py:1012 +msgid "Add this reason to all URLs added:" +msgstr "Añadir este motivo a todos los URL añadidos:" + +#: config.py:590 +msgid "" +"These settings provide more detailed control over what metadata will be " +"displayed inside the ebook as well as let you set %(isa)s and %(u)s/%(p)s " +"for different sites." +msgstr "Estas configuraciones proporcionan un control más fino sobre qué metadatos se muestran dentro del libro, y permiten establecer%(isa)s y %(u)s o %(p)s para distintos sitios." + +#: config.py:608 +msgid "View Defaults" +msgstr "Ver opciones predeterminadas" + +#: config.py:609 +msgid "" +"View all of the plugin's configurable settings\n" +"and their default settings." +msgstr "Ver todas las opciones configurables del complemento y sus valores predeterminados." + +#: config.py:627 +msgid "Plugin Defaults (%s) (Read-Only)" +msgstr "Valores predeterminados (%s) (sólo lectura)" + +#: config.py:628 config.py:634 +msgid "" +"These are all of the plugin's configurable options\n" +"and their default settings." +msgstr "Éstas son todas las opciones configurables del complemento y sus valores predeterminados." + +#: config.py:629 +msgid "Plugin Defaults" +msgstr "Opciones predeterminadas del complemento" + +#: config.py:645 dialogs.py:542 dialogs.py:645 +msgid "OK" +msgstr "Aceptar" + +#: config.py:665 +msgid "" +"These settings provide integration with the %(rl)s Plugin. %(rl)s can " +"automatically send to devices and change custom columns. You have to create" +" and configure the lists in %(rl)s to be useful." +msgstr "Estas configuraciones permiten la integración con el complemento %(rl)s. %(rl)s puede enviar automáticamente a dispositivos y cambiar columnas personalizadas. 
Debe crear y configurar las listas en %(rl)s para que sea útil." + +#: config.py:670 +msgid "Add new/updated stories to \"Send to Device\" Reading List(s)." +msgstr "Añadir historias nuevas o actualizadas a la(s) lista(s) de «Enviar a dispositivo»." + +#: config.py:671 +msgid "" +"Automatically add new/updated stories to these lists in the %(rl)s plugin." +msgstr "Añadir automáticamente las historias nuevas o actualizadas a estas listas en el complemento %(rl)s." + +#: config.py:676 +msgid "\"Send to Device\" Reading Lists" +msgstr "Listas de «Enviar a dispositivo»" + +#: config.py:677 config.py:680 config.py:693 config.py:696 +msgid "" +"When enabled, new/updated stories will be automatically added to these " +"lists." +msgstr "Si se activa, las historias nuevas o actualizadas se añadirán automáticamente a estas listas." + +#: config.py:686 +msgid "Add new/updated stories to \"To Read\" Reading List(s)." +msgstr "Añadir las historias nuevas o actualizadas a las listas «Para leer»." + +#: config.py:687 +msgid "" +"Automatically add new/updated stories to these lists in the %(rl)s plugin.\n" +"Also offers menu option to remove stories from the \"To Read\" lists." +msgstr "Añadir automáticamente las historias nuevas o actualizadas a estas listas en el complemento %(rl)s.\nTambién hay una opción de menú para eliminar historias de las listas «Para leer»." + +#: config.py:692 +msgid "\"To Read\" Reading Lists" +msgstr "Listas «Para leer»" + +#: config.py:702 +msgid "Add stories back to \"Send to Device\" Reading List(s) when marked \"Read\"." +msgstr "Volver a añadir las historias a la(s) lista(s) de «Enviar a dispositivo» al marcarlas como leídas." + +#: config.py:703 +msgid "" +"Menu option to remove from \"To Read\" lists will also add stories back to " +"\"Send to Device\" Reading List(s)" +msgstr "La opción de menú para eliminar de las listas «Para leer» también vuelve a añadir las historias a la(s) lista(s) de «Enviar a dispositivo»." 
+
+#: config.py:725
+msgid ""
+"The %(gc)s plugin can create cover images for books using various metadata "
+"and configurations. If you have GC installed, FFDL can run GC on new "
+"downloads and metadata updates. Pick a GC setting by site or Default."
+msgstr "El complemento %(gc)s puede crear imágenes de portada para los libros usando distintos metadatos y configuraciones. Si tiene GC instalado, FFDL puede ejecutar GC para las nuevas descargas y las actualizaciones de metadatos. Elija una configuración de GC o «Predeterminada» para cada sitio."
+
+#: config.py:743 config.py:747 config.py:760
+msgid "Default"
+msgstr "Predeterminada"
+
+#: config.py:748
+msgid ""
+"On Metadata update, run %(gc)s with this setting, if not selected for "
+"specific site."
+msgstr "Al actualizar metadatos, ejecutar %(gc)s con esta configuración, si no hay una seleccionada para el sitio específico."
+
+#: config.py:751
+msgid "On Metadata update, run %(gc)s with this setting for %(site)s stories."
+msgstr "Al actualizar metadatos, ejecutar %(gc)s con esta configuración para las historias de %(site)s."
+
+#: config.py:774
+msgid "Run %(gc)s Only on New Books"
+msgstr "Ejecutar %(gc)s sólo para libros nuevos"
+
+#: config.py:775
+msgid "Default is to run GC any time the calibre metadata is updated."
+msgstr "De manera predeterminada GC se ejecuta cada vez que se actualizan los metadatos de calibre."
+
+#: config.py:779
+msgid "Allow %(gcset)s from %(pini)s to override"
+msgstr "Permitir que %(gcset)s de %(pini)s tenga prioridad"
+
+#: config.py:780
+msgid ""
+"The %(pini)s parameter %(gcset)s allows you to choose a GC setting based on "
+"metadata rather than site, but it's much more complex.<br \\>%(gcset)s is "
+"ignored when this is off."
+msgstr "El parámetro %(gcset)s de %(pini)s le permite elegir una configuración de GC según los metadatos en vez del sitio, pero es mucho más complejo.<br>%(gcset)s no se tiene en cuenta si esta opción está desactivada."
+ +#: config.py:784 +msgid "Use calibre's Polish feature to inject/update the cover" +msgstr "Usar la función pulir de calibre para incluir o actualizar la portada" + +#: config.py:785 +msgid "" +"Calibre's Polish feature will be used to inject or update the generated " +"cover into the ebook, EPUB only." +msgstr "La función pulir de calibre se usará para incluir o actualizar la portada generada en el libro, sólo en formato EPUB." + +#: config.py:799 +msgid "" +"These settings provide integration with the %(cp)s Plugin. %(cp)s can " +"automatically update custom columns with page, word and reading level " +"statistics. You have to create and configure the columns in %(cp)s first." +msgstr "Estas configuraciones permiten la integración con el complemento %(cp)s. %(cp)s puede actualizar automáticamente columnas personalizadas con estadísticas de páginas, palabras y progreso de lectura. Debe crear y configurar primero las columnas en %(cp)s." + +#: config.py:804 +msgid "" +"If any of the settings below are checked, when stories are added or updated," +" the %(cp)s Plugin will be called to update the checked statistics." +msgstr "Si alguna de las siguientes opciones está marcada, cuando se añaden o actualizan historias el complemento %(cp)s se ejecutará para actualizar las estadísticas marcadas." + +#: config.py:810 +msgid "Which column and algorithm to use are configured in %(cp)s." +msgstr "Las columnas y algoritmos que se utilizarán se configuran en %(cp)s." + +#: config.py:818 +msgid "" +"Will overwrite word count from FFDL metadata if set to update the same " +"custom column." +msgstr "Reemplazará la cuenta de palabras de los metadatos de FFDL si se configura para actualizar la misma columna personalizada." + +#: config.py:849 +msgid "" +"These controls aren't plugin settings as such, but convenience buttons for " +"setting Keyboard shortcuts and getting all the FanFictionDownLoader " +"confirmation dialogs back again." 
+msgstr "Estos controles no son opciones de configuración del complemento como tales, sino botones útiles para configurar los atajos de teclado y volver a mostrar todos los diálogos de confirmación de FanFictionDownLoader." + +#: config.py:854 +msgid "Keyboard shortcuts..." +msgstr "Atajos de teclado..." + +#: config.py:855 +msgid "Edit the keyboard shortcuts associated with this plugin" +msgstr "Modificar los atajos de teclado asociados con este complemento" + +#: config.py:859 +msgid "Reset disabled &confirmation dialogs" +msgstr "Restablecer ventanas de &confirmación desactivadas" + +#: config.py:860 +msgid "Reset all show me again dialogs for the FanFictionDownLoader plugin" +msgstr "Restablecer todas las ventanas «Mostrar otra vez» para el complemento FanFictionDownLoader" + +#: config.py:864 +msgid "&View library preferences..." +msgstr "&Mostrar preferencias de la biblioteca..." + +#: config.py:865 +msgid "View data stored in the library database for this plugin" +msgstr "Ver los datos almacenados en la base de datos de la biblioteca para este complemento" + +#: config.py:876 +msgid "Done" +msgstr "Hecho" + +#: config.py:877 +msgid "Confirmation dialogs have all been reset" +msgstr "Se han restablecido todas las ventanas de confirmación" + +#: config.py:925 +msgid "Category" +msgstr "Categoría" + +#: config.py:926 +msgid "Genre" +msgstr "Género" + +#: config.py:927 +msgid "Language" +msgstr "Idioma" + +#: config.py:928 ffdl_plugin.py:1152 ffdl_plugin.py:1324 ffdl_plugin.py:1354 +msgid "Status" +msgstr "Estado" + +#: config.py:929 +msgid "Status:%(cmplt)s" +msgstr "Estado:%(cmplt)s" + +#: config.py:930 +msgid "Status:%(inprog)s" +msgstr "Estado:%(inprog)s" + +#: config.py:931 config.py:1065 +msgid "Series" +msgstr "Serie" + +#: config.py:932 +msgid "Characters" +msgstr "Personajes" + +#: config.py:933 +msgid "Relationships" +msgstr "Relaciones" + +#: config.py:934 +msgid "Published" +msgstr "Publicado" + +#: config.py:935 ffdl_plugin.py:1437 
ffdl_plugin.py:1456
+msgid "Updated"
+msgstr "Actualizado"
+
+#: config.py:936
+msgid "Created"
+msgstr "Creado"
+
+#: config.py:937
+msgid "Rating"
+msgstr "Valoración"
+
+#: config.py:938
+msgid "Warnings"
+msgstr "Avisos"
+
+#: config.py:939
+msgid "Chapters"
+msgstr "Capítulos"
+
+#: config.py:940
+msgid "Words"
+msgstr "Palabras"
+
+#: config.py:941
+msgid "Site"
+msgstr "Sitio"
+
+#: config.py:942
+msgid "Story ID"
+msgstr "ID de la historia"
+
+#: config.py:943
+msgid "Author ID"
+msgstr "ID del autor"
+
+#: config.py:944
+msgid "Extra Tags"
+msgstr "Etiquetas adicionales"
+
+#: config.py:945 config.py:1057 dialogs.py:804 dialogs.py:900
+#: ffdl_plugin.py:1152 ffdl_plugin.py:1324 ffdl_plugin.py:1354
+msgid "Title"
+msgstr "Título"
+
+#: config.py:946
+msgid "Story URL"
+msgstr "URL de la historia"
+
+#: config.py:947
+msgid "Description"
+msgstr "Descripción"
+
+#: config.py:948 dialogs.py:804 dialogs.py:900 ffdl_plugin.py:1152
+#: ffdl_plugin.py:1324 ffdl_plugin.py:1354
+msgid "Author"
+msgstr "Autor"
+
+#: config.py:949
+msgid "Author URL"
+msgstr "URL del autor"
+
+#: config.py:950
+msgid "File Format"
+msgstr "Formato de archivo"
+
+#: config.py:951
+msgid "File Extension"
+msgstr "Extensión del archivo"
+
+#: config.py:952
+msgid "Site Abbrev"
+msgstr "Abreviatura de sitio"
+
+#: config.py:953
+msgid "FFDL Version"
+msgstr "Versión de FFDL"
+
+#: config.py:968
+msgid ""
+"If you have custom columns defined, they will be listed below. Choose a "
+"metadata value type to fill your columns automatically."
+msgstr "Si tiene columnas personalizadas, aparecerán a continuación. Elija un tipo de valor de metadatos para rellenar las columnas automáticamente."
+
+#: config.py:993
+msgid "Update this %s column(%s) with..."
+msgstr "Actualizar esta columna %s (%s) con..."
+
+#: config.py:1003
+msgid "Values that aren't valid for this enumeration column will be ignored."
+msgstr "Los valores que no sean válidos para esta columna de enumeración serán ignorados."
+
+#: config.py:1003 config.py:1005
+msgid "Metadata values valid for this type of column."
+msgstr "Valores de metadatos válidos para este tipo de columna."
+
+#: config.py:1008 config.py:1084
+msgid "New Only"
+msgstr "Sólo nuevo"
+
+#: config.py:1009
+msgid ""
+"Write to %s(%s) only for new\n"
+"books, not updates to existing books."
+msgstr "Escribir a %s(%s) sólo para libros nuevos,\nno en actualizaciones de libros existentes."
+
+#: config.py:1020
+msgid "Allow %(ccset)s from %(pini)s to override"
+msgstr "Permitir que %(ccset)s de %(pini)s tenga prioridad"
+
+#: config.py:1021
+msgid ""
+"The %(pini)s parameter %(ccset)s allows you to set custom columns to site "
+"specific values that aren't common to all sites.<br />%(ccset)s is ignored "
+"when this is off."
+msgstr "El parámetro %(ccset)s de %(pini)s le permite asignar a las columnas personalizadas valores específicos para cada sitio.<br>%(ccset)s no se tiene en cuenta si esta opción está desactivada."
+
+#: config.py:1026
+msgid "Special column:"
+msgstr "Columna especial:"
+
+#: config.py:1031
+msgid "Update/Overwrite Error Column:"
+msgstr "Actualizar o reemplazar columna de error:"
+
+#: config.py:1032
+msgid ""
+"When an update or overwrite of an existing story fails, record the reason in this column.\n"
+"(Text and Long Text columns only.)"
+msgstr "Cuando una actualización o reemplazo de una historia existente falla, registrar el motivo en esta columna.\n(Sólo columnas de texto y texto largo)."
+ +#: config.py:1058 +msgid "Author(s)" +msgstr "Autor(es)" + +#: config.py:1059 +msgid "Publisher" +msgstr "Editorial" + +#: config.py:1060 +msgid "Tags" +msgstr "Etiquetas" + +#: config.py:1061 +msgid "Languages" +msgstr "Idiomas" + +#: config.py:1062 +msgid "Published Date" +msgstr "Fecha de publicación" + +#: config.py:1063 +msgid "Date" +msgstr "Fecha" + +#: config.py:1064 +msgid "Comments" +msgstr "Comentarios" + +#: config.py:1066 +msgid "Ids(url id only)" +msgstr "ID (sólo identificador de URL)" + +#: config.py:1071 +msgid "" +"The standard calibre metadata columns are listed below. You may choose " +"whether FFDL will fill each column automatically on updates or only for new " +"books." +msgstr "Las columnas de metadatos estándar de calibre se muestran a continuación. Puede elegir si FFDL rellenará cada columna automáticamente para las actualizaciones o sólo para los libros nuevos." + +#: config.py:1085 +msgid "" +"Write to %s only for new\n" +"books, not updates to existing books." +msgstr "Escribir a %s sólo para libros nuevos, no en actualizaciones de libros existentes." 
+ +#: dialogs.py:69 +msgid "Skip" +msgstr "Omitir" + +#: dialogs.py:70 +msgid "Add New Book" +msgstr "Añadir libro nuevo" + +#: dialogs.py:71 +msgid "Update EPUB if New Chapters" +msgstr "Actualizar EPUB cuando haya capítulos nuevos" + +#: dialogs.py:72 +msgid "Update EPUB Always" +msgstr "Actualizar EPUB siempre" + +#: dialogs.py:73 +msgid "Overwrite if Newer" +msgstr "Reemplazar si es más reciente" + +#: dialogs.py:74 +msgid "Overwrite Always" +msgstr "Reemplazar siempre" + +#: dialogs.py:75 +msgid "Update Calibre Metadata Only" +msgstr "Actualizar sólo los metadatos de calibre" + +#: dialogs.py:239 ffdl_plugin.py:89 +msgid "FanFictionDownLoader" +msgstr "FanFictionDownLoader" + +#: dialogs.py:256 dialogs.py:703 +msgid "Show Download Options" +msgstr "Mostrar opciones de descarga" + +#: dialogs.py:275 dialogs.py:720 +msgid "Output &Format:" +msgstr "&Formato de salida:" + +#: dialogs.py:283 dialogs.py:728 +msgid "" +"Choose output format to create. May set default from plugin configuration." +msgstr "Elija un formato de salida para crear. Puede establecer el predeterminado en la configuración del complemento." + +#: dialogs.py:311 dialogs.py:745 +msgid "Update Calibre &Metadata?" +msgstr "¿Actualizar los metadatos de calibre?" + +#: dialogs.py:312 dialogs.py:746 +msgid "" +"Update metadata for existing stories in Calibre from web site?\n" +"(Columns set to 'New Only' in the column tabs will only be set for new books.)" +msgstr "¿Actualizar los metadatos de las historias existentes en calibre a partir del sitio web?\n(Las columnas establecidas en «Sólo nuevo» en la pestaña de columnas sólo se cambiarán para libros nuevos)." + +#: dialogs.py:318 dialogs.py:750 +msgid "Update EPUB Cover?" +msgstr "¿Actualizar portada del archivo EPUB?" + +#: dialogs.py:319 dialogs.py:751 +msgid "" +"Update book cover image from site or defaults (if found) <i>inside</i> the " +"EPUB when EPUB is updated." 
+msgstr "Actualizar la imagen de portada a partir del sitio o de la configuración predeterminada (si se encuentra) <i>dentro</i> del archivo EPUB al actualizarlo." + +#: dialogs.py:366 +msgid "Story URL(s) for anthology, one per line:" +msgstr "URL de historias en la antología, uno por línea:" + +#: dialogs.py:367 +msgid "" +"URLs for stories to include in the anthology, one per line.\n" +"Will take URLs from clipboard, but only valid URLs." +msgstr "URL de las historias que se incluyen en la antología, uno por línea.\nSe tomarán URL del portapapeles, pero sólo los que sean válidos." + +#: dialogs.py:368 +msgid "If Story Already Exists in Anthology?" +msgstr "¿Si la historia ya existe en la antología?" + +#: dialogs.py:369 +msgid "" +"What to do if there's already an existing story with the same URL in the " +"anthology." +msgstr "Qué hacer si ya existe una historia con el mismo URL en la antología." + +#: dialogs.py:378 +msgid "Story URL(s), one per line:" +msgstr "URL de historias, uno por línea:" + +#: dialogs.py:379 +msgid "" +"URLs for stories, one per line.\n" +"Will take URLs from clipboard, but only valid URLs.\n" +"Add [1,5] after the URL to limit the download to chapters 1-5." +msgstr "URL para las historias, uno por línea.\nSe tomarán URL del portapapeles, pero sólo los que sean válidos.\nAñada [1,5] después del URL para restringir la descarga a los capítulos 1 a 5." + +#: dialogs.py:380 +msgid "If Story Already Exists?" +msgstr "¿Si la historia ya existe?" + +#: dialogs.py:381 +msgid "" +"What to do if there's already an existing story with the same URL or title " +"and author." +msgstr "Qué hacer si ya existe una historia con el mismo URL o título y autor." + +#: dialogs.py:481 +msgid "For Individual Books" +msgstr "Para libros individuales" + +#: dialogs.py:482 +msgid "Get URLs and go to dialog for individual story downloads." +msgstr "Obtener URL e ir a la ventana de descarga para historias individuales." 
+
+#: dialogs.py:486
+msgid "For Anthology Epub"
+msgstr "Para epub de antología"
+
+#: dialogs.py:487
+msgid ""
+"Get URLs and go to dialog for Anthology download.\n"
+"Requires %s plugin."
+msgstr "Obtener URL e ir a la ventana de descarga para antologías.\nRequiere el complemento %s."
+
+#: dialogs.py:492 dialogs.py:546 dialogs.py:573
+msgid "Cancel"
+msgstr "Cancelar"
+
+#: dialogs.py:524
+msgid "Password"
+msgstr "Contraseña"
+
+#: dialogs.py:525
+msgid "Author requires a password for this story(%s)."
+msgstr "El autor solicita una contraseña para esta historia (%s)."
+
+#: dialogs.py:530
+msgid "User/Password"
+msgstr "Usuario y contraseña"
+
+#: dialogs.py:531
+msgid "%s requires you to login to download this story."
+msgstr "%s solicita que se registre para descargar esta historia."
+
+#: dialogs.py:533
+msgid "User:"
+msgstr "Usuario:"
+
+#: dialogs.py:537
+msgid "Password:"
+msgstr "Contraseña:"
+
+#: dialogs.py:568
+msgid "Fetching metadata for stories..."
+msgstr "Obteniendo metadatos para las historias..."
+
+#: dialogs.py:569
+msgid "Downloading metadata for stories"
+msgstr "Descargando metadatos para las historias"
+
+#: dialogs.py:570
+msgid "Fetched metadata for"
+msgstr "Metadatos obtenidos para"
+
+#: dialogs.py:640 ffdl_plugin.py:325
+msgid "About FanFictionDownLoader"
+msgstr "Acerca de FanFictionDownLoader"
+
+#: dialogs.py:694
+msgid "Remove selected books from the list"
+msgstr "Eliminar los libros seleccionados de la lista"
+
+#: dialogs.py:733
+msgid "Update Mode:"
+msgstr "Modo de actualización:"
+
+#: dialogs.py:736
+msgid ""
+"What sort of update to perform. May set default from plugin configuration."
+msgstr "Qué tipo de actualización se realizará. Puede definirse el valor predeterminado en la configuración del complemento."
+
+#: dialogs.py:804 ffdl_plugin.py:1152 ffdl_plugin.py:1324 ffdl_plugin.py:1354
+msgid "Comment"
+msgstr "Comentario"
+
+#: dialogs.py:872
+msgid "Are you sure you want to remove this book from the list?"
+msgstr "¿Está seguro de querer eliminar este libro de la lista?" + +#: dialogs.py:874 +msgid "Are you sure you want to remove the selected %d books from the list?" +msgstr "¿Está seguro de querer eliminar los %d libros seleccionados de la lista?" + +#: dialogs.py:900 +msgid "Note" +msgstr "Nota" + +#: dialogs.py:939 +msgid "Select or Edit Reject Note." +msgstr "Seleccionar o modificar nota de rechazo." + +#: dialogs.py:947 +msgid "Are you sure you want to remove this URL from the list?" +msgstr "¿Está seguro de querer eliminar este URL de la lista?" + +#: dialogs.py:949 +msgid "Are you sure you want to remove the %d selected URLs from the list?" +msgstr "¿Está seguro de querer eliminar los %d URL seleccionados de la lista?" + +#: dialogs.py:967 +msgid "List of Books to Reject" +msgstr "Lista de libros para rechazar" + +#: dialogs.py:980 +msgid "" +"FFDL will remember these URLs and display the note and offer to reject them " +"if you try to download them again later." +msgstr "FFDL recordará estos URL, mostrará una nota y le permitirá rechazarlos si vuelve a intentar descargarlos más adelante." + +#: dialogs.py:994 +msgid "Remove selected URL(s) from the list" +msgstr "Eliminar los URL seleccionados de la lista" + +#: dialogs.py:1009 dialogs.py:1013 +msgid "This will be added to whatever note you've set for each URL above." +msgstr "Se añadirá a cualquier nota que haya establecido para cada URL anterior." + +#: dialogs.py:1022 +msgid "Delete Books (including books without FanFiction URLs)?" +msgstr "¿Eliminar libros (incluyendo libros sin URL de «fanfiction»)?" + +#: dialogs.py:1023 +msgid "Delete the selected books after adding them to the Rejected URLs list." +msgstr "Eliminar los libros seleccionados después de añadirlos a la lista de URL rechazados." 
+ +#: ffdl_plugin.py:90 +msgid "Download FanFiction stories from various web sites" +msgstr "Descargar historias de «fanfiction» desde distintos sitios web" + +#: ffdl_plugin.py:120 +msgid "FanFictionDL" +msgstr "FanFictionDL" + +#: ffdl_plugin.py:243 +msgid "&Add New from URL(s)" +msgstr "&Añadir nuevo(s) a partir de URL" + +#: ffdl_plugin.py:245 +msgid "Add New FanFiction Book(s) from URL(s)" +msgstr "Añadir nuevo(s) libro(s) de «fanfiction» a partir de URL" + +#: ffdl_plugin.py:248 +msgid "&Update Existing FanFiction Book(s)" +msgstr "&Actualizar libro(s) de «fanfiction» existente(s)" + +#: ffdl_plugin.py:254 +msgid "Get Story URLs to Download from Web Page" +msgstr "Obtener URL de historia para descargar de una página web" + +#: ffdl_plugin.py:258 +msgid "&Make Anthology Epub Manually from URL(s)" +msgstr "&Crear manualmente un epub de antología a partir de URL" + +#: ffdl_plugin.py:260 +msgid "Make FanFiction Anthology Epub Manually from URL(s)" +msgstr "Crear un epub de antología a partir de URL manualmente" + +#: ffdl_plugin.py:263 +msgid "&Update Anthology Epub" +msgstr "Ac&tualizar epub de antología" + +#: ffdl_plugin.py:265 +msgid "Update FanFiction Anthology Epub" +msgstr "Actualizar un epub de antología de «fanfiction»" + +#: ffdl_plugin.py:273 +msgid "Add to \"To Read\" and \"Send to Device\" Lists" +msgstr "Añadir a las listas «Para leer» o de «Enviar a dispositivo»" + +#: ffdl_plugin.py:275 +msgid "Remove from \"To Read\" and add to \"Send to Device\" Lists" +msgstr "Eliminar de las listas «Para leer» y añadir a las de «Enviar a dispositivo»" + +#: ffdl_plugin.py:277 ffdl_plugin.py:282 +msgid "Remove from \"To Read\" Lists" +msgstr "Eliminar de las listas «Para leer»" + +#: ffdl_plugin.py:279 +msgid "Add Selected to \"Send to Device\" Lists" +msgstr "Añadir seleccionados a las listas de «Enviar a dispositivo»" + +#: ffdl_plugin.py:281 +msgid "Add to \"To Read\" Lists" +msgstr "Añadir a las listas «Para leer»" + +#: ffdl_plugin.py:297 +msgid "Get URLs 
from Selected Books" +msgstr "Obtener URL de los libros seleccionados" + +#: ffdl_plugin.py:303 ffdl_plugin.py:397 +msgid "Get Story URLs from Web Page" +msgstr "Obtener URL de historias a partir de una página web" + +#: ffdl_plugin.py:308 +msgid "Reject Selected Books" +msgstr "Rechazar los libros seleccionados" + +#: ffdl_plugin.py:316 +msgid "&Configure Plugin" +msgstr "&Configurar complemento" + +#: ffdl_plugin.py:319 +msgid "Configure FanFictionDownLoader" +msgstr "Configurar " + +#: ffdl_plugin.py:322 +msgid "About Plugin" +msgstr "Acerca del complemento" + +#: ffdl_plugin.py:379 +msgid "Cannot Update Reading Lists from Device View" +msgstr "No se pueden actualizar las listas de lectura en la vista de dispositivo" + +#: ffdl_plugin.py:383 +msgid "No Selected Books to Update Reading Lists" +msgstr "No hay libros seleccionados para actualizar las listas de lectura" + +#: ffdl_plugin.py:408 ffdl_plugin.py:460 +msgid "List of Story URLs" +msgstr "Lista de URL de historias" + +#: ffdl_plugin.py:409 +msgid "No Valid Story URLs found on given page." +msgstr "No se encontró ningún URL de historia válida en la página." + +#: ffdl_plugin.py:424 +msgid "No Selected Books to Get URLs From" +msgstr "No hay ningún libro seleccionado del que obtener URL" + +#: ffdl_plugin.py:442 +msgid "Collecting URLs for stories..." +msgstr "Recopilando URL para las historias..." + +#: ffdl_plugin.py:443 +msgid "Get URLs for stories" +msgstr "Obtener URL para las historias" + +#: ffdl_plugin.py:444 ffdl_plugin.py:491 ffdl_plugin.py:678 +msgid "URL retrieved" +msgstr "URL obtenido" + +#: ffdl_plugin.py:464 +msgid "List of URLs" +msgstr "Lista de URL" + +#: ffdl_plugin.py:465 +msgid "No Story URLs found in selected books." +msgstr "No se encontró ningún URL de historia en los libros seleccionados." 
+ +#: ffdl_plugin.py:481 +msgid "No Selected Books have URLs to Reject" +msgstr "Ningún libro seleccionado tiene URL para rechazar" + +#: ffdl_plugin.py:489 +msgid "Collecting URLs for Reject List..." +msgstr "Recopilando URL para la lista de rechazos..." + +#: ffdl_plugin.py:490 +msgid "Get URLs for Reject List" +msgstr "Obtener URL para la lista de rechazos" + +#: ffdl_plugin.py:525 +msgid "Proceed to Remove?" +msgstr "¿Eliminar?" + +#: ffdl_plugin.py:525 +msgid "Rejecting FFDL URLs: None of the books selected have FanFiction URLs." +msgstr "Rechazo de URL en FFDL: Ninguno de los libros seleccionades tiene URL de «fanfiction»." + +#: ffdl_plugin.py:547 +msgid "Cannot Make Anthologys without %s" +msgstr "No se pueden hacer antologías sin %s" + +#: ffdl_plugin.py:551 ffdl_plugin.py:655 +msgid "Cannot Update Books from Device View" +msgstr "No se pueden actualizar libros en la vista de dispositivo" + +#: ffdl_plugin.py:555 +msgid "Can only update 1 anthology at a time" +msgstr "Sólo se puede actualizar 1 antología cada vez" + +#: ffdl_plugin.py:564 +msgid "Can only Update Epub Anthologies" +msgstr "Sólo se pueden actualizar antologías en formato EPUB" + +#: ffdl_plugin.py:582 ffdl_plugin.py:583 +msgid "Cannot Update Anthology" +msgstr "No se puede actualizar la antología" + +#: ffdl_plugin.py:583 +msgid "" +"Book isn't an FFDL Anthology or contains book(s) without valid FFDL URLs." +msgstr "El libro no es una antología de FFDL o contiene libros sin URL de FFDL válidos." + +#: ffdl_plugin.py:641 +msgid "" +"There are %d stories in the current anthology that are <b>not</b> going to " +"be kept if you go ahead." +msgstr "Hay %d historias en la antología actual que <b>no</b> se mantendrán si continúa." + +#: ffdl_plugin.py:642 +msgid "Story URLs that will be removed:" +msgstr "URL de historias que se eliminarán:" + +#: ffdl_plugin.py:644 +msgid "Update anyway?" +msgstr "¿Actualizar de todas formas?" 
+ +#: ffdl_plugin.py:645 +msgid "Stories Removed" +msgstr "Historias eliminadas" + +#: ffdl_plugin.py:662 +msgid "No Selected Books to Update" +msgstr "No se han seleccionado libros para actualizar" + +#: ffdl_plugin.py:676 +msgid "Collecting stories for update..." +msgstr "Recopilando historias para actualizar..." + +#: ffdl_plugin.py:677 +msgid "Get stories for updates" +msgstr "Obtener historias para actualizar" + +#: ffdl_plugin.py:687 +msgid "Update Existing List" +msgstr "Actualizando la lista existente" + +#: ffdl_plugin.py:745 +msgid "Started fetching metadata for %s stories." +msgstr "Obtención de metadatos iniciada para %s historias." + +#: ffdl_plugin.py:751 +msgid "No valid story URLs entered." +msgstr "No se ha introducido ningún URL de historia válido." + +#: ffdl_plugin.py:776 ffdl_plugin.py:782 +msgid "Reject URL?" +msgstr "¿Rechazar URL?" + +#: ffdl_plugin.py:783 ffdl_plugin.py:801 +msgid "<b>%s</b> is on your Reject URL list:" +msgstr "<b>%s</b> está en la lista de rechazos:" + +#: ffdl_plugin.py:785 +msgid "Click '<b>Yes</b>' to Reject." +msgstr "Pulse en «<b>Sí</b>» para rechazar." + +#: ffdl_plugin.py:786 ffdl_plugin.py:890 +msgid "Click '<b>No</b>' to download anyway." +msgstr "Pulse en «<b>No</b>» para descargar de todos modos." + +#: ffdl_plugin.py:788 +msgid "Story on Reject URLs list (%s)." +msgstr "Historia en la lista de rechazos (%s)." + +#: ffdl_plugin.py:791 +msgid "Rejected" +msgstr "Rechazado" + +#: ffdl_plugin.py:794 +msgid "Remove Reject URL?" +msgstr "¿Eliminar URL rechazado?" + +#: ffdl_plugin.py:800 +msgid "Remove URL from Reject List?" +msgstr "¿Eliminar URL de la lista de rechazos?" + +#: ffdl_plugin.py:803 +msgid "Click '<b>Yes</b>' to remove it from the list," +msgstr "Pulse en «<b>Sí</b>» para eliminarlo de la lista." + +#: ffdl_plugin.py:804 +msgid "Click '<b>No</b>' to leave it on the list." +msgstr "Pulse en «<b>No</b>» para dejarlo en la lista." + +#: ffdl_plugin.py:821 +msgid "Cannot update non-epub format." 
+msgstr "No se puede actualizar un formato que no sea EPUB." + +#: ffdl_plugin.py:866 +msgid "Are You an Adult?" +msgstr "¿Es usted adulto?" + +#: ffdl_plugin.py:867 +msgid "" +"%s requires that you be an adult. Please confirm you are an adult in your " +"locale:" +msgstr "%s requiere que usted sea adulto. Por favor confirme que es usted adulto en su jurisdicción:" + +#: ffdl_plugin.py:881 +msgid "Skip Story?" +msgstr "¿Omitir historia?" + +#: ffdl_plugin.py:887 +msgid "Skip Anthology Story?" +msgstr "¿Omitir historia de antología?" + +#: ffdl_plugin.py:888 +msgid "" +"\"<b>%s</b>\" is in series \"<b><a href=\"%s\">%s</a></b>\" that you have an" +" anthology book for." +msgstr "«<b>%s</b>» está en la serie «<b><a href=\"%s\">%s</a></b>», para la que tiene una antología." + +#: ffdl_plugin.py:889 +msgid "Click '<b>Yes</b>' to Skip." +msgstr "Pulse en «<b>Sí</b>» para omitir." + +#: ffdl_plugin.py:892 +msgid "Story in Series Anthology(%s)." +msgstr "Historia en antología de serie (%s)." + +#: ffdl_plugin.py:897 +msgid "Skipped" +msgstr "Omitida" + +#: ffdl_plugin.py:925 +msgid "Add" +msgstr "Añadir" + +#: ffdl_plugin.py:938 +msgid "Meta" +msgstr "Meta" + +#: ffdl_plugin.py:971 +msgid "Skipping duplicate story." +msgstr "Omitiendo historia duplicada." + +#: ffdl_plugin.py:974 +msgid "" +"More than one identical book by Identifer URL or title/author(s)--can't tell" +" which book to update/overwrite." +msgstr "Hay más de un libro idéntico según el URL identificador o el título y autor(es). No se puede saberse cuál hay que actualizar o reemplazar." + +#: ffdl_plugin.py:985 +msgid "Update" +msgstr "Actualizar" + +#: ffdl_plugin.py:993 ffdl_plugin.py:1000 +msgid "Change Story URL?" +msgstr "¿Cambiar el URL de historia?" 
+ +#: ffdl_plugin.py:1001 +msgid "" +"<b>%s</b> by <b>%s</b> is already in your library with a different source " +"URL:" +msgstr "<b>%s</b> por <b>%s</b> ya está en la biblioteca con un URL de origen diferente:" + +#: ffdl_plugin.py:1002 +msgid "In library: <a href=\"%(liburl)s\">%(liburl)s</a>" +msgstr "En la biblioteca: <a href=\"%(liburl)s\">%(liburl)s</a>" + +#: ffdl_plugin.py:1003 ffdl_plugin.py:1017 +msgid "New URL: <a href=\"%(newurl)s\">%(newurl)s</a>" +msgstr "Nuevo URL: <a href=\"%(newurl)s\">%(newurl)s</a>" + +#: ffdl_plugin.py:1004 +msgid "Click '<b>Yes</b>' to update/overwrite book with new URL." +msgstr "Pulse en «<b>Sí</b>» para actualizar o reemplazar el libro con el URL nuevo." + +#: ffdl_plugin.py:1005 +msgid "Click '<b>No</b>' to skip updating/overwriting this book." +msgstr "Pulse en «<b>No</b>» para omitir la actualización o reemplazo de este libro." + +#: ffdl_plugin.py:1007 ffdl_plugin.py:1014 +msgid "Download as New Book?" +msgstr "¿Descargar como un libro nuevo?" + +#: ffdl_plugin.py:1015 +msgid "" +"<b>%s</b> by <b>%s</b> is already in your library with a different source " +"URL." +msgstr "<b>%s</b> por <b>%s</b> ya está en la biblioteca con un URL de origen diferente." + +#: ffdl_plugin.py:1016 +msgid "" +"You chose not to update the existing book. Do you want to add a new book " +"for this URL?" +msgstr "Ha elegido no actualizar el libro existente. ¿Quiere añadir un nuevo libro con este URL?" + +#: ffdl_plugin.py:1018 +msgid "Click '<b>Yes</b>' to a new book with new URL." +msgstr "Pulse en «<b>Sí</b>» para añadir un nuevo libro con el nuevo URL." + +#: ffdl_plugin.py:1019 +msgid "Click '<b>No</b>' to skip URL." +msgstr "Pulse en «<b>No</b>» para omitir el URL." 
+ +#: ffdl_plugin.py:1025 +msgid "Update declined by user due to differing story URL(%s)" +msgstr "Actualización anulada por el usuario debido a un conflicto de URL de historia (%s)" + +#: ffdl_plugin.py:1028 +msgid "Different URL" +msgstr "URL diferente" + +#: ffdl_plugin.py:1033 +msgid "Metadata collected." +msgstr "Metadatos recopilados." + +#: ffdl_plugin.py:1049 +msgid "Already contains %d chapters." +msgstr "Ya contiene %d capítulos." + +#: ffdl_plugin.py:1054 jobs.py:199 +msgid "" +"Existing epub contains %d chapters, web site only has %d. Use Overwrite to " +"force update." +msgstr "El epub existente contiene %d capítulos, el sitio web sólo tiene %d. Use Reemplazar para forzar la actualización." + +#: ffdl_plugin.py:1056 +msgid "" +"FFDL doesn't recognize chapters in existing epub, epub is probably from a " +"different source. Use Overwrite to force update." +msgstr "FFDL no encuentra capítulos en el epub existente, probablemente procede de un origen distinto. Use Reemplazar para forzar la actualización." + +#: ffdl_plugin.py:1068 +msgid "Not Overwriting, web site is not newer." +msgstr "No se reemplaza, el sitio web no es más reciente." + +#: ffdl_plugin.py:1148 +msgid "None of the <b>%d</b> URLs/stories given can be/need to be downloaded." +msgstr "Ninguna de las <b>%d</b> historias o URL dados pueden o necesitan descargarse." + +#: ffdl_plugin.py:1149 ffdl_plugin.py:1320 ffdl_plugin.py:1350 +msgid "See log for details." +msgstr "Vea el registro para más detalles." + +#: ffdl_plugin.py:1150 +msgid "Proceed with updating your library(Error Column, if configured)?" +msgstr "¿Continuar actualizando la biblioteca (columna de error, si está configurada)?" 
+ +#: ffdl_plugin.py:1157 ffdl_plugin.py:1332 +msgid "Bad" +msgstr "Incorrecta" + +#: ffdl_plugin.py:1165 +msgid "FFDL download ended" +msgstr "Descarga de FFDL finalizada" + +#: ffdl_plugin.py:1165 ffdl_plugin.py:1375 +msgid "FFDL log" +msgstr "Registro de FFDL" + +#: ffdl_plugin.py:1181 +msgid "Download FanFiction Book" +msgstr "Descargar libro de «fanfiction»" + +#: ffdl_plugin.py:1188 +msgid "Starting %d FanFictionDownLoads" +msgstr "Iniciando %d descargas de FanFictionDownLoader" + +#: ffdl_plugin.py:1218 +msgid "Story Details:" +msgstr "Detalles de la historia:" + +#: ffdl_plugin.py:1221 +msgid "Error Updating Metadata" +msgstr "Error actualizando metadatos" + +#: ffdl_plugin.py:1222 +msgid "" +"An error has occurred while FFDL was updating calibre's metadata for <a " +"href='%s'>%s</a>." +msgstr "Ha ocurrido un error mientras FFDL actualizaba la base de datos de calibre para " + +#: ffdl_plugin.py:1223 +msgid "The ebook has been updated, but the metadata has not." +msgstr "El libro ha sido actualizado, pero no los metadatos." + +#: ffdl_plugin.py:1275 +msgid "Finished Adding/Updating %d books." +msgstr "Finalizada la adición o actualización de %d libros." + +#: ffdl_plugin.py:1283 +msgid "Starting auto conversion of %d books." +msgstr "Iniciando la conversión automática de %d libros." + +#: ffdl_plugin.py:1304 +msgid "No Good Stories for Anthology" +msgstr "No hay historias válidas para la antología" + +#: ffdl_plugin.py:1305 +msgid "" +"No good stories/updates where downloaded, Anthology creation/update aborted." +msgstr "No se han descargado historias o actualizaciones válidas, la creación o actualización de la antología se cancela." + +#: ffdl_plugin.py:1310 ffdl_plugin.py:1349 +msgid "FFDL found <b>%s</b> good and <b>%s</b> bad updates." +msgstr "FFDL ha encontrado <b>%s</b> actualizaciones válidas y <b>%s</b> incorrectas." + +#: ffdl_plugin.py:1317 +msgid "" +"Are you sure you want to continue with creating/updating this Anthology?" 
+msgstr "¿Está seguro de querer continuar creando o actualizando la antología?" + +#: ffdl_plugin.py:1318 +msgid "Any updates that failed will <b>not</b> be included in the Anthology." +msgstr "Las actualizaciones fallidas <b>no</b> se incluirán en la antología." + +#: ffdl_plugin.py:1319 +msgid "However, if there's an older version, it will still be included." +msgstr "Sin embargo, si hay una versión más antigua, se incluirá." + +#: ffdl_plugin.py:1322 +msgid "Proceed with updating this anthology and your library?" +msgstr "¿Continuar actualizando esta antología y la biblioteca?" + +#: ffdl_plugin.py:1330 +msgid "Good" +msgstr "Válida" + +#: ffdl_plugin.py:1351 +msgid "Proceed with updating your library?" +msgstr "¿Continuar actualizando la biblioteca?" + +#: ffdl_plugin.py:1375 +msgid "FFDL download complete" +msgstr "Descarga de FFDL completa" + +#: ffdl_plugin.py:1388 +msgid "Merging %s books." +msgstr "Combinando %s libros." + +#: ffdl_plugin.py:1428 +msgid "FFDL Adding/Updating books." +msgstr "FFDL está añadiendo o actualizando libros." + +#: ffdl_plugin.py:1435 +msgid "Updating calibre for FanFiction stories..." +msgstr "Actualizando calibre para las historias de «fanfiction»..." + +#: ffdl_plugin.py:1436 +msgid "Update calibre for FanFiction stories" +msgstr "Actualizar calibre para las historias de «fanfiction»" + +#: ffdl_plugin.py:1445 +msgid "Adding/Updating %s BAD books." +msgstr "Añadiendo o actualizando %s libros INCORRECTOS." + +#: ffdl_plugin.py:1454 +msgid "Updating calibre for BAD FanFiction stories..." +msgstr "Actualizando calibre para las historias de «fanfiction» INCORRECTAS..." + +#: ffdl_plugin.py:1455 +msgid "Update calibre for BAD FanFiction stories" +msgstr "Actualizar calibre para las historias de «fanfiction» INCORRECTAS" + +#: ffdl_plugin.py:1481 +msgid "Adding format to book failed for some reason..." +msgstr "Hubo un fallo al añadir un formato al libro por algún motivo..." 
+ +#: ffdl_plugin.py:1484 +msgid "Error" +msgstr "Error" + +#: ffdl_plugin.py:1757 +msgid "" +"You configured FanFictionDownLoader to automatically update Reading Lists, " +"but you don't have the %s plugin installed anymore?" +msgstr "Ha configurado FanFictionDownLoader para actualizar automáticamente las listas de lectura, pero ya no tiene instalado el complemento %s" + +#: ffdl_plugin.py:1769 +msgid "" +"You configured FanFictionDownLoader to automatically update \"To Read\" " +"Reading Lists, but you don't have any lists set?" +msgstr "Ha configurado FanFictionDownLoader para actualizar automáticamente las listas «Para leer», pero no tiene ninguna lista configurada." + +#: ffdl_plugin.py:1779 ffdl_plugin.py:1797 +msgid "" +"You configured FanFictionDownLoader to automatically update Reading List " +"'%s', but you don't have a list of that name?" +msgstr "Ha configurado FanFictionDownLoader para actualizar automáticamente las listas «%s», pero no tiene ninguna lista con ese nombre." + +#: ffdl_plugin.py:1785 +msgid "" +"You configured FanFictionDownLoader to automatically update \"Send to " +"Device\" Reading Lists, but you don't have any lists set?" +msgstr "Ha configurado FanFictionDownLoader para actualizar automáticamente las listas de «Enviar a dispositivo», pero no tiene ninguna lista configurada." + +#: ffdl_plugin.py:1906 +msgid "No story URL found." +msgstr "No se ha encontrado ningún URL de historia." + +#: ffdl_plugin.py:1909 +msgid "Not Found" +msgstr "No se ha encontrado" + +#: ffdl_plugin.py:1915 +msgid "URL is not a valid story URL." +msgstr "El URL no es un URL de historia válido." 
+ +#: ffdl_plugin.py:1918 +msgid "Bad URL" +msgstr "El URL es incorrecto" + +#: ffdl_plugin.py:2054 ffdl_plugin.py:2057 +msgid "Anthology containing:" +msgstr "La antología contiene:" + +#: ffdl_plugin.py:2055 +msgid "%s by %s" +msgstr "%s por %s" + +#: ffdl_plugin.py:2077 +msgid " Anthology" +msgstr "Antología" + +#: ffdl_plugin.py:2114 +msgid "(was set, removed for security)" +msgstr "(estaba activado, eliminado por seguridad)" + +#: jobs.py:73 +msgid "Downloading FanFiction Stories" +msgstr "Descargando historias de «fanfiction»" + +#: jobs.py:95 +msgid "Successful:" +msgstr "Con éxito:" + +#: jobs.py:97 +msgid "Unsuccessful:" +msgstr "Sin éxito:" + +#: jobs.py:111 +msgid "Download started..." +msgstr "Descarga comenzada..." + +#: jobs.py:193 +msgid "Already contains %d chapters. Reuse as is." +msgstr "Ya contiene %d capítulos. Volver a usar tal cual." + +#: jobs.py:210 +msgid "Update %s completed, added %s chapters for %s total." +msgstr "Completada la actualización de %s, se añadieron %s capítulos para un total de %s." diff --git a/calibre-plugin/translations/fr.po b/calibre-plugin/translations/fr.po new file mode 100644 index 00000000..d3c0218f --- /dev/null +++ b/calibre-plugin/translations/fr.po @@ -0,0 +1,1624 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) YEAR ORGANIZATION +# +# Translators: +# Ptitprince <leporello1791@gmail.com>, 2014 +msgid "" +msgstr "" +"Project-Id-Version: calibre-plugins\n" +"POT-Creation-Date: 2014-09-09 15:54+Central Daylight Time\n" +"PO-Revision-Date: 2014-09-04 12:30+0000\n" +"Last-Translator: Ptitprince <leporello1791@gmail.com>\n" +"Language-Team: French (http://www.transifex.com/projects/p/calibre-plugins/language/fr/)\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=UTF-8\n" +"Content-Transfer-Encoding: ENCODING\n" +"Generated-By: pygettext.py 1.5\n" +"Language: fr\n" +"Plural-Forms: nplurals=2; plural=(n > 1);\n" + +#: __init__.py:42 +msgid "UI plugin to download FanFiction stories from various sites." 
+msgstr "Greffon à interface utilisateur pour télécharger des récits FanFiction de différents sites." + +#: __init__.py:109 +msgid "" +"Path to the calibre library. Default is to use the path stored in the " +"settings." +msgstr "Chemin vers la bibliothèque calibre. Par défaut utilise le chemin stocké dans les paramètres." + +#: config.py:176 +msgid "FAQs" +msgstr "FAQs" + +#: config.py:176 +msgid "List of Supported Sites" +msgstr "Liste des Sites Supportés" + +#: config.py:190 +msgid "Basic" +msgstr "De base" + +#: config.py:211 +msgid "Standard Columns" +msgstr "Colonnes standards" + +#: config.py:214 +msgid "Custom Columns" +msgstr "Colonnes personnalisées" + +#: config.py:217 +msgid "Other" +msgstr "Autre" + +#: config.py:338 +msgid "" +"These settings control the basic features of the plugin--downloading " +"FanFiction." +msgstr "Ces paramètres contrôlent les caractéristiques de base du greffon--téléchargement FanFiction" + +#: config.py:342 +msgid "Defaults Options on Download" +msgstr "Options par défaut au téléchargement" + +#: config.py:346 +msgid "" +"On each download, FFDL offers an option to select the output format. <br " +"/>This sets what that option will default to." +msgstr "A chaque téléchargement, FFDL propose une option pour sélectionner le format de sortie. <br/>Ces réglages sont ce que cette option fera par défaut." + +#: config.py:348 +msgid "Default Output &Format:" +msgstr "&Format de sortie par défaut : " + +#: config.py:363 +msgid "" +"On each download, FFDL offers an option of what happens if that story " +"already exists. <br />This sets what that option will default to." +msgstr "A chaque téléchargement, FFDL propose une option sur ce qu'il arrive si ce récit existe déjà.<br/> Ces réglages sont ce que cette option fera par défaut." + +#: config.py:365 +msgid "Default If Story Already Exists?" +msgstr "Par défaut si le récit existe déjà ?" + +#: config.py:379 +msgid "Default Update Calibre &Metadata?" 
+msgstr "Par défaut met à jour les &métadonnées calibre ?" + +#: config.py:380 +msgid "" +"On each download, FFDL offers an option to update Calibre's metadata (title," +" author, URL, tags, custom columns, etc) from the web site. <br />This sets " +"whether that will default to on or off. <br />Columns set to 'New Only' in " +"the column tabs will only be set for new books." +msgstr "A chaque téléchargement, FFDL propose une option pour mettre les métadonnées de calibre à jour (titre, auteur, URL, étiquettes, colonnes personnalisées etc.) depuis le site web. <br />Ces paramétrages se placeront sur marche ou arrêt par défaut. <br />Les colonnes définies à \"Nouveau uniquement\" dans l'étiquette de colonne seront uniquement définies pour les nouveaux livres." + +#: config.py:384 +msgid "Default Update EPUB Cover when Updating EPUB?" +msgstr "Par défaut mettre à jour la couverture de l'ePub quand mise à jour de l'ePub ?" + +#: config.py:385 +msgid "" +"On each download, FFDL offers an option to update the book cover image " +"<i>inside</i> the EPUB from the web site when the EPUB is updated.<br />This" +" sets whether that will default to on or off." +msgstr "A chaque téléchargement, FFDL propose une option pour mettre l'image de couverture du livre <i>à l'intérieur</i> de l'ePub depuis le site web quand l'ePub est mis à jour. <br />Ces paramétrages se placeront sur marche ou arrêt par défaut." + +#: config.py:389 +msgid "Smarten Punctuation (EPUB only)" +msgstr "Ponctuation intelligente (ePub uniquement)" + +#: config.py:390 +msgid "" +"Run Smarten Punctuation from Calibre's Polish Book feature on each EPUB " +"download and update." +msgstr "Exécuter Ponctuation Intelligente depuis la caractéristique Polish Book de calibre sur chaque ePub téléchargé et mis à jour." + +#: config.py:395 +msgid "Updating Calibre Options" +msgstr "Mise à jour des options de calibre" + +#: config.py:399 +msgid "Delete other existing formats?" 
+msgstr "Supprimer les autres formats existants ?" + +#: config.py:400 +msgid "" +"Check this to automatically delete all other ebook formats when updating an existing book.\n" +"Handy if you have both a Nook(epub) and Kindle(mobi), for example." +msgstr "Cocher ceci pour supprimer automatiquement tous les autres formats d'ebook quand vous mettez à jour un ebook existant.\nPratique si vous avez en même temps un Nook (epub) et une Kindle (mobi), par exemple. " + +#: config.py:404 +msgid "Update Calibre Cover when Updating Metadata?" +msgstr "Mettre à jour les couvertures calibre lors de la mise à jour des métadonnées ?" + +#: config.py:405 +msgid "" +"Update calibre book cover image from EPUB when metadata is updated. (EPUB only.)\n" +"Doesn't go looking for new images on 'Update Calibre Metadata Only'." +msgstr "Met à jour les images de couverture calibre depuis l'ePub quand les métadonnées sont mises à jour. (Uniquement ePub)\n Ne va pas rechercher de nouvelles images sur \"Mettre seulement les métadonnées de calibre à jour\"." + +#: config.py:409 +msgid "Keep Existing Tags when Updating Metadata?" +msgstr "Garder les étiquettes existantes quand mise à jour des métadonnées ?" + +#: config.py:410 +msgid "" +"Existing tags will be kept and any new tags added.\n" +"%(cmplt)s and %(inprog)s tags will be still be updated, if known.\n" +"%(lul)s tags will be updated if %(lus)s in %(is)s.\n" +"(If Tags is set to 'New Only' in the Standard Columns tab, this has no effect.)" +msgstr "Les étiquettes existantes seront gardées et toutes les nouvelles étiquettes ajoutées.\nLes étiquettes %(cmplt)s et %(inprog) seront quand même mise à jour, si connues.\nLes étiquettes %(lul)s seront mises à jour si %(lus)s dans %(is)s.\n(Si les étiquettes sont définies à 'Nouveau uniquement\" dans l'onglet colonnes standards, ceci n'a pas d'effet.)" + +#: config.py:414 +msgid "Force Author into Author Sort?" +msgstr "Forcer auteur dans tri par auteur ?" 
+ +#: config.py:415 +msgid "" +"If checked, the author(s) as given will be used for the Author Sort, too.\n" +"If not checked, calibre will apply it's built in algorithm which makes 'Bob Smith' sort as 'Smith, Bob', etc." +msgstr "Si coché, le(s) auteur(s) comme indiqué seront utilisés pour le tri par auteur, également.\nSi non coché, calibre appliquera sont algorithme intégré qui fait que \"Bob Smith\" sorte comme \"Smith, Bob\" etc." + +#: config.py:419 +msgid "Force Title into Title Sort?" +msgstr "Forcer le titre dans le tri par titre ?" + +#: config.py:420 +msgid "" +"If checked, the title as given will be used for the Title Sort, too.\n" +"If not checked, calibre will apply it's built in algorithm which makes 'The Title' sort as 'Title, The', etc." +msgstr "Si coché, le titre comme indiqué sera utilisé pour le tri par titre, également.\nSi non coché, calibre appliquera sont algorithme intégré qui fait que \"Le Titre\" sorte comme \"Titre, Le\" etc." + +#: config.py:424 +msgid "Check for existing Series Anthology books?" +msgstr "Cocher pour les livres Séries Anthologies existantes ?" + +#: config.py:425 +msgid "" +"Check for existings Series Anthology books using each new story's series URL before downloading.\n" +"Offer to skip downloading if a Series Anthology is found." +msgstr "Cocher pour les livres Série Anthologie utilisant l'URL de chaque nouvelles séries d'histoires avant de télécharger.\nPropose d'ignorer le téléchargement si une Série Anthologie est trouvée " + +#: config.py:429 +msgid "Check for changed Story URL?" +msgstr "Vérifier le changement d'URL du récit ?" + +#: config.py:430 +msgid "" +"Warn you if an update will change the URL of an existing book.\n" +"fanfiction.net URLs will change from http to https silently." +msgstr "Vous prévient si une mise à jour changera l'URL d'un livre existant.\nLes URLs fanfiction.net changent de http à https silencieusement." + +#: config.py:434 +msgid "Search EPUB text for Story URL?" 
+msgstr "Rechercher un texte ePub pour une URL de récit ? " + +#: config.py:435 +msgid "" +"Look for first valid story URL inside EPUB text if not found in metadata.\n" +"Somewhat risky, could find wrong URL depending on EPUB content.\n" +"Also finds and corrects bad ffnet URLs from ficsaver.com files." +msgstr "Recherche après la première URL de récit valide dans le texte de l'ePub si non trouvée dans les métadonnées.\nQuelque peu risqué, pourrait trouvé une mauvaise URL dépendant du contenu de l'URL.\nTrouve et corrige également les mauvaises URLs ffnet de ficsaver.com." + +#: config.py:439 +msgid "Mark added/updated books when finished?" +msgstr "Marquer les livres ajoutés/mis à jour quand terminé ?" + +#: config.py:440 +msgid "" +"Mark added/updated books when finished. Use with option below.\n" +"You can also manually search for 'marked:ffdl_success'.\n" +"'marked:ffdl_failed' is also available, or search 'marked:ffdl' for both." +msgstr "Marque les livres ajoutés/mis à jour quand terminé. Utilisé avec l'option ci-dessous.\nVous pouvez également chercher manuellement après 'marked:ffdl_success'.\n'marked:ffdl_failed' est également disponible, ou chercher 'marked:ffdl' pour les deux." + +#: config.py:444 +msgid "Show Marked books when finished?" +msgstr "Montrer les livres marqués quand terminés ?" + +#: config.py:445 +msgid "" +"Show Marked added/updated books only when finished.\n" +"You can also manually search for 'marked:ffdl_success'.\n" +"'marked:ffdl_failed' is also available, or search 'marked:ffdl' for both." +msgstr "Montre les livres marqués ajoutés/mis à jour uniquement quans ils sont terminés/\nVous pouvez également rechercher manuellement après 'marked:ffdl_success'./n'marked:ffdl_failed' est aussi disponible, ou cherchez 'marked:ffdl' pour les deux." + +#: config.py:449 +msgid "Automatically Convert new/update books?" +msgstr "Converti automatiquement les livres nouveaux/mis à jour ?" 
+ +#: config.py:450 +msgid "" +"Automatically call calibre's Convert for new/update books.\n" +"Converts to the current output format as chosen in calibre's\n" +"Preferences->Behavior settings." +msgstr "Appele automatiquement le convertisseur de calibre pour les livres nouveaux/mis à jour.\nConverti au format de sortie courant tel que choisi dans les paramètres de calibre\nPréférences->Comportement" + +#: config.py:454 +msgid "GUI Options" +msgstr "Options de l'Interface Graphique Utilisateur" + +#: config.py:458 +msgid "Take URLs from Clipboard?" +msgstr "Prendre les URLs du presse-papier ?" + +#: config.py:459 +msgid "Prefill URLs from valid URLs in Clipboard when Adding New." +msgstr "Pré-rempli les URLs depuis les URLs valides dans le presse-papier lorsque vous utilisez Ajoutez Nouveau" + +#: config.py:463 +msgid "Default to Update when books selected?" +msgstr "Mise à jour par défaut quand les livres sont sélectionnés ?" + +#: config.py:464 +msgid "" +"The top FanFictionDownLoader plugin button will start Update if\n" +"books are selected. If unchecked, it will always bring up 'Add New'." +msgstr "Le bouton supérieur du greffon FanFictionDownloader démarrera la mise à jour si\ndes livres sont sélectionnés. Si décoché, ceci prendra toujours 'Ajouter nouveau\"" + +#: config.py:468 +msgid "Keep 'Add New from URL(s)' dialog on top?" +msgstr "Garder le dialogue 'Ajouter nouveau' au dessus ?" + +#: config.py:469 +msgid "" +"Instructs the OS and Window Manager to keep the 'Add New from URL(s)'\n" +"dialog on top of all other windows. Useful for dragging URLs onto it." +msgstr "Informe l'OS et le gestionnaire de fenêtre de garder le dialogue\n'Ajouter nouveau depuis les URLs' au dessus de toutes les autres fenêtres. Utile pour glisser dessus des URLs." + +#: config.py:473 +msgid "Misc Options" +msgstr "Options diverses" + +#: config.py:478 +msgid "Include images in EPUBs?" +msgstr "Inclure les images dans les ePubs ?" 
+ +#: config.py:479 +msgid "" +"Download and include images in EPUB stories. This is equivalent to " +"adding:%(imgset)s ...to the top of %(pini)s. Your settings in %(pini)s will" +" override this." +msgstr "Télécharge et inclus les images dans les récits ePub. Ceci est équivalent à ajouté :  %(imgset)s ... au dessus de %(pini)s. Vos paramètres %(pini)s outrepassent cela." + +#: config.py:483 +msgid "Inject calibre Series when none found?" +msgstr "Injecter la Série calibre quand aucune n'est trouvée ?" + +#: config.py:484 +msgid "" +"If no series is found, inject the calibre series (if there is one) so it " +"appears on the FFDL title page(not cover)." +msgstr "Si aucune série n'est trouvée, injecte la série calibre (s'il y en a une) aussi ceci apparaît sur la page de titre de FFDL (pas la couverture)" + +#: config.py:488 +msgid "Reject List" +msgstr "Liste des rejets" + +#: config.py:492 +msgid "Edit Reject URL List" +msgstr "Editer la Liste de Rejet URL" + +#: config.py:493 +msgid "Edit list of URLs FFDL will automatically Reject." +msgstr "Edite la liste des URLs FFDL qui seront automatiquement rejetées." + +#: config.py:497 config.py:571 +msgid "Add Reject URLs" +msgstr "Ajouter des URLs rejetées" + +#: config.py:498 +msgid "Add additional URLs to Reject as text." +msgstr "Ajoute des URLs additionnelle à rejeter comme texte." + +#: config.py:502 +msgid "Edit Reject Reasons List" +msgstr "Editer la liste des raisons de rejet" + +#: config.py:503 config.py:562 +msgid "Customize the Reasons presented when Rejecting URLs" +msgstr "Personnalise les raisons présentées quand URLs rejettées" + +#: config.py:507 +msgid "Reject Without Confirmation?" +msgstr "Rejeter Sans Confirmation ?" + +#: config.py:508 +msgid "Always reject URLs on the Reject List without stopping and asking." +msgstr "Toujours rejeter les URLs dans la Liste de Rejet sans interruption ni question." 
+
+#: config.py:546
+msgid "Edit Reject URLs List"
+msgstr "Editer la liste des URLs rejetées"
+
+#: config.py:560
+msgid "Reject Reasons"
+msgstr "Raisons du rejet"
+
+#: config.py:561
+msgid "Customize Reject List Reasons"
+msgstr "Personnaliser la liste des raisons du rejet"
+
+#: config.py:569
+msgid "Reason why I rejected it"
+msgstr "Raison pour laquelle je la rejette"
+
+#: config.py:569
+msgid "Title by Author"
+msgstr "Titre par auteur"
+
+#: config.py:572
+msgid ""
+"Add Reject URLs. Use: <b>http://...,note</b> or <b>http://...,title by "
+"author - note</b><br>Invalid story URLs will be ignored."
+msgstr "Ajoute des URLs rejetées. Utilise : <b>http://...,note</b> ou <b>http://...,titre par auteur - note</b><br>Les URLs de récit invalides seront ignorées."
+
+#: config.py:573
+msgid ""
+"One URL per line:\n"
+"<b>http://...,note</b>\n"
+"<b>http://...,title by author - note</b>"
+msgstr "Une URL par ligne : \n<b>http://...,note</b>\n<b>http://...,titre par auteur - note</b>"
+
+#: config.py:575 dialogs.py:1012
+msgid "Add this reason to all URLs added:"
+msgstr "Ajouter cette raison pour toutes les URLs ajoutées : "
+
+#: config.py:590
+msgid ""
+"These settings provide more detailed control over what metadata will be "
+"displayed inside the ebook as well as let you set %(isa)s and %(u)s/%(p)s "
+"for different sites."
+msgstr "Ces paramètres donnent un contrôle plus détaillé sur quelles métadonnées seront affichées dans le livre, et vous permettent de régler %(isa)s et %(u)s/%(p)s pour les différents sites."
+
+#: config.py:608
+msgid "View Defaults"
+msgstr "Afficher les paramètres par défaut"
+
+#: config.py:609
+msgid ""
+"View all of the plugin's configurable settings\n"
+"and their default settings."
+msgstr "Affiche tous les paramètres configurables du greffon\net leurs paramètres par défaut."
+
+#: config.py:627
+msgid "Plugin Defaults (%s) (Read-Only)"
+msgstr "Paramètres par défaut du greffon (%s) (Lecture seule)"
+
+#: config.py:628 config.py:634
+msgid ""
+"These are all of the plugin's configurable options\n"
+"and their default settings."
+msgstr "Ce sont toutes les options configurables du greffon\net leurs paramètres par défaut."
+
+#: config.py:629
+msgid "Plugin Defaults"
+msgstr "Paramètres par défaut du greffon"
+
+#: config.py:645 dialogs.py:542 dialogs.py:645
+msgid "OK"
+msgstr "OK"
+
+#: config.py:665
+msgid ""
+"These settings provide integration with the %(rl)s Plugin. %(rl)s can "
+"automatically send to devices and change custom columns. You have to create"
+" and configure the lists in %(rl)s to be useful."
+msgstr "Ces paramètres fournissent une intégration avec le greffon %(rl)s. %(rl)s peut automatiquement envoyer vers les appareils et changer les colonnes personnalisées. Vous avez à créer et configurer les listes dans %(rl)s pour être utilisables."
+
+#: config.py:670
+msgid "Add new/updated stories to \"Send to Device\" Reading List(s)."
+msgstr "Ajouter des récits nouveaux/mis à jour à la/aux liste(s) de lecture de \"Envoyer vers le dispositif\"."
+
+#: config.py:671
+msgid ""
+"Automatically add new/updated stories to these lists in the %(rl)s plugin."
+msgstr "Ajoute automatiquement des récits nouveaux/mis à jour à ces listes dans le greffon %(rl)s."
+
+#: config.py:676
+msgid "\"Send to Device\" Reading Lists"
+msgstr "Listes de lecture de \"Envoyer vers le dispositif\""
+
+#: config.py:677 config.py:680 config.py:693 config.py:696
+msgid ""
+"When enabled, new/updated stories will be automatically added to these "
+"lists."
+msgstr "Quand activé, les récits nouveaux/mis à jour seront ajoutés automatiquement à ces listes."
+
+#: config.py:686
+msgid "Add new/updated stories to \"To Read\" Reading List(s)."
+msgstr "Ajouter des récits nouveaux/mis à jour à la/aux liste(s) de lecture \" A lire \"."
+ +#: config.py:687 +msgid "" +"Automatically add new/updated stories to these lists in the %(rl)s plugin.\n" +"Also offers menu option to remove stories from the \"To Read\" lists." +msgstr "Ajoute automatiquement des récits nouveau/mis à jour à ces listes dans le greffon %(rl)s.\nPropose également un menu option pour enlever des récits depuis les listes \"A lire\"." + +#: config.py:692 +msgid "\"To Read\" Reading Lists" +msgstr "Listes de lecture \"A lire\"" + +#: config.py:702 +msgid "Add stories back to \"Send to Device\" Reading List(s) when marked \"Read\"." +msgstr "Ajouter à nouveau des récits à/aux Liste(s) de lecture \"Envoyer vers le dispositif\" quand marqué \"Lu\"." + +#: config.py:703 +msgid "" +"Menu option to remove from \"To Read\" lists will also add stories back to " +"\"Send to Device\" Reading List(s)" +msgstr "Option du menu pour retirer des listes de \"A lire\" ajoutera également à nouveau des récits à/aux Liste(s) de lecture \"Envoyer vers le dispositif\"" + +#: config.py:725 +msgid "" +"The %(gc)s plugin can create cover images for books using various metadata " +"and configurations. If you have GC installed, FFDL can run GC on new " +"downloads and metadata updates. Pick a GC setting by site or Default." +msgstr "Le greffon %(gc)s peut créer des images de couverture pour les livres en utilisant diverses métadonnées et configurations. Si vous avez installé Generate Cover, FFDL peut exécuter GC lors de nouveaux téléchargements et des mises à jour de métadonnées" + +#: config.py:743 config.py:747 config.py:760 +msgid "Default" +msgstr "Par défaut" + +#: config.py:748 +msgid "" +"On Metadata update, run %(gc)s with this setting, if not selected for " +"specific site." +msgstr "A la mise à jour des métadonnées, exécute %(gc)s avec ces paramètres, s'ils ne sont pas sélectionnés pour un site spécifique." + +#: config.py:751 +msgid "On Metadata update, run %(gc)s with this setting for %(site)s stories." 
+msgstr "A la mise à jour de métadonnées, exécute %(gc)s avec ces paramètres pour les récits de %(site)s."
+
+#: config.py:774
+msgid "Run %(gc)s Only on New Books"
+msgstr "Exécuter %(gc)s uniquement pour les nouveaux livres"
+
+#: config.py:775
+msgid "Default is to run GC any time the calibre metadata is updated."
+msgstr "L'option par défaut est d'exécuter GC chaque fois que les métadonnées de calibre sont mises à jour."
+
+#: config.py:779
+msgid "Allow %(gcset)s from %(pini)s to override"
+msgstr "Permettre à %(gcset)s depuis %(pini)s d'outrepasser"
+
+#: config.py:780
+msgid ""
+"The %(pini)s parameter %(gcset)s allows you to choose a GC setting based on "
+"metadata rather than site, but it's much more complex.<br \\>%(gcset)s is "
+"ignored when this is off."
+msgstr "Le paramètre %(pini)s %(gcset)s vous permet de choisir un paramètre Generate Cover basé sur les métadonnées plutôt que le site, mais c'est beaucoup plus complexe.<br \\>%(gcset)s est ignoré quand ceci est à l'arrêt."
+
+#: config.py:784
+msgid "Use calibre's Polish feature to inject/update the cover"
+msgstr "Utiliser la fonction Polish de calibre pour insérer/mettre à jour la couverture"
+
+#: config.py:785
+msgid ""
+"Calibre's Polish feature will be used to inject or update the generated "
+"cover into the ebook, EPUB only."
+msgstr "L'option Polish de calibre sera utilisée pour insérer ou mettre à jour la couverture générée dans l'e-livre. EPUB uniquement."
+
+#: config.py:799
+msgid ""
+"These settings provide integration with the %(cp)s Plugin. %(cp)s can "
+"automatically update custom columns with page, word and reading level "
+"statistics. You have to create and configure the columns in %(cp)s first."
+msgstr "Ces paramètres permettent l'intégration avec le greffon %(cp)s. %(cp)s peut automatiquement mettre à jour les colonnes personnalisées avec des statistiques de pages, de mots et de niveau de lecture. Vous devez tout d'abord créer et configurer les colonnes dans %(cp)s."
+ +#: config.py:804 +msgid "" +"If any of the settings below are checked, when stories are added or updated," +" the %(cp)s Plugin will be called to update the checked statistics." +msgstr "Si n'importe lequel des paramètres ci-dessous est activé, quand les récits sont ajoutés ou mis à jour, le greffon %(cp)s sera appelé pour mettre à jour les statistiques activées." + +#: config.py:810 +msgid "Which column and algorithm to use are configured in %(cp)s." +msgstr "Quels colonne et algorithme à utiliser sont configurés dans %(cp)s." + +#: config.py:818 +msgid "" +"Will overwrite word count from FFDL metadata if set to update the same " +"custom column." +msgstr "Outrepassera le nombre de mots depuis les métadonnées FFDL si réglé sur mettre à jour la même colonne personnalisée." + +#: config.py:849 +msgid "" +"These controls aren't plugin settings as such, but convenience buttons for " +"setting Keyboard shortcuts and getting all the FanFictionDownLoader " +"confirmation dialogs back again." +msgstr "Ces contrôles ne sont pas des paramètres du greffon en soi, mais des boutons de convenance pour paramétrer les raccourcis clavier et l'obtention du rétablissement de tous les dialogues de confirmation de FanFictionDownLoader." + +#: config.py:854 +msgid "Keyboard shortcuts..." +msgstr "Raccourcis clavier..." + +#: config.py:855 +msgid "Edit the keyboard shortcuts associated with this plugin" +msgstr "Editer les raccourcis clavier associés avec ce greffon" + +#: config.py:859 +msgid "Reset disabled &confirmation dialogs" +msgstr "Réinitialiser &les dialogues de confirmation désactivés" + +#: config.py:860 +msgid "Reset all show me again dialogs for the FanFictionDownLoader plugin" +msgstr "Réinitialiser tous les dialogues afficher moi du greffon FanFictionDownLoader" + +#: config.py:864 +msgid "&View library preferences..." +msgstr "&Voir les préférences de la bibliothèque..." 
+ +#: config.py:865 +msgid "View data stored in the library database for this plugin" +msgstr "Voir les données stockées pour ce greffon dans la base de donnée de la bibliothèque" + +#: config.py:876 +msgid "Done" +msgstr "Terminé" + +#: config.py:877 +msgid "Confirmation dialogs have all been reset" +msgstr "Les dialogues de confirmation ont tous été réinitialisés" + +#: config.py:925 +msgid "Category" +msgstr "Catégorie" + +#: config.py:926 +msgid "Genre" +msgstr "Genre" + +#: config.py:927 +msgid "Language" +msgstr "Langue" + +#: config.py:928 ffdl_plugin.py:1152 ffdl_plugin.py:1324 ffdl_plugin.py:1354 +msgid "Status" +msgstr "Statut" + +#: config.py:929 +msgid "Status:%(cmplt)s" +msgstr "Statut : %(cmplt)s" + +#: config.py:930 +msgid "Status:%(inprog)s" +msgstr "Statut : %(inprog)s" + +#: config.py:931 config.py:1065 +msgid "Series" +msgstr "Séries" + +#: config.py:932 +msgid "Characters" +msgstr "Caractères" + +#: config.py:933 +msgid "Relationships" +msgstr "Relations" + +#: config.py:934 +msgid "Published" +msgstr "Publié" + +#: config.py:935 ffdl_plugin.py:1437 ffdl_plugin.py:1456 +msgid "Updated" +msgstr "Mis à jour" + +#: config.py:936 +msgid "Created" +msgstr "Créé" + +#: config.py:937 +msgid "Rating" +msgstr "Note" + +#: config.py:938 +msgid "Warnings" +msgstr "Avertissements" + +#: config.py:939 +msgid "Chapters" +msgstr "Chapitres" + +#: config.py:940 +msgid "Words" +msgstr "Mots" + +#: config.py:941 +msgid "Site" +msgstr "Site" + +#: config.py:942 +msgid "Story ID" +msgstr "ID du récit" + +#: config.py:943 +msgid "Author ID" +msgstr "ID de l'auteur" + +#: config.py:944 +msgid "Extra Tags" +msgstr "Etiquettes additionnelles" + +#: config.py:945 config.py:1057 dialogs.py:804 dialogs.py:900 +#: ffdl_plugin.py:1152 ffdl_plugin.py:1324 ffdl_plugin.py:1354 +msgid "Title" +msgstr "Titre" + +#: config.py:946 +msgid "Story URL" +msgstr "URL du récit" + +#: config.py:947 +msgid "Description" +msgstr "Description" + +#: config.py:948 dialogs.py:804 
dialogs.py:900 ffdl_plugin.py:1152 +#: ffdl_plugin.py:1324 ffdl_plugin.py:1354 +msgid "Author" +msgstr "Auteur" + +#: config.py:949 +msgid "Author URL" +msgstr "URL de l'auteur" + +#: config.py:950 +msgid "File Format" +msgstr "Format de fichier" + +#: config.py:951 +msgid "File Extension" +msgstr "Extension de fichier" + +#: config.py:952 +msgid "Site Abbrev" +msgstr "Site abrégé" + +#: config.py:953 +msgid "FFDL Version" +msgstr "Version FFDL" + +#: config.py:968 +msgid "" +"If you have custom columns defined, they will be listed below. Choose a " +"metadata value type to fill your columns automatically." +msgstr "Si vous avez défini des colonnes personnalisées, elles seront listées ci-dessous. Choisissez un type de valeur de métadonnées pour remplir vos colonnes automatiquement." + +#: config.py:993 +msgid "Update this %s column(%s) with..." +msgstr "Mettre à jour cette %s colonne(%s) avec..." + +#: config.py:1003 +msgid "Values that aren't valid for this enumeration column will be ignored." +msgstr "Les valeurs qui ne sont pas valides pour cette énumération de colonne seront ignorées." + +#: config.py:1003 config.py:1005 +msgid "Metadata values valid for this type of column." +msgstr "Valeurs de métadonnées valides pour ce type de colonne." + +#: config.py:1008 config.py:1084 +msgid "New Only" +msgstr "Nouveau uniquement" + +#: config.py:1009 +msgid "" +"Write to %s(%s) only for new\n" +"books, not updates to existing books." +msgstr "Ecrire %s(%s) uniquement pour les nouveaux\nlivres, pas de mises à jour aux livres existants." + +#: config.py:1020 +msgid "Allow %(ccset)s from %(pini)s to override" +msgstr "Permettre à %(ccset)s depuis %(pini)s d'outrepasser" + +#: config.py:1021 +msgid "" +"The %(pini)s parameter %(ccset)s allows you to set custom columns to site " +"specific values that aren't common to all sites.<br />%(ccset)s is ignored " +"when this is off." 
+msgstr "Le paramètre %(pini)s %(ccset)s vous permet de régler des colonnes personnalisées à des valeurs spécifiques d'un site qui ne sont pas communes à tous les sites.<br\\>%(ccset)s est ignoré quand ceci est à l'arrêt." + +#: config.py:1026 +msgid "Special column:" +msgstr "Colonne spéciale :" + +#: config.py:1031 +msgid "Update/Overwrite Error Column:" +msgstr "Mettre à jour/outrepasser la colonne d'erreur :" + +#: config.py:1032 +msgid "" +"When an update or overwrite of an existing story fails, record the reason in this column.\n" +"(Text and Long Text columns only.)" +msgstr "Lorsque la mise à jour ou l'écrasement d'un récit existant échoue, enregistrer la raison dans cette colonne.\n(Colonnes de texte et de texte descriptif uniquement.) " + +#: config.py:1058 +msgid "Author(s)" +msgstr "Auteur(s)" + +#: config.py:1059 +msgid "Publisher" +msgstr "Editeur" + +#: config.py:1060 +msgid "Tags" +msgstr "Etiquettes" + +#: config.py:1061 +msgid "Languages" +msgstr "Langue(s)" + +#: config.py:1062 +msgid "Published Date" +msgstr "Date de publication" + +#: config.py:1063 +msgid "Date" +msgstr "Date" + +#: config.py:1064 +msgid "Comments" +msgstr "Commentaires" + +#: config.py:1066 +msgid "Ids(url id only)" +msgstr "Ids(id url seulement)" + +#: config.py:1071 +msgid "" +"The standard calibre metadata columns are listed below. You may choose " +"whether FFDL will fill each column automatically on updates or only for new " +"books." +msgstr "Les colonnes de métadonnées stantards de calibre sont listées ci-dessous. Vous pouvez choisir si FFDL remplira chaque colonne automatiquement lors des mises à jour ou seulement pour de nouveaux livres." + +#: config.py:1085 +msgid "" +"Write to %s only for new\n" +"books, not updates to existing books." +msgstr "Ecrire sur %s uniquement pour les nouveaux\nlivres, pas de mises à jour pour les livres existants." 
+ +#: dialogs.py:69 +msgid "Skip" +msgstr "Ignorer" + +#: dialogs.py:70 +msgid "Add New Book" +msgstr "Ajouter Nouveau Livre" + +#: dialogs.py:71 +msgid "Update EPUB if New Chapters" +msgstr "Mettre à jour l'ePub s'il y a de nouveaux chapitres" + +#: dialogs.py:72 +msgid "Update EPUB Always" +msgstr "Mettre toujours l'ePub à jour" + +#: dialogs.py:73 +msgid "Overwrite if Newer" +msgstr "Ecraser si nouveau" + +#: dialogs.py:74 +msgid "Overwrite Always" +msgstr "Ecraser toujours" + +#: dialogs.py:75 +msgid "Update Calibre Metadata Only" +msgstr "Mettre uniquement à jour les métadonnées de calibre" + +#: dialogs.py:239 ffdl_plugin.py:89 +msgid "FanFictionDownLoader" +msgstr "FanFictionDownLoader" + +#: dialogs.py:256 dialogs.py:703 +msgid "Show Download Options" +msgstr "Afficher les options de téléchargement" + +#: dialogs.py:275 dialogs.py:720 +msgid "Output &Format:" +msgstr "&Format de sortie :" + +#: dialogs.py:283 dialogs.py:728 +msgid "" +"Choose output format to create. May set default from plugin configuration." +msgstr "Choisir le format de sortie à créer. Peut être réglé par défaut depuis la configuration du greffon." + +#: dialogs.py:311 dialogs.py:745 +msgid "Update Calibre &Metadata?" +msgstr "Mettre à jour les &métadonnées calibre ?" + +#: dialogs.py:312 dialogs.py:746 +msgid "" +"Update metadata for existing stories in Calibre from web site?\n" +"(Columns set to 'New Only' in the column tabs will only be set for new books.)" +msgstr "Mettre à jour les métadonnées pour les récits existants dans calibre depuis le site web ?\n(Les colonnes définies à \"Nouveau uniquement\" dans les étiquettes de colonne seront uniquement définies pour les nouveaux livres.)" + +#: dialogs.py:318 dialogs.py:750 +msgid "Update EPUB Cover?" +msgstr "Mettre à jour la couverture de l'ePub ?" + +#: dialogs.py:319 dialogs.py:751 +msgid "" +"Update book cover image from site or defaults (if found) <i>inside</i> the " +"EPUB when EPUB is updated." 
+msgstr "Met à jour l'image de couverture du livre depuis le site ou par défaut (si trouvée) <i>à l'intérieur</i> de l'ePub quand l'ePub est mis à jour." + +#: dialogs.py:366 +msgid "Story URL(s) for anthology, one per line:" +msgstr "URL(s) de récit pour anthologie, une par ligne :" + +#: dialogs.py:367 +msgid "" +"URLs for stories to include in the anthology, one per line.\n" +"Will take URLs from clipboard, but only valid URLs." +msgstr "URLs pour les récits à inclure dans l'anthologie, une par ligne.\nPrendra les URLs depuis le presse papier, mais seulement les URLs valides." + +#: dialogs.py:368 +msgid "If Story Already Exists in Anthology?" +msgstr "Si le récit existe déjà dans Anthologie ?" + +#: dialogs.py:369 +msgid "" +"What to do if there's already an existing story with the same URL in the " +"anthology." +msgstr "Que faire s'il y déjà un récit existant avec la même URL dans l'anthologie." + +#: dialogs.py:378 +msgid "Story URL(s), one per line:" +msgstr "URL(s) de récit, une par ligne :" + +#: dialogs.py:379 +msgid "" +"URLs for stories, one per line.\n" +"Will take URLs from clipboard, but only valid URLs.\n" +"Add [1,5] after the URL to limit the download to chapters 1-5." +msgstr "URLs pour les récits, une par ligne.\nPrendra les URLs depuis le presse papier, mais seulement les URLs valides.\nAjoute [1,5] après l'URL pour limiter le téléchargement aux chapitres 1-5" + +#: dialogs.py:380 +msgid "If Story Already Exists?" +msgstr "Si le récit existe déjà ?" + +#: dialogs.py:381 +msgid "" +"What to do if there's already an existing story with the same URL or title " +"and author." +msgstr "Que faire s'il y a déjà un récit existant avec la même URL ou titre et auteur." + +#: dialogs.py:481 +msgid "For Individual Books" +msgstr "Pour des livres individuels" + +#: dialogs.py:482 +msgid "Get URLs and go to dialog for individual story downloads." +msgstr "Obtenir les URLs et se rendre dans la boîte de dialogue pour les téléchargments de récit individuel." 
+ +#: dialogs.py:486 +msgid "For Anthology Epub" +msgstr "Pour un ePub Anthologie" + +#: dialogs.py:487 +msgid "" +"Get URLs and go to dialog for Anthology download.\n" +"Requires %s plugin." +msgstr "Obtenir les URLs et se rendre dans la boîte de dialogue pour le téléchargement d'une Anthologie.\nRequiert le greffon %s." + +#: dialogs.py:492 dialogs.py:546 dialogs.py:573 +msgid "Cancel" +msgstr "Annuler" + +#: dialogs.py:524 +msgid "Password" +msgstr "Mot de passe" + +#: dialogs.py:525 +msgid "Author requires a password for this story(%s)." +msgstr "L'Auteur requiert un mot de passe pour ce récit(%s)." + +#: dialogs.py:530 +msgid "User/Password" +msgstr "Utilisateur/Mot de passe" + +#: dialogs.py:531 +msgid "%s requires you to login to download this story." +msgstr "%s requiert que vous vous identifiez pour télécharger ce récit" + +#: dialogs.py:533 +msgid "User:" +msgstr "Utilisateur :" + +#: dialogs.py:537 +msgid "Password:" +msgstr "Mot de passe :" + +#: dialogs.py:568 +msgid "Fetching metadata for stories..." +msgstr "Occupé à rechercher des métadonnées pour les récits..." + +#: dialogs.py:569 +msgid "Downloading metadata for stories" +msgstr "Téléchargement des métadonnées pour les récits" + +#: dialogs.py:570 +msgid "Fetched metadata for" +msgstr "Métadonnées recherchées pour" + +#: dialogs.py:640 ffdl_plugin.py:325 +msgid "About FanFictionDownLoader" +msgstr "Á propos de FanFictionDownLoader" + +#: dialogs.py:694 +msgid "Remove selected books from the list" +msgstr "Retirer les livres sélectionnés de la liste" + +#: dialogs.py:733 +msgid "Update Mode:" +msgstr "Mode de mise à jour : " + +#: dialogs.py:736 +msgid "" +"What sort of update to perform. May set default from plugin configuration." +msgstr "Quel type de mise à jour à effectuer. Peut être réglé par défaut dans la configuration du greffon." 
+
+#: dialogs.py:804 ffdl_plugin.py:1152 ffdl_plugin.py:1324 ffdl_plugin.py:1354
+msgid "Comment"
+msgstr "Commentaire"
+
+#: dialogs.py:872
+msgid "Are you sure you want to remove this book from the list?"
+msgstr "Êtes-vous sûr de vouloir retirer ce livre de la liste ?"
+
+#: dialogs.py:874
+msgid "Are you sure you want to remove the selected %d books from the list?"
+msgstr "Êtes-vous sûr de vouloir retirer les %d livres sélectionnés de la liste ?"
+
+#: dialogs.py:900
+msgid "Note"
+msgstr "Note"
+
+#: dialogs.py:939
+msgid "Select or Edit Reject Note."
+msgstr "Sélectionner ou éditer la note de rejet."
+
+#: dialogs.py:947
+msgid "Are you sure you want to remove this URL from the list?"
+msgstr "Êtes-vous sûr de vouloir retirer cette URL de la liste ?"
+
+#: dialogs.py:949
+msgid "Are you sure you want to remove the %d selected URLs from the list?"
+msgstr "Êtes-vous sûr de vouloir retirer les %d URLs sélectionnées de la liste ?"
+
+#: dialogs.py:967
+msgid "List of Books to Reject"
+msgstr "Liste des livres à rejeter"
+
+#: dialogs.py:980
+msgid ""
+"FFDL will remember these URLs and display the note and offer to reject them "
+"if you try to download them again later."
+msgstr "FFDL se souviendra de ces URLs, affichera la note et proposera de rejeter celles-ci si vous essayez de les télécharger à nouveau par après."
+
+#: dialogs.py:994
+msgid "Remove selected URL(s) from the list"
+msgstr "Retire les URLs sélectionnées de la liste"
+
+#: dialogs.py:1009 dialogs.py:1013
+msgid "This will be added to whatever note you've set for each URL above."
+msgstr "Ceci sera ajouté à n'importe quelle note que vous avez composée pour chaque URL ci-dessus."
+
+#: dialogs.py:1022
+msgid "Delete Books (including books without FanFiction URLs)?"
+msgstr "Supprimer les livres (incluant les livres sans URL(s) de FanFiction) ?"
+
+#: dialogs.py:1023
+msgid "Delete the selected books after adding them to the Rejected URLs list."
+msgstr "Supprime les livres sélectionnés après les avoir ajoutés à la liste des URLs rejetées." + +#: ffdl_plugin.py:90 +msgid "Download FanFiction stories from various web sites" +msgstr "Télécharger des récits FanFiction de différents sites" + +#: ffdl_plugin.py:120 +msgid "FanFictionDL" +msgstr "FanFictionDL" + +#: ffdl_plugin.py:243 +msgid "&Add New from URL(s)" +msgstr "&Ajouter nouveau depuis l'URL(s)" + +#: ffdl_plugin.py:245 +msgid "Add New FanFiction Book(s) from URL(s)" +msgstr "Ajouter un /des nouveau(x) livre(s) de Fanfiction depuis l'URL(s)" + +#: ffdl_plugin.py:248 +msgid "&Update Existing FanFiction Book(s)" +msgstr "&Mettre à jour le(s) livre(s) FanFiction existant(s)" + +#: ffdl_plugin.py:254 +msgid "Get Story URLs to Download from Web Page" +msgstr "Prendre les URLs de récit à télécharger depuis la page web" + +#: ffdl_plugin.py:258 +msgid "&Make Anthology Epub Manually from URL(s)" +msgstr "Faire manuellement un ePub d'anthologie depuis depuis l'/les URL(s)" + +#: ffdl_plugin.py:260 +msgid "Make FanFiction Anthology Epub Manually from URL(s)" +msgstr "&Faire manuellement un ePub d'anthologie FanFiction depuis depuis l'/les URL(s)" + +#: ffdl_plugin.py:263 +msgid "&Update Anthology Epub" +msgstr "&Mettre à jour un ePub Anthologie" + +#: ffdl_plugin.py:265 +msgid "Update FanFiction Anthology Epub" +msgstr "Mettre à jour un ePub Anthologie FanFiction" + +#: ffdl_plugin.py:273 +msgid "Add to \"To Read\" and \"Send to Device\" Lists" +msgstr "Ajouter aux listes \"A lire\" et \"Envoyer vers le dispositif\"" + +#: ffdl_plugin.py:275 +msgid "Remove from \"To Read\" and add to \"Send to Device\" Lists" +msgstr "Retirer des listes de \"A lire\" et ajouter à \"Envoyer vers le dispositif\"" + +#: ffdl_plugin.py:277 ffdl_plugin.py:282 +msgid "Remove from \"To Read\" Lists" +msgstr "Retirer des listes \"A lire\"." 
+ +#: ffdl_plugin.py:279 +msgid "Add Selected to \"Send to Device\" Lists" +msgstr "Ajouter sélectionné aux listes \"Envoyer vers le dispositif\"" + +#: ffdl_plugin.py:281 +msgid "Add to \"To Read\" Lists" +msgstr "Ajouter aux listes \"A lire\"" + +#: ffdl_plugin.py:297 +msgid "Get URLs from Selected Books" +msgstr "Prendre les URLs depuis les livres sélectionnés" + +#: ffdl_plugin.py:303 ffdl_plugin.py:397 +msgid "Get Story URLs from Web Page" +msgstr "Prendre les URLs de récit depuis la page web" + +#: ffdl_plugin.py:308 +msgid "Reject Selected Books" +msgstr "Rejeter les livres sélectionnés" + +#: ffdl_plugin.py:316 +msgid "&Configure Plugin" +msgstr "&Configurer le greffon" + +#: ffdl_plugin.py:319 +msgid "Configure FanFictionDownLoader" +msgstr "Configurer FanFictionDownLoader" + +#: ffdl_plugin.py:322 +msgid "About Plugin" +msgstr "Á propos du greffon" + +#: ffdl_plugin.py:379 +msgid "Cannot Update Reading Lists from Device View" +msgstr "Ne peut mettre à jour Les listes de lecture depuis la Vue Dispositif" + +#: ffdl_plugin.py:383 +msgid "No Selected Books to Update Reading Lists" +msgstr "Pas de livres sélectionnés pour mettre à jour les Listes de Lecture" + +#: ffdl_plugin.py:408 ffdl_plugin.py:460 +msgid "List of Story URLs" +msgstr "Liste des URLs de Récit" + +#: ffdl_plugin.py:409 +msgid "No Valid Story URLs found on given page." +msgstr "Pas d'URL de récit valide trouvée sur la page donnée" + +#: ffdl_plugin.py:424 +msgid "No Selected Books to Get URLs From" +msgstr "Pas de livres sélectionnés pour y prendre des URLs" + +#: ffdl_plugin.py:442 +msgid "Collecting URLs for stories..." 
+msgstr "Occupé à collecter des URLs pour des récits..."
+
+#: ffdl_plugin.py:443
+msgid "Get URLs for stories"
+msgstr "Prend des URLs pour des récits"
+
+#: ffdl_plugin.py:444 ffdl_plugin.py:491 ffdl_plugin.py:678
+msgid "URL retrieved"
+msgstr "URL récupérée"
+
+#: ffdl_plugin.py:464
+msgid "List of URLs"
+msgstr "Liste des URLs"
+
+#: ffdl_plugin.py:465
+msgid "No Story URLs found in selected books."
+msgstr "Pas d'URLs de récit trouvées dans les livres sélectionnés."
+
+#: ffdl_plugin.py:481
+msgid "No Selected Books have URLs to Reject"
+msgstr "Aucun des livres sélectionnés n'a d'URLs à rejeter"
+
+#: ffdl_plugin.py:489
+msgid "Collecting URLs for Reject List..."
+msgstr "Occupé à collecter les URLs pour la liste de rejet..."
+
+#: ffdl_plugin.py:490
+msgid "Get URLs for Reject List"
+msgstr "Récupère les URLs pour la liste de rejet"
+
+#: ffdl_plugin.py:525
+msgid "Proceed to Remove?"
+msgstr "Procéder à la suppression ?"
+
+#: ffdl_plugin.py:525
+msgid "Rejecting FFDL URLs: None of the books selected have FanFiction URLs."
+msgstr "Occupé de rejeter les URLs FFDL : aucun des livres sélectionnés n'a d'URLs FanFiction."
+
+#: ffdl_plugin.py:547
+msgid "Cannot Make Anthologys without %s"
+msgstr "Ne peut faire d'Anthologies sans %s"
+
+#: ffdl_plugin.py:551 ffdl_plugin.py:655
+msgid "Cannot Update Books from Device View"
+msgstr "Ne peut mettre à jour les livres depuis la Vue Dispositif"
+
+#: ffdl_plugin.py:555
+msgid "Can only update 1 anthology at a time"
+msgstr "Peut seulement mettre à jour 1 Anthologie à la fois"
+
+#: ffdl_plugin.py:564
+msgid "Can only Update Epub Anthologies"
+msgstr "Peut seulement mettre à jour des anthologies ePub"
+
+#: ffdl_plugin.py:582 ffdl_plugin.py:583
+msgid "Cannot Update Anthology"
+msgstr "Ne peut mettre à jour Anthologie"
+
+#: ffdl_plugin.py:583
+msgid ""
+"Book isn't an FFDL Anthology or contains book(s) without valid FFDL URLs."
+msgstr "Le livre n'est pas une Anthologie FFDL ou contient un/des livre(s) sans URLs FFDL valides."
+
+#: ffdl_plugin.py:641
+msgid ""
+"There are %d stories in the current anthology that are <b>not</b> going to "
+"be kept if you go ahead."
+msgstr "Il y a %d récits dans l'Anthologie actuelle qui ne vont <b>pas</b> être gardés si vous continuez."
+
+#: ffdl_plugin.py:642
+msgid "Story URLs that will be removed:"
+msgstr "URLs de Récit qui seront supprimées :"
+
+#: ffdl_plugin.py:644
+msgid "Update anyway?"
+msgstr "Mettre à jour quand même ?"
+
+#: ffdl_plugin.py:645
+msgid "Stories Removed"
+msgstr "Récits supprimés"
+
+#: ffdl_plugin.py:662
+msgid "No Selected Books to Update"
+msgstr "Pas de livres sélectionnés à mettre à jour"
+
+#: ffdl_plugin.py:676
+msgid "Collecting stories for update..."
+msgstr "Occupé à collecter des récits pour la mise à jour..."
+
+#: ffdl_plugin.py:677
+msgid "Get stories for updates"
+msgstr "Prend des récits pour la mise à jour"
+
+#: ffdl_plugin.py:687
+msgid "Update Existing List"
+msgstr "Mettre à jour la liste existante"
+
+#: ffdl_plugin.py:745
+msgid "Started fetching metadata for %s stories."
+msgstr "A démarré la recherche des métadonnées pour %s récits."
+
+#: ffdl_plugin.py:751
+msgid "No valid story URLs entered."
+msgstr "Pas d'URLs de récit valides entrées."
+
+#: ffdl_plugin.py:776 ffdl_plugin.py:782
+msgid "Reject URL?"
+msgstr "Rejeter l'URL ?"
+
+#: ffdl_plugin.py:783 ffdl_plugin.py:801
+msgid "<b>%s</b> is on your Reject URL list:"
+msgstr "<b>%s</b> est sur votre liste d'URL Rejetées :"
+
+#: ffdl_plugin.py:785
+msgid "Click '<b>Yes</b>' to Reject."
+msgstr "Cliquer '<b>Oui</b>' pour rejeter."
+
+#: ffdl_plugin.py:786 ffdl_plugin.py:890
+msgid "Click '<b>No</b>' to download anyway."
+msgstr "Cliquer '<b>Non</b>' pour télécharger quand même."
+
+#: ffdl_plugin.py:788
+msgid "Story on Reject URLs list (%s)."
+msgstr "Récit sur la liste des URLs rejetées (%s)" + +#: ffdl_plugin.py:791 +msgid "Rejected" +msgstr "Rejeté" + +#: ffdl_plugin.py:794 +msgid "Remove Reject URL?" +msgstr "Retirer l'URL rejetée ?" + +#: ffdl_plugin.py:800 +msgid "Remove URL from Reject List?" +msgstr "Retirer l'URL de la Liste de Rejets ?" + +#: ffdl_plugin.py:803 +msgid "Click '<b>Yes</b>' to remove it from the list," +msgstr "Cliquer '<b>Oui</b>' pour retirer de la liste," + +#: ffdl_plugin.py:804 +msgid "Click '<b>No</b>' to leave it on the list." +msgstr "Cliquer '<b>Non</b>' pour laisser dans la liste." + +#: ffdl_plugin.py:821 +msgid "Cannot update non-epub format." +msgstr "Ne peut mettre à jour un format non-ePub." + +#: ffdl_plugin.py:866 +msgid "Are You an Adult?" +msgstr "Êtes-vous un adulte ?" + +#: ffdl_plugin.py:867 +msgid "" +"%s requires that you be an adult. Please confirm you are an adult in your " +"locale:" +msgstr "%s requiert que vous soyez un adulte, Veuillez confirmer que vous êtes un adulte dans votre situation :" + +#: ffdl_plugin.py:881 +msgid "Skip Story?" +msgstr "Ignorer le récit ?" + +#: ffdl_plugin.py:887 +msgid "Skip Anthology Story?" +msgstr "Ignorer le récit d'Anthologie ?" + +#: ffdl_plugin.py:888 +msgid "" +"\"<b>%s</b>\" is in series \"<b><a href=\"%s\">%s</a></b>\" that you have an" +" anthology book for." +msgstr "\"<b>%s</b>\" est dans la série \"<b><a href=\"%s\">%s</a></b>\" dont vous avez un livre d'anthologie." + +#: ffdl_plugin.py:889 +msgid "Click '<b>Yes</b>' to Skip." +msgstr "Cliquer '<b>Oui</b>' pour ignorer." + +#: ffdl_plugin.py:892 +msgid "Story in Series Anthology(%s)." +msgstr "Récit dans la série Anthologie(%s)." + +#: ffdl_plugin.py:897 +msgid "Skipped" +msgstr "Ignoré" + +#: ffdl_plugin.py:925 +msgid "Add" +msgstr "Ajouter" + +#: ffdl_plugin.py:938 +msgid "Meta" +msgstr "Meta" + +#: ffdl_plugin.py:971 +msgid "Skipping duplicate story." +msgstr "Ignore les récits en doublons." 
+ +#: ffdl_plugin.py:974 +msgid "" +"More than one identical book by Identifer URL or title/author(s)--can't tell" +" which book to update/overwrite." +msgstr "Plus d'un livre identique par Identifiant URL ou titre/auteur(s)--ne peut pas dire quel livre mettre à jour/écraser." + +#: ffdl_plugin.py:985 +msgid "Update" +msgstr "Mettre à jour" + +#: ffdl_plugin.py:993 ffdl_plugin.py:1000 +msgid "Change Story URL?" +msgstr "Changer l'URL de Récit ?" + +#: ffdl_plugin.py:1001 +msgid "" +"<b>%s</b> by <b>%s</b> is already in your library with a different source " +"URL:" +msgstr "<b>%s</b> par <b>%s</b> est déjà dans votre bibliothèque avec une source URL différente :" + +#: ffdl_plugin.py:1002 +msgid "In library: <a href=\"%(liburl)s\">%(liburl)s</a>" +msgstr "Dans la bibliothèque : <a href=\"%(liburl)s\">%(liburl)s</a>" + +#: ffdl_plugin.py:1003 ffdl_plugin.py:1017 +msgid "New URL: <a href=\"%(newurl)s\">%(newurl)s</a>" +msgstr "Nouvelle URL : <a href=\"%(newurl)s\">%(newurl)s</a>" + +#: ffdl_plugin.py:1004 +msgid "Click '<b>Yes</b>' to update/overwrite book with new URL." +msgstr "Cliquer '<b>Oui</b>' pour mettre à jour/écraser le livre avec la nouvelle URL." + +#: ffdl_plugin.py:1005 +msgid "Click '<b>No</b>' to skip updating/overwriting this book." +msgstr "Cliquer '<b>Non</b>' pour ignorer la mise à jour/l'écrasement de ce livre." + +#: ffdl_plugin.py:1007 ffdl_plugin.py:1014 +msgid "Download as New Book?" +msgstr "Télécharger comme un Nouveau Livre ?" + +#: ffdl_plugin.py:1015 +msgid "" +"<b>%s</b> by <b>%s</b> is already in your library with a different source " +"URL." +msgstr "<b>%s</b> par <b>%s</b> est déjà dans votre bibliothèque avec une source URL différente." + +#: ffdl_plugin.py:1016 +msgid "" +"You chose not to update the existing book. Do you want to add a new book " +"for this URL?" +msgstr "Vous choisissez de ne pas mettre à jour le livre existant. Voulez-vous ajouter un nouveau livre pour cette URL ?" 
+ +#: ffdl_plugin.py:1018 +msgid "Click '<b>Yes</b>' to a new book with new URL." +msgstr "Cliquer '<b>Oui</b>' pour un nouveau livre avec une nouvelle URL." + +#: ffdl_plugin.py:1019 +msgid "Click '<b>No</b>' to skip URL." +msgstr "Cliquer '<b>Non</b>' pour ignorer l'URL." + +#: ffdl_plugin.py:1025 +msgid "Update declined by user due to differing story URL(%s)" +msgstr "Mise à jour déclinée par l'utilisateur en raison d'une URL(%s) de récit différente" + +#: ffdl_plugin.py:1028 +msgid "Different URL" +msgstr "URL différente" + +#: ffdl_plugin.py:1033 +msgid "Metadata collected." +msgstr "Métadonnées collectées" + +#: ffdl_plugin.py:1049 +msgid "Already contains %d chapters." +msgstr "Contient déjà des chapitres %d." + +#: ffdl_plugin.py:1054 jobs.py:199 +msgid "" +"Existing epub contains %d chapters, web site only has %d. Use Overwrite to " +"force update." +msgstr "L'ePub existant contient des chapitres %d, le site web a seulement %d. Utiliser Ecraser pour forcer la mise à jour." + +#: ffdl_plugin.py:1056 +msgid "" +"FFDL doesn't recognize chapters in existing epub, epub is probably from a " +"different source. Use Overwrite to force update." +msgstr "FFDL ne reconnait pas les chapitres dans l'ePub existant, l'ePub est probablement d'une source différente. Utiliser Ecraser pour forcer la mise à jour." + +#: ffdl_plugin.py:1068 +msgid "Not Overwriting, web site is not newer." +msgstr "Ne pas écraser, le site web n'est pas plus récent." + +#: ffdl_plugin.py:1148 +msgid "None of the <b>%d</b> URLs/stories given can be/need to be downloaded." +msgstr "Aucun des URLs/récits <b>%d</b> donnés ne peut être/n'a besoin d'être téléchargé." + +#: ffdl_plugin.py:1149 ffdl_plugin.py:1320 ffdl_plugin.py:1350 +msgid "See log for details." +msgstr "Voir le journal pour les détails." + +#: ffdl_plugin.py:1150 +msgid "Proceed with updating your library(Error Column, if configured)?" +msgstr "Procéder à la mise à jour de votre bibliothèque (Erreur de Colonne, si configuré) ?" 
+ +#: ffdl_plugin.py:1157 ffdl_plugin.py:1332 +msgid "Bad" +msgstr "Mauvais(e)" +
+#: ffdl_plugin.py:1165 +msgid "FFDL download ended" +msgstr "Téléchargement FFDL effectué" +
+#: ffdl_plugin.py:1165 ffdl_plugin.py:1375 +msgid "FFDL log" +msgstr "Journal de FFDL" +
+#: ffdl_plugin.py:1181 +msgid "Download FanFiction Book" +msgstr "Télécharger des livres FanFiction" +
+#: ffdl_plugin.py:1188 +msgid "Starting %d FanFictionDownLoads" +msgstr "Démarrage de %d FanFictionDownLoads" +
+#: ffdl_plugin.py:1218 +msgid "Story Details:" +msgstr "Détails du récit :" +
+#: ffdl_plugin.py:1221 +msgid "Error Updating Metadata" +msgstr "Erreur de mise à jour des Métadonnées" +
+#: ffdl_plugin.py:1222 +msgid "" +"An error has occurred while FFDL was updating calibre's metadata for <a " +"href='%s'>%s</a>." +msgstr "Une erreur s'est produite pendant que FFDL était occupé à mettre à jour les métadonnées de calibre pour <a href='%s'>%s</a>." +
+#: ffdl_plugin.py:1223 +msgid "The ebook has been updated, but the metadata has not." +msgstr "Le livre a été mis à jour, mais les métadonnées ne l'ont pas été." +
+#: ffdl_plugin.py:1275 +msgid "Finished Adding/Updating %d books." +msgstr "Ajout/mise à jour de %d livres terminé." +
+#: ffdl_plugin.py:1283 +msgid "Starting auto conversion of %d books." +msgstr "Démarre l'auto conversion de %d livres." +
+#: ffdl_plugin.py:1304 +msgid "No Good Stories for Anthology" +msgstr "Pas de bons récits pour l'Anthologie" +
+#: ffdl_plugin.py:1305 +msgid "" +"No good stories/updates where downloaded, Anthology creation/update aborted." +msgstr "Aucun bon récit/mise à jour n'a été téléchargé, la création/mise à jour de l'Anthologie a été abandonnée." +
+#: ffdl_plugin.py:1310 ffdl_plugin.py:1349 +msgid "FFDL found <b>%s</b> good and <b>%s</b> bad updates." +msgstr "FFDL a trouvé <b>%s</b> bonnes et <b>%s</b> mauvaises mises à jour." +
+#: ffdl_plugin.py:1317 +msgid "" +"Are you sure you want to continue with creating/updating this Anthology?" 
+msgstr "Etes-vous certain(e) de vouloir continuer avec la création/mise à jour de cette Anthologie ?" +
+#: ffdl_plugin.py:1318 +msgid "Any updates that failed will <b>not</b> be included in the Anthology." +msgstr "Toute mise à jour qui échoue ne sera <b>pas</b> incluse dans l'Anthologie." +
+#: ffdl_plugin.py:1319 +msgid "However, if there's an older version, it will still be included." +msgstr "Cependant, s'il y a une version plus ancienne, celle-ci sera quand même incluse." +
+#: ffdl_plugin.py:1322 +msgid "Proceed with updating this anthology and your library?" +msgstr "Procéder à la mise à jour de cette anthologie et de votre bibliothèque ?" +
+#: ffdl_plugin.py:1330 +msgid "Good" +msgstr "Bon" +
+#: ffdl_plugin.py:1351 +msgid "Proceed with updating your library?" +msgstr "Procéder à la mise à jour de votre bibliothèque ?" +
+#: ffdl_plugin.py:1375 +msgid "FFDL download complete" +msgstr "Téléchargement FFDL effectué" +
+#: ffdl_plugin.py:1388 +msgid "Merging %s books." +msgstr "Fusion de %s livres." +
+#: ffdl_plugin.py:1428 +msgid "FFDL Adding/Updating books." +msgstr "FFDL ajoute/met à jour des livres." +
+#: ffdl_plugin.py:1435 +msgid "Updating calibre for FanFiction stories..." +msgstr "Met à jour calibre pour des récits FanFiction..." +
+#: ffdl_plugin.py:1436 +msgid "Update calibre for FanFiction stories" +msgstr "Mettre à jour calibre pour des récits FanFiction" +
+#: ffdl_plugin.py:1445 +msgid "Adding/Updating %s BAD books." +msgstr "Ajoute/met à jour %s MAUVAIS livres." +
+#: ffdl_plugin.py:1454 +msgid "Updating calibre for BAD FanFiction stories..." +msgstr "Met à jour calibre pour de MAUVAIS récits FanFiction..." +
+#: ffdl_plugin.py:1455 +msgid "Update calibre for BAD FanFiction stories" +msgstr "Mettre à jour calibre pour de MAUVAIS récits FanFiction" +
+#: ffdl_plugin.py:1481 +msgid "Adding format to book failed for some reason..." +msgstr "Le format ajouté au livre a échoué pour une raison quelconque..." 
+ +#: ffdl_plugin.py:1484 +msgid "Error" +msgstr "Erreur" +
+#: ffdl_plugin.py:1757 +msgid "" +"You configured FanFictionDownLoader to automatically update Reading Lists, " +"but you don't have the %s plugin installed anymore?" +msgstr "Vous avez configuré FanFictionDownLoader pour mettre à jour automatiquement les Listes de Lecture, mais vous n'avez plus le greffon %s installé ?" +
+#: ffdl_plugin.py:1769 +msgid "" +"You configured FanFictionDownLoader to automatically update \"To Read\" " +"Reading Lists, but you don't have any lists set?" +msgstr "Vous avez configuré FanFictionDownLoader pour mettre à jour automatiquement les Listes de Lecture \"A lire\", mais vous n'avez paramétré aucune liste ?" +
+#: ffdl_plugin.py:1779 ffdl_plugin.py:1797 +msgid "" +"You configured FanFictionDownLoader to automatically update Reading List " +"'%s', but you don't have a list of that name?" +msgstr "Vous avez configuré FanFictionDownLoader pour mettre à jour automatiquement la Liste de Lecture '%s', mais vous n'avez pas de liste à ce nom ?" +
+#: ffdl_plugin.py:1785 +msgid "" +"You configured FanFictionDownLoader to automatically update \"Send to " +"Device\" Reading Lists, but you don't have any lists set?" +msgstr "Vous avez configuré FanFictionDownLoader pour mettre à jour automatiquement les Listes de Lecture \"Envoyé vers le dispositif\", mais vous n'avez paramétré aucune liste ?" +
+#: ffdl_plugin.py:1906 +msgid "No story URL found." +msgstr "Pas d'URL de récit trouvée." +
+#: ffdl_plugin.py:1909 +msgid "Not Found" +msgstr "Non trouvé(e)" +
+#: ffdl_plugin.py:1915 +msgid "URL is not a valid story URL." +msgstr "L'URL n'est pas une URL de récit valide." 
+ +#: ffdl_plugin.py:1918 +msgid "Bad URL" +msgstr "Mauvaise URL" + +#: ffdl_plugin.py:2054 ffdl_plugin.py:2057 +msgid "Anthology containing:" +msgstr "Anthologie contenant : " + +#: ffdl_plugin.py:2055 +msgid "%s by %s" +msgstr "%s par %s" + +#: ffdl_plugin.py:2077 +msgid " Anthology" +msgstr "Anthologie" + +#: ffdl_plugin.py:2114 +msgid "(was set, removed for security)" +msgstr "(a été paramétré, retiré par sécurité)" + +#: jobs.py:73 +msgid "Downloading FanFiction Stories" +msgstr "Téléchargement de Récits FanFiction" + +#: jobs.py:95 +msgid "Successful:" +msgstr "Réussi :" + +#: jobs.py:97 +msgid "Unsuccessful:" +msgstr "Échoué :" + +#: jobs.py:111 +msgid "Download started..." +msgstr "Téléchargement démarré..." + +#: jobs.py:193 +msgid "Already contains %d chapters. Reuse as is." +msgstr "Contient déjà des chapitres %d. Réutilisez tel quel." + +#: jobs.py:210 +msgid "Update %s completed, added %s chapters for %s total." +msgstr "Mise à jour %s terminée, ajouté %s chapitres pour %s au total." diff --git a/calibre-plugin/translations/messages.pot b/calibre-plugin/translations/messages.pot new file mode 100644 index 00000000..1b1e93fa --- /dev/null +++ b/calibre-plugin/translations/messages.pot @@ -0,0 +1,1519 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) YEAR ORGANIZATION +# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR. +# +msgid "" +msgstr "" +"Project-Id-Version: PACKAGE VERSION\n" +"POT-Creation-Date: 2014-09-09 15:54+Central Daylight Time\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n" +"Language-Team: LANGUAGE <LL@li.org>\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=CHARSET\n" +"Content-Transfer-Encoding: ENCODING\n" +"Generated-By: pygettext.py 1.5\n" + + +#: __init__.py:42 +msgid "UI plugin to download FanFiction stories from various sites." +msgstr "" + +#: __init__.py:109 +msgid "Path to the calibre library. Default is to use the path stored in the settings." 
+msgstr "" + +#: config.py:176 +msgid "FAQs" +msgstr "" + +#: config.py:176 +msgid "List of Supported Sites" +msgstr "" + +#: config.py:190 +msgid "Basic" +msgstr "" + +#: config.py:211 +msgid "Standard Columns" +msgstr "" + +#: config.py:214 +msgid "Custom Columns" +msgstr "" + +#: config.py:217 +msgid "Other" +msgstr "" + +#: config.py:338 +msgid "These settings control the basic features of the plugin--downloading FanFiction." +msgstr "" + +#: config.py:342 +msgid "Defaults Options on Download" +msgstr "" + +#: config.py:346 +msgid "On each download, FFDL offers an option to select the output format. <br />This sets what that option will default to." +msgstr "" + +#: config.py:348 +msgid "Default Output &Format:" +msgstr "" + +#: config.py:363 +msgid "On each download, FFDL offers an option of what happens if that story already exists. <br />This sets what that option will default to." +msgstr "" + +#: config.py:365 +msgid "Default If Story Already Exists?" +msgstr "" + +#: config.py:379 +msgid "Default Update Calibre &Metadata?" +msgstr "" + +#: config.py:380 +msgid "On each download, FFDL offers an option to update Calibre's metadata (title, author, URL, tags, custom columns, etc) from the web site. <br />This sets whether that will default to on or off. <br />Columns set to 'New Only' in the column tabs will only be set for new books." +msgstr "" + +#: config.py:384 +msgid "Default Update EPUB Cover when Updating EPUB?" +msgstr "" + +#: config.py:385 +msgid "On each download, FFDL offers an option to update the book cover image <i>inside</i> the EPUB from the web site when the EPUB is updated.<br />This sets whether that will default to on or off." +msgstr "" + +#: config.py:389 +msgid "Smarten Punctuation (EPUB only)" +msgstr "" + +#: config.py:390 +msgid "Run Smarten Punctuation from Calibre's Polish Book feature on each EPUB download and update." 
+msgstr "" + +#: config.py:395 +msgid "Updating Calibre Options" +msgstr "" + +#: config.py:399 +msgid "Delete other existing formats?" +msgstr "" + +#: config.py:400 +msgid "" +"Check this to automatically delete all other ebook formats when updating an existing book.\n" +"Handy if you have both a Nook(epub) and Kindle(mobi), for example." +msgstr "" + +#: config.py:404 +msgid "Update Calibre Cover when Updating Metadata?" +msgstr "" + +#: config.py:405 +msgid "" +"Update calibre book cover image from EPUB when metadata is updated. (EPUB only.)\n" +"Doesn't go looking for new images on 'Update Calibre Metadata Only'." +msgstr "" + +#: config.py:409 +msgid "Keep Existing Tags when Updating Metadata?" +msgstr "" + +#: config.py:410 +msgid "" +"Existing tags will be kept and any new tags added.\n" +"%(cmplt)s and %(inprog)s tags will be still be updated, if known.\n" +"%(lul)s tags will be updated if %(lus)s in %(is)s.\n" +"(If Tags is set to 'New Only' in the Standard Columns tab, this has no effect.)" +msgstr "" + +#: config.py:414 +msgid "Force Author into Author Sort?" +msgstr "" + +#: config.py:415 +msgid "" +"If checked, the author(s) as given will be used for the Author Sort, too.\n" +"If not checked, calibre will apply it's built in algorithm which makes 'Bob Smith' sort as 'Smith, Bob', etc." +msgstr "" + +#: config.py:419 +msgid "Force Title into Title Sort?" +msgstr "" + +#: config.py:420 +msgid "" +"If checked, the title as given will be used for the Title Sort, too.\n" +"If not checked, calibre will apply it's built in algorithm which makes 'The Title' sort as 'Title, The', etc." +msgstr "" + +#: config.py:424 +msgid "Check for existing Series Anthology books?" +msgstr "" + +#: config.py:425 +msgid "" +"Check for existings Series Anthology books using each new story's series URL before downloading.\n" +"Offer to skip downloading if a Series Anthology is found." +msgstr "" + +#: config.py:429 +msgid "Check for changed Story URL?" 
+msgstr "" + +#: config.py:430 +msgid "" +"Warn you if an update will change the URL of an existing book.\n" +"fanfiction.net URLs will change from http to https silently." +msgstr "" + +#: config.py:434 +msgid "Search EPUB text for Story URL?" +msgstr "" + +#: config.py:435 +msgid "" +"Look for first valid story URL inside EPUB text if not found in metadata.\n" +"Somewhat risky, could find wrong URL depending on EPUB content.\n" +"Also finds and corrects bad ffnet URLs from ficsaver.com files." +msgstr "" + +#: config.py:439 +msgid "Mark added/updated books when finished?" +msgstr "" + +#: config.py:440 +msgid "" +"Mark added/updated books when finished. Use with option below.\n" +"You can also manually search for 'marked:ffdl_success'.\n" +"'marked:ffdl_failed' is also available, or search 'marked:ffdl' for both." +msgstr "" + +#: config.py:444 +msgid "Show Marked books when finished?" +msgstr "" + +#: config.py:445 +msgid "" +"Show Marked added/updated books only when finished.\n" +"You can also manually search for 'marked:ffdl_success'.\n" +"'marked:ffdl_failed' is also available, or search 'marked:ffdl' for both." +msgstr "" + +#: config.py:449 +msgid "Automatically Convert new/update books?" +msgstr "" + +#: config.py:450 +msgid "" +"Automatically call calibre's Convert for new/update books.\n" +"Converts to the current output format as chosen in calibre's\n" +"Preferences->Behavior settings." +msgstr "" + +#: config.py:454 +msgid "GUI Options" +msgstr "" + +#: config.py:458 +msgid "Take URLs from Clipboard?" +msgstr "" + +#: config.py:459 +msgid "Prefill URLs from valid URLs in Clipboard when Adding New." +msgstr "" + +#: config.py:463 +msgid "Default to Update when books selected?" +msgstr "" + +#: config.py:464 +msgid "" +"The top FanFictionDownLoader plugin button will start Update if\n" +"books are selected. If unchecked, it will always bring up 'Add New'." +msgstr "" + +#: config.py:468 +msgid "Keep 'Add New from URL(s)' dialog on top?" 
+msgstr "" + +#: config.py:469 +msgid "" +"Instructs the OS and Window Manager to keep the 'Add New from URL(s)'\n" +"dialog on top of all other windows. Useful for dragging URLs onto it." +msgstr "" + +#: config.py:473 +msgid "Misc Options" +msgstr "" + +#: config.py:478 +msgid "Include images in EPUBs?" +msgstr "" + +#: config.py:479 +msgid "Download and include images in EPUB stories. This is equivalent to adding:%(imgset)s ...to the top of %(pini)s. Your settings in %(pini)s will override this." +msgstr "" + +#: config.py:483 +msgid "Inject calibre Series when none found?" +msgstr "" + +#: config.py:484 +msgid "If no series is found, inject the calibre series (if there is one) so it appears on the FFDL title page(not cover)." +msgstr "" + +#: config.py:488 +msgid "Reject List" +msgstr "" + +#: config.py:492 +msgid "Edit Reject URL List" +msgstr "" + +#: config.py:493 +msgid "Edit list of URLs FFDL will automatically Reject." +msgstr "" + +#: config.py:497 config.py:571 +msgid "Add Reject URLs" +msgstr "" + +#: config.py:498 +msgid "Add additional URLs to Reject as text." +msgstr "" + +#: config.py:502 +msgid "Edit Reject Reasons List" +msgstr "" + +#: config.py:503 config.py:562 +msgid "Customize the Reasons presented when Rejecting URLs" +msgstr "" + +#: config.py:507 +msgid "Reject Without Confirmation?" +msgstr "" + +#: config.py:508 +msgid "Always reject URLs on the Reject List without stopping and asking." +msgstr "" + +#: config.py:546 +msgid "Edit Reject URLs List" +msgstr "" + +#: config.py:560 +msgid "Reject Reasons" +msgstr "" + +#: config.py:561 +msgid "Customize Reject List Reasons" +msgstr "" + +#: config.py:569 +msgid "Reason why I rejected it" +msgstr "" + +#: config.py:569 +msgid "Title by Author" +msgstr "" + +#: config.py:572 +msgid "Add Reject URLs. Use: <b>http://...,note</b> or <b>http://...,title by author - note</b><br>Invalid story URLs will be ignored." 
+msgstr "" + +#: config.py:573 +msgid "" +"One URL per line:\n" +"<b>http://...,note</b>\n" +"<b>http://...,title by author - note</b>" +msgstr "" + +#: config.py:575 dialogs.py:1012 +msgid "Add this reason to all URLs added:" +msgstr "" + +#: config.py:590 +msgid "These settings provide more detailed control over what metadata will be displayed inside the ebook as well as let you set %(isa)s and %(u)s/%(p)s for different sites." +msgstr "" + +#: config.py:608 +msgid "View Defaults" +msgstr "" + +#: config.py:609 +msgid "" +"View all of the plugin's configurable settings\n" +"and their default settings." +msgstr "" + +#: config.py:627 +msgid "Plugin Defaults (%s) (Read-Only)" +msgstr "" + +#: config.py:628 config.py:634 +msgid "" +"These are all of the plugin's configurable options\n" +"and their default settings." +msgstr "" + +#: config.py:629 +msgid "Plugin Defaults" +msgstr "" + +#: config.py:645 dialogs.py:542 dialogs.py:645 +msgid "OK" +msgstr "" + +#: config.py:665 +msgid "These settings provide integration with the %(rl)s Plugin. %(rl)s can automatically send to devices and change custom columns. You have to create and configure the lists in %(rl)s to be useful." +msgstr "" + +#: config.py:670 +msgid "Add new/updated stories to \"Send to Device\" Reading List(s)." +msgstr "" + +#: config.py:671 +msgid "Automatically add new/updated stories to these lists in the %(rl)s plugin." +msgstr "" + +#: config.py:676 +msgid "\"Send to Device\" Reading Lists" +msgstr "" + +#: config.py:677 config.py:680 config.py:693 config.py:696 +msgid "When enabled, new/updated stories will be automatically added to these lists." +msgstr "" + +#: config.py:686 +msgid "Add new/updated stories to \"To Read\" Reading List(s)." +msgstr "" + +#: config.py:687 +msgid "" +"Automatically add new/updated stories to these lists in the %(rl)s plugin.\n" +"Also offers menu option to remove stories from the \"To Read\" lists." 
+msgstr "" + +#: config.py:692 +msgid "\"To Read\" Reading Lists" +msgstr "" + +#: config.py:702 +msgid "Add stories back to \"Send to Device\" Reading List(s) when marked \"Read\"." +msgstr "" + +#: config.py:703 +msgid "Menu option to remove from \"To Read\" lists will also add stories back to \"Send to Device\" Reading List(s)" +msgstr "" + +#: config.py:725 +msgid "The %(gc)s plugin can create cover images for books using various metadata and configurations. If you have GC installed, FFDL can run GC on new downloads and metadata updates. Pick a GC setting by site or Default." +msgstr "" + +#: config.py:743 config.py:747 config.py:760 +msgid "Default" +msgstr "" + +#: config.py:748 +msgid "On Metadata update, run %(gc)s with this setting, if not selected for specific site." +msgstr "" + +#: config.py:751 +msgid "On Metadata update, run %(gc)s with this setting for %(site)s stories." +msgstr "" + +#: config.py:774 +msgid "Run %(gc)s Only on New Books" +msgstr "" + +#: config.py:775 +msgid "Default is to run GC any time the calibre metadata is updated." +msgstr "" + +#: config.py:779 +msgid "Allow %(gcset)s from %(pini)s to override" +msgstr "" + +#: config.py:780 +msgid "The %(pini)s parameter %(gcset)s allows you to choose a GC setting based on metadata rather than site, but it's much more complex.<br \\>%(gcset)s is ignored when this is off." +msgstr "" + +#: config.py:784 +msgid "Use calibre's Polish feature to inject/update the cover" +msgstr "" + +#: config.py:785 +msgid "Calibre's Polish feature will be used to inject or update the generated cover into the ebook, EPUB only." +msgstr "" + +#: config.py:799 +msgid "These settings provide integration with the %(cp)s Plugin. %(cp)s can automatically update custom columns with page, word and reading level statistics. You have to create and configure the columns in %(cp)s first." 
+msgstr "" + +#: config.py:804 +msgid "If any of the settings below are checked, when stories are added or updated, the %(cp)s Plugin will be called to update the checked statistics." +msgstr "" + +#: config.py:810 +msgid "Which column and algorithm to use are configured in %(cp)s." +msgstr "" + +#: config.py:818 +msgid "Will overwrite word count from FFDL metadata if set to update the same custom column." +msgstr "" + +#: config.py:849 +msgid "These controls aren't plugin settings as such, but convenience buttons for setting Keyboard shortcuts and getting all the FanFictionDownLoader confirmation dialogs back again." +msgstr "" + +#: config.py:854 +msgid "Keyboard shortcuts..." +msgstr "" + +#: config.py:855 +msgid "Edit the keyboard shortcuts associated with this plugin" +msgstr "" + +#: config.py:859 +msgid "Reset disabled &confirmation dialogs" +msgstr "" + +#: config.py:860 +msgid "Reset all show me again dialogs for the FanFictionDownLoader plugin" +msgstr "" + +#: config.py:864 +msgid "&View library preferences..." 
+msgstr "" + +#: config.py:865 +msgid "View data stored in the library database for this plugin" +msgstr "" + +#: config.py:876 +msgid "Done" +msgstr "" + +#: config.py:877 +msgid "Confirmation dialogs have all been reset" +msgstr "" + +#: config.py:925 +msgid "Category" +msgstr "" + +#: config.py:926 +msgid "Genre" +msgstr "" + +#: config.py:927 +msgid "Language" +msgstr "" + +#: config.py:928 ffdl_plugin.py:1152 ffdl_plugin.py:1324 ffdl_plugin.py:1354 +msgid "Status" +msgstr "" + +#: config.py:929 +msgid "Status:%(cmplt)s" +msgstr "" + +#: config.py:930 +msgid "Status:%(inprog)s" +msgstr "" + +#: config.py:931 config.py:1065 +msgid "Series" +msgstr "" + +#: config.py:932 +msgid "Characters" +msgstr "" + +#: config.py:933 +msgid "Relationships" +msgstr "" + +#: config.py:934 +msgid "Published" +msgstr "" + +#: config.py:935 ffdl_plugin.py:1437 ffdl_plugin.py:1456 +msgid "Updated" +msgstr "" + +#: config.py:936 +msgid "Created" +msgstr "" + +#: config.py:937 +msgid "Rating" +msgstr "" + +#: config.py:938 +msgid "Warnings" +msgstr "" + +#: config.py:939 +msgid "Chapters" +msgstr "" + +#: config.py:940 +msgid "Words" +msgstr "" + +#: config.py:941 +msgid "Site" +msgstr "" + +#: config.py:942 +msgid "Story ID" +msgstr "" + +#: config.py:943 +msgid "Author ID" +msgstr "" + +#: config.py:944 +msgid "Extra Tags" +msgstr "" + +#: config.py:945 config.py:1057 dialogs.py:804 dialogs.py:900 +#: ffdl_plugin.py:1152 ffdl_plugin.py:1324 ffdl_plugin.py:1354 +msgid "Title" +msgstr "" + +#: config.py:946 +msgid "Story URL" +msgstr "" + +#: config.py:947 +msgid "Description" +msgstr "" + +#: config.py:948 dialogs.py:804 dialogs.py:900 ffdl_plugin.py:1152 +#: ffdl_plugin.py:1324 ffdl_plugin.py:1354 +msgid "Author" +msgstr "" + +#: config.py:949 +msgid "Author URL" +msgstr "" + +#: config.py:950 +msgid "File Format" +msgstr "" + +#: config.py:951 +msgid "File Extension" +msgstr "" + +#: config.py:952 +msgid "Site Abbrev" +msgstr "" + +#: config.py:953 +msgid "FFDL Version" +msgstr "" 
+ +#: config.py:968 +msgid "If you have custom columns defined, they will be listed below. Choose a metadata value type to fill your columns automatically." +msgstr "" + +#: config.py:993 +msgid "Update this %s column(%s) with..." +msgstr "" + +#: config.py:1003 +msgid "Values that aren't valid for this enumeration column will be ignored." +msgstr "" + +#: config.py:1003 config.py:1005 +msgid "Metadata values valid for this type of column." +msgstr "" + +#: config.py:1008 config.py:1084 +msgid "New Only" +msgstr "" + +#: config.py:1009 +msgid "" +"Write to %s(%s) only for new\n" +"books, not updates to existing books." +msgstr "" + +#: config.py:1020 +msgid "Allow %(ccset)s from %(pini)s to override" +msgstr "" + +#: config.py:1021 +msgid "The %(pini)s parameter %(ccset)s allows you to set custom columns to site specific values that aren't common to all sites.<br />%(ccset)s is ignored when this is off." +msgstr "" + +#: config.py:1026 +msgid "Special column:" +msgstr "" + +#: config.py:1031 +msgid "Update/Overwrite Error Column:" +msgstr "" + +#: config.py:1032 +msgid "" +"When an update or overwrite of an existing story fails, record the reason in this column.\n" +"(Text and Long Text columns only.)" +msgstr "" + +#: config.py:1058 +msgid "Author(s)" +msgstr "" + +#: config.py:1059 +msgid "Publisher" +msgstr "" + +#: config.py:1060 +msgid "Tags" +msgstr "" + +#: config.py:1061 +msgid "Languages" +msgstr "" + +#: config.py:1062 +msgid "Published Date" +msgstr "" + +#: config.py:1063 +msgid "Date" +msgstr "" + +#: config.py:1064 +msgid "Comments" +msgstr "" + +#: config.py:1066 +msgid "Ids(url id only)" +msgstr "" + +#: config.py:1071 +msgid "The standard calibre metadata columns are listed below. You may choose whether FFDL will fill each column automatically on updates or only for new books." +msgstr "" + +#: config.py:1085 +msgid "" +"Write to %s only for new\n" +"books, not updates to existing books." 
+msgstr "" + +#: dialogs.py:69 +msgid "Skip" +msgstr "" + +#: dialogs.py:70 +msgid "Add New Book" +msgstr "" + +#: dialogs.py:71 +msgid "Update EPUB if New Chapters" +msgstr "" + +#: dialogs.py:72 +msgid "Update EPUB Always" +msgstr "" + +#: dialogs.py:73 +msgid "Overwrite if Newer" +msgstr "" + +#: dialogs.py:74 +msgid "Overwrite Always" +msgstr "" + +#: dialogs.py:75 +msgid "Update Calibre Metadata Only" +msgstr "" + +#: dialogs.py:239 ffdl_plugin.py:89 +msgid "FanFictionDownLoader" +msgstr "" + +#: dialogs.py:256 dialogs.py:703 +msgid "Show Download Options" +msgstr "" + +#: dialogs.py:275 dialogs.py:720 +msgid "Output &Format:" +msgstr "" + +#: dialogs.py:283 dialogs.py:728 +msgid "Choose output format to create. May set default from plugin configuration." +msgstr "" + +#: dialogs.py:311 dialogs.py:745 +msgid "Update Calibre &Metadata?" +msgstr "" + +#: dialogs.py:312 dialogs.py:746 +msgid "" +"Update metadata for existing stories in Calibre from web site?\n" +"(Columns set to 'New Only' in the column tabs will only be set for new books.)" +msgstr "" + +#: dialogs.py:318 dialogs.py:750 +msgid "Update EPUB Cover?" +msgstr "" + +#: dialogs.py:319 dialogs.py:751 +msgid "Update book cover image from site or defaults (if found) <i>inside</i> the EPUB when EPUB is updated." +msgstr "" + +#: dialogs.py:366 +msgid "Story URL(s) for anthology, one per line:" +msgstr "" + +#: dialogs.py:367 +msgid "" +"URLs for stories to include in the anthology, one per line.\n" +"Will take URLs from clipboard, but only valid URLs." +msgstr "" + +#: dialogs.py:368 +msgid "If Story Already Exists in Anthology?" +msgstr "" + +#: dialogs.py:369 +msgid "What to do if there's already an existing story with the same URL in the anthology." 
+msgstr "" + +#: dialogs.py:378 +msgid "Story URL(s), one per line:" +msgstr "" + +#: dialogs.py:379 +msgid "" +"URLs for stories, one per line.\n" +"Will take URLs from clipboard, but only valid URLs.\n" +"Add [1,5] after the URL to limit the download to chapters 1-5." +msgstr "" + +#: dialogs.py:380 +msgid "If Story Already Exists?" +msgstr "" + +#: dialogs.py:381 +msgid "What to do if there's already an existing story with the same URL or title and author." +msgstr "" + +#: dialogs.py:481 +msgid "For Individual Books" +msgstr "" + +#: dialogs.py:482 +msgid "Get URLs and go to dialog for individual story downloads." +msgstr "" + +#: dialogs.py:486 +msgid "For Anthology Epub" +msgstr "" + +#: dialogs.py:487 +msgid "" +"Get URLs and go to dialog for Anthology download.\n" +"Requires %s plugin." +msgstr "" + +#: dialogs.py:492 dialogs.py:546 dialogs.py:573 +msgid "Cancel" +msgstr "" + +#: dialogs.py:524 +msgid "Password" +msgstr "" + +#: dialogs.py:525 +msgid "Author requires a password for this story(%s)." +msgstr "" + +#: dialogs.py:530 +msgid "User/Password" +msgstr "" + +#: dialogs.py:531 +msgid "%s requires you to login to download this story." +msgstr "" + +#: dialogs.py:533 +msgid "User:" +msgstr "" + +#: dialogs.py:537 +msgid "Password:" +msgstr "" + +#: dialogs.py:568 +msgid "Fetching metadata for stories..." +msgstr "" + +#: dialogs.py:569 +msgid "Downloading metadata for stories" +msgstr "" + +#: dialogs.py:570 +msgid "Fetched metadata for" +msgstr "" + +#: dialogs.py:640 ffdl_plugin.py:325 +msgid "About FanFictionDownLoader" +msgstr "" + +#: dialogs.py:694 +msgid "Remove selected books from the list" +msgstr "" + +#: dialogs.py:733 +msgid "Update Mode:" +msgstr "" + +#: dialogs.py:736 +msgid "What sort of update to perform. May set default from plugin configuration." 
+msgstr "" + +#: dialogs.py:804 ffdl_plugin.py:1152 ffdl_plugin.py:1324 ffdl_plugin.py:1354 +msgid "Comment" +msgstr "" + +#: dialogs.py:872 +msgid "Are you sure you want to remove this book from the list?" +msgstr "" + +#: dialogs.py:874 +msgid "Are you sure you want to remove the selected %d books from the list?" +msgstr "" + +#: dialogs.py:900 +msgid "Note" +msgstr "" + +#: dialogs.py:939 +msgid "Select or Edit Reject Note." +msgstr "" + +#: dialogs.py:947 +msgid "Are you sure you want to remove this URL from the list?" +msgstr "" + +#: dialogs.py:949 +msgid "Are you sure you want to remove the %d selected URLs from the list?" +msgstr "" + +#: dialogs.py:967 +msgid "List of Books to Reject" +msgstr "" + +#: dialogs.py:980 +msgid "FFDL will remember these URLs and display the note and offer to reject them if you try to download them again later." +msgstr "" + +#: dialogs.py:994 +msgid "Remove selected URL(s) from the list" +msgstr "" + +#: dialogs.py:1009 dialogs.py:1013 +msgid "This will be added to whatever note you've set for each URL above." +msgstr "" + +#: dialogs.py:1022 +msgid "Delete Books (including books without FanFiction URLs)?" +msgstr "" + +#: dialogs.py:1023 +msgid "Delete the selected books after adding them to the Rejected URLs list." 
+msgstr "" + +#: ffdl_plugin.py:90 +msgid "Download FanFiction stories from various web sites" +msgstr "" + +#: ffdl_plugin.py:120 +msgid "FanFictionDL" +msgstr "" + +#: ffdl_plugin.py:243 +msgid "&Add New from URL(s)" +msgstr "" + +#: ffdl_plugin.py:245 +msgid "Add New FanFiction Book(s) from URL(s)" +msgstr "" + +#: ffdl_plugin.py:248 +msgid "&Update Existing FanFiction Book(s)" +msgstr "" + +#: ffdl_plugin.py:254 +msgid "Get Story URLs to Download from Web Page" +msgstr "" + +#: ffdl_plugin.py:258 +msgid "&Make Anthology Epub Manually from URL(s)" +msgstr "" + +#: ffdl_plugin.py:260 +msgid "Make FanFiction Anthology Epub Manually from URL(s)" +msgstr "" + +#: ffdl_plugin.py:263 +msgid "&Update Anthology Epub" +msgstr "" + +#: ffdl_plugin.py:265 +msgid "Update FanFiction Anthology Epub" +msgstr "" + +#: ffdl_plugin.py:273 +msgid "Add to \"To Read\" and \"Send to Device\" Lists" +msgstr "" + +#: ffdl_plugin.py:275 +msgid "Remove from \"To Read\" and add to \"Send to Device\" Lists" +msgstr "" + +#: ffdl_plugin.py:277 ffdl_plugin.py:282 +msgid "Remove from \"To Read\" Lists" +msgstr "" + +#: ffdl_plugin.py:279 +msgid "Add Selected to \"Send to Device\" Lists" +msgstr "" + +#: ffdl_plugin.py:281 +msgid "Add to \"To Read\" Lists" +msgstr "" + +#: ffdl_plugin.py:297 +msgid "Get URLs from Selected Books" +msgstr "" + +#: ffdl_plugin.py:303 ffdl_plugin.py:397 +msgid "Get Story URLs from Web Page" +msgstr "" + +#: ffdl_plugin.py:308 +msgid "Reject Selected Books" +msgstr "" + +#: ffdl_plugin.py:316 +msgid "&Configure Plugin" +msgstr "" + +#: ffdl_plugin.py:319 +msgid "Configure FanFictionDownLoader" +msgstr "" + +#: ffdl_plugin.py:322 +msgid "About Plugin" +msgstr "" + +#: ffdl_plugin.py:379 +msgid "Cannot Update Reading Lists from Device View" +msgstr "" + +#: ffdl_plugin.py:383 +msgid "No Selected Books to Update Reading Lists" +msgstr "" + +#: ffdl_plugin.py:408 ffdl_plugin.py:460 +msgid "List of Story URLs" +msgstr "" + +#: ffdl_plugin.py:409 +msgid "No Valid Story 
URLs found on given page." +msgstr "" + +#: ffdl_plugin.py:424 +msgid "No Selected Books to Get URLs From" +msgstr "" + +#: ffdl_plugin.py:442 +msgid "Collecting URLs for stories..." +msgstr "" + +#: ffdl_plugin.py:443 +msgid "Get URLs for stories" +msgstr "" + +#: ffdl_plugin.py:444 ffdl_plugin.py:491 ffdl_plugin.py:678 +msgid "URL retrieved" +msgstr "" + +#: ffdl_plugin.py:464 +msgid "List of URLs" +msgstr "" + +#: ffdl_plugin.py:465 +msgid "No Story URLs found in selected books." +msgstr "" + +#: ffdl_plugin.py:481 +msgid "No Selected Books have URLs to Reject" +msgstr "" + +#: ffdl_plugin.py:489 +msgid "Collecting URLs for Reject List..." +msgstr "" + +#: ffdl_plugin.py:490 +msgid "Get URLs for Reject List" +msgstr "" + +#: ffdl_plugin.py:525 +msgid "Proceed to Remove?" +msgstr "" + +#: ffdl_plugin.py:525 +msgid "Rejecting FFDL URLs: None of the books selected have FanFiction URLs." +msgstr "" + +#: ffdl_plugin.py:547 +msgid "Cannot Make Anthologys without %s" +msgstr "" + +#: ffdl_plugin.py:551 ffdl_plugin.py:655 +msgid "Cannot Update Books from Device View" +msgstr "" + +#: ffdl_plugin.py:555 +msgid "Can only update 1 anthology at a time" +msgstr "" + +#: ffdl_plugin.py:564 +msgid "Can only Update Epub Anthologies" +msgstr "" + +#: ffdl_plugin.py:582 ffdl_plugin.py:583 +msgid "Cannot Update Anthology" +msgstr "" + +#: ffdl_plugin.py:583 +msgid "Book isn't an FFDL Anthology or contains book(s) without valid FFDL URLs." +msgstr "" + +#: ffdl_plugin.py:641 +msgid "There are %d stories in the current anthology that are <b>not</b> going to be kept if you go ahead." +msgstr "" + +#: ffdl_plugin.py:642 +msgid "Story URLs that will be removed:" +msgstr "" + +#: ffdl_plugin.py:644 +msgid "Update anyway?" +msgstr "" + +#: ffdl_plugin.py:645 +msgid "Stories Removed" +msgstr "" + +#: ffdl_plugin.py:662 +msgid "No Selected Books to Update" +msgstr "" + +#: ffdl_plugin.py:676 +msgid "Collecting stories for update..." 
+msgstr "" + +#: ffdl_plugin.py:677 +msgid "Get stories for updates" +msgstr "" + +#: ffdl_plugin.py:687 +msgid "Update Existing List" +msgstr "" + +#: ffdl_plugin.py:745 +msgid "Started fetching metadata for %s stories." +msgstr "" + +#: ffdl_plugin.py:751 +msgid "No valid story URLs entered." +msgstr "" + +#: ffdl_plugin.py:776 ffdl_plugin.py:782 +msgid "Reject URL?" +msgstr "" + +#: ffdl_plugin.py:783 ffdl_plugin.py:801 +msgid "<b>%s</b> is on your Reject URL list:" +msgstr "" + +#: ffdl_plugin.py:785 +msgid "Click '<b>Yes</b>' to Reject." +msgstr "" + +#: ffdl_plugin.py:786 ffdl_plugin.py:890 +msgid "Click '<b>No</b>' to download anyway." +msgstr "" + +#: ffdl_plugin.py:788 +msgid "Story on Reject URLs list (%s)." +msgstr "" + +#: ffdl_plugin.py:791 +msgid "Rejected" +msgstr "" + +#: ffdl_plugin.py:794 +msgid "Remove Reject URL?" +msgstr "" + +#: ffdl_plugin.py:800 +msgid "Remove URL from Reject List?" +msgstr "" + +#: ffdl_plugin.py:803 +msgid "Click '<b>Yes</b>' to remove it from the list," +msgstr "" + +#: ffdl_plugin.py:804 +msgid "Click '<b>No</b>' to leave it on the list." +msgstr "" + +#: ffdl_plugin.py:821 +msgid "Cannot update non-epub format." +msgstr "" + +#: ffdl_plugin.py:866 +msgid "Are You an Adult?" +msgstr "" + +#: ffdl_plugin.py:867 +msgid "%s requires that you be an adult. Please confirm you are an adult in your locale:" +msgstr "" + +#: ffdl_plugin.py:881 +msgid "Skip Story?" +msgstr "" + +#: ffdl_plugin.py:887 +msgid "Skip Anthology Story?" +msgstr "" + +#: ffdl_plugin.py:888 +msgid "\"<b>%s</b>\" is in series \"<b><a href=\"%s\">%s</a></b>\" that you have an anthology book for." +msgstr "" + +#: ffdl_plugin.py:889 +msgid "Click '<b>Yes</b>' to Skip." +msgstr "" + +#: ffdl_plugin.py:892 +msgid "Story in Series Anthology(%s)." 
+msgstr "" + +#: ffdl_plugin.py:897 +msgid "Skipped" +msgstr "" + +#: ffdl_plugin.py:925 +msgid "Add" +msgstr "" + +#: ffdl_plugin.py:938 +msgid "Meta" +msgstr "" + +#: ffdl_plugin.py:971 +msgid "Skipping duplicate story." +msgstr "" + +#: ffdl_plugin.py:974 +msgid "More than one identical book by Identifer URL or title/author(s)--can't tell which book to update/overwrite." +msgstr "" + +#: ffdl_plugin.py:985 +msgid "Update" +msgstr "" + +#: ffdl_plugin.py:993 ffdl_plugin.py:1000 +msgid "Change Story URL?" +msgstr "" + +#: ffdl_plugin.py:1001 +msgid "<b>%s</b> by <b>%s</b> is already in your library with a different source URL:" +msgstr "" + +#: ffdl_plugin.py:1002 +msgid "In library: <a href=\"%(liburl)s\">%(liburl)s</a>" +msgstr "" + +#: ffdl_plugin.py:1003 ffdl_plugin.py:1017 +msgid "New URL: <a href=\"%(newurl)s\">%(newurl)s</a>" +msgstr "" + +#: ffdl_plugin.py:1004 +msgid "Click '<b>Yes</b>' to update/overwrite book with new URL." +msgstr "" + +#: ffdl_plugin.py:1005 +msgid "Click '<b>No</b>' to skip updating/overwriting this book." +msgstr "" + +#: ffdl_plugin.py:1007 ffdl_plugin.py:1014 +msgid "Download as New Book?" +msgstr "" + +#: ffdl_plugin.py:1015 +msgid "<b>%s</b> by <b>%s</b> is already in your library with a different source URL." +msgstr "" + +#: ffdl_plugin.py:1016 +msgid "You chose not to update the existing book. Do you want to add a new book for this URL?" +msgstr "" + +#: ffdl_plugin.py:1018 +msgid "Click '<b>Yes</b>' to a new book with new URL." +msgstr "" + +#: ffdl_plugin.py:1019 +msgid "Click '<b>No</b>' to skip URL." +msgstr "" + +#: ffdl_plugin.py:1025 +msgid "Update declined by user due to differing story URL(%s)" +msgstr "" + +#: ffdl_plugin.py:1028 +msgid "Different URL" +msgstr "" + +#: ffdl_plugin.py:1033 +msgid "Metadata collected." +msgstr "" + +#: ffdl_plugin.py:1049 +msgid "Already contains %d chapters." +msgstr "" + +#: ffdl_plugin.py:1054 jobs.py:199 +msgid "Existing epub contains %d chapters, web site only has %d. 
Use Overwrite to force update." +msgstr "" + +#: ffdl_plugin.py:1056 +msgid "FFDL doesn't recognize chapters in existing epub, epub is probably from a different source. Use Overwrite to force update." +msgstr "" + +#: ffdl_plugin.py:1068 +msgid "Not Overwriting, web site is not newer." +msgstr "" + +#: ffdl_plugin.py:1148 +msgid "None of the <b>%d</b> URLs/stories given can be/need to be downloaded." +msgstr "" + +#: ffdl_plugin.py:1149 ffdl_plugin.py:1320 ffdl_plugin.py:1350 +msgid "See log for details." +msgstr "" + +#: ffdl_plugin.py:1150 +msgid "Proceed with updating your library(Error Column, if configured)?" +msgstr "" + +#: ffdl_plugin.py:1157 ffdl_plugin.py:1332 +msgid "Bad" +msgstr "" + +#: ffdl_plugin.py:1165 +msgid "FFDL download ended" +msgstr "" + +#: ffdl_plugin.py:1165 ffdl_plugin.py:1375 +msgid "FFDL log" +msgstr "" + +#: ffdl_plugin.py:1181 +msgid "Download FanFiction Book" +msgstr "" + +#: ffdl_plugin.py:1188 +msgid "Starting %d FanFictionDownLoads" +msgstr "" + +#: ffdl_plugin.py:1218 +msgid "Story Details:" +msgstr "" + +#: ffdl_plugin.py:1221 +msgid "Error Updating Metadata" +msgstr "" + +#: ffdl_plugin.py:1222 +msgid "An error has occurred while FFDL was updating calibre's metadata for <a href='%s'>%s</a>." +msgstr "" + +#: ffdl_plugin.py:1223 +msgid "The ebook has been updated, but the metadata has not." +msgstr "" + +#: ffdl_plugin.py:1275 +msgid "Finished Adding/Updating %d books." +msgstr "" + +#: ffdl_plugin.py:1283 +msgid "Starting auto conversion of %d books." +msgstr "" + +#: ffdl_plugin.py:1304 +msgid "No Good Stories for Anthology" +msgstr "" + +#: ffdl_plugin.py:1305 +msgid "No good stories/updates where downloaded, Anthology creation/update aborted." +msgstr "" + +#: ffdl_plugin.py:1310 ffdl_plugin.py:1349 +msgid "FFDL found <b>%s</b> good and <b>%s</b> bad updates." +msgstr "" + +#: ffdl_plugin.py:1317 +msgid "Are you sure you want to continue with creating/updating this Anthology?" 
+msgstr "" + +#: ffdl_plugin.py:1318 +msgid "Any updates that failed will <b>not</b> be included in the Anthology." +msgstr "" + +#: ffdl_plugin.py:1319 +msgid "However, if there's an older version, it will still be included." +msgstr "" + +#: ffdl_plugin.py:1322 +msgid "Proceed with updating this anthology and your library?" +msgstr "" + +#: ffdl_plugin.py:1330 +msgid "Good" +msgstr "" + +#: ffdl_plugin.py:1351 +msgid "Proceed with updating your library?" +msgstr "" + +#: ffdl_plugin.py:1375 +msgid "FFDL download complete" +msgstr "" + +#: ffdl_plugin.py:1388 +msgid "Merging %s books." +msgstr "" + +#: ffdl_plugin.py:1428 +msgid "FFDL Adding/Updating books." +msgstr "" + +#: ffdl_plugin.py:1435 +msgid "Updating calibre for FanFiction stories..." +msgstr "" + +#: ffdl_plugin.py:1436 +msgid "Update calibre for FanFiction stories" +msgstr "" + +#: ffdl_plugin.py:1445 +msgid "Adding/Updating %s BAD books." +msgstr "" + +#: ffdl_plugin.py:1454 +msgid "Updating calibre for BAD FanFiction stories..." +msgstr "" + +#: ffdl_plugin.py:1455 +msgid "Update calibre for BAD FanFiction stories" +msgstr "" + +#: ffdl_plugin.py:1481 +msgid "Adding format to book failed for some reason..." +msgstr "" + +#: ffdl_plugin.py:1484 +msgid "Error" +msgstr "" + +#: ffdl_plugin.py:1757 +msgid "You configured FanFictionDownLoader to automatically update Reading Lists, but you don't have the %s plugin installed anymore?" +msgstr "" + +#: ffdl_plugin.py:1769 +msgid "You configured FanFictionDownLoader to automatically update \"To Read\" Reading Lists, but you don't have any lists set?" +msgstr "" + +#: ffdl_plugin.py:1779 ffdl_plugin.py:1797 +msgid "You configured FanFictionDownLoader to automatically update Reading List '%s', but you don't have a list of that name?" +msgstr "" + +#: ffdl_plugin.py:1785 +msgid "You configured FanFictionDownLoader to automatically update \"Send to Device\" Reading Lists, but you don't have any lists set?" 
+msgstr "" + +#: ffdl_plugin.py:1906 +msgid "No story URL found." +msgstr "" + +#: ffdl_plugin.py:1909 +msgid "Not Found" +msgstr "" + +#: ffdl_plugin.py:1915 +msgid "URL is not a valid story URL." +msgstr "" + +#: ffdl_plugin.py:1918 +msgid "Bad URL" +msgstr "" + +#: ffdl_plugin.py:2054 ffdl_plugin.py:2057 +msgid "Anthology containing:" +msgstr "" + +#: ffdl_plugin.py:2055 +msgid "%s by %s" +msgstr "" + +#: ffdl_plugin.py:2077 +msgid " Anthology" +msgstr "" + +#: ffdl_plugin.py:2114 +msgid "(was set, removed for security)" +msgstr "" + +#: jobs.py:73 +msgid "Downloading FanFiction Stories" +msgstr "" + +#: jobs.py:95 +msgid "Successful:" +msgstr "" + +#: jobs.py:97 +msgid "Unsuccessful:" +msgstr "" + +#: jobs.py:111 +msgid "Download started..." +msgstr "" + +#: jobs.py:193 +msgid "Already contains %d chapters. Reuse as is." +msgstr "" + +#: jobs.py:210 +msgid "Update %s completed, added %s chapters for %s total." +msgstr "" + diff --git a/calibre-plugin/translations/pt_BR.po b/calibre-plugin/translations/pt_BR.po new file mode 100644 index 00000000..788d0ec9 --- /dev/null +++ b/calibre-plugin/translations/pt_BR.po @@ -0,0 +1,1624 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) YEAR ORGANIZATION +# +# Translators: +# Paulo_Neto <layoutbr@lexxa.com.br>, 2014 +msgid "" +msgstr "" +"Project-Id-Version: calibre-plugins\n" +"POT-Creation-Date: 2014-09-09 15:54+Central Daylight Time\n" +"PO-Revision-Date: 2014-09-03 19:20+0000\n" +"Last-Translator: Paulo_Neto <layoutbr@lexxa.com.br>\n" +"Language-Team: Portuguese (Brazil) (http://www.transifex.com/projects/p/calibre-plugins/language/pt_BR/)\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=UTF-8\n" +"Content-Transfer-Encoding: ENCODING\n" +"Generated-By: pygettext.py 1.5\n" +"Language: pt_BR\n" +"Plural-Forms: nplurals=2; plural=(n > 1);\n" + +#: __init__.py:42 +msgid "UI plugin to download FanFiction stories from various sites." +msgstr "Plugin para transferência de histórias de ficção de vários sites." 
+
+#: __init__.py:109
+msgid ""
+"Path to the calibre library. Default is to use the path stored in the "
+"settings."
+msgstr "Caminho para a biblioteca do calibre. O padrão é usar o caminho armazenado nos ajustes."
+
+#: config.py:176
+msgid "FAQs"
+msgstr "Perguntas e Respostas"
+
+#: config.py:176
+msgid "List of Supported Sites"
+msgstr "Lista de Sites Suportados"
+
+#: config.py:190
+msgid "Basic"
+msgstr "Básico"
+
+#: config.py:211
+msgid "Standard Columns"
+msgstr "Colunas Padrão"
+
+#: config.py:214
+msgid "Custom Columns"
+msgstr "Colunas Personalizadas"
+
+#: config.py:217
+msgid "Other"
+msgstr "Outros"
+
+#: config.py:338
+msgid ""
+"These settings control the basic features of the plugin--downloading "
+"FanFiction."
+msgstr "Estes ajustes controlam os recursos básicos do plugin--transferências de ficção."
+
+#: config.py:342
+msgid "Defaults Options on Download"
+msgstr "Opções Padrão na Transferência"
+
+#: config.py:346
+msgid ""
+"On each download, FFDL offers an option to select the output format. <br "
+"/>This sets what that option will default to."
+msgstr "Em cada transferência, o FFDL oferece uma opção para selecionar o formato de saída. <br />Isso define que opção será o padrão."
+
+#: config.py:348
+msgid "Default Output &Format:"
+msgstr "&Formato do Padrão de Saída:"
+
+#: config.py:363
+msgid ""
+"On each download, FFDL offers an option of what happens if that story "
+"already exists. <br />This sets what that option will default to."
+msgstr "Em cada transferência, o FFDL oferece uma opção do que acontece se essa história já existe. <br />Isso define que opção será o padrão."
+
+#: config.py:365
+msgid "Default If Story Already Exists?"
+msgstr "Padrão se a história já existe?"
+
+#: config.py:379
+msgid "Default Update Calibre &Metadata?"
+msgstr "Padrão de Atualização de &Metadados do Calibre?"
+ +#: config.py:380 +msgid "" +"On each download, FFDL offers an option to update Calibre's metadata (title," +" author, URL, tags, custom columns, etc) from the web site. <br />This sets " +"whether that will default to on or off. <br />Columns set to 'New Only' in " +"the column tabs will only be set for new books." +msgstr "Em cada transferência, o FFDL oferece uma opção para atualizar os metadados do Calibre (título, autor, URL, etiquetas, colunas personalizadas, etc) do site. <br />Isso define qual será o padrão para ativar ou desativar. <br />Colunas definidas como 'Apenas Novo' nas abas de coluna apenas serão definidas para novos livros." + +#: config.py:384 +msgid "Default Update EPUB Cover when Updating EPUB?" +msgstr "Padrão de Atualização da Capa do EPUB quando Atualizar o EPUB?" + +#: config.py:385 +msgid "" +"On each download, FFDL offers an option to update the book cover image " +"<i>inside</i> the EPUB from the web site when the EPUB is updated.<br />This" +" sets whether that will default to on or off." +msgstr "Em cada transferência, o FFDL oferece uma opção para atualizar a imagem da capa do livro <i>dentro</i> do EPUB do site quando o EPUB é atualizado. <br />Isso define qual será o padrão para ativar ou desativar." + +#: config.py:389 +msgid "Smarten Punctuation (EPUB only)" +msgstr "Pontuação Inteligente (apenas EPUB)" + +#: config.py:390 +msgid "" +"Run Smarten Punctuation from Calibre's Polish Book feature on each EPUB " +"download and update." +msgstr "Executar a pontuação inteligente do recurso de Polir Livro do calibre em cada transferência e atualização de EPUB." + +#: config.py:395 +msgid "Updating Calibre Options" +msgstr "Atualizando Opções do Calibre" + +#: config.py:399 +msgid "Delete other existing formats?" +msgstr "Apagar outros formatos existentes?" 
+ +#: config.py:400 +msgid "" +"Check this to automatically delete all other ebook formats when updating an existing book.\n" +"Handy if you have both a Nook(epub) and Kindle(mobi), for example." +msgstr "Marque esta opção para apagar automaticamente todos os outros formatos de e-books quando atualizar um livro existente.\nÚtil, por exemplo, se você tem tanto um Nook (epub) como um Kindle (mobi)." + +#: config.py:404 +msgid "Update Calibre Cover when Updating Metadata?" +msgstr "Atualizar a capa do calibre ao atualizar os metadados?" + +#: config.py:405 +msgid "" +"Update calibre book cover image from EPUB when metadata is updated. (EPUB only.)\n" +"Doesn't go looking for new images on 'Update Calibre Metadata Only'." +msgstr "Atualizar a imagem de capa do livro do calibre do EPUB ao atualizar os metadados. (EPUB apenas.)\nNão ir à procura de novas imagens ao 'Atualizar Apenas Metadados do Calibre'." + +#: config.py:409 +msgid "Keep Existing Tags when Updating Metadata?" +msgstr "Manter etiquetas existentes ao atualizar os metadados?" + +#: config.py:410 +msgid "" +"Existing tags will be kept and any new tags added.\n" +"%(cmplt)s and %(inprog)s tags will be still be updated, if known.\n" +"%(lul)s tags will be updated if %(lus)s in %(is)s.\n" +"(If Tags is set to 'New Only' in the Standard Columns tab, this has no effect.)" +msgstr "Etiquetas existentes serão conservadas e quaisquer novas etiquetas adicionadas.\nEtiquetas %(cmplt)s e %(inprog)s ainda serão atualizadas, se conhecidas.\nEtiquetas %(lul)s serão atualizadas de %(lus)s em %(is)s.\n(Se Etiquetas estiver definida para 'Apenas Novo' na aba de Colunas Padrão, esta não será afetada.)" + +#: config.py:414 +msgid "Force Author into Author Sort?" +msgstr "Forçar autor em ordenar autor?" + +#: config.py:415 +msgid "" +"If checked, the author(s) as given will be used for the Author Sort, too.\n" +"If not checked, calibre will apply it's built in algorithm which makes 'Bob Smith' sort as 'Smith, Bob', etc." 
+msgstr "Se marcado, o(s) autor(es) como determinado será usado também para a opção Ordenar Autor.\nSe não marcado, o calibre irá aplicar a construção em algoritmo que torna 'Paulo Silva' ordenado como 'Silva, Paulo', etc." + +#: config.py:419 +msgid "Force Title into Title Sort?" +msgstr "Forçar título em ordenar título?" + +#: config.py:420 +msgid "" +"If checked, the title as given will be used for the Title Sort, too.\n" +"If not checked, calibre will apply it's built in algorithm which makes 'The Title' sort as 'Title, The', etc." +msgstr "Se marcado, o título como determinado será usado também para a opção Ordenar Título.\nSe não marcado, o calibre irá aplicar a construção em algoritmo que torna 'O Título' ordenado como 'Título, O', etc." + +#: config.py:424 +msgid "Check for existing Series Anthology books?" +msgstr "Verificar por livros existentes de série de antologia?" + +#: config.py:425 +msgid "" +"Check for existings Series Anthology books using each new story's series URL before downloading.\n" +"Offer to skip downloading if a Series Anthology is found." +msgstr "Verifica por livros existentes de série de antologia usando cada nova URL de séries históricas antes de transferir.\nOferece ignorar a transferência se uma série de antologia for encontrada." + +#: config.py:429 +msgid "Check for changed Story URL?" +msgstr "Verificar URL de história alterada?" + +#: config.py:430 +msgid "" +"Warn you if an update will change the URL of an existing book.\n" +"fanfiction.net URLs will change from http to https silently." +msgstr "Avisá-lo se uma atualização vai alterar a URL de um livro existente.\nURLs fanfiction.net vão mudar de http para https automaticamente." + +#: config.py:434 +msgid "Search EPUB text for Story URL?" +msgstr "Texto de busca EPUB para URL de história?" 
+ +#: config.py:435 +msgid "" +"Look for first valid story URL inside EPUB text if not found in metadata.\n" +"Somewhat risky, could find wrong URL depending on EPUB content.\n" +"Also finds and corrects bad ffnet URLs from ficsaver.com files." +msgstr "Olhar para a primeira URL válida de história dentro do texto EPUB se não for encontrado nos metadados.\nArriscar um pouco, poderia encontrar URL errada dependendo do conteúdo EPUB.\nTambém encontra e corrige URLs ffnet inválidas de arquivos ficsaver.com." + +#: config.py:439 +msgid "Mark added/updated books when finished?" +msgstr "Marcar livros adicionados/atualizados ao concluir?" + +#: config.py:440 +msgid "" +"Mark added/updated books when finished. Use with option below.\n" +"You can also manually search for 'marked:ffdl_success'.\n" +"'marked:ffdl_failed' is also available, or search 'marked:ffdl' for both." +msgstr "Marca livros adicionados/atualizados ao concluir. Use com a opção abaixo.\nVocê pode também buscar manualmente por 'marked:ffdl_success'.\n'marked:ffdl_failed' também está disponível, ou buscar 'marked:ffdl' para ambos." + +#: config.py:444 +msgid "Show Marked books when finished?" +msgstr "Mostrar livros marcados ao concluir?" + +#: config.py:445 +msgid "" +"Show Marked added/updated books only when finished.\n" +"You can also manually search for 'marked:ffdl_success'.\n" +"'marked:ffdl_failed' is also available, or search 'marked:ffdl' for both." +msgstr "Mostrar livros marcados como adicionar/atualizar apenas ao concluir.\nVocê pode também buscar manualmente por 'marked:ffdl_success'.\n'marked:ffdl_failed' também está disponível, ou buscar 'marked:ffdl' para ambos." + +#: config.py:449 +msgid "Automatically Convert new/update books?" +msgstr "Converter automaticamente livros novos/atualizados?" + +#: config.py:450 +msgid "" +"Automatically call calibre's Convert for new/update books.\n" +"Converts to the current output format as chosen in calibre's\n" +"Preferences->Behavior settings." 
+msgstr "Chamar automaticamente a conversão do calibre para livros novos/atualizados.\nConverte para o formato de saída atual, escolhido no calibre nas\nPreferências->Ajustes de comportamento." + +#: config.py:454 +msgid "GUI Options" +msgstr "Opções de Interface" + +#: config.py:458 +msgid "Take URLs from Clipboard?" +msgstr "Obter URLs da área de transferência?" + +#: config.py:459 +msgid "Prefill URLs from valid URLs in Clipboard when Adding New." +msgstr "Preencher URLs de URLs válidas na área de transferência quando Adicionar Novo." + +#: config.py:463 +msgid "Default to Update when books selected?" +msgstr "Padrão de atualização ao selecionar livros?" + +#: config.py:464 +msgid "" +"The top FanFictionDownLoader plugin button will start Update if\n" +"books are selected. If unchecked, it will always bring up 'Add New'." +msgstr "O botão superior do plugin FanFictionDownLoader vai começar a atualização se\nlivros forem selecionados. Se desmarcado, ele vai sempre trazer 'Adicionar Novo'." + +#: config.py:468 +msgid "Keep 'Add New from URL(s)' dialog on top?" +msgstr "Manter o diálogo 'Adicionar Novo de URL(s)' em cima?" + +#: config.py:469 +msgid "" +"Instructs the OS and Window Manager to keep the 'Add New from URL(s)'\n" +"dialog on top of all other windows. Useful for dragging URLs onto it." +msgstr "Instrui o sistema operacional e Gerenciador de Janelas manter o diálogo 'Adicionar Novo de URL(s)'\nem cima de todas as outras janelas. Útil para arrastar URLs para ele." + +#: config.py:473 +msgid "Misc Options" +msgstr "Opções Diversas" + +#: config.py:478 +msgid "Include images in EPUBs?" +msgstr "Incluir imagens em EPUBs?" + +#: config.py:479 +msgid "" +"Download and include images in EPUB stories. This is equivalent to " +"adding:%(imgset)s ...to the top of %(pini)s. Your settings in %(pini)s will" +" override this." +msgstr "Transferir e incluir imagens em EPUB de história. Isso é equivalent ao adicionar:%(imgset)s ...para a parte superior de %(pini)s. 
Seus ajustes em %(pini)s vão substituir esse." + +#: config.py:483 +msgid "Inject calibre Series when none found?" +msgstr "Injetar série do calibre quando nenhuma for encontrada?" + +#: config.py:484 +msgid "" +"If no series is found, inject the calibre series (if there is one) so it " +"appears on the FFDL title page(not cover)." +msgstr "Se nenhuma série for encontrada, injetar a série do calibre (se houver) para que ele apareça na página do título FFDL (não capa)." + +#: config.py:488 +msgid "Reject List" +msgstr "Lista de Rejeição" + +#: config.py:492 +msgid "Edit Reject URL List" +msgstr "Editar lista de URLs rejeitadas" + +#: config.py:493 +msgid "Edit list of URLs FFDL will automatically Reject." +msgstr "Editar lista de URLs FFDL rejeitará automaticamente." + +#: config.py:497 config.py:571 +msgid "Add Reject URLs" +msgstr "Adicionar URLs Rejeitadas" + +#: config.py:498 +msgid "Add additional URLs to Reject as text." +msgstr "Adicionar URLs adicionais para rejeitar como texto." + +#: config.py:502 +msgid "Edit Reject Reasons List" +msgstr "Editar lista de razões de rejeição" + +#: config.py:503 config.py:562 +msgid "Customize the Reasons presented when Rejecting URLs" +msgstr "Personalizar as razões apresentadas ao rejeitar URLs" + +#: config.py:507 +msgid "Reject Without Confirmation?" +msgstr "Rejeitar sem confirmação?" + +#: config.py:508 +msgid "Always reject URLs on the Reject List without stopping and asking." +msgstr "Sempre rejeitar URLs na lista de rejeição sem parar ou perguntar." 
+ +#: config.py:546 +msgid "Edit Reject URLs List" +msgstr "Editar lista de URLs rejeitadas" + +#: config.py:560 +msgid "Reject Reasons" +msgstr "Razões da Rejeição" + +#: config.py:561 +msgid "Customize Reject List Reasons" +msgstr "Personalizar lista de razões de rejeição" + +#: config.py:569 +msgid "Reason why I rejected it" +msgstr "Razão por que o rejeitou" + +#: config.py:569 +msgid "Title by Author" +msgstr "Título por Autor" + +#: config.py:572 +msgid "" +"Add Reject URLs. Use: <b>http://...,note</b> or <b>http://...,title by " +"author - note</b><br>Invalid story URLs will be ignored." +msgstr "Adicionar URLs Rejeitadas. Use: <b>http://...,nota</b> ou <b>http://...,título por autor - nota</b><br>URLs inválidas de história serão ignoradas." + +#: config.py:573 +msgid "" +"One URL per line:\n" +"<b>http://...,note</b>\n" +"<b>http://...,title by author - note</b>" +msgstr "Uma URL por linha:\n<b>http://...,nota</b>\n<b>http://...,título por autor - nota</b>" + +#: config.py:575 dialogs.py:1012 +msgid "Add this reason to all URLs added:" +msgstr "Adicionar essa razão para todas as URLs adicionadas:" + +#: config.py:590 +msgid "" +"These settings provide more detailed control over what metadata will be " +"displayed inside the ebook as well as let you set %(isa)s and %(u)s/%(p)s " +"for different sites." +msgstr "Estes ajustes fornecem um controle mais detalhado sobre quais metadados serão exibidos dentro do ebook, bem como permitem que você ajuste %(isa)s e %(u)s/%(p)s para diferentes sites." + +#: config.py:608 +msgid "View Defaults" +msgstr "Padrões de Visualização" + +#: config.py:609 +msgid "" +"View all of the plugin's configurable settings\n" +"and their default settings." +msgstr "Visualiza todos os ajustes configuráveis do plugin\ne seus ajustes padrão." 
+ +#: config.py:627 +msgid "Plugin Defaults (%s) (Read-Only)" +msgstr "Padrões do Plugin (%s) (Somente Leitura)" + +#: config.py:628 config.py:634 +msgid "" +"These are all of the plugin's configurable options\n" +"and their default settings." +msgstr "Estas são todas as opções configuráveis ​​do plugin\ne seus ajustes padrão." + +#: config.py:629 +msgid "Plugin Defaults" +msgstr "Padrões do Plugin" + +#: config.py:645 dialogs.py:542 dialogs.py:645 +msgid "OK" +msgstr "OK" + +#: config.py:665 +msgid "" +"These settings provide integration with the %(rl)s Plugin. %(rl)s can " +"automatically send to devices and change custom columns. You have to create" +" and configure the lists in %(rl)s to be useful." +msgstr "Estes ajustes permitem a integração com o Plugin %(rl)s. %(rl)s pode enviar automaticamente para os dispositivos e alterar colunas personalizadas. Você tem que criar e configurar as listas de %(rl)s para ser útil." + +#: config.py:670 +msgid "Add new/updated stories to \"Send to Device\" Reading List(s)." +msgstr "Adicionar histórias novas/atualizadas para Lista(s) de Leitura \"Enviar para o Dispositivo\"." + +#: config.py:671 +msgid "" +"Automatically add new/updated stories to these lists in the %(rl)s plugin." +msgstr "Adicionar automaticamente histórias novas/atualizadas para essas lista no plugin %(rl)s." + +#: config.py:676 +msgid "\"Send to Device\" Reading Lists" +msgstr "Lista(s) de Leitura \"Enviar para o Dispositivo\"" + +#: config.py:677 config.py:680 config.py:693 config.py:696 +msgid "" +"When enabled, new/updated stories will be automatically added to these " +"lists." +msgstr "Quando ativo, histórias novas/atualizadas serão automaticamente adicionadas para essas listas." + +#: config.py:686 +msgid "Add new/updated stories to \"To Read\" Reading List(s)." +msgstr "Adicionar histórias novas/atualizadas para Lista(s) de Leitura \"Para Ler\"." 
+ +#: config.py:687 +msgid "" +"Automatically add new/updated stories to these lists in the %(rl)s plugin.\n" +"Also offers menu option to remove stories from the \"To Read\" lists." +msgstr "Adicionar automaticamente histórias novas/atualizadas para essas listas no plugin %(rl)s.\nTambém oferece opção de menu para remover histórias das listas \"Para Ler\"." + +#: config.py:692 +msgid "\"To Read\" Reading Lists" +msgstr "Listas de Leitura \"Para Ler\"" + +#: config.py:702 +msgid "Add stories back to \"Send to Device\" Reading List(s) when marked \"Read\"." +msgstr "Adicionar histórias de volta à Lista(s) de Leitura \"Enviar para o Dispositivo\" quando marcado como \"Ler\"." + +#: config.py:703 +msgid "" +"Menu option to remove from \"To Read\" lists will also add stories back to " +"\"Send to Device\" Reading List(s)" +msgstr "Opção de menu para remover listas \"Para Ler\" também irá adicionar histórias de volta à Lista(s) de Leitura \"Enviar para o Dispositivo\"" + +#: config.py:725 +msgid "" +"The %(gc)s plugin can create cover images for books using various metadata " +"and configurations. If you have GC installed, FFDL can run GC on new " +"downloads and metadata updates. Pick a GC setting by site or Default." +msgstr "O plugin %(gc)s pode criar imagens das capas dos livros usando vários metadados e configurações. Se você tiver instalado GC, o FFDL pode executar GC em novas transferências e atualizações de metadados. Escolha um ajuste do GC pelo site ou Padrão." + +#: config.py:743 config.py:747 config.py:760 +msgid "Default" +msgstr "Padrão" + +#: config.py:748 +msgid "" +"On Metadata update, run %(gc)s with this setting, if not selected for " +"specific site." +msgstr "Na atualização dos metadados, execute %(gc)s com este ajuste, se não for selecionado para local específico." + +#: config.py:751 +msgid "On Metadata update, run %(gc)s with this setting for %(site)s stories." 
+msgstr "Na atualização de metadados, execute %(gc)s com este ajuste para %(site)s de história." + +#: config.py:774 +msgid "Run %(gc)s Only on New Books" +msgstr "Executar %(gc)s Apenas em Novos Livros" + +#: config.py:775 +msgid "Default is to run GC any time the calibre metadata is updated." +msgstr "O padrão é para executar GC, e a qualquer momento os metadados do calibre é atualizado." + +#: config.py:779 +msgid "Allow %(gcset)s from %(pini)s to override" +msgstr "Permitir %(gcset)s de %(pini)s para substituir" + +#: config.py:780 +msgid "" +"The %(pini)s parameter %(gcset)s allows you to choose a GC setting based on " +"metadata rather than site, but it's much more complex.<br \\>%(gcset)s is " +"ignored when this is off." +msgstr "O %(pini)s parâmetro %(gcset)s permite que você escolha um ajuste de GC com base em metadados ao invés do site, mas é muito mais complexo.<br \\>%(gcset)s é ignorado quando isso é desativado." + +#: config.py:784 +msgid "Use calibre's Polish feature to inject/update the cover" +msgstr "Usar o recurso de Polir do caibre para injetar/atualizar a capa" + +#: config.py:785 +msgid "" +"Calibre's Polish feature will be used to inject or update the generated " +"cover into the ebook, EPUB only." +msgstr "O recurso de Polir do Calibre será usado para injetar ou atualizar a capa gerada no ebook, apenas EPUB." + +#: config.py:799 +msgid "" +"These settings provide integration with the %(cp)s Plugin. %(cp)s can " +"automatically update custom columns with page, word and reading level " +"statistics. You have to create and configure the columns in %(cp)s first." +msgstr "Esses ajustes fornecem integração com o Plugin %(cp)s. %(cp)s pode atualizar automaticamente colunas personalizadas com a página, palavra e estatísticas de nível de leitura. Você primeiro tem que criar e configurar as colunas em %(cp)s." 
+ +#: config.py:804 +msgid "" +"If any of the settings below are checked, when stories are added or updated," +" the %(cp)s Plugin will be called to update the checked statistics." +msgstr "Se qualquer uma das definições abaixo são marcadas, quando as histórias são adicionadas ou atualizadas, o Plugin %(cp)s será chamado para atualizar as estatísticas marcadas." + +#: config.py:810 +msgid "Which column and algorithm to use are configured in %(cp)s." +msgstr "Qual coluna e algoritmo será usado são configurados em %(cp)s." + +#: config.py:818 +msgid "" +"Will overwrite word count from FFDL metadata if set to update the same " +"custom column." +msgstr "Substituirá a contagem de palavras de metadados do FFDL se configurado para atualizar a mesma coluna personalizada." + +#: config.py:849 +msgid "" +"These controls aren't plugin settings as such, but convenience buttons for " +"setting Keyboard shortcuts and getting all the FanFictionDownLoader " +"confirmation dialogs back again." +msgstr "Esses controles não são ajustes do plugin como parece, mas botões de conveniência para a criação de atalhos de teclado e retornar todos os diálogos de confirmação do FanFictionDownLoader." + +#: config.py:854 +msgid "Keyboard shortcuts..." +msgstr "Atalhos do teclado..." + +#: config.py:855 +msgid "Edit the keyboard shortcuts associated with this plugin" +msgstr "Edite os atalhos de teclado associados com este plugin" + +#: config.py:859 +msgid "Reset disabled &confirmation dialogs" +msgstr "Redefinir diálogos de &confirmação desativados" + +#: config.py:860 +msgid "Reset all show me again dialogs for the FanFictionDownLoader plugin" +msgstr "Redefinir todos os diálogos mostrar novamente do plugin FanFictionDownLoader" + +#: config.py:864 +msgid "&View library preferences..." +msgstr "&Visualizar preferências da biblioteca..." 
+ +#: config.py:865 +msgid "View data stored in the library database for this plugin" +msgstr "Visualizar dados armazenados no banco de dados da biblioteca para este plugin" + +#: config.py:876 +msgid "Done" +msgstr "Ok" + +#: config.py:877 +msgid "Confirmation dialogs have all been reset" +msgstr "Os diálogos de confirmação foram redefinidos" + +#: config.py:925 +msgid "Category" +msgstr "Categoria" + +#: config.py:926 +msgid "Genre" +msgstr "Gênero" + +#: config.py:927 +msgid "Language" +msgstr "Idioma" + +#: config.py:928 ffdl_plugin.py:1152 ffdl_plugin.py:1324 ffdl_plugin.py:1354 +msgid "Status" +msgstr "Estado" + +#: config.py:929 +msgid "Status:%(cmplt)s" +msgstr "Estado:%(cmplt)s" + +#: config.py:930 +msgid "Status:%(inprog)s" +msgstr "Estado:%(inprog)s" + +#: config.py:931 config.py:1065 +msgid "Series" +msgstr "Séries" + +#: config.py:932 +msgid "Characters" +msgstr "Caracteres" + +#: config.py:933 +msgid "Relationships" +msgstr "Relacionamentos" + +#: config.py:934 +msgid "Published" +msgstr "Publicado" + +#: config.py:935 ffdl_plugin.py:1437 ffdl_plugin.py:1456 +msgid "Updated" +msgstr "Atualizado" + +#: config.py:936 +msgid "Created" +msgstr "Criado" + +#: config.py:937 +msgid "Rating" +msgstr "Classificação" + +#: config.py:938 +msgid "Warnings" +msgstr "Avisos" + +#: config.py:939 +msgid "Chapters" +msgstr "Capítulos" + +#: config.py:940 +msgid "Words" +msgstr "Palavras" + +#: config.py:941 +msgid "Site" +msgstr "Site" + +#: config.py:942 +msgid "Story ID" +msgstr "ID da História" + +#: config.py:943 +msgid "Author ID" +msgstr "ID do Autor" + +#: config.py:944 +msgid "Extra Tags" +msgstr "Etiquetas Extras" + +#: config.py:945 config.py:1057 dialogs.py:804 dialogs.py:900 +#: ffdl_plugin.py:1152 ffdl_plugin.py:1324 ffdl_plugin.py:1354 +msgid "Title" +msgstr "Título" + +#: config.py:946 +msgid "Story URL" +msgstr "URL da História" + +#: config.py:947 +msgid "Description" +msgstr "Descrição" + +#: config.py:948 dialogs.py:804 dialogs.py:900 
ffdl_plugin.py:1152 +#: ffdl_plugin.py:1324 ffdl_plugin.py:1354 +msgid "Author" +msgstr "Autor" + +#: config.py:949 +msgid "Author URL" +msgstr "URL do Autor" + +#: config.py:950 +msgid "File Format" +msgstr "Formato do Arquivo" + +#: config.py:951 +msgid "File Extension" +msgstr "Extensão do Arquivo" + +#: config.py:952 +msgid "Site Abbrev" +msgstr "Abreviação do Site" + +#: config.py:953 +msgid "FFDL Version" +msgstr "Versão do FFDL" + +#: config.py:968 +msgid "" +"If you have custom columns defined, they will be listed below. Choose a " +"metadata value type to fill your columns automatically." +msgstr "Se você tem colunas personalizadas definidas, elas serão listadas abaixo. Escolha um tipo de valor de metadados para preencher as colunas automaticamente." + +#: config.py:993 +msgid "Update this %s column(%s) with..." +msgstr "Atualizar essa %s coluna(%s) com..." + +#: config.py:1003 +msgid "Values that aren't valid for this enumeration column will be ignored." +msgstr "Valores que não são válidos para esta coluna de enumeração serão ignorados." + +#: config.py:1003 config.py:1005 +msgid "Metadata values valid for this type of column." +msgstr "Valores de metadados válidos para este tipo de coluna." + +#: config.py:1008 config.py:1084 +msgid "New Only" +msgstr "Apenas Novo" + +#: config.py:1009 +msgid "" +"Write to %s(%s) only for new\n" +"books, not updates to existing books." +msgstr "Gravar para %s(%s) apenas para novos\nlivros, não atualizações para livros existentes." + +#: config.py:1020 +msgid "Allow %(ccset)s from %(pini)s to override" +msgstr "Permitir %(ccset)s de %(pini)s substituir" + +#: config.py:1021 +msgid "" +"The %(pini)s parameter %(ccset)s allows you to set custom columns to site " +"specific values that aren't common to all sites.<br />%(ccset)s is ignored " +"when this is off." 
+msgstr "O %(pini)s parâmetro %(ccset)s permite definir colunas personalizadas para valores específicos do site que não são comuns a todos os sites.<br />%(ccset)s é ignorado quando este é desativado." + +#: config.py:1026 +msgid "Special column:" +msgstr "Coluna especial:" + +#: config.py:1031 +msgid "Update/Overwrite Error Column:" +msgstr "Atualizar/Substituir Coluna do Erro:" + +#: config.py:1032 +msgid "" +"When an update or overwrite of an existing story fails, record the reason in this column.\n" +"(Text and Long Text columns only.)" +msgstr "Quando uma atualização ou substituição de uma história já existente falhar, registre o motivo nesta coluna.\n(Apenas colunas de Texto e Texto Longo.)" + +#: config.py:1058 +msgid "Author(s)" +msgstr "Autor(es)" + +#: config.py:1059 +msgid "Publisher" +msgstr "Editor" + +#: config.py:1060 +msgid "Tags" +msgstr "Etiquetas" + +#: config.py:1061 +msgid "Languages" +msgstr "Idiomas" + +#: config.py:1062 +msgid "Published Date" +msgstr "Data de Publicação" + +#: config.py:1063 +msgid "Date" +msgstr "Data" + +#: config.py:1064 +msgid "Comments" +msgstr "Comentários" + +#: config.py:1066 +msgid "Ids(url id only)" +msgstr "Ids(apenas id da url)" + +#: config.py:1071 +msgid "" +"The standard calibre metadata columns are listed below. You may choose " +"whether FFDL will fill each column automatically on updates or only for new " +"books." +msgstr "As colunas padrão de metadados do calibre estão listadas abaixo. Você pode escolher se o FFDL irá preencher cada coluna automaticamente nas atualizações ou apenas para novos livros." + +#: config.py:1085 +msgid "" +"Write to %s only for new\n" +"books, not updates to existing books." +msgstr "Gravar para %s apenas para novos\nlivros, não atualizações dos livros existentes." 
+ +#: dialogs.py:69 +msgid "Skip" +msgstr "Ignorar" + +#: dialogs.py:70 +msgid "Add New Book" +msgstr "Adicionar Novo Livro" + +#: dialogs.py:71 +msgid "Update EPUB if New Chapters" +msgstr "Atualizar EPUB se houver novos capítulos" + +#: dialogs.py:72 +msgid "Update EPUB Always" +msgstr "Sempre Atualizar EPUB" + +#: dialogs.py:73 +msgid "Overwrite if Newer" +msgstr "Substituir se for mais novo" + +#: dialogs.py:74 +msgid "Overwrite Always" +msgstr "Substituir Sempre" + +#: dialogs.py:75 +msgid "Update Calibre Metadata Only" +msgstr "Atualizar Apenas os Metadados do Calibre" + +#: dialogs.py:239 ffdl_plugin.py:89 +msgid "FanFictionDownLoader" +msgstr "FanFictionDownLoader" + +#: dialogs.py:256 dialogs.py:703 +msgid "Show Download Options" +msgstr "Mostrar Opções de Transferência" + +#: dialogs.py:275 dialogs.py:720 +msgid "Output &Format:" +msgstr "&Formato de Saída:" + +#: dialogs.py:283 dialogs.py:728 +msgid "" +"Choose output format to create. May set default from plugin configuration." +msgstr "Escolha o formato de saída para criar. Pode-se definir o padrão de configuração do plugin." + +#: dialogs.py:311 dialogs.py:745 +msgid "Update Calibre &Metadata?" +msgstr "Atualizar &Metadados do Calibre?" + +#: dialogs.py:312 dialogs.py:746 +msgid "" +"Update metadata for existing stories in Calibre from web site?\n" +"(Columns set to 'New Only' in the column tabs will only be set for new books.)" +msgstr "Atualizar metadados de histórias existentes no Calibre do site?\n(Colunas definidas como 'Apenas Novo' nas abas da coluna será definido apenas para novos livros.)" + +#: dialogs.py:318 dialogs.py:750 +msgid "Update EPUB Cover?" +msgstr "Atualizar Capa do EPUB?" + +#: dialogs.py:319 dialogs.py:751 +msgid "" +"Update book cover image from site or defaults (if found) <i>inside</i> the " +"EPUB when EPUB is updated." +msgstr "Atualizar imagem da capa do livro do site ou padrões (se encontrado) <i>dentro</i> do EPUB quando o EPUB é atualizado." 
+ +#: dialogs.py:366 +msgid "Story URL(s) for anthology, one per line:" +msgstr "URL(s) de história de antologia, uma por linha:" + +#: dialogs.py:367 +msgid "" +"URLs for stories to include in the anthology, one per line.\n" +"Will take URLs from clipboard, but only valid URLs." +msgstr "URLs de histórias para incluir na antologia, uma por linha.\nTerá URLs da área de transferência, mas apenas URLs válidas." + +#: dialogs.py:368 +msgid "If Story Already Exists in Anthology?" +msgstr "Se a história já existe na antologia?" + +#: dialogs.py:369 +msgid "" +"What to do if there's already an existing story with the same URL in the " +"anthology." +msgstr "O que fazer se já existe uma história com a mesma URL na antologia." + +#: dialogs.py:378 +msgid "Story URL(s), one per line:" +msgstr "URL(s) de história, uma por linha:" + +#: dialogs.py:379 +msgid "" +"URLs for stories, one per line.\n" +"Will take URLs from clipboard, but only valid URLs.\n" +"Add [1,5] after the URL to limit the download to chapters 1-5." +msgstr "URLs de histórias, uma por linha.\nTerá URLs da área de transferência, mas apenas URLs válidas.\nAdicione [1,5] depois da URL para limitar a transferência para capítulos 1-5." + +#: dialogs.py:380 +msgid "If Story Already Exists?" +msgstr "Se a história já existe?" + +#: dialogs.py:381 +msgid "" +"What to do if there's already an existing story with the same URL or title " +"and author." +msgstr "O que fazer se já existe uma história com a mesma URL ou título e autor." + +#: dialogs.py:481 +msgid "For Individual Books" +msgstr "Para Livros Individuais" + +#: dialogs.py:482 +msgid "Get URLs and go to dialog for individual story downloads." +msgstr "Obter URLs e ir para a caixa de diálogo para transferências individuais de história." + +#: dialogs.py:486 +msgid "For Anthology Epub" +msgstr "Para Antologia de Epub" + +#: dialogs.py:487 +msgid "" +"Get URLs and go to dialog for Anthology download.\n" +"Requires %s plugin." 
+msgstr "Obter URLs e ir para a caixa de diálogo de transferência de Antologia.\nÉ necessário o plugin %s." + +#: dialogs.py:492 dialogs.py:546 dialogs.py:573 +msgid "Cancel" +msgstr "Cancelar" + +#: dialogs.py:524 +msgid "Password" +msgstr "Senha" + +#: dialogs.py:525 +msgid "Author requires a password for this story(%s)." +msgstr "O autor precisa de uma senha para essa história (%s)." + +#: dialogs.py:530 +msgid "User/Password" +msgstr "Usuário/Senha" + +#: dialogs.py:531 +msgid "%s requires you to login to download this story." +msgstr "%s precisa que você inicie a sessão para transferir esta história." + +#: dialogs.py:533 +msgid "User:" +msgstr "Usuário:" + +#: dialogs.py:537 +msgid "Password:" +msgstr "Senha:" + +#: dialogs.py:568 +msgid "Fetching metadata for stories..." +msgstr "Buscando metadados para histórias..." + +#: dialogs.py:569 +msgid "Downloading metadata for stories" +msgstr "Transferindo metadados para histórias" + +#: dialogs.py:570 +msgid "Fetched metadata for" +msgstr "Metadados pesquisados para" + +#: dialogs.py:640 ffdl_plugin.py:325 +msgid "About FanFictionDownLoader" +msgstr "Sobre o FanFictionDownLoader" + +#: dialogs.py:694 +msgid "Remove selected books from the list" +msgstr "Remover livros selecionados da lista" + +#: dialogs.py:733 +msgid "Update Mode:" +msgstr "Modo de Atualização:" + +#: dialogs.py:736 +msgid "" +"What sort of update to perform. May set default from plugin configuration." +msgstr "Que tipo de atualização executar. Pode-se definir o padrão da configuração do plugin." + +#: dialogs.py:804 ffdl_plugin.py:1152 ffdl_plugin.py:1324 ffdl_plugin.py:1354 +msgid "Comment" +msgstr "Comentários" + +#: dialogs.py:872 +msgid "Are you sure you want to remove this book from the list?" +msgstr "Você deseja realmente remover este livro da lista?" + +#: dialogs.py:874 +msgid "Are you sure you want to remove the selected %d books from the list?" +msgstr "Você deseja realmente remover os %d livros selecionados da lista?" 
+
+#: dialogs.py:900
+msgid "Note"
+msgstr "Nota"
+
+#: dialogs.py:939
+msgid "Select or Edit Reject Note."
+msgstr "Selecionar ou editar a nota de rejeição."
+
+#: dialogs.py:947
+msgid "Are you sure you want to remove this URL from the list?"
+msgstr "Você deseja realmente remover esta URL da lista?"
+
+#: dialogs.py:949
+msgid "Are you sure you want to remove the %d selected URLs from the list?"
+msgstr "Você deseja realmente remover as %d URLs selecionadas da lista?"
+
+#: dialogs.py:967
+msgid "List of Books to Reject"
+msgstr "Lista de Livros para Rejeitar"
+
+#: dialogs.py:980
+msgid ""
+"FFDL will remember these URLs and display the note and offer to reject them "
+"if you try to download them again later."
+msgstr "O FFDL vai lembrar dessas URLs e exibir a nota e oferecer para rejeitá-las se você tentar transferi-las novamente."
+
+#: dialogs.py:994
+msgid "Remove selected URL(s) from the list"
+msgstr "Remover URL(s) selecionada(s) da lista"
+
+#: dialogs.py:1009 dialogs.py:1013
+msgid "This will be added to whatever note you've set for each URL above."
+msgstr "Isto será adicionado a qualquer nota que você definiu para cada URL acima."
+
+#: dialogs.py:1022
+msgid "Delete Books (including books without FanFiction URLs)?"
+msgstr "Apagar Livros (incluindo livros sem URLs de Ficção)?"
+
+#: dialogs.py:1023
+msgid "Delete the selected books after adding them to the Rejected URLs list."
+msgstr "Apagar os livros selecionados depois de adicioná-los à lista de URLs Rejeitadas."
+ +#: ffdl_plugin.py:90 +msgid "Download FanFiction stories from various web sites" +msgstr "Transferir histórias de ficção de vários sites" + +#: ffdl_plugin.py:120 +msgid "FanFictionDL" +msgstr "FanFictionDL" + +#: ffdl_plugin.py:243 +msgid "&Add New from URL(s)" +msgstr "&Adicionar Novo de URL(s)" + +#: ffdl_plugin.py:245 +msgid "Add New FanFiction Book(s) from URL(s)" +msgstr "Adicionar Novo(s) Livro(s) de Ficção de URL(s)" + +#: ffdl_plugin.py:248 +msgid "&Update Existing FanFiction Book(s)" +msgstr "At&ualizar Livro(s) Existente(s) de Ficção" + +#: ffdl_plugin.py:254 +msgid "Get Story URLs to Download from Web Page" +msgstr "Obter URLs de história para transferir da página do site" + +#: ffdl_plugin.py:258 +msgid "&Make Anthology Epub Manually from URL(s)" +msgstr "Cr&iar Antologia do Epub Manualmente de URL(s)" + +#: ffdl_plugin.py:260 +msgid "Make FanFiction Anthology Epub Manually from URL(s)" +msgstr "Criar Antologia de Epub de Ficção Manualmente de URLs" + +#: ffdl_plugin.py:263 +msgid "&Update Anthology Epub" +msgstr "At&ualizar Antologia de Epub" + +#: ffdl_plugin.py:265 +msgid "Update FanFiction Anthology Epub" +msgstr "Atualizar Antologia de Epub de Ficção" + +#: ffdl_plugin.py:273 +msgid "Add to \"To Read\" and \"Send to Device\" Lists" +msgstr "Adicionar às listas \"Para Ler\" e \"Enviar para o Dispositivo\"" + +#: ffdl_plugin.py:275 +msgid "Remove from \"To Read\" and add to \"Send to Device\" Lists" +msgstr "Remover das listas \"Para Ler\" e adicionar para \"Enviar para o Dispositivo\"" + +#: ffdl_plugin.py:277 ffdl_plugin.py:282 +msgid "Remove from \"To Read\" Lists" +msgstr "Remover das listas \"Para Ler\"" + +#: ffdl_plugin.py:279 +msgid "Add Selected to \"Send to Device\" Lists" +msgstr "Adicionar lista selecionada para \"Enviar para o Dispositivo\"" + +#: ffdl_plugin.py:281 +msgid "Add to \"To Read\" Lists" +msgstr "Adicionar às listas \"Para Ler\"" + +#: ffdl_plugin.py:297 +msgid "Get URLs from Selected Books" +msgstr "Obter URLs dos livros 
selecionados" + +#: ffdl_plugin.py:303 ffdl_plugin.py:397 +msgid "Get Story URLs from Web Page" +msgstr "Obter URLs de história da página do site" + +#: ffdl_plugin.py:308 +msgid "Reject Selected Books" +msgstr "Rejeitar livros selecionados" + +#: ffdl_plugin.py:316 +msgid "&Configure Plugin" +msgstr "&Configurar Plugin" + +#: ffdl_plugin.py:319 +msgid "Configure FanFictionDownLoader" +msgstr "Configurar o FanFictionDownLoader" + +#: ffdl_plugin.py:322 +msgid "About Plugin" +msgstr "Sobre o Plugin" + +#: ffdl_plugin.py:379 +msgid "Cannot Update Reading Lists from Device View" +msgstr "Não foi possível atualizar as listas de leitura do dispositivo" + +#: ffdl_plugin.py:383 +msgid "No Selected Books to Update Reading Lists" +msgstr "Nenhum livro selecionado para atualizar nas listas de leitura" + +#: ffdl_plugin.py:408 ffdl_plugin.py:460 +msgid "List of Story URLs" +msgstr "Lista de URLs de história" + +#: ffdl_plugin.py:409 +msgid "No Valid Story URLs found on given page." +msgstr "Nenhuma URL de história foi encontrada na página sugerida." + +#: ffdl_plugin.py:424 +msgid "No Selected Books to Get URLs From" +msgstr "Nenhum livro selecionado para obter URLs" + +#: ffdl_plugin.py:442 +msgid "Collecting URLs for stories..." +msgstr "Coletando URLs de história..." + +#: ffdl_plugin.py:443 +msgid "Get URLs for stories" +msgstr "Obtendo URLs de histórias" + +#: ffdl_plugin.py:444 ffdl_plugin.py:491 ffdl_plugin.py:678 +msgid "URL retrieved" +msgstr "URL recuperada" + +#: ffdl_plugin.py:464 +msgid "List of URLs" +msgstr "Lista de URLs" + +#: ffdl_plugin.py:465 +msgid "No Story URLs found in selected books." +msgstr "Nenhuma URL de história foi encontrada nos livros selecionados." + +#: ffdl_plugin.py:481 +msgid "No Selected Books have URLs to Reject" +msgstr "Nenhum livro selecionado tem URLs para rejeitar" + +#: ffdl_plugin.py:489 +msgid "Collecting URLs for Reject List..." +msgstr "Coletando URLs para lista de rejeição..." 
+
+#: ffdl_plugin.py:490
+msgid "Get URLs for Reject List"
+msgstr "Obtendo URLs para lista de rejeição"
+
+#: ffdl_plugin.py:525
+msgid "Proceed to Remove?"
+msgstr "Proceder para remover?"
+
+#: ffdl_plugin.py:525
+msgid "Rejecting FFDL URLs: None of the books selected have FanFiction URLs."
+msgstr "Rejeitando URLs FFDL: Nenhum dos livros selecionados tem URLs de ficção."
+
+#: ffdl_plugin.py:547
+msgid "Cannot Make Anthologys without %s"
+msgstr "Não foi possível criar antologias sem %s"
+
+#: ffdl_plugin.py:551 ffdl_plugin.py:655
+msgid "Cannot Update Books from Device View"
+msgstr "Não foi possível atualizar livros do dispositivo"
+
+#: ffdl_plugin.py:555
+msgid "Can only update 1 anthology at a time"
+msgstr "Pode ser atualizada apenas 1 antologia de cada vez"
+
+#: ffdl_plugin.py:564
+msgid "Can only Update Epub Anthologies"
+msgstr "Pode ser atualizada apenas antologias de Epubs"
+
+#: ffdl_plugin.py:582 ffdl_plugin.py:583
+msgid "Cannot Update Anthology"
+msgstr "Não foi possível atualizar antologia"
+
+#: ffdl_plugin.py:583
+msgid ""
+"Book isn't an FFDL Anthology or contains book(s) without valid FFDL URLs."
+msgstr "O livro não é uma Antologia FFDL ou contém livro(s) sem URLs FFDL válidas."
+
+#: ffdl_plugin.py:641
+msgid ""
+"There are %d stories in the current anthology that are <b>not</b> going to "
+"be kept if you go ahead."
+msgstr "Existem %d histórias na antologia atual que <b>não</b> serão mantidas, se você seguir em frente."
+
+#: ffdl_plugin.py:642
+msgid "Story URLs that will be removed:"
+msgstr "URLs de história que serão removidas:"
+
+#: ffdl_plugin.py:644
+msgid "Update anyway?"
+msgstr "Atualizar mesmo assim?"
+
+#: ffdl_plugin.py:645
+msgid "Stories Removed"
+msgstr "Histórias Removidas"
+
+#: ffdl_plugin.py:662
+msgid "No Selected Books to Update"
+msgstr "Nenhum livro selecionado para atualizar"
+
+#: ffdl_plugin.py:676
+msgid "Collecting stories for update..."
+msgstr "Coletando histórias para atualizar..."
+ +#: ffdl_plugin.py:677 +msgid "Get stories for updates" +msgstr "Obter histórias para atualização" + +#: ffdl_plugin.py:687 +msgid "Update Existing List" +msgstr "Atualizar lista existente" + +#: ffdl_plugin.py:745 +msgid "Started fetching metadata for %s stories." +msgstr "Começou a busca de metadados para %s histórias." + +#: ffdl_plugin.py:751 +msgid "No valid story URLs entered." +msgstr "Nenhuma história válida nas URLs inseridas." + +#: ffdl_plugin.py:776 ffdl_plugin.py:782 +msgid "Reject URL?" +msgstr "Rejeitar URL?" + +#: ffdl_plugin.py:783 ffdl_plugin.py:801 +msgid "<b>%s</b> is on your Reject URL list:" +msgstr "<b>%s</b> está em sua lista de URLs rejeitadas:" + +#: ffdl_plugin.py:785 +msgid "Click '<b>Yes</b>' to Reject." +msgstr "Clique em '<b>Sim</b>' para rejeitar." + +#: ffdl_plugin.py:786 ffdl_plugin.py:890 +msgid "Click '<b>No</b>' to download anyway." +msgstr "Clique em '<b>Não</b>' para transferir de qualquer maneira." + +#: ffdl_plugin.py:788 +msgid "Story on Reject URLs list (%s)." +msgstr "História na lista de URLs rejeitadas (%s)." + +#: ffdl_plugin.py:791 +msgid "Rejected" +msgstr "Rejeitado" + +#: ffdl_plugin.py:794 +msgid "Remove Reject URL?" +msgstr "Remover URL rejeitada?" + +#: ffdl_plugin.py:800 +msgid "Remove URL from Reject List?" +msgstr "Remover URL da lista de rejeição?" + +#: ffdl_plugin.py:803 +msgid "Click '<b>Yes</b>' to remove it from the list," +msgstr "Clique em '<b>Sim</b>' para removê-la da lista." + +#: ffdl_plugin.py:804 +msgid "Click '<b>No</b>' to leave it on the list." +msgstr "Clique em '<b>Não</b>' para mantê-la na lista." + +#: ffdl_plugin.py:821 +msgid "Cannot update non-epub format." +msgstr "Não é possível atualizar formatos que não sejam epub." + +#: ffdl_plugin.py:866 +msgid "Are You an Adult?" +msgstr "Você é adulto?" + +#: ffdl_plugin.py:867 +msgid "" +"%s requires that you be an adult. Please confirm you are an adult in your " +"locale:" +msgstr "%s requer que você seja um adulto. 
Confirme que você é um adulto em sua localidade:" + +#: ffdl_plugin.py:881 +msgid "Skip Story?" +msgstr "Ignorar história?" + +#: ffdl_plugin.py:887 +msgid "Skip Anthology Story?" +msgstr "Ignorar antologia da história?" + +#: ffdl_plugin.py:888 +msgid "" +"\"<b>%s</b>\" is in series \"<b><a href=\"%s\">%s</a></b>\" that you have an" +" anthology book for." +msgstr "\"<b>%s</b>\" está na série \"<b><a href=\"%s\">%s</a></b>\" que você tem um livro de antologia." + +#: ffdl_plugin.py:889 +msgid "Click '<b>Yes</b>' to Skip." +msgstr "Clique em '<b>Sim</b>' para ignorar." + +#: ffdl_plugin.py:892 +msgid "Story in Series Anthology(%s)." +msgstr "História na série de antologia (%s)." + +#: ffdl_plugin.py:897 +msgid "Skipped" +msgstr "Ignorado" + +#: ffdl_plugin.py:925 +msgid "Add" +msgstr "Adicionar" + +#: ffdl_plugin.py:938 +msgid "Meta" +msgstr "Meta" + +#: ffdl_plugin.py:971 +msgid "Skipping duplicate story." +msgstr "Ignorando história duplicada." + +#: ffdl_plugin.py:974 +msgid "" +"More than one identical book by Identifer URL or title/author(s)--can't tell" +" which book to update/overwrite." +msgstr "Mais do que um livro idêntico por identificador de URL ou título/autor(es)--não pode dizer qual livro atualizar/substituir." + +#: ffdl_plugin.py:985 +msgid "Update" +msgstr "Atualizar" + +#: ffdl_plugin.py:993 ffdl_plugin.py:1000 +msgid "Change Story URL?" +msgstr "Alterar URL de história?" 
+ +#: ffdl_plugin.py:1001 +msgid "" +"<b>%s</b> by <b>%s</b> is already in your library with a different source " +"URL:" +msgstr "<b>%s</b> por <b>%s</b> já está na sua biblioteca com uma URL de origem diferente:" + +#: ffdl_plugin.py:1002 +msgid "In library: <a href=\"%(liburl)s\">%(liburl)s</a>" +msgstr "Na biblioteca: <a href=\"%(liburl)s\">%(liburl)s</a>" + +#: ffdl_plugin.py:1003 ffdl_plugin.py:1017 +msgid "New URL: <a href=\"%(newurl)s\">%(newurl)s</a>" +msgstr "Nova URL: <a href=\"%(newurl)s\">%(newurl)s</a>" + +#: ffdl_plugin.py:1004 +msgid "Click '<b>Yes</b>' to update/overwrite book with new URL." +msgstr "Clique em '<b>Sim</b>' para atualizar/substituir livro com a nova URL." + +#: ffdl_plugin.py:1005 +msgid "Click '<b>No</b>' to skip updating/overwriting this book." +msgstr "Clique em '<b>Não</b>' para ignorar a atualização/substituição desse livro." + +#: ffdl_plugin.py:1007 ffdl_plugin.py:1014 +msgid "Download as New Book?" +msgstr "Transferir como novo livro?" + +#: ffdl_plugin.py:1015 +msgid "" +"<b>%s</b> by <b>%s</b> is already in your library with a different source " +"URL." +msgstr "<b>%s</b> por <b>%s</b> já está na sua biblioteca com uma URL de origem diferente." + +#: ffdl_plugin.py:1016 +msgid "" +"You chose not to update the existing book. Do you want to add a new book " +"for this URL?" +msgstr "Você optou por não atualizar o livro existente. Você deseja adicionar um novo livro para esta URL?" + +#: ffdl_plugin.py:1018 +msgid "Click '<b>Yes</b>' to a new book with new URL." +msgstr "Clique em '<b>Sim</b>' para um novo livro com nova URL." + +#: ffdl_plugin.py:1019 +msgid "Click '<b>No</b>' to skip URL." +msgstr "Clique em '<b>Não</b>' para ignorar a URL." 
+
+#: ffdl_plugin.py:1025
+msgid "Update declined by user due to differing story URL(%s)"
+msgstr "Atualização recusada pelo usuário devido à diferentes URL(%s) de história"
+
+#: ffdl_plugin.py:1028
+msgid "Different URL"
+msgstr "URL Diferente"
+
+#: ffdl_plugin.py:1033
+msgid "Metadata collected."
+msgstr "Metadados recolhidos."
+
+#: ffdl_plugin.py:1049
+msgid "Already contains %d chapters."
+msgstr "Já contém %d capítulos."
+
+#: ffdl_plugin.py:1054 jobs.py:199
+msgid ""
+"Existing epub contains %d chapters, web site only has %d. Use Overwrite to "
+"force update."
+msgstr "O epub existente contém %d capítulos, o site tem apenas %d. Use Substituir para forçar uma atualização."
+
+#: ffdl_plugin.py:1056
+msgid ""
+"FFDL doesn't recognize chapters in existing epub, epub is probably from a "
+"different source. Use Overwrite to force update."
+msgstr "O FFDL não reconhece capítulos em epub existente, o epub é, provavelmente, de uma fonte diferente. Use Substituir para forçar uma atualização."
+
+#: ffdl_plugin.py:1068
+msgid "Not Overwriting, web site is not newer."
+msgstr "Nenhuma substituição, o site não é mais novo."
+
+#: ffdl_plugin.py:1148
+msgid "None of the <b>%d</b> URLs/stories given can be/need to be downloaded."
+msgstr "Nenhuma das <b>%d</b> URLs/histórias fornecidas podem ser/precisam ser transferidas."
+
+#: ffdl_plugin.py:1149 ffdl_plugin.py:1320 ffdl_plugin.py:1350
+msgid "See log for details."
+msgstr "Veja o registro para detalhes."
+
+#: ffdl_plugin.py:1150
+msgid "Proceed with updating your library(Error Column, if configured)?"
+msgstr "Prosseguir com a atualização de sua biblioteca (Coluna de erro, se configurado)?"
+
+#: ffdl_plugin.py:1157 ffdl_plugin.py:1332
+msgid "Bad"
+msgstr "Ruim"
+
+#: ffdl_plugin.py:1165
+msgid "FFDL download ended"
+msgstr "O FFDL finalizou a transferência"
+
+#: ffdl_plugin.py:1165 ffdl_plugin.py:1375
+msgid "FFDL log"
+msgstr "Registro do FFDL"
+
+#: ffdl_plugin.py:1181
+msgid "Download FanFiction Book"
+msgstr "Transferir Livro de Ficção"
+
+#: ffdl_plugin.py:1188
+msgid "Starting %d FanFictionDownLoads"
+msgstr "Começando %d FanFictionDownLoads"
+
+#: ffdl_plugin.py:1218
+msgid "Story Details:"
+msgstr "Detalhes da história:"
+
+#: ffdl_plugin.py:1221
+msgid "Error Updating Metadata"
+msgstr "Erro ao atualizar metadados"
+
+#: ffdl_plugin.py:1222
+msgid ""
+"An error has occurred while FFDL was updating calibre's metadata for <a "
+"href='%s'>%s</a>."
+msgstr "Ocorreu um erro enquanto o FFDL estava atualizando os metadados do calibre de <a href='%s'>%s</a>."
+
+#: ffdl_plugin.py:1223
+msgid "The ebook has been updated, but the metadata has not."
+msgstr "O ebook foi atualizado, mas os metadados não."
+
+#: ffdl_plugin.py:1275
+msgid "Finished Adding/Updating %d books."
+msgstr "O adicionamento/atualização de %d livros está concluído."
+
+#: ffdl_plugin.py:1283
+msgid "Starting auto conversion of %d books."
+msgstr "Começando a conversão automática de %d livros."
+
+#: ffdl_plugin.py:1304
+msgid "No Good Stories for Anthology"
+msgstr "Nenhuma BOA história para antologia"
+
+#: ffdl_plugin.py:1305
+msgid ""
+"No good stories/updates where downloaded, Anthology creation/update aborted."
+msgstr "Não há boas histórias/atualizações de onde transferir, a criação/atualização de antologia foi abortada."
+
+#: ffdl_plugin.py:1310 ffdl_plugin.py:1349
+msgid "FFDL found <b>%s</b> good and <b>%s</b> bad updates."
+msgstr "O FFDL encontrou <b>%s</b> atualizações boas e <b>%s</b> ruins."
+
+#: ffdl_plugin.py:1317
+msgid ""
+"Are you sure you want to continue with creating/updating this Anthology?"
+msgstr "Você deseja realmente continuar com a criação/atualização desta antologia?" + +#: ffdl_plugin.py:1318 +msgid "Any updates that failed will <b>not</b> be included in the Anthology." +msgstr "Todas as atualizações que falharam <b>não</b> vão ser incluídas na antologia." + +#: ffdl_plugin.py:1319 +msgid "However, if there's an older version, it will still be included." +msgstr "No entanto, se houver uma versão mais antiga, ela será incluída." + +#: ffdl_plugin.py:1322 +msgid "Proceed with updating this anthology and your library?" +msgstr "Prosseguir com a atualização desta antologia e sua biblioteca?" + +#: ffdl_plugin.py:1330 +msgid "Good" +msgstr "BOA" + +#: ffdl_plugin.py:1351 +msgid "Proceed with updating your library?" +msgstr "Prosseguir com a atualização de sua biblioteca?" + +#: ffdl_plugin.py:1375 +msgid "FFDL download complete" +msgstr "Transferência concluída do FFDL" + +#: ffdl_plugin.py:1388 +msgid "Merging %s books." +msgstr "Mesclando %s livros." + +#: ffdl_plugin.py:1428 +msgid "FFDL Adding/Updating books." +msgstr "Adicionar/atualizar livros do FFDL." + +#: ffdl_plugin.py:1435 +msgid "Updating calibre for FanFiction stories..." +msgstr "Atualizando o calibre de histórias de ficção..." + +#: ffdl_plugin.py:1436 +msgid "Update calibre for FanFiction stories" +msgstr "Atualizar o calibre de histórias de ficção" + +#: ffdl_plugin.py:1445 +msgid "Adding/Updating %s BAD books." +msgstr "Adicionando/atualizando %s livros RUINS." + +#: ffdl_plugin.py:1454 +msgid "Updating calibre for BAD FanFiction stories..." +msgstr "Atualizando o calibre de histórias RUINS de ficção..." + +#: ffdl_plugin.py:1455 +msgid "Update calibre for BAD FanFiction stories" +msgstr "Atualizar o calibre de histórias RUINS de ficção" + +#: ffdl_plugin.py:1481 +msgid "Adding format to book failed for some reason..." +msgstr "Falha ao adicionar formato por algum motivo..." 
+ +#: ffdl_plugin.py:1484 +msgid "Error" +msgstr "Erro" + +#: ffdl_plugin.py:1757 +msgid "" +"You configured FanFictionDownLoader to automatically update Reading Lists, " +"but you don't have the %s plugin installed anymore?" +msgstr "Você configurou o FanFictionDownLoader para atualizar automaticamente listas de leitura, mas você não tem mais o plugin %s instalado?" + +#: ffdl_plugin.py:1769 +msgid "" +"You configured FanFictionDownLoader to automatically update \"To Read\" " +"Reading Lists, but you don't have any lists set?" +msgstr "Você configurou o FanFictionDownLoader para atualizar automaticamente listas de leitura \"Para Ler\", mas você não tem nenhuma lista definida?" + +#: ffdl_plugin.py:1779 ffdl_plugin.py:1797 +msgid "" +"You configured FanFictionDownLoader to automatically update Reading List " +"'%s', but you don't have a list of that name?" +msgstr "Você configurou o FanFictionDownLoader para atualizar automaticamente a Lista de Leitura '%s', mas você não tem uma lista com esse nome?" + +#: ffdl_plugin.py:1785 +msgid "" +"You configured FanFictionDownLoader to automatically update \"Send to " +"Device\" Reading Lists, but you don't have any lists set?" +msgstr "Você configurou o FanFictionDownLoader para atualizar automaticamente listas de leitura \"Enviar para Dispositivo\", mas você não tem nenhuma lista definida?" + +#: ffdl_plugin.py:1906 +msgid "No story URL found." +msgstr "Nenhuma URL de história encontrada." + +#: ffdl_plugin.py:1909 +msgid "Not Found" +msgstr "Não encontrado" + +#: ffdl_plugin.py:1915 +msgid "URL is not a valid story URL." +msgstr "A URL não é uma URL válida de história." 
+
+#: ffdl_plugin.py:1918
+msgid "Bad URL"
+msgstr "URL ruim"
+
+#: ffdl_plugin.py:2054 ffdl_plugin.py:2057
+msgid "Anthology containing:"
+msgstr "Antologia contendo:"
+
+#: ffdl_plugin.py:2055
+msgid "%s by %s"
+msgstr "%s por %s"
+
+#: ffdl_plugin.py:2077
+msgid " Anthology"
+msgstr " Antologia"
+
+#: ffdl_plugin.py:2114
+msgid "(was set, removed for security)"
+msgstr "(foi definido, removido para segurança)"
+
+#: jobs.py:73
+msgid "Downloading FanFiction Stories"
+msgstr "Transferindo histórias de ficção"
+
+#: jobs.py:95
+msgid "Successful:"
+msgstr "Bem sucedido:"
+
+#: jobs.py:97
+msgid "Unsuccessful:"
+msgstr "Mal sucedido:"
+
+#: jobs.py:111
+msgid "Download started..."
+msgstr "Transferência iniciada..."
+
+#: jobs.py:193
+msgid "Already contains %d chapters. Reuse as is."
+msgstr "Já contém %d capítulos. Reutilizar como está."
+
+#: jobs.py:210
+msgid "Update %s completed, added %s chapters for %s total."
+msgstr "Atualização %s concluída, adicionados %s capítulos de um total de %s."
diff --git a/calibre-plugin/translations/zz.po b/calibre-plugin/translations/zz.po
new file mode 100644
index 00000000..0f1a9572
--- /dev/null
+++ b/calibre-plugin/translations/zz.po
@@ -0,0 +1,1794 @@
+# SOME DESCRIPTIVE TITLE.
+# Copyright (C) YEAR ORGANIZATION
+# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: FanFictionDownLoader 1.8\n"
+"POT-Creation-Date: 2014-07-14 10:52+Central Daylight Time\n"
+"PO-Revision-Date: 2014-08-01 12:43-0600\n"
+"Last-Translator: Jim Miller <RetiefJimm@gmail.com>\n"
+"Language-Team: \n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Generated-By: pygettext.py 1.5\n"
+"X-Generator: Poedit 1.5.7\n"
+
+#: __init__.py:42
+msgid "UI plugin to download FanFiction stories from various sites."
+msgstr "zzUI plugin to download FanFiction stories from various sites."
+
+#: __init__.py:109
+msgid ""
+"Path to the calibre library. 
Default is to use the path stored in the " +"settings." +msgstr "" +"zzPath to the calibre library. Default is to use the path stored in the " +"settings." + +#: config.py:161 +msgid "FAQs" +msgstr "zzFAQs" + +#: config.py:161 +msgid "List of Supported Sites" +msgstr "zzList of Supported Sites" + +#: config.py:175 +msgid "Basic" +msgstr "zzBasic" + +#: config.py:196 +msgid "Standard Columns" +msgstr "zzStandard Columns" + +#: config.py:199 +msgid "Custom Columns" +msgstr "zzCustom Columns" + +#: config.py:202 +msgid "Other" +msgstr "zzOther" + +#: config.py:323 +msgid "" +"These settings control the basic features of the plugin--downloading " +"FanFiction." +msgstr "" +"zzThese settings control the basic features of the plugin--downloading " +"FanFiction." + +#: config.py:327 +msgid "Defaults Options on Download" +msgstr "zzDefaults Options on Download" + +#: config.py:331 +msgid "" +"On each download, FFDL offers an option to select the output format. <br /" +">This sets what that option will default to." +msgstr "" +"zzOn each download, FFDL offers an option to select the output format. <br /" +">This sets what that option will default to." + +#: config.py:333 +msgid "Default Output &Format:" +msgstr "zzDefault Output &Format:" + +#: config.py:348 +msgid "" +"On each download, FFDL offers an option of what happens if that story " +"already exists. <br />This sets what that option will default to." +msgstr "" +"zzOn each download, FFDL offers an option of what happens if that story " +"already exists. <br />This sets what that option will default to." + +#: config.py:350 +msgid "Default If Story Already Exists?" +msgstr "zzDefault If Story Already Exists?" + +#: config.py:364 +msgid "Default Update Calibre &Metadata?" +msgstr "zzDefault Update Calibre &Metadata?" + +#: config.py:365 +msgid "" +"On each download, FFDL offers an option to update Calibre's metadata (title, " +"author, URL, tags, custom columns, etc) from the web site. 
<br />This sets " +"whether that will default to on or off. <br />Columns set to 'New Only' in " +"the column tabs will only be set for new books." +msgstr "" +"zzOn each download, FFDL offers an option to update Calibre's metadata " +"(title, author, URL, tags, custom columns, etc) from the web site. <br /" +">This sets whether that will default to on or off. <br />Columns set to 'New " +"Only' in the column tabs will only be set for new books." + +#: config.py:369 +msgid "Default Update EPUB Cover when Updating EPUB?" +msgstr "zzDefault Update EPUB Cover when Updating EPUB?" + +#: config.py:370 +msgid "" +"On each download, FFDL offers an option to update the book cover image " +"<i>inside</i> the EPUB from the web site when the EPUB is updated.<br />This " +"sets whether that will default to on or off." +msgstr "" +"zzOn each download, FFDL offers an option to update the book cover image " +"<i>inside</i> the EPUB from the web site when the EPUB is updated.<br />This " +"sets whether that will default to on or off." + +#: config.py:374 +msgid "Smarten Punctuation (EPUB only)" +msgstr "zzSmarten Punctuation (EPUB only)" + +#: config.py:375 +msgid "" +"Run Smarten Punctuation from Calibre's Polish Book feature on each EPUB " +"download and update." +msgstr "" +"zzRun Smarten Punctuation from Calibre's Polish Book feature on each EPUB " +"download and update." + +#: config.py:380 +msgid "Updating Calibre Options" +msgstr "zzUpdating Calibre Options" + +#: config.py:384 +msgid "Delete other existing formats?" +msgstr "zzDelete other existing formats?" + +#: config.py:385 +msgid "" +"Check this to automatically delete all other ebook formats when updating an " +"existing book.\n" +"Handy if you have both a Nook(epub) and Kindle(mobi), for example." +msgstr "" +"zzCheck this to automatically delete all other ebook formats when updating " +"an existing book.\n" +"Handy if you have both a Nook(epub) and Kindle(mobi), for example." 
+ +#: config.py:389 +msgid "Update Calibre Cover when Updating Metadata?" +msgstr "zzUpdate Calibre Cover when Updating Metadata?" + +#: config.py:390 +msgid "" +"Update calibre book cover image from EPUB when metadata is updated. (EPUB " +"only.)\n" +"Doesn't go looking for new images on 'Update Calibre Metadata Only'." +msgstr "" +"zzUpdate calibre book cover image from EPUB when metadata is updated. (EPUB " +"only.)\n" +"Doesn't go looking for new images on 'Update Calibre Metadata Only'." + +#: config.py:394 +msgid "Keep Existing Tags when Updating Metadata?" +msgstr "zzKeep Existing Tags when Updating Metadata?" + +#: config.py:395 +msgid "" +"Existing tags will be kept and any new tags added.\n" +"%(cmplt)s and %(inprog)s tags will be still be updated, if known.\n" +"%(lul)s tags will be updated if %(lus)s in %(is)s.\n" +"(If Tags is set to 'New Only' in the Standard Columns tab, this has no " +"effect.)" +msgstr "" +"zzExisting tags will be kept and any new tags added.\n" +"%(cmplt)s and %(inprog)s tags will be still be updated, if known.\n" +"%(lul)s tags will be updated if %(lus)s in %(is)s.\n" +"(If Tags is set to 'New Only' in the Standard Columns tab, this has no " +"effect.)" + +#: config.py:399 +msgid "Force Author into Author Sort?" +msgstr "zzForce Author into Author Sort?" + +#: config.py:400 +msgid "" +"If checked, the author(s) as given will be used for the Author Sort, too.\n" +"If not checked, calibre will apply it's built in algorithm which makes 'Bob " +"Smith' sort as 'Smith, Bob', etc." +msgstr "" +"zzIf checked, the author(s) as given will be used for the Author Sort, too.\n" +"If not checked, calibre will apply it's built in algorithm which makes 'Bob " +"Smith' sort as 'Smith, Bob', etc." + +#: config.py:404 +msgid "Force Title into Title Sort?" +msgstr "zzForce Title into Title Sort?" 
+ +#: config.py:405 +msgid "" +"If checked, the title as given will be used for the Title Sort, too.\n" +"If not checked, calibre will apply it's built in algorithm which makes 'The " +"Title' sort as 'Title, The', etc." +msgstr "" +"zzIf checked, the title as given will be used for the Title Sort, too.\n" +"If not checked, calibre will apply it's built in algorithm which makes 'The " +"Title' sort as 'Title, The', etc." + +#: config.py:409 +msgid "Check for existing Series Anthology books?" +msgstr "zzCheck for existing Series Anthology books?" + +#: config.py:410 +msgid "" +"Check for existings Series Anthology books using each new story's series URL " +"before downloading.\n" +"Offer to skip downloading if a Series Anthology is found." +msgstr "" +"zzCheck for existings Series Anthology books using each new story's series " +"URL before downloading.\n" +"Offer to skip downloading if a Series Anthology is found." + +#: config.py:414 +msgid "Check for changed Story URL?" +msgstr "zzCheck for changed Story URL?" + +#: config.py:415 +msgid "" +"Warn you if an update will change the URL of an existing book.\n" +"fanfiction.net URLs will change from http to https silently." +msgstr "" +"zzWarn you if an update will change the URL of an existing book.\n" +"fanfiction.net URLs will change from http to https silently." + +#: config.py:419 +msgid "Search EPUB text for Story URL?" +msgstr "zzSearch EPUB text for Story URL?" + +#: config.py:420 +msgid "" +"Look for first valid story URL inside EPUB text if not found in metadata.\n" +"Somewhat risky, could find wrong URL depending on EPUB content.\n" +"Also finds and corrects bad ffnet URLs from ficsaver.com files." +msgstr "" +"zzLook for first valid story URL inside EPUB text if not found in metadata.\n" +"Somewhat risky, could find wrong URL depending on EPUB content.\n" +"Also finds and corrects bad ffnet URLs from ficsaver.com files." + +#: config.py:424 +msgid "Mark added/updated books when finished?" 
+msgstr "zzMark added/updated books when finished?" + +#: config.py:425 +msgid "" +"Mark added/updated books when finished. Use with option below.\n" +"You can also manually search for 'marked:ffdl_success'.\n" +"'marked:ffdl_failed' is also available, or search 'marked:ffdl' for both." +msgstr "" +"zzMark added/updated books when finished. Use with option below.\n" +"You can also manually search for 'marked:ffdl_success'.\n" +"'marked:ffdl_failed' is also available, or search 'marked:ffdl' for both." + +#: config.py:429 +msgid "Show Marked books when finished?" +msgstr "zzShow Marked books when finished?" + +#: config.py:430 +msgid "" +"Show Marked added/updated books only when finished.\n" +"You can also manually search for 'marked:ffdl_success'.\n" +"'marked:ffdl_failed' is also available, or search 'marked:ffdl' for both." +msgstr "" +"zzShow Marked added/updated books only when finished.\n" +"You can also manually search for 'marked:ffdl_success'.\n" +"'marked:ffdl_failed' is also available, or search 'marked:ffdl' for both." + +#: config.py:434 +msgid "Automatically Convert new/update books?" +msgstr "zzAutomatically Convert new/update books?" + +#: config.py:435 +msgid "" +"Automatically call calibre's Convert for new/update books.\n" +"Converts to the current output format as chosen in calibre's\n" +"Preferences->Behavior settings." +msgstr "" +"zzAutomatically call calibre's Convert for new/update books.\n" +"Converts to the current output format as chosen in calibre's\n" +"Preferences->Behavior settings." + +#: config.py:439 +msgid "GUI Options" +msgstr "zzGUI Options" + +#: config.py:443 +msgid "Take URLs from Clipboard?" +msgstr "zzTake URLs from Clipboard?" + +#: config.py:444 +msgid "Prefill URLs from valid URLs in Clipboard when Adding New." +msgstr "zzPrefill URLs from valid URLs in Clipboard when Adding New." + +#: config.py:448 +msgid "Default to Update when books selected?" +msgstr "zzDefault to Update when books selected?" 
+ +#: config.py:449 +msgid "" +"The top FanFictionDownLoader plugin button will start Update if\n" +"books are selected. If unchecked, it will always bring up 'Add New'." +msgstr "" +"zzThe top FanFictionDownLoader plugin button will start Update if\n" +"books are selected. If unchecked, it will always bring up 'Add New'." + +#: config.py:453 +msgid "Keep 'Add New from URL(s)' dialog on top?" +msgstr "zzKeep 'Add New from URL(s)' dialog on top?" + +#: config.py:454 +msgid "" +"Instructs the OS and Window Manager to keep the 'Add New from URL(s)'\n" +"dialog on top of all other windows. Useful for dragging URLs onto it." +msgstr "" +"zzInstructs the OS and Window Manager to keep the 'Add New from URL(s)'\n" +"dialog on top of all other windows. Useful for dragging URLs onto it." + +#: config.py:458 +msgid "Misc Options" +msgstr "zzMisc Options" + +#: config.py:463 +msgid "Include images in EPUBs?" +msgstr "zzInclude images in EPUBs?" + +#: config.py:464 +msgid "" +"Download and include images in EPUB stories. This is equivalent to adding:" +"%(imgset)s ...to the top of %(pini)s. Your settings in %(pini)s will " +"override this." +msgstr "" +"zzDownload and include images in EPUB stories. This is equivalent to adding:" +"%(imgset)s ...to the top of %(pini)s. Your settings in %(pini)s will " +"override this." + +#: config.py:468 +msgid "Inject calibre Series when none found?" +msgstr "zzInject calibre Series when none found?" + +#: config.py:469 +msgid "" +"If no series is found, inject the calibre series (if there is one) so it " +"appears on the FFDL title page(not cover)." +msgstr "" +"zzIf no series is found, inject the calibre series (if there is one) so it " +"appears on the FFDL title page(not cover)." + +#: config.py:473 +msgid "Reject List" +msgstr "zzReject List" + +#: config.py:477 +msgid "Edit Reject URL List" +msgstr "zzEdit Reject URL List" + +#: config.py:478 +msgid "Edit list of URLs FFDL will automatically Reject." 
+msgstr "zzEdit list of URLs FFDL will automatically Reject." + +#: config.py:482 config.py:556 +msgid "Add Reject URLs" +msgstr "zzAdd Reject URLs" + +#: config.py:483 +msgid "Add additional URLs to Reject as text." +msgstr "zzAdd additional URLs to Reject as text." + +#: config.py:487 +msgid "Edit Reject Reasons List" +msgstr "zzEdit Reject Reasons List" + +#: config.py:488 config.py:547 +msgid "Customize the Reasons presented when Rejecting URLs" +msgstr "zzCustomize the Reasons presented when Rejecting URLs" + +#: config.py:492 +msgid "Reject Without Confirmation?" +msgstr "zzReject Without Confirmation?" + +#: config.py:493 +msgid "Always reject URLs on the Reject List without stopping and asking." +msgstr "zzAlways reject URLs on the Reject List without stopping and asking." + +#: config.py:531 +msgid "Edit Reject URLs List" +msgstr "zzEdit Reject URLs List" + +#: config.py:545 +msgid "Reject Reasons" +msgstr "zzReject Reasons" + +#: config.py:546 +msgid "Customize Reject List Reasons" +msgstr "zzCustomize Reject List Reasons" + +#: config.py:554 +msgid "Reason why I rejected it" +msgstr "zzReason why I rejected it" + +#: config.py:554 +msgid "Title by Author" +msgstr "zzTitle by Author" + +#: config.py:557 +msgid "" +"Add Reject URLs. Use: <b>http://...,note</b> or <b>http://...,title by " +"author - note</b><br>Invalid story URLs will be ignored." +msgstr "" +"zzAdd Reject URLs. Use: <b>http://...,note</b> or <b>http://...,title by " +"author - note</b><br>Invalid story URLs will be ignored." 
+ +#: config.py:558 +msgid "" +"One URL per line:\n" +"<b>http://...,note</b>\n" +"<b>http://...,title by author - note</b>" +msgstr "" +"zzOne URL per line:\n" +"<b>http://...,note</b>\n" +"<b>http://...,title by author - note</b>" + +#: config.py:560 dialogs.py:1031 +msgid "Add this reason to all URLs added:" +msgstr "zzAdd this reason to all URLs added:" + +#: config.py:575 +msgid "" +"These settings provide more detailed control over what metadata will be " +"displayed inside the ebook as well as let you set %(isa)s and %(u)s/%(p)s " +"for different sites." +msgstr "" +"zzThese settings provide more detailed control over what metadata will be " +"displayed inside the ebook as well as let you set %(isa)s and %(u)s/%(p)s " +"for different sites." + +#: config.py:593 +msgid "View Defaults" +msgstr "zzView Defaults" + +#: config.py:594 +msgid "" +"View all of the plugin's configurable settings\n" +"and their default settings." +msgstr "" +"zzView all of the plugin's configurable settings\n" +"and their default settings." + +#: config.py:612 +msgid "Plugin Defaults (%s) (Read-Only)" +msgstr "zzPlugin Defaults (%s) (Read-Only)" + +#: config.py:613 config.py:619 +msgid "" +"These are all of the plugin's configurable options\n" +"and their default settings." +msgstr "" +"zzThese are all of the plugin's configurable options\n" +"and their default settings." + +#: config.py:614 +msgid "Plugin Defaults" +msgstr "zzPlugin Defaults" + +#: config.py:630 dialogs.py:555 dialogs.py:658 +msgid "OK" +msgstr "zzOK" + +# %(rl)s = Reading List. Keep as is. +#: config.py:650 +msgid "" +"These settings provide integration with the %(rl)s Plugin. %(rl)s can " +"automatically send to devices and change custom columns. You have to create " +"and configure the lists in %(rl)s to be useful." +msgstr "" +"zzThese settings provide integration with the %(rl)s Plugin. %(rl)s can " +"automatically send to devices and change custom columns. 
You have to create " +"and configure the lists in %(rl)s to be useful." + +#: config.py:655 +msgid "Add new/updated stories to \"Send to Device\" Reading List(s)." +msgstr "zzAdd new/updated stories to \"Send to Device\" Reading List(s)." + +# %(rl)s = Reading List. Keep as is. +#: config.py:656 +msgid "" +"Automatically add new/updated stories to these lists in the %(rl)s plugin." +msgstr "" +"zzAutomatically add new/updated stories to these lists in the %(rl)s plugin." + +#: config.py:661 +msgid "\"Send to Device\" Reading Lists" +msgstr "zz\"Send to Device\" Reading Lists" + +#: config.py:662 config.py:665 config.py:678 config.py:681 +msgid "" +"When enabled, new/updated stories will be automatically added to these lists." +msgstr "" +"zzWhen enabled, new/updated stories will be automatically added to these " +"lists." + +#: config.py:671 +msgid "Add new/updated stories to \"To Read\" Reading List(s)." +msgstr "zzAdd new/updated stories to \"To Read\" Reading List(s)." + +# %(rl)s = Reading List. Keep as is. +#: config.py:672 +msgid "" +"Automatically add new/updated stories to these lists in the %(rl)s plugin.\n" +"Also offers menu option to remove stories from the \"To Read\" lists." +msgstr "" +"zzAutomatically add new/updated stories to these lists in the %(rl)s " +"plugin.\n" +"Also offers menu option to remove stories from the \"To Read\" lists." + +#: config.py:677 +msgid "\"To Read\" Reading Lists" +msgstr "zz\"To Read\" Reading Lists" + +#: config.py:687 +msgid "" +"Add stories back to \"Send to Device\" Reading List(s) when marked \"Read\"." +msgstr "" +"zzAdd stories back to \"Send to Device\" Reading List(s) when marked \"Read" +"\"." + +#: config.py:688 +msgid "" +"Menu option to remove from \"To Read\" lists will also add stories back to " +"\"Send to Device\" Reading List(s)" +msgstr "" +"zzMenu option to remove from \"To Read\" lists will also add stories back to " +"\"Send to Device\" Reading List(s)" + +# %(gc)s = Generate Cover. Keep as is. 
+#: config.py:710 +msgid "" +"The %(gc)s plugin can create cover images for books using various metadata " +"and configurations. If you have GC installed, FFDL can run GC on new " +"downloads and metadata updates. Pick a GC setting by site or Default." +msgstr "" +"zzThe %(gc)s plugin can create cover images for books using various metadata " +"and configurations. If you have GC installed, FFDL can run GC on new " +"downloads and metadata updates. Pick a GC setting by site or Default." + +#: config.py:728 config.py:732 config.py:745 +msgid "Default" +msgstr "zzDefault" + +# %(gc)s = Generate Cover. Keep as is. +#: config.py:733 +msgid "" +"On Metadata update, run %(gc)s with this setting, if not selected for " +"specific site." +msgstr "" +"zzOn Metadata update, run %(gc)s with this setting, if not selected for " +"specific site." + +# %(gc)s = Generate Cover. Keep as is. +#: config.py:736 +msgid "On Metadata update, run %(gc)s with this setting for %(site)s stories." +msgstr "" +"zzOn Metadata update, run %(gc)s with this setting for %(site)s stories." + +# %(gc)s = Generate Cover. Keep as is. +#: config.py:759 +msgid "Run %(gc)s Only on New Books" +msgstr "zzRun %(gc)s Only on New Books" + +#: config.py:760 +msgid "Default is to run GC any time the calibre metadata is updated." +msgstr "zzDefault is to run GC any time the calibre metadata is updated." + +#: config.py:764 +msgid "Allow %(gcset)s from %(pini)s to override" +msgstr "zzAllow %(gcset)s from %(pini)s to override" + +#: config.py:765 +msgid "" +"The %(pini)s parameter %(gcset)s allows you to choose a GC setting based on " +"metadata rather than site, but it's much more complex.<br \\>%(gcset)s is " +"ignored when this is off." +msgstr "" +"zzThe %(pini)s parameter %(gcset)s allows you to choose a GC setting based " +"on metadata rather than site, but it's much more complex.<br \\>%(gcset)s is " +"ignored when this is off." 
+ +#: config.py:769 +msgid "Use calibre's Polish feature to inject/update the cover" +msgstr "zzUse calibre's Polish feature to inject/update the cover" + +#: config.py:770 +msgid "" +"Calibre's Polish feature will be used to inject or update the generated " +"cover into the ebook, EPUB only." +msgstr "" +"zzCalibre's Polish feature will be used to inject or update the generated " +"cover into the ebook, EPUB only." + +#: config.py:784 +msgid "" +"These settings provide integration with the %(cp)s Plugin. %(cp)s can " +"automatically update custom columns with page, word and reading level " +"statistics. You have to create and configure the columns in %(cp)s first." +msgstr "" +"zzThese settings provide integration with the %(cp)s Plugin. %(cp)s can " +"automatically update custom columns with page, word and reading level " +"statistics. You have to create and configure the columns in %(cp)s first." + +#: config.py:789 +msgid "" +"If any of the settings below are checked, when stories are added or updated, " +"the %(cp)s Plugin will be called to update the checked statistics." +msgstr "" +"zzIf any of the settings below are checked, when stories are added or " +"updated, the %(cp)s Plugin will be called to update the checked statistics." + +#: config.py:795 +msgid "Which column and algorithm to use are configured in %(cp)s." +msgstr "zzWhich column and algorithm to use are configured in %(cp)s." + +#: config.py:803 +msgid "" +"Will overwrite word count from FFDL metadata if set to update the same " +"custom column." +msgstr "" +"zzWill overwrite word count from FFDL metadata if set to update the same " +"custom column." + +#: config.py:834 +msgid "" +"These controls aren't plugin settings as such, but convenience buttons for " +"setting Keyboard shortcuts and getting all the FanFictionDownLoader " +"confirmation dialogs back again." 
+msgstr "" +"zzThese controls aren't plugin settings as such, but convenience buttons for " +"setting Keyboard shortcuts and getting all the FanFictionDownLoader " +"confirmation dialogs back again." + +#: config.py:839 +msgid "Keyboard shortcuts..." +msgstr "zzKeyboard shortcuts..." + +#: config.py:840 +msgid "Edit the keyboard shortcuts associated with this plugin" +msgstr "zzEdit the keyboard shortcuts associated with this plugin" + +#: config.py:844 +msgid "Reset disabled &confirmation dialogs" +msgstr "zzReset disabled &confirmation dialogs" + +#: config.py:845 +msgid "Reset all show me again dialogs for the FanFictionDownLoader plugin" +msgstr "zzReset all show me again dialogs for the FanFictionDownLoader plugin" + +#: config.py:849 +msgid "&View library preferences..." +msgstr "zz&View library preferences..." + +#: config.py:850 +msgid "View data stored in the library database for this plugin" +msgstr "zzView data stored in the library database for this plugin" + +#: config.py:861 +msgid "Done" +msgstr "zzDone" + +#: config.py:862 +msgid "Confirmation dialogs have all been reset" +msgstr "zzConfirmation dialogs have all been reset" + +#: config.py:910 +msgid "Category" +msgstr "zzCategory" + +#: config.py:911 +msgid "Genre" +msgstr "zzGenre" + +#: config.py:912 +msgid "Language" +msgstr "zzLanguage" + +#: config.py:913 ffdl_plugin.py:1126 ffdl_plugin.py:1290 ffdl_plugin.py:1320 +msgid "Status" +msgstr "zzStatus" + +#: config.py:914 +msgid "Status:%(cmplt)s" +msgstr "zzStatus:%(cmplt)s" + +#: config.py:915 +msgid "Status:%(inprog)s" +msgstr "zzStatus:%(inprog)s" + +#: config.py:916 config.py:1050 +msgid "Series" +msgstr "zzSeries" + +#: config.py:917 +msgid "Characters" +msgstr "zzCharacters" + +#: config.py:918 +msgid "Relationships" +msgstr "zzRelationships" + +#: config.py:919 +msgid "Published" +msgstr "zzPublished" + +#: config.py:920 ffdl_plugin.py:1403 ffdl_plugin.py:1422 +msgid "Updated" +msgstr "zzUpdated" + +#: config.py:921 +msgid "Created" 
+msgstr "zzCreated" + +#: config.py:922 +msgid "Rating" +msgstr "zzRating" + +#: config.py:923 +msgid "Warnings" +msgstr "zzWarnings" + +#: config.py:924 +msgid "Chapters" +msgstr "zzChapters" + +#: config.py:925 +msgid "Words" +msgstr "zzWords" + +#: config.py:926 +msgid "Site" +msgstr "zzSite" + +#: config.py:927 +msgid "Story ID" +msgstr "zzStory ID" + +#: config.py:928 +msgid "Author ID" +msgstr "zzAuthor ID" + +#: config.py:929 +msgid "Extra Tags" +msgstr "zzExtra Tags" + +#: config.py:930 config.py:1042 dialogs.py:817 dialogs.py:913 +#: ffdl_plugin.py:1126 ffdl_plugin.py:1290 ffdl_plugin.py:1320 +msgid "Title" +msgstr "zzTitle" + +#: config.py:931 +msgid "Story URL" +msgstr "zzStory URL" + +#: config.py:932 +msgid "Description" +msgstr "zzDescription" + +#: config.py:933 dialogs.py:817 dialogs.py:913 ffdl_plugin.py:1126 +#: ffdl_plugin.py:1290 ffdl_plugin.py:1320 +msgid "Author" +msgstr "zzAuthor" + +#: config.py:934 +msgid "Author URL" +msgstr "zzAuthor URL" + +#: config.py:935 +msgid "File Format" +msgstr "zzFile Format" + +#: config.py:936 +msgid "File Extension" +msgstr "zzFile Extension" + +#: config.py:937 +msgid "Site Abbrev" +msgstr "zzSite Abbrev" + +#: config.py:938 +msgid "FFDL Version" +msgstr "zzFFDL Version" + +#: config.py:953 +msgid "" +"If you have custom columns defined, they will be listed below. Choose a " +"metadata value type to fill your columns automatically." +msgstr "" +"zzIf you have custom columns defined, they will be listed below. Choose a " +"metadata value type to fill your columns automatically." + +#: config.py:978 +msgid "Update this %s column(%s) with..." +msgstr "zzUpdate this %s column(%s) with..." + +#: config.py:988 +msgid "Values that aren't valid for this enumeration column will be ignored." +msgstr "" +"zzValues that aren't valid for this enumeration column will be ignored." + +#: config.py:988 config.py:990 +msgid "Metadata values valid for this type of column." 
+msgstr "zzMetadata values valid for this type of column." + +#: config.py:993 config.py:1069 +msgid "New Only" +msgstr "zzNew Only" + +#: config.py:994 +msgid "" +"Write to %s(%s) only for new\n" +"books, not updates to existing books." +msgstr "" +"zzWrite to %s(%s) only for new\n" +"books, not updates to existing books." + +#: config.py:1005 +msgid "Allow %(ccset)s from %(pini)s to override" +msgstr "zzAllow %(ccset)s from %(pini)s to override" + +#: config.py:1006 +msgid "" +"The %(pini)s parameter %(ccset)s allows you to set custom columns to site " +"specific values that aren't common to all sites.<br />%(ccset)s is ignored " +"when this is off." +msgstr "" +"zzThe %(pini)s parameter %(ccset)s allows you to set custom columns to site " +"specific values that aren't common to all sites.<br />%(ccset)s is ignored " +"when this is off." + +#: config.py:1011 +msgid "Special column:" +msgstr "zzSpecial column:" + +#: config.py:1016 +msgid "Update/Overwrite Error Column:" +msgstr "zzUpdate/Overwrite Error Column:" + +#: config.py:1017 +msgid "" +"When an update or overwrite of an existing story fails, record the reason in " +"this column.\n" +"(Text and Long Text columns only.)" +msgstr "" +"zzWhen an update or overwrite of an existing story fails, record the reason " +"in this column.\n" +"(Text and Long Text columns only.)" + +#: config.py:1043 +msgid "Author(s)" +msgstr "zzAuthor(s)" + +#: config.py:1044 +msgid "Publisher" +msgstr "zzPublisher" + +#: config.py:1045 +msgid "Tags" +msgstr "zzTags" + +#: config.py:1046 +msgid "Languages" +msgstr "zzLanguages" + +#: config.py:1047 +msgid "Published Date" +msgstr "zzPublished Date" + +#: config.py:1048 +msgid "Date" +msgstr "zzDate" + +#: config.py:1049 +msgid "Comments" +msgstr "zzComments" + +#: config.py:1051 +msgid "Ids(url id only)" +msgstr "zzIds(url id only)" + +#: config.py:1056 +msgid "" +"The standard calibre metadata columns are listed below. 
You may choose " +"whether FFDL will fill each column automatically on updates or only for new " +"books." +msgstr "" +"zzThe standard calibre metadata columns are listed below. You may choose " +"whether FFDL will fill each column automatically on updates or only for new " +"books." + +#: config.py:1070 +msgid "" +"Write to %s only for new\n" +"books, not updates to existing books." +msgstr "" +"zzWrite to %s only for new\n" +"books, not updates to existing books." + +#: dialogs.py:69 +msgid "Skip" +msgstr "zzSkip" + +#: dialogs.py:70 +msgid "Add New Book" +msgstr "zzAdd New Book" + +#: dialogs.py:71 +msgid "Update EPUB if New Chapters" +msgstr "zzUpdate EPUB if New Chapters" + +#: dialogs.py:72 +msgid "Update EPUB Always" +msgstr "zzUpdate EPUB Always" + +#: dialogs.py:73 +msgid "Overwrite if Newer" +msgstr "zzOverwrite if Newer" + +#: dialogs.py:74 +msgid "Overwrite Always" +msgstr "zzOverwrite Always" + +#: dialogs.py:75 +msgid "Update Calibre Metadata Only" +msgstr "zzUpdate Calibre Metadata Only" + +#: dialogs.py:252 ffdl_plugin.py:89 +msgid "FanFictionDownLoader" +msgstr "zzFanFictionDownLoader" + +#: dialogs.py:269 dialogs.py:716 +msgid "Show Download Options" +msgstr "zzShow Download Options" + +#: dialogs.py:288 dialogs.py:733 +msgid "Output &Format:" +msgstr "zzOutput &Format:" + +#: dialogs.py:296 dialogs.py:741 +msgid "" +"Choose output format to create. May set default from plugin configuration." +msgstr "" +"zzChoose output format to create. May set default from plugin configuration." + +#: dialogs.py:324 dialogs.py:758 +msgid "Update Calibre &Metadata?" +msgstr "zzUpdate Calibre &Metadata?" 
+ +#: dialogs.py:325 dialogs.py:759 +msgid "" +"Update metadata for existing stories in Calibre from web site?\n" +"(Columns set to 'New Only' in the column tabs will only be set for new " +"books.)" +msgstr "" +"zzUpdate metadata for existing stories in Calibre from web site?\n" +"(Columns set to 'New Only' in the column tabs will only be set for new " +"books.)" + +#: dialogs.py:331 dialogs.py:763 +msgid "Update EPUB Cover?" +msgstr "zzUpdate EPUB Cover?" + +#: dialogs.py:332 dialogs.py:764 +msgid "" +"Update book cover image from site or defaults (if found) <i>inside</i> the " +"EPUB when EPUB is updated." +msgstr "" +"zzUpdate book cover image from site or defaults (if found) <i>inside</i> the " +"EPUB when EPUB is updated." + +#: dialogs.py:379 +msgid "Story URL(s) for anthology, one per line:" +msgstr "zzStory URL(s) for anthology, one per line:" + +#: dialogs.py:380 +msgid "" +"URLs for stories to include in the anthology, one per line.\n" +"Will take URLs from clipboard, but only valid URLs." +msgstr "" +"zzURLs for stories to include in the anthology, one per line.\n" +"Will take URLs from clipboard, but only valid URLs." + +#: dialogs.py:381 +msgid "If Story Already Exists in Anthology?" +msgstr "zzIf Story Already Exists in Anthology?" + +#: dialogs.py:382 +msgid "" +"What to do if there's already an existing story with the same URL in the " +"anthology." +msgstr "" +"zzWhat to do if there's already an existing story with the same URL in the " +"anthology." + +#: dialogs.py:391 +msgid "Story URL(s), one per line:" +msgstr "zzStory URL(s), one per line:" + +#: dialogs.py:392 +msgid "" +"URLs for stories, one per line.\n" +"Will take URLs from clipboard, but only valid URLs.\n" +"Add [1,5] after the URL to limit the download to chapters 1-5." +msgstr "" +"zzURLs for stories, one per line.\n" +"Will take URLs from clipboard, but only valid URLs.\n" +"Add [1,5] after the URL to limit the download to chapters 1-5." 
+ +#: dialogs.py:393 +msgid "If Story Already Exists?" +msgstr "zzIf Story Already Exists?" + +#: dialogs.py:394 +msgid "" +"What to do if there's already an existing story with the same URL or title " +"and author." +msgstr "" +"zzWhat to do if there's already an existing story with the same URL or title " +"and author." + +#: dialogs.py:494 +msgid "For Individual Books" +msgstr "zzFor Individual Books" + +#: dialogs.py:495 +msgid "Get URLs and go to dialog for individual story downloads." +msgstr "zzGet URLs and go to dialog for individual story downloads." + +#: dialogs.py:499 +msgid "For Anthology Epub" +msgstr "zzFor Anthology Epub" + +#: dialogs.py:500 +msgid "" +"Get URLs and go to dialog for Anthology download.\n" +"Requires %s plugin." +msgstr "" +"zzGet URLs and go to dialog for Anthology download.\n" +"Requires %s plugin." + +#: dialogs.py:505 dialogs.py:559 dialogs.py:586 +msgid "Cancel" +msgstr "zzCancel" + +#: dialogs.py:537 +msgid "Password" +msgstr "zzPassword" + +#: dialogs.py:538 +msgid "Author requires a password for this story(%s)." +msgstr "zzAuthor requires a password for this story(%s)." + +#: dialogs.py:543 +msgid "User/Password" +msgstr "zzUser/Password" + +#: dialogs.py:544 +msgid "%s requires you to login to download this story." +msgstr "zz%s requires you to login to download this story." + +#: dialogs.py:546 +msgid "User:" +msgstr "zzUser:" + +#: dialogs.py:550 +msgid "Password:" +msgstr "zzPassword:" + +#: dialogs.py:581 +msgid "Fetching metadata for stories..." +msgstr "zzFetching metadata for stories..." 
+ +#: dialogs.py:582 +msgid "Downloading metadata for stories" +msgstr "zzDownloading metadata for stories" + +#: dialogs.py:583 +msgid "Fetched metadata for" +msgstr "zzFetched metadata for" + +#: dialogs.py:653 ffdl_plugin.py:325 +msgid "About FanFictionDownLoader" +msgstr "zzAbout FanFictionDownLoader" + +#: dialogs.py:707 +msgid "Remove selected books from the list" +msgstr "zzRemove selected books from the list" + +#: dialogs.py:746 +msgid "Update Mode:" +msgstr "zzUpdate Mode:" + +#: dialogs.py:749 +msgid "" +"What sort of update to perform. May set default from plugin configuration." +msgstr "" +"zzWhat sort of update to perform. May set default from plugin configuration." + +#: dialogs.py:817 ffdl_plugin.py:1126 ffdl_plugin.py:1290 ffdl_plugin.py:1320 +msgid "Comment" +msgstr "zzComment" + +#: dialogs.py:885 +msgid "Are you sure you want to remove this book from the list?" +msgstr "zzAre you sure you want to remove this book from the list?" + +#: dialogs.py:887 +msgid "Are you sure you want to remove the selected %d books from the list?" +msgstr "zzAre you sure you want to remove the selected %d books from the list?" + +#: dialogs.py:913 +msgid "Note" +msgstr "zzNote" + +#: dialogs.py:955 +msgid "Select or Edit Reject Note." +msgstr "zzSelect or Edit Reject Note." + +#: dialogs.py:963 +msgid "Are you sure you want to remove this URL from the list?" +msgstr "zzAre you sure you want to remove this URL from the list?" + +#: dialogs.py:965 +msgid "Are you sure you want to remove the %d selected URLs from the list?" +msgstr "zzAre you sure you want to remove the %d selected URLs from the list?" + +#: dialogs.py:983 +msgid "List of Books to Reject" +msgstr "zzList of Books to Reject" + +#: dialogs.py:996 +msgid "" +"FFDL will remember these URLs and display the note and offer to reject them " +"if you try to download them again later." 
+msgstr "" +"zzFFDL will remember these URLs and display the note and offer to reject " +"them if you try to download them again later." + +#: dialogs.py:1010 +msgid "Remove selected URL(s) from the list" +msgstr "zzRemove selected URL(s) from the list" + +#: dialogs.py:1028 dialogs.py:1032 +msgid "This will be added to whatever note you've set for each URL above." +msgstr "zzThis will be added to whatever note you've set for each URL above." + +#: dialogs.py:1041 +msgid "Delete Books (including books without FanFiction URLs)?" +msgstr "zzDelete Books (including books without FanFiction URLs)?" + +#: dialogs.py:1042 +msgid "Delete the selected books after adding them to the Rejected URLs list." +msgstr "" +"zzDelete the selected books after adding them to the Rejected URLs list." + +#: ffdl_plugin.py:90 +msgid "Download FanFiction stories from various web sites" +msgstr "zzDownload FanFiction stories from various web sites" + +# This is what appears on the plugin button/menu when added to calibre's main toolbar or menu. 
+#: ffdl_plugin.py:120 +msgid "FanFictionDL" +msgstr "zzFanFictionDL" + +#: ffdl_plugin.py:243 +msgid "&Add New from URL(s)" +msgstr "zz&Add New from URL(s)" + +#: ffdl_plugin.py:245 +msgid "Add New FanFiction Book(s) from URL(s)" +msgstr "zzAdd New FanFiction Book(s) from URL(s)" + +#: ffdl_plugin.py:248 +msgid "&Update Existing FanFiction Book(s)" +msgstr "zz&Update Existing FanFiction Book(s)" + +#: ffdl_plugin.py:254 +msgid "Get Story URLs to Download from Web Page" +msgstr "zzGet Story URLs to Download from Web Page" + +#: ffdl_plugin.py:258 +msgid "&Make Anthology Epub Manually from URL(s)" +msgstr "zz&Make Anthology Epub Manually from URL(s)" + +#: ffdl_plugin.py:260 +msgid "Make FanFiction Anthology Epub Manually from URL(s)" +msgstr "zzMake FanFiction Anthology Epub Manually from URL(s)" + +#: ffdl_plugin.py:263 +msgid "&Update Anthology Epub" +msgstr "zz&Update Anthology Epub" + +#: ffdl_plugin.py:265 +msgid "Update FanFiction Anthology Epub" +msgstr "zzUpdate FanFiction Anthology Epub" + +#: ffdl_plugin.py:273 +msgid "Add to \"To Read\" and \"Send to Device\" Lists" +msgstr "zzAdd to \"To Read\" and \"Send to Device\" Lists" + +#: ffdl_plugin.py:275 +msgid "Remove from \"To Read\" and add to \"Send to Device\" Lists" +msgstr "zzRemove from \"To Read\" and add to \"Send to Device\" Lists" + +#: ffdl_plugin.py:277 ffdl_plugin.py:282 +msgid "Remove from \"To Read\" Lists" +msgstr "zzRemove from \"To Read\" Lists" + +#: ffdl_plugin.py:279 +msgid "Add Selected to \"Send to Device\" Lists" +msgstr "zzAdd Selected to \"Send to Device\" Lists" + +#: ffdl_plugin.py:281 +msgid "Add to \"To Read\" Lists" +msgstr "zzAdd to \"To Read\" Lists" + +#: ffdl_plugin.py:297 +msgid "Get URLs from Selected Books" +msgstr "zzGet URLs from Selected Books" + +#: ffdl_plugin.py:303 ffdl_plugin.py:396 +msgid "Get Story URLs from Web Page" +msgstr "zzGet Story URLs from Web Page" + +#: ffdl_plugin.py:308 +msgid "Reject Selected Books" +msgstr "zzReject Selected Books" + +#: 
ffdl_plugin.py:316 +msgid "&Configure Plugin" +msgstr "zz&Configure Plugin" + +#: ffdl_plugin.py:319 +msgid "Configure FanFictionDownLoader" +msgstr "zzConfigure FanFictionDownLoader" + +#: ffdl_plugin.py:322 +msgid "About Plugin" +msgstr "zzAbout Plugin" + +#: ffdl_plugin.py:379 +msgid "Cannot Update Reading Lists from Device View" +msgstr "zzCannot Update Reading Lists from Device View" + +#: ffdl_plugin.py:383 +msgid "No Selected Books to Update Reading Lists" +msgstr "zzNo Selected Books to Update Reading Lists" + +#: ffdl_plugin.py:407 ffdl_plugin.py:459 +msgid "List of Story URLs" +msgstr "zzList of Story URLs" + +#: ffdl_plugin.py:408 +msgid "No Valid Story URLs found on given page." +msgstr "zzNo Valid Story URLs found on given page." + +#: ffdl_plugin.py:423 +msgid "No Selected Books to Get URLs From" +msgstr "zzNo Selected Books to Get URLs From" + +#: ffdl_plugin.py:441 +msgid "Collecting URLs for stories..." +msgstr "zzCollecting URLs for stories..." + +#: ffdl_plugin.py:442 +msgid "Get URLs for stories" +msgstr "zzGet URLs for stories" + +#: ffdl_plugin.py:443 ffdl_plugin.py:490 ffdl_plugin.py:677 +msgid "URL retrieved" +msgstr "zzURL retrieved" + +#: ffdl_plugin.py:463 +msgid "List of URLs" +msgstr "zzList of URLs" + +#: ffdl_plugin.py:464 +msgid "No Story URLs found in selected books." +msgstr "zzNo Story URLs found in selected books." + +#: ffdl_plugin.py:480 +msgid "No Selected Books have URLs to Reject" +msgstr "zzNo Selected Books have URLs to Reject" + +#: ffdl_plugin.py:488 +msgid "Collecting URLs for Reject List..." +msgstr "zzCollecting URLs for Reject List..." + +#: ffdl_plugin.py:489 +msgid "Get URLs for Reject List" +msgstr "zzGet URLs for Reject List" + +#: ffdl_plugin.py:524 +msgid "Proceed to Remove?" +msgstr "zzProceed to Remove?" + +#: ffdl_plugin.py:524 +msgid "Rejecting FFDL URLs: None of the books selected have FanFiction URLs." +msgstr "" +"zzRejecting FFDL URLs: None of the books selected have FanFiction URLs." 
+
+# %s = EpubMerge
+#: ffdl_plugin.py:546
+msgid "Cannot Make Anthologys without %s"
+msgstr "zzCannot Make Anthologies without %s"
+
+#: ffdl_plugin.py:550 ffdl_plugin.py:654
+msgid "Cannot Update Books from Device View"
+msgstr "zzCannot Update Books from Device View"
+
+#: ffdl_plugin.py:554
+msgid "Can only update 1 anthology at a time"
+msgstr "zzCan only update 1 anthology at a time"
+
+#: ffdl_plugin.py:563
+msgid "Can only Update Epub Anthologies"
+msgstr "zzCan only Update Epub Anthologies"
+
+#: ffdl_plugin.py:581 ffdl_plugin.py:582
+msgid "Cannot Update Anthology"
+msgstr "zzCannot Update Anthology"
+
+#: ffdl_plugin.py:582
+msgid ""
+"Book isn't an FFDL Anthology or contains book(s) without valid FFDL URLs."
+msgstr ""
+"zzBook isn't an FFDL Anthology or contains book(s) without valid FFDL URLs."
+
+#: ffdl_plugin.py:640
+msgid ""
+"There are %d stories in the current anthology that are <b>not</b> going to "
+"be kept if you go ahead."
+msgstr ""
+"zzThere are %d stories in the current anthology that are <b>not</b> going to "
+"be kept if you go ahead."
+
+#: ffdl_plugin.py:641
+msgid "Story URLs that will be removed:"
+msgstr "zzStory URLs that will be removed:"
+
+#: ffdl_plugin.py:643
+msgid "Update anyway?"
+msgstr "zzUpdate anyway?"
+
+#: ffdl_plugin.py:644
+msgid "Stories Removed"
+msgstr "zzStories Removed"
+
+#: ffdl_plugin.py:661
+msgid "No Selected Books to Update"
+msgstr "zzNo Selected Books to Update"
+
+#: ffdl_plugin.py:675
+msgid "Collecting stories for update..."
+msgstr "zzCollecting stories for update..."
+
+#: ffdl_plugin.py:676
+msgid "Get stories for updates"
+msgstr "zzGet stories for updates"
+
+#: ffdl_plugin.py:686
+msgid "Update Existing List"
+msgstr "zzUpdate Existing List"
+
+#: ffdl_plugin.py:738
+msgid "Started fetching metadata for %s stories."
+msgstr "zzStarted fetching metadata for %s stories."
+
+#: ffdl_plugin.py:744
+msgid "No valid story URLs entered."
+msgstr "zzNo valid story URLs entered."
+ +#: ffdl_plugin.py:769 ffdl_plugin.py:775 +msgid "Reject URL?" +msgstr "zzReject URL?" + +#: ffdl_plugin.py:776 ffdl_plugin.py:794 +msgid "<b>%s</b> is on your Reject URL list:" +msgstr "zz<b>%s</b> is on your Reject URL list:" + +#: ffdl_plugin.py:778 +msgid "Click '<b>Yes</b>' to Reject." +msgstr "zzClick '<b>Yes</b>' to Reject." + +#: ffdl_plugin.py:779 ffdl_plugin.py:875 +msgid "Click '<b>No</b>' to download anyway." +msgstr "zzClick '<b>No</b>' to download anyway." + +#: ffdl_plugin.py:781 +msgid "Story on Reject URLs list (%s)." +msgstr "zzStory on Reject URLs list (%s)." + +#: ffdl_plugin.py:784 +msgid "Rejected" +msgstr "zzRejected" + +#: ffdl_plugin.py:787 +msgid "Remove Reject URL?" +msgstr "zzRemove Reject URL?" + +#: ffdl_plugin.py:793 +msgid "Remove URL from Reject List?" +msgstr "zzRemove URL from Reject List?" + +#: ffdl_plugin.py:796 +msgid "Click '<b>Yes</b>' to remove it from the list," +msgstr "zzClick '<b>Yes</b>' to remove it from the list," + +#: ffdl_plugin.py:797 +msgid "Click '<b>No</b>' to leave it on the list." +msgstr "zzClick '<b>No</b>' to leave it on the list." + +#: ffdl_plugin.py:814 +msgid "Cannot update non-epub format." +msgstr "zzCannot update non-epub format." + +#: ffdl_plugin.py:851 +msgid "Are You an Adult?" +msgstr "zzAre You an Adult?" + +#: ffdl_plugin.py:852 +msgid "" +"%s requires that you be an adult. Please confirm you are an adult in your " +"locale:" +msgstr "" +"zz%s requires that you be an adult. Please confirm you are an adult in your " +"locale:" + +#: ffdl_plugin.py:866 +msgid "Skip Story?" +msgstr "zzSkip Story?" + +#: ffdl_plugin.py:872 +msgid "Skip Anthology Story?" +msgstr "zzSkip Anthology Story?" + +#: ffdl_plugin.py:873 +msgid "" +"\"<b>%s</b>\" is in series \"<b><a href=\"%s\">%s</a></b>\" that you have an " +"anthology book for." +msgstr "" +"zz\"<b>%s</b>\" is in series \"<b><a href=\"%s\">%s</a></b>\" that you have " +"an anthology book for." 
+
+#: ffdl_plugin.py:874
+msgid "Click '<b>Yes</b>' to Skip."
+msgstr "zzClick '<b>Yes</b>' to Skip."
+
+#: ffdl_plugin.py:877
+msgid "Story in Series Anthology(%s)."
+msgstr "zzStory in Series Anthology(%s)."
+
+#: ffdl_plugin.py:882
+msgid "Skipped"
+msgstr "zzSkipped"
+
+#: ffdl_plugin.py:910
+msgid "Add"
+msgstr "zzAdd"
+
+#: ffdl_plugin.py:923
+msgid "Meta"
+msgstr "zzMeta"
+
+#: ffdl_plugin.py:956
+msgid "Skipping duplicate story."
+msgstr "zzSkipping duplicate story."
+
+#: ffdl_plugin.py:959
+msgid ""
+"More than one identical book by Identifer URL or title/author(s)--can't tell "
+"which book to update/overwrite."
+msgstr ""
+"zzMore than one identical book by Identifier URL or title/author(s)--can't "
+"tell which book to update/overwrite."
+
+#: ffdl_plugin.py:970
+msgid "Update"
+msgstr "zzUpdate"
+
+#: ffdl_plugin.py:978 ffdl_plugin.py:985
+msgid "Change Story URL?"
+msgstr "zzChange Story URL?"
+
+#: ffdl_plugin.py:986
+msgid ""
+"<b>%s</b> by <b>%s</b> is already in your library with a different source "
+"URL:"
+msgstr ""
+"zz<b>%s</b> by <b>%s</b> is already in your library with a different source "
+"URL:"
+
+#: ffdl_plugin.py:987
+msgid "In library: <a href=\"%(liburl)s\">%(liburl)s</a>"
+msgstr "zzIn library: <a href=\"%(liburl)s\">%(liburl)s</a>"
+
+#: ffdl_plugin.py:988 ffdl_plugin.py:1002
+msgid "New URL: <a href=\"%(newurl)s\">%(newurl)s</a>"
+msgstr "zzNew URL: <a href=\"%(newurl)s\">%(newurl)s</a>"
+
+#: ffdl_plugin.py:989
+msgid "Click '<b>Yes</b>' to update/overwrite book with new URL."
+msgstr "zzClick '<b>Yes</b>' to update/overwrite book with new URL."
+
+#: ffdl_plugin.py:990
+msgid "Click '<b>No</b>' to skip updating/overwriting this book."
+msgstr "zzClick '<b>No</b>' to skip updating/overwriting this book."
+
+#: ffdl_plugin.py:992 ffdl_plugin.py:999
+msgid "Download as New Book?"
+msgstr "zzDownload as New Book?"
+
+#: ffdl_plugin.py:1000
+msgid ""
+"<b>%s</b> by <b>%s</b> is already in your library with a different source "
+"URL."
+msgstr ""
+"zz<b>%s</b> by <b>%s</b> is already in your library with a different source "
+"URL."
+
+#: ffdl_plugin.py:1001
+msgid ""
+"You chose not to update the existing book. Do you want to add a new book "
+"for this URL?"
+msgstr ""
+"zzYou chose not to update the existing book. Do you want to add a new book "
+"for this URL?"
+
+#: ffdl_plugin.py:1003
+msgid "Click '<b>Yes</b>' to a new book with new URL."
+msgstr "zzClick '<b>Yes</b>' to add a new book with new URL."
+
+#: ffdl_plugin.py:1004
+msgid "Click '<b>No</b>' to skip URL."
+msgstr "zzClick '<b>No</b>' to skip URL."
+
+#: ffdl_plugin.py:1010
+msgid "Update declined by user due to differing story URL(%s)"
+msgstr "zzUpdate declined by user due to differing story URL(%s)"
+
+#: ffdl_plugin.py:1013
+msgid "Different URL"
+msgstr "zzDifferent URL"
+
+#: ffdl_plugin.py:1018
+msgid "Metadata collected."
+msgstr "zzMetadata collected."
+
+#: ffdl_plugin.py:1034
+msgid "Already contains %d chapters."
+msgstr "zzAlready contains %d chapters."
+
+#: ffdl_plugin.py:1039
+msgid ""
+"Existing epub contains %d chapters, web site only has %d. Use Overwrite to "
+"force update."
+msgstr ""
+"zzExisting epub contains %d chapters, web site only has %d. Use Overwrite to "
+"force update."
+
+#: ffdl_plugin.py:1041
+msgid ""
+"FFDL doesn't recognize chapters in existing epub, epub is probably from a "
+"different source. Use Overwrite to force update."
+msgstr ""
+"zzFFDL doesn't recognize chapters in existing epub, epub is probably from a "
+"different source. Use Overwrite to force update."
+
+#: ffdl_plugin.py:1053
+msgid "Not Overwriting, web site is not newer."
+msgstr "zzNot Overwriting, web site is not newer."
+
+#: ffdl_plugin.py:1122
+msgid "None of the <b>%d</b> URLs/stories given can be/need to be downloaded."
+msgstr ""
+"zzNone of the <b>%d</b> URLs/stories given can be/need to be downloaded."
+
+#: ffdl_plugin.py:1123 ffdl_plugin.py:1286 ffdl_plugin.py:1316
+msgid "See log for details."
+msgstr "zzSee log for details."
+
+#: ffdl_plugin.py:1124
+msgid "Proceed with updating your library(Error Column, if configured)?"
+msgstr "zzProceed with updating your library(Error Column, if configured)?"
+
+#: ffdl_plugin.py:1131 ffdl_plugin.py:1298
+msgid "Bad"
+msgstr "zzBad"
+
+#: ffdl_plugin.py:1139
+msgid "FFDL download ended"
+msgstr "zzFFDL download ended"
+
+#: ffdl_plugin.py:1139 ffdl_plugin.py:1341
+msgid "FFDL log"
+msgstr "zzFFDL log"
+
+#: ffdl_plugin.py:1147
+msgid "Download FanFiction Book"
+msgstr "zzDownload FanFiction Book"
+
+#: ffdl_plugin.py:1154
+msgid "Starting %d FanFictionDownLoads"
+msgstr "zzStarting %d FanFictionDownLoads"
+
+#: ffdl_plugin.py:1184
+msgid "Story Details:"
+msgstr "zzStory Details:"
+
+#: ffdl_plugin.py:1187
+msgid "Error Updating Metadata"
+msgstr "zzError Updating Metadata"
+
+#: ffdl_plugin.py:1188
+msgid ""
+"An error has occurred while FFDL was updating calibre's metadata for <a "
+"href='%s'>%s</a>."
+msgstr ""
+"zzAn error has occurred while FFDL was updating calibre's metadata for <a "
+"href='%s'>%s</a>."
+
+#: ffdl_plugin.py:1189
+msgid "The ebook has been updated, but the metadata has not."
+msgstr "zzThe ebook has been updated, but the metadata has not."
+
+#: ffdl_plugin.py:1241
+msgid "Finished Adding/Updating %d books."
+msgstr "zzFinished Adding/Updating %d books."
+
+#: ffdl_plugin.py:1249
+msgid "Starting auto conversion of %d books."
+msgstr "zzStarting auto conversion of %d books."
+
+#: ffdl_plugin.py:1270
+msgid "No Good Stories for Anthology"
+msgstr "zzNo Good Stories for Anthology"
+
+#: ffdl_plugin.py:1271
+msgid ""
+"No good stories/updates where downloaded, Anthology creation/update aborted."
+msgstr ""
+"zzNo good stories/updates were downloaded, Anthology creation/update "
+"aborted."
+ +#: ffdl_plugin.py:1276 ffdl_plugin.py:1315 +msgid "FFDL found <b>%s</b> good and <b>%s</b> bad updates." +msgstr "zzFFDL found <b>%s</b> good and <b>%s</b> bad updates." + +#: ffdl_plugin.py:1283 +msgid "" +"Are you sure you want to continue with creating/updating this Anthology?" +msgstr "" +"zzAre you sure you want to continue with creating/updating this Anthology?" + +#: ffdl_plugin.py:1284 +msgid "Any updates that failed will <b>not</b> be included in the Anthology." +msgstr "" +"zzAny updates that failed will <b>not</b> be included in the Anthology." + +#: ffdl_plugin.py:1285 +msgid "However, if there's an older version, it will still be included." +msgstr "zzHowever, if there's an older version, it will still be included." + +#: ffdl_plugin.py:1288 +msgid "Proceed with updating this anthology and your library?" +msgstr "zzProceed with updating this anthology and your library?" + +#: ffdl_plugin.py:1296 +msgid "Good" +msgstr "zzGood" + +#: ffdl_plugin.py:1317 +msgid "Proceed with updating your library?" +msgstr "zzProceed with updating your library?" + +#: ffdl_plugin.py:1341 +msgid "FFDL download complete" +msgstr "zzFFDL download complete" + +#: ffdl_plugin.py:1354 +msgid "Merging %s books." +msgstr "zzMerging %s books." + +#: ffdl_plugin.py:1394 +msgid "FFDL Adding/Updating books." +msgstr "zzFFDL Adding/Updating books." + +#: ffdl_plugin.py:1401 +msgid "Updating calibre for FanFiction stories..." +msgstr "zzUpdating calibre for FanFiction stories..." + +#: ffdl_plugin.py:1402 +msgid "Update calibre for FanFiction stories" +msgstr "zzUpdate calibre for FanFiction stories" + +#: ffdl_plugin.py:1411 +msgid "Adding/Updating %s BAD books." +msgstr "zzAdding/Updating %s BAD books." + +#: ffdl_plugin.py:1420 +msgid "Updating calibre for BAD FanFiction stories..." +msgstr "zzUpdating calibre for BAD FanFiction stories..." 
+ +#: ffdl_plugin.py:1421 +msgid "Update calibre for BAD FanFiction stories" +msgstr "zzUpdate calibre for BAD FanFiction stories" + +#: ffdl_plugin.py:1447 +msgid "Adding format to book failed for some reason..." +msgstr "zzAdding format to book failed for some reason..." + +#: ffdl_plugin.py:1450 +msgid "Error" +msgstr "zzError" + +#: ffdl_plugin.py:1723 +msgid "" +"You configured FanFictionDownLoader to automatically update Reading Lists, " +"but you don't have the %s plugin installed anymore?" +msgstr "" +"zzYou configured FanFictionDownLoader to automatically update Reading Lists, " +"but you don't have the %s plugin installed anymore?" + +#: ffdl_plugin.py:1735 +msgid "" +"You configured FanFictionDownLoader to automatically update \"To Read\" " +"Reading Lists, but you don't have any lists set?" +msgstr "" +"zzYou configured FanFictionDownLoader to automatically update \"To Read\" " +"Reading Lists, but you don't have any lists set?" + +#: ffdl_plugin.py:1745 ffdl_plugin.py:1763 +msgid "" +"You configured FanFictionDownLoader to automatically update Reading List " +"'%s', but you don't have a list of that name?" +msgstr "" +"zzYou configured FanFictionDownLoader to automatically update Reading List " +"'%s', but you don't have a list of that name?" + +#: ffdl_plugin.py:1751 +msgid "" +"You configured FanFictionDownLoader to automatically update \"Send to Device" +"\" Reading Lists, but you don't have any lists set?" +msgstr "" +"zzYou configured FanFictionDownLoader to automatically update \"Send to " +"Device\" Reading Lists, but you don't have any lists set?" + +#: ffdl_plugin.py:1871 +msgid "No story URL found." +msgstr "zzNo story URL found." + +#: ffdl_plugin.py:1874 +msgid "Not Found" +msgstr "zzNot Found" + +#: ffdl_plugin.py:1880 +msgid "URL is not a valid story URL." +msgstr "zzURL is not a valid story URL." 
+ +#: ffdl_plugin.py:1883 +msgid "Bad URL" +msgstr "zzBad URL" + +#: ffdl_plugin.py:2018 +msgid "Anthology containing:" +msgstr "zzAnthology containing:" + +# title by author +#: ffdl_plugin.py:2019 +msgid "%s by %s" +msgstr "zz%s by %s" + +#: ffdl_plugin.py:2038 +msgid " Anthology" +msgstr "zz Anthology" + +#: ffdl_plugin.py:2075 +msgid "(was set, removed for security)" +msgstr "zz(was set, removed for security)" diff --git a/cron.yaml b/cron.yaml new file mode 100644 index 00000000..e72999f4 --- /dev/null +++ b/cron.yaml @@ -0,0 +1,10 @@ +cron: +- description: cleanup job + url: /r3m0v3r + schedule: every 2 hours + +# There's a bug in the Python 2.7 runtime that prevents this from +# working properly. In theory, there should never be orphans anyway. +#- description: orphan cleanup job +# url: /r3m0v3rOrphans +# schedule: every 4 hours diff --git a/css/index.css b/css/index.css new file mode 100644 index 00000000..eae546b7 --- /dev/null +++ b/css/index.css @@ -0,0 +1,73 @@ +body +{ + font: 0.9em "Helvetica Neue", Arial, Helvetica, Geneva, sans-serif; +} + +#main +{ + width: 60%; + margin-left: 20%; + background-color: #dae6ff; + padding: 2em; +} + +#greeting +{ +# margin-bottom: 1em; + border-color: #efefef; +} + + + +#logpassword:hover, #logpasswordtable:hover, #urlbox:hover, #typebox:hover, #helpbox:hover, #yourfile:hover +{ + border: thin solid #fffeff; +} + +h1 +{ + text-decoration: none; +} + +#logpasswordtable +{ + padding: 1em; +} + +#logpassword, #logpasswordtable { +// display: none; +} + +#urlbox, #typebox, #logpasswordtable, #logpassword, #helpbox, #yourfile +{ + margin: 1em; + padding: 1em; + border: thin dotted #fffeff; +} + +div.field +{ + margin-bottom: 0.5em; +} + +#submitbtn +{ + padding: 1em; +} + +#typelabel +{ +} + +#typeoptions +{ + margin-top: 0.5em; +} + +#error +{ + color: #f00; +} +.recent { + font-size: large; +} diff --git a/defaults.ini b/defaults.ini new file mode 100644 index 00000000..0e5b52d7 --- /dev/null +++ b/defaults.ini @@ -0,0 
+1,1997 @@ +# Copyright 2013 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +[defaults] + +## [defaults] section applies to all formats and sites but may be +## overridden at several levels. Example: + +## [defaults] +## titlepage_entries: category,genre, status +## [www.whofic.com] +## # overrides defaults. +## titlepage_entries: category,genre, status,dateUpdated,rating +## [epub] +## # overrides defaults & site section +## titlepage_entries: category,genre, status,datePublished,dateUpdated,dateCreated +## [www.whofic.com:epub] +## # overrides defaults, site section & format section +## titlepage_entries: category,genre, status,datePublished +## [overrides] +## # overrides all other sections +## titlepage_entries: category + +## Some sites also require the user to confirm they are adult for +## adult content. Uncomment by removing '#' in front of is_adult. +#is_adult:true + +## All available titlepage_entries and the label used for them: +## <entryname>_label:<label> +## Labels may be customized. +title_label:Title +storyUrl_label:Story URL +description_label:Summary +author_label:Author +authorUrl_label:Author URL +## epub, txt, html +formatname_label:File Format +## .epub, .txt, .html +formatext_label:File Extension +## Category and Genre have overlap, depending on the site. +## Sometimes Harry Potter is a category and Fantasy a genre. 
(fanfiction.net) +## Sometimes Fantasy is category *and* a genre (fictionpress.com) +## Sometimes there are multiple categories and/or genres. +category_label:Category +genre_label:Genre +language_label:Language +characters_label:Characters +ships_label:Relationships +series_label:Series +seriesUrl_label:Series URL +## seriesHTML is series as a link to seriesUrl. +seriesHTML_label:Series +## Completed/In-Progress +status_label:Status +## Dates story first published, last updated, and downloaded(last with time). +datePublished_label:Published +dateUpdated_label:Updated +dateCreated_label:Packaged +## Rating depends on the site. Some use K,T,M,etc, and some PG,R,NC-17 +rating_label:Rating +## Also depends on the site. +warnings_label:Warnings +numChapters_label:Chapters +numWords_label:Words +## www.fanfiction.net, fictionalley.com, etc. +site_label:Publisher +## ffnet, fpcom, etc. +siteabbrev_label:Site Abbrev +## The site's unique story/author identifier. Usually a number. +storyId_label:Story ID +authorId_label:Author ID +## Primarily to put specific values in dc:subject tags for epub. Will +## show up in Calibre as tags. Also carried into mobi when converted. +extratags_label:Extra Tags +## The version of fanficdownloader +version_label:FFDL Version + +## Date formats used by FFDL. Published and Update don't have time. +## See http://docs.python.org/library/datetime.html#strftime-strptime-behavior +## Note that ini format requires % to be escaped as %%. +dateCreated_format:%%Y-%%m-%%d %%H:%%M:%%S +datePublished_format:%%Y-%%m-%%d +dateUpdated_format:%%Y-%%m-%%d + +## items to include in the title page +## Empty metadata entries will *not* appear, even if in the list. +## You can include extra text or HTML that will be included as-is in +## the title page. Eg: titlepage_entries: ...,<br />,summary,<br />,... +## All current formats already include title and author. 
+titlepage_entries: seriesHTML,category,genre,language,characters,ships,status,datePublished,dateUpdated,dateCreated,rating,warnings,numChapters,numWords,site,description + +## Try to collect series name and number of this story in series. +## Some sites (ab)use 'series' for reading lists and personal +## collections. This lets us turn it on and off by site without +## keeping a lengthy titlepage_entries per site and prevents it +## updating in the plugin. +collect_series: true + +## include title page as first page. +include_titlepage: true + +## include a TOC page before the story text +include_tocpage: true + +## website encoding(s) In theory, each website reports the character +## encoding they use for each page. In practice, some sites report it +## incorrectly. Each adapter has a default list, usually "utf8, +## Windows-1252" or "Windows-1252, utf8", but this will let you +## explicitly set the encoding and order if you need to. The special +## value 'auto' will call chardet and use the encoding it reports if +## it has +90% confidence. 'auto' is not reliable. +#website_encodings: auto, utf8, Windows-1252 + +## python string Template, string with ${title}, ${author} etc, same as titlepage_entries +## Can include directories. ${formatext} will be added if not in filename somewhere. +#output_filename: books/${title}-${siteabbrev}_${storyId}${formatext} +#output_filename: books/${formatname}/${siteabbrev}/${authorId}/${title}-${siteabbrev}_${storyId}${formatext} +output_filename: ${title}-${siteabbrev}_${storyId}${formatext} + +## Make directories as needed. +make_directories: true + +## Always overwrite output files. Otherwise, the downloader checks +## the timestamp on the existing file and only overwrites if the story +## has been updated more recently. Command line version only +#always_overwrite: true + +## put output (with output_filename) in a zip file zip_filename. +zip_output: false + +## Can include directories. 
.zip will be added if not in name somewhere +zip_filename: ${title}-${siteabbrev}_${storyId}${formatext}.zip + +## Normally, try to make the filenames 'safe' by removing invalid +## filename chars. Applies to default_cover_image, output_filename & +## zip_filename. +allow_unsafe_filename: false + +## The regex pattern of 'unsafe' filename chars for above. +#output_filename_safepattern:[^a-zA-Z0-9_\. \[\]\(\)&'-]+ + +## entries to make epub subjects and calibre tags +## lastupdate creates two tags: "Last Update Year/Month: %Y/%m" and "Last Update: %Y/%m/%d" +include_subject_tags: extratags, genre, category, characters, ships, lastupdate, status + +## extra tags (comma separated) to include, primarily for epub. +extratags: FanFiction + +## extra categories, genres, characters, ships and warnings can be +## configured. Used primarily for sites that are dedicated to a genre +## or 'ship and so don't included it for every story. +#extracategories: +#extragenres: +#extracharacters: +#extraships: +#extrawarnings: + +## Add this to genre if there's more than one category. +#add_genre_when_multi_category: Crossover + +## default_value_(entry) can be used to set the value for a metadata +## entry when no value has been found on the site. For example, some +## sites doesn't have a status metadatum. If uncommented, this will +## use 'Unknown' for status when no status is found. +#default_value_status:Unknown +## Can also be used for other metadata values +#default_value_category:FanFiction + +## number of seconds to sleep between calls to the story site. May by +## useful if pulling large numbers of stories or if the site is slow. +#slow_down_sleep_time:0.5 + +## How long to wait for each HTTP connection to finish. Longer times +## are better for sites that are slow to respond. Shorter times +## prevent excessive wait when your network or the site is down. 
+connect_timeout:60.0
+
+## For use only with stand-alone CLI version--run a command on the
+## generated file after it's produced. All of the titlepage_entries
+## values are available, plus output_filename.
+#post_process_cmd: addbook -f "${output_filename}" -t "${title}"
+
+## Use regular expressions to find and replace (or remove) metadata.
+## For example, you could change Sci-Fi=>SF, remove *-Centered tags,
+## etc. See http://docs.python.org/library/re.html (look for re.sub)
+## for regexp details.
+## Make sure to keep at least one space at the start of each line and
+## to escape % to %%, if used.
+## Two, three or five part lines. Two part lines affect everything.
+## Three part lines affect only those key(s) lists.
+## *Five* part lines. Affect only when trailing conditional key=>regexp matches
+## metakey[,metakey]=>pattern=>replacement[&&conditionalkey=>regexp]
+## Note that if metakey == conditionalkey the conditional is ignored.
+## You can use \s in the replacement to add explicit spaces. (The config parser
+## tends to discard trailing spaces.)
+## replace_metadata <entry>_LIST options: FFDL replace_metadata lines
+## operate on individual list items for list entries. But if you
+## want to do a replacement on the joined string for the whole list,
+## you can do so by using <entry>_LIST. Example, if you added
+## calibre_author: calibre_author_LIST=>^(.{,100}).*$=>\1
+#replace_metadata:
+# genre,category=>Sci-Fi=>SF
+# Puella Magi Madoka Magica.* => Madoka
+# Comedy=>Humor
+# Crossover: (.*)=>\1
+# title=>(.*)Great(.*)=>\1Moderate\2
+# .*-Centered=>
+# characters=>Sam W\.=>Sam Witwicky&&category=>Transformers
+# characters=>Sam W\.=>Sam Winchester&&category=>Supernatural
+
+## Include/Exclude metadata
+##
+## You can use the include/exclude metadata features to either limit
+## the values of particular metadata lists to specific values or to
+## exclude specific values. 
Further, you can conditionally apply each +## line depending on other metadata, use exact strings or regular +## expressions(regex) to match values, and negate matches. +## +## The settings are: +## include_metadata_pre +## exclude_metadata_pre +## include_metadata_post +## exclude_metadata_post +## +## The form of each line is: +## metakey[,metakey]==exactvalue +## metakey[,metakey]=~regex +## metakey[,metakey]==exactvalue&&conditionalkey==exactcondvalue +## metakey[,metakey]=~regex&&conditionalkey==exactcondvalue +## metakey[,metakey]==exactvalue&&conditionalkey=~condregex +## +## This is fairly complicated, so it's documented on its own wiki +## page: +## https://code.google.com/p/fanficdownloader/wiki/InExcludeMetadataFeature + +## Some readers don't show horizontal rule (<hr />) tags correctly. +## This replaces them all with a centered '* * *'. (Note centering +## doesn't work on some devices either.) +#replace_hr: false + +## Some sites/authors/stories use br tags instead of p tags for +## paragraphs. This feature uses some heuristics to find and replace +## br paragraphs with p tags while preserving scene breaks. +#replace_br_with_p: false + +## If set false, the summary will have all html stripped. +## Both this and include_images must be true to get images in the +## summary. +keep_summary_html:true + +## If set true, any style attributes on tags in the story HTML will be +## kept. Useful for keeping extra colors & formatting from original. +#keep_style_attr: false + +## Don't like the numbers at the start of chapter titles on some +## sites? You can use strip_chapter_numbers to strip them off. Just +## want to make them all look the same? Strip them off, then add them +## back on with add_chapter_numbers:true. Only want them added back +## on for Table of Contents(toc)? Use add_chapter_numbers:toconly. +## (toconly doesn't work on mobi output.) Don't like the way it +## strips numbers or adds them back? 
See chapter_title_strip_pattern +## and chapter_title_add_pattern. +strip_chapter_numbers:false + +## add_chapter_numbers can be true, false or toconly +## (Note number is not added when there's only one chapter.) +add_chapter_numbers:false + +## (Two versions of chapter_title_strip_pattern are shown below. You +## should only have one uncommented.) +## This version will remove the leading number from: +## "1." => "" +## "1. The Beginning" => "The Beginning" +## "1: Start" => "Start" +## "2, Chapter the second" => "Chapter the second" +## etc +chapter_title_strip_pattern:^[0-9]+[\.: -]+ + +## This version will strip all of the above *plus* remove 'Chapter 1': +## "Chapter 1" => "" +## "1. Chapter 1" => "" +## "1. Chapter 1, Bob's First Clue" => "Bob's First Clue" +## "Chapter 2 - Pirates Place" => "Pirates Place" +## etc +#chapter_title_strip_pattern:^([0-9]+[\.: -]+)?(Chapter *[0-9]+[\.:, -]*)? + +## Uses a python template substitution. The ${index} is the 'chapter' +## number and ${title} is the chapter title, after applying +## chapter_title_strip_pattern. Those are the only variables available. +## "The Beginning" => "1. The Beginning" +chapter_title_add_pattern:${index}. ${title} + +## Reorder ships so b/a and c/b/a become a/b and a/b/c. Only separates +## on '/', so use replace_metadata to change separator first if +## needed. Something like: ships=>[ ]*(/|&|&)[ ]*=>/ You can use +## ships_LIST to change the / back to something else if you want. +sort_ships:false + +## join_string_<entry> options -- FFDL list entries are comma +## separated by default. You can use this to change that. For example, +## if you want authors separated with ' & ' instead, use +## join_string_calibre_author:\s&\s. (\s == space) +#join_string_author:,\s + +## keep_in_order_<entry> options: FFDL sorts list entries by default +## (except for author/authorUrl/authorId). But if you want to use an +## extra entry derived from author, it ends up sorted. 
For example, if +## you added calibre_author: keep_in_order_calibre_author:true +#keep_in_order_author:true + +## User-agent +user_agent:FFDL/2.0 + +## Each output format has a section that overrides [defaults] +[html] + +## include images from img tags in the body and summary of +## stories. Images will be converted to jpg for size if possible. +## include_images is *only* available in epub and html output formats. +## include_images is *not* available in the web service in any format. +#include_images:false + +## Note that it's *highly* recommended to use zipfile output or story +## unique destination directories to avoid overwriting images. +#output_filename: books/${author}/${title}/${title}-${siteabbrev}_${authorId}_${storyId}${formatext} +#zip_output: false + +## This switch prevents FFDL from doing any processing on the images. +## Usually they would be converted to jpg, resized and optionally made +## grayscale. +no_image_processing: true + +## output background color--only used by html and epub (and ignored in +## epub by many readers). Included below in output_css--will be +## ignored if not in output_css. +background_color: ffffff + +## Allow customization of CSS. Make sure to keep at least one space +## at the start of each line and to escape % to %%. Also need +## background_color to be in the same section, if included in CSS. +output_css: + body { background-color: #%(background_color)s; } + .CI { + text-align:center; + margin-top:0px; + margin-bottom:0px; + padding:0px; + } + .center {text-align: center;} + .cover {text-align: center;} + .full {width: 100%%; } + .quarter {width: 25%%; } + .smcap {font-variant: small-caps;} + .u {text-decoration: underline;} + .bold {font-weight: bold;} + +[txt] +## Add URLs since there aren't links. 
+titlepage_entries: series,seriesUrl,category,genre,language,characters,ships,status,datePublished,dateUpdated,dateCreated,rating,warnings,numChapters,numWords,site,storyUrl, authorUrl, description + +## Width to word wrap text output. 0 indicates no wrapping. +wrap_width: 78 + +## use \r\n for line endings, the windows convention. text output only. +windows_eol: true + +[epub] + +## epub is already a zip file. +zip_output: false + +## epub carries the TOC in metadata. +## mobi generated from epub by calibre will have a TOC at the end. +include_tocpage: false + +## include a Update Log page before the story text. If 'true', the +## log will be updated each time the epub is and all the metadata +## fields that have changed since the last update (typically +## dateUpdated,numChapters,numWords at a minimum) will be shown. +## Great for tracking when chapters came out and when the description, +## etc changed. +include_logpage: false +## If set to 'smart', logpage will only be included if the story is +## status:In-Progress or already had a logpage. That way you don't +## end up with Completed stories that have just one logpage entry. +#include_logpage: smart + +## items to include in the log page Empty metadata entries, or those +## that haven't changed since the last update, will *not* appear, even +## if in the list. You can include extra text or HTML that will be +## included as-is in each log entry. Eg: logpage_entries: ...,<br />, +## summary,<br />,... +logpage_entries: dateCreated,datePublished,dateUpdated,numChapters,numWords,status,series,title,author,description,category,genre,rating,warnings + +## epub->mobi conversions typically don't like tables. +titlepage_use_table: false + +## When using tables, make these span both columns. +wide_titlepage_entries: description, storyUrl, authorUrl, seriesUrl + +## output background color--only used by html and epub (and ignored in +## epub by many readers). 
Included below in output_css--will be +## ignored if not in output_css. +background_color: ffffff + +## Allow customization of CSS. Make sure to keep at least one space +## at the start of each line and to escape % to %%. Also need +## background_color to be in the same section, if included in CSS. +## 'adobe-hyphenate: none;' prevents hyphenation on newer Nooks +## STR(wG) (1.2.1+ for sure) +output_css: + body { background-color: #%(background_color)s; + text-align: justify; + margin: 2%%; + adobe-hyphenate: none; } + pre { font-size: x-small; } + sml { font-size: small; } + h1 { text-align: center; } + h2 { text-align: center; } + h3 { text-align: center; } + h4 { text-align: center; } + h5 { text-align: center; } + h6 { text-align: center; } + .CI { + text-align:center; + margin-top:0px; + margin-bottom:0px; + padding:0px; + } + .center {text-align: center;} + .cover {text-align: center;} + .full {width: 100%%; } + .quarter {width: 25%%; } + .smcap {font-variant: small-caps;} + .u {text-decoration: underline;} + .bold {font-weight: bold;} + +## include images from img tags in the body and summary of +## stories. Images will be converted to jpg for size if possible. +## include_images is *only* available in epub and html output format. +## include_images is *not* available in the web service in any format. +#include_images:false + +## If set, the first image found will be made the cover image. If +## keep_summary_html is true, any images in summary will be before any +## in chapters. +#make_firstimage_cover: false + +## If set, the epub will never have a cover, even include_images is on +## and the site has specific cover images. +#never_make_cover: false + +## If set, and there isn't already a cover image from the adapter or +## from make_firstimage_cover, this image will be made the cover. +## It can be either a 'file:' or 'http:' url. 
+## Note that if you enable make_firstimage_cover in [epub], but want +## to use default_cover_image for a specific site, use the site:format +## section, for example: [ficwad.com:epub] +## default_cover_image is a python string Template string with +## ${title}, ${author} etc, same as titlepage_entries. Unless +## allow_unsafe_filename is true, invalid filename chars will be +## removed from metadata fields +#default_cover_image:file:///C:/Users/username/Desktop/nook/images/icon.png +#default_cover_image:file:///C:/Users/username/Desktop/nook/images/${title}/icon.png +#default_cover_image:http://www.somesite.com/someimage.gif + +## some sites include images that we don't ever want becoming the +## cover image. This lets you exclude them. +#cover_exclusion_regexp:/stories/999/images/.*?_trophy.png + +## Resize images down to width, height, preserving aspect ratio. +## Nook size, with margin. +image_max_size: 580, 725 + +## Change image to grayscale, if graphics library allows, to save +## space. Transparency removed as if remove_transparency: true +#grayscale_images: false + +## jpg or png +## -- jpg produces smaller images, and may be supported by more +## readers, but it's older and doesn't allow transparency. +## Transparency removed as if remove_transparency: true +## -- png is newer but does allow transparency, but only in CLI. +## It doesn't work in calibre PI due to limitations of the API. +convert_images_to: jpg + +## Remove transparency and fill with background_color if true. +remove_transparency: true + +## if the <img> tag doesn't have a div or a p around it, nook gets +## confused and displays it on every page after that under the text +## for the rest of the chapter. I doubt adding a div around the img +## will break any other readers, but in case it does, the fix can be +## turned off. This setting is not used if replace_br_with_p is +## true--replace_br_with_p also fixes the problem. 
+nook_img_fix:true + +[mobi] +## mobi TOC cannot be turned off right now. +#include_tocpage: true + +## Each site has a section that overrides [defaults]. +## test1.com specifically is not a real story site. Instead, +## it is a fake site for testing configuration and output. It uses +## URLs like: http://test1.com?sid=12345 +[test1.com] +extratags: FanFiction,Testing +# extracategories:Fafner +# extragenres:Romance,Fluff +# extracharacters:Reginald Smythe-Smythe,Mokona,Harry P. +# extraships:Smythe-Smythe/Mokona +# extrawarnings:Extreme Bogosity + +# extra_valid_entries:metaA,metaB,metaC,listX,listY,listZ,compositeJ,compositeK,compositeL + +# include_in_compositeJ:dateCreated +# include_in_compositeK:metaC,listX,compositeL,compositeJ,compositeK,listZ +# include_in_compositeL:ships,metaA,listZ,datePublished,dateUpdated, + +# extra_titlepage_entries: metaA,metaB,metaC,listX,listY,listZ,compositeJ,compositeK,compositeL +# extra_logpage_entries: metaA,metaB,metaC,listX,listY,listZ,compositeJ,compositeK,compositeL +# extra_subject_tags: metaA,metaB,metaC + +# replace_metadata: +# compositeL=>Val=>VALUE +# series,extratags=>Test=>Plan +# Puella Magi Madoka Magica.* => Madoka +# Comedy=>Humor +# Crossover: (.*)=>\1 +# (.*)Great(.*)=>\1Moderate\2 +# .*-Centered=> +# characters=>Harry P\.=>Harry Potter + + +## If necessary, you can define [<site>:<format>] sections to +## customize the formats differently for the same site. Overrides +## defaults, format and site. +[test1.com:txt] +extratags: FanFiction,Testing,Text + +[test1.com:html] +extratags: FanFiction,Testing,HTML + +[archive.skyehawke.com] + +[archiveofourown.org] +## Some sites require login (or login for some rated stories) The +## program can prompt you, or you can save it in config. In +## commandline version, this should go in your personal.ini, not +## defaults.ini. +#username:YourName +#password:yourpassword + +## Some sites also require the user to confirm they are adult for +## adult content. 
In commandline version, this should go in your +## personal.ini, not defaults.ini. +#is_adult:true + +## archiveofourown.org stories allow chapters to be added out of +## order. So the newest chapter may not be the last one. FFDL update +## doesn't like that. If do_update_hook is uncommented and set true, +## the adapter will discard all existing chapters from the newest one +## on when updating to enforce accurate chapters. +#do_update_hook:false + +## AO3 adapter defines a few extra metadata entries. +## If there's ever more than 4 series, add series04,series04Url etc. +extra_valid_entries:fandoms,freeformtags,freefromtags,ao3categories,comments,kudos,hits,bookmarks,collections,series00,series01,series02,series03,series00Url,series01Url,series02Url,series03Url +fandoms_label:Fandoms +freeformtags_label:Freeform Tags +freefromtags_label:Freeform Tags +ao3categories_label:AO3 Categories +comments_label:Comments +kudos_label:Kudos +hits_label:Hits +collections_label:Collections +bookmarks_label:Bookmarks + +## freeformtags was previously typo'ed as freefromtags. This way, +## freefromtags will still work for people who've used it. +include_in_freefromtags:freeformtags + +## adds to titlepage_entries instead of replacing it. +#extra_titlepage_entries: fandoms,freeformtags,ao3categories,comments,kudos,hits,bookmarks,series00,series01,series02,series03,series00Url,series01Url,series02Url,series03Url + +## adds to include_subject_tags instead of replacing it. +#extra_subject_tags:fandoms,freeformtags,ao3categories + +## AO3 chapters can include several different types of notes. We've +## traditional included them all in the chapter text, but this allows +## you to customize which you include. Copy this parameter to your +## personal.ini and list the ones you don't want. 
+#exclude_notes:authorheadnotes,chaptersummary,chapterheadnotes,chapterfootnotes,authorfootnotes + +[ashwinder.sycophanthex.com] +## Some sites require login (or login for some rated stories) The +## program can prompt you, or you can save it in config. In +## commandline version, this should go in your personal.ini, not +## defaults.ini. +#username:YourName +#password:yourpassword + +## Site dedicated to these categories/characters/ships +extracategories:Harry Potter +extracharacters:Severus Snape,Hermione Granger +extraships:Severus Snape/Hermione Granger + +[asr3.slashzone.org] +## Site dedicated to these categories/characters/ships +extracategories:The Sentinel + +## Some sites do not require a login, but do require the user to +## confirm they are adult for adult content. In commandline version, +## this should go in your personal.ini, not defaults.ini. +#is_adult:true + +[bdsm-geschichten.net] +## Some sites do not require a login, but do require the user to +## confirm they are adult for adult content. In commandline version, +## this should go in your personal.ini, not defaults.ini. +#is_adult:true + +## This site offers no index page so we can either guess the chapter URLs +## by dec/incrementing numbers ('guess') or walk all the chapters in the metadata +## parsing state ('parse'). Since guessing can lead to errors for non-standard +## story URLs, the default is to parse +#find_chapters:guess + +[bloodshedverse.com] +## website encoding(s) In theory, each website reports the character +## encoding they use for each page. In practice, some sites report it +## incorrectly. Each adapter has a default list, usually "utf8, +## Windows-1252" or "Windows-1252, utf8", but this will let you +## explicitly set the encoding and order if you need to. The special +## value 'auto' will call chardet and use the encoding it reports if +## it has +90% confidence. 'auto' is not reliable. 
+website_encodings:Windows-1252,ISO-8859-1,auto + +## Extra metadata that this adapter knows about. See [dramione.org] +## for examples of how to use them. +extra_valid_entries:warnings,reviews +reviews_label:Reviews + +## Site dedicated to these categories/characters/ships +extracharacters:Spike,Buffy +extracategories:Buffy the Vampire Slayer + +## Strips links found in the story text +## Specific to bloodshedverse.com +strip_text_links:true + +[bloodties-fans.com] +## Some sites require login (or login for some rated stories) The +## program can prompt you, or you can save it in config. In +## commandline version, this should go in your personal.ini, not +## defaults.ini. +#username:YourName +#password:yourpassword + +## Some sites also require the user to confirm they are adult for +## adult content. In commandline version, this should go in your +## personal.ini, not defaults.ini. +#is_adult:true + +## Site dedicated to these categories/characters/ships +extracategories:Blood Ties + +[buffynfaith.net] +## Site dedicated to these categories/characters/ships +extracategories:Buffy: The Vampire Slayer + +## Some sites also require the user to confirm they are adult for +## adult content. In commandline version, this should go in your +## personal.ini, not defaults.ini. +#is_adult:true + +[castlefans.org] +## Some sites require login (or login for some rated stories) The +## program can prompt you, or you can save it in config. In +## commandline version, this should go in your personal.ini, not +## defaults.ini. +#username:YourName +#password:yourpassword + +## Some sites also require the user to confirm they are adult for +## adult content. In commandline version, this should go in your +## personal.ini, not defaults.ini. +#is_adult:true + +## Site dedicated to these categories/characters/ships +extracategories:Castle + +[chaos.sycophanthex.com] +## Some sites do not require a login, but do require the user to +## confirm they are adult for adult content. 
In commandline version, +## this should go in your personal.ini, not defaults.ini. +#is_adult:true + +## some sites include images that we don't ever want becoming the +## cover image. This lets you exclude them. +cover_exclusion_regexp:/images/.*?ribbon.gif + +[csi-forensics.com] +## Some sites also require the user to confirm they are adult for +## adult content. In commandline version, this should go in your +## personal.ini, not defaults.ini. +#is_adult:true + +## Virtually all eFiction-based sites allow downloading the whole story in +## bulk using the 'Print' feature. If 'bulk_load' is set to 'true', both +## metadata and chapters can be loaded in one step +bulk_load:true + +extra_valid_entries: readings +readings_label: Readings + +[dark-solace.org] +## Site dedicated to these categories/characters/ships +## Some sites require login (or login for some rated stories) The +## program can prompt you, or you can save it in config. In +## commandline version, this should go in your personal.ini, not +## defaults.ini. +#username:YourName +#password:yourpassword + +## Some sites also require the user to confirm they are adult for +## adult content. In commandline version, this should go in your +## personal.ini, not defaults.ini. +#is_adult:true + +extracategories:Buffy: The Vampire Slayer +extracharacters:Buffy, Spike +extraships:Spike/Buffy + +[dramione.org] +## Some sites require login (or login for some rated stories) The +## program can prompt you, or you can save it in config. In +## commandline version, this should go in your personal.ini, not +## defaults.ini. +#username:YourName +#password:yourpassword + +## Some sites also require the user to confirm they are adult for +## adult content. In commandline version, this should go in your +## personal.ini, not defaults.ini. 
+#is_adult:true
+
+## Site dedicated to these categories/characters/ships
+extracategories:Harry Potter
+extracharacters:Draco Malfoy,Hermione Granger
+extraships:Draco Malfoy/Hermione Granger
+
+## some sites include images that we don't ever want becoming the
+## cover image. This lets you exclude them.
+cover_exclusion_regexp:/images/.*?ribbon.gif
+
+## Some adapters collect additional meta information beyond the
+## standard ones. They need to be defined in extra_valid_entries to
+## tell the rest of the FFDL system about them. They can be used in
+## include_subject_tags, titlepage_entries, extra_titlepage_entries,
+## logpage_entries, extra_logpage_entries, and include_in_* config
+## items. You can also add additional entries here to build up
+## composite metadata entries. dramione.org, for example, adds
+## 'cliches' and then defines it as the composite of hermiones,dracos in
+## include_in_cliches.
+extra_valid_entries:themes,hermiones,dracos,timeline,cliches,read,reviews
+include_in_cliches:hermiones,dracos
+
+## For another example, you could, by uncommenting this line, include
+## themes in with genre metadata.
+#include_in_genre:genre, themes
+
+## You can give each new valid entry a specific label for use on
+## titlepage and logpage. If not defined, it will simply be the
+## entry name.
+themes_label:Themes
+hermiones_label:Hermiones
+dracos_label:Dracos
+timeline_label:Timeline
+cliches_label:Character Cliches
+
+## extra_titlepage_entries (and extra_logpage_entries) *add* to
+## titlepage_entries (and logpage_entries) so you can add site
+## specific entries to titlepage/logpage without having to copy the
+## entire titlepage_entries line. (But if you want them higher than
+## the end, you will need to copy titlepage_entries.) 
+#extra_titlepage_entries: themes,timeline,cliches +#extra_logpage_entries: themes,timeline,cliches +#extra_subject_tags: themes,timeline,cliches + +[efiction.esteliel.de] +## Site dedicated to these categories/characters/ships +extracategories:Lord of the Rings + +[erosnsappho.sycophanthex.com] +## Some sites do not require a login, but do require the user to +## confirm they are adult for adult content. In commandline version, +## this should go in your personal.ini, not defaults.ini. +#is_adult:true + +## Site dedicated to these categories/characters/ships +extracategories:Harry Potter + +## some sites include images that we don't ever want becoming the +## cover image. This lets you exclude them. +cover_exclusion_regexp:/images/.*?ribbon.gif + +[fanfiction.csodaidok.hu] +## website encoding(s) In theory, each website reports the character +## encoding they use for each page. In practice, some sites report it +## incorrectly. Each adapter has a default list, usually "utf8, +## Windows-1252" or "Windows-1252, utf8", but this will let you +## explicitly set the encoding and order if you need to. The special +## value 'auto' will call chardet and use the encoding it reports if +## it has +90% confidence. 'auto' is not reliable. +website_encodings:ISO-8859-2,auto + +## Extra metadata that this adapter knows about. See [dramione.org] +## for examples of how to use them. +extra_valid_entries:reviews,challenge +reviews_label:Reviews +challenge_label:Challenge + +## Site dedicated to these categories/characters/ships +extracategories:Harry Potter + +[fanfic.hu] +## website encoding(s) In theory, each website reports the character +## encoding they use for each page. In practice, some sites report it +## incorrectly. Each adapter has a default list, usually "utf8, +## Windows-1252" or "Windows-1252, utf8", but this will let you +## explicitly set the encoding and order if you need to. 
The special +## value 'auto' will call chardet and use the encoding it reports if +## it has +90% confidence. 'auto' is not reliable. +website_encodings:ISO-8859-1,auto + +## Site dedicated to these categories/characters/ships +extracategories:Harry Potter + +[fanfiction.mugglenet.com] +## Some sites do not require a login, but do require the user to +## confirm they are adult for adult content. In commandline version, +## this should go in your personal.ini, not defaults.ini. +#is_adult:true + +## Some sites require login (or login for some rated stories) The +## program can prompt you, or you can save it in config. In +## commandline version, this should go in your personal.ini, not +## defaults.ini. +#username:YourName +#password:yourpassword + +## Site dedicated to these categories/characters/ships +extracategories:Harry Potter + +[fanfic.potterheadsanonymous.com] +## Some sites do not require a login, but do require the user to +## confirm they are adult for adult content. In commandline version, +## this should go in your personal.ini, not defaults.ini. +#is_adult:true + +## Some sites require login (or login for some rated stories) The +## program can prompt you, or you can save it in config. In +## commandline version, this should go in your personal.ini, not +## defaults.ini. +#username:YourName +#password:yourpassword + +## Site dedicated to these categories/characters/ships +extracategories:Harry Potter + +[fanfiction.portkey.org] +## Some sites do not require a login, but do require the user to +## confirm they are adult for adult content. In commandline version, +## this should go in your personal.ini, not defaults.ini. +#is_adult:true + +## Site dedicated to these categories/characters/ships +extracategories:Harry Potter +extraships:Harry Potter/Hermione Granger + +[fanfiction.tenhawkpresents.com] +## Some sites require login (or login for some rated stories) The +## program can prompt you, or you can save it in config. 
In +## commandline version, this should go in your personal.ini, not +## defaults.ini. +#username:YourName +#password:yourpassword + +[fannation.shades-of-moonlight.com] +## Some sites require login (or login for some rated stories) The +## program can prompt you, or you can save it in config. In +## commandline version, this should go in your personal.ini, not +## defaults.ini. +#username:YourName +#password:yourpassword + +## Virtually all eFiction-based sites allow downloading the whole story in +## bulk using the 'Print' feature. If 'bulk_load' is set to 'true', both +## metadata and chapters can be loaded in one step +bulk_load:true + +extra_valid_entries: readings,romance +extra_titlepage_entries: readings,romance +readings_label: Readings +romance_label: Romance + +[fhsarchive.com] +## Some sites do not require a login, but do require the user to +## confirm they are adult for adult content. In commandline version, +## this should go in your personal.ini, not defaults.ini. +#is_adult:true + +[ficwad.com] +## Some sites require login (or login for some rated stories) The +## program can prompt you, or you can save it in config. In +## commandline version, this should go in your personal.ini, not +## defaults.ini. +#username:YourName +#password:yourpassword + +[fictionmania.tv] +## website encoding(s) In theory, each website reports the character +## encoding they use for each page. In practice, some sites report it +## incorrectly. Each adapter has a default list, usually "utf8, +## Windows-1252" or "Windows-1252, utf8", but this will let you +## explicitly set the encoding and order if you need to. The special +## value 'auto' will call chardet and use the encoding it reports if +## it has +90% confidence. 'auto' is not reliable. +website_encodings:ISO-8859-1,auto + +## items to include in the log page Empty metadata entries, or those +## that haven't changed since the last update, will *not* appear, even +## if in the list. 
You can include extra text or HTML that will be +## included as-is in each log entry. Eg: logpage_entries: ...,<br />, +## summary,<br />,... +## Don't include numChapters since all stories are a single "chapter", there's +## no way to reliably find the next chapter +logpage_entries: dateCreated,datePublished,dateUpdated,numChapters,numWords,status,series,title,author,description,category,genre,rating,warnings + +## items to include in the title page +## Empty metadata entries will *not* appear, even if in the list. +## You can include extra text or HTML that will be included as-is in +## the title page. Eg: titlepage_entries: ...,<br />,summary,<br />,... +## All current formats already include title and author. +## Don't include numChapters since all stories are a single "chapter", there's +## no way to reliably find the next chapter +titlepage_entries: seriesHTML,category,genre,language,characters,ships,status,datePublished,dateUpdated,dateCreated,rating,warnings,numWords,site,description + +## Extra metadata that this adapter knows about. See [dramione.org] +## for examples of how to use them. +extra_valid_entries:fileName,fileSize,oldName,newName,keyWords,mainCharactersAge,readings + +## Turns all space characters into " " HTML entities to forcefully preserve +## formatting with spaces. Enabling this will blow up the filesize quite a bit +## and is probably not a good idea, unless you absolutely need the story +## formatting. +## Specific to fictionmania.tv +non_breaking_spaces:false + +[fictionpad.com] +## Some sites require login (or login for some rated stories) The +## program can prompt you, or you can save it in config. In +## commandline version, this should go in your personal.ini, not +## defaults.ini. 
+#username:YourName +#password:yourpassword + +extra_valid_entries:followers,comments,views,likes,dislikes +#extra_titlepage_entries:followers,comments,views,likes,dislikes + +followers_label:Followers +comments_label:Comments +views_label:Views +likes_label:Likes +dislikes_label:Dislikes + +[finestories.com] +## Some sites require login (or login for some rated stories) The +## program can prompt you, or you can save it in config. In +## commandline version, this should go in your personal.ini, not +## defaults.ini. +#username:YourName +#password:yourpassword + +[grangerenchanted.com] +## Some sites require login (or login for some rated stories) The +## program can prompt you, or you can save it in config. In +## commandline version, this should go in your personal.ini, not +## defaults.ini. +#username:YourName +#password:yourpassword + +## Some sites also require the user to confirm they are adult for +## adult content. In commandline version, this should go in your +## personal.ini, not defaults.ini. +#is_adult:true + +## Site dedicated to these categories/characters/ships +extracategories:Harry Potter +extracharacters:Hermione Granger + +## Extra metadata that this adapter knows about. See [dramione.org] +## for examples of how to use them. +extra_valid_entries:read,reviews + +[hlfiction.net] +## Site dedicated to these categories/characters/ships +extracategories:Highlander + +[imagine.e-fic.com] +## Some sites require login (or login for some rated stories) The +## program can prompt you, or you can save it in config. In +## commandline version, this should go in your personal.ini, not +## defaults.ini. +#username:YourName +#password:yourpassword + +## Some sites also require the user to confirm they are adult for +## adult content. In commandline version, this should go in your +## personal.ini, not defaults.ini. 
+#is_adult:true + +[indeath.net] +## Site dedicated to these categories/characters/ships +extracategories:In Death + +## some sites include images that we don't ever want becoming the +## cover image. This lets you exclude them. +cover_exclusion_regexp:/public/style_emoticons/.* + +[ksarchive.com] +## Site dedicated to these categories/characters/ships +extracategories:Star Trek +extracharacters:Kirk,Spock +extraships:Kirk/Spock + +[literotica.com] +extra_valid_entries:eroticatags +eroticatags_label:Erotica Tags +extra_titlepage_entries: eroticatags + +[lotrfanfiction.com] +## Virtually all eFiction-based sites allow downloading the whole story in +## bulk using the 'Print' feature. If 'bulk_load' is set to 'true', both +## metadata and chapters can be loaded in one step +bulk_load:true + +extra_valid_entries: readings +readings_label: Readings + +## Site dedicated to these categories/characters/ships +extracategories:Lord of the Rings + +[lumos.sycophanthex.com] +## Some sites do not require a login, but do require the user to +## confirm they are adult for adult content. In commandline version, +## this should go in your personal.ini, not defaults.ini. +#is_adult:true + +## Site dedicated to these categories/characters/ships +extracategories:Harry Potter + +[merlinfic.dtwins.co.uk] +## Some sites require login (or login for some rated stories) The +## program can prompt you, or you can save it in config. In +## commandline version, this should go in your personal.ini, not +## defaults.ini. +#username:YourName +#password:yourpassword + +## Some sites also require the user to confirm they are adult for +## adult content. In commandline version, this should go in your +## personal.ini, not defaults.ini. 
+#is_adult:true + +## Site dedicated to these categories/characters/ships +extracategories:Merlin + +[national-library.net] +## Site dedicated to these categories/characters/ships +extracategories:West Wing + +[ncisfic.com] +## Site dedicated to these categories/characters/ships +extracategories:NCIS + +[netraptor.org] + +[nfacommunity.com] +## Some sites do not require a login, but do require the user to +## confirm they are adult for adult content. In commandline version, +## this should go in your personal.ini, not defaults.ini. +#is_adult:true + +## Site dedicated to these categories/characters/ships +extracategories:NCIS + +[nha.magical-worlds.us] +## Site dedicated to these categories/characters/ships +extracategories:Buffy: The Vampire Slayer +extracharacters:Willow + +[nocturnal-light.net] +## Extra metadata that this adapter knows about. See [dramione.org] +## for examples of how to use them. +extra_valid_entries:readings,reviews +readings_label:Readings +reviews_label:Reviews + +## Site dedicated to these categories/characters/ships +extracharacters:Spike,Buffy +extracategories:Buffy the Vampire Slayer + +[occlumency.sycophanthex.com] +## Some sites require login (or login for some rated stories) The +## program can prompt you, or you can save it in config. In +## commandline version, this should go in your personal.ini, not +## defaults.ini. +#username:YourName +#password:yourpassword + +## Site dedicated to these categories/characters/ships +extracategories:Harry Potter +extracharacters:Severus Snape + +[onedirectionfanfiction.com] +## Some sites require login (or login for some rated stories) The +## program can prompt you, or you can save it in config. In +## commandline version, this should go in your personal.ini, not +## defaults.ini. +#username:YourName +#password:yourpassword + +## Some sites also require the user to confirm they are adult for +## adult content. In commandline version, this should go in your +## personal.ini, not defaults.ini. 
+#is_adult:true + +## Site dedicated to these categories/characters/ships +extracategories:One Direction + +[pommedesang.com] +## Some sites require login (or login for some rated stories) The +## program can prompt you, or you can save it in config. In +## commandline version, this should go in your personal.ini, not +## defaults.ini. +#username:YourName +#password:yourpassword + +## Some sites also require the user to confirm they are adult for +## adult content. In commandline version, this should go in your +## personal.ini, not defaults.ini. +#is_adult:true + +## Site dedicated to these categories/characters/ships +extracategories:Anita Blake Vampire Hunter + +[ponyfictionarchive.net] +## Some sites do not require a login, but do require the user to +## confirm they are adult for adult content. In commandline version, +## this should go in your personal.ini, not defaults.ini. +#is_adult:true + +## Site dedicated to these categories/characters/ships +extracategories:My Little Pony: Friendship is Magic + +[pretendercentre.com] +## Some sites do not require a login, but do require the user to +## confirm they are adult for adult content. In commandline version, +## this should go in your personal.ini, not defaults.ini. +#is_adult:true + +## Site dedicated to these categories/characters/ships +extracategories:The Pretender + +[samandjack.net] +## Some sites require login (or login for some rated stories) The +## program can prompt you, or you can save it in config. In +## commandline version, this should go in your personal.ini, not +## defaults.ini. +#username:YourName +#password:yourpassword + +## Some sites also require the user to confirm they are adult for +## adult content. In commandline version, this should go in your +## personal.ini, not defaults.ini. 
+#is_adult:true + +## Site dedicated to these categories/characters/ships +extracategories:Stargate: SG-1 +extracharacters:Sam,Jack +extraships:Sam/Jack + +[samdean.archive.nu] +## Site dedicated to these categories/characters/ships +extracategories:Supernatural +extracharacters:Sam,Dean +extraships:Sam/Dean + +[scarhead.net] +## Some sites require login (or login for some rated stories) The +## program can prompt you, or you can save it in config. In +## commandline version, this should go in your personal.ini, not +## defaults.ini. +#username:YourName +#password:yourpassword + +## Some sites also require the user to confirm they are adult for +## adult content. In commandline version, this should go in your +## personal.ini, not defaults.ini. +#is_adult:true + +## Site dedicated to these categories/characters/ships +extracategories:Harry Potter + +[sg1-heliopolis.com] +## Some sites do not require a login, but do require the user to +## confirm they are adult for adult content. In commandline version, +## this should go in your personal.ini, not defaults.ini. +#is_adult:true + +[sheppardweir.com] +## Some sites require login (or login for some rated stories) The +## program can prompt you, or you can save it in config. In +## commandline version, this should go in your personal.ini, not +## defaults.ini. +#username:YourName +#password:yourpassword + +## Some sites also require the user to confirm they are adult for +## adult content. In commandline version, this should go in your +## personal.ini, not defaults.ini. +#is_adult:true + +## Site dedicated to these categories/characters/ships +extracategories:Stargate: Atlantis +extracharacters:John Sheppard,Elizabeth Weir +extraships:John Sheppard/Elizabeth Weir + +[spikeluver.com] +## Extra metadata that this adapter knows about. See [dramione.org] +## for examples of how to use them. 
+extra_valid_entries:warnings,reviews +reviews_label:Reviews + +## Site dedicated to these categories/characters/ships +extracharacters:Spike,Buffy +extracategories:Buffy the Vampire Slayer + +[stargate-atlantis.org] +## Site dedicated to these categories/characters/ships +extracategories:Stargate: Atlantis + +[storiesonline.net] +## Some sites require login (or login for some rated stories) The +## program can prompt you, or you can save it in config. In +## commandline version, this should go in your personal.ini, not +## defaults.ini. +#username:YourName +#password:yourpassword + +## Clear FanFiction from defaults, site is original fiction. +extratags: + +extra_valid_entries:size,universe,universeUrl,universeHTML,sitetags,notice +#extra_titlepage_entries:size,universeHTML,codes,notice + +size_label:Size +universe_label:Universe +universeUrl_label:Universe URL +universeHTML_label:Universe +sitetags_label:Site Tags +notice_label:Notice + +## Assume entryUrl, apply to "<a class='%slink' href='%s'>%s</a>" to +## make entryHTML. +make_linkhtml_entries:universe + +## storiesonline.net stories can be in a series or a universe, but not +## both. By default, universe will be populated in 'series' with +## index=0 +universe_as_series: true + +[svufiction.com] +## Some sites require login (or login for some rated stories) The +## program can prompt you, or you can save it in config. In +## commandline version, this should go in your personal.ini, not +## defaults.ini. +#username:YourName +#password:yourpassword + +## Some sites also require the user to confirm they are adult for +## adult content. In commandline version, this should go in your +## personal.ini, not defaults.ini. 
+#is_adult:true + +[thehexfiles.net] +## Site dedicated to these categories/characters/ships +extracategories:Harry Potter +extracharacters:Draco Malfoy,Harry Potter +extraships:Harry Potter/Draco Malfoy + +[thehookupzone.net] +## Some sites require login (or login for some rated stories) The +## program can prompt you, or you can save it in config. In +## commandline version, this should go in your personal.ini, not +## defaults.ini. +#username:YourName +#password:yourpassword + +## Some sites also require the user to confirm they are adult for +## adult content. In commandline version, this should go in your +## personal.ini, not defaults.ini. +#is_adult:true + +## Site dedicated to these categories/characters/ships +extracategories:Criminal Minds + +[themaplebookshelf.com] +## Some sites also require the user to confirm they are adult for +## adult content. In commandline version, this should go in your +## personal.ini, not defaults.ini. +#is_adult:true + +## Virtually all eFiction-based sites allow downloading the whole story in +## bulk using the 'Print' feature. If 'bulk_load' is set to 'true', both +## metadata and chapters can be loaded in one step +bulk_load:true + +extra_valid_entries: readings,challenge +extra_titlepage_entries: readings,challenge +challenge_label: Challenge +readings_label: Readings + +[themasque.net] +## Some sites require login (or login for some rated stories) The +## program can prompt you, or you can save it in config. In +## commandline version, this should go in your personal.ini, not +## defaults.ini. +#username:YourName +#password:yourpassword + +## Some sites also require the user to confirm they are adult for +## adult content. In commandline version, this should go in your +## personal.ini, not defaults.ini. +#is_adult:true + +[thequidditchpitch.org] +## Some sites require login (or login for some rated stories) The +## program can prompt you, or you can save it in config. 
In +## commandline version, this should go in your personal.ini, not +## defaults.ini. +#username:YourName +#password:yourpassword + +## Some sites also require the user to confirm they are adult for +## adult content. In commandline version, this should go in your +## personal.ini, not defaults.ini. +#is_adult:true + +## Site dedicated to these categories/characters/ships +extracategories:Harry Potter + +[tokra.fandomnet.com] +## Some sites do not require a login, but do require the user to +## confirm they are adult for adult content. In commandline version, +## this should go in your personal.ini, not defaults.ini. +#is_adult:true + +## Site dedicated to these categories/characters/ships +extracategories:Stargate: SG-1 + +[tolkienfanfiction.com] +## Site dedicated to these categories/characters/ships +extracategories:Lord of the Rings + +[trekiverse.org] +## Some sites require login (or login for some rated stories) The +## program can prompt you, or you can save it in config. In +## commandline version, this should go in your personal.ini, not +## defaults.ini. +#username:YourName +#password:yourpassword + +## Virtually all eFiction-based sites allow downloading the whole story in +## bulk using the 'Print' feature. If 'bulk_load' is set to 'true', both +## metadata and chapters can be loaded in one step +bulk_load:true + +## Some sites also require the user to confirm they are adult for +## adult content. In commandline version, this should go in your +## personal.ini, not defaults.ini. +#is_adult:true + +## Site dedicated to these categories/characters/ships +extracategories:Star Trek + +extra_valid_entries:readings,awards +extra_titlepage_entries:readings,awards +awards_label:Awards +readings_label:Readings + +cover_exclusion_regexp:art/.*Awards.jpg + +[voracity2.e-fic.com] +## Extra metadata that this adapter knows about. See [dramione.org] +## for examples of how to use them. 
+extra_valid_entries:reviews,readings +reviews_label:Reviews +readings_label:Readings + +[www.adastrafanfic.com] +## Some sites do not require a login, but do require the user to +## confirm they are adult for adult content. In commandline version, +## this should go in your personal.ini, not defaults.ini. +#is_adult:true + +## Site dedicated to these categories/characters/ships +extracategories:Star Trek + +[www.dracoandginny.com] +## Some sites require login (or login for some rated stories) The +## program can prompt you, or you can save it in config. In +## commandline version, this should go in your personal.ini, not +## defaults.ini. +#username:YourName +#password:yourpassword + +## Some sites also require the user to confirm they are adult for +## adult content. In commandline version, this should go in your +## personal.ini, not defaults.ini. +#is_adult:true + +## Site dedicated to these categories/characters/ships +extracategories:Harry Potter +extracharacters:Draco Malfoy,Ginny Weasley +extraships:Draco Malfoy/Ginny Weasley + +[www.thealphagate.com] +## Site dedicated to these categories/characters/ships +extracategories:Stargate: SG-1 + +[www.checkmated.com] +## Some sites require login (or login for some rated stories) The +## program can prompt you, or you can save it in config. In +## commandline version, this should go in your personal.ini, not +## defaults.ini. +#username:YourName +#password:yourpassword + +## Site dedicated to these categories/characters/ships +extracategories:Harry Potter + +[www.destinysgateway.com] +## Some sites do not require a login, but do require the user to +## confirm they are adult for adult content. In commandline version, +## this should go in your personal.ini, not defaults.ini. +#is_adult:true + +[www.dokuga.com] +## Some sites require login (or login for some rated stories) The +## program can prompt you, or you can save it in config. 
In +## commandline version, this should go in your personal.ini, not +## defaults.ini. +#username:YourName +#password:yourpassword + +## Site dedicated to these categories/characters/ships +extracategories:InuYasha +extracharacters:Sesshoumaru,Kagome +extraships:Sesshoumaru/Kagome + +[www.dotmoon.net] +## Some sites require login (or login for some rated stories) The +## program can prompt you, or you can save it in config. In +## commandline version, this should go in your personal.ini, not +## defaults.ini. +#username:YourName +#password:yourpassword + +[www.efpfanfic.net] +## Some sites require login (or login for some rated stories) The +## program can prompt you, or you can save it in config. In +## commandline version, this should go in your personal.ini, not +## defaults.ini. +#username:YourName +#password:yourpassword + +## Extra metadata that this adapter knows about. See [dramione.org] +## for examples of how to use them. +extra_valid_entries:notes,context,type +notes_label:Notes +context_label:Context +type_label:Type of Couple + +[www.fanfiction.net] +user_agent: +## fanfiction.net's 'cover' images are really just tiny thumbnails. +## Set this to true to never use them. +#never_make_cover: false + +## fanfiction.net shows the user's +cover_exclusion_regexp:/imageu/ + +## fanfiction.net is blocking people more aggressively. If you +## download fewer stories less often you can likely get by with +## reducing this sleep. +slow_down_sleep_time:4 + +## Extra metadata that this adapter knows about. See [dramione.org] +## for examples of how to use them. +extra_valid_entries:reviews,favs,follows + +## ffnet uses 'Pairings', not 'Relationship', stating they don't have +## to be romantic pairings. +ships_label:Pairings + +## Date formats used by FFDL. Published and Update don't usually have +## time, but they do now on ffnet. +## See http://docs.python.org/library/datetime.html#strftime-strptime-behavior +## Note that ini format requires % to be escaped as %%. 
+#dateCreated_format:%%Y-%%m-%%d %%H:%%M:%%S
+datePublished_format:%%Y-%%m-%%d %%H:%%M:%%S
+dateUpdated_format:%%Y-%%m-%%d %%H:%%M:%%S
+
+## ffnet used to have a tendency to send out update notices in email
+## before all their servers were showing the update on the first
+## chapter. It generates another server request and doesn't seem to
+## be needed lately, so now default it to off.
+check_next_chapter:false
+
+[www.fanfiktion.de]
+## Some sites require login (or login for some rated stories) The
+## program can prompt you, or you can save it in config. In
+## commandline version, this should go in your personal.ini, not
+## defaults.ini.
+#username:YourName
+#password:yourpassword
+
+[www.ficbook.net]
+
+[www.fictionalley.org]
+## Some sites do not require a login, but do require the user to
+## confirm they are adult for adult content. In commandline version,
+## this should go in your personal.ini, not defaults.ini.
+#is_adult:true
+
+## Site dedicated to these categories/characters/ships
+extracategories:Harry Potter
+
+## fictionalley.org storyIds are not unique. Combine with authorId.
+output_filename: ${title}-${siteabbrev}_${authorId}_${storyId}${formatext}
+
+## fictionalley.org doesn't have a status metadatum. If uncommented,
+## this will be used for status.
+#default_value_status:Unknown
+
+[www.fictionpress.com]
+user_agent:
+## Clear FanFiction from defaults, fictionpress.com is original fiction.
+extratags:
+
+## Extra metadata that this adapter knows about. See [dramione.org]
+## for examples of how to use them.
+extra_valid_entries:reviews,favs,follows
+
+[www.fimfiction.net]
+## Some sites do not require a login, but do require the user to
+## confirm they are adult for adult content. In commandline version,
+## this should go in your personal.ini, not defaults.ini.
+#is_adult:true
+
+## fimfiction.net stories can be locked requiring individual
+## passwords. 
If fail_on_password is set, the downloader will fail
+## when a password is required rather than prompting every time.
+#fail_on_password: false
+
+## fimfiction.net stories allow chapters to be added out of order. So
+## the newest chapter may not be the last one. FFDL update doesn't
+## like that. If do_update_hook is uncommented and set true, the
+## adapter will discard all existing chapters from the newest one on
+## when updating to enforce accurate chapters.
+#do_update_hook:false
+
+## fimfiction.net is reported to misinterpret some BBCode with
+## blockquotes incorrectly. This fixes those instances and defaults
+## to on, but can be switched off if it is found to cause problems.
+fix_fimf_blockquotes:true
+
+## Site dedicated to these categories/characters/ships
+extracategories:My Little Pony: Friendship is Magic
+
+## Extra metadata that this adapter knows about. See [dramione.org]
+## for examples of how to use them.
+extra_valid_entries:likes,dislikes,views,total_views,short_description,groups,groupsUrl,groupsHTML,prequel,prequelUrl,prequelHTML,sequels,sequelsUrl,sequelsHTML,comment_count,coverSource,coverSourceUrl,coverSourceHTML
+likes_label:Likes
+dislikes_label:Dislikes
+views_label:Highest Single Chapter Views
+total_views_label:Total Views
+short_description_label:Short Summary
+groups_label:Groups
+groupsUrl_label:Groups URLs
+groupsHTML_label:Groups
+prequel_label:Prequel
+prequelUrl_label:Prequel URL
+prequelHTML_label:Prequel
+sequels_label:Sequels
+sequelsUrl_label:Sequel URLs
+sequelsHTML_label:Sequels
+comment_count_label:Comment Count
+coverSource_label:Cover Source
+coverSourceUrl_label:Cover Source URL
+coverSourceHTML_label:Cover Source
+
+keep_in_order_sequels:true
+keep_in_order_sequelsUrl:true
+keep_in_order_groups:true
+keep_in_order_groupsUrl:true
+
+## Assume entryUrl, apply to "<a class='%slink' href='%s'>%s</a>" to
+## make entryHTML. 
+make_linkhtml_entries:prequel,sequels,groups,coverSource + +[www.harrypotterfanfiction.com] +## Some sites do not require a login, but do require the user to +## confirm they are adult for adult content. In commandline version, +## this should go in your personal.ini, not defaults.ini. +#is_adult:true + +## Site dedicated to these categories/characters/ships +extracategories:Harry Potter + +[www.henneth-annun.net] +## Site dedicated to these categories/characters/ships +extracategories:The Hobbit + +[www.hpfandom.net] +## Some sites do not require a login, but do require the user to +## confirm they are adult for adult content. In commandline version, +## this should go in your personal.ini, not defaults.ini. +#is_adult:true + +## Site dedicated to these categories/characters/ships +extracategories:Harry Potter + +[www.hpfanficarchive.com] +## Site dedicated to these categories/characters/ships +extracategories:Harry Potter + +[www.ik-eternal.net] +## Some sites require login (or login for some rated stories) The +## program can prompt you, or you can save it in config. In +## commandline version, this should go in your personal.ini, not +## defaults.ini. +#username:YourName +#password:yourpassword + +## Some sites also require the user to confirm they are adult for +## adult content. In commandline version, this should go in your +## personal.ini, not defaults.ini. +#is_adult:true + +## Site dedicated to these categories/characters/ships +extracategories:InuYasha +extracharacters:InuYasha,Kagome +extraships:InuYasha/Kagome + +[www.jlaunlimited.com] +## Site dedicated to these categories/characters/ships +extracategories:JLA + +[www.libraryofmoria.com] +## Some sites do not require a login, but do require the user to +## confirm they are adult for adult content. In commandline version, +## this should go in your personal.ini, not defaults.ini. 
+#is_adult:true + +## Site dedicated to these categories/characters/ships +extracategories:Lord of the Rings + +[www.mediaminer.org] + +[www.midnightwhispers.ca] +## Some sites do not require a login, but do require the user to +## confirm they are adult for adult content. In commandline version, +## this should go in your personal.ini, not defaults.ini. +#is_adult:true + +## Site dedicated to these categories/characters/ships +extracategories:Queer as Folk + +## some sites include images that we don't ever want becoming the +## cover image. This lets you exclude them. +cover_exclusion_regexp:/stories/999/images/.*?_trophy.png + +[www.ncisfiction.net] +## Site dedicated to these categories/characters/ships +extracategories:NCIS + +[www.nickandgreg.net] +## Site dedicated to these categories/characters/ships +extracategories:CSI +extraships:Nick Stokes/Greg Sanders + +[www.phoenixsong.net] +## Some sites require login (or login for some rated stories) The +## program can prompt you, or you can save it in config. In +## commandline version, this should go in your personal.ini, not +## defaults.ini. +#username:YourName +#password:yourpassword + +## phoenixsong.net, oddly, can have high rated chapters (login +## required) in the middle of a lower rated story. Use this to force +## FFDL to always login to phoenixsong.net so those stories download +## correctly. If you have a login, this is recommended. 
+#force_login:true + +## Site dedicated to these categories/characters/ships +extracategories:Harry Potter +extraships:Harry Potter/Ginny Weasley + +[www.potionsandsnitches.net] +## Site dedicated to these categories/characters/ships +extracategories:Harry Potter + +[www.potterfics.com] +## Site dedicated to these categories/characters/ships +extracategories:Harry Potter + +[www.prisonbreakfic.net] +## Site dedicated to these categories/characters/ships +extracategories:Prison Break + +[www.psychfic.com] +## Site dedicated to these categories/characters/ships +extracategories:Psych + +## Some sites do not require a login, but do require the user to +## confirm they are adult for adult content. In commandline version, +## this should go in your personal.ini, not defaults.ini. +#is_adult:true + +[www.qaf-fic.com] +## Some sites do not require a login, but do require the user to +## confirm they are adult for adult content. In commandline version, +## this should go in your personal.ini, not defaults.ini. +#is_adult:true + +## Site dedicated to these categories/characters/ships +extracategories:Queer as Folk + +[www.restrictedsection.org] +## Some sites require login (or login for some rated stories) The +## program can prompt you, or you can save it in config. In +## commandline version, this should go in your personal.ini, not +## defaults.ini. +#username:YourName +#password:yourpassword + +## Site dedicated to these categories/characters/ships +extracategories:Harry Potter +extragenres:Erotica + +[www.scarvesandcoffee.net] +## Some sites do not require a login, but do require the user to +## confirm they are adult for adult content. In commandline version, +## this should go in your personal.ini, not defaults.ini. 
+#is_adult:true + +## Site dedicated to these categories/characters/ships +extracategories:Glee +extracharacters:Kurt Hummel,Blaine Anderson + +[www.simplyundeniable.com] +## Some sites require login (or login for some rated stories) The +## program can prompt you, or you can save it in config. In +## commandline version, this should go in your personal.ini, not +## defaults.ini. +#username:YourName +#password:yourpassword + +## Site dedicated to these categories/characters/ships +extracategories:Harry Potter + +[www.sinful-desire.org] +## Some sites do not require a login, but do require the user to +## confirm they are adult for adult content. In commandline version, +## this should go in your personal.ini, not defaults.ini. +#is_adult:true + +## Site dedicated to these categories/characters/ships +extracategories:Supernatural + +[www.siye.co.uk] +## Site dedicated to these categories/characters/ships +extracategories:Harry Potter +extracharacters:Harry Potter,Ginny Weasley +extraships:Harry Potter/Ginny Weasley + +[www.squidge.org/peja] +## www.squidge.org/peja calls it Fandom <shrug> +category_label:Fandom + +## Remove numWords -- www.squidge.org/peja word counts are inaccurate +titlepage_entries: seriesHTML,category,genre,language,characters,ships,status,datePublished,dateUpdated,dateCreated,rating,warnings,numChapters,site,description + +[www.squidge.org/peja:txt] +## Add URLs since there aren't links and remove numWords -- +## www.squidge.org/peja word counts are inaccurate +titlepage_entries: series,seriesUrl,category,genre,language,status,datePublished,dateUpdated,dateCreated,rating,warnings,numChapters,site,storyUrl, authorUrl, description + +[www.storiesofarda.com] +## Some sites do not require a login, but do require the user to +## confirm they are adult for adult content. In commandline version, +## this should go in your personal.ini, not defaults.ini. 
+#is_adult:true + +## Site dedicated to these categories/characters/ships +extracategories:Lord of the Rings + +[www.thepetulantpoetess.com] +## Some sites require login (or login for some rated stories) The +## program can prompt you, or you can save it in config. In +## commandline version, this should go in your personal.ini, not +## defaults.ini. +#username:YourName +#password:yourpassword + +[www.twcslibrary.net] +## Some sites require login (or login for some rated stories) The +## program can prompt you, or you can save it in config. In +## commandline version, this should go in your personal.ini, not +## defaults.ini. +#username:YourName +#password:yourpassword + +## Some sites also require the user to confirm they are adult for +## adult content. In commandline version, this should go in your +## personal.ini, not defaults.ini. +#is_adult:true + +## twcslibrary.net (ab)uses series as personal reading lists. +collect_series: false + +[www.tthfanfic.org] +## Some sites do not require a login, but do require the user to +## confirm they are adult for adult content. In commandline version, +## this should go in your personal.ini, not defaults.ini. +#is_adult:true + +## tth is a little unusual--it doesn't require user/pass, but the site +## keeps track of which chapters you've read and won't send another +## update until it thinks you're up to date. This way, on download, +## it thinks you're up to date. +#username:YourName +#password:yourpassword + +[www.twilightarchives.com] +## Site dedicated to these categories/characters/ships +extracategories:Twilight + +[www.twilighted.net] +## Some sites require login (or login for some rated stories) The +## program can prompt you, or you can save it in config. In +## commandline version, this should go in your personal.ini, not +## defaults.ini. 
+#username:YourName +#password:yourpassword + +## Site dedicated to these categories/characters/ships +extracategories:Twilight + +## twilighted.net (ab)uses series as personal reading lists. +collect_series: false + +[www.twiwrite.net] +## Some sites require login (or login for some rated stories) The +## program can prompt you, or you can save it in config. In +## commandline version, this should go in your personal.ini, not +## defaults.ini. +#username:YourName +#password:yourpassword + +## Site dedicated to these categories/characters/ships +extracategories:Twilight + +## twiwrite.net (ab)uses series as personal reading lists. +collect_series: false + +[www.walkingtheplank.org] +## Some sites do not require a login, but do require the user to +## confirm they are adult for adult content. In commandline version, +## this should go in your personal.ini, not defaults.ini. +#is_adult:true + +## Site dedicated to these categories/characters/ships +extracategories:Harry Potter +extracharacters:Severus Snape,Harry Potter +extraships:Severus Snape/Harry Potter + +[www.whofic.com] + +[www.wizardtales.net] +## Some sites require login (or login for some rated stories) The +## program can prompt you, or you can save it in config. In +## commandline version, this should go in your personal.ini, not +## defaults.ini. +#username:YourName +#password:yourpassword + +## Some sites also require the user to confirm they are adult for +## adult content. In commandline version, this should go in your +## personal.ini, not defaults.ini. +#is_adult:true + +[www.wolverineandrogue.com] +## Site dedicated to these categories/characters/ships +extracategories:X-Men Movie +extracharacters:Wolverine,Rogue + +[www.wraithbait.com] +## Some sites do not require a login, but do require the user to +## confirm they are adult for adult content. In commandline version, +## this should go in your personal.ini, not defaults.ini. 
+#is_adult:true + +## Site dedicated to these categories/characters/ships +extracategories:Stargate: Atlantis + +extra_valid_entries:reviews +reviews_label:Reviews + +[overrides] +## It may sometimes be useful to override all of the specific format, +## site and site:format sections in your private configuration. For +## example, this extratags param here would override all of the +## extratags params in all other sections. Only commandline options +## beat overrides. +#extratags:fanficdownloader + + +[teststory:defaults] +valid_entries:title,author_list,authorId_list,authorUrl_list,storyUrl, + datePublished,dateUpdated,numWords,status,language,series,seriesUrl, + rating,category_list,genre_list,warnings_list,characters_list,ships_list, + description,site,extratags + +# {{storyId}} is a special case--it's the only one that works. +title:Test Story Title {{storyId}} +author_list:Test Author aa +authorId_list:1 +authorUrl_list:http://test1.com?authid=1 +storyUrl:http://test1.com?sid={{storyId}} +datePublished:1975-03-15 +dateUpdated:1975-04-15 +numWords:123,456 +status:In-Progress +language:English + +chaptertitles:Prologue + +## Add additional sections with different numbers to get different +## parameters for different story urls. 
+## test1.com?sid=1000 +[teststory:1000] +# note the leading commas when doing add_to_ with valid_entries and *_list +add_to_valid_entries:,favs +title:Testing New Feature {{storyId}} +author_list:Bob Smith +authorId_list:45 +authorUrl_list:http://test1.com?authid=45 +datePublished:2013-03-15 +dateUpdated:2013-04-15 +numWords:1456 +favs:56 +series:The Great Test [4] +seriesUrl:http://test1.com?seriesid=1 +rating:Tweenie +category_list:Harry Potter,Furbie,Crossover,Puella Magi Madoka Magica/魔法少女まどか★マギカ,Magical Girl Lyrical Nanoha +genre_list:Fantasy,Comedy,Sci-Fi,Noir +warnings_list:Swearing,Violence +characters_list:Bob Smith,George Johnson,Fred Smythe + +chaptertitles:Prologue,Chapter 1\, Xenos on Cinnabar,Chapter 2\, Sinmay on Kintikin,3. Chapter 3 diff --git a/delete_fic.py b/delete_fic.py new file mode 100644 index 00000000..73722724 --- /dev/null +++ b/delete_fic.py @@ -0,0 +1,59 @@ +import os +import cgi +import sys +import logging +import traceback +import StringIO + +from google.appengine.api import users +from google.appengine.ext import webapp +from google.appengine.ext.webapp import util + +from fanficdownloader.downaloder import * +from fanficdownloader.ffnet import * +from fanficdownloader.output import * + +from google.appengine.ext import db + +from fanficdownloader.zipdir import * + +from ffstorage import * + +def create_mac(user, fic_id, fic_url): + return str(abs(hash(user)+hash(fic_id)))+str(abs(hash(fic_url))) + +def check_mac(user, fic_id, fic_url, mac): + return (create_mac(user, fic_id, fic_url) == mac) + +def create_mac_for_fic(user, fic_id): + key = db.Key(fic_id) + fanfic = db.get(key) + if fanfic.user != user: + return None + else: + return create_mac(user, key, fanfic.url) + +class DeleteFicHandler(webapp.RequestHandler): + def get(self): + user = users.get_current_user() + if not user: + self.redirect('/login') + + fic_id = self.request.get('fic_id') + fic_mac = self.request.get('key_id') + + actual_mac = create_mac_for_fic(user, 
fic_id) + if actual_mac != fic_mac: + self.response.out.write("Ooops") + else: + key = db.Key(fic_id) + fanfic = db.get(key) + fanfic.delete() + self.redirect('/recent') + + + fics = db.GqlQuery("Select * From DownloadedFanfic WHERE user = :1", user) + template_values = dict(fics = fics, nickname = user.nickname()) + path = os.path.join(os.path.dirname(__file__), 'recent.html') + self.response.out.write(template.render(path, template_values)) + \ No newline at end of file diff --git a/downloader.py b/downloader.py new file mode 100644 index 00000000..290f2f93 --- /dev/null +++ b/downloader.py @@ -0,0 +1,319 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import sys, os +from os.path import normpath, expanduser, isfile, join +from StringIO import StringIO +from optparse import OptionParser +import getpass +import string +import ConfigParser +from subprocess import call +import pprint + +import logging +if sys.version_info >= (2, 7): + # suppresses default logger. Logging is setup in fanficdownload/__init__.py so it works in calibre, too. 
+ rootlogger = logging.getLogger() + loghandler=logging.NullHandler() + loghandler.setFormatter(logging.Formatter("(=====)(levelname)s:%(message)s")) + rootlogger.addHandler(loghandler) + +try: + from calibre.constants import numeric_version as calibre_version + is_calibre = True +except: + is_calibre = False + +# using try/except directly was masking errors during development. +if is_calibre: + # running under calibre + from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader import adapters,writers,exceptions + from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader.configurable import Configuration + from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader.epubutils import get_dcsource_chaptercount, get_update_data + from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader.geturls import get_urls_from_page +else: + from fanficdownloader import adapters,writers,exceptions + from fanficdownloader.configurable import Configuration + from fanficdownloader.epubutils import get_dcsource_chaptercount, get_update_data + from fanficdownloader.geturls import get_urls_from_page + + +if sys.version_info < (2, 5): + print "This program requires Python 2.5 or newer." 
+ sys.exit(1) + +def writeStory(config,adapter,writeformat,metaonly=False,outstream=None): + writer = writers.getWriter(writeformat,config,adapter) + writer.writeStory(outstream=outstream,metaonly=metaonly) + output_filename=writer.getOutputFileName() + del writer + return output_filename + +def main(argv, + parser=None, + passed_defaultsini=None, + passed_personalini=None): + # read in args, anything starting with -- will be treated as --<varible>=<value> + if not parser: + parser = OptionParser("usage: %prog [options] storyurl") + parser.add_option("-f", "--format", dest="format", default="epub", + help="write story as FORMAT, epub(default), mobi, text or html", metavar="FORMAT") + + if passed_defaultsini: + config_help="read config from specified file(s) in addition to calibre plugin personal.ini, ~/.fanficdownloader/personal.ini, and ./personal.ini" + else: + config_help="read config from specified file(s) in addition to ~/.fanficdownloader/defaults.ini, ~/.fanficdownloader/personal.ini, ./defaults.ini, and ./personal.ini" + parser.add_option("-c", "--config", + action="append", dest="configfile", default=None, + help=config_help, metavar="CONFIG") + parser.add_option("-b", "--begin", dest="begin", default=None, + help="Begin with Chapter START", metavar="START") + parser.add_option("-e", "--end", dest="end", default=None, + help="End with Chapter END", metavar="END") + parser.add_option("-o", "--option", + action="append", dest="options", + help="set an option NAME=VALUE", metavar="NAME=VALUE") + parser.add_option("-m", "--meta-only", + action="store_true", dest="metaonly", + help="Retrieve metadata and stop. 
Or, if --update-epub, update metadata title page only.",) + parser.add_option("-u", "--update-epub", + action="store_true", dest="update", + help="Update an existing epub with new chapters, give epub filename instead of storyurl.",) + parser.add_option("--update-cover", + action="store_true", dest="updatecover", + help="Update cover in an existing epub, otherwise existing cover (if any) is used on update. Only valid with --update-epub.",) + parser.add_option("--force", + action="store_true", dest="force", + help="Force overwrite of an existing epub, download and overwrite all chapters.",) + parser.add_option("-l", "--list", + action="store_true", dest="list", + help="Get list of valid story URLs from page given.",) + parser.add_option("-n", "--normalize-list", + action="store_true", dest="normalize",default=False, + help="Get list of valid story URLs from page given, but normalized to standard forms.",) + parser.add_option("-s", "--sites-list", + action="store_true", dest="siteslist",default=False, + help="Get list of valid story URLs examples.",) + parser.add_option("-d", "--debug", + action="store_true", dest="debug", + help="Show debug output while downloading.",) + + (options, args) = parser.parse_args(argv) + + if not options.debug: + logger = logging.getLogger("fanficdownloader") + logger.setLevel(logging.INFO) + + if not options.siteslist and len(args) != 1: + parser.error("incorrect number of arguments") + + if options.siteslist: + for (site,examples) in adapters.getSiteExamples(): + print("\n====%s====\n\nExample URLs:"%site) + for u in examples: + print(" * %s"%u) + return + + if options.update and options.format != 'epub': + parser.error("-u/--update-epub only works with epub") + + ## Attempt to update an existing epub. + chaptercount = None + output_filename = None + if options.update: + try: + (url,chaptercount) = get_dcsource_chaptercount(args[0]) + if not url: + print "No story URL found in epub to update." 
+ return + print "Updating %s, URL: %s" % (args[0],url) + output_filename = args[0] + except: + # if there's an error reading the update file, maybe it's a URL? + # we'll look for an existing outputfile down below. + url = args[0] + else: + url = args[0] + + try: + configuration = Configuration(adapters.getConfigSectionFor(url),options.format) + except exceptions.UnknownSite, e: + if options.list or options.normalize: + # list for page doesn't have to be a supported site. + configuration = Configuration("test1.com",options.format) + else: + raise e + + conflist = [] + homepath = join(expanduser("~"),".fanficdownloader") + + if passed_defaultsini: + configuration.readfp(passed_defaultsini) + + if isfile(join(homepath,"defaults.ini")): + conflist.append(join(homepath,"defaults.ini")) + if isfile("defaults.ini"): + conflist.append("defaults.ini") + + if passed_personalini: + configuration.readfp(passed_personalini) + + if isfile(join(homepath,"personal.ini")): + conflist.append(join(homepath,"personal.ini")) + if isfile("personal.ini"): + conflist.append("personal.ini") + + if options.configfile: + conflist.extend(options.configfile) + + logging.debug('reading %s config file(s), if present'%conflist) + configuration.read(conflist) + + try: + configuration.add_section("overrides") + except ConfigParser.DuplicateSectionError: + pass + + if options.force: + configuration.set("overrides","always_overwrite","true") + + if options.update and chaptercount: + configuration.set("overrides","output_filename",output_filename) + + if options.update and not options.updatecover: + configuration.set("overrides","never_make_cover","true") + + # images only for epub, even if the user mistakenly turned it + # on else where. 
+ if options.format not in ("epub","html"): + configuration.set("overrides","include_images","false") + + if options.options: + for opt in options.options: + (var,val) = opt.split('=') + configuration.set("overrides",var,val) + + if options.list or options.normalize: + retlist = get_urls_from_page(args[0], configuration, normalize=options.normalize) + print "\n".join(retlist) + return + + try: + adapter = adapters.getAdapter(configuration,url) + adapter.setChaptersRange(options.begin,options.end) + + # check for updating from URL (vs from file) + if options.update and not chaptercount: + try: + writer = writers.getWriter("epub",configuration,adapter) + output_filename=writer.getOutputFileName() + (noturl,chaptercount) = get_dcsource_chaptercount(output_filename) + print "Updating %s, URL: %s" % (output_filename,url) + except: + options.update = False + pass + + ## Check for include_images without no_image_processing. In absence of PIL, give warning. + if adapter.getConfig('include_images') and not adapter.getConfig('no_image_processing'): + try: + from calibre.utils.magick import Image + logging.debug("Using calibre.utils.magick") + except: + try: + import Image + logging.debug("Using PIL") + except: + print "You have include_images enabled, but Python Image Library(PIL) isn't found.\nImages will be included full size in original format.\nContinue? (y/n)?" + if not sys.stdin.readline().strip().lower().startswith('y'): + return + + ## three tries, that's enough if both user/pass & is_adult needed, + ## or a couple tries of one or the other + for x in range(0,2): + try: + adapter.getStoryMetadataOnly() + except exceptions.FailedToLogin, f: + if f.passwdonly: + print "Story requires a password." + else: + print "Login Failed, Need Username/Password." 
+ sys.stdout.write("Username: ") + adapter.username = sys.stdin.readline().strip() + adapter.password = getpass.getpass(prompt='Password: ') + #print("Login: `%s`, Password: `%s`" % (adapter.username, adapter.password)) + except exceptions.AdultCheckRequired: + print "Please confirm you are an adult in your locale: (y/n)?" + if sys.stdin.readline().strip().lower().startswith('y'): + adapter.is_adult=True + + if options.update and not options.force: + urlchaptercount = int(adapter.getStoryMetadataOnly().getMetadata('numChapters')) + + if chaptercount == urlchaptercount and not options.metaonly: + print "%s already contains %d chapters." % (output_filename,chaptercount) + elif chaptercount > urlchaptercount: + print "%s contains %d chapters, more than source: %d." % (output_filename,chaptercount,urlchaptercount) + elif chaptercount == 0: + print "%s doesn't contain any recognizable chapters, probably from a different source. Not updating." % (output_filename) + else: + # update now handled by pre-populating the old + # images and chapters in the adapter rather than + # merging epubs. 
+ (url, + chaptercount, + adapter.oldchapters, + adapter.oldimgs, + adapter.oldcover, + adapter.calibrebookmark, + adapter.logfile) = get_update_data(output_filename) + + print "Do update - epub(%d) vs url(%d)" % (chaptercount, urlchaptercount) + + if not (options.update and chaptercount == urlchaptercount) \ + and adapter.getConfig("do_update_hook"): + chaptercount = adapter.hookForUpdates(chaptercount) + + writeStory(configuration,adapter,"epub") + + else: + # regular download + if options.metaonly: + pprint.pprint(adapter.getStoryMetadataOnly().getAllMetadata()) + + output_filename=writeStory(configuration,adapter,options.format,options.metaonly) + + if not options.metaonly and adapter.getConfig("post_process_cmd"): + metadata = adapter.story.metadata + metadata['output_filename']=output_filename + call(string.Template(adapter.getConfig("post_process_cmd")) + .substitute(metadata), shell=True) + + del adapter + + except exceptions.InvalidStoryURL, isu: + print isu + except exceptions.StoryDoesNotExist, dne: + print dne + except exceptions.UnknownSite, us: + print us + +if __name__ == "__main__": + #import time + #start = time.time() + main(sys.argv[1:]) + #print("Total time seconds:%f"%(time.time()-start)) diff --git a/editconfig.html b/editconfig.html new file mode 100644 index 00000000..93ee1820 --- /dev/null +++ b/editconfig.html @@ -0,0 +1,89 @@ +<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN"> +<html> + <head> + <link href="/css/index.css" rel="stylesheet" type="text/css"> + <title>FanFictionDownLoader - read fanfiction from twilighted.net, fanfiction.net, fictionpress.com, fictionalley.org, ficwad.com, potionsandsnitches.net, harrypotterfanfiction.com, mediaminer.org on Kindle, Nook, Sony Reader, iPad, iPhone, Android, Aldiko, Stanza + + + + + +
+

+ FanFictionDownLoader +

+ +
+ + +
+ +
+ +
+

Edit Config

+
+ Editing configuration for {{ nickname }}. +
+
+ +
+
+ +
+ +
+
+ +
+

Default System configuration

+
+{{ defaultsini }}
+
+
+ +
+ Powered by Google App Engine +

+ This is a web front-end to FanFictionDownLoader
+ Copyright © Fanficdownloader team +
+ +
+ + +
+
+ + diff --git a/epubmerge.py b/epubmerge.py new file mode 100644 index 00000000..f7e76b8c --- /dev/null +++ b/epubmerge.py @@ -0,0 +1,25 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- + +# epubmerge.py 1.0 + +# Copyright 2011, Jim Miller + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +if __name__ == "__main__": + print(''' +The this utility has been split out into it's own project. +See: http://code.google.com/p/epubmerge/ +...for a CLI epubmerge.py program and calibre plugin. +''') diff --git a/example.ini b/example.ini new file mode 100644 index 00000000..13e4a854 --- /dev/null +++ b/example.ini @@ -0,0 +1,103 @@ +## This is an example of what your personal configuration might look +## like. Uncomment options by removing the '#' in front of them. + +[defaults] +## Some sites also require the user to confirm they are adult for +## adult content. Uncomment by removing '#' in front of is_adult. In +## commandline version, this should go in your personal.ini, not +## defaults.ini. +#is_adult:true + +## Don't like the numbers at the start of chapter titles on some +## sites? You can use strip_chapter_numbers to strip them off. Just +## want to make them all look the same? Strip them off, then add them +## back on with add_chapter_numbers. Don't like the way it strips +## numbers or adds them back? See chapter_title_strip_pattern and +## chapter_title_add_pattern. 
+#strip_chapter_numbers:true +#add_chapter_numbers:true + +[epub] +## include images from img tags in the body and summary of stories. +## Images will be converted to jpg for size if possible. Images work +## in epub format only. To get mobi or other format with images, +## download as epub and use Calibre to convert. +#include_images:true + +## If not set, the summary will have all html stripped for safety. +## Both this and include_images must be true to get images in the +## summary. +#keep_summary_html:true + +## If set, the first image found will be made the cover image. If +## keep_summary_html is true, any images in summary will be before any +## in chapters. +#make_firstimage_cover:true + +## Resize images down to width, height, preserving aspect ratio. +## Nook size, with margin. +#image_max_size: 580, 725 + +## Change image to grayscale, if graphics library allows, to save +## space. +#grayscale_images: false + + +## Most common, I expect will be using this to save username/passwords +## for different sites. Here are a few examples. See defaults.ini +## for the full list. + +[www.twilighted.net] +#username:YourPenname +#password:YourPassword +## default is false +#collect_series: true + +[www.ficwad.com] +#username:YourUsername +#password:YourPassword + +[www.twiwrite.net] +#username:YourName +#password:yourpassword +## default is false +#collect_series: true + +[www.adastrafanfic.com] +## Some sites do not require a login, but do require the user to +## confirm they are adult for adult content. 
+#is_adult:true + +[www.thewriterscoffeeshop.com] +#username:YourName +#password:yourpassword +#is_adult:true +## default is false +#collect_series: true + +[www.fictionalley.org] +#is_adult:true + +[www.harrypotterfanfiction.com] +#is_adult:true + +[www.fimfiction.net] +#is_adult:true +#fail_on_password: false + +[www.tthfanfic.org] +#is_adult:true +## tth is a little unusual--it doesn't require user/pass, but the site +## keeps track of which chapters you've read and won't send another +## update until it thinks you're up to date. This way, on download, +## it thinks you're up to date. +#username:YourName +#password:yourpassword + + +## This section will override anything in the system defaults or other +## sections here. +[overrides] +## default varies by site. Set true here to force all sites to +## collect series. +#collect_series: true diff --git a/fanficdownloader.zip b/fanficdownloader.zip new file mode 100644 index 00000000..b39fb33e Binary files /dev/null and b/fanficdownloader.zip differ diff --git a/fanficdownloader/BeautifulSoup.py b/fanficdownloader/BeautifulSoup.py new file mode 100644 index 00000000..4b17b853 --- /dev/null +++ b/fanficdownloader/BeautifulSoup.py @@ -0,0 +1,2014 @@ +"""Beautiful Soup +Elixir and Tonic +"The Screen-Scraper's Friend" +http://www.crummy.com/software/BeautifulSoup/ + +Beautiful Soup parses a (possibly invalid) XML or HTML document into a +tree representation. It provides methods and Pythonic idioms that make +it easy to navigate, search, and modify the tree. + +A well-formed XML/HTML document yields a well-formed data +structure. An ill-formed XML/HTML document yields a correspondingly +ill-formed data structure. If your document is only locally +well-formed, you can use this library to find and process the +well-formed part of it. + +Beautiful Soup works with Python 2.2 and up. 
It has no external +dependencies, but you'll have more success at converting data to UTF-8 +if you also install these three packages: + +* chardet, for auto-detecting character encodings + http://chardet.feedparser.org/ +* cjkcodecs and iconv_codec, which add more encodings to the ones supported + by stock Python. + http://cjkpython.i18n.org/ + +Beautiful Soup defines classes for two main parsing strategies: + + * BeautifulStoneSoup, for parsing XML, SGML, or your domain-specific + language that kind of looks like XML. + + * BeautifulSoup, for parsing run-of-the-mill HTML code, be it valid + or invalid. This class has web browser-like heuristics for + obtaining a sensible parse tree in the face of common HTML errors. + +Beautiful Soup also defines a class (UnicodeDammit) for autodetecting +the encoding of an HTML or XML document, and converting it to +Unicode. Much of this code is taken from Mark Pilgrim's Universal Feed Parser. + +For more than you ever wanted to know about Beautiful Soup, see the +documentation: +http://www.crummy.com/software/BeautifulSoup/documentation.html + +Here, have some legalese: + +Copyright (c) 2004-2010, Leonard Richardson + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + + * Neither the name of the the Beautiful Soup Consortium and All + Night Kosher Bakery nor the names of its contributors may be + used to endorse or promote products derived from this software + without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE, DAMMIT. + +""" +from __future__ import generators + +__author__ = "Leonard Richardson (leonardr@segfault.org)" +__version__ = "3.2.0" +__copyright__ = "Copyright (c) 2004-2010 Leonard Richardson" +__license__ = "New-style BSD" + +from sgmllib import SGMLParser, SGMLParseError +import codecs +import markupbase +import types +import re +import sgmllib +try: + from htmlentitydefs import name2codepoint +except ImportError: + name2codepoint = {} +try: + set +except NameError: + from sets import Set as set + +#These hacks make Beautiful Soup able to parse XML with namespaces +sgmllib.tagfind = re.compile('[a-zA-Z][-_.:a-zA-Z0-9]*') +markupbase._declname_match = re.compile(r'[a-zA-Z][-_.:a-zA-Z0-9]*\s*').match + +DEFAULT_OUTPUT_ENCODING = "utf-8" + +def _match_css_class(str): + """Build a RE to match the given CSS class.""" + return re.compile(r"(^|.*\s)%s($|\s)" % str) + +# First, the classes that represent markup elements. 
+ +class PageElement(object): + """Contains the navigational information for some part of the page + (either a tag or a piece of text)""" + + def setup(self, parent=None, previous=None): + """Sets up the initial relations between this element and + other elements.""" + self.parent = parent + self.previous = previous + self.next = None + self.previousSibling = None + self.nextSibling = None + if self.parent and self.parent.contents: + self.previousSibling = self.parent.contents[-1] + self.previousSibling.nextSibling = self + + def replaceWith(self, replaceWith): + oldParent = self.parent + myIndex = self.parent.index(self) + if hasattr(replaceWith, "parent")\ + and replaceWith.parent is self.parent: + # We're replacing this element with one of its siblings. + index = replaceWith.parent.index(replaceWith) + if index and index < myIndex: + # Furthermore, it comes before this element. That + # means that when we extract it, the index of this + # element will change. + myIndex = myIndex - 1 + self.extract() + oldParent.insert(myIndex, replaceWith) + + def replaceWithChildren(self): + myParent = self.parent + myIndex = self.parent.index(self) + self.extract() + reversedChildren = list(self.contents) + reversedChildren.reverse() + for child in reversedChildren: + myParent.insert(myIndex, child) + + def extract(self): + """Destructively rips this element out of the tree.""" + if self.parent: + try: + del self.parent.contents[self.parent.index(self)] + except ValueError: + pass + + #Find the two elements that would be next to each other if + #this element (and any children) hadn't been parsed. Connect + #the two. 
+ lastChild = self._lastRecursiveChild() + nextElement = lastChild.next + + if self.previous: + self.previous.next = nextElement + if nextElement: + nextElement.previous = self.previous + self.previous = None + lastChild.next = None + + self.parent = None + if self.previousSibling: + self.previousSibling.nextSibling = self.nextSibling + if self.nextSibling: + self.nextSibling.previousSibling = self.previousSibling + self.previousSibling = self.nextSibling = None + return self + + def _lastRecursiveChild(self): + "Finds the last element beneath this object to be parsed." + lastChild = self + while hasattr(lastChild, 'contents') and lastChild.contents: + lastChild = lastChild.contents[-1] + return lastChild + + def insert(self, position, newChild): + if isinstance(newChild, basestring) \ + and not isinstance(newChild, NavigableString): + newChild = NavigableString(newChild) + + position = min(position, len(self.contents)) + if hasattr(newChild, 'parent') and newChild.parent is not None: + # We're 'inserting' an element that's already one + # of this object's children. + if newChild.parent is self: + index = self.index(newChild) + if index > position: + # Furthermore we're moving it further down the + # list of this object's children. That means that + # when we extract this element, our target index + # will jump down one. 
+ position = position - 1 + newChild.extract() + + newChild.parent = self + previousChild = None + if position == 0: + newChild.previousSibling = None + newChild.previous = self + else: + previousChild = self.contents[position-1] + newChild.previousSibling = previousChild + newChild.previousSibling.nextSibling = newChild + newChild.previous = previousChild._lastRecursiveChild() + if newChild.previous: + newChild.previous.next = newChild + + newChildsLastElement = newChild._lastRecursiveChild() + + if position >= len(self.contents): + newChild.nextSibling = None + + parent = self + parentsNextSibling = None + while not parentsNextSibling: + parentsNextSibling = parent.nextSibling + parent = parent.parent + if not parent: # This is the last element in the document. + break + if parentsNextSibling: + newChildsLastElement.next = parentsNextSibling + else: + newChildsLastElement.next = None + else: + nextChild = self.contents[position] + newChild.nextSibling = nextChild + if newChild.nextSibling: + newChild.nextSibling.previousSibling = newChild + newChildsLastElement.next = nextChild + + if newChildsLastElement.next: + newChildsLastElement.next.previous = newChildsLastElement + self.contents.insert(position, newChild) + + def append(self, tag): + """Appends the given tag to the contents of this tag.""" + self.insert(len(self.contents), tag) + + def findNext(self, name=None, attrs={}, text=None, **kwargs): + """Returns the first item that matches the given criteria and + appears after this Tag in the document.""" + return self._findOne(self.findAllNext, name, attrs, text, **kwargs) + + def findAllNext(self, name=None, attrs={}, text=None, limit=None, + **kwargs): + """Returns all items that match the given criteria and appear + after this Tag in the document.""" + return self._findAll(name, attrs, text, limit, self.nextGenerator, + **kwargs) + + def findNextSibling(self, name=None, attrs={}, text=None, **kwargs): + """Returns the closest sibling to this Tag that matches 
the + given criteria and appears after this Tag in the document.""" + return self._findOne(self.findNextSiblings, name, attrs, text, + **kwargs) + + def findNextSiblings(self, name=None, attrs={}, text=None, limit=None, + **kwargs): + """Returns the siblings of this Tag that match the given + criteria and appear after this Tag in the document.""" + return self._findAll(name, attrs, text, limit, + self.nextSiblingGenerator, **kwargs) + fetchNextSiblings = findNextSiblings # Compatibility with pre-3.x + + def findPrevious(self, name=None, attrs={}, text=None, **kwargs): + """Returns the first item that matches the given criteria and + appears before this Tag in the document.""" + return self._findOne(self.findAllPrevious, name, attrs, text, **kwargs) + + def findAllPrevious(self, name=None, attrs={}, text=None, limit=None, + **kwargs): + """Returns all items that match the given criteria and appear + before this Tag in the document.""" + return self._findAll(name, attrs, text, limit, self.previousGenerator, + **kwargs) + fetchPrevious = findAllPrevious # Compatibility with pre-3.x + + def findPreviousSibling(self, name=None, attrs={}, text=None, **kwargs): + """Returns the closest sibling to this Tag that matches the + given criteria and appears before this Tag in the document.""" + return self._findOne(self.findPreviousSiblings, name, attrs, text, + **kwargs) + + def findPreviousSiblings(self, name=None, attrs={}, text=None, + limit=None, **kwargs): + """Returns the siblings of this Tag that match the given + criteria and appear before this Tag in the document.""" + return self._findAll(name, attrs, text, limit, + self.previousSiblingGenerator, **kwargs) + fetchPreviousSiblings = findPreviousSiblings # Compatibility with pre-3.x + + def findParent(self, name=None, attrs={}, **kwargs): + """Returns the closest parent of this Tag that matches the given + criteria.""" + # NOTE: We can't use _findOne because findParents takes a different + # set of arguments. 
+ r = None + l = self.findParents(name, attrs, 1) + if l: + r = l[0] + return r + + def findParents(self, name=None, attrs={}, limit=None, **kwargs): + """Returns the parents of this Tag that match the given + criteria.""" + + return self._findAll(name, attrs, None, limit, self.parentGenerator, + **kwargs) + fetchParents = findParents # Compatibility with pre-3.x + + #These methods do the real heavy lifting. + + def _findOne(self, method, name, attrs, text, **kwargs): + r = None + l = method(name, attrs, text, 1, **kwargs) + if l: + r = l[0] + return r + + def _findAll(self, name, attrs, text, limit, generator, **kwargs): + "Iterates over a generator looking for things that match." + + if isinstance(name, SoupStrainer): + strainer = name + # (Possibly) special case some findAll*(...) searches + elif text is None and not limit and not attrs and not kwargs: + # findAll*(True) + if name is True: + return [element for element in generator() + if isinstance(element, Tag)] + # findAll*('tag-name') + elif isinstance(name, basestring): + return [element for element in generator() + if isinstance(element, Tag) and + element.name == name] + else: + strainer = SoupStrainer(name, attrs, text, **kwargs) + # Build a SoupStrainer + else: + strainer = SoupStrainer(name, attrs, text, **kwargs) + results = ResultSet(strainer) + g = generator() + while True: + try: + i = g.next() + except StopIteration: + break + if i: + found = strainer.search(i) + if found: + results.append(found) + if limit and len(results) >= limit: + break + return results + + #These Generators can be used to navigate starting from both + #NavigableStrings and Tags. 
+    def nextGenerator(self):
+        i = self
+        while i is not None:
+            i = i.next
+            yield i
+
+    def nextSiblingGenerator(self):
+        i = self
+        while i is not None:
+            i = i.nextSibling
+            yield i
+
+    def previousGenerator(self):
+        i = self
+        while i is not None:
+            i = i.previous
+            yield i
+
+    def previousSiblingGenerator(self):
+        i = self
+        while i is not None:
+            i = i.previousSibling
+            yield i
+
+    def parentGenerator(self):
+        i = self
+        while i is not None:
+            i = i.parent
+            yield i
+
+    # Utility methods
+    def substituteEncoding(self, str, encoding=None):
+        encoding = encoding or "utf-8"
+        return str.replace("%SOUP-ENCODING%", encoding)
+
+    def toEncoding(self, s, encoding=None):
+        """Encodes an object to a string in some encoding, or to Unicode
+        if no encoding is given."""
+        if isinstance(s, unicode):
+            if encoding:
+                s = s.encode(encoding)
+        elif isinstance(s, str):
+            if encoding:
+                s = s.encode(encoding)
+            else:
+                s = unicode(s)
+        else:
+            if encoding:
+                s  = self.toEncoding(str(s), encoding)
+            else:
+                s = unicode(s)
+        return s
+
+class NavigableString(unicode, PageElement):
+
+    def __new__(cls, value):
+        """Create a new NavigableString.
+
+        When unpickling a NavigableString, this method is called with
+        the string in DEFAULT_OUTPUT_ENCODING. That encoding needs to be
+        passed in to the superclass's __new__ or the superclass won't know
+        how to handle non-ASCII characters.
+        """
+        if isinstance(value, unicode):
+            return unicode.__new__(cls, value)
+        return unicode.__new__(cls, value, DEFAULT_OUTPUT_ENCODING)
+
+    def __getnewargs__(self):
+        return (NavigableString.__str__(self),)
+
+    def __getattr__(self, attr):
+        """text.string gives you text. 
This is for backwards + compatibility for Navigable*String, but for CData* it lets you + get the string without the CData wrapper.""" + if attr == 'string': + return self + else: + raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__.__name__, attr) + + def __unicode__(self): + return str(self).decode(DEFAULT_OUTPUT_ENCODING) + + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): + if encoding: + return self.encode(encoding) + else: + return self + +class CData(NavigableString): + + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): + return "" % NavigableString.__str__(self, encoding) + +class ProcessingInstruction(NavigableString): + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): + output = self + if "%SOUP-ENCODING%" in output: + output = self.substituteEncoding(output, encoding) + return "" % self.toEncoding(output, encoding) + +class Comment(NavigableString): + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): + return "" % NavigableString.__str__(self, encoding) + +class Declaration(NavigableString): + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): + return "" % NavigableString.__str__(self, encoding) + +class Tag(PageElement): + + """Represents a found HTML tag with its attributes and contents.""" + + def _invert(h): + "Cheap function to invert a hash." + i = {} + for k,v in h.items(): + i[v] = k + return i + + XML_ENTITIES_TO_SPECIAL_CHARS = { "apos" : "'", + "quot" : '"', + "amp" : "&", + "lt" : "<", + "gt" : ">" } + + XML_SPECIAL_CHARS_TO_ENTITIES = _invert(XML_ENTITIES_TO_SPECIAL_CHARS) + + def _convertEntities(self, match): + """Used in a call to re.sub to replace HTML, XML, and numeric + entities with the appropriate Unicode characters. 
If HTML + entities are being converted, any unrecognized entities are + escaped.""" + x = match.group(1) + if self.convertHTMLEntities and x in name2codepoint: + return unichr(name2codepoint[x]) + elif x in self.XML_ENTITIES_TO_SPECIAL_CHARS: + if self.convertXMLEntities: + return self.XML_ENTITIES_TO_SPECIAL_CHARS[x] + else: + return u'&%s;' % x + elif len(x) > 0 and x[0] == '#': + # Handle numeric entities + if len(x) > 1 and x[1] == 'x': + return unichr(int(x[2:], 16)) + else: + return unichr(int(x[1:])) + + elif self.escapeUnrecognizedEntities: + return u'&%s;' % x + else: + return u'&%s;' % x + + def __init__(self, parser, name, attrs=None, parent=None, + previous=None): + "Basic constructor." + + # We don't actually store the parser object: that lets extracted + # chunks be garbage-collected + self.parserClass = parser.__class__ + self.isSelfClosing = parser.isSelfClosingTag(name) + self.name = name + if attrs is None: + attrs = [] + elif isinstance(attrs, dict): + attrs = attrs.items() + self.attrs = attrs + self.contents = [] + self.setup(parent, previous) + self.hidden = False + self.containsSubstitutions = False + self.convertHTMLEntities = parser.convertHTMLEntities + self.convertXMLEntities = parser.convertXMLEntities + self.escapeUnrecognizedEntities = parser.escapeUnrecognizedEntities + + # Convert any HTML, XML, or numeric entities in the attribute values. 
+ convert = lambda(k, val): (k, + re.sub("&(#\d+|#x[0-9a-fA-F]+|\w+);", + self._convertEntities, + val)) + self.attrs = map(convert, self.attrs) + + def getString(self): + if (len(self.contents) == 1 + and isinstance(self.contents[0], NavigableString)): + return self.contents[0] + + def setString(self, string): + """Replace the contents of the tag with a string""" + self.clear() + self.append(string) + + string = property(getString, setString) + + def getText(self, separator=u""): + if not len(self.contents): + return u"" + stopNode = self._lastRecursiveChild().next + strings = [] + current = self.contents[0] + while current is not stopNode: + if isinstance(current, NavigableString): + strings.append(current.strip()) + current = current.next + return separator.join(strings) + + text = property(getText) + + def get(self, key, default=None): + """Returns the value of the 'key' attribute for the tag, or + the value given for 'default' if it doesn't have that + attribute.""" + return self._getAttrMap().get(key, default) + + def clear(self): + """Extract all children.""" + for child in self.contents[:]: + child.extract() + + def index(self, element): + for i, child in enumerate(self.contents): + if child is element: + return i + raise ValueError("Tag.index: element not in tag") + + def has_key(self, key): + return self._getAttrMap().has_key(key) + + def __getitem__(self, key): + """tag[key] returns the value of the 'key' attribute for the tag, + and throws an exception if it's not there.""" + return self._getAttrMap()[key] + + def __iter__(self): + "Iterating over a tag iterates over its contents." + return iter(self.contents) + + def __len__(self): + "The length of a tag is the length of its list of contents." + return len(self.contents) + + def __contains__(self, x): + return x in self.contents + + def __nonzero__(self): + "A tag is non-None even if it has no contents." 
+ return True + + def __setitem__(self, key, value): + """Setting tag[key] sets the value of the 'key' attribute for the + tag.""" + self._getAttrMap() + self.attrMap[key] = value + found = False + for i in range(0, len(self.attrs)): + if self.attrs[i][0] == key: + self.attrs[i] = (key, value) + found = True + if not found: + self.attrs.append((key, value)) + self._getAttrMap()[key] = value + + def __delitem__(self, key): + "Deleting tag[key] deletes all 'key' attributes for the tag." + for item in self.attrs: + if item[0] == key: + self.attrs.remove(item) + #We don't break because bad HTML can define the same + #attribute multiple times. + self._getAttrMap() + if self.attrMap.has_key(key): + del self.attrMap[key] + + def __call__(self, *args, **kwargs): + """Calling a tag like a function is the same as calling its + findAll() method. Eg. tag('a') returns a list of all the A tags + found within this tag.""" + return apply(self.findAll, args, kwargs) + + def __getattr__(self, tag): + #print "Getattr %s.%s" % (self.__class__, tag) + if len(tag) > 3 and tag.rfind('Tag') == len(tag)-3: + return self.find(tag[:-3]) + elif tag.find('__') != 0: + return self.find(tag) + raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__, tag) + + def __eq__(self, other): + """Returns true iff this tag has the same name, the same attributes, + and the same contents (recursively) as the given tag. + + NOTE: right now this will return false if two tags have the + same attributes in a different order. 
Should this be fixed?""" + if other is self: + return True + if not hasattr(other, 'name') or not hasattr(other, 'attrs') or not hasattr(other, 'contents') or self.name != other.name or self.attrs != other.attrs or len(self) != len(other): + return False + for i in range(0, len(self.contents)): + if self.contents[i] != other.contents[i]: + return False + return True + + def __ne__(self, other): + """Returns true iff this tag is not identical to the other tag, + as defined in __eq__.""" + return not self == other + + def __repr__(self, encoding=DEFAULT_OUTPUT_ENCODING): + """Renders this tag as a string.""" + return self.__str__(encoding) + + def __unicode__(self): + return self.__str__(None) + + BARE_AMPERSAND_OR_BRACKET = re.compile("([<>]|" + + "&(?!#\d+;|#x[0-9a-fA-F]+;|\w+;)" + + ")") + + def _sub_entity(self, x): + """Used with a regular expression to substitute the + appropriate XML entity for an XML special character.""" + return "&" + self.XML_SPECIAL_CHARS_TO_ENTITIES[x.group(0)[0]] + ";" + + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING, + prettyPrint=False, indentLevel=0): + """Returns a string or Unicode representation of this tag and + its contents. To get Unicode, pass None for encoding. + + NOTE: since Python's HTML parser consumes whitespace, this + method is not certain to reproduce the whitespace present in + the original string.""" + + encodedName = self.toEncoding(self.name, encoding) + + attrs = [] + if self.attrs: + for key, val in self.attrs: + fmt = '%s="%s"' + if isinstance(val, basestring): + if self.containsSubstitutions and '%SOUP-ENCODING%' in val: + val = self.substituteEncoding(val, encoding) + + # The attribute value either: + # + # * Contains no embedded double quotes or single quotes. + # No problem: we enclose it in double quotes. + # * Contains embedded single quotes. No problem: + # double quotes work here too. + # * Contains embedded double quotes. No problem: + # we enclose it in single quotes. 
+ # * Embeds both single _and_ double quotes. This + # can't happen naturally, but it can happen if + # you modify an attribute value after parsing + # the document. Now we have a bit of a + # problem. We solve it by enclosing the + # attribute in single quotes, and escaping any + # embedded single quotes to XML entities. + if '"' in val: + fmt = "%s='%s'" + if "'" in val: + # TODO: replace with apos when + # appropriate. + val = val.replace("'", "&squot;") + + # Now we're okay w/r/t quotes. But the attribute + # value might also contain angle brackets, or + # ampersands that aren't part of entities. We need + # to escape those to XML entities too. + val = self.BARE_AMPERSAND_OR_BRACKET.sub(self._sub_entity, val) + + attrs.append(fmt % (self.toEncoding(key, encoding), + self.toEncoding(val, encoding))) + close = '' + closeTag = '' + if self.isSelfClosing: + close = ' /' + else: + closeTag = '' % encodedName + + indentTag, indentContents = 0, 0 + if prettyPrint: + indentTag = indentLevel + space = (' ' * (indentTag-1)) + indentContents = indentTag + 1 + contents = self.renderContents(encoding, prettyPrint, indentContents) + if self.hidden: + s = contents + else: + s = [] + attributeString = '' + if attrs: + attributeString = ' ' + ' '.join(attrs) + if prettyPrint: + s.append(space) + s.append('<%s%s%s>' % (encodedName, attributeString, close)) + if prettyPrint: + s.append("\n") + s.append(contents) + if prettyPrint and contents and contents[-1] != "\n": + s.append("\n") + if prettyPrint and closeTag: + s.append(space) + s.append(closeTag) + if prettyPrint and closeTag and self.nextSibling: + s.append("\n") + s = ''.join(s) + return s + + def decompose(self): + """Recursively destroys the contents of this tree.""" + self.extract() + if len(self.contents) == 0: + return + current = self.contents[0] + while current is not None: + next = current.next + if isinstance(current, Tag): + del current.contents[:] + current.parent = None + current.previous = None + 
current.previousSibling = None + current.next = None + current.nextSibling = None + current = next + + def prettify(self, encoding=DEFAULT_OUTPUT_ENCODING): + return self.__str__(encoding, True) + + def renderContents(self, encoding=DEFAULT_OUTPUT_ENCODING, + prettyPrint=False, indentLevel=0): + """Renders the contents of this tag as a string in the given + encoding. If encoding is None, returns a Unicode string..""" + s=[] + for c in self: + text = None + if isinstance(c, NavigableString): + text = c.__str__(encoding) + elif isinstance(c, Tag): + s.append(c.__str__(encoding, prettyPrint, indentLevel)) + if text and prettyPrint: + text = text.strip() + if text: + if prettyPrint: + s.append(" " * (indentLevel-1)) + s.append(text) + if prettyPrint: + s.append("\n") + return ''.join(s) + + #Soup methods + + def find(self, name=None, attrs={}, recursive=True, text=None, + **kwargs): + """Return only the first child of this Tag matching the given + criteria.""" + r = None + l = self.findAll(name, attrs, recursive, text, 1, **kwargs) + if l: + r = l[0] + return r + findChild = find + + def findAll(self, name=None, attrs={}, recursive=True, text=None, + limit=None, **kwargs): + """Extracts a list of Tag objects that match the given + criteria. You can specify the name of the Tag and any + attributes you want the Tag to have. + + The value of a key-value pair in the 'attrs' map can be a + string, a list of strings, a regular expression object, or a + callable that takes a string and returns whether or not the + string matches for some custom definition of 'matches'. 
The + same is true of the tag name.""" + generator = self.recursiveChildGenerator + if not recursive: + generator = self.childGenerator + return self._findAll(name, attrs, text, limit, generator, **kwargs) + findChildren = findAll + + # Pre-3.x compatibility methods + first = find + fetch = findAll + + def fetchText(self, text=None, recursive=True, limit=None): + return self.findAll(text=text, recursive=recursive, limit=limit) + + def firstText(self, text=None, recursive=True): + return self.find(text=text, recursive=recursive) + + #Private methods + + def _getAttrMap(self): + """Initializes a map representation of this tag's attributes, + if not already initialized.""" + if not getattr(self, 'attrMap'): + self.attrMap = {} + for (key, value) in self.attrs: + self.attrMap[key] = value + return self.attrMap + + #Generator methods + def childGenerator(self): + # Just use the iterator from the contents + return iter(self.contents) + + def recursiveChildGenerator(self): + if not len(self.contents): + raise StopIteration + stopNode = self._lastRecursiveChild().next + current = self.contents[0] + while current is not stopNode: + yield current + current = current.next + + +# Next, a couple classes to represent queries and their results. 
+class SoupStrainer: + """Encapsulates a number of ways of matching a markup element (tag or + text).""" + + def __init__(self, name=None, attrs={}, text=None, **kwargs): + self.name = name + if isinstance(attrs, basestring): + kwargs['class'] = _match_css_class(attrs) + attrs = None + if kwargs: + if attrs: + attrs = attrs.copy() + attrs.update(kwargs) + else: + attrs = kwargs + self.attrs = attrs + self.text = text + + def __str__(self): + if self.text: + return self.text + else: + return "%s|%s" % (self.name, self.attrs) + + def searchTag(self, markupName=None, markupAttrs={}): + found = None + markup = None + if isinstance(markupName, Tag): + markup = markupName + markupAttrs = markup + callFunctionWithTagData = callable(self.name) \ + and not isinstance(markupName, Tag) + + if (not self.name) \ + or callFunctionWithTagData \ + or (markup and self._matches(markup, self.name)) \ + or (not markup and self._matches(markupName, self.name)): + if callFunctionWithTagData: + match = self.name(markupName, markupAttrs) + else: + match = True + markupAttrMap = None + for attr, matchAgainst in self.attrs.items(): + if not markupAttrMap: + if hasattr(markupAttrs, 'get'): + markupAttrMap = markupAttrs + else: + markupAttrMap = {} + for k,v in markupAttrs: + markupAttrMap[k] = v + attrValue = markupAttrMap.get(attr) + if not self._matches(attrValue, matchAgainst): + match = False + break + if match: + if markup: + found = markup + else: + found = markupName + return found + + def search(self, markup): + #print 'looking for %s in %s' % (self, markup) + found = None + # If given a list of items, scan it for a text element that + # matches. + if hasattr(markup, "__iter__") \ + and not isinstance(markup, Tag): + for element in markup: + if isinstance(element, NavigableString) \ + and self.search(element): + found = element + break + # If it's a Tag, make sure its name or attributes match. + # Don't bother with Tags if we're searching for text. 
+        elif isinstance(markup, Tag):
+            if not self.text:
+                found = self.searchTag(markup)
+        # If it's text, make sure the text matches.
+        elif isinstance(markup, NavigableString) or \
+                 isinstance(markup, basestring):
+            if self._matches(markup, self.text):
+                found = markup
+        else:
+            raise Exception, "I don't know how to match against a %s" \
+                  % markup.__class__
+        return found
+
+    def _matches(self, markup, matchAgainst):
+        #print "Matching %s against %s" % (markup, matchAgainst)
+        result = False
+        if matchAgainst is True:
+            result = markup is not None
+        elif callable(matchAgainst):
+            result = matchAgainst(markup)
+        else:
+            #Custom match methods take the tag as an argument, but all
+            #other ways of matching match the tag name as a string.
+            if isinstance(markup, Tag):
+                markup = markup.name
+            if markup and not isinstance(markup, basestring):
+                markup = unicode(markup)
+            #Now we know that markup is either a string, or None.
+            if hasattr(matchAgainst, 'match'):
+                # It's a regexp object.
+                result = markup and matchAgainst.search(markup)
+            elif hasattr(matchAgainst, '__iter__'): # list-like
+                result = markup in matchAgainst
+            elif hasattr(matchAgainst, 'items'):
+                result = markup.has_key(matchAgainst)
+            elif matchAgainst and isinstance(markup, basestring):
+                if isinstance(markup, unicode):
+                    matchAgainst = unicode(matchAgainst)
+                else:
+                    matchAgainst = str(matchAgainst)
+
+            if not result:
+                result = matchAgainst == markup
+        return result
+
+class ResultSet(list):
+    """A ResultSet is just a list that keeps track of the SoupStrainer
+    that created it."""
+    def __init__(self, source):
+        list.__init__([])
+        self.source = source
+
+# Now, some helper functions.
+
+def buildTagMap(default, *args):
+    """Turns a list of maps, lists, or scalars into a single map.
+    Used to build the SELF_CLOSING_TAGS, NESTABLE_TAGS, and
+    NESTING_RESET_TAGS maps out of lists and partial maps."""
+    built = {}
+    for portion in args:
+        if hasattr(portion, 'items'):
+            #It's a map. Merge it.
+ for k,v in portion.items(): + built[k] = v + elif hasattr(portion, '__iter__'): # is a list + #It's a list. Map each item to the default. + for k in portion: + built[k] = default + else: + #It's a scalar. Map it to the default. + built[portion] = default + return built + +# Now, the parser classes. + +class BeautifulStoneSoup(Tag, SGMLParser): + + """This class contains the basic parser and search code. It defines + a parser that knows nothing about tag behavior except for the + following: + + You can't close a tag without closing all the tags it encloses. + That is, "" actually means + "". + + [Another possible explanation is "", but since + this class defines no SELF_CLOSING_TAGS, it will never use that + explanation.] + + This class is useful for parsing XML or made-up markup languages, + or when BeautifulSoup makes an assumption counter to what you were + expecting.""" + + SELF_CLOSING_TAGS = {} + NESTABLE_TAGS = {} + RESET_NESTING_TAGS = {} + QUOTE_TAGS = {} + PRESERVE_WHITESPACE_TAGS = [] + + MARKUP_MASSAGE = [(re.compile('(<[^<>]*)/>'), + lambda x: x.group(1) + ' />'), + (re.compile(']*)>'), + lambda x: '') + ] + + ROOT_TAG_NAME = u'[document]' + + HTML_ENTITIES = "html" + XML_ENTITIES = "xml" + XHTML_ENTITIES = "xhtml" + # TODO: This only exists for backwards-compatibility + ALL_ENTITIES = XHTML_ENTITIES + + # Used when determining whether a text node is all whitespace and + # can be replaced with a single space. A text node that contains + # fancy Unicode spaces (usually non-breaking) should be left + # alone. + STRIP_ASCII_SPACES = { 9: None, 10: None, 12: None, 13: None, 32: None, } + + def __init__(self, markup="", parseOnlyThese=None, fromEncoding=None, + markupMassage=True, smartQuotesTo=XML_ENTITIES, + convertEntities=None, selfClosingTags=None, isHTML=False): + """The Soup object is initialized as the 'root tag', and the + provided markup (which can be a string or a file-like object) + is fed into the underlying parser. 
+ + sgmllib will process most bad HTML, and the BeautifulSoup + class has some tricks for dealing with some HTML that kills + sgmllib, but Beautiful Soup can nonetheless choke or lose data + if your data uses self-closing tags or declarations + incorrectly. + + By default, Beautiful Soup uses regexes to sanitize input, + avoiding the vast majority of these problems. If the problems + don't apply to you, pass in False for markupMassage, and + you'll get better performance. + + The default parser massage techniques fix the two most common + instances of invalid HTML that choke sgmllib: + +
(No space between name of closing tag and tag close) + (Extraneous whitespace in declaration) + + You can pass in a custom list of (RE object, replace method) + tuples to get Beautiful Soup to scrub your input the way you + want.""" + + self.parseOnlyThese = parseOnlyThese + self.fromEncoding = fromEncoding + self.smartQuotesTo = smartQuotesTo + self.convertEntities = convertEntities + # Set the rules for how we'll deal with the entities we + # encounter + if self.convertEntities: + # It doesn't make sense to convert encoded characters to + # entities even while you're converting entities to Unicode. + # Just convert it all to Unicode. + self.smartQuotesTo = None + if convertEntities == self.HTML_ENTITIES: + self.convertXMLEntities = False + self.convertHTMLEntities = True + self.escapeUnrecognizedEntities = True + elif convertEntities == self.XHTML_ENTITIES: + self.convertXMLEntities = True + self.convertHTMLEntities = True + self.escapeUnrecognizedEntities = False + elif convertEntities == self.XML_ENTITIES: + self.convertXMLEntities = True + self.convertHTMLEntities = False + self.escapeUnrecognizedEntities = False + else: + self.convertXMLEntities = False + self.convertHTMLEntities = False + self.escapeUnrecognizedEntities = False + + self.instanceSelfClosingTags = buildTagMap(None, selfClosingTags) + SGMLParser.__init__(self) + + if hasattr(markup, 'read'): # It's a file-type object. + markup = markup.read() + self.markup = markup + self.markupMassage = markupMassage + try: + self._feed(isHTML=isHTML) + except StopParsing: + pass + self.markup = None # The markup can now be GCed + + def convert_charref(self, name): + """This method fixes a bug in Python's SGMLParser.""" + try: + n = int(name) + except ValueError: + return + if not 0 <= n <= 127 : # ASCII ends at 127, not 255 + return + return self.convert_codepoint(n) + + def _feed(self, inDocumentEncoding=None, isHTML=False): + # Convert the document to Unicode. 
+ markup = self.markup + if isinstance(markup, unicode): + if not hasattr(self, 'originalEncoding'): + self.originalEncoding = None + else: + dammit = UnicodeDammit\ + (markup, [self.fromEncoding, inDocumentEncoding], + smartQuotesTo=self.smartQuotesTo, isHTML=isHTML) + markup = dammit.unicode + self.originalEncoding = dammit.originalEncoding + self.declaredHTMLEncoding = dammit.declaredHTMLEncoding + if markup: + if self.markupMassage: + if not hasattr(self.markupMassage, "__iter__"): + self.markupMassage = self.MARKUP_MASSAGE + for fix, m in self.markupMassage: + markup = fix.sub(m, markup) + # TODO: We get rid of markupMassage so that the + # soup object can be deepcopied later on. Some + # Python installations can't copy regexes. If anyone + # was relying on the existence of markupMassage, this + # might cause problems. + del(self.markupMassage) + self.reset() + + SGMLParser.feed(self, markup) + # Close out any unfinished strings and close all the open tags. + self.endData() + while self.currentTag.name != self.ROOT_TAG_NAME: + self.popTag() + + def __getattr__(self, methodName): + """This method routes method call requests to either the SGMLParser + superclass or the Tag superclass, depending on the method name.""" + #print "__getattr__ called on %s.%s" % (self.__class__, methodName) + + if methodName.startswith('start_') or methodName.startswith('end_') \ + or methodName.startswith('do_'): + return SGMLParser.__getattr__(self, methodName) + elif not methodName.startswith('__'): + return Tag.__getattr__(self, methodName) + else: + raise AttributeError + + def isSelfClosingTag(self, name): + """Returns true iff the given string is the name of a + self-closing tag according to this parser.""" + return self.SELF_CLOSING_TAGS.has_key(name) \ + or self.instanceSelfClosingTags.has_key(name) + + def reset(self): + Tag.__init__(self, self, self.ROOT_TAG_NAME) + self.hidden = 1 + SGMLParser.reset(self) + self.currentData = [] + self.currentTag = None + self.tagStack = 
[] + self.quoteStack = [] + self.pushTag(self) + + def popTag(self): + tag = self.tagStack.pop() + + #print "Pop", tag.name + if self.tagStack: + self.currentTag = self.tagStack[-1] + return self.currentTag + + def pushTag(self, tag): + #print "Push", tag.name + if self.currentTag: + self.currentTag.contents.append(tag) + self.tagStack.append(tag) + self.currentTag = self.tagStack[-1] + + def endData(self, containerClass=NavigableString): + if self.currentData: + currentData = u''.join(self.currentData) + if (currentData.translate(self.STRIP_ASCII_SPACES) == '' and + not set([tag.name for tag in self.tagStack]).intersection( + self.PRESERVE_WHITESPACE_TAGS)): + if '\n' in currentData: + currentData = '\n' + else: + currentData = ' ' + self.currentData = [] + if self.parseOnlyThese and len(self.tagStack) <= 1 and \ + (not self.parseOnlyThese.text or \ + not self.parseOnlyThese.search(currentData)): + return + o = containerClass(currentData) + o.setup(self.currentTag, self.previous) + if self.previous: + self.previous.next = o + self.previous = o + self.currentTag.contents.append(o) + + + def _popToTag(self, name, inclusivePop=True): + """Pops the tag stack up to and including the most recent + instance of the given tag. 
If inclusivePop is false, pops the tag + stack up to but *not* including the most recent instqance of + the given tag.""" + #print "Popping to %s" % name + if name == self.ROOT_TAG_NAME: + return + + numPops = 0 + mostRecentTag = None + for i in range(len(self.tagStack)-1, 0, -1): + if name == self.tagStack[i].name: + numPops = len(self.tagStack)-i + break + if not inclusivePop: + numPops = numPops - 1 + + for i in range(0, numPops): + mostRecentTag = self.popTag() + return mostRecentTag + + def _smartPop(self, name): + + """We need to pop up to the previous tag of this type, unless + one of this tag's nesting reset triggers comes between this + tag and the previous tag of this type, OR unless this tag is a + generic nesting trigger and another generic nesting trigger + comes between this tag and the previous tag of this type. + + Examples: +

FooBar *

* should pop to 'p', not 'b'. +

FooBar *

* should pop to 'table', not 'p'. +

Foo

Bar *

* should pop to 'tr', not 'p'. + +

    • *
    • * should pop to 'ul', not the first 'li'. +
  • ** should pop to 'table', not the first 'tr' + tag should + implicitly close the previous tag within the same
    ** should pop to 'tr', not the first 'td' + """ + + nestingResetTriggers = self.NESTABLE_TAGS.get(name) + isNestable = nestingResetTriggers != None + isResetNesting = self.RESET_NESTING_TAGS.has_key(name) + popTo = None + inclusive = True + for i in range(len(self.tagStack)-1, 0, -1): + p = self.tagStack[i] + if (not p or p.name == name) and not isNestable: + #Non-nestable tags get popped to the top or to their + #last occurance. + popTo = name + break + if (nestingResetTriggers is not None + and p.name in nestingResetTriggers) \ + or (nestingResetTriggers is None and isResetNesting + and self.RESET_NESTING_TAGS.has_key(p.name)): + + #If we encounter one of the nesting reset triggers + #peculiar to this tag, or we encounter another tag + #that causes nesting to reset, pop up to but not + #including that tag. + popTo = p.name + inclusive = False + break + p = p.parent + if popTo: + self._popToTag(popTo, inclusive) + + def unknown_starttag(self, name, attrs, selfClosing=0): + #print "Start tag %s: %s" % (name, attrs) + if self.quoteStack: + #This is not a real tag. + #print "<%s> is not real!" % name + attrs = ''.join([' %s="%s"' % (x, y) for x, y in attrs]) + self.handle_data('<%s%s>' % (name, attrs)) + return + self.endData() + + if not self.isSelfClosingTag(name) and not selfClosing: + self._smartPop(name) + + if self.parseOnlyThese and len(self.tagStack) <= 1 \ + and (self.parseOnlyThese.text or not self.parseOnlyThese.searchTag(name, attrs)): + return + + tag = Tag(self, name, attrs, self.currentTag, self.previous) + if self.previous: + self.previous.next = tag + self.previous = tag + self.pushTag(tag) + if selfClosing or self.isSelfClosingTag(name): + self.popTag() + if name in self.QUOTE_TAGS: + #print "Beginning quote (%s)" % name + self.quoteStack.append(name) + self.literal = 1 + return tag + + def unknown_endtag(self, name): + #print "End tag %s" % name + if self.quoteStack and self.quoteStack[-1] != name: + #This is not a real end tag. 
+ #print " is not real!" % name + self.handle_data('' % name) + return + self.endData() + self._popToTag(name) + if self.quoteStack and self.quoteStack[-1] == name: + self.quoteStack.pop() + self.literal = (len(self.quoteStack) > 0) + + def handle_data(self, data): + self.currentData.append(data) + + def _toStringSubclass(self, text, subclass): + """Adds a certain piece of text to the tree as a NavigableString + subclass.""" + self.endData() + self.handle_data(text) + self.endData(subclass) + + def handle_pi(self, text): + """Handle a processing instruction as a ProcessingInstruction + object, possibly one with a %SOUP-ENCODING% slot into which an + encoding will be plugged later.""" + if text[:3] == "xml": + text = u"xml version='1.0' encoding='%SOUP-ENCODING%'" + self._toStringSubclass(text, ProcessingInstruction) + + def handle_comment(self, text): + "Handle comments as Comment objects." + self._toStringSubclass(text, Comment) + + def handle_charref(self, ref): + "Handle character references as data." + if self.convertEntities: + data = unichr(int(ref)) + else: + data = '&#%s;' % ref + self.handle_data(data) + + def handle_entityref(self, ref): + """Handle entity references as data, possibly converting known + HTML and/or XML entity references to the corresponding Unicode + characters.""" + data = None + if self.convertHTMLEntities: + try: + data = unichr(name2codepoint[ref]) + except KeyError: + pass + + if not data and self.convertXMLEntities: + data = self.XML_ENTITIES_TO_SPECIAL_CHARS.get(ref) + + if not data and self.convertHTMLEntities and \ + not self.XML_ENTITIES_TO_SPECIAL_CHARS.get(ref): + # TODO: We've got a problem here. We're told this is + # an entity reference, but it's not an XML entity + # reference or an HTML entity reference. Nonetheless, + # the logical thing to do is to pass it through as an + # unrecognized entity reference. + # + # Except: when the input is "&carol;" this function + # will be called with input "carol". 
When the input is + # "AT&T", this function will be called with input + # "T". We have no way of knowing whether a semicolon + # was present originally, so we don't know whether + # this is an unknown entity or just a misplaced + # ampersand. + # + # The more common case is a misplaced ampersand, so I + # escape the ampersand and omit the trailing semicolon. + data = "&%s" % ref + if not data: + # This case is different from the one above, because we + # haven't already gone through a supposedly comprehensive + # mapping of entities to Unicode characters. We might not + # have gone through any mapping at all. So the chances are + # very high that this is a real entity, and not a + # misplaced ampersand. + data = "&%s;" % ref + self.handle_data(data) + + def handle_decl(self, data): + "Handle DOCTYPEs and the like as Declaration objects." + self._toStringSubclass(data, Declaration) + + def parse_declaration(self, i): + """Treat a bogus SGML declaration as raw data. Treat a CDATA + declaration as a CData object.""" + j = None + if self.rawdata[i:i+9] == '', i) + if k == -1: + k = len(self.rawdata) + data = self.rawdata[i+9:k] + j = k+3 + self._toStringSubclass(data, CData) + else: + try: + j = SGMLParser.parse_declaration(self, i) + except SGMLParseError: + toHandle = self.rawdata[i:] + self.handle_data(toHandle) + j = i + len(toHandle) + return j + +class BeautifulSoup(BeautifulStoneSoup): + + """This parser knows the following facts about HTML: + + * Some tags have no closing tag and should be interpreted as being + closed as soon as they are encountered. + + * The text inside some tags (ie. 'script') may contain tags which + are not really part of the document and which should be parsed + as text, not tags. If you want to parse the text as tags, you can + always fetch it and parse it explicitly. + + * Tag nesting rules: + + Most tags can't be nested at all. For instance, the occurance of + a

    tag should implicitly close the previous

    tag. + +

    Para1

    Para2 + should be transformed into: +

    Para1

    Para2 + + Some tags can be nested arbitrarily. For instance, the occurance + of a

    tag should _not_ implicitly close the previous +
    tag. + + Alice said:
    Bob said:
    Blah + should NOT be transformed into: + Alice said:
    Bob said:
    Blah + + Some tags can be nested, but the nesting is reset by the + interposition of other tags. For instance, a
    , + but not close a tag in another table. + +
    BlahBlah + should be transformed into: +
    BlahBlah + but, + Blah
    Blah + should NOT be transformed into + Blah
    Blah + + Differing assumptions about tag nesting rules are a major source + of problems with the BeautifulSoup class. If BeautifulSoup is not + treating as nestable a tag your page author treats as nestable, + try ICantBelieveItsBeautifulSoup, MinimalSoup, or + BeautifulStoneSoup before writing your own subclass.""" + + def __init__(self, *args, **kwargs): + if not kwargs.has_key('smartQuotesTo'): + kwargs['smartQuotesTo'] = self.HTML_ENTITIES + kwargs['isHTML'] = True + BeautifulStoneSoup.__init__(self, *args, **kwargs) + + SELF_CLOSING_TAGS = buildTagMap(None, + ('br' , 'hr', 'input', 'img', 'meta', + 'spacer', 'link', 'frame', 'base', 'col')) + + PRESERVE_WHITESPACE_TAGS = set(['pre', 'textarea']) + + QUOTE_TAGS = {'script' : None, 'textarea' : None} + + #According to the HTML standard, each of these inline tags can + #contain another tag of the same type. Furthermore, it's common + #to actually use these tags this way. + NESTABLE_INLINE_TAGS = ('span', 'font', 'q', 'object', 'bdo', 'sub', 'sup', + 'center') + + #According to the HTML standard, these block tags can contain + #another tag of the same type. Furthermore, it's common + #to actually use these tags this way. + NESTABLE_BLOCK_TAGS = ('blockquote', 'div', 'fieldset', 'ins', 'del') + + #Lists can contain other lists, but there are restrictions. + NESTABLE_LIST_TAGS = { 'ol' : [], + 'ul' : [], + 'li' : ['ul', 'ol'], + 'dl' : [], + 'dd' : ['dl'], + 'dt' : ['dl'] } + + #Tables can contain other tables, but there are restrictions. + NESTABLE_TABLE_TAGS = {'table' : [], + 'tr' : ['table', 'tbody', 'tfoot', 'thead'], + 'td' : ['tr'], + 'th' : ['tr'], + 'thead' : ['table'], + 'tbody' : ['table'], + 'tfoot' : ['table'], + } + + NON_NESTABLE_BLOCK_TAGS = ('address', 'form', 'p', 'pre') + + #If one of these tags is encountered, all tags up to the next tag of + #this type are popped. 
+ RESET_NESTING_TAGS = buildTagMap(None, NESTABLE_BLOCK_TAGS, 'noscript', + NON_NESTABLE_BLOCK_TAGS, + NESTABLE_LIST_TAGS, + NESTABLE_TABLE_TAGS) + + NESTABLE_TAGS = buildTagMap([], NESTABLE_INLINE_TAGS, NESTABLE_BLOCK_TAGS, + NESTABLE_LIST_TAGS, NESTABLE_TABLE_TAGS) + + # Used to detect the charset in a META tag; see start_meta + CHARSET_RE = re.compile("((^|;)\s*charset=)([^;]*)", re.M) + + def start_meta(self, attrs): + """Beautiful Soup can detect a charset included in a META tag, + try to convert the document to that charset, and re-parse the + document from the beginning.""" + httpEquiv = None + contentType = None + contentTypeIndex = None + tagNeedsEncodingSubstitution = False + + for i in range(0, len(attrs)): + key, value = attrs[i] + key = key.lower() + if key == 'http-equiv': + httpEquiv = value + elif key == 'content': + contentType = value + contentTypeIndex = i + + if httpEquiv and contentType: # It's an interesting meta tag. + match = self.CHARSET_RE.search(contentType) + if match: + if (self.declaredHTMLEncoding is not None or + self.originalEncoding == self.fromEncoding): + # An HTML encoding was sniffed while converting + # the document to Unicode, or an HTML encoding was + # sniffed during a previous pass through the + # document, or an encoding was specified + # explicitly and it worked. Rewrite the meta tag. + def rewrite(match): + return match.group(1) + "%SOUP-ENCODING%" + newAttr = self.CHARSET_RE.sub(rewrite, contentType) + attrs[contentTypeIndex] = (attrs[contentTypeIndex][0], + newAttr) + tagNeedsEncodingSubstitution = True + else: + # This is our first pass through the document. + # Go through it again with the encoding information. 
+ newCharset = match.group(3) + if newCharset and newCharset != self.originalEncoding: + self.declaredHTMLEncoding = newCharset + self._feed(self.declaredHTMLEncoding) + raise StopParsing + pass + tag = self.unknown_starttag("meta", attrs) + if tag and tagNeedsEncodingSubstitution: + tag.containsSubstitutions = True + +class StopParsing(Exception): + pass + +class ICantBelieveItsBeautifulSoup(BeautifulSoup): + + """The BeautifulSoup class is oriented towards skipping over + common HTML errors like unclosed tags. However, sometimes it makes + errors of its own. For instance, consider this fragment: + + FooBar + + This is perfectly valid (if bizarre) HTML. However, the + BeautifulSoup class will implicitly close the first b tag when it + encounters the second 'b'. It will think the author wrote + "FooBar", and didn't close the first 'b' tag, because + there's no real-world reason to bold something that's already + bold. When it encounters '' it will close two more 'b' + tags, for a grand total of three tags closed instead of two. This + can throw off the rest of your document structure. The same is + true of a number of other tags, listed below. + + It's much more common for someone to forget to close a 'b' tag + than to actually use nested 'b' tags, and the BeautifulSoup class + handles the common case. 
This class handles the not-co-common + case: where you can't believe someone wrote what they did, but + it's valid HTML and BeautifulSoup screwed up by assuming it + wouldn't be.""" + + I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS = \ + ('em', 'big', 'i', 'small', 'tt', 'abbr', 'acronym', 'strong', + 'cite', 'code', 'dfn', 'kbd', 'samp', 'strong', 'var', 'b', + 'big') + + I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS = ('noscript',) + + NESTABLE_TAGS = buildTagMap([], BeautifulSoup.NESTABLE_TAGS, + I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS, + I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS) + +class MinimalSoup(BeautifulSoup): + """The MinimalSoup class is for parsing HTML that contains + pathologically bad markup. It makes no assumptions about tag + nesting, but it does know which tags are self-closing, that + ")] + data = data[:data.rindex(";")] + data = data.replace('tables:','"tables":') + tables = json.loads(data)['tables'] + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(url) + else: + raise e + + # looks like only one author per story allowed. 
+ author = tables['users'][0] + story = tables['stories'][0] + story_ver = tables['story_versions'][0] + print("story:%s"%story) + + self.story.setMetadata('authorId',author['id']) + self.story.setMetadata('author',author['display_name']) + self.story.setMetadata('authorUrl','https://'+self.host+'/author/'+author['display_name']+'/stories') + + self.story.setMetadata('title',story_ver['title']) + self.setDescription(url,story_ver['description']) + + if not ('assets/story_versions/covers' in story_ver['profile_image_url@2x']): + self.setCoverImage(url,story_ver['profile_image_url@2x']) + + self.story.setMetadata('datePublished',makeDate(story['published_at'], self.dateformat)) + self.story.setMetadata('dateUpdated',makeDate(story['published_at'], self.dateformat)) + + self.story.setMetadata('followers',story['followers_count']) + self.story.setMetadata('comments',story['comments_count']) + self.story.setMetadata('views',story['views_count']) + self.story.setMetadata('likes',int(story['likes'])) # no idea why they floated these. 
+ if 'dislikes' in story: + self.story.setMetadata('dislikes',int(story['dislikes'])) + + if story_ver['is_complete']: + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + + self.story.setMetadata('rating', story_ver['maturity_level']) + self.story.setMetadata('numWords', unicode(story_ver['word_count'])) + + for i in tables['fandoms']: + self.story.addToList('category',i['name']) + + for i in tables['genres']: + self.story.addToList('genre',i['name']) + + for i in tables['characters']: + self.story.addToList('characters',i['name']) + + for c in tables['chapters']: + chtitle = "Chapter %d"%c['number'] + if c['title']: + chtitle += " - %s"%c['title'] + self.chapterUrls.append((chtitle,c['body_url'])) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + def getChapterText(self, url): + logger.debug('Getting chapter text from: %s' % url) + if not url: + data = u"This chapter has no text." + else: + data = self._fetchUrl(url) + soup = bs.BeautifulSoup(u"
    "+data+u"
    ") + return self.utf8FromSoup(url,soup) + +def getClass(): + return FictionPadSiteAdapter + diff --git a/fanficdownloader/adapters/adapter_fictionpresscom.py b/fanficdownloader/adapters/adapter_fictionpresscom.py new file mode 100644 index 00000000..795ff941 --- /dev/null +++ b/fanficdownloader/adapters/adapter_fictionpresscom.py @@ -0,0 +1,51 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import time +import logging +logger = logging.getLogger(__name__) +import re +import urllib2 +import time + +## They're from the same people and pretty much identical. 
+from adapter_fanfictionnet import FanFictionNetSiteAdapter + +class FictionPressComSiteAdapter(FanFictionNetSiteAdapter): + + def __init__(self, config, url): + FanFictionNetSiteAdapter.__init__(self, config, url) + self.story.setMetadata('siteabbrev','fpcom') + + @staticmethod + def getSiteDomain(): + return 'www.fictionpress.com' + + @classmethod + def getAcceptDomains(cls): + return ['www.fictionpress.com','m.fictionpress.com'] + + @classmethod + def getSiteExampleURLs(cls): + return "https://www.fictionpress.com/s/1234/1/ https://www.fictionpress.com/s/1234/12/ http://www.fictionpress.com/s/1234/1/Story_Title http://m.fictionpress.com/s/1234/1/" + + def getSiteURLPattern(self): + return r"https?://(www|m)?\.fictionpress\.com/s/\d+(/\d+)?(/|/[a-zA-Z0-9_-]+)?/?$" + +def getClass(): + return FictionPressComSiteAdapter + diff --git a/fanficdownloader/adapters/adapter_ficwadcom.py b/fanficdownloader/adapters/adapter_ficwadcom.py new file mode 100644 index 00000000..6b5b87ee --- /dev/null +++ b/fanficdownloader/adapters/adapter_ficwadcom.py @@ -0,0 +1,236 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import time +import logging +logger = logging.getLogger(__name__) +import re +import urllib2 +import time +import httplib, urllib + +from .. import BeautifulSoup as bs +from .. 
import exceptions as exceptions +from ..htmlcleanup import stripHTML + +from base_adapter import BaseSiteAdapter, makeDate + +class FicwadComSiteAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + self.story.setMetadata('siteabbrev','fw') + + # get storyId from url--url validation guarantees second part is storyId + self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[2]) + + self.username = "NoneGiven" + self.password = "" + + @staticmethod + def getSiteDomain(): + return 'ficwad.com' + + @classmethod + def getSiteExampleURLs(cls): + return "http://ficwad.com/story/1234" + + def getSiteURLPattern(self): + return re.escape(r"http://"+self.getSiteDomain())+"/story/\d+?$" + + def performLogin(self,url): + params = {} + + if self.password: + params['username'] = self.username + params['password'] = self.password + else: + params['username'] = self.getConfig("username") + params['password'] = self.getConfig("password") + + loginUrl = 'http://' + self.getSiteDomain() + '/account/login' + logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl, + params['username'])) + d = self._postUrl(loginUrl,params,usecache=False) + + if "Login attempt failed..." in d: + logger.info("Failed to login to URL %s as %s" % (loginUrl, + params['username'])) + raise exceptions.FailedToLogin(url,params['username']) + return False + else: + return True + + def use_pagecache(self): + ''' + adapters that will work with the page cache need to implement + this and change it to True. + ''' + return True + + def extractChapterUrlsAndMetadata(self): + + # fetch the chapter. From that we will get almost all the + # metadata and chapter list + + url = self.url + logger.debug("URL: "+url) + + # use BeautifulSoup HTML parser to make everything easier to find. + try: + data = self._fetchUrl(url) + # non-existent/removed story urls get thrown to the front page. + if "

    Welcome to FicWad

    " in data: + raise exceptions.StoryDoesNotExist(self.url) + soup = bs.BeautifulSoup(data) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + # if blocked, attempt login. + if soup.find("div",{"class":"blocked"}): + if self.performLogin(url): # performLogin raises + # FailedToLogin if it fails. + soup = bs.BeautifulSoup(self._fetchUrl(url,usecache=False)) + + divstory = soup.find('div',id='story') + storya = divstory.find('a',href=re.compile("^/story/\d+$")) + if storya : # if there's a story link in the divstory header, this is a chapter page. + # normalize story URL on chapter list. + self.story.setMetadata('storyId',storya['href'].split('/',)[2]) + url = "http://"+self.getSiteDomain()+storya['href'] + logger.debug("Normalizing to URL: "+url) + self._setURL(url) + try: + soup = bs.BeautifulSoup(self._fetchUrl(url)) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + # if blocked, attempt login. + if soup.find("div",{"class":"blocked"}): + if self.performLogin(url): # performLogin raises + # FailedToLogin if it fails. + soup = bs.BeautifulSoup(self._fetchUrl(url,usecache=False)) + + # title - first h4 tag will be title. + titleh4 = soup.find('div',{'class':'storylist'}).find('h4') + self.story.setMetadata('title', stripHTML(titleh4.a)) + + # Find authorid and URL from... author url. 
+ a = soup.find('span',{'class':'author'}).find('a', href=re.compile(r"^/author/\d+")) + self.story.setMetadata('authorId',a['href'].split('/')[2]) + self.story.setMetadata('authorUrl','http://'+self.host+a['href']) + self.story.setMetadata('author',a.string) + + # description + storydiv = soup.find("div",{"id":"story"}) + self.setDescription(url,storydiv.find("blockquote",{'class':'summary'}).p) + #self.story.setMetadata('description', storydiv.find("blockquote",{'class':'summary'}).p.string) + + # most of the meta data is here: + metap = storydiv.find("p",{"class":"meta"}) + self.story.addToList('category',metap.find("a",href=re.compile(r"^/category/\d+")).string) + + # warnings + # [!!] [R] [V] [Y] + spanreq = metap.find("span",{"class":"story-warnings"}) + if spanreq: # can be no warnings. + for a in spanreq.findAll("a"): + self.story.addToList('warnings',a['title']) + + ## perhaps not the most efficient way to parse this, using + ## regexps for each rather than something more complex, but + ## IMO, it's more readable and amenable to change. + metastr = stripHTML(str(metap)).replace('\n',' ').replace('\t',' ') + #print "metap: (%s)"%metastr + + m = re.match(r".*?Rating: (.+?) -.*?",metastr) + if m: + self.story.setMetadata('rating', m.group(1)) + + m = re.match(r".*?Genres: (.+?) -.*?",metastr) + if m: + for g in m.group(1).split(','): + self.story.addToList('genre',g) + + m = re.match(r".*?Characters: (.*?) -.*?",metastr) + if m: + for g in m.group(1).split(','): + if g: + self.story.addToList('characters',g) + + m = re.match(r".*?Published: ([0-9-]+?) -.*?",metastr) + if m: + self.story.setMetadata('datePublished',makeDate(m.group(1), "%Y-%m-%d")) + + # Updated can have more than one space after it. + m = re.match(r".*?Updated: ([0-9-]+?) +-.*?",metastr) + if m: + self.story.setMetadata('dateUpdated',makeDate(m.group(1), "%Y-%m-%d")) + + m = re.match(r".*? - ([0-9,]+?) 
words.*?",metastr) + if m: + self.story.setMetadata('numWords',m.group(1)) + + if metastr.endswith("Complete"): + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + + # get the chapter list first this time because that's how we + # detect the need to login. + storylistul = soup.find('ul',{'class':'storylist'}) + if not storylistul: + # no list found, so it's a one-chapter story. + self.chapterUrls.append((self.story.getMetadata('title'),url)) + else: + chapterlistlis = storylistul.findAll('li') + for chapterli in chapterlistlis: + if "blocked" in chapterli['class']: + # paranoia check. We should already be logged in by now. + raise exceptions.FailedToLogin(url,self.username) + else: + #print "chapterli.h4.a (%s)"%chapterli.h4.a + self.chapterUrls.append((chapterli.h4.a.string, + u'http://%s%s'%(self.getSiteDomain(), + chapterli.h4.a['href']))) + #print "self.chapterUrls:%s"%self.chapterUrls + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + return + + + def getChapterText(self, url): + logger.debug('Getting chapter text from: %s' % url) + soup = bs.BeautifulStoneSoup(self._fetchUrl(url), + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + + span = soup.find('div', {'id' : 'storytext'}) + + if None == span: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) + + return self.utf8FromSoup(url,span) + +def getClass(): + return FicwadComSiteAdapter + diff --git a/fanficdownloader/adapters/adapter_fimfictionnet.py b/fanficdownloader/adapters/adapter_fimfictionnet.py new file mode 100644 index 00000000..f390dc71 --- /dev/null +++ b/fanficdownloader/adapters/adapter_fimfictionnet.py @@ -0,0 +1,338 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import time +import logging +logger = logging.getLogger(__name__) +import re +import urllib2 +import cookielib as cl +import json + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, makeDate + +def getClass(): + return FimFictionNetSiteAdapter + +class FimFictionNetSiteAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + self.story.setMetadata('siteabbrev','fimficnet') + self.story.setMetadata('storyId', self.parsedUrl.path.split('/',)[2]) + self._setURL("http://"+self.getSiteDomain()+"/story/"+self.story.getMetadata('storyId')+"/") + self.is_adult = False + + # The date format will vary from site to site. 
+ # http://docs.python.org/library/datetime.html#strftime-strptime-behavior + self.dateformat = "%d %b %Y" + + @staticmethod + def getSiteDomain(): + return 'www.fimfiction.net' + + @classmethod + def getAcceptDomains(cls): + # mobile.fimifction.com isn't actually a valid domain, but we can still get the story id from URLs anyway + return ['www.fimfiction.net','mobile.fimfiction.net', 'www.fimfiction.com', 'mobile.fimfiction.com'] + + @classmethod + def getSiteExampleURLs(cls): + return "http://www.fimfiction.net/story/1234/story-title-here http://www.fimfiction.net/story/1234/ http://www.fimfiction.com/story/1234/1/ http://mobile.fimfiction.net/story/1234/1/story-title-here/chapter-title-here" + + def getSiteURLPattern(self): + return r"https?://(www|mobile)\.fimfiction\.(net|com)/story/\d+/?.*" + + def use_pagecache(self): + ''' + adapters that will work with the page cache need to implement + this and change it to True. + ''' + return True + + def doExtractChapterUrlsAndMetadata(self,get_cover=True): + + if self.is_adult or self.getConfig("is_adult"): + cookie = cl.Cookie(version=0, name='view_mature', value='true', + port=None, port_specified=False, + domain=self.getSiteDomain(), domain_specified=False, domain_initial_dot=False, + path='/story', path_specified=True, + secure=False, + expires=time.time()+10000, + discard=False, + comment=None, + comment_url=None, + rest={'HttpOnly': None}, + rfc2109=False) + self.cookiejar.set_cookie(cookie) + + ##--------------------------------------------------------------------------------------------------- + ## Get the story's title page. Check if it exists. + + try: + # don't use cache if manual is_adult--should only happen + # if it's an adult story and they don't have is_adult in ini. 
+ data = self.do_fix_blockquotes(self._fetchUrl(self.url, + usecache=(not self.is_adult))) + soup = bs.BeautifulSoup(data) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + if "Warning: mysql_fetch_array(): supplied argument is not a valid MySQL result resource" in data: + raise exceptions.StoryDoesNotExist(self.url) + + if "This story has been marked as having adult content. Please click below to confirm you are of legal age to view adult material in your country." in data: + raise exceptions.AdultCheckRequired(self.url) + + if self.password: + params = {} + params['password'] = self.password + data = self._postUrl(self.url, params) + soup = bs.BeautifulSoup(data) + + if not (soup.find('form', {'id' : 'password_form'}) == None): + if self.getConfig('fail_on_password'): + raise exceptions.FailedToDownload("%s requires story password and fail_on_password is true."%self.url) + else: + raise exceptions.FailedToLogin(self.url,"Story requires individual password",passwdonly=True) + + ##---------------------------------------------------------------------------------------------------- + ## Extract metadata + + storyContentBox = soup.find('div', {'class':'story_content_box'}) + + # Title + title = storyContentBox.find('a', {'class':re.compile(r'.*\bstory_name\b.*')}) + self.story.setMetadata('title',stripHTML(title)) + + # Author + author = storyContentBox.find('div', {'class':'author'}).find('a') + self.story.setMetadata("author", stripHTML(author)) + #No longer seems to be a way to access Fimfiction's internal author ID + self.story.setMetadata("authorId", self.story.getMetadata("author")) + self.story.setMetadata("authorUrl", "http://%s/user/%s" % (self.getSiteDomain(), stripHTML(author))) + + #Rating text is replaced with full words for historical compatibility after the site changed + #on 2014-10-27 + rating = stripHTML(storyContentBox.find('a', {'class':re.compile(r'.*\bcontent-rating-.*')})) + 
rating = rating.replace("E", "Everyone").replace("T", "Teen").replace("M", "Mature") + self.story.setMetadata("rating", rating) + + # Chapters + for chapter in storyContentBox.findAll('a',{'class':'chapter_link'}): + self.chapterUrls.append((stripHTML(chapter), 'http://'+self.host+chapter['href'])) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + # Status + # In the case of Fimfiction, possible statuses are 'Completed', 'Incomplete', 'On Hiatus' and 'Cancelled' + # For the sake of bringing it in line with the other adapters, 'Incomplete' becomes 'In-Progress' + # and 'Complete' becomes 'Completed'. 'Cancelled' and 'On Hiatus' are passed through, it's easy now for users + # to change/remove if they want with replace_metadata + status = stripHTML(storyContentBox.find('span', {'class':re.compile(r'.*\bcompleted-status-.*')})) + status = status.replace("Incomplete", "In-Progress").replace("Complete", "Completed") + self.story.setMetadata("status", status) + + # Genres and Warnings + # warnings were folded into general categories in the 2014-10-27 site update + categories = storyContentBox.findAll('a', {'class':re.compile(r'.*\bstory_category\b.*')}) + for category in categories: + category = stripHTML(category) + if category == "Gore" or category == "Sex": + self.story.addToList('warnings', category) + else: + self.story.addToList('genre', category) + + # Word count + wordCountText = stripHTML(storyContentBox.find('li', {'class':'bottom'}).find('div', {'class':'word_count'})) + self.story.setMetadata("numWords", re.sub(r'[^0-9]', '', wordCountText)) + + # Cover image + storyImage = storyContentBox.find('div', {'class':'story_image'}) + if storyImage: + coverurl = storyImage.find('a')['href'] + if coverurl.startswith('//'): # fix for img urls missing 'http:' + coverurl = "http:"+coverurl + if get_cover: + self.setCoverImage(self.url,coverurl) + + coverSource = storyImage.find('a', {'class':'source'}) + if coverSource: + 
self.story.setMetadata('coverSourceUrl', coverSource['href']) + #There's no text associated with the cover source link, so just + #reuse the URL. Makes it clear it's an external link leading + #outside of the fanfic site, at least. + self.story.setMetadata('coverSource', coverSource['href']) + + # fimf has started including extra stuff inside the description div. + descdivstr = u"%s"%storyContentBox.find("div", {"class":"description"}) + hrstr=u"
    " + descdivstr = u'
    '+descdivstr[descdivstr.index(hrstr)+len(hrstr):] + self.setDescription(self.url,descdivstr) + + # Find the newest and oldest chapter dates + storyData = storyContentBox.find('div', {'class':'story_data'}) + oldestChapter = None + newestChapter = None + self.newestChapterNum = None # save for comparing during update. + # Scan all chapters to find the oldest and newest, on + # FiMFiction it's possible for authors to insert new chapters + # out-of-order or change the dates of earlier ones by editing + # them--That WILL break epub update. + for index, chapterDate in enumerate(storyData.findAll('span', {'class':'date'})): + chapterDate = self.ordinal_date_string_to_date(chapterDate.contents[1]) + if oldestChapter == None or chapterDate < oldestChapter: + oldestChapter = chapterDate + if newestChapter == None or chapterDate > newestChapter: + newestChapter = chapterDate + self.newestChapterNum = index + + if newestChapter is None: + #this will only be true when updating metadata for stories that have 0 chapters + #there is a "last modified" date given on the page, extract it and use that. + moddatetag = storyContentBox.find('span', {'class':'last_modified'}) + if not moddatetag is None: + newestChapter = self.ordinal_date_string_to_date(moddatetag('span')[1].text) + + # Date updated + self.story.setMetadata("dateUpdated", newestChapter) + + # Date published + # falls back to oldest chapter date for stories that haven't been officially published yet + pubdatetag = storyContentBox.find('span', {'class':'date_approved'}) + if pubdatetag is None: + if oldestChapter is None: + #this will only be true when updating metadata for stories that have 0 chapters + #and that have never been officially published - a rare occurrence. Fall back to last + #modified date as the publication date, it's all that we've got. 
+ self.story.setMetadata("datePublished", newestChapter) + else: + self.story.setMetadata("datePublished", oldestChapter) + else: + pubDate = self.ordinal_date_string_to_date(pubdatetag('span')[1].text) + self.story.setMetadata("datePublished", pubDate) + + # Characters + chars = storyContentBox.find("div", {"class":"extra_story_data"}) + for character in chars.findAll("a", {"class":"character_icon"}): + self.story.addToList("characters", character['title']) + + # Likes and dislikes + storyToolbar = soup.find('div', {'class':'story-toolbar'}) + likes = storyToolbar.find('span', {'class':'likes'}) + if not likes is None: + self.story.setMetadata("likes", stripHTML(likes)) + dislikes = storyToolbar.find('span', {'class':'dislikes'}) + if not dislikes is None: + self.story.setMetadata("dislikes", stripHTML(dislikes)) + + # Highest view for a chapter and total views + viewSpan = storyToolbar.find('span', {'title':re.compile(r'.*\btotal views\b.*')}) + self.story.setMetadata("views", re.sub(r'[^0-9]', '', stripHTML(viewSpan))) + self.story.setMetadata("total_views", re.sub(r'[^0-9]', '', viewSpan['title'])) + + # Comment count + commentSpan = storyToolbar.find('span', {'title':re.compile(r'.*\bcomments\b.*')}) + self.story.setMetadata("comment_count", re.sub(r'[^0-9]', '', stripHTML(commentSpan))) + + # Short description + descriptionMeta = soup.find('meta', {'property':'og:description'}) + self.story.setMetadata("short_description", stripHTML(descriptionMeta['content'])) + + #groups + if soup.find('button', {'id':'button-view-all-groups'}): + groupResponse = self._fetchUrl("http://www.fimfiction.net/ajax/groups/story_groups_list.php?story=%s" % (self.story.getMetadata("storyId"))) + groupData = json.loads(groupResponse) + groupList = bs.BeautifulSoup(groupData["content"]) + else: + groupList = soup.find('ul', {'id':'story-groups-list'}) + + if not (groupList == None): + for groupName in groupList.findAll('a'): + self.story.addToList("groupsUrl", 
def hookForUpdates(self,chaptercount):
    """Called during an epub update to reconcile previously-downloaded chapters.

    FimFiction authors can insert chapters out of order or re-date old
    ones, which breaks a naive "append new chapters" update.  We recorded
    self.newestChapterNum while scanning chapter dates; any previously
    stored chapter at or past that index is discarded so it gets
    re-downloaded.

    Returns the number of old chapters kept.
    """
    # Guard: self.oldchapters may be None (no prior download); the
    # original code crashed with TypeError on len(None) in that case.
    if not self.oldchapters:
        return 0
    if len(self.oldchapters) > self.newestChapterNum:
        print("Existing epub has %s chapters\nNewest chapter is %s. Discarding old chapters from there on."%(len(self.oldchapters), self.newestChapterNum+1))
        self.oldchapters = self.oldchapters[:self.newestChapterNum]
    return len(self.oldchapters)

    + #

    + # include > in re groups so there's always something in the group. + data = re.sub(r']*>\s*)]*>)',r'\s*)

    ',r'',data) + return data + + def getChapterText(self, url): + logger.debug('Getting chapter text from: %s' % url) + + data = self._fetchUrl(url) + + soup = bs.BeautifulSoup(data) + if not (soup.find('form', {'id' : 'password_form'}) == None): + if self.password: + params = {} + params['password'] = self.password + data = self._postUrl(url, params) + else: + print("Chapter %s needed password but no password was present" % url) + + data = self.do_fix_blockquotes(data) + + soup = bs.BeautifulSoup(data,selfClosingTags=('br','hr')).find('div', {'class' : 'chapter_content'}) + if soup == None: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) + + return self.utf8FromSoup(url,soup) diff --git a/fanficdownloader/adapters/adapter_finestoriescom.py b/fanficdownloader/adapters/adapter_finestoriescom.py new file mode 100644 index 00000000..6c99f9d8 --- /dev/null +++ b/fanficdownloader/adapters/adapter_finestoriescom.py @@ -0,0 +1,288 @@ +# -*- coding: utf-8 -*- + +# Copyright 2013 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import time +import logging +logger = logging.getLogger(__name__) +import re +import urllib2 + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, makeDate + +def getClass(): + return FineStoriesComAdapter + +# Class name has to be unique. 
def getSiteURLPattern(self):
    """Regex accepting the story URL forms this adapter handles:
    /s/1234, /s/1234:4010, /s/1234;1 and /library/storyInfo.php?id=1234.

    Fix: the dot in "storyInfo.php" is now escaped; previously it was a
    regex wildcard and matched any character in that position.
    """
    return re.escape("http://"+self.getSiteDomain())+r"/(s|library)?/(storyInfo\.php\?id=)?\d+(:\d+)?(;\d+)?$"
def performLogin(self, url):
    """Log in to finestories.com.

    Prefers credentials captured on the adapter instance (set via the
    adult/password prompt) over the configured username/password.

    Returns True on success; raises exceptions.FailedToLogin otherwise.
    """
    params = {}

    if self.password:
        params['theusername'] = self.username
        params['thepassword'] = self.password
    else:
        params['theusername'] = self.getConfig("username")
        params['thepassword'] = self.getConfig("password")
    params['rememberMe'] = '1'
    params['page'] = 'http://'+self.getSiteDomain()+'/'
    params['submit'] = 'Login'

    loginUrl = 'http://' + self.getSiteDomain() + '/login.php'
    logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
                                                         params['theusername']))

    d = self._fetchUrl(loginUrl, params)

    # "My Account" only appears in the page when we are logged in.
    # (The original had an unreachable "return False" after this raise;
    # that dead statement has been removed.)
    if "My Account" not in d:
        logger.info("Failed to login to URL %s as %s" % (loginUrl,
                                                         params['theusername']))
        raise exceptions.FailedToLogin(url, params['theusername'])
    return True
+ + ## Title + a = soup.find('a', href=re.compile(r'/s/'+self.story.getMetadata('storyId')+"$")) + self.story.setMetadata('title',stripHTML(a)) + + # Find authorid and URL from... author url. + a = soup.find('a', href=re.compile(r"/a/\w+")) + self.story.setMetadata('authorId',a['href'].split('/')[2]) + self.story.setMetadata('authorUrl','http://'+self.host+a['href']) + self.story.setMetadata('author',a.text) + + # Find the chapters: + chapters = soup.findAll('a', href=re.compile(r'/s/'+self.story.getMetadata('storyId')+":\d+$")) + if len(chapters) != 0: + for chapter in chapters: + # just in case there's tags, like in chapter titles. + self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+chapter['href'])) + else: + self.chapterUrls.append((self.story.getMetadata('title'),'http://'+self.host+'/s/'+self.story.getMetadata('storyId'))) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + # surprisingly, the detailed page does not give enough details, so go to author's page + + skip=0 + i=0 + while i == 0: + asoup = bs.BeautifulSoup(self._fetchUrl(self.story.getMetadata('authorUrl')+"&skip="+str(skip))) + + a = asoup.findAll('td', {'class' : 'lc2'}) + for lc2 in a: + if lc2.find('a')['href'] == '/s/'+self.story.getMetadata('storyId'): + i=1 + break + if a[len(a)-1] == lc2: + skip=skip+10 + + for cat in lc2.findAll('div', {'class' : 'typediv'}): + self.story.addToList('category',cat.text) + + self.story.setMetadata('numWords', lc2.findNext('td', {'class' : 'num'}).text) + + lc4 = lc2.findNext('td', {'class' : 'lc4'}) + + + try: + a = lc4.find('a', href=re.compile(r"/library/show_series.php\?id=\d+")) + i = a.parent.text.split('(')[1].split(')')[0] + self.setSeries(a.text, i) + self.story.setMetadata('seriesUrl','http://'+self.host+a['href']) + except: + pass + try: + a = lc4.find('a', href=re.compile(r"/library/universe.php\?id=\d+")) + self.story.addToList("category",a.text) + except: + pass + + for a in lc4.findAll('span', {'class' : 'help'}): 
+ a.extract() + + self.setDescription('http://'+self.host+'/s/'+self.story.getMetadata('storyId'),lc4.text.split('[More Info')[0]) + + for b in lc4.findAll('b'): + label = b.text + value = b.nextSibling + + if 'For Age' in label: + self.story.setMetadata('rating', value) + + if 'Tags' in label: + for genre in value.split(', '): + self.story.addToList('genre',genre) + + if 'Posted' in label: + self.story.setMetadata('datePublished', makeDate(stripHTML(value.split('/ (')[0]), self.dateformat)) + self.story.setMetadata('dateUpdated', makeDate(stripHTML(value.split('/ (')[0]), self.dateformat)) + + if 'Concluded' in label: + self.story.setMetadata('dateUpdated', makeDate(stripHTML(value.split('/ (')[0]), self.dateformat)) + + if 'Updated' in label: + self.story.setMetadata('dateUpdated', makeDate(stripHTML(value.split('/ (')[0]), self.dateformat)) + + status = lc4.find('span', {'class' : 'ab'}) + if status != None: + self.story.setMetadata('status', 'In-Progress') + if "Last Activity" in status.text: + self.story.setMetadata('dateUpdated', makeDate(status.text.split('Activity: ')[1].split(')')[0], self.dateformat)) + else: + self.story.setMetadata('status', 'Completed') + + + # grab the text for an individual chapter. + def getChapterText(self, url): + + logger.debug('Getting chapter text from: %s' % url) + + soup = bs.BeautifulSoup(self._fetchUrl(url), + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + + div = soup.find('div', {'id' : 'story'}) + + # some big chapters are split over several pages + pager = div.find('span', {'class' : 'pager'}) + if pager != None: + urls=pager.findAll('a') + urls=urls[:len(urls)-1] + + + for ur in urls: + soup = bs.BeautifulSoup(self._fetchUrl("http://"+self.getSiteDomain()+ur['href']), + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. 
+ + div1 = soup.find('div', {'id' : 'story'}) + + # appending next section + last=div.findAll('p') + next=div1.find('span', {'class' : 'conTag'}).nextSibling + + last[len(last)-1]=last[len(last)-1].append(next) + div.append(div1) + + # removing all the left-over stuff + for a in div.findAll('span'): + a.extract() + + for a in div.findAll('h1'): + a.extract() + for a in div.findAll('h2'): + a.extract() + for a in div.findAll('h3'): + a.extract() + for a in div.findAll('h4'): + a.extract() + for a in div.findAll('br'): + a.extract() + for a in div.findAll('div', {'class' : 'date'}): + a.extract() + + a = div.find('form') + if a != None: + b = a.nextSibling + while b != None: + a.extract() + a=b + b=b.nextSibling + + + if None == div: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) + + return self.utf8FromSoup(url,div) diff --git a/fanficdownloader/adapters/adapter_grangerenchantedcom.py b/fanficdownloader/adapters/adapter_grangerenchantedcom.py new file mode 100644 index 00000000..831f08f1 --- /dev/null +++ b/fanficdownloader/adapters/adapter_grangerenchantedcom.py @@ -0,0 +1,311 @@ +# -*- coding: utf-8 -*- + +# Copyright 2012 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Software: eFiction +import time +import logging +logger = logging.getLogger(__name__) +import re +import urllib2 + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. 
import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, makeDate + +def getClass(): + return GrangerEnchantedCom + +# Class name has to be unique. Our convention is camel case the +# sitename with Adapter at the end. www is skipped. +class GrangerEnchantedCom(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + self.username = "NoneGiven" # if left empty, site doesn't return any message at all. + self.password = "" + self.is_adult=False + + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + + self.section=self.parsedUrl.path.split('/',)[1] + + # normalized story URL. + if "malfoymanor" in self.parsedUrl.netloc: + self._setURL('http://malfoymanor.' + self.getSiteDomain() + '/themanor/viewstory.php?sid='+self.story.getMetadata('storyId')) + self.story.addToList("category","The Manor") + else: + self._setURL('http://' + self.getSiteDomain() + '/enchant/viewstory.php?sid='+self.story.getMetadata('storyId')) + + # Each adapter needs to have a unique site abbreviation. + self.story.setMetadata('siteabbrev','gech') + + # The date format will vary from site to site. + # http://docs.python.org/library/datetime.html#strftime-strptime-behavior + self.dateformat = "%d/%b/%Y" + + @staticmethod # must be @staticmethod, don't remove it. + def getSiteDomain(): + # The site domain. Does have www here, if it uses it. 
+ return 'grangerenchanted.com' + + @classmethod + def getAcceptDomains(cls): + return ['grangerenchanted.com','malfoymanor.grangerenchanted.com'] + + @classmethod + def getSiteExampleURLs(cls): + return "http://grangerenchanted.com/enchant/viewstory.php?sid=1234 http://malfoymanor.grangerenchanted.com/themanor/viewstory.php?sid=1234" + + def getSiteURLPattern(self): + return r"http://(malfoymanor.)?grangerenchanted.com/(enchant|themanor)?/viewstory.php\?sid=\d+$" + + ## Login seems to be reasonably standard across eFiction sites. + def needToLoginCheck(self, data): + if 'Registered Users Only' in data \ + or 'There is no such account on our website' in data \ + or "That password doesn't match the one in our database" in data: + return True + else: + return False + + def performLogin(self, url): + params = {} + + if self.password: + params['penname'] = self.username + params['password'] = self.password + else: + params['penname'] = self.getConfig("username") + params['password'] = self.getConfig("password") + params['cookiecheck'] = '1' + params['submit'] = 'Submit' + + if "enchant" in self.section: + loginUrl = 'http://grangerenchanted.com/enchant/user.php?action=login' + else: + loginUrl = 'http://malfoymanor.grangerenchanted.com/themanor/user.php?action=login' + logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl, + params['penname'])) + + d = self._fetchUrl(loginUrl, params) + + if "Member Account" not in d : #Member Account + logger.info("Failed to login to URL %s as %s" % (loginUrl, + params['penname'])) + raise exceptions.FailedToLogin(url,params['penname']) + return False + else: + return True + + ## Getting the chapter list and the meta data, plus 'is adult' checking. + def extractChapterUrlsAndMetadata(self): + + if self.is_adult or self.getConfig("is_adult"): + # Weirdly, different sites use different warning numbers. + # If the title search below fails, there's a good chance + # you need a different number. 
print data at that point + # and see what the 'click here to continue' url says. + addurl = "&ageconsent=ok&warning=1" + else: + addurl="" + + # index=1 makes sure we see the story chapter index. Some + # sites skip that for one-chapter stories. + url = self.url+addurl + logger.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + if self.needToLoginCheck(data): + # need to log in for this one. + self.performLogin(url) + data = self._fetchUrl(url) + + m = re.search(r"'viewstory.php\?sid=\d+((?:&ageconsent=ok)?&warning=\d+)'",data) + if m != None: + if self.is_adult or self.getConfig("is_adult"): + # We tried the default and still got a warning, so + # let's pull the warning number from the 'continue' + # link and reload data. + addurl = m.group(1) + # correct stupid & error in url. + addurl = addurl.replace("&","&") + url = self.url+'&index=1'+addurl + logger.debug("URL 2nd try: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + else: + raise exceptions.AdultCheckRequired(self.url) + + if "Access denied. This story has not been validated by the adminstrators of this site." in data: + raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.") + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulSoup(data) + # print data + + # Now go hunting for all the meta data and the chapter list. + + ## Title + a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) + self.story.setMetadata('title',stripHTML(a)) + + # Find authorid and URL from... author url. 
+ a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) + self.story.setMetadata('authorId',a['href'].split('=')[1]) + self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href']) + self.story.setMetadata('author',a.string) + + # Find the chapters: + for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")): + # just in case there's tags, like in chapter titles. + self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+self.section+'/'+chapter['href']+addurl)) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + # eFiction sites don't help us out a lot with their meta data + # formating, so it's a little ugly. + + # utility method + def defaultGetattr(d,k): + try: + return d[k] + except: + return "" + + # Rated: NC-17
    etc + labels = soup.findAll('span',{'class':'label'}) + for labelspan in labels: + value = labelspan.nextSibling + label = labelspan.string + + if 'Summary' in label: + ## Everything until the next span class='label' + svalue = "" + while not defaultGetattr(value,'class') == 'label': + svalue += str(value) + value = value.nextSibling + self.setDescription(url,svalue) + #self.story.setMetadata('description',stripHTML(svalue)) + + if 'Rated' in label: + self.story.setMetadata('rating', value) + + if 'Word count' in label: + self.story.setMetadata('numWords', value) + + if 'Read' in label: + self.story.setMetadata('read', value) + + if 'Categories' in label: + cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories')) + for cat in cats: + self.story.addToList('category',cat.string) + + if 'Characters' in label: + chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters')) + for char in chars: + self.story.addToList('characters',char.string) + + if 'Genre' in label: + genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=4')) + for genre in genres: + self.story.addToList('genre',genre.string) + + if 'Warnings' in label: + warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) + for warning in warnings: + self.story.addToList('warnings',warning.string) + + if 'Completed' in label: + if 'Yes' in value: + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + + if 'Published' in label: + self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat)) + + if 'Updated' in label: + self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat)) + + try: + # Find Series name from series URL. 
def getChapterText(self, url):
    """Fetch one chapter page and return the cleaned-up story text."""
    logger.debug('Getting chapter text from: %s' % url)

    # br/hr must be declared self-closing or the parser swallows them.
    page = bs.BeautifulSoup(self._fetchUrl(url),
                            selfClosingTags=('br','hr'))

    story_div = page.find('div', {'id' : 'story1'})
    if story_div is None:
        raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)

    return self.utf8FromSoup(url, story_div)
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Software: eFiction +import time +import logging +logger = logging.getLogger(__name__) +import re +import urllib +import urllib2 + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, makeDate + +class HarryPotterFanFictionComSiteAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + self.story.setMetadata('siteabbrev','hp') + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + self.is_adult=False + + # get storyId from url--url validation guarantees query is only psid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + + + # normalized story URL. 
def needToLoginCheck(self, data):
    """Return True when the fetched page shows one of the site's
    login-required / bad-credential messages."""
    login_markers = (
        'Registered Users Only',
        'There is no such account on our website',
        "That password doesn't match the one in our database",
    )
    return any(marker in data for marker in login_markers)
+ a = soup.find('a', href=re.compile(r"viewuser.php\?showuid=\d+")) + self.story.setMetadata('authorId',a['href'].split('=')[1]) + self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href']) + self.story.setMetadata('author',a.string) + + ## hpcom doesn't give us total words--but it does give + ## us words/chapter. I'd rather add than fetch and + ## parse another page. + words=0 + for tr in soup.find('table',{'class':'text'}).findAll('tr'): + tdstr = tr.findAll('td')[2].string + if tdstr and tdstr.isdigit(): + words+=int(tdstr) + self.story.setMetadata('numWords',str(words)) + + # Find the chapters: + tablelist = soup.find('table',{'class':'text'}) + for chapter in tablelist.findAll('a', href=re.compile(r'\?chapterid=\d+')): + #javascript:if (confirm('Please note. This story may contain adult themes. By clicking here you are stating that you are over 17. Click cancel if you do not meet this requirement.')) location = '?chapterid=433441&i=1' + # just in case there's tags, like in chapter titles. + chpt=re.sub(r'^.*?(\?chapterid=\d+).*?',r'\1',chapter['href']) + self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/viewstory.php'+chpt)) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + ## Finding the metadata is a bit of a pain. Desc is the only thing this color. + desctable= soup.find('table',{'bgcolor':'#f0e8e8'}) + self.setDescription(url,desctable) + #self.story.setMetadata('description',stripHTML(desctable)) + + ## Finding the metadata is a bit of a pain. Most of the meta + ## data is in a center.table without a bgcolor. + #for center in soup.findAll('center'): + table = soup.find('table',{'class':'storymaininfo'}) + if table: + metastr = stripHTML(str(table)).replace('\n',' ').replace('\t',' ') + # Rating: 12+ Story Reviews: 3 + # Chapters: 3 + # Characters: Andromeda, Ted, Bellatrix, R. 
Lestrange, Lucius, Narcissa, OC + # Genre(s): Fluff, Romance, Young Adult Era: OtherPairings: Other Pairing, Lucius/Narcissa + # Status: Completed + # First Published: 2010.09.02 + # Last Published Chapter: 2010.09.28 + # Last Updated: 2010.09.28 + # Favorite Story Of: 1 users + # Warnings: Scenes of a Mild Sexual Nature + + m = re.match(r".*?Status: Completed.*?",metastr) + if m: + self.story.setMetadata('status','Completed') + else: + self.story.setMetadata('status','In-Progress') + + m = re.match(r".*?Rating: (.+?) Story Reviews.*?",metastr) + if m: + self.story.setMetadata('rating', m.group(1)) + + m = re.match(r".*?Genre\(s\): (.+?) Era.*?",metastr) + if m: + for g in m.group(1).split(','): + self.story.addToList('genre',g) + + m = re.match(r".*?Characters: (.+?) Genre.*?",metastr) + if m: + for g in m.group(1).split(','): + self.story.addToList('characters',g) + + m = re.match(r".*?Warnings: (.+).*?",metastr) + if m: + for w in m.group(1).split(','): + if w != 'Now Warnings': + self.story.addToList('warnings',w) + + m = re.match(r".*?First Published: ([0-9\.]+).*?",metastr) + if m: + self.story.setMetadata('datePublished',makeDate(m.group(1), "%Y.%m.%d")) + + # Updated can have more than one space after it. + m = re.match(r".*?Last Updated: ([0-9\.]+).*?",metastr) + if m: + self.story.setMetadata('dateUpdated',makeDate(m.group(1), "%Y.%m.%d")) + + def getChapterText(self, url): + + logger.debug('Getting chapter text from: %s' % url) + + ## most adapters use BeautifulStoneSoup here, but non-Stone + ## allows nested div tags. + soup = bs.BeautifulSoup(self._fetchUrl(url), + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + + div = soup.find('div', {'id' : 'fluidtext'}) + + if None == div: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" 
% url) + + return self.utf8FromSoup(url,div) + +def getClass(): + return HarryPotterFanFictionComSiteAdapter + diff --git a/fanficdownloader/adapters/adapter_hennethannunnet.py b/fanficdownloader/adapters/adapter_hennethannunnet.py new file mode 100644 index 00000000..4a5391d7 --- /dev/null +++ b/fanficdownloader/adapters/adapter_hennethannunnet.py @@ -0,0 +1,172 @@ +# -*- coding: utf-8 -*- + +# Copyright 2013 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import time +import logging +logger = logging.getLogger(__name__) +import re +import urllib2 + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, makeDate + +def getClass(): + return HennethAnnunNetAdapter + +# Class name has to be unique. Our convention is camel case the +# sitename with Adapter at the end. www is skipped. +class HennethAnnunNetAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + self.username = "NoneGiven" # if left empty, site doesn't return any message at all. 
+ self.password = "" + self.is_adult=False + + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + + + # normalized story URL. + self._setURL('http://' + self.getSiteDomain() + '/stories/chapter.cfm?stid='+self.story.getMetadata('storyId')) + + # Each adapter needs to have a unique site abbreviation. + self.story.setMetadata('siteabbrev','htan') + + # The date format will vary from site to site. + # http://docs.python.org/library/datetime.html#strftime-strptime-behavior + self.dateformat = "%m/%d/%y" + + @staticmethod # must be @staticmethod, don't remove it. + def getSiteDomain(): + # The site domain. Does have www here, if it uses it. + return 'www.henneth-annun.net' + + @classmethod + def getSiteExampleURLs(cls): + return "http://"+cls.getSiteDomain()+"/stories/chapter.cfm?stid=1234" + + def getSiteURLPattern(self): + return "http://"+self.getSiteDomain()+"/stories/chapter(_view)?.cfm\?stid="+r"\d+$" + + ## Getting the chapter list and the meta data, plus 'is adult' checking. + def extractChapterUrlsAndMetadata(self): + + + # index=1 makes sure we see the story chapter index. Some + # sites skip that for one-chapter stories. + url = self.url + logger.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + + if "We're sorry. This story is not available." in data: + raise exceptions.FailedToDownload(self.getSiteDomain() +" says: This story is not available.") + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulSoup(data) + # print data + + # Now go hunting for all the meta data and the chapter list. 
+ + ## Title + a = soup.find('h2', {'id':'page_heading'}) + self.story.setMetadata('title',stripHTML(a)) + + # Find the chapters: chapter_view.cfm?stid=6663&spordinal=1" + for chapter in soup.findAll('a', href=re.compile(r'chapter_view.cfm\?stid='+self.story.getMetadata('storyId')+"&spordinal=\d+$")): + # just in case there's tags, like in chapter titles. + self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/stories/'+chapter['href'])) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + self.story.setMetadata('numWords', soup.find('tr', {'class':'foot'}).findAll('td')[1].text) + + self.setDescription(url,soup.find('div', {'id':'summary'})) + + # Rated: NC-17
    etc + info = soup.find('div', {'id':'storyinformation'}) + labels=info.findAll('b') + for labelspan in labels: + value = labelspan.nextSibling + label = labelspan.string + + if 'Completion' in label: + if 'Complete' in value.string: + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + + if 'Rating' in label: + self.story.setMetadata('rating', value.string) + + if 'Era:' in label: + self.story.addToList('category',value.string) + + if 'Genre' in label: + self.story.addToList('genre',value.string) + + labels=info.findAll('strong') + for labelspan in labels: + value = labelspan.nextSibling + label = labelspan.string + + if 'Author' in label: + value=value.nextSibling + self.story.setMetadata('authorId',value['href'].split('=')[1]) + self.story.setMetadata('authorUrl','http://'+self.host+'/'+value['href']) + self.story.setMetadata('author',value.string) + + if 'Post' in label: + self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat)) + + if 'Updated:' in label: + self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat)) + + for char in soup.findAll('a', href=re.compile(r"/resources/bios_view.cfm\?scid=\d+")): + self.story.addToList('characters',stripHTML(char)) + + # grab the text for an individual chapter. + def getChapterText(self, url): + + logger.debug('Getting chapter text from: %s' % url) + + soup = bs.BeautifulStoneSoup(self._fetchUrl(url), + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + + div = soup.find('div', {'class' : 'block chapter'}) + + if None == div: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" 
% url) + + return self.utf8FromSoup(url,div) diff --git a/fanficdownloader/adapters/adapter_hlfictionnet.py b/fanficdownloader/adapters/adapter_hlfictionnet.py new file mode 100644 index 00000000..96deb95b --- /dev/null +++ b/fanficdownloader/adapters/adapter_hlfictionnet.py @@ -0,0 +1,232 @@ +# -*- coding: utf-8 -*- + +# Copyright 2012 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Software: eFiction +import time +import logging +logger = logging.getLogger(__name__) +import re +import urllib2 + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, makeDate + +def getClass(): + return HLFictionNetAdapter + +# Class name has to be unique. Our convention is camel case the +# sitename with Adapter at the end. www is skipped. +class HLFictionNetAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + self.username = "NoneGiven" # if left empty, site doesn't return any message at all. 
+ self.password = "" + self.is_adult=False + + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + + + # normalized story URL. + self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId')) + + # Each adapter needs to have a unique site abbreviation. + self.story.setMetadata('siteabbrev','hlf') + + # The date format will vary from site to site. + # http://docs.python.org/library/datetime.html#strftime-strptime-behavior + self.dateformat = "%m/%d/%y" + + @staticmethod # must be @staticmethod, don't remove it. + def getSiteDomain(): + # The site domain. Does have www here, if it uses it. + return 'hlfiction.net' + + @classmethod + def getSiteExampleURLs(cls): + return "http://"+cls.getSiteDomain()+"/viewstory.php?sid=1234" + + def getSiteURLPattern(self): + return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$" + + ## Getting the chapter list and the meta data, plus 'is adult' checking. + def extractChapterUrlsAndMetadata(self): + + # index=1 makes sure we see the story chapter index. Some + # sites skip that for one-chapter stories. + url = self.url + logger.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + if "Access denied. This story has not been validated by the adminstrators of this site." in data: + raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.") + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulSoup(data) + # print data + + # Now go hunting for all the meta data and the chapter list. 
+ + ## Title and author + a = soup.find('div', {'id' : 'pagetitle'}) + + aut = a.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) + self.story.setMetadata('authorId',aut['href'].split('=')[1]) + self.story.setMetadata('authorUrl','http://'+self.host+'/'+aut['href']) + self.story.setMetadata('author',aut.string) + aut.extract() + + self.story.setMetadata('title',stripHTML(a)[:(len(a.string)-3)]) + + # Find the chapters: + chapters=soup.find('select') + if chapters != None: + for chapter in chapters.findAll('option'): + # just in case there's tags, like in chapter titles. + self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/viewstory.php?sid='+self.story.getMetadata('storyId')+'&chapter='+chapter['value'])) + else: + self.chapterUrls.append((self.story.getMetadata('title'),url)) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + asoup = bs.BeautifulSoup(self._fetchUrl(self.story.getMetadata('authorUrl'))) + + for list in asoup.findAll('div', {'class' : re.compile('listbox\s+')}): + a = list.find('a') + if ('viewstory.php?sid='+self.story.getMetadata('storyId')) in a['href']: + break + + # eFiction sites don't help us out a lot with their meta data + # formating, so it's a little ugly. + + # utility method + def defaultGetattr(d,k): + try: + return d[k] + except: + return "" + + # Rated: NC-17
    etc + labels = list.findAll('span', {'class' : 'classification'}) + for labelspan in labels: + label = labelspan.string + value = labelspan.nextSibling + + if 'Summary' in label: + ## Everything until the next span class='label' + svalue = "" + while not defaultGetattr(value,'class') == 'classification': + svalue += str(value) + value = value.nextSibling + self.setDescription(url,svalue) + #self.story.setMetadata('description',stripHTML(svalue)) + + if 'Rated' in label: + self.story.setMetadata('rating', value[:len(value)-2]) + + if 'Word count' in label: + self.story.setMetadata('numWords', value) + + if 'Categories' in label: + cats = labelspan.parent.findAll('a',href=re.compile(r'categories.php\?catid=\d+')) + for cat in cats: + self.story.addToList('category',cat.string) + + if 'Characters' in label: + for char in value.string.split(', '): + if not 'None' in char: + self.story.addToList('characters',char) + + if 'Genre' in label: + for genre in value.string.split(', '): + if not 'None' in genre: + self.story.addToList('genre',genre) + + if 'Warnings' in label: + for warning in value.string.split(', '): + if not 'None' in warning: + self.story.addToList('warnings',warning) + + if 'Completed' in label: + if 'Yes' in value: + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + + if 'Published' in label: + self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat)) + + if 'Updated' in label: + # there's a stray [ at the end. + #value = value[0:-1] + self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat)) + + try: + # Find Series name from series URL. + a = list.find('a', href=re.compile(r"series.php\?seriesid=\d+")) + series_name = a.string + series_url = 'http://'+self.host+'/'+a['href'] + + # use BeautifulSoup HTML parser to make everything easier to find. 
+ seriessoup = bs.BeautifulSoup(self._fetchUrl(series_url)) + # can't use ^viewstory...$ in case of higher rated stories with javascript href. + storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+')) + i=1 + for a in storyas: + # skip 'report this' and 'TOC' links + if 'contact.php' not in a['href'] and 'index' not in a['href']: + if ('viewstory.php?sid='+self.story.getMetadata('storyId')) in a['href']: + self.setSeries(series_name, i) + self.story.setMetadata('seriesUrl',series_url) + break + i+=1 + + except: + # I find it hard to care if the series parsing fails + pass + + # grab the text for an individual chapter. + def getChapterText(self, url): + + logger.debug('Getting chapter text from: %s' % url) + + soup = bs.BeautifulStoneSoup(self._fetchUrl(url), + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + + div = soup.find('div', {'id' : 'story'}) + + if None == div: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) + + return self.utf8FromSoup(url,div) diff --git a/fanficdownloader/adapters/adapter_hpfandomnet.py b/fanficdownloader/adapters/adapter_hpfandomnet.py new file mode 100644 index 00000000..16a4cfc6 --- /dev/null +++ b/fanficdownloader/adapters/adapter_hpfandomnet.py @@ -0,0 +1,233 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +# Software: eFiction +import time +import logging +logger = logging.getLogger(__name__) +import re +import urllib2 + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, makeDate + +# This function is called by the downloader in all adapter_*.py files +# in this dir to register the adapter class. So it needs to be +# updated to reflect the class below it. That, plus getSiteDomain() +# take care of 'Registering'. +def getClass(): + return HPFandomNetAdapterAdapter # XXX + +# Class name has to be unique. Our convention is camel case the +# sitename with Adapter at the end. www is skipped. +class HPFandomNetAdapterAdapter(BaseSiteAdapter): # XXX + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + self.username = "NoneGiven" # if left empty, site doesn't return any message at all. + self.password = "" + self.is_adult=False + + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + + + # normalized story URL. + # XXX Most sites don't have the /eff part. Replace all to remove it usually. + self._setURL('http://' + self.getSiteDomain() + '/eff/viewstory.php?sid='+self.story.getMetadata('storyId')) + + # Each adapter needs to have a unique site abbreviation. + self.story.setMetadata('siteabbrev','hpfdm') # XXX + + # The date format will vary from site to site. + # http://docs.python.org/library/datetime.html#strftime-strptime-behavior + self.dateformat = "%Y.%m.%d" # XXX + + @staticmethod # must be @staticmethod, don't remove it. + def getSiteDomain(): + # The site domain. Does have www here, if it uses it. 
+ return 'www.hpfandom.net' # XXX + + @classmethod + def getSiteExampleURLs(cls): + return "http://"+cls.getSiteDomain()+"/eff/viewstory.php?sid=1234" + + def getSiteURLPattern(self): + return re.escape("http://"+self.getSiteDomain()+"/eff/viewstory.php?sid=")+r"\d+$" + + ## Getting the chapter list and the meta data, plus 'is adult' checking. + def extractChapterUrlsAndMetadata(self): + + url = self.url + logger.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulSoup(data) + # print data + + # Now go hunting for all the meta data and the chapter list. + + # Find authorid and URL from... author url. + a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) + self.story.setMetadata('authorId',a['href'].split('=')[1]) + self.story.setMetadata('authorUrl','http://'+self.host+'/eff/'+a['href']) + self.story.setMetadata('author',a.string) + + ## Going to get the rest from the author page. + authdata = self._fetchUrl(self.story.getMetadata('authorUrl')) + # fix a typo in the site HTML so I can find the Characters list. + authdata = authdata.replace('
    ", "
    ','') + + # hpfandom.net only seems to indicate adult-only by javascript on the story/chapter links. + if "javascript:if (confirm('Slash/het fiction which incorporates sexual situations to a somewhat graphic degree and some violence. ')) location = 'viewstory.php?sid=%s'"%self.story.getMetadata('storyId') in authdata \ + and not (self.is_adult or self.getConfig("is_adult")): + raise exceptions.AdultCheckRequired(self.url) + + authsoup = bs.BeautifulSoup(authdata) + + reviewsa = authsoup.find('a', href="reviews.php?sid="+self.story.getMetadata('storyId')+"&a=") + # + # + # + labels = metablock.findAll('td',{'width':'10%'}) + for td in labels: + label = td.string + value = td.nextSibling.string + #print("\nlabel:%s\nvalue:%s\n"%(label,value)) + + if 'Category' in label and value: + cats = td.parent.findAll('a',href=re.compile(r'categories.php')) + catstext = [cat.string for cat in cats] + for cat in catstext: + self.story.addToList('category',cat.string) + + if 'Characters' in label and value: # this site can have Character label with no + # values, apparently. Others as a precaution. + for char in value.split(','): + self.story.addToList('characters',char.strip()) + + if 'Genre' in label and value: + for genre in value.split(','): + self.story.addToList('genre',genre.strip()) + + if 'Warnings' in label and value: + for warning in value.split(','): + if warning.strip() != 'none': + self.story.addToList('warnings',warning.strip()) + + if 'Published' in label: + self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat)) + + if 'Updated' in label: + self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat)) + + # grab the text for an individual chapter. + def getChapterText(self, url): + + logger.debug('Getting chapter text from: %s' % url) + + data = self._fetchUrl(url) + # There's no good wrapper around the chapter text. :-/ + # There are, however, tables with width=100% just above and below the real text. 
+ data = re.sub(r'

    + metablock = reviewsa.findParent("table") + #print("metablock:%s"%metablock) + + ## Title + titlea = metablock.find('a', href=re.compile("viewstory.php")) + #print("titlea:%s"%titlea) + if titlea == None: + raise exceptions.FailedToDownload("Story URL (%s) not found on author's page, can't use chapter URLs"%url) + self.story.setMetadata('title',stripHTML(titlea)) + + # Find the chapters: !!! hpfandom.net differs from every other + # eFiction site--the sid on viewstory for chapters is + # *different* for each chapter + for chapter in soup.findAll('a', {'href':re.compile(r"viewstory.php\?sid=\d+&i=\d+")}): + m = re.match(r'.*?(viewstory.php\?sid=\d+&i=\d+).*?',chapter['href']) + # just in case there's tags, like in chapter titles. + #print("====chapter===%s"%m.group(1)) + self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/eff/'+m.group(1))) + + if len(self.chapterUrls) == 0: + self.chapterUrls.append((stripHTML(self.story.getMetadata('title')),url)) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + # eFiction sites don't help us out a lot with their meta data + # formating, so it's a little ugly. + + # utility method + def defaultGetattr(d,k): + try: + return d[k] + except: + return "" + + summary = metablock.find("td",{"class":"summary"}) + summary.name='span' + self.setDescription(url,summary) + + # words & completed in first row of metablock. 
+ firstrow = stripHTML(metablock.find('tr')) + # A Mother's Love xx Going Grey 1 (G+) by Kiristeen | Reviews - 18 | Words: 27468 | Completed: Yes + m = re.match(r".*?\((?P[^)]+)\).*?Words: (?P\d+).*?Completed: (?PYes|No)",firstrow) + if m != None: + if m.group('rating') != None: + self.story.setMetadata('rating', m.group('rating')) + + if m.group('words') != None: + self.story.setMetadata('numWords', m.group('words')) + + if m.group('status') != None: + if 'Yes' in m.group('status'): + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + + + #

    Chapters:4Published:2010.09.29
    Completed:YesUpdated:2010.10.03
    .*?
    ','
    ', + data,count=1,flags=re.DOTALL) + + data = re.sub(r'.*?
    ','
    ', + data,count=1,flags=re.DOTALL) + + soup = bs.BeautifulStoneSoup(data,selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + + div = soup.find("div",{'name':'storybody'}) + #print("\n\ndiv:%s\n\n"%div) + + if None == div: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) + return self.utf8FromSoup(url,div) diff --git a/fanficdownloader/adapters/adapter_hpfanficarchivecom.py b/fanficdownloader/adapters/adapter_hpfanficarchivecom.py new file mode 100644 index 00000000..9d1c1f1a --- /dev/null +++ b/fanficdownloader/adapters/adapter_hpfanficarchivecom.py @@ -0,0 +1,224 @@ +# -*- coding: utf-8 -*- + +# Copyright 2012 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Software: eFiction +import time +import logging +logger = logging.getLogger(__name__) +import re +import urllib2 + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, makeDate + +def getClass(): + return HPFanficArchiveComAdapter + +# Class name has to be unique. Our convention is camel case the +# sitename with Adapter at the end. www is skipped. +class HPFanficArchiveComAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. 
+ # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + self.username = "NoneGiven" # if left empty, site doesn't return any message at all. + self.password = "" + self.is_adult=False + + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + + + # normalized story URL. + self._setURL('http://' + self.getSiteDomain() + '/stories/viewstory.php?sid='+self.story.getMetadata('storyId')) + + # Each adapter needs to have a unique site abbreviation. + self.story.setMetadata('siteabbrev','hpffa') + + # The date format will vary from site to site. + # http://docs.python.org/library/datetime.html#strftime-strptime-behavior + self.dateformat = "%B %d, %Y" + + @staticmethod # must be @staticmethod, don't remove it. + def getSiteDomain(): + # The site domain. Does have www here, if it uses it. + return 'www.hpfanficarchive.com' + + @classmethod + def getSiteExampleURLs(cls): + return "http://"+cls.getSiteDomain()+"/stories/viewstory.php?sid=1234" + + def getSiteURLPattern(self): + return re.escape("http://"+self.getSiteDomain()+"/stories/viewstory.php?sid=")+r"\d+$" + + ## Getting the chapter list and the meta data, plus 'is adult' checking. + def extractChapterUrlsAndMetadata(self): + + # index=1 makes sure we see the story chapter index. Some + # sites skip that for one-chapter stories. + url = self.url + logger.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + + if "Access denied. This story has not been validated by the adminstrators of this site." in data: + raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.") + + # use BeautifulSoup HTML parser to make everything easier to find. 
+ soup = bs.BeautifulSoup(data) + # print data + + # Now go hunting for all the meta data and the chapter list. + + ## Title + a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) + self.story.setMetadata('title',stripHTML(a)) + + # Find authorid and URL from... author url. + a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) + self.story.setMetadata('authorId',a['href'].split('=')[1]) + self.story.setMetadata('authorUrl','http://'+self.host+'/stories/'+a['href']) + self.story.setMetadata('author',a.string) + + # Find the chapters: + for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")): + # just in case there's tags, like in chapter titles. + self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/stories/'+chapter['href'])) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + # eFiction sites don't help us out a lot with their meta data + # formating, so it's a little ugly. + + # utility method + def defaultGetattr(d,k): + try: + return d[k] + except: + return "" + + # Rated: NC-17
    etc + labels = soup.findAll('span',{'class':'label'}) + for labelspan in labels: + val = labelspan.nextSibling + value = unicode('') + while val and not defaultGetattr(val,'class') == 'label': + value += unicode(val) + val = val.nextSibling + label = labelspan.string + #print("label:%s\nvalue:%s"%(label,value)) + + if 'Summary' in label: + self.setDescription(url,value) + + if 'Rated' in label: + self.story.setMetadata('rating', stripHTML(value)) + + if 'Word count' in label: + self.story.setMetadata('numWords', stripHTML(value)) + + if 'Categories' in label: + cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories')) + for cat in cats: + self.story.addToList('category',cat.string) + + if 'Characters' in label: + chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters')) + for char in chars: + self.story.addToList('characters',char.string) + + if 'Genre' in label: + genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1')) # XXX + for genre in genres: + self.story.addToList('genre',genre.string) + + if 'Pairing' in label: + ships = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=4')) + for ship in ships: + self.story.addToList('ships',ship.string) + + if 'Warnings' in label: + warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX + for warning in warnings: + self.story.addToList('warnings',warning.string) + + if 'Completed' in label: + if 'Yes' in stripHTML(value): + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + + if 'Published' in label: + self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat)) + + if 'Updated' in label: + self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat)) + + try: + # Find Series name from series URL. 
+ a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+")) + series_name = a.string + series_url = 'http://'+self.host+'/stories/'+a['href'] + + # use BeautifulSoup HTML parser to make everything easier to find. + seriessoup = bs.BeautifulSoup(self._fetchUrl(series_url)) + # can't use ^viewstory...$ in case of higher rated stories with javascript href. + storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+')) + i=1 + for a in storyas: + # skip 'report this' and 'TOC' links + if 'contact.php' not in a['href'] and 'index' not in a['href']: + if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')): + self.setSeries(series_name, i) + self.story.setMetadata('seriesUrl',series_url) + break + i+=1 + + except: + # I find it hard to care if the series parsing fails + pass + + # grab the text for an individual chapter. + def getChapterText(self, url): + + logger.debug('Getting chapter text from: %s' % url) + + + soup = bs.BeautifulSoup(self._fetchUrl(url), + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + + div = soup.find('div', {'id' : 'story'}) + + if None == div: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) + + return self.utf8FromSoup(url,div) diff --git a/fanficdownloader/adapters/adapter_iketernalnet.py b/fanficdownloader/adapters/adapter_iketernalnet.py new file mode 100644 index 00000000..846aee7f --- /dev/null +++ b/fanficdownloader/adapters/adapter_iketernalnet.py @@ -0,0 +1,283 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Software: eFiction +import time +import logging +logger = logging.getLogger(__name__) +import re +import urllib2 + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, makeDate + +def getClass(): + return IkEternalNetAdapter + +# Class name has to be unique. Our convention is camel case the +# sitename with Adapter at the end. www is skipped. +class IkEternalNetAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + self.username = "NoneGiven" # if left empty, site doesn't return any message at all. + self.password = "" + self.is_adult=False + + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + + + # normalized story URL. + self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId')) + + # Each adapter needs to have a unique site abbreviation. + self.story.setMetadata('siteabbrev','ike') + + # The date format will vary from site to site. + # http://docs.python.org/library/datetime.html#strftime-strptime-behavior + self.dateformat = "%B %d, %Y" + + @staticmethod # must be @staticmethod, don't remove it. + def getSiteDomain(): + # The site domain. 
Does have www here, if it uses it. + return 'www.ik-eternal.net' + + @classmethod + def getSiteExampleURLs(cls): + return "http://"+cls.getSiteDomain()+"/viewstory.php?sid=1234" + + def getSiteURLPattern(self): + return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$" + + ## Login seems to be reasonably standard across eFiction sites. + def needToLoginCheck(self, data): + if 'Registered Users Only' in data \ + or 'There is no such account on our website' in data \ + or "That password doesn't match the one in our database" in data: + return True + else: + return False + + def performLogin(self, url): + params = {} + + if self.password: + params['penname'] = self.username + params['password'] = self.password + else: + params['penname'] = self.getConfig("username") + params['password'] = self.getConfig("password") + params['cookiecheck'] = '1' + params['submit'] = 'Submit' + + loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login' + logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl, + params['penname'])) + + d = self._fetchUrl(loginUrl, params) + + if "Member Account" not in d : #Member Account + logger.info("Failed to login to URL %s as %s" % (loginUrl, + params['penname'])) + raise exceptions.FailedToLogin(url,params['penname']) + return False + else: + return True + + ## Getting the chapter list and the meta data, plus 'is adult' checking. + def extractChapterUrlsAndMetadata(self): + + if self.is_adult or self.getConfig("is_adult"): + # Weirdly, different sites use different warning numbers. + # If the title search below fails, there's a good chance + # you need a different number. print data at that point + # and see what the 'click here to continue' url says. + addurl = "&warning=1" + else: + addurl="" + + # index=1 makes sure we see the story chapter index. Some + # sites skip that for one-chapter stories. 
+ url = self.url+'&index=1'+addurl + logger.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + if self.needToLoginCheck(data): + # need to log in for this one. + self.performLogin(url) + data = self._fetchUrl(url) + + # The actual text that is used to announce you need to be an + # adult varies from site to site. Again, print data before + # the title search to troubleshoot. + + # Since the warning text can change by warning level, let's + # look for the warning pass url. ksarchive uses + # &warning= -- actually, so do other sites. Must be an + # eFiction book. + + # viewstory.php?sid=1882&warning=4 + # viewstory.php?sid=1654&ageconsent=ok&warning=5 + #print data + #m = re.search(r"'viewstory.php\?sid=1882(&warning=4)'",data) + m = re.search(r"'viewstory.php\?sid=\d+((?:&ageconsent=ok)?&warning=\d+)'",data) + if m != None: + if self.is_adult or self.getConfig("is_adult"): + # We tried the default and still got a warning, so + # let's pull the warning number from the 'continue' + # link and reload data. + addurl = m.group(1) + # correct stupid & error in url. + addurl = addurl.replace("&","&") + url = self.url+'&index=1'+addurl + logger.debug("URL 2nd try: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + else: + raise exceptions.AdultCheckRequired(self.url) + + if "Access denied. This story has not been validated by the adminstrators of this site." in data: + raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.") + + # use BeautifulSoup HTML parser to make everything easier to find. 
+ soup = bs.BeautifulSoup(data,selfClosingTags=('p')) #poor formatting of the paragraphs in the title page + # print data + + # Now go hunting for all the meta data and the chapter list. + + ## Title + a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) + self.story.setMetadata('title',stripHTML(a)) + + # Find authorid and URL from... author url. + a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) + self.story.setMetadata('authorId',a['href'].split('=')[1]) + self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href']) + self.story.setMetadata('author',a.string) + + # Find the chapters: + for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")): + # just in case there's tags, like in chapter titles. + self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+chapter['href']+addurl)) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + # eFiction sites don't help us out a lot with their meta data + # formating, so it's a little ugly. + + # utility method + def defaultGetattr(d,k): + try: + return d[k] + except: + return "" + + # Rated: NC-17
    etc + asoup = soup.find('div', {'class': 'listbox'}) + for a in asoup.findAll('p'): + a.name='br' + labels = asoup.findAll('span',{'class':'label'}) + for labelspan in labels: + value = labelspan.nextSibling + label = labelspan.string + + if 'Summary' in label: + ## Everything until the next span class='label' + svalue = "" + while not defaultGetattr(value,'class') == 'label': + svalue += str(value) + value = value.nextSibling + self.setDescription(url,svalue) + #self.story.setMetadata('description',stripHTML(svalue)) + + if 'Rated' in label: + self.story.setMetadata('rating', value) + + if 'Word count' in label: + self.story.setMetadata('numWords', value) + + if 'Categories' in label: + cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories')) + for cat in cats: + self.story.addToList('category',cat.string) + + if 'Characters' in label: + chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters')) + for char in chars: + self.story.addToList('characters',char.string) + + if 'Genre' in label: + genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1')) + for genre in genres: + self.story.addToList('genre',genre.string) + + if 'Warnings' in label: + warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) + for warning in warnings: + self.story.addToList('warnings',warning.string) + + if 'Completed' in label: + if 'Yes' in value: + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + + if 'Published' in label: + self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat)) + + if 'Updated' in label: + # there's a stray [ at the end. + #value = value[0:-1] + self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat)) + + # grab the text for an individual chapter. 
+ def getChapterText(self, url): + + logger.debug('Getting chapter text from: %s' % url) + + soup = bs.BeautifulStoneSoup(self._fetchUrl(url), + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + + div = soup.find('div', {'id' : 'story'}) + + if None == div: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) + + return self.utf8FromSoup(url,div) diff --git a/fanficdownloader/adapters/adapter_imagineeficcom.py b/fanficdownloader/adapters/adapter_imagineeficcom.py new file mode 100644 index 00000000..f32c584b --- /dev/null +++ b/fanficdownloader/adapters/adapter_imagineeficcom.py @@ -0,0 +1,290 @@ +# -*- coding: utf-8 -*- + +# Copyright 2013 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Software: eFiction +import time +import logging +logger = logging.getLogger(__name__) +import re +import urllib2 + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, makeDate + +def getClass(): + return ImagineEFicComAdapter + +# Class name has to be unique. Our convention is camel case the +# sitename with Adapter at the end. www is skipped. +class ImagineEFicComAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. 
+ # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + self.username = "NoneGiven" # if left empty, site doesn't return any message at all. + self.password = "" + self.is_adult=False + + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + + + # normalized story URL. + self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId')) + + # Each adapter needs to have a unique site abbreviation. + self.story.setMetadata('siteabbrev','ime') + + # The date format will vary from site to site. + # http://docs.python.org/library/datetime.html#strftime-strptime-behavior + self.dateformat = "%Y.%m.%d" + + @staticmethod # must be @staticmethod, don't remove it. + def getSiteDomain(): + # The site domain. Does have www here, if it uses it. + return 'imagine.e-fic.com' + + @classmethod + def getSiteExampleURLs(cls): + return "http://"+cls.getSiteDomain()+"/viewstory.php?sid=1234" + + def getSiteURLPattern(self): + return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$" + + ## Login seems to be reasonably standard across eFiction sites. 
+ def needToLoginCheck(self, data): + if 'Registered Users Only' in data \ + or 'There is no such account on our website' in data \ + or "That password doesn't match the one in our database" in data: + return True + else: + return False + + def performLogin(self, url): + params = {} + + if self.password: + params['penname'] = self.username + params['password'] = self.password + else: + params['penname'] = self.getConfig("username") + params['password'] = self.getConfig("password") + params['cookiecheck'] = '1' + params['submit'] = 'Submit' + + loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login' + logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl, + params['penname'])) + + d = self._fetchUrl(loginUrl, params) + + if "Member Account" not in d : #Member Account + logger.info("Failed to login to URL %s as %s" % (loginUrl, + params['penname'])) + raise exceptions.FailedToLogin(url,params['penname']) + return False + else: + return True + + ## Getting the chapter list and the meta data, plus 'is adult' checking. + def extractChapterUrlsAndMetadata(self): + + if self.is_adult or self.getConfig("is_adult"): + # Weirdly, different sites use different warning numbers. + # If the title search below fails, there's a good chance + # you need a different number. print data at that point + # and see what the 'click here to continue' url says. + addurl = "&ageconsent=ok&warning=4" + else: + addurl="" + + # index=1 makes sure we see the story chapter index. Some + # sites skip that for one-chapter stories. + url = self.url+'&index=1'+addurl + logger.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + if self.needToLoginCheck(data): + # need to log in for this one. 
+ self.performLogin(url) + data = self._fetchUrl(url) + + m = re.search(r"'viewstory.php\?sid=\d+((?:&ageconsent=ok)?&warning=\d+)'",data) + if m != None: + if self.is_adult or self.getConfig("is_adult"): + # We tried the default and still got a warning, so + # let's pull the warning number from the 'continue' + # link and reload data. + addurl = m.group(1) + # correct stupid & error in url. + addurl = addurl.replace("&","&") + url = self.url+'&index=1'+addurl + logger.debug("URL 2nd try: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + else: + raise exceptions.AdultCheckRequired(self.url) + + if "Access denied. This story has not been validated by the adminstrators of this site." in data: + raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.") + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulSoup(data) + # print data + + # Now go hunting for all the meta data and the chapter list. + + ## Title + a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) + self.story.setMetadata('title',stripHTML(a)) + + # Find authorid and URL from... author url. + a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) + self.story.setMetadata('authorId',a['href'].split('=')[1]) + self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href']) + self.story.setMetadata('author',a.string) + + # Find the chapters: + for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")): + # just in case there's tags, like in chapter titles. 
+ self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+chapter['href']+addurl)) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + # eFiction sites don't help us out a lot with their meta data + # formating, so it's a little ugly. + + # utility method + def defaultGetattr(d,k): + try: + return d[k] + except: + return "" + + + # Rated: NC-17
    etc + labels = soup.findAll('span',{'class':'label'}) + for labelspan in labels: + value = labelspan.nextSibling + label = labelspan.string + + if 'Summary' in label: + ## Everything until the next span class='label' + svalue = "" + while not defaultGetattr(value,'class') == 'label': + svalue += str(value) + value = value.nextSibling + self.setDescription(url,svalue) + #self.story.setMetadata('description',stripHTML(svalue)) + + if 'Rated' in label: + self.story.setMetadata('rating', value) + + if 'Word count' in label: + self.story.setMetadata('numWords', value) + + if 'Categories' in label: + cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories')) + for cat in cats: + self.story.addToList('category',cat.string) + + if 'Characters' in label: + chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters')) + for char in chars: + self.story.addToList('characters',char.string) + + if 'Genre' in label: + genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1')) + for genre in genres: + self.story.addToList('genre',genre.string) + + if 'Warnings' in label: + warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) + for warning in warnings: + self.story.addToList('warnings',warning.string) + + if 'Completed' in label: + if 'Yes' in value: + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + + if 'Published' in label: + self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat)) + + if 'Updated' in label: + self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat)) + + try: + # Find Series name from series URL. + a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+")) + series_name = a.string + series_url = 'http://'+self.host+'/'+a['href'] + + # use BeautifulSoup HTML parser to make everything easier to find. 
+ seriessoup = bs.BeautifulSoup(self._fetchUrl(series_url)) + # can't use ^viewstory...$ in case of higher rated stories with javascript href. + storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+')) + i=1 + for a in storyas: + # skip 'report this' and 'TOC' links + if 'contact.php' not in a['href'] and 'index' not in a['href']: + if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')): + self.setSeries(series_name, i) + self.story.setMetadata('seriesUrl',series_url) + break + i+=1 + + except: + # I find it hard to care if the series parsing fails + pass + + # grab the text for an individual chapter. + def getChapterText(self, url): + + logger.debug('Getting chapter text from: %s' % url) + + soup = bs.BeautifulStoneSoup(self._fetchUrl(url), + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + + div = soup.find('div', {'id' : 'story'}) + + if None == div: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) + + return self.utf8FromSoup(url,div) diff --git a/fanficdownloader/adapters/adapter_indeathnet.py b/fanficdownloader/adapters/adapter_indeathnet.py new file mode 100644 index 00000000..5c0175ec --- /dev/null +++ b/fanficdownloader/adapters/adapter_indeathnet.py @@ -0,0 +1,200 @@ +# -*- coding: utf-8 -*- + +# Copyright 2012 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import time +import logging +logger = logging.getLogger(__name__) +import re +import urllib2 + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, makeDate + +def getClass(): + return InDeathNetAdapter + +# Class name has to be unique. Our convention is camel case the +# sitename with Adapter at the end. www is skipped. +class InDeathNetAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + self.username = "NoneGiven" # if left empty, site doesn't return any message at all. + self.password = "" + self.is_adult=False + + + # get storyId from url--url validation guarantees query correct + m = re.match(self.getSiteURLPattern(),url) + if m: + self.story.setMetadata('storyId',m.group('id')) + + # normalized story URL. + self._setURL('http://www.' + self.getSiteDomain() + '/blog/archive/'+self.story.getMetadata('storyId')+'-'+m.group('name')+'/') + else: + raise exceptions.InvalidStoryURL(url, + self.getSiteDomain(), + self.getSiteExampleURLs()) + + # Each adapter needs to have a unique site abbreviation. + self.story.setMetadata('siteabbrev','idn') + + # The date format will vary from site to site. + # http://docs.python.org/library/datetime.html#strftime-strptime-behavior + self.dateformat = "%d %B %Y" + + @staticmethod # must be @staticmethod, don't remove it. + def getSiteDomain(): + # The site domain. Does have www here, if it uses it. 
+ return 'indeath.net' + + + @classmethod + def getSiteExampleURLs(cls): + return "http://"+cls.getSiteDomain()+"/blog/archive/123-story-in-death/" + + def getSiteURLPattern(self): + # http://www.indeath.net/blog/archive/169-ransom-in-death/ + return re.escape("http://")+re.escape(self.getSiteDomain())+r"/blog/(archive/)?(?P\d+)\-(?P[a-z0-9\-]*)/?$" + + + def getDateFromComponents(self, postmonth, postday): + ym = re.search("Entries\ in\ (?PJanuary|February|March|April|May|June|July|August|September|October|November|December)\ (?P\d{4})",postmonth) + d = re.search("(?P\d{2})\ (Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)",postday) + postdate = makeDate(d.group('day')+' '+ym.group('mon')+' '+ym.group('year'),self.dateformat) + return postdate + + def getAuthorData(self): + + mainUrl = self.url.replace("/archive","") + + try: + maindata = self._fetchUrl(mainUrl) + + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.meta) + else: + raise e + + # use BeautifulSoup HTML parser to make everything easier to find. + mainsoup = bs.BeautifulSoup(maindata) + + # find first entry + e = mainsoup.find('div',{'class':"entry"}) + + # get post author as author + d = e.find('div',{'class':"desc"}) + a = d.find('strong') + self.story.setMetadata('author',a.contents[0].string.strip()) + + # Don't seem to be able to get author pages anymore + self.story.setMetadata('authorUrl','http://www.indeath.net/') + self.story.setMetadata('authorId','0') + + ## Getting the chapter list and the meta data, plus 'is adult' checking. + def extractChapterUrlsAndMetadata(self): + + url = self.url + try: + data = self._fetchUrl(url) + + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.meta) + else: + raise e + + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulSoup(data) + + # Now go hunting for all the meta data and the chapter list. 
+ + ## Title + h = soup.find('a', id="blog_title") + t = h.find('span') + self.story.setMetadata('title',stripHTML(t.contents[0]).strip()) + + s = t.find('div') + if s != None: + self.setDescription(url,s) + + # Get Author from main blog page since it's not reliably on the archive page + self.getAuthorData() + + # Find the chapters: + chapters=soup.findAll('a', title="View entry", href=re.compile(r'http://www.indeath.net/blog/'+self.story.getMetadata('storyId')+"/entry\-(\d+)\-([^/]*)/$")) + + #reverse the list since newest at the top + chapters.reverse() + + # Get date published & updated from first & last entries + posttable=soup.find('div', id="main_column") + + postmonths=posttable.findAll('th', text=re.compile(r'Entries\ in\ ')) + postmonths.reverse() + + postdates=posttable.findAll('span', _class="desc", text=re.compile('\d{2}\ (Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)')) + postdates.reverse() + + self.story.setMetadata('datePublished',self.getDateFromComponents(postmonths[0],postdates[0])) + self.story.setMetadata('dateUpdated',self.getDateFromComponents(postmonths[len(postmonths)-1],postdates[len(postdates)-1])) + + # Process List of Chapters + self.story.setMetadata('numChapters',len(chapters)) + logger.debug("numChapters: (%s)"%self.story.getMetadata('numChapters')) + for x in range(0,len(chapters)): + # just in case there's tags, like in chapter titles. + chapter=chapters[x] + if len(chapters)==1: + self.chapterUrls.append((self.story.getMetadata('title'),chapter['href'])) + else: + ct = stripHTML(chapter) + tnew = re.match("(?i)"+self.story.getMetadata('title')+r" - (?P.*)$",ct) + if tnew: + chaptertitle = tnew.group('newtitle') + else: + chaptertitle = ct + self.chapterUrls.append((chaptertitle,chapter['href'])) + + + + # grab the text for an individual chapter. + def getChapterText(self, url): + logger.debug('Getting chapter text from: %s' % url) + + #chapter=bs.BeautifulSoup('
    ') + data = self._fetchUrl(url) + soup = bs.BeautifulSoup(data,selfClosingTags=('br','hr','span','center')) + + chapter = soup.find("div", "entry_content") + + if None == chapter: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) + + return self.utf8FromSoup(url,chapter) + diff --git a/fanficdownloader/adapters/adapter_ksarchivecom.py b/fanficdownloader/adapters/adapter_ksarchivecom.py new file mode 100644 index 00000000..e3fdd59c --- /dev/null +++ b/fanficdownloader/adapters/adapter_ksarchivecom.py @@ -0,0 +1,315 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Software: eFiction +import time +import logging +logger = logging.getLogger(__name__) +import re +import urllib2 + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, makeDate + +# Search for XXX comments--that's where things are most likely to need changing. + +# This function is called by the downloader in all adapter_*.py files +# in this dir to register the adapter class. So it needs to be +# updated to reflect the class below it. That, plus getSiteDomain() +# take care of 'Registering'. +def getClass(): + return KSArchiveComAdapter # XXX + +# Class name has to be unique. Our convention is camel case the +# sitename with Adapter at the end. www is skipped. 
+class KSArchiveComAdapter(BaseSiteAdapter): # XXX + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + self.is_adult=False + + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + + + # normalized story URL. + # XXX Most sites don't have the /fanfic part. Replace all to remove it usually. + self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId')) + + # Each adapter needs to have a unique site abbreviation. + self.story.setMetadata('siteabbrev','ksa') # XXX + + # The date format will vary from site to site. + # http://docs.python.org/library/datetime.html#strftime-strptime-behavior + self.dateformat = "%b/%d/%Y" # XXX + + @classmethod + def getAcceptDomains(cls): + return ['www.ksarchive.com','ksarchive.com'] + + @staticmethod # must be @staticmethod, don't remove it. + def getSiteDomain(): + # The site domain. Does have www here, if it uses it. + return 'ksarchive.com' # XXX + + @classmethod + def getSiteExampleURLs(cls): + return "http://"+cls.getSiteDomain()+"/viewstory.php?sid=1234" + + def getSiteURLPattern(self): + return "http://(www.)?"+re.escape(self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$" + + ## Getting the chapter list and the meta data, plus 'is adult' checking. + def extractChapterUrlsAndMetadata(self): + + if self.is_adult or self.getConfig("is_adult"): + # Weirdly, different sites use different warning numbers. + # If the title search below fails, there's a good chance + # you need a different number. print data at that point + # and see what the 'click here to continue' url says. + + # Furthermore, there's a couple sites now with more than + # one warning level for different ratings. 
And they're + # fussy about it. midnightwhispers has three: 10, 3 & 5. + # we'll try 5 first. + addurl = "&ageconsent=ok&warning=2" # XXX + else: + addurl="" + + # index=1 makes sure we see the story chapter index. Some + # sites skip that for one-chapter stories. + url = self.url+'&index=1'+addurl + logger.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + # The actual text that is used to announce you need to be an + # adult varies from site to site. Again, print data before + # the title search to troubleshoot. + + # Since the warning text can change by warning level, let's + # look for the warning pass url. ksarchive uses + # &warning= -- actually, so do other sites. Must be an + # eFiction book. + + # viewstory.php?sid=1882&warning=4 + # viewstory.php?sid=1654&ageconsent=ok&warning=5 + #print data + #m = re.search(r"'viewstory.php\?sid=1882(&warning=4)'",data) + m = re.search(r"'viewstory.php\?sid=\d+((?:&ageconsent=ok)?&warning=\d+)'",data) + if m != None: + if self.is_adult or self.getConfig("is_adult"): + # We tried the default and still got a warning, so + # let's pull the warning number from the 'continue' + # link and reload data. + addurl = m.group(1) + # correct stupid & error in url. + addurl = addurl.replace("&","&") + url = self.url+'&index=1'+addurl + logger.debug("URL 2nd try: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + else: + raise exceptions.AdultCheckRequired(self.url) + + if "Access denied. This story has not been validated by the adminstrators of this site." in data: + raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.") + + # use BeautifulSoup HTML parser to make everything easier to find. 
+ soup = bs.BeautifulSoup(data) + # print data + + + # Now go hunting for all the meta data and the chapter list. + + ## Title + a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) + self.story.setMetadata('title',stripHTML(a)) # title's inside a tag. + + # Find authorid and URL from... author url. + a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) + self.story.setMetadata('authorId',a['href'].split('=')[1]) + self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href']) + self.story.setMetadata('author',stripHTML(a)) + + # Find the chapters: + for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")): + # just in case there's tags, like in chapter titles. + self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+chapter['href']+addurl)) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + # eFiction sites don't help us out a lot with their meta data + # formating, so it's a little ugly. + + # utility method + def defaultGetattr(d,k): + try: + return d[k] + except: + return "" + + # Rated: NC-17
    etc + labels = soup.findAll('span',{'class':'label'}) + for labelspan in labels: + value = labelspan.nextSibling + label = stripHTML(labelspan) + + if 'Summary' in label: + ## Everything until the next span class='label' + svalue = "" + while not defaultGetattr(value,'class') == 'label': + svalue += str(value) + # poor HTML(unclosed

    for one) can cause run on + # over the next label. + if '' in svalue: + svalue = svalue[0:svalue.find('')] + break + else: + value = value.nextSibling + self.setDescription(url,svalue) + #self.story.setMetadata('description',stripHTML(svalue)) + + if 'Rated' in label: + self.story.setMetadata('rating', value) + + if 'Word count' in label: + self.story.setMetadata('numWords', value) + + if 'Categories' in label: + cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories')) + catstext = [stripHTML(cat) for cat in cats] + for cat in catstext: + # ran across one story with an empty + # tag in the desc once. + if cat and cat.strip() in ('Poetry','Essays'): + self.story.addToList('category',stripHTML(cat)) + + if 'Characters' in label: + self.story.addToList('characters','Kirk') + self.story.addToList('characters','Spock') + chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters')) + charstext = [stripHTML(char) for char in chars] + for char in charstext: + self.story.addToList('characters',stripHTML(char)) + + ## Not all sites use Genre, but there's no harm to + ## leaving it in. Check to make sure the type_id number + ## is correct, though--it's site specific. + if 'Genre' in label: + genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1')) # XXX + genrestext = [stripHTML(genre) for genre in genres] + self.genre = ', '.join(genrestext) + for genre in genrestext: + self.story.addToList('genre',stripHTML(genre)) + + ## In addition to Genre (which is very site specific) KSA + ## has 'Story Type', which is much more what most sites + ## call genre. 
+ if 'Story Type' in label: + genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=5')) # XXX + genrestext = [stripHTML(genre) for genre in genres] + self.genre = ', '.join(genrestext) + for genre in genrestext: + self.story.addToList('genre',stripHTML(genre)) + + ## Not all sites use Warnings, but there's no harm to + ## leaving it in. Check to make sure the type_id number + ## is correct, though--it's site specific. + if 'Warnings' in label: + warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX + warningstext = [stripHTML(warning) for warning in warnings] + self.warning = ', '.join(warningstext) + for warning in warningstext: + self.story.addToList('warnings',stripHTML(warning)) + + if 'Completed' in label: + if 'Yes' in value: + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + + if 'Published' in label: + self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat)) + + if 'Updated' in label: + # there's a stray [ at the end. + #value = value[0:-1] + self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat)) + + try: + # Find Series name from series URL. + a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+")) + series_name = stripHTML(a) + series_url = 'http://'+self.host+'/'+a['href'] + + # use BeautifulSoup HTML parser to make everything easier to find. 
+ seriessoup = bs.BeautifulSoup(self._fetchUrl(series_url)) + storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+')) + i=1 + for a in storyas: + # skip 'report this' and 'TOC' links + if 'contact.php' not in a['href'] and 'index' not in a['href']: + if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')): + self.setSeries(series_name, i) + self.story.setMetadata('seriesUrl',series_url) + break + i+=1 + + except: + # I find it hard to care if the series parsing fails + pass + + # grab the text for an individual chapter. + def getChapterText(self, url): + + logger.debug('Getting chapter text from: %s' % url) + + data = self._fetchUrl(url) + soup = bs.BeautifulStoneSoup(data, + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + + div = soup.find('div', {'id' : 'story'}) + + if None == div: + if "A fatal MySQL error was encountered" in data: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Database error on the site reported!" % url) + else: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) + + return self.utf8FromSoup(url,div) diff --git a/fanficdownloader/adapters/adapter_libraryofmoriacom.py b/fanficdownloader/adapters/adapter_libraryofmoriacom.py new file mode 100644 index 00000000..3df41bdd --- /dev/null +++ b/fanficdownloader/adapters/adapter_libraryofmoriacom.py @@ -0,0 +1,251 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Software: eFiction +import time +import logging +logger = logging.getLogger(__name__) +import re +import urllib2 + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, makeDate + + +def getClass(): + return LibraryOfMoriaComAdapter + +# Class name has to be unique. Our convention is camel case the +# sitename with Adapter at the end. www is skipped. +class LibraryOfMoriaComAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + self.username = "NoneGiven" # if left empty, site doesn't return any message at all. + self.password = "" + self.is_adult=False + + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + + + # normalized story URL. + self._setURL('http://' + self.getSiteDomain() + '/a/viewstory.php?sid='+self.story.getMetadata('storyId')) + + # Each adapter needs to have a unique site abbreviation. + self.story.setMetadata('siteabbrev','lom') + + # The date format will vary from site to site. + # http://docs.python.org/library/datetime.html#strftime-strptime-behavior + self.dateformat = "%B %d, %Y" + + @staticmethod # must be @staticmethod, don't remove it. + def getSiteDomain(): + # The site domain. Does have www here, if it uses it. 
+ return 'www.libraryofmoria.com' + + @classmethod + def getSiteExampleURLs(cls): + return "http://"+cls.getSiteDomain()+"/a/viewstory.php?sid=1234" + + def getSiteURLPattern(self): + return re.escape("http://"+self.getSiteDomain()+"/a/viewstory.php?sid=")+r"\d+$" + + + + ## Getting the chapter list and the meta data, plus 'is adult' checking. + def extractChapterUrlsAndMetadata(self): + + if self.is_adult or self.getConfig("is_adult"): + addurl = "&ageconsent=ok&warning=3" + else: + addurl="" + + # index=1 makes sure we see the story chapter index. Some + # sites skip that for one-chapter stories. + url = self.url+addurl + logger.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + m = re.search(r"'viewstory.php\?sid=\d+((?:&ageconsent=ok)?&warning=\d+)'",data) + if m != None: + if self.is_adult or self.getConfig("is_adult"): + # We tried the default and still got a warning, so + # let's pull the warning number from the 'continue' + # link and reload data. + addurl = m.group(1) + # correct stupid & error in url. + addurl = addurl.replace("&","&") + url = self.url+'&index=1'+addurl + logger.debug("URL 2nd try: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + else: + raise exceptions.AdultCheckRequired(self.url) + + if "Access denied. This story has not been validated by the adminstrators of this site." in data: + raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.") + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulSoup(data) + # print data + + # Now go hunting for all the meta data and the chapter list. 
+ + ## Title + a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) + self.story.setMetadata('title',stripHTML(a)) + + # Find authorid and URL from... author url. + a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) + self.story.setMetadata('authorId',a['href'].split('=')[1]) + self.story.setMetadata('authorUrl','http://'+self.host+'/a/'+a['href']) + self.story.setMetadata('author',a.string) + + # Find the chapters: + for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")): + # just in case there's tags, like in chapter titles. + self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/a/'+chapter['href']+addurl)) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + # eFiction sites don't help us out a lot with their meta data + # formating, so it's a little ugly. + + # utility method + def defaultGetattr(d,k): + try: + return d[k] + except: + return "" + + # Rated: NC-17
    etc + labels = soup.findAll('span',{'class':'label'}) + for labelspan in labels: + value = labelspan.nextSibling + label = labelspan.string + + if 'Summary' in label: + ## Everything until the next span class='label' + svalue = "" + while not defaultGetattr(value,'class') == 'label': + svalue += str(value) + value = value.nextSibling + self.setDescription(url,svalue) + #self.story.setMetadata('description',stripHTML(svalue)) + + if 'Rated' in label: + self.story.setMetadata('rating', value) + + if 'Word count' in label: + self.story.setMetadata('numWords', value) + + if 'Categories' in label: + cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories')) + catstext = [cat.string for cat in cats] + for cat in catstext: + self.story.addToList('category',cat.string) + + if 'Characters' in label: + chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters')) + charstext = [char.string for char in chars] + for char in charstext: + self.story.addToList('characters',char.string) + + if 'Type' in label: + genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1')) + for genre in genres: + self.story.addToList('genre',genre.string) + + if 'Warning' in label: + warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=5')) + for warning in warnings: + self.story.addToList('warnings',warning.string) + + if 'Completed' in label: + if 'Yes' in value: + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + + if 'Published' in label: + self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat)) + + if 'Updated' in label: + # there's a stray [ at the end. + #value = value[0:-1] + self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat)) + + try: + # Find Series name from series URL. 
+ a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+")) + series_name = a.string + series_url = 'http://'+self.host+'/a/'+a['href'] + + # use BeautifulSoup HTML parser to make everything easier to find. + seriessoup = bs.BeautifulSoup(self._fetchUrl(series_url)) + storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+')) + i=1 + for a in storyas: + # skip 'report this' and 'TOC' links + if 'contact.php' not in a['href'] and 'index' not in a['href']: + if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')): + self.setSeries(series_name, i) + self.story.setMetadata('seriesUrl',series_url) + break + i+=1 + + except: + # I find it hard to care if the series parsing fails + pass + + # grab the text for an individual chapter. + def getChapterText(self, url): + + logger.debug('Getting chapter text from: %s' % url) + + soup = bs.BeautifulStoneSoup(self._fetchUrl(url), + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + + div = soup.find('div', {'id' : 'story'}) + + if None == div: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) + + return self.utf8FromSoup(url,div) diff --git a/fanficdownloader/adapters/adapter_literotica.py b/fanficdownloader/adapters/adapter_literotica.py new file mode 100644 index 00000000..1a757b69 --- /dev/null +++ b/fanficdownloader/adapters/adapter_literotica.py @@ -0,0 +1,267 @@ +# -*- coding: utf-8 -*- + +# Copyright 2013 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# + +import time +import logging +logger = logging.getLogger(__name__) +import re +import urllib2 +import urlparse +import time + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, makeDate + +class LiteroticaSiteAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + + self.decode = ["utf8", + "Windows-1252"] # 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + + self.story.setMetadata('siteabbrev','litero') + + # normalize to first chapter. Not sure if they ever have more than 2 digits. + storyId = self.parsedUrl.path.split('/',)[2] + # replace later chapters with first chapter but don't remove numbers + # from the URL that disambiguate stories with the same title. + storyId = re.sub("-ch-?\d\d", "", storyId) + self.story.setMetadata('storyId', storyId) + + ## accept m(mobile)url, but use www. + url = re.sub("^(www|german|spanish|french|dutch|italian|romanian|portuguese|other)\.i", + "\1", + url) + + ## strip ?page=... + url = re.sub("\?page=.*$", "", url) + + ## set url + self._setURL(url) + + # The date format will vary from site to site. 
+ # http://docs.python.org/library/datetime.html#strftime-strptime-behavior + self.dateformat = '%m/%d/%y' + + @staticmethod + def getSiteDomain(): + return 'literotica.com' + + @classmethod + def getAcceptDomains(cls): + return ['www.literotica.com', + 'www.i.literotica.com', + 'german.literotica.com', + 'german.i.literotica.com', + 'spanish.literotica.com', + 'spanish.i.literotica.com', + 'french.literotica.com', + 'french.i.literotica.com', + 'dutch.literotica.com', + 'dutch.i.literotica.com', + 'italian.literotica.com', + 'italian.i.literotica.com', + 'romanian.literotica.com', + 'romanian.i.literotica.com', + 'portuguese.literotica.com', + 'portuguese.i.literotica.com', + 'other.literotica.com', + 'other.i.literotica.com'] + + @classmethod + def getSiteExampleURLs(cls): + return "http://www.literotica.com/s/story-title https://www.literotica.com/s/story-title http://portuguese.literotica.com/s/story-title http://german.literotica.com/s/story-title" + + def getSiteURLPattern(self): + return r"https?://(www|german|spanish|french|dutch|italian|romanian|portuguese|other)(\.i)?\.literotica\.com/s/([a-zA-Z0-9_-]+)" + + def extractChapterUrlsAndMetadata(self): + """ + NOTE: Some stories can have versions, + e.g. 
/my-story-ch-05-version-10 + NOTE: If two stories share the same title, a running index is added, + e.g.: /my-story-ch-02-1 + Strategy: + * Go to author's page, search for the current story link, + * If it's in a tr.root-story => One-part story + * , get metadata and be done + * If it's in a tr.sl => Chapter in series + * Search up from there until we find a tr.ser-ttl (this is the + story) + * Gather metadata + * Search down from there for all tr.sl until the next + tr.ser-ttl, foreach + * Chapter link is there + """ + + if not (self.is_adult or self.getConfig("is_adult")): + raise exceptions.AdultCheckRequired(self.url) + + logger.debug("Chapter/Story URL: <%s> " % self.url) + try: + data1 = self._fetchUrl(self.url) + soup1 = bs.BeautifulSoup(data1) + #strip comments from soup + [comment.extract() for comment in soup1.findAll(text=lambda text:isinstance(text, bs.Comment))] + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + # author + a = soup1.find("span", "b-story-user-y") + self.story.setMetadata('authorId', urlparse.parse_qs(a.a['href'].split('?')[1])['uid'][0]) + authorurl = a.a['href'] + if authorurl.startswith('//'): + authorurl = self.parsedUrl.scheme+':'+authorurl + self.story.setMetadata('authorUrl', authorurl) + self.story.setMetadata('author', a.text) + + # get the author page + try: + dataAuth = self._fetchUrl(authorurl) + soupAuth = bs.BeautifulSoup(dataAuth) + #strip comments from soup + [comment.extract() for comment in soupAuth.findAll(text=lambda text:isinstance(text, bs.Comment))] + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(authorurl) + else: + raise e + + ## Find link to url in author's page + ## site has started using //domain.name/asdf urls remove https?: from front + storyLink = soupAuth.find('a', href=self.url[self.url.index(':')+1:]) + + if storyLink is not None: + urlTr = storyLink.parent.parent + if urlTr['class'] == "sl": + 
isSingleStory = False + else: + isSingleStory = True + else: + raise exceptions.FailedToDownload("Couldn't find story <%s> on author's page <%s>" % (url, authorurl)) + + if isSingleStory: + self.story.setMetadata('title', storyLink.text) + self.story.setMetadata('description', urlTr.findAll("td")[1].text) + self.story.addToList('eroticatags', urlTr.findAll("td")[2].text) + date = urlTr.findAll('td')[-1].text + self.story.setMetadata('datePublished', makeDate(date, self.dateformat)) + self.story.setMetadata('dateUpdated',makeDate(date, self.dateformat)) + self.chapterUrls = [(storyLink.text, self.url)] + else: + seriesTr = urlTr.previousSibling + while seriesTr['class'] != 'ser-ttl': + seriesTr = seriesTr.previousSibling + m = re.match("^(?P.*?):\s(?P<numChapters>\d+)\sPart\sSeries$", seriesTr.find("strong").text) + self.story.setMetadata('title', m.group('title')) + + ## Walk the chapters + chapterTr = seriesTr.nextSibling + self.chapterUrls = [] + dates = [] + descriptions = [] + while chapterTr is not None and chapterTr['class'] == 'sl': + descriptions.append(chapterTr.findAll("td")[1].text) + chapterLink = chapterTr.find("td", "fc").find("a") + self.chapterUrls.append((chapterLink.text, "http:" + chapterLink["href"])) + self.story.addToList('eroticatags', chapterTr.findAll("td")[2].text) + dates.append(makeDate(chapterTr.findAll('td')[-1].text, self.dateformat)) + chapterTr = chapterTr.nextSibling + + ## Set description to joint chapter descriptions + self.story.setMetadata('description', " / ".join(descriptions)) + + ## Set the oldest date as publication date, the newest as update date + dates.sort() + self.story.setMetadata('datePublished', dates[0]) + self.story.setMetadata('dateUpdated', dates[-1]) + + # normalize on first chapter URL. 
+ self._setURL(self.chapterUrls[0][1]) + + self.story.setMetadata('numChapters', len(self.chapterUrls)) + + # set storyId to 'title-author' to avoid duplicates + # self.story.setMetadata('storyId', + # re.sub("[^a-z0-9]", "", self.story.getMetadata('title').lower()) + # + "-" + # + re.sub("[^a-z0-9]", "", self.story.getMetadata('author').lower())) + + return + + def getChapterText(self, url): + logger.debug('Getting chapter text from <%s>' % url) + data1 = self._fetchUrl(url) + # brute force approach to replace the wrapping <p> tag. If + # done by changing tag name, it causes problems with nested + # <p> tags. + data1 = data1.replace('<div class="b-story-body-x x-r15"><div><p>','<div class="b-story-body-x x-r15"><div>') + soup1 = bs.BeautifulSoup(data1) + + #strip comments from soup + [comment.extract() for comment in soup1.findAll(text=lambda text:isinstance(text, bs.Comment))] + + # get story text + story1 = soup1.find('div', 'b-story-body-x').div + #print("story1:%s"%story1) + # story1.name='div' + story1.append('<br />') + storytext = self.utf8FromSoup(url,story1) + + # find num pages + pgs = int(soup1.find("span", "b-pager-caption-t r-d45").string.split(' ')[0]) + logger.debug("pages: "+str(pgs)) + + # get all the pages + for i in xrange(2, pgs+1): + try: + logger.debug("fetching page "+str(i)) + time.sleep(0.5) + data2 = self._fetchUrl(url, {'page': i}) + # brute force approach to replace the wrapping <p> tag. If + # done by changing tag name, it causes problems with nested + # <p> tags. 
+ data2 = data2.replace('<div class="b-story-body-x x-r15"><div><p>','<div class="b-story-body-x x-r15"><div>') + soup2 = bs.BeautifulSoup(data2) + [comment.extract() for comment in soup2.findAll(text=lambda text:isinstance(text, bs.Comment))] + story2 = soup2.find('div', 'b-story-body-x').div + # story2.name='div' + story2.append('<br />') + storytext += self.utf8FromSoup(url,story2) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(url) + else: + raise e + return storytext + + +def getClass(): + return LiteroticaSiteAdapter + + + diff --git a/fanficdownloader/adapters/adapter_lotrfanfictioncom.py b/fanficdownloader/adapters/adapter_lotrfanfictioncom.py new file mode 100644 index 00000000..b20c2ecf --- /dev/null +++ b/fanficdownloader/adapters/adapter_lotrfanfictioncom.py @@ -0,0 +1,36 @@ +# -*- coding: utf-8 -*- + +# Copyright 2014 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +# Software: eFiction +from base_efiction_adapter import BaseEfictionAdapter + +class TheLOTRFanFictionSiteAdapter(BaseEfictionAdapter): + + @staticmethod + def getSiteDomain(): + return 'lotrfanfiction.com' + + @classmethod + def getSiteAbbrev(seluuf): + return 'lotrff' + + @classmethod + def getDateFormat(self): + return "%d/%m/%y" + +def getClass(): + return TheLOTRFanFictionSiteAdapter diff --git a/fanficdownloader/adapters/adapter_lumossycophanthexcom.py b/fanficdownloader/adapters/adapter_lumossycophanthexcom.py new file mode 100644 index 00000000..805d752a --- /dev/null +++ b/fanficdownloader/adapters/adapter_lumossycophanthexcom.py @@ -0,0 +1,238 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Software: eFiction +import time +import logging +logger = logging.getLogger(__name__) +import re +import urllib2 + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, makeDate + +def getClass(): + return LumosSycophantHexComAdapter + +# Class name has to be unique. Our convention is camel case the +# sitename with Adapter at the end. www is skipped. +class LumosSycophantHexComAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. 
+ # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + self.username = "NoneGiven" # if left empty, site doesn't return any message at all. + self.password = "" + self.is_adult=False + + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + + + + # normalized story URL. + self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId')) + + # Each adapter needs to have a unique site abbreviation. + self.story.setMetadata('siteabbrev','lsph') + + # The date format will vary from site to site. + # http://docs.python.org/library/datetime.html#strftime-strptime-behavior + self.dateformat = "%m/%d/%Y" + + @staticmethod # must be @staticmethod, don't remove it. + def getSiteDomain(): + # The site domain. Does have www here, if it uses it. + return 'lumos.sycophanthex.com' + + @classmethod + def getSiteExampleURLs(cls): + return "http://"+cls.getSiteDomain()+"/viewstory.php?sid=1234" + + def getSiteURLPattern(self): + return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$" + + ## Getting the chapter list and the meta data, plus 'is adult' checking. + def extractChapterUrlsAndMetadata(self): + + if self.is_adult or self.getConfig("is_adult"): + # Weirdly, different sites use different warning numbers. + # If the title search below fails, there's a good chance + # you need a different number. print data at that point + # and see what the 'click here to continue' url says. + addurl = "&ageconsent=ok&warning=19" + else: + addurl="" + + # index=1 makes sure we see the story chapter index. Some + # sites skip that for one-chapter stories. 
+ url = self.url+'&index=1'+addurl + logger.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + # The actual text that is used to announce you need to be an + # adult varies from site to site. Again, print data before + # the title search to troubleshoot. + if "Age Consent Required" in data: + raise exceptions.AdultCheckRequired(self.url) + + if "Access denied. This story has not been validated by the adminstrators of this site." in data: + raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.") + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulSoup(data) + # print data + + # Now go hunting for all the meta data and the chapter list. + + ## Title + pt = soup.find('div', {'id' : 'pagetitle'}) + a = pt.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) + self.story.setMetadata('title',stripHTML(a)) + + # Find authorid and URL from... author url. + a = pt.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) + self.story.setMetadata('authorId',a['href'].split('=')[1]) + self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href']) + self.story.setMetadata('author',a.string) + + rating=pt.text.split('(')[1].split(')')[0] + self.story.setMetadata('rating', rating) + + # Find the chapters: + for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")): + # just in case there's tags, like <i> in chapter titles. + self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+chapter['href']+addurl)) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + # eFiction sites don't help us out a lot with their meta data + # formating, so it's a little ugly. 
+ + # utility method + def defaultGetattr(d,k): + try: + return d[k] + except: + return "" + + + # <span class="label">Rated:</span> NC-17<br /> etc + + labels = soup.findAll('span',{'class':'label'}) + + value = labels[0].previousSibling + svalue = "" + while value != None: + val = value + value = value.previousSibling + while not defaultGetattr(val,'class') == 'label': + svalue += str(val) + val = val.nextSibling + self.setDescription(url,svalue) + + for labelspan in labels: + value = labelspan.nextSibling + label = labelspan.string + + if 'Word count' in label: + self.story.setMetadata('numWords', value.split(' -')[0]) + + if 'Categories' in label: + cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories')) + for cat in cats: + self.story.addToList('category',cat.string) + + if 'Characters' in label: + chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters')) + for char in chars: + self.story.addToList('characters',char.string) + + if 'Genre' in label: + genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1')) + for genre in genres: + self.story.addToList('genre',genre.string) + + if 'Warnings' in label: + warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) + for warning in warnings: + self.story.addToList('warnings',warning.string) + + if 'Complete' in label: + if 'Yes' in value: + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + + if 'Published' in label: + self.story.setMetadata('datePublished', makeDate(stripHTML(value.split(' -')[0]), self.dateformat)) + + if 'Updated' in label: + # there's a stray [ at the end. + #value = value[0:-1] + self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat)) + + try: + # Find Series name from series URL. 
+ a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+")) + series_name = a.string + series_url = 'http://'+self.host+'/'+a['href'] + + # use BeautifulSoup HTML parser to make everything easier to find. + seriessoup = bs.BeautifulSoup(self._fetchUrl(series_url)) + storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$')) + i=1 + for a in storyas: + if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')): + self.setSeries(series_name, i) + self.story.setMetadata('seriesUrl',series_url) + break + i+=1 + + except: + # I find it hard to care if the series parsing fails + pass + + # grab the text for an individual chapter. + def getChapterText(self, url): + + logger.debug('Getting chapter text from: %s' % url) + + soup = bs.BeautifulStoneSoup(self._fetchUrl(url), + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + + div = soup.find('div', {'id' : 'story'}) + + if None == div: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) + + return self.utf8FromSoup(url,div) diff --git a/fanficdownloader/adapters/adapter_mediaminerorg.py b/fanficdownloader/adapters/adapter_mediaminerorg.py new file mode 100644 index 00000000..261b1f71 --- /dev/null +++ b/fanficdownloader/adapters/adapter_mediaminerorg.py @@ -0,0 +1,237 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import time +import logging +logger = logging.getLogger(__name__) +import re +import urllib +import urllib2 + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, makeDate + +class MediaMinerOrgSiteAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + self.story.setMetadata('siteabbrev','mm') + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + + # get storyId from url--url validation guarantees query correct + m = re.match(self.getSiteURLPattern(),url) + if m: + self.story.setMetadata('storyId',m.group('id')) + + # normalized story URL. + self._setURL('http://' + self.getSiteDomain() + '/fanfic/view_st.php/'+self.story.getMetadata('storyId')) + else: + raise exceptions.InvalidStoryURL(url, + self.getSiteDomain(), + self.getSiteExampleURLs()) + + @staticmethod + def getSiteDomain(): + return 'www.mediaminer.org' + + @classmethod + def getSiteExampleURLs(cls): + return "http://"+cls.getSiteDomain()+"/fanfic/view_st.php/123456 http://"+cls.getSiteDomain()+"/fanfic/view_ch.php/1234123/123444#fic_c" + + def getSiteURLPattern(self): + ## http://www.mediaminer.org/fanfic/view_st.php/76882 + ## http://www.mediaminer.org/fanfic/view_ch.php/167618/594087#fic_c + return re.escape("http://"+self.getSiteDomain())+\ + "/fanfic/view_(st|ch)\.php/"+r"(?P<id>\d+)(/\d+(#fic_c)?)?$" + + def extractChapterUrlsAndMetadata(self): + + url = self.url + logger.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + # use BeautifulSoup HTML parser to make everything easier to find. 
+ soup = bs.BeautifulSoup(data) + + # [ A - All Readers ], strip '[' ']' + ## Above title because we remove the smtxt font to get title. + smtxt = soup.find("font",{"class":"smtxt"}) + if not smtxt: + raise exceptions.StoryDoesNotExist(self.url) + rating = smtxt.string[1:-1] + self.story.setMetadata('rating',rating) + + # Find authorid and URL from... author url. + a = soup.find('a', href=re.compile(r"/fanfic/src.php/u/\d+")) + self.story.setMetadata('authorId',a['href'].split('/')[-1]) + self.story.setMetadata('authorUrl','http://'+self.host+a['href']) + self.story.setMetadata('author',a.string) + + ## Title - Good grief. Title varies by chaptered, 1chapter and 'type=one shot'--and even 'one-shot's can have titled chapter. + ## But, if colspan=2, there's no chapter title. + ## <td class="ffh">Atmosphere: Chapter 1</b> <font class="smtxt">[ P - Pre-Teen ]</font></td> + ## <td colspan=2 class="ffh">Hearts of Ice <font class="smtxt">[ P - Pre-Teen ]</font></td> + ## <td colspan=2 class="ffh">Suzaku no Princess <font class="smtxt">[ P - Pre-Teen ]</font></td> + ## <td class="ffh">The Kraut, The Bartender, and The Drunkard: Chapter 1</b> <font class="smtxt">[ P - Pre-Teen ]</font></td> + ## <td class="ffh">Betrayal and Justice: A Cold Heart</b> <font size="-1">( Chapter 1 )</font> <font class="smtxt">[ A - All Readers ]</font></td> + ## <td class="ffh">Question and Answer: Question and Answer</b> <font size="-1">( One-Shot )</font> <font class="smtxt">[ A - All Readers ]</font></td> + title = soup.find('td',{'class':'ffh'}) + for font in title.findAll('font'): + font.extract() # removes 'font' tags from inside the td. + if title.has_key('colspan'): + titlet = stripHTML(title) + else: + ## No colspan, it's part chapter title--even if it's a one-shot. + titlet = ':'.join(stripHTML(title).split(':')[:-1]) # strip trailing 'Chapter X' or chapter title + self.story.setMetadata('title',titlet) + ## The story title is difficult to reliably parse from the + ## story pages. 
Getting it from the author page is, but costs + ## another fetch. + # authsoup = bs.BeautifulSoup(self._fetchUrl(self.story.getMetadata('authorUrl'))) + # titlea = authsoup.find('a',{'href':'/fanfic/view_st.php/'+self.story.getMetadata('storyId')}) + # self.story.setMetadata('title',titlea.text) + + # save date from first for later. + firstdate=None + + # Find the chapters + select = soup.find('select',{'name':'cid'}) + if not select: + self.chapterUrls.append(( self.story.getMetadata('title'),self.url)) + else: + for option in select.findAll("option"): + chapter = stripHTML(option.string) + ## chapter can be: Chapter 7 [Jan 23, 2011] + ## or: Vigilant Moonlight ( Chapter 1 ) [Jan 30, 2004] + ## or even: Prologue ( Prologue ) [Jul 31, 2010] + m = re.match(r'^(.*?) (\( .*? \) )?\[(.*?)\]$',chapter) + chapter = m.group(1) + # save date from first for later. + if not firstdate: + firstdate = m.group(3) + self.chapterUrls.append((chapter,'http://'+self.host+'/fanfic/view_ch.php/'+self.story.getMetadata('storyId')+'/'+option['value'])) + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + # category + # <a href="/fanfic/src.php/a/567">Ranma 1/2</a> + for a in soup.findAll('a',href=re.compile(r"^/fanfic/src.php/a/")): + self.story.addToList('category',a.string) + + # genre + # <a href="/fanfic/src.php/a/567">Ranma 1/2</a> + for a in soup.findAll('a',href=re.compile(r"^/fanfic/src.php/g/")): + self.story.addToList('genre',a.string) + + # if firstdate, then the block below will only have last updated. 
+ if firstdate: + self.story.setMetadata('datePublished', makeDate(firstdate, "%b %d, %Y")) + # Everything else is in <tr bgcolor="#EEEED4"> + + metastr = stripHTML(soup.find("tr",{"bgcolor":"#EEEED4"})).replace('\n',' ').replace('\r',' ').replace('\t',' ') + # Latest Revision: August 03, 2010 + m = re.match(r".*?(?:Latest Revision|Uploaded On): ([a-zA-Z]+ \d\d, \d\d\d\d)",metastr) + if m: + self.story.setMetadata('dateUpdated', makeDate(m.group(1), "%B %d, %Y")) + if not firstdate: + self.story.setMetadata('datePublished', + self.story.getMetadataRaw('dateUpdated')) + + else: + self.story.setMetadata('dateUpdated', + self.story.getMetadataRaw('datePublished')) + + # Words: 123456 + m = re.match(r".*?\| Words: (\d+) \|",metastr) + if m: + self.story.setMetadata('numWords', m.group(1)) + + # Summary: .... + m = re.match(r".*?Summary: (.*)$",metastr) + if m: + self.setDescription(url, m.group(1)) + #self.story.setMetadata('description', m.group(1)) + + # completed + m = re.match(r".*?Status: Completed.*?",metastr) + if m: + self.story.setMetadata('status','Completed') + else: + self.story.setMetadata('status','In-Progress') + + return + + def getChapterText(self, url): + + logger.debug('Getting chapter text from: %s' % url) + + data=self._fetchUrl(url) + soup = bs.BeautifulStoneSoup(data, + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + + anchor = soup.find('a',{'name':'fic_c'}) + + if None == anchor: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) + + ## find divs with align=left, those are paragraphs in newer stories. + divlist = anchor.findAllNext('div',{'align':'left'}) + if divlist: + for div in divlist: + div.name='p' # convert to <p> mediaminer uses div with + # a margin for paragraphs. + anchor.append(div) # cheat! stuff all the content + # divs into anchor just as a + # holder. 
+ del div['style'] + del div['align'] + anchor.name='div' + return self.utf8FromSoup(url,anchor) + + else: + logger.debug('Using kludgey text find for older mediaminer story.') + ## Some older mediaminer stories are unparsable with BeautifulSoup. + ## Really nasty formatting. Sooo... Cheat! Parse it ourselves a bit first. + ## Story stuff falls between: + data = "<div id='HERE'>" + data[data.find('<a name="fic_c">'):] +"</div>" + soup = bs.BeautifulStoneSoup(data, + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + for tag in soup.findAll('td',{'class':'ffh'}) + \ + soup.findAll('div',{'class':'acl'}) + \ + soup.findAll('div',{'class':'footer smtxt'}) + \ + soup.findAll('table',{'class':'tbbrdr'}): + tag.extract() # remove tag from soup. + + return self.utf8FromSoup(url,soup) + + +def getClass(): + return MediaMinerOrgSiteAdapter + diff --git a/fanficdownloader/adapters/adapter_merlinficdtwinscouk.py b/fanficdownloader/adapters/adapter_merlinficdtwinscouk.py new file mode 100644 index 00000000..ea518ea8 --- /dev/null +++ b/fanficdownloader/adapters/adapter_merlinficdtwinscouk.py @@ -0,0 +1,294 @@ +# -*- coding: utf-8 -*- + +# Copyright 2012 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Software: eFiction +import time +import logging +logger = logging.getLogger(__name__) +import re +import urllib2 + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. 
import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, makeDate + +def getClass(): + return MerlinFicDtwinsCoUk + +# Class name has to be unique. Our convention is camel case the +# sitename with Adapter at the end. www is skipped. +class MerlinFicDtwinsCoUk(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + self.username = "NoneGiven" # if left empty, site doesn't return any message at all. + self.password = "" + self.is_adult=False + + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + + + # normalized story URL. + self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId')) + + # Each adapter needs to have a unique site abbreviation. + self.story.setMetadata('siteabbrev','mrfd') + + # The date format will vary from site to site. + # http://docs.python.org/library/datetime.html#strftime-strptime-behavior + self.dateformat = "%b %d, %Y" + + @staticmethod # must be @staticmethod, don't remove it. + def getSiteDomain(): + # The site domain. Does have www here, if it uses it. + return 'merlinfic.dtwins.co.uk' + + @classmethod + def getSiteExampleURLs(cls): + return "http://"+cls.getSiteDomain()+"/viewstory.php?sid=1234" + + def getSiteURLPattern(self): + return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$" + + ## Login seems to be reasonably standard across eFiction sites. 
+ def needToLoginCheck(self, data): + if 'Registered Users Only' in data \ + or 'There is no such account on our website' in data \ + or "That password doesn't match the one in our database" in data: + return True + else: + return False + + def performLogin(self, url): + params = {} + + if self.password: + params['penname'] = self.username + params['password'] = self.password + else: + params['penname'] = self.getConfig("username") + params['password'] = self.getConfig("password") + params['cookiecheck'] = '1' + params['submit'] = 'Submit' + + loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login' + logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl, + params['penname'])) + + d = self._fetchUrl(loginUrl, params) + + if "Member Account" not in d : #Member Account + logger.info("Failed to login to URL %s as %s" % (loginUrl, + params['penname'])) + raise exceptions.FailedToLogin(url,params['penname']) + return False + else: + return True + + ## Getting the chapter list and the meta data, plus 'is adult' checking. + def extractChapterUrlsAndMetadata(self): + + if self.is_adult or self.getConfig("is_adult"): + # Weirdly, different sites use different warning numbers. + # If the title search below fails, there's a good chance + # you need a different number. print data at that point + # and see what the 'click here to continue' url says. + addurl = "&ageconsent=ok&warning=4" + else: + addurl="" + + # index=1 makes sure we see the story chapter index. Some + # sites skip that for one-chapter stories. + url = self.url+'&index=1'+addurl + logger.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + if self.needToLoginCheck(data): + # need to log in for this one. 
+ self.performLogin(url) + data = self._fetchUrl(url) + + m = re.search(r"'viewstory.php\?sid=\d+((?:&ageconsent=ok)?&warning=\d+)'",data) + if m != None: + if self.is_adult or self.getConfig("is_adult"): + # We tried the default and still got a warning, so + # let's pull the warning number from the 'continue' + # link and reload data. + addurl = m.group(1) + # correct stupid & error in url. + addurl = addurl.replace("&","&") + url = self.url+'&index=1'+addurl + logger.debug("URL 2nd try: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + else: + raise exceptions.AdultCheckRequired(self.url) + + if "Access denied. This story has not been validated by the adminstrators of this site." in data: + raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.") + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulSoup(data) + # print data + + # Now go hunting for all the meta data and the chapter list. + + ## Title + a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) + self.story.setMetadata('title',stripHTML(a)) + + # Find authorid and URL from... author url. + a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) + self.story.setMetadata('authorId',a['href'].split('=')[1]) + self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href']) + self.story.setMetadata('author',a.string) + + # Find the chapters: + for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")): + # just in case there's tags, like <i> in chapter titles. 
+ self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+chapter['href']+addurl)) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + # eFiction sites don't help us out a lot with their meta data + # formating, so it's a little ugly. + + # utility method + def defaultGetattr(d,k): + try: + return d[k] + except: + return "" + + # <span class="label">Rated:</span> NC-17<br /> etc + labels = soup.findAll('span',{'class':'label'}) + for labelspan in labels: + value = labelspan.nextSibling + label = labelspan.string + + if 'Summary' in label: + ## Everything until the next span class='label' + svalue = "" + while not defaultGetattr(value,'class') == 'label': + svalue += str(value) + value = value.nextSibling + self.setDescription(url,svalue) + #self.story.setMetadata('description',stripHTML(svalue)) + + if 'Rated' in label: + self.story.setMetadata('rating', value) + + if 'Word count' in label: + self.story.setMetadata('numWords', value) + + if 'Categories' in label: + cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories')) + for cat in cats: + self.story.addToList('category',cat.string) + + if 'Characters' in label: + chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters')) + for char in chars: + self.story.addToList('characters',char.string) + + if 'Pairing' in label: + ships = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1')) + for ship in ships: + self.story.addToList('ships',ship.string) + + if 'Genre' in label: + genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=3')) + for genre in genres: + self.story.addToList('genre',genre.string) + + if 'Warnings' in label: + warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) + for warning in warnings: + self.story.addToList('warnings',warning.string) + + if 'Completed' in label: + if 'Yes' in value: + 
self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + + if 'Published' in label: + self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat)) + + if 'Updated' in label: + self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat)) + + try: + # Find Series name from series URL. + a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+")) + series_name = a.string + series_url = 'http://'+self.host+'/'+a['href'] + + # use BeautifulSoup HTML parser to make everything easier to find. + seriessoup = bs.BeautifulSoup(self._fetchUrl(series_url)) + # can't use ^viewstory...$ in case of higher rated stories with javascript href. + storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+')) + i=1 + for a in storyas: + # skip 'report this' and 'TOC' links + if 'contact.php' not in a['href'] and 'index' not in a['href']: + if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')): + self.setSeries(series_name, i) + self.story.setMetadata('seriesUrl',series_url) + break + i+=1 + + except: + # I find it hard to care if the series parsing fails + pass + + # grab the text for an individual chapter. + def getChapterText(self, url): + + logger.debug('Getting chapter text from: %s' % url) + + soup = bs.BeautifulStoneSoup(self._fetchUrl(url), + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + + div = soup.find('div', {'id' : 'story'}) + + if None == div: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" 
% url) + + return self.utf8FromSoup(url,div) diff --git a/fanficdownloader/adapters/adapter_midnightwhispersca.py b/fanficdownloader/adapters/adapter_midnightwhispersca.py new file mode 100644 index 00000000..ec884d66 --- /dev/null +++ b/fanficdownloader/adapters/adapter_midnightwhispersca.py @@ -0,0 +1,290 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Software: eFiction +import time +import logging +logger = logging.getLogger(__name__) +import re +import urllib2 + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, makeDate + +# Search for XXX comments--that's where things are most likely to need changing. + +# This function is called by the downloader in all adapter_*.py files +# in this dir to register the adapter class. So it needs to be +# updated to reflect the class below it. That, plus getSiteDomain() +# take care of 'Registering'. +def getClass(): + return MidnightwhispersCaAdapter # XXX + +# Class name has to be unique. Our convention is camel case the +# sitename with Adapter at the end. www is skipped. +class MidnightwhispersCaAdapter(BaseSiteAdapter): # XXX + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. 
+ # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + self.is_adult=False + + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + + + # normalized story URL. + # XXX Most sites don't have the /fanfic part. Replace all to remove it usually. + self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId')) + + # Each adapter needs to have a unique site abbreviation. + self.story.setMetadata('siteabbrev','mw') # XXX + + # The date format will vary from site to site. + # http://docs.python.org/library/datetime.html#strftime-strptime-behavior + self.dateformat = "%B %d, %Y" # XXX + + @staticmethod # must be @staticmethod, don't remove it. + def getSiteDomain(): + # The site domain. Does have www here, if it uses it. + return 'www.midnightwhispers.ca' # XXX + + @classmethod + def getSiteExampleURLs(cls): + return "http://"+cls.getSiteDomain()+"/viewstory.php?sid=1234" + + def getSiteURLPattern(self): + return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$" + + ## Getting the chapter list and the meta data, plus 'is adult' checking. + def extractChapterUrlsAndMetadata(self): + + if self.is_adult or self.getConfig("is_adult"): + # Weirdly, different sites use different warning numbers. + # If the title search below fails, there's a good chance + # you need a different number. print data at that point + # and see what the 'click here to continue' url says. + + # Furthermore, there's a couple sites now with more than + # one warning level for different ratings. And they're + # fussy about it. midnightwhispers has three: 10, 3 & 5. + # we'll try 5 first. + addurl = "&ageconsent=ok&warning=5" # XXX + else: + addurl="" + + # index=1 makes sure we see the story chapter index. Some + # sites skip that for one-chapter stories. 
+ url = self.url+'&index=1'+addurl + logger.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + # The actual text that is used to announce you need to be an + # adult varies from site to site. Again, print data before + # the title search to troubleshoot. + + # Since the warning text can change by warning level, let's + # look for the warning pass url. nfacommunity uses + # &warning= -- actually, so do other sites. Must be an + # eFiction book. + + # viewstory.php?sid=1882&warning=4 + # viewstory.php?sid=1654&ageconsent=ok&warning=5 + #print data + #m = re.search(r"'viewstory.php\?sid=1882(&warning=4)'",data) + m = re.search(r"'viewstory.php\?sid=\d+((?:&ageconsent=ok)?&warning=\d+)'",data) + if m != None: + if self.is_adult or self.getConfig("is_adult"): + # We tried the default and still got a warning, so + # let's pull the warning number from the 'continue' + # link and reload data. + addurl = m.group(1) + # correct stupid & error in url. + addurl = addurl.replace("&","&") + url = self.url+'&index=1'+addurl + logger.debug("URL 2nd try: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + else: + raise exceptions.AdultCheckRequired(self.url) + + if "Access denied. This story has not been validated by the adminstrators of this site." in data: + raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.") + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulSoup(data) + # print data + + + # Now go hunting for all the meta data and the chapter list. 
+ + ## Title + a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) + self.story.setMetadata('title',stripHTML(a)) # title's inside a <b> tag. + + # Find authorid and URL from... author url. + a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) + self.story.setMetadata('authorId',a['href'].split('=')[1]) + self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href']) + self.story.setMetadata('author',a.string) + + # Find the chapters: + for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")): + # just in case there's tags, like <i> in chapter titles. + self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+chapter['href']+addurl)) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + # eFiction sites don't help us out a lot with their meta data + # formating, so it's a little ugly. + + # utility method + def defaultGetattr(d,k): + try: + return d[k] + except: + return "" + + # <span class="label">Rated:</span> NC-17<br /> etc + labels = soup.findAll('span',{'class':'label'}) + for labelspan in labels: + value = labelspan.nextSibling + label = labelspan.string + + if 'Summary' in label: + ## Everything until the next span class='label' + svalue = "" + while not defaultGetattr(value,'class') == 'label': + svalue += str(value) + value = value.nextSibling + self.setDescription(url,svalue) + #self.story.setMetadata('description',stripHTML(svalue)) + + if 'Rated' in label: + self.story.setMetadata('rating', value) + + if 'Word count' in label: + self.story.setMetadata('numWords', value) + + if 'Categories' in label: + cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories')) + catstext = [cat.string for cat in cats] + for cat in catstext: + self.story.addToList('category',cat.string) + + if 'Characters' in label: + chars = 
labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters')) + charstext = [char.string for char in chars] + for char in charstext: + self.story.addToList('characters',char.string) + + ## Not all sites use Genre, but there's no harm to + ## leaving it in. Check to make sure the type_id number + ## is correct, though--it's site specific. + if 'Genre' in label: + genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX + genrestext = [genre.string for genre in genres] + self.genre = ', '.join(genrestext) + for genre in genrestext: + self.story.addToList('genre',genre.string) + + ## Not all sites use Warnings, but there's no harm to + ## leaving it in. Check to make sure the type_id number + ## is correct, though--it's site specific. + if 'Warnings' in label: + warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX + warningstext = [warning.string for warning in warnings] + self.warning = ', '.join(warningstext) + for warning in warningstext: + self.story.addToList('warnings',warning.string) + + if 'Completed' in label: + if 'Yes' in value: + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + + if 'Published' in label: + self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat)) + + if 'Updated' in label: + # there's a stray [ at the end. + #value = value[0:-1] + self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat)) + + try: + # Find Series name from series URL. + a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+")) + series_name = a.string + series_url = 'http://'+self.host+'/'+a['href'] + + # use BeautifulSoup HTML parser to make everything easier to find. 
+ seriessoup = bs.BeautifulSoup(self._fetchUrl(series_url)) + storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$')) + i=1 + for a in storyas: + # skip 'report this' and 'TOC' links + if 'contact.php' not in a['href'] and 'index' not in a['href']: + if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')): + self.setSeries(series_name, i) + self.story.setMetadata('seriesUrl',series_url) + break + i+=1 + + except: + # I find it hard to care if the series parsing fails + pass + + # grab the text for an individual chapter. + def getChapterText(self, url): + + logger.debug('Getting chapter text from: %s' % url) + + data = self._fetchUrl(url) + soup = bs.BeautifulStoneSoup(data, + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + + div = soup.find('div', {'id' : 'story'}) + + if None == div: + if "A fatal MySQL error was encountered" in data: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Database error on the site reported!" % url) + else: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) + + return self.utf8FromSoup(url,div) diff --git a/fanficdownloader/adapters/adapter_mugglenetcom.py b/fanficdownloader/adapters/adapter_mugglenetcom.py new file mode 100644 index 00000000..261a623f --- /dev/null +++ b/fanficdownloader/adapters/adapter_mugglenetcom.py @@ -0,0 +1,336 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Software: eFiction +import time +import logging +logger = logging.getLogger(__name__) +import re +import urllib2 + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, makeDate + +# This function is called by the downloader in all adapter_*.py files +# in this dir to register the adapter class. So it needs to be +# updated to reflect the class below it. That, plus getSiteDomain() +# take care of 'Registering'. +def getClass(): + return MuggleNetComAdapter # XXX + +# Class name has to be unique. Our convention is camel case the +# sitename with Adapter at the end. www is skipped. +class MuggleNetComAdapter(BaseSiteAdapter): # XXX + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + self.username = "NoneGiven" # if left empty, site doesn't return any message at all. + self.password = "" + self.is_adult=False + + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + + + # normalized story URL. + self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId')) + + # Each adapter needs to have a unique site abbreviation. + self.story.setMetadata('siteabbrev','mgln') # XXX + + # The date format will vary from site to site. + # http://docs.python.org/library/datetime.html#strftime-strptime-behavior + self.dateformat = "%m/%d/%y" # XXX + + @staticmethod # must be @staticmethod, don't remove it. + def getSiteDomain(): + # The site domain. 
+ return 'fanfiction.mugglenet.com' + + @classmethod + def getAcceptDomains(cls): + return ['fanfiction.mugglenet.com','fanfic.mugglenet.com'] + + @classmethod + def getSiteExampleURLs(cls): + return "http://"+cls.getSiteDomain()+"/viewstory.php?sid=1234" + + def getSiteURLPattern(self): + return re.escape("http://")+r"fanfic(tion)?\.mugglenet\.com"+re.escape("/viewstory.php?sid=")+r"\d+$" + + ## Login seems to be reasonably standard across eFiction sites. + def needToLoginCheck(self, data): + if "class='errortext'>Registered Users Only" in data \ + or 'There is no such account on our website' in data \ + or "That password doesn't match the one in our database" in data: + return True + else: + return False + + def performLogin(self, url): + params = {} + + if self.password: + params['penname'] = self.username + params['password'] = self.password + else: + params['penname'] = self.getConfig("username") + params['password'] = self.getConfig("password") + params['cookiecheck'] = '1' + params['submit'] = 'Submit' + + loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login&sid='+self.story.getMetadata('storyId') + logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl, + params['penname'])) + + d = self._fetchUrl(loginUrl, params) + + if "Member Account" not in d : #Member Account + logger.info("Failed to login to URL %s as %s" % (loginUrl, + params['penname'])) + raise exceptions.FailedToLogin(url,params['penname']) + return False + else: + return True + + ## Getting the chapter list and the meta data, plus 'is adult' checking. + def extractChapterUrlsAndMetadata(self): + + if self.is_adult or self.getConfig("is_adult"): + # Weirdly, different sites use different warning numbers. + # If the title search below fails, there's a good chance + # you need a different number. print data at that point + # and see what the 'click here to continue' url says. 
+ # http://fanfiction.mugglenet.com/viewstory.php?sid=91079&ageconsent=ok&warning=3 + addurl = "&ageconsent=ok&warning=3" # XXX &warning=5 + else: + addurl="" + + # index=1 makes sure we see the story chapter index. Some + # sites skip that for one-chapter stories. + url = self.url+'&index=1'+addurl + logger.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + if self.needToLoginCheck(data): + # need to log in for this one. + self.performLogin(url) + data = self._fetchUrl(url) + #print("\nurl:%s\ndata:\n%s\n"%(url,data)) + + # The actual text that is used to announce you need to be an + # adult varies from site to site. Again, print data before + # the title search to troubleshoot. + + # Since the warning text can change by warning level, let's + # look for the warning pass url. nfacommunity uses + # &warning= -- actually, so do other sites. Must be an + # eFiction book. + + # viewstory.php?sid=1882&warning=4 + # viewstory.php?sid=1654&ageconsent=ok&warning=5 + #print data + #m = re.search(r"'viewstory.php\?sid=1882(&warning=4)'",data) + m = re.search(r"'viewstory.php\?sid=%s((?:&ageconsent=ok)?&warning=\d+)'"%self.story.getMetadata('storyId'),data) + if m != None: + if self.is_adult or self.getConfig("is_adult"): + # We tried the default and still got a warning, so + # let's pull the warning number from the 'continue' + # link and reload data. + addurl = m.group(1) + # correct stupid & error in url. + addurl = addurl.replace("&","&") + url = self.url+'&index=1'+addurl + logger.debug("URL 2nd try: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + else: + raise exceptions.AdultCheckRequired(self.url) + + if "Access denied. This story has not been validated by the adminstrators of this site." 
in data: + raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.") + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulSoup(data) + # print data + + # Now go hunting for all the meta data and the chapter list. + + ## Title + a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) + self.story.setMetadata('title',stripHTML(a)) + + # Find authorid and URL from... author url. + a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) + self.story.setMetadata('authorId',a['href'].split('=')[1]) + self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href']) + self.story.setMetadata('author',a.string) + + # Find the chapters: + for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")): + # just in case there's tags, like <i> in chapter titles. + self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+chapter['href']+addurl)) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + + # Not good enough-- content can contain a ('), which ends the content prematurely. + # metadesc = soup.find('meta',{'name':'description'}) + # print("removeAllEntities(metadesc['content']):\n%s\n"%removeAllEntities(metadesc['content'])) + start='<span class="label">Summary: </span>' + end='<span class="label">Rated:</span>' + summarydata = data[data.index(start)+len(start):data.index(end)] + #print("summarydata:\n%s\n"%summarydata) + self.setDescription(url,bs.BeautifulSoup(summarydata)) + + # eFiction sites don't help us out a lot with their meta data + # formating, so it's a little ugly. 
+ + # utility method + def defaultGetattr(d,k): + try: + return d[k] + except: + return "" + + # <span class="label">Rated:</span> NC-17<br /> etc + labels = soup.findAll('span',{'class':'label'}) + for labelspan in labels: + value = labelspan.nextSibling + label = labelspan.string + + # not good enough--poorly formated summary html will break it. + # if 'Summary' in label: + # ## Everything until the next span class='label' + # svalue = "" + # while not defaultGetattr(value,'class') == 'label': + # svalue += str(value) + # value = value.nextSibling + # self.setDescription(url,svalue) + #self.story.setMetadata('description',stripHTML(svalue)) + + if 'Rated' in label: + self.story.setMetadata('rating', value) + + if 'Word count' in label: + self.story.setMetadata('numWords', value) + + if 'Categories' in label: + cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories')) + catstext = [cat.string for cat in cats] + for cat in catstext: + self.story.addToList('category',cat.string) + + if 'Characters' in label: + chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters')) + charstext = [char.string for char in chars] + for char in charstext: + self.story.addToList('characters',char.string) + + ## Not all sites use Genre, but there's no harm to + ## leaving it in. Check to make sure the type_id number + ## is correct, though--it's site specific. + if 'Genre' in label: + genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX + genrestext = [genre.string for genre in genres] + self.genre = ', '.join(genrestext) + for genre in genrestext: + self.story.addToList('genre',genre.string) + + ## Not all sites use Warnings, but there's no harm to + ## leaving it in. Check to make sure the type_id number + ## is correct, though--it's site specific. 
+ if 'Warnings' in label: + warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX + warningstext = [warning.string for warning in warnings] + self.warning = ', '.join(warningstext) + for warning in warningstext: + self.story.addToList('warnings',warning.string) + + if 'Completed' in label: + if 'Yes' in value: + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + + if 'Published' in label: + self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat)) + + if 'Updated' in label: + # there's a stray [ at the end. + #value = value[0:-1] + self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat)) + + try: + # Find Series name from series URL. + a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+")) + series_name = a.string + series_url = 'http://'+self.host+'/'+a['href'] + + # use BeautifulSoup HTML parser to make everything easier to find. + seriessoup = bs.BeautifulSoup(self._fetchUrl(series_url)) + storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+')) + i=1 + for a in storyas: + # skip 'report this' and 'TOC' links + if 'contact.php' not in a['href'] and 'index' not in a['href']: + if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')): + self.setSeries(series_name, i) + self.story.setMetadata('seriesUrl',series_url) + break + i+=1 + + except: + # I find it hard to care if the series parsing fails + pass + + # grab the text for an individual chapter. + def getChapterText(self, url): + + logger.debug('Getting chapter text from: %s' % url) + + soup = bs.BeautifulStoneSoup(self._fetchUrl(url), + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + + div = soup.find('div', {'id' : 'story'}) + + if None == div: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" 
% url) + + return self.utf8FromSoup(url,div) diff --git a/fanficdownloader/adapters/adapter_nationallibrarynet.py b/fanficdownloader/adapters/adapter_nationallibrarynet.py new file mode 100644 index 00000000..c9d75310 --- /dev/null +++ b/fanficdownloader/adapters/adapter_nationallibrarynet.py @@ -0,0 +1,213 @@ +# -*- coding: utf-8 -*- + +# Copyright 2012 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Software: eFiction +import time +import logging +logger = logging.getLogger(__name__) +import re +import urllib2 + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, makeDate + +def getClass(): + return NationalLibraryNetAdapter + +# Class name has to be unique. Our convention is camel case the +# sitename with Adapter at the end. www is skipped. +class NationalLibraryNetAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + self.username = "NoneGiven" # if left empty, site doesn't return any message at all. 
+ self.password = "" + self.is_adult=False + + # get storyId from url--url validation guarantees query is only storyid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + + + # normalized story URL. + self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?storyid='+self.story.getMetadata('storyId')) + + # Each adapter needs to have a unique site abbreviation. + self.story.setMetadata('siteabbrev','ntlb') + + # The date format will vary from site to site. + # http://docs.python.org/library/datetime.html#strftime-strptime-behavior + self.dateformat = "%m-%d-%y" + + @staticmethod # must be @staticmethod, don't remove it. + def getSiteDomain(): + return 'national-library.net' + + @classmethod + def getAcceptDomains(cls): + return ['www.national-library.net','national-library.net'] + + @classmethod + def getSiteExampleURLs(cls): + # ONLY the stories archived on or after June 17, 2006 and that are hosted on the website: + return "http://"+cls.getSiteDomain()+"/viewstory.php?storyid=1234" + + def getSiteURLPattern(self): + return re.escape("http://")+"(www\.)?"+re.escape(self.getSiteDomain()+"/viewstory.php?storyid=")+r"\d+$" + + + ## Getting the chapter list and the meta data, plus 'is adult' checking. + def extractChapterUrlsAndMetadata(self): + + # index=1 makes sure we see the story chapter index. Some + # sites skip that for one-chapter stories. + url = self.url + logger.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + if "Access denied. This story has not been validated by the adminstrators of this site." in data: + raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.") + + # use BeautifulSoup HTML parser to make everything easier to find. 
+ soup = bs.BeautifulSoup(data) + # print data + + # Now go hunting for all the meta data and the chapter list. + + ## Title + a = soup.find('h1') + self.story.setMetadata('title',stripHTML(a)) + + # Find authorid and URL from... author url. + a = soup.find('a', href=re.compile(r"authorresults.php\?author=\d+")) + self.story.setMetadata('authorId',a['href'].split('=')[1]) + self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href']) + self.story.setMetadata('author',a.string) + + # Find the chapters: + for p in soup.findAll('p'): + chapters = p.findAll('a', href=re.compile(r'viewstory.php\?storyid='+self.story.getMetadata('storyId')+"&chapnum=\d+$")) + if len(chapters) > 0: + for chapter in chapters: + # just in case there's tags, like <i> in chapter titles. + self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+chapter['href'])) + break + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + self.story.setMetadata('status', 'Completed') + + # <span class="label">Rated:</span> NC-17<br /> etc + labels = soup.findAll('b') + for x in range(2,len(labels)): + value = labels[x].nextSibling + label = labels[x].string + + if 'Summary' in label: + self.setDescription(url,value) + #self.story.setMetadata('description',stripHTML(svalue)) + + if 'Rating' in label: + self.story.setMetadata('rating', stripHTML(value.nextSibling)) + + if 'Word Count' in label: + self.story.setMetadata('numWords', value.string) + + if 'Category' in label: + for cat in value.string.split(', '): + self.story.addToList('category',cat) + if 'Crossover Shows' in label: + for cat in value.string.split(', '): + if "No Show" not in cat: + self.story.addToList('category',cat) + + if 'Character' in label: + for char in value.string.split(', '): + self.story.addToList('characters',char) + + if 'Pairing' in label: + for char in value.string.split(', '): + self.story.addToList('ships',char) + + if 'Warnings' in label: + for warning in value.string.split(', '): + 
self.story.addToList('warnings',warning) + + if 'Published' in label: + self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat)) + + if 'Series' in label: + self.setSeries(stripHTML(value.nextSibling), value.nextSibling.nextSibling.string[2:]) + self.story.setMetadata('seriesUrl','http://'+self.host+'/'+value.nextSibling['href']) + + asoup = bs.BeautifulSoup(self._fetchUrl(self.story.getMetadata('authorUrl'))) + story=asoup.find('a', href=re.compile(r'viewstory.php\?storyid='+self.story.getMetadata('storyId'))) + + a=story.findNext(text=re.compile('Genre')).parent.nextSibling.string.split(', ') + for genre in a: + self.story.setMetadata('genre', genre) + + a=story.findNext(text=re.compile('Archived')) + self.story.setMetadata('datePublished', makeDate(stripHTML(a.parent.nextSibling), self.dateformat)) + self.story.setMetadata('dateUpdated', makeDate(stripHTML(a.parent.nextSibling), self.dateformat)) + + # grab the text for an individual chapter. + def getChapterText(self, url): + + logger.debug('Getting chapter text from: %s' % url) + + soup = bs.BeautifulSoup(self._fetchUrl(url), + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + + div = soup.find('div') + + # bit messy since higly inconsistent + for p in soup.findAll('p', {'align' : 'center'}): + p.extract() + p = soup.findAll('p') + for x in range(0,3): + p[x].extract() + if "Chapters: " in stripHTML(p[3]): + p[3].extract() + for x in range(len(p)-2,len(p)-1): + p[x].extract() + + for p in soup.findAll('h1'): + p.extract() + for p in soup.findAll('h3'): + p.extract() + for p in soup.findAll('a'): + p.extract() + + if None == div: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" 
% url) + + return self.utf8FromSoup(url,div) diff --git a/fanficdownloader/adapters/adapter_ncisficcom.py b/fanficdownloader/adapters/adapter_ncisficcom.py new file mode 100644 index 00000000..dbbf3fb6 --- /dev/null +++ b/fanficdownloader/adapters/adapter_ncisficcom.py @@ -0,0 +1,219 @@ +# -*- coding: utf-8 -*- + +# Copyright 2012 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Software: eFiction +import time +import logging +logger = logging.getLogger(__name__) +import re +import urllib2 + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, makeDate + +def getClass(): + return NCISFicComAdapter + +# Class name has to be unique. Our convention is camel case the +# sitename with Adapter at the end. www is skipped. +class NCISFicComAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + self.username = "NoneGiven" # if left empty, site doesn't return any message at all. + self.password = "" + self.is_adult=False + + # get storyId from url--url validation guarantees query is only storyid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + + + # normalized story URL. 
+ self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?storyid='+self.story.getMetadata('storyId')) + + # Each adapter needs to have a unique site abbreviation. + self.story.setMetadata('siteabbrev','ncisf') + + # The date format will vary from site to site. + # http://docs.python.org/library/datetime.html#strftime-strptime-behavior + self.dateformat = "%m-%d-%y" + + @staticmethod # must be @staticmethod, don't remove it. + def getSiteDomain(): + return 'ncisfic.com' + + @classmethod + def getAcceptDomains(cls): + return ['www.ncisfic.com','ncisfic.com'] + + @classmethod + def getSiteExampleURLs(cls): + return "http://"+cls.getSiteDomain()+"/viewstory.php?storyid=1234" + + def getSiteURLPattern(self): + return re.escape("http://")+"(www\.)?"+re.escape(self.getSiteDomain()+"/viewstory.php?storyid=")+r"\d+$" + + + ## Getting the chapter list and the meta data, plus 'is adult' checking. + def extractChapterUrlsAndMetadata(self): + + # index=1 makes sure we see the story chapter index. Some + # sites skip that for one-chapter stories. + url = self.url + logger.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + if "Access denied. This story has not been validated by the adminstrators of this site." in data: + raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.") + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulSoup(data) + # print data + + # Now go hunting for all the meta data and the chapter list. + + ## Title + a = soup.find('h1') + self.story.setMetadata('title',stripHTML(a)) + + # Find authorid and URL from... author url. 
+ a = soup.find('a', href=re.compile(r"authorresults.php\?author=\d+")) + self.story.setMetadata('authorId',a['href'].split('=')[1]) + self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href']) + self.story.setMetadata('author',a.string) + + # Find the chapters: + for p in soup.findAll('p'): + chapters = p.findAll('a', href=re.compile(r'viewstory.php\?storyid='+self.story.getMetadata('storyId')+"&chapnum=\d+$")) + if len(chapters) > 0: + for chapter in chapters: + # just in case there's tags, like <i> in chapter titles. + self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+chapter['href'])) + break + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + self.story.setMetadata('status', 'Completed') + + # <span class="label">Rated:</span> NC-17<br /> etc + labels = soup.findAll('b') + for x in range(2,len(labels)): + value = labels[x].nextSibling + label = labels[x].string + + if 'Summary' in label: + self.setDescription(url,value) + #self.story.setMetadata('description',stripHTML(svalue)) + + if 'Rating' in label: + self.story.setMetadata('rating', stripHTML(value.nextSibling)) + + if 'Word Count' in label: + self.story.setMetadata('numWords', value.string) + + if 'Category' in label: + for cat in value.string.split(', '): + self.story.addToList('category',cat) + if 'Crossover Shows' in label: + for cat in value.string.split(', '): + if "No Show" not in cat: + self.story.addToList('category',cat) + + if 'Character' in label: + for char in value.string.split(', '): + self.story.addToList('characters',char) + + if 'Pairing' in label: + for char in value.string.split(', '): + self.story.addToList('ships',char) + + if 'Warnings' in label: + for warning in value.string.split(', '): + self.story.addToList('warnings',warning) + + if 'Published' in label: + self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat)) + + if 'Series' in label: + if "No Series" not in value.nextSibling.string: + 
self.setSeries(stripHTML(value.nextSibling), value.nextSibling.nextSibling.string[2:]) + self.story.setMetadata('seriesUrl','http://'+self.host+'/'+value.nextSibling['href']) + + asoup = bs.BeautifulSoup(self._fetchUrl(self.story.getMetadata('authorUrl'))) + story=asoup.find('a', href=re.compile(r'viewstory.php\?storyid='+self.story.getMetadata('storyId'))) + + a=story.findNext('font') + if 'Complete' in a.string: + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + + a=story.findNext(text=re.compile('Genre')).parent.nextSibling.string.split(', ') + for genre in a: + self.story.setMetadata('genre', genre) + + a=story.findNext(text=re.compile('Archived')) + self.story.setMetadata('datePublished', makeDate(stripHTML(a.parent.nextSibling), self.dateformat)) + self.story.setMetadata('dateUpdated', makeDate(stripHTML(a.parent.nextSibling), self.dateformat)) + + # grab the text for an individual chapter. + def getChapterText(self, url): + + logger.debug('Getting chapter text from: %s' % url) + + soup = bs.BeautifulSoup(self._fetchUrl(url), + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + + div = soup.find('div') + + # bit messy since higly inconsistent + for p in soup.findAll('p', {'align' : 'center'}): + p.extract() + p = soup.findAll('p') + for x in range(0,3): + p[x].extract() + if "Chapters: " in stripHTML(p[3]): + p[3].extract() + for x in range(len(p)-2,len(p)-1): + p[x].extract() + + for p in soup.findAll('h1'): + p.extract() + for p in soup.findAll('h3'): + p.extract() + for p in soup.findAll('a'): + p.extract() + + if None == div: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" 
% url) + + return self.utf8FromSoup(url,div) diff --git a/fanficdownloader/adapters/adapter_ncisfictionnet.py b/fanficdownloader/adapters/adapter_ncisfictionnet.py new file mode 100644 index 00000000..58abedf7 --- /dev/null +++ b/fanficdownloader/adapters/adapter_ncisfictionnet.py @@ -0,0 +1,210 @@ +# -*- coding: utf-8 -*- + +# Copyright 2012 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import time +import logging +logger = logging.getLogger(__name__) +import re +import urllib2 + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, makeDate + +def getClass(): + return NCISFictionNetAdapter + +# Class name has to be unique. Our convention is camel case the +# sitename with Adapter at the end. www is skipped. +class NCISFictionNetAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + + self.decode = ["iso-8859-1", + "Windows-1252"] # 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + self.username = "NoneGiven" # if left empty, site doesn't return any message at all. + self.password = "" + self.is_adult=False + + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + + + # normalized story URL. 
+ self._setURL("http://"+self.getSiteDomain()\ + +"/chapters.php?stid="+self.story.getMetadata('storyId')) + + # Each adapter needs to have a unique site abbreviation. + self.story.setMetadata('siteabbrev','ncisfn') + + # The date format will vary from site to site. + # http://docs.python.org/library/datetime.html#strftime-strptime-behavior + self.dateformat = "%d/%m/%Y" + + @staticmethod # must be @staticmethod, don't remove it. + def getSiteDomain(): + # The site domain. Does have www here, if it uses it. + return 'www.ncisfiction.net' + + ## Changed from www.ncisfiction.com to www.ncisfiction.net Oct + ## 2012 due to the ncisfiction.com domain expiring. Still accept + ## .com domains for existing updates, etc. + + @classmethod + def getAcceptDomains(cls): + return ['www.ncisfiction.net','www.ncisfiction.com'] + + @classmethod + def getSiteExampleURLs(cls): + return "http://"+cls.getSiteDomain()+"/story.php?stid=01234 http://"+cls.getSiteDomain()+"/chapters.php?stid=1234" + + def getSiteURLPattern(self): + return r'http://www\.ncisfiction\.(net|com)/(chapters|story)?.php\?stid=\d+' + + + ## Getting the chapter list and the meta data, plus 'is adult' checking. + def extractChapterUrlsAndMetadata(self): + + # index=1 makes sure we see the story chapter index. Some + # sites skip that for one-chapter stories. + url = self.url + logger.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + + if "Access denied. This story has not been validated by the adminstrators of this site." in data: + raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.") + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulStoneSoup(data) + # print data + + # Now go hunting for all the meta data and the chapter list. 
+ + ## Title and author + a = soup.find('div', {'class' : 'main_title'}) + + aut = a.find('a') + self.story.setMetadata('authorId',aut['href'].split('=')[1]) + self.story.setMetadata('authorUrl','http://'+self.host+'/'+aut['href']) + self.story.setMetadata('author',aut.string) + + aut.extract() + self.story.setMetadata('title',stripHTML(a)[:len(stripHTML(a))-2]) + + # Find the chapters: + i=0 + chapters=soup.findAll('table', {'class' : 'story_table'}) + for chapter in chapters: + ch=chapter.find('a') + # just in case there's tags, like <i> in chapter titles. + self.chapterUrls.append((stripHTML(ch),'http://'+self.host+'/'+ch['href'])) + if i == 0: + self.story.setMetadata('datePublished', makeDate(stripHTML(chapter.find('td')).split('Added: ')[1], self.dateformat)) + if i == len(chapters)-1: + self.story.setMetadata('dateUpdated', makeDate(stripHTML(chapter.find('td')).split('Added: ')[1], self.dateformat)) + i=i+1 + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + # eFiction sites don't help us out a lot with their meta data + # formating, so it's a little ugly. 
+ + info = soup.find('table', {'class' : 'story_info'}) + + # no convenient way to calculate word count as it is logged differently for stories with and without series + + labels = info.findAll('tr') + for tr in labels: + value = tr.find('td') + label = tr.find('th').string + + if 'Summary' in label: + self.setDescription(url,value) + + if 'Rating' in label: + self.story.setMetadata('rating', value.string) + + if 'Category' in label: + cats = value.findAll('a') + for cat in cats: + self.story.addToList('category',cat.string) + + if 'Characters' in label: + chars = value.findAll('a') + for char in chars: + self.story.addToList('characters',char.string) + + if 'Pairing' in label: + ships = value.findAll('a') + for ship in ships: + self.story.addToList('ships',ship.string) + + if 'Genre' in label: + genres = value.findAll('a') + for genre in genres: + self.story.addToList('genre',genre.string) + + if 'Warnings' in label: + warnings = value.findAll('a') + for warning in warnings: + self.story.addToList('warnings',warning.string) + + if 'Status' in label: + if 'not completed' in value.text: + self.story.setMetadata('status', 'In-Progress') + else: + self.story.setMetadata('status', 'Completed') + + try: + # Find Series name from series URL. + a = soup.find('div',{'class' : 'sub_header'}) + series_name = a.find('a').string + i = a.text.split('#')[1] + self.setSeries(series_name, i) + self.story.setMetadata('seriesUrl','http://'+self.host+'/'+a.find('a')['href']) + except: + # I find it hard to care if the series parsing fails + pass + + # grab the text for an individual chapter. + def getChapterText(self, url): + + logger.debug('Getting chapter text from: %s' % url) + + soup = bs.BeautifulStoneSoup(self._fetchUrl(url), + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + + div = soup.find('div', {'class' : 'story_text'}) + + if None == div: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" 
% url) + + return self.utf8FromSoup(url,div) diff --git a/fanficdownloader/adapters/adapter_netraptororg.py b/fanficdownloader/adapters/adapter_netraptororg.py new file mode 100644 index 00000000..6d4effb4 --- /dev/null +++ b/fanficdownloader/adapters/adapter_netraptororg.py @@ -0,0 +1,213 @@ +# -*- coding: utf-8 -*- + +# Copyright 2013 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Software: eFiction +import time +import logging +logger = logging.getLogger(__name__) +import re +import urllib2 + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, makeDate + +def getClass(): + return NetRaptorOrgAdapter + +class NetRaptorOrgAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + self.username = "NoneGiven" # if left empty, site doesn't return any message at all. + self.password = "" + self.is_adult=False + + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + + + # normalized story URL. 
+ self._setURL('http://' + self.getSiteDomain() + '/fanfiction/viewstory.php?sid='+self.story.getMetadata('storyId')) + + # Each adapter needs to have a unique site abbreviation. + self.story.setMetadata('siteabbrev','netrap') + + # The date format will vary from site to site. + # http://docs.python.org/library/datetime.html#strftime-strptime-behavior + self.dateformat = "%d/%m/%y" + + @staticmethod # must be @staticmethod, don't remove it. + def getSiteDomain(): + # The site domain. Does have www here, if it uses it. + return 'netraptor.org' + + @classmethod + def getSiteExampleURLs(cls): + return "http://"+cls.getSiteDomain()+"/fanfiction/viewstory.php?sid=1234" + + def getSiteURLPattern(self): + return re.escape("http://"+self.getSiteDomain()+"/fanfiction/viewstory.php?sid=")+r"\d+$" + + ## Getting the chapter list and the meta data, plus 'is adult' checking. + def extractChapterUrlsAndMetadata(self): + + url = self.url+'&index=1' + logger.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + + if "Access denied. This story has not been validated by the adminstrators of this site." in data: + raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.") + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulSoup(data) + # print data + + # Now go hunting for all the meta data and the chapter list. + + ## Title + pagetitle = soup.find('div',{'id':'pagetitle'}) + a = pagetitle.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) + self.story.setMetadata('title',stripHTML(a)) + + # Find authorid and URL from... author url. 
+ a = pagetitle.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) + self.story.setMetadata('authorId',a['href'].split('=')[1]) + self.story.setMetadata('authorUrl','http://'+self.host+'/fanfiction/'+a['href']) + self.story.setMetadata('author',a.string) + + # Find the chapters: + for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")): + # just in case there's tags, like <i> in chapter titles. + self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/fanfiction/'+chapter['href'])) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + # eFiction sites don't help us out a lot with their meta data + # formating, so it's a little ugly. + + # utility method + def defaultGetattr(d,k): + try: + return d[k] + except: + return "" + + labels = soup.findAll('span',{'class':'label'}) + for labelspan in labels: + value = labelspan.nextSibling + label = labelspan.string + + if 'Summary' in label: + ## Everything until the next span class='label' + svalue = "" + while not defaultGetattr(value,'class') == 'label': + svalue += str(value) + value = value.nextSibling + self.setDescription(url,svalue) + #self.story.setMetadata('description',stripHTML(svalue)) + + if 'Rated' in label: + self.story.setMetadata('rating', value) + + if 'Word count' in label: + self.story.setMetadata('numWords', value) + + if 'Categories' in label: + cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories')) + catstext = [cat.string for cat in cats] + for cat in catstext: + self.story.addToList('category',cat.string) + + if 'Characters' in label: + chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters')) + charstext = [char.string for char in chars] + for char in charstext: + self.story.addToList('characters',char.string) + + if 'Genre' in label: + genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1')) + for genre in 
genres: + self.story.addToList('genre',genre.string) + + if 'Warnings' in label: + warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX + for warning in warnings: + self.story.addToList('warnings',warning.string) + + if 'Completed' in label: + if 'Yes' in value: + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + + if 'Published' in label: + self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat)) + + if 'Updated' in label: + self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat)) + + try: + # Find Series name from series URL. + a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+")) + series_name = a.string + series_url = 'http://'+self.host+'/fanfiction/'+a['href'] + + # use BeautifulSoup HTML parser to make everything easier to find. + seriessoup = bs.BeautifulSoup(self._fetchUrl(series_url)) + storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$')) + i=1 + for a in storyas: + if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')): + self.setSeries(series_name, i) + self.story.setMetadata('seriesUrl',series_url) + break + i+=1 + + except: + # I find it hard to care if the series parsing fails + pass + + # grab the text for an individual chapter. + def getChapterText(self, url): + + logger.debug('Getting chapter text from: %s' % url) + + soup = bs.BeautifulSoup(self._fetchUrl(url)) + + div = soup.find('div', {'id' : 'story'}) + + if None == div: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" 
% url) + + return self.utf8FromSoup(url,div) diff --git a/fanficdownloader/adapters/adapter_nfacommunitycom.py b/fanficdownloader/adapters/adapter_nfacommunitycom.py new file mode 100644 index 00000000..4d428e7e --- /dev/null +++ b/fanficdownloader/adapters/adapter_nfacommunitycom.py @@ -0,0 +1,290 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Software: eFiction +import time +import logging +logger = logging.getLogger(__name__) +import re +import urllib2 + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, makeDate + +# Search for XXX comments--that's where things are most likely to need changing. + +# This function is called by the downloader in all adapter_*.py files +# in this dir to register the adapter class. So it needs to be +# updated to reflect the class below it. That, plus getSiteDomain() +# take care of 'Registering'. +def getClass(): + return NfaCommunityComAdapter # XXX + +# Class name has to be unique. Our convention is camel case the +# sitename with Adapter at the end. www is skipped. +class NfaCommunityComAdapter(BaseSiteAdapter): # XXX + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. 
+ # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + self.is_adult=False + + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + + + # normalized story URL. + # XXX Most sites don't have the /fanfic part. Replace all to remove it usually. + self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId')) + + # Each adapter needs to have a unique site abbreviation. + self.story.setMetadata('siteabbrev','nfa') # XXX + + # The date format will vary from site to site. + # http://docs.python.org/library/datetime.html#strftime-strptime-behavior + self.dateformat = "%m/%d/%Y" # XXX + + @classmethod + def getAcceptDomains(cls): + return ['www.nfacommunity.com','nfacommunity.com'] + + @staticmethod # must be @staticmethod, don't remove it. + def getSiteDomain(): + # The site domain. Does have www here, if it uses it. + return 'nfacommunity.com' # XXX + + @classmethod + def getSiteExampleURLs(cls): + return "http://"+cls.getSiteDomain()+"/viewstory.php?sid=1234" + + def getSiteURLPattern(self): + return "http://(www.)?"+re.escape(self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$" + + ## Getting the chapter list and the meta data, plus 'is adult' checking. + def extractChapterUrlsAndMetadata(self): + + if self.is_adult or self.getConfig("is_adult"): + # Weirdly, different sites use different warning numbers. + # If the title search below fails, there's a good chance + # you need a different number. print data at that point + # and see what the 'click here to continue' url says. + + # Furthermore, there's a couple sites now with more than + # one warning level for different ratings. And they're + # fussy about it. nfacommunity has two: 4 & 5. + # we'll try 5 first. + addurl = "&ageconsent=ok&warning=5" # XXX + else: + addurl="" + + # index=1 makes sure we see the story chapter index. 
Some + # sites skip that for one-chapter stories. + url = self.url+'&index=1'+addurl + logger.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + # The actual text that is used to announce you need to be an + # adult varies from site to site. Again, print data before + # the title search to troubleshoot. + + # Since the warning text can change by warning level, let's + # look for the warning pass url. nfacommunity uses + # &warning= -- actually, so do other sites. Must be an + # eFiction book. + + # viewstory.php?sid=1882&warning=4 + # viewstory.php?sid=1654&ageconsent=ok&warning=5 + #print data + #m = re.search(r"'viewstory.php\?sid=1882(&warning=4)'",data) + m = re.search(r"'viewstory.php\?sid=\d+((?:&ageconsent=ok)?&warning=\d+)'",data) + if m != None: + if self.is_adult or self.getConfig("is_adult"): + # We tried the default and still got a warning, so + # let's pull the warning number from the 'continue' + # link and reload data. + addurl = m.group(1) + # correct stupid & error in url. + addurl = addurl.replace("&","&") + url = self.url+'&index=1'+addurl + logger.debug("URL 2nd try: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + else: + raise exceptions.AdultCheckRequired(self.url) + + if "Access denied. This story has not been validated by the adminstrators of this site." in data: + raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.") + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulSoup(data) + + # Now go hunting for all the meta data and the chapter list. 
+ + ## Title + a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) + self.story.setMetadata('title',stripHTML(a)) + + # Find authorid and URL from... author url. + a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) + self.story.setMetadata('authorId',a['href'].split('=')[1]) + self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href']) + self.story.setMetadata('author',a.string) + + # Find the chapters: + for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")): + # just in case there's tags, like <i> in chapter titles. + self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+chapter['href']+addurl)) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + + + # eFiction sites don't help us out a lot with their meta data + # formating, so it's a little ugly. + + # utility method + def defaultGetattr(d,k): + try: + return d[k] + except: + return "" + + # <span class="label">Rated:</span> NC-17<br /> etc + labels = soup.findAll('span',{'class':'label'}) + for labelspan in labels: + value = labelspan.nextSibling + label = labelspan.string + + if 'Summary' in label: + ## Everything until the next span class='label' + svalue = "" + while not defaultGetattr(value,'class') == 'label': + svalue += str(value) + value = value.nextSibling + self.setDescription(url,svalue) + #self.story.setMetadata('description',stripHTML(svalue)) + + if 'Rated' in label: + self.story.setMetadata('rating', value) + + if 'Word count' in label: + self.story.setMetadata('numWords', value) + + if 'Categories' in label: + cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories')) + catstext = [cat.string for cat in cats] + for cat in catstext: + self.story.addToList('category',cat.string) + + if 'Characters' in label: + chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters')) + charstext = 
[char.string for char in chars] + for char in charstext: + self.story.addToList('characters',char.string) + + ## Not all sites use Genre, but there's no harm to + ## leaving it in. Check to make sure the type_id number + ## is correct, though--it's site specific. + if 'Genre' in label: + genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX + genrestext = [genre.string for genre in genres] + self.genre = ', '.join(genrestext) + for genre in genrestext: + self.story.addToList('genre',genre.string) + + ## Not all sites use Warnings, but there's no harm to + ## leaving it in. Check to make sure the type_id number + ## is correct, though--it's site specific. + if 'Warnings' in label: + warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX + warningstext = [warning.string for warning in warnings] + self.warning = ', '.join(warningstext) + for warning in warningstext: + self.story.addToList('warnings',warning.string) + + if 'Completed' in label: + if 'Yes' in value: + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + + if 'Published' in label: + self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat)) + + if 'Updated' in label: + # there's a stray [ at the end. + #value = value[0:-1] + self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat)) + + try: + # Find Series name from series URL. + a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+")) + series_name = a.string + series_url = 'http://'+self.host+'/'+a['href'] + + # use BeautifulSoup HTML parser to make everything easier to find. 
+ seriessoup = bs.BeautifulSoup(self._fetchUrl(series_url)) + storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+')) + i=1 + for a in storyas: + # skip 'report this' and 'TOC' links + if 'contact.php' not in a['href'] and 'index' not in a['href']: + if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')): + self.setSeries(series_name, i) + self.story.setMetadata('seriesUrl',series_url) + break + i+=1 + + except: + # I find it hard to care if the series parsing fails + pass + + # grab the text for an individual chapter. + def getChapterText(self, url): + + logger.debug('Getting chapter text from: %s' % url) + + soup = bs.BeautifulStoneSoup(self._fetchUrl(url), + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + + div = soup.find('div', {'id' : 'story'}) + + if None == div: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) + + return self.utf8FromSoup(url,div) diff --git a/fanficdownloader/adapters/adapter_nhamagicalworldsus.py b/fanficdownloader/adapters/adapter_nhamagicalworldsus.py new file mode 100644 index 00000000..bd541cc8 --- /dev/null +++ b/fanficdownloader/adapters/adapter_nhamagicalworldsus.py @@ -0,0 +1,237 @@ +# -*- coding: utf-8 -*- + +# Copyright 2012 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +# Software: eFiction +import time +import logging +logger = logging.getLogger(__name__) +import re +import urllib2 + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, makeDate + +def getClass(): + return NHAMagicalWorldsUsAdapter + +# Class name has to be unique. Our convention is camel case the +# sitename with Adapter at the end. www is skipped. +class NHAMagicalWorldsUsAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + self.username = "NoneGiven" # if left empty, site doesn't return any message at all. + self.password = "" + self.is_adult=False + + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + + + + # normalized story URL. + self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId')) + + # Each adapter needs to have a unique site abbreviation. + self.story.setMetadata('siteabbrev','nha') + + # The date format will vary from site to site. + # http://docs.python.org/library/datetime.html#strftime-strptime-behavior + self.dateformat = " %d/%m/%y" + + @staticmethod # must be @staticmethod, don't remove it. + def getSiteDomain(): + # The site domain. Does have www here, if it uses it. + return 'nha.magical-worlds.us' + + @classmethod + def getSiteExampleURLs(cls): + return "http://"+cls.getSiteDomain()+"/viewstory.php?sid=1234" + + def getSiteURLPattern(self): + return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$" + + ## Getting the chapter list and the meta data, plus 'is adult' checking. 
+ def extractChapterUrlsAndMetadata(self): + url = self.url + logger.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + m = re.search(r"'viewstory.php\?sid=\d+((?:&ageconsent=ok)?&warning=\d+)'",data) + if m != None: + if self.is_adult or self.getConfig("is_adult"): + # We tried the default and still got a warning, so + # let's pull the warning number from the 'continue' + # link and reload data. + addurl = m.group(1) + # correct stupid & error in url. + addurl = addurl.replace("&","&") + url = self.url+'&index=1'+addurl + logger.debug("URL 2nd try: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + else: + raise exceptions.AdultCheckRequired(self.url) + + if "Access denied. This story has not been validated by the adminstrators of this site." in data: + raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.") + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulSoup(data) + # print data + + # Now go hunting for all the meta data and the chapter list. + + # Find authorid and URL from... author url. 
+ a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) + self.story.setMetadata('authorId',a['href'].split('=')[1]) + self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href']) + self.story.setMetadata('author',a.string) + asoup = bs.BeautifulSoup(self._fetchUrl(self.story.getMetadata('authorUrl'))) + + try: + # in case link points somewhere other than the first chapter + a = soup.findAll('option')[1]['value'] + self.story.setMetadata('storyId',a.split('=',)[1]) + url = 'http://'+self.host+'/'+a + soup = bs.BeautifulSoup(self._fetchUrl(url)) + except: + pass + + for info in asoup.findAll('table', {'width' : '100%', 'bordercolor' : re.compile(r'#')}): + a = info.find('a') + if 'viewstory.php?sid='+self.story.getMetadata('storyId') == a['href'] or \ + ('viewstory.php?sid='+self.story.getMetadata('storyId')+'&') in a['href']: + self.story.setMetadata('title',stripHTML(a)) + break + + + # Find the chapters: + chapters=soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+'&chapter=\d+$')) + if len(chapters) == 0: + self.chapterUrls.append((self.story.getMetadata('title'),url)) + else: + for chapter in chapters: + # just in case there's tags, like <i> in chapter titles. + self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+chapter['href'])) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + # eFiction sites don't help us out a lot with their meta data + # formating, so it's a little ugly. 
+ + # utility method + def defaultGetattr(d): + try: + return d.name + except: + return "" + + cats = info.findAll('a',href=re.compile('categories.php')) + for cat in cats: + self.story.addToList('category',cat.string) + + a = info.find('a', href=re.compile(r'viewuser.php')) + val = a.nextSibling + svalue = "" + while not defaultGetattr(val) == 'br': + val = val.nextSibling + val = val.nextSibling + while not defaultGetattr(val) == 'br': + svalue += unicode(val) + val = val.nextSibling + self.setDescription(url,svalue) + + #does not provide convenient way to get word count + labels = info.findAll('i') + for labelspan in labels: + value = labelspan.nextSibling + label = stripHTML(labelspan) + + if 'Rating' in label: + self.story.setMetadata('rating', value.split(' -')[0]) + + if 'Genres' in label: + genres = value.string.split(', ') + for genre in genres: + if 'None' not in genre: + self.story.addToList('genre',genre.split(' -')[0]) + + if 'Characters' in label: + chars = value.string.split(', ') + for char in chars: + if 'None' not in char: + self.story.addToList('characters',char.split(' -')[0]) + + if 'Warnings' in label: + warnings = value.string.split(', ') + for warning in warnings: + if 'None' not in warning: + self.story.addToList('warnings',warning.split(' -')[0]) + + if 'Completed' in label: + if 'Yes' in value: + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + + if 'Published' in label: + self.story.setMetadata('datePublished', makeDate(value.split(' -')[0], self.dateformat)) + + if 'Updated' in label: + # there's a stray [ at the end. + #value = value[0:-1] + self.story.setMetadata('dateUpdated', makeDate(value.split(' -')[0], self.dateformat)) + + + # grab the text for an individual chapter. 
+ def getChapterText(self, url): + + logger.debug('Getting chapter text from: %s' % url) + + data = self._fetchUrl(url) + + soup = bs.BeautifulSoup(data, selfClosingTags=('br','hr','span','center')) # some chapters seem to be hanging up on those tags, so it is safer to close them + + story = soup.find('div', {"id" : "story"}) + + if None == story: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) + + return self.utf8FromSoup(url,story) diff --git a/fanficdownloader/adapters/adapter_nickandgregnet.py b/fanficdownloader/adapters/adapter_nickandgregnet.py new file mode 100644 index 00000000..cd0fd488 --- /dev/null +++ b/fanficdownloader/adapters/adapter_nickandgregnet.py @@ -0,0 +1,177 @@ +# -*- coding: utf-8 -*- + +# Copyright 2013 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Software: eFiction +import time +import logging +logger = logging.getLogger(__name__) +import re +import urllib2 + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, makeDate + +def getClass(): + return NickAndGregNetAdapter + +# Class name has to be unique. Our convention is camel case the +# sitename with Adapter at the end. www is skipped. 
+class NickAndGregNetAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + self.username = "NoneGiven" # if left empty, site doesn't return any message at all. + self.password = "" + self.is_adult=False + + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + + + # normalized story URL. + # XXX Most sites don't have the /fanfic part. Replace all to remove it usually. + self._setURL('http://' + self.getSiteDomain() + '/desert_archive/viewstory.php?sid='+self.story.getMetadata('storyId')) + + # Each adapter needs to have a unique site abbreviation. + self.story.setMetadata('siteabbrev','nag') + + # The date format will vary from site to site. + # http://docs.python.org/library/datetime.html#strftime-strptime-behavior + self.dateformat = "%Y/%m/%d" + + @staticmethod # must be @staticmethod, don't remove it. + def getSiteDomain(): + # The site domain. Does have www here, if it uses it. + return 'www.nickandgreg.net' + + @classmethod + def getSiteExampleURLs(cls): + return "http://"+cls.getSiteDomain()+"/desert_archive/viewstory.php?sid=1234" + + def getSiteURLPattern(self): + return re.escape("http://"+self.getSiteDomain()+"/desert_archive/viewstory.php?sid=")+r"\d+$" + + + ## Getting the chapter list and the meta data, plus 'is adult' checking. + def extractChapterUrlsAndMetadata(self): + + # index=1 makes sure we see the story chapter index. Some + # sites skip that for one-chapter stories. + url = self.url+'&i=1' + logger.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + if "Access denied. 
This story has not been validated by the adminstrators of this site." in data: + raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.") + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulSoup(data) + # print data + + # Now go hunting for all the meta data and the chapter list. + + ## Title + a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) + self.story.setMetadata('title',stripHTML(a)) + + # Find authorid and URL from... author url. + a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) + self.story.setMetadata('authorId',a['href'].split('=')[1]) + self.story.setMetadata('authorUrl','http://'+self.host+'/desert_archive/'+a['href']) + self.story.setMetadata('author',a.string) + + # Find the chapters: + chapters = soup.find('select') + for chapter in chapters.findAll('option'): + if chapter.text != 'Story Index' and chapter.text != 'Chapters': + self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/desert_archive/'+chapter['value'])) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + asoup = bs.BeautifulSoup(self._fetchUrl(self.story.getMetadata('authorUrl'))) + + for div in asoup.findAll('td', {'class' : 'tblborder6'}): + a = div.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) + if a != None: + break + + self.setDescription(url,div.find('br').nextSibling) + + a=div.text.split('Rating:') + if len(a) == 2: self.story.setMetadata('rating', a[1].split(' -')[0]) + + a=div.text.split('Characters:') + if len(a) == 2: + for char in a[1].split(' -')[0].split(', '): + self.story.addToList('characters',char) + + a=div.text.split('Genres:') + if len(a) == 2: + for genre in a[1].split(' -')[0].split(', '): + self.story.addToList('genre',genre) + + a=div.text.split('Warnings:') + if len(a) == 2: + for warn in 
a[1].split(' -')[0].split(', '): + if 'none' not in warn: + self.story.addToList('warnings',warn) + + a=div.text.split('Completed:') + if len(a) ==2: + if 'Yes' in a[1]: + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + + a=div.text.split('Published:') + if len(a) == 2: self.story.setMetadata('datePublished', makeDate(stripHTML(a[1].split(' -')[0]), self.dateformat)) + + a=div.text.split('Updated:') + if len(a) == 2: self.story.setMetadata('dateUpdated', makeDate(stripHTML(a[1].split(' -')[0]), self.dateformat)) + + + # grab the text for an individual chapter. + def getChapterText(self, url): + + logger.debug('Getting chapter text from: %s' % url) + + soup = bs.BeautifulStoneSoup(self._fetchUrl(url), + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + + # wrap a div around it. + divsoup = bs.BeautifulStoneSoup('<div class="story"></div>', + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + div = divsoup.find('div') + div.append(soup.find('table', {'class' : 'tblborder6'})) + + return self.utf8FromSoup(url,div) diff --git a/fanficdownloader/adapters/adapter_nocturnallightnet.py b/fanficdownloader/adapters/adapter_nocturnallightnet.py new file mode 100644 index 00000000..214e38b8 --- /dev/null +++ b/fanficdownloader/adapters/adapter_nocturnallightnet.py @@ -0,0 +1,177 @@ +import re +import urllib2 +import urlparse + +from .. import BeautifulSoup + +from base_adapter import BaseSiteAdapter, makeDate +from .. import exceptions + + +def getClass(): + return NocturnalLightNetAdapter + + +# yields Tag _and_ NavigableString siblings from the given tag. The +# BeautifulSoup findNextSiblings() method for some reasons only returns either +# NavigableStrings _or_ Tag objects, not both. 
def _yield_next_siblings(tag):
    """Yield every following sibling of *tag*, Tags and NavigableStrings
    alike (findNextSiblings() only returns one kind or the other)."""
    sibling = tag.nextSibling
    while sibling:
        yield sibling
        sibling = sibling.nextSibling


class NocturnalLightNetAdapter(BaseSiteAdapter):
    """Adapter for nocturnal-light.net /fanfiction/ stories."""

    SITE_ABBREVIATION = 'nln'
    SITE_DOMAIN = 'nocturnal-light.net'
    BASE_URL = 'http://' + SITE_DOMAIN + '/fanfiction/'
    STORY_URL_TEMPLATE = BASE_URL + 'story/%s'
    AUTHORS_URL_TEMPLATE = BASE_URL + 'authors/%s'

    DATETIME_FORMAT = '%m-%d-%y'

    def __init__(self, config, url):
        BaseSiteAdapter.__init__(self, config, url)

        # URL path is .../story/<id>[...]; take the token after 'story'.
        url_tokens = self.parsedUrl.path.split('/')
        story_id = url_tokens[url_tokens.index('story') + 1]

        self.story.setMetadata('storyId', story_id)
        self._setURL(self.STORY_URL_TEMPLATE % story_id)
        self.story.setMetadata('siteabbrev', self.SITE_ABBREVIATION)

    def _customized_fetch_url(self, url, exception=None, parameters=None):
        """Fetch *url* and return it parsed as a BeautifulSoup tree.

        When *exception* is given, any HTTPError is converted into that
        exception (raised with self.url); otherwise HTTPErrors propagate
        unchanged.
        """
        if exception:
            try:
                data = self._fetchUrl(url, parameters)
            except urllib2.HTTPError:
                raise exception(self.url)
        # Just let self._fetchUrl throw the exception, don't catch and
        # customize it.
        else:
            data = self._fetchUrl(url, parameters)

        return BeautifulSoup.BeautifulSoup(data)

    @staticmethod
    def getSiteDomain():
        return NocturnalLightNetAdapter.SITE_DOMAIN

    @classmethod
    def getSiteExampleURLs(cls):
        return cls.STORY_URL_TEMPLATE % 1234

    def getSiteURLPattern(self):
        # Strip the '%s' placeholder before escaping, then allow the id
        # plus any trailing path.
        return re.escape(self.STORY_URL_TEMPLATE[:-2]) + r'\d+.*$'

    def extractChapterUrlsAndMetadata(self):
        """Populate chapter list and metadata from the story page and the
        author's listing page.

        Raises StoryDoesNotExist for missing stories, FailedToDownload if
        the story isn't in the author listing, and AdultCheckRequired for
        NC-17 stories without is_adult set.
        """
        soup = self._customized_fetch_url(self.url)

        # Since no 404 error code we have to raise the exception ourselves.
        # A title that is just 'by' indicates that there is no author name
        # and no story title available.
        if soup.title.string.strip() == 'by':
            raise exceptions.StoryDoesNotExist(self.url)

        # "storycontent" is found in a single-chapter story
        author_anchor = soup.find('div', id=lambda id: id in ('main', 'storycontent')).h1.a
        self.story.setMetadata('author', author_anchor.string)

        url_tokens = author_anchor['href'].split('/')
        author_id = url_tokens[url_tokens.index('authors')+1]
        self.story.setMetadata('authorId', author_id)
        self.story.setMetadata('authorUrl', self.AUTHORS_URL_TEMPLATE % author_id)

        chapter_anchors = soup('a', href=lambda href: href and href.startswith('/fanfiction/story/'))
        for chapter_anchor in chapter_anchors:
            url = urlparse.urljoin(self.BASE_URL, chapter_anchor['href'])
            self.chapterUrls.append((chapter_anchor.string, url))

        # Most metadata lives on the author page in a 'listbox' div per
        # story; find the one whose link matches our story id.
        author_url = urlparse.urljoin(self.BASE_URL, author_anchor['href'])
        soup = self._customized_fetch_url(author_url)
        story_id = self.story.getMetadata('storyId')
        for listbox in soup('div', {'class': 'listbox'}):
            url_tokens = listbox.a['href'].split('/')
            # Found the div containing the story's metadata; break the loop
            # and parse the element
            if story_id == url_tokens[url_tokens.index('story')+1]:
                break
        else:
            raise exceptions.FailedToDownload(self.url)

        title = listbox.a.string
        self.story.setMetadata('title', title)

        # No chapter anchors found in the original story URL, so the story
        # has only a single chapter.
        if not chapter_anchors:
            self.chapterUrls.append((title, self.url))

        for b_tag in listbox('b'):
            key = b_tag.string.strip(':')
            try:
                value = b_tag.nextSibling.string.replace('•', '').strip(': ')
            # This can happen with some fancy markup in the summary.  Just
            # ignore this error and set value to None, the summary parsing
            # takes care of this
            except AttributeError:
                value = None

            if key == 'Summary':
                contents = []
                keep_summary_html = self.getConfig('keep_summary_html')

                for sibling in _yield_next_siblings(b_tag):
                    if isinstance(sibling, BeautifulSoup.Tag):
                        # A <b> right after a <br> starts the next field.
                        if sibling.name == 'b' and sibling.findPreviousSibling().name == 'br':
                            break

                        if keep_summary_html:
                            contents.append(self.utf8FromSoup(author_url, sibling))
                        else:
                            contents.append(''.join(sibling(text=True)))
                    else:
                        contents.append(sibling)

                # Pop last break line tag
                contents.pop()
                self.story.setMetadata('description', ''.join(contents))

            elif key == 'Category':
                for sibling in b_tag.findNextSiblings(['a', 'b']):
                    if sibling.name == 'b':
                        break

                    self.story.addToList('category', sibling.string)

            elif key == 'Rating':
                self.story.setMetadata('rating', value)

            elif key == 'Chapters':
                self.story.setMetadata('numChapters', int(value))

                # Also parse reviews number which lies right after the
                # chapters section
                reviews_anchor = b_tag.findNextSibling('a')
                reviews = reviews_anchor.string.split(' ')[1].strip('()')
                self.story.setMetadata('reviews', reviews)

            elif key == 'Completed':
                self.story.setMetadata('status', 'Completed' if value == 'Yes' else 'In-Progress')

            elif key == 'Date Added':
                self.story.setMetadata('datePublished', makeDate(value, self.DATETIME_FORMAT))

            elif key == 'Last Updated':
                self.story.setMetadata('dateUpdated', makeDate(value, self.DATETIME_FORMAT))

            elif key == 'Read':
                self.story.setMetadata('readings', value.split()[0])

        if self.story.getMetadata('rating') == 'NC-17' and not (self.is_adult or self.getConfig('is_adult')):
            raise exceptions.AdultCheckRequired(self.url)

    def getChapterText(self, url):
        """Fetch one chapter page and return its <div id="storytext">."""
        soup = self._customized_fetch_url(url)
        return self.utf8FromSoup(url, soup.find('div', id='storytext'))
diff --git
a/fanficdownloader/adapters/adapter_occlumencysycophanthexcom.py b/fanficdownloader/adapters/adapter_occlumencysycophanthexcom.py new file mode 100644 index 00000000..87008a53 --- /dev/null +++ b/fanficdownloader/adapters/adapter_occlumencysycophanthexcom.py @@ -0,0 +1,263 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Software: eFiction +import time +import logging +logger = logging.getLogger(__name__) +import re +import urllib2 + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, makeDate + +def getClass(): + return OcclumencySycophantHexComAdapter + +# Class name has to be unique. Our convention is camel case the +# sitename with Adapter at the end. www is skipped. +class OcclumencySycophantHexComAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + self.username = "NoneGiven" # if left empty, site doesn't return any message at all. 
+ self.password = "" + self.is_adult=False + + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + + + + # normalized story URL. + self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId')) + + # Each adapter needs to have a unique site abbreviation. + self.story.setMetadata('siteabbrev','osph') + + # The date format will vary from site to site. + # http://docs.python.org/library/datetime.html#strftime-strptime-behavior + self.dateformat = "%m/%d/%Y" + + @staticmethod # must be @staticmethod, don't remove it. + def getSiteDomain(): + # The site domain. Does have www here, if it uses it. + return 'occlumency.sycophanthex.com' + + @classmethod + def getSiteExampleURLs(cls): + return "http://"+cls.getSiteDomain()+"/viewstory.php?sid=1234" + + def getSiteURLPattern(self): + return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$" + + ## Login seems to be reasonably standard across eFiction sites. + def needToLoginCheck(self, data): + if 'This story contains adult content and/or themes.' 
in data \ + or "That password doesn't match the one in our database" in data: + return True + else: + return False + + def performLogin(self, url): + params = {} + + if self.password: + params['penname'] = self.username + params['password'] = self.password + else: + params['penname'] = self.getConfig("username") + params['password'] = self.getConfig("password") + params['rememberme'] = '1' + params['sid'] = '' + params['intent'] = '' + params['submit'] = 'Submit' + + loginUrl = 'http://' + self.getSiteDomain() + '/user.php' + logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl, + params['penname'])) + + d = self._fetchUrl(loginUrl, params) + + if "Logout" not in d : #Member Account + logger.info("Failed to login to URL %s as %s" % (loginUrl, + params['penname'])) + raise exceptions.FailedToLogin(url,params['penname']) + return False + else: + return True + + ## Getting the chapter list and the meta data, plus 'is adult' checking. + def extractChapterUrlsAndMetadata(self): + # index=1 makes sure we see the story chapter index. Some + # sites skip that for one-chapter stories. + url = self.url + logger.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + if self.needToLoginCheck(data): + # need to log in for this one. + self.performLogin(url) + data = self._fetchUrl(url) + + if "Access denied. This story has not been validated by the adminstrators of this site." in data: + raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.") + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulSoup(data) + # print data + + # Now go hunting for all the meta data and the chapter list. + + + # Find authorid and URL from... author url. 
+ a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) + self.story.setMetadata('authorId',a['href'].split('=')[1]) + self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href']) + self.story.setMetadata('author',a.string) + asoup = bs.BeautifulSoup(self._fetchUrl(self.story.getMetadata('authorUrl'))) + + try: + # in case link points somewhere other than the first chapter + a = soup.findAll('option')[1]['value'] + self.story.setMetadata('storyId',a.split('=',)[1]) + url = 'http://'+self.host+'/'+a + soup = bs.BeautifulSoup(self._fetchUrl(url)) + except: + pass + + for info in asoup.findAll('table', {'class' : 'border'}): + a = info.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) + if a != None: + self.story.setMetadata('title',stripHTML(a)) + break + + + # Find the chapters: + chapters=soup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+&i=1$')) + if len(chapters) == 0: + self.chapterUrls.append((self.story.getMetadata('title'),url)) + else: + for chapter in chapters: + # just in case there's tags, like <i> in chapter titles. + self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+chapter['href'])) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + # eFiction sites don't help us out a lot with their meta data + # formating, so it's a little ugly. 
+ + # utility method + def defaultGetattr(d): + try: + return d.name + except: + return "" + + cats = info.findAll('a',href=re.compile('categories.php')) + for cat in cats: + self.story.addToList('category',cat.string) + + + a = info.find('a', href=re.compile(r'reviews.php\?sid='+self.story.getMetadata('storyId'))) + val = a.nextSibling + svalue = "" + while not defaultGetattr(val) == 'br': + val = val.nextSibling + val = val.nextSibling + while not defaultGetattr(val) == 'table': + svalue += str(val) + val = val.nextSibling + self.setDescription(url,svalue) + + # <span class="label">Rated:</span> NC-17<br /> etc + labels = info.findAll('b') + for labelspan in labels: + value = labelspan.nextSibling + label = stripHTML(labelspan) + + if 'Rating' in label: + self.story.setMetadata('rating', value) + + if 'Word Count' in label: + self.story.setMetadata('numWords', value) + + if 'Genres' in label: + genres = value.string.split(', ') + for genre in genres: + if genre != 'none': + self.story.addToList('genre',genre) + + if 'Characters' in label: + chars = value.string.split(', ') + for char in chars: + if char != 'none': + self.story.addToList('characters',char) + + if 'Warnings' in label: + warnings = value.string.split(', ') + for warning in warnings: + if warning != ' none': + self.story.addToList('warnings',warning) + + if 'Completed' in label: + if 'Yes' in value: + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + + if 'Published' in label: + self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat)) + + if 'Updated' in label: + # there's a stray [ at the end. + #value = value[0:-1] + self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat)) + + + # grab the text for an individual chapter. 
+ def getChapterText(self, url): + + logger.debug('Getting chapter text from: %s' % url) + + data = self._fetchUrl(url) + data = data.replace('<div align="left"', '<div align="left">') + + soup = bs.BeautifulSoup(data, selfClosingTags=('br','hr')) + + story = soup.find('div', {"align" : "left"}) + + if None == story: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) + + return self.utf8FromSoup(url,story) diff --git a/fanficdownloader/adapters/adapter_onedirectionfanfictioncom.py b/fanficdownloader/adapters/adapter_onedirectionfanfictioncom.py new file mode 100644 index 00000000..d4ae9097 --- /dev/null +++ b/fanficdownloader/adapters/adapter_onedirectionfanfictioncom.py @@ -0,0 +1,270 @@ +# -*- coding: utf-8 -*- + +# Copyright 2012 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Software: eFiction +import time +import logging +logger = logging.getLogger(__name__) +import re +import urllib2 + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, makeDate + +def getClass(): + return OneDirectionFanfictionComAdapter + +# Class name has to be unique. Our convention is camel case the +# sitename with Adapter at the end. www is skipped. 
+class OneDirectionFanfictionComAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + self.username = "NoneGiven" # if left empty, site doesn't return any message at all. + self.password = "" + self.is_adult=False + + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + + + # normalized story URL. + self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId')) + + # Each adapter needs to have a unique site abbreviation. + self.story.setMetadata('siteabbrev','odf') + + # The date format will vary from site to site. + # http://docs.python.org/library/datetime.html#strftime-strptime-behavior + self.dateformat = "%m/%d/%y" + + @staticmethod # must be @staticmethod, don't remove it. + def getSiteDomain(): + # The site domain. Does have www here, if it uses it. + return 'onedirectionfanfiction.com' + + @classmethod + def getAcceptDomains(cls): + return ['www.onedirectionfanfiction.com','onedirectionfanfiction.com'] + + + @classmethod + def getSiteExampleURLs(cls): + return "http://"+cls.getSiteDomain()+"/viewstory.php?sid=1234" + + def getSiteURLPattern(self): + return re.escape("http://")+"(www\.)?"+re.escape(self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$" + + ## Login seems to be reasonably standard across eFiction sites. 
+ def needToLoginCheck(self, data): + if 'Registered Users Only' in data \ + or 'There is no such account on our website' in data \ + or "That password doesn't match the one in our database" in data: + return True + else: + return False + + def performLogin(self, url): + params = {} + + if self.password: + params['penname'] = self.username + params['password'] = self.password + else: + params['penname'] = self.getConfig("username") + params['password'] = self.getConfig("password") + params['cookiecheck'] = '1' + params['submit'] = 'Submit' + + loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login' + logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl, + params['penname'])) + + d = self._fetchUrl(loginUrl, params) + + if "Member Account" not in d : #Member Account + logger.info("Failed to login to URL %s as %s" % (loginUrl, + params['penname'])) + raise exceptions.FailedToLogin(url,params['penname']) + return False + else: + return True + + ## Getting the chapter list and the meta data, plus 'is adult' checking. + def extractChapterUrlsAndMetadata(self): + + if self.is_adult or self.getConfig("is_adult"): + # Weirdly, different sites use different warning numbers. + # If the title search below fails, there's a good chance + # you need a different number. print data at that point + # and see what the 'click here to continue' url says. + addurl = "&ageconsent=ok&warning=4" + else: + addurl="" + + # index=1 makes sure we see the story chapter index. Some + # sites skip that for one-chapter stories. + url = self.url+'&index=1'+addurl + logger.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + if self.needToLoginCheck(data): + # need to log in for this one. + self.performLogin(url) + data = self._fetchUrl(url) + + # The actual text that is used to announce you need to be an + # adult varies from site to site. 
Again, print data before + # the title search to troubleshoot. + if "Age Consent Required" in data: + raise exceptions.AdultCheckRequired(self.url) + + if "Access denied. This story has not been validated by the adminstrators of this site." in data: + raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.") + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulSoup(data) + + # Now go hunting for all the meta data and the chapter list. + + ## Title + a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) + self.story.setMetadata('title',stripHTML(a)) + + # Find authorid and URL from... author url. + a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) + self.story.setMetadata('authorId',a['href'].split('=')[1]) + self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href']) + self.story.setMetadata('author',a.string) + + # Find the chapters: + for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")): + # just in case there's tags, like <i> in chapter titles. + self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+chapter['href']+addurl)) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + # eFiction sites don't help us out a lot with their meta data + # formating, so it's a little ugly. 
+ + # utility method + def defaultGetattr(d,k): + try: + return d[k] + except: + return "" + + # <span class="label">Rated:</span> NC-17<br /> etc + labels = soup.findAll('span',{'class':'label'}) + for labelspan in labels: + value = labelspan.nextSibling + label = labelspan.string + + if 'Summary' in label: + ## Everything until the next span class='label' + svalue = "" + while value and not defaultGetattr(value,'class') == 'label': + svalue += str(value) + value = value.nextSibling + self.setDescription(url,svalue) + + if 'Rated' in label: + self.story.setMetadata('rating', value) + + if 'Word count' in label: + self.story.setMetadata('numWords', value) + + if 'Categories' in label: + cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories')) + for cat in cats: + self.story.addToList('category',cat.string) + + if 'Characters' in label: + chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters')) + for char in chars: + self.story.addToList('characters',char.string) + + if 'Warnings' in label: + warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=6')) + for warning in warnings: + self.story.addToList('warnings',warning.string) + + if 'Completed' in label: + if 'Yes' in value: + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + + if 'Published' in label: + self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat)) + + if 'Updated' in label: + # there's a stray [ at the end. + #value = value[0:-1] + self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat)) + + try: + # Find Series name from series URL. + a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+")) + series_name = a.string + series_url = 'http://'+self.host+'/'+a['href'] + + # use BeautifulSoup HTML parser to make everything easier to find. 
+ seriessoup = bs.BeautifulSoup(self._fetchUrl(series_url)) + storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$')) + i=1 + for a in storyas: + if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')): + self.setSeries(series_name, i) + self.story.setMetadata('seriesUrl',series_url) + break + i+=1 + + except: + # I find it hard to care if the series parsing fails + pass + + # grab the text for an individual chapter. + def getChapterText(self, url): + + logger.debug('Getting chapter text from: %s' % url) + + soup = bs.BeautifulStoneSoup(self._fetchUrl(url), + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + + div = soup.find('div', {'id' : 'story'}) + + if None == div: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) + + return self.utf8FromSoup(url,div) diff --git a/fanficdownloader/adapters/adapter_phoenixsongnet.py b/fanficdownloader/adapters/adapter_phoenixsongnet.py new file mode 100644 index 00000000..d8918d01 --- /dev/null +++ b/fanficdownloader/adapters/adapter_phoenixsongnet.py @@ -0,0 +1,240 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import time +import logging +logger = logging.getLogger(__name__) +import re +import urllib2, urllib, cookielib + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. 
import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, makeDate + +def getClass(): + return PhoenixSongNetAdapter + +# Class name has to be unique. Our convention is camel case the +# sitename with Adapter at the end. www is skipped. +class PhoenixSongNetAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + self.username = "NoneGiven" # if left empty, site doesn't return any message at all. + self.password = "" + self.is_adult=False + + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[3]) + + + # normalized story URL. + self._setURL('http://' + self.getSiteDomain() + '/fanfiction/story/' +self.story.getMetadata('storyId')+'/') + + # Each adapter needs to have a unique site abbreviation. + self.story.setMetadata('siteabbrev','phs') + + # The date format will vary from site to site. + # http://docs.python.org/library/datetime.html#strftime-strptime-behavior + self.dateformat = "%B %d %Y" + + @staticmethod # must be @staticmethod, don't remove it. + def getSiteDomain(): + # The site domain. Does have www here, if it uses it. + return 'www.phoenixsong.net' + + @classmethod + def getSiteExampleURLs(cls): + return "http://"+cls.getSiteDomain()+"/fanfiction/story/1234/" + + def getSiteURLPattern(self): + return re.escape("http://"+self.getSiteDomain()+"/fanfiction/story/")+r"\d+/?$" + + ## Login seems to be reasonably standard across eFiction sites. + def needToLoginCheck(self, data): + if 'Please login to continue.' 
in data: + return True + else: + return False + + def performLogin(self, url): + params = {} + + if self.password: + params['txtusername'] = self.username + params['txtpassword'] = self.password + else: + params['txtusername'] = self.getConfig("username") + params['txtpassword'] = self.getConfig("password") + #params['remember'] = '1' + params['login'] = 'Login' + + loginUrl = 'http://' + self.getSiteDomain() + '/users/processlogin.php' + logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl, + params['txtusername'])) + d = self._fetchUrl(loginUrl, params) + + if 'Please login to continue.' in d : #Member Account + logger.info("Failed to login to URL %s as %s" % (loginUrl, + params['txtusername'])) + raise exceptions.FailedToLogin(url,params['txtusername']) + return False + else: + return True + + ## Getting the chapter list and the meta data, plus 'is adult' checking. + def extractChapterUrlsAndMetadata(self): + + # index=1 makes sure we see the story chapter index. Some + # sites skip that for one-chapter stories. + url = self.url + logger.debug("URL: "+url) + + try: + if self.getConfig('force_login'): + self.performLogin(url) + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + if self.needToLoginCheck(data): + # need to log in for this one. + self.performLogin(url) + data = self._fetchUrl(url) + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulSoup(data) + # print data + + # Now go hunting for all the meta data and the chapter list. + + ## Title + b = soup.find('div', {'id' : 'nav25'}) + a = b.find('a', href=re.compile(r'fanfiction/story/'+self.story.getMetadata('storyId')+"/$")) + self.story.setMetadata('title',stripHTML(a)) + + # Find authorid and URL from... author url. 
/fanfiction/stories.php?psid=125 + a = b.find('a', href=re.compile(r"/fanfiction/stories.php\?psid=\d+")) + self.story.setMetadata('authorId',a['href'].split('=')[1]) + self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href']) + self.story.setMetadata('author',a.string) + + # Find the chapters: + chapters = soup.find('select') + if chapters == None: + self.chapterUrls.append((self.story.getMetadata('title'),url)) + for b in soup.findAll('b'): + if b.text == "Updated": + date = b.nextSibling.string.split(': ')[1].split(',') + self.story.setMetadata('datePublished', makeDate(date[0]+date[1], self.dateformat)) + self.story.setMetadata('dateUpdated', makeDate(date[0]+date[1], self.dateformat)) + else: + i = 0 + chapters = chapters.findAll('option') + for chapter in chapters: + self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+chapter['value'])) + if i == 0: + self.story.setMetadata('storyId',chapter['value'].split('/')[3]) + head = bs.BeautifulSoup(self._fetchUrl('http://'+self.host+chapter['value'])).findAll('b') + for b in head: + if b.text == "Updated": + date = b.nextSibling.string.split(': ')[1].split(',') + self.story.setMetadata('datePublished', makeDate(date[0]+date[1], self.dateformat)) + + if i == (len(chapters)-1): + head = bs.BeautifulSoup(self._fetchUrl('http://'+self.host+chapter['value'])).findAll('b') + for b in head: + if b.text == "Updated": + date = b.nextSibling.string.split(': ')[1].split(',') + self.story.setMetadata('dateUpdated', makeDate(date[0]+date[1], self.dateformat)) + i = i+1 + + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + asoup = bs.BeautifulSoup(self._fetchUrl(self.story.getMetadata('authorUrl'))) + + info = asoup.find('a', href=re.compile(r'fanfiction/story/'+self.story.getMetadata('storyId')+"/$")) + while info != None: + info = info.findNext('div') + b = info.find('b') + val = b.nextSibling + + if 'Rating' in b.string: + self.story.setMetadata('rating', val.string.split(': ')[1]) + + if 
'Words' in b.string: + self.story.setMetadata('numWords', val.string.split(': ')[1]) + + if 'Setting' in b.string: + self.story.addToList('category', val.string.split(': ')[1]) + + if 'Status' in b.string: + if 'Completed' in val: + val = 'Completed' + else: + val = 'In-Progress' + self.story.setMetadata('status', val) + + if 'Summary' in b.string: + b.extract() + info.find('br').extract() + self.setDescription(url,info) + break + + + # grab the text for an individual chapter. + def getChapterText(self, url): + + logger.debug('Getting chapter text from: %s' % url) + + soup = bs.BeautifulSoup(self._fetchUrl(url), + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + + chapter=bs.BeautifulSoup('<div class="story"></div>') + for p in soup.findAll('p'): + if "This is for problems with the formatting or the layout of the chapter." in stripHTML(p): + break + chapter.append(p) + + for a in chapter.findAll('div'): + a.extract() + for a in chapter.findAll('table'): + a.extract() + for a in chapter.findAll('script'): + a.extract() + for a in chapter.findAll('form'): + a.extract() + for a in chapter.findAll('textarea'): + a.extract() + + + if None == chapter: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) + + return self.utf8FromSoup(url,chapter) diff --git a/fanficdownloader/adapters/adapter_pommedesangcom.py b/fanficdownloader/adapters/adapter_pommedesangcom.py new file mode 100644 index 00000000..4d553a0f --- /dev/null +++ b/fanficdownloader/adapters/adapter_pommedesangcom.py @@ -0,0 +1,301 @@ +# -*- coding: utf-8 -*- + +# Copyright 2013 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Software: eFiction +import time +import logging +logger = logging.getLogger(__name__) +import re +import urllib2 + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, makeDate + +def getClass(): + return PommeDeSangComAdapter + +# Class name has to be unique. Our convention is camel case the +# sitename with Adapter at the end. www is skipped. +class PommeDeSangComAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + self.username = "NoneGiven" # if left empty, site doesn't return any message at all. + self.password = "" + self.is_adult=False + + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + + + # pommedesang.com has two 'sections', shown in URL as + # 'efiction' and 'sds' that change how things should be + # handled. + # http://pommedesang.com/efiction/viewstory.php?sid=1234 + # http://pommedesang.com/sds/viewstory.php?sid=1234 + self.section=self.parsedUrl.path.split('/',)[1] + + # normalized story URL. + self._setURL('http://' + self.getSiteDomain() + '/'+self.section+'/viewstory.php?sid='+self.story.getMetadata('storyId')) + + # Each adapter needs to have a unique site abbreviation. 
+ self.story.setMetadata('siteabbrev','pmds') + + # The date format will vary from site to site. + # http://docs.python.org/library/datetime.html#strftime-strptime-behavior + if 'efiction' in self.section: + self.dateformat = "%b %d, %Y" + else: + self.dateformat = "%m/%d/%y" + + @staticmethod # must be @staticmethod, don't remove it. + def getSiteDomain(): + # The site domain. Does have www here, if it uses it. + return 'pommedesang.com' + + @classmethod + def getSiteExampleURLs(cls): + return "http://"+cls.getSiteDomain()+"/efiction/viewstory.php?sid=1234 http://"+cls.getSiteDomain()+"/sds/viewstory.php?sid=1234" + + def getSiteURLPattern(self): + return r"http://"+self.getSiteDomain()+"/(efiction|sds)?/viewstory.php\?sid=\d+$" + + ## Login seems to be reasonably standard across eFiction sites. + def needToLoginCheck(self, data): + if 'Registered Users Only' in data \ + or 'There is no such account on our website' in data \ + or "That password doesn't match the one in our database" in data: + return True + else: + return False + + def performLogin(self, url): + params = {} + + if self.password: + params['penname'] = self.username + params['password'] = self.password + else: + params['penname'] = self.getConfig("username") + params['password'] = self.getConfig("password") + params['cookiecheck'] = '1' + params['submit'] = 'Submit' + + loginUrl = 'http://' + self.getSiteDomain() + '/'+self.section+'/user.php?action=login' + logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl, + params['penname'])) + + d = self._fetchUrl(loginUrl, params) + + if "Member Account" not in d : #Member Account + logger.info("Failed to login to URL %s as %s" % (loginUrl, + params['penname'])) + raise exceptions.FailedToLogin(url,params['penname']) + return False + else: + return True + + ## Getting the chapter list and the meta data, plus 'is adult' checking. 
+ def extractChapterUrlsAndMetadata(self): + + if self.is_adult or self.getConfig("is_adult"): + # Weirdly, different sites use different warning numbers. + # If the title search below fails, there's a good chance + # you need a different number. print data at that point + # and see what the 'click here to continue' url says. + addurl = "&ageconsent=ok&warning=5" + else: + addurl="" + + # index=1 makes sure we see the story chapter index. Some + # sites skip that for one-chapter stories. + url = self.url+'&index=1'+addurl + logger.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + if self.needToLoginCheck(data): + # need to log in for this one. + self.performLogin(url) + data = self._fetchUrl(url) + + m = re.search(r"'viewstory.php\?sid=\d+((?:&ageconsent=ok)?&warning=\d+)'",data) + if m != None: + if self.is_adult or self.getConfig("is_adult"): + # We tried the default and still got a warning, so + # let's pull the warning number from the 'continue' + # link and reload data. + addurl = m.group(1) + # correct stupid & error in url. + addurl = addurl.replace("&","&") + url = self.url+'&index=1'+addurl + logger.debug("URL 2nd try: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + else: + raise exceptions.AdultCheckRequired(self.url) + + if "Access denied. This story has not been validated by the adminstrators of this site." in data: + raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.") + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulSoup(data) + # print data + + # Now go hunting for all the meta data and the chapter list. 
+ + ## Title + a = soup.find('a', href=re.compile('viewstory.php\?sid=\d+')) + self.story.setMetadata('title',stripHTML(a)) + + # Find authorid and URL from... author url. + a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) + self.story.setMetadata('authorId',a['href'].split('=')[1]) + self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href']) + self.story.setMetadata('author',a.string) + + # Find the chapters: + for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")): + # just in case there's tags, like <i> in chapter titles. + self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+self.section+'/'+chapter['href']+addurl)) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + # eFiction sites don't help us out a lot with their meta data + # formating, so it's a little ugly. + + # utility method + def defaultGetattr(d,k): + try: + return d[k] + except: + return "" + +# summary, rated, word count, categories, characters, genre, warnings, completed, published, updated, seires + + # <span class="label">Rated:</span> NC-17<br /> etc + labels = soup.findAll('span',{'class':'label'}) + for labelspan in labels: + value = labelspan.nextSibling + label = labelspan.string + + if 'Summary' in label: + ## Everything until the next span class='label' + svalue = "" + while not defaultGetattr(value,'class') == 'label': + svalue += str(value) + value = value.nextSibling + self.setDescription(url,svalue) + #self.story.setMetadata('description',stripHTML(svalue)) + + if 'Rated' in label: + self.story.setMetadata('rating', value) + + if 'Word count' in label: + self.story.setMetadata('numWords', value) + + if 'Categories' in label: + cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories')) + for cat in cats: + self.story.addToList('category',cat.string) + + if 'Characters' in label: + chars = 
labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters')) + for char in chars: + self.story.addToList('characters',char.string) + + if 'Genre' in label: + genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1')) # XXX + for genre in genres: + self.story.addToList('genre',genre.string) + + if 'Warnings' in label: + warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX + for warning in warnings: + self.story.addToList('warnings',warning.string) + + if 'Completed' in label: + if 'Yes' in value: + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + + if 'Published' in label: + self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat)) + + if 'Updated' in label: + self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat)) + + try: + # Find Series name from series URL. + a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+")) + series_name = a.string + series_url = 'http://'+self.host+'/'+self.section+'/'+a['href'] + + # use BeautifulSoup HTML parser to make everything easier to find. + seriessoup = bs.BeautifulSoup(self._fetchUrl(series_url)) + # can't use ^viewstory...$ in case of higher rated stories with javascript href. + storyas = seriessoup.findAll('a', href=re.compile('viewstory.php\?sid=\d+')) + i=1 + for a in storyas: + # skip 'report this' and 'TOC' links + if 'contact.php' not in a['href'] and 'index' not in a['href']: + if ('viewstory.php?sid='+self.story.getMetadata('storyId')) in a['href']: + self.setSeries(series_name, i) + self.story.setMetadata('seriesUrl',series_url) + break + i+=1 + + except: + # I find it hard to care if the series parsing fails + pass + + # grab the text for an individual chapter. 
+ def getChapterText(self, url): + + logger.debug('Getting chapter text from: %s' % url) + + soup = bs.BeautifulStoneSoup(self._fetchUrl(url), + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + + div = soup.find('div', {'id' : 'story'}) + + if None == div: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) + + return self.utf8FromSoup(url,div) diff --git a/fanficdownloader/adapters/adapter_ponyfictionarchivenet.py b/fanficdownloader/adapters/adapter_ponyfictionarchivenet.py new file mode 100644 index 00000000..eceb3325 --- /dev/null +++ b/fanficdownloader/adapters/adapter_ponyfictionarchivenet.py @@ -0,0 +1,249 @@ +# -*- coding: utf-8 -*- + +# Copyright 2012 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Software: eFiction +import time +import logging +logger = logging.getLogger(__name__) +import re +import urllib2 + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, makeDate + +def getClass(): + return PonyFictionArchiveNetAdapter + +# Class name has to be unique. Our convention is camel case the +# sitename with Adapter at the end. www is skipped. +class PonyFictionArchiveNetAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. 
+ # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + self.username = "NoneGiven" # if left empty, site doesn't return any message at all. + self.password = "" + self.is_adult=False + + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + + # normalized story URL. + if "explicit" in self.parsedUrl.netloc: + self._setURL('http://explicit.' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId')) + self.dateformat = "%d/%b/%y" + else: + self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId')) + self.dateformat = "%d %b %Y" + + # Each adapter needs to have a unique site abbreviation. + self.story.setMetadata('siteabbrev','pffa') + + @staticmethod # must be @staticmethod, don't remove it. + def getSiteDomain(): + # The site domain. Does have www here, if it uses it. + return 'ponyfictionarchive.net' + + @classmethod + def getAcceptDomains(cls): + return ['www.ponyfictionarchive.net','ponyfictionarchive.net','explicit.ponyfictionarchive.net'] + + @classmethod + def getSiteExampleURLs(cls): + return "http://"+cls.getSiteDomain()+"/viewstory.php?sid=1234 http://explicit."+cls.getSiteDomain()+"/viewstory.php?sid=1234" + + def getSiteURLPattern(self): + return re.escape("http://")+"(www\.|explicit\.)?"+re.escape(self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$" + + + ## Getting the chapter list and the meta data, plus 'is adult' checking. + def extractChapterUrlsAndMetadata(self): + + if self.is_adult or self.getConfig("is_adult"): + # Weirdly, different sites use different warning numbers. + # If the title search below fails, there's a good chance + # you need a different number. print data at that point + # and see what the 'click here to continue' url says. 
+ addurl = "&warning=9" + else: + addurl="" + + # index=1 makes sure we see the story chapter index. Some + # sites skip that for one-chapter stories. + url = self.url+'&index=1'+addurl + logger.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + + m = re.search(r"'viewstory.php\?sid=\d+((?:&ageconsent=ok)?&warning=\d+)'",data) + if m != None: + if self.is_adult or self.getConfig("is_adult"): + # We tried the default and still got a warning, so + # let's pull the warning number from the 'continue' + # link and reload data. + addurl = m.group(1) + # correct stupid & error in url. + addurl = addurl.replace("&","&") + url = self.url+'&index=1'+addurl + logger.debug("URL 2nd try: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + else: + raise exceptions.AdultCheckRequired(self.url) + + if "Access denied. This story has not been validated by the adminstrators of this site." in data: + raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.") + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulSoup(data) + # print data + + # Now go hunting for all the meta data and the chapter list. + + ## Title + a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) + self.story.setMetadata('title',stripHTML(a)) + + # Find authorid and URL from... author url. 
+ a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) + self.story.setMetadata('authorId',a['href'].split('=')[1]) + self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href']) + self.story.setMetadata('author',a.string) + + # Find the chapters: + for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")): + # just in case there's tags, like <i> in chapter titles. + self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+chapter['href']+addurl)) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + # eFiction sites don't help us out a lot with their meta data + # formating, so it's a little ugly. + + # utility method + def defaultGetattr(d,k): + try: + return d[k] + except: + return "" + + genres = soup.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1')) + for genre in genres: + self.story.addToList('genre',genre.string) + + warnings = soup.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=3')) + for warning in warnings: + self.story.addToList('warnings',warning.string) + + status = soup.find('a',href=re.compile(r'browse.php\?type=class&type_id=2')) + self.story.setMetadata('status',status.string) + + section = soup.findAll('span', {'class' : 'General'})[1] + + self.story.setMetadata('rating', section.previousSibling.previousSibling.string) + + value = section.nextSibling + svalue = "" + while not defaultGetattr(value,'class') == 'label': + svalue += str(value) + value = value.nextSibling + self.setDescription(url,svalue) + + # <span class="label">Rated:</span> NC-17<br /> etc + labels = soup.findAll('span',{'class':'label'}) + for labelspan in labels: + value = labelspan.nextSibling + label = labelspan.string + + if 'Word count' in label: + self.story.setMetadata('numWords', value) + + if 'Characters' in label: + chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters')) + for char in chars: + 
self.story.addToList('characters',char.string) + + if 'Completed' in label: + if 'Yes' in value: + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + + if 'Published' in label: + self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat)) + + if 'Updated' in label: + self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat)) + + try: + # Find Series name from series URL. + a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+")) + series_name = a.string + series_url = 'http://'+self.host+'/'+a['href'] + + # use BeautifulSoup HTML parser to make everything easier to find. + seriessoup = bs.BeautifulSoup(self._fetchUrl(series_url)) + # can't use ^viewstory...$ in case of higher rated stories with javascript href. + storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+')) + i=1 + for a in storyas: + # skip 'report this' and 'TOC' links + if 'contact.php' not in a['href'] and 'index' not in a['href']: + if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')): + self.setSeries(series_name, i) + self.story.setMetadata('seriesUrl',series_url) + break + i+=1 + + except: + # I find it hard to care if the series parsing fails + pass + + # grab the text for an individual chapter. + def getChapterText(self, url): + + logger.debug('Getting chapter text from: %s' % url) + + soup = bs.BeautifulSoup(self._fetchUrl(url)) # otherwise soup eats the br/hr tags. + + div = soup.find('div', {'id' : 'story'}) + + if None == div: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" 
% url) + + return self.utf8FromSoup(url,div) diff --git a/fanficdownloader/adapters/adapter_portkeyorg.py b/fanficdownloader/adapters/adapter_portkeyorg.py new file mode 100644 index 00000000..0a8e417d --- /dev/null +++ b/fanficdownloader/adapters/adapter_portkeyorg.py @@ -0,0 +1,282 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import time +import logging +logger = logging.getLogger(__name__) +import re +import urllib2 +import cookielib as cl + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, makeDate + +# Search for XXX comments--that's where things are most likely to need changing. + +# This function is called by the downloader in all adapter_*.py files +# in this dir to register the adapter class. So it needs to be +# updated to reflect the class below it. That, plus getSiteDomain() +# take care of 'Registering'. +def getClass(): + return PortkeyOrgAdapter # XXX + +# Class name has to be unique. Our convention is camel case the +# sitename with Adapter at the end. www is skipped. +class PortkeyOrgAdapter(BaseSiteAdapter): # XXX + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. 
+ # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + self.username = "NoneGiven" # if left empty, site doesn't return any message at all. + self.password = "" + self.is_adult=False + + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[2]) + + + # normalized story URL. + self._setURL('http://' + self.getSiteDomain() + '/story/'+self.story.getMetadata('storyId')) + + # Each adapter needs to have a unique site abbreviation. + self.story.setMetadata('siteabbrev','prtky') # XXX + + # The date format will vary from site to site. + # http://docs.python.org/library/datetime.html#strftime-strptime-behavior + self.dateformat = "%d/%m/%y" # XXX + + @staticmethod # must be @staticmethod, don't remove it. + def getSiteDomain(): + # The site domain. Does have www here, if it uses it. + return 'fanfiction.portkey.org' # XXX + + @classmethod + def getSiteExampleURLs(cls): + return "http://"+cls.getSiteDomain()+"/story/1234" + + def getSiteURLPattern(self): + return re.escape("http://"+self.getSiteDomain()+"/story/")+r"\d+(/\d+)?$" + + def use_pagecache(self): + ''' + adapters that will work with the page cache need to implement + this and change it to True. + ''' + return True + + ## Getting the chapter list and the meta data, plus 'is adult' checking. + def extractChapterUrlsAndMetadata(self): + + url = self.url + logger.debug("URL: "+url) + + # portkey screws around with using a different URL to set the + # cookie and it's a pain. So... cheat! 
+ if self.is_adult or self.getConfig("is_adult"): + cookie = cl.Cookie(version=0, name='verify17', value='1', + port=None, port_specified=False, + domain=self.getSiteDomain(), domain_specified=False, domain_initial_dot=False, + path='/', path_specified=True, + secure=False, + expires=time.time()+10000, + discard=False, + comment=None, + comment_url=None, + rest={'HttpOnly': None}, + rfc2109=False) + self.cookiejar.set_cookie(cookie) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + if "You must be over 18 years of age to view it" in data: # XXX + raise exceptions.AdultCheckRequired(self.url) + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulSoup(data) + #print data + + # Now go hunting for all the meta data and the chapter list. + + # Find authorid and URL from... author url. + a = soup.find('a', href=re.compile(r"/profile/\d+")) + #print("======a:%s"%a) + self.story.setMetadata('authorId',a['href'].split('/')[-1]) + self.story.setMetadata('authorUrl','http://'+self.host+a['href']) + self.story.setMetadata('author',a.string) + + ## Going to get the rest from the author page. + authsoup = bs.BeautifulSoup(self._fetchUrl(self.story.getMetadata('authorUrl'))) + + ## Title + titlea = authsoup.find('a', href=re.compile(r'/story/'+self.story.getMetadata('storyId')+"$")) + self.story.setMetadata('title',stripHTML(titlea)) + metablock = titlea.parent + + # Find the chapters: + for chapter in soup.find('select',{'name':'select5'}).findAll('option', {'value':re.compile(r'/story/'+self.story.getMetadata('storyId')+"/\d+$")}): + # just in case there's tags, like <i> in chapter titles. 
+ chtitle = stripHTML(chapter) + if not chtitle: + chtitle = "(Untitled Chapter)" + self.chapterUrls.append((chtitle,'http://'+self.host+chapter['value'])) + + if len(self.chapterUrls) == 0: + self.chapterUrls.append((stripHTML(self.story.getMetadata('title')),url)) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + # eFiction sites don't help us out a lot with their meta data + # formating, so it's a little ugly. + + # utility method + def defaultGetattr(d,k): + try: + return d[k] + except: + return "" + # <SPAN class="dark-small-bold">Contents:</SPAN> <SPAN class="small-grey">NC17 </SPAN> + # <SPAN class="dark-small-bold">Published: </SPAN><SPAN class="small-grey">12/11/07</SPAN> + # <SPAN class="dark-small-bold"><BR> + # Description:</SPAN> <SPAN class="small-black">A special book helps Harry tap into the power the Dark Lord knows not. Of course it’s a book on sex magic and rituals… but Harry’s not complaining. Spurned on by the ghost of a pervert founder, Harry leads his friends in the hunt for Voldemort’s Horcruxes. + # EROTIC COMEDY! Loads of crude humor and sexual situations! 
+ # </SPAN> + labels = metablock.findAll('span',{'class':'dark-small-bold'}) + for labelspan in labels: + value = labelspan.findNext('span').string + label = stripHTML(labelspan) +# print("\nlabel:%s\nlabel:%s\nvalue:%s\n"%(labelspan,label,value)) + + if 'Description' in label: + self.setDescription(url,value) + + if 'Contents' in label: + self.story.setMetadata('rating', value) + + if 'Words' in label: + self.story.setMetadata('numWords', value) + + # if 'Categories' in label: + # cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories')) + # catstext = [cat.string for cat in cats] + # for cat in catstext: + # self.story.addToList('category',cat.string) + + # if 'Characters' in label: + # chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters')) + # charstext = [char.string for char in chars] + # for char in charstext: + # self.story.addToList('characters',char.string) + + if 'Genre' in label: + # genre is typo'ed on the site--it falls between the + # dark-small-bold label and dark-small-bold content + # spans. + svalue = "" + value = labelspan.nextSibling + while not defaultGetattr(value,'class') == 'dark-small-bold': + svalue += str(value) + value = value.nextSibling + + for genre in svalue.split("/"): + genre = genre.strip() + if genre != 'None': + self.story.addToList('genre',genre) + + ## Not all sites use Warnings, but there's no harm to + ## leaving it in. Check to make sure the type_id number + ## is correct, though--it's site specific. 
+ # if 'Warnings' in label: + # warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX + # warningstext = [warning.string for warning in warnings] + # self.warning = ', '.join(warningstext) + # for warning in warningstext: + # self.story.addToList('warnings',warning.string) + + if 'Status' in label: + if 'Completed' in value: + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + + if 'Published' in label: + self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat)) + + if 'Updated' in label: + self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat)) + + # try: + # # Find Series name from series URL. + # a = metablock.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+")) + # series_name = a.string + # series_url = 'http://'+self.host+'/'+a['href'] + + # # use BeautifulSoup HTML parser to make everything easier to find. + # seriessoup = bs.BeautifulSoup(self._fetchUrl(series_url)) + # storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$')) + # i=1 + # for a in storyas: + # if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')): + # self.setSeries(series_name, i) + # break + # i+=1 + # except: + # # I find it hard to care if the series parsing fails + # pass + + # grab the text for an individual chapter. 
+ def getChapterText(self, url): + + logger.debug('Getting chapter text from: %s' % url) + data = self._fetchUrl(url) + + data = data.replace("HTML>","div>") + + soup = bs.BeautifulSoup(data) + + #print("soup:%s"%soup) + tag = soup.find('td', {'class' : 'story'}) + if tag == None and "<center><b>Chapter does not exist!</b></center>" in data: + print("Chapter is missing at: %s"%url) + return self.utf8FromSoup(url,bs.BeautifulStoneSoup("<div><p><center><b>Chapter does not exist!</b></center></p><p>Chapter is missing at: <a href='%s'>%s</a></p></div>"%(url,url))) + tag.name='div' # force to be a div to avoid problems with nook. + + centers = tag.findAll('center') + # first two and last two center tags are some script, 'report + # story', 'report story' and an ad. + centers[0].extract() + centers[1].extract() + centers[-1].extract() + centers[-2].extract() + + if None == tag: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) + + return self.utf8FromSoup(url,tag) diff --git a/fanficdownloader/adapters/adapter_potionsandsnitchesnet.py b/fanficdownloader/adapters/adapter_potionsandsnitchesnet.py new file mode 100644 index 00000000..da9e8459 --- /dev/null +++ b/fanficdownloader/adapters/adapter_potionsandsnitchesnet.py @@ -0,0 +1,211 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +# Software: eFiction +import time +import logging +logger = logging.getLogger(__name__) +import re +import urllib +import urllib2 + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, makeDate + +class PotionsAndSnitchesNetSiteAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + self.story.setMetadata('siteabbrev','pns') + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + + + # normalized story URL. + self._setURL('http://' + self.getSiteDomain() + '/fanfiction/viewstory.php?sid='+self.story.getMetadata('storyId')) + + + @staticmethod + def getSiteDomain(): + return 'www.potionsandsnitches.net' + + @classmethod + def getAcceptDomains(cls): + return ['www.potionsandsnitches.net','potionsandsnitches.net'] + + @classmethod + def getSiteExampleURLs(cls): + return "http://www.potionsandsnitches.net/fanfiction/viewstory.php?sid=1234" + + def getSiteURLPattern(self): + return re.escape("http://")+r"(www\.)?"+re.escape("potionsandsnitches.net/fanfiction/viewstory.php?sid=")+r"\d+$" + + def extractChapterUrlsAndMetadata(self): + + url = self.url+'&index=1' + logger.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + if "Access denied. This story has not been validated by the adminstrators of this site." in data: + raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. 
This story has not been validated by the adminstrators of this site.") + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulSoup(data) + + ## Title + a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) + self.story.setMetadata('title',stripHTML(a)) + + # Find authorid and URL from... author url. + a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) + self.story.setMetadata('authorId',a['href'].split('=')[1]) + self.story.setMetadata('authorUrl','http://'+self.host+'/fanfiction/'+a['href']) + self.story.setMetadata('author',a.string) + + # Find the chapters: + for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")): + # just in case there's tags, like <i> in chapter titles. + self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/fanfiction/'+chapter['href'])) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + ## <meta name='description' content='<p>Description</p> ...' > + ## Summary, strangely, is in the content attr of a <meta name='description'> tag + ## which is escaped HTML. Unfortunately, we can't use it because they don't + ## escape (') chars in the desc, breakin the tag. 
+ #meta_desc = soup.find('meta',{'name':'description'}) + #metasoup = bs.BeautifulStoneSoup(meta_desc['content']) + #self.story.setMetadata('description',stripHTML(metasoup)) + + def defaultGetattr(d,k): + try: + return d[k] + except: + return "" + + # <span class="label">Rated:</span> NC-17<br /> etc + labels = soup.findAll('span',{'class':'label'}) + for labelspan in labels: + value = labelspan.nextSibling + label = labelspan.string + + if 'Summary' in label: + ## Everything until the next div class='listbox' + svalue = "" + while not defaultGetattr(value,'class') == 'listbox': + svalue += str(value) + value = value.nextSibling + self.setDescription(url,svalue) + #self.story.setMetadata('description',stripHTML(svalue)) + + if 'Rated' in label: + self.story.setMetadata('rating', value) + + if 'Word count' in label: + self.story.setMetadata('numWords', value) + + if 'Categories' in label: + cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories')) + catstext = [cat.string for cat in cats] + for cat in catstext: + self.story.addToList('category',cat.string) + + if 'Characters' in label: + chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters')) + charstext = [char.string for char in chars] + for char in charstext: + if "Snape and Harry (required)" in char: + self.story.addToList('characters',"Snape") + self.story.addToList('characters',"Harry") + else: + self.story.addToList('characters',char.string) + + if 'Genre' in label: + genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class')) + genrestext = [genre.string for genre in genres] + self.genre = ', '.join(genrestext) + for genre in genrestext: + self.story.addToList('genre',genre.string) + + if 'Completed' in label: + if 'Yes' in value: + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + + if 'Published' in label: + # limit date values, there's some extra chars. 
+ self.story.setMetadata('datePublished', makeDate(stripHTML(value[:12]), "%d %b %Y")) + + if 'Updated' in label: + self.story.setMetadata('dateUpdated', makeDate(stripHTML(value[:12]), "%d %b %Y")) + + try: + # Find Series name from series URL. + a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+")) + series_name = a.string + series_url = 'http://'+self.host+'/fanfiction/'+a['href'] + + # use BeautifulSoup HTML parser to make everything easier to find. + seriessoup = bs.BeautifulSoup(self._fetchUrl(series_url)) + storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$')) + i=1 + for a in storyas: + if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')): + self.setSeries(series_name, i) + self.story.setMetadata('seriesUrl',series_url) + break + i+=1 + + except: + # I find it hard to care if the series parsing fails + pass + + + def getChapterText(self, url): + + logger.debug('Getting chapter text from: %s' % url) + + soup = bs.BeautifulStoneSoup(self._fetchUrl(url), + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + + div = soup.find('div', {'id' : 'story'}) + + if None == div: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) + + return self.utf8FromSoup(url,div) + +def getClass(): + return PotionsAndSnitchesNetSiteAdapter + diff --git a/fanficdownloader/adapters/adapter_potterficscom.py b/fanficdownloader/adapters/adapter_potterficscom.py new file mode 100644 index 00000000..c8379274 --- /dev/null +++ b/fanficdownloader/adapters/adapter_potterficscom.py @@ -0,0 +1,281 @@ +# -*- coding: utf-8 -*- + +# Copyright 2013 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import datetime +import logging +logger = logging.getLogger(__name__) +import re +import urllib2 + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. import exceptions as exceptions + +from base_adapter import BaseSiteAdapter + +# This function is called by the downloader in all adapter_*.py files +# in this dir to register the adapter class. So it needs to be +# updated to reflect the class below it. That, plus getSiteDomain() +# take care of 'Registering'. +def getClass(): + return PotterFicsComAdapter + +# Class name has to be unique. Our convention is camel case the +# sitename with Adapter at the end. www is skipped. +class PotterFicsComAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + self.username = "NoneGiven" # if left empty, site doesn't return any message at all. + self.password = "" + self.is_adult=False + + # get storyId from url--url validation guarantees query correct + m = re.match(self.getSiteURLPattern(),url) + if m: + self.story.setMetadata('storyId',m.group('id')) + + # normalized story URL. 
gets rid of chapter if there, left with chapter index URL + nurl = "http://"+self.getSiteDomain()+"/historias/"+self.story.getMetadata('storyId') + self._setURL(nurl) + else: + raise exceptions.InvalidStoryURL(url, + self.getSiteDomain(), + self.getSiteExampleURLs()) + + + # Each adapter needs to have a unique site abbreviation. + self.story.setMetadata('siteabbrev','potficscom') + + @staticmethod # must be @staticmethod, don't remove it. + def getSiteDomain(): + # The site domain. Does have www here, if it uses it. + return 'www.potterfics.com' + + @classmethod + def getSiteExampleURLs(cls): + return "http://www.potterfics.com/historias/12345 http://www.potterfics.com/historias/12345/capitulo-1 " + + def getSiteURLPattern(self): + #http://www.potterfics.com/historias/127583 + #http://www.potterfics.com/historias/127583/capitulo-1 + #http://www.potterfics.com/historias/127583/capitulo-4 + #http://www.potterfics.com/historias/92810 -> Complete story + #http://www.potterfics.com/historias/111194 -> Complete, single chap + p = re.escape("http://"+self.getSiteDomain()+"/historias/")+\ + r"(?P<id>\d+)(/capitulo-(?P<ch>\d+))?/?$" + return p + + def needToLoginCheck(self, data): + # partials used to avoid having to figure out what was wrong + # with included utf8 higher chars. 
+ if 'Para ver esta historia, por favor inicia tu sesi' in data \ + or '<script>alert("El nombre de usuario o contrase' in data: + return True + else: + return False + + def performLogin(self,url): + params = {} + + if self.password: + params['login_usuario'] = self.username + params['login_password'] = self.password + else: + params['login_usuario'] = self.getConfig("username") + params['login_password'] = self.getConfig("password") + params['login_ck'] = '1' + + loginUrl = 'http://www.potterfics.com/secciones/usuarios/login.php' + logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl, + params['login_usuario'])) + d = self._postUrl(loginUrl,params) + + #print("d:%s"%d) + if '<script>alert("El nombre de usuario o contrase' in d: + logger.info("Failed to login to URL %s as %s" % (loginUrl, + params['login_usuario'])) + raise exceptions.FailedToLogin(url,params['login_usuario']) + return False + else: + return True + + def extractChapterUrlsAndMetadata(self): + + #this converts '/historias/12345' to 'http://www.potterfics.com/historias/12345' + def makeAbsoluteURL(url): + if url[0] == '/': + url = 'http://'+self.getSiteDomain()+url + return url + + #use this to get month numbers from Spanish months + SpanishMonths = { + 'enero' : '01', + 'febrero' : '02', + 'marzo' : '03', + 'abril' : '04', + 'mayo' : '05', + 'junio' : '06', + 'julio' : '07', + 'agosto' : '08', + 'septiembre' : '09', + 'octubre' : '10', + 'noviembre' : '11', + 'diciembre' : '12' + } + + url = self.url + logger.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + #print data + + #deal with adult content login + if self.needToLoginCheck(data): + # need to log in for this one. 
+ self.performLogin(url) + data = self._fetchUrl(url,usecache=False) + + #set constant meta for this site: + #Set Language = Spanish + self.story.setMetadata('language', 'Spanish') + #Set Category = Harry Potter + # This is better done in plugin-defaults.ini and defaults.ini + # by adding a section for this site with the line: + # extracategories:Harry Potter + #self.story.addToList('category','Harry Potter') + + #get the rest of the meta + # use BeautifulSoup HTML parser to make everything easier to find. + #self closing br and img present! + soup = bs.BeautifulSoup(data,selfClosingTags=('br','img')) + + #we want the second table directly under the body, contains all the metadata + table = soup.html.body.findAll('table', recursive=False)[1] + #within that, we want the second row, first cell + cell = table.tr.findNextSibling('tr').td + + #find first metadata block--isn't first if logged in + mb = cell.div.findNextSibling('div',{'align':'left'}) + #Get meta... + self.story.setMetadata('title', stripHTML(mb.b)) + #strip out brackets on rating + self.story.setMetadata('rating', mb.span.string[1:-1]) + #Completion status is denoted by the presence of this image: + if mb.find('img',title="Historia terminada"): + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + + #find next metadata block + #author details + mb = mb.findNextSibling('div') + self.story.setMetadata('author', mb.b.a.string.strip()) + self.story.setMetadata('authorUrl', makeAbsoluteURL(mb.b.a['href'])) + self.story.setMetadata('authorId', self.story.getMetadata('authorUrl').split('/')[4]) + #dates and times + mb = mb.find('span') + #posted/published = Escrita + date = mb.find(text=re.compile('Escrita el ')).strip().split() + year = int(date[7][:-1]) # need to remove the last char from year, it is a comma + month = int(SpanishMonths[date[5].lower()]) + day = int(date[3]) + time = date[8].split(':') + hour = int(time[0]) + minute = int(time[1]) + 
self.story.setMetadata('datePublished', datetime.datetime(year, month, day, hour, minute)) + #updated = Actualizada + date = mb.find(text=re.compile('Actualizada el ')).strip().split() + year = int(date[7][:-1]) # need to remove the last char from year, it is a comma + month = int(SpanishMonths[date[5].lower()]) + day = int(date[3]) + time = date[8].split(':') + hour = int(time[0]) + minute = int(time[1]) + self.story.setMetadata('dateUpdated', datetime.datetime(year, month, day, hour, minute)) + + mb = mb.span.findNextSibling('span').findNextSibling('span') + wc = mb.find(text=re.compile(' palabras en total')).strip() + self.story.setMetadata('numWords', wc.split()[0]) + + #then we come to categories and genres. Oh dear. On this site, categories hold everything from genre, to ships, to crossovers. + #To make things worse, there is also another genre field, which often holds similar/duplicate info. Links to genre pages do not work + #though, so perhaps those will be phased out? + #for now, put them all into the genre list + links = mb.findAll('a',href=re.compile('/(categorias|generos)/\d+')) + genlist = [i.string.strip() for i in links] + self.story.extendList('genre',genlist) + + #get the chapter urls + #we can go back to the table cell we found before + #get its last element and work backwards to find the last ordered list on the page + list = cell.contents[len(cell)-1].findPrevious('ol') + chapters = [] + revs = 0 + chnum = 0 + for li in list: + chnum += 1 + chTitle = str(chnum) + '. ' + li.a.b.string.strip() + chURL = makeAbsoluteURL(li.a['href']) + chapters.append((chTitle,chURL)) + #Get reviews, add to total + revs += int(li.div.a.string.split()[0]) + + self.chapterUrls.extend(chapters) + self.story.setMetadata('numChapters', len(chapters)) + self.story.setMetadata('reviews', revs) + + #Now for the description... this may be tricky... 
+ #if it is there (doesn't have to be), it will be before the chapter list, + #separated by a horizontal rule, and after the google ad bar + + #get list's parent div + mb = list.parent + #get the div before that, will either be the description, or the google ad bar + mb = mb.findPreviousSibling('div') + if 'google_ad_client' in str(mb): + #couldn't find description, leaving it blank + pass + else: + self.setDescription(url,mb) + + # grab the text for an individual chapter. + def getChapterText(self, url): + + logger.debug('Getting chapter text from: %s' % url) + + soup = bs.BeautifulSoup(self._fetchUrl(url), + selfClosingTags=('br','hr','img')) + + div = soup.find('div', {'id' : 'cuerpoHistoria'}) + if None == div: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) + + return self.utf8FromSoup(url,div) diff --git a/fanficdownloader/adapters/adapter_potterheadsanonymouscom.py b/fanficdownloader/adapters/adapter_potterheadsanonymouscom.py new file mode 100644 index 00000000..c8f061c7 --- /dev/null +++ b/fanficdownloader/adapters/adapter_potterheadsanonymouscom.py @@ -0,0 +1,299 @@ +# -*- coding: utf-8 -*- + +# Copyright 2013 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Software: eFiction +import time +import logging +logger = logging.getLogger(__name__) +import re +import urllib2 + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. 
import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, makeDate + +def getClass(): + return PotterHeadsAnonymousComAdapter + +# Class name has to be unique. Our convention is camel case the +# sitename with Adapter at the end. www is skipped. +class PotterHeadsAnonymousComAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + self.username = "NoneGiven" # if left empty, site doesn't return any message at all. + self.password = "" + self.is_adult=False + + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + + + # normalized story URL. + self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId')) + + # Each adapter needs to have a unique site abbreviation. + self.story.setMetadata('siteabbrev','pha') + + # The date format will vary from site to site. + # http://docs.python.org/library/datetime.html#strftime-strptime-behavior + self.dateformat = "%d %b %Y" + + @staticmethod # must be @staticmethod, don't remove it. + def getSiteDomain(): + # The site domain. Does have www here, if it uses it. + return 'fanfic.potterheadsanonymous.com' + + @classmethod + def getSiteExampleURLs(cls): + return "http://"+cls.getSiteDomain()+"/viewstory.php?sid=1234" + + def getSiteURLPattern(self): + return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$" + + ## Login seems to be reasonably standard across eFiction sites. 
+ def needToLoginCheck(self, data): + if 'Registered Users Only' in data \ + or 'There is no such account on our website' in data \ + or "That password doesn't match the one in our database" in data: + return True + else: + return False + + def performLogin(self, url): + params = {} + + if self.password: + params['penname'] = self.username + params['password'] = self.password + else: + params['penname'] = self.getConfig("username") + params['password'] = self.getConfig("password") + params['cookiecheck'] = '1' + params['submit'] = 'Submit' + + loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login' + logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl, + params['penname'])) + + d = self._fetchUrl(loginUrl, params) + + if "Member Account" not in d : #Member Account + logger.info("Failed to login to URL %s as %s" % (loginUrl, + params['penname'])) + raise exceptions.FailedToLogin(url,params['penname']) + return False + else: + return True + + ## Getting the chapter list and the meta data, plus 'is adult' checking. + def extractChapterUrlsAndMetadata(self): + + if self.is_adult or self.getConfig("is_adult"): + # Weirdly, different sites use different warning numbers. + # If the title search below fails, there's a good chance + # you need a different number. print data at that point + # and see what the 'click here to continue' url says. + addurl = "&ageconsent=ok&warning=4" + else: + addurl="" + + # index=1 makes sure we see the story chapter index. Some + # sites skip that for one-chapter stories. + url = self.url+'&index=1'+addurl + logger.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + if self.needToLoginCheck(data): + # need to log in for this one. + self.performLogin(url) + data = self._fetchUrl(url) + + # Since the warning text can change by warning level, let's + # look for the warning pass url. 
ksarchive uses + # &warning= -- actually, so do other sites. Must be an + # eFiction book. + + # viewstory.php?sid=1882&warning=4 + # viewstory.php?sid=1654&ageconsent=ok&warning=5 + #print data + #m = re.search(r"'viewstory.php\?sid=1882(&warning=4)'",data) + m = re.search(r"'viewstory.php\?sid=\d+((?:&ageconsent=ok)?&warning=\d+)'",data) + if m != None: + if self.is_adult or self.getConfig("is_adult"): + # We tried the default and still got a warning, so + # let's pull the warning number from the 'continue' + # link and reload data. + addurl = m.group(1) + # correct stupid & error in url. + addurl = addurl.replace("&","&") + url = self.url+'&index=1'+addurl + logger.debug("URL 2nd try: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + else: + raise exceptions.AdultCheckRequired(self.url) + + if "Access denied. This story has not been validated by the adminstrators of this site." in data: + raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.") + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulSoup(data) + # print data + + # Now go hunting for all the meta data and the chapter list. + + pagetitle = soup.find('div',{'id':'pagetitle'}) + + ## Title + a = pagetitle.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) + self.story.setMetadata('title',stripHTML(a)) + + # Find authorid and URL from... author url. 
+ a = pagetitle.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) + self.story.setMetadata('authorId',a['href'].split('=')[1]) + self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href']) + self.story.setMetadata('author',a.string) + + # Find the chapters: + for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")): + # just in case there's tags, like <i> in chapter titles. + self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+chapter['href']+addurl)) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + # eFiction sites don't help us out a lot with their meta data + # formating, so it's a little ugly. + + # utility method + def defaultGetattr(d,k): + try: + return d[k] + except: + return "" + + # <span class="label">Rated:</span> NC-17<br /> etc + labels = soup.findAll('span',{'class':'label'}) + for labelspan in labels: + value = labelspan.nextSibling + label = labelspan.string + + if 'Summary' in label: + ## Everything until the next span class='label' + svalue = "" + while not defaultGetattr(value,'class') == 'label': + svalue += str(value) + value = value.nextSibling + self.setDescription(url,svalue) + #self.story.setMetadata('description',stripHTML(svalue)) + + if 'Rated' in label: + self.story.setMetadata('rating', value) + + if 'Word count' in label: + self.story.setMetadata('numWords', value) + + if 'Categories' in label: + cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories')) + for cat in cats: + self.story.addToList('category',cat.string) + + if 'Characters' in label: + chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters')) + for char in chars: + self.story.addToList('characters',char.string) + + if 'Genre' in label: + genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1')) + for genre in genres: + self.story.addToList('genre',genre.string) + + if 
'Warnings' in label: + warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=3')) + for warning in warnings: + self.story.addToList('warnings',warning.string) + + if 'Completed' in label: + if 'Yes' in value: + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + + if 'Published' in label: + self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat)) + + if 'Updated' in label: + # there's a stray [ at the end. + #value = value[0:-1] + self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat)) + + try: + # Find Series name from series URL. + a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+")) + series_name = a.string + series_url = 'http://'+self.host+'/'+a['href'] + + # use BeautifulSoup HTML parser to make everything easier to find. + seriessoup = bs.BeautifulSoup(self._fetchUrl(series_url)) + storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$')) + i=1 + for a in storyas: + if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')): + self.setSeries(series_name, i) + self.story.setMetadata('seriesUrl',series_url) + break + i+=1 + + except: + # I find it hard to care if the series parsing fails + pass + + # grab the text for an individual chapter. + def getChapterText(self, url): + + logger.debug('Getting chapter text from: %s' % url) + + soup = bs.BeautifulStoneSoup(self._fetchUrl(url), + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + + div = soup.find('div', {'id' : 'story'}) + + if None == div: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" 
% url) + + return self.utf8FromSoup(url,div) diff --git a/fanficdownloader/adapters/adapter_pretendercentrecom.py b/fanficdownloader/adapters/adapter_pretendercentrecom.py new file mode 100644 index 00000000..5e9590a5 --- /dev/null +++ b/fanficdownloader/adapters/adapter_pretendercentrecom.py @@ -0,0 +1,254 @@ +# -*- coding: utf-8 -*- + +# Copyright 2012 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Software: eFiction +import time +import logging +logger = logging.getLogger(__name__) +import re +import urllib2 + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, makeDate + +def getClass(): + return PretenderCenterComAdapter + +# Class name has to be unique. Our convention is camel case the +# sitename with Adapter at the end. www is skipped. +class PretenderCenterComAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + self.username = "NoneGiven" # if left empty, site doesn't return any message at all. 
+ self.password = "" + self.is_adult=False + + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + + + # normalized story URL. + self._setURL('http://' + self.getSiteDomain() + '/missingpieces/viewstory.php?sid='+self.story.getMetadata('storyId')) + + # Each adapter needs to have a unique site abbreviation. + self.story.setMetadata('siteabbrev','ptdc') + + # The date format will vary from site to site. + # http://docs.python.org/library/datetime.html#strftime-strptime-behavior + self.dateformat = "%d/%m/%y" + + @staticmethod # must be @staticmethod, don't remove it. + def getSiteDomain(): + # The site domain. Does have www here, if it uses it. + return 'pretendercentre.com' + + @classmethod + def getAcceptDomains(cls): + return ['www.pretendercentre.com','pretendercentre.com'] + + @classmethod + def getSiteExampleURLs(cls): + return "http://"+cls.getSiteDomain()+"/missingpieces/viewstory.php?sid=1234" + + def getSiteURLPattern(self): + return re.escape("http://")+"(www\.)?"+re.escape(self.getSiteDomain()+"/missingpieces/viewstory.php?sid=")+r"\d+$" + + ## Getting the chapter list and the meta data, plus 'is adult' checking. + def extractChapterUrlsAndMetadata(self): + + if self.is_adult or self.getConfig("is_adult"): + # Weirdly, different sites use different warning numbers. + # If the title search below fails, there's a good chance + # you need a different number. print data at that point + # and see what the 'click here to continue' url says. + addurl = "&ageconsent=ok&warning=4" + else: + addurl="" + + # index=1 makes sure we see the story chapter index. Some + # sites skip that for one-chapter stories. 
+ url = self.url+'&index=1'+addurl + logger.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + m = re.search(r"'viewstory.php\?sid=\d+((?:&ageconsent=ok)?&warning=\d+)'",data) + if m != None: + if self.is_adult or self.getConfig("is_adult"): + # We tried the default and still got a warning, so + # let's pull the warning number from the 'continue' + # link and reload data. + addurl = m.group(1) + # correct stupid & error in url. + addurl = addurl.replace("&","&") + url = self.url+'&index=1'+addurl + logger.debug("URL 2nd try: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + else: + raise exceptions.AdultCheckRequired(self.url) + + if "Access denied. This story has not been validated by the adminstrators of this site." in data: + raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.") + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulSoup(data) + # print data + + # Now go hunting for all the meta data and the chapter list. + + ## Title + a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) + self.story.setMetadata('title',stripHTML(a)) + + # Find authorid and URL from... author url. + a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) + self.story.setMetadata('authorId',a['href'].split('=')[1]) + self.story.setMetadata('authorUrl','http://'+self.host+'/missingpieces/'+a['href']) + self.story.setMetadata('author',a.string) + + # Find the chapters: + for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")): + # just in case there's tags, like <i> in chapter titles. 
+ self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/missingpieces/'+chapter['href']+addurl)) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + # eFiction sites don't help us out a lot with their meta data + # formating, so it's a little ugly. + + # utility method + def defaultGetattr(d,k): + try: + return d[k] + except: + return "" + + + # <span class="label">Rated:</span> NC-17<br /> etc + labels = soup.findAll('span',{'class':'label'}) + for labelspan in labels: + value = labelspan.nextSibling + label = labelspan.string + + if 'Summary' in label: + ## Everything until the next span class='label' + svalue = "" + while not defaultGetattr(value,'class') == 'label': + svalue += str(value) + value = value.nextSibling + self.setDescription(url,svalue) + #self.story.setMetadata('description',stripHTML(svalue)) + + if 'Rated' in label: + self.story.setMetadata('rating', value) + + if 'Word count' in label: + self.story.setMetadata('numWords', value) + + if 'Categories' in label: + cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories')) + for cat in cats: + self.story.addToList('category',cat.string) + + if 'Characters' in label: + chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters')) + for char in chars: + self.story.addToList('characters',char.string) + + if 'Genre' in label: + genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1')) # XXX + for genre in genres: + self.story.addToList('genre',genre.string) + + if 'Warnings' in label: + warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX + for warning in warnings: + self.story.addToList('warnings',warning.string) + + if 'Completed' in label: + if 'Yes' in value: + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + + if 'Published' in label: + self.story.setMetadata('datePublished', 
makeDate(stripHTML(value), self.dateformat)) + + if 'Updated' in label: + self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat)) + + try: + # Find Series name from series URL. + a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+")) + series_name = a.string + series_url = 'http://'+self.host+'/missingpieces/'+a['href'] + + # use BeautifulSoup HTML parser to make everything easier to find. + seriessoup = bs.BeautifulSoup(self._fetchUrl(series_url)) + # can't use ^viewstory...$ in case of higher rated stories with javascript href. + storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+')) + i=1 + for a in storyas: + # skip 'report this' and 'TOC' links + if 'contact.php' not in a['href'] and 'index' not in a['href']: + if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')): + self.setSeries(series_name, i) + self.story.setMetadata('seriesUrl',series_url) + break + i+=1 + + except: + # I find it hard to care if the series parsing fails + pass + + # grab the text for an individual chapter. + def getChapterText(self, url): + + logger.debug('Getting chapter text from: %s' % url) + + soup = bs.BeautifulStoneSoup(self._fetchUrl(url), + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + + div = soup.findAll('div', {'id' : 'story'})[1] + + if None == div: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) + + return self.utf8FromSoup(url,div) diff --git a/fanficdownloader/adapters/adapter_psychficcom.py b/fanficdownloader/adapters/adapter_psychficcom.py new file mode 100644 index 00000000..fa53815b --- /dev/null +++ b/fanficdownloader/adapters/adapter_psychficcom.py @@ -0,0 +1,249 @@ +# -*- coding: utf-8 -*- + +# Copyright 2013 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Software: eFiction +import time +import logging +logger = logging.getLogger(__name__) +import re +import urllib2 + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, makeDate + +def getClass(): + return PsychFicComAdapter + +# Class name has to be unique. Our convention is camel case the +# sitename with Adapter at the end. www is skipped. +class PsychFicComAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + self.username = "NoneGiven" # if left empty, site doesn't return any message at all. + self.password = "" + self.is_adult=False + + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + + + # normalized story URL. + self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId')) + + # Each adapter needs to have a unique site abbreviation. + self.story.setMetadata('siteabbrev','psyf') + + # The date format will vary from site to site. + # http://docs.python.org/library/datetime.html#strftime-strptime-behavior + self.dateformat = "%B %d, %Y" + + @staticmethod # must be @staticmethod, don't remove it. + def getSiteDomain(): + # The site domain. 
Does have www here, if it uses it. + return 'www.psychfic.com' + + @classmethod + def getSiteExampleURLs(cls): + return "http://"+cls.getSiteDomain()+"/viewstory.php?sid=1234" + + def getSiteURLPattern(self): + return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$" + + ## Getting the chapter list and the meta data, plus 'is adult' checking. + def extractChapterUrlsAndMetadata(self): + + if self.is_adult or self.getConfig("is_adult"): + # Weirdly, different sites use different warning numbers. + # If the title search below fails, there's a good chance + # you need a different number. print data at that point + # and see what the 'click here to continue' url says. + addurl = "&ageconsent=ok&warning=4" + else: + addurl="" + + # index=1 makes sure we see the story chapter index. Some + # sites skip that for one-chapter stories. + url = self.url+'&index=1'+addurl + logger.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + m = re.search(r"'viewstory.php\?sid=\d+((?:&ageconsent=ok)?&warning=\d+)'",data) + if m != None: + if self.is_adult or self.getConfig("is_adult"): + # We tried the default and still got a warning, so + # let's pull the warning number from the 'continue' + # link and reload data. + addurl = m.group(1) + # correct stupid & error in url. + addurl = addurl.replace("&","&") + url = self.url+'&index=1'+addurl + logger.debug("URL 2nd try: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + else: + raise exceptions.AdultCheckRequired(self.url) + + if "Access denied. This story has not been validated by the adminstrators of this site." in data: + raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. 
This story has not been validated by the adminstrators of this site.") + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulSoup(data) + # print data + + # Now go hunting for all the meta data and the chapter list. + + ## Title + a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) + self.story.setMetadata('title',stripHTML(a)) + + # Find authorid and URL from... author url. + a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) + self.story.setMetadata('authorId',a['href'].split('=')[1]) + self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href']) + self.story.setMetadata('author',a.string) + + # Find the chapters: + for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")): + # just in case there's tags, like <i> in chapter titles. + self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+chapter['href']+addurl)) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + # eFiction sites don't help us out a lot with their meta data + # formating, so it's a little ugly. 
+ + # utility method + def defaultGetattr(d,k): + try: + return d[k] + except: + return "" + + # <span class="label">Rated:</span> NC-17<br /> etc + labels = soup.findAll('span',{'class':'label'}) + for labelspan in labels: + value = labelspan.nextSibling + label = labelspan.text + + if 'Summary' in label: + ## Everything until the next span class='label' + svalue = "" + while not defaultGetattr(value,'class') == 'label': + svalue += str(value) + value = value.nextSibling + self.setDescription(url,svalue) + #self.story.setMetadata('description',stripHTML(svalue)) + + if 'Rated' in label: + self.story.setMetadata('rating', value) + + if 'Word count' in label: + self.story.setMetadata('numWords', value) + + if 'Categories' in label: + cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories')) + for cat in cats: + self.story.addToList('category',cat.string) + + if 'Characters' in label: + chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters')) + for char in chars: + self.story.addToList('characters',char.string) + + if 'Genre' in label: + genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1')) + for genre in genres: + self.story.addToList('genre',genre.string) + + if 'Warnings' in label: + warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) + for warning in warnings: + self.story.addToList('warnings',warning.string) + + if 'Completed' in label: + if 'Yes' in value: + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + + if 'Published' in label: + self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat)) + + if 'Updated' in label: + self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat)) + + try: + # Find Series name from series URL. 
+ a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+")) + series_name = a.string + series_url = 'http://'+self.host+'/'+a['href'] + + # use BeautifulSoup HTML parser to make everything easier to find. + seriessoup = bs.BeautifulSoup(self._fetchUrl(series_url)) + # can't use ^viewstory...$ in case of higher rated stories with javascript href. + storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+')) + i=1 + for a in storyas: + # skip 'report this' and 'TOC' links + if 'contact.php' not in a['href'] and 'index' not in a['href']: + if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')): + self.setSeries(series_name, i) + self.story.setMetadata('seriesUrl',series_url) + break + i+=1 + + except: + # I find it hard to care if the series parsing fails + pass + + # grab the text for an individual chapter. + def getChapterText(self, url): + + logger.debug('Getting chapter text from: %s' % url) + + soup = bs.BeautifulStoneSoup(self._fetchUrl(url), + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + + div = soup.find('div', {'id' : 'story'}) + + if None == div: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) + + return self.utf8FromSoup(url,div) diff --git a/fanficdownloader/adapters/adapter_qafficcom.py b/fanficdownloader/adapters/adapter_qafficcom.py new file mode 100644 index 00000000..c60c330d --- /dev/null +++ b/fanficdownloader/adapters/adapter_qafficcom.py @@ -0,0 +1,265 @@ +# -*- coding: utf-8 -*- + +# Copyright 2012 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Software: eFiction +import time +import logging +logger = logging.getLogger(__name__) +import re +import urllib2 + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, makeDate + +def getClass(): + return QafFicComAdapter + +# Class name has to be unique. Our convention is camel case the +# sitename with Adapter at the end. www is skipped. +class QafFicComAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + self.username = "NoneGiven" # if left empty, site doesn't return any message at all. + self.password = "" + self.is_adult=False + + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + + + # normalized story URL. + self._setURL('http://' + self.getSiteDomain() + '/atp/viewstory.php?sid='+self.story.getMetadata('storyId')) + + # Each adapter needs to have a unique site abbreviation. + self.story.setMetadata('siteabbrev','atp') + + # The date format will vary from site to site. + # http://docs.python.org/library/datetime.html#strftime-strptime-behavior + self.dateformat = "%m/%d/%y" + + @staticmethod # must be @staticmethod, don't remove it. + def getSiteDomain(): + # The site domain. 
Does have www here, if it uses it. + return 'www.qaf-fic.com' + + @classmethod + def getSiteExampleURLs(cls): + return "http://"+cls.getSiteDomain()+"/atp/viewstory.php?sid=1234" + + def getSiteURLPattern(self): + return re.escape("http://"+self.getSiteDomain()+"/atp/viewstory.php?sid=")+r"\d+$" + + + ## Getting the chapter list and the meta data, plus 'is adult' checking. + def extractChapterUrlsAndMetadata(self): + + if self.is_adult or self.getConfig("is_adult"): + # Weirdly, different sites use different warning numbers. + # If the title search below fails, there's a good chance + # you need a different number. print data at that point + # and see what the 'click here to continue' url says. + addurl = "&warning=NC-17" + else: + addurl="" + + # index=1 makes sure we see the story chapter index. Some + # sites skip that for one-chapter stories. + url = self.url+addurl + logger.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + m = re.search(r"'viewstory.php\?sid=\d+((?:&ageconsent=ok)?&warning=\s+)'",data) + if m != None: + if self.is_adult or self.getConfig("is_adult"): + # We tried the default and still got a warning, so + # let's pull the warning number from the 'continue' + # link and reload data. + addurl = m.group(1) + # correct stupid & error in url. + addurl = addurl.replace("&","&") + url = self.url+addurl + logger.debug("URL 2nd try: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + else: + raise exceptions.AdultCheckRequired(self.url) + + if "Access denied. This story has not been validated by the adminstrators of this site." in data: + raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. 
This story has not been validated by the adminstrators of this site.") + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulSoup(data) + # print data + + # Now go hunting for all the meta data and the chapter list. + + ## Title and author + a = soup.find('div', {'id' : 'pagetitle'}) + + aut = a.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) + self.story.setMetadata('authorId',aut['href'].split('=')[1]) + self.story.setMetadata('authorUrl','http://'+self.host+'/atp/'+aut['href']) + self.story.setMetadata('author',aut.string) + aut.extract() + + self.story.setMetadata('title',stripHTML(a)[:(len(a.string)-3)]) + + # Find the chapters: + chapters=soup.find('select') + if chapters != None: + for chapter in chapters.findAll('option'): + # just in case there's tags, like <i> in chapter titles. + self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/atp/viewstory.php?sid='+self.story.getMetadata('storyId')+'&chapter='+chapter['value'])) + else: + self.chapterUrls.append((self.story.getMetadata('title'),url)) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + asoup = bs.BeautifulSoup(self._fetchUrl(self.story.getMetadata('authorUrl'))) + for list in asoup.findAll('div', {'class' : re.compile('listbox\s+')}): + a = list.find('a') + if ('viewstory.php?sid='+self.story.getMetadata('storyId')) in a['href']: + break + + # eFiction sites don't help us out a lot with their meta data + # formating, so it's a little ugly. 
+ + # utility method + def defaultGetattr(d,k): + try: + return d[k] + except: + return "" + + # <span class="label">Rated:</span> NC-17<br /> etc + labels = list.findAll('span', {'class' : 'classification'}) + for labelspan in labels: + label = labelspan.string + value = labelspan.nextSibling + + if 'Summary' in label: + ## Everything until the next span class='label' + svalue = "" + while not defaultGetattr(value,'class') == 'classification' and value != None: + if "Featured Stories" not in value: + svalue += str(value) + value = value.nextSibling + self.setDescription(url,svalue) + #self.story.setMetadata('description',stripHTML(svalue)) + + if 'Rated' in label: + self.story.setMetadata('rating', value[:len(value)-2]) + + if 'Word count' in label: + self.story.setMetadata('numWords', value) + + if 'Categories' in label: + cats = labelspan.parent.findAll('a',href=re.compile(r'categories.php\?catid=\d+')) + for cat in cats: + self.story.addToList('category',cat.string) + + if 'Characters' in label: + for char in value.string.split(', '): + if not 'None' in char: + self.story.addToList('characters',char) + + if 'Genre' in label: + for genre in value.string.split(', '): + if not 'None' in genre: + self.story.addToList('genre',genre) + + if 'Warnings' in label: + for warning in value.string.split(', '): + if not 'None' in warning: + self.story.addToList('warnings',warning) + + if 'Completed' in label: + if 'Yes' in value: + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + + if 'Published' in label: + self.story.setMetadata('datePublished', makeDate(stripHTML(value.split(' ::')[0]), self.dateformat)) + + if 'Updated' in label: + # there's a stray [ at the end. 
+ #value = value[0:-1] + self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat)) + + try: + if list.find('a', href=re.compile(r"series.php")) != None: + for series in asoup.findAll('a', href=re.compile(r"series.php\?seriesid=\d+")): + # Find Series name from series URL. + series_url = 'http://'+self.host+'/atp/'+series['href'] + # use BeautifulSoup HTML parser to make everything easier to find. + seriessoup = bs.BeautifulSoup(self._fetchUrl(series_url)) + storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$')) + i=1 + for a in storyas: + if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')): + name=seriessoup.find('div', {'id' : 'pagetitle'}) + name.find('a').extract() + self.setSeries(name.text.split(' by[')[0], i) + self.story.setMetadata('seriesUrl',series_url) + i=0 + break + i+=1 + if i == 0: + break + + except: + # I find it hard to care if the series parsing fails + pass + + # grab the text for an individual chapter. + def getChapterText(self, url): + + logger.debug('Getting chapter text from: %s' % url) + + soup = bs.BeautifulSoup(self._fetchUrl(url), + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + + div = soup.find('div', {'id' : 'story'}) + + if None == div: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) + + return self.utf8FromSoup(url,div) diff --git a/fanficdownloader/adapters/adapter_restrictedsectionorg.py b/fanficdownloader/adapters/adapter_restrictedsectionorg.py new file mode 100644 index 00000000..f03b4e34 --- /dev/null +++ b/fanficdownloader/adapters/adapter_restrictedsectionorg.py @@ -0,0 +1,265 @@ +# -*- coding: utf-8 -*- + +# Copyright 2013 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import time +import logging +logger = logging.getLogger(__name__) +import re +import urllib2 +import cookielib as cl +from datetime import datetime + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, makeDate + +def getClass(): + return RestrictedSectionOrgSiteAdapter + +class RestrictedSectionOrgSiteAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + + self.username = "NoneGiven" # if left empty, site doesn't return any message at all. + self.password = "" + + # normalized story URL. + # get story/file and storyId from url--url validation guarantees query correct + m = re.match(self.getSiteURLPattern(),url) + if m: + self.story.setMetadata('storyId',m.group('id')) + + # normalized story URL. + self._setURL('http://' + self.getSiteDomain() + '/' + m.group('filestory') + '.php?' + m.group('filestory') + '=' + self.story.getMetadata('storyId')) + else: + raise exceptions.InvalidStoryURL(url, + self.getSiteDomain(), + self.getSiteExampleURLs()) + + self.story.setMetadata('siteabbrev','ressec') + # The date format will vary from site to site. 
+ # http://docs.python.org/library/datetime.html#strftime-strptime-behavior + self.dateformat = "%d %b %Y" # 20 Nov 2005 + + @staticmethod # must be @staticmethod, don't remove it. + def getSiteDomain(): + return 'www.restrictedsection.org' + + @classmethod + def getSiteExampleURLs(cls): + return "http://"+cls.getSiteDomain()+"/story.php?story=1234 http://"+cls.getSiteDomain()+"/file.php?file=1234" + + def getSiteURLPattern(self): + return re.escape("http://"+self.getSiteDomain())+r"/(?P<filestory>file|story).php\?(file|story)=(?P<id>\d+)$" + + ## Getting the chapter list and the meta data, plus 'is adult' checking. + def extractChapterUrlsAndMetadata(self): + + url = self.url + logger.debug("URL: "+url) + + # one-shot stories use file url instead of story. 'Luckily', + # we don't have to worry about one-shots becoming + # multi-chapter because ressec is frozen. Still need 'story' + # url for metadata, however. + try: + if 'file' in url: + data = self._postUrlUP(url) + soup = bs.BeautifulSoup(data) + storya = soup.find('a',href=re.compile(r"^story.php\?story=\d+")) + url = 'http://'+self.host+'/'+storya['href'].split('&')[0] # strip rs_session + + fileas = soup.find('a',href=re.compile(r"^file.php\?file=\d+")) + if fileas: + for filea in fileas: + if 'Previous Chapter' in filea.string or 'Next Chapter' in filea.string: + raise exceptions.FailedToDownload(self.getSiteDomain() +" Cannot use chapter url with multi-chapter stories on this site.") + + logger.debug("metadata URL: "+url) + data = self._fetchUrl(url) + # print data + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + if "Story not found" in data: + raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Story not found.") + + # use BeautifulSoup HTML parser to make everything easier to find. 
+ soup = bs.BeautifulSoup(data) + + # check user/pass on a chapter for multi-chapter + if 'file' not in self.url: + self._postUrlUP('http://'+self.host+'/'+soup.find('a', href=re.compile(r"^file.php\?file=\d+"))['href']) + + ## Title + h2 = soup.find('h2') + + # Find authorid and URL from... author url. + a = h2.find('a') + ahref = a['href'].split('&')[0] # strip rs_session + + self.story.setMetadata('authorId',ahref.split('=')[1]) + self.story.setMetadata('authorUrl','http://'+self.host+'/'+ahref) + self.story.setMetadata('author',stripHTML(a)) + + # title, remove byauthorname. + auth=stripHTML(a) + title=stripHTML(h2) + self.story.setMetadata('title',title[:title.index(" by "+auth)]) + + dates = soup.findAll('span', {'class':'date'}) + if dates: # only for multi-chapter + self.story.setMetadata('datePublished', makeDate(stripHTML(dates[0]), self.dateformat)) + self.story.setMetadata('dateUpdated', makeDate(stripHTML(dates[-1]), self.dateformat)) + + words = soup.findAll('span', {'class':'size'}) + wordcount=0 + for w in words: + wordcount = wordcount + int(w.string[:-6].replace(',','')) + + self.story.setMetadata('numWords',"%s"%wordcount) + + self.story.setMetadata('rating', soup.find('a',href=re.compile(r"^rating.php\?rating=\d+")).string) + + # other tags + + labels = soup.find('table', {'class':'info'}).findAll('th') + for labelspan in labels: + value = labelspan.nextSibling + label = labelspan.string + + if label != None: + + if 'Categories' in label: + for g in stripHTML(value).split('\n'): + self.story.addToList('genre',g) + + if 'Pairings' in label: + for g in stripHTML(value).split('\n'): + self.story.addToList('ships',g) + + if 'Summary' in label: + self.setDescription(url,stripHTML(value).replace("\n"," ").replace("\r","")) + value.extract() # remove summary incase it contains file URLs. + + if 'Updated' in label: # one-shots only. 
+ print "value:%s"%value + value.find('sup').extract() # remove 'st', 'nd', 'th' ordinals + print "value:%s"%value + date = makeDate(stripHTML(value), '%d %B %Y') # full month name + self.story.setMetadata('datePublished', date) + + if 'Length' in label: # one-shots only. + self.story.setMetadata('numWords',value.string[:-6]) + + # one-shot. + if 'file' in self.url: + self.chapterUrls.append((self.story.getMetadata('title'),self.url)) + else: # multi-chapter + # Find the chapters: 'library_storyview.php?chapterid=3 + chapters=soup.findAll('a', href=re.compile(r"^file.php\?file=\d+")) + if len(chapters)==0: + raise exceptions.FailedToDownload(self.getSiteDomain() +" says: No chapters found.") + else: + for chapter in chapters: + chhref = chapter['href'].split('&')[0] # strip rs_session + # just in case there's tags, like <i> in chapter titles. + self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+chhref)) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + + + def _postUrlUP(self, url): + params = {} + if self.password: + params['username'] = self.username + params['password'] = self.password + else: + params['username'] = self.getConfig("username") + params['password'] = self.getConfig("password") + params['accept.x'] = 1 + params['accept.y'] = 1 + + excpt=None + for sleeptime in [0.5, 1.5, 4, 9]: + time.sleep(sleeptime) + try: + data = self._postUrl(url, params) + if data == "Unable to connect to the database": + raise exceptions.FailedToDownload("Site reported 'Unable to connect to the database'") + if "I certify that I am over the age of 18 and that accessing the following story will not violate the laws of my country or local ordinances." in data: + raise exceptions.FailedToLogin(url,params['username']) + return data + except exceptions.FailedToLogin, ftl: + # no need to retry these. 
+ raise(ftl) + except Exception, e: + excpt=e + logger.warn("Caught an exception reading URL: %s Exception %s."%(unicode(url),unicode(e))) + + logger.error("Giving up on %s" %url) + logger.exception(excpt) + raise(excpt) + + # grab the text for an individual chapter. + def getChapterText(self, url): + + logger.debug('Getting chapter text from: %s' % url) + + data = self._postUrlUP(url) + #print("data:%s"%data) + + # some stories have html that confuses the parser. For story + # text we don't care about anything before '<table id="page"' + # and seems to clear the issue. + data = data[data.index('<table id="page"'):] + + soup = bs.BeautifulSoup(data) + + div = soup.find('td',{'id':'page_content'}) + div.name='div' + + if None == div: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) + + ## Remove stuff from page_content + + # Remove all tags before the first <hr> after class=info table (including hr) + hr = div.find('table',{'class':'info'}).findNext('hr') + for tag in hr.findAllPrevious(): + tag.extract() + hr.extract() + + # Remove all tags after the last <hr> (including hr) + hr = div.findAll('hr')[-1] + for tag in hr.findAllNext(): + tag.extract() + hr.extract() + + return self.utf8FromSoup(url,div) diff --git a/fanficdownloader/adapters/adapter_samandjacknet.py b/fanficdownloader/adapters/adapter_samandjacknet.py new file mode 100644 index 00000000..dbfa2097 --- /dev/null +++ b/fanficdownloader/adapters/adapter_samandjacknet.py @@ -0,0 +1,342 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import time +import logging +logger = logging.getLogger(__name__) +import re +import urllib2 + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, makeDate + +# By virtue of being recent and requiring both is_adult and user/pass, +# adapter_fanficcastletvnet.py is the best choice for learning to +# write adapters--especially for sites that use the eFiction system. +# Most sites that have ".../viewstory.php?sid=123" in the story URL +# are eFiction. + +# For non-eFiction sites, it can be considerably more complex, but +# this is still a good starting point. + +# In general an 'adapter' needs to do these five things: + +# - 'Register' correctly with the downloader +# - Site Login (if needed) +# - 'Are you adult?' check (if needed--some do one, some the other, some both) +# - Grab the chapter list +# - Grab the story meta-data (some (non-eFiction) adapters have to get it from the author page) +# - Grab the chapter texts + +# Search for XXX comments--that's where things are most likely to need changing. + +# This function is called by the downloader in all adapter_*.py files +# in this dir to register the adapter class. So it needs to be +# updated to reflect the class below it. That, plus getSiteDomain() +# take care of 'Registering'. +def getClass(): + return SamAndJackNetAdapter # XXX + +# Class name has to be unique. Our convention is camel case the +# sitename with Adapter at the end. www is skipped. 
+class SamAndJackNetAdapter(BaseSiteAdapter): # XXX + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + self.username = "NoneGiven" # if left empty, site doesn't return any message at all. + self.password = "" + self.is_adult=False + + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + + + # normalized story URL. + # XXX Most sites don't have the /fanfic part. Replace all to remove it usually. + self._setURL('http://' + self.getSiteDomain() + '/fanfics/viewstory.php?sid='+self.story.getMetadata('storyId')) + + # Each adapter needs to have a unique site abbreviation. + self.story.setMetadata('siteabbrev','sjn') # XXX + + # The date format will vary from site to site. + # http://docs.python.org/library/datetime.html#strftime-strptime-behavior + self.dateformat = "%b %d, %Y" # XXX + + @staticmethod # must be @staticmethod, don't remove it. + def getSiteDomain(): + # The site domain. Does have www here, if it uses it. + return 'samandjack.net' # XXX + + @classmethod + def getSiteExampleURLs(self): + return "http://"+self.getSiteDomain()+"/fanfics/viewstory.php?sid=1234" + + def getSiteURLPattern(self): + return re.escape("http://"+self.getSiteDomain()+"/fanfics/viewstory.php?sid=")+r"\d+$" + + ## Login seems to be reasonably standard across eFiction sites. 
+ def needToLoginCheck(self, data): + if 'Registered Users Only' in data \ + or 'There is no such account on our website' in data \ + or "That password doesn't match the one in our database" in data: + return True + else: + return False + + def performLogin(self, url): + params = {} + + if self.password: + params['penname'] = self.username + params['password'] = self.password + else: + params['penname'] = self.getConfig("username") + params['password'] = self.getConfig("password") + params['cookiecheck'] = '1' + params['submit'] = 'Submit' + + loginUrl = 'http://' + self.getSiteDomain() + '/fanfics/user.php?action=login' + logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl, + params['penname'])) + + d = self._fetchUrl(loginUrl, params) + + if "Member Account" not in d : #Member Account + logger.info("Failed to login to URL %s as %s" % (loginUrl, + params['penname'])) + raise exceptions.FailedToLogin(url,params['penname']) + return False + else: + return True + + ## Getting the chapter list and the meta data, plus 'is adult' checking. + def extractChapterUrlsAndMetadata(self): + + if self.is_adult or self.getConfig("is_adult"): + # Weirdly, different sites use different warning numbers. + # If the title search below fails, there's a good chance + # you need a different number. print data at that point + # and see what the 'click here to continue' url says. + + # Furthermore, there's a couple sites now with more than + # one warning level for different ratings. And they're + # fussy about it. midnightwhispers has three: 10, 3 & 5. + # we'll try 5 first. + addurl = "&ageconsent=ok&warning=5" # XXX + else: + addurl="" + + # index=1 makes sure we see the story chapter index. Some + # sites skip that for one-chapter stories. 
+ url = self.url+'&index=1'+addurl + logger.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + # The actual text that is used to announce you need to be an + # adult varies from site to site. Again, print data before + # the title search to troubleshoot. + + # Since the warning text can change by warning level, let's + # look for the warning pass url. nfacommunity uses + # &warning= -- actually, so do other sites. Must be an + # eFiction book. + + # viewstory.php?sid=1882&warning=4 + # viewstory.php?sid=1654&ageconsent=ok&warning=5 + #print data + #m = re.search(r"'viewstory.php\?sid=1882(&warning=4)'",data) + m = re.search(r"'viewstory.php\?sid=\d+((?:&ageconsent=ok)?&warning=\d+)'",data) + if m != None: + if self.is_adult or self.getConfig("is_adult"): + # We tried the default and still got a warning, so + # let's pull the warning number from the 'continue' + # link and reload data. + addurl = m.group(1) + # correct stupid & error in url. + addurl = addurl.replace("&","&") + url = self.url+'&index=1'+addurl + logger.debug("URL 2nd try: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + else: + raise exceptions.AdultCheckRequired(self.url) + + if "Access denied. This story has not been validated by the adminstrators of this site." in data: + raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.") + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulSoup(data) + # print data + + # Now go hunting for all the meta data and the chapter list. 
+ + pagetitle = soup.find('div',{'id':'pagetitle'}) + ## Title + a = pagetitle.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) + self.story.setMetadata('title',stripHTML(a)) + + # Find authorid and URL from... author url. + # (fetch multiple authors) + alist = soup.findAll('a', href=re.compile(r"viewuser.php\?uid=\d+")) + for a in alist: + self.story.addToList('authorId',a['href'].split('=')[1]) + self.story.addToList('authorUrl','http://'+self.host+'/fanfics/'+a['href']) + self.story.addToList('author',a.string) + + # Reviews + reviewdata = soup.find('div', {'id' : 'sort'}) + a = reviewdata.findAll('a', href=re.compile(r'reviews.php\?type=ST&(amp;)?item='+self.story.getMetadata('storyId')+"$"))[1] # second one. + self.story.setMetadata('reviews',stripHTML(a)) + + # Find the chapters: + for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")): + # just in case there's tags, like <i> in chapter titles. + self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/fanfics/'+chapter['href']+addurl)) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + # eFiction sites don't help us out a lot with their meta data + # formating, so it's a little ugly. 
+ + # utility method + def defaultGetattr(d,k): + try: + return d[k] + except: + return "" + + + # <span class="label">Rated:</span> NC-17<br /> etc + labels = soup.findAll('span',{'class':'label'}) + for labelspan in labels: + value = labelspan.nextSibling + label = labelspan.string + + if 'Summary' in label: + self.setDescription(url,value) + + if 'Rated' in label: + self.story.setMetadata('rating', value) + + if 'Word count' in label: + self.story.setMetadata('numWords', value) + + if 'Categories' in label: + cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories')) + catstext = [cat.string for cat in cats] + for cat in catstext: + self.story.addToList('category',cat.string) + + if 'Characters' in label: + chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters')) + charstext = [char.string for char in chars] + for char in charstext: + self.story.addToList('characters',char.string) + + ## Not all sites use Genre, but there's no harm to + ## leaving it in. Check to make sure the type_id number + ## is correct, though--it's site specific. + if 'Genre' in label: + genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1')) # XXX + genrestext = [genre.string for genre in genres] + self.genre = ', '.join(genrestext) + for genre in genrestext: + self.story.addToList('genre',genre.string) + + ## Not all sites use Warnings, but there's no harm to + ## leaving it in. Check to make sure the type_id number + ## is correct, though--it's site specific. 
+ if 'Warnings' in label: + warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX + warningstext = [warning.string for warning in warnings] + self.warning = ', '.join(warningstext) + for warning in warningstext: + self.story.addToList('warnings',warning.string) + + if 'Completed' in label: + if 'Yes' in value: + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + + if 'Published' in label: + value=value.replace(' | ','') + self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat)) + + if 'Updated' in label: + # there's a stray [ at the end. + #value = value[0:-1] + self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat)) + + + try: + # Find Series name from series URL. + a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+")) + series_name = a.string + series_url = 'http://'+self.host+'/fanfics/'+a['href'] + + # use BeautifulSoup HTML parser to make everything easier to find. + seriessoup = bs.BeautifulSoup(self._fetchUrl(series_url)) + storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$')) + i=1 + for a in storyas: + if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')): + self.setSeries(series_name, i) + self.story.setMetadata('seriesUrl',series_url) + break + i+=1 + + except: + # I find it hard to care if the series parsing fails + pass + + # grab the text for an individual chapter. + def getChapterText(self, url): + + logger.debug('Getting chapter text from: %s' % url) + + soup = bs.BeautifulStoneSoup(self._fetchUrl(url), + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + + div = soup.find('div', {'id' : 'story'}) + + if None == div: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" 
% url) + + return self.utf8FromSoup(url,div) diff --git a/fanficdownloader/adapters/adapter_samdeanarchivenu.py b/fanficdownloader/adapters/adapter_samdeanarchivenu.py new file mode 100644 index 00000000..398a2bc8 --- /dev/null +++ b/fanficdownloader/adapters/adapter_samdeanarchivenu.py @@ -0,0 +1,233 @@ +# -*- coding: utf-8 -*- + +# Copyright 2012 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Software: eFiction +import time +import logging +logger = logging.getLogger(__name__) +import re +import urllib2 + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, makeDate + +def getClass(): + return SamDeanArchiveNuAdapter + +# Class name has to be unique. Our convention is camel case the +# sitename with Adapter at the end. www is skipped. +class SamDeanArchiveNuAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + self.username = "NoneGiven" # if left empty, site doesn't return any message at all. 
+ self.password = "" + self.is_adult=False + + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + + + # normalized story URL. + self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId')) + + # Each adapter needs to have a unique site abbreviation. + self.story.setMetadata('siteabbrev','sda') + + # The date format will vary from site to site. + # http://docs.python.org/library/datetime.html#strftime-strptime-behavior + self.dateformat = "%m/%d/%y" + + @staticmethod # must be @staticmethod, don't remove it. + def getSiteDomain(): + # The site domain. Does have www here, if it uses it. + return 'samdean.archive.nu' + + @classmethod + def getAcceptDomains(cls): + return ['www.samdean.archive.nu','samdean.archive.nu'] + + @classmethod + def getSiteExampleURLs(cls): + return "http://"+cls.getSiteDomain()+"/viewstory.php?sid=1234" + + def getSiteURLPattern(self): + return re.escape("http://")+r"(www\.)?"+re.escape(self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$" + + ## Getting the chapter list and the meta data, plus 'is adult' checking. + def extractChapterUrlsAndMetadata(self): + + # index=1 makes sure we see the story chapter index. Some + # sites skip that for one-chapter stories. + url = self.url+'&index=1' + logger.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + if "Access denied. This story has not been validated by the adminstrators of this site." in data: + raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.") + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulSoup(data) + # print data + + # Now go hunting for all the meta data and the chapter list. 
+ + ## Title and author + a = soup.find('div', {'id' : 'pagetitle'}) + + aut = a.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) + self.story.setMetadata('authorId',aut['href'].split('=')[1]) + self.story.setMetadata('authorUrl','http://'+self.host+'/'+aut['href']) + self.story.setMetadata('author',aut.string) + aut.extract() + + self.story.setMetadata('title',stripHTML(a)[:(len(stripHTML(a))-3)]) + + # Find the chapters: + chapters=soup.find('select') + if chapters != None: + for chapter in chapters.findAll('option'): + # just in case there's tags, like <i> in chapter titles. http://samdean.archive.nu/viewstory.php?sid=4317&chapter=2 + self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/viewstory.php?sid='+self.story.getMetadata('storyId')+'&chapter='+chapter['value'])) + else: + self.chapterUrls.append((self.story.getMetadata('title'),url)) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + asoup = bs.BeautifulSoup(self._fetchUrl(self.story.getMetadata('authorUrl'))) + + for list in asoup.findAll('div', {'class' : re.compile('listbox\s+')}): + a = list.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) + if a != None: + break + + # eFiction sites don't help us out a lot with their meta data + # formating, so it's a little ugly. 
+ + # utility method + def defaultGetattr(d,k): + try: + return d[k] + except: + return "" + + # <span class="label">Rated:</span> NC-17<br /> etc + labels = list.findAll('span', {'class' : 'classification'}) + for labelspan in labels: + label = labelspan.string + value = labelspan.nextSibling + + if 'Summary' in label: + ## Everything until the next span class='label' + svalue = "" + while not defaultGetattr(value,'class') == 'classification': + svalue += str(value) + value = value.nextSibling + self.setDescription(url,svalue) + #self.story.setMetadata('description',stripHTML(svalue)) + + if 'Rated' in label: + self.story.setMetadata('rating', value[:len(value)-2]) + + if 'Word count' in label: + self.story.setMetadata('numWords', value) + + if 'Categories' in label: + cats = labelspan.parent.findAll('a',href=re.compile(r'categories.php\?catid=\d+')) + for cat in cats: + self.story.addToList('category',cat.string) + + if 'Characters' in label: + for char in value.string.split(', '): + if not 'None' in char: + self.story.addToList('characters',char) + + if 'Genre' in label: + for genre in value.string.split(', '): + if not 'None' in genre: + self.story.addToList('genre',genre) + + if 'Warnings' in label: + for warning in value.string.split(', '): + if not 'None' in warning: + self.story.addToList('warnings',warning) + + if 'Completed' in label: + if 'Yes' in value: + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + + if 'Published' in label: + self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat)) + + if 'Updated' in label: + # there's a stray [ at the end. + #value = value[0:-1] + self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat)) + + try: + # Find Series name from series URL. 
+ a = list.find('a', href=re.compile(r"series.php\?seriesid=\d+")) + series_name = a.string + series_url = 'http://'+self.host+'/'+a['href'] + + # use BeautifulSoup HTML parser to make everything easier to find. + seriessoup = bs.BeautifulSoup(self._fetchUrl(series_url)) + storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$')) + i=1 + for a in storyas: + if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')): + self.setSeries(series_name, i) + self.story.setMetadata('seriesUrl',series_url) + break + i+=1 + + except: + # I find it hard to care if the series parsing fails + pass + + # grab the text for an individual chapter. + def getChapterText(self, url): + + logger.debug('Getting chapter text from: %s' % url) + + soup = bs.BeautifulStoneSoup(self._fetchUrl(url), + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + + div = soup.find('div', {'id' : 'story'}) + + if None == div: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) + + return self.utf8FromSoup(url,div) diff --git a/fanficdownloader/adapters/adapter_scarheadnet.py b/fanficdownloader/adapters/adapter_scarheadnet.py new file mode 100644 index 00000000..8ba60440 --- /dev/null +++ b/fanficdownloader/adapters/adapter_scarheadnet.py @@ -0,0 +1,300 @@ +# -*- coding: utf-8 -*- + +# Copyright 2013 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +# Software: eFiction +import time +import logging +logger = logging.getLogger(__name__) +import re +import urllib2 + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, makeDate + +def getClass(): + return ScarHeadNetAdapter + +# Class name has to be unique. Our convention is camel case the +# sitename with Adapter at the end. www is skipped. +class ScarHeadNetAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + self.username = "NoneGiven" # if left empty, site doesn't return any message at all. + self.password = "" + self.is_adult=False + + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + + + # normalized story URL. + self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId')) + + # Each adapter needs to have a unique site abbreviation. + self.story.setMetadata('siteabbrev','shn') + + # The date format will vary from site to site. + # http://docs.python.org/library/datetime.html#strftime-strptime-behavior + self.dateformat = "%d/%m/%y" + + @staticmethod # must be @staticmethod, don't remove it. + def getSiteDomain(): + # The site domain. Does have www here, if it uses it. + return 'scarhead.net' + + @classmethod + def getSiteExampleURLs(cls): + return "http://"+cls.getSiteDomain()+"/viewstory.php?sid=1234" + + def getSiteURLPattern(self): + return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$" + + ## Login seems to be reasonably standard across eFiction sites. 
+ def needToLoginCheck(self, data): + if 'Registered Users Only' in data \ + or 'There is no such account on our website' in data \ + or "That password doesn't match the one in our database" in data: + return True + else: + return False + + def performLogin(self, url): + params = {} + + if self.password: + params['penname'] = self.username + params['password'] = self.password + else: + params['penname'] = self.getConfig("username") + params['password'] = self.getConfig("password") + params['cookiecheck'] = '1' + params['submit'] = 'Submit' + + loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login' + logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl, + params['penname'])) + + d = self._fetchUrl(loginUrl, params) + + if "Member Account" not in d : #Member Account + logger.info("Failed to login to URL %s as %s" % (loginUrl, + params['penname'])) + raise exceptions.FailedToLogin(url,params['penname']) + return False + else: + return True + + ## Getting the chapter list and the meta data, plus 'is adult' checking. + def extractChapterUrlsAndMetadata(self): + + if self.is_adult or self.getConfig("is_adult"): + # Weirdly, different sites use different warning numbers. + # If the title search below fails, there's a good chance + # you need a different number. print data at that point + # and see what the 'click here to continue' url says. + addurl = "&ageconsent=ok&warning=5" + else: + addurl="" + + # index=1 makes sure we see the story chapter index. Some + # sites skip that for one-chapter stories. + url = self.url+'&index=1'+addurl + logger.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + if self.needToLoginCheck(data): + # need to log in for this one. + self.performLogin(url) + data = self._fetchUrl(url) + + # Since the warning text can change by warning level, let's + # look for the warning pass url. 
ksarchive uses + # &warning= -- actually, so do other sites. Must be an + # eFiction book. + + # viewstory.php?sid=1882&warning=4 + # viewstory.php?sid=1654&ageconsent=ok&warning=5 + #print data + #m = re.search(r"'viewstory.php\?sid=1882(&warning=4)'",data) + m = re.search(r"'viewstory.php\?sid=\d+((?:&ageconsent=ok)?&warning=\d+)'",data) + if m != None: + if self.is_adult or self.getConfig("is_adult"): + # We tried the default and still got a warning, so + # let's pull the warning number from the 'continue' + # link and reload data. + addurl = m.group(1) + # correct stupid & error in url. + addurl = addurl.replace("&","&") + url = self.url+'&index=1'+addurl + logger.debug("URL 2nd try: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + else: + raise exceptions.AdultCheckRequired(self.url) + + if "Access denied. This story has not been validated by the adminstrators of this site." in data: + raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.") + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulSoup(data) + # print data + + # Now go hunting for all the meta data and the chapter list. + + pagetitle = soup.find('tr',{'valign':'top'}) + + ## Title + a = pagetitle.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) + self.story.setMetadata('title',stripHTML(a)) + + # Find authorid and URL from... author url. 
+ a = pagetitle.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) + self.story.setMetadata('authorId',a['href'].split('=')[1]) + self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href']) + self.story.setMetadata('author',a.string) + + # Find the chapters: + for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")): + # just in case there's tags, like <i> in chapter titles. + self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+chapter['href']+addurl)) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + # eFiction sites don't help us out a lot with their meta data + # formating, so it's a little ugly. + + cats = soup.findAll('a',href=re.compile(r'browse.php\?type=categories')) + for cat in cats: + if '/' == cat.string[0]: + self.story.addToList('ships','Harry Potter'+cat.string.split('(')[0]) + elif 'Harry' in cat.string: + self.story.addToList('ships',cat.string.split('(')[0]) + else: + self.story.addToList('category',cat.string) + if '(' in cat.string: + self.story.addToList('category',cat.string.split('(')[1].split(')')[0]) + + + + + chars = soup.findAll('a',href=re.compile(r'browse.php\?type=characters')) + for char in chars: + self.story.addToList('characters',char.string) + + genres = soup.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) + for genre in genres: + self.story.addToList('genre',genre.string) + + warnings = soup.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1')) + for warning in warnings: + self.story.addToList('warnings',warning.string) + + textsoup = stripHTML(soup) + + a = textsoup.split('Published: ')[1].split(' ')[0] + self.story.setMetadata('datePublished', makeDate(stripHTML(a), self.dateformat)) + a = textsoup.split('Updated: ')[1].split(' ')[0] + self.story.setMetadata('dateUpdated', makeDate(stripHTML(a), self.dateformat)) + a = textsoup.split('Rating: ')[1].split(' ')[0] + 
self.story.setMetadata('rating', a) + a = textsoup.split('Length: ')[1].split('(')[1].split(' ')[0] + self.story.setMetadata('numWords', a) + a = textsoup.split('Completed: ')[1].split(' ')[0] + if 'Yes' in a: + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + #a = textsoup.split('Summary: ')[1].split('Add Story to Favorites')[0] + #self.setDescription(url,a) + + + + a=soup.find(text=re.compile("Summary: ")) + i=0 + svalue = "" + while i == 0: + try: + b = str(a) + svalue += b.split('Summary: ')[1] + except: + svalue += str(a) + if a.nextSibling != None: + a = a.nextSibling + else: + a = a.parent.nextSibling + if 'Disclaimer: ' in stripHTML(a): + i=1 + self.setDescription(url,svalue) + + try: + # Find Series name from series URL. + a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+")) + series_name = a.string + series_url = 'http://'+self.host+'/'+a['href'] + + # use BeautifulSoup HTML parser to make everything easier to find. + seriessoup = bs.BeautifulSoup(self._fetchUrl(series_url)) + storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$')) + i=1 + for a in storyas: + if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')): + self.setSeries(series_name, i) + self.story.setMetadata('seriesUrl',series_url) + break + i+=1 + + except: + # I find it hard to care if the series parsing fails + pass + + # grab the text for an individual chapter. + def getChapterText(self, url): + + logger.debug('Getting chapter text from: %s' % url) + + soup = bs.BeautifulSoup(self._fetchUrl(url), + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + + div = soup.find('div', {'id' : 'story'}) + + if None == div: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" 
% url) + + return self.utf8FromSoup(url,div) diff --git a/fanficdownloader/adapters/adapter_scarvesandcoffeenet.py b/fanficdownloader/adapters/adapter_scarvesandcoffeenet.py new file mode 100644 index 00000000..f90a77e5 --- /dev/null +++ b/fanficdownloader/adapters/adapter_scarvesandcoffeenet.py @@ -0,0 +1,248 @@ +# -*- coding: utf-8 -*- + +# Copyright 2012 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Software: eFiction +import time +import logging +logger = logging.getLogger(__name__) +import re +import urllib2 + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, makeDate + +def getClass(): + return ScarvesAndCoffeeNetAdapter + +# Class name has to be unique. Our convention is camel case the +# sitename with Adapter at the end. www is skipped. +class ScarvesAndCoffeeNetAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + self.username = "NoneGiven" # if left empty, site doesn't return any message at all. 
+ self.password = "" + self.is_adult=False + + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + + + # normalized story URL. + self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId')) + + # Each adapter needs to have a unique site abbreviation. + self.story.setMetadata('siteabbrev','scacf') + + # The date format will vary from site to site. + # http://docs.python.org/library/datetime.html#strftime-strptime-behavior + self.dateformat = "%m/%d/%Y" + + @staticmethod # must be @staticmethod, don't remove it. + def getSiteDomain(): + # The site domain. Does have www here, if it uses it. + return 'www.scarvesandcoffee.net' + + @classmethod + def getSiteExampleURLs(cls): + return "http://"+cls.getSiteDomain()+"/viewstory.php?sid=1234" + + def getSiteURLPattern(self): + return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$" + + ## Login seems to be reasonably standard across eFiction sites. + def needToLoginCheck(self, data): + if 'Registered Users Only' in data \ + or 'There is no such account on our website' in data \ + or "That password doesn't match the one in our database" in data: + return True + else: + return False + + ## Getting the chapter list and the meta data, plus 'is adult' checking. + def extractChapterUrlsAndMetadata(self): + + if self.is_adult or self.getConfig("is_adult"): + # Weirdly, different sites use different warning numbers. + # If the title search below fails, there's a good chance + # you need a different number. print data at that point + # and see what the 'click here to continue' url says. + addurl = "&ageconsent=ok&warning=20" + else: + addurl="" + + # index=1 makes sure we see the story chapter index. Some + # sites skip that for one-chapter stories. 
+ url = self.url+'&index=1'+addurl + logger.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + m = re.search(r"'viewstory.php\?sid=\d+((?:&ageconsent=ok)?&warning=\d+)'",data) + if m != None: + if self.is_adult or self.getConfig("is_adult"): + # We tried the default and still got a warning, so + # let's pull the warning number from the 'continue' + # link and reload data. + addurl = m.group(1) + # correct stupid & error in url. + addurl = addurl.replace("&","&") + url = self.url+'&index=1'+addurl + logger.debug("URL 2nd try: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + else: + raise exceptions.AdultCheckRequired(self.url) + + if "Access denied. This story has not been validated by the adminstrators of this site." in data: + raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.") + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulSoup(data) + # print data + + # Now go hunting for all the meta data and the chapter list. + + ## Title + a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) + self.story.setMetadata('title',stripHTML(a)) + + # Find authorid and URL from... author url. 
+ a = soup.find('div',{"id":"pagetitle"}).find('a', href=re.compile(r"viewuser.php\?uid=\d+")) + self.story.setMetadata('authorId',a['href'].split('=')[1]) + self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href']) + self.story.setMetadata('author',a.string) + + # Find the chapters: + for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")): + # just in case there's tags, like <i> in chapter titles. + self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+chapter['href']+addurl)) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + # eFiction sites don't help us out a lot with their meta data + # formating, so it's a little ugly. + + # utility method + def defaultGetattr(d,k): + try: + return d[k] + except: + return "" + + # <span class="label">Rated:</span> NC-17<br /> etc + labels = soup.findAll('span',{'class':'label'}) + for labelspan in labels: + value = labelspan.nextSibling + label = labelspan.string + + if 'Summary' in label: + ## Everything until the next span class='label' + svalue = "" + while not defaultGetattr(value,'class') == 'label': + svalue += str(value) + value = value.nextSibling + self.setDescription(url,svalue) + #self.story.setMetadata('description',stripHTML(svalue)) + + if 'Rated' in label: + self.story.setMetadata('rating', value) + + if 'Word count' in label: + self.story.setMetadata('numWords', value) + + if 'Categories' in label: + cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories')) + for cat in cats: + self.story.addToList('genre',cat.string) + + if 'Characters' in label: + chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters')) + for char in chars: + self.story.addToList('characters',char.string) + + if 'Completed' in label: + if 'Yes' in value: + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + + if 
'Published' in label: + self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat)) + + if 'Updated' in label: + self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat)) + + try: + # Find Series name from series URL. + a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+")) + series_name = a.string + series_url = 'http://'+self.host+'/'+a['href'] + + # use BeautifulSoup HTML parser to make everything easier to find. + seriessoup = bs.BeautifulSoup(self._fetchUrl(series_url)) + # can't use ^viewstory...$ in case of higher rated stories with javascript href. + storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+')) + i=1 + for a in storyas: + # skip 'report this' and 'TOC' links + if 'contact.php' not in a['href'] and 'index' not in a['href']: + if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')): + self.setSeries(series_name, i) + self.story.setMetadata('seriesUrl',series_url) + break + i+=1 + + except: + # I find it hard to care if the series parsing fails + pass + + # grab the text for an individual chapter. + def getChapterText(self, url): + + logger.debug('Getting chapter text from: %s' % url) + + soup = bs.BeautifulStoneSoup(self._fetchUrl(url), + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + + div = soup.find('div', {'id' : 'story'}) + + if None == div: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" 
% url) + + return self.utf8FromSoup(url,div) diff --git a/fanficdownloader/adapters/adapter_sg1heliopoliscom.py b/fanficdownloader/adapters/adapter_sg1heliopoliscom.py new file mode 100644 index 00000000..f36edaea --- /dev/null +++ b/fanficdownloader/adapters/adapter_sg1heliopoliscom.py @@ -0,0 +1,259 @@ +# -*- coding: utf-8 -*- + +# Copyright 2012 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Software: eFiction +import time +import logging +logger = logging.getLogger(__name__) +import re +import urllib2 + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, makeDate + +def getClass(): + return SG1HeliopolisComAdapter + +# Class name has to be unique. Our convention is camel case the +# sitename with Adapter at the end. www is skipped. +class SG1HeliopolisComAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + self.username = "NoneGiven" # if left empty, site doesn't return any message at all. 
+ self.password = "" + self.is_adult=False + + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + + self.section=self.parsedUrl.path.split('/',)[1] + + # normalized story URL. + self._setURL('http://' + self.getSiteDomain() + '/'+self.section+'/viewstory.php?sid='+self.story.getMetadata('storyId')) + + # Each adapter needs to have a unique site abbreviation. + self.story.setMetadata('siteabbrev','sghp') + + # If all stories from the site fall into the same category, + # the site itself isn't likely to label them as such, so we + # do. Can't use extracategories, could be Atlantis or SG-1 + if 'atlantis' in self.section: + self.story.addToList("category","Stargate: Atlantis") + else: + self.story.addToList("category","Stargate: SG-1") + + # The date format will vary from site to site. + # http://docs.python.org/library/datetime.html#strftime-strptime-behavior + self.dateformat = "%Y.%m.%d" + + @staticmethod # must be @staticmethod, don't remove it. + def getSiteDomain(): + # The site domain. Does have www here, if it uses it. + return 'sg1-heliopolis.com' + + @classmethod + def getSiteExampleURLs(cls): + return "http://"+cls.getSiteDomain()+"/archive/viewstory.php?sid=1234 http://"+cls.getSiteDomain()+"/adult/viewstory.php?sid=1234 http://"+cls.getSiteDomain()+"/atlantis/viewstory.php?sid=1234" + + def getSiteURLPattern(self): + return r"http://sg1-heliopolis.com/(archive|adult|atlantis)?/viewstory.php\?sid=\d+$" + + + ## Getting the chapter list and the meta data, plus 'is adult' checking. + def extractChapterUrlsAndMetadata(self): + + if self.is_adult or self.getConfig("is_adult"): + # Weirdly, different sites use different warning numbers. + # If the title search below fails, there's a good chance + # you need a different number. print data at that point + # and see what the 'click here to continue' url says. 
+ addurl = "&warning=4" + else: + addurl="" + + # index=1 makes sure we see the story chapter index. Some + # sites skip that for one-chapter stories. + url = self.url+'&index=1'+addurl + logger.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + m = re.search(r"'viewstory.php\?sid=\d+((?:&ageconsent=ok)?&warning=\d+)'",data) + if m != None: + if self.is_adult or self.getConfig("is_adult"): + # We tried the default and still got a warning, so + # let's pull the warning number from the 'continue' + # link and reload data. + addurl = m.group(1) + # correct stupid & error in url. + addurl = addurl.replace("&","&") + url = self.url+'&index=1'+addurl + logger.debug("URL 2nd try: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + else: + raise exceptions.AdultCheckRequired(self.url) + + if "Access denied. This story has not been validated by the adminstrators of this site." in data: + raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.") + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulSoup(data) + # print data + + # Now go hunting for all the meta data and the chapter list. + + ## Title + a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) + self.story.setMetadata('title',stripHTML(a)) + + # Find authorid and URL from... author url. 
+ a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) + self.story.setMetadata('authorId',a['href'].split('=')[1]) + self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href']) + self.story.setMetadata('author',a.string) + + # Find the chapters: + for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")): + # just in case there's tags, like <i> in chapter titles. + self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+self.section+'/'+chapter['href']+addurl)) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + # eFiction sites don't help us out a lot with their meta data + # formating, so it's a little ugly. + + # utility method + def defaultGetattr(d,k): + try: + return d[k] + except: + return "" + + # <span class="label">Rated:</span> NC-17<br /> etc + labels = soup.findAll('span',{'class':'label'}) + for labelspan in labels: + value = labelspan.nextSibling + label = labelspan.string + + if 'Summary' in label: + ## Everything until the next span class='label' + svalue = "" + while not defaultGetattr(value,'class') == 'label': + svalue += str(value) + value = value.nextSibling + self.setDescription(url,svalue) + #self.story.setMetadata('description',stripHTML(svalue)) + + if 'Rated' in label: + self.story.setMetadata('rating', value) + + if 'Word count' in label: + self.story.setMetadata('numWords', value) + + if 'Categories' in label: + cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories')) + for cat in cats: + self.story.addToList('category',cat.string) + + if 'Characters' in label: + chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters')) + for char in chars: + self.story.addToList('characters',char.string) + + if 'Genre' in label: + genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1')) + for genre in genres: + 
self.story.addToList('genre',genre.string) + + if 'Warnings' in label: + warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) + for warning in warnings: + self.story.addToList('warnings',warning.string) + + if 'Completed' in label: + if 'Yes' in value: + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + + if 'Published' in label: + self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat)) + + if 'Updated' in label: + self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat)) + + try: + # Find Series name from series URL. + a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+")) + series_name = a.string + series_url = 'http://'+self.host+'/'+self.section+'/'+a['href'] + + # use BeautifulSoup HTML parser to make everything easier to find. + seriessoup = bs.BeautifulSoup(self._fetchUrl(series_url)) + # can't use ^viewstory...$ in case of higher rated stories with javascript href. + storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+')) + i=1 + for a in storyas: + # skip 'report this' and 'TOC' links + if 'contact.php' not in a['href'] and 'index' not in a['href']: + if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')): + self.setSeries(series_name, i) + self.story.setMetadata('seriesUrl',series_url) + break + i+=1 + + except: + # I find it hard to care if the series parsing fails + pass + + # grab the text for an individual chapter. + def getChapterText(self, url): + + logger.debug('Getting chapter text from: %s' % url) + + soup = bs.BeautifulStoneSoup(self._fetchUrl(url), + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + + div = soup.find('div', {'id' : 'story'}) + + if None == div: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" 
% url) + + return self.utf8FromSoup(url,div) diff --git a/fanficdownloader/adapters/adapter_sheppardweircom.py b/fanficdownloader/adapters/adapter_sheppardweircom.py new file mode 100644 index 00000000..ddce0717 --- /dev/null +++ b/fanficdownloader/adapters/adapter_sheppardweircom.py @@ -0,0 +1,317 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import time +import logging +logger = logging.getLogger(__name__) +import re +import urllib2 + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, makeDate + +# By virtue of being recent and requiring both is_adult and user/pass, +# adapter_fanficcastletvnet.py is the best choice for learning to +# write adapters--especially for sites that use the eFiction system. +# Most sites that have ".../viewstory.php?sid=123" in the story URL +# are eFiction. + +# For non-eFiction sites, it can be considerably more complex, but +# this is still a good starting point. + +# In general an 'adapter' needs to do these five things: + +# - 'Register' correctly with the downloader +# - Site Login (if needed) +# - 'Are you adult?' 
check (if needed--some do one, some the other, some both) +# - Grab the chapter list +# - Grab the story meta-data (some (non-eFiction) adapters have to get it from the author page) +# - Grab the chapter texts + +# Search for XXX comments--that's where things are most likely to need changing. + +# This function is called by the downloader in all adapter_*.py files +# in this dir to register the adapter class. So it needs to be +# updated to reflect the class below it. That, plus getSiteDomain() +# take care of 'Registering'. +def getClass(): + return SheppardWeirComAdapter # XXX + +# Class name has to be unique. Our convention is camel case the +# sitename with Adapter at the end. www is skipped. +class SheppardWeirComAdapter(BaseSiteAdapter): # XXX + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + self.username = "NoneGiven" # if left empty, site doesn't return any message at all. + self.password = "" + self.is_adult=False + + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + + + # normalized story URL. + # XXX Most sites don't have the /fanfic part. Replace all to remove it usually. + self._setURL('http://' + self.getSiteDomain() + '/fanfics/viewstory.php?sid='+self.story.getMetadata('storyId')) + + # Each adapter needs to have a unique site abbreviation. + self.story.setMetadata('siteabbrev','swf') # XXX + + # The date format will vary from site to site. + # http://docs.python.org/library/datetime.html#strftime-strptime-behavior + self.dateformat = "%B %d, %Y" # XXX + + @staticmethod # must be @staticmethod, don't remove it. + def getSiteDomain(): + # The site domain. Does have www here, if it uses it. 
+ return 'sheppardweir.com' # XXX + + @classmethod + def getSiteExampleURLs(self): + return "http://"+self.getSiteDomain()+"/fanfics/viewstory.php?sid=1234" + + def getSiteURLPattern(self): + return re.escape("http://"+self.getSiteDomain()+"/fanfics/viewstory.php?sid=")+r"\d+$" + + ## Login seems to be reasonably standard across eFiction sites. + def needToLoginCheck(self, data): + if 'Registered Users Only' in data \ + or 'There is no such account on our website' in data \ + or "That password doesn't match the one in our database" in data: + return True + else: + return False + + def performLogin(self, url): + params = {} + + if self.password: + params['penname'] = self.username + params['password'] = self.password + else: + params['penname'] = self.getConfig("username") + params['password'] = self.getConfig("password") + params['cookiecheck'] = '1' + params['submit'] = 'Submit' + + loginUrl = 'http://' + self.getSiteDomain() + '/fanfics/user.php?action=login' + logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl, + params['penname'])) + + d = self._fetchUrl(loginUrl, params) + + if "Member Account" not in d : #Member Account + logger.info("Failed to login to URL %s as %s" % (loginUrl, + params['penname'])) + raise exceptions.FailedToLogin(url,params['penname']) + return False + else: + return True + + ## Getting the chapter list and the meta data, plus 'is adult' checking. + def extractChapterUrlsAndMetadata(self): + + if self.is_adult or self.getConfig("is_adult"): + # Weirdly, different sites use different warning numbers. + # If the title search below fails, there's a good chance + # you need a different number. print data at that point + # and see what the 'click here to continue' url says. + addurl = "&ageconsent=ok&warning=4" # XXX + else: + addurl="" + + # index=1 makes sure we see the story chapter index. Some + # sites skip that for one-chapter stories. 
+ url = self.url+'&index=1'+addurl + logger.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + if self.needToLoginCheck(data): + # need to log in for this one. + self.performLogin(url) + data = self._fetchUrl(url) + + # The actual text that is used to announce you need to be an + # adult varies from site to site. Again, print data before + # the title search to troubleshoot. + if "Age Consent Required" in data: # XXX + raise exceptions.AdultCheckRequired(self.url) + + if "Access denied. This story has not been validated by the adminstrators of this site." in data: + raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.") + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulSoup(data) + # print data + + # Now go hunting for all the meta data and the chapter list. + + pagetitle = soup.find('div',{'id':'pagetitle'}) + ## Title + a = pagetitle.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) + self.story.setMetadata('title',stripHTML(a)) + + # Find authorid and URL from... author url. + # (fetch multiple authors) + alist = soup.findAll('a', href=re.compile(r"viewuser.php\?uid=\d+")) + for a in alist: + self.story.addToList('authorId',a['href'].split('=')[1]) + self.story.addToList('authorUrl','http://'+self.host+'/fanfics/'+a['href']) + self.story.addToList('author',a.string) + + + # Reviews + reviewdata = soup.find('div', {'id' : 'sort'}) + a = reviewdata.findAll('a', href=re.compile(r'reviews.php\?type=ST&(amp;)?item='+self.story.getMetadata('storyId')+"$"))[1] # second one. 
+ self.story.setMetadata('reviews',stripHTML(a)) + + + # Find the chapters: + for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")): + # just in case there's tags, like <i> in chapter titles. + self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/fanfics/'+chapter['href']+addurl)) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + # eFiction sites don't help us out a lot with their meta data + # formating, so it's a little ugly. + + # utility method + def defaultGetattr(d,k): + try: + return d[k] + except: + return "" + + # Summary + summarydata = unicode(soup.find('div',{'class':'content'})) + start='<span class="label">Summary: </span>' + end='</div>' + summarydata = summarydata[summarydata.index(start)+len(start):summarydata.rindex(end)] + self.setDescription(url,bs.BeautifulSoup(summarydata)) + + # <span class="label">Rated:</span> NC-17<br /> etc + labels = soup.findAll('span',{'class':'label'}) + for labelspan in labels: + value = labelspan.nextSibling + label = labelspan.string + + if 'Rated' in label: + self.story.setMetadata('rating', value) + + if 'Word count' in label: + self.story.setMetadata('numWords', value) + + if 'Categories' in label: + cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories')) + catstext = [cat.string for cat in cats] + for cat in catstext: + self.story.addToList('category',cat.string) + + if 'Characters' in label: + chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters')) + charstext = [char.string for char in chars] + for char in charstext: + self.story.addToList('characters',char.string) + + ## Not all sites use Genre, but there's no harm to + ## leaving it in. Check to make sure the type_id number + ## is correct, though--it's site specific. 
+ if 'Genre' in label: + genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1')) # XXX + genrestext = [genre.string for genre in genres] + self.genre = ', '.join(genrestext) + for genre in genrestext: + self.story.addToList('genre',genre.string) + + ## Not all sites use Warnings, but there's no harm to + ## leaving it in. Check to make sure the type_id number + ## is correct, though--it's site specific. + if 'Warnings' in label: + warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX + warningstext = [warning.string for warning in warnings] + self.warning = ', '.join(warningstext) + for warning in warningstext: + self.story.addToList('warnings',warning.string) + + if 'Completed' in label: + if 'Yes' in value: + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + + if 'Published' in label: + value=value.replace(' - ','') + self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat)) + + if 'Updated' in label: + # there's a stray [ at the end. + #value = value[0:-1] + self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat)) + + try: + # Find Series name from series URL. + a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+")) + series_name = a.string + series_url = 'http://'+self.host+'/fanfics/'+a['href'] + + # use BeautifulSoup HTML parser to make everything easier to find. + seriessoup = bs.BeautifulSoup(self._fetchUrl(series_url)) + storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$')) + i=1 + for a in storyas: + if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')): + self.setSeries(series_name, i) + self.story.setMetadata('seriesUrl',series_url) + break + i+=1 + + except: + # I find it hard to care if the series parsing fails + pass + + # grab the text for an individual chapter. 
+ def getChapterText(self, url): + + logger.debug('Getting chapter text from: %s' % url) + + soup = bs.BeautifulStoneSoup(self._fetchUrl(url), + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + + div = soup.find('div', {'id' : 'story'}) + + if None == div: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) + + return self.utf8FromSoup(url,div) diff --git a/fanficdownloader/adapters/adapter_simplyundeniablecom.py b/fanficdownloader/adapters/adapter_simplyundeniablecom.py new file mode 100644 index 00000000..131b8be3 --- /dev/null +++ b/fanficdownloader/adapters/adapter_simplyundeniablecom.py @@ -0,0 +1,220 @@ +# -*- coding: utf-8 -*- + +# Copyright 2013 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Software: eFiction +import time +import logging +logger = logging.getLogger(__name__) +import re +import urllib2 + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, makeDate + +def getClass(): + return SimplyUndeniableComAdapter + +# Class name has to be unique. Our convention is camel case the +# sitename with Adapter at the end. www is skipped. +class SimplyUndeniableComAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. 
+ # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + self.username = "NoneGiven" # if left empty, site doesn't return any message at all. + self.password = "" + self.is_adult=False + + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + + + # normalized story URL. + self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId')) + + # Each adapter needs to have a unique site abbreviation. + self.story.setMetadata('siteabbrev','sud') + + # The date format will vary from site to site. + # http://docs.python.org/library/datetime.html#strftime-strptime-behavior + self.dateformat = "%m/%d/%Y" + + @staticmethod # must be @staticmethod, don't remove it. + def getSiteDomain(): + # The site domain. Does have www here, if it uses it. + return 'www.simplyundeniable.com' + + @classmethod + def getSiteExampleURLs(cls): + return "http://"+cls.getSiteDomain()+"/viewstory.php?sid=1234" + + def getSiteURLPattern(self): + return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$" + + ## Login seems to be reasonably standard across eFiction sites. 
+ def needToLoginCheck(self, data): + if 'Please log in now' in data \ + or 'There is no such account on our website' in data \ + or "That password doesn't match the one in our database" in data: + return True + else: + return False + + def performLogin(self, url): + params = {} + + if self.password: + params['penname'] = self.username + params['password'] = self.password + else: + params['penname'] = self.getConfig("username") + params['password'] = self.getConfig("password") + params['cookiecheck'] = '1' + params['submit'] = 'Submit' + + loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login' + logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl, + params['penname'])) + + d = self._fetchUrl(loginUrl, params) + + if "My Account" not in d : #Member Account + logger.info("Failed to login to URL %s as %s" % (loginUrl, + params['penname'])) + raise exceptions.FailedToLogin(url,params['penname']) + return False + else: + return True + + ## Getting the chapter list and the meta data, plus 'is adult' checking. + def extractChapterUrlsAndMetadata(self): + + # index=1 makes sure we see the story chapter index. Some + # sites skip that for one-chapter stories. + url = self.url + logger.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + if self.needToLoginCheck(data): + # need to log in for this one. + self.performLogin(url) + data = self._fetchUrl(url) + + + if "Access denied. This story has not been validated by the adminstrators of this site." in data: + raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.") + + if "Please log in now" in data: + raise exceptions.FailedToDownload(self.getSiteDomain() +" says: You need to have access to the restricted section.") + + # use BeautifulSoup HTML parser to make everything easier to find. 
+ soup = bs.BeautifulSoup(data) + # print data + + # Now go hunting for all the meta data and the chapter list. + + ## Title + a = soup.find('h1') + self.story.setMetadata('title',stripHTML(a)) + + # Find authorid and URL from... author url. + a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) + self.story.setMetadata('authorId',a['href'].split('=')[1]) + self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href']) + self.story.setMetadata('author',a.string) + asoup = bs.BeautifulSoup(self._fetchUrl(self.story.getMetadata('authorUrl'))) + + for info in asoup.findAll('table', {'cellpadding' : '5'}): + a = info.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) + if a != None: + self.story.setMetadata('title',stripHTML(a)) + break + + # Find the chapters: + if "Disclaimer" in data: + self.chapterUrls.append((self.story.getMetadata('title'),url)) + else: + for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+&i=1$')): + # just in case there's tags, like <i> in chapter titles. + self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+chapter['href'])) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + # eFiction sites don't help us out a lot with their meta data + # formating, so it's a little ugly. 
+ + a = info.find('td', {'valign' : 'top'}).find('p') + self.setDescription(url,a) + + a = info.find('td', {'class' : 'greysm'}).findAll('b') + self.story.setMetadata('datePublished', makeDate(stripHTML(a[0].nextSibling), self.dateformat)) + self.story.setMetadata('dateUpdated', makeDate(stripHTML(a[1].nextSibling), self.dateformat)) + if 'Yes' in a[2].nextSibling: + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + self.story.setMetadata('rating', a[3].nextSibling) + self.story.setMetadata('numWords', a[4].nextSibling) + + + warnings = info.find('td', {'width' : '45'}).nextSibling.nextSibling.text.split(', ') + for warning in warnings: + if 'none' not in warning: + self.story.addToList('warnings',warning) + chars = info.find('td', {'width' : '51'}).nextSibling.nextSibling.text.split(', ') + for char in chars: + if '&' in char: + self.story.addToList('ships',char) + else: + self.story.addToList('characters',char) + genres = info.find('td', {'width' : '36'}).nextSibling.nextSibling.text.split(', ') + for genre in genres: + self.story.addToList('genre',genre) + + cat = info.find('a', href=re.compile(r'categories.php')) + self.story.addToList('category',cat.string) + + # grab the text for an individual chapter. + def getChapterText(self, url): + + logger.debug('Getting chapter text from: %s' % url) + + soup = bs.BeautifulStoneSoup(self._fetchUrl(url), + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + + div = soup.find('span', {'class' : 'style'}) + + if None == div: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" 
% url) + div.name='div' + + return self.utf8FromSoup(url,div) diff --git a/fanficdownloader/adapters/adapter_sinfuldesireorg.py b/fanficdownloader/adapters/adapter_sinfuldesireorg.py new file mode 100644 index 00000000..2187c687 --- /dev/null +++ b/fanficdownloader/adapters/adapter_sinfuldesireorg.py @@ -0,0 +1,252 @@ +# -*- coding: utf-8 -*- + +# Copyright 2012 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Software: eFiction +import time +import logging +logger = logging.getLogger(__name__) +import re +import urllib2 + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, makeDate + +def getClass(): + return SinfulDesireOrgAdapter + +# Class name has to be unique. Our convention is camel case the +# sitename with Adapter at the end. www is skipped. +class SinfulDesireOrgAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + self.username = "NoneGiven" # if left empty, site doesn't return any message at all. 
+ self.password = "" + self.is_adult=False + + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + + + # normalized story URL. + self._setURL('http://' + self.getSiteDomain() + '/archive/viewstory.php?sid='+self.story.getMetadata('storyId')) + + # Each adapter needs to have a unique site abbreviation. + self.story.setMetadata('siteabbrev','snds') + + # The date format will vary from site to site. + # http://docs.python.org/library/datetime.html#strftime-strptime-behavior + self.dateformat = "%m/%d/%Y" + + @staticmethod # must be @staticmethod, don't remove it. + def getSiteDomain(): + # The site domain. Does have www here, if it uses it. + return 'www.sinful-desire.org' + + @classmethod + def getSiteExampleURLs(cls): + return "http://"+cls.getSiteDomain()+"/archive/viewstory.php?sid=1234" + + def getSiteURLPattern(self): + return re.escape("http://"+self.getSiteDomain()+"/archive/viewstory.php?sid=")+r"\d+$" + + ## Getting the chapter list and the meta data, plus 'is adult' checking. + def extractChapterUrlsAndMetadata(self): + + if self.is_adult or self.getConfig("is_adult"): + # Weirdly, different sites use different warning numbers. + # If the title search below fails, there's a good chance + # you need a different number. print data at that point + # and see what the 'click here to continue' url says. + addurl = "&warning=5" + else: + addurl="" + + # index=1 makes sure we see the story chapter index. Some + # sites skip that for one-chapter stories. 
+ url = self.url+'&index=1'+addurl + logger.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + m = re.search(r"'viewstory.php\?sid=\d+((?:&ageconsent=ok)?&warning=\d+)'",data) + if m != None: + if self.is_adult or self.getConfig("is_adult"): + # We tried the default and still got a warning, so + # let's pull the warning number from the 'continue' + # link and reload data. + addurl = m.group(1) + # correct stupid & error in url. + addurl = addurl.replace("&","&") + url = self.url+'&index=1'+addurl + logger.debug("URL 2nd try: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + else: + raise exceptions.AdultCheckRequired(self.url) + + if "Access denied. This story has not been validated by the adminstrators of this site." in data: + raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.") + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulSoup(data) + # print data + + # Now go hunting for all the meta data and the chapter list. + + ## Title + a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) + self.story.setMetadata('title',stripHTML(a)) + + # Find authorid and URL from... author url. + a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) + self.story.setMetadata('authorId',a['href'].split('=')[1]) + self.story.setMetadata('authorUrl','http://'+self.host+'/archive/'+a['href']) + self.story.setMetadata('author',a.string) + + # Find the chapters: + for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")): + # just in case there's tags, like <i> in chapter titles. 
+ self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/archive/'+chapter['href']+addurl)) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + # eFiction sites don't help us out a lot with their meta data + # formating, so it's a little ugly. + + # utility method + def defaultGetattr(d,k): + try: + return d[k] + except: + return "" + + # <span class="label">Rated:</span> NC-17<br /> etc + labels = soup.findAll('span',{'class':'label'}) + for labelspan in labels: + value = labelspan.nextSibling + label = labelspan.string + + if 'Summary' in label: + ## Everything until the next span class='label' + svalue = "" + while not defaultGetattr(value,'class') == 'label': + svalue += str(value) + value = value.nextSibling + self.setDescription(url,svalue) + #self.story.setMetadata('description',stripHTML(svalue)) + + if 'Rated' in label: + self.story.setMetadata('rating', value) + + if 'Word count' in label: + self.story.setMetadata('numWords', value) + + if 'Categories' in label: + cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories')) + for cat in cats: + self.story.addToList('category',cat.string) + + if 'Characters' in label: + chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters')) + for char in chars: + self.story.addToList('characters',char.string) + + if 'Genre' in label: + genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1')) + for genre in genres: + self.story.addToList('genre',genre.string) + genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=3')) + for genre in genres: + self.story.addToList('genre',genre.string) + + if 'Warnings' in label: + warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) + for warning in warnings: + self.story.addToList('warnings',warning.string) + + if 'Completed' in label: + if 'Yes' in value: + self.story.setMetadata('status', 
'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + + if 'Published' in label: + self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat)) + + if 'Updated' in label: + self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat)) + + try: + # Find Series name from series URL. + a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+")) + series_name = a.string + series_url = 'http://'+self.host+'/archive/'+a['href'] + + # use BeautifulSoup HTML parser to make everything easier to find. + seriessoup = bs.BeautifulSoup(self._fetchUrl(series_url)) + # can't use ^viewstory...$ in case of higher rated stories with javascript href. + storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+')) + i=1 + for a in storyas: + # skip 'report this' and 'TOC' links + if 'contact.php' not in a['href'] and 'index' not in a['href']: + if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')): + self.setSeries(series_name, i) + self.story.setMetadata('seriesUrl',series_url) + break + i+=1 + + except: + # I find it hard to care if the series parsing fails + pass + + # grab the text for an individual chapter. + def getChapterText(self, url): + + logger.debug('Getting chapter text from: %s' % url) + + soup = bs.BeautifulStoneSoup(self._fetchUrl(url), + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + + div = soup.find('div', {'id' : 'story'}) + + if None == div: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" 
% url) + + return self.utf8FromSoup(url,div) diff --git a/fanficdownloader/adapters/adapter_siyecouk.py b/fanficdownloader/adapters/adapter_siyecouk.py new file mode 100644 index 00000000..9129bfcb --- /dev/null +++ b/fanficdownloader/adapters/adapter_siyecouk.py @@ -0,0 +1,247 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Software: eFiction +import time +import logging +logger = logging.getLogger(__name__) +import re +import urllib2 + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, makeDate + +# This function is called by the downloader in all adapter_*.py files +# in this dir to register the adapter class. So it needs to be +# updated to reflect the class below it. That, plus getSiteDomain() +# take care of 'Registering'. +def getClass(): + return SiyeCoUkAdapter # XXX + +# Class name has to be unique. Our convention is camel case the +# sitename with Adapter at the end. www is skipped. +class SiyeCoUkAdapter(BaseSiteAdapter): # XXX + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + + self.decode = ["Windows-1252", + "utf8",]# 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. 
+ + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + + + # normalized story URL. + self._setURL('http://' + self.getSiteDomain() + '/siye/viewstory.php?sid='+self.story.getMetadata('storyId')) + + # Each adapter needs to have a unique site abbreviation. + self.story.setMetadata('siteabbrev','siye') # XXX + + # The date format will vary from site to site. + # http://docs.python.org/library/datetime.html#strftime-strptime-behavior + self.dateformat = "%Y.%m.%d" # XXX + + @staticmethod # must be @staticmethod, don't remove it. + def getSiteDomain(): + # The site domain. Does have www here, if it uses it. + return 'www.siye.co.uk' # XXX + + @classmethod + def getAcceptDomains(cls): + return ['www.siye.co.uk','siye.co.uk'] + + @classmethod + def getSiteExampleURLs(cls): + return "http://"+cls.getSiteDomain()+"/siye/viewstory.php?sid=1234" + + def getSiteURLPattern(self): + return re.escape("http://")+r"(www\.)?siye\.co\.uk/(siye/)?"+re.escape("viewstory.php?sid=")+r"\d+$" + + ## Getting the chapter list and the meta data, plus 'is adult' checking. + def extractChapterUrlsAndMetadata(self): + + # index=1 makes sure we see the story chapter index. Some + # sites skip that for one-chapter stories. + # Except it doesn't this time. :-/ + url = self.url #+'&index=1'+addurl + logger.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulSoup(data) + # print data + + # Now go hunting for all the meta data and the chapter list. + + # Find authorid and URL from... author url. 
+ a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) + self.story.setMetadata('authorId',a['href'].split('=')[1]) + self.story.setMetadata('authorUrl','http://'+self.host+'/siye/'+a['href']) + self.story.setMetadata('author',a.string) + + # need(or easier) to pull other metadata from the author's list page. + authsoup = bs.BeautifulSoup(self._fetchUrl(self.story.getMetadata('authorUrl'))) + + ## Title + titlea = authsoup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) + self.story.setMetadata('title',stripHTML(titlea)) + + # Find the chapters (from soup, not authsoup): + for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")): + # just in case there's tags, like <i> in chapter titles. + self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/siye/'+chapter['href'])) + + if self.chapterUrls: + self.story.setMetadata('numChapters',len(self.chapterUrls)) + else: + self.chapterUrls.append((self.story.getMetadata('title'),url)) + self.story.setMetadata('numChapters',1) + + # The stuff we can get from the chapter list/one-shot page are + # in the first table with 95% width. 
+ metatable = soup.find('table',{'width':'95%'}) + + # Categories + cat_as = metatable.findAll('a', href=re.compile(r'categories.php')) + for cat_a in cat_as: + self.story.addToList('category',stripHTML(cat_a)) + + moremetaparts = stripHTML(metatable).split('\n') + for part in moremetaparts: + part = part.strip() + if part.startswith("Characters:"): + part = part[part.find(':')+1:] + for item in part.split(','): + if item.strip() == "Harry/Ginny": + self.story.addToList('characters',"Harry") + self.story.addToList('characters',"Ginny") + elif item.strip() not in ("None","All"): + self.story.addToList('characters',item) + + if part.startswith("Genres:"): + part = part[part.find(':')+1:] + for item in part.split(','): + if item.strip() != "None": + self.story.addToList('genre',item) + + if part.startswith("Warnings:"): + part = part[part.find(':')+1:] + for item in part.split(','): + if item.strip() != "None": + self.story.addToList('warnings',item) + + if part.startswith("Rating:"): + part = part[part.find(':')+1:] + self.story.setMetadata('rating',part) + + if part.startswith("Summary:"): + part = part[part.find(':')+1:] + self.setDescription(url,part) + #self.story.setMetadata('description',part) + + # want to get the next tr of the table. + #print("%s"%titlea.parent.parent.findNextSibling('tr')) + + # eFiction sites don't help us out a lot with their meta data + # formating, so it's a little ugly. + + # SIYE formats stories in the author list differently when their part of a series. + # Look for non-series... + divdesc = titlea.parent.parent.find('div',{'class':'desc'}) + if not divdesc: + # ... now look for series. 
+ divdesc = titlea.parent.parent.findNextSibling('tr').find('div',{'class':'desc'}) + + moremeta = stripHTML(divdesc) + #print("moremeta:%s"%moremeta) + for part in moremeta.replace(' - ','\n').split('\n'): + #print("part:%s"%part) + try: + (name,value) = part.split(': ') + except: + # not going to worry about fancier processing for the bits + # that don't match. + continue + name=name.strip() + value=value.strip() + if name == 'Published': + self.story.setMetadata('datePublished', makeDate(value, self.dateformat)) + if name == 'Updated': + self.story.setMetadata('dateUpdated', makeDate(value, self.dateformat)) + if name == 'Completed': + if value == 'Yes': + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + if name == 'Words': + self.story.setMetadata('numWords', value) + + try: + # Find Series name from series URL. + a = titlea.findPrevious('a', href=re.compile(r"series.php\?seriesid=\d+")) + series_name = a.string + series_url = 'http://'+self.host+'/'+a['href'] + + # use BeautifulSoup HTML parser to make everything easier to find. + seriessoup = bs.BeautifulSoup(self._fetchUrl(series_url)) + storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$')) + i=1 + for a in storyas: + if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')): + self.setSeries(series_name, i) + self.story.setMetadata('seriesUrl',series_url) + break + i+=1 + + except: + # I find it hard to care if the series parsing fails + pass + + # grab the text for an individual chapter. + def getChapterText(self, url): + + logger.debug('Getting chapter text from: %s' % url) + + # soup = bs.BeautifulSoup(self._fetchUrl(url)) + # BeautifulSoup objects to <p> inside <span>, which + # technically isn't allowed. + soup = bs.BeautifulStoneSoup(self._fetchUrl(url), + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. 
+ + # not the most unique thing in the world, but it appears to be + # the best we can do here. + story = soup.find('span', {'style' : 'font-size: 100%;'}) + + if None == story: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) + story.name='div' + + return self.utf8FromSoup(url,story) diff --git a/fanficdownloader/adapters/adapter_spikeluvercom.py b/fanficdownloader/adapters/adapter_spikeluvercom.py new file mode 100644 index 00000000..511463d2 --- /dev/null +++ b/fanficdownloader/adapters/adapter_spikeluvercom.py @@ -0,0 +1,208 @@ +# Software: eFiction +import re +import urllib2 +import urlparse + +from .. import BeautifulSoup +from ..htmlcleanup import stripHTML + +from base_adapter import BaseSiteAdapter, makeDate +from .. import exceptions + + +def getClass(): + return SpikeluverComAdapter + + +# yields Tag _and_ NavigableString siblings from the given tag. The +# BeautifulSoup findNextSiblings() method for some reasons only returns either +# NavigableStrings _or_ Tag objects, not both. +def _yield_next_siblings(tag): + sibling = tag.nextSibling + while sibling: + yield sibling + sibling = sibling.nextSibling + + +class SpikeluverComAdapter(BaseSiteAdapter): + SITE_ABBREVIATION = 'slc' + SITE_DOMAIN = 'spikeluver.com' + + BASE_URL = 'http://' + SITE_DOMAIN + '/SpuffyRealm/' + LOGIN_URL = BASE_URL + 'user.php?action=login' + VIEW_STORY_URL_TEMPLATE = BASE_URL + 'viewstory.php?sid=%d' + METADATA_URL_SUFFIX = '&index=1' + AGE_CONSENT_URL_SUFFIX = '&ageconsent=ok&warning=5' + + DATETIME_FORMAT = '%m/%d/%Y' + STORY_DOES_NOT_EXIST_ERROR_TEXT = 'That story does not exist on this archive. You may search for it or return to the home page.' 
+ + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + + query_data = urlparse.parse_qs(self.parsedUrl.query) + story_id = query_data['sid'][0] + + self.story.setMetadata('storyId', story_id) + self._setURL(self.VIEW_STORY_URL_TEMPLATE % int(story_id)) + self.story.setMetadata('siteabbrev', self.SITE_ABBREVIATION) + + def _customized_fetch_url(self, url, exception=None, parameters=None): + if exception: + try: + data = self._fetchUrl(url, parameters) + except urllib2.HTTPError: + raise exception(self.url) + # Just let self._fetchUrl throw the exception, don't catch and + # customize it. + else: + data = self._fetchUrl(url, parameters) + + return BeautifulSoup.BeautifulSoup(data) + + @staticmethod + def getSiteDomain(): + return SpikeluverComAdapter.SITE_DOMAIN + + @classmethod + def getSiteExampleURLs(cls): + return cls.VIEW_STORY_URL_TEMPLATE % 1234 + + def getSiteURLPattern(self): + return re.escape(self.VIEW_STORY_URL_TEMPLATE[:-2]) + r'\d+$' + + def extractChapterUrlsAndMetadata(self): + soup = self._customized_fetch_url(self.url + self.METADATA_URL_SUFFIX) + + errortext_div = soup.find('div', {'class': 'errortext'}) + if errortext_div: + error_text = ''.join(errortext_div(text=True)).strip() + if error_text == self.STORY_DOES_NOT_EXIST_ERROR_TEXT: + raise exceptions.StoryDoesNotExist(self.url) + + # No additional login is required, just check for adult + pagetitle_div = soup.find('div', id='pagetitle') + if pagetitle_div.a['href'].startswith('javascript:'): + if not(self.is_adult or self.getConfig('is_adult')): + raise exceptions.AdultCheckRequired(self.url) + + url = ''.join([self.url, self.METADATA_URL_SUFFIX, self.AGE_CONSENT_URL_SUFFIX]) + soup = self._customized_fetch_url(url) + + pagetitle_div = soup.find('div', id='pagetitle') + self.story.setMetadata('title', stripHTML(pagetitle_div.a)) + + author_anchor = pagetitle_div.a.findNextSibling('a') + url = urlparse.urljoin(self.BASE_URL, author_anchor['href']) + components = 
urlparse.urlparse(url) + query_data = urlparse.parse_qs(components.query) + + self.story.setMetadata('author', stripHTML(author_anchor)) + self.story.setMetadata('authorId', query_data['uid']) + self.story.setMetadata('authorUrl', url) + + sort_div = soup.find('div', id='sort') + self.story.setMetadata('reviews', stripHTML(sort_div('a')[1])) + + listbox_tag = soup.find('div', {'class': 'listbox'}) + for span_tag in listbox_tag('span'): + key = span_tag.string.strip(' :') + try: + value = stripHTML(span_tag.nextSibling) + # This can happen with some fancy markup in the summary. Just + # ignore this error and set value to None, the summary parsing + # takes care of this + except AttributeError: + value = None + + if key == 'Summary': + contents = [] + keep_summary_html = self.getConfig('keep_summary_html') + + for sibling in _yield_next_siblings(span_tag): + if isinstance(sibling, BeautifulSoup.Tag): + # Encountered next label, break. Not as bad as other + # e-fiction sites, let's hope this is enough for proper + # parsing. 
+ if sibling.name == 'span' and sibling.get('class', None) == 'label': + break + + if keep_summary_html: + contents.append(self.utf8FromSoup(self.url, sibling)) + else: + contents.append(''.join(sibling(text=True))) + else: + contents.append(sibling) + + # Remove the preceding break line tag and other crud + contents.pop() + contents.pop() + self.story.setMetadata('description', ''.join(contents)) + + elif key == 'Rated': + self.story.setMetadata('rating', value) + + elif key == 'Categories': + for sibling in span_tag.findNextSiblings(['a', 'br']): + if sibling.name == 'br': + break + + self.story.addToList('category', stripHTML(sibling)) + + # Seems to be always "None" for some reason + elif key == 'Characters': + for sibling in span_tag.findNextSiblings(['a', 'br']): + if sibling.name == 'br': + break + self.story.addToList('characters', stripHTML(sibling)) + + elif key == 'Genres': + for sibling in span_tag.findNextSiblings(['a', 'br']): + if sibling.name == 'br': + break + + self.story.addToList('genre', stripHTML(sibling)) + + elif key == 'Warnings': + for sibling in span_tag.findNextSiblings(['a', 'br']): + if sibling.name == 'br': + break + self.story.addToList('warnings', stripHTML(sibling)) + + # Challenges + + elif key == 'Series': + a = span_tag.findNextSibling('a') + if not a: + continue + self.story.setMetadata('series', stripHTML(a)) + self.story.setMetadata('seriesUrl', urlparse.urljoin(self.BASE_URL, a['href'])) + + elif key == 'Chapters': + self.story.setMetadata('numChapters', int(value)) + + elif key == 'Completed': + self.story.setMetadata('status', 'Completed' if value == 'Yes' else 'In-Progress') + + elif key == 'Word count': + self.story.setMetadata('numWords', value) + + elif key == 'Published': + self.story.setMetadata('datePublished', makeDate(value, self.DATETIME_FORMAT)) + + elif key == 'Updated': + self.story.setMetadata('dateUpdated', makeDate(value, self.DATETIME_FORMAT)) + + for p_tag in listbox_tag.findNextSiblings('p'): + 
chapter_anchor = p_tag.find('a', href=lambda href: href and href.startswith('viewstory.php?sid=')) + if not chapter_anchor: + continue + + title = stripHTML(chapter_anchor) + url = urlparse.urljoin(self.BASE_URL, chapter_anchor['href']) + self.chapterUrls.append((title, url)) + + def getChapterText(self, url): + url += self.AGE_CONSENT_URL_SUFFIX + soup = self._customized_fetch_url(url) + return self.utf8FromSoup(url, soup.find('div', id='story')) diff --git a/fanficdownloader/adapters/adapter_squidgeorgpeja.py b/fanficdownloader/adapters/adapter_squidgeorgpeja.py new file mode 100644 index 00000000..e7048502 --- /dev/null +++ b/fanficdownloader/adapters/adapter_squidgeorgpeja.py @@ -0,0 +1,246 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Software: eFiction +import time +import logging +logger = logging.getLogger(__name__) +import re +import urllib2 + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, makeDate + + +def getClass(): + return SquidgeOrgPejaAdapter + +## XXX IMPORTANT NOTE!! This adapter is for squidge.org/peja ONLY! +## There are lots of other sites and stuff under squidge.org that +## we're not supporting. 
If/When we ever want to support more +## sections of squidge.org, FFDL will need to be changed more +## fundamentally to find different adapters under the same domain. +## +## For now, I've only implemented the part for ini section names so +## if/when more adapters under squidge.org come along, existing ini +## files will still work correctly. + +# Class name has to be unique. Our convention is camel case the +# sitename with Adapter at the end. www is skipped. +class SquidgeOrgPejaAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + + self.decode = ["utf8", + "Windows-1252"] # 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + self.username = "NoneGiven" # if left empty, site doesn't return any message at all. + self.password = "" + self.is_adult=False + + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + + + # normalized story URL. + self._setURL('https://' + self.getSiteDomain() + '/peja/cgi-bin/viewstory.php?sid='+self.story.getMetadata('storyId')) + + # Each adapter needs to have a unique site abbreviation. + self.story.setMetadata('siteabbrev','wwomb') + + # The date format will vary from site to site. + # http://docs.python.org/library/datetime.html#strftime-strptime-behavior + self.dateformat = "%m/%d/%y" + + @staticmethod # must be @staticmethod, don't remove it. + def getSiteDomain(): + # The site domain. Does have www here, if it uses it. + return 'www.squidge.org' + + @classmethod # must be @classmethod, don't remove it. + def getConfigSection(cls): + # The config section name. Only override if != site domain. 
+ return cls.getSiteDomain()+'/peja' + + @classmethod + def getSiteExampleURLs(cls): + return "https://"+cls.getSiteDomain()+"/peja/cgi-bin/viewstory.php?sid=1234" + + def getSiteURLPattern(self): + # but not https://www.squidge.org/peja/cgi-bin/viewstory.php?sid=47746 -- that's the 'Site Map' negative look aead + return r"https?"+re.escape("://"+self.getSiteDomain()+"/")+r"~?"+re.escape("peja/cgi-bin/viewstory.php?sid=")+r"(?!47746)\d+$" + + ## Getting the chapter list and the meta data, plus 'is adult' checking. + def extractChapterUrlsAndMetadata(self): + + url = self.url + logger.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + data = self._fetchUrl(url) + + if "fatal MySQL error was encountered" in data: + raise exceptions.FailedToDownload("Site SQL Error--bad story") + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulSoup(data) + # print data + + # Now go hunting for all the meta data and the chapter list. + + # Find authorid and URL from... author url. + author = soup.find('div', {'id':"pagetitle"}).find('a') + self.story.setMetadata('authorId',author['href'].split('=')[1]) + self.story.setMetadata('authorUrl','https://'+self.host+'/peja/cgi-bin/'+author['href']) + self.story.setMetadata('author',author.string) + + authorSoup = bs.BeautifulSoup(self._fetchUrl(self.story.getMetadata('authorUrl'))) + + # eFiction sites don't help us out a lot with their meta data + # formating, so it's a little ugly. 
+ + title = authorSoup.find('a',{'href':'viewstory.php?sid='+self.story.getMetadata('storyId')}) + self.story.setMetadata('title',stripHTML(title)) + titleblock=title.parent.parent + + chapterselect=soup.find('select',{'name':'chapter'}) + if chapterselect: + for ch in chapterselect.findAll('option'): + self.chapterUrls.append((stripHTML(ch),'https://'+self.host+'/peja/cgi-bin/viewstory.php?sid='+self.story.getMetadata('storyId')+'&chapter='+ch['value'])) + else: + self.chapterUrls.append((title,url)) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + + # utility method + def defaultGetattr(d,k): + try: + return d[k] + except: + return "" + + # <span class="classification">Rated:</span> NC-17<br /> etc + labels = titleblock.findAll('span',{'class':'classification'}) + for labelspan in labels: + value = labelspan.nextSibling + label = labelspan.string + + if 'Summary' in label: + ## Everything until the next span class='label' + svalue = "" + while not defaultGetattr(value,'class') == 'classification': + svalue += str(value) + value = value.nextSibling + self.setDescription(url,svalue) + #self.story.setMetadata('description',stripHTML(svalue)) + + if 'Rated' in label: + if value.endswith("["): + value = value[:-1] + self.story.setMetadata('rating', value) + + if 'Characters' in label: + for char in value.split(','): + self.story.addToList('characters',char.strip()) + + if 'Genre' in label: + for genre in value.split(','): + if genre.strip() != "None": + self.story.addToList('genre',genre.strip()) + + if 'Warnings' in label: + for warning in value.split(','): + if warning.strip() != 'None': + self.story.addToList('warnings',warning.strip()) + + if 'Word count' in label: + self.story.setMetadata('numWords', value) + + if 'Fandoms' in label: + cats = labelspan.parent.findAll('a',href=re.compile(r'categories.php')) + catstext = [cat.string for cat in cats] + for cat in catstext: + self.story.addToList('category',cat.string) + + if 'Completed' in label: 
+ if 'Yes' in value: + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + + if 'Published' in label: + self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat)) + + if 'Updated' in label: + # there's a stray [ at the end. + #value = value[0:-1] + self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat)) + + try: + # Find Series name from series URL. + # http://www.squidge.org/peja/cgi-bin/series.php?seriesid=254 + a = titleblock.find('a', href=re.compile(r"series.php\?seriesid=\d+")) + series_name = a.string + series_url = 'https://'+self.host+'/peja/cgi-bin/'+a['href'] + + # use BeautifulSoup HTML parser to make everything easier to find. + seriessoup = bs.BeautifulSoup(self._fetchUrl(series_url)) + storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$')) + i=1 + for a in storyas: + if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')): + self.setSeries(series_name, i) + self.story.setMetadata('seriesUrl',series_url) + break + # don't count the 'site map' story. See the url pattern method. + if '47746' not in a['href']: + i+=1 + + except: + # I find it hard to care if the series parsing fails + pass + + + + # grab the text for an individual chapter. + def getChapterText(self, url): + + logger.debug('Getting chapter text from: %s' % url) + + soup = bs.BeautifulStoneSoup(self._fetchUrl(url), + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + + chaptext = soup.find('div',{'id':"story"}).find('span') + + if None == chaptext: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" 
% url) + + return self.utf8FromSoup(url,chaptext) diff --git a/fanficdownloader/adapters/adapter_stargateatlantisorg.py b/fanficdownloader/adapters/adapter_stargateatlantisorg.py new file mode 100644 index 00000000..f8428534 --- /dev/null +++ b/fanficdownloader/adapters/adapter_stargateatlantisorg.py @@ -0,0 +1,230 @@ +# -*- coding: utf-8 -*- + +# Copyright 2012 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Software: eFiction +import time +import logging +logger = logging.getLogger(__name__) +import re +import urllib2 + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, makeDate + +def getClass(): + return StargateAtlantisOrgAdapter + +# Class name has to be unique. Our convention is camel case the +# sitename with Adapter at the end. www is skipped. +class StargateAtlantisOrgAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + self.username = "NoneGiven" # if left empty, site doesn't return any message at all. 
+ self.password = "" + self.is_adult=False + + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + + + # normalized story URL. + self._setURL('http://' + self.getSiteDomain() + '/fanfics/viewstory.php?sid='+self.story.getMetadata('storyId')) + + # Each adapter needs to have a unique site abbreviation. + self.story.setMetadata('siteabbrev','stat') + + # The date format will vary from site to site. + # http://docs.python.org/library/datetime.html#strftime-strptime-behavior + self.dateformat = "%B %d %Y" + + @staticmethod # must be @staticmethod, don't remove it. + def getSiteDomain(): + # The site domain. Does have www here, if it uses it. + return 'stargate-atlantis.org' + + @classmethod + def getSiteExampleURLs(cls): + return "http://"+cls.getSiteDomain()+"/fanfics/viewstory.php?sid=1234" + + def getSiteURLPattern(self): + return re.escape("http://"+self.getSiteDomain()+"/fanfics/viewstory.php?sid=")+r"\d+$" + + ## Getting the chapter list and the meta data, plus 'is adult' checking. + def extractChapterUrlsAndMetadata(self): + + # index=1 makes sure we see the story chapter index. Some + # sites skip that for one-chapter stories. + url = self.url+'&index=1' + logger.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + if "Access denied. This story has not been validated by the adminstrators of this site." in data: + raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.") + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulSoup(data) + # print data + + # Now go hunting for all the meta data and the chapter list. 
+ + ## Title and author + a = soup.find('div', {'id' : 'pagetitle'}) + + aut = a.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) + self.story.setMetadata('authorId',aut['href'].split('=')[1]) + self.story.setMetadata('authorUrl','http://'+self.host+'/fanfics/'+aut['href']) + self.story.setMetadata('author',aut.string) + aut.extract() + + self.story.setMetadata('title',stripHTML(a)[:(len(stripHTML(a))-3)]) + + # Find the chapters: + chapters=soup.findAll('div', {'class' : 'content'}) + if len(chapters) > 1: + for chapter in chapters: + # just in case there's tags, like <i> in chapter titles. + link = chapter.find('a') + self.chapterUrls.append((stripHTML(link),'http://'+self.host+'/fanfics/'+link['href'])) + else: + self.chapterUrls.append((self.story.getMetadata('title'),url)) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + asoup = bs.BeautifulSoup(self._fetchUrl(self.story.getMetadata('authorUrl'))) + + for list in asoup.findAll('div', {'class' : re.compile('listbox\s+')}): + a = list.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) + if a != None: + break + + # eFiction sites don't help us out a lot with their meta data + # formating, so it's a little ugly. 
+ + # utility method + def defaultGetattr(d,k): + try: + return d[k] + except: + return "" + + # <span class="label">Rated:</span> NC-17<br /> etc + labels = list.findAll('span', {'class' : 'classification'}) + for labelspan in labels: + label = labelspan.string + value = labelspan.nextSibling + + if 'Summary' in label: + ## Everything until the next span class='label' + svalue = "" + while not defaultGetattr(value,'class') == 'tail' and value != None: + svalue += str(value) + value = value.nextSibling + self.setDescription(url,svalue) + #self.story.setMetadata('description',stripHTML(svalue)) + + if 'Rated' in label: + self.story.setMetadata('rating', value[:len(value)-2]) + + if 'Word count' in label: + self.story.setMetadata('numWords', value) + + if 'Categories' in label: + cats = labelspan.parent.findAll('a',href=re.compile(r'categories.php\?catid=\d+')) + for cat in cats: + self.story.addToList('category',cat.string) + + if 'Characters' in label: + for char in value.string.split(', '): + if not 'None' in char: + self.story.addToList('characters',char) + + if 'Genre' in label: + for genre in value.string.split(', '): + if not 'None' in genre: + self.story.addToList('genre',genre) + + if 'Warnings' in label: + for warning in value.string.split(', '): + if not 'None' in warning: + self.story.addToList('warnings',warning) + + if 'Completed' in label: + if 'Yes' in value: + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + + if 'Published' in label: + self.story.setMetadata('datePublished', makeDate(stripHTML(value.split(' ::')[0]), self.dateformat)) + + if 'Updated' in label: + # there's a stray [ at the end. + #value = value[0:-1] + self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat)) + + try: + # Find Series name from series URL. 
+ a = list.find('a', href=re.compile(r"series.php\?seriesid=\d+")) + series_name = a.string + series_url = 'http://'+self.host+'/fanfics/'+a['href'] + + # use BeautifulSoup HTML parser to make everything easier to find. + seriessoup = bs.BeautifulSoup(self._fetchUrl(series_url)) + storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$')) + i=1 + for a in storyas: + if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')): + self.setSeries(series_name, i) + self.story.setMetadata('seriesUrl',series_url) + break + i+=1 + + except: + # I find it hard to care if the series parsing fails + pass + + # grab the text for an individual chapter. + def getChapterText(self, url): + + logger.debug('Getting chapter text from: %s' % url) + + soup = bs.BeautifulSoup(self._fetchUrl(url), + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + + div = soup.find('div', {'id' : 'story'}) + + if None == div: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) + + return self.utf8FromSoup(url,div) diff --git a/fanficdownloader/adapters/adapter_storiesofardacom.py b/fanficdownloader/adapters/adapter_storiesofardacom.py new file mode 100644 index 00000000..5ad3ef83 --- /dev/null +++ b/fanficdownloader/adapters/adapter_storiesofardacom.py @@ -0,0 +1,161 @@ +# -*- coding: utf-8 -*- + +# Copyright 2012 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import time +import logging +logger = logging.getLogger(__name__) +import re +import urllib2 + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, makeDate + +def getClass(): + return StoriesOfArdaComAdapter + +# Class name has to be unique. Our convention is camel case the +# sitename with Adapter at the end. www is skipped. +class StoriesOfArdaComAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + self.username = "NoneGiven" # if left empty, site doesn't return any message at all. + self.password = "" + self.is_adult=False + + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + + + # normalized story URL. + self._setURL('http://' + self.getSiteDomain() + '/chapterlistview.asp?SID='+self.story.getMetadata('storyId')) + + # Each adapter needs to have a unique site abbreviation. + self.story.setMetadata('siteabbrev','soa') + + # The date format will vary from site to site. + # http://docs.python.org/library/datetime.html#strftime-strptime-behavior + self.dateformat = "%m/%d/%Y" + + @staticmethod # must be @staticmethod, don't remove it. + def getSiteDomain(): + # The site domain. Does have www here, if it uses it. + return 'www.storiesofarda.com' + + @classmethod + def getSiteExampleURLs(cls): + return "http://"+cls.getSiteDomain()+"/chapterlistview.asp?SID=1234" + + def getSiteURLPattern(self): + return re.escape("http://"+self.getSiteDomain()+"/chapterlistview.asp?SID=")+r"\d+$" + + ## Getting the chapter list and the meta data, plus 'is adult' checking. 
+ def extractChapterUrlsAndMetadata(self): + + # index=1 makes sure we see the story chapter index. Some + # sites skip that for one-chapter stories. + url = self.url + logger.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulSoup(data) + # print data + + # Now go hunting for all the meta data and the chapter list. + + ## Title and author + a = soup.find('th', {'colspan' : '3'}) + + aut = a.find('a') + self.story.setMetadata('authorId',aut['href'].split('=')[1]) + self.story.setMetadata('authorUrl','http://'+self.host+'/'+aut['href']) + self.story.setMetadata('author',aut.string) + asoup = bs.BeautifulSoup(self._fetchUrl(self.story.getMetadata('authorUrl'))) + + a.find('em').extract() + self.story.setMetadata('title',stripHTML(a)) + + # Find the chapters: chapterview.asp?sid=7000&cid=30919 + chapters=soup.findAll('a', href=re.compile(r'chapterview.asp\?sid='+self.story.getMetadata('storyId')+"&cid=\d+$")) + if len(chapters)==1: + self.chapterUrls.append((self.story.getMetadata('title'),'http://'+self.host+'/'+chapters[0]['href'])) + else: + for chapter in chapters: + self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+chapter['href'])) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + summary = soup.find('td', {'colspan' : '3'}) + self.setDescription(url,summary) + + # no convenient way to get word count + + for td in asoup.findAll('td', {'colspan' : '3'}): + if td.find('a', href=re.compile('chapterlistview.asp\?SID='+self.story.getMetadata('storyId'))) != None: + break + td=td.nextSibling.nextSibling + self.story.setMetadata('dateUpdated', makeDate(stripHTML(td).split(': ')[1], self.dateformat)) + tr=td.parent.nextSibling.nextSibling.nextSibling.nextSibling + td=tr.findAll('td') + self.story.setMetadata('rating', 
td[0].string.split(': ')[1]) + self.story.setMetadata('status', td[2].string.split(': ')[1]) + self.story.setMetadata('datePublished', makeDate(stripHTML(td[4]).split(': ')[1], self.dateformat)) + + # grab the text for an individual chapter. + def getChapterText(self, url): + + logger.debug('Getting chapter text from: %s' % url) + + if self.getConfig('is_adult'): + params = {'confirmAge':'1'} + data = self._postUrl(url,params) + else: + data = self._fetchUrl(url) + + data = data[data.index('<table width="90%" align="center">'):] + data.replace("<body","<notbody").replace("<BODY","<NOTBODY") + + soup = bs.BeautifulStoneSoup(data, + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + + if "Please indicate that you are an adult by selecting the appropriate choice below" in data: + raise exceptions.FailedToDownload("Chapter requires you be an adult. Set is_adult in personal.ini (chapter url:%s)" % url) + + div = soup.find('table', {'width' : '90%'}).find('td') + div.name='div' + + if None == div: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) + + return self.utf8FromSoup(url,div) diff --git a/fanficdownloader/adapters/adapter_storiesonlinenet.py b/fanficdownloader/adapters/adapter_storiesonlinenet.py new file mode 100644 index 00000000..66f7aa5d --- /dev/null +++ b/fanficdownloader/adapters/adapter_storiesonlinenet.py @@ -0,0 +1,418 @@ +# -*- coding: utf-8 -*- + +# Copyright 2013 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and
# limitations under the License.
#

import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2

from .. import BeautifulSoup as bs
from ..htmlcleanup import stripHTML
from .. import exceptions as exceptions

from base_adapter import BaseSiteAdapter, makeDate

def getClass():
    """Entry point used by the adapter registry to obtain this adapter class."""
    return StoriesOnlineNetAdapter

# Class name has to be unique.  Our convention is camel case the
# sitename with Adapter at the end.  www is skipped.
class StoriesOnlineNetAdapter(BaseSiteAdapter):
    """Adapter for storiesonline.net story downloads."""

    def __init__(self, config, url):
        BaseSiteAdapter.__init__(self, config, url)

        self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
        self.password = ""
        self.is_adult=False

        # get storyId from url
        self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[2].split(':')[0])
        # 'storyInfo' URLs carry the id in the query string instead.
        if 'storyInfo' in self.story.getMetadata('storyId'):
            self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])

        # normalized story URL.
        self._setURL('http://' + self.getSiteDomain() + '/s/'+self.story.getMetadata('storyId'))

        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev','strol')

        # The date format will vary from site to site.
        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
        self.dateformat = "%Y-%m-%d"

    @staticmethod # must be @staticmethod, don't remove it.
    def getSiteDomain():
        # The site domain.  Does have www here, if it uses it.
        return 'storiesonline.net'

    @classmethod
    def getSiteExampleURLs(cls):
        """Return example story URL(s) shown to users for this site."""
        return "http://"+cls.getSiteDomain()+"/s/1234 http://"+cls.getSiteDomain()+"/s/1234:4010"

    def getSiteURLPattern(self):
        """Regex matching story URLs (optionally with :chapter, ;part or :i suffix)."""
        return re.escape("http://"+self.getSiteDomain())+r"/s/\d+((:\d+)?(;\d+)?$|(:i)?$)?"

    ## Login seems to be reasonably standard across eFiction sites.
    def needToLoginCheck(self, data):
        """Return True (and latch self.needToLogin) if the page demands a login."""
        if self.needToLogin \
                or 'Free Registration' in data \
                or "Invalid Password!" in data \
                or "Invalid User Name!" in data \
                or "Log In" in data \
                or "Access to unlinked chapters requires" in data:
            self.needToLogin = True
        return self.needToLogin

    def performLogin(self, url):
        """POST credentials to the site's login form.

        Uses instance credentials when set, else configured ones.  Raises
        FailedToLogin when the response lacks the logged-in marker.
        """
        params = {}

        if self.password:
            params['theusername'] = self.username
            params['thepassword'] = self.password
        else:
            params['theusername'] = self.getConfig("username")
            params['thepassword'] = self.getConfig("password")
        params['rememberMe'] = '1'
        params['page'] = 'http://'+self.getSiteDomain()+'/'
        params['submit'] = 'Login'

        loginUrl = 'http://' + self.getSiteDomain() + '/login.php'
        logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
                                                             params['theusername']))

        d = self._fetchUrl(loginUrl, params,usecache=False)

        if "My Account" not in d : #Member Account
            logger.info("Failed to login to URL %s as %s" % (loginUrl,
                                                             params['theusername']))
            raise exceptions.FailedToLogin(url,params['theusername'])
            return False  # NOTE(review): unreachable after raise; kept as-is.
        else:
            return True

    def use_pagecache(self):
        '''
        adapters that will work with the page cache need to implement
        this and change it to True.
        '''
        return True

    ## Getting the chapter list and the meta data, plus 'is adult' checking.
    def extractChapterUrlsAndMetadata(self):
        """Scrape story index (url+':i') and the author's page for metadata.

        Handles login (HTTP 401 or login-page markers), validation/filter
        rejections, chapter list, genre/size/description, series and
        universe detection, labels and status.  The sibling traversals and
        label matching mirror the site's exact markup -- order-sensitive.
        """
        # index=1 makes sure we see the story chapter index.  Some
        # sites skip that for one-chapter stories.
        url = self.url
        logger.debug("URL: "+url)

        self.needToLogin = False
        try:
            data = self._fetchUrl(url+":i")
        except urllib2.HTTPError, e:
            if e.code == 404:
                raise exceptions.StoryDoesNotExist(self.url)
            elif e.code == 401:
                self.needToLogin = True
                data = ''
            else:
                raise e

        if self.needToLoginCheck(data):
            # need to log in for this one.
            self.performLogin(url)
            data = self._fetchUrl(url+":i",usecache=False)

        if "Access denied. This story has not been validated by the adminstrators of this site." in data:
            raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")
        elif "Error! The story you're trying to access is being filtered by your choice of contents filtering." in data:
            raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Error! The story you're trying to access is being filtered by your choice of contents filtering.")

        # use BeautifulSoup HTML parser to make everything easier to find.
        soup = bs.BeautifulSoup(data)
        #print data

        # Now go hunting for all the meta data and the chapter list.

        ## Title
        a = soup.find('h1')
        self.story.setMetadata('title',stripHTML(a))

        notice = soup.find('div', {'class' : 'notice'})
        if notice:
            self.story.setMetadata('notice',unicode(notice))

        # Find authorid and URL from... author url.
        for a in soup.findAll('a', href=re.compile(r"/a/\w+")):
            self.story.addToList('authorId',a['href'].split('/')[2])
            self.story.addToList('authorUrl','http://'+self.host+a['href'])
            self.story.addToList('author',stripHTML(a).replace("'s Page",""))

        # Find the chapters:
        chapters = soup.findAll('a', href=re.compile(r'^/s/'+self.story.getMetadata('storyId')+":\d+$"))
        if len(chapters) != 0:
            for chapter in chapters:
                # just in case there's tags, like <i> in chapter titles.
                self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+chapter['href']))
        else:
            self.chapterUrls.append((self.story.getMetadata('title'),'http://'+self.host+'/s/'+self.story.getMetadata('storyId')))

        self.story.setMetadata('numChapters',len(self.chapterUrls))

        # surprisingly, the detailed page does not give enough details, so go to author's page
        # page through the author's listing until the row for this story is found.
        page=0
        i=0
        while i == 0:
            asoup = bs.BeautifulSoup(self._fetchUrl(self.story.getList('authorUrl')[0]+"/"+str(page)))

            a = asoup.findAll('td', {'class' : 'lc2'})
            for lc2 in a:
                if lc2.find('a', href=re.compile(r'^/s/'+self.story.getMetadata('storyId'))):
                    i=1
                    break
                if a[len(a)-1] == lc2:
                    page=page+1

        # lc2 is left bound to the matching row's cell by the loop above.
        for cat in lc2.findAll('div', {'class' : 'typediv'}):
            self.story.addToList('genre',cat.text)

        # in lieu of word count.
        self.story.setMetadata('size', lc2.findNext('td', {'class' : 'num'}).text)

        lc4 = lc2.findNext('td', {'class' : 'lc4'})
        desc = lc4.contents[0]

        try:
            a = lc4.find('a', href=re.compile(r"/series/\d+/.*"))
            if a:
                # if there's a number after the series name, series_contents is a two element list:
                # [<a href="...">Title</a>, u' (2)']
                series_contents = a.parent.contents
                i = 0 if len(series_contents) == 1 else series_contents[1].strip(' ()')
                seriesUrl = 'http://'+self.host+a['href']
                self.story.setMetadata('seriesUrl',seriesUrl)
                series_name = stripHTML(a)
                logger.debug("Series name= %s" % series_name)
                series_soup = bs.BeautifulSoup(self._fetchUrl(seriesUrl))
                if series_soup:
                    logger.debug("Retrieving Series - looking for name")
                    series_name = series_soup.find('span', {'id' : 'ptitle'}).text.partition(' — ')[0]
                    logger.debug("Series name: '{0}'".format(series_name))
                self.setSeries(series_name, i)
                desc = lc4.contents[2]
                # Check if series is in a universe
                universe_url = self.story.getList('authorUrl')[0] + "&type=uni"
                universes_soup = bs.BeautifulSoup(self._fetchUrl(universe_url) )
                logger.debug("Universe url='{0}'".format(universe_url))
                if universes_soup:
                    universes = universes_soup.findAll('div', {'class' : 'ser-box'})
                    logger.debug("Number of Universes: %d" % len(universes))
                    for universe in universes:
                        logger.debug("universe.find('a')={0}".format(universe.find('a')))
                        # The universe id is in an "a" tag that has an id but nothing else. It is the first tag.
                        # The id is prefixed with the letter "u".
                        universe_id = universe.find('a')['id'][1:]
                        logger.debug("universe_id='%s'" % universe_id)
                        universe_name = universe.find('div', {'class' : 'ser-name'}).text.partition(' ')[2]
                        logger.debug("universe_name='%s'" % universe_name)
                        # If there is link to the story, we have the right universe
                        story_a = universe.find('a', href=re.compile('/s/'+self.story.getMetadata('storyId')))
                        if story_a:
                            logger.debug("Story is in a series that is in a universe! The universe is '%s'" % universe_name)
                            self.story.setMetadata("universe", universe_name)
                            self.story.setMetadata('universeUrl','http://'+self.host+ '/library/universe.php?id=' + universe_id)
                            break
                else:
                    logger.debug("No universe page")
        except:
            # best-effort: series/universe parse failures are non-fatal.
            pass
        try:
            a = lc4.find('a', href=re.compile(r"/universe/\d+/.*"))
            logger.debug("Looking for universe - a='{0}'".format(a))
            if a:
                self.story.setMetadata("universe",stripHTML(a))
                desc = lc4.contents[2]
                # Assumed only one universe, but it does have a URL--use universeHTML
                universe_name = stripHTML(a)
                universeUrl = 'http://'+self.host+a['href']
                logger.debug("Retrieving Universe - about to get page - universeUrl='{0}".format(universeUrl))
                universe_soup = bs.BeautifulSoup(self._fetchUrl(universeUrl))
                logger.debug("Retrieving Universe - have page")
                if universe_soup:
                    logger.debug("Retrieving Universe - looking for name")
                    universe_name = universe_soup.find('h1', {'id' : 'ptitle'}).text.partition(' —')[0]
                    logger.debug("Universes name: '{0}'".format(universe_name))

                self.story.setMetadata('universeUrl',universeUrl)
                logger.debug("Setting universe name: '{0}'".format(universe_name))
                self.story.setMetadata('universe',universe_name)
                if self.getConfig("universe_as_series"):
                    self.setSeries(universe_name, 0)
                    self.story.setMetadata('seriesUrl',universeUrl)
            else:
                logger.debug("Do not have a universe")
        except:
            # best-effort: universe parse failures are non-fatal.
            pass

        self.setDescription('http://'+self.host+'/s/'+self.story.getMetadata('storyId'),desc)

        # <b>Label:</b> value pairs; date labels hide the value in a noscript.
        for b in lc4.findAll('b'):
            #logger.debug('Getting metadata: "%s"' % b)
            label = b.text
            if label in ['Posted:', 'Concluded:', 'Updated:']:
                value = b.findNext('noscript').text
                #logger.debug('Have a date field label: "%s", value: "%s"' % (label, value))
            else:
                value = b.nextSibling
            #logger.debug('label: "%s", value: "%s"' % (label, value))

            if 'Sex' in label:
                self.story.setMetadata('rating', value)

            if 'Tags' in label:
                for code in re.split(r'\s*,\s*', value.strip()):
                    self.story.addToList('sitetags',code)

            if 'Posted' in label:
                self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat))
                self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))

            if 'Concluded' in label:
                self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))

            if 'Updated' in label:
                self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))
#
        status = lc4.find('span', {'class' : 'ab'})
        if status != None:
            self.story.setMetadata('status', 'In-Progress')
            if "Last Activity" in status.text:
                # date is passed as a timestamp and converted in JS.
                value = status.findNext('noscript').text
                self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))
        else:
            self.story.setMetadata('status', 'Completed')


    # grab the text for an individual chapter.
    def getChapterText(self, url):
        """Fetch a chapter, stitching together multi-page chapters.

        Long chapters are split across pages joined by a 'pager' span and
        'conTag' Continues/Continued markers; this walks each extra page,
        trims pager/marker chrome from both sides of the join, then strips
        dates, copyright header, vote form, library box and the 'The End'
        trailer.  All traversal is strictly order-dependent.
        """
        logger.debug('Getting chapter text from: %s' % url)

        soup = bs.BeautifulSoup(self._fetchUrl(url),
                                selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

        div = soup.find('div', {'id' : 'story'})

        # some big chapters are split over several pages
        pager = div.find('span', {'class' : 'pager'})
        if pager != None:
            # drop everything before the pager on the first page.
            a = pager.previousSibling
            while a != None:
                logger.debug("before pager: {0}".format(a))
                b = a.previousSibling
                a.extract()
                a = b

            urls=pager.findAll('a')
            urls=urls[:len(urls)-1]  # last pager link is not a page link.
            pager.extract()
            div.contents = div.contents[2:]
#            logger.debug(div)

            for ur in urls:
                soup = bs.BeautifulSoup(self._fetchUrl("http://"+self.getSiteDomain()+ur['href']),
                                        selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

                div1 = soup.find('div', {'id' : 'story'})

                # Find the "Continues" marker on the current page and remove everything after that.
                continues = div.find('span', {'class' : 'conTag'})
                if continues != None:
                    while continues != None:
#                        logger.debug("removing end: {0}".format(continues))
                        b = continues.nextSibling
                        continues.extract()
                        continues = b

                # Find the "Continued" marker and delete everything before that
                continued = div1.find('span', {'class' : 'conTag'})
                if continued != None:
                    a = continued.previousSibling
                    while a != None:
#                        logger.debug("before conTag: {0}".format(a))
                        b = a.previousSibling
                        a.extract()
                        a = b
                # Remove the pager from the end if this is the last page
                endPager = div1.find('span', {'class' : 'pager'})
                if endPager != None:
                    b = endPager.nextSibling
                    while endPager != None:
                        logger.debug("removing end: {0}".format(endPager))
                        b = endPager.nextSibling
                        endPager.extract()
                        endPager = b
                    div1.contents = div1.contents[:len(div1) - 2]
#                logger.debug("after removing pager: {0}".format(div1))
                for tag in div1.contents[2:]:
                    div.append(tag)

        # If it is a chapter, there are dates at the start for when it was posted or modified. These plus
        # everything before them can be discarded.
        postedDates = div.findAll('div', {'class' : 'date'})
        if postedDates:
            a = postedDates[0].previousSibling
            while a != None:
#                logger.debug("before dates: {0}".format(a))
                b = a.previousSibling
                a.extract()
                a = b
            for a in div.findAll('div', {'class' : 'date'}):
                a.extract()

        # For single chapter stories, there is a copyright statement. Remove this and everything
        # before it.
        copy = div.find('h4', {'class': 'copy'})
        while copy != None:
#            logger.debug("before copyright: {0}".format(copy))
            b = copy.previousSibling
            copy.extract()
            copy = b

        # For a story or the last chapter, remove voting form and the in library box
        a = div.find('div', {'id' : 'vote-form'})
        if a != None:
            a.extract()
        a = div.find('div', {'id' : 'b-man-div'})
        if a != None:
            a.extract()

        # Kill the "The End" header and everything after it.
        a = div.find(['h2', 'h3'], {'class' : 'end'})
        logger.debug("Chapter end= '{0}'".format(a))
        while a != None:
            b = a.nextSibling
            a.extract()
            a=b


        if None == div:
            raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)

        return self.utf8FromSoup(url,div)
diff --git a/fanficdownloader/adapters/adapter_tenhawkpresentscom.py b/fanficdownloader/adapters/adapter_tenhawkpresentscom.py
new file mode 100644
index 00000000..634adfec
--- /dev/null
+++ b/fanficdownloader/adapters/adapter_tenhawkpresentscom.py
@@ -0,0 +1,257 @@
# -*- coding: utf-8 -*-

# Copyright 2011 Fanficdownloader team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Software: eFiction
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib
import urllib2

from .. import BeautifulSoup as bs
from ..htmlcleanup import stripHTML
from .. import exceptions as exceptions

from base_adapter import BaseSiteAdapter, makeDate

class TenhawkPresentsComSiteAdapter(BaseSiteAdapter):
    """Adapter for fanfiction.tenhawkpresents.com (eFiction site)."""

    def __init__(self, config, url):
        BaseSiteAdapter.__init__(self, config, url)
        self.story.setMetadata('siteabbrev','thpc')
        self.decode = ["Windows-1252",
                       "utf8"] # 1252 is a superset of iso-8859-1.
                               # Most sites that claim to be
                               # iso-8859-1 (and some that claim to be
                               # utf8) are really windows-1252.
        self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
        self.password = ""
        self.is_adult=False

        # get storyId from url--url validation guarantees query is only sid=1234
        self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])


        # normalized story URL.
        self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
        self.dateformat = "%b %d %Y"


    @staticmethod
    def getSiteDomain():
        # The site domain.
        return 'fanfiction.tenhawkpresents.com'

    @classmethod
    def getSiteExampleURLs(cls):
        """Return example story URL(s) shown to users for this site."""
        return "http://"+cls.getSiteDomain()+"/viewstory.php?sid=1234"

    def getSiteURLPattern(self):
        """Regex matching story URLs this adapter accepts."""
        return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$"

    def use_pagecache(self):
        '''
        adapters that will work with the page cache need to implement
        this and change it to True.
        '''
        return True

    def needToLoginCheck(self, data):
        """Return True if the fetched page demands a registered login."""
        if 'Registered Users Only' in data \
                or 'There is no such account on our website' in data \
                or "That password doesn't match the one in our database" in data:
            return True
        else:
            return False

    def performLogin(self, url):
        """POST eFiction login form; raise FailedToLogin on failure."""
        params = {}

        if self.password:
            params['penname'] = self.username
            params['password'] = self.password
        else:
            params['penname'] = self.getConfig("username")
            params['password'] = self.getConfig("password")
        params['cookiecheck'] = '1'
        params['submit'] = 'Submit'

        loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login'
        logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
                                                             params['penname']))

        d = self._fetchUrl(loginUrl, params)

        if "Member Account" not in d : #Member Account
            logger.info("Failed to login to URL %s as %s" % (loginUrl,
                                                             params['penname']))
            raise exceptions.FailedToLogin(url,params['penname'])
            return False  # NOTE(review): unreachable after raise; kept as-is.
        else:
            return True

    def extractChapterUrlsAndMetadata(self):
        """Scrape the story index page for chapters and metadata.

        Appends age-consent query params when is_adult, logs in when the
        page demands it, then parses eFiction 'span.label' name/value
        pairs.  Raises StoryDoesNotExist / AdultCheckRequired /
        FailedToDownload as appropriate.
        """
        if self.is_adult or self.getConfig("is_adult"):
            addurl = "&ageconsent=ok&warning=3"
        else:
            addurl=""

        url = self.url+'&index=1'+addurl
        logger.debug("URL: "+url)

        try:
            data = self._fetchUrl(url)
        except urllib2.HTTPError, e:
            if e.code == 404:
                raise exceptions.StoryDoesNotExist(self.url)
            else:
                raise e

        if self.needToLoginCheck(data):
            # need to log in for this one.
            addurl = "&ageconsent=ok&warning=4"
            url = self.url+'&index=1'+addurl
            logger.debug("Changing URL: "+url)
            self.performLogin(url)
            data = self._fetchUrl(url,usecache=False)

        if "This story contains mature content which may include violence, sexual situations, and coarse language" in data:
            raise exceptions.AdultCheckRequired(self.url)

        if "Access denied. This story has not been validated by the adminstrators of this site." in data:
            raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")

        # use BeautifulSoup HTML parser to make everything easier to find.
        soup = bs.BeautifulSoup(data)

        ## Title
        a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')))
        self.story.setMetadata('title',stripHTML(a))

        # Find authorid and URL from... author url.
        a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
        self.story.setMetadata('authorId',a['href'].split('=')[1])
        self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href'])
        self.story.setMetadata('author',a.string)

        # Find the chapters:
        for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")):
            # just in case there's tags, like <i> in chapter titles.
            self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+chapter['href']+addurl))

        self.story.setMetadata('numChapters',len(self.chapterUrls))

        # helper: tolerate nodes without the requested attribute.
        def defaultGetattr(d,k):
            try:
                return d[k]
            except:
                return ""

        # <span class="label">Rated:</span> NC-17<br /> etc
        labels = soup.findAll('span',{'class':'label'})
        for labelspan in labels:
            value = labelspan.nextSibling
            label = labelspan.string

            if 'Summary' in label:
                ## Everything until the next span class='label'
                svalue = ""
                while not defaultGetattr(value,'class') == 'label':
                    svalue += str(value)
                    value = value.nextSibling
                self.setDescription(url,svalue)
                #self.story.setMetadata('description',stripHTML(svalue))

            if 'Rated' in label:
                self.story.setMetadata('rating', value)

            if 'Word count' in label:
                self.story.setMetadata('numWords', value)

            if 'Categories' in label:
                cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
                catstext = [cat.string for cat in cats]
                for cat in catstext:
                    self.story.addToList('category',cat.string)

            if 'Characters' in label:
                chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
                charstext = [char.string for char in chars]
                for char in charstext:
                    self.story.addToList('characters',char.string)

            if 'Genre' in label:
                genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class'))
                genrestext = [genre.string for genre in genres]
                self.genre = ', '.join(genrestext)
                for genre in genrestext:
                    self.story.addToList('genre',genre.string)

            if 'Completed' in label:
                if 'Yes' in value:
                    self.story.setMetadata('status', 'Completed')
                else:
                    self.story.setMetadata('status', 'In-Progress')

            if 'Published' in label:
                self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat))

            if 'Updated' in label:
                # there's a stray [ at the end.
                #value = value[0:-1]
                self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))

        try:
            # Find Series name from series URL.
            a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+"))
            series_name = a.string
            series_url = 'http://'+self.host+'/'+a['href']

            # use BeautifulSoup HTML parser to make everything easier to find.
            seriessoup = bs.BeautifulSoup(self._fetchUrl(series_url))
            storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
            i=1
            for a in storyas:
                if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):
                    self.setSeries(series_name, i)
                    self.story.setMetadata('seriesUrl',series_url)
                    break
                i+=1

        except:
            # I find it hard to care if the series parsing fails
            pass


    def getChapterText(self, url):
        """Fetch one chapter page and return the div with id 'story'.

        Raises FailedToDownload when the required element is missing.
        """
        logger.debug('Getting chapter text from: %s' % url)

        soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
                                     selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

        span = soup.find('div', {'id' : 'story'})

        if None == span:
            raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)

        return self.utf8FromSoup(url,span)

def getClass():
    """Entry point used by the adapter registry to obtain this adapter class."""
    return TenhawkPresentsComSiteAdapter

diff --git a/fanficdownloader/adapters/adapter_test1.py b/fanficdownloader/adapters/adapter_test1.py
new file mode 100644
index 00000000..e3b5c5f3
--- /dev/null
+++ b/fanficdownloader/adapters/adapter_test1.py
@@ -0,0 +1,368 @@
# -*- coding: utf-8 -*-

# Copyright 2011 Fanficdownloader team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import datetime
import time
import logging
logger = logging.getLogger(__name__)

from .. import BeautifulSoup as bs
import exceptions + +from base_adapter import BaseSiteAdapter, makeDate + +class TestSiteAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + self.story.setMetadata('siteabbrev','tst1') + self.crazystring = u" crazy tests:[bare amp(&) quote(') amp(&) gt(>) lt(<) ATnT(AT&T) pound(£)]" + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + self.username='' + self.is_adult=False + + @staticmethod + def getSiteDomain(): + return 'test1.com' + + @classmethod + def getSiteExampleURLs(cls): + return "http://"+cls.getSiteDomain()+"?sid=1234" + + def getSiteURLPattern(self): + return BaseSiteAdapter.getSiteURLPattern(self)+r'/?\?sid=\d+$' + + def extractChapterUrlsAndMetadata(self): + idstr = self.story.getMetadata('storyId') + idnum = int(idstr) + self.do_sleep() + + if idnum >= 1000: + logger.warn("storyId:%s - Custom INI data will be used."%idstr) + + sections = ['teststory:%s'%idstr,'teststory:defaults'] + #print("self.get_config_list(sections,'valid_entries'):%s"%self.get_config_list(sections,'valid_entries')) + for key in self.get_config_list(sections,'valid_entries'): + if key.endswith("_list"): + nkey = key[:-len("_list")] + #print("addList:%s"%(nkey)) + for val in self.get_config_list(sections,key): + #print("addList:%s->%s"%(nkey,val)) + self.story.addToList(nkey,val.decode('utf-8').replace('{{storyId}}',idstr)) + else: + # Special cases: + if key in ['datePublished','dateUpdated']: + self.story.setMetadata(key,makeDate(self.get_config(sections,key),"%Y-%m-%d")) + else: + self.story.setMetadata(key,self.get_config(sections,key).decode('utf-8').replace('{{storyId}}',idstr)) + #print("set:%s->%s"%(key,self.story.getMetadata(key))) + + self.chapterUrls = [] + for (j,chap) in enumerate(self.get_config_list(sections,'chaptertitles'),start=1): + self.chapterUrls.append( (chap,self.url+"&chapter=%d"%j) ) + # 
self.chapterUrls = [(u'Prologue '+self.crazystring,self.url+"&chapter=1"), + # ('Chapter 1, Xenos on Cinnabar',self.url+"&chapter=2"), + # ] + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + return + + if idstr == '665' and not (self.is_adult or self.getConfig("is_adult")): + logger.warn("self.is_adult:%s"%self.is_adult) + raise exceptions.AdultCheckRequired(self.url) + + if idstr == '666': + raise exceptions.StoryDoesNotExist(self.url) + + if idstr.startswith('670'): + time.sleep(1.0) + + if idstr.startswith('671'): + time.sleep(1.0) + + if self.getConfig("username"): + self.username = self.getConfig("username") + + if idstr == '668' and self.username != "Me" : + raise exceptions.FailedToLogin(self.url,self.username) + + if idstr == '664': + self.story.setMetadata(u'title',"Test Story Title "+idstr+self.crazystring) + self.story.setMetadata('author','Test Author aa bare amp(&) quote(') amp(&)') + else: + self.story.setMetadata(u'title',"Test Story Title "+idstr) + self.story.setMetadata('author','Test Author aa') + self.story.setMetadata('storyUrl',self.url) + self.setDescription(self.url,u'Description '+self.crazystring+u''' Done +<p> +Some more longer description. "I suck at summaries!" "Better than it sounds!" "My first fic" +''') + self.story.setMetadata('datePublished',makeDate("1975-03-15","%Y-%m-%d")) + if idstr == '669': + self.story.setMetadata('dateUpdated',datetime.datetime.now()) + else: + self.story.setMetadata('dateUpdated',makeDate("1975-04-15","%Y-%m-%d")) + self.story.setMetadata('numWords','123456') + + if idnum % 2 == 1: + self.story.setMetadata('status','In-Progress') + else: + self.story.setMetadata('status','Completed') + + # greater than 10, no language or series. 
+ if idnum < 10: + langs = { + 0:"English", + 1:"Russian", + 2:"French", + 3:"German", + } + self.story.setMetadata('language',langs[idnum%len(langs)]) + self.setSeries('The Great Test',idnum) + self.story.setMetadata('seriesUrl','http://test1.com?seriesid=1') + if idnum == 0: + self.setSeries("A Nook Hyphen Test "+self.story.getMetadata('dateCreated'),idnum) + self.story.setMetadata('seriesUrl','http://test1.com?seriesid=0') + + self.story.setMetadata('rating','Tweenie') + + if idstr == '673': + self.story.addToList('author','Author From List 1') + self.story.addToList('author','Author From List 2') + self.story.addToList('author','Author From List 3') + self.story.addToList('author','Author From List 4') + self.story.addToList('author','Author From List 5') + self.story.addToList('author','Author From List 6') + self.story.addToList('author','Author From List 7') + self.story.addToList('author','Author From List 8') + self.story.addToList('author','Author From List 9') + self.story.addToList('author','Author From List 0') + self.story.addToList('author','Author From List q') + self.story.addToList('author','Author From List w') + self.story.addToList('author','Author From List e') + self.story.addToList('author','Author From List r') + self.story.addToList('author','Author From List t') + self.story.addToList('author','Author From List y') + self.story.addToList('author','Author From List u') + self.story.addToList('author','Author From List i') + self.story.addToList('author','Author From List o') + + self.story.addToList('authorId','98765-1') + self.story.addToList('authorId','98765-2') + self.story.addToList('authorId','98765-3') + self.story.addToList('authorId','98765-4') + self.story.addToList('authorId','98765-5') + self.story.addToList('authorId','98765-6') + self.story.addToList('authorId','98765-7') + self.story.addToList('authorId','98765-8') + self.story.addToList('authorId','98765-9') + self.story.addToList('authorId','98765-0') + 
self.story.addToList('authorId','98765-q') + self.story.addToList('authorId','98765-w') + self.story.addToList('authorId','98765-e') + self.story.addToList('authorId','98765-r') + self.story.addToList('authorId','98765-t') + self.story.addToList('authorId','98765-y') + self.story.addToList('authorId','98765-u') + self.story.addToList('authorId','98765-i') + self.story.addToList('authorId','98765-o') + + self.story.addToList('authorUrl','http://author/url-1') + self.story.addToList('authorUrl','http://author/url-2') + self.story.addToList('authorUrl','http://author/url-3') + self.story.addToList('authorUrl','http://author/url-4') + self.story.addToList('authorUrl','http://author/url-5') + self.story.addToList('authorUrl','http://author/url-6') + self.story.addToList('authorUrl','http://author/url-7') + self.story.addToList('authorUrl','http://author/url-8') + self.story.addToList('authorUrl','http://author/url-9') + self.story.addToList('authorUrl','http://author/url-0') + self.story.addToList('authorUrl','http://author/url-q') + self.story.addToList('authorUrl','http://author/url-w') + self.story.addToList('authorUrl','http://author/url-e') + self.story.addToList('authorUrl','http://author/url-r') + self.story.addToList('authorUrl','http://author/url-t') + self.story.addToList('authorUrl','http://author/url-y') + self.story.addToList('authorUrl','http://author/url-u') + self.story.addToList('authorUrl','http://author/url-i') + self.story.addToList('authorUrl','http://author/url-o') + + self.story.addToList('category','Power Rangers') + self.story.addToList('category','SG-1') + self.story.addToList('genre','Porn') + self.story.addToList('genre','Drama') + else: + self.story.setMetadata('authorId','98765') + self.story.setMetadata('authorUrl','http://author/url') + + self.story.addToList('warnings','Swearing') + self.story.addToList('warnings','Violence') + + if idstr == '80': + self.story.addToList('category',u'Rizzoli & Isles') + 
self.story.addToList('characters','J. Rizzoli') + elif idstr == '81': + self.story.addToList('category',u'Pitch Perfect') + self.story.addToList('characters','Chloe B.') + elif idstr == '82': + self.story.addToList('characters','Henry (Once Upon a Time)') + self.story.addToList('category',u'Once Upon a Time (TV)') + elif idstr == '83': + self.story.addToList('category',u'Rizzoli & Isles') + self.story.addToList('characters','J. Rizzoli') + self.story.addToList('category',u'Pitch Perfect') + self.story.addToList('characters','Chloe B.') + self.story.addToList('ships','Chloe B. & J. Rizzoli') + elif idstr == '90': + self.story.setMetadata('characters','Henry (Once Upon a Time)') + self.story.setMetadata('category',u'Once Upon a Time (TV)') + else: + self.story.addToList('category','Harry Potter') + self.story.addToList('category','Furbie') + self.story.addToList('category','Crossover') + self.story.addToList('category',u'Puella Magi Madoka Magica/魔法少女まどか★マギカ') + self.story.addToList('category',u'Magical Girl Lyrical Nanoha') + self.story.addToList('category',u'Once Upon a Time (TV)') + self.story.addToList('characters','Bob Smith') + self.story.addToList('characters','George Johnson') + self.story.addToList('characters','Fred Smythe') + self.story.addToList('ships','Harry Potter/Ginny Weasley') + self.story.addToList('ships','Harry Potter/Ginny Weasley/Albus Dumbledore') + self.story.addToList('ships','Harry Potter & Hermione Granger') + + self.story.addToList('genre','Fantasy') + self.story.addToList('genre','Comedy') + self.story.addToList('genre','Sci-Fi') + self.story.addToList('genre','Noir') + + self.story.addToList('listX','xVal1') + self.story.addToList('listX','xVal2') + self.story.addToList('listX','xVal3') + self.story.addToList('listX','xVal4') + + self.story.addToList('listY','yVal1') + self.story.addToList('listY','yVal2') + self.story.addToList('listY','yVal3') + self.story.addToList('listY','yVal4') + + self.story.addToList('listZ','zVal1') + 
self.story.addToList('listZ','zVal2') + self.story.addToList('listZ','zVal3') + self.story.addToList('listZ','zVal4') + + self.story.setMetadata('metaA','98765') + self.story.setMetadata('metaB','01245') + self.story.setMetadata('metaC','The mighty metaC!') + + self.chapterUrls = [(u'Prologue '+self.crazystring,self.url+"&chapter=1"), + ('Chapter 1, Xenos on Cinnabar',self.url+"&chapter=2"), + ('Chapter 2, Sinmay on Kintikin',self.url+"&chapter=3"), + ('Chapter 3, Over Cinnabar',self.url+"&chapter=4"), + ('Chapter 4',self.url+"&chapter=5"), + ('Chapter 5',self.url+"&chapter=6"), + ('Chapter 6',self.url+"&chapter=7"), + ('Chapter 7',self.url+"&chapter=8"), + ('Chapter 8',self.url+"&chapter=9"), + #('Chapter 9',self.url+"&chapter=0"), + #('Chapter 0',self.url+"&chapter=a"), + #('Chapter a',self.url+"&chapter=b"), + #('Chapter b',self.url+"&chapter=c"), + #('Chapter c',self.url+"&chapter=d"), + #('Chapter d',self.url+"&chapter=e"), + #('Chapter e',self.url+"&chapter=f"), + #('Chapter f',self.url+"&chapter=g"), + #('Chapter g',self.url+"&chapter=h"), + #('Chapter h',self.url+"&chapter=i"), + #('Chapter i',self.url+"&chapter=j"), + #('Chapter j',self.url+"&chapter=k"), + #('Chapter k',self.url+"&chapter=l"), + #('Chapter l',self.url+"&chapter=m"), + #('Chapter m',self.url+"&chapter=n"), + #('Chapter n',self.url+"&chapter=o"), + ] + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + + def getChapterText(self, url): + logger.debug('Getting chapter text from: %s' % url) + self.do_sleep() + if self.story.getMetadata('storyId').startswith('670') or \ + self.story.getMetadata('storyId').startswith('672'): + time.sleep(1.0) + + if "chapter=1" in url : + text=u''' +<div> +<h3>Prologue</h3> +<p>This is a fake adapter for testing purposes. 
Different sid's will give different errors:</p> +<h4>Config(personal.ini)</h4> +<p>sid>=1000 will use custom test story data from your configuration(personal.ini)</p> +<p>Hard coded ids:</p> +<p>http://test1.com?sid=664 - Crazy string title</p> +<p>http://test1.com?sid=665 - raises AdultCheckRequired</p> +<p>http://test1.com?sid=666 - raises StoryDoesNotExist</p> +<p>http://test1.com?sid=667 - raises FailedToDownload on chapters 2+</p> +<p>http://test1.com?sid=668 - raises FailedToLogin unless username='Me'</p> +<p>http://test1.com?sid=669 - Succeeds with Updated Date=now</p> +<p>http://test1.com?sid=670 - Succeeds, but sleeps 2sec on each chapter</p> +<p>http://test1.com?sid=671 - Succeeds, but sleeps 2sec metadata only</p> +<p>http://test1.com?sid=672 - Succeeds, quick meta, sleeps 2sec chapters only</p> +<p>http://test1.com?sid=673 - Succeeds, multiple authors, extra categories, genres</p> +<p>http://test1.com?sid=0 - Succeeds, generates some text specifically for testing hyphenation problems with Nook STR/STRwG</p> +<p>Odd sid's will be In-Progress, evens complete. sid<10 will be assigned one of four languages and included in a series.</p> +</div> +''' + elif self.story.getMetadata('storyId') == '0': + text=u'''<div> +<h3>45. Pronglet Returns to Hogwarts: Chapter 7</h3> +<br /> + eyes… but I’m not convinced we should automatically<br /> +<br /><br /> +<b>Thanks to the latest to recommend me: Alastor</b><br /> +<br /><br /> + “Sure, invite her along. Does she have children?”<br /> +<br /> +</div> +''' + else: + if self.story.getMetadata('storyId') == '667': + raise exceptions.FailedToDownload("Error downloading Chapter: %s!" % url) + + text=u''' +<div> +<h3>Chapter title from site</h3> +<p>Timestamp:'''+datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")+'''</p> +<p>Lorem '''+self.crazystring+u''' <i>italics</i>, <b>bold</b>, <u>underline</u> consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. 
Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p> +br breaks<br><br> +Puella Magi Madoka Magica/魔法少女まどか★マギカ +<!-- a href="http://code.google.com/p/fanficdownloader/wiki/FanFictionDownLoaderPluginWithReadingList" title="Tilt-a-Whirl by Jim & Sarah, on Flickr"><img src="http://i.imgur.com/bo8eD.png"></a --><br/> +br breaks<br><br> +Don't—e;ver—d;o—that—a;gain, 法 é +<hr> +horizontal rules +<hr size=1 noshade> +<p>"Lorem ipsum dolor sit amet", consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore--et dolore magna aliqua. 'Ut enim ad minim veniam', quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p> +<p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p> +</div> +''' + soup = bs.BeautifulStoneSoup(text,selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. 
+ return self.utf8FromSoup(url,soup) + +def getClass(): + return TestSiteAdapter + diff --git a/fanficdownloader/adapters/adapter_thealphagatecom.py b/fanficdownloader/adapters/adapter_thealphagatecom.py new file mode 100644 index 00000000..44206161 --- /dev/null +++ b/fanficdownloader/adapters/adapter_thealphagatecom.py @@ -0,0 +1,215 @@ +# -*- coding: utf-8 -*- + +# Copyright 2012 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Software: eFiction +import time +import logging +logger = logging.getLogger(__name__) +import re +import urllib2 + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, makeDate + +def getClass(): + return TheAlphaGateComAdapter + +# Class name has to be unique. Our convention is camel case the +# sitename with Adapter at the end. www is skipped. +class TheAlphaGateComAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + self.username = "NoneGiven" # if left empty, site doesn't return any message at all. 
+ self.password = "" + self.is_adult=False + + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + + + # normalized story URL. + self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId')) + + # Each adapter needs to have a unique site abbreviation. + self.story.setMetadata('siteabbrev','tag') + + # The date format will vary from site to site. + # http://docs.python.org/library/datetime.html#strftime-strptime-behavior + self.dateformat = "%d %b %Y" + + @staticmethod # must be @staticmethod, don't remove it. + def getSiteDomain(): + # The site domain. Does have www here, if it uses it. + return 'www.thealphagate.com' + + @classmethod + def getSiteExampleURLs(cls): + return "http://"+cls.getSiteDomain()+"/viewstory.php?sid=1234" + + def getSiteURLPattern(self): + return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$" + + ## Getting the chapter list and the meta data, plus 'is adult' checking. + def extractChapterUrlsAndMetadata(self): + + # index=1 makes sure we see the story chapter index. Some + # sites skip that for one-chapter stories. + url = self.url+'&index=1' + logger.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + if "Access denied. This story has not been validated by the adminstrators of this site." in data: + raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.") + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulSoup(data) + # print data + + # Now go hunting for all the meta data and the chapter list. 
+ + ## Title + a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) + self.story.setMetadata('title',stripHTML(a)) + + # Find authorid and URL from... author url. + a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) + self.story.setMetadata('authorId',a['href'].split('=')[1]) + self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href']) + self.story.setMetadata('author',a.string) + + # Find the chapters: + for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")): + # just in case there's tags, like <i> in chapter titles. + self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+chapter['href'])) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + # eFiction sites don't help us out a lot with their meta data + # formating, so it's a little ugly. + + # utility method + def defaultGetattr(d,k): + try: + return d[k] + except: + return "" + + # <span class="label">Rated:</span> NC-17<br /> etc + labels = soup.findAll('span',{'class':'label'}) + for labelspan in labels: + value = labelspan.nextSibling + label = labelspan.string + + if 'Summary' in label: + ## Everything until the next span class='label' + svalue = "" + while not defaultGetattr(value,'class') == 'label': + svalue += str(value) + value = value.nextSibling + self.setDescription(url,svalue) + #self.story.setMetadata('description',stripHTML(svalue)) + + if 'Rated' in label: + self.story.setMetadata('rating', value) + + if 'Word count' in label: + self.story.setMetadata('numWords', value) + + if 'Categories' in label: + cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories')) + for cat in cats: + self.story.addToList('category',cat.string) + + if 'Characters' in label: + chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters')) + for char in chars: + self.story.addToList('characters',char.string) 
+ + if 'Genre' in label: + genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1')) + for genre in genres: + self.story.addToList('genre',genre.string) + + if 'Warnings' in label: + warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX + for warning in warnings: + self.story.addToList('warnings',warning.string) + + if 'Completed' in label: + if 'Yes' in value: + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + + if 'Published' in label: + self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat)) + + if 'Updated' in label: + self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat)) + + try: + # Find Series name from series URL. + a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+")) + series_name = a.string + series_url = 'http://'+self.host+'/'+a['href'] + + # use BeautifulSoup HTML parser to make everything easier to find. + seriessoup = bs.BeautifulSoup(self._fetchUrl(series_url)) + storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$')) + i=1 + for a in storyas: + if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')): + self.setSeries(series_name, i) + self.story.setMetadata('seriesUrl',series_url) + break + i+=1 + + except: + # I find it hard to care if the series parsing fails + pass + + # grab the text for an individual chapter. + def getChapterText(self, url): + + logger.debug('Getting chapter text from: %s' % url) + + soup = bs.BeautifulStoneSoup(self._fetchUrl(url), + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + + div = soup.find('div', {'id' : 'story'}) + + if None == div: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" 
% url) + + return self.utf8FromSoup(url,div) diff --git a/fanficdownloader/adapters/adapter_thehexfilesnet.py b/fanficdownloader/adapters/adapter_thehexfilesnet.py new file mode 100644 index 00000000..1316b080 --- /dev/null +++ b/fanficdownloader/adapters/adapter_thehexfilesnet.py @@ -0,0 +1,207 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Software: eFiction +import time +import logging +logger = logging.getLogger(__name__) +import re +import urllib2 + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, makeDate + +def getClass(): + return TheHexFilesNetAdapter + +# Class name has to be unique. Our convention is camel case the +# sitename with Adapter at the end. www is skipped. +class TheHexFilesNetAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + self.username = "NoneGiven" # if left empty, site doesn't return any message at all. 
+ self.password = "" + self.is_adult=False + + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + + + # normalized story URL. + self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId')) + + # Each adapter needs to have a unique site abbreviation. + self.story.setMetadata('siteabbrev','thf') + + # The date format will vary from site to site. + # http://docs.python.org/library/datetime.html#strftime-strptime-behavior + self.dateformat = "%Y.%m.%d" + + @staticmethod # must be @staticmethod, don't remove it. + def getSiteDomain(): + # The site domain. Does have www here, if it uses it. + return 'thehexfiles.net' + + @classmethod + def getAcceptDomains(cls): + return ['www.thehexfiles.net','thehexfiles.net'] + + @classmethod + def getSiteExampleURLs(cls): + return "http://"+cls.getSiteDomain()+"/viewstory.php?sid=1234" + + def getSiteURLPattern(self): + return re.escape("http://")+"(www\.)?"+re.escape(self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$" + + ## Getting the chapter list and the meta data, plus 'is adult' checking. + def extractChapterUrlsAndMetadata(self): + + # index=1 makes sure we see the story chapter index. Some + # sites skip that for one-chapter stories. + url = self.url + logger.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulSoup(data) + # print data + + # Now go hunting for all the meta data and the chapter list. + + + # Find authorid and URL from... author url. 
+ a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) + self.story.setMetadata('authorId',a['href'].split('=')[1]) + self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href']) + self.story.setMetadata('author',stripHTML(a)) + asoup = bs.BeautifulSoup(self._fetchUrl(self.story.getMetadata('authorUrl'))) + + try: + # in case link points somewhere other than the first chapter + a = soup.findAll('option')[1]['value'] + self.story.setMetadata('storyId',a.split('=',)[1]) + url = 'http://'+self.host+'/'+a + soup = bs.BeautifulSoup(self._fetchUrl(url)) + except: + pass + + for info in asoup.findAll('table', {'cellspacing' : '4'}): + a = info.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) + if a != None: + self.story.setMetadata('title',stripHTML(a)) + break + + + # Find the chapters: + chapters=soup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+&i=1$')) + if len(chapters) == 0: + self.chapterUrls.append((self.story.getMetadata('title'),url)) + else: + for chapter in chapters: + # just in case there's tags, like <i> in chapter titles. + self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+chapter['href'])) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + + cats = info.findAll('a',href=re.compile('categories.php')) + for cat in cats: + self.story.addToList('category',cat.string) + + words = info.find(text=re.compile('Words:')).split('|')[1].split(': ')[1] + self.story.setMetadata('numWords', words) + + comp = info.find('span', {'class' : 'completed'}).string.split(': ')[1] + if 'Yes' in comp: + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + + summary = info.find('td', {'class' : 'summary'}) + summary.name='div' # change td to div so it doesn't mess up the display when using table titlepage. 
+ self.setDescription(url,summary) + + rating=stripHTML(info.find('td', {'align' : 'left'})).split('(')[1].split(')')[0] + self.story.setMetadata('rating', rating) + + labels = info.findAll('td', {'width' : '10%'}) + values = info.findAll('td', {'width' : '40%'}) + for i in range(0,len(labels)): + value = stripHTML(values[i]) + label = stripHTML(labels[i]) + + if 'Genres' in label: + genres = value.split(', ') + for genre in genres: + if genre != 'none': + self.story.addToList('genre',genre) + + + if 'Warnings' in label: + warnings = value.split(', ') + for warning in warnings: + if warning != 'none': + self.story.addToList('warnings',warning) + + if 'Published' in label: + self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat)) + + if 'Updated' in label: + # there's a stray [ at the end. + #value = value[0:-1] + self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat)) + + + # grab the text for an individual chapter. + def getChapterText(self, url): + + logger.debug('Getting chapter text from: %s' % url) + + soup = bs.BeautifulStoneSoup(self._fetchUrl(url), + selfClosingTags=('br','hr','img')) # otherwise soup eats the br/hr tags. + + if None == soup: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) + + # Ugh. chapter html doesn't haven't anything useful around it to demarcate. 
+ for a in soup.findAll('table'): + a.extract() + + for a in soup.findAll('head'): + a.extract() + + html = soup.find('html') + html.name='div' + + return self.utf8FromSoup(url,soup) diff --git a/fanficdownloader/adapters/adapter_thehookupzonenet.py b/fanficdownloader/adapters/adapter_thehookupzonenet.py new file mode 100644 index 00000000..2b6c52e1 --- /dev/null +++ b/fanficdownloader/adapters/adapter_thehookupzonenet.py @@ -0,0 +1,309 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Software: eFiction +import time +import logging +logger = logging.getLogger(__name__) +import re +import urllib2 + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, makeDate + +# By virtue of being recent and requiring both is_adult and user/pass, +# adapter_fanficcastletvnet.py is the best choice for learning to +# write adapters--especially for sites that use the eFiction system. +# Most sites that have ".../viewstory.php?sid=123" in the story URL +# are eFiction. + +# For non-eFiction sites, it can be considerably more complex, but +# this is still a good starting point. + +# In general an 'adapter' needs to do these five things: + +# - 'Register' correctly with the downloader +# - Site Login (if needed) +# - 'Are you adult?' 
check (if needed--some do one, some the other, some both) +# - Grab the chapter list +# - Grab the story meta-data (some (non-eFiction) adapters have to get it from the author page) +# - Grab the chapter texts + +# Search for XXX comments--that's where things are most likely to need changing. + +# This function is called by the downloader in all adapter_*.py files +# in this dir to register the adapter class. So it needs to be +# updated to reflect the class below it. That, plus getSiteDomain() +# take care of 'Registering'. +def getClass(): + return TheHookupZoneNetAdapter # XXX + +# Class name has to be unique. Our convention is camel case the +# sitename with Adapter at the end. www is skipped. +class TheHookupZoneNetAdapter(BaseSiteAdapter): # XXX + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + self.username = "NoneGiven" # if left empty, site doesn't return any message at all. + self.password = "" + self.is_adult=False + + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + + + # normalized story URL. + # XXX Most sites don't have the /fanfic part. Replace all to remove it usually. + self._setURL('http://' + self.getSiteDomain() + '/CriminalMinds/viewstory.php?sid='+self.story.getMetadata('storyId')) + + # Each adapter needs to have a unique site abbreviation. + self.story.setMetadata('siteabbrev','thupz') # XXX + + # The date format will vary from site to site. + # http://docs.python.org/library/datetime.html#strftime-strptime-behavior + self.dateformat = "%b %d, %Y" # XXX + + @staticmethod # must be @staticmethod, don't remove it. + def getSiteDomain(): + # The site domain. Does have www here, if it uses it. 
+ return 'thehookupzone.net' # XXX + + @classmethod + def getSiteExampleURLs(cls): + return "http://"+cls.getSiteDomain()+"/CriminalMinds/viewstory.php?sid=1234" + + def getSiteURLPattern(self): + return re.escape("http://"+self.getSiteDomain()+"/CriminalMinds/viewstory.php?sid=")+r"\d+$" + + ## Login seems to be reasonably standard across eFiction sites. + def needToLoginCheck(self, data): + if 'Registered Users Only' in data \ + or 'There is no such account on our website' in data \ + or "That password doesn't match the one in our database" in data: + return True + else: + return False + + def performLogin(self, url): + params = {} + + if self.password: + params['penname'] = self.username + params['password'] = self.password + else: + params['penname'] = self.getConfig("username") + params['password'] = self.getConfig("password") + params['cookiecheck'] = '1' + params['submit'] = 'Submit' + + loginUrl = 'http://' + self.getSiteDomain() + '/CriminalMinds/user.php?action=login' + logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl, + params['penname'])) + + d = self._fetchUrl(loginUrl, params) + + if "Member Account" not in d : #Member Account + logger.info("Failed to login to URL %s as %s" % (loginUrl, + params['penname'])) + raise exceptions.FailedToLogin(url,params['penname']) + return False + else: + return True + + ## Getting the chapter list and the meta data, plus 'is adult' checking. + def extractChapterUrlsAndMetadata(self): + + if self.is_adult or self.getConfig("is_adult"): + # Weirdly, different sites use different warning numbers. + # If the title search below fails, there's a good chance + # you need a different number. print data at that point + # and see what the 'click here to continue' url says. + addurl = "&ageconsent=ok&warning=4" # XXX + else: + addurl="" + + # index=1 makes sure we see the story chapter index. Some + # sites skip that for one-chapter stories. 
+ url = self.url+'&index=1'+addurl + logger.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + if self.needToLoginCheck(data): + # need to log in for this one. + self.performLogin(url) + data = self._fetchUrl(url) + + # The actual text that is used to announce you need to be an + # adult varies from site to site. Again, print data before + # the title search to troubleshoot. + if "Age Consent Required" in data: # XXX + raise exceptions.AdultCheckRequired(self.url) + + if "Access denied. This story has not been validated by the adminstrators of this site." in data: + raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.") + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulSoup(data) + # print data + + # Now go hunting for all the meta data and the chapter list. + + ## Title + a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) + self.story.setMetadata('title',stripHTML(a)) + + # Find authorid and URL from... author url. + a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) + self.story.setMetadata('authorId',a['href'].split('=')[1]) + self.story.setMetadata('authorUrl','http://'+self.host+'/CriminalMinds/'+a['href']) + self.story.setMetadata('author',a.string) + + # Find the chapters: + for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")): + # just in case there's tags, like <i> in chapter titles. + self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/CriminalMinds/'+chapter['href']+addurl)) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + # eFiction sites don't help us out a lot with their meta data + # formating, so it's a little ugly. 
+ + # utility method + def defaultGetattr(d,k): + try: + return d[k] + except: + return "" + + # <span class="label">Rated:</span> NC-17<br /> etc + labels = soup.findAll('span',{'class':'label'}) + for labelspan in labels: + value = labelspan.nextSibling + label = labelspan.string + + if 'Summary' in label: + ## Everything until the next span class='label' + svalue = "" + while not defaultGetattr(value,'class') == 'label': + svalue += str(value) + value = value.nextSibling + self.setDescription(url,svalue) + #self.story.setMetadata('description',stripHTML(svalue)) + + if 'Rated' in label: + self.story.setMetadata('rating', value) + + if 'Word count' in label: + self.story.setMetadata('numWords', value) + + if 'Categories' in label: + cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories')) + catstext = [cat.string for cat in cats] + for cat in catstext: + self.story.addToList('category',cat.string) + + if 'Characters' in label: + chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters')) + charstext = [char.string for char in chars] + for char in charstext: + self.story.addToList('characters',char.string) + + ## Not all sites use Genre, but there's no harm to + ## leaving it in. Check to make sure the type_id number + ## is correct, though--it's site specific. + if 'Genre' in label: + genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX + genrestext = [genre.string for genre in genres] + self.genre = ', '.join(genrestext) + for genre in genrestext: + self.story.addToList('genre',genre.string) + + ## Not all sites use Warnings, but there's no harm to + ## leaving it in. Check to make sure the type_id number + ## is correct, though--it's site specific. 
+ if 'Warnings' in label: + warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX + warningstext = [warning.string for warning in warnings] + self.warning = ', '.join(warningstext) + for warning in warningstext: + self.story.addToList('warnings',warning.string) + + if 'Completed' in label: + if 'Yes' in value: + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + + if 'Published' in label: + self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat)) + + if 'Updated' in label: + # there's a stray [ at the end. + #value = value[0:-1] + self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat)) + + try: + # Find Series name from series URL. + a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+")) + series_name = a.string + series_url = 'http://'+self.host+'/CriminalMinds/'+a['href'] + + # use BeautifulSoup HTML parser to make everything easier to find. + seriessoup = bs.BeautifulSoup(self._fetchUrl(series_url)) + storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$')) + i=1 + for a in storyas: + if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')): + self.setSeries(series_name, i) + self.story.setMetadata('seriesUrl',series_url) + break + i+=1 + + except: + # I find it hard to care if the series parsing fails + pass + + # grab the text for an individual chapter. + def getChapterText(self, url): + + logger.debug('Getting chapter text from: %s' % url) + + soup = bs.BeautifulStoneSoup(self._fetchUrl(url), + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + + div = soup.find('div', {'id' : 'story'}) + + if None == div: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" 
% url) + + return self.utf8FromSoup(url,div) diff --git a/fanficdownloader/adapters/adapter_themaplebookshelf.py b/fanficdownloader/adapters/adapter_themaplebookshelf.py new file mode 100644 index 00000000..1d55deb2 --- /dev/null +++ b/fanficdownloader/adapters/adapter_themaplebookshelf.py @@ -0,0 +1,40 @@ +# -*- coding: utf-8 -*- + +# Copyright 2014 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Software: eFiction +from base_efiction_adapter import BaseEfictionAdapter + +class TheMapleBookshelfComSiteAdapter(BaseEfictionAdapter): + + @staticmethod + def getSiteDomain(): + return 'themaplebookshelf.com' + + @classmethod + def getPathToArchive(self): + return '/Literati' + + @classmethod + def getSiteAbbrev(seluuf): + return 'maplebook' + + @classmethod + def getDateFormat(self): + return "%b %d, %Y" + +def getClass(): + return TheMapleBookshelfComSiteAdapter diff --git a/fanficdownloader/adapters/adapter_themasquenet.py b/fanficdownloader/adapters/adapter_themasquenet.py new file mode 100644 index 00000000..db65ee68 --- /dev/null +++ b/fanficdownloader/adapters/adapter_themasquenet.py @@ -0,0 +1,274 @@ +# -*- coding: utf-8 -*- + +# Copyright 2012 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Software: eFiction +import time +import logging +logger = logging.getLogger(__name__) +import re +import urllib2 + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, makeDate + +def getClass(): + return TheMasqueNetAdapter + +# Class name has to be unique. Our convention is camel case the +# sitename with Adapter at the end. www is skipped. +class TheMasqueNetAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + self.username = "NoneGiven" # if left empty, site doesn't return any message at all. + self.password = "" + self.is_adult=False + + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + + + if self.parsedUrl.path.split('/',)[1] == 'wiktt': + self.story.addToList("category","Harry Potter") + self.section='/wiktt/efiction/' + self.dateformat = "%m/%d/%Y" + else: + self.story.addToList("category","Originals") + self.section='/efiction/' + self.dateformat = "%b %d, %Y" + + + # normalized story URL. + self._setURL('http://' + self.getSiteDomain() + self.section + 'viewstory.php?sid='+self.story.getMetadata('storyId')) + + # Each adapter needs to have a unique site abbreviation. 
+ self.story.setMetadata('siteabbrev','msq') + + + @staticmethod # must be @staticmethod, don't remove it. + def getSiteDomain(): + # The site domain. Does have www here, if it uses it. + return 'themasque.net' + + @classmethod + def getSiteExampleURLs(cls): + return "http://themasque.net/wiktt/efiction/viewstory.php?sid=1234 http://themasque.net/efiction/viewstory.php?sid=1234" + + def getSiteURLPattern(self): + return re.escape("http://"+self.getSiteDomain())+"(/wiktt)?/efiction"+re.escape("/viewstory.php?sid=")+r"\d+$" + + ## Login seems to be reasonably standard across eFiction sites. + def needToLoginCheck(self, data): + if 'Registered Users Only' in data \ + or 'There is no such account on our website' in data \ + or "That password doesn't match the one in our database" in data: + return True + else: + return False + + def performLogin(self, url): + params = {} + + if self.password: + params['penname'] = self.username + params['password'] = self.password + else: + params['penname'] = self.getConfig("username") + params['password'] = self.getConfig("password") + params['cookiecheck'] = '1' + params['submit'] = 'Submit' + + loginUrl = 'http://' + self.getSiteDomain() + self.section + 'user.php?action=login' + logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl, + params['penname'])) + + d = self._fetchUrl(loginUrl, params) + + if "Member Account" not in d : #Member Account + logger.info("Failed to login to URL %s as %s" % (loginUrl, + params['penname'])) + raise exceptions.FailedToLogin(url,params['penname']) + return False + else: + return True + + ## Getting the chapter list and the meta data, plus 'is adult' checking. + def extractChapterUrlsAndMetadata(self): + + if self.is_adult or self.getConfig("is_adult"): + # Weirdly, different sites use different warning numbers. + # If the title search below fails, there's a good chance + # you need a different number. print data at that point + # and see what the 'click here to continue' url says. 
+ addurl = "&ageconsent=ok&warning=4" + else: + addurl="" + + # index=1 makes sure we see the story chapter index. Some + # sites skip that for one-chapter stories. + url = self.url+addurl + logger.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + if self.needToLoginCheck(data): + # need to log in for this one. + self.performLogin(url) + data = self._fetchUrl(url) + + m = re.search(r"'viewstory.php\?sid=\d+((?:&ageconsent=ok)?&warning=\d+)'",data) + if m != None: + if self.is_adult or self.getConfig("is_adult"): + # We tried the default and still got a warning, so + # let's pull the warning number from the 'continue' + # link and reload data. + addurl = m.group(1) + # correct stupid & error in url. + addurl = addurl.replace("&","&") + url = self.url+'&index=1'+addurl + logger.debug("URL 2nd try: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + else: + raise exceptions.AdultCheckRequired(self.url) + + if "Access denied. This story has not been validated by the adminstrators of this site." in data: + raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.") + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulSoup(data) + # print data + + # Now go hunting for all the meta data and the chapter list. + + ## Title + a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) + self.story.setMetadata('title',stripHTML(a)) + + # Find authorid and URL from... author url. 
+ a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) + self.story.setMetadata('authorId',a['href'].split('=')[1]) + self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href']) + self.story.setMetadata('author',a.string) + + # Find the chapters: + for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")): + # just in case there's tags, like <i> in chapter titles. + self.chapterUrls.append((stripHTML(chapter),'http://'+self.host + self.section + chapter['href']+addurl)) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + # eFiction sites don't help us out a lot with their meta data + # formating, so it's a little ugly. + + # utility method + def defaultGetattr(d,k): + try: + return d[k] + except: + return "" + +# summary, rated, word count, categories, characters, genre, warnings, completed, published, updated, seires + + # <span class="label">Rated:</span> NC-17<br /> etc + labels = soup.findAll('span',{'class':'label'}) + for labelspan in labels: + value = labelspan.nextSibling + label = labelspan.text + + if 'Summary' in label: + ## Everything until the next span class='label' + svalue = "" + while not defaultGetattr(value,'class') == 'label': + svalue += str(value) + value = value.nextSibling + self.setDescription(url,svalue) + #self.story.setMetadata('description',stripHTML(svalue)) + + if 'Rated' in label: + self.story.setMetadata('rating', value) + + if 'Word count' in label: + self.story.setMetadata('numWords', value) + + if 'Categories' in label: + cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories')) + for cat in cats: + self.story.addToList('category',cat.string) + + if 'Characters' in label: + chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters')) + for char in chars: + self.story.addToList('characters',char.string) + + if 'Genre' in label: + genres = 
labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1')) + for genre in genres: + self.story.addToList('genre',genre.string) + + if 'Warnings' in label: + warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) + for warning in warnings: + self.story.addToList('warnings',warning.string) + + if 'Completed' in label: + if 'Yes' in value: + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + + if 'Published' in label: + self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat)) + + if 'Updated' in label: + self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat)) + + # grab the text for an individual chapter. + def getChapterText(self, url): + + logger.debug('Getting chapter text from: %s' % url) + + soup = bs.BeautifulStoneSoup(self._fetchUrl(url), + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + + div = soup.find('div', {'id' : 'story'}) + + if None == div: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) + + return self.utf8FromSoup(url,div) diff --git a/fanficdownloader/adapters/adapter_thepetulantpoetesscom.py b/fanficdownloader/adapters/adapter_thepetulantpoetesscom.py new file mode 100644 index 00000000..b35eaa53 --- /dev/null +++ b/fanficdownloader/adapters/adapter_thepetulantpoetesscom.py @@ -0,0 +1,242 @@ +# -*- coding: utf-8 -*- + +# Copyright 2012 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Software: eFiction +import time +import logging +logger = logging.getLogger(__name__) +import re +import urllib2 + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, makeDate + +def getClass(): + return ThePetulantPoetessComAdapter + +# Class name has to be unique. Our convention is camel case the +# sitename with Adapter at the end. www is skipped. +class ThePetulantPoetessComAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + self.username = "NoneGiven" # if left empty, site doesn't return any message at all. + self.password = "" + self.is_adult=False + + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + + + # normalized story URL. + self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId') +'&i=1') + + # Each adapter needs to have a unique site abbreviation. + self.story.setMetadata('siteabbrev','tpp') + + # The date format will vary from site to site. + # http://docs.python.org/library/datetime.html#strftime-strptime-behavior + self.dateformat = "%Y/%m/%d" + + @staticmethod # must be @staticmethod, don't remove it. + def getSiteDomain(): + # The site domain. Does have www here, if it uses it. 
+ return 'www.thepetulantpoetess.com' + + @classmethod + def getSiteExampleURLs(cls): + return "http://"+cls.getSiteDomain()+"/viewstory.php?sid=1234" + + def getSiteURLPattern(self): + return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$" + + ## Login seems to be reasonably standard across eFiction sites. + def needToLoginCheck(self, data): + if 'You must be a member to read this story.' in data \ + or "The Ministry of Magic does not have a record of that password. " in data: + return True + else: + return False + + def performLogin(self, url): + params = {} + + if self.password: + params['penname'] = self.username + params['password'] = self.password + else: + params['penname'] = self.getConfig("username") + params['password'] = self.getConfig("password") + params['cookiecheck'] = '1' + params['submit'] = 'Submit' + + loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login' + logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl, + params['penname'])) + + d = self._fetchUrl(loginUrl, params) + + if "My Account Page" not in d : #Member Account + logger.info("Failed to login to URL %s as %s" % (loginUrl, + params['penname'])) + raise exceptions.FailedToLogin(url,params['penname']) + return False + else: + return True + + ## Getting the chapter list and the meta data, plus 'is adult' checking. + def extractChapterUrlsAndMetadata(self): + + # index=1 makes sure we see the story chapter index. Some + # sites skip that for one-chapter stories. + url = self.url + logger.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + if self.needToLoginCheck(data): + # need to log in for this one. + self.performLogin(url) + data = self._fetchUrl(url) + + if "Access denied. This story has not been validated by the adminstrators of this site." 
in data: + raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.") + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulSoup(data) + # print data + + # Now go hunting for all the meta data and the chapter list. + + # Find authorid and URL from... author url. + a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) + self.story.setMetadata('authorId',a['href'].split('=')[1]) + self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href']) + self.story.setMetadata('author',a.string) + + # Find the chapters: + chapters=soup.find('select', {'name' : 'sid'}) + if chapters == None: + self.chapterUrls.append((self.story.getMetadata('title'),url)) + else: + for chapter in chapters.findAll('option', value=re.compile(r"viewstory.php\?sid=\d+&i=1")): + # just in case there's tags, like <i> in chapter titles. + self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+chapter['value'])) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + # make sure that the story id is from the first chapter + self.story.setMetadata('storyId',self.chapterUrls[0][1].split('=')[1].split('&')[0]) + + #locate the story on author's page + index = 1 + found = 0 + while found == 0: + asoup = bs.BeautifulSoup(self._fetchUrl(self.story.getMetadata('authorUrl')+"&page="+str(index))) + + for info in asoup.findAll('td', {'class' : 'highlightcolor1'}): + a = info.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) + if a != None: + self.story.setMetadata('title',stripHTML(a)) + found = 1 + break + index=index+1 + + # extract metadata + b=info.find('b') + b.find('a').extract() + self.story.setMetadata('rating', b.text.split('[')[1].split(']')[0]) + + info = info.findNext('td', {'colspan' : '2'}) + for label in info.findAll('b'): + value = label.nextSibling + + if "Category" in label.text: + for cat 
in info.findAll('a'): + self.story.addToList('category',cat.string) + + if "Characters" in label.text: + for char in value.split(', '): + self.story.addToList('characters',char) + + if "Genres" in label.text: + for genre in value.split(', '): + if "General" not in genre: + self.story.addToList('genre',genre) + + if "Warnings" in label.text: + for warning in value.split(', '): + if "none" not in warning.lower(): + self.story.addToList('warnings',warning) + + + info = info.findNext('td', {'class' : 'tblborder'}) + info.find('b').extract() + self.setDescription(url,info) + + info = info.findNext('td', {'class' : 'highlightcolor2'}) + for label in info.findAll('b'): + value = label.nextSibling + + if "Completed" in label.text: + if 'Yes' in value: + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + + if 'Published' in label.text: + self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat)) + + if 'Updated' in label.text: + self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat)) + + if 'Word Count' in label.text: + self.story.setMetadata('numWords', value) + + + # grab the text for an individual chapter. + def getChapterText(self, url): + + logger.debug('Getting chapter text from: %s' % url) + + soup = bs.BeautifulStoneSoup(self._fetchUrl(url), + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + + div = soup.findAll('table')[2].findAll('td')[1] + for a in div.findAll('div'): + a.extract() + div.name='div' + + if None == div: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" 
% url) + + return self.utf8FromSoup(url,div) diff --git a/fanficdownloader/adapters/adapter_thequidditchpitchorg.py b/fanficdownloader/adapters/adapter_thequidditchpitchorg.py new file mode 100644 index 00000000..68401bfc --- /dev/null +++ b/fanficdownloader/adapters/adapter_thequidditchpitchorg.py @@ -0,0 +1,292 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Software: eFiction +import time +import logging +logger = logging.getLogger(__name__) +import re +import urllib2 + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, makeDate + +# This function is called by the downloader in all adapter_*.py files +# in this dir to register the adapter class. So it needs to be +# updated to reflect the class below it. That, plus getSiteDomain() +# take care of 'Registering'. +def getClass(): + return TheQuidditchPitchOrgAdapter # XXX + +# Class name has to be unique. Our convention is camel case the +# sitename with Adapter at the end. www is skipped. +class TheQuidditchPitchOrgAdapter(BaseSiteAdapter): # XXX + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. 
+ self.username = "NoneGiven" # if left empty, site doesn't return any message at all. + self.password = "" + self.is_adult=False + + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + + + # normalized story URL. + # XXX Most sites don't have the part. Replace all to remove it usually. + self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId')) + + # Each adapter needs to have a unique site abbreviation. + self.story.setMetadata('siteabbrev','tqdpch') # XXX + + # The date format will vary from site to site. + # http://docs.python.org/library/datetime.html#strftime-strptime-behavior + self.dateformat = "%m/%d/%Y" # XXX + + @staticmethod # must be @staticmethod, don't remove it. + def getSiteDomain(): + # The site domain. Does have www here, if it uses it. + return 'thequidditchpitch.org' # XXX + + @classmethod + def getAcceptDomains(cls): + return ['www.thequidditchpitch.org','thequidditchpitch.org'] + + @classmethod + def getSiteExampleURLs(cls): + return "http://"+cls.getSiteDomain()+"/viewstory.php?sid=1234" + + def getSiteURLPattern(self): + return re.escape("http://")+"(www\.)?"+re.escape(self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$" + + ## Login seems to be reasonably standard across eFiction sites. + def needToLoginCheck(self, data): + if 'Registered Users Only - Not suitable for readers under the age of legal consent in their country.' 
in data \ + or 'There is no such account on our website' in data \ + or "That password doesn't match the one in our database" in data: + return True + else: + return False + + def performLogin(self, url): + params = {} + + if self.password: + params['penname'] = self.username + params['password'] = self.password + else: + params['penname'] = self.getConfig("username") + params['password'] = self.getConfig("password") + params['cookiecheck'] = '1' + params['submit'] = 'Submit' + + loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login' + logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl, + params['penname'])) + + d = self._fetchUrl(loginUrl, params) + + if "Member Account" not in d : #Member Account + logger.info("Failed to login to URL %s as %s" % (loginUrl, + params['penname'])) + raise exceptions.FailedToLogin(url,params['penname']) + return False + else: + return True + + ## Getting the chapter list and the meta data, plus 'is adult' checking. + def extractChapterUrlsAndMetadata(self): + + if self.is_adult or self.getConfig("is_adult"): + # Weirdly, different sites use different warning numbers. + # If the title search below fails, there's a good chance + # you need a different number. print data at that point + # and see what the 'click here to continue' url says. + addurl = "&ageconsent=ok&warning=4" # XXX + else: + addurl="" + + # index=1 makes sure we see the story chapter index. Some + # sites skip that for one-chapter stories. + url = self.url+'&index=1'+addurl + logger.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + if self.needToLoginCheck(data): + # need to log in for this one. + self.performLogin(url) + data = self._fetchUrl(url) + + # The actual text that is used to announce you need to be an + # adult varies from site to site. Again, print data before + # the title search to troubleshoot. 
+ if ("Not suitable for readers under the age of legal consent in their country." in data \ + or "Not suitable for readers under 16 yrs. \r\nStories may contain violence, slight nudity, and/or sexual situations." in data ) \ + and not (self.is_adult or self.getConfig("is_adult")): # XXX + raise exceptions.AdultCheckRequired(self.url) + + if "Access denied. This story has not been validated by the adminstrators of this site." in data: + raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.") + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulSoup(data) + #print data + + # Now go hunting for all the meta data and the chapter list. + + ## Title + a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId'))) + self.story.setMetadata('title',stripHTML(a)) + + # Find authorid and URL from... author url. + a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) + self.story.setMetadata('authorId',a['href'].split('=')[1]) + self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href']) + self.story.setMetadata('author',a.string) + + # Find the chapters: + for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")): + # just in case there's tags, like <i> in chapter titles. + self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+chapter['href']+addurl)) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + # eFiction sites don't help us out a lot with their meta data + # formating, so it's a little ugly. 
+ + # utility method + def defaultGetattr(d,k): + try: + return d[k] + except: + return "" + + # <span class="label">Rated:</span> NC-17<br /> etc + labels = soup.findAll('span',{'class':'label'}) + for labelspan in labels: + value = labelspan.nextSibling + label = labelspan.string + + if 'Summary' in label: + ## Everything until the next span class='label' + svalue = "" + while not defaultGetattr(value,'class') == 'label': + svalue += str(value) + value = value.nextSibling + self.setDescription(url,svalue) + #self.story.setMetadata('description',stripHTML(svalue)) + + if 'Rated' in label: + self.story.setMetadata('rating', value) + + if 'Word count' in label: + self.story.setMetadata('numWords', value) + + if 'Categories' in label: + cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories')) + catstext = [cat.string for cat in cats] + for cat in catstext: + self.story.addToList('category',cat.string) + + if 'Characters' in label: + chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters')) + charstext = [char.string for char in chars] + for char in charstext: + self.story.addToList('characters',char.string) + + if 'Genre' in label: + genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1')) # XXX + genrestext = [genre.string for genre in genres] + self.genre = ', '.join(genrestext) + for genre in genrestext: + self.story.addToList('genre',genre.string) + + if 'Warnings' in label: + warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX + warningstext = [warning.string for warning in warnings] + self.warning = ', '.join(warningstext) + for warning in warningstext: + self.story.addToList('warnings',warning.string) + + if 'Completed' in label: + if 'Yes' in value: + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + + if 'Published' in label: + self.story.setMetadata('datePublished', 
makeDate(stripHTML(value), self.dateformat)) + + if 'Updated' in label: + # there's a stray [ at the end. + #value = value[0:-1] + self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat)) + + try: + # Find Series name from series URL. + a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+")) + series_name = a.string + series_url = 'http://'+self.host+'/'+a['href'] + + # use BeautifulSoup HTML parser to make everything easier to find. + seriessoup = bs.BeautifulSoup(self._fetchUrl(series_url)) + storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$')) + i=1 + for a in storyas: + if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')): + self.setSeries(series_name, i) + self.story.setMetadata('seriesUrl',series_url) + break + i+=1 + + except: + # I find it hard to care if the series parsing fails + pass + + # grab the text for an individual chapter. + def getChapterText(self, url): + + logger.debug('Getting chapter text from: %s' % url) + + + soup = bs.BeautifulStoneSoup(self._fetchUrl(url), + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + + # span? Really? span? Yeah... I don't think so. + div = soup.find('span', {'style' : 'font-size: 100%;'}) + div.name='div' + + if None == div: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) + + return self.utf8FromSoup(url,div) diff --git a/fanficdownloader/adapters/adapter_tokrafandomnetcom.py b/fanficdownloader/adapters/adapter_tokrafandomnetcom.py new file mode 100644 index 00000000..1d3a9278 --- /dev/null +++ b/fanficdownloader/adapters/adapter_tokrafandomnetcom.py @@ -0,0 +1,238 @@ +# -*- coding: utf-8 -*- + +# Copyright 2013 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Software: eFiction +import time +import logging +logger = logging.getLogger(__name__) +import re +import urllib2 + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, makeDate + +def getClass(): + return TokraFandomnetComAdapter + +# Class name has to be unique. Our convention is camel case the +# sitename with Adapter at the end. www is skipped. +class TokraFandomnetComAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + self.username = "NoneGiven" # if left empty, site doesn't return any message at all. + self.password = "" + self.is_adult=False + + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + + + # normalized story URL. + self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId')) + + # Each adapter needs to have a unique site abbreviation. + self.story.setMetadata('siteabbrev','tokra') + + # The date format will vary from site to site. + # http://docs.python.org/library/datetime.html#strftime-strptime-behavior + self.dateformat = "%m/%d/%Y" + + @staticmethod # must be @staticmethod, don't remove it. + def getSiteDomain(): + # The site domain. 
Does have www here, if it uses it. But it + # doesn't matter too much anymore. + return 'tokra.fandomnet.com' + + @classmethod + def getSiteExampleURLs(cls): + return "http://"+cls.getSiteDomain()+"/viewstory.php?sid=1234" + + def getSiteURLPattern(self): + return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$" + + ## Getting the chapter list and the meta data, plus 'is adult' checking. + def extractChapterUrlsAndMetadata(self): + + if self.is_adult or self.getConfig("is_adult"): + # Weirdly, different sites use different warning numbers. + # If the title search below fails, there's a good chance + # you need a different number. print data at that point + # and see what the 'click here to continue' url says. + addurl = "&ageconsent=ok&warning=3" + else: + addurl="" + + # index=1 makes sure we see the story chapter index. Some + # sites skip that for one-chapter stories. + url = self.url+'&index=1'+addurl + logger.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + m = re.search(r"'viewstory.php\?sid=\d+((?:&ageconsent=ok)?&warning=\d+)'",data) + if m != None: + if self.is_adult or self.getConfig("is_adult"): + # We tried the default and still got a warning, so + # let's pull the warning number from the 'continue' + # link and reload data. + addurl = m.group(1) + # correct stupid & error in url. + addurl = addurl.replace("&","&") + url = self.url+'&index=1'+addurl + logger.debug("URL 2nd try: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + else: + raise exceptions.AdultCheckRequired(self.url) + + if "Access denied. This story has not been validated by the adminstrators of this site." in data: + raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. 
This story has not been validated by the adminstrators of this site.") + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulSoup(data) + #print data + + # Now go hunting for all the meta data and the chapter list. + + ## Title + a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) + self.story.setMetadata('title',stripHTML(a)) + + # Find authorid and URL from... author url. + a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) + self.story.setMetadata('authorId',a['href'].split('=')[1]) + self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href']) + self.story.setMetadata('author',a.string) + + # Rating + rate = stripHTML(soup.find('div',{'id':'pagetitle'})) + rate = rate[rate.rindex('[')+1:rate.rindex(']')] + self.story.setMetadata('rating', rate) + + # Find the chapters: + for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")): + # just in case there's tags, like <i> in chapter titles. + self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+chapter['href']+addurl)) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + # eFiction sites don't help us out a lot with their meta data + # formating, so it's a little ugly. + + metadiv = soup.find('div',{'class':'content'}) + smalldiv = metadiv.find('div',{'class':'small'}) + + # tokra categories -> genre + # categories will be filled from ini. 
+ genres = smalldiv.parent.findAll('a',href=re.compile(r'browse.php\?type=categories')) + for genre in genres: + self.story.addToList('genre',genre.string) + + chars = smalldiv.parent.findAll('a',href=re.compile(r'browse.php\?type=characters')) + for char in chars: + self.story.addToList('characters',char.string) + + metatext = stripHTML(smalldiv) + + if 'Completed: Yes' in metatext: + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + + wordstart=metatext.rindex('Word count:')+12 + words = metatext[wordstart:metatext.index(' ',wordstart)] + self.story.setMetadata('numWords', words) + + datesdiv = soup.find('div',{'class':'bottom'}) + dates = stripHTML(datesdiv).split() + # Published: 04/26/2011 Updated: 03/06/2013 + self.story.setMetadata('datePublished', makeDate(dates[1], self.dateformat)) + self.story.setMetadata('dateUpdated', makeDate(dates[3], self.dateformat)) + + try: + # Find Series name from series URL. + a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+")) + series_name = a.string + series_url = 'http://'+self.host+'/'+a['href'] + + # use BeautifulSoup HTML parser to make everything easier to find. + seriessoup = bs.BeautifulSoup(self._fetchUrl(series_url)) + # can't use ^viewstory...$ in case of higher rated stories with javascript href. + storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+')) + i=1 + for a in storyas: + # skip 'report this' and 'TOC' links + if 'contact.php' not in a['href'] and 'index' not in a['href']: + if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')): + self.setSeries(series_name, i) + self.story.setMetadata('seriesUrl',series_url) + break + i+=1 + + except: + # I find it hard to care if the series parsing fails + pass + + # remove 'small' leaving only summary. + smalldiv.extract() + self.setDescription(url,metadiv) + + # grab the text for an individual chapter. 
+ def getChapterText(self, url): + + logger.debug('Getting chapter text from: %s' % url) + + soup = bs.BeautifulSoup(self._fetchUrl(url)) + + div = soup.find('div', {'class' : 'content'}) + + if None == div: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) + + # remove some decorations while keeping notes. + remove = div.find('div', {'id' : 'pagetitle'}) + remove.extract() + + for remove in div.findAll('div', {'class' : 'right'}): + remove.extract() + + for remove in div.findAll('div', {'class' : 'left'}): + remove.extract() + + return self.utf8FromSoup(url,div) diff --git a/fanficdownloader/adapters/adapter_tolkienfanfiction.py b/fanficdownloader/adapters/adapter_tolkienfanfiction.py new file mode 100644 index 00000000..b964baf4 --- /dev/null +++ b/fanficdownloader/adapters/adapter_tolkienfanfiction.py @@ -0,0 +1,238 @@ +# -*- coding: utf-8 -*- + +""" +FFDL Adapter for TolkienFanFiction.com. + +Chapter URL: http://www.tolkienfanfiction.com/Story_Read_Chapter.php?CHid=1234 + Metadata + Link to Story URL [Index] + chapterTitle + storyTitle +Story URL: http://www.tolkienfanfiction.com/Story_Read_Head.php?STid=1034 + Metadata + Links to Chapter URLs + storyTitle + chapterTitle[s] + author + authorId + authorUrl + numChapters + wordCount + description/summary + rating TODO + genre TODO + Characters + Ages (specific) TODO +Search: http://www.tolkienfanfiction.com/Story_Chapter_Search.php?text=From+Wilderness+to+Cities+White&field=1&type=3&search=Search + Strategy + Search by exact phrase for styo + Metadata + dateUpdated + Parameters + field (field to search) + 1: title + 2: description + 3: chapter text + type (any, all or exact phrase) + 1: any + 2: all + 3: exact phrase + +""" +# Copyright 2014 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import time +import logging +logger = logging.getLogger(__name__) +import re +import urllib +import urllib2 +import urlparse +import string + +from .. import BeautifulSoup as bs +from .. import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, makeDate + +def _is_story_url(url): + return "Story_Read_Head.php" in url + +def _latinize(text): + """ + See http://stackoverflow.com/a/19114706/201318 + """ + src = u"áâäÉéêëíóôöúû" + tgt = u"aaaEeeeiooouu" + src_ord = [ord(char) for char in src] + translate_table = dict(zip(src_ord, tgt)) + return text.translate(translate_table) + +def _fix_broken_markup(html): + """Replaces invalid comment tags""" + if html.startswith("<CENTER>"): + logger.error("TolkienFanFiction.com couldn't handle this request: '%s'" % html) + html = re.sub("<!-.+?->", "", html) + return html + + +class TolkienFanfictionAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + + self.decode = ["ISO-8859-1", "Windows-1252"] + + self.story.setMetadata('siteabbrev','tolkien') + + self.dateformat = '%B %d, %Y' + + self._normalizeURL(url) + + def _normalizeURL(self, url): + if _is_story_url(url): + self.story.setMetadata('storyId', re.compile(self.getSiteURLPattern()).match(url).group('storyId')) + self._setURL('http://' + self.getSiteDomain() + '/Story_Read_Head.php?STid=' + self.story.getMetadata('storyId')) + + @staticmethod + def getSiteDomain(): + return 'tolkienfanfiction.com' + + @classmethod + def getAcceptDomains(cls): + return ['tolkienfanfiction.com', 
'www.tolkienfanfiction.com'] + + @classmethod + def getSiteExampleURLs(cls): + return 'http://www.tolkienfanfiction.com/Story_Read_Head.php?STid=1034 http://www.tolkienfanfiction.com/Story_Read_Chapter.php?CHid=4945' + + def getSiteURLPattern(self): + return r"http://(?:www.)?tolkienfanfiction.com/(?:Story_Read_Chapter\.php\?CH|Story_Read_Head\.php\?ST)id=(?P<storyId>[0-9]+)" + + def extractChapterUrlsAndMetadata(self): + + if not _is_story_url(self.url): + # Get the link to the index page + try: + chapterHtml = _fix_broken_markup(self._fetchUrl(self.url)) + chapterSoup = bs.BeautifulSoup(chapterHtml) + indexLink = chapterSoup.find("a", text="[Index]").parent + self._normalizeURL('http://' + self.getSiteDomain() + '/' + indexLink.get('href')) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + logger.debug("Determined index page: <%s>" % self.url) + + try: + indexHtml = _fix_broken_markup(self._fetchUrl(self.url)) + soup = bs.BeautifulSoup(indexHtml) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + # chapterUrls + for pfLink in soup.findAll("a", text='[PF] '): + chapterLink = pfLink.parent.findNext("a") + chapterTitle = chapterLink.string + if self.getConfig('strip_chapter_numeral'): + chapterTitle = re.sub("^\d+:", "", chapterTitle) + chapterUrl = 'http://' + self.host + '/' + chapterLink['href'] + self.chapterUrls.append((chapterTitle, chapterUrl)) + numChapters = len(self.chapterUrls) + self.story.setMetadata('numChapters', numChapters) + logger.debug('Number of Chapters: %s' % numChapters) + + # title + title = soup.find("table", "headertitle").find("tr").contents[1].string + logger.debug("Title: '%s'" % title) + self.story.setMetadata('title', title) + + # author + authorLink = soup.find("a", {"href":lambda x: x.startswith("Author_Profile.php")}) + authorName = authorLink.find("b").string + authorHref = authorLink['href'] + 
authorUrl = 'http:' + self.host + '/' + authorHref + authorId = authorHref[authorHref.index('=')+1:] + self.story.setMetadata('author', authorName) + self.story.setMetadata('authorId', authorId) + self.story.setMetadata('authorUrl', authorUrl) + logger.debug("Author: %s [%s] @ <%s>" % (authorId, authorName, authorUrl)) + + # numWords + numWordsMatch = re.search("Word Count: (\d+)<BR>", indexHtml) + if numWordsMatch: + numWords = numWordsMatch.group(1) + logger.debug('Number of words: %s' % numWords) + self.story.setMetadata('numWords', numWords) + + # description + description = soup.find("b", text="Description:").parent.nextSibling.nextSibling + self.story.setDescription(description) + logger.debug("Summary: '%s'" % description) + + # characters + characters = soup.find("b", text="Characters").parent.nextSibling.nextSibling.nextSibling + for character in characters.split(", "): + self.story.addToList('characters', character) + logger.debug("Characters: %s" % self.story.getMetadata('characters')) + + logger.debug('Title as `str`: ' + str(title)) + # For publication date we need to search + try: + queryString = urllib.urlencode(( + ('type', 3), + ('field', 1), + # need translate here for the weird accented letters + ('text', _latinize(title)), + ('search', 'Search'), + )) + searchUrl = 'http://%s/Story_Chapter_Search.php?%s' % (self.host, queryString) + logger.debug("Search URL: <%s>" % searchUrl) + searchHtml = _fix_broken_markup(self._fetchUrl(searchUrl)) + searchSoup = bs.BeautifulSoup(searchHtml) + date = searchSoup.find(text="Updated:").nextSibling.string + logger.debug("Last Updated: '%s'" % date) + self.story.setMetadata('dateUpdated', makeDate(date, self.dateformat)) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + def getChapterText(self, url): + + logger.debug('Downloading chapter <%s>' % url) + + time.sleep(0.5) + htmldata = _fix_broken_markup(self._fetchUrl(url)) + soup = 
bs.BeautifulSoup(htmldata) + + #strip comments from soup + [comment.extract() for comment in soup.findAll(text=lambda text:isinstance(text, bs.Comment))] + + # Strip redundant headings + [font.parent.extract() for font in soup.findAll("font", {"size": "4"})] + + # get story text + textDiv = soup.find("div", "text") + return self.utf8FromSoup(url, textDiv) + +def getClass(): + return TolkienFanfictionAdapter diff --git a/fanficdownloader/adapters/adapter_trekiverseorg.py b/fanficdownloader/adapters/adapter_trekiverseorg.py new file mode 100644 index 00000000..7b440681 --- /dev/null +++ b/fanficdownloader/adapters/adapter_trekiverseorg.py @@ -0,0 +1,325 @@ +# -*- coding: utf-8 -*- + +# Copyright 2013 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import time +import logging +logger = logging.getLogger(__name__) +import re +import urllib2 + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, makeDate + +def getClass(): + return TrekiverseOrgAdapter + +# Class name has to be unique. Our convention is camel case the +# sitename with Adapter at the end. www is skipped. +class TrekiverseOrgAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + + self.decode = ["iso-8859-1", + "Windows-1252"] # 1252 is a superset of iso-8859-1. 
+ # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + self.username = "NoneGiven" # if left empty, site doesn't return any message at all. + self.password = "" + self.is_adult=False + + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + + # normalized story URL. + self._setURL("http://"+self.getSiteDomain()\ + +"/efiction/viewstory.php?sid="+self.story.getMetadata('storyId')) + + # Each adapter needs to have a unique site abbreviation. + self.story.setMetadata('siteabbrev','trkvs') + + # The date format will vary from site to site. + # http://docs.python.org/library/datetime.html#strftime-strptime-behavior + self.dateformat = "%d/%m/%Y" + + @staticmethod # must be @staticmethod, don't remove it. + def getSiteDomain(): + # The site domain. + return 'trekiverse.org' + + @classmethod + def getAcceptDomains(cls): + return ['trekiverse.org','efiction.trekiverse.org'] + + @classmethod + def getSiteExampleURLs(cls): + return "http://"+cls.getSiteDomain()+"/efiction/viewstory.php?sid=1234 http://efiction."+cls.getSiteDomain()+"/viewstory.php?sid=1234" + + def getSiteURLPattern(self): + return r'(http://trekiverse\.org/efiction/viewstory\.php\?sid=\d+|http://efiction\.trekiverse\.org/viewstory\.php\?sid=\d+)' + + ## Login seems to be reasonably standard across eFiction sites. 
+ def needToLoginCheck(self, data): + if 'Registered Users Only' in data \ + or 'There is no such account on our website' in data \ + or "That password doesn't match the one in our database" in data: + return True + else: + return False + + def performLogin(self, url): + params = {} + + if self.password: + params['penname'] = self.username + params['password'] = self.password + else: + params['penname'] = self.getConfig("username") + params['password'] = self.getConfig("password") + params['cookiecheck'] = '1' + params['submit'] = 'Submit' + + loginUrl = 'http://' + self.getSiteDomain() + '/efiction/user.php?action=login' + logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl, + params['penname'])) + + d = self._fetchUrl(loginUrl, params) + + if "Member Account" not in d : #Member Account + logger.info("Failed to login to URL %s as %s" % (loginUrl, + params['penname'])) + raise exceptions.FailedToLogin(url,params['penname']) + return False + else: + return True + + + ## Getting the chapter list and the meta data, plus 'is adult' checking. + def extractChapterUrlsAndMetadata(self): + + if self.is_adult or self.getConfig("is_adult"): + # Weirdly, different sites use different warning numbers. + # If the title search below fails, there's a good chance + # you need a different number. print data at that point + # and see what the 'click here to continue' url says. + addurl = "&index=1&ageconsent=ok&warning=5" + else: + addurl="&index=1" + + # index=1 makes sure we see the story chapter index. Some + # sites skip that for one-chapter stories. + url = self.url+addurl + logger.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + if self.needToLoginCheck(data): + # need to log in for this one. 
+ self.performLogin(url) + data = self._fetchUrl(url) + + m = re.search(r"'viewstory.php\?sid=\d+((?:&ageconsent=ok)?&warning=\d+)'",data) + if m != None: + if self.is_adult or self.getConfig("is_adult"): + # We tried the default and still got a warning, so + # let's pull the warning number from the 'continue' + # link and reload data. + addurl = m.group(1) + # correct stupid & error in url. + addurl = addurl.replace("&","&") + url = self.url+'&index=1'+addurl + logger.debug("URL 2nd try: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + else: + raise exceptions.AdultCheckRequired(self.url) + + if "Access denied. This story has not been validated by the adminstrators of this site." in data: + raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.") + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulSoup(data) + + # Now go hunting for all the meta data and the chapter list. + + ## Title and author + a = soup.find('div', {'id' : 'pagetitle'}) + aut = a.find('a', href=re.compile(r"^viewuser\.php\?uid=")) + self.story.setMetadata('authorId',aut['href'].split('=')[1]) + self.story.setMetadata('authorUrl','http://'+self.host+'/efiction/'+aut['href']) + self.story.setMetadata('author',aut.string) + + ttl = a.find('a', href=re.compile(r'^viewstory.php\?sid=%s$'%self.story.getMetadata('storyId'))) + self.story.setMetadata('title',ttl.string) + + # Find the chapters: + outputdiv = soup.find('div', {'id':'output'}) + # (amp;)? because it should be &, but is escaped to & in URL. 
+ # viewstory.php?sid=35&chapter=3 + chapters=outputdiv.findAll('a', href=re.compile(r'^viewstory.php\?sid=%s&(amp;)?chapter=\d+$'%self.story.getMetadata('storyId'))) + if len(chapters)==0: + raise exceptions.FailedToDownload(self.getSiteDomain() +" says: No php/html chapters found.") + if len(chapters)==1: + self.chapterUrls.append((self.story.getMetadata('title'),'http://'+self.host+'/efiction/'+chapters[0]['href'])) + else: + for chapter in chapters: + # just in case there's tags, like <i> in chapter titles. + self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/efiction/'+chapter['href'])) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + # eFiction sites don't help us out a lot with their meta data + # formating, so it's a little ugly. + + def defaultGetattr(d,k): + try: + return d[k] + except: + return "" + + # <span class="label">Rated:</span> NC-17<br /> etc + labels = soup.findAll('span',{'class':'label'}) + for labelspan in labels: + value = labelspan.nextSibling + label = labelspan.string + + if 'Summary' in label: + ## Everything until the next span class='label' + svalue = '' + while value and not defaultGetattr(value,'class') == 'label': + svalue += str(value) + value = value.nextSibling + # sometimes poorly formated desc (<p> w/o </p>) leads + # to all labels being included. 
+ svalue=svalue[:svalue.find('<span class="label">')] + self.setDescription(url,svalue) + #self.story.setMetadata('description',stripHTML(svalue)) + + if 'Rated' in label: + self.story.setMetadata('rating', value) + + if 'Word count' in label: + self.story.setMetadata('numWords', value) + + if 'Categories' in label: + cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories')) + catstext = [cat.string for cat in cats] + for cat in catstext: + self.story.addToList('category',cat.string) + + if 'Characters' in label: + chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters')) + charstext = [char.string for char in chars] + for char in charstext: + self.story.addToList('characters',char.string) + + if 'Genre' in label: + genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=9')) + genrestext = [genre.string for genre in genres] + self.genre = ', '.join(genrestext) + for genre in genrestext: + self.story.addToList('genre',genre.string) + + if 'Awards' in label: + awards = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=12')) + awardstext = [award.string for award in awards] + self.award = ', '.join(awardstext) + for award in awardstext: + self.story.addToList('awards',award.string) + + if 'Pairing' in label: + ships = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=10')) + shipstext = [ship.string for ship in ships] + self.ship = ', '.join(shipstext) + for ship in shipstext: + self.story.addToList('ships',ship.string) + + if 'Warnings' in label: + warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=11')) + warningstext = [warning.string for warning in warnings] + self.warning = ', '.join(warningstext) + for warning in warningstext: + self.story.addToList('warnings',warning.string) + + if 'Completed' in label: + if 'Yes' in value: + self.story.setMetadata('status', 'Completed') + else: + 
self.story.setMetadata('status', 'In-Progress') + + if 'Published' in label: + self.story.setMetadata('datePublished', makeDate(value.strip(), "%d %b %Y")) + + if 'Updated' in label: + self.story.setMetadata('dateUpdated', makeDate(value.strip(), "%d %b %Y")) + + try: + # Find Series name from series URL. + a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+")) + series_name = a.string + series_url = 'http://'+self.host+'/efiction/'+a['href'] + + # use BeautifulSoup HTML parser to make everything easier to find. + seriessoup = bs.BeautifulSoup(self._fetchUrl(series_url)) + storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$')) + i=1 + for a in storyas: + if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')): + self.setSeries(series_name, i) + self.story.setMetadata('seriesUrl',series_url) + break + i+=1 + + except: + # I find it hard to care if the series parsing fails + pass + + # grab the text for an individual chapter. + def getChapterText(self, url): + + logger.debug('Getting chapter text from: %s' % url) + + soup = bs.BeautifulSoup(self._fetchUrl(url)) # otherwise soup eats the br/hr tags. + + div = soup.find('div', {'id' : 'story'}) + + if None == div: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) + + notesdiv = soup.find('div', {'class':'noteinfo'}) + if notesdiv != None: + div.insert(0,"<hr>") + div.insert(0,notesdiv) + div.insert(0,"<hr>") + + return self.utf8FromSoup(url,div) diff --git a/fanficdownloader/adapters/adapter_tthfanficorg.py b/fanficdownloader/adapters/adapter_tthfanficorg.py new file mode 100644 index 00000000..d9bc28b2 --- /dev/null +++ b/fanficdownloader/adapters/adapter_tthfanficorg.py @@ -0,0 +1,308 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
import time  # NOTE(review): duplicate of the `import time` above; harmless but redundant.

from .. import BeautifulSoup as bs
from ..htmlcleanup import stripHTML
from .. import exceptions as exceptions

from base_adapter import BaseSiteAdapter, makeDate

class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):
    """Download adapter for www.tthfanfic.org (Twisting the Hellmouth)."""

    def __init__(self, config, url):
        """Validate *url* against the site pattern, record the story id,
        and normalize the story URL to http://www.tthfanfic.org/Story-<id>.

        Raises InvalidStoryURL when the URL does not match the site pattern.
        """
        BaseSiteAdapter.__init__(self, config, url)
        self.story.setMetadata('siteabbrev','tth')
        # Dates on this site look like '01 Jan 11' (two-digit year).
        self.dateformat = "%d %b %y"
        self.is_adult=False
        self.username = None
        self.password = None
        # get storyId from url--url validation guarantees query correct
        m = re.match(self.getSiteURLPattern(),url)
        if m:
            self.story.setMetadata('storyId',m.group('id'))

            # normalized story URL.
            self._setURL("http://"+self.getSiteDomain()\
                         +"/Story-"+self.story.getMetadata('storyId'))
        else:
            raise exceptions.InvalidStoryURL(url,
                                             self.getSiteDomain(),
                                             self.getSiteExampleURLs())

    @staticmethod
    def getSiteDomain():
        # The site domain, with www.
        return 'www.tthfanfic.org'

    @classmethod
    def getSiteExampleURLs(cls):
        # Space-separated sample URLs, shown to users when their URL is rejected.
        return "http://www.tthfanfic.org/Story-1234 http://www.tthfanfic.org/Story-1234/Author+Story+Title.htm http://www.tthfanfic.org/T-99999999/Story-1234-1/Author+Story+Title.htm http://www.tthfanfic.org/story.php?no=12345"

    # http://www.tthfanfic.org/T-999999999999/Story-12345-1/Author+Story+Title.htm
    # http://www.tthfanfic.org/Story-12345
    # http://www.tthfanfic.org/Story-12345/Author+Story+Title.htm
    # http://www.tthfanfic.org/story.php?no=12345
    def getSiteURLPattern(self):
        # Named group 'id' captures the numeric story id used by __init__.
        return r"http://www.tthfanfic.org(/(T-\d+/)?Story-|/story.php\?no=)(?P<id>\d+)(-\d+)?(/.*)?$"

    def use_pagecache(self):
        '''
        adapters that will work with the page cache need to implement
        this and change it to True.
        '''
        return True

    # tth won't send you future updates if you aren't 'caught up'
    # on the story. Login isn't required for F21, but logging in will
    # mark stories you've downloaded as 'read' on tth.
+ def performLogin(self): + params = {} + + if self.password: + params['urealname'] = self.username + params['password'] = self.password + else: + params['urealname'] = self.getConfig("username") + params['password'] = self.getConfig("password") + params['loginsubmit'] = 'Login' + + if not params['password']: + return + + loginUrl = 'http://' + self.getSiteDomain() + '/login.php' + logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl, + params['urealname'])) + + ## need to pull empty login page first to get ctkn and + ## password name, which are BUSs +# <form method='post' action='/login.php' accept-charset="utf-8"> +# <input type='hidden' name='ctkn' value='4bdf761f5bea06bf4477072afcbd0f8d721d1a4f989c09945a9e87afb7a66de1'/> +# <input type='text' id='urealname' name='urealname' value=''/> +# <input type='password' id='password' name='6bb3fcd148d148629223690bf19733b8'/> +# <input type='submit' value='Login' name='loginsubmit'/> + soup = bs.BeautifulSoup(self._fetchUrl(loginUrl)) + params['ctkn']=soup.find('input', {'name':'ctkn'})['value'] + params[soup.find('input', {'id':'password'})['name']] = params['password'] + + d = self._fetchUrl(loginUrl, params) + + if "Stories Published" not in d : #Member Account + logger.info("Failed to login to URL %s as %s" % (loginUrl, + params['urealname'])) + raise exceptions.FailedToLogin(self.url,params['urealname']) + return False + else: + return True + + def extractChapterUrlsAndMetadata(self): + # fetch the chapter. From that we will get almost all the + # metadata and chapter list + + url=self.url + logger.debug("URL: "+url) + + # tth won't send you future updates if you aren't 'caught up' + # on the story. Login isn't required for F21, but logging in will + # mark stories you've downloaded as 'read' on tth. + self.performLogin() + + # use BeautifulSoup HTML parser to make everything easier to find. 
        # Fetch the story page; a 404 means the story is gone.
        try:
            data = self._fetchUrl(url)
            soup = bs.BeautifulSoup(data)
        except urllib2.HTTPError, e:
            if e.code == 404:
                raise exceptions.StoryDoesNotExist(url)
            else:
                raise e

        descurl = url

        if "<h2>Story Not Found</h2>" in data:
            raise exceptions.StoryDoesNotExist(url)

        if self.is_adult or self.getConfig("is_adult"):
            form = soup.find('form', {'id':'sitemaxratingform'})
            # if is_adult and rating isn't already set to FR21, set it so.
            if not form.find('option',{'value':'5'}).get('selected'):
                params={'ctkn':form.find('input', {'name':'ctkn'})['value'],
                        'sitemaxrating':'5'}
                logger.info("Attempting to get rating cookie for %s" % url)
                data = self._postUrl("http://"+self.getSiteDomain()+'/setmaxrating.php',params)
                # refetch story page (bypassing the page cache).
                ## XXX - needs cache invalidate? Or at least check whether this needs doing...
                data = self._fetchUrl(url,usecache=False)
                soup = bs.BeautifulSoup(data)

        if "NOTE: This story is rated FR21 which is above your chosen filter level." in data:
            raise exceptions.AdultCheckRequired(self.url)

        # http://www.tthfanfic.org/AuthorStories-3449/Greywizard.htm
        # Find authorid and URL from... author url.
        a = soup.find('a', href=re.compile(r"^/AuthorStories-\d+"))
        self.story.setMetadata('authorId',a['href'].split('/')[1].split('-')[1])
        self.story.setMetadata('authorUrl','http://'+self.host+a['href'])
        self.story.setMetadata('author',stripHTML(a))
        authorurl = 'http://'+self.host+a['href']

        try:
            # going to pull part of the meta data from *primary* author list page.
            logger.debug("**AUTHOR** URL: "+authorurl)
            authordata = self._fetchUrl(authorurl)
            descurl=authorurl
            authorsoup = bs.BeautifulSoup(authordata)
            # author can have several pages, scan until we find it.
            while( not authorsoup.find('a', href=re.compile(r"^/Story-"+self.story.getMetadata('storyId'))) ):
                nextarrow = authorsoup.find('a', {'class':'arrowf'})
                if not nextarrow:
                    ## Ran out of author-list pages without finding the
                    ## story.  If the user's rating filter is lower than
                    ## the story's rating, the story won't appear in the
                    ## author's lists even though the *story* page itself
                    ## is reachable by URL -- so treat this as needing
                    ## the adult check.
                    raise exceptions.AdultCheckRequired(self.url)
                nextpage = 'http://'+self.host+nextarrow['href']
                logger.debug("**AUTHOR** nextpage URL: "+nextpage)
                authordata = self._fetchUrl(nextpage)
                descurl=nextpage
                authorsoup = bs.BeautifulSoup(authordata)
        except urllib2.HTTPError, e:
            if e.code == 404:
                raise exceptions.StoryDoesNotExist(url)
            else:
                raise e

        # The story's entry on the author list page carries description and title.
        storydiv = authorsoup.find('div', {'id':'st'+self.story.getMetadata('storyId'), 'class':re.compile(r"storylistitem")})
        self.setDescription(descurl,storydiv.find('div',{'class':'storydesc'}))
        #self.story.setMetadata('description',stripHTML(storydiv.find('div',{'class':'storydesc'})))
        self.story.setMetadata('title',stripHTML(storydiv.find('a',{'class':'storylink'})))

        ainfo = soup.find('a', href='/StoryInfo-%s-1'%self.story.getMetadata('storyId'))
        if ainfo != None: # indicates multiple authors/contributors.
            try:
                # going to pull part of the meta data from author list page.
                infourl = 'http://'+self.host+ainfo['href']
                logger.debug("**StoryInfo** URL: "+infourl)
                infodata = self._fetchUrl(infourl)
                infosoup = bs.BeautifulSoup(infodata)

                # for a in infosoup.findAll('a',href=re.compile(r"^/Author-\d+")):
                #     self.story.addToList('authorId',a['href'].split('/')[1].split('-')[1])
                #     self.story.addToList('authorUrl','http://'+self.host+a['href'].replace("/Author-","/AuthorStories-"))
                #     self.story.addToList('author',stripHTML(a))

                # second verticaltable is the chapter list.
                table = infosoup.findAll('table',{'class':'verticaltable'})[1]
                for a in table.findAll('a',href=re.compile(r"^/Story-"+self.story.getMetadata('storyId'))):
                    # each chapter row names its contributing author.
                    autha = a.findNext('a',href=re.compile(r"^/Author-\d+"))
                    self.story.addToList('authorId',autha['href'].split('/')[1].split('-')[1])
                    self.story.addToList('authorUrl','http://'+self.host+autha['href'].replace("/Author-","/AuthorStories-"))
                    self.story.addToList('author',stripHTML(autha))
                    # include leading number to match 1. ... 2. ...
                    self.chapterUrls.append(("%d. %s by %s"%(len(self.chapterUrls)+1,
                                                             stripHTML(a),
                                                             stripHTML(autha)),'http://'+self.host+a['href']))

            except urllib2.HTTPError, e:
                if e.code == 404:
                    raise exceptions.StoryDoesNotExist(url)
                else:
                    raise e
        else: # single author:
            # Find the chapter selector
            select = soup.find('select', { 'name' : 'chapnav' } )

            if select is None:
                # no selector found, so it's a one-chapter story.
                self.chapterUrls.append((self.story.getMetadata('title'),url))
            else:
                allOptions = select.findAll('option')
                for o in allOptions:
                    url = "http://"+self.host+o['value']
                    # just in case there's tags, like <i> in chapter titles.
                    self.chapterUrls.append((stripHTML(o),url))

        self.story.setMetadata('numChapters',len(self.chapterUrls))

        verticaltable = soup.find('table', {'class':'verticaltable'})

        # Category bookkeeping: the site files everything under BtVS unless
        # marked otherwise; these flags decide whether to add it at the end.
        BtVS = True
        BtVSNonX = False
        for cat in verticaltable.findAll('a', href=re.compile(r"^/Category-")):
            if cat.string not in ['General', 'Non-BtVS/AtS Stories', 'Non-BTVS/AtS Stories', 'BtVS/AtS Non-Crossover', 'Non-BtVS Crossovers']:
                self.story.addToList('category',cat.string)
            else:
                if 'Non-BtVS' in cat.string or 'Non-BTVS' in cat.string:
                    BtVS = False
                if 'BtVS/AtS Non-Crossover' == cat.string:
                    BtVSNonX = True

        # Metadata cells are addressed positionally within the vertical table.
        verticaltabletds = verticaltable.findAll('td')
        self.story.setMetadata('rating', verticaltabletds[2].string)
        self.story.setMetadata('numWords', verticaltabletds[4].string)

        # Complete--if completed.
        if 'Yes' in verticaltabletds[10].string:
            self.story.setMetadata('status', 'Completed')
        else:
            self.story.setMetadata('status', 'In-Progress')

        self.story.setMetadata('datePublished',makeDate(stripHTML(verticaltabletds[8].string), self.dateformat))
        self.story.setMetadata('dateUpdated',makeDate(stripHTML(verticaltabletds[9].string), self.dateformat))

        # Genre is carried by the icon titles on the author-list entry.
        for icon in storydiv.find('span',{'class':'storyicons'}).findAll('img'):
            if( icon['title'] not in ['Non-Crossover'] ) :
                self.story.addToList('genre',icon['title'])
            else:
                if not BtVSNonX:
                    BtVS = False # Don't add BtVS if Non-Crossover, unless it's a BtVS/AtS Non-Crossover

        #print("BtVS: %s BtVSNonX: %s"%(BtVS,BtVSNonX))
        if BtVS:
            self.story.addToList('category','Buffy: The Vampire Slayer')

        # Series membership is announced in a prose sentence; parse it out.
        pseries = soup.find('p', {'style':'margin-top:0px'})
        m = re.match('This story is No\. (?P<num>\d+) in the series "(?P<series>.+)"\.',
                     pseries.text)
        if m:
            self.setSeries(m.group('series'),m.group('num'))
            self.story.setMetadata('seriesUrl',"http://"+self.host+pseries.find('a')['href'])

    def getChapterText(self, url):
        """Fetch one chapter page and return its story body as utf8 HTML."""
        logger.debug('Getting chapter text from: %s' % url)
        soup = bs.BeautifulSoup(self._fetchUrl(url))

        div = soup.find('div', {'id' : 'storyinnerbody'})

        if None == div:
            raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)

        # strip out included chapter title, if present, to avoid doubling up.
        # Remove the embedded <h3> chapter title, if any; absence is fine.
        try:
            div.find('h3').extract()
        except:
            # NOTE(review): bare except also hides unexpected errors; the
            # visible intent is only "no <h3> present is acceptable".
            pass
        return self.utf8FromSoup(url,div)

def getClass():
    # Module entry point used by the adapter loader.
    return TwistingTheHellmouthSiteAdapter

diff --git a/fanficdownloader/adapters/adapter_twcslibrarynet.py b/fanficdownloader/adapters/adapter_twcslibrarynet.py
new file mode 100644
index 00000000..6bae06a3
--- /dev/null
+++ b/fanficdownloader/adapters/adapter_twcslibrarynet.py
@@ -0,0 +1,272 @@
# -*- coding: utf-8 -*-

# Copyright 2011 Fanficdownloader team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Software: eFiction
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib
import urllib2

from .. import BeautifulSoup as bs
from ..htmlcleanup import stripHTML
from .. import exceptions as exceptions

from base_adapter import BaseSiteAdapter, makeDate

class TheWritersCoffeeShopComSiteAdapter(BaseSiteAdapter):
    """Adapter for The Writer's Coffee Shop library (an eFiction site)."""

    def __init__(self, config, url):
        """Record site metadata and normalize to .../viewstory.php?sid=<id>."""
        BaseSiteAdapter.__init__(self, config, url)
        self.story.setMetadata('siteabbrev','twcs')
        self.decode = ["Windows-1252",
                       "utf8"] # 1252 is a superset of iso-8859-1.
                               # Most sites that claim to be
                               # iso-8859-1 (and some that claim to be
                               # utf8) are really windows-1252.
        self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
        self.password = ""
        self.is_adult=False

        # get storyId from url--url validation guarantees query is only sid=1234
        self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])

        # normalized story URL.
        self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
        # Dates on this site look like '01 Jan 2011' (four-digit year).
        self.dateformat = "%d %b %Y"


    @classmethod
    def getAcceptDomains(cls):
        # Both the old and the current domain are accepted in user URLs.
        return ['thewriterscoffeeshop.com','twcslibrary.net']

    @staticmethod
    def getSiteDomain():
        return 'www.twcslibrary.net'

    @classmethod
    def getSiteExampleURLs(cls):
        return "http://"+cls.getSiteDomain()+"/viewstory.php?sid=1234"

    def getSiteURLPattern(self):
        # Accepts either domain, optional www. and optional /library/ prefix.
        return re.escape("http://")+"(www.)?("+'|'.join(self.getAcceptDomains())+")/(library/)?"+re.escape("viewstory.php?sid=")+r"\d+$"

    def needToLoginCheck(self, data):
        """Return True when *data* (a fetched page) shows a login-required
        or failed-login message."""
        if 'Registered Users Only' in data \
                or 'There is no such account on our website' in data \
                or "That password doesn't match the one in our database" in data:
            return True
        else:
            return False

    def performLogin(self, url):
        """Log in to the site; raises FailedToLogin on rejection."""
        params = {}

        # Prefer credentials set on the adapter; fall back to configuration.
        if self.password:
            params['penname'] = self.username
            params['password'] = self.password
        else:
            params['penname'] = self.getConfig("username")
            params['password'] = self.getConfig("password")
        params['cookiecheck'] = '1'
        params['submit'] = 'Submit'

        loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login'
        logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
                                                             params['penname']))

        d = self._fetchUrl(loginUrl, params)

        if "Member Account" not in d : #Member Account
            logger.info("Failed to login to URL %s as %s" % (loginUrl,
                                                             params['penname']))
            raise exceptions.FailedToLogin(url,params['penname'])
            # NOTE(review): this `return False` is unreachable (follows a raise).
            return False
        else:
            return True

    def extractChapterUrlsAndMetadata(self):

        # Pre-answer the age-consent gate when the user opted in.
        if self.is_adult or self.getConfig("is_adult"):
            addurl = "&ageconsent=ok&warning=3"
        else:
            addurl=""

        # index=1 shows the chapter index page.
        url = self.url+'&index=1'+addurl
        logger.debug("URL: "+url)

        try:
            data = self._fetchUrl(url)
        except urllib2.HTTPError, e:
            if e.code == 404:
                raise exceptions.StoryDoesNotExist(self.url)
            else:
                raise e

        if self.needToLoginCheck(data):
            # need to log in for this one.
            self.performLogin(url)
            data = self._fetchUrl(url)

        if "Age Consent Required" in data:
            raise exceptions.AdultCheckRequired(self.url)

        if "Access denied. This story has not been validated by the adminstrators of this site." in data:
            # (the misspelled 'adminstrators' matches the site's own output)
            raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")

        # problems with some stories, but only in calibre. I suspect
        # issues with different SGML parsers in python. This is a
        # nasty hack, but it works.
        data = data[data.index("<body"):]

        # use BeautifulSoup HTML parser to make everything easier to find.
        soup = bs.BeautifulSoup(data)

        ## Title
        a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
        self.story.setMetadata('title',stripHTML(a))

        # Find authorid and URL from... author url.
        a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
        self.story.setMetadata('authorId',a['href'].split('=')[1])
        self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href'])
        self.story.setMetadata('author',a.string)

        # Find the chapters:
        for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")):
            # just in case there's tags, like <i> in chapter titles.
            self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+chapter['href']+addurl))

        self.story.setMetadata('numChapters',len(self.chapterUrls))

        # dict-style lookup that never raises; used while walking siblings.
        def defaultGetattr(d,k):
            try:
                return d[k]
            except:
                return ""

        # <span class="label">Rated:</span> NC-17<br /> etc
        labels = soup.findAll('span',{'class':'label'})
        for labelspan in labels:
            value = labelspan.nextSibling
            label = labelspan.string

            if 'Summary' in label:
                ## Everything until the next span class='label'
                svalue = ""
                while not defaultGetattr(value,'class') == 'label':
                    svalue += str(value)
                    # poor HTML(unclosed <p> for one) can cause run on
                    # over the next label.
                    if '<span class="label">' in svalue:
                        svalue = svalue[0:svalue.find('<span class="label">')]
                        break
                    else:
                        value = value.nextSibling
                self.setDescription(url,svalue)

            if 'Rated' in label:
                self.story.setMetadata('rating', value)

            if 'Word count' in label:
                self.story.setMetadata('numWords', value)

            if 'Categories' in label:
                cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
                catstext = [cat.string for cat in cats]
                for cat in catstext:
                    # NOTE(review): `cat` is already the tag's .string; the
                    # extra `.string` lookup relies on BeautifulSoup behavior
                    # -- verify.
                    self.story.addToList('category',cat.string)

            if 'Characters' in label:
                chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
                charstext = [char.string for char in chars]
                for char in charstext:
                    self.story.addToList('characters',char.string)

            if 'Genre' in label:
                genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class'))
                genrestext = [genre.string for genre in genres]
                self.genre = ', '.join(genrestext)
                for genre in genrestext:
                    self.story.addToList('genre',genre.string)

            if 'Completed' in label:
                if 'Yes' in value:
                    self.story.setMetadata('status', 'Completed')
                else:
                    self.story.setMetadata('status', 'In-Progress')

            if 'Published' in label:
                self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat))

            if 'Updated' in label:
                # there's a stray [ at the end.
                #value = value[0:-1]
                self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))

        try:
            # Find Series name from series URL.
            a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+"))
            series_name = a.string
            series_url = 'http://'+self.host+'/'+a['href']

            # use BeautifulSoup HTML parser to make everything easier to find.
            seriessoup = bs.BeautifulSoup(self._fetchUrl(series_url))
            storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
            i=1
            for a in storyas:
                if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):
                    self.setSeries(series_name, i)
                    self.story.setMetadata('seriesUrl',series_url)
                    break
                i+=1

        except:
            # I find it hard to care if the series parsing fails
            pass

    def getChapterText(self, url):
        """Fetch one chapter page; return its notes + story divs as utf8 HTML."""

        logger.debug('Getting chapter text from: %s' % url)

        data = self._fetchUrl(url)
        # problems with some stories, but only in calibre. I suspect
        # issues with different SGML parsers in python. This is a
        # nasty hack, but it works.
        data = data[data.index("<body"):]

        # container the extracted divs are appended into.
        chapter=bs.BeautifulSoup('<div class="story"></div>')

        soup = bs.BeautifulSoup(data)

        found=False
        for div in soup.findAll('div'):
            if div.has_key('class') and div['class'] == 'notes':
                chapter.append(div)
            if div.has_key('id') and div['id'] == 'story':
                chapter.append(div)
                found=True

        if not found:
            raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)

        return self.utf8FromSoup(url,chapter)

def getClass():
    # Module entry point used by the adapter loader.
    return TheWritersCoffeeShopComSiteAdapter

diff --git a/fanficdownloader/adapters/adapter_twilightarchivescom.py b/fanficdownloader/adapters/adapter_twilightarchivescom.py
new file mode 100644
index 00000000..2b8fc206
--- /dev/null
+++ b/fanficdownloader/adapters/adapter_twilightarchivescom.py
@@ -0,0 +1,188 @@
# -*- coding: utf-8 -*-

# Copyright 2012 Fanficdownloader team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2

from .. import BeautifulSoup as bs
from ..htmlcleanup import stripHTML
from .. import exceptions as exceptions

from base_adapter import BaseSiteAdapter, makeDate

def getClass():
    return TwilightArchivesComAdapter

# Class name has to be unique. Our convention is camel case the
# sitename with Adapter at the end. www is skipped.
class TwilightArchivesComAdapter(BaseSiteAdapter):

    def __init__(self, config, url):
        """Record site metadata and normalize to .../read/<id>."""
        BaseSiteAdapter.__init__(self, config, url)

        self.decode = ["Windows-1252",
                       "utf8"] # 1252 is a superset of iso-8859-1.
                               # Most sites that claim to be
                               # iso-8859-1 (and some that claim to be
                               # utf8) are really windows-1252.
        self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
+ self.password = "" + self.is_adult=False + + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[2]) + + + # normalized story URL. http://www.twilightarchives.com/read/9353 + self._setURL('http://' + self.getSiteDomain() + '/read/'+self.story.getMetadata('storyId')) + + # Each adapter needs to have a unique site abbreviation. + self.story.setMetadata('siteabbrev','twa') + + # The date format will vary from site to site. + # http://docs.python.org/library/datetime.html#strftime-strptime-behavior + self.dateformat = "%d %b %y" + + @staticmethod # must be @staticmethod, don't remove it. + def getSiteDomain(): + # The site domain. Does have www here, if it uses it. + return 'www.twilightarchives.com' + + + @classmethod + def getSiteExampleURLs(cls): + return "http://"+cls.getSiteDomain()+"/read/1234" + + def getSiteURLPattern(self): + return re.escape("http://" + self.getSiteDomain()+"/read/")+r"\d+(/d+)?$" + + ## Getting the chapter list and the meta data, plus 'is adult' checking. + def extractChapterUrlsAndMetadata(self): + + # index=1 makes sure we see the story chapter index. Some + # sites skip that for one-chapter stories. + url = self.url + logger.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + if "Access denied. This story has not been validated by the adminstrators of this site." in data: + raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.") + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulSoup(data) + # print data + + # Now go hunting for all the meta data and the chapter list. 
+ + ## Title + a = soup.find('h1') + self.story.setMetadata('title',stripHTML(a)) + + # Find the chapters: + chapters=soup.find('ol', {'class' : 'chapters'}) + if chapters != None: + for chapter in chapters.findAll('a', href=re.compile(r'/read/'+self.story.getMetadata('storyId')+"/\d+$")): + self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+chapter['href'])) + else: + self.chapterUrls.append((self.story.getMetadata('title'),url)) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + +# rated, genre, warnings, seires + + summary = soup.find('p', {'class' : 'images'}) + self.setDescription(url,summary) + + for c in soup.findAll('h2', {'class' : 'title'}): + div = c.nextSibling.nextSibling + + if 'Information' in c.text: + for dt in div.findAll('dt'): + dd=dt.nextSibling.nextSibling + + if 'Author' in dt.text: + a=dd.find('a') + self.story.setMetadata('authorId',a['href'].split('/')[2]) + self.story.setMetadata('authorUrl','http://'+self.host+a['href']) + self.story.setMetadata('author',a.text) + + if 'Words' in dt.text: + self.story.setMetadata('numWords', dd.text) + + if 'Published' in dt.text: + self.story.setMetadata('datePublished', makeDate(stripHTML(dd.text), self.dateformat)) + + if 'Updated' in dt.text: + self.story.setMetadata('dateUpdated', makeDate(stripHTML(dd.text), self.dateformat)) + + if 'Status' in dt.text: + if 'Complete' in dd.text: + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + + if 'Categories' in c.text: + for a in div.findAll('a'): + self.story.addToList('category',a.text) + + if 'Characters' in c.text: + for a in div.findAll('a'): + self.story.addToList('category',a.text) + + if 'Series' in c.text: + a=div.find('a') + series_name = a.text + series_url = 'http://'+self.host+a['href'] + + seriessoup = bs.BeautifulSoup(self._fetchUrl(series_url)) + storyas = seriessoup.find('tbody').findAll('a', href=re.compile(r'^/read/\d+$')) + i=1 + for a in storyas: + if 
a['href'] == ('/read/'+self.story.getMetadata('storyId')): + self.setSeries(series_name, i) + self.story.setMetadata('seriesUrl',series_url) + break + i+=1 + + asoup = bs.BeautifulSoup(self._fetchUrl(self.story.getMetadata('authorUrl'))) + a=asoup.find('tbody').find('a', href=re.compile(r'^/read/'+self.story.getMetadata('storyId'))) + self.story.setMetadata('rating',a.parent.nextSibling.nextSibling.text) + + # grab the text for an individual chapter. + def getChapterText(self, url): + + logger.debug('Getting chapter text from: %s' % url) + + soup = bs.BeautifulStoneSoup(self._fetchUrl(url), + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + + div = soup.find('div', {'class' : 'size images medium'}) + + if None == div: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) + + return self.utf8FromSoup(url,div) diff --git a/fanficdownloader/adapters/adapter_twilightednet.py b/fanficdownloader/adapters/adapter_twilightednet.py new file mode 100644 index 00000000..e70932d4 --- /dev/null +++ b/fanficdownloader/adapters/adapter_twilightednet.py @@ -0,0 +1,253 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Software: eFiction +import time +import logging +logger = logging.getLogger(__name__) +import re +import urllib +import urllib2 + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. 
import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, makeDate + +class TwilightedNetSiteAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + self.story.setMetadata('siteabbrev','tw') + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + self.username = "NoneGiven" # if left empty, site doesn't return any message at all. + self.password = "" + + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + + + # normalized story URL. + self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId')) + + + @staticmethod + def getSiteDomain(): + return 'www.twilighted.net' + + @classmethod + def getAcceptDomains(cls): + return ['www.twilighted.net','twilighted.net'] + + @classmethod + def getSiteExampleURLs(cls): + return "http://www.twilighted.net/viewstory.php?sid=1234" + + def getSiteURLPattern(self): + return re.escape("http://")+r"(www\.)?"+re.escape("twilighted.net/viewstory.php?sid=")+r"\d+$" + + def needToLoginCheck(self, data): + if 'Registered Users Only' in data \ + or 'There is no such account on our website' in data \ + or "That password doesn't match the one in our database" in data: + return True + else: + return False + + def performLogin(self, url): + params = {} + + if self.password: + params['penname'] = self.username + params['password'] = self.password + else: + params['penname'] = self.getConfig("username") + params['password'] = self.getConfig("password") + params['cookiecheck'] = '1' + params['submit'] = 'Submit' + + loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login' + logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl, + params['penname'])) + + d = self._fetchUrl(loginUrl, 
params) + + if "Member Account" not in d : #Member Account + logger.info("Failed to login to URL %s as %s" % (loginUrl, + params['penname'])) + raise exceptions.FailedToLogin(url,params['penname']) + return False + else: + return True + + def extractChapterUrlsAndMetadata(self): + + url = self.url+'&index=1' + logger.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + if self.needToLoginCheck(data): + # need to log in for this one. + self.performLogin(url) + data = self._fetchUrl(url) + + if "Access denied. This story has not been validated by the adminstrators of this site." in data: + raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.") + + # problems with some stories, but only in calibre. I suspect + # issues with different SGML parsers in python. This is a + # nasty hack, but it works. + # twilighted isn't writing <body> ??? wtf? + data = "<html><body>"+data[data.index("</head>"):] + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulSoup(data) + + ## Title + a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) + self.story.setMetadata('title',stripHTML(a)) + + # Find authorid and URL from... author url. + a = soup.find('a', href=re.compile(r"viewuser.php")) + self.story.setMetadata('authorId',a['href'].split('=')[1]) + self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href']) + self.story.setMetadata('author',a.string) + + # Find the chapters: + for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")): + # just in case there's tags, like <i> in chapter titles. 
+ self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+chapter['href'])) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + def defaultGetattr(d,k): + try: + return d[k] + except: + return "" + + # <span class="label">Rated:</span> NC-17<br /> etc + labels = soup.findAll('span',{'class':'label'}) + for labelspan in labels: + value = labelspan.nextSibling + label = labelspan.string + + if 'Summary' in label: + ## Everything until the next span class='label' + svalue = "" + while not defaultGetattr(value,'class') == 'label': + svalue += str(value) + value = value.nextSibling + self.setDescription(url,svalue) + + if 'Rated' in label: + self.story.setMetadata('rating', value) + + if 'Word count' in label: + self.story.setMetadata('numWords', value) + + if 'Categories' in label: + cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories')) + catstext = [cat.string for cat in cats] + for cat in catstext: + self.story.addToList('category',cat.string) + + if 'Characters' in label: + chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters')) + charstext = [char.string for char in chars] + for char in charstext: + self.story.addToList('characters',char.string) + + ## twilighted.net doesn't use genre. + # if 'Genre' in label: + # genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class')) + # genrestext = [genre.string for genre in genres] + # self.genre = ', '.join(genrestext) + # for genre in genrestext: + # self.story.addToList('genre',genre.string) + + if 'Completed' in label: + if 'Yes' in value: + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + + if 'Published' in label: + self.story.setMetadata('datePublished', makeDate(value.strip(), "%B %d, %Y")) + + if 'Updated' in label: + # there's a stray [ at the end. 
+ #value = value[0:-1] + self.story.setMetadata('dateUpdated', makeDate(value.strip(), "%B %d, %Y")) + + try: + # Find Series name from series URL. + a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+")) + series_name = a.string + series_url = 'http://'+self.host+'/'+a['href'] + + # use BeautifulSoup HTML parser to make everything easier to find. + seriessoup = bs.BeautifulSoup(self._fetchUrl(series_url)) + storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$')) + i=1 + for a in storyas: + if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')): + self.setSeries(series_name, i) + self.story.setMetadata('seriesUrl',series_url) + break + i+=1 + + except: + # I find it hard to care if the series parsing fails + pass + + def getChapterText(self, url): + + logger.debug('Getting chapter text from: %s' % url) + + data = self._fetchUrl(url) + # problems with some stories, but only in calibre. I suspect + # issues with different SGML parsers in python. This is a + # nasty hack, but it works. + # twilighted isn't writing <body> ??? wtf? + data = "<html><body>"+data[data.index("</head>"):] + + soup = bs.BeautifulStoneSoup(data, + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + + span = soup.find('div', {'id' : 'story'}) + + if None == span: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) + + return self.utf8FromSoup(url,span) + +def getClass(): + return TwilightedNetSiteAdapter + diff --git a/fanficdownloader/adapters/adapter_twiwritenet.py b/fanficdownloader/adapters/adapter_twiwritenet.py new file mode 100644 index 00000000..2e9ae98f --- /dev/null +++ b/fanficdownloader/adapters/adapter_twiwritenet.py @@ -0,0 +1,281 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Software: eFiction +import time +import logging +logger = logging.getLogger(__name__) +import re +import urllib +import urllib2 + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, makeDate + +class TwiwriteNetSiteAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + self.story.setMetadata('siteabbrev','twrt') + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + self.is_adult = False + self.username = "NoneGiven" # if left empty, twiwrite.net doesn't return any message at all. + self.password = "" + + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + + + # normalized story URL. 
+ self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId')) + + + @staticmethod + def getSiteDomain(): + return 'www.twiwrite.net' + + @classmethod + def getAcceptDomains(cls): + return ['www.twiwrite.net','twiwrite.net'] + + @classmethod + def getSiteExampleURLs(cls): + return "http://www.twiwrite.net/viewstory.php?sid=1234" + + def getSiteURLPattern(self): + return re.escape("http://")+r"(www\.)?"+re.escape("twiwrite.net/viewstory.php?sid=")+r"\d+$" + + def needToLoginCheck(self, data): + if 'Registered Users Only' in data \ + or 'There is no such account on our website' in data \ + or "That password doesn't match the one in our database" in data: + return True + else: + return False + + def performLogin(self, url): + params = {} + + if self.password: + params['penname'] = self.username + params['password'] = self.password + else: + params['penname'] = self.getConfig("username") + params['password'] = self.getConfig("password") + params['cookiecheck'] = '1' + params['submit'] = 'Submit' + + loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login' + logger.info("Will now login to URL (%s) as (%s)" % (loginUrl, + params['penname'])) + + d = self._fetchUrl(loginUrl, params) + + if "Member Account" not in d : #Member Account + logger.info("Failed to login to URL %s as %s" % (loginUrl, + params['penname'])) + raise exceptions.FailedToLogin(url,params['penname']) + return False + else: + return True + + def extractChapterUrlsAndMetadata(self): + + if self.is_adult or self.getConfig("is_adult"): + # Weirdly, different sites use different warning numbers. + # If the title search below fails, there's a good chance + # you need a different number. print data at that point + # and see what the 'click here to continue' url says. 
+ addurl = "&ageconsent=ok&warning=1" # XXX + else: + addurl="" + + url = self.url+'&index=1'+addurl + logger.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + if self.needToLoginCheck(data): + # need to log in for this one. + self.performLogin(url) + data = self._fetchUrl(url) + + if "Access denied. This story has not been validated by the adminstrators of this site." in data: + raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.") + + if "Contains Explicit Content for mature adults only! May contain graphic violence, mature sexual situations, and explicit language. Read with caution." in data: + raise exceptions.AdultCheckRequired(self.url) + + # problems with some stories, but only in calibre. I suspect + # issues with different SGML parsers in python. This is a + # nasty hack, but it works. + data = data[data.index("<body"):] + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulSoup(data) + + pagetitlediv = soup.find('div',id='pagetitle') + + ## Title + a = pagetitlediv.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) + self.story.setMetadata('title',stripHTML(a)) + + # Find authorid and URL from... author url. + a = pagetitlediv.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) + self.story.setMetadata('authorId',a['href'].split('=')[1]) + self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href']) + self.story.setMetadata('author',a.string) + + # Find the chapters: + for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")): + # just in case there's tags, like <i> in chapter titles. 
+ self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+chapter['href']+addurl)) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + ## <meta name='description' content='<p>Description</p> ...' > + ## Summary, strangely, is in the content attr of a <meta name='description'> tag + ## which is escaped HTML. Unfortunately, we can't use it because they don't + ## escape (') chars in the desc, breakin the tag. + #meta_desc = soup.find('meta',{'name':'description'}) + #metasoup = bs.BeautifulStoneSoup(meta_desc['content']) + #self.story.setMetadata('description',stripHTML(metasoup)) + + def defaultGetattr(d,k): + try: + return d[k] + except: + return "" + + # <span class="label">Rated:</span> NC-17<br /> etc + labels = soup.findAll('span',{'class':'label'}) + for labelspan in labels: + value = labelspan.nextSibling + label = labelspan.string + + if 'Summary' in label: + ## Everything until the next span class='label' + svalue = "" + while not defaultGetattr(value,'class') == 'label': + svalue += str(value) + value = value.nextSibling + self.setDescription(url,svalue) + #self.story.setMetadata('description',stripHTML(svalue)) + + if 'Rated' in label: + self.story.setMetadata('rating', value) + + if 'Word count' in label: + self.story.setMetadata('numWords', value) + + if 'Categories' in label: + cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories')) + catstext = [cat.string for cat in cats] + for cat in catstext: + self.story.addToList('category',cat.string) + + if 'Characters' in label: + chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters')) + charstext = [char.string for char in chars] + for char in charstext: + self.story.addToList('characters',char.string) + + if 'Genre' in label: + genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=3')) + genrestext = [genre.string for genre in genres] + self.genre = ', '.join(genrestext) + for genre in 
genrestext: + self.story.addToList('genre',genre.string) + + if 'Warnings' in label: + warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=8')) + warningstext = [warning.string for warning in warnings] + self.warning = ', '.join(warningstext) + for warning in warningstext: + self.story.addToList('warning',warning.string) + + if 'Completed' in label: + if 'Yes' in value: + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + + if 'Published' in label: + self.story.setMetadata('datePublished', makeDate(value.strip(), "%B %d, %Y")) + + if 'Updated' in label: + # there's a stray [ at the end. + value = value[0:-1] + self.story.setMetadata('dateUpdated', makeDate(value.strip(), "%B %d, %Y")) + + try: + # Find Series name from series URL. + a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+")) + series_name = a.string + series_url = 'http://'+self.host+'/'+a['href'] + + # use BeautifulSoup HTML parser to make everything easier to find. + seriessoup = bs.BeautifulSoup(self._fetchUrl(series_url)) + storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$')) + i=1 + for a in storyas: + if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')): + self.setSeries(series_name, i) + self.story.setMetadata('seriesUrl',series_url) + break + i+=1 + + except: + # I find it hard to care if the series parsing fails + pass + + def getChapterText(self, url): + + logger.debug('Getting chapter text from: %s' % url) + + data = self._fetchUrl(url) + # problems with some stories, but only in calibre. I suspect + # issues with different SGML parsers in python. This is a + # nasty hack, but it works. + data = data[data.index("<body"):] + + soup = bs.BeautifulStoneSoup(data, + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. 
+ + span = soup.find('div', {'id' : 'story'}) + + if None == span: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) + + return self.utf8FromSoup(url,span) + +def getClass(): + return TwiwriteNetSiteAdapter + diff --git a/fanficdownloader/adapters/adapter_voracity2eficcom.py b/fanficdownloader/adapters/adapter_voracity2eficcom.py new file mode 100644 index 00000000..56c42573 --- /dev/null +++ b/fanficdownloader/adapters/adapter_voracity2eficcom.py @@ -0,0 +1,233 @@ +# Software: eFiction +import re +import urllib2 +import urlparse + +from .. import BeautifulSoup + +from base_adapter import BaseSiteAdapter, makeDate +from .. import exceptions + + +def getClass(): + return Voracity2EficComAdapter + + +# yields Tag _and_ NavigableString siblings from the given tag. The +# BeautifulSoup findNextSiblings() method for some reasons only returns either +# NavigableStrings _or_ Tag objects, not both. +def _yield_next_siblings(tag): + sibling = tag.nextSibling + while sibling: + yield sibling + sibling = sibling.nextSibling + + +class Voracity2EficComAdapter(BaseSiteAdapter): + SITE_ABBREVIATION = 'voe' + SITE_DOMAIN = 'voracity2.e-fic.com' + + BASE_URL = 'http://' + SITE_DOMAIN + '/' + LOGIN_URL = BASE_URL + 'user.php?action=login' + VIEW_STORY_URL_TEMPLATE = BASE_URL + 'viewstory.php?sid=%d' + METADATA_URL_SUFFIX = '&index=1' + AGE_CONSENT_URL_SUFFIX = '&ageconsent=ok&warning=4' + + DATETIME_FORMAT = '%m/%d/%Y' + REQUIRED_SKIN = 'Simple Elegance' + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + + query_data = urlparse.parse_qs(self.parsedUrl.query) + story_id = query_data['sid'][0] + + self.story.setMetadata('storyId', story_id) + self._setURL(self.VIEW_STORY_URL_TEMPLATE % int(story_id)) + self.story.setMetadata('siteabbrev', self.SITE_ABBREVIATION) + + self.is_logged_in = False + + def _login(self): + # Apparently self.password is only set when login fails, i.e. 
+ # the FailedToLogin exception is raised, so the adapter gets new + # login data and tries again + if self.password: + password = self.password + username = self.username + else: + username = self.getConfig('username') + password = self.getConfig('password') + + parameters = { + 'penname': username, + 'password': password, + 'submit': 'Submit'} + + class CustomizedFailedToLogin(exceptions.FailedToLogin): + def __init__(self, url, passwdonly=False): + # Use username variable from outer scope + exceptions.FailedToLogin.__init__(self, url, username, passwdonly) + + soup = self._customized_fetch_url(self.LOGIN_URL, CustomizedFailedToLogin, parameters) + div = soup.find('div', id='useropts') + if not div: + raise CustomizedFailedToLogin(self.LOGIN_URL) + + self.is_logged_in = True + + def _customized_fetch_url(self, url, exception=None, parameters=None): + if exception: + try: + data = self._fetchUrl(url, parameters) + except urllib2.HTTPError: + raise exception(self.url) + # Just let self._fetchUrl throw the exception, don't catch and + # customize it. + else: + data = self._fetchUrl(url, parameters) + + return BeautifulSoup.BeautifulSoup(data) + + @staticmethod + def getSiteDomain(): + return Voracity2EficComAdapter.SITE_DOMAIN + + @classmethod + def getSiteExampleURLs(cls): + return cls.VIEW_STORY_URL_TEMPLATE % 1234 + + def getSiteURLPattern(self): + return re.escape(self.VIEW_STORY_URL_TEMPLATE[:-2]) + r'\d+$' + + def extractChapterUrlsAndMetadata(self): + soup = self._customized_fetch_url(self.url + self.METADATA_URL_SUFFIX) + + # Check if the story is for "Registered Users Only", i.e. has adult + # content. Based on the "is_adult" attributes either login or raise an + # error. 
+ errortext_div = soup.find('div', {'class': 'errortext'}) + if errortext_div: + error_text = ''.join(errortext_div(text=True)).strip() + if error_text == 'Registered Users Only': + if not (self.is_adult or self.getConfig('is_adult')): + raise exceptions.AdultCheckRequired(self.url) + self._login() + else: + # This case usually occurs when the story doesn't exist, but + # might potentially be something else, so just raise + # FailedToDownload exception with the found error text. + raise exceptions.FailedToDownload(error_text) + + url = ''.join([self.url, self.METADATA_URL_SUFFIX, self.AGE_CONSENT_URL_SUFFIX]) + soup = self._customized_fetch_url(url) + + # If logged in and the skin doesn't match the required skin throw an + # error + if self.is_logged_in: + skin = soup.find('select', {'name': 'skin'}).find('option', selected=True)['value'] + if skin != self.REQUIRED_SKIN: + raise exceptions.FailedToDownload('Required skin "%s" must be set in preferences' % self.REQUIRED_SKIN) + + pagetitle_div = soup.find('div', id='pagetitle') + self.story.setMetadata('title', pagetitle_div.a.string) + + author_anchor = pagetitle_div.a.findNextSibling('a') + url = urlparse.urljoin(self.BASE_URL, author_anchor['href']) + components = urlparse.urlparse(url) + query_data = urlparse.parse_qs(components.query) + + self.story.setMetadata('author', author_anchor.string) + self.story.setMetadata('authorId', query_data['uid']) + self.story.setMetadata('authorUrl', url) + + sort_div = soup.find('div', id='sort') + self.story.setMetadata('reviews', sort_div('a')[1].string) + + for b_tag in soup.find('div', {'class': 'listbox'})('b'): + key = b_tag.string.strip(' :') + try: + value = b_tag.nextSibling.string.strip() + # This can happen with some fancy markup in the summary. 
Just + # ignore this error and set value to None, the summary parsing + # takes care of this + except AttributeError: + value = None + + if key == 'Summary': + contents = [] + keep_summary_html = self.getConfig('keep_summary_html') + + for sibling in _yield_next_siblings(b_tag): + if isinstance(sibling, BeautifulSoup.Tag): + # Encountered next label, break. This method is the + # safest and most reliable I could think of. Blame + # e-fiction sites that allow their users to include + # arbitrary markup into their summaries and the + # horrible HTML markup. + if sibling.name == 'b' and sibling.findPreviousSibling().name == 'br': + break + + if keep_summary_html: + contents.append(self.utf8FromSoup(self.url, sibling)) + else: + contents.append(''.join(sibling(text=True))) + else: + contents.append(sibling) + + # Remove the preceding break line tag and other crud + contents.pop() + contents.pop() + self.story.setMetadata('description', ''.join(contents)) + + elif key == 'Rating': + self.story.setMetadata('rating', value) + + elif key == 'Category': + for sibling in b_tag.findNextSiblings(['a', 'br']): + if sibling.name == 'br': + break + self.story.addToList('category', sibling.string) + + # Seems to be always "None" for some reason + elif key == 'Characters': + for sibling in b_tag.findNextSiblings(['a', 'br']): + if sibling.name == 'br': + break + self.story.addToList('characters', sibling.string) + + elif key == 'Series': + a = b_tag.findNextSibling('a') + if not a: + continue + self.story.setMetadata('series', a.string) + self.story.setMetadata('seriesUrl', urlparse.urljoin(self.BASE_URL, a['href'])) + + elif key == 'Chapter': + self.story.setMetadata('numChapters', int(value)) + + elif key == 'Completed': + self.story.setMetadata('status', 'Completed' if value == 'Yes' else 'In-Progress') + + elif key == 'Words': + self.story.setMetadata('numWords', value) + + elif key == 'Read': + self.story.setMetadata('readings', value) + + elif key == 'Published': + 
self.story.setMetadata('datePublished', makeDate(value, self.DATETIME_FORMAT)) + + elif key == 'Updated': + self.story.setMetadata('dateUpdated', makeDate(value, self.DATETIME_FORMAT)) + + for b_tag in soup.find('div', id='output').findNextSiblings('b'): + chapter_anchor = b_tag.a + title = chapter_anchor.string + url = urlparse.urljoin(self.BASE_URL, chapter_anchor['href']) + self.chapterUrls.append((title, url)) + + def getChapterText(self, url): + url += self.AGE_CONSENT_URL_SUFFIX + soup = self._customized_fetch_url(url) + return self.utf8FromSoup(url, soup.find('div', id='story')) diff --git a/fanficdownloader/adapters/adapter_walkingtheplankorg.py b/fanficdownloader/adapters/adapter_walkingtheplankorg.py new file mode 100644 index 00000000..8e0cb196 --- /dev/null +++ b/fanficdownloader/adapters/adapter_walkingtheplankorg.py @@ -0,0 +1,232 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Software: eFiction +import time +import logging +logger = logging.getLogger(__name__) +import re +import urllib2 + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. 
import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, makeDate + +def getClass(): + return WalkingThePlankOrgAdapter + +class WalkingThePlankOrgAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + self.username = "NoneGiven" # if left empty, site doesn't return any message at all. + self.password = "" + self.is_adult=False + + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + + + # normalized story URL. + self._setURL('http://' + self.getSiteDomain() + '/archive/viewstory.php?sid='+self.story.getMetadata('storyId')) + + # Each adapter needs to have a unique site abbreviation. + self.story.setMetadata('siteabbrev','wtp') + + # The date format will vary from site to site. + # http://docs.python.org/library/datetime.html#strftime-strptime-behavior + self.dateformat = "%b %d, %Y" + + @staticmethod # must be @staticmethod, don't remove it. + def getSiteDomain(): + # The site domain. Does have www here, if it uses it. + return 'www.walkingtheplank.org' + + @classmethod + def getSiteExampleURLs(cls): + return "http://"+cls.getSiteDomain()+"/archive/viewstory.php?sid=1234" + + def getSiteURLPattern(self): + return re.escape("http://"+self.getSiteDomain()+"/archive/viewstory.php?sid=")+r"\d+$" + + + ## Getting the chapter list and the meta data, plus 'is adult' checking. + def extractChapterUrlsAndMetadata(self): + + if self.is_adult or self.getConfig("is_adult"): + # Weirdly, different sites use different warning numbers. + # If the title search below fails, there's a good chance + # you need a different number. print data at that point + # and see what the 'click here to continue' url says. 
+ addurl = "&warning=4" + else: + addurl="" + + # index=1 makes sure we see the story chapter index. Some + # sites skip that for one-chapter stories. + url = self.url+'&index=1'+addurl + logger.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + + # The actual text that is used to announce you need to be an + # adult varies from site to site. Again, print data before + # the title search to troubleshoot. + + if "By clicking this link, you acknowledge" in data: + raise exceptions.AdultCheckRequired(self.url) + + if "Access denied. This story has not been validated by the adminstrators of this site." in data: + raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.") + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulSoup(data) + # print data + + # Now go hunting for all the meta data and the chapter list. + + ## Title + a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) + self.story.setMetadata('title',stripHTML(a)) + + # Find authorid and URL from... author url. + a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) + self.story.setMetadata('authorId',a['href'].split('=')[1]) + self.story.setMetadata('authorUrl','http://'+self.host+'/archive/'+a['href']) + self.story.setMetadata('author',a.string) + + # Find the chapters: + for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")): + # just in case there's tags, like <i> in chapter titles. 
+ self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/archive/'+chapter['href']+addurl)) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + # eFiction sites don't help us out a lot with their meta data + # formating, so it's a little ugly. + + # utility method + def defaultGetattr(d,k): + try: + return d[k] + except: + return "" + + labels = soup.findAll('span',{'class':'label'}) + for labelspan in labels: + value = labelspan.nextSibling + label = labelspan.string + + if 'Summary' in label: + ## Everything until the next span class='label' + svalue = "" + while not defaultGetattr(value,'class') == 'label': + svalue += str(value) + value = value.nextSibling + self.setDescription(url,svalue) + #self.story.setMetadata('description',stripHTML(svalue)) + + if 'Rated' in label: + self.story.setMetadata('rating', value) + + if 'Word count' in label: + self.story.setMetadata('numWords', value) + + if 'Categories' in label: + cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories')) + catstext = [cat.string for cat in cats] + for cat in catstext: + self.story.addToList('category',cat.string) + + if 'Characters' in label: + chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters')) + charstext = [char.string for char in chars] + for char in charstext: + self.story.addToList('characters',char.string) + + if 'Genre' in label: + genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1')) + for genre in genres: + self.story.addToList('genre',genre.string) + + if 'Warnings' in label: + warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX + for warning in warnings: + self.story.addToList('warnings',warning.string) + + if 'Completed' in label: + if 'Yes' in value: + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + + if 'Published' in label: + 
self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat)) + + if 'Updated' in label: + self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat)) + + try: + # Find Series name from series URL. + a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+")) + series_name = a.string + series_url = 'http://'+self.host+'/archive/'+a['href'] + + # use BeautifulSoup HTML parser to make everything easier to find. + seriessoup = bs.BeautifulSoup(self._fetchUrl(series_url)) + storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$')) + i=1 + for a in storyas: + if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')): + self.setSeries(series_name, i) + self.story.setMetadata('seriesUrl',series_url) + break + i+=1 + + except: + # I find it hard to care if the series parsing fails + pass + + # grab the text for an individual chapter. + def getChapterText(self, url): + + logger.debug('Getting chapter text from: %s' % url) + + soup = bs.BeautifulStoneSoup(self._fetchUrl(url), + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + + div = soup.find('div', {'id' : 'story'}) + + if None == div: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) + + return self.utf8FromSoup(url,div) diff --git a/fanficdownloader/adapters/adapter_whoficcom.py b/fanficdownloader/adapters/adapter_whoficcom.py new file mode 100644 index 00000000..099dcf4f --- /dev/null +++ b/fanficdownloader/adapters/adapter_whoficcom.py @@ -0,0 +1,238 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Software: eFiction +import time +import logging +logger = logging.getLogger(__name__) +import re +import urllib2 + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, makeDate + +class WhoficComSiteAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + self.story.setMetadata('siteabbrev','whof') + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + + @staticmethod + def getSiteDomain(): + return 'www.whofic.com' + + @classmethod + def getSiteExampleURLs(cls): + return "http://"+cls.getSiteDomain()+"/viewstory.php?sid=1234" + + def getSiteURLPattern(self): + return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+"\d+$" + + def extractChapterUrlsAndMetadata(self): + + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + + # fetch the first chapter. From that we will: + # - determine title, authorname, authorid + # - get chapter list, if not one-shot. + + url = self.url+'&chapter=1' + logger.debug("URL: "+url) + + # use BeautifulSoup HTML parser to make everything easier to find. 
+ try: + soup = bs.BeautifulSoup(self._fetchUrl(url)) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + # pull title(title) and author from the HTML title. + title = stripHTML(soup.find('title')) + logger.debug('Title: %s' % title) + title = title.split('::')[1].strip() + self.story.setMetadata('title',title.split(' by ')[0].strip()) + self.story.setMetadata('author',title.split(' by ')[1].strip()) + + # Find authorid and URL from... author url. + a = soup.find('a', href=re.compile(r"viewuser.php")) + self.story.setMetadata('authorId',a['href'].split('=')[1]) + self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href']) + + # Find the chapter selector + select = soup.find('select', { 'name' : 'chapter' } ) + + if select is None: + # no selector found, so it's a one-chapter story. + self.chapterUrls.append((self.story.getMetadata('title'),url)) + else: + allOptions = select.findAll('option') + for o in allOptions: + url = self.url + "&chapter=%s" % o['value'] + # just in case there's tags, like <i> in chapter titles. + title = "%s" % o + title = re.sub(r'<[^>]+>','',title) + self.chapterUrls.append((title,url)) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + ## Whofic.com puts none of the other meta data in the chapters + ## or even the story chapter index page. Need to scrape the + ## author page to find it. + + # <table width="100%" bordercolor="#333399" border="0" cellspacing="0" cellpadding="2"><tr><td> + # <b><a href="viewstory.php?sid=38220">Accompaniment 2</a></b> by <a href="viewuser.php?uid=12412">clandestinemiscreant</a> [<a href="reviews.php?sid=38220">Reviews</a> - <a href="reviews.php?sid=38220">0</a>] <br> + # This is a series of short stories written as an accompaniment to Season 2, Season 28 for us oldies, and each is unrelated except for that one factor. 
Each story is canon, in that it does not change established events at time of airing, based on things mentioned and/or implied and missing or deleted scenes that were not seen in the final aired episodes.<br> + # <font size="-1"><b><a href="categories.php?catid=15">Tenth Doctor</a></b> - All Ages - None - Humor, Hurt/Comfort, Romance<br> + # <i>Characters:</i> Rose Tyler<br> + # <i>Series:</i> None<br> + # <i>Published:</i> 2010.08.15 - <i>Updated:</i> 2010.08.16 - <i>Chapters:</i> 4 - <i>Completed:</i> Yes - <i>Word Count:</i> 4890 </font> + # </td></tr></table> + + logger.debug("Author URL: "+self.story.getMetadata('authorUrl')) + soup = bs.BeautifulStoneSoup(self._fetchUrl(self.story.getMetadata('authorUrl')), + selfClosingTags=('br')) # normalize <br> tags to <br /> + # find this story in the list, parse it's metadata based on + # lots of assumptions about the html, since there's little + # tagging. + # Found a story once that had the story URL in the desc for a + # series on the same author's page. Now using the reviews + # link instead to find the appropriate metadata. + a = soup.find('a', href=re.compile(r'reviews.php\?sid='+self.story.getMetadata('storyId'))) + metadata = a.findParent('td') + metadatachunks = self.utf8FromSoup(None,metadata).split('<br />') + # process metadata for this story. + self.setDescription(url,metadatachunks[1]) + #self.story.setMetadata('description', metadatachunks[1]) + + # First line of the stuff with ' - ' separators + moremeta = metadatachunks[2] + moremeta = re.sub(r'<[^>]+>','',moremeta) # strip tags. + + moremetaparts = moremeta.split(' - ') + + # first part is category--whofic.com has categories + # Doctor One-11, Torchwood, etc. We're going to + # prepend any with 'Doctor' or 'Era' (Multi-Era, Other + # Era) as 'Doctor Who'. + # + # Also push each in as 'extra tags'. 
+ category = moremetaparts[0] + if 'Doctor' in category or 'Era' in category : + self.story.addToList('category','Doctor Who') + + for cat in category.split(', '): + self.story.addToList('category',cat) + + # next in that line is age rating. + self.story.setMetadata('rating',moremetaparts[1]) + + # after that is a possible list fo specific warnings, + # Explicit Violence, Swearing, etc + if "None" not in moremetaparts[2]: + for warn in moremetaparts[2].split(', '): + self.story.addToList('warnings',warn) + + # then genre. It's another comma list. All together + # in genre, plus each in extra tags. + genre=moremetaparts[3] + for g in genre.split(r', '): + self.story.addToList('genre',g) + + # line 3 is characters. + chars = metadatachunks[3] + charsearch="<i>Characters:</i>" + if charsearch in chars: + chars = chars[metadatachunks[3].index(charsearch)+len(charsearch):] + for c in chars.split(','): + if c.strip() != u'None': + self.story.addToList('characters',c) + + # the next line is stuff with ' - ' separators *and* names--with tags. + moremeta = metadatachunks[5] + moremeta = re.sub(r'<[^>]+>','',moremeta) # strip tags. + + moremetaparts = moremeta.split(' - ') + + for part in moremetaparts: + (name,value) = part.split(': ') + name=name.strip() + value=value.strip() + if name == 'Published': + self.story.setMetadata('datePublished', makeDate(value, '%Y.%m.%d')) + if name == 'Updated': + self.story.setMetadata('dateUpdated', makeDate(value, '%Y.%m.%d')) + if name == 'Completed': + if value == 'Yes': + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + if name == 'Word Count': + self.story.setMetadata('numWords', value) + + try: + # Find Series name from series URL. + a = metadata.find('a', href=re.compile(r"series.php\?seriesid=\d+")) + series_name = a.string + series_url = 'http://'+self.host+'/'+a['href'] + + # use BeautifulSoup HTML parser to make everything easier to find. 
+ seriessoup = bs.BeautifulSoup(self._fetchUrl(series_url)) + storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$')) + i=1 + for a in storyas: + if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')): + self.setSeries(series_name, i) + self.story.setMetadata('seriesUrl',series_url) + break + i+=1 + + except: + # I find it hard to care if the series parsing fails + pass + + def getChapterText(self, url): + + logger.debug('Getting chapter text from: %s' % url) + + soup = bs.BeautifulStoneSoup(self._fetchUrl(url), + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + + + # hardly a great identifier, I know, but whofic really doesn't + # give us anything better to work with. + span = soup.find('span', {'style' : 'font-size: 100%;'}) + + if None == span: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) + + span.name='div' + return self.utf8FromSoup(url,span) + +def getClass(): + return WhoficComSiteAdapter + diff --git a/fanficdownloader/adapters/adapter_wizardtalesnet.py b/fanficdownloader/adapters/adapter_wizardtalesnet.py new file mode 100644 index 00000000..38451962 --- /dev/null +++ b/fanficdownloader/adapters/adapter_wizardtalesnet.py @@ -0,0 +1,303 @@ +# -*- coding: utf-8 -*- + +# Copyright 2012 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +# Software: eFiction +import time +import logging +logger = logging.getLogger(__name__) +import re +import urllib2 + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, makeDate + +def getClass(): + return WizardTalesNetAdapter + +# Class name has to be unique. Our convention is camel case the +# sitename with Adapter at the end. www is skipped. +class WizardTalesNetAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + self.username = "NoneGiven" # if left empty, site doesn't return any message at all. + self.password = "" + self.is_adult=False + + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + + + # normalized story URL. + self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId')) + + # Each adapter needs to have a unique site abbreviation. + self.story.setMetadata('siteabbrev','wzt') + + # The date format will vary from site to site. + # http://docs.python.org/library/datetime.html#strftime-strptime-behavior + self.dateformat = "%m/%d/%Y" + + @staticmethod # must be @staticmethod, don't remove it. + def getSiteDomain(): + # The site domain. Does have www here, if it uses it. + return 'www.wizardtales.net' + + @classmethod + def getSiteExampleURLs(cls): + return "http://"+cls.getSiteDomain()+"/viewstory.php?sid=1234" + + def getSiteURLPattern(self): + return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$" + + ## Login seems to be reasonably standard across eFiction sites. 
+ def needToLoginCheck(self, data): + if 'Registered Users Only' in data \ + or 'There is no such account on our website' in data \ + or "That password doesn't match the one in our database" in data: + return True + else: + return False + + def performLogin(self, url): + params = {} + + if self.password: + params['penname'] = self.username + params['password'] = self.password + else: + params['penname'] = self.getConfig("username") + params['password'] = self.getConfig("password") + params['cookiecheck'] = '1' + params['submit'] = 'Submit' + + loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login' + logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl, + params['penname'])) + + d = self._fetchUrl(loginUrl, params) + + if "Member Account" not in d : #Member Account + logger.info("Failed to login to URL %s as %s" % (loginUrl, + params['penname'])) + raise exceptions.FailedToLogin(url,params['penname']) + return False + else: + return True + + ## Getting the chapter list and the meta data, plus 'is adult' checking. + def extractChapterUrlsAndMetadata(self): + + if self.is_adult or self.getConfig("is_adult"): + # Weirdly, different sites use different warning numbers. + # If the title search below fails, there's a good chance + # you need a different number. print data at that point + # and see what the 'click here to continue' url says. + addurl = "&ageconsent=ok&warning=4" + else: + addurl="" + + # index=1 makes sure we see the story chapter index. Some + # sites skip that for one-chapter stories. + url = self.url+addurl + logger.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + if self.needToLoginCheck(data): + # need to log in for this one. 
+ self.performLogin(url) + data = self._fetchUrl(url) + + m = re.search(r"'viewstory.php\?sid=\d+((?:&ageconsent=ok)?&warning=\d+)'",data) + if m != None: + if self.is_adult or self.getConfig("is_adult"): + # We tried the default and still got a warning, so + # let's pull the warning number from the 'continue' + # link and reload data. + addurl = m.group(1) + # correct stupid & error in url. + addurl = addurl.replace("&","&") + url = self.url+'&index=1'+addurl + logger.debug("URL 2nd try: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + else: + raise exceptions.AdultCheckRequired(self.url) + + if "Access denied. This story has not been validated by the adminstrators of this site." in data: + raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.") + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulSoup(data) + # print data + + # Now go hunting for all the meta data and the chapter list. + + ## Title + pt = soup.find('div', {'id' : 'pagetitle'}) + a = pt.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) + self.story.setMetadata('title',stripHTML(a)) + + # Find authorid and URL from... author url. + a = pt.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) + self.story.setMetadata('authorId',a['href'].split('=')[1]) + self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href']) + self.story.setMetadata('author',a.string) + + rating=pt.text.split('[')[1].split(']')[0] + self.story.setMetadata('rating', rating) + + # Find the chapters: + for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")): + # just in case there's tags, like <i> in chapter titles. 
+ self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+chapter['href']+addurl)) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + # eFiction sites don't help us out a lot with their meta data + # formating, so it's a little ugly. + + # <span class="label">Rated:</span> NC-17<br /> etc + content=soup.find('div',{'class' : 'content'}) + + for genre in content.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1')): + self.story.addToList('genre',genre.string) + + for warning in content.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')): + self.story.addToList('warnings',warning.string) + + labels = content.findAll('b') + + value = labels[0].previousSibling + svalue = "" + while value != None: + val = value + value = value.previousSibling + while "Categories" not in val: + svalue += str(val) + val = val.nextSibling + self.setDescription(url,svalue) + + + + for labelspan in labels: + value = labelspan.nextSibling + label = labelspan.string + + if 'Summary' in label: + ## Everything until the next span class='label' + svalue = "" + while not defaultGetattr(value,'class') == 'label': + svalue += str(value) + value = value.nextSibling + self.setDescription(url,svalue) + + if 'Word count' in label: + self.story.setMetadata('numWords', stripHTML(value).split(': ')[1].split(';')[0]) + + if 'Categories' in label: + cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories')) + for cat in cats: + self.story.addToList('category',cat.string) + + if 'Characters' in label: + chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters')) + for char in chars: + self.story.addToList('characters',char.string) + + if 'Genre' in label: + genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1')) + for genre in genres: + self.story.addToList('genre',genre.string) + + if 'Warnings' in label: + warnings = 
labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) + for warning in warnings: + self.story.addToList('warnings',warning.string) + + if 'Completed' in label: + if 'Yes' in value: + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + + if 'Published' in label: + self.story.setMetadata('datePublished', makeDate(stripHTML(value).split(': ')[1].split(';')[0], self.dateformat)) + + if 'Updated' in label: + self.story.setMetadata('dateUpdated', makeDate(stripHTML(value).split(': ')[1], self.dateformat)) + + try: + # Find Series name from series URL. + a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+")) + series_name = a.string + series_url = 'http://'+self.host+'/'+a['href'] + + # use BeautifulSoup HTML parser to make everything easier to find. + seriessoup = bs.BeautifulSoup(self._fetchUrl(series_url)) + # can't use ^viewstory...$ in case of higher rated stories with javascript href. + storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+')) + i=1 + for a in storyas: + # skip 'report this' and 'TOC' links + if 'contact.php' not in a['href'] and 'index' not in a['href']: + if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')): + self.setSeries(series_name, i) + self.story.setMetadata('seriesUrl',series_url) + break + i+=1 + + except: + # I find it hard to care if the series parsing fails + pass + + # grab the text for an individual chapter. + def getChapterText(self, url): + + logger.debug('Getting chapter text from: %s' % url) + + soup = bs.BeautifulStoneSoup(self._fetchUrl(url), + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + + div = soup.findAll('div', {'id' : 'story'})[1] + + if None == div: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" 
% url) + + return self.utf8FromSoup(url,div) diff --git a/fanficdownloader/adapters/adapter_wolverineandroguecom.py b/fanficdownloader/adapters/adapter_wolverineandroguecom.py new file mode 100644 index 00000000..d2d88a31 --- /dev/null +++ b/fanficdownloader/adapters/adapter_wolverineandroguecom.py @@ -0,0 +1,219 @@ +# -*- coding: utf-8 -*- + +# Copyright 2012 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import time +import logging +logger = logging.getLogger(__name__) +import re +import urllib2 + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, makeDate + +def getClass(): + return WolverineAndRogueComAdapter + +# Class name has to be unique. Our convention is camel case the +# sitename with Adapter at the end. www is skipped. +class WolverineAndRogueComAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + self.username = "NoneGiven" # if left empty, site doesn't return any message at all. 
+ self.password = "" + self.is_adult=False + + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + + + # normalized story URL. + self._setURL('http://' + self.getSiteDomain() + '/wrfa/viewstory.php?sid='+self.story.getMetadata('storyId')) + + # Each adapter needs to have a unique site abbreviation. + self.story.setMetadata('siteabbrev','wrfa') + + # The date format will vary from site to site. + # http://docs.python.org/library/datetime.html#strftime-strptime-behavior + self.dateformat = "%m/%d/%Y" + + @staticmethod # must be @staticmethod, don't remove it. + def getSiteDomain(): + # The site domain. Does have www here, if it uses it. + return 'www.wolverineandrogue.com' + + @classmethod + def getSiteExampleURLs(cls): + return "http://"+cls.getSiteDomain()+"/wrfa/viewstory.php?sid=1234" + + def getSiteURLPattern(self): + return re.escape("http://"+self.getSiteDomain()+"/wrfa/viewstory.php?sid=")+r"\d+$" + + + ## Getting the chapter list and the meta data, plus 'is adult' checking. + def extractChapterUrlsAndMetadata(self): + + # index=1 makes sure we see the story chapter index. Some + # sites skip that for one-chapter stories. + url = self.url+'&index=1' + logger.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + if "Access denied. This story has not been validated by the adminstrators of this site." in data: + raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.") + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulSoup(data) + # print data + + # Now go hunting for all the meta data and the chapter list. 
+ + ## Title + pt = soup.find('div', {'id' : 'pagetitle'}) + a = pt.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) + self.story.setMetadata('title',stripHTML(a)) + + # Find authorid and URL from... author url. + a = pt.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) + self.story.setMetadata('authorId',a['href'].split('=')[1]) + self.story.setMetadata('authorUrl','http://'+self.host+'/wrfa/'+a['href']) + self.story.setMetadata('author',a.string) + + rating=pt.text.split('(')[1].split(')')[0] + self.story.setMetadata('rating', rating) + + # Find the chapters: + for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")): + # just in case there's tags, like <i> in chapter titles. + self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/wrfa/'+chapter['href'])) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + # eFiction sites don't help us out a lot with their meta data + # formating, so it's a little ugly. 
+ + # <span class="label">Rated:</span> NC-17<br /> etc + content=soup.find('div',{'class' : 'content'}) + labels = soup.findAll('span',{'class':'label'}) + + value = labels[0].previousSibling + svalue = "" + while value != None: + val = value + value = value.previousSibling + while "Categories" not in str(val): + svalue += str(val) + val = val.nextSibling + self.setDescription(url,svalue) + + + for labelspan in labels: + value = labelspan.nextSibling + label = labelspan.string + + if 'Rated' in label: + self.story.setMetadata('rating', value) + + if 'Word count' in label: + self.story.setMetadata('numWords', value.split(' -')[0]) + + if 'Categories' in label: + cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories')) + for cat in cats: + self.story.addToList('category',cat.string) + + if 'Characters' in label: + chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters')) + for char in chars: + self.story.addToList('characters',char.string) + + if 'Genre' in label: + genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1')) + for genre in genres: + self.story.addToList('genre',genre.string) + + if 'Warnings' in label: + warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) + for warning in warnings: + self.story.addToList('warnings',warning.string) + + if 'Complete' in label: + if 'Yes' in value: + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + + if 'Published' in label: + self.story.setMetadata('datePublished', makeDate(value.split(' -')[0], self.dateformat)) + + if 'Updated' in label: + self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat)) + + try: + # Find Series name from series URL. 
+ a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+")) + series_name = a.string + series_url = 'http://'+self.host+'/wrfa/'+a['href'] + + # use BeautifulSoup HTML parser to make everything easier to find. + seriessoup = bs.BeautifulSoup(self._fetchUrl(series_url)) + # can't use ^viewstory...$ in case of higher rated stories with javascript href. + storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+')) + i=1 + for a in storyas: + # skip 'report this' and 'TOC' links + if 'contact.php' not in a['href'] and 'index' not in a['href']: + if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')): + self.setSeries(series_name, i) + self.story.setMetadata('seriesUrl',series_url) + break + i+=1 + + except: + # I find it hard to care if the series parsing fails + pass + + # grab the text for an individual chapter. + def getChapterText(self, url): + + logger.debug('Getting chapter text from: %s' % url) + + soup = bs.BeautifulStoneSoup(self._fetchUrl(url), + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + + div = soup.find('div', {'id' : 'story'}) + + if None == div: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) + + return self.utf8FromSoup(url,div) diff --git a/fanficdownloader/adapters/adapter_wraithbaitcom.py b/fanficdownloader/adapters/adapter_wraithbaitcom.py new file mode 100644 index 00000000..589ec227 --- /dev/null +++ b/fanficdownloader/adapters/adapter_wraithbaitcom.py @@ -0,0 +1,234 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import time +import logging +logger = logging.getLogger(__name__) +import re +import urllib2 + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, makeDate + + +def getClass(): + return WraithBaitComAdapter + +# Class name has to be unique. Our convention is camel case the +# sitename with Adapter at the end. www is skipped. +class WraithBaitComAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + self.username = "NoneGiven" # if left empty, site doesn't return any message at all. + self.password = "" + self.is_adult=False + + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + + + + self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId')) + + # Each adapter needs to have a unique site abbreviation. + self.story.setMetadata('siteabbrev','wb') + + # The date format will vary from site to site. + # http://docs.python.org/library/datetime.html#strftime-strptime-behavior + self.dateformat = "%d %b %Y" + + @staticmethod # must be @staticmethod, don't remove it. + def getSiteDomain(): + # The site domain. Does have www here, if it uses it. 
+ return 'www.wraithbait.com' + + @classmethod + def getSiteExampleURLs(cls): + return "http://"+cls.getSiteDomain()+"/viewstory.php?sid=1234" + + def getSiteURLPattern(self): + return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$" + + ## Getting the chapter list and the meta data, plus 'is adult' checking. + def extractChapterUrlsAndMetadata(self): + + if self.is_adult or self.getConfig("is_adult"): + # Weirdly, different sites use different warning numbers. + # If the title search below fails, there's a good chance + # you need a different number. print data at that point + # and see what the 'click here to continue' url says. + addurl = "&ageconsent=ok&warning=12" + else: + addurl="" + + # index=1 makes sure we see the story chapter index. Some + # sites skip that for one-chapter stories. + url = self.url+'&index=1'+addurl + logger.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + if "for adults only" in data: + raise exceptions.AdultCheckRequired(self.url) + + if "Access denied. This story has not been validated by the adminstrators of this site." in data: + raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.") + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulSoup(data) + # print data + + # Now go hunting for all the meta data and the chapter list. + + ## Title + pt = soup.find('div', {'id' : 'pagetitle'}) + a = pt.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) + self.story.setMetadata('title',stripHTML(a)) + + # Find authorid and URL from... author url. 
+ alist = pt.findAll('a', href=re.compile(r"viewuser.php\?uid=\d+")) + for a in alist: + self.story.addToList('authorId',a['href'].split('=')[1]) + self.story.addToList('authorUrl','http://'+self.host+'/'+a['href']) + self.story.addToList('author',a.string) + + rating=pt.text.split('[')[1].split(']')[0] + self.story.setMetadata('rating', rating) + + st = soup.find('div', {'class' : 'storytitle'}) + a = st.findAll('a', href=re.compile(r'reviews.php\?type=ST&item='+self.story.getMetadata('storyId')+"$"))[1] # second one. + self.story.setMetadata('reviews',stripHTML(a)) + + # Find the chapters: + for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")): + # include author on chapters if multiple authors. + if len(alist) > 1: + add = " by %s"%stripHTML(chapter.findNext('a', href=re.compile(r"viewuser.php\?uid=\d+"))) + else: + add = "" + # just in case there's tags, like <i> in chapter titles. + self.chapterUrls.append((stripHTML(chapter)+add,'http://'+self.host+'/'+chapter['href']+addurl)) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + # eFiction sites don't help us out a lot with their meta data + # formating, so it's a little ugly. 
+ + # utility method + def defaultGetattr(d,k): + try: + return d[k] + except: + return "" + + info = soup.find('div', {'class' : 'small'}) + + word=info.find(text=re.compile("Word count:")).split(':') + self.story.setMetadata('numWords', word[1]) + + cats = info.findAll('a',href=re.compile(r'browse.php\?type=categories&id=\d')) + for cat in cats: + if "General" != cat.string: + self.story.addToList('category',cat.string) + + chars = info.findAll('a',href=re.compile(r'browse.php\?type=characters&charid=\d')) + for char in chars: + self.story.addToList('characters',char.string) + + completed=info.find(text=re.compile("Completed: Yes")) + if completed != None: + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + + date=soup.find('div',{'class' : 'bottom'}) + pd=date.find(text=re.compile("Published:")).string.split(': ') + self.story.setMetadata('datePublished', makeDate(stripHTML(pd[1].split(' U')[0]), self.dateformat)) + self.story.setMetadata('dateUpdated', makeDate(stripHTML(pd[2]), self.dateformat)) + + # <span class="label">Rated:</span> NC-17<br /> etc + labels = soup.findAll('span',{'class':'label'}) + pub=0 + for labelspan in labels: + value = labelspan.nextSibling + label = labelspan.string + + if 'Genres' in label: + genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1')) + for genre in genres: + self.story.addToList('genre',genre.string) + + if 'Warnings' in label: + warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) + for warning in warnings: + self.story.addToList('warnings',warning.string) + + try: + # Find Series name from series URL. + a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+")) + series_name = a.string + series_url = 'http://'+self.host+'/'+a['href'] + + # use BeautifulSoup HTML parser to make everything easier to find. 
+ seriessoup = bs.BeautifulSoup(self._fetchUrl(series_url)) + storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$')) + i=1 + for a in storyas: + if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')): + self.setSeries(series_name, i) + self.story.setMetadata('seriesUrl',series_url) + break + i+=1 + + except: + # I find it hard to care if the series parsing fails + pass + + info.extract() + summary = soup.find('div', {'class' : 'content'}) + self.setDescription(url,summary) + + # grab the text for an individual chapter. + def getChapterText(self, url): + + logger.debug('Getting chapter text from: %s' % url) + + soup = bs.BeautifulSoup(self._fetchUrl(url)) + + div = soup.find('div', {'id' : 'story'}) + + if None == div: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) + + return self.utf8FromSoup(url,div) + diff --git a/fanficdownloader/adapters/base_adapter.py b/fanficdownloader/adapters/base_adapter.py new file mode 100644 index 00000000..dd20a148 --- /dev/null +++ b/fanficdownloader/adapters/base_adapter.py @@ -0,0 +1,576 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import re +import datetime +import time +import logging +import urllib +import urllib2 as u2 +import urlparse as up +import cookielib as cl +from functools import partial +import pickle + +from .. 
import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from ..htmlheuristics import replace_br_with_p + +logger = logging.getLogger(__name__) + +try: + from google.appengine.api import apiproxy_stub_map + def urlfetch_timeout_hook(service, call, request, response): + if call != 'Fetch': + return + # Make the default deadline 10 seconds instead of 5. + if not request.has_deadline(): + request.set_deadline(10.0) + + apiproxy_stub_map.apiproxy.GetPreCallHooks().Append( + 'urlfetch_timeout_hook', urlfetch_timeout_hook, 'urlfetch') + logger.info("Hook to make default deadline 10.0 installed.") +except: + pass + #logger.info("Hook to make default deadline 10.0 NOT installed--not using appengine") + +from ..story import Story +from ..gziphttp import GZipProcessor +from ..configurable import Configurable +from ..htmlcleanup import removeEntities, removeAllEntities, stripHTML +from ..exceptions import InvalidStoryURL + +try: + from .. import chardet as chardet +except ImportError: + chardet = None + +class BaseSiteAdapter(Configurable): + + @classmethod + def matchesSite(cls,site): + return site in cls.getAcceptDomains() + + @classmethod + def getAcceptDomains(cls): + return [cls.getSiteDomain()] + + def validateURL(self): + return re.match(self.getSiteURLPattern(), self.url) + + @staticmethod + def get_empty_cookiejar(): + return cl.LWPCookieJar() + + @staticmethod + def get_empty_pagecache(): + return {} + + def __init__(self, configuration, url): + Configurable.__init__(self, configuration) + + self.username = "NoneGiven" # if left empty, site doesn't return any message at all. + self.password = "" + self.is_adult=False + + self.override_sleep = None + self.cookiejar = self.get_empty_cookiejar() + self.opener = u2.build_opener(u2.HTTPCookieProcessor(self.cookiejar),GZipProcessor()) + # self.opener = u2.build_opener(u2.HTTPCookieProcessor(),GZipProcessor()) + ## Specific UA because too many sites are blocking the default python UA. 
+ self.opener.addheaders = [('User-Agent', self.getConfig('user_agent'))] + self.storyDone = False + self.metadataDone = False + self.story = Story(configuration) + self.story.setMetadata('site',self.getConfigSection()) + self.story.setMetadata('dateCreated',datetime.datetime.now()) + self.chapterUrls = [] # tuples of (chapter title,chapter url) + self.chapterFirst = None + self.chapterLast = None + self.oldchapters = None + self.oldimgs = None + self.oldcover = None # (data of existing cover html, data of existing cover image) + self.calibrebookmark = None + self.logfile = None + + self.pagecache = self.get_empty_pagecache() + + ## order of preference for decoding. + self.decode = ["utf8", + "Windows-1252"] # 1252 is a superset of + # iso-8859-1. Most sites that + # claim to be iso-8859-1 (and + # some that claim to be utf8) + # are really windows-1252. + self._setURL(url) + if not self.validateURL(): + raise InvalidStoryURL(url, + self.getSiteDomain(), + self.getSiteExampleURLs()) + + def get_cookiejar(self): + return self.cookiejar + + def set_cookiejar(self,cj): + self.cookiejar = cj + self.opener = u2.build_opener(u2.HTTPCookieProcessor(self.cookiejar),GZipProcessor()) + self.opener.addheaders = [('User-Agent', self.getConfig('user_agent'))] + + def load_cookiejar(self,filename): + ''' + Needs to be called after adapter create, but before any fetchs + are done. Takes file *name*. + ''' + self.get_cookiejar().load(filename, ignore_discard=True, ignore_expires=True) + + # def save_cookiejar(self,filename): + # ''' + # Assumed to be a FileCookieJar if self.cookiejar set. + # Takes file *name*. 
+ # ''' + # self.get_cookiejar().save(filename, ignore_discard=True, ignore_expires=True) + + # def save_pagecache(self,filename): + # ''' + # Writes pickle of pagecache to file *name* + # ''' + # with open(filename, 'wb') as f: + # pickle.dump(self.get_pagecache(), + # f,protocol=pickle.HIGHEST_PROTOCOL) + + # def load_pagecache(self,filename): + # ''' + # Reads pickle of pagecache from file *name* + # ''' + # with open(filename, 'rb') as f: + # self.set_pagecache(pickle.load(f)) + + def get_pagecache(self): + return self.pagecache + + def set_pagecache(self,d): + self.pagecache=d + + def _get_cachekey(self, url, parameters=None, headers=None): + keylist=[url] + if parameters != None: + keylist.append('&'.join('{0}={1}'.format(key, val) for key, val in sorted(parameters.items()))) + if headers != None: + keylist.append('&'.join('{0}={1}'.format(key, val) for key, val in sorted(headers.items()))) + return '?'.join(keylist) + + def _has_cachekey(self,cachekey): + return self.use_pagecache() and cachekey in self.get_pagecache() + + def _get_from_pagecache(self,cachekey): + if self.use_pagecache(): + return self.get_pagecache().get(cachekey) + else: + return None + + def _set_to_pagecache(self,cachekey,data): + if self.use_pagecache(): + self.get_pagecache()[cachekey] = data + + def use_pagecache(self): + ''' + adapters that will work with the page cache need to implement + this and change it to True. + ''' + return False + + # def story_load(self,filename): + # d = pickle.load(self.story.metadata,filename) + # self.story.metadata = d['metadata'] + # self.chapterUrls = d['chapterlist'] + # self.story.metadataDone = True + + def _setURL(self,url): + self.url = url + self.parsedUrl = up.urlparse(url) + self.host = self.parsedUrl.netloc + self.path = self.parsedUrl.path + self.story.setMetadata('storyUrl',self.url) + +## website encoding(s)--in theory, each website reports the character +## encoding they use for each page. 
In practice, some sites report it +## incorrectly. Each adapter has a default list, usually "utf8, +## Windows-1252" or "Windows-1252, utf8". The special value 'auto' +## will call chardet and use the encoding it reports if it has +90% +## confidence. 'auto' is not reliable. + def _decode(self,data): + if self.getConfig('website_encodings'): + decode = self.getConfigList('website_encodings') + else: + decode = self.decode + + for code in decode: + try: + #print code + if code == "auto": + if not chardet: + logger.info("chardet not available, skipping 'auto' encoding") + continue + detected = chardet.detect(data) + #print detected + if detected['confidence'] > 0.9: + code=detected['encoding'] + else: + continue + return data.decode(code) + except: + logger.debug("code failed:"+code) + pass + logger.info("Could not decode story, tried:%s Stripping non-ASCII."%decode) + return "".join([x for x in data if ord(x) < 128]) + + # Assumes application/x-www-form-urlencoded. parameters, headers are dict()s + def _postUrl(self, url, + parameters={}, + headers={}, + extrasleep=None, + usecache=True): + ''' + When should cache be cleared or not used? logins... + + extrasleep is primarily for ffnet adapter which has extra + sleeps. Passed into fetchs so it can be bypassed when + cache hits. + ''' + cachekey=self._get_cachekey(url, parameters, headers) + if usecache and self._has_cachekey(cachekey): + logger.debug("#####################################\npagecache HIT: %s"%cachekey) + return self._get_from_pagecache(cachekey) + + logger.debug("#####################################\npagecache MISS: %s"%cachekey) + self.do_sleep(extrasleep) + + ## u2.Request assumes POST when data!=None. Also assumes data + ## is application/x-www-form-urlencoded. 
+ if 'Content-type' not in headers: + headers['Content-type']='application/x-www-form-urlencoded' + if 'Accept' not in headers: + headers['Accept']="text/html,*/*" + req = u2.Request(url, + data=urllib.urlencode(parameters), + headers=headers) + data = self._decode(self.opener.open(req,None,float(self.getConfig('connect_timeout',30.0))).read()) + self._set_to_pagecache(cachekey,data) + return data + + def _fetchUrlRaw(self, url, + parameters=None, + extrasleep=None, + usecache=True): + ''' + When should cache be cleared or not used? logins... + + extrasleep is primarily for ffnet adapter which has extra + sleeps. Passed into fetchs so it can be bypassed when + cache hits. + ''' + cachekey=self._get_cachekey(url, parameters) + if usecache and self._has_cachekey(cachekey): + logger.debug("#####################################\npagecache HIT: %s"%cachekey) + return self._get_from_pagecache(cachekey) + + logger.debug("#####################################\npagecache MISS: %s"%cachekey) + self.do_sleep(extrasleep) + if parameters != None: + data = self.opener.open(url.replace(' ','%20'),urllib.urlencode(parameters),float(self.getConfig('connect_timeout',30.0))).read() + else: + data = self.opener.open(url.replace(' ','%20'),None,float(self.getConfig('connect_timeout',30.0))).read() + self._set_to_pagecache(cachekey,data) + return data + + def set_sleep(self,val): + print("\n===========\n set sleep time %s\n==========="%val) + self.override_sleep = val + + def do_sleep(self,extrasleep=None): + if extrasleep: + time.sleep(float(extrasleep)) + if self.override_sleep: + time.sleep(float(self.override_sleep)) + elif self.getConfig('slow_down_sleep_time'): + time.sleep(float(self.getConfig('slow_down_sleep_time'))) + + # parameters is a dict() + def _fetchUrl(self, url, + parameters=None, + usecache=True, + extrasleep=None): + + excpt=None + for sleeptime in [0, 0.5, 4, 9]: + time.sleep(sleeptime) + try: + return self._decode(self._fetchUrlRaw(url, + parameters=parameters, + 
usecache=usecache, + extrasleep=extrasleep)) + except u2.HTTPError, he: + excpt=he + if he.code == 404: + logger.warn("Caught an exception reading URL: %s Exception %s."%(unicode(url),unicode(he))) + break # break out on 404 + # except Exception, e: + # excpt=e + # logger.warn("Caught an exception reading URL: %s Exception %s."%(unicode(url),unicode(e))) + + logger.error("Giving up on %s" %url) + logger.exception(excpt) + raise(excpt) + + # Limit chapters to download. Input starts at 1, list starts at 0 + def setChaptersRange(self,first=None,last=None): + if first: + self.chapterFirst=int(first)-1 + if last: + self.chapterLast=int(last)-1 + + # Does the download the first time it's called. + def getStory(self): + if not self.storyDone: + self.getStoryMetadataOnly(get_cover=True) + + for index, (title,url) in enumerate(self.chapterUrls): + if (self.chapterFirst!=None and index < self.chapterFirst) or \ + (self.chapterLast!=None and index > self.chapterLast): + self.story.addChapter(url, + removeEntities(title), + None) + else: + if self.oldchapters and index < len(self.oldchapters): + data = self.utf8FromSoup(None, + self.oldchapters[index], + partial(cachedfetch,self._fetchUrlRaw,self.oldimgs)) + else: + data = self.getChapterText(url) + self.story.addChapter(url, + removeEntities(title), + removeEntities(data)) + self.storyDone = True + + # include image, but no cover from story, add default_cover_image cover. + if self.getConfig('include_images') and \ + not self.story.cover and \ + self.getConfig('default_cover_image'): + self.story.addImgUrl(None, + #self.getConfig('default_cover_image'), + self.story.formatFileName(self.getConfig('default_cover_image'), + self.getConfig('allow_unsafe_filename')), + self._fetchUrlRaw, + cover=True) + + # no new cover, set old cover, if there is one. + if not self.story.cover and self.oldcover: + self.story.oldcover = self.oldcover + + # cheesy way to carry calibre bookmark file forward across update. 
+ if self.calibrebookmark: + self.story.calibrebookmark = self.calibrebookmark + if self.logfile: + self.story.logfile = self.logfile + + return self.story + + def getStoryMetadataOnly(self,get_cover=True): + if not self.metadataDone: + self.doExtractChapterUrlsAndMetadata(get_cover=get_cover) + + if not self.story.getMetadataRaw('dateUpdated'): + self.story.setMetadata('dateUpdated',self.story.getMetadataRaw('datePublished')) + + self.metadataDone = True + return self.story + + def hookForUpdates(self,chaptercount): + "Usually not needed." + return chaptercount + + ############################### + + @staticmethod + def getSiteDomain(): + "Needs to be overriden in each adapter class." + return 'no such domain' + + @classmethod + def getConfigSection(cls): + "Only needs to be overriden if != site domain." + return cls.getSiteDomain() + + @classmethod + def stripURLParameters(cls,url): + "Only needs to be overriden if URL contains more than one parameter" + ## remove any trailing '&' parameters--?sid=999 will be left. + ## that's all that any of the current adapters need or want. + return re.sub(r"&.*$","",url) + + ## URL pattern validation is done *after* picking an adaptor based + ## on domain instead of *as* the adaptor selector so we can offer + ## the user example(s) for that particular site. + ## Override validateURL(self) instead if you need more control. + def getSiteURLPattern(self): + "Used to validate URL. Should be override in each adapter class." + return '^http://'+re.escape(self.getSiteDomain()) + + @classmethod + def getSiteExampleURLs(cls): + """ + Return a string of space separated example URLs. + Needs to be overriden in each adapter class. It's the adapter + writer's responsibility to make sure the example(s) pass the + validateURL method. + """ + return 'no such example' + + def doExtractChapterUrlsAndMetadata(self,get_cover=True): + ''' + There are a handful of adapters that fetch a cover image while + collecting metadata. 
That isn't needed while *just* + collecting metadata in FG in plugin. Those few will override + this instead of extractChapterUrlsAndMetadata() + ''' + return self.extractChapterUrlsAndMetadata() + + def extractChapterUrlsAndMetadata(self): + "Needs to be overriden in each adapter class. Populates self.story metadata and self.chapterUrls" + pass + + def getChapterText(self, url): + "Needs to be overriden in each adapter class." + pass + + # Just for series, in case we choose to change how it's stored or represented later. + def setSeries(self,name,num): + if self.getConfig('collect_series'): + self.story.setMetadata('series','%s [%s]'%(name, int(num))) + + def setDescription(self,url,svalue): + #print("\n\nsvalue:\n%s\n"%svalue) + if self.getConfig('keep_summary_html'): + if isinstance(svalue,basestring): + svalue = bs.BeautifulSoup(svalue) + self.story.setMetadata('description',self.utf8FromSoup(url,svalue)) + else: + self.story.setMetadata('description',stripHTML(svalue)) + #print("\n\ndescription:\n"+self.story.getMetadata('description')+"\n\n") + + def setCoverImage(self,storyurl,imgurl): + if self.getConfig('include_images'): + self.story.addImgUrl(storyurl,imgurl,self._fetchUrlRaw,cover=True, + coverexclusion=self.getConfig('cover_exclusion_regexp')) + + # This gives us a unicode object, not just a string containing bytes. + # (I gave soup a unicode string, you'd think it could give it back...) + # Now also does a bunch of other common processing for us. + def utf8FromSoup(self,url,soup,fetch=None): + if not fetch: + fetch=self._fetchUrlRaw + + acceptable_attributes = ['href','name','class','id'] + if self.getConfig("keep_style_attr"): + acceptable_attributes.append('style') + #print("include_images:"+self.getConfig('include_images')) + if self.getConfig('include_images'): + acceptable_attributes.extend(('src','alt','longdesc')) + for img in soup.findAll('img'): + # some pre-existing epubs have img tags that had src stripped off. 
+ if img.has_key('src'): + (img['src'],img['longdesc'])=self.story.addImgUrl(url,img['src'],fetch, + coverexclusion=self.getConfig('cover_exclusion_regexp')) + + for attr in soup._getAttrMap().keys(): + if attr not in acceptable_attributes: + del soup[attr] ## strip all tag attributes except href and name + + for t in soup.findAll(recursive=True): + for attr in t._getAttrMap().keys(): + if attr not in acceptable_attributes: + del t[attr] ## strip all tag attributes except href and name + + # these are not acceptable strict XHTML. But we do already have + # CSS classes of the same names defined + if t.name in ('u'): + t['class']=t.name + t.name='span' + if t.name in ('center'): + t['class']=t.name + t.name='div' + # removes paired, but empty non paragraph tags. + if t.name not in ('p') and t.string != None and len(t.string.strip()) == 0 : + t.extract() + + retval = soup.__str__('utf8').decode('utf-8') + + if self.getConfig('nook_img_fix') and not self.getConfig('replace_br_with_p'): + # if the <img> tag doesn't have a div or a p around it, + # nook gets confused and displays it on every page after + # that under the text for the rest of the chapter. + retval = re.sub(r"(?!<(div|p)>)\s*(?P<imgtag><img[^>]+>)\s*(?!</(div|p)>)", + "<div>\g<imgtag></div>",retval) + + # Don't want body tags in chapter html--writers add them. + # This is primarily for epub updates. + retval = re.sub(r"</?body[^>]*>\r?\n?","",retval) + + if self.getConfig("replace_br_with_p"): + # Apply heuristic processing to replace <br> paragraph + # breaks with <p> tags. + retval = replace_br_with_p(retval) + + if self.getConfig('replace_hr'): + # replacing a self-closing tag with a container tag in the + # soup is more difficult than it first appears. So cheat. 
+            retval = retval.replace("<hr />","<div class='center'>* * *</div>")
+
+        return retval
+
+def cachedfetch(realfetch,cache,url):  # return cache[url] if present, otherwise fetch it live
+    if url in cache:
+        return cache[url]
+    else:
+        return realfetch(url)
+
+fullmon = {"January":"01", "February":"02", "March":"03", "April":"04", "May":"05",
+           "June":"06","July":"07", "August":"08", "September":"09", "October":"10",
+           "November":"11", "December":"12" }
+
+def makeDate(string,dateform):  # parse a site date string using a strftime-style format
+    # Surprise!  Abstracting this turned out to be more useful than
+    # just saving bytes.
+
+    # fudge English month names for people whose locale is set to
+    # non-English.  All our current sites date in English, even if
+    # there's non-English content. -- ficbook.net now makes that a
+    # lie.  It has to do something even more complicated to get
+    # Russian month names correct everywhere.
+    do_abbrev = "%b" in dateform
+
+    if "%B" in dateform or do_abbrev:
+        dateform = dateform.replace("%B","%m").replace("%b","%m")
+        for (name,num) in fullmon.items():
+            if do_abbrev:
+                name = name[:3] # first three for abbrev
+            if name in string:
+                string = string.replace(name,num)
+                break
+
+    return datetime.datetime.strptime(string,dateform)
+
diff --git a/fanficdownloader/adapters/base_efiction_adapter.py b/fanficdownloader/adapters/base_efiction_adapter.py
new file mode 100644
index 00000000..403a28a8
--- /dev/null
+++ b/fanficdownloader/adapters/base_efiction_adapter.py
@@ -0,0 +1,426 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2014 Fanficdownloader team
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Software: eFiction +# import time +# import urllib +import logging +logger = logging.getLogger(__name__) +import re +import urllib2 + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, makeDate + +""" +This is a generic adapter for eFiction based archives (see +http://fanlore.org/wiki/List_of_eFiction_Archives for a list). + +Most of them share common traits: + * No HTTPS + * 'www.' is optional + * Default story template is 'viewstory.php' with arguments + * 'sid' the storyId + * 'chapter' for chapters (will be thrown away anyway by + stripURLParameters in base_adapter + Use Printable version which is easier to parse and has everything in one + page and cache between extractChapterUrlsAndMetadata and getChapterText +""" + +# PHP constants +_RUSERSONLY = 'Registered Users Only' +_NOSUCHACCOUNT = "There is no such account on our website" +_WRONGPASSWORD = "That password doesn't match the one in our database" +_USERACCOUNT = 'Member Account' + +# Regular expressions +_REGEX_WARING_PARAM = re.compile("warning=(?P<warningId>\d+)") +_REGEX_CHAPTER_B = re.compile("^(?P<chapterId>\d+)\.") +_REGEX_CHAPTER_PARAM = re.compile("chapter=(?P<chapterId>\d+)$") +_REGEX_CHAPTER_FRAGMENT = re.compile("^#(?P<chapterId>\d+)$") +_REGEX_DOESNT_START_WITH_HTTP = re.compile("^(?!http)") + +class BaseEfictionAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + self.story.setMetadata('siteabbrev',self.getSiteAbbrev()) + self.decode = self.getEncoding() + storyId = re.compile(self.getSiteURLPattern()).match(self.url).group('storyId') + self.story.setMetadata('storyId', storyId) + self._setURL(self.getViewStoryUrl(storyId)) + self.triedLoggingIn = False + self.triedAcceptWarnings = False + self.username = "NoneGiven" # 
if left empty, site doesn't return any message at all. + + @classmethod + def getAcceptDomains(cls): + return [cls.getSiteDomain(),'www.' + cls.getSiteDomain()] + + @classmethod + def getSiteExampleURLs(cls): + return cls.getViewStoryUrl('1234') + ' ' + cls.getViewStoryUrl('1234') + '&chapter=2' + + @classmethod + def getSiteURLPattern(self): + return r"http://(www\.)?%s%s/%s\?sid=(?P<storyId>\d+)" % (self.getSiteDomain(), self.getPathToArchive(), self.getViewStoryPhpName()) + + @classmethod + def getEncoding(cls): + """ + Return an array of character encodings to try to decode the HTML with + """ + return ["Windows-1252", "utf8"] + + + @classmethod + def getPathToArchive(cls): + """ + Get the path segment of the archive, default '/'. + + In many cases, it's '/archive' or '/fanfiction' + """ + return "" + + @classmethod + def getViewStoryPhpName(cls): + """ + Get the name of the story PHP script, by default 'viewstory.php' + """ + return "viewstory.php" + + @classmethod + def getViewUserPhpName(cls): + """ + Get the name of the user PHP script, by default 'viewuser.php' + """ + return "viewuser.php" + + @classmethod + def getUserPhpName(cls): + """ + Get the name of the user PHP script, by default 'viewuser.php' + """ + return "user.php" + + @classmethod + def getDateFormat(self): + """ + Describe the date format of this site in terms of strftime + See http://docs.python.org/library/datetime.html#strftime-strptime-behavior + """ + return "%d %b %Y" + + @classmethod + def getUrlForPhp(self, php): + return "http://%s%s/%s" % (self.getSiteDomain(), self.getPathToArchive(), php) + + @classmethod + def getViewStoryUrl(self, storyId): + """ + Get the URL to a user page on this site. + """ + return "%s?sid=%s" % (self.getUrlForPhp(self.getViewStoryPhpName()), storyId) + + @classmethod + def getViewUserUrl(self, userId): + """ + Get the URL to a user page on this site. 
+ """ + return "%s?sid=%s" % (self.getUrlForPhp(self.getViewUserPhpName()), userId) + + @classmethod + def getLoginUrl(self): + """ + Get the URL to the login page on this site. + """ + return "%s?action=login" % self.getUrlForPhp(self.getUserPhpName()) + + @classmethod + def getMessageRegisteredUsersOnly(self): + """ + Constant _RUSERSONLY defined in languages/en.php + """ + return _RUSERSONLY + + @classmethod + def getMessageThereIsNoSuchAccount(self): + """ + Constant _NOSUCHACCOUNT defined in languages/en.php + """ + return _NOSUCHACCOUNT + + @classmethod + def getMessageWrongPassword(self): + """ + Constant _WRONGPASSWORD defined in languages/en.php + """ + return _WRONGPASSWORD + + @classmethod + def getMessageMemberAccount(self): + """ + Constant _USERACCOUNT defined in languages/en.php + """ + return _USERACCOUNT + + ## Login seems to be reasonably standard across eFiction sites. + @classmethod + def needToLoginCheck(self, html): + """ + Return whether the HTML contains either of _RUSERSONLY, _NOSUCHACCOUNT or _WRONGPASSWORD + """ + return getMessageRegisteredUsersOnly() in html \ + or getMessageThereIsNoSuchAccount in html \ + or getMessageWrongPassword in html + + def _fetch_to_soup(self, url): + """ + Fetch a HTML document, fix it and parse it to BeautifulSoup. + + Replaces old characters, broken meta-tags, non-self-closing hr/br. 
+ + Makes image links absolute so they can be downloaded + """ + try: + html = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + # Some site use old, old-school Comments <!- comment -> (single dash) + html = re.sub("<!-.+?->", "", html) + + # There is a problem with meta tags on some sites where spaces aren't + # properly encoded + html = re.sub("<meta[^<>]+>(.*</meta>)?", "", html) + + # fix non-closing hr/br + html = html.replace("<hr>", "<hr/>") + html = html.replace("<br>", "<br/>") + + soup = bs.BeautifulSoup(html, selfClosingTags=['br','hr']) # otherwise soup eats the br/hr tags.) + + ## fix all local image 'src' to absolute + for img in soup.findAll("img", {"src": _REGEX_DOESNT_START_WITH_HTTP}): + # TODO handle '../../' and so on + if img['src'].startswith('/'): + img['src'] = img['src'][1:] + img['src'] = "http://%s%s/%s" % (self.getSiteDomain(), self.getPathToArchive(), img['src']) + + return soup + + def performLogin(self, url): + params = {} + + if self.password: + params['penname'] = self.username + params['password'] = self.password + else: + params['penname'] = self.getConfig("username") + params['password'] = self.getConfig("password") + params['cookiecheck'] = '1' + params['submit'] = 'Submit' + + logger.debug("Will now login to URL (%s) as (%s)" % (self.getLoginUrl(), params['penname'])) + + d = self._fetchUrl(self.getLoginUrl(), params) + + if self.getMessageMemberAccount() not in d : #Member Account + logger.info("Failed to login to URL <%s> as '%s'" % (self.getLoginUrl(), params['penname'])) + raise exceptions.FailedToLogin(url, params['penname']) + return False + else: + return True + + def handleMetadataPair(self, key, value): + """ + Handles a key-value pair of story metadata. 
+ + Returns straight away if the value is 'None' (that's a string) + + Can be overridden by subclasses:: + def handleMetadataPair(self, key, value): + if key == 'MyCustomKey': + self.story.setMetadata('somekye', value) + else: + super(NameOfMyAdapter, self).handleMetadata(key, value) + """ + # logger.debug("metadata: '%s' == '%s'" % (key, value)) + if value == 'None': + return + elif key == 'Summary': + self.setDescription(self.url, value) + elif 'Genre' in key: + for val in re.split("\s*,\s*", value): + self.story.addToList('genre', val) + elif 'Warning' in key: + for val in re.split("\s*,\s*", value): + self.story.addToList('warnings', val) + elif 'Characters' in key: + for val in re.split("\s*,\s*", value): + self.story.addToList('characters', val) + elif 'Categories' in key: + for val in re.split("\s*,\s*", value): + self.story.addToList('category', val) + elif 'Challenges' in key: + for val in re.split("\s*,\s*", value): + # TODO this should be an official field I guess + self.story.addToList('challenge', val) + elif key == 'Chapters': + self.story.setMetadata('numChapters', int(value)) + elif key == 'Rating': + self.story.setMetadata('rating', value) + elif key == 'Word count': + self.story.setMetadata('numWords', value) + elif key == 'Completed': + if 'Yes' in value: + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + elif key == 'Read': + # TODO this should be an official field I guess + self.story.setMetadata('readings', value) + elif key == 'Published': + self.story.setMetadata('datePublished', makeDate(value, self.getDateFormat())) + elif key == 'Updated': + self.story.setMetadata('dateUpdated', makeDate(value, self.getDateFormat())) + elif key == 'Pairing': + for val in re.split("\s*,\s*", value): + self.story.addToList('ships', val) + elif key == 'Series': + ## TODO is not a link in the printable view, so no seriesURL possible + self.story.setMetadata('series', value) + else: + 
logger.info("Unhandled metadata pair: '%s' : '%s'" % (key, value)) + + def extractChapterUrlsAndMetadata(self): + printUrl = self.url + '&action=printable&textsize=0&chapter=' + if self.getConfig('bulk_load'): + printUrl += 'all' + else: + printUrl += '1' + + + soup = self._fetch_to_soup(printUrl) + + ## Handle warnings and login checks + errorDiv = soup.find("div", "errortext") + while errorDiv is not None: + if self.getMessageRegisteredUsersOnly() in errorDiv.prettify(): + if not self.triedLoggingIn: + self.performLogin(self.url) + soup = self._fetch_to_soup(printUrl) + errorDiv = soup.find("div", "errortext") + self.triedLoggingIn = True + else: + raise exceptions.FailedToLogin(self.url, str(errorDiv)) + else: + warningLink = errorDiv.find("a") + if warningLink is not None and ( \ + 'ageconsent' in warningLink['href'] \ + or 'warning' in warningLink['href']): + if not self.triedAcceptWarnings: + if not (self.is_adult or self.getConfig("is_adult")): + raise exceptions.AdultCheckRequired(self.url) + # XXX Using this method, we're independent of # getHighestWarningLevel + printUrl += "&ageconsent=ok&warning=%s" % (_REGEX_WARING_PARAM.search(warningLink['href']).group(1)) + # printUrl += "&ageconsent=ok&warning=%s" % self.getHighestWarningLevel() + soup = self._fetch_to_soup(printUrl) + errorDiv = soup.find("div", "errortext") + self.triedAcceptWarnings = True + else: + raise exception.FailedToDownload(self.url, str(errorDiv)) + else: + raise exception.FailedToDownload(self.url, str(errorDiv)) + + # title and author + pagetitleDiv = soup.find("div", {"id": "pagetitle"}) + if pagetitleDiv.find('a') is None: + raise execeptions.FailedToDownload("Couldn't find title and author") + self.story.setMetadata('title', pagetitleDiv.find("a").text) + authorLink = pagetitleDiv.findAll("a")[1] + self.story.setMetadata('author', authorLink.text) + self.story.setMetadata('authorId', re.search("\d+", authorLink['href']).group(0)) + self.story.setMetadata('authorUrl', 
self.getViewUserUrl(self.story.getMetadata('authorId'))) + + ## Parse the infobox + labelSpans = soup.find("div", "infobox").find("div", "content").findAll("span", "label") + for labelSpan in labelSpans: + valueStr = "" + nextEl = labelSpan.nextSibling + while nextEl is not None and not (\ + type(nextEl) is bs.Tag \ + and nextEl.name == "span" \ + and nextEl['class'] =='label' \ + ): + ## must string copy nextEl or nextEl will change trees + if (type(nextEl) is bs.Tag): + valueStr += nextEl.prettify() + else: + valueStr += str(nextEl) + nextEl = nextEl.nextSibling + key = labelSpan.text.strip() + + ## strip trailing line breaks + valueStr = re.sub("<br />", "", valueStr) + + ## strip trailing colons + key = re.sub("\s*:\s*$", "", key) + + ## strip whitespace + key = key.strip() + valueStr = stripHTML(valueStr) + + self.handleMetadataPair(key, valueStr) + + ## Chapter URLs + + # If we didn't bulk-load the whole chapter we now need to load + # the non-printable HTML version of the landing page (i.e. 
the story + # URL to get the Chapter titles + if not self.getConfig('bulk_load'): + soup = self._fetch_to_soup(self.url + '&index=1') + + chapterLinks = [] + for b in soup.findAll("b", text=_REGEX_CHAPTER_B): + chapterId = _REGEX_CHAPTER_B.search(b).group('chapterId') + chapterLink = b.findNext("a") + chapterLink['href'] = "%s&chapter=%s" % (self.url, chapterId) + self.chapterUrls.append((chapterLink.text, chapterLink['href'])) + + ## Store reference to soup for getChapterText + self.html = soup + + def getChapterText(self, url): + if self.getConfig('bulk_load'): + logger.debug('Cached chapter text from <%s>' % url) + anchor = _REGEX_CHAPTER_PARAM.search(url).group(1) + chapterDiv = self.html.find("a", {"name": anchor}).parent.findNext("div", "chapter") + else: + logger.debug('Download chapter text from <%s>' % url) + soup = self._fetch_to_soup(url + '&action=printable') + chapterDiv = soup.find("div", "chapter") + return self.utf8FromSoup(self.url, chapterDiv) + +def getClass(): + return BaseEfictionAdapter diff --git a/fanficdownloader/chardet/__init__.py b/fanficdownloader/chardet/__init__.py new file mode 100644 index 00000000..953b3994 --- /dev/null +++ b/fanficdownloader/chardet/__init__.py @@ -0,0 +1,26 @@ +######################## BEGIN LICENSE BLOCK ######################## +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
+# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +__version__ = "2.0.1" + +def detect(aBuf): + import universaldetector + u = universaldetector.UniversalDetector() + u.reset() + u.feed(aBuf) + u.close() + return u.result diff --git a/fanficdownloader/chardet/big5freq.py b/fanficdownloader/chardet/big5freq.py new file mode 100644 index 00000000..c1b0f3ce --- /dev/null +++ b/fanficdownloader/chardet/big5freq.py @@ -0,0 +1,923 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Communicator client code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
+# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +# Big5 frequency table +# by Taiwan's Mandarin Promotion Council +# <http://www.edu.tw:81/mandr/> +# +# 128 --> 0.42261 +# 256 --> 0.57851 +# 512 --> 0.74851 +# 1024 --> 0.89384 +# 2048 --> 0.97583 +# +# Ideal Distribution Ratio = 0.74851/(1-0.74851) =2.98 +# Random Distribution Ration = 512/(5401-512)=0.105 +# +# Typical Distribution Ratio about 25% of Ideal one, still much higher than RDR + +BIG5_TYPICAL_DISTRIBUTION_RATIO = 0.75 + +#Char to FreqOrder table +BIG5_TABLE_SIZE = 5376 + +Big5CharToFreqOrder = ( \ + 1,1801,1506, 255,1431, 198, 9, 82, 6,5008, 177, 202,3681,1256,2821, 110, # 16 +3814, 33,3274, 261, 76, 44,2114, 16,2946,2187,1176, 659,3971, 26,3451,2653, # 32 +1198,3972,3350,4202, 410,2215, 302, 590, 361,1964, 8, 204, 58,4510,5009,1932, # 48 + 63,5010,5011, 317,1614, 75, 222, 159,4203,2417,1480,5012,3555,3091, 224,2822, # 64 +3682, 3, 10,3973,1471, 29,2787,1135,2866,1940, 873, 130,3275,1123, 312,5013, # 80 +4511,2052, 507, 252, 682,5014, 142,1915, 124, 206,2947, 34,3556,3204, 64, 604, # 96 +5015,2501,1977,1978, 155,1991, 645, 641,1606,5016,3452, 337, 72, 406,5017, 80, # 112 + 630, 238,3205,1509, 263, 939,1092,2654, 756,1440,1094,3453, 449, 69,2987, 591, # 128 + 179,2096, 471, 115,2035,1844, 60, 50,2988, 134, 806,1869, 734,2036,3454, 180, # 144 + 995,1607, 156, 537,2907, 688,5018, 319,1305, 779,2145, 514,2379, 298,4512, 359, # 160 +2502, 90,2716,1338, 663, 11, 906,1099,2553, 20,2441, 182, 532,1716,5019, 732, # 176 +1376,4204,1311,1420,3206, 25,2317,1056, 113, 399, 382,1950, 242,3455,2474, 529, # 192 +3276, 475,1447,3683,5020, 117, 21, 656, 810,1297,2300,2334,3557,5021, 126,4205, # 208 + 706, 456, 150, 613,4513, 71,1118,2037,4206, 145,3092, 85, 835, 486,2115,1246, # 
224 +1426, 428, 727,1285,1015, 800, 106, 623, 303,1281,5022,2128,2359, 347,3815, 221, # 240 +3558,3135,5023,1956,1153,4207, 83, 296,1199,3093, 192, 624, 93,5024, 822,1898, # 256 +2823,3136, 795,2065, 991,1554,1542,1592, 27, 43,2867, 859, 139,1456, 860,4514, # 272 + 437, 712,3974, 164,2397,3137, 695, 211,3037,2097, 195,3975,1608,3559,3560,3684, # 288 +3976, 234, 811,2989,2098,3977,2233,1441,3561,1615,2380, 668,2077,1638, 305, 228, # 304 +1664,4515, 467, 415,5025, 262,2099,1593, 239, 108, 300, 200,1033, 512,1247,2078, # 320 +5026,5027,2176,3207,3685,2682, 593, 845,1062,3277, 88,1723,2038,3978,1951, 212, # 336 + 266, 152, 149, 468,1899,4208,4516, 77, 187,5028,3038, 37, 5,2990,5029,3979, # 352 +5030,5031, 39,2524,4517,2908,3208,2079, 55, 148, 74,4518, 545, 483,1474,1029, # 368 +1665, 217,1870,1531,3138,1104,2655,4209, 24, 172,3562, 900,3980,3563,3564,4519, # 384 + 32,1408,2824,1312, 329, 487,2360,2251,2717, 784,2683, 4,3039,3351,1427,1789, # 400 + 188, 109, 499,5032,3686,1717,1790, 888,1217,3040,4520,5033,3565,5034,3352,1520, # 416 +3687,3981, 196,1034, 775,5035,5036, 929,1816, 249, 439, 38,5037,1063,5038, 794, # 432 +3982,1435,2301, 46, 178,3278,2066,5039,2381,5040, 214,1709,4521, 804, 35, 707, # 448 + 324,3688,1601,2554, 140, 459,4210,5041,5042,1365, 839, 272, 978,2262,2580,3456, # 464 +2129,1363,3689,1423, 697, 100,3094, 48, 70,1231, 495,3139,2196,5043,1294,5044, # 480 +2080, 462, 586,1042,3279, 853, 256, 988, 185,2382,3457,1698, 434,1084,5045,3458, # 496 + 314,2625,2788,4522,2335,2336, 569,2285, 637,1817,2525, 757,1162,1879,1616,3459, # 512 + 287,1577,2116, 768,4523,1671,2868,3566,2526,1321,3816, 909,2418,5046,4211, 933, # 528 +3817,4212,2053,2361,1222,4524, 765,2419,1322, 786,4525,5047,1920,1462,1677,2909, # 544 +1699,5048,4526,1424,2442,3140,3690,2600,3353,1775,1941,3460,3983,4213, 309,1369, # 560 +1130,2825, 364,2234,1653,1299,3984,3567,3985,3986,2656, 525,1085,3041, 902,2001, # 576 +1475, 964,4527, 421,1845,1415,1057,2286, 940,1364,3141, 376,4528,4529,1381, 7, 
# 592 +2527, 983,2383, 336,1710,2684,1846, 321,3461, 559,1131,3042,2752,1809,1132,1313, # 608 + 265,1481,1858,5049, 352,1203,2826,3280, 167,1089, 420,2827, 776, 792,1724,3568, # 624 +4214,2443,3281,5050,4215,5051, 446, 229, 333,2753, 901,3818,1200,1557,4530,2657, # 640 +1921, 395,2754,2685,3819,4216,1836, 125, 916,3209,2626,4531,5052,5053,3820,5054, # 656 +5055,5056,4532,3142,3691,1133,2555,1757,3462,1510,2318,1409,3569,5057,2146, 438, # 672 +2601,2910,2384,3354,1068, 958,3043, 461, 311,2869,2686,4217,1916,3210,4218,1979, # 688 + 383, 750,2755,2627,4219, 274, 539, 385,1278,1442,5058,1154,1965, 384, 561, 210, # 704 + 98,1295,2556,3570,5059,1711,2420,1482,3463,3987,2911,1257, 129,5060,3821, 642, # 720 + 523,2789,2790,2658,5061, 141,2235,1333, 68, 176, 441, 876, 907,4220, 603,2602, # 736 + 710, 171,3464, 404, 549, 18,3143,2398,1410,3692,1666,5062,3571,4533,2912,4534, # 752 +5063,2991, 368,5064, 146, 366, 99, 871,3693,1543, 748, 807,1586,1185, 22,2263, # 768 + 379,3822,3211,5065,3212, 505,1942,2628,1992,1382,2319,5066, 380,2362, 218, 702, # 784 +1818,1248,3465,3044,3572,3355,3282,5067,2992,3694, 930,3283,3823,5068, 59,5069, # 800 + 585, 601,4221, 497,3466,1112,1314,4535,1802,5070,1223,1472,2177,5071, 749,1837, # 816 + 690,1900,3824,1773,3988,1476, 429,1043,1791,2236,2117, 917,4222, 447,1086,1629, # 832 +5072, 556,5073,5074,2021,1654, 844,1090, 105, 550, 966,1758,2828,1008,1783, 686, # 848 +1095,5075,2287, 793,1602,5076,3573,2603,4536,4223,2948,2302,4537,3825, 980,2503, # 864 + 544, 353, 527,4538, 908,2687,2913,5077, 381,2629,1943,1348,5078,1341,1252, 560, # 880 +3095,5079,3467,2870,5080,2054, 973, 886,2081, 143,4539,5081,5082, 157,3989, 496, # 896 +4224, 57, 840, 540,2039,4540,4541,3468,2118,1445, 970,2264,1748,1966,2082,4225, # 912 +3144,1234,1776,3284,2829,3695, 773,1206,2130,1066,2040,1326,3990,1738,1725,4226, # 928 + 279,3145, 51,1544,2604, 423,1578,2131,2067, 173,4542,1880,5083,5084,1583, 264, # 944 + 610,3696,4543,2444, 280, 154,5085,5086,5087,1739, 
338,1282,3096, 693,2871,1411, # 960 +1074,3826,2445,5088,4544,5089,5090,1240, 952,2399,5091,2914,1538,2688, 685,1483, # 976 +4227,2475,1436, 953,4228,2055,4545, 671,2400, 79,4229,2446,3285, 608, 567,2689, # 992 +3469,4230,4231,1691, 393,1261,1792,2401,5092,4546,5093,5094,5095,5096,1383,1672, # 1008 +3827,3213,1464, 522,1119, 661,1150, 216, 675,4547,3991,1432,3574, 609,4548,2690, # 1024 +2402,5097,5098,5099,4232,3045, 0,5100,2476, 315, 231,2447, 301,3356,4549,2385, # 1040 +5101, 233,4233,3697,1819,4550,4551,5102, 96,1777,1315,2083,5103, 257,5104,1810, # 1056 +3698,2718,1139,1820,4234,2022,1124,2164,2791,1778,2659,5105,3097, 363,1655,3214, # 1072 +5106,2993,5107,5108,5109,3992,1567,3993, 718, 103,3215, 849,1443, 341,3357,2949, # 1088 +1484,5110,1712, 127, 67, 339,4235,2403, 679,1412, 821,5111,5112, 834, 738, 351, # 1104 +2994,2147, 846, 235,1497,1881, 418,1993,3828,2719, 186,1100,2148,2756,3575,1545, # 1120 +1355,2950,2872,1377, 583,3994,4236,2581,2995,5113,1298,3699,1078,2557,3700,2363, # 1136 + 78,3829,3830, 267,1289,2100,2002,1594,4237, 348, 369,1274,2197,2178,1838,4552, # 1152 +1821,2830,3701,2757,2288,2003,4553,2951,2758, 144,3358, 882,4554,3995,2759,3470, # 1168 +4555,2915,5114,4238,1726, 320,5115,3996,3046, 788,2996,5116,2831,1774,1327,2873, # 1184 +3997,2832,5117,1306,4556,2004,1700,3831,3576,2364,2660, 787,2023, 506, 824,3702, # 1200 + 534, 323,4557,1044,3359,2024,1901, 946,3471,5118,1779,1500,1678,5119,1882,4558, # 1216 + 165, 243,4559,3703,2528, 123, 683,4239, 764,4560, 36,3998,1793, 589,2916, 816, # 1232 + 626,1667,3047,2237,1639,1555,1622,3832,3999,5120,4000,2874,1370,1228,1933, 891, # 1248 +2084,2917, 304,4240,5121, 292,2997,2720,3577, 691,2101,4241,1115,4561, 118, 662, # 1264 +5122, 611,1156, 854,2386,1316,2875, 2, 386, 515,2918,5123,5124,3286, 868,2238, # 1280 +1486, 855,2661, 785,2216,3048,5125,1040,3216,3578,5126,3146, 448,5127,1525,5128, # 1296 +2165,4562,5129,3833,5130,4242,2833,3579,3147, 503, 818,4001,3148,1568, 814, 676, # 1312 +1444, 
306,1749,5131,3834,1416,1030, 197,1428, 805,2834,1501,4563,5132,5133,5134, # 1328 +1994,5135,4564,5136,5137,2198, 13,2792,3704,2998,3149,1229,1917,5138,3835,2132, # 1344 +5139,4243,4565,2404,3580,5140,2217,1511,1727,1120,5141,5142, 646,3836,2448, 307, # 1360 +5143,5144,1595,3217,5145,5146,5147,3705,1113,1356,4002,1465,2529,2530,5148, 519, # 1376 +5149, 128,2133, 92,2289,1980,5150,4003,1512, 342,3150,2199,5151,2793,2218,1981, # 1392 +3360,4244, 290,1656,1317, 789, 827,2365,5152,3837,4566, 562, 581,4004,5153, 401, # 1408 +4567,2252, 94,4568,5154,1399,2794,5155,1463,2025,4569,3218,1944,5156, 828,1105, # 1424 +4245,1262,1394,5157,4246, 605,4570,5158,1784,2876,5159,2835, 819,2102, 578,2200, # 1440 +2952,5160,1502, 436,3287,4247,3288,2836,4005,2919,3472,3473,5161,2721,2320,5162, # 1456 +5163,2337,2068, 23,4571, 193, 826,3838,2103, 699,1630,4248,3098, 390,1794,1064, # 1472 +3581,5164,1579,3099,3100,1400,5165,4249,1839,1640,2877,5166,4572,4573, 137,4250, # 1488 + 598,3101,1967, 780, 104, 974,2953,5167, 278, 899, 253, 402, 572, 504, 493,1339, # 1504 +5168,4006,1275,4574,2582,2558,5169,3706,3049,3102,2253, 565,1334,2722, 863, 41, # 1520 +5170,5171,4575,5172,1657,2338, 19, 463,2760,4251, 606,5173,2999,3289,1087,2085, # 1536 +1323,2662,3000,5174,1631,1623,1750,4252,2691,5175,2878, 791,2723,2663,2339, 232, # 1552 +2421,5176,3001,1498,5177,2664,2630, 755,1366,3707,3290,3151,2026,1609, 119,1918, # 1568 +3474, 862,1026,4253,5178,4007,3839,4576,4008,4577,2265,1952,2477,5179,1125, 817, # 1584 +4254,4255,4009,1513,1766,2041,1487,4256,3050,3291,2837,3840,3152,5180,5181,1507, # 1600 +5182,2692, 733, 40,1632,1106,2879, 345,4257, 841,2531, 230,4578,3002,1847,3292, # 1616 +3475,5183,1263, 986,3476,5184, 735, 879, 254,1137, 857, 622,1300,1180,1388,1562, # 1632 +4010,4011,2954, 967,2761,2665,1349, 592,2134,1692,3361,3003,1995,4258,1679,4012, # 1648 +1902,2188,5185, 739,3708,2724,1296,1290,5186,4259,2201,2202,1922,1563,2605,2559, # 1664 +1871,2762,3004,5187, 435,5188, 343,1108, 596, 
17,1751,4579,2239,3477,3709,5189, # 1680 +4580, 294,3582,2955,1693, 477, 979, 281,2042,3583, 643,2043,3710,2631,2795,2266, # 1696 +1031,2340,2135,2303,3584,4581, 367,1249,2560,5190,3585,5191,4582,1283,3362,2005, # 1712 + 240,1762,3363,4583,4584, 836,1069,3153, 474,5192,2149,2532, 268,3586,5193,3219, # 1728 +1521,1284,5194,1658,1546,4260,5195,3587,3588,5196,4261,3364,2693,1685,4262, 961, # 1744 +1673,2632, 190,2006,2203,3841,4585,4586,5197, 570,2504,3711,1490,5198,4587,2633, # 1760 +3293,1957,4588, 584,1514, 396,1045,1945,5199,4589,1968,2449,5200,5201,4590,4013, # 1776 + 619,5202,3154,3294, 215,2007,2796,2561,3220,4591,3221,4592, 763,4263,3842,4593, # 1792 +5203,5204,1958,1767,2956,3365,3712,1174, 452,1477,4594,3366,3155,5205,2838,1253, # 1808 +2387,2189,1091,2290,4264, 492,5206, 638,1169,1825,2136,1752,4014, 648, 926,1021, # 1824 +1324,4595, 520,4596, 997, 847,1007, 892,4597,3843,2267,1872,3713,2405,1785,4598, # 1840 +1953,2957,3103,3222,1728,4265,2044,3714,4599,2008,1701,3156,1551, 30,2268,4266, # 1856 +5207,2027,4600,3589,5208, 501,5209,4267, 594,3478,2166,1822,3590,3479,3591,3223, # 1872 + 829,2839,4268,5210,1680,3157,1225,4269,5211,3295,4601,4270,3158,2341,5212,4602, # 1888 +4271,5213,4015,4016,5214,1848,2388,2606,3367,5215,4603, 374,4017, 652,4272,4273, # 1904 + 375,1140, 798,5216,5217,5218,2366,4604,2269, 546,1659, 138,3051,2450,4605,5219, # 1920 +2254, 612,1849, 910, 796,3844,1740,1371, 825,3845,3846,5220,2920,2562,5221, 692, # 1936 + 444,3052,2634, 801,4606,4274,5222,1491, 244,1053,3053,4275,4276, 340,5223,4018, # 1952 +1041,3005, 293,1168, 87,1357,5224,1539, 959,5225,2240, 721, 694,4277,3847, 219, # 1968 +1478, 644,1417,3368,2666,1413,1401,1335,1389,4019,5226,5227,3006,2367,3159,1826, # 1984 + 730,1515, 184,2840, 66,4607,5228,1660,2958, 246,3369, 378,1457, 226,3480, 975, # 2000 +4020,2959,1264,3592, 674, 696,5229, 163,5230,1141,2422,2167, 713,3593,3370,4608, # 2016 +4021,5231,5232,1186, 15,5233,1079,1070,5234,1522,3224,3594, 276,1050,2725, 758, # 2032 
+1126, 653,2960,3296,5235,2342, 889,3595,4022,3104,3007, 903,1250,4609,4023,3481, # 2048 +3596,1342,1681,1718, 766,3297, 286, 89,2961,3715,5236,1713,5237,2607,3371,3008, # 2064 +5238,2962,2219,3225,2880,5239,4610,2505,2533, 181, 387,1075,4024, 731,2190,3372, # 2080 +5240,3298, 310, 313,3482,2304, 770,4278, 54,3054, 189,4611,3105,3848,4025,5241, # 2096 +1230,1617,1850, 355,3597,4279,4612,3373, 111,4280,3716,1350,3160,3483,3055,4281, # 2112 +2150,3299,3598,5242,2797,4026,4027,3009, 722,2009,5243,1071, 247,1207,2343,2478, # 2128 +1378,4613,2010, 864,1437,1214,4614, 373,3849,1142,2220, 667,4615, 442,2763,2563, # 2144 +3850,4028,1969,4282,3300,1840, 837, 170,1107, 934,1336,1883,5244,5245,2119,4283, # 2160 +2841, 743,1569,5246,4616,4284, 582,2389,1418,3484,5247,1803,5248, 357,1395,1729, # 2176 +3717,3301,2423,1564,2241,5249,3106,3851,1633,4617,1114,2086,4285,1532,5250, 482, # 2192 +2451,4618,5251,5252,1492, 833,1466,5253,2726,3599,1641,2842,5254,1526,1272,3718, # 2208 +4286,1686,1795, 416,2564,1903,1954,1804,5255,3852,2798,3853,1159,2321,5256,2881, # 2224 +4619,1610,1584,3056,2424,2764, 443,3302,1163,3161,5257,5258,4029,5259,4287,2506, # 2240 +3057,4620,4030,3162,2104,1647,3600,2011,1873,4288,5260,4289, 431,3485,5261, 250, # 2256 + 97, 81,4290,5262,1648,1851,1558, 160, 848,5263, 866, 740,1694,5264,2204,2843, # 2272 +3226,4291,4621,3719,1687, 950,2479, 426, 469,3227,3720,3721,4031,5265,5266,1188, # 2288 + 424,1996, 861,3601,4292,3854,2205,2694, 168,1235,3602,4293,5267,2087,1674,4622, # 2304 +3374,3303, 220,2565,1009,5268,3855, 670,3010, 332,1208, 717,5269,5270,3603,2452, # 2320 +4032,3375,5271, 513,5272,1209,2882,3376,3163,4623,1080,5273,5274,5275,5276,2534, # 2336 +3722,3604, 815,1587,4033,4034,5277,3605,3486,3856,1254,4624,1328,3058,1390,4035, # 2352 +1741,4036,3857,4037,5278, 236,3858,2453,3304,5279,5280,3723,3859,1273,3860,4625, # 2368 +5281, 308,5282,4626, 245,4627,1852,2480,1307,2583, 430, 715,2137,2454,5283, 270, # 2384 + 199,2883,4038,5284,3606,2727,1753, 
761,1754, 725,1661,1841,4628,3487,3724,5285, # 2400 +5286, 587, 14,3305, 227,2608, 326, 480,2270, 943,2765,3607, 291, 650,1884,5287, # 2416 +1702,1226, 102,1547, 62,3488, 904,4629,3489,1164,4294,5288,5289,1224,1548,2766, # 2432 + 391, 498,1493,5290,1386,1419,5291,2056,1177,4630, 813, 880,1081,2368, 566,1145, # 2448 +4631,2291,1001,1035,2566,2609,2242, 394,1286,5292,5293,2069,5294, 86,1494,1730, # 2464 +4039, 491,1588, 745, 897,2963, 843,3377,4040,2767,2884,3306,1768, 998,2221,2070, # 2480 + 397,1827,1195,1970,3725,3011,3378, 284,5295,3861,2507,2138,2120,1904,5296,4041, # 2496 +2151,4042,4295,1036,3490,1905, 114,2567,4296, 209,1527,5297,5298,2964,2844,2635, # 2512 +2390,2728,3164, 812,2568,5299,3307,5300,1559, 737,1885,3726,1210, 885, 28,2695, # 2528 +3608,3862,5301,4297,1004,1780,4632,5302, 346,1982,2222,2696,4633,3863,1742, 797, # 2544 +1642,4043,1934,1072,1384,2152, 896,4044,3308,3727,3228,2885,3609,5303,2569,1959, # 2560 +4634,2455,1786,5304,5305,5306,4045,4298,1005,1308,3728,4299,2729,4635,4636,1528, # 2576 +2610, 161,1178,4300,1983, 987,4637,1101,4301, 631,4046,1157,3229,2425,1343,1241, # 2592 +1016,2243,2570, 372, 877,2344,2508,1160, 555,1935, 911,4047,5307, 466,1170, 169, # 2608 +1051,2921,2697,3729,2481,3012,1182,2012,2571,1251,2636,5308, 992,2345,3491,1540, # 2624 +2730,1201,2071,2406,1997,2482,5309,4638, 528,1923,2191,1503,1874,1570,2369,3379, # 2640 +3309,5310, 557,1073,5311,1828,3492,2088,2271,3165,3059,3107, 767,3108,2799,4639, # 2656 +1006,4302,4640,2346,1267,2179,3730,3230, 778,4048,3231,2731,1597,2667,5312,4641, # 2672 +5313,3493,5314,5315,5316,3310,2698,1433,3311, 131, 95,1504,4049, 723,4303,3166, # 2688 +1842,3610,2768,2192,4050,2028,2105,3731,5317,3013,4051,1218,5318,3380,3232,4052, # 2704 +4304,2584, 248,1634,3864, 912,5319,2845,3732,3060,3865, 654, 53,5320,3014,5321, # 2720 +1688,4642, 777,3494,1032,4053,1425,5322, 191, 820,2121,2846, 971,4643, 931,3233, # 2736 + 135, 664, 783,3866,1998, 772,2922,1936,4054,3867,4644,2923,3234, 282,2732, 640, # 
2752 +1372,3495,1127, 922, 325,3381,5323,5324, 711,2045,5325,5326,4055,2223,2800,1937, # 2768 +4056,3382,2224,2255,3868,2305,5327,4645,3869,1258,3312,4057,3235,2139,2965,4058, # 2784 +4059,5328,2225, 258,3236,4646, 101,1227,5329,3313,1755,5330,1391,3314,5331,2924, # 2800 +2057, 893,5332,5333,5334,1402,4305,2347,5335,5336,3237,3611,5337,5338, 878,1325, # 2816 +1781,2801,4647, 259,1385,2585, 744,1183,2272,4648,5339,4060,2509,5340, 684,1024, # 2832 +4306,5341, 472,3612,3496,1165,3315,4061,4062, 322,2153, 881, 455,1695,1152,1340, # 2848 + 660, 554,2154,4649,1058,4650,4307, 830,1065,3383,4063,4651,1924,5342,1703,1919, # 2864 +5343, 932,2273, 122,5344,4652, 947, 677,5345,3870,2637, 297,1906,1925,2274,4653, # 2880 +2322,3316,5346,5347,4308,5348,4309, 84,4310, 112, 989,5349, 547,1059,4064, 701, # 2896 +3613,1019,5350,4311,5351,3497, 942, 639, 457,2306,2456, 993,2966, 407, 851, 494, # 2912 +4654,3384, 927,5352,1237,5353,2426,3385, 573,4312, 680, 921,2925,1279,1875, 285, # 2928 + 790,1448,1984, 719,2168,5354,5355,4655,4065,4066,1649,5356,1541, 563,5357,1077, # 2944 +5358,3386,3061,3498, 511,3015,4067,4068,3733,4069,1268,2572,3387,3238,4656,4657, # 2960 +5359, 535,1048,1276,1189,2926,2029,3167,1438,1373,2847,2967,1134,2013,5360,4313, # 2976 +1238,2586,3109,1259,5361, 700,5362,2968,3168,3734,4314,5363,4315,1146,1876,1907, # 2992 +4658,2611,4070, 781,2427, 132,1589, 203, 147, 273,2802,2407, 898,1787,2155,4071, # 3008 +4072,5364,3871,2803,5365,5366,4659,4660,5367,3239,5368,1635,3872, 965,5369,1805, # 3024 +2699,1516,3614,1121,1082,1329,3317,4073,1449,3873, 65,1128,2848,2927,2769,1590, # 3040 +3874,5370,5371, 12,2668, 45, 976,2587,3169,4661, 517,2535,1013,1037,3240,5372, # 3056 +3875,2849,5373,3876,5374,3499,5375,2612, 614,1999,2323,3877,3110,2733,2638,5376, # 3072 +2588,4316, 599,1269,5377,1811,3735,5378,2700,3111, 759,1060, 489,1806,3388,3318, # 3088 +1358,5379,5380,2391,1387,1215,2639,2256, 490,5381,5382,4317,1759,2392,2348,5383, # 3104 
+4662,3878,1908,4074,2640,1807,3241,4663,3500,3319,2770,2349, 874,5384,5385,3501, # 3120 +3736,1859, 91,2928,3737,3062,3879,4664,5386,3170,4075,2669,5387,3502,1202,1403, # 3136 +3880,2969,2536,1517,2510,4665,3503,2511,5388,4666,5389,2701,1886,1495,1731,4076, # 3152 +2370,4667,5390,2030,5391,5392,4077,2702,1216, 237,2589,4318,2324,4078,3881,4668, # 3168 +4669,2703,3615,3504, 445,4670,5393,5394,5395,5396,2771, 61,4079,3738,1823,4080, # 3184 +5397, 687,2046, 935, 925, 405,2670, 703,1096,1860,2734,4671,4081,1877,1367,2704, # 3200 +3389, 918,2106,1782,2483, 334,3320,1611,1093,4672, 564,3171,3505,3739,3390, 945, # 3216 +2641,2058,4673,5398,1926, 872,4319,5399,3506,2705,3112, 349,4320,3740,4082,4674, # 3232 +3882,4321,3741,2156,4083,4675,4676,4322,4677,2408,2047, 782,4084, 400, 251,4323, # 3248 +1624,5400,5401, 277,3742, 299,1265, 476,1191,3883,2122,4324,4325,1109, 205,5402, # 3264 +2590,1000,2157,3616,1861,5403,5404,5405,4678,5406,4679,2573, 107,2484,2158,4085, # 3280 +3507,3172,5407,1533, 541,1301, 158, 753,4326,2886,3617,5408,1696, 370,1088,4327, # 3296 +4680,3618, 579, 327, 440, 162,2244, 269,1938,1374,3508, 968,3063, 56,1396,3113, # 3312 +2107,3321,3391,5409,1927,2159,4681,3016,5410,3619,5411,5412,3743,4682,2485,5413, # 3328 +2804,5414,1650,4683,5415,2613,5416,5417,4086,2671,3392,1149,3393,4087,3884,4088, # 3344 +5418,1076, 49,5419, 951,3242,3322,3323, 450,2850, 920,5420,1812,2805,2371,4328, # 3360 +1909,1138,2372,3885,3509,5421,3243,4684,1910,1147,1518,2428,4685,3886,5422,4686, # 3376 +2393,2614, 260,1796,3244,5423,5424,3887,3324, 708,5425,3620,1704,5426,3621,1351, # 3392 +1618,3394,3017,1887, 944,4329,3395,4330,3064,3396,4331,5427,3744, 422, 413,1714, # 3408 +3325, 500,2059,2350,4332,2486,5428,1344,1911, 954,5429,1668,5430,5431,4089,2409, # 3424 +4333,3622,3888,4334,5432,2307,1318,2512,3114, 133,3115,2887,4687, 629, 31,2851, # 3440 +2706,3889,4688, 850, 949,4689,4090,2970,1732,2089,4335,1496,1853,5433,4091, 620, # 3456 +3245, 
981,1242,3745,3397,1619,3746,1643,3326,2140,2457,1971,1719,3510,2169,5434, # 3472 +3246,5435,5436,3398,1829,5437,1277,4690,1565,2048,5438,1636,3623,3116,5439, 869, # 3488 +2852, 655,3890,3891,3117,4092,3018,3892,1310,3624,4691,5440,5441,5442,1733, 558, # 3504 +4692,3747, 335,1549,3065,1756,4336,3748,1946,3511,1830,1291,1192, 470,2735,2108, # 3520 +2806, 913,1054,4093,5443,1027,5444,3066,4094,4693, 982,2672,3399,3173,3512,3247, # 3536 +3248,1947,2807,5445, 571,4694,5446,1831,5447,3625,2591,1523,2429,5448,2090, 984, # 3552 +4695,3749,1960,5449,3750, 852, 923,2808,3513,3751, 969,1519, 999,2049,2325,1705, # 3568 +5450,3118, 615,1662, 151, 597,4095,2410,2326,1049, 275,4696,3752,4337, 568,3753, # 3584 +3626,2487,4338,3754,5451,2430,2275, 409,3249,5452,1566,2888,3514,1002, 769,2853, # 3600 + 194,2091,3174,3755,2226,3327,4339, 628,1505,5453,5454,1763,2180,3019,4096, 521, # 3616 +1161,2592,1788,2206,2411,4697,4097,1625,4340,4341, 412, 42,3119, 464,5455,2642, # 3632 +4698,3400,1760,1571,2889,3515,2537,1219,2207,3893,2643,2141,2373,4699,4700,3328, # 3648 +1651,3401,3627,5456,5457,3628,2488,3516,5458,3756,5459,5460,2276,2092, 460,5461, # 3664 +4701,5462,3020, 962, 588,3629, 289,3250,2644,1116, 52,5463,3067,1797,5464,5465, # 3680 +5466,1467,5467,1598,1143,3757,4342,1985,1734,1067,4702,1280,3402, 465,4703,1572, # 3696 + 510,5468,1928,2245,1813,1644,3630,5469,4704,3758,5470,5471,2673,1573,1534,5472, # 3712 +5473, 536,1808,1761,3517,3894,3175,2645,5474,5475,5476,4705,3518,2929,1912,2809, # 3728 +5477,3329,1122, 377,3251,5478, 360,5479,5480,4343,1529, 551,5481,2060,3759,1769, # 3744 +2431,5482,2930,4344,3330,3120,2327,2109,2031,4706,1404, 136,1468,1479, 672,1171, # 3760 +3252,2308, 271,3176,5483,2772,5484,2050, 678,2736, 865,1948,4707,5485,2014,4098, # 3776 +2971,5486,2737,2227,1397,3068,3760,4708,4709,1735,2931,3403,3631,5487,3895, 509, # 3792 +2854,2458,2890,3896,5488,5489,3177,3178,4710,4345,2538,4711,2309,1166,1010, 552, # 3808 + 
681,1888,5490,5491,2972,2973,4099,1287,1596,1862,3179, 358, 453, 736, 175, 478, # 3824 +1117, 905,1167,1097,5492,1854,1530,5493,1706,5494,2181,3519,2292,3761,3520,3632, # 3840 +4346,2093,4347,5495,3404,1193,2489,4348,1458,2193,2208,1863,1889,1421,3331,2932, # 3856 +3069,2182,3521, 595,2123,5496,4100,5497,5498,4349,1707,2646, 223,3762,1359, 751, # 3872 +3121, 183,3522,5499,2810,3021, 419,2374, 633, 704,3897,2394, 241,5500,5501,5502, # 3888 + 838,3022,3763,2277,2773,2459,3898,1939,2051,4101,1309,3122,2246,1181,5503,1136, # 3904 +2209,3899,2375,1446,4350,2310,4712,5504,5505,4351,1055,2615, 484,3764,5506,4102, # 3920 + 625,4352,2278,3405,1499,4353,4103,5507,4104,4354,3253,2279,2280,3523,5508,5509, # 3936 +2774, 808,2616,3765,3406,4105,4355,3123,2539, 526,3407,3900,4356, 955,5510,1620, # 3952 +4357,2647,2432,5511,1429,3766,1669,1832, 994, 928,5512,3633,1260,5513,5514,5515, # 3968 +1949,2293, 741,2933,1626,4358,2738,2460, 867,1184, 362,3408,1392,5516,5517,4106, # 3984 +4359,1770,1736,3254,2934,4713,4714,1929,2707,1459,1158,5518,3070,3409,2891,1292, # 4000 +1930,2513,2855,3767,1986,1187,2072,2015,2617,4360,5519,2574,2514,2170,3768,2490, # 4016 +3332,5520,3769,4715,5521,5522, 666,1003,3023,1022,3634,4361,5523,4716,1814,2257, # 4032 + 574,3901,1603, 295,1535, 705,3902,4362, 283, 858, 417,5524,5525,3255,4717,4718, # 4048 +3071,1220,1890,1046,2281,2461,4107,1393,1599, 689,2575, 388,4363,5526,2491, 802, # 4064 +5527,2811,3903,2061,1405,2258,5528,4719,3904,2110,1052,1345,3256,1585,5529, 809, # 4080 +5530,5531,5532, 575,2739,3524, 956,1552,1469,1144,2328,5533,2329,1560,2462,3635, # 4096 +3257,4108, 616,2210,4364,3180,2183,2294,5534,1833,5535,3525,4720,5536,1319,3770, # 4112 +3771,1211,3636,1023,3258,1293,2812,5537,5538,5539,3905, 607,2311,3906, 762,2892, # 4128 +1439,4365,1360,4721,1485,3072,5540,4722,1038,4366,1450,2062,2648,4367,1379,4723, # 4144 +2593,5541,5542,4368,1352,1414,2330,2935,1172,5543,5544,3907,3908,4724,1798,1451, # 4160 
+5545,5546,5547,5548,2936,4109,4110,2492,2351, 411,4111,4112,3637,3333,3124,4725, # 4176 +1561,2674,1452,4113,1375,5549,5550, 47,2974, 316,5551,1406,1591,2937,3181,5552, # 4192 +1025,2142,3125,3182, 354,2740, 884,2228,4369,2412, 508,3772, 726,3638, 996,2433, # 4208 +3639, 729,5553, 392,2194,1453,4114,4726,3773,5554,5555,2463,3640,2618,1675,2813, # 4224 + 919,2352,2975,2353,1270,4727,4115, 73,5556,5557, 647,5558,3259,2856,2259,1550, # 4240 +1346,3024,5559,1332, 883,3526,5560,5561,5562,5563,3334,2775,5564,1212, 831,1347, # 4256 +4370,4728,2331,3909,1864,3073, 720,3910,4729,4730,3911,5565,4371,5566,5567,4731, # 4272 +5568,5569,1799,4732,3774,2619,4733,3641,1645,2376,4734,5570,2938, 669,2211,2675, # 4288 +2434,5571,2893,5572,5573,1028,3260,5574,4372,2413,5575,2260,1353,5576,5577,4735, # 4304 +3183, 518,5578,4116,5579,4373,1961,5580,2143,4374,5581,5582,3025,2354,2355,3912, # 4320 + 516,1834,1454,4117,2708,4375,4736,2229,2620,1972,1129,3642,5583,2776,5584,2976, # 4336 +1422, 577,1470,3026,1524,3410,5585,5586, 432,4376,3074,3527,5587,2594,1455,2515, # 4352 +2230,1973,1175,5588,1020,2741,4118,3528,4737,5589,2742,5590,1743,1361,3075,3529, # 4368 +2649,4119,4377,4738,2295, 895, 924,4378,2171, 331,2247,3076, 166,1627,3077,1098, # 4384 +5591,1232,2894,2231,3411,4739, 657, 403,1196,2377, 542,3775,3412,1600,4379,3530, # 4400 +5592,4740,2777,3261, 576, 530,1362,4741,4742,2540,2676,3776,4120,5593, 842,3913, # 4416 +5594,2814,2032,1014,4121, 213,2709,3413, 665, 621,4380,5595,3777,2939,2435,5596, # 4432 +2436,3335,3643,3414,4743,4381,2541,4382,4744,3644,1682,4383,3531,1380,5597, 724, # 4448 +2282, 600,1670,5598,1337,1233,4745,3126,2248,5599,1621,4746,5600, 651,4384,5601, # 4464 +1612,4385,2621,5602,2857,5603,2743,2312,3078,5604, 716,2464,3079, 174,1255,2710, # 4480 +4122,3645, 548,1320,1398, 728,4123,1574,5605,1891,1197,3080,4124,5606,3081,3082, # 4496 +3778,3646,3779, 747,5607, 635,4386,4747,5608,5609,5610,4387,5611,5612,4748,5613, # 4512 +3415,4749,2437, 
451,5614,3780,2542,2073,4388,2744,4389,4125,5615,1764,4750,5616, # 4528 +4390, 350,4751,2283,2395,2493,5617,4391,4126,2249,1434,4127, 488,4752, 458,4392, # 4544 +4128,3781, 771,1330,2396,3914,2576,3184,2160,2414,1553,2677,3185,4393,5618,2494, # 4560 +2895,2622,1720,2711,4394,3416,4753,5619,2543,4395,5620,3262,4396,2778,5621,2016, # 4576 +2745,5622,1155,1017,3782,3915,5623,3336,2313, 201,1865,4397,1430,5624,4129,5625, # 4592 +5626,5627,5628,5629,4398,1604,5630, 414,1866, 371,2595,4754,4755,3532,2017,3127, # 4608 +4756,1708, 960,4399, 887, 389,2172,1536,1663,1721,5631,2232,4130,2356,2940,1580, # 4624 +5632,5633,1744,4757,2544,4758,4759,5634,4760,5635,2074,5636,4761,3647,3417,2896, # 4640 +4400,5637,4401,2650,3418,2815, 673,2712,2465, 709,3533,4131,3648,4402,5638,1148, # 4656 + 502, 634,5639,5640,1204,4762,3649,1575,4763,2623,3783,5641,3784,3128, 948,3263, # 4672 + 121,1745,3916,1110,5642,4403,3083,2516,3027,4132,3785,1151,1771,3917,1488,4133, # 4688 +1987,5643,2438,3534,5644,5645,2094,5646,4404,3918,1213,1407,2816, 531,2746,2545, # 4704 +3264,1011,1537,4764,2779,4405,3129,1061,5647,3786,3787,1867,2897,5648,2018, 120, # 4720 +4406,4407,2063,3650,3265,2314,3919,2678,3419,1955,4765,4134,5649,3535,1047,2713, # 4736 +1266,5650,1368,4766,2858, 649,3420,3920,2546,2747,1102,2859,2679,5651,5652,2000, # 4752 +5653,1111,3651,2977,5654,2495,3921,3652,2817,1855,3421,3788,5655,5656,3422,2415, # 4768 +2898,3337,3266,3653,5657,2577,5658,3654,2818,4135,1460, 856,5659,3655,5660,2899, # 4784 +2978,5661,2900,3922,5662,4408, 632,2517, 875,3923,1697,3924,2296,5663,5664,4767, # 4800 +3028,1239, 580,4768,4409,5665, 914, 936,2075,1190,4136,1039,2124,5666,5667,5668, # 4816 +5669,3423,1473,5670,1354,4410,3925,4769,2173,3084,4137, 915,3338,4411,4412,3339, # 4832 +1605,1835,5671,2748, 398,3656,4413,3926,4138, 328,1913,2860,4139,3927,1331,4414, # 4848 +3029, 937,4415,5672,3657,4140,4141,3424,2161,4770,3425, 524, 742, 538,3085,1012, # 4864 +5673,5674,3928,2466,5675, 658,1103, 
225,3929,5676,5677,4771,5678,4772,5679,3267, # 4880 +1243,5680,4142, 963,2250,4773,5681,2714,3658,3186,5682,5683,2596,2332,5684,4774, # 4896 +5685,5686,5687,3536, 957,3426,2547,2033,1931,2941,2467, 870,2019,3659,1746,2780, # 4912 +2781,2439,2468,5688,3930,5689,3789,3130,3790,3537,3427,3791,5690,1179,3086,5691, # 4928 +3187,2378,4416,3792,2548,3188,3131,2749,4143,5692,3428,1556,2549,2297, 977,2901, # 4944 +2034,4144,1205,3429,5693,1765,3430,3189,2125,1271, 714,1689,4775,3538,5694,2333, # 4960 +3931, 533,4417,3660,2184, 617,5695,2469,3340,3539,2315,5696,5697,3190,5698,5699, # 4976 +3932,1988, 618, 427,2651,3540,3431,5700,5701,1244,1690,5702,2819,4418,4776,5703, # 4992 +3541,4777,5704,2284,1576, 473,3661,4419,3432, 972,5705,3662,5706,3087,5707,5708, # 5008 +4778,4779,5709,3793,4145,4146,5710, 153,4780, 356,5711,1892,2902,4420,2144, 408, # 5024 + 803,2357,5712,3933,5713,4421,1646,2578,2518,4781,4782,3934,5714,3935,4422,5715, # 5040 +2416,3433, 752,5716,5717,1962,3341,2979,5718, 746,3030,2470,4783,4423,3794, 698, # 5056 +4784,1893,4424,3663,2550,4785,3664,3936,5719,3191,3434,5720,1824,1302,4147,2715, # 5072 +3937,1974,4425,5721,4426,3192, 823,1303,1288,1236,2861,3542,4148,3435, 774,3938, # 5088 +5722,1581,4786,1304,2862,3939,4787,5723,2440,2162,1083,3268,4427,4149,4428, 344, # 5104 +1173, 288,2316, 454,1683,5724,5725,1461,4788,4150,2597,5726,5727,4789, 985, 894, # 5120 +5728,3436,3193,5729,1914,2942,3795,1989,5730,2111,1975,5731,4151,5732,2579,1194, # 5136 + 425,5733,4790,3194,1245,3796,4429,5734,5735,2863,5736, 636,4791,1856,3940, 760, # 5152 +1800,5737,4430,2212,1508,4792,4152,1894,1684,2298,5738,5739,4793,4431,4432,2213, # 5168 + 479,5740,5741, 832,5742,4153,2496,5743,2980,2497,3797, 990,3132, 627,1815,2652, # 5184 +4433,1582,4434,2126,2112,3543,4794,5744, 799,4435,3195,5745,4795,2113,1737,3031, # 5200 +1018, 543, 754,4436,3342,1676,4796,4797,4154,4798,1489,5746,3544,5747,2624,2903, # 5216 
+4155,5748,5749,2981,5750,5751,5752,5753,3196,4799,4800,2185,1722,5754,3269,3270, # 5232 +1843,3665,1715, 481, 365,1976,1857,5755,5756,1963,2498,4801,5757,2127,3666,3271, # 5248 + 433,1895,2064,2076,5758, 602,2750,5759,5760,5761,5762,5763,3032,1628,3437,5764, # 5264 +3197,4802,4156,2904,4803,2519,5765,2551,2782,5766,5767,5768,3343,4804,2905,5769, # 5280 +4805,5770,2864,4806,4807,1221,2982,4157,2520,5771,5772,5773,1868,1990,5774,5775, # 5296 +5776,1896,5777,5778,4808,1897,4158, 318,5779,2095,4159,4437,5780,5781, 485,5782, # 5312 + 938,3941, 553,2680, 116,5783,3942,3667,5784,3545,2681,2783,3438,3344,2820,5785, # 5328 +3668,2943,4160,1747,2944,2983,5786,5787, 207,5788,4809,5789,4810,2521,5790,3033, # 5344 + 890,3669,3943,5791,1878,3798,3439,5792,2186,2358,3440,1652,5793,5794,5795, 941, # 5360 +2299, 208,3546,4161,2020, 330,4438,3944,2906,2499,3799,4439,4811,5796,5797,5798, # 5376 #last 512 +#Everything below is of no interest for detection purpose +2522,1613,4812,5799,3345,3945,2523,5800,4162,5801,1637,4163,2471,4813,3946,5802, # 5392 +2500,3034,3800,5803,5804,2195,4814,5805,2163,5806,5807,5808,5809,5810,5811,5812, # 5408 +5813,5814,5815,5816,5817,5818,5819,5820,5821,5822,5823,5824,5825,5826,5827,5828, # 5424 +5829,5830,5831,5832,5833,5834,5835,5836,5837,5838,5839,5840,5841,5842,5843,5844, # 5440 +5845,5846,5847,5848,5849,5850,5851,5852,5853,5854,5855,5856,5857,5858,5859,5860, # 5456 +5861,5862,5863,5864,5865,5866,5867,5868,5869,5870,5871,5872,5873,5874,5875,5876, # 5472 +5877,5878,5879,5880,5881,5882,5883,5884,5885,5886,5887,5888,5889,5890,5891,5892, # 5488 +5893,5894,5895,5896,5897,5898,5899,5900,5901,5902,5903,5904,5905,5906,5907,5908, # 5504 +5909,5910,5911,5912,5913,5914,5915,5916,5917,5918,5919,5920,5921,5922,5923,5924, # 5520 +5925,5926,5927,5928,5929,5930,5931,5932,5933,5934,5935,5936,5937,5938,5939,5940, # 5536 +5941,5942,5943,5944,5945,5946,5947,5948,5949,5950,5951,5952,5953,5954,5955,5956, # 5552 
+5957,5958,5959,5960,5961,5962,5963,5964,5965,5966,5967,5968,5969,5970,5971,5972, # 5568 +5973,5974,5975,5976,5977,5978,5979,5980,5981,5982,5983,5984,5985,5986,5987,5988, # 5584 +5989,5990,5991,5992,5993,5994,5995,5996,5997,5998,5999,6000,6001,6002,6003,6004, # 5600 +6005,6006,6007,6008,6009,6010,6011,6012,6013,6014,6015,6016,6017,6018,6019,6020, # 5616 +6021,6022,6023,6024,6025,6026,6027,6028,6029,6030,6031,6032,6033,6034,6035,6036, # 5632 +6037,6038,6039,6040,6041,6042,6043,6044,6045,6046,6047,6048,6049,6050,6051,6052, # 5648 +6053,6054,6055,6056,6057,6058,6059,6060,6061,6062,6063,6064,6065,6066,6067,6068, # 5664 +6069,6070,6071,6072,6073,6074,6075,6076,6077,6078,6079,6080,6081,6082,6083,6084, # 5680 +6085,6086,6087,6088,6089,6090,6091,6092,6093,6094,6095,6096,6097,6098,6099,6100, # 5696 +6101,6102,6103,6104,6105,6106,6107,6108,6109,6110,6111,6112,6113,6114,6115,6116, # 5712 +6117,6118,6119,6120,6121,6122,6123,6124,6125,6126,6127,6128,6129,6130,6131,6132, # 5728 +6133,6134,6135,6136,6137,6138,6139,6140,6141,6142,6143,6144,6145,6146,6147,6148, # 5744 +6149,6150,6151,6152,6153,6154,6155,6156,6157,6158,6159,6160,6161,6162,6163,6164, # 5760 +6165,6166,6167,6168,6169,6170,6171,6172,6173,6174,6175,6176,6177,6178,6179,6180, # 5776 +6181,6182,6183,6184,6185,6186,6187,6188,6189,6190,6191,6192,6193,6194,6195,6196, # 5792 +6197,6198,6199,6200,6201,6202,6203,6204,6205,6206,6207,6208,6209,6210,6211,6212, # 5808 +6213,6214,6215,6216,6217,6218,6219,6220,6221,6222,6223,3670,6224,6225,6226,6227, # 5824 +6228,6229,6230,6231,6232,6233,6234,6235,6236,6237,6238,6239,6240,6241,6242,6243, # 5840 +6244,6245,6246,6247,6248,6249,6250,6251,6252,6253,6254,6255,6256,6257,6258,6259, # 5856 +6260,6261,6262,6263,6264,6265,6266,6267,6268,6269,6270,6271,6272,6273,6274,6275, # 5872 +6276,6277,6278,6279,6280,6281,6282,6283,6284,6285,4815,6286,6287,6288,6289,6290, # 5888 +6291,6292,4816,6293,6294,6295,6296,6297,6298,6299,6300,6301,6302,6303,6304,6305, # 5904 
+6306,6307,6308,6309,6310,6311,4817,4818,6312,6313,6314,6315,6316,6317,6318,4819, # 5920 +6319,6320,6321,6322,6323,6324,6325,6326,6327,6328,6329,6330,6331,6332,6333,6334, # 5936 +6335,6336,6337,4820,6338,6339,6340,6341,6342,6343,6344,6345,6346,6347,6348,6349, # 5952 +6350,6351,6352,6353,6354,6355,6356,6357,6358,6359,6360,6361,6362,6363,6364,6365, # 5968 +6366,6367,6368,6369,6370,6371,6372,6373,6374,6375,6376,6377,6378,6379,6380,6381, # 5984 +6382,6383,6384,6385,6386,6387,6388,6389,6390,6391,6392,6393,6394,6395,6396,6397, # 6000 +6398,6399,6400,6401,6402,6403,6404,6405,6406,6407,6408,6409,6410,3441,6411,6412, # 6016 +6413,6414,6415,6416,6417,6418,6419,6420,6421,6422,6423,6424,6425,4440,6426,6427, # 6032 +6428,6429,6430,6431,6432,6433,6434,6435,6436,6437,6438,6439,6440,6441,6442,6443, # 6048 +6444,6445,6446,6447,6448,6449,6450,6451,6452,6453,6454,4821,6455,6456,6457,6458, # 6064 +6459,6460,6461,6462,6463,6464,6465,6466,6467,6468,6469,6470,6471,6472,6473,6474, # 6080 +6475,6476,6477,3947,3948,6478,6479,6480,6481,3272,4441,6482,6483,6484,6485,4442, # 6096 +6486,6487,6488,6489,6490,6491,6492,6493,6494,6495,6496,4822,6497,6498,6499,6500, # 6112 +6501,6502,6503,6504,6505,6506,6507,6508,6509,6510,6511,6512,6513,6514,6515,6516, # 6128 +6517,6518,6519,6520,6521,6522,6523,6524,6525,6526,6527,6528,6529,6530,6531,6532, # 6144 +6533,6534,6535,6536,6537,6538,6539,6540,6541,6542,6543,6544,6545,6546,6547,6548, # 6160 +6549,6550,6551,6552,6553,6554,6555,6556,2784,6557,4823,6558,6559,6560,6561,6562, # 6176 +6563,6564,6565,6566,6567,6568,6569,3949,6570,6571,6572,4824,6573,6574,6575,6576, # 6192 +6577,6578,6579,6580,6581,6582,6583,4825,6584,6585,6586,3950,2785,6587,6588,6589, # 6208 +6590,6591,6592,6593,6594,6595,6596,6597,6598,6599,6600,6601,6602,6603,6604,6605, # 6224 +6606,6607,6608,6609,6610,6611,6612,4826,6613,6614,6615,4827,6616,6617,6618,6619, # 6240 +6620,6621,6622,6623,6624,6625,4164,6626,6627,6628,6629,6630,6631,6632,6633,6634, # 6256 
+3547,6635,4828,6636,6637,6638,6639,6640,6641,6642,3951,2984,6643,6644,6645,6646, # 6272 +6647,6648,6649,4165,6650,4829,6651,6652,4830,6653,6654,6655,6656,6657,6658,6659, # 6288 +6660,6661,6662,4831,6663,6664,6665,6666,6667,6668,6669,6670,6671,4166,6672,4832, # 6304 +3952,6673,6674,6675,6676,4833,6677,6678,6679,4167,6680,6681,6682,3198,6683,6684, # 6320 +6685,6686,6687,6688,6689,6690,6691,6692,6693,6694,6695,6696,6697,4834,6698,6699, # 6336 +6700,6701,6702,6703,6704,6705,6706,6707,6708,6709,6710,6711,6712,6713,6714,6715, # 6352 +6716,6717,6718,6719,6720,6721,6722,6723,6724,6725,6726,6727,6728,6729,6730,6731, # 6368 +6732,6733,6734,4443,6735,6736,6737,6738,6739,6740,6741,6742,6743,6744,6745,4444, # 6384 +6746,6747,6748,6749,6750,6751,6752,6753,6754,6755,6756,6757,6758,6759,6760,6761, # 6400 +6762,6763,6764,6765,6766,6767,6768,6769,6770,6771,6772,6773,6774,6775,6776,6777, # 6416 +6778,6779,6780,6781,4168,6782,6783,3442,6784,6785,6786,6787,6788,6789,6790,6791, # 6432 +4169,6792,6793,6794,6795,6796,6797,6798,6799,6800,6801,6802,6803,6804,6805,6806, # 6448 +6807,6808,6809,6810,6811,4835,6812,6813,6814,4445,6815,6816,4446,6817,6818,6819, # 6464 +6820,6821,6822,6823,6824,6825,6826,6827,6828,6829,6830,6831,6832,6833,6834,6835, # 6480 +3548,6836,6837,6838,6839,6840,6841,6842,6843,6844,6845,6846,4836,6847,6848,6849, # 6496 +6850,6851,6852,6853,6854,3953,6855,6856,6857,6858,6859,6860,6861,6862,6863,6864, # 6512 +6865,6866,6867,6868,6869,6870,6871,6872,6873,6874,6875,6876,6877,3199,6878,6879, # 6528 +6880,6881,6882,4447,6883,6884,6885,6886,6887,6888,6889,6890,6891,6892,6893,6894, # 6544 +6895,6896,6897,6898,6899,6900,6901,6902,6903,6904,4170,6905,6906,6907,6908,6909, # 6560 +6910,6911,6912,6913,6914,6915,6916,6917,6918,6919,6920,6921,6922,6923,6924,6925, # 6576 +6926,6927,4837,6928,6929,6930,6931,6932,6933,6934,6935,6936,3346,6937,6938,4838, # 6592 +6939,6940,6941,4448,6942,6943,6944,6945,6946,4449,6947,6948,6949,6950,6951,6952, # 6608 
+6953,6954,6955,6956,6957,6958,6959,6960,6961,6962,6963,6964,6965,6966,6967,6968, # 6624 +6969,6970,6971,6972,6973,6974,6975,6976,6977,6978,6979,6980,6981,6982,6983,6984, # 6640 +6985,6986,6987,6988,6989,6990,6991,6992,6993,6994,3671,6995,6996,6997,6998,4839, # 6656 +6999,7000,7001,7002,3549,7003,7004,7005,7006,7007,7008,7009,7010,7011,7012,7013, # 6672 +7014,7015,7016,7017,7018,7019,7020,7021,7022,7023,7024,7025,7026,7027,7028,7029, # 6688 +7030,4840,7031,7032,7033,7034,7035,7036,7037,7038,4841,7039,7040,7041,7042,7043, # 6704 +7044,7045,7046,7047,7048,7049,7050,7051,7052,7053,7054,7055,7056,7057,7058,7059, # 6720 +7060,7061,7062,7063,7064,7065,7066,7067,7068,7069,7070,2985,7071,7072,7073,7074, # 6736 +7075,7076,7077,7078,7079,7080,4842,7081,7082,7083,7084,7085,7086,7087,7088,7089, # 6752 +7090,7091,7092,7093,7094,7095,7096,7097,7098,7099,7100,7101,7102,7103,7104,7105, # 6768 +7106,7107,7108,7109,7110,7111,7112,7113,7114,7115,7116,7117,7118,4450,7119,7120, # 6784 +7121,7122,7123,7124,7125,7126,7127,7128,7129,7130,7131,7132,7133,7134,7135,7136, # 6800 +7137,7138,7139,7140,7141,7142,7143,4843,7144,7145,7146,7147,7148,7149,7150,7151, # 6816 +7152,7153,7154,7155,7156,7157,7158,7159,7160,7161,7162,7163,7164,7165,7166,7167, # 6832 +7168,7169,7170,7171,7172,7173,7174,7175,7176,7177,7178,7179,7180,7181,7182,7183, # 6848 +7184,7185,7186,7187,7188,4171,4172,7189,7190,7191,7192,7193,7194,7195,7196,7197, # 6864 +7198,7199,7200,7201,7202,7203,7204,7205,7206,7207,7208,7209,7210,7211,7212,7213, # 6880 +7214,7215,7216,7217,7218,7219,7220,7221,7222,7223,7224,7225,7226,7227,7228,7229, # 6896 +7230,7231,7232,7233,7234,7235,7236,7237,7238,7239,7240,7241,7242,7243,7244,7245, # 6912 +7246,7247,7248,7249,7250,7251,7252,7253,7254,7255,7256,7257,7258,7259,7260,7261, # 6928 +7262,7263,7264,7265,7266,7267,7268,7269,7270,7271,7272,7273,7274,7275,7276,7277, # 6944 +7278,7279,7280,7281,7282,7283,7284,7285,7286,7287,7288,7289,7290,7291,7292,7293, # 6960 
+7294,7295,7296,4844,7297,7298,7299,7300,7301,7302,7303,7304,7305,7306,7307,7308, # 6976 +7309,7310,7311,7312,7313,7314,7315,7316,4451,7317,7318,7319,7320,7321,7322,7323, # 6992 +7324,7325,7326,7327,7328,7329,7330,7331,7332,7333,7334,7335,7336,7337,7338,7339, # 7008 +7340,7341,7342,7343,7344,7345,7346,7347,7348,7349,7350,7351,7352,7353,4173,7354, # 7024 +7355,4845,7356,7357,7358,7359,7360,7361,7362,7363,7364,7365,7366,7367,7368,7369, # 7040 +7370,7371,7372,7373,7374,7375,7376,7377,7378,7379,7380,7381,7382,7383,7384,7385, # 7056 +7386,7387,7388,4846,7389,7390,7391,7392,7393,7394,7395,7396,7397,7398,7399,7400, # 7072 +7401,7402,7403,7404,7405,3672,7406,7407,7408,7409,7410,7411,7412,7413,7414,7415, # 7088 +7416,7417,7418,7419,7420,7421,7422,7423,7424,7425,7426,7427,7428,7429,7430,7431, # 7104 +7432,7433,7434,7435,7436,7437,7438,7439,7440,7441,7442,7443,7444,7445,7446,7447, # 7120 +7448,7449,7450,7451,7452,7453,4452,7454,3200,7455,7456,7457,7458,7459,7460,7461, # 7136 +7462,7463,7464,7465,7466,7467,7468,7469,7470,7471,7472,7473,7474,4847,7475,7476, # 7152 +7477,3133,7478,7479,7480,7481,7482,7483,7484,7485,7486,7487,7488,7489,7490,7491, # 7168 +7492,7493,7494,7495,7496,7497,7498,7499,7500,7501,7502,3347,7503,7504,7505,7506, # 7184 +7507,7508,7509,7510,7511,7512,7513,7514,7515,7516,7517,7518,7519,7520,7521,4848, # 7200 +7522,7523,7524,7525,7526,7527,7528,7529,7530,7531,7532,7533,7534,7535,7536,7537, # 7216 +7538,7539,7540,7541,7542,7543,7544,7545,7546,7547,7548,7549,3801,4849,7550,7551, # 7232 +7552,7553,7554,7555,7556,7557,7558,7559,7560,7561,7562,7563,7564,7565,7566,7567, # 7248 +7568,7569,3035,7570,7571,7572,7573,7574,7575,7576,7577,7578,7579,7580,7581,7582, # 7264 +7583,7584,7585,7586,7587,7588,7589,7590,7591,7592,7593,7594,7595,7596,7597,7598, # 7280 +7599,7600,7601,7602,7603,7604,7605,7606,7607,7608,7609,7610,7611,7612,7613,7614, # 7296 +7615,7616,4850,7617,7618,3802,7619,7620,7621,7622,7623,7624,7625,7626,7627,7628, # 7312 
+7629,7630,7631,7632,4851,7633,7634,7635,7636,7637,7638,7639,7640,7641,7642,7643, # 7328 +7644,7645,7646,7647,7648,7649,7650,7651,7652,7653,7654,7655,7656,7657,7658,7659, # 7344 +7660,7661,7662,7663,7664,7665,7666,7667,7668,7669,7670,4453,7671,7672,7673,7674, # 7360 +7675,7676,7677,7678,7679,7680,7681,7682,7683,7684,7685,7686,7687,7688,7689,7690, # 7376 +7691,7692,7693,7694,7695,7696,7697,3443,7698,7699,7700,7701,7702,4454,7703,7704, # 7392 +7705,7706,7707,7708,7709,7710,7711,7712,7713,2472,7714,7715,7716,7717,7718,7719, # 7408 +7720,7721,7722,7723,7724,7725,7726,7727,7728,7729,7730,7731,3954,7732,7733,7734, # 7424 +7735,7736,7737,7738,7739,7740,7741,7742,7743,7744,7745,7746,7747,7748,7749,7750, # 7440 +3134,7751,7752,4852,7753,7754,7755,4853,7756,7757,7758,7759,7760,4174,7761,7762, # 7456 +7763,7764,7765,7766,7767,7768,7769,7770,7771,7772,7773,7774,7775,7776,7777,7778, # 7472 +7779,7780,7781,7782,7783,7784,7785,7786,7787,7788,7789,7790,7791,7792,7793,7794, # 7488 +7795,7796,7797,7798,7799,7800,7801,7802,7803,7804,7805,4854,7806,7807,7808,7809, # 7504 +7810,7811,7812,7813,7814,7815,7816,7817,7818,7819,7820,7821,7822,7823,7824,7825, # 7520 +4855,7826,7827,7828,7829,7830,7831,7832,7833,7834,7835,7836,7837,7838,7839,7840, # 7536 +7841,7842,7843,7844,7845,7846,7847,3955,7848,7849,7850,7851,7852,7853,7854,7855, # 7552 +7856,7857,7858,7859,7860,3444,7861,7862,7863,7864,7865,7866,7867,7868,7869,7870, # 7568 +7871,7872,7873,7874,7875,7876,7877,7878,7879,7880,7881,7882,7883,7884,7885,7886, # 7584 +7887,7888,7889,7890,7891,4175,7892,7893,7894,7895,7896,4856,4857,7897,7898,7899, # 7600 +7900,2598,7901,7902,7903,7904,7905,7906,7907,7908,4455,7909,7910,7911,7912,7913, # 7616 +7914,3201,7915,7916,7917,7918,7919,7920,7921,4858,7922,7923,7924,7925,7926,7927, # 7632 +7928,7929,7930,7931,7932,7933,7934,7935,7936,7937,7938,7939,7940,7941,7942,7943, # 7648 +7944,7945,7946,7947,7948,7949,7950,7951,7952,7953,7954,7955,7956,7957,7958,7959, # 7664 
+7960,7961,7962,7963,7964,7965,7966,7967,7968,7969,7970,7971,7972,7973,7974,7975, # 7680 +7976,7977,7978,7979,7980,7981,4859,7982,7983,7984,7985,7986,7987,7988,7989,7990, # 7696 +7991,7992,7993,7994,7995,7996,4860,7997,7998,7999,8000,8001,8002,8003,8004,8005, # 7712 +8006,8007,8008,8009,8010,8011,8012,8013,8014,8015,8016,4176,8017,8018,8019,8020, # 7728 +8021,8022,8023,4861,8024,8025,8026,8027,8028,8029,8030,8031,8032,8033,8034,8035, # 7744 +8036,4862,4456,8037,8038,8039,8040,4863,8041,8042,8043,8044,8045,8046,8047,8048, # 7760 +8049,8050,8051,8052,8053,8054,8055,8056,8057,8058,8059,8060,8061,8062,8063,8064, # 7776 +8065,8066,8067,8068,8069,8070,8071,8072,8073,8074,8075,8076,8077,8078,8079,8080, # 7792 +8081,8082,8083,8084,8085,8086,8087,8088,8089,8090,8091,8092,8093,8094,8095,8096, # 7808 +8097,8098,8099,4864,4177,8100,8101,8102,8103,8104,8105,8106,8107,8108,8109,8110, # 7824 +8111,8112,8113,8114,8115,8116,8117,8118,8119,8120,4178,8121,8122,8123,8124,8125, # 7840 +8126,8127,8128,8129,8130,8131,8132,8133,8134,8135,8136,8137,8138,8139,8140,8141, # 7856 +8142,8143,8144,8145,4865,4866,8146,8147,8148,8149,8150,8151,8152,8153,8154,8155, # 7872 +8156,8157,8158,8159,8160,8161,8162,8163,8164,8165,4179,8166,8167,8168,8169,8170, # 7888 +8171,8172,8173,8174,8175,8176,8177,8178,8179,8180,8181,4457,8182,8183,8184,8185, # 7904 +8186,8187,8188,8189,8190,8191,8192,8193,8194,8195,8196,8197,8198,8199,8200,8201, # 7920 +8202,8203,8204,8205,8206,8207,8208,8209,8210,8211,8212,8213,8214,8215,8216,8217, # 7936 +8218,8219,8220,8221,8222,8223,8224,8225,8226,8227,8228,8229,8230,8231,8232,8233, # 7952 +8234,8235,8236,8237,8238,8239,8240,8241,8242,8243,8244,8245,8246,8247,8248,8249, # 7968 +8250,8251,8252,8253,8254,8255,8256,3445,8257,8258,8259,8260,8261,8262,4458,8263, # 7984 +8264,8265,8266,8267,8268,8269,8270,8271,8272,4459,8273,8274,8275,8276,3550,8277, # 8000 +8278,8279,8280,8281,8282,8283,8284,8285,8286,8287,8288,8289,4460,8290,8291,8292, # 8016 
+8293,8294,8295,8296,8297,8298,8299,8300,8301,8302,8303,8304,8305,8306,8307,4867, # 8032 +8308,8309,8310,8311,8312,3551,8313,8314,8315,8316,8317,8318,8319,8320,8321,8322, # 8048 +8323,8324,8325,8326,4868,8327,8328,8329,8330,8331,8332,8333,8334,8335,8336,8337, # 8064 +8338,8339,8340,8341,8342,8343,8344,8345,8346,8347,8348,8349,8350,8351,8352,8353, # 8080 +8354,8355,8356,8357,8358,8359,8360,8361,8362,8363,4869,4461,8364,8365,8366,8367, # 8096 +8368,8369,8370,4870,8371,8372,8373,8374,8375,8376,8377,8378,8379,8380,8381,8382, # 8112 +8383,8384,8385,8386,8387,8388,8389,8390,8391,8392,8393,8394,8395,8396,8397,8398, # 8128 +8399,8400,8401,8402,8403,8404,8405,8406,8407,8408,8409,8410,4871,8411,8412,8413, # 8144 +8414,8415,8416,8417,8418,8419,8420,8421,8422,4462,8423,8424,8425,8426,8427,8428, # 8160 +8429,8430,8431,8432,8433,2986,8434,8435,8436,8437,8438,8439,8440,8441,8442,8443, # 8176 +8444,8445,8446,8447,8448,8449,8450,8451,8452,8453,8454,8455,8456,8457,8458,8459, # 8192 +8460,8461,8462,8463,8464,8465,8466,8467,8468,8469,8470,8471,8472,8473,8474,8475, # 8208 +8476,8477,8478,4180,8479,8480,8481,8482,8483,8484,8485,8486,8487,8488,8489,8490, # 8224 +8491,8492,8493,8494,8495,8496,8497,8498,8499,8500,8501,8502,8503,8504,8505,8506, # 8240 +8507,8508,8509,8510,8511,8512,8513,8514,8515,8516,8517,8518,8519,8520,8521,8522, # 8256 +8523,8524,8525,8526,8527,8528,8529,8530,8531,8532,8533,8534,8535,8536,8537,8538, # 8272 +8539,8540,8541,8542,8543,8544,8545,8546,8547,8548,8549,8550,8551,8552,8553,8554, # 8288 +8555,8556,8557,8558,8559,8560,8561,8562,8563,8564,4872,8565,8566,8567,8568,8569, # 8304 +8570,8571,8572,8573,4873,8574,8575,8576,8577,8578,8579,8580,8581,8582,8583,8584, # 8320 +8585,8586,8587,8588,8589,8590,8591,8592,8593,8594,8595,8596,8597,8598,8599,8600, # 8336 +8601,8602,8603,8604,8605,3803,8606,8607,8608,8609,8610,8611,8612,8613,4874,3804, # 8352 +8614,8615,8616,8617,8618,8619,8620,8621,3956,8622,8623,8624,8625,8626,8627,8628, # 8368 
+8629,8630,8631,8632,8633,8634,8635,8636,8637,8638,2865,8639,8640,8641,8642,8643, # 8384 +8644,8645,8646,8647,8648,8649,8650,8651,8652,8653,8654,8655,8656,4463,8657,8658, # 8400 +8659,4875,4876,8660,8661,8662,8663,8664,8665,8666,8667,8668,8669,8670,8671,8672, # 8416 +8673,8674,8675,8676,8677,8678,8679,8680,8681,4464,8682,8683,8684,8685,8686,8687, # 8432 +8688,8689,8690,8691,8692,8693,8694,8695,8696,8697,8698,8699,8700,8701,8702,8703, # 8448 +8704,8705,8706,8707,8708,8709,2261,8710,8711,8712,8713,8714,8715,8716,8717,8718, # 8464 +8719,8720,8721,8722,8723,8724,8725,8726,8727,8728,8729,8730,8731,8732,8733,4181, # 8480 +8734,8735,8736,8737,8738,8739,8740,8741,8742,8743,8744,8745,8746,8747,8748,8749, # 8496 +8750,8751,8752,8753,8754,8755,8756,8757,8758,8759,8760,8761,8762,8763,4877,8764, # 8512 +8765,8766,8767,8768,8769,8770,8771,8772,8773,8774,8775,8776,8777,8778,8779,8780, # 8528 +8781,8782,8783,8784,8785,8786,8787,8788,4878,8789,4879,8790,8791,8792,4880,8793, # 8544 +8794,8795,8796,8797,8798,8799,8800,8801,4881,8802,8803,8804,8805,8806,8807,8808, # 8560 +8809,8810,8811,8812,8813,8814,8815,3957,8816,8817,8818,8819,8820,8821,8822,8823, # 8576 +8824,8825,8826,8827,8828,8829,8830,8831,8832,8833,8834,8835,8836,8837,8838,8839, # 8592 +8840,8841,8842,8843,8844,8845,8846,8847,4882,8848,8849,8850,8851,8852,8853,8854, # 8608 +8855,8856,8857,8858,8859,8860,8861,8862,8863,8864,8865,8866,8867,8868,8869,8870, # 8624 +8871,8872,8873,8874,8875,8876,8877,8878,8879,8880,8881,8882,8883,8884,3202,8885, # 8640 +8886,8887,8888,8889,8890,8891,8892,8893,8894,8895,8896,8897,8898,8899,8900,8901, # 8656 +8902,8903,8904,8905,8906,8907,8908,8909,8910,8911,8912,8913,8914,8915,8916,8917, # 8672 +8918,8919,8920,8921,8922,8923,8924,4465,8925,8926,8927,8928,8929,8930,8931,8932, # 8688 +4883,8933,8934,8935,8936,8937,8938,8939,8940,8941,8942,8943,2214,8944,8945,8946, # 8704 +8947,8948,8949,8950,8951,8952,8953,8954,8955,8956,8957,8958,8959,8960,8961,8962, # 8720 
+8963,8964,8965,4884,8966,8967,8968,8969,8970,8971,8972,8973,8974,8975,8976,8977, # 8736 +8978,8979,8980,8981,8982,8983,8984,8985,8986,8987,8988,8989,8990,8991,8992,4885, # 8752 +8993,8994,8995,8996,8997,8998,8999,9000,9001,9002,9003,9004,9005,9006,9007,9008, # 8768 +9009,9010,9011,9012,9013,9014,9015,9016,9017,9018,9019,9020,9021,4182,9022,9023, # 8784 +9024,9025,9026,9027,9028,9029,9030,9031,9032,9033,9034,9035,9036,9037,9038,9039, # 8800 +9040,9041,9042,9043,9044,9045,9046,9047,9048,9049,9050,9051,9052,9053,9054,9055, # 8816 +9056,9057,9058,9059,9060,9061,9062,9063,4886,9064,9065,9066,9067,9068,9069,4887, # 8832 +9070,9071,9072,9073,9074,9075,9076,9077,9078,9079,9080,9081,9082,9083,9084,9085, # 8848 +9086,9087,9088,9089,9090,9091,9092,9093,9094,9095,9096,9097,9098,9099,9100,9101, # 8864 +9102,9103,9104,9105,9106,9107,9108,9109,9110,9111,9112,9113,9114,9115,9116,9117, # 8880 +9118,9119,9120,9121,9122,9123,9124,9125,9126,9127,9128,9129,9130,9131,9132,9133, # 8896 +9134,9135,9136,9137,9138,9139,9140,9141,3958,9142,9143,9144,9145,9146,9147,9148, # 8912 +9149,9150,9151,4888,9152,9153,9154,9155,9156,9157,9158,9159,9160,9161,9162,9163, # 8928 +9164,9165,9166,9167,9168,9169,9170,9171,9172,9173,9174,9175,4889,9176,9177,9178, # 8944 +9179,9180,9181,9182,9183,9184,9185,9186,9187,9188,9189,9190,9191,9192,9193,9194, # 8960 +9195,9196,9197,9198,9199,9200,9201,9202,9203,4890,9204,9205,9206,9207,9208,9209, # 8976 +9210,9211,9212,9213,9214,9215,9216,9217,9218,9219,9220,9221,9222,4466,9223,9224, # 8992 +9225,9226,9227,9228,9229,9230,9231,9232,9233,9234,9235,9236,9237,9238,9239,9240, # 9008 +9241,9242,9243,9244,9245,4891,9246,9247,9248,9249,9250,9251,9252,9253,9254,9255, # 9024 +9256,9257,4892,9258,9259,9260,9261,4893,4894,9262,9263,9264,9265,9266,9267,9268, # 9040 +9269,9270,9271,9272,9273,4467,9274,9275,9276,9277,9278,9279,9280,9281,9282,9283, # 9056 +9284,9285,3673,9286,9287,9288,9289,9290,9291,9292,9293,9294,9295,9296,9297,9298, # 9072 
+9299,9300,9301,9302,9303,9304,9305,9306,9307,9308,9309,9310,9311,9312,9313,9314, # 9088 +9315,9316,9317,9318,9319,9320,9321,9322,4895,9323,9324,9325,9326,9327,9328,9329, # 9104 +9330,9331,9332,9333,9334,9335,9336,9337,9338,9339,9340,9341,9342,9343,9344,9345, # 9120 +9346,9347,4468,9348,9349,9350,9351,9352,9353,9354,9355,9356,9357,9358,9359,9360, # 9136 +9361,9362,9363,9364,9365,9366,9367,9368,9369,9370,9371,9372,9373,4896,9374,4469, # 9152 +9375,9376,9377,9378,9379,4897,9380,9381,9382,9383,9384,9385,9386,9387,9388,9389, # 9168 +9390,9391,9392,9393,9394,9395,9396,9397,9398,9399,9400,9401,9402,9403,9404,9405, # 9184 +9406,4470,9407,2751,9408,9409,3674,3552,9410,9411,9412,9413,9414,9415,9416,9417, # 9200 +9418,9419,9420,9421,4898,9422,9423,9424,9425,9426,9427,9428,9429,3959,9430,9431, # 9216 +9432,9433,9434,9435,9436,4471,9437,9438,9439,9440,9441,9442,9443,9444,9445,9446, # 9232 +9447,9448,9449,9450,3348,9451,9452,9453,9454,9455,9456,9457,9458,9459,9460,9461, # 9248 +9462,9463,9464,9465,9466,9467,9468,9469,9470,9471,9472,4899,9473,9474,9475,9476, # 9264 +9477,4900,9478,9479,9480,9481,9482,9483,9484,9485,9486,9487,9488,3349,9489,9490, # 9280 +9491,9492,9493,9494,9495,9496,9497,9498,9499,9500,9501,9502,9503,9504,9505,9506, # 9296 +9507,9508,9509,9510,9511,9512,9513,9514,9515,9516,9517,9518,9519,9520,4901,9521, # 9312 +9522,9523,9524,9525,9526,4902,9527,9528,9529,9530,9531,9532,9533,9534,9535,9536, # 9328 +9537,9538,9539,9540,9541,9542,9543,9544,9545,9546,9547,9548,9549,9550,9551,9552, # 9344 +9553,9554,9555,9556,9557,9558,9559,9560,9561,9562,9563,9564,9565,9566,9567,9568, # 9360 +9569,9570,9571,9572,9573,9574,9575,9576,9577,9578,9579,9580,9581,9582,9583,9584, # 9376 +3805,9585,9586,9587,9588,9589,9590,9591,9592,9593,9594,9595,9596,9597,9598,9599, # 9392 +9600,9601,9602,4903,9603,9604,9605,9606,9607,4904,9608,9609,9610,9611,9612,9613, # 9408 +9614,4905,9615,9616,9617,9618,9619,9620,9621,9622,9623,9624,9625,9626,9627,9628, # 9424 
+9629,9630,9631,9632,4906,9633,9634,9635,9636,9637,9638,9639,9640,9641,9642,9643, # 9440 +4907,9644,9645,9646,9647,9648,9649,9650,9651,9652,9653,9654,9655,9656,9657,9658, # 9456 +9659,9660,9661,9662,9663,9664,9665,9666,9667,9668,9669,9670,9671,9672,4183,9673, # 9472 +9674,9675,9676,9677,4908,9678,9679,9680,9681,4909,9682,9683,9684,9685,9686,9687, # 9488 +9688,9689,9690,4910,9691,9692,9693,3675,9694,9695,9696,2945,9697,9698,9699,9700, # 9504 +9701,9702,9703,9704,9705,4911,9706,9707,9708,9709,9710,9711,9712,9713,9714,9715, # 9520 +9716,9717,9718,9719,9720,9721,9722,9723,9724,9725,9726,9727,9728,9729,9730,9731, # 9536 +9732,9733,9734,9735,4912,9736,9737,9738,9739,9740,4913,9741,9742,9743,9744,9745, # 9552 +9746,9747,9748,9749,9750,9751,9752,9753,9754,9755,9756,9757,9758,4914,9759,9760, # 9568 +9761,9762,9763,9764,9765,9766,9767,9768,9769,9770,9771,9772,9773,9774,9775,9776, # 9584 +9777,9778,9779,9780,9781,9782,4915,9783,9784,9785,9786,9787,9788,9789,9790,9791, # 9600 +9792,9793,4916,9794,9795,9796,9797,9798,9799,9800,9801,9802,9803,9804,9805,9806, # 9616 +9807,9808,9809,9810,9811,9812,9813,9814,9815,9816,9817,9818,9819,9820,9821,9822, # 9632 +9823,9824,9825,9826,9827,9828,9829,9830,9831,9832,9833,9834,9835,9836,9837,9838, # 9648 +9839,9840,9841,9842,9843,9844,9845,9846,9847,9848,9849,9850,9851,9852,9853,9854, # 9664 +9855,9856,9857,9858,9859,9860,9861,9862,9863,9864,9865,9866,9867,9868,4917,9869, # 9680 +9870,9871,9872,9873,9874,9875,9876,9877,9878,9879,9880,9881,9882,9883,9884,9885, # 9696 +9886,9887,9888,9889,9890,9891,9892,4472,9893,9894,9895,9896,9897,3806,9898,9899, # 9712 +9900,9901,9902,9903,9904,9905,9906,9907,9908,9909,9910,9911,9912,9913,9914,4918, # 9728 +9915,9916,9917,4919,9918,9919,9920,9921,4184,9922,9923,9924,9925,9926,9927,9928, # 9744 +9929,9930,9931,9932,9933,9934,9935,9936,9937,9938,9939,9940,9941,9942,9943,9944, # 9760 +9945,9946,4920,9947,9948,9949,9950,9951,9952,9953,9954,9955,4185,9956,9957,9958, # 9776 
+9959,9960,9961,9962,9963,9964,9965,4921,9966,9967,9968,4473,9969,9970,9971,9972, # 9792 +9973,9974,9975,9976,9977,4474,9978,9979,9980,9981,9982,9983,9984,9985,9986,9987, # 9808 +9988,9989,9990,9991,9992,9993,9994,9995,9996,9997,9998,9999,10000,10001,10002,10003, # 9824 +10004,10005,10006,10007,10008,10009,10010,10011,10012,10013,10014,10015,10016,10017,10018,10019, # 9840 +10020,10021,4922,10022,4923,10023,10024,10025,10026,10027,10028,10029,10030,10031,10032,10033, # 9856 +10034,10035,10036,10037,10038,10039,10040,10041,10042,10043,10044,10045,10046,10047,10048,4924, # 9872 +10049,10050,10051,10052,10053,10054,10055,10056,10057,10058,10059,10060,10061,10062,10063,10064, # 9888 +10065,10066,10067,10068,10069,10070,10071,10072,10073,10074,10075,10076,10077,10078,10079,10080, # 9904 +10081,10082,10083,10084,10085,10086,10087,4475,10088,10089,10090,10091,10092,10093,10094,10095, # 9920 +10096,10097,4476,10098,10099,10100,10101,10102,10103,10104,10105,10106,10107,10108,10109,10110, # 9936 +10111,2174,10112,10113,10114,10115,10116,10117,10118,10119,10120,10121,10122,10123,10124,10125, # 9952 +10126,10127,10128,10129,10130,10131,10132,10133,10134,10135,10136,10137,10138,10139,10140,3807, # 9968 +4186,4925,10141,10142,10143,10144,10145,10146,10147,4477,4187,10148,10149,10150,10151,10152, # 9984 +10153,4188,10154,10155,10156,10157,10158,10159,10160,10161,4926,10162,10163,10164,10165,10166, #10000 +10167,10168,10169,10170,10171,10172,10173,10174,10175,10176,10177,10178,10179,10180,10181,10182, #10016 +10183,10184,10185,10186,10187,10188,10189,10190,10191,10192,3203,10193,10194,10195,10196,10197, #10032 +10198,10199,10200,4478,10201,10202,10203,10204,4479,10205,10206,10207,10208,10209,10210,10211, #10048 +10212,10213,10214,10215,10216,10217,10218,10219,10220,10221,10222,10223,10224,10225,10226,10227, #10064 +10228,10229,10230,10231,10232,10233,10234,4927,10235,10236,10237,10238,10239,10240,10241,10242, #10080 
+10243,10244,10245,10246,10247,10248,10249,10250,10251,10252,10253,10254,10255,10256,10257,10258, #10096 +10259,10260,10261,10262,10263,10264,10265,10266,10267,10268,10269,10270,10271,10272,10273,4480, #10112 +4928,4929,10274,10275,10276,10277,10278,10279,10280,10281,10282,10283,10284,10285,10286,10287, #10128 +10288,10289,10290,10291,10292,10293,10294,10295,10296,10297,10298,10299,10300,10301,10302,10303, #10144 +10304,10305,10306,10307,10308,10309,10310,10311,10312,10313,10314,10315,10316,10317,10318,10319, #10160 +10320,10321,10322,10323,10324,10325,10326,10327,10328,10329,10330,10331,10332,10333,10334,4930, #10176 +10335,10336,10337,10338,10339,10340,10341,10342,4931,10343,10344,10345,10346,10347,10348,10349, #10192 +10350,10351,10352,10353,10354,10355,3088,10356,2786,10357,10358,10359,10360,4189,10361,10362, #10208 +10363,10364,10365,10366,10367,10368,10369,10370,10371,10372,10373,10374,10375,4932,10376,10377, #10224 +10378,10379,10380,10381,10382,10383,10384,10385,10386,10387,10388,10389,10390,10391,10392,4933, #10240 +10393,10394,10395,4934,10396,10397,10398,10399,10400,10401,10402,10403,10404,10405,10406,10407, #10256 +10408,10409,10410,10411,10412,3446,10413,10414,10415,10416,10417,10418,10419,10420,10421,10422, #10272 +10423,4935,10424,10425,10426,10427,10428,10429,10430,4936,10431,10432,10433,10434,10435,10436, #10288 +10437,10438,10439,10440,10441,10442,10443,4937,10444,10445,10446,10447,4481,10448,10449,10450, #10304 +10451,10452,10453,10454,10455,10456,10457,10458,10459,10460,10461,10462,10463,10464,10465,10466, #10320 +10467,10468,10469,10470,10471,10472,10473,10474,10475,10476,10477,10478,10479,10480,10481,10482, #10336 +10483,10484,10485,10486,10487,10488,10489,10490,10491,10492,10493,10494,10495,10496,10497,10498, #10352 +10499,10500,10501,10502,10503,10504,10505,4938,10506,10507,10508,10509,10510,2552,10511,10512, #10368 +10513,10514,10515,10516,3447,10517,10518,10519,10520,10521,10522,10523,10524,10525,10526,10527, #10384 
+10528,10529,10530,10531,10532,10533,10534,10535,10536,10537,10538,10539,10540,10541,10542,10543, #10400 +4482,10544,4939,10545,10546,10547,10548,10549,10550,10551,10552,10553,10554,10555,10556,10557, #10416 +10558,10559,10560,10561,10562,10563,10564,10565,10566,10567,3676,4483,10568,10569,10570,10571, #10432 +10572,3448,10573,10574,10575,10576,10577,10578,10579,10580,10581,10582,10583,10584,10585,10586, #10448 +10587,10588,10589,10590,10591,10592,10593,10594,10595,10596,10597,10598,10599,10600,10601,10602, #10464 +10603,10604,10605,10606,10607,10608,10609,10610,10611,10612,10613,10614,10615,10616,10617,10618, #10480 +10619,10620,10621,10622,10623,10624,10625,10626,10627,4484,10628,10629,10630,10631,10632,4940, #10496 +10633,10634,10635,10636,10637,10638,10639,10640,10641,10642,10643,10644,10645,10646,10647,10648, #10512 +10649,10650,10651,10652,10653,10654,10655,10656,4941,10657,10658,10659,2599,10660,10661,10662, #10528 +10663,10664,10665,10666,3089,10667,10668,10669,10670,10671,10672,10673,10674,10675,10676,10677, #10544 +10678,10679,10680,4942,10681,10682,10683,10684,10685,10686,10687,10688,10689,10690,10691,10692, #10560 +10693,10694,10695,10696,10697,4485,10698,10699,10700,10701,10702,10703,10704,4943,10705,3677, #10576 +10706,10707,10708,10709,10710,10711,10712,4944,10713,10714,10715,10716,10717,10718,10719,10720, #10592 +10721,10722,10723,10724,10725,10726,10727,10728,4945,10729,10730,10731,10732,10733,10734,10735, #10608 +10736,10737,10738,10739,10740,10741,10742,10743,10744,10745,10746,10747,10748,10749,10750,10751, #10624 +10752,10753,10754,10755,10756,10757,10758,10759,10760,10761,4946,10762,10763,10764,10765,10766, #10640 +10767,4947,4948,10768,10769,10770,10771,10772,10773,10774,10775,10776,10777,10778,10779,10780, #10656 +10781,10782,10783,10784,10785,10786,10787,10788,10789,10790,10791,10792,10793,10794,10795,10796, #10672 +10797,10798,10799,10800,10801,10802,10803,10804,10805,10806,10807,10808,10809,10810,10811,10812, #10688 
+10813,10814,10815,10816,10817,10818,10819,10820,10821,10822,10823,10824,10825,10826,10827,10828, #10704 +10829,10830,10831,10832,10833,10834,10835,10836,10837,10838,10839,10840,10841,10842,10843,10844, #10720 +10845,10846,10847,10848,10849,10850,10851,10852,10853,10854,10855,10856,10857,10858,10859,10860, #10736 +10861,10862,10863,10864,10865,10866,10867,10868,10869,10870,10871,10872,10873,10874,10875,10876, #10752 +10877,10878,4486,10879,10880,10881,10882,10883,10884,10885,4949,10886,10887,10888,10889,10890, #10768 +10891,10892,10893,10894,10895,10896,10897,10898,10899,10900,10901,10902,10903,10904,10905,10906, #10784 +10907,10908,10909,10910,10911,10912,10913,10914,10915,10916,10917,10918,10919,4487,10920,10921, #10800 +10922,10923,10924,10925,10926,10927,10928,10929,10930,10931,10932,4950,10933,10934,10935,10936, #10816 +10937,10938,10939,10940,10941,10942,10943,10944,10945,10946,10947,10948,10949,4488,10950,10951, #10832 +10952,10953,10954,10955,10956,10957,10958,10959,4190,10960,10961,10962,10963,10964,10965,10966, #10848 +10967,10968,10969,10970,10971,10972,10973,10974,10975,10976,10977,10978,10979,10980,10981,10982, #10864 +10983,10984,10985,10986,10987,10988,10989,10990,10991,10992,10993,10994,10995,10996,10997,10998, #10880 +10999,11000,11001,11002,11003,11004,11005,11006,3960,11007,11008,11009,11010,11011,11012,11013, #10896 +11014,11015,11016,11017,11018,11019,11020,11021,11022,11023,11024,11025,11026,11027,11028,11029, #10912 +11030,11031,11032,4951,11033,11034,11035,11036,11037,11038,11039,11040,11041,11042,11043,11044, #10928 +11045,11046,11047,4489,11048,11049,11050,11051,4952,11052,11053,11054,11055,11056,11057,11058, #10944 +4953,11059,11060,11061,11062,11063,11064,11065,11066,11067,11068,11069,11070,11071,4954,11072, #10960 +11073,11074,11075,11076,11077,11078,11079,11080,11081,11082,11083,11084,11085,11086,11087,11088, #10976 +11089,11090,11091,11092,11093,11094,11095,11096,11097,11098,11099,11100,11101,11102,11103,11104, #10992 
+11105,11106,11107,11108,11109,11110,11111,11112,11113,11114,11115,3808,11116,11117,11118,11119, #11008 +11120,11121,11122,11123,11124,11125,11126,11127,11128,11129,11130,11131,11132,11133,11134,4955, #11024 +11135,11136,11137,11138,11139,11140,11141,11142,11143,11144,11145,11146,11147,11148,11149,11150, #11040 +11151,11152,11153,11154,11155,11156,11157,11158,11159,11160,11161,4956,11162,11163,11164,11165, #11056 +11166,11167,11168,11169,11170,11171,11172,11173,11174,11175,11176,11177,11178,11179,11180,4957, #11072 +11181,11182,11183,11184,11185,11186,4958,11187,11188,11189,11190,11191,11192,11193,11194,11195, #11088 +11196,11197,11198,11199,11200,3678,11201,11202,11203,11204,11205,11206,4191,11207,11208,11209, #11104 +11210,11211,11212,11213,11214,11215,11216,11217,11218,11219,11220,11221,11222,11223,11224,11225, #11120 +11226,11227,11228,11229,11230,11231,11232,11233,11234,11235,11236,11237,11238,11239,11240,11241, #11136 +11242,11243,11244,11245,11246,11247,11248,11249,11250,11251,4959,11252,11253,11254,11255,11256, #11152 +11257,11258,11259,11260,11261,11262,11263,11264,11265,11266,11267,11268,11269,11270,11271,11272, #11168 +11273,11274,11275,11276,11277,11278,11279,11280,11281,11282,11283,11284,11285,11286,11287,11288, #11184 +11289,11290,11291,11292,11293,11294,11295,11296,11297,11298,11299,11300,11301,11302,11303,11304, #11200 +11305,11306,11307,11308,11309,11310,11311,11312,11313,11314,3679,11315,11316,11317,11318,4490, #11216 +11319,11320,11321,11322,11323,11324,11325,11326,11327,11328,11329,11330,11331,11332,11333,11334, #11232 +11335,11336,11337,11338,11339,11340,11341,11342,11343,11344,11345,11346,11347,4960,11348,11349, #11248 +11350,11351,11352,11353,11354,11355,11356,11357,11358,11359,11360,11361,11362,11363,11364,11365, #11264 +11366,11367,11368,11369,11370,11371,11372,11373,11374,11375,11376,11377,3961,4961,11378,11379, #11280 +11380,11381,11382,11383,11384,11385,11386,11387,11388,11389,11390,11391,11392,11393,11394,11395, #11296 
+11396,11397,4192,11398,11399,11400,11401,11402,11403,11404,11405,11406,11407,11408,11409,11410, #11312 +11411,4962,11412,11413,11414,11415,11416,11417,11418,11419,11420,11421,11422,11423,11424,11425, #11328 +11426,11427,11428,11429,11430,11431,11432,11433,11434,11435,11436,11437,11438,11439,11440,11441, #11344 +11442,11443,11444,11445,11446,11447,11448,11449,11450,11451,11452,11453,11454,11455,11456,11457, #11360 +11458,11459,11460,11461,11462,11463,11464,11465,11466,11467,11468,11469,4963,11470,11471,4491, #11376 +11472,11473,11474,11475,4964,11476,11477,11478,11479,11480,11481,11482,11483,11484,11485,11486, #11392 +11487,11488,11489,11490,11491,11492,4965,11493,11494,11495,11496,11497,11498,11499,11500,11501, #11408 +11502,11503,11504,11505,11506,11507,11508,11509,11510,11511,11512,11513,11514,11515,11516,11517, #11424 +11518,11519,11520,11521,11522,11523,11524,11525,11526,11527,11528,11529,3962,11530,11531,11532, #11440 +11533,11534,11535,11536,11537,11538,11539,11540,11541,11542,11543,11544,11545,11546,11547,11548, #11456 +11549,11550,11551,11552,11553,11554,11555,11556,11557,11558,11559,11560,11561,11562,11563,11564, #11472 +4193,4194,11565,11566,11567,11568,11569,11570,11571,11572,11573,11574,11575,11576,11577,11578, #11488 +11579,11580,11581,11582,11583,11584,11585,11586,11587,11588,11589,11590,11591,4966,4195,11592, #11504 +11593,11594,11595,11596,11597,11598,11599,11600,11601,11602,11603,11604,3090,11605,11606,11607, #11520 +11608,11609,11610,4967,11611,11612,11613,11614,11615,11616,11617,11618,11619,11620,11621,11622, #11536 +11623,11624,11625,11626,11627,11628,11629,11630,11631,11632,11633,11634,11635,11636,11637,11638, #11552 +11639,11640,11641,11642,11643,11644,11645,11646,11647,11648,11649,11650,11651,11652,11653,11654, #11568 +11655,11656,11657,11658,11659,11660,11661,11662,11663,11664,11665,11666,11667,11668,11669,11670, #11584 +11671,11672,11673,11674,4968,11675,11676,11677,11678,11679,11680,11681,11682,11683,11684,11685, #11600 
+11686,11687,11688,11689,11690,11691,11692,11693,3809,11694,11695,11696,11697,11698,11699,11700, #11616 +11701,11702,11703,11704,11705,11706,11707,11708,11709,11710,11711,11712,11713,11714,11715,11716, #11632 +11717,11718,3553,11719,11720,11721,11722,11723,11724,11725,11726,11727,11728,11729,11730,4969, #11648 +11731,11732,11733,11734,11735,11736,11737,11738,11739,11740,4492,11741,11742,11743,11744,11745, #11664 +11746,11747,11748,11749,11750,11751,11752,4970,11753,11754,11755,11756,11757,11758,11759,11760, #11680 +11761,11762,11763,11764,11765,11766,11767,11768,11769,11770,11771,11772,11773,11774,11775,11776, #11696 +11777,11778,11779,11780,11781,11782,11783,11784,11785,11786,11787,11788,11789,11790,4971,11791, #11712 +11792,11793,11794,11795,11796,11797,4972,11798,11799,11800,11801,11802,11803,11804,11805,11806, #11728 +11807,11808,11809,11810,4973,11811,11812,11813,11814,11815,11816,11817,11818,11819,11820,11821, #11744 +11822,11823,11824,11825,11826,11827,11828,11829,11830,11831,11832,11833,11834,3680,3810,11835, #11760 +11836,4974,11837,11838,11839,11840,11841,11842,11843,11844,11845,11846,11847,11848,11849,11850, #11776 +11851,11852,11853,11854,11855,11856,11857,11858,11859,11860,11861,11862,11863,11864,11865,11866, #11792 +11867,11868,11869,11870,11871,11872,11873,11874,11875,11876,11877,11878,11879,11880,11881,11882, #11808 +11883,11884,4493,11885,11886,11887,11888,11889,11890,11891,11892,11893,11894,11895,11896,11897, #11824 +11898,11899,11900,11901,11902,11903,11904,11905,11906,11907,11908,11909,11910,11911,11912,11913, #11840 +11914,11915,4975,11916,11917,11918,11919,11920,11921,11922,11923,11924,11925,11926,11927,11928, #11856 +11929,11930,11931,11932,11933,11934,11935,11936,11937,11938,11939,11940,11941,11942,11943,11944, #11872 +11945,11946,11947,11948,11949,4976,11950,11951,11952,11953,11954,11955,11956,11957,11958,11959, #11888 +11960,11961,11962,11963,11964,11965,11966,11967,11968,11969,11970,11971,11972,11973,11974,11975, #11904 
+11976,11977,11978,11979,11980,11981,11982,11983,11984,11985,11986,11987,4196,11988,11989,11990, #11920 +11991,11992,4977,11993,11994,11995,11996,11997,11998,11999,12000,12001,12002,12003,12004,12005, #11936 +12006,12007,12008,12009,12010,12011,12012,12013,12014,12015,12016,12017,12018,12019,12020,12021, #11952 +12022,12023,12024,12025,12026,12027,12028,12029,12030,12031,12032,12033,12034,12035,12036,12037, #11968 +12038,12039,12040,12041,12042,12043,12044,12045,12046,12047,12048,12049,12050,12051,12052,12053, #11984 +12054,12055,12056,12057,12058,12059,12060,12061,4978,12062,12063,12064,12065,12066,12067,12068, #12000 +12069,12070,12071,12072,12073,12074,12075,12076,12077,12078,12079,12080,12081,12082,12083,12084, #12016 +12085,12086,12087,12088,12089,12090,12091,12092,12093,12094,12095,12096,12097,12098,12099,12100, #12032 +12101,12102,12103,12104,12105,12106,12107,12108,12109,12110,12111,12112,12113,12114,12115,12116, #12048 +12117,12118,12119,12120,12121,12122,12123,4979,12124,12125,12126,12127,12128,4197,12129,12130, #12064 +12131,12132,12133,12134,12135,12136,12137,12138,12139,12140,12141,12142,12143,12144,12145,12146, #12080 +12147,12148,12149,12150,12151,12152,12153,12154,4980,12155,12156,12157,12158,12159,12160,4494, #12096 +12161,12162,12163,12164,3811,12165,12166,12167,12168,12169,4495,12170,12171,4496,12172,12173, #12112 +12174,12175,12176,3812,12177,12178,12179,12180,12181,12182,12183,12184,12185,12186,12187,12188, #12128 +12189,12190,12191,12192,12193,12194,12195,12196,12197,12198,12199,12200,12201,12202,12203,12204, #12144 +12205,12206,12207,12208,12209,12210,12211,12212,12213,12214,12215,12216,12217,12218,12219,12220, #12160 +12221,4981,12222,12223,12224,12225,12226,12227,12228,12229,12230,12231,12232,12233,12234,12235, #12176 +4982,12236,12237,12238,12239,12240,12241,12242,12243,12244,12245,4983,12246,12247,12248,12249, #12192 +4984,12250,12251,12252,12253,12254,12255,12256,12257,12258,12259,12260,12261,12262,12263,12264, #12208 
+4985,12265,4497,12266,12267,12268,12269,12270,12271,12272,12273,12274,12275,12276,12277,12278, #12224 +12279,12280,12281,12282,12283,12284,12285,12286,12287,4986,12288,12289,12290,12291,12292,12293, #12240 +12294,12295,12296,2473,12297,12298,12299,12300,12301,12302,12303,12304,12305,12306,12307,12308, #12256 +12309,12310,12311,12312,12313,12314,12315,12316,12317,12318,12319,3963,12320,12321,12322,12323, #12272 +12324,12325,12326,12327,12328,12329,12330,12331,12332,4987,12333,12334,12335,12336,12337,12338, #12288 +12339,12340,12341,12342,12343,12344,12345,12346,12347,12348,12349,12350,12351,12352,12353,12354, #12304 +12355,12356,12357,12358,12359,3964,12360,12361,12362,12363,12364,12365,12366,12367,12368,12369, #12320 +12370,3965,12371,12372,12373,12374,12375,12376,12377,12378,12379,12380,12381,12382,12383,12384, #12336 +12385,12386,12387,12388,12389,12390,12391,12392,12393,12394,12395,12396,12397,12398,12399,12400, #12352 +12401,12402,12403,12404,12405,12406,12407,12408,4988,12409,12410,12411,12412,12413,12414,12415, #12368 +12416,12417,12418,12419,12420,12421,12422,12423,12424,12425,12426,12427,12428,12429,12430,12431, #12384 +12432,12433,12434,12435,12436,12437,12438,3554,12439,12440,12441,12442,12443,12444,12445,12446, #12400 +12447,12448,12449,12450,12451,12452,12453,12454,12455,12456,12457,12458,12459,12460,12461,12462, #12416 +12463,12464,4989,12465,12466,12467,12468,12469,12470,12471,12472,12473,12474,12475,12476,12477, #12432 +12478,12479,12480,4990,12481,12482,12483,12484,12485,12486,12487,12488,12489,4498,12490,12491, #12448 +12492,12493,12494,12495,12496,12497,12498,12499,12500,12501,12502,12503,12504,12505,12506,12507, #12464 +12508,12509,12510,12511,12512,12513,12514,12515,12516,12517,12518,12519,12520,12521,12522,12523, #12480 +12524,12525,12526,12527,12528,12529,12530,12531,12532,12533,12534,12535,12536,12537,12538,12539, #12496 +12540,12541,12542,12543,12544,12545,12546,12547,12548,12549,12550,12551,4991,12552,12553,12554, #12512 
+12555,12556,12557,12558,12559,12560,12561,12562,12563,12564,12565,12566,12567,12568,12569,12570, #12528 +12571,12572,12573,12574,12575,12576,12577,12578,3036,12579,12580,12581,12582,12583,3966,12584, #12544 +12585,12586,12587,12588,12589,12590,12591,12592,12593,12594,12595,12596,12597,12598,12599,12600, #12560 +12601,12602,12603,12604,12605,12606,12607,12608,12609,12610,12611,12612,12613,12614,12615,12616, #12576 +12617,12618,12619,12620,12621,12622,12623,12624,12625,12626,12627,12628,12629,12630,12631,12632, #12592 +12633,12634,12635,12636,12637,12638,12639,12640,12641,12642,12643,12644,12645,12646,4499,12647, #12608 +12648,12649,12650,12651,12652,12653,12654,12655,12656,12657,12658,12659,12660,12661,12662,12663, #12624 +12664,12665,12666,12667,12668,12669,12670,12671,12672,12673,12674,12675,12676,12677,12678,12679, #12640 +12680,12681,12682,12683,12684,12685,12686,12687,12688,12689,12690,12691,12692,12693,12694,12695, #12656 +12696,12697,12698,4992,12699,12700,12701,12702,12703,12704,12705,12706,12707,12708,12709,12710, #12672 +12711,12712,12713,12714,12715,12716,12717,12718,12719,12720,12721,12722,12723,12724,12725,12726, #12688 +12727,12728,12729,12730,12731,12732,12733,12734,12735,12736,12737,12738,12739,12740,12741,12742, #12704 +12743,12744,12745,12746,12747,12748,12749,12750,12751,12752,12753,12754,12755,12756,12757,12758, #12720 +12759,12760,12761,12762,12763,12764,12765,12766,12767,12768,12769,12770,12771,12772,12773,12774, #12736 +12775,12776,12777,12778,4993,2175,12779,12780,12781,12782,12783,12784,12785,12786,4500,12787, #12752 +12788,12789,12790,12791,12792,12793,12794,12795,12796,12797,12798,12799,12800,12801,12802,12803, #12768 +12804,12805,12806,12807,12808,12809,12810,12811,12812,12813,12814,12815,12816,12817,12818,12819, #12784 +12820,12821,12822,12823,12824,12825,12826,4198,3967,12827,12828,12829,12830,12831,12832,12833, #12800 +12834,12835,12836,12837,12838,12839,12840,12841,12842,12843,12844,12845,12846,12847,12848,12849, #12816 
+12850,12851,12852,12853,12854,12855,12856,12857,12858,12859,12860,12861,4199,12862,12863,12864, #12832 +12865,12866,12867,12868,12869,12870,12871,12872,12873,12874,12875,12876,12877,12878,12879,12880, #12848 +12881,12882,12883,12884,12885,12886,12887,4501,12888,12889,12890,12891,12892,12893,12894,12895, #12864 +12896,12897,12898,12899,12900,12901,12902,12903,12904,12905,12906,12907,12908,12909,12910,12911, #12880 +12912,4994,12913,12914,12915,12916,12917,12918,12919,12920,12921,12922,12923,12924,12925,12926, #12896 +12927,12928,12929,12930,12931,12932,12933,12934,12935,12936,12937,12938,12939,12940,12941,12942, #12912 +12943,12944,12945,12946,12947,12948,12949,12950,12951,12952,12953,12954,12955,12956,1772,12957, #12928 +12958,12959,12960,12961,12962,12963,12964,12965,12966,12967,12968,12969,12970,12971,12972,12973, #12944 +12974,12975,12976,12977,12978,12979,12980,12981,12982,12983,12984,12985,12986,12987,12988,12989, #12960 +12990,12991,12992,12993,12994,12995,12996,12997,4502,12998,4503,12999,13000,13001,13002,13003, #12976 +4504,13004,13005,13006,13007,13008,13009,13010,13011,13012,13013,13014,13015,13016,13017,13018, #12992 +13019,13020,13021,13022,13023,13024,13025,13026,13027,13028,13029,3449,13030,13031,13032,13033, #13008 +13034,13035,13036,13037,13038,13039,13040,13041,13042,13043,13044,13045,13046,13047,13048,13049, #13024 +13050,13051,13052,13053,13054,13055,13056,13057,13058,13059,13060,13061,13062,13063,13064,13065, #13040 +13066,13067,13068,13069,13070,13071,13072,13073,13074,13075,13076,13077,13078,13079,13080,13081, #13056 +13082,13083,13084,13085,13086,13087,13088,13089,13090,13091,13092,13093,13094,13095,13096,13097, #13072 +13098,13099,13100,13101,13102,13103,13104,13105,13106,13107,13108,13109,13110,13111,13112,13113, #13088 +13114,13115,13116,13117,13118,3968,13119,4995,13120,13121,13122,13123,13124,13125,13126,13127, #13104 +4505,13128,13129,13130,13131,13132,13133,13134,4996,4506,13135,13136,13137,13138,13139,4997, #13120 
+13140,13141,13142,13143,13144,13145,13146,13147,13148,13149,13150,13151,13152,13153,13154,13155, #13136 +13156,13157,13158,13159,4998,13160,13161,13162,13163,13164,13165,13166,13167,13168,13169,13170, #13152 +13171,13172,13173,13174,13175,13176,4999,13177,13178,13179,13180,13181,13182,13183,13184,13185, #13168 +13186,13187,13188,13189,13190,13191,13192,13193,13194,13195,13196,13197,13198,13199,13200,13201, #13184 +13202,13203,13204,13205,13206,5000,13207,13208,13209,13210,13211,13212,13213,13214,13215,13216, #13200 +13217,13218,13219,13220,13221,13222,13223,13224,13225,13226,13227,4200,5001,13228,13229,13230, #13216 +13231,13232,13233,13234,13235,13236,13237,13238,13239,13240,3969,13241,13242,13243,13244,3970, #13232 +13245,13246,13247,13248,13249,13250,13251,13252,13253,13254,13255,13256,13257,13258,13259,13260, #13248 +13261,13262,13263,13264,13265,13266,13267,13268,3450,13269,13270,13271,13272,13273,13274,13275, #13264 +13276,5002,13277,13278,13279,13280,13281,13282,13283,13284,13285,13286,13287,13288,13289,13290, #13280 +13291,13292,13293,13294,13295,13296,13297,13298,13299,13300,13301,13302,3813,13303,13304,13305, #13296 +13306,13307,13308,13309,13310,13311,13312,13313,13314,13315,13316,13317,13318,13319,13320,13321, #13312 +13322,13323,13324,13325,13326,13327,13328,4507,13329,13330,13331,13332,13333,13334,13335,13336, #13328 +13337,13338,13339,13340,13341,5003,13342,13343,13344,13345,13346,13347,13348,13349,13350,13351, #13344 +13352,13353,13354,13355,13356,13357,13358,13359,13360,13361,13362,13363,13364,13365,13366,13367, #13360 +5004,13368,13369,13370,13371,13372,13373,13374,13375,13376,13377,13378,13379,13380,13381,13382, #13376 +13383,13384,13385,13386,13387,13388,13389,13390,13391,13392,13393,13394,13395,13396,13397,13398, #13392 +13399,13400,13401,13402,13403,13404,13405,13406,13407,13408,13409,13410,13411,13412,13413,13414, #13408 +13415,13416,13417,13418,13419,13420,13421,13422,13423,13424,13425,13426,13427,13428,13429,13430, #13424 
+13431,13432,4508,13433,13434,13435,4201,13436,13437,13438,13439,13440,13441,13442,13443,13444, #13440 +13445,13446,13447,13448,13449,13450,13451,13452,13453,13454,13455,13456,13457,5005,13458,13459, #13456 +13460,13461,13462,13463,13464,13465,13466,13467,13468,13469,13470,4509,13471,13472,13473,13474, #13472 +13475,13476,13477,13478,13479,13480,13481,13482,13483,13484,13485,13486,13487,13488,13489,13490, #13488 +13491,13492,13493,13494,13495,13496,13497,13498,13499,13500,13501,13502,13503,13504,13505,13506, #13504 +13507,13508,13509,13510,13511,13512,13513,13514,13515,13516,13517,13518,13519,13520,13521,13522, #13520 +13523,13524,13525,13526,13527,13528,13529,13530,13531,13532,13533,13534,13535,13536,13537,13538, #13536 +13539,13540,13541,13542,13543,13544,13545,13546,13547,13548,13549,13550,13551,13552,13553,13554, #13552 +13555,13556,13557,13558,13559,13560,13561,13562,13563,13564,13565,13566,13567,13568,13569,13570, #13568 +13571,13572,13573,13574,13575,13576,13577,13578,13579,13580,13581,13582,13583,13584,13585,13586, #13584 +13587,13588,13589,13590,13591,13592,13593,13594,13595,13596,13597,13598,13599,13600,13601,13602, #13600 +13603,13604,13605,13606,13607,13608,13609,13610,13611,13612,13613,13614,13615,13616,13617,13618, #13616 +13619,13620,13621,13622,13623,13624,13625,13626,13627,13628,13629,13630,13631,13632,13633,13634, #13632 +13635,13636,13637,13638,13639,13640,13641,13642,5006,13643,13644,13645,13646,13647,13648,13649, #13648 +13650,13651,5007,13652,13653,13654,13655,13656,13657,13658,13659,13660,13661,13662,13663,13664, #13664 +13665,13666,13667,13668,13669,13670,13671,13672,13673,13674,13675,13676,13677,13678,13679,13680, #13680 +13681,13682,13683,13684,13685,13686,13687,13688,13689,13690,13691,13692,13693,13694,13695,13696, #13696 +13697,13698,13699,13700,13701,13702,13703,13704,13705,13706,13707,13708,13709,13710,13711,13712, #13712 +13713,13714,13715,13716,13717,13718,13719,13720,13721,13722,13723,13724,13725,13726,13727,13728, #13728 
+13729,13730,13731,13732,13733,13734,13735,13736,13737,13738,13739,13740,13741,13742,13743,13744, #13744 +13745,13746,13747,13748,13749,13750,13751,13752,13753,13754,13755,13756,13757,13758,13759,13760, #13760 +13761,13762,13763,13764,13765,13766,13767,13768,13769,13770,13771,13772,13773,13774,3273,13775, #13776 +13776,13777,13778,13779,13780,13781,13782,13783,13784,13785,13786,13787,13788,13789,13790,13791, #13792 +13792,13793,13794,13795,13796,13797,13798,13799,13800,13801,13802,13803,13804,13805,13806,13807, #13808 +13808,13809,13810,13811,13812,13813,13814,13815,13816,13817,13818,13819,13820,13821,13822,13823, #13824 +13824,13825,13826,13827,13828,13829,13830,13831,13832,13833,13834,13835,13836,13837,13838,13839, #13840 +13840,13841,13842,13843,13844,13845,13846,13847,13848,13849,13850,13851,13852,13853,13854,13855, #13856 +13856,13857,13858,13859,13860,13861,13862,13863,13864,13865,13866,13867,13868,13869,13870,13871, #13872 +13872,13873,13874,13875,13876,13877,13878,13879,13880,13881,13882,13883,13884,13885,13886,13887, #13888 +13888,13889,13890,13891,13892,13893,13894,13895,13896,13897,13898,13899,13900,13901,13902,13903, #13904 +13904,13905,13906,13907,13908,13909,13910,13911,13912,13913,13914,13915,13916,13917,13918,13919, #13920 +13920,13921,13922,13923,13924,13925,13926,13927,13928,13929,13930,13931,13932,13933,13934,13935, #13936 +13936,13937,13938,13939,13940,13941,13942,13943,13944,13945,13946,13947,13948,13949,13950,13951, #13952 +13952,13953,13954,13955,13956,13957,13958,13959,13960,13961,13962,13963,13964,13965,13966,13967, #13968 +13968,13969,13970,13971,13972) #13973 diff --git a/fanficdownloader/chardet/big5prober.py b/fanficdownloader/chardet/big5prober.py new file mode 100644 index 00000000..e6b52aad --- /dev/null +++ b/fanficdownloader/chardet/big5prober.py @@ -0,0 +1,41 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Communicator client code. 
+# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from mbcharsetprober import MultiByteCharSetProber +from codingstatemachine import CodingStateMachine +from chardistribution import Big5DistributionAnalysis +from mbcssm import Big5SMModel + +class Big5Prober(MultiByteCharSetProber): + def __init__(self): + MultiByteCharSetProber.__init__(self) + self._mCodingSM = CodingStateMachine(Big5SMModel) + self._mDistributionAnalyzer = Big5DistributionAnalysis() + self.reset() + + def get_charset_name(self): + return "Big5" diff --git a/fanficdownloader/chardet/chardistribution.py b/fanficdownloader/chardet/chardistribution.py new file mode 100644 index 00000000..b8933418 --- /dev/null +++ b/fanficdownloader/chardet/chardistribution.py @@ -0,0 +1,200 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Communicator client code. 
+# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +import constants +from euctwfreq import EUCTWCharToFreqOrder, EUCTW_TABLE_SIZE, EUCTW_TYPICAL_DISTRIBUTION_RATIO +from euckrfreq import EUCKRCharToFreqOrder, EUCKR_TABLE_SIZE, EUCKR_TYPICAL_DISTRIBUTION_RATIO +from gb2312freq import GB2312CharToFreqOrder, GB2312_TABLE_SIZE, GB2312_TYPICAL_DISTRIBUTION_RATIO +from big5freq import Big5CharToFreqOrder, BIG5_TABLE_SIZE, BIG5_TYPICAL_DISTRIBUTION_RATIO +from jisfreq import JISCharToFreqOrder, JIS_TABLE_SIZE, JIS_TYPICAL_DISTRIBUTION_RATIO + +ENOUGH_DATA_THRESHOLD = 1024 +SURE_YES = 0.99 +SURE_NO = 0.01 + +class CharDistributionAnalysis: + def __init__(self): + self._mCharToFreqOrder = None # Mapping table to get frequency order from char order (get from GetOrder()) + self._mTableSize = None # Size of above table + self._mTypicalDistributionRatio = None # This is a constant value which varies from language to language, used in calculating confidence. 
See http://www.mozilla.org/projects/intl/UniversalCharsetDetection.html for further detail. + self.reset() + + def reset(self): + """reset analyser, clear any state""" + self._mDone = constants.False # If this flag is set to constants.True, detection is done and conclusion has been made + self._mTotalChars = 0 # Total characters encountered + self._mFreqChars = 0 # The number of characters whose frequency order is less than 512 + + def feed(self, aStr, aCharLen): + """feed a character with known length""" + if aCharLen == 2: + # we only care about 2-bytes character in our distribution analysis + order = self.get_order(aStr) + else: + order = -1 + if order >= 0: + self._mTotalChars += 1 + # order is valid + if order < self._mTableSize: + if 512 > self._mCharToFreqOrder[order]: + self._mFreqChars += 1 + + def get_confidence(self): + """return confidence based on existing data""" + # if we didn't receive any character in our consideration range, return negative answer + if self._mTotalChars <= 0: + return SURE_NO + + if self._mTotalChars != self._mFreqChars: + r = self._mFreqChars / ((self._mTotalChars - self._mFreqChars) * self._mTypicalDistributionRatio) + if r < SURE_YES: + return r + + # normalize confidence (we don't want to be 100% sure) + return SURE_YES + + def got_enough_data(self): + # It is not necessary to receive all data to draw conclusion. For charset detection, + # certain amount of data is enough + return self._mTotalChars > ENOUGH_DATA_THRESHOLD + + def get_order(self, aStr): + # We do not handle characters based on the original encoding string, but + # convert this encoding string to a number, here called order. + # This allows multiple encodings of a language to share one frequency table. 
+ return -1 + +class EUCTWDistributionAnalysis(CharDistributionAnalysis): + def __init__(self): + CharDistributionAnalysis.__init__(self) + self._mCharToFreqOrder = EUCTWCharToFreqOrder + self._mTableSize = EUCTW_TABLE_SIZE + self._mTypicalDistributionRatio = EUCTW_TYPICAL_DISTRIBUTION_RATIO + + def get_order(self, aStr): + # for euc-TW encoding, we are interested + # first byte range: 0xc4 -- 0xfe + # second byte range: 0xa1 -- 0xfe + # no validation needed here. State machine has done that + if aStr[0] >= '\xC4': + return 94 * (ord(aStr[0]) - 0xC4) + ord(aStr[1]) - 0xA1 + else: + return -1 + +class EUCKRDistributionAnalysis(CharDistributionAnalysis): + def __init__(self): + CharDistributionAnalysis.__init__(self) + self._mCharToFreqOrder = EUCKRCharToFreqOrder + self._mTableSize = EUCKR_TABLE_SIZE + self._mTypicalDistributionRatio = EUCKR_TYPICAL_DISTRIBUTION_RATIO + + def get_order(self, aStr): + # for euc-KR encoding, we are interested + # first byte range: 0xb0 -- 0xfe + # second byte range: 0xa1 -- 0xfe + # no validation needed here. State machine has done that + if aStr[0] >= '\xB0': + return 94 * (ord(aStr[0]) - 0xB0) + ord(aStr[1]) - 0xA1 + else: + return -1; + +class GB2312DistributionAnalysis(CharDistributionAnalysis): + def __init__(self): + CharDistributionAnalysis.__init__(self) + self._mCharToFreqOrder = GB2312CharToFreqOrder + self._mTableSize = GB2312_TABLE_SIZE + self._mTypicalDistributionRatio = GB2312_TYPICAL_DISTRIBUTION_RATIO + + def get_order(self, aStr): + # for GB2312 encoding, we are interested + # first byte range: 0xb0 -- 0xfe + # second byte range: 0xa1 -- 0xfe + # no validation needed here. 
State machine has done that + if (aStr[0] >= '\xB0') and (aStr[1] >= '\xA1'): + return 94 * (ord(aStr[0]) - 0xB0) + ord(aStr[1]) - 0xA1 + else: + return -1; + +class Big5DistributionAnalysis(CharDistributionAnalysis): + def __init__(self): + CharDistributionAnalysis.__init__(self) + self._mCharToFreqOrder = Big5CharToFreqOrder + self._mTableSize = BIG5_TABLE_SIZE + self._mTypicalDistributionRatio = BIG5_TYPICAL_DISTRIBUTION_RATIO + + def get_order(self, aStr): + # for big5 encoding, we are interested + # first byte range: 0xa4 -- 0xfe + # second byte range: 0x40 -- 0x7e , 0xa1 -- 0xfe + # no validation needed here. State machine has done that + if aStr[0] >= '\xA4': + if aStr[1] >= '\xA1': + return 157 * (ord(aStr[0]) - 0xA4) + ord(aStr[1]) - 0xA1 + 63 + else: + return 157 * (ord(aStr[0]) - 0xA4) + ord(aStr[1]) - 0x40 + else: + return -1 + +class SJISDistributionAnalysis(CharDistributionAnalysis): + def __init__(self): + CharDistributionAnalysis.__init__(self) + self._mCharToFreqOrder = JISCharToFreqOrder + self._mTableSize = JIS_TABLE_SIZE + self._mTypicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO + + def get_order(self, aStr): + # for sjis encoding, we are interested + # first byte range: 0x81 -- 0x9f , 0xe0 -- 0xfe + # second byte range: 0x40 -- 0x7e, 0x81 -- oxfe + # no validation needed here. 
State machine has done that + if (aStr[0] >= '\x81') and (aStr[0] <= '\x9F'): + order = 188 * (ord(aStr[0]) - 0x81) + elif (aStr[0] >= '\xE0') and (aStr[0] <= '\xEF'): + order = 188 * (ord(aStr[0]) - 0xE0 + 31) + else: + return -1; + order = order + ord(aStr[1]) - 0x40 + if aStr[1] > '\x7F': + order =- 1 + return order + +class EUCJPDistributionAnalysis(CharDistributionAnalysis): + def __init__(self): + CharDistributionAnalysis.__init__(self) + self._mCharToFreqOrder = JISCharToFreqOrder + self._mTableSize = JIS_TABLE_SIZE + self._mTypicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO + + def get_order(self, aStr): + # for euc-JP encoding, we are interested + # first byte range: 0xa0 -- 0xfe + # second byte range: 0xa1 -- 0xfe + # no validation needed here. State machine has done that + if aStr[0] >= '\xA0': + return 94 * (ord(aStr[0]) - 0xA1) + ord(aStr[1]) - 0xa1 + else: + return -1 diff --git a/fanficdownloader/chardet/charsetgroupprober.py b/fanficdownloader/chardet/charsetgroupprober.py new file mode 100644 index 00000000..51880694 --- /dev/null +++ b/fanficdownloader/chardet/charsetgroupprober.py @@ -0,0 +1,96 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Communicator client code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +import constants, sys +from charsetprober import CharSetProber + +class CharSetGroupProber(CharSetProber): + def __init__(self): + CharSetProber.__init__(self) + self._mActiveNum = 0 + self._mProbers = [] + self._mBestGuessProber = None + + def reset(self): + CharSetProber.reset(self) + self._mActiveNum = 0 + for prober in self._mProbers: + if prober: + prober.reset() + prober.active = constants.True + self._mActiveNum += 1 + self._mBestGuessProber = None + + def get_charset_name(self): + if not self._mBestGuessProber: + self.get_confidence() + if not self._mBestGuessProber: return None +# self._mBestGuessProber = self._mProbers[0] + return self._mBestGuessProber.get_charset_name() + + def feed(self, aBuf): + for prober in self._mProbers: + if not prober: continue + if not prober.active: continue + st = prober.feed(aBuf) + if not st: continue + if st == constants.eFoundIt: + self._mBestGuessProber = prober + return self.get_state() + elif st == constants.eNotMe: + prober.active = constants.False + self._mActiveNum -= 1 + if self._mActiveNum <= 0: + self._mState = constants.eNotMe + return self.get_state() + return self.get_state() + + def get_confidence(self): + st = self.get_state() + if st == constants.eFoundIt: + return 0.99 + elif st == constants.eNotMe: + return 0.01 + bestConf = 0.0 + self._mBestGuessProber = None + for prober in self._mProbers: + if not prober: continue + if not prober.active: + if constants._debug: + sys.stderr.write(prober.get_charset_name() + ' not active\n') + continue + cf = prober.get_confidence() + if constants._debug: + sys.stderr.write('%s confidence = %s\n' % 
(prober.get_charset_name(), cf)) + if bestConf < cf: + bestConf = cf + self._mBestGuessProber = prober + if not self._mBestGuessProber: return 0.0 + return bestConf +# else: +# self._mBestGuessProber = self._mProbers[0] +# return self._mBestGuessProber.get_confidence() diff --git a/fanficdownloader/chardet/charsetprober.py b/fanficdownloader/chardet/charsetprober.py new file mode 100644 index 00000000..3ac1683c --- /dev/null +++ b/fanficdownloader/chardet/charsetprober.py @@ -0,0 +1,60 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Universal charset detector code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 2001 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# Shy Shalom - original C code +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
class CharSetProber:
    """Abstract base for all charset probers.

    Defines the prober interface (reset / feed / get_state /
    get_charset_name / get_confidence) plus shared input filters.
    """

    def __init__(self):
        pass

    def reset(self):
        # Back to "still detecting".
        self._mState = constants.eDetecting

    def get_charset_name(self):
        # Base class identifies no charset.
        return None

    def feed(self, aBuf):
        pass

    def get_state(self):
        return self._mState

    def get_confidence(self):
        return 0.0

    def filter_high_bit_only(self, aBuf):
        """Collapse every run of 7-bit bytes to a single space."""
        return re.sub(r'([\x00-\x7F])+', ' ', aBuf)

    def filter_without_english_letters(self, aBuf):
        """Collapse every run of ASCII letters to a single space."""
        return re.sub(r'([A-Za-z])+', ' ', aBuf)

    def filter_with_english_letters(self, aBuf):
        # TODO
        return aBuf
class CodingStateMachine:
    """Table-driven state machine that validates a byte stream against
    one encoding model (the *SMModel dicts from escsm.py / mbcssm.py).

    The model supplies a byte->class table ('classTable'), a
    (state, class)->state transition table flattened with
    'classFactor' columns ('stateTable'), and a per-class character
    length table ('charLenTable').
    """

    def __init__(self, sm):
        self._mModel = sm
        self._mCurrentBytePos = 0
        self._mCurrentCharLen = 0
        self.reset()

    def reset(self):
        # Back to the start state, ready for a new character.
        self._mCurrentState = eStart

    def next_state(self, c):
        """Advance the machine by one byte; return the new state."""
        # Classify the byte; on the first byte of a character, also
        # latch that character's expected length.
        byteClass = self._mModel['classTable'][ord(c)]
        if self._mCurrentState == eStart:
            self._mCurrentBytePos = 0
            self._mCurrentCharLen = self._mModel['charLenTable'][byteClass]
        # Look up the transition in the flattened state table.
        row = self._mCurrentState * self._mModel['classFactor']
        self._mCurrentState = self._mModel['stateTable'][row + byteClass]
        self._mCurrentBytePos += 1
        return self._mCurrentState

    def get_current_charlen(self):
        return self._mCurrentCharLen

    def get_coding_state_machine(self):
        return self._mModel['name']
# Verbosity flag: non-zero makes the probers write diagnostics to stderr.
_debug = 0

# Prober-level detection states (returned by CharSetProber.get_state()).
eDetecting = 0  # still examining input
eFoundIt = 1    # a charset has been positively identified
eNotMe = 2      # input has ruled this charset out

# Coding-state-machine states (see codingstatemachine.py).
eStart = 0  # ready for the first byte of a character
eError = 1  # byte sequence is illegal for this model
eItsMe = 2  # byte sequence is conclusive for this model

# Confidence above which probers may stop early (shortcut detection).
SHORTCUT_THRESHOLD = 0.95

# Compatibility shim: on very old Python 2 interpreters the built-in
# names True/False do not exist, so provide module-level substitutes
# that the rest of the package uses as constants.True / constants.False.
import __builtin__
if not hasattr(__builtin__, 'False'):
    False = 0
    True = 1
else:
    False = __builtin__.False
    True = __builtin__.True
+# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +import constants, sys +from escsm import HZSMModel, ISO2022CNSMModel, ISO2022JPSMModel, ISO2022KRSMModel +from charsetprober import CharSetProber +from codingstatemachine import CodingStateMachine + +class EscCharSetProber(CharSetProber): + def __init__(self): + CharSetProber.__init__(self) + self._mCodingSM = [ \ + CodingStateMachine(HZSMModel), + CodingStateMachine(ISO2022CNSMModel), + CodingStateMachine(ISO2022JPSMModel), + CodingStateMachine(ISO2022KRSMModel) + ] + self.reset() + + def reset(self): + CharSetProber.reset(self) + for codingSM in self._mCodingSM: + if not codingSM: continue + codingSM.active = constants.True + codingSM.reset() + self._mActiveSM = len(self._mCodingSM) + self._mDetectedCharset = None + + def get_charset_name(self): + return self._mDetectedCharset + + def get_confidence(self): + if self._mDetectedCharset: + return 0.99 + else: + return 0.00 + + def feed(self, aBuf): + for c in aBuf: + for codingSM in self._mCodingSM: + if not codingSM: continue + if not codingSM.active: continue + codingState = codingSM.next_state(c) + if codingState == constants.eError: + codingSM.active = 
constants.False + self._mActiveSM -= 1 + if self._mActiveSM <= 0: + self._mState = constants.eNotMe + return self.get_state() + elif codingState == constants.eItsMe: + self._mState = constants.eFoundIt + self._mDetectedCharset = codingSM.get_coding_state_machine() + return self.get_state() + + return self.get_state() diff --git a/fanficdownloader/chardet/escsm.py b/fanficdownloader/chardet/escsm.py new file mode 100644 index 00000000..9fa22952 --- /dev/null +++ b/fanficdownloader/chardet/escsm.py @@ -0,0 +1,240 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is mozilla.org code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
+# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from constants import eStart, eError, eItsMe + +HZ_cls = ( \ +1,0,0,0,0,0,0,0, # 00 - 07 +0,0,0,0,0,0,0,0, # 08 - 0f +0,0,0,0,0,0,0,0, # 10 - 17 +0,0,0,1,0,0,0,0, # 18 - 1f +0,0,0,0,0,0,0,0, # 20 - 27 +0,0,0,0,0,0,0,0, # 28 - 2f +0,0,0,0,0,0,0,0, # 30 - 37 +0,0,0,0,0,0,0,0, # 38 - 3f +0,0,0,0,0,0,0,0, # 40 - 47 +0,0,0,0,0,0,0,0, # 48 - 4f +0,0,0,0,0,0,0,0, # 50 - 57 +0,0,0,0,0,0,0,0, # 58 - 5f +0,0,0,0,0,0,0,0, # 60 - 67 +0,0,0,0,0,0,0,0, # 68 - 6f +0,0,0,0,0,0,0,0, # 70 - 77 +0,0,0,4,0,5,2,0, # 78 - 7f +1,1,1,1,1,1,1,1, # 80 - 87 +1,1,1,1,1,1,1,1, # 88 - 8f +1,1,1,1,1,1,1,1, # 90 - 97 +1,1,1,1,1,1,1,1, # 98 - 9f +1,1,1,1,1,1,1,1, # a0 - a7 +1,1,1,1,1,1,1,1, # a8 - af +1,1,1,1,1,1,1,1, # b0 - b7 +1,1,1,1,1,1,1,1, # b8 - bf +1,1,1,1,1,1,1,1, # c0 - c7 +1,1,1,1,1,1,1,1, # c8 - cf +1,1,1,1,1,1,1,1, # d0 - d7 +1,1,1,1,1,1,1,1, # d8 - df +1,1,1,1,1,1,1,1, # e0 - e7 +1,1,1,1,1,1,1,1, # e8 - ef +1,1,1,1,1,1,1,1, # f0 - f7 +1,1,1,1,1,1,1,1, # f8 - ff +) + +HZ_st = ( \ +eStart,eError, 3,eStart,eStart,eStart,eError,eError,# 00-07 +eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,# 08-0f +eItsMe,eItsMe,eError,eError,eStart,eStart, 4,eError,# 10-17 + 5,eError, 6,eError, 5, 5, 4,eError,# 18-1f + 4,eError, 4, 4, 4,eError, 4,eError,# 20-27 + 4,eItsMe,eStart,eStart,eStart,eStart,eStart,eStart,# 28-2f +) + +HZCharLenTable = (0, 0, 0, 0, 0, 0) + +HZSMModel = {'classTable': HZ_cls, + 'classFactor': 6, + 'stateTable': HZ_st, + 'charLenTable': HZCharLenTable, + 'name': "HZ-GB-2312"} + +ISO2022CN_cls = ( \ +2,0,0,0,0,0,0,0, # 00 - 07 +0,0,0,0,0,0,0,0, # 08 - 0f +0,0,0,0,0,0,0,0, # 10 - 17 +0,0,0,1,0,0,0,0, # 18 - 1f +0,0,0,0,0,0,0,0, # 20 - 27 +0,3,0,0,0,0,0,0, # 28 - 2f +0,0,0,0,0,0,0,0, # 
30 - 37 +0,0,0,0,0,0,0,0, # 38 - 3f +0,0,0,4,0,0,0,0, # 40 - 47 +0,0,0,0,0,0,0,0, # 48 - 4f +0,0,0,0,0,0,0,0, # 50 - 57 +0,0,0,0,0,0,0,0, # 58 - 5f +0,0,0,0,0,0,0,0, # 60 - 67 +0,0,0,0,0,0,0,0, # 68 - 6f +0,0,0,0,0,0,0,0, # 70 - 77 +0,0,0,0,0,0,0,0, # 78 - 7f +2,2,2,2,2,2,2,2, # 80 - 87 +2,2,2,2,2,2,2,2, # 88 - 8f +2,2,2,2,2,2,2,2, # 90 - 97 +2,2,2,2,2,2,2,2, # 98 - 9f +2,2,2,2,2,2,2,2, # a0 - a7 +2,2,2,2,2,2,2,2, # a8 - af +2,2,2,2,2,2,2,2, # b0 - b7 +2,2,2,2,2,2,2,2, # b8 - bf +2,2,2,2,2,2,2,2, # c0 - c7 +2,2,2,2,2,2,2,2, # c8 - cf +2,2,2,2,2,2,2,2, # d0 - d7 +2,2,2,2,2,2,2,2, # d8 - df +2,2,2,2,2,2,2,2, # e0 - e7 +2,2,2,2,2,2,2,2, # e8 - ef +2,2,2,2,2,2,2,2, # f0 - f7 +2,2,2,2,2,2,2,2, # f8 - ff +) + +ISO2022CN_st = ( \ +eStart, 3,eError,eStart,eStart,eStart,eStart,eStart,# 00-07 +eStart,eError,eError,eError,eError,eError,eError,eError,# 08-0f +eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,# 10-17 +eItsMe,eItsMe,eItsMe,eError,eError,eError, 4,eError,# 18-1f +eError,eError,eError,eItsMe,eError,eError,eError,eError,# 20-27 + 5, 6,eError,eError,eError,eError,eError,eError,# 28-2f +eError,eError,eError,eItsMe,eError,eError,eError,eError,# 30-37 +eError,eError,eError,eError,eError,eItsMe,eError,eStart,# 38-3f +) + +ISO2022CNCharLenTable = (0, 0, 0, 0, 0, 0, 0, 0, 0) + +ISO2022CNSMModel = {'classTable': ISO2022CN_cls, + 'classFactor': 9, + 'stateTable': ISO2022CN_st, + 'charLenTable': ISO2022CNCharLenTable, + 'name': "ISO-2022-CN"} + +ISO2022JP_cls = ( \ +2,0,0,0,0,0,0,0, # 00 - 07 +0,0,0,0,0,0,2,2, # 08 - 0f +0,0,0,0,0,0,0,0, # 10 - 17 +0,0,0,1,0,0,0,0, # 18 - 1f +0,0,0,0,7,0,0,0, # 20 - 27 +3,0,0,0,0,0,0,0, # 28 - 2f +0,0,0,0,0,0,0,0, # 30 - 37 +0,0,0,0,0,0,0,0, # 38 - 3f +6,0,4,0,8,0,0,0, # 40 - 47 +0,9,5,0,0,0,0,0, # 48 - 4f +0,0,0,0,0,0,0,0, # 50 - 57 +0,0,0,0,0,0,0,0, # 58 - 5f +0,0,0,0,0,0,0,0, # 60 - 67 +0,0,0,0,0,0,0,0, # 68 - 6f +0,0,0,0,0,0,0,0, # 70 - 77 +0,0,0,0,0,0,0,0, # 78 - 7f +2,2,2,2,2,2,2,2, # 80 - 87 +2,2,2,2,2,2,2,2, # 88 - 8f 
+2,2,2,2,2,2,2,2, # 90 - 97 +2,2,2,2,2,2,2,2, # 98 - 9f +2,2,2,2,2,2,2,2, # a0 - a7 +2,2,2,2,2,2,2,2, # a8 - af +2,2,2,2,2,2,2,2, # b0 - b7 +2,2,2,2,2,2,2,2, # b8 - bf +2,2,2,2,2,2,2,2, # c0 - c7 +2,2,2,2,2,2,2,2, # c8 - cf +2,2,2,2,2,2,2,2, # d0 - d7 +2,2,2,2,2,2,2,2, # d8 - df +2,2,2,2,2,2,2,2, # e0 - e7 +2,2,2,2,2,2,2,2, # e8 - ef +2,2,2,2,2,2,2,2, # f0 - f7 +2,2,2,2,2,2,2,2, # f8 - ff +) + +ISO2022JP_st = ( \ +eStart, 3,eError,eStart,eStart,eStart,eStart,eStart,# 00-07 +eStart,eStart,eError,eError,eError,eError,eError,eError,# 08-0f +eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,# 10-17 +eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,# 18-1f +eError, 5,eError,eError,eError, 4,eError,eError,# 20-27 +eError,eError,eError, 6,eItsMe,eError,eItsMe,eError,# 28-2f +eError,eError,eError,eError,eError,eError,eItsMe,eItsMe,# 30-37 +eError,eError,eError,eItsMe,eError,eError,eError,eError,# 38-3f +eError,eError,eError,eError,eItsMe,eError,eStart,eStart,# 40-47 +) + +ISO2022JPCharLenTable = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0) + +ISO2022JPSMModel = {'classTable': ISO2022JP_cls, + 'classFactor': 10, + 'stateTable': ISO2022JP_st, + 'charLenTable': ISO2022JPCharLenTable, + 'name': "ISO-2022-JP"} + +ISO2022KR_cls = ( \ +2,0,0,0,0,0,0,0, # 00 - 07 +0,0,0,0,0,0,0,0, # 08 - 0f +0,0,0,0,0,0,0,0, # 10 - 17 +0,0,0,1,0,0,0,0, # 18 - 1f +0,0,0,0,3,0,0,0, # 20 - 27 +0,4,0,0,0,0,0,0, # 28 - 2f +0,0,0,0,0,0,0,0, # 30 - 37 +0,0,0,0,0,0,0,0, # 38 - 3f +0,0,0,5,0,0,0,0, # 40 - 47 +0,0,0,0,0,0,0,0, # 48 - 4f +0,0,0,0,0,0,0,0, # 50 - 57 +0,0,0,0,0,0,0,0, # 58 - 5f +0,0,0,0,0,0,0,0, # 60 - 67 +0,0,0,0,0,0,0,0, # 68 - 6f +0,0,0,0,0,0,0,0, # 70 - 77 +0,0,0,0,0,0,0,0, # 78 - 7f +2,2,2,2,2,2,2,2, # 80 - 87 +2,2,2,2,2,2,2,2, # 88 - 8f +2,2,2,2,2,2,2,2, # 90 - 97 +2,2,2,2,2,2,2,2, # 98 - 9f +2,2,2,2,2,2,2,2, # a0 - a7 +2,2,2,2,2,2,2,2, # a8 - af +2,2,2,2,2,2,2,2, # b0 - b7 +2,2,2,2,2,2,2,2, # b8 - bf +2,2,2,2,2,2,2,2, # c0 - c7 +2,2,2,2,2,2,2,2, # c8 - cf +2,2,2,2,2,2,2,2, # d0 - d7 
+2,2,2,2,2,2,2,2, # d8 - df +2,2,2,2,2,2,2,2, # e0 - e7 +2,2,2,2,2,2,2,2, # e8 - ef +2,2,2,2,2,2,2,2, # f0 - f7 +2,2,2,2,2,2,2,2, # f8 - ff +) + +ISO2022KR_st = ( \ +eStart, 3,eError,eStart,eStart,eStart,eError,eError,# 00-07 +eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,# 08-0f +eItsMe,eItsMe,eError,eError,eError, 4,eError,eError,# 10-17 +eError,eError,eError,eError, 5,eError,eError,eError,# 18-1f +eError,eError,eError,eItsMe,eStart,eStart,eStart,eStart,# 20-27 +) + +ISO2022KRCharLenTable = (0, 0, 0, 0, 0, 0) + +ISO2022KRSMModel = {'classTable': ISO2022KR_cls, + 'classFactor': 6, + 'stateTable': ISO2022KR_st, + 'charLenTable': ISO2022KRCharLenTable, + 'name': "ISO-2022-KR"} diff --git a/fanficdownloader/chardet/eucjpprober.py b/fanficdownloader/chardet/eucjpprober.py new file mode 100644 index 00000000..46a8b38b --- /dev/null +++ b/fanficdownloader/chardet/eucjpprober.py @@ -0,0 +1,85 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is mozilla.org code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
class EUCJPProber(MultiByteCharSetProber):
    """Prober for the EUC-JP encoding.

    Combines a coding state machine (byte-sequence legality), a context
    analyser and a character-distribution analyser; the reported
    confidence is the larger of the two analysers' scores.
    """

    def __init__(self):
        MultiByteCharSetProber.__init__(self)
        self._mCodingSM = CodingStateMachine(EUCJPSMModel)
        self._mDistributionAnalyzer = EUCJPDistributionAnalysis()
        self._mContextAnalyzer = EUCJPContextAnalysis()
        self.reset()

    def reset(self):
        MultiByteCharSetProber.reset(self)
        self._mContextAnalyzer.reset()

    def get_charset_name(self):
        return "EUC-JP"

    def feed(self, aBuf):
        """Run aBuf through the state machine and both analysers.

        Returns the prober state (eDetecting / eFoundIt / eNotMe).
        NOTE(review): assumes aBuf is a Python 2 byte string, so
        indexing yields single-character strings -- confirm before
        porting.
        """
        aLen = len(aBuf)
        for i in range(0, aLen):
            codingState = self._mCodingSM.next_state(aBuf[i])
            if codingState == eError:
                # Illegal byte sequence for EUC-JP: rule this charset out.
                if constants._debug:
                    sys.stderr.write(self.get_charset_name() + ' prober hit error at byte ' + str(i) + '\n')
                self._mState = constants.eNotMe
                break
            elif codingState == eItsMe:
                self._mState = constants.eFoundIt
                break
            elif codingState == eStart:
                # A character just completed; feed the 2-byte slice that
                # ends here to both analysers.
                charLen = self._mCodingSM.get_current_charlen()
                if i == 0:
                    # The buffer's first byte completes a character begun
                    # in the previous call: pair it with the byte saved
                    # in self._mLastChar.
                    self._mLastChar[1] = aBuf[0]
                    self._mContextAnalyzer.feed(self._mLastChar, charLen)
                    self._mDistributionAnalyzer.feed(self._mLastChar, charLen)
                else:
                    self._mContextAnalyzer.feed(aBuf[i-1:i+1], charLen)
                    self._mDistributionAnalyzer.feed(aBuf[i-1:i+1], charLen)

        # Remember the final byte in case a character straddles buffers.
        self._mLastChar[0] = aBuf[aLen - 1]

        if self.get_state() == constants.eDetecting:
            # Shortcut: stop early once confidence is high enough.
            if self._mContextAnalyzer.got_enough_data() and \
               (self.get_confidence() > constants.SHORTCUT_THRESHOLD):
                self._mState = constants.eFoundIt

        return self.get_state()

    def get_confidence(self):
        # Best of context vs. distribution analysis.
        contxtCf = self._mContextAnalyzer.get_confidence()
        distribCf = self._mDistributionAnalyzer.get_confidence()
        return max(contxtCf, distribCf)
+# +# Typical Distribution Ratio + +EUCKR_TYPICAL_DISTRIBUTION_RATIO = 6.0 + +EUCKR_TABLE_SIZE = 2352 + +# Char to FreqOrder table , +EUCKRCharToFreqOrder = ( \ + 13, 130, 120,1396, 481,1719,1720, 328, 609, 212,1721, 707, 400, 299,1722, 87, +1397,1723, 104, 536,1117,1203,1724,1267, 685,1268, 508,1725,1726,1727,1728,1398, +1399,1729,1730,1731, 141, 621, 326,1057, 368,1732, 267, 488, 20,1733,1269,1734, + 945,1400,1735, 47, 904,1270,1736,1737, 773, 248,1738, 409, 313, 786, 429,1739, + 116, 987, 813,1401, 683, 75,1204, 145,1740,1741,1742,1743, 16, 847, 667, 622, + 708,1744,1745,1746, 966, 787, 304, 129,1747, 60, 820, 123, 676,1748,1749,1750, +1751, 617,1752, 626,1753,1754,1755,1756, 653,1757,1758,1759,1760,1761,1762, 856, + 344,1763,1764,1765,1766, 89, 401, 418, 806, 905, 848,1767,1768,1769, 946,1205, + 709,1770,1118,1771, 241,1772,1773,1774,1271,1775, 569,1776, 999,1777,1778,1779, +1780, 337, 751,1058, 28, 628, 254,1781, 177, 906, 270, 349, 891,1079,1782, 19, +1783, 379,1784, 315,1785, 629, 754,1402, 559,1786, 636, 203,1206,1787, 710, 567, +1788, 935, 814,1789,1790,1207, 766, 528,1791,1792,1208,1793,1794,1795,1796,1797, +1403,1798,1799, 533,1059,1404,1405,1156,1406, 936, 884,1080,1800, 351,1801,1802, +1803,1804,1805, 801,1806,1807,1808,1119,1809,1157, 714, 474,1407,1810, 298, 899, + 885,1811,1120, 802,1158,1812, 892,1813,1814,1408, 659,1815,1816,1121,1817,1818, +1819,1820,1821,1822, 319,1823, 594, 545,1824, 815, 937,1209,1825,1826, 573,1409, +1022,1827,1210,1828,1829,1830,1831,1832,1833, 556, 722, 807,1122,1060,1834, 697, +1835, 900, 557, 715,1836,1410, 540,1411, 752,1159, 294, 597,1211, 976, 803, 770, +1412,1837,1838, 39, 794,1413, 358,1839, 371, 925,1840, 453, 661, 788, 531, 723, + 544,1023,1081, 869, 91,1841, 392, 430, 790, 602,1414, 677,1082, 457,1415,1416, +1842,1843, 475, 327,1024,1417, 795, 121,1844, 733, 403,1418,1845,1846,1847, 300, + 119, 711,1212, 627,1848,1272, 207,1849,1850, 796,1213, 382,1851, 519,1852,1083, + 893,1853,1854,1855, 367, 809, 487, 671,1856, 
663,1857,1858, 956, 471, 306, 857, +1859,1860,1160,1084,1861,1862,1863,1864,1865,1061,1866,1867,1868,1869,1870,1871, + 282, 96, 574,1872, 502,1085,1873,1214,1874, 907,1875,1876, 827, 977,1419,1420, +1421, 268,1877,1422,1878,1879,1880, 308,1881, 2, 537,1882,1883,1215,1884,1885, + 127, 791,1886,1273,1423,1887, 34, 336, 404, 643,1888, 571, 654, 894, 840,1889, + 0, 886,1274, 122, 575, 260, 908, 938,1890,1275, 410, 316,1891,1892, 100,1893, +1894,1123, 48,1161,1124,1025,1895, 633, 901,1276,1896,1897, 115, 816,1898, 317, +1899, 694,1900, 909, 734,1424, 572, 866,1425, 691, 85, 524,1010, 543, 394, 841, +1901,1902,1903,1026,1904,1905,1906,1907,1908,1909, 30, 451, 651, 988, 310,1910, +1911,1426, 810,1216, 93,1912,1913,1277,1217,1914, 858, 759, 45, 58, 181, 610, + 269,1915,1916, 131,1062, 551, 443,1000, 821,1427, 957, 895,1086,1917,1918, 375, +1919, 359,1920, 687,1921, 822,1922, 293,1923,1924, 40, 662, 118, 692, 29, 939, + 887, 640, 482, 174,1925, 69,1162, 728,1428, 910,1926,1278,1218,1279, 386, 870, + 217, 854,1163, 823,1927,1928,1929,1930, 834,1931, 78,1932, 859,1933,1063,1934, +1935,1936,1937, 438,1164, 208, 595,1938,1939,1940,1941,1219,1125,1942, 280, 888, +1429,1430,1220,1431,1943,1944,1945,1946,1947,1280, 150, 510,1432,1948,1949,1950, +1951,1952,1953,1954,1011,1087,1955,1433,1043,1956, 881,1957, 614, 958,1064,1065, +1221,1958, 638,1001, 860, 967, 896,1434, 989, 492, 553,1281,1165,1959,1282,1002, +1283,1222,1960,1961,1962,1963, 36, 383, 228, 753, 247, 454,1964, 876, 678,1965, +1966,1284, 126, 464, 490, 835, 136, 672, 529, 940,1088,1435, 473,1967,1968, 467, + 50, 390, 227, 587, 279, 378, 598, 792, 968, 240, 151, 160, 849, 882,1126,1285, + 639,1044, 133, 140, 288, 360, 811, 563,1027, 561, 142, 523,1969,1970,1971, 7, + 103, 296, 439, 407, 506, 634, 990,1972,1973,1974,1975, 645,1976,1977,1978,1979, +1980,1981, 236,1982,1436,1983,1984,1089, 192, 828, 618, 518,1166, 333,1127,1985, + 818,1223,1986,1987,1988,1989,1990,1991,1992,1993, 342,1128,1286, 746, 842,1994, +1995, 560, 
223,1287, 98, 8, 189, 650, 978,1288,1996,1437,1997, 17, 345, 250, + 423, 277, 234, 512, 226, 97, 289, 42, 167,1998, 201,1999,2000, 843, 836, 824, + 532, 338, 783,1090, 182, 576, 436,1438,1439, 527, 500,2001, 947, 889,2002,2003, +2004,2005, 262, 600, 314, 447,2006, 547,2007, 693, 738,1129,2008, 71,1440, 745, + 619, 688,2009, 829,2010,2011, 147,2012, 33, 948,2013,2014, 74, 224,2015, 61, + 191, 918, 399, 637,2016,1028,1130, 257, 902,2017,2018,2019,2020,2021,2022,2023, +2024,2025,2026, 837,2027,2028,2029,2030, 179, 874, 591, 52, 724, 246,2031,2032, +2033,2034,1167, 969,2035,1289, 630, 605, 911,1091,1168,2036,2037,2038,1441, 912, +2039, 623,2040,2041, 253,1169,1290,2042,1442, 146, 620, 611, 577, 433,2043,1224, + 719,1170, 959, 440, 437, 534, 84, 388, 480,1131, 159, 220, 198, 679,2044,1012, + 819,1066,1443, 113,1225, 194, 318,1003,1029,2045,2046,2047,2048,1067,2049,2050, +2051,2052,2053, 59, 913, 112,2054, 632,2055, 455, 144, 739,1291,2056, 273, 681, + 499,2057, 448,2058,2059, 760,2060,2061, 970, 384, 169, 245,1132,2062,2063, 414, +1444,2064,2065, 41, 235,2066, 157, 252, 877, 568, 919, 789, 580,2067, 725,2068, +2069,1292,2070,2071,1445,2072,1446,2073,2074, 55, 588, 66,1447, 271,1092,2075, +1226,2076, 960,1013, 372,2077,2078,2079,2080,2081,1293,2082,2083,2084,2085, 850, +2086,2087,2088,2089,2090, 186,2091,1068, 180,2092,2093,2094, 109,1227, 522, 606, +2095, 867,1448,1093, 991,1171, 926, 353,1133,2096, 581,2097,2098,2099,1294,1449, +1450,2100, 596,1172,1014,1228,2101,1451,1295,1173,1229,2102,2103,1296,1134,1452, + 949,1135,2104,2105,1094,1453,1454,1455,2106,1095,2107,2108,2109,2110,2111,2112, +2113,2114,2115,2116,2117, 804,2118,2119,1230,1231, 805,1456, 405,1136,2120,2121, +2122,2123,2124, 720, 701,1297, 992,1457, 927,1004,2125,2126,2127,2128,2129,2130, + 22, 417,2131, 303,2132, 385,2133, 971, 520, 513,2134,1174, 73,1096, 231, 274, + 962,1458, 673,2135,1459,2136, 152,1137,2137,2138,2139,2140,1005,1138,1460,1139, +2141,2142,2143,2144, 11, 374, 844,2145, 154,1232, 
46,1461,2146, 838, 830, 721, +1233, 106,2147, 90, 428, 462, 578, 566,1175, 352,2148,2149, 538,1234, 124,1298, +2150,1462, 761, 565,2151, 686,2152, 649,2153, 72, 173,2154, 460, 415,2155,1463, +2156,1235, 305,2157,2158,2159,2160,2161,2162, 579,2163,2164,2165,2166,2167, 747, +2168,2169,2170,2171,1464, 669,2172,2173,2174,2175,2176,1465,2177, 23, 530, 285, +2178, 335, 729,2179, 397,2180,2181,2182,1030,2183,2184, 698,2185,2186, 325,2187, +2188, 369,2189, 799,1097,1015, 348,2190,1069, 680,2191, 851,1466,2192,2193, 10, +2194, 613, 424,2195, 979, 108, 449, 589, 27, 172, 81,1031, 80, 774, 281, 350, +1032, 525, 301, 582,1176,2196, 674,1045,2197,2198,1467, 730, 762,2199,2200,2201, +2202,1468,2203, 993,2204,2205, 266,1070, 963,1140,2206,2207,2208, 664,1098, 972, +2209,2210,2211,1177,1469,1470, 871,2212,2213,2214,2215,2216,1471,2217,2218,2219, +2220,2221,2222,2223,2224,2225,2226,2227,1472,1236,2228,2229,2230,2231,2232,2233, +2234,2235,1299,2236,2237, 200,2238, 477, 373,2239,2240, 731, 825, 777,2241,2242, +2243, 521, 486, 548,2244,2245,2246,1473,1300, 53, 549, 137, 875, 76, 158,2247, +1301,1474, 469, 396,1016, 278, 712,2248, 321, 442, 503, 767, 744, 941,1237,1178, +1475,2249, 82, 178,1141,1179, 973,2250,1302,2251, 297,2252,2253, 570,2254,2255, +2256, 18, 450, 206,2257, 290, 292,1142,2258, 511, 162, 99, 346, 164, 735,2259, +1476,1477, 4, 554, 343, 798,1099,2260,1100,2261, 43, 171,1303, 139, 215,2262, +2263, 717, 775,2264,1033, 322, 216,2265, 831,2266, 149,2267,1304,2268,2269, 702, +1238, 135, 845, 347, 309,2270, 484,2271, 878, 655, 238,1006,1478,2272, 67,2273, + 295,2274,2275, 461,2276, 478, 942, 412,2277,1034,2278,2279,2280, 265,2281, 541, +2282,2283,2284,2285,2286, 70, 852,1071,2287,2288,2289,2290, 21, 56, 509, 117, + 432,2291,2292, 331, 980, 552,1101, 148, 284, 105, 393,1180,1239, 755,2293, 187, +2294,1046,1479,2295, 340,2296, 63,1047, 230,2297,2298,1305, 763,1306, 101, 800, + 808, 494,2299,2300,2301, 903,2302, 37,1072, 14, 5,2303, 79, 675,2304, 312, 
+2305,2306,2307,2308,2309,1480, 6,1307,2310,2311,2312, 1, 470, 35, 24, 229, +2313, 695, 210, 86, 778, 15, 784, 592, 779, 32, 77, 855, 964,2314, 259,2315, + 501, 380,2316,2317, 83, 981, 153, 689,1308,1481,1482,1483,2318,2319, 716,1484, +2320,2321,2322,2323,2324,2325,1485,2326,2327, 128, 57, 68, 261,1048, 211, 170, +1240, 31,2328, 51, 435, 742,2329,2330,2331, 635,2332, 264, 456,2333,2334,2335, + 425,2336,1486, 143, 507, 263, 943,2337, 363, 920,1487, 256,1488,1102, 243, 601, +1489,2338,2339,2340,2341,2342,2343,2344, 861,2345,2346,2347,2348,2349,2350, 395, +2351,1490,1491, 62, 535, 166, 225,2352,2353, 668, 419,1241, 138, 604, 928,2354, +1181,2355,1492,1493,2356,2357,2358,1143,2359, 696,2360, 387, 307,1309, 682, 476, +2361,2362, 332, 12, 222, 156,2363, 232,2364, 641, 276, 656, 517,1494,1495,1035, + 416, 736,1496,2365,1017, 586,2366,2367,2368,1497,2369, 242,2370,2371,2372,1498, +2373, 965, 713,2374,2375,2376,2377, 740, 982,1499, 944,1500,1007,2378,2379,1310, +1501,2380,2381,2382, 785, 329,2383,2384,1502,2385,2386,2387, 932,2388,1503,2389, +2390,2391,2392,1242,2393,2394,2395,2396,2397, 994, 950,2398,2399,2400,2401,1504, +1311,2402,2403,2404,2405,1049, 749,2406,2407, 853, 718,1144,1312,2408,1182,1505, +2409,2410, 255, 516, 479, 564, 550, 214,1506,1507,1313, 413, 239, 444, 339,1145, +1036,1508,1509,1314,1037,1510,1315,2411,1511,2412,2413,2414, 176, 703, 497, 624, + 593, 921, 302,2415, 341, 165,1103,1512,2416,1513,2417,2418,2419, 376,2420, 700, +2421,2422,2423, 258, 768,1316,2424,1183,2425, 995, 608,2426,2427,2428,2429, 221, +2430,2431,2432,2433,2434,2435,2436,2437, 195, 323, 726, 188, 897, 983,1317, 377, + 644,1050, 879,2438, 452,2439,2440,2441,2442,2443,2444, 914,2445,2446,2447,2448, + 915, 489,2449,1514,1184,2450,2451, 515, 64, 427, 495,2452, 583,2453, 483, 485, +1038, 562, 213,1515, 748, 666,2454,2455,2456,2457, 334,2458, 780, 996,1008, 705, +1243,2459,2460,2461,2462,2463, 114,2464, 493,1146, 366, 163,1516, 961,1104,2465, + 291,2466,1318,1105,2467,1517, 365,2468, 355, 
951,1244,2469,1319,2470, 631,2471, +2472, 218,1320, 364, 320, 756,1518,1519,1321,1520,1322,2473,2474,2475,2476, 997, +2477,2478,2479,2480, 665,1185,2481, 916,1521,2482,2483,2484, 584, 684,2485,2486, + 797,2487,1051,1186,2488,2489,2490,1522,2491,2492, 370,2493,1039,1187, 65,2494, + 434, 205, 463,1188,2495, 125, 812, 391, 402, 826, 699, 286, 398, 155, 781, 771, + 585,2496, 590, 505,1073,2497, 599, 244, 219, 917,1018, 952, 646,1523,2498,1323, +2499,2500, 49, 984, 354, 741,2501, 625,2502,1324,2503,1019, 190, 357, 757, 491, + 95, 782, 868,2504,2505,2506,2507,2508,2509, 134,1524,1074, 422,1525, 898,2510, + 161,2511,2512,2513,2514, 769,2515,1526,2516,2517, 411,1325,2518, 472,1527,2519, +2520,2521,2522,2523,2524, 985,2525,2526,2527,2528,2529,2530, 764,2531,1245,2532, +2533, 25, 204, 311,2534, 496,2535,1052,2536,2537,2538,2539,2540,2541,2542, 199, + 704, 504, 468, 758, 657,1528, 196, 44, 839,1246, 272, 750,2543, 765, 862,2544, +2545,1326,2546, 132, 615, 933,2547, 732,2548,2549,2550,1189,1529,2551, 283,1247, +1053, 607, 929,2552,2553,2554, 930, 183, 872, 616,1040,1147,2555,1148,1020, 441, + 249,1075,2556,2557,2558, 466, 743,2559,2560,2561, 92, 514, 426, 420, 526,2562, +2563,2564,2565,2566,2567,2568, 185,2569,2570,2571,2572, 776,1530, 658,2573, 362, +2574, 361, 922,1076, 793,2575,2576,2577,2578,2579,2580,1531, 251,2581,2582,2583, +2584,1532, 54, 612, 237,1327,2585,2586, 275, 408, 647, 111,2587,1533,1106, 465, + 3, 458, 9, 38,2588, 107, 110, 890, 209, 26, 737, 498,2589,1534,2590, 431, + 202, 88,1535, 356, 287,1107, 660,1149,2591, 381,1536, 986,1150, 445,1248,1151, + 974,2592,2593, 846,2594, 446, 953, 184,1249,1250, 727,2595, 923, 193, 883,2596, +2597,2598, 102, 324, 539, 817,2599, 421,1041,2600, 832,2601, 94, 175, 197, 406, +2602, 459,2603,2604,2605,2606,2607, 330, 555,2608,2609,2610, 706,1108, 389,2611, +2612,2613,2614, 233,2615, 833, 558, 931, 954,1251,2616,2617,1537, 546,2618,2619, +1009,2620,2621,2622,1538, 690,1328,2623, 955,2624,1539,2625,2626, 772,2627,2628, 
+2629,2630,2631, 924, 648, 863, 603,2632,2633, 934,1540, 864, 865,2634, 642,1042, + 670,1190,2635,2636,2637,2638, 168,2639, 652, 873, 542,1054,1541,2640,2641,2642, # 512, 256 +#Everything below is of no interest for detection purpose +2643,2644,2645,2646,2647,2648,2649,2650,2651,2652,2653,2654,2655,2656,2657,2658, +2659,2660,2661,2662,2663,2664,2665,2666,2667,2668,2669,2670,2671,2672,2673,2674, +2675,2676,2677,2678,2679,2680,2681,2682,2683,2684,2685,2686,2687,2688,2689,2690, +2691,2692,2693,2694,2695,2696,2697,2698,2699,1542, 880,2700,2701,2702,2703,2704, +2705,2706,2707,2708,2709,2710,2711,2712,2713,2714,2715,2716,2717,2718,2719,2720, +2721,2722,2723,2724,2725,1543,2726,2727,2728,2729,2730,2731,2732,1544,2733,2734, +2735,2736,2737,2738,2739,2740,2741,2742,2743,2744,2745,2746,2747,2748,2749,2750, +2751,2752,2753,2754,1545,2755,2756,2757,2758,2759,2760,2761,2762,2763,2764,2765, +2766,1546,2767,1547,2768,2769,2770,2771,2772,2773,2774,2775,2776,2777,2778,2779, +2780,2781,2782,2783,2784,2785,2786,1548,2787,2788,2789,1109,2790,2791,2792,2793, +2794,2795,2796,2797,2798,2799,2800,2801,2802,2803,2804,2805,2806,2807,2808,2809, +2810,2811,2812,1329,2813,2814,2815,2816,2817,2818,2819,2820,2821,2822,2823,2824, +2825,2826,2827,2828,2829,2830,2831,2832,2833,2834,2835,2836,2837,2838,2839,2840, +2841,2842,2843,2844,2845,2846,2847,2848,2849,2850,2851,2852,2853,2854,2855,2856, +1549,2857,2858,2859,2860,1550,2861,2862,1551,2863,2864,2865,2866,2867,2868,2869, +2870,2871,2872,2873,2874,1110,1330,2875,2876,2877,2878,2879,2880,2881,2882,2883, +2884,2885,2886,2887,2888,2889,2890,2891,2892,2893,2894,2895,2896,2897,2898,2899, +2900,2901,2902,2903,2904,2905,2906,2907,2908,2909,2910,2911,2912,2913,2914,2915, +2916,2917,2918,2919,2920,2921,2922,2923,2924,2925,2926,2927,2928,2929,2930,1331, +2931,2932,2933,2934,2935,2936,2937,2938,2939,2940,2941,2942,2943,1552,2944,2945, +2946,2947,2948,2949,2950,2951,2952,2953,2954,2955,2956,2957,2958,2959,2960,2961, 
+2962,2963,2964,1252,2965,2966,2967,2968,2969,2970,2971,2972,2973,2974,2975,2976, +2977,2978,2979,2980,2981,2982,2983,2984,2985,2986,2987,2988,2989,2990,2991,2992, +2993,2994,2995,2996,2997,2998,2999,3000,3001,3002,3003,3004,3005,3006,3007,3008, +3009,3010,3011,3012,1553,3013,3014,3015,3016,3017,1554,3018,1332,3019,3020,3021, +3022,3023,3024,3025,3026,3027,3028,3029,3030,3031,3032,3033,3034,3035,3036,3037, +3038,3039,3040,3041,3042,3043,3044,3045,3046,3047,3048,3049,3050,1555,3051,3052, +3053,1556,1557,3054,3055,3056,3057,3058,3059,3060,3061,3062,3063,3064,3065,3066, +3067,1558,3068,3069,3070,3071,3072,3073,3074,3075,3076,1559,3077,3078,3079,3080, +3081,3082,3083,1253,3084,3085,3086,3087,3088,3089,3090,3091,3092,3093,3094,3095, +3096,3097,3098,3099,3100,3101,3102,3103,3104,3105,3106,3107,3108,1152,3109,3110, +3111,3112,3113,1560,3114,3115,3116,3117,1111,3118,3119,3120,3121,3122,3123,3124, +3125,3126,3127,3128,3129,3130,3131,3132,3133,3134,3135,3136,3137,3138,3139,3140, +3141,3142,3143,3144,3145,3146,3147,3148,3149,3150,3151,3152,3153,3154,3155,3156, +3157,3158,3159,3160,3161,3162,3163,3164,3165,3166,3167,3168,3169,3170,3171,3172, +3173,3174,3175,3176,1333,3177,3178,3179,3180,3181,3182,3183,3184,3185,3186,3187, +3188,3189,1561,3190,3191,1334,3192,3193,3194,3195,3196,3197,3198,3199,3200,3201, +3202,3203,3204,3205,3206,3207,3208,3209,3210,3211,3212,3213,3214,3215,3216,3217, +3218,3219,3220,3221,3222,3223,3224,3225,3226,3227,3228,3229,3230,3231,3232,3233, +3234,1562,3235,3236,3237,3238,3239,3240,3241,3242,3243,3244,3245,3246,3247,3248, +3249,3250,3251,3252,3253,3254,3255,3256,3257,3258,3259,3260,3261,3262,3263,3264, +3265,3266,3267,3268,3269,3270,3271,3272,3273,3274,3275,3276,3277,1563,3278,3279, +3280,3281,3282,3283,3284,3285,3286,3287,3288,3289,3290,3291,3292,3293,3294,3295, +3296,3297,3298,3299,3300,3301,3302,3303,3304,3305,3306,3307,3308,3309,3310,3311, +3312,3313,3314,3315,3316,3317,3318,3319,3320,3321,3322,3323,3324,3325,3326,3327, 
+3328,3329,3330,3331,3332,3333,3334,3335,3336,3337,3338,3339,3340,3341,3342,3343, +3344,3345,3346,3347,3348,3349,3350,3351,3352,3353,3354,3355,3356,3357,3358,3359, +3360,3361,3362,3363,3364,1335,3365,3366,3367,3368,3369,3370,3371,3372,3373,3374, +3375,3376,3377,3378,3379,3380,3381,3382,3383,3384,3385,3386,3387,1336,3388,3389, +3390,3391,3392,3393,3394,3395,3396,3397,3398,3399,3400,3401,3402,3403,3404,3405, +3406,3407,3408,3409,3410,3411,3412,3413,3414,1337,3415,3416,3417,3418,3419,1338, +3420,3421,3422,1564,1565,3423,3424,3425,3426,3427,3428,3429,3430,3431,1254,3432, +3433,3434,1339,3435,3436,3437,3438,3439,1566,3440,3441,3442,3443,3444,3445,3446, +3447,3448,3449,3450,3451,3452,3453,3454,1255,3455,3456,3457,3458,3459,1567,1191, +3460,1568,1569,3461,3462,3463,1570,3464,3465,3466,3467,3468,1571,3469,3470,3471, +3472,3473,1572,3474,3475,3476,3477,3478,3479,3480,3481,3482,3483,3484,3485,3486, +1340,3487,3488,3489,3490,3491,3492,1021,3493,3494,3495,3496,3497,3498,1573,3499, +1341,3500,3501,3502,3503,3504,3505,3506,3507,3508,3509,3510,3511,1342,3512,3513, +3514,3515,3516,1574,1343,3517,3518,3519,1575,3520,1576,3521,3522,3523,3524,3525, +3526,3527,3528,3529,3530,3531,3532,3533,3534,3535,3536,3537,3538,3539,3540,3541, +3542,3543,3544,3545,3546,3547,3548,3549,3550,3551,3552,3553,3554,3555,3556,3557, +3558,3559,3560,3561,3562,3563,3564,3565,3566,3567,3568,3569,3570,3571,3572,3573, +3574,3575,3576,3577,3578,3579,3580,1577,3581,3582,1578,3583,3584,3585,3586,3587, +3588,3589,3590,3591,3592,3593,3594,3595,3596,3597,3598,3599,3600,3601,3602,3603, +3604,1579,3605,3606,3607,3608,3609,3610,3611,3612,3613,3614,3615,3616,3617,3618, +3619,3620,3621,3622,3623,3624,3625,3626,3627,3628,3629,1580,3630,3631,1581,3632, +3633,3634,3635,3636,3637,3638,3639,3640,3641,3642,3643,3644,3645,3646,3647,3648, +3649,3650,3651,3652,3653,3654,3655,3656,1582,3657,3658,3659,3660,3661,3662,3663, +3664,3665,3666,3667,3668,3669,3670,3671,3672,3673,3674,3675,3676,3677,3678,3679, 
+3680,3681,3682,3683,3684,3685,3686,3687,3688,3689,3690,3691,3692,3693,3694,3695, +3696,3697,3698,3699,3700,1192,3701,3702,3703,3704,1256,3705,3706,3707,3708,1583, +1257,3709,3710,3711,3712,3713,3714,3715,3716,1584,3717,3718,3719,3720,3721,3722, +3723,3724,3725,3726,3727,3728,3729,3730,3731,3732,3733,3734,3735,3736,3737,3738, +3739,3740,3741,3742,3743,3744,3745,1344,3746,3747,3748,3749,3750,3751,3752,3753, +3754,3755,3756,1585,3757,3758,3759,3760,3761,3762,3763,3764,3765,3766,1586,3767, +3768,3769,3770,3771,3772,3773,3774,3775,3776,3777,3778,1345,3779,3780,3781,3782, +3783,3784,3785,3786,3787,3788,3789,3790,3791,3792,3793,3794,3795,1346,1587,3796, +3797,1588,3798,3799,3800,3801,3802,3803,3804,3805,3806,1347,3807,3808,3809,3810, +3811,1589,3812,3813,3814,3815,3816,3817,3818,3819,3820,3821,1590,3822,3823,1591, +1348,3824,3825,3826,3827,3828,3829,3830,1592,3831,3832,1593,3833,3834,3835,3836, +3837,3838,3839,3840,3841,3842,3843,3844,1349,3845,3846,3847,3848,3849,3850,3851, +3852,3853,3854,3855,3856,3857,3858,1594,3859,3860,3861,3862,3863,3864,3865,3866, +3867,3868,3869,1595,3870,3871,3872,3873,1596,3874,3875,3876,3877,3878,3879,3880, +3881,3882,3883,3884,3885,3886,1597,3887,3888,3889,3890,3891,3892,3893,3894,3895, +1598,3896,3897,3898,1599,1600,3899,1350,3900,1351,3901,3902,1352,3903,3904,3905, +3906,3907,3908,3909,3910,3911,3912,3913,3914,3915,3916,3917,3918,3919,3920,3921, +3922,3923,3924,1258,3925,3926,3927,3928,3929,3930,3931,1193,3932,1601,3933,3934, +3935,3936,3937,3938,3939,3940,3941,3942,3943,1602,3944,3945,3946,3947,3948,1603, +3949,3950,3951,3952,3953,3954,3955,3956,3957,3958,3959,3960,3961,3962,3963,3964, +3965,1604,3966,3967,3968,3969,3970,3971,3972,3973,3974,3975,3976,3977,1353,3978, +3979,3980,3981,3982,3983,3984,3985,3986,3987,3988,3989,3990,3991,1354,3992,3993, +3994,3995,3996,3997,3998,3999,4000,4001,4002,4003,4004,4005,4006,4007,4008,4009, +4010,4011,4012,4013,4014,4015,4016,4017,4018,4019,4020,4021,4022,4023,1355,4024, 
+4025,4026,4027,4028,4029,4030,4031,4032,4033,4034,4035,4036,4037,4038,4039,4040, +1605,4041,4042,4043,4044,4045,4046,4047,4048,4049,4050,4051,4052,4053,4054,4055, +4056,4057,4058,4059,4060,1606,4061,4062,4063,4064,1607,4065,4066,4067,4068,4069, +4070,4071,4072,4073,4074,4075,4076,1194,4077,4078,1608,4079,4080,4081,4082,4083, +4084,4085,4086,4087,1609,4088,4089,4090,4091,4092,4093,4094,4095,4096,4097,4098, +4099,4100,4101,4102,4103,4104,4105,4106,4107,4108,1259,4109,4110,4111,4112,4113, +4114,4115,4116,4117,4118,4119,4120,4121,4122,4123,4124,1195,4125,4126,4127,1610, +4128,4129,4130,4131,4132,4133,4134,4135,4136,4137,1356,4138,4139,4140,4141,4142, +4143,4144,1611,4145,4146,4147,4148,4149,4150,4151,4152,4153,4154,4155,4156,4157, +4158,4159,4160,4161,4162,4163,4164,4165,4166,4167,4168,4169,4170,4171,4172,4173, +4174,4175,4176,4177,4178,4179,4180,4181,4182,4183,4184,4185,4186,4187,4188,4189, +4190,4191,4192,4193,4194,4195,4196,4197,4198,4199,4200,4201,4202,4203,4204,4205, +4206,4207,4208,4209,4210,4211,4212,4213,4214,4215,4216,4217,4218,4219,1612,4220, +4221,4222,4223,4224,4225,4226,4227,1357,4228,1613,4229,4230,4231,4232,4233,4234, +4235,4236,4237,4238,4239,4240,4241,4242,4243,1614,4244,4245,4246,4247,4248,4249, +4250,4251,4252,4253,4254,4255,4256,4257,4258,4259,4260,4261,4262,4263,4264,4265, +4266,4267,4268,4269,4270,1196,1358,4271,4272,4273,4274,4275,4276,4277,4278,4279, +4280,4281,4282,4283,4284,4285,4286,4287,1615,4288,4289,4290,4291,4292,4293,4294, +4295,4296,4297,4298,4299,4300,4301,4302,4303,4304,4305,4306,4307,4308,4309,4310, +4311,4312,4313,4314,4315,4316,4317,4318,4319,4320,4321,4322,4323,4324,4325,4326, +4327,4328,4329,4330,4331,4332,4333,4334,1616,4335,4336,4337,4338,4339,4340,4341, +4342,4343,4344,4345,4346,4347,4348,4349,4350,4351,4352,4353,4354,4355,4356,4357, +4358,4359,4360,1617,4361,4362,4363,4364,4365,1618,4366,4367,4368,4369,4370,4371, +4372,4373,4374,4375,4376,4377,4378,4379,4380,4381,4382,4383,4384,4385,4386,4387, 
+4388,4389,4390,4391,4392,4393,4394,4395,4396,4397,4398,4399,4400,4401,4402,4403, +4404,4405,4406,4407,4408,4409,4410,4411,4412,4413,4414,4415,4416,1619,4417,4418, +4419,4420,4421,4422,4423,4424,4425,1112,4426,4427,4428,4429,4430,1620,4431,4432, +4433,4434,4435,4436,4437,4438,4439,4440,4441,4442,1260,1261,4443,4444,4445,4446, +4447,4448,4449,4450,4451,4452,4453,4454,4455,1359,4456,4457,4458,4459,4460,4461, +4462,4463,4464,4465,1621,4466,4467,4468,4469,4470,4471,4472,4473,4474,4475,4476, +4477,4478,4479,4480,4481,4482,4483,4484,4485,4486,4487,4488,4489,1055,4490,4491, +4492,4493,4494,4495,4496,4497,4498,4499,4500,4501,4502,4503,4504,4505,4506,4507, +4508,4509,4510,4511,4512,4513,4514,4515,4516,4517,4518,1622,4519,4520,4521,1623, +4522,4523,4524,4525,4526,4527,4528,4529,4530,4531,4532,4533,4534,4535,1360,4536, +4537,4538,4539,4540,4541,4542,4543, 975,4544,4545,4546,4547,4548,4549,4550,4551, +4552,4553,4554,4555,4556,4557,4558,4559,4560,4561,4562,4563,4564,4565,4566,4567, +4568,4569,4570,4571,1624,4572,4573,4574,4575,4576,1625,4577,4578,4579,4580,4581, +4582,4583,4584,1626,4585,4586,4587,4588,4589,4590,4591,4592,4593,4594,4595,1627, +4596,4597,4598,4599,4600,4601,4602,4603,4604,4605,4606,4607,4608,4609,4610,4611, +4612,4613,4614,4615,1628,4616,4617,4618,4619,4620,4621,4622,4623,4624,4625,4626, +4627,4628,4629,4630,4631,4632,4633,4634,4635,4636,4637,4638,4639,4640,4641,4642, +4643,4644,4645,4646,4647,4648,4649,1361,4650,4651,4652,4653,4654,4655,4656,4657, +4658,4659,4660,4661,1362,4662,4663,4664,4665,4666,4667,4668,4669,4670,4671,4672, +4673,4674,4675,4676,4677,4678,4679,4680,4681,4682,1629,4683,4684,4685,4686,4687, +1630,4688,4689,4690,4691,1153,4692,4693,4694,1113,4695,4696,4697,4698,4699,4700, +4701,4702,4703,4704,4705,4706,4707,4708,4709,4710,4711,1197,4712,4713,4714,4715, +4716,4717,4718,4719,4720,4721,4722,4723,4724,4725,4726,4727,4728,4729,4730,4731, +4732,4733,4734,4735,1631,4736,1632,4737,4738,4739,4740,4741,4742,4743,4744,1633, 
+4745,4746,4747,4748,4749,1262,4750,4751,4752,4753,4754,1363,4755,4756,4757,4758, +4759,4760,4761,4762,4763,4764,4765,4766,4767,4768,1634,4769,4770,4771,4772,4773, +4774,4775,4776,4777,4778,1635,4779,4780,4781,4782,4783,4784,4785,4786,4787,4788, +4789,1636,4790,4791,4792,4793,4794,4795,4796,4797,4798,4799,4800,4801,4802,4803, +4804,4805,4806,1637,4807,4808,4809,1638,4810,4811,4812,4813,4814,4815,4816,4817, +4818,1639,4819,4820,4821,4822,4823,4824,4825,4826,4827,4828,4829,4830,4831,4832, +4833,1077,4834,4835,4836,4837,4838,4839,4840,4841,4842,4843,4844,4845,4846,4847, +4848,4849,4850,4851,4852,4853,4854,4855,4856,4857,4858,4859,4860,4861,4862,4863, +4864,4865,4866,4867,4868,4869,4870,4871,4872,4873,4874,4875,4876,4877,4878,4879, +4880,4881,4882,4883,1640,4884,4885,1641,4886,4887,4888,4889,4890,4891,4892,4893, +4894,4895,4896,4897,4898,4899,4900,4901,4902,4903,4904,4905,4906,4907,4908,4909, +4910,4911,1642,4912,4913,4914,1364,4915,4916,4917,4918,4919,4920,4921,4922,4923, +4924,4925,4926,4927,4928,4929,4930,4931,1643,4932,4933,4934,4935,4936,4937,4938, +4939,4940,4941,4942,4943,4944,4945,4946,4947,4948,4949,4950,4951,4952,4953,4954, +4955,4956,4957,4958,4959,4960,4961,4962,4963,4964,4965,4966,4967,4968,4969,4970, +4971,4972,4973,4974,4975,4976,4977,4978,4979,4980,1644,4981,4982,4983,4984,1645, +4985,4986,1646,4987,4988,4989,4990,4991,4992,4993,4994,4995,4996,4997,4998,4999, +5000,5001,5002,5003,5004,5005,1647,5006,1648,5007,5008,5009,5010,5011,5012,1078, +5013,5014,5015,5016,5017,5018,5019,5020,5021,5022,5023,5024,5025,5026,5027,5028, +1365,5029,5030,5031,5032,5033,5034,5035,5036,5037,5038,5039,1649,5040,5041,5042, +5043,5044,5045,1366,5046,5047,5048,5049,5050,5051,5052,5053,5054,5055,1650,5056, +5057,5058,5059,5060,5061,5062,5063,5064,5065,5066,5067,5068,5069,5070,5071,5072, +5073,5074,5075,5076,5077,1651,5078,5079,5080,5081,5082,5083,5084,5085,5086,5087, +5088,5089,5090,5091,5092,5093,5094,5095,5096,5097,5098,5099,5100,5101,5102,5103, 
+5104,5105,5106,5107,5108,5109,5110,1652,5111,5112,5113,5114,5115,5116,5117,5118, +1367,5119,5120,5121,5122,5123,5124,5125,5126,5127,5128,5129,1653,5130,5131,5132, +5133,5134,5135,5136,5137,5138,5139,5140,5141,5142,5143,5144,5145,5146,5147,5148, +5149,1368,5150,1654,5151,1369,5152,5153,5154,5155,5156,5157,5158,5159,5160,5161, +5162,5163,5164,5165,5166,5167,5168,5169,5170,5171,5172,5173,5174,5175,5176,5177, +5178,1370,5179,5180,5181,5182,5183,5184,5185,5186,5187,5188,5189,5190,5191,5192, +5193,5194,5195,5196,5197,5198,1655,5199,5200,5201,5202,1656,5203,5204,5205,5206, +1371,5207,1372,5208,5209,5210,5211,1373,5212,5213,1374,5214,5215,5216,5217,5218, +5219,5220,5221,5222,5223,5224,5225,5226,5227,5228,5229,5230,5231,5232,5233,5234, +5235,5236,5237,5238,5239,5240,5241,5242,5243,5244,5245,5246,5247,1657,5248,5249, +5250,5251,1658,1263,5252,5253,5254,5255,5256,1375,5257,5258,5259,5260,5261,5262, +5263,5264,5265,5266,5267,5268,5269,5270,5271,5272,5273,5274,5275,5276,5277,5278, +5279,5280,5281,5282,5283,1659,5284,5285,5286,5287,5288,5289,5290,5291,5292,5293, +5294,5295,5296,5297,5298,5299,5300,1660,5301,5302,5303,5304,5305,5306,5307,5308, +5309,5310,5311,5312,5313,5314,5315,5316,5317,5318,5319,5320,5321,1376,5322,5323, +5324,5325,5326,5327,5328,5329,5330,5331,5332,5333,1198,5334,5335,5336,5337,5338, +5339,5340,5341,5342,5343,1661,5344,5345,5346,5347,5348,5349,5350,5351,5352,5353, +5354,5355,5356,5357,5358,5359,5360,5361,5362,5363,5364,5365,5366,5367,5368,5369, +5370,5371,5372,5373,5374,5375,5376,5377,5378,5379,5380,5381,5382,5383,5384,5385, +5386,5387,5388,5389,5390,5391,5392,5393,5394,5395,5396,5397,5398,1264,5399,5400, +5401,5402,5403,5404,5405,5406,5407,5408,5409,5410,5411,5412,1662,5413,5414,5415, +5416,1663,5417,5418,5419,5420,5421,5422,5423,5424,5425,5426,5427,5428,5429,5430, +5431,5432,5433,5434,5435,5436,5437,5438,1664,5439,5440,5441,5442,5443,5444,5445, +5446,5447,5448,5449,5450,5451,5452,5453,5454,5455,5456,5457,5458,5459,5460,5461, 
+5462,5463,5464,5465,5466,5467,5468,5469,5470,5471,5472,5473,5474,5475,5476,5477, +5478,1154,5479,5480,5481,5482,5483,5484,5485,1665,5486,5487,5488,5489,5490,5491, +5492,5493,5494,5495,5496,5497,5498,5499,5500,5501,5502,5503,5504,5505,5506,5507, +5508,5509,5510,5511,5512,5513,5514,5515,5516,5517,5518,5519,5520,5521,5522,5523, +5524,5525,5526,5527,5528,5529,5530,5531,5532,5533,5534,5535,5536,5537,5538,5539, +5540,5541,5542,5543,5544,5545,5546,5547,5548,1377,5549,5550,5551,5552,5553,5554, +5555,5556,5557,5558,5559,5560,5561,5562,5563,5564,5565,5566,5567,5568,5569,5570, +1114,5571,5572,5573,5574,5575,5576,5577,5578,5579,5580,5581,5582,5583,5584,5585, +5586,5587,5588,5589,5590,5591,5592,1378,5593,5594,5595,5596,5597,5598,5599,5600, +5601,5602,5603,5604,5605,5606,5607,5608,5609,5610,5611,5612,5613,5614,1379,5615, +5616,5617,5618,5619,5620,5621,5622,5623,5624,5625,5626,5627,5628,5629,5630,5631, +5632,5633,5634,1380,5635,5636,5637,5638,5639,5640,5641,5642,5643,5644,5645,5646, +5647,5648,5649,1381,1056,5650,5651,5652,5653,5654,5655,5656,5657,5658,5659,5660, +1666,5661,5662,5663,5664,5665,5666,5667,5668,1667,5669,1668,5670,5671,5672,5673, +5674,5675,5676,5677,5678,1155,5679,5680,5681,5682,5683,5684,5685,5686,5687,5688, +5689,5690,5691,5692,5693,5694,5695,5696,5697,5698,1669,5699,5700,5701,5702,5703, +5704,5705,1670,5706,5707,5708,5709,5710,1671,5711,5712,5713,5714,1382,5715,5716, +5717,5718,5719,5720,5721,5722,5723,5724,5725,1672,5726,5727,1673,1674,5728,5729, +5730,5731,5732,5733,5734,5735,5736,1675,5737,5738,5739,5740,5741,5742,5743,5744, +1676,5745,5746,5747,5748,5749,5750,5751,1383,5752,5753,5754,5755,5756,5757,5758, +5759,5760,5761,5762,5763,5764,5765,5766,5767,5768,1677,5769,5770,5771,5772,5773, +1678,5774,5775,5776, 998,5777,5778,5779,5780,5781,5782,5783,5784,5785,1384,5786, +5787,5788,5789,5790,5791,5792,5793,5794,5795,5796,5797,5798,5799,5800,1679,5801, +5802,5803,1115,1116,5804,5805,5806,5807,5808,5809,5810,5811,5812,5813,5814,5815, 
+5816,5817,5818,5819,5820,5821,5822,5823,5824,5825,5826,5827,5828,5829,5830,5831, +5832,5833,5834,5835,5836,5837,5838,5839,5840,5841,5842,5843,5844,5845,5846,5847, +5848,5849,5850,5851,5852,5853,5854,5855,1680,5856,5857,5858,5859,5860,5861,5862, +5863,5864,1681,5865,5866,5867,1682,5868,5869,5870,5871,5872,5873,5874,5875,5876, +5877,5878,5879,1683,5880,1684,5881,5882,5883,5884,1685,5885,5886,5887,5888,5889, +5890,5891,5892,5893,5894,5895,5896,5897,5898,5899,5900,5901,5902,5903,5904,5905, +5906,5907,1686,5908,5909,5910,5911,5912,5913,5914,5915,5916,5917,5918,5919,5920, +5921,5922,5923,5924,5925,5926,5927,5928,5929,5930,5931,5932,5933,5934,5935,1687, +5936,5937,5938,5939,5940,5941,5942,5943,5944,5945,5946,5947,5948,5949,5950,5951, +5952,1688,1689,5953,1199,5954,5955,5956,5957,5958,5959,5960,5961,1690,5962,5963, +5964,5965,5966,5967,5968,5969,5970,5971,5972,5973,5974,5975,5976,5977,5978,5979, +5980,5981,1385,5982,1386,5983,5984,5985,5986,5987,5988,5989,5990,5991,5992,5993, +5994,5995,5996,5997,5998,5999,6000,6001,6002,6003,6004,6005,6006,6007,6008,6009, +6010,6011,6012,6013,6014,6015,6016,6017,6018,6019,6020,6021,6022,6023,6024,6025, +6026,6027,1265,6028,6029,1691,6030,6031,6032,6033,6034,6035,6036,6037,6038,6039, +6040,6041,6042,6043,6044,6045,6046,6047,6048,6049,6050,6051,6052,6053,6054,6055, +6056,6057,6058,6059,6060,6061,6062,6063,6064,6065,6066,6067,6068,6069,6070,6071, +6072,6073,6074,6075,6076,6077,6078,6079,6080,6081,6082,6083,6084,1692,6085,6086, +6087,6088,6089,6090,6091,6092,6093,6094,6095,6096,6097,6098,6099,6100,6101,6102, +6103,6104,6105,6106,6107,6108,6109,6110,6111,6112,6113,6114,6115,6116,6117,6118, +6119,6120,6121,6122,6123,6124,6125,6126,6127,6128,6129,6130,6131,1693,6132,6133, +6134,6135,6136,1694,6137,6138,6139,6140,6141,1695,6142,6143,6144,6145,6146,6147, +6148,6149,6150,6151,6152,6153,6154,6155,6156,6157,6158,6159,6160,6161,6162,6163, +6164,6165,6166,6167,6168,6169,6170,6171,6172,6173,6174,6175,6176,6177,6178,6179, 
+6180,6181,6182,6183,6184,6185,1696,6186,6187,6188,6189,6190,6191,6192,6193,6194, +6195,6196,6197,6198,6199,6200,6201,6202,6203,6204,6205,6206,6207,6208,6209,6210, +6211,6212,6213,6214,6215,6216,6217,6218,6219,1697,6220,6221,6222,6223,6224,6225, +6226,6227,6228,6229,6230,6231,6232,6233,6234,6235,6236,6237,6238,6239,6240,6241, +6242,6243,6244,6245,6246,6247,6248,6249,6250,6251,6252,6253,1698,6254,6255,6256, +6257,6258,6259,6260,6261,6262,6263,1200,6264,6265,6266,6267,6268,6269,6270,6271, #1024 +6272,6273,6274,6275,6276,6277,6278,6279,6280,6281,6282,6283,6284,6285,6286,6287, +6288,6289,6290,6291,6292,6293,6294,6295,6296,6297,6298,6299,6300,6301,6302,1699, +6303,6304,1700,6305,6306,6307,6308,6309,6310,6311,6312,6313,6314,6315,6316,6317, +6318,6319,6320,6321,6322,6323,6324,6325,6326,6327,6328,6329,6330,6331,6332,6333, +6334,6335,6336,6337,6338,6339,1701,6340,6341,6342,6343,6344,1387,6345,6346,6347, +6348,6349,6350,6351,6352,6353,6354,6355,6356,6357,6358,6359,6360,6361,6362,6363, +6364,6365,6366,6367,6368,6369,6370,6371,6372,6373,6374,6375,6376,6377,6378,6379, +6380,6381,6382,6383,6384,6385,6386,6387,6388,6389,6390,6391,6392,6393,6394,6395, +6396,6397,6398,6399,6400,6401,6402,6403,6404,6405,6406,6407,6408,6409,6410,6411, +6412,6413,1702,6414,6415,6416,6417,6418,6419,6420,6421,6422,1703,6423,6424,6425, +6426,6427,6428,6429,6430,6431,6432,6433,6434,6435,6436,6437,6438,1704,6439,6440, +6441,6442,6443,6444,6445,6446,6447,6448,6449,6450,6451,6452,6453,6454,6455,6456, +6457,6458,6459,6460,6461,6462,6463,6464,6465,6466,6467,6468,6469,6470,6471,6472, +6473,6474,6475,6476,6477,6478,6479,6480,6481,6482,6483,6484,6485,6486,6487,6488, +6489,6490,6491,6492,6493,6494,6495,6496,6497,6498,6499,6500,6501,6502,6503,1266, +6504,6505,6506,6507,6508,6509,6510,6511,6512,6513,6514,6515,6516,6517,6518,6519, +6520,6521,6522,6523,6524,6525,6526,6527,6528,6529,6530,6531,6532,6533,6534,6535, +6536,6537,6538,6539,6540,6541,6542,6543,6544,6545,6546,6547,6548,6549,6550,6551, 
+1705,1706,6552,6553,6554,6555,6556,6557,6558,6559,6560,6561,6562,6563,6564,6565, +6566,6567,6568,6569,6570,6571,6572,6573,6574,6575,6576,6577,6578,6579,6580,6581, +6582,6583,6584,6585,6586,6587,6588,6589,6590,6591,6592,6593,6594,6595,6596,6597, +6598,6599,6600,6601,6602,6603,6604,6605,6606,6607,6608,6609,6610,6611,6612,6613, +6614,6615,6616,6617,6618,6619,6620,6621,6622,6623,6624,6625,6626,6627,6628,6629, +6630,6631,6632,6633,6634,6635,6636,6637,1388,6638,6639,6640,6641,6642,6643,6644, +1707,6645,6646,6647,6648,6649,6650,6651,6652,6653,6654,6655,6656,6657,6658,6659, +6660,6661,6662,6663,1708,6664,6665,6666,6667,6668,6669,6670,6671,6672,6673,6674, +1201,6675,6676,6677,6678,6679,6680,6681,6682,6683,6684,6685,6686,6687,6688,6689, +6690,6691,6692,6693,6694,6695,6696,6697,6698,6699,6700,6701,6702,6703,6704,6705, +6706,6707,6708,6709,6710,6711,6712,6713,6714,6715,6716,6717,6718,6719,6720,6721, +6722,6723,6724,6725,1389,6726,6727,6728,6729,6730,6731,6732,6733,6734,6735,6736, +1390,1709,6737,6738,6739,6740,6741,6742,1710,6743,6744,6745,6746,1391,6747,6748, +6749,6750,6751,6752,6753,6754,6755,6756,6757,1392,6758,6759,6760,6761,6762,6763, +6764,6765,6766,6767,6768,6769,6770,6771,6772,6773,6774,6775,6776,6777,6778,6779, +6780,1202,6781,6782,6783,6784,6785,6786,6787,6788,6789,6790,6791,6792,6793,6794, +6795,6796,6797,6798,6799,6800,6801,6802,6803,6804,6805,6806,6807,6808,6809,1711, +6810,6811,6812,6813,6814,6815,6816,6817,6818,6819,6820,6821,6822,6823,6824,6825, +6826,6827,6828,6829,6830,6831,6832,6833,6834,6835,6836,1393,6837,6838,6839,6840, +6841,6842,6843,6844,6845,6846,6847,6848,6849,6850,6851,6852,6853,6854,6855,6856, +6857,6858,6859,6860,6861,6862,6863,6864,6865,6866,6867,6868,6869,6870,6871,6872, +6873,6874,6875,6876,6877,6878,6879,6880,6881,6882,6883,6884,6885,6886,6887,6888, +6889,6890,6891,6892,6893,6894,6895,6896,6897,6898,6899,6900,6901,6902,1712,6903, +6904,6905,6906,6907,6908,6909,6910,1713,6911,6912,6913,6914,6915,6916,6917,6918, 
+6919,6920,6921,6922,6923,6924,6925,6926,6927,6928,6929,6930,6931,6932,6933,6934, +6935,6936,6937,6938,6939,6940,6941,6942,6943,6944,6945,6946,6947,6948,6949,6950, +6951,6952,6953,6954,6955,6956,6957,6958,6959,6960,6961,6962,6963,6964,6965,6966, +6967,6968,6969,6970,6971,6972,6973,6974,1714,6975,6976,6977,6978,6979,6980,6981, +6982,6983,6984,6985,6986,6987,6988,1394,6989,6990,6991,6992,6993,6994,6995,6996, +6997,6998,6999,7000,1715,7001,7002,7003,7004,7005,7006,7007,7008,7009,7010,7011, +7012,7013,7014,7015,7016,7017,7018,7019,7020,7021,7022,7023,7024,7025,7026,7027, +7028,1716,7029,7030,7031,7032,7033,7034,7035,7036,7037,7038,7039,7040,7041,7042, +7043,7044,7045,7046,7047,7048,7049,7050,7051,7052,7053,7054,7055,7056,7057,7058, +7059,7060,7061,7062,7063,7064,7065,7066,7067,7068,7069,7070,7071,7072,7073,7074, +7075,7076,7077,7078,7079,7080,7081,7082,7083,7084,7085,7086,7087,7088,7089,7090, +7091,7092,7093,7094,7095,7096,7097,7098,7099,7100,7101,7102,7103,7104,7105,7106, +7107,7108,7109,7110,7111,7112,7113,7114,7115,7116,7117,7118,7119,7120,7121,7122, +7123,7124,7125,7126,7127,7128,7129,7130,7131,7132,7133,7134,7135,7136,7137,7138, +7139,7140,7141,7142,7143,7144,7145,7146,7147,7148,7149,7150,7151,7152,7153,7154, +7155,7156,7157,7158,7159,7160,7161,7162,7163,7164,7165,7166,7167,7168,7169,7170, +7171,7172,7173,7174,7175,7176,7177,7178,7179,7180,7181,7182,7183,7184,7185,7186, +7187,7188,7189,7190,7191,7192,7193,7194,7195,7196,7197,7198,7199,7200,7201,7202, +7203,7204,7205,7206,7207,1395,7208,7209,7210,7211,7212,7213,1717,7214,7215,7216, +7217,7218,7219,7220,7221,7222,7223,7224,7225,7226,7227,7228,7229,7230,7231,7232, +7233,7234,7235,7236,7237,7238,7239,7240,7241,7242,7243,7244,7245,7246,7247,7248, +7249,7250,7251,7252,7253,7254,7255,7256,7257,7258,7259,7260,7261,7262,7263,7264, +7265,7266,7267,7268,7269,7270,7271,7272,7273,7274,7275,7276,7277,7278,7279,7280, +7281,7282,7283,7284,7285,7286,7287,7288,7289,7290,7291,7292,7293,7294,7295,7296, 
+7297,7298,7299,7300,7301,7302,7303,7304,7305,7306,7307,7308,7309,7310,7311,7312, +7313,1718,7314,7315,7316,7317,7318,7319,7320,7321,7322,7323,7324,7325,7326,7327, +7328,7329,7330,7331,7332,7333,7334,7335,7336,7337,7338,7339,7340,7341,7342,7343, +7344,7345,7346,7347,7348,7349,7350,7351,7352,7353,7354,7355,7356,7357,7358,7359, +7360,7361,7362,7363,7364,7365,7366,7367,7368,7369,7370,7371,7372,7373,7374,7375, +7376,7377,7378,7379,7380,7381,7382,7383,7384,7385,7386,7387,7388,7389,7390,7391, +7392,7393,7394,7395,7396,7397,7398,7399,7400,7401,7402,7403,7404,7405,7406,7407, +7408,7409,7410,7411,7412,7413,7414,7415,7416,7417,7418,7419,7420,7421,7422,7423, +7424,7425,7426,7427,7428,7429,7430,7431,7432,7433,7434,7435,7436,7437,7438,7439, +7440,7441,7442,7443,7444,7445,7446,7447,7448,7449,7450,7451,7452,7453,7454,7455, +7456,7457,7458,7459,7460,7461,7462,7463,7464,7465,7466,7467,7468,7469,7470,7471, +7472,7473,7474,7475,7476,7477,7478,7479,7480,7481,7482,7483,7484,7485,7486,7487, +7488,7489,7490,7491,7492,7493,7494,7495,7496,7497,7498,7499,7500,7501,7502,7503, +7504,7505,7506,7507,7508,7509,7510,7511,7512,7513,7514,7515,7516,7517,7518,7519, +7520,7521,7522,7523,7524,7525,7526,7527,7528,7529,7530,7531,7532,7533,7534,7535, +7536,7537,7538,7539,7540,7541,7542,7543,7544,7545,7546,7547,7548,7549,7550,7551, +7552,7553,7554,7555,7556,7557,7558,7559,7560,7561,7562,7563,7564,7565,7566,7567, +7568,7569,7570,7571,7572,7573,7574,7575,7576,7577,7578,7579,7580,7581,7582,7583, +7584,7585,7586,7587,7588,7589,7590,7591,7592,7593,7594,7595,7596,7597,7598,7599, +7600,7601,7602,7603,7604,7605,7606,7607,7608,7609,7610,7611,7612,7613,7614,7615, +7616,7617,7618,7619,7620,7621,7622,7623,7624,7625,7626,7627,7628,7629,7630,7631, +7632,7633,7634,7635,7636,7637,7638,7639,7640,7641,7642,7643,7644,7645,7646,7647, +7648,7649,7650,7651,7652,7653,7654,7655,7656,7657,7658,7659,7660,7661,7662,7663, +7664,7665,7666,7667,7668,7669,7670,7671,7672,7673,7674,7675,7676,7677,7678,7679, 
+7680,7681,7682,7683,7684,7685,7686,7687,7688,7689,7690,7691,7692,7693,7694,7695, +7696,7697,7698,7699,7700,7701,7702,7703,7704,7705,7706,7707,7708,7709,7710,7711, +7712,7713,7714,7715,7716,7717,7718,7719,7720,7721,7722,7723,7724,7725,7726,7727, +7728,7729,7730,7731,7732,7733,7734,7735,7736,7737,7738,7739,7740,7741,7742,7743, +7744,7745,7746,7747,7748,7749,7750,7751,7752,7753,7754,7755,7756,7757,7758,7759, +7760,7761,7762,7763,7764,7765,7766,7767,7768,7769,7770,7771,7772,7773,7774,7775, +7776,7777,7778,7779,7780,7781,7782,7783,7784,7785,7786,7787,7788,7789,7790,7791, +7792,7793,7794,7795,7796,7797,7798,7799,7800,7801,7802,7803,7804,7805,7806,7807, +7808,7809,7810,7811,7812,7813,7814,7815,7816,7817,7818,7819,7820,7821,7822,7823, +7824,7825,7826,7827,7828,7829,7830,7831,7832,7833,7834,7835,7836,7837,7838,7839, +7840,7841,7842,7843,7844,7845,7846,7847,7848,7849,7850,7851,7852,7853,7854,7855, +7856,7857,7858,7859,7860,7861,7862,7863,7864,7865,7866,7867,7868,7869,7870,7871, +7872,7873,7874,7875,7876,7877,7878,7879,7880,7881,7882,7883,7884,7885,7886,7887, +7888,7889,7890,7891,7892,7893,7894,7895,7896,7897,7898,7899,7900,7901,7902,7903, +7904,7905,7906,7907,7908,7909,7910,7911,7912,7913,7914,7915,7916,7917,7918,7919, +7920,7921,7922,7923,7924,7925,7926,7927,7928,7929,7930,7931,7932,7933,7934,7935, +7936,7937,7938,7939,7940,7941,7942,7943,7944,7945,7946,7947,7948,7949,7950,7951, +7952,7953,7954,7955,7956,7957,7958,7959,7960,7961,7962,7963,7964,7965,7966,7967, +7968,7969,7970,7971,7972,7973,7974,7975,7976,7977,7978,7979,7980,7981,7982,7983, +7984,7985,7986,7987,7988,7989,7990,7991,7992,7993,7994,7995,7996,7997,7998,7999, +8000,8001,8002,8003,8004,8005,8006,8007,8008,8009,8010,8011,8012,8013,8014,8015, +8016,8017,8018,8019,8020,8021,8022,8023,8024,8025,8026,8027,8028,8029,8030,8031, +8032,8033,8034,8035,8036,8037,8038,8039,8040,8041,8042,8043,8044,8045,8046,8047, +8048,8049,8050,8051,8052,8053,8054,8055,8056,8057,8058,8059,8060,8061,8062,8063, 
+8064,8065,8066,8067,8068,8069,8070,8071,8072,8073,8074,8075,8076,8077,8078,8079, +8080,8081,8082,8083,8084,8085,8086,8087,8088,8089,8090,8091,8092,8093,8094,8095, +8096,8097,8098,8099,8100,8101,8102,8103,8104,8105,8106,8107,8108,8109,8110,8111, +8112,8113,8114,8115,8116,8117,8118,8119,8120,8121,8122,8123,8124,8125,8126,8127, +8128,8129,8130,8131,8132,8133,8134,8135,8136,8137,8138,8139,8140,8141,8142,8143, +8144,8145,8146,8147,8148,8149,8150,8151,8152,8153,8154,8155,8156,8157,8158,8159, +8160,8161,8162,8163,8164,8165,8166,8167,8168,8169,8170,8171,8172,8173,8174,8175, +8176,8177,8178,8179,8180,8181,8182,8183,8184,8185,8186,8187,8188,8189,8190,8191, +8192,8193,8194,8195,8196,8197,8198,8199,8200,8201,8202,8203,8204,8205,8206,8207, +8208,8209,8210,8211,8212,8213,8214,8215,8216,8217,8218,8219,8220,8221,8222,8223, +8224,8225,8226,8227,8228,8229,8230,8231,8232,8233,8234,8235,8236,8237,8238,8239, +8240,8241,8242,8243,8244,8245,8246,8247,8248,8249,8250,8251,8252,8253,8254,8255, +8256,8257,8258,8259,8260,8261,8262,8263,8264,8265,8266,8267,8268,8269,8270,8271, +8272,8273,8274,8275,8276,8277,8278,8279,8280,8281,8282,8283,8284,8285,8286,8287, +8288,8289,8290,8291,8292,8293,8294,8295,8296,8297,8298,8299,8300,8301,8302,8303, +8304,8305,8306,8307,8308,8309,8310,8311,8312,8313,8314,8315,8316,8317,8318,8319, +8320,8321,8322,8323,8324,8325,8326,8327,8328,8329,8330,8331,8332,8333,8334,8335, +8336,8337,8338,8339,8340,8341,8342,8343,8344,8345,8346,8347,8348,8349,8350,8351, +8352,8353,8354,8355,8356,8357,8358,8359,8360,8361,8362,8363,8364,8365,8366,8367, +8368,8369,8370,8371,8372,8373,8374,8375,8376,8377,8378,8379,8380,8381,8382,8383, +8384,8385,8386,8387,8388,8389,8390,8391,8392,8393,8394,8395,8396,8397,8398,8399, +8400,8401,8402,8403,8404,8405,8406,8407,8408,8409,8410,8411,8412,8413,8414,8415, +8416,8417,8418,8419,8420,8421,8422,8423,8424,8425,8426,8427,8428,8429,8430,8431, +8432,8433,8434,8435,8436,8437,8438,8439,8440,8441,8442,8443,8444,8445,8446,8447, 
+8448,8449,8450,8451,8452,8453,8454,8455,8456,8457,8458,8459,8460,8461,8462,8463, +8464,8465,8466,8467,8468,8469,8470,8471,8472,8473,8474,8475,8476,8477,8478,8479, +8480,8481,8482,8483,8484,8485,8486,8487,8488,8489,8490,8491,8492,8493,8494,8495, +8496,8497,8498,8499,8500,8501,8502,8503,8504,8505,8506,8507,8508,8509,8510,8511, +8512,8513,8514,8515,8516,8517,8518,8519,8520,8521,8522,8523,8524,8525,8526,8527, +8528,8529,8530,8531,8532,8533,8534,8535,8536,8537,8538,8539,8540,8541,8542,8543, +8544,8545,8546,8547,8548,8549,8550,8551,8552,8553,8554,8555,8556,8557,8558,8559, +8560,8561,8562,8563,8564,8565,8566,8567,8568,8569,8570,8571,8572,8573,8574,8575, +8576,8577,8578,8579,8580,8581,8582,8583,8584,8585,8586,8587,8588,8589,8590,8591, +8592,8593,8594,8595,8596,8597,8598,8599,8600,8601,8602,8603,8604,8605,8606,8607, +8608,8609,8610,8611,8612,8613,8614,8615,8616,8617,8618,8619,8620,8621,8622,8623, +8624,8625,8626,8627,8628,8629,8630,8631,8632,8633,8634,8635,8636,8637,8638,8639, +8640,8641,8642,8643,8644,8645,8646,8647,8648,8649,8650,8651,8652,8653,8654,8655, +8656,8657,8658,8659,8660,8661,8662,8663,8664,8665,8666,8667,8668,8669,8670,8671, +8672,8673,8674,8675,8676,8677,8678,8679,8680,8681,8682,8683,8684,8685,8686,8687, +8688,8689,8690,8691,8692,8693,8694,8695,8696,8697,8698,8699,8700,8701,8702,8703, +8704,8705,8706,8707,8708,8709,8710,8711,8712,8713,8714,8715,8716,8717,8718,8719, +8720,8721,8722,8723,8724,8725,8726,8727,8728,8729,8730,8731,8732,8733,8734,8735, +8736,8737,8738,8739,8740,8741) diff --git a/fanficdownloader/chardet/euckrprober.py b/fanficdownloader/chardet/euckrprober.py new file mode 100644 index 00000000..bd697ebf --- /dev/null +++ b/fanficdownloader/chardet/euckrprober.py @@ -0,0 +1,41 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is mozilla.org code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. 
+# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from mbcharsetprober import MultiByteCharSetProber +from codingstatemachine import CodingStateMachine +from chardistribution import EUCKRDistributionAnalysis +from mbcssm import EUCKRSMModel + +class EUCKRProber(MultiByteCharSetProber): + def __init__(self): + MultiByteCharSetProber.__init__(self) + self._mCodingSM = CodingStateMachine(EUCKRSMModel) + self._mDistributionAnalyzer = EUCKRDistributionAnalysis() + self.reset() + + def get_charset_name(self): + return "EUC-KR" diff --git a/fanficdownloader/chardet/euctwfreq.py b/fanficdownloader/chardet/euctwfreq.py new file mode 100644 index 00000000..c0572095 --- /dev/null +++ b/fanficdownloader/chardet/euctwfreq.py @@ -0,0 +1,426 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Communicator client code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. 
All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +# EUCTW frequency table +# Converted from big5 work +# by Taiwan's Mandarin Promotion Council +# <http:#www.edu.tw:81/mandr/> + +# 128 --> 0.42261 +# 256 --> 0.57851 +# 512 --> 0.74851 +# 1024 --> 0.89384 +# 2048 --> 0.97583 +# +# Idea Distribution Ratio = 0.74851/(1-0.74851) =2.98 +# Random Distribution Ration = 512/(5401-512)=0.105 +# +# Typical Distribution Ratio about 25% of Ideal one, still much higher than RDR + +EUCTW_TYPICAL_DISTRIBUTION_RATIO = 0.75 + +# Char to FreqOrder table , +EUCTW_TABLE_SIZE = 8102 + +EUCTWCharToFreqOrder = ( \ + 1,1800,1506, 255,1431, 198, 9, 82, 6,7310, 177, 202,3615,1256,2808, 110, # 2742 +3735, 33,3241, 261, 76, 44,2113, 16,2931,2184,1176, 659,3868, 26,3404,2643, # 2758 +1198,3869,3313,4060, 410,2211, 302, 590, 361,1963, 8, 204, 58,4296,7311,1931, # 2774 + 63,7312,7313, 317,1614, 75, 222, 159,4061,2412,1480,7314,3500,3068, 224,2809, # 2790 +3616, 3, 10,3870,1471, 29,2774,1135,2852,1939, 873, 130,3242,1123, 312,7315, # 2806 +4297,2051, 507, 252, 682,7316, 142,1914, 124, 206,2932, 34,3501,3173, 64, 604, # 2822 +7317,2494,1976,1977, 155,1990, 645, 
641,1606,7318,3405, 337, 72, 406,7319, 80, # 2838 + 630, 238,3174,1509, 263, 939,1092,2644, 756,1440,1094,3406, 449, 69,2969, 591, # 2854 + 179,2095, 471, 115,2034,1843, 60, 50,2970, 134, 806,1868, 734,2035,3407, 180, # 2870 + 995,1607, 156, 537,2893, 688,7320, 319,1305, 779,2144, 514,2374, 298,4298, 359, # 2886 +2495, 90,2707,1338, 663, 11, 906,1099,2545, 20,2436, 182, 532,1716,7321, 732, # 2902 +1376,4062,1311,1420,3175, 25,2312,1056, 113, 399, 382,1949, 242,3408,2467, 529, # 2918 +3243, 475,1447,3617,7322, 117, 21, 656, 810,1297,2295,2329,3502,7323, 126,4063, # 2934 + 706, 456, 150, 613,4299, 71,1118,2036,4064, 145,3069, 85, 835, 486,2114,1246, # 2950 +1426, 428, 727,1285,1015, 800, 106, 623, 303,1281,7324,2127,2354, 347,3736, 221, # 2966 +3503,3110,7325,1955,1153,4065, 83, 296,1199,3070, 192, 624, 93,7326, 822,1897, # 2982 +2810,3111, 795,2064, 991,1554,1542,1592, 27, 43,2853, 859, 139,1456, 860,4300, # 2998 + 437, 712,3871, 164,2392,3112, 695, 211,3017,2096, 195,3872,1608,3504,3505,3618, # 3014 +3873, 234, 811,2971,2097,3874,2229,1441,3506,1615,2375, 668,2076,1638, 305, 228, # 3030 +1664,4301, 467, 415,7327, 262,2098,1593, 239, 108, 300, 200,1033, 512,1247,2077, # 3046 +7328,7329,2173,3176,3619,2673, 593, 845,1062,3244, 88,1723,2037,3875,1950, 212, # 3062 + 266, 152, 149, 468,1898,4066,4302, 77, 187,7330,3018, 37, 5,2972,7331,3876, # 3078 +7332,7333, 39,2517,4303,2894,3177,2078, 55, 148, 74,4304, 545, 483,1474,1029, # 3094 +1665, 217,1869,1531,3113,1104,2645,4067, 24, 172,3507, 900,3877,3508,3509,4305, # 3110 + 32,1408,2811,1312, 329, 487,2355,2247,2708, 784,2674, 4,3019,3314,1427,1788, # 3126 + 188, 109, 499,7334,3620,1717,1789, 888,1217,3020,4306,7335,3510,7336,3315,1520, # 3142 +3621,3878, 196,1034, 775,7337,7338, 929,1815, 249, 439, 38,7339,1063,7340, 794, # 3158 +3879,1435,2296, 46, 178,3245,2065,7341,2376,7342, 214,1709,4307, 804, 35, 707, # 3174 + 324,3622,1601,2546, 140, 459,4068,7343,7344,1365, 839, 272, 978,2257,2572,3409, # 3190 
+2128,1363,3623,1423, 697, 100,3071, 48, 70,1231, 495,3114,2193,7345,1294,7346, # 3206 +2079, 462, 586,1042,3246, 853, 256, 988, 185,2377,3410,1698, 434,1084,7347,3411, # 3222 + 314,2615,2775,4308,2330,2331, 569,2280, 637,1816,2518, 757,1162,1878,1616,3412, # 3238 + 287,1577,2115, 768,4309,1671,2854,3511,2519,1321,3737, 909,2413,7348,4069, 933, # 3254 +3738,7349,2052,2356,1222,4310, 765,2414,1322, 786,4311,7350,1919,1462,1677,2895, # 3270 +1699,7351,4312,1424,2437,3115,3624,2590,3316,1774,1940,3413,3880,4070, 309,1369, # 3286 +1130,2812, 364,2230,1653,1299,3881,3512,3882,3883,2646, 525,1085,3021, 902,2000, # 3302 +1475, 964,4313, 421,1844,1415,1057,2281, 940,1364,3116, 376,4314,4315,1381, 7, # 3318 +2520, 983,2378, 336,1710,2675,1845, 321,3414, 559,1131,3022,2742,1808,1132,1313, # 3334 + 265,1481,1857,7352, 352,1203,2813,3247, 167,1089, 420,2814, 776, 792,1724,3513, # 3350 +4071,2438,3248,7353,4072,7354, 446, 229, 333,2743, 901,3739,1200,1557,4316,2647, # 3366 +1920, 395,2744,2676,3740,4073,1835, 125, 916,3178,2616,4317,7355,7356,3741,7357, # 3382 +7358,7359,4318,3117,3625,1133,2547,1757,3415,1510,2313,1409,3514,7360,2145, 438, # 3398 +2591,2896,2379,3317,1068, 958,3023, 461, 311,2855,2677,4074,1915,3179,4075,1978, # 3414 + 383, 750,2745,2617,4076, 274, 539, 385,1278,1442,7361,1154,1964, 384, 561, 210, # 3430 + 98,1295,2548,3515,7362,1711,2415,1482,3416,3884,2897,1257, 129,7363,3742, 642, # 3446 + 523,2776,2777,2648,7364, 141,2231,1333, 68, 176, 441, 876, 907,4077, 603,2592, # 3462 + 710, 171,3417, 404, 549, 18,3118,2393,1410,3626,1666,7365,3516,4319,2898,4320, # 3478 +7366,2973, 368,7367, 146, 366, 99, 871,3627,1543, 748, 807,1586,1185, 22,2258, # 3494 + 379,3743,3180,7368,3181, 505,1941,2618,1991,1382,2314,7369, 380,2357, 218, 702, # 3510 +1817,1248,3418,3024,3517,3318,3249,7370,2974,3628, 930,3250,3744,7371, 59,7372, # 3526 + 585, 601,4078, 497,3419,1112,1314,4321,1801,7373,1223,1472,2174,7374, 749,1836, # 3542 + 690,1899,3745,1772,3885,1476, 
429,1043,1790,2232,2116, 917,4079, 447,1086,1629, # 3558 +7375, 556,7376,7377,2020,1654, 844,1090, 105, 550, 966,1758,2815,1008,1782, 686, # 3574 +1095,7378,2282, 793,1602,7379,3518,2593,4322,4080,2933,2297,4323,3746, 980,2496, # 3590 + 544, 353, 527,4324, 908,2678,2899,7380, 381,2619,1942,1348,7381,1341,1252, 560, # 3606 +3072,7382,3420,2856,7383,2053, 973, 886,2080, 143,4325,7384,7385, 157,3886, 496, # 3622 +4081, 57, 840, 540,2038,4326,4327,3421,2117,1445, 970,2259,1748,1965,2081,4082, # 3638 +3119,1234,1775,3251,2816,3629, 773,1206,2129,1066,2039,1326,3887,1738,1725,4083, # 3654 + 279,3120, 51,1544,2594, 423,1578,2130,2066, 173,4328,1879,7386,7387,1583, 264, # 3670 + 610,3630,4329,2439, 280, 154,7388,7389,7390,1739, 338,1282,3073, 693,2857,1411, # 3686 +1074,3747,2440,7391,4330,7392,7393,1240, 952,2394,7394,2900,1538,2679, 685,1483, # 3702 +4084,2468,1436, 953,4085,2054,4331, 671,2395, 79,4086,2441,3252, 608, 567,2680, # 3718 +3422,4087,4088,1691, 393,1261,1791,2396,7395,4332,7396,7397,7398,7399,1383,1672, # 3734 +3748,3182,1464, 522,1119, 661,1150, 216, 675,4333,3888,1432,3519, 609,4334,2681, # 3750 +2397,7400,7401,7402,4089,3025, 0,7403,2469, 315, 231,2442, 301,3319,4335,2380, # 3766 +7404, 233,4090,3631,1818,4336,4337,7405, 96,1776,1315,2082,7406, 257,7407,1809, # 3782 +3632,2709,1139,1819,4091,2021,1124,2163,2778,1777,2649,7408,3074, 363,1655,3183, # 3798 +7409,2975,7410,7411,7412,3889,1567,3890, 718, 103,3184, 849,1443, 341,3320,2934, # 3814 +1484,7413,1712, 127, 67, 339,4092,2398, 679,1412, 821,7414,7415, 834, 738, 351, # 3830 +2976,2146, 846, 235,1497,1880, 418,1992,3749,2710, 186,1100,2147,2746,3520,1545, # 3846 +1355,2935,2858,1377, 583,3891,4093,2573,2977,7416,1298,3633,1078,2549,3634,2358, # 3862 + 78,3750,3751, 267,1289,2099,2001,1594,4094, 348, 369,1274,2194,2175,1837,4338, # 3878 +1820,2817,3635,2747,2283,2002,4339,2936,2748, 144,3321, 882,4340,3892,2749,3423, # 3894 +4341,2901,7417,4095,1726, 320,7418,3893,3026, 788,2978,7419,2818,1773,1327,2859, 
# 3910 +3894,2819,7420,1306,4342,2003,1700,3752,3521,2359,2650, 787,2022, 506, 824,3636, # 3926 + 534, 323,4343,1044,3322,2023,1900, 946,3424,7421,1778,1500,1678,7422,1881,4344, # 3942 + 165, 243,4345,3637,2521, 123, 683,4096, 764,4346, 36,3895,1792, 589,2902, 816, # 3958 + 626,1667,3027,2233,1639,1555,1622,3753,3896,7423,3897,2860,1370,1228,1932, 891, # 3974 +2083,2903, 304,4097,7424, 292,2979,2711,3522, 691,2100,4098,1115,4347, 118, 662, # 3990 +7425, 611,1156, 854,2381,1316,2861, 2, 386, 515,2904,7426,7427,3253, 868,2234, # 4006 +1486, 855,2651, 785,2212,3028,7428,1040,3185,3523,7429,3121, 448,7430,1525,7431, # 4022 +2164,4348,7432,3754,7433,4099,2820,3524,3122, 503, 818,3898,3123,1568, 814, 676, # 4038 +1444, 306,1749,7434,3755,1416,1030, 197,1428, 805,2821,1501,4349,7435,7436,7437, # 4054 +1993,7438,4350,7439,7440,2195, 13,2779,3638,2980,3124,1229,1916,7441,3756,2131, # 4070 +7442,4100,4351,2399,3525,7443,2213,1511,1727,1120,7444,7445, 646,3757,2443, 307, # 4086 +7446,7447,1595,3186,7448,7449,7450,3639,1113,1356,3899,1465,2522,2523,7451, 519, # 4102 +7452, 128,2132, 92,2284,1979,7453,3900,1512, 342,3125,2196,7454,2780,2214,1980, # 4118 +3323,7455, 290,1656,1317, 789, 827,2360,7456,3758,4352, 562, 581,3901,7457, 401, # 4134 +4353,2248, 94,4354,1399,2781,7458,1463,2024,4355,3187,1943,7459, 828,1105,4101, # 4150 +1262,1394,7460,4102, 605,4356,7461,1783,2862,7462,2822, 819,2101, 578,2197,2937, # 4166 +7463,1502, 436,3254,4103,3255,2823,3902,2905,3425,3426,7464,2712,2315,7465,7466, # 4182 +2332,2067, 23,4357, 193, 826,3759,2102, 699,1630,4104,3075, 390,1793,1064,3526, # 4198 +7467,1579,3076,3077,1400,7468,4105,1838,1640,2863,7469,4358,4359, 137,4106, 598, # 4214 +3078,1966, 780, 104, 974,2938,7470, 278, 899, 253, 402, 572, 504, 493,1339,7471, # 4230 +3903,1275,4360,2574,2550,7472,3640,3029,3079,2249, 565,1334,2713, 863, 41,7473, # 4246 +7474,4361,7475,1657,2333, 19, 463,2750,4107, 606,7476,2981,3256,1087,2084,1323, # 4262 
+2652,2982,7477,1631,1623,1750,4108,2682,7478,2864, 791,2714,2653,2334, 232,2416, # 4278 +7479,2983,1498,7480,2654,2620, 755,1366,3641,3257,3126,2025,1609, 119,1917,3427, # 4294 + 862,1026,4109,7481,3904,3760,4362,3905,4363,2260,1951,2470,7482,1125, 817,4110, # 4310 +4111,3906,1513,1766,2040,1487,4112,3030,3258,2824,3761,3127,7483,7484,1507,7485, # 4326 +2683, 733, 40,1632,1106,2865, 345,4113, 841,2524, 230,4364,2984,1846,3259,3428, # 4342 +7486,1263, 986,3429,7487, 735, 879, 254,1137, 857, 622,1300,1180,1388,1562,3907, # 4358 +3908,2939, 967,2751,2655,1349, 592,2133,1692,3324,2985,1994,4114,1679,3909,1901, # 4374 +2185,7488, 739,3642,2715,1296,1290,7489,4115,2198,2199,1921,1563,2595,2551,1870, # 4390 +2752,2986,7490, 435,7491, 343,1108, 596, 17,1751,4365,2235,3430,3643,7492,4366, # 4406 + 294,3527,2940,1693, 477, 979, 281,2041,3528, 643,2042,3644,2621,2782,2261,1031, # 4422 +2335,2134,2298,3529,4367, 367,1249,2552,7493,3530,7494,4368,1283,3325,2004, 240, # 4438 +1762,3326,4369,4370, 836,1069,3128, 474,7495,2148,2525, 268,3531,7496,3188,1521, # 4454 +1284,7497,1658,1546,4116,7498,3532,3533,7499,4117,3327,2684,1685,4118, 961,1673, # 4470 +2622, 190,2005,2200,3762,4371,4372,7500, 570,2497,3645,1490,7501,4373,2623,3260, # 4486 +1956,4374, 584,1514, 396,1045,1944,7502,4375,1967,2444,7503,7504,4376,3910, 619, # 4502 +7505,3129,3261, 215,2006,2783,2553,3189,4377,3190,4378, 763,4119,3763,4379,7506, # 4518 +7507,1957,1767,2941,3328,3646,1174, 452,1477,4380,3329,3130,7508,2825,1253,2382, # 4534 +2186,1091,2285,4120, 492,7509, 638,1169,1824,2135,1752,3911, 648, 926,1021,1324, # 4550 +4381, 520,4382, 997, 847,1007, 892,4383,3764,2262,1871,3647,7510,2400,1784,4384, # 4566 +1952,2942,3080,3191,1728,4121,2043,3648,4385,2007,1701,3131,1551, 30,2263,4122, # 4582 +7511,2026,4386,3534,7512, 501,7513,4123, 594,3431,2165,1821,3535,3432,3536,3192, # 4598 + 829,2826,4124,7514,1680,3132,1225,4125,7515,3262,4387,4126,3133,2336,7516,4388, # 4614 
+4127,7517,3912,3913,7518,1847,2383,2596,3330,7519,4389, 374,3914, 652,4128,4129, # 4630 + 375,1140, 798,7520,7521,7522,2361,4390,2264, 546,1659, 138,3031,2445,4391,7523, # 4646 +2250, 612,1848, 910, 796,3765,1740,1371, 825,3766,3767,7524,2906,2554,7525, 692, # 4662 + 444,3032,2624, 801,4392,4130,7526,1491, 244,1053,3033,4131,4132, 340,7527,3915, # 4678 +1041,2987, 293,1168, 87,1357,7528,1539, 959,7529,2236, 721, 694,4133,3768, 219, # 4694 +1478, 644,1417,3331,2656,1413,1401,1335,1389,3916,7530,7531,2988,2362,3134,1825, # 4710 + 730,1515, 184,2827, 66,4393,7532,1660,2943, 246,3332, 378,1457, 226,3433, 975, # 4726 +3917,2944,1264,3537, 674, 696,7533, 163,7534,1141,2417,2166, 713,3538,3333,4394, # 4742 +3918,7535,7536,1186, 15,7537,1079,1070,7538,1522,3193,3539, 276,1050,2716, 758, # 4758 +1126, 653,2945,3263,7539,2337, 889,3540,3919,3081,2989, 903,1250,4395,3920,3434, # 4774 +3541,1342,1681,1718, 766,3264, 286, 89,2946,3649,7540,1713,7541,2597,3334,2990, # 4790 +7542,2947,2215,3194,2866,7543,4396,2498,2526, 181, 387,1075,3921, 731,2187,3335, # 4806 +7544,3265, 310, 313,3435,2299, 770,4134, 54,3034, 189,4397,3082,3769,3922,7545, # 4822 +1230,1617,1849, 355,3542,4135,4398,3336, 111,4136,3650,1350,3135,3436,3035,4137, # 4838 +2149,3266,3543,7546,2784,3923,3924,2991, 722,2008,7547,1071, 247,1207,2338,2471, # 4854 +1378,4399,2009, 864,1437,1214,4400, 373,3770,1142,2216, 667,4401, 442,2753,2555, # 4870 +3771,3925,1968,4138,3267,1839, 837, 170,1107, 934,1336,1882,7548,7549,2118,4139, # 4886 +2828, 743,1569,7550,4402,4140, 582,2384,1418,3437,7551,1802,7552, 357,1395,1729, # 4902 +3651,3268,2418,1564,2237,7553,3083,3772,1633,4403,1114,2085,4141,1532,7554, 482, # 4918 +2446,4404,7555,7556,1492, 833,1466,7557,2717,3544,1641,2829,7558,1526,1272,3652, # 4934 +4142,1686,1794, 416,2556,1902,1953,1803,7559,3773,2785,3774,1159,2316,7560,2867, # 4950 +4405,1610,1584,3036,2419,2754, 443,3269,1163,3136,7561,7562,3926,7563,4143,2499, # 4966 
+3037,4406,3927,3137,2103,1647,3545,2010,1872,4144,7564,4145, 431,3438,7565, 250, # 4982 + 97, 81,4146,7566,1648,1850,1558, 160, 848,7567, 866, 740,1694,7568,2201,2830, # 4998 +3195,4147,4407,3653,1687, 950,2472, 426, 469,3196,3654,3655,3928,7569,7570,1188, # 5014 + 424,1995, 861,3546,4148,3775,2202,2685, 168,1235,3547,4149,7571,2086,1674,4408, # 5030 +3337,3270, 220,2557,1009,7572,3776, 670,2992, 332,1208, 717,7573,7574,3548,2447, # 5046 +3929,3338,7575, 513,7576,1209,2868,3339,3138,4409,1080,7577,7578,7579,7580,2527, # 5062 +3656,3549, 815,1587,3930,3931,7581,3550,3439,3777,1254,4410,1328,3038,1390,3932, # 5078 +1741,3933,3778,3934,7582, 236,3779,2448,3271,7583,7584,3657,3780,1273,3781,4411, # 5094 +7585, 308,7586,4412, 245,4413,1851,2473,1307,2575, 430, 715,2136,2449,7587, 270, # 5110 + 199,2869,3935,7588,3551,2718,1753, 761,1754, 725,1661,1840,4414,3440,3658,7589, # 5126 +7590, 587, 14,3272, 227,2598, 326, 480,2265, 943,2755,3552, 291, 650,1883,7591, # 5142 +1702,1226, 102,1547, 62,3441, 904,4415,3442,1164,4150,7592,7593,1224,1548,2756, # 5158 + 391, 498,1493,7594,1386,1419,7595,2055,1177,4416, 813, 880,1081,2363, 566,1145, # 5174 +4417,2286,1001,1035,2558,2599,2238, 394,1286,7596,7597,2068,7598, 86,1494,1730, # 5190 +3936, 491,1588, 745, 897,2948, 843,3340,3937,2757,2870,3273,1768, 998,2217,2069, # 5206 + 397,1826,1195,1969,3659,2993,3341, 284,7599,3782,2500,2137,2119,1903,7600,3938, # 5222 +2150,3939,4151,1036,3443,1904, 114,2559,4152, 209,1527,7601,7602,2949,2831,2625, # 5238 +2385,2719,3139, 812,2560,7603,3274,7604,1559, 737,1884,3660,1210, 885, 28,2686, # 5254 +3553,3783,7605,4153,1004,1779,4418,7606, 346,1981,2218,2687,4419,3784,1742, 797, # 5270 +1642,3940,1933,1072,1384,2151, 896,3941,3275,3661,3197,2871,3554,7607,2561,1958, # 5286 +4420,2450,1785,7608,7609,7610,3942,4154,1005,1308,3662,4155,2720,4421,4422,1528, # 5302 +2600, 161,1178,4156,1982, 987,4423,1101,4157, 631,3943,1157,3198,2420,1343,1241, # 5318 +1016,2239,2562, 372, 877,2339,2501,1160, 
555,1934, 911,3944,7611, 466,1170, 169, # 5334 +1051,2907,2688,3663,2474,2994,1182,2011,2563,1251,2626,7612, 992,2340,3444,1540, # 5350 +2721,1201,2070,2401,1996,2475,7613,4424, 528,1922,2188,1503,1873,1570,2364,3342, # 5366 +3276,7614, 557,1073,7615,1827,3445,2087,2266,3140,3039,3084, 767,3085,2786,4425, # 5382 +1006,4158,4426,2341,1267,2176,3664,3199, 778,3945,3200,2722,1597,2657,7616,4427, # 5398 +7617,3446,7618,7619,7620,3277,2689,1433,3278, 131, 95,1504,3946, 723,4159,3141, # 5414 +1841,3555,2758,2189,3947,2027,2104,3665,7621,2995,3948,1218,7622,3343,3201,3949, # 5430 +4160,2576, 248,1634,3785, 912,7623,2832,3666,3040,3786, 654, 53,7624,2996,7625, # 5446 +1688,4428, 777,3447,1032,3950,1425,7626, 191, 820,2120,2833, 971,4429, 931,3202, # 5462 + 135, 664, 783,3787,1997, 772,2908,1935,3951,3788,4430,2909,3203, 282,2723, 640, # 5478 +1372,3448,1127, 922, 325,3344,7627,7628, 711,2044,7629,7630,3952,2219,2787,1936, # 5494 +3953,3345,2220,2251,3789,2300,7631,4431,3790,1258,3279,3954,3204,2138,2950,3955, # 5510 +3956,7632,2221, 258,3205,4432, 101,1227,7633,3280,1755,7634,1391,3281,7635,2910, # 5526 +2056, 893,7636,7637,7638,1402,4161,2342,7639,7640,3206,3556,7641,7642, 878,1325, # 5542 +1780,2788,4433, 259,1385,2577, 744,1183,2267,4434,7643,3957,2502,7644, 684,1024, # 5558 +4162,7645, 472,3557,3449,1165,3282,3958,3959, 322,2152, 881, 455,1695,1152,1340, # 5574 + 660, 554,2153,4435,1058,4436,4163, 830,1065,3346,3960,4437,1923,7646,1703,1918, # 5590 +7647, 932,2268, 122,7648,4438, 947, 677,7649,3791,2627, 297,1905,1924,2269,4439, # 5606 +2317,3283,7650,7651,4164,7652,4165, 84,4166, 112, 989,7653, 547,1059,3961, 701, # 5622 +3558,1019,7654,4167,7655,3450, 942, 639, 457,2301,2451, 993,2951, 407, 851, 494, # 5638 +4440,3347, 927,7656,1237,7657,2421,3348, 573,4168, 680, 921,2911,1279,1874, 285, # 5654 + 790,1448,1983, 719,2167,7658,7659,4441,3962,3963,1649,7660,1541, 563,7661,1077, # 5670 +7662,3349,3041,3451, 511,2997,3964,3965,3667,3966,1268,2564,3350,3207,4442,4443, # 
5686 +7663, 535,1048,1276,1189,2912,2028,3142,1438,1373,2834,2952,1134,2012,7664,4169, # 5702 +1238,2578,3086,1259,7665, 700,7666,2953,3143,3668,4170,7667,4171,1146,1875,1906, # 5718 +4444,2601,3967, 781,2422, 132,1589, 203, 147, 273,2789,2402, 898,1786,2154,3968, # 5734 +3969,7668,3792,2790,7669,7670,4445,4446,7671,3208,7672,1635,3793, 965,7673,1804, # 5750 +2690,1516,3559,1121,1082,1329,3284,3970,1449,3794, 65,1128,2835,2913,2759,1590, # 5766 +3795,7674,7675, 12,2658, 45, 976,2579,3144,4447, 517,2528,1013,1037,3209,7676, # 5782 +3796,2836,7677,3797,7678,3452,7679,2602, 614,1998,2318,3798,3087,2724,2628,7680, # 5798 +2580,4172, 599,1269,7681,1810,3669,7682,2691,3088, 759,1060, 489,1805,3351,3285, # 5814 +1358,7683,7684,2386,1387,1215,2629,2252, 490,7685,7686,4173,1759,2387,2343,7687, # 5830 +4448,3799,1907,3971,2630,1806,3210,4449,3453,3286,2760,2344, 874,7688,7689,3454, # 5846 +3670,1858, 91,2914,3671,3042,3800,4450,7690,3145,3972,2659,7691,3455,1202,1403, # 5862 +3801,2954,2529,1517,2503,4451,3456,2504,7692,4452,7693,2692,1885,1495,1731,3973, # 5878 +2365,4453,7694,2029,7695,7696,3974,2693,1216, 237,2581,4174,2319,3975,3802,4454, # 5894 +4455,2694,3560,3457, 445,4456,7697,7698,7699,7700,2761, 61,3976,3672,1822,3977, # 5910 +7701, 687,2045, 935, 925, 405,2660, 703,1096,1859,2725,4457,3978,1876,1367,2695, # 5926 +3352, 918,2105,1781,2476, 334,3287,1611,1093,4458, 564,3146,3458,3673,3353, 945, # 5942 +2631,2057,4459,7702,1925, 872,4175,7703,3459,2696,3089, 349,4176,3674,3979,4460, # 5958 +3803,4177,3675,2155,3980,4461,4462,4178,4463,2403,2046, 782,3981, 400, 251,4179, # 5974 +1624,7704,7705, 277,3676, 299,1265, 476,1191,3804,2121,4180,4181,1109, 205,7706, # 5990 +2582,1000,2156,3561,1860,7707,7708,7709,4464,7710,4465,2565, 107,2477,2157,3982, # 6006 +3460,3147,7711,1533, 541,1301, 158, 753,4182,2872,3562,7712,1696, 370,1088,4183, # 6022 +4466,3563, 579, 327, 440, 162,2240, 269,1937,1374,3461, 968,3043, 56,1396,3090, # 6038 
+2106,3288,3354,7713,1926,2158,4467,2998,7714,3564,7715,7716,3677,4468,2478,7717, # 6054 +2791,7718,1650,4469,7719,2603,7720,7721,3983,2661,3355,1149,3356,3984,3805,3985, # 6070 +7722,1076, 49,7723, 951,3211,3289,3290, 450,2837, 920,7724,1811,2792,2366,4184, # 6086 +1908,1138,2367,3806,3462,7725,3212,4470,1909,1147,1518,2423,4471,3807,7726,4472, # 6102 +2388,2604, 260,1795,3213,7727,7728,3808,3291, 708,7729,3565,1704,7730,3566,1351, # 6118 +1618,3357,2999,1886, 944,4185,3358,4186,3044,3359,4187,7731,3678, 422, 413,1714, # 6134 +3292, 500,2058,2345,4188,2479,7732,1344,1910, 954,7733,1668,7734,7735,3986,2404, # 6150 +4189,3567,3809,4190,7736,2302,1318,2505,3091, 133,3092,2873,4473, 629, 31,2838, # 6166 +2697,3810,4474, 850, 949,4475,3987,2955,1732,2088,4191,1496,1852,7737,3988, 620, # 6182 +3214, 981,1242,3679,3360,1619,3680,1643,3293,2139,2452,1970,1719,3463,2168,7738, # 6198 +3215,7739,7740,3361,1828,7741,1277,4476,1565,2047,7742,1636,3568,3093,7743, 869, # 6214 +2839, 655,3811,3812,3094,3989,3000,3813,1310,3569,4477,7744,7745,7746,1733, 558, # 6230 +4478,3681, 335,1549,3045,1756,4192,3682,1945,3464,1829,1291,1192, 470,2726,2107, # 6246 +2793, 913,1054,3990,7747,1027,7748,3046,3991,4479, 982,2662,3362,3148,3465,3216, # 6262 +3217,1946,2794,7749, 571,4480,7750,1830,7751,3570,2583,1523,2424,7752,2089, 984, # 6278 +4481,3683,1959,7753,3684, 852, 923,2795,3466,3685, 969,1519, 999,2048,2320,1705, # 6294 +7754,3095, 615,1662, 151, 597,3992,2405,2321,1049, 275,4482,3686,4193, 568,3687, # 6310 +3571,2480,4194,3688,7755,2425,2270, 409,3218,7756,1566,2874,3467,1002, 769,2840, # 6326 + 194,2090,3149,3689,2222,3294,4195, 628,1505,7757,7758,1763,2177,3001,3993, 521, # 6342 +1161,2584,1787,2203,2406,4483,3994,1625,4196,4197, 412, 42,3096, 464,7759,2632, # 6358 +4484,3363,1760,1571,2875,3468,2530,1219,2204,3814,2633,2140,2368,4485,4486,3295, # 6374 +1651,3364,3572,7760,7761,3573,2481,3469,7762,3690,7763,7764,2271,2091, 460,7765, # 6390 +4487,7766,3002, 962, 588,3574, 
289,3219,2634,1116, 52,7767,3047,1796,7768,7769, # 6406 +7770,1467,7771,1598,1143,3691,4198,1984,1734,1067,4488,1280,3365, 465,4489,1572, # 6422 + 510,7772,1927,2241,1812,1644,3575,7773,4490,3692,7774,7775,2663,1573,1534,7776, # 6438 +7777,4199, 536,1807,1761,3470,3815,3150,2635,7778,7779,7780,4491,3471,2915,1911, # 6454 +2796,7781,3296,1122, 377,3220,7782, 360,7783,7784,4200,1529, 551,7785,2059,3693, # 6470 +1769,2426,7786,2916,4201,3297,3097,2322,2108,2030,4492,1404, 136,1468,1479, 672, # 6486 +1171,3221,2303, 271,3151,7787,2762,7788,2049, 678,2727, 865,1947,4493,7789,2013, # 6502 +3995,2956,7790,2728,2223,1397,3048,3694,4494,4495,1735,2917,3366,3576,7791,3816, # 6518 + 509,2841,2453,2876,3817,7792,7793,3152,3153,4496,4202,2531,4497,2304,1166,1010, # 6534 + 552, 681,1887,7794,7795,2957,2958,3996,1287,1596,1861,3154, 358, 453, 736, 175, # 6550 + 478,1117, 905,1167,1097,7796,1853,1530,7797,1706,7798,2178,3472,2287,3695,3473, # 6566 +3577,4203,2092,4204,7799,3367,1193,2482,4205,1458,2190,2205,1862,1888,1421,3298, # 6582 +2918,3049,2179,3474, 595,2122,7800,3997,7801,7802,4206,1707,2636, 223,3696,1359, # 6598 + 751,3098, 183,3475,7803,2797,3003, 419,2369, 633, 704,3818,2389, 241,7804,7805, # 6614 +7806, 838,3004,3697,2272,2763,2454,3819,1938,2050,3998,1309,3099,2242,1181,7807, # 6630 +1136,2206,3820,2370,1446,4207,2305,4498,7808,7809,4208,1055,2605, 484,3698,7810, # 6646 +3999, 625,4209,2273,3368,1499,4210,4000,7811,4001,4211,3222,2274,2275,3476,7812, # 6662 +7813,2764, 808,2606,3699,3369,4002,4212,3100,2532, 526,3370,3821,4213, 955,7814, # 6678 +1620,4214,2637,2427,7815,1429,3700,1669,1831, 994, 928,7816,3578,1260,7817,7818, # 6694 +7819,1948,2288, 741,2919,1626,4215,2729,2455, 867,1184, 362,3371,1392,7820,7821, # 6710 +4003,4216,1770,1736,3223,2920,4499,4500,1928,2698,1459,1158,7822,3050,3372,2877, # 6726 +1292,1929,2506,2842,3701,1985,1187,2071,2014,2607,4217,7823,2566,2507,2169,3702, # 6742 +2483,3299,7824,3703,4501,7825,7826, 
666,1003,3005,1022,3579,4218,7827,4502,1813, # 6758 +2253, 574,3822,1603, 295,1535, 705,3823,4219, 283, 858, 417,7828,7829,3224,4503, # 6774 +4504,3051,1220,1889,1046,2276,2456,4004,1393,1599, 689,2567, 388,4220,7830,2484, # 6790 + 802,7831,2798,3824,2060,1405,2254,7832,4505,3825,2109,1052,1345,3225,1585,7833, # 6806 + 809,7834,7835,7836, 575,2730,3477, 956,1552,1469,1144,2323,7837,2324,1560,2457, # 6822 +3580,3226,4005, 616,2207,3155,2180,2289,7838,1832,7839,3478,4506,7840,1319,3704, # 6838 +3705,1211,3581,1023,3227,1293,2799,7841,7842,7843,3826, 607,2306,3827, 762,2878, # 6854 +1439,4221,1360,7844,1485,3052,7845,4507,1038,4222,1450,2061,2638,4223,1379,4508, # 6870 +2585,7846,7847,4224,1352,1414,2325,2921,1172,7848,7849,3828,3829,7850,1797,1451, # 6886 +7851,7852,7853,7854,2922,4006,4007,2485,2346, 411,4008,4009,3582,3300,3101,4509, # 6902 +1561,2664,1452,4010,1375,7855,7856, 47,2959, 316,7857,1406,1591,2923,3156,7858, # 6918 +1025,2141,3102,3157, 354,2731, 884,2224,4225,2407, 508,3706, 726,3583, 996,2428, # 6934 +3584, 729,7859, 392,2191,1453,4011,4510,3707,7860,7861,2458,3585,2608,1675,2800, # 6950 + 919,2347,2960,2348,1270,4511,4012, 73,7862,7863, 647,7864,3228,2843,2255,1550, # 6966 +1346,3006,7865,1332, 883,3479,7866,7867,7868,7869,3301,2765,7870,1212, 831,1347, # 6982 +4226,4512,2326,3830,1863,3053, 720,3831,4513,4514,3832,7871,4227,7872,7873,4515, # 6998 +7874,7875,1798,4516,3708,2609,4517,3586,1645,2371,7876,7877,2924, 669,2208,2665, # 7014 +2429,7878,2879,7879,7880,1028,3229,7881,4228,2408,7882,2256,1353,7883,7884,4518, # 7030 +3158, 518,7885,4013,7886,4229,1960,7887,2142,4230,7888,7889,3007,2349,2350,3833, # 7046 + 516,1833,1454,4014,2699,4231,4519,2225,2610,1971,1129,3587,7890,2766,7891,2961, # 7062 +1422, 577,1470,3008,1524,3373,7892,7893, 432,4232,3054,3480,7894,2586,1455,2508, # 7078 +2226,1972,1175,7895,1020,2732,4015,3481,4520,7896,2733,7897,1743,1361,3055,3482, # 7094 +2639,4016,4233,4521,2290, 895, 924,4234,2170, 331,2243,3056, 
166,1627,3057,1098, # 7110 +7898,1232,2880,2227,3374,4522, 657, 403,1196,2372, 542,3709,3375,1600,4235,3483, # 7126 +7899,4523,2767,3230, 576, 530,1362,7900,4524,2533,2666,3710,4017,7901, 842,3834, # 7142 +7902,2801,2031,1014,4018, 213,2700,3376, 665, 621,4236,7903,3711,2925,2430,7904, # 7158 +2431,3302,3588,3377,7905,4237,2534,4238,4525,3589,1682,4239,3484,1380,7906, 724, # 7174 +2277, 600,1670,7907,1337,1233,4526,3103,2244,7908,1621,4527,7909, 651,4240,7910, # 7190 +1612,4241,2611,7911,2844,7912,2734,2307,3058,7913, 716,2459,3059, 174,1255,2701, # 7206 +4019,3590, 548,1320,1398, 728,4020,1574,7914,1890,1197,3060,4021,7915,3061,3062, # 7222 +3712,3591,3713, 747,7916, 635,4242,4528,7917,7918,7919,4243,7920,7921,4529,7922, # 7238 +3378,4530,2432, 451,7923,3714,2535,2072,4244,2735,4245,4022,7924,1764,4531,7925, # 7254 +4246, 350,7926,2278,2390,2486,7927,4247,4023,2245,1434,4024, 488,4532, 458,4248, # 7270 +4025,3715, 771,1330,2391,3835,2568,3159,2159,2409,1553,2667,3160,4249,7928,2487, # 7286 +2881,2612,1720,2702,4250,3379,4533,7929,2536,4251,7930,3231,4252,2768,7931,2015, # 7302 +2736,7932,1155,1017,3716,3836,7933,3303,2308, 201,1864,4253,1430,7934,4026,7935, # 7318 +7936,7937,7938,7939,4254,1604,7940, 414,1865, 371,2587,4534,4535,3485,2016,3104, # 7334 +4536,1708, 960,4255, 887, 389,2171,1536,1663,1721,7941,2228,4027,2351,2926,1580, # 7350 +7942,7943,7944,1744,7945,2537,4537,4538,7946,4539,7947,2073,7948,7949,3592,3380, # 7366 +2882,4256,7950,4257,2640,3381,2802, 673,2703,2460, 709,3486,4028,3593,4258,7951, # 7382 +1148, 502, 634,7952,7953,1204,4540,3594,1575,4541,2613,3717,7954,3718,3105, 948, # 7398 +3232, 121,1745,3837,1110,7955,4259,3063,2509,3009,4029,3719,1151,1771,3838,1488, # 7414 +4030,1986,7956,2433,3487,7957,7958,2093,7959,4260,3839,1213,1407,2803, 531,2737, # 7430 +2538,3233,1011,1537,7960,2769,4261,3106,1061,7961,3720,3721,1866,2883,7962,2017, # 7446 + 120,4262,4263,2062,3595,3234,2309,3840,2668,3382,1954,4542,7963,7964,3488,1047, # 7462 
+2704,1266,7965,1368,4543,2845, 649,3383,3841,2539,2738,1102,2846,2669,7966,7967, # 7478 +1999,7968,1111,3596,2962,7969,2488,3842,3597,2804,1854,3384,3722,7970,7971,3385, # 7494 +2410,2884,3304,3235,3598,7972,2569,7973,3599,2805,4031,1460, 856,7974,3600,7975, # 7510 +2885,2963,7976,2886,3843,7977,4264, 632,2510, 875,3844,1697,3845,2291,7978,7979, # 7526 +4544,3010,1239, 580,4545,4265,7980, 914, 936,2074,1190,4032,1039,2123,7981,7982, # 7542 +7983,3386,1473,7984,1354,4266,3846,7985,2172,3064,4033, 915,3305,4267,4268,3306, # 7558 +1605,1834,7986,2739, 398,3601,4269,3847,4034, 328,1912,2847,4035,3848,1331,4270, # 7574 +3011, 937,4271,7987,3602,4036,4037,3387,2160,4546,3388, 524, 742, 538,3065,1012, # 7590 +7988,7989,3849,2461,7990, 658,1103, 225,3850,7991,7992,4547,7993,4548,7994,3236, # 7606 +1243,7995,4038, 963,2246,4549,7996,2705,3603,3161,7997,7998,2588,2327,7999,4550, # 7622 +8000,8001,8002,3489,3307, 957,3389,2540,2032,1930,2927,2462, 870,2018,3604,1746, # 7638 +2770,2771,2434,2463,8003,3851,8004,3723,3107,3724,3490,3390,3725,8005,1179,3066, # 7654 +8006,3162,2373,4272,3726,2541,3163,3108,2740,4039,8007,3391,1556,2542,2292, 977, # 7670 +2887,2033,4040,1205,3392,8008,1765,3393,3164,2124,1271,1689, 714,4551,3491,8009, # 7686 +2328,3852, 533,4273,3605,2181, 617,8010,2464,3308,3492,2310,8011,8012,3165,8013, # 7702 +8014,3853,1987, 618, 427,2641,3493,3394,8015,8016,1244,1690,8017,2806,4274,4552, # 7718 +8018,3494,8019,8020,2279,1576, 473,3606,4275,3395, 972,8021,3607,8022,3067,8023, # 7734 +8024,4553,4554,8025,3727,4041,4042,8026, 153,4555, 356,8027,1891,2888,4276,2143, # 7750 + 408, 803,2352,8028,3854,8029,4277,1646,2570,2511,4556,4557,3855,8030,3856,4278, # 7766 +8031,2411,3396, 752,8032,8033,1961,2964,8034, 746,3012,2465,8035,4279,3728, 698, # 7782 +4558,1892,4280,3608,2543,4559,3609,3857,8036,3166,3397,8037,1823,1302,4043,2706, # 7798 +3858,1973,4281,8038,4282,3167, 823,1303,1288,1236,2848,3495,4044,3398, 774,3859, # 7814 
+8039,1581,4560,1304,2849,3860,4561,8040,2435,2161,1083,3237,4283,4045,4284, 344, # 7830 +1173, 288,2311, 454,1683,8041,8042,1461,4562,4046,2589,8043,8044,4563, 985, 894, # 7846 +8045,3399,3168,8046,1913,2928,3729,1988,8047,2110,1974,8048,4047,8049,2571,1194, # 7862 + 425,8050,4564,3169,1245,3730,4285,8051,8052,2850,8053, 636,4565,1855,3861, 760, # 7878 +1799,8054,4286,2209,1508,4566,4048,1893,1684,2293,8055,8056,8057,4287,4288,2210, # 7894 + 479,8058,8059, 832,8060,4049,2489,8061,2965,2490,3731, 990,3109, 627,1814,2642, # 7910 +4289,1582,4290,2125,2111,3496,4567,8062, 799,4291,3170,8063,4568,2112,1737,3013, # 7926 +1018, 543, 754,4292,3309,1676,4569,4570,4050,8064,1489,8065,3497,8066,2614,2889, # 7942 +4051,8067,8068,2966,8069,8070,8071,8072,3171,4571,4572,2182,1722,8073,3238,3239, # 7958 +1842,3610,1715, 481, 365,1975,1856,8074,8075,1962,2491,4573,8076,2126,3611,3240, # 7974 + 433,1894,2063,2075,8077, 602,2741,8078,8079,8080,8081,8082,3014,1628,3400,8083, # 7990 +3172,4574,4052,2890,4575,2512,8084,2544,2772,8085,8086,8087,3310,4576,2891,8088, # 8006 +4577,8089,2851,4578,4579,1221,2967,4053,2513,8090,8091,8092,1867,1989,8093,8094, # 8022 +8095,1895,8096,8097,4580,1896,4054, 318,8098,2094,4055,4293,8099,8100, 485,8101, # 8038 + 938,3862, 553,2670, 116,8102,3863,3612,8103,3498,2671,2773,3401,3311,2807,8104, # 8054 +3613,2929,4056,1747,2930,2968,8105,8106, 207,8107,8108,2672,4581,2514,8109,3015, # 8070 + 890,3614,3864,8110,1877,3732,3402,8111,2183,2353,3403,1652,8112,8113,8114, 941, # 8086 +2294, 208,3499,4057,2019, 330,4294,3865,2892,2492,3733,4295,8115,8116,8117,8118, # 8102 +#Everything below is of no interest for detection purpose +2515,1613,4582,8119,3312,3866,2516,8120,4058,8121,1637,4059,2466,4583,3867,8122, # 8118 +2493,3016,3734,8123,8124,2192,8125,8126,2162,8127,8128,8129,8130,8131,8132,8133, # 8134 +8134,8135,8136,8137,8138,8139,8140,8141,8142,8143,8144,8145,8146,8147,8148,8149, # 8150 
+8150,8151,8152,8153,8154,8155,8156,8157,8158,8159,8160,8161,8162,8163,8164,8165, # 8166 +8166,8167,8168,8169,8170,8171,8172,8173,8174,8175,8176,8177,8178,8179,8180,8181, # 8182 +8182,8183,8184,8185,8186,8187,8188,8189,8190,8191,8192,8193,8194,8195,8196,8197, # 8198 +8198,8199,8200,8201,8202,8203,8204,8205,8206,8207,8208,8209,8210,8211,8212,8213, # 8214 +8214,8215,8216,8217,8218,8219,8220,8221,8222,8223,8224,8225,8226,8227,8228,8229, # 8230 +8230,8231,8232,8233,8234,8235,8236,8237,8238,8239,8240,8241,8242,8243,8244,8245, # 8246 +8246,8247,8248,8249,8250,8251,8252,8253,8254,8255,8256,8257,8258,8259,8260,8261, # 8262 +8262,8263,8264,8265,8266,8267,8268,8269,8270,8271,8272,8273,8274,8275,8276,8277, # 8278 +8278,8279,8280,8281,8282,8283,8284,8285,8286,8287,8288,8289,8290,8291,8292,8293, # 8294 +8294,8295,8296,8297,8298,8299,8300,8301,8302,8303,8304,8305,8306,8307,8308,8309, # 8310 +8310,8311,8312,8313,8314,8315,8316,8317,8318,8319,8320,8321,8322,8323,8324,8325, # 8326 +8326,8327,8328,8329,8330,8331,8332,8333,8334,8335,8336,8337,8338,8339,8340,8341, # 8342 +8342,8343,8344,8345,8346,8347,8348,8349,8350,8351,8352,8353,8354,8355,8356,8357, # 8358 +8358,8359,8360,8361,8362,8363,8364,8365,8366,8367,8368,8369,8370,8371,8372,8373, # 8374 +8374,8375,8376,8377,8378,8379,8380,8381,8382,8383,8384,8385,8386,8387,8388,8389, # 8390 +8390,8391,8392,8393,8394,8395,8396,8397,8398,8399,8400,8401,8402,8403,8404,8405, # 8406 +8406,8407,8408,8409,8410,8411,8412,8413,8414,8415,8416,8417,8418,8419,8420,8421, # 8422 +8422,8423,8424,8425,8426,8427,8428,8429,8430,8431,8432,8433,8434,8435,8436,8437, # 8438 +8438,8439,8440,8441,8442,8443,8444,8445,8446,8447,8448,8449,8450,8451,8452,8453, # 8454 +8454,8455,8456,8457,8458,8459,8460,8461,8462,8463,8464,8465,8466,8467,8468,8469, # 8470 +8470,8471,8472,8473,8474,8475,8476,8477,8478,8479,8480,8481,8482,8483,8484,8485, # 8486 +8486,8487,8488,8489,8490,8491,8492,8493,8494,8495,8496,8497,8498,8499,8500,8501, # 8502 
+8502,8503,8504,8505,8506,8507,8508,8509,8510,8511,8512,8513,8514,8515,8516,8517, # 8518 +8518,8519,8520,8521,8522,8523,8524,8525,8526,8527,8528,8529,8530,8531,8532,8533, # 8534 +8534,8535,8536,8537,8538,8539,8540,8541,8542,8543,8544,8545,8546,8547,8548,8549, # 8550 +8550,8551,8552,8553,8554,8555,8556,8557,8558,8559,8560,8561,8562,8563,8564,8565, # 8566 +8566,8567,8568,8569,8570,8571,8572,8573,8574,8575,8576,8577,8578,8579,8580,8581, # 8582 +8582,8583,8584,8585,8586,8587,8588,8589,8590,8591,8592,8593,8594,8595,8596,8597, # 8598 +8598,8599,8600,8601,8602,8603,8604,8605,8606,8607,8608,8609,8610,8611,8612,8613, # 8614 +8614,8615,8616,8617,8618,8619,8620,8621,8622,8623,8624,8625,8626,8627,8628,8629, # 8630 +8630,8631,8632,8633,8634,8635,8636,8637,8638,8639,8640,8641,8642,8643,8644,8645, # 8646 +8646,8647,8648,8649,8650,8651,8652,8653,8654,8655,8656,8657,8658,8659,8660,8661, # 8662 +8662,8663,8664,8665,8666,8667,8668,8669,8670,8671,8672,8673,8674,8675,8676,8677, # 8678 +8678,8679,8680,8681,8682,8683,8684,8685,8686,8687,8688,8689,8690,8691,8692,8693, # 8694 +8694,8695,8696,8697,8698,8699,8700,8701,8702,8703,8704,8705,8706,8707,8708,8709, # 8710 +8710,8711,8712,8713,8714,8715,8716,8717,8718,8719,8720,8721,8722,8723,8724,8725, # 8726 +8726,8727,8728,8729,8730,8731,8732,8733,8734,8735,8736,8737,8738,8739,8740,8741) # 8742 diff --git a/fanficdownloader/chardet/euctwprober.py b/fanficdownloader/chardet/euctwprober.py new file mode 100644 index 00000000..b073f134 --- /dev/null +++ b/fanficdownloader/chardet/euctwprober.py @@ -0,0 +1,41 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is mozilla.org code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. 
+# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from mbcharsetprober import MultiByteCharSetProber +from codingstatemachine import CodingStateMachine +from chardistribution import EUCTWDistributionAnalysis +from mbcssm import EUCTWSMModel + +class EUCTWProber(MultiByteCharSetProber): + def __init__(self): + MultiByteCharSetProber.__init__(self) + self._mCodingSM = CodingStateMachine(EUCTWSMModel) + self._mDistributionAnalyzer = EUCTWDistributionAnalysis() + self.reset() + + def get_charset_name(self): + return "EUC-TW" diff --git a/fanficdownloader/chardet/gb2312freq.py b/fanficdownloader/chardet/gb2312freq.py new file mode 100644 index 00000000..7a4d5a1b --- /dev/null +++ b/fanficdownloader/chardet/gb2312freq.py @@ -0,0 +1,471 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Communicator client code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. 
+# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +# GB2312 most frequently used character table +# +# Char to FreqOrder table , from hz6763 + +# 512 --> 0.79 -- 0.79 +# 1024 --> 0.92 -- 0.13 +# 2048 --> 0.98 -- 0.06 +# 6768 --> 1.00 -- 0.02 +# +# Ideal Distribution Ratio = 0.79135/(1-0.79135) = 3.79 +# Random Distribution Ration = 512 / (3755 - 512) = 0.157 +# +# Typical Distribution Ratio about 25% of Ideal one, still much higher that RDR + +GB2312_TYPICAL_DISTRIBUTION_RATIO = 0.9 + +GB2312_TABLE_SIZE = 3760 + +GB2312CharToFreqOrder = ( \ +1671, 749,1443,2364,3924,3807,2330,3921,1704,3463,2691,1511,1515, 572,3191,2205, +2361, 224,2558, 479,1711, 963,3162, 440,4060,1905,2966,2947,3580,2647,3961,3842, +2204, 869,4207, 970,2678,5626,2944,2956,1479,4048, 514,3595, 588,1346,2820,3409, + 249,4088,1746,1873,2047,1774, 581,1813, 358,1174,3590,1014,1561,4844,2245, 670, +1636,3112, 889,1286, 953, 556,2327,3060,1290,3141, 613, 185,3477,1367, 850,3820, +1715,2428,2642,2303,2732,3041,2562,2648,3566,3946,1349, 388,3098,2091,1360,3585, + 152,1687,1539, 738,1559, 59,1232,2925,2267,1388,1249,1741,1679,2960, 151,1566, +1125,1352,4271, 924,4296, 385,3166,4459, 310,1245,2850, 
70,3285,2729,3534,3575, +2398,3298,3466,1960,2265, 217,3647, 864,1909,2084,4401,2773,1010,3269,5152, 853, +3051,3121,1244,4251,1895, 364,1499,1540,2313,1180,3655,2268, 562, 715,2417,3061, + 544, 336,3768,2380,1752,4075, 950, 280,2425,4382, 183,2759,3272, 333,4297,2155, +1688,2356,1444,1039,4540, 736,1177,3349,2443,2368,2144,2225, 565, 196,1482,3406, + 927,1335,4147, 692, 878,1311,1653,3911,3622,1378,4200,1840,2969,3149,2126,1816, +2534,1546,2393,2760, 737,2494, 13, 447, 245,2747, 38,2765,2129,2589,1079, 606, + 360, 471,3755,2890, 404, 848, 699,1785,1236, 370,2221,1023,3746,2074,2026,2023, +2388,1581,2119, 812,1141,3091,2536,1519, 804,2053, 406,1596,1090, 784, 548,4414, +1806,2264,2936,1100, 343,4114,5096, 622,3358, 743,3668,1510,1626,5020,3567,2513, +3195,4115,5627,2489,2991, 24,2065,2697,1087,2719, 48,1634, 315, 68, 985,2052, + 198,2239,1347,1107,1439, 597,2366,2172, 871,3307, 919,2487,2790,1867, 236,2570, +1413,3794, 906,3365,3381,1701,1982,1818,1524,2924,1205, 616,2586,2072,2004, 575, + 253,3099, 32,1365,1182, 197,1714,2454,1201, 554,3388,3224,2748, 756,2587, 250, +2567,1507,1517,3529,1922,2761,2337,3416,1961,1677,2452,2238,3153, 615, 911,1506, +1474,2495,1265,1906,2749,3756,3280,2161, 898,2714,1759,3450,2243,2444, 563, 26, +3286,2266,3769,3344,2707,3677, 611,1402, 531,1028,2871,4548,1375, 261,2948, 835, +1190,4134, 353, 840,2684,1900,3082,1435,2109,1207,1674, 329,1872,2781,4055,2686, +2104, 608,3318,2423,2957,2768,1108,3739,3512,3271,3985,2203,1771,3520,1418,2054, +1681,1153, 225,1627,2929, 162,2050,2511,3687,1954, 124,1859,2431,1684,3032,2894, + 585,4805,3969,2869,2704,2088,2032,2095,3656,2635,4362,2209, 256, 518,2042,2105, +3777,3657, 643,2298,1148,1779, 190, 989,3544, 414, 11,2135,2063,2979,1471, 403, +3678, 126, 770,1563, 671,2499,3216,2877, 600,1179, 307,2805,4937,1268,1297,2694, + 252,4032,1448,1494,1331,1394, 127,2256, 222,1647,1035,1481,3056,1915,1048, 873, +3651, 210, 33,1608,2516, 200,1520, 415, 102, 0,3389,1287, 817, 91,3299,2940, + 836,1814, 
549,2197,1396,1669,2987,3582,2297,2848,4528,1070, 687, 20,1819, 121, +1552,1364,1461,1968,2617,3540,2824,2083, 177, 948,4938,2291, 110,4549,2066, 648, +3359,1755,2110,2114,4642,4845,1693,3937,3308,1257,1869,2123, 208,1804,3159,2992, +2531,2549,3361,2418,1350,2347,2800,2568,1291,2036,2680, 72, 842,1990, 212,1233, +1154,1586, 75,2027,3410,4900,1823,1337,2710,2676, 728,2810,1522,3026,4995, 157, + 755,1050,4022, 710, 785,1936,2194,2085,1406,2777,2400, 150,1250,4049,1206, 807, +1910, 534, 529,3309,1721,1660, 274, 39,2827, 661,2670,1578, 925,3248,3815,1094, +4278,4901,4252, 41,1150,3747,2572,2227,4501,3658,4902,3813,3357,3617,2884,2258, + 887, 538,4187,3199,1294,2439,3042,2329,2343,2497,1255, 107, 543,1527, 521,3478, +3568, 194,5062, 15, 961,3870,1241,1192,2664, 66,5215,3260,2111,1295,1127,2152, +3805,4135, 901,1164,1976, 398,1278, 530,1460, 748, 904,1054,1966,1426, 53,2909, + 509, 523,2279,1534, 536,1019, 239,1685, 460,2353, 673,1065,2401,3600,4298,2272, +1272,2363, 284,1753,3679,4064,1695, 81, 815,2677,2757,2731,1386, 859, 500,4221, +2190,2566, 757,1006,2519,2068,1166,1455, 337,2654,3203,1863,1682,1914,3025,1252, +1409,1366, 847, 714,2834,2038,3209, 964,2970,1901, 885,2553,1078,1756,3049, 301, +1572,3326, 688,2130,1996,2429,1805,1648,2930,3421,2750,3652,3088, 262,1158,1254, + 389,1641,1812, 526,1719, 923,2073,1073,1902, 468, 489,4625,1140, 857,2375,3070, +3319,2863, 380, 116,1328,2693,1161,2244, 273,1212,1884,2769,3011,1775,1142, 461, +3066,1200,2147,2212, 790, 702,2695,4222,1601,1058, 434,2338,5153,3640, 67,2360, +4099,2502, 618,3472,1329, 416,1132, 830,2782,1807,2653,3211,3510,1662, 192,2124, + 296,3979,1739,1611,3684, 23, 118, 324, 446,1239,1225, 293,2520,3814,3795,2535, +3116, 17,1074, 467,2692,2201, 387,2922, 45,1326,3055,1645,3659,2817, 958, 243, +1903,2320,1339,2825,1784,3289, 356, 576, 865,2315,2381,3377,3916,1088,3122,1713, +1655, 935, 628,4689,1034,1327, 441, 800, 720, 894,1979,2183,1528,5289,2702,1071, +4046,3572,2399,1571,3281, 79, 761,1103, 327, 134, 
758,1899,1371,1615, 879, 442, + 215,2605,2579, 173,2048,2485,1057,2975,3317,1097,2253,3801,4263,1403,1650,2946, + 814,4968,3487,1548,2644,1567,1285, 2, 295,2636, 97, 946,3576, 832, 141,4257, +3273, 760,3821,3521,3156,2607, 949,1024,1733,1516,1803,1920,2125,2283,2665,3180, +1501,2064,3560,2171,1592, 803,3518,1416, 732,3897,4258,1363,1362,2458, 119,1427, + 602,1525,2608,1605,1639,3175, 694,3064, 10, 465, 76,2000,4846,4208, 444,3781, +1619,3353,2206,1273,3796, 740,2483, 320,1723,2377,3660,2619,1359,1137,1762,1724, +2345,2842,1850,1862, 912, 821,1866, 612,2625,1735,2573,3369,1093, 844, 89, 937, + 930,1424,3564,2413,2972,1004,3046,3019,2011, 711,3171,1452,4178, 428, 801,1943, + 432, 445,2811, 206,4136,1472, 730, 349, 73, 397,2802,2547, 998,1637,1167, 789, + 396,3217, 154,1218, 716,1120,1780,2819,4826,1931,3334,3762,2139,1215,2627, 552, +3664,3628,3232,1405,2383,3111,1356,2652,3577,3320,3101,1703, 640,1045,1370,1246, +4996, 371,1575,2436,1621,2210, 984,4033,1734,2638, 16,4529, 663,2755,3255,1451, +3917,2257,1253,1955,2234,1263,2951, 214,1229, 617, 485, 359,1831,1969, 473,2310, + 750,2058, 165, 80,2864,2419, 361,4344,2416,2479,1134, 796,3726,1266,2943, 860, +2715, 938, 390,2734,1313,1384, 248, 202, 877,1064,2854, 522,3907, 279,1602, 297, +2357, 395,3740, 137,2075, 944,4089,2584,1267,3802, 62,1533,2285, 178, 176, 780, +2440, 201,3707, 590, 478,1560,4354,2117,1075, 30, 74,4643,4004,1635,1441,2745, + 776,2596, 238,1077,1692,1912,2844, 605, 499,1742,3947, 241,3053, 980,1749, 936, +2640,4511,2582, 515,1543,2162,5322,2892,2993, 890,2148,1924, 665,1827,3581,1032, + 968,3163, 339,1044,1896, 270, 583,1791,1720,4367,1194,3488,3669, 43,2523,1657, + 163,2167, 290,1209,1622,3378, 550, 634,2508,2510, 695,2634,2384,2512,1476,1414, + 220,1469,2341,2138,2852,3183,2900,4939,2865,3502,1211,3680, 854,3227,1299,2976, +3172, 186,2998,1459, 443,1067,3251,1495, 321,1932,3054, 909, 753,1410,1828, 436, +2441,1119,1587,3164,2186,1258, 227, 231,1425,1890,3200,3942, 247, 959, 725,5254, +2741, 
577,2158,2079, 929, 120, 174, 838,2813, 591,1115, 417,2024, 40,3240,1536, +1037, 291,4151,2354, 632,1298,2406,2500,3535,1825,1846,3451, 205,1171, 345,4238, + 18,1163, 811, 685,2208,1217, 425,1312,1508,1175,4308,2552,1033, 587,1381,3059, +2984,3482, 340,1316,4023,3972, 792,3176, 519, 777,4690, 918, 933,4130,2981,3741, + 90,3360,2911,2200,5184,4550, 609,3079,2030, 272,3379,2736, 363,3881,1130,1447, + 286, 779, 357,1169,3350,3137,1630,1220,2687,2391, 747,1277,3688,2618,2682,2601, +1156,3196,5290,4034,3102,1689,3596,3128, 874, 219,2783, 798, 508,1843,2461, 269, +1658,1776,1392,1913,2983,3287,2866,2159,2372, 829,4076, 46,4253,2873,1889,1894, + 915,1834,1631,2181,2318, 298, 664,2818,3555,2735, 954,3228,3117, 527,3511,2173, + 681,2712,3033,2247,2346,3467,1652, 155,2164,3382, 113,1994, 450, 899, 494, 994, +1237,2958,1875,2336,1926,3727, 545,1577,1550, 633,3473, 204,1305,3072,2410,1956, +2471, 707,2134, 841,2195,2196,2663,3843,1026,4940, 990,3252,4997, 368,1092, 437, +3212,3258,1933,1829, 675,2977,2893, 412, 943,3723,4644,3294,3283,2230,2373,5154, +2389,2241,2661,2323,1404,2524, 593, 787, 677,3008,1275,2059, 438,2709,2609,2240, +2269,2246,1446, 36,1568,1373,3892,1574,2301,1456,3962, 693,2276,5216,2035,1143, +2720,1919,1797,1811,2763,4137,2597,1830,1699,1488,1198,2090, 424,1694, 312,3634, +3390,4179,3335,2252,1214, 561,1059,3243,2295,2561, 975,5155,2321,2751,3772, 472, +1537,3282,3398,1047,2077,2348,2878,1323,3340,3076, 690,2906, 51, 369, 170,3541, +1060,2187,2688,3670,2541,1083,1683, 928,3918, 459, 109,4427, 599,3744,4286, 143, +2101,2730,2490, 82,1588,3036,2121, 281,1860, 477,4035,1238,2812,3020,2716,3312, +1530,2188,2055,1317, 843, 636,1808,1173,3495, 649, 181,1002, 147,3641,1159,2414, +3750,2289,2795, 813,3123,2610,1136,4368, 5,3391,4541,2174, 420, 429,1728, 754, +1228,2115,2219, 347,2223,2733, 735,1518,3003,2355,3134,1764,3948,3329,1888,2424, +1001,1234,1972,3321,3363,1672,1021,1450,1584, 226, 765, 655,2526,3404,3244,2302, +3665, 731, 594,2184, 319,1576, 621, 
658,2656,4299,2099,3864,1279,2071,2598,2739, + 795,3086,3699,3908,1707,2352,2402,1382,3136,2475,1465,4847,3496,3865,1085,3004, +2591,1084, 213,2287,1963,3565,2250, 822, 793,4574,3187,1772,1789,3050, 595,1484, +1959,2770,1080,2650, 456, 422,2996, 940,3322,4328,4345,3092,2742, 965,2784, 739, +4124, 952,1358,2498,2949,2565, 332,2698,2378, 660,2260,2473,4194,3856,2919, 535, +1260,2651,1208,1428,1300,1949,1303,2942, 433,2455,2450,1251,1946, 614,1269, 641, +1306,1810,2737,3078,2912, 564,2365,1419,1415,1497,4460,2367,2185,1379,3005,1307, +3218,2175,1897,3063, 682,1157,4040,4005,1712,1160,1941,1399, 394, 402,2952,1573, +1151,2986,2404, 862, 299,2033,1489,3006, 346, 171,2886,3401,1726,2932, 168,2533, + 47,2507,1030,3735,1145,3370,1395,1318,1579,3609,4560,2857,4116,1457,2529,1965, + 504,1036,2690,2988,2405, 745,5871, 849,2397,2056,3081, 863,2359,3857,2096, 99, +1397,1769,2300,4428,1643,3455,1978,1757,3718,1440, 35,4879,3742,1296,4228,2280, + 160,5063,1599,2013, 166, 520,3479,1646,3345,3012, 490,1937,1545,1264,2182,2505, +1096,1188,1369,1436,2421,1667,2792,2460,1270,2122, 727,3167,2143, 806,1706,1012, +1800,3037, 960,2218,1882, 805, 139,2456,1139,1521, 851,1052,3093,3089, 342,2039, + 744,5097,1468,1502,1585,2087, 223, 939, 326,2140,2577, 892,2481,1623,4077, 982, +3708, 135,2131, 87,2503,3114,2326,1106, 876,1616, 547,2997,2831,2093,3441,4530, +4314, 9,3256,4229,4148, 659,1462,1986,1710,2046,2913,2231,4090,4880,5255,3392, +3274,1368,3689,4645,1477, 705,3384,3635,1068,1529,2941,1458,3782,1509, 100,1656, +2548, 718,2339, 408,1590,2780,3548,1838,4117,3719,1345,3530, 717,3442,2778,3220, +2898,1892,4590,3614,3371,2043,1998,1224,3483, 891, 635, 584,2559,3355, 733,1766, +1729,1172,3789,1891,2307, 781,2982,2271,1957,1580,5773,2633,2005,4195,3097,1535, +3213,1189,1934,5693,3262, 586,3118,1324,1598, 517,1564,2217,1868,1893,4445,3728, +2703,3139,1526,1787,1992,3882,2875,1549,1199,1056,2224,1904,2711,5098,4287, 338, +1993,3129,3489,2689,1809,2815,1997, 
957,1855,3898,2550,3275,3057,1105,1319, 627, +1505,1911,1883,3526, 698,3629,3456,1833,1431, 746, 77,1261,2017,2296,1977,1885, + 125,1334,1600, 525,1798,1109,2222,1470,1945, 559,2236,1186,3443,2476,1929,1411, +2411,3135,1777,3372,2621,1841,1613,3229, 668,1430,1839,2643,2916, 195,1989,2671, +2358,1387, 629,3205,2293,5256,4439, 123,1310, 888,1879,4300,3021,3605,1003,1162, +3192,2910,2010, 140,2395,2859, 55,1082,2012,2901, 662, 419,2081,1438, 680,2774, +4654,3912,1620,1731,1625,5035,4065,2328, 512,1344, 802,5443,2163,2311,2537, 524, +3399, 98,1155,2103,1918,2606,3925,2816,1393,2465,1504,3773,2177,3963,1478,4346, + 180,1113,4655,3461,2028,1698, 833,2696,1235,1322,1594,4408,3623,3013,3225,2040, +3022, 541,2881, 607,3632,2029,1665,1219, 639,1385,1686,1099,2803,3231,1938,3188, +2858, 427, 676,2772,1168,2025, 454,3253,2486,3556, 230,1950, 580, 791,1991,1280, +1086,1974,2034, 630, 257,3338,2788,4903,1017, 86,4790, 966,2789,1995,1696,1131, + 259,3095,4188,1308, 179,1463,5257, 289,4107,1248, 42,3413,1725,2288, 896,1947, + 774,4474,4254, 604,3430,4264, 392,2514,2588, 452, 237,1408,3018, 988,4531,1970, +3034,3310, 540,2370,1562,1288,2990, 502,4765,1147, 4,1853,2708, 207, 294,2814, +4078,2902,2509, 684, 34,3105,3532,2551, 644, 709,2801,2344, 573,1727,3573,3557, +2021,1081,3100,4315,2100,3681, 199,2263,1837,2385, 146,3484,1195,2776,3949, 997, +1939,3973,1008,1091,1202,1962,1847,1149,4209,5444,1076, 493, 117,5400,2521, 972, +1490,2934,1796,4542,2374,1512,2933,2657, 413,2888,1135,2762,2314,2156,1355,2369, + 766,2007,2527,2170,3124,2491,2593,2632,4757,2437, 234,3125,3591,1898,1750,1376, +1942,3468,3138, 570,2127,2145,3276,4131, 962, 132,1445,4196, 19, 941,3624,3480, +3366,1973,1374,4461,3431,2629, 283,2415,2275, 808,2887,3620,2112,2563,1353,3610, + 955,1089,3103,1053, 96, 88,4097, 823,3808,1583, 399, 292,4091,3313, 421,1128, + 642,4006, 903,2539,1877,2082, 596, 29,4066,1790, 722,2157, 130, 995,1569, 769, +1485, 464, 513,2213, 288,1923,1101,2453,4316, 133, 486,2445, 50, 625, 487,2207, 
+ 57, 423, 481,2962, 159,3729,1558, 491, 303, 482, 501, 240,2837, 112,3648,2392, +1783, 362, 8,3433,3422, 610,2793,3277,1390,1284,1654, 21,3823, 734, 367, 623, + 193, 287, 374,1009,1483, 816, 476, 313,2255,2340,1262,2150,2899,1146,2581, 782, +2116,1659,2018,1880, 255,3586,3314,1110,2867,2137,2564, 986,2767,5185,2006, 650, + 158, 926, 762, 881,3157,2717,2362,3587, 306,3690,3245,1542,3077,2427,1691,2478, +2118,2985,3490,2438, 539,2305, 983, 129,1754, 355,4201,2386, 827,2923, 104,1773, +2838,2771, 411,2905,3919, 376, 767, 122,1114, 828,2422,1817,3506, 266,3460,1007, +1609,4998, 945,2612,4429,2274, 726,1247,1964,2914,2199,2070,4002,4108, 657,3323, +1422, 579, 455,2764,4737,1222,2895,1670, 824,1223,1487,2525, 558, 861,3080, 598, +2659,2515,1967, 752,2583,2376,2214,4180, 977, 704,2464,4999,2622,4109,1210,2961, + 819,1541, 142,2284, 44, 418, 457,1126,3730,4347,4626,1644,1876,3671,1864, 302, +1063,5694, 624, 723,1984,3745,1314,1676,2488,1610,1449,3558,3569,2166,2098, 409, +1011,2325,3704,2306, 818,1732,1383,1824,1844,3757, 999,2705,3497,1216,1423,2683, +2426,2954,2501,2726,2229,1475,2554,5064,1971,1794,1666,2014,1343, 783, 724, 191, +2434,1354,2220,5065,1763,2752,2472,4152, 131, 175,2885,3434, 92,1466,4920,2616, +3871,3872,3866, 128,1551,1632, 669,1854,3682,4691,4125,1230, 188,2973,3290,1302, +1213, 560,3266, 917, 763,3909,3249,1760, 868,1958, 764,1782,2097, 145,2277,3774, +4462, 64,1491,3062, 971,2132,3606,2442, 221,1226,1617, 218, 323,1185,3207,3147, + 571, 619,1473,1005,1744,2281, 449,1887,2396,3685, 275, 375,3816,1743,3844,3731, + 845,1983,2350,4210,1377, 773, 967,3499,3052,3743,2725,4007,1697,1022,3943,1464, +3264,2855,2722,1952,1029,2839,2467, 84,4383,2215, 820,1391,2015,2448,3672, 377, +1948,2168, 797,2545,3536,2578,2645, 94,2874,1678, 405,1259,3071, 771, 546,1315, + 470,1243,3083, 895,2468, 981, 969,2037, 846,4181, 653,1276,2928, 14,2594, 557, +3007,2474, 156, 902,1338,1740,2574, 537,2518, 973,2282,2216,2433,1928, 138,2903, +1293,2631,1612, 646,3457, 839,2935, 111, 
496,2191,2847, 589,3186, 149,3994,2060, +4031,2641,4067,3145,1870, 37,3597,2136,1025,2051,3009,3383,3549,1121,1016,3261, +1301, 251,2446,2599,2153, 872,3246, 637, 334,3705, 831, 884, 921,3065,3140,4092, +2198,1944, 246,2964, 108,2045,1152,1921,2308,1031, 203,3173,4170,1907,3890, 810, +1401,2003,1690, 506, 647,1242,2828,1761,1649,3208,2249,1589,3709,2931,5156,1708, + 498, 666,2613, 834,3817,1231, 184,2851,1124, 883,3197,2261,3710,1765,1553,2658, +1178,2639,2351, 93,1193, 942,2538,2141,4402, 235,1821, 870,1591,2192,1709,1871, +3341,1618,4126,2595,2334, 603, 651, 69, 701, 268,2662,3411,2555,1380,1606, 503, + 448, 254,2371,2646, 574,1187,2309,1770, 322,2235,1292,1801, 305, 566,1133, 229, +2067,2057, 706, 167, 483,2002,2672,3295,1820,3561,3067, 316, 378,2746,3452,1112, + 136,1981, 507,1651,2917,1117, 285,4591, 182,2580,3522,1304, 335,3303,1835,2504, +1795,1792,2248, 674,1018,2106,2449,1857,2292,2845, 976,3047,1781,2600,2727,1389, +1281, 52,3152, 153, 265,3950, 672,3485,3951,4463, 430,1183, 365, 278,2169, 27, +1407,1336,2304, 209,1340,1730,2202,1852,2403,2883, 979,1737,1062, 631,2829,2542, +3876,2592, 825,2086,2226,3048,3625, 352,1417,3724, 542, 991, 431,1351,3938,1861, +2294, 826,1361,2927,3142,3503,1738, 463,2462,2723, 582,1916,1595,2808, 400,3845, +3891,2868,3621,2254, 58,2492,1123, 910,2160,2614,1372,1603,1196,1072,3385,1700, +3267,1980, 696, 480,2430, 920, 799,1570,2920,1951,2041,4047,2540,1321,4223,2469, +3562,2228,1271,2602, 401,2833,3351,2575,5157, 907,2312,1256, 410, 263,3507,1582, + 996, 678,1849,2316,1480, 908,3545,2237, 703,2322, 667,1826,2849,1531,2604,2999, +2407,3146,2151,2630,1786,3711, 469,3542, 497,3899,2409, 858, 837,4446,3393,1274, + 786, 620,1845,2001,3311, 484, 308,3367,1204,1815,3691,2332,1532,2557,1842,2020, +2724,1927,2333,4440, 567, 22,1673,2728,4475,1987,1858,1144,1597, 101,1832,3601, + 12, 974,3783,4391, 951,1412, 1,3720, 453,4608,4041, 528,1041,1027,3230,2628, +1129, 875,1051,3291,1203,2262,1069,2860,2799,2149,2615,3278, 144,1758,3040, 31, + 
475,1680, 366,2685,3184, 311,1642,4008,2466,5036,1593,1493,2809, 216,1420,1668, + 233, 304,2128,3284, 232,1429,1768,1040,2008,3407,2740,2967,2543, 242,2133, 778, +1565,2022,2620, 505,2189,2756,1098,2273, 372,1614, 708, 553,2846,2094,2278, 169, +3626,2835,4161, 228,2674,3165, 809,1454,1309, 466,1705,1095, 900,3423, 880,2667, +3751,5258,2317,3109,2571,4317,2766,1503,1342, 866,4447,1118, 63,2076, 314,1881, +1348,1061, 172, 978,3515,1747, 532, 511,3970, 6, 601, 905,2699,3300,1751, 276, +1467,3725,2668, 65,4239,2544,2779,2556,1604, 578,2451,1802, 992,2331,2624,1320, +3446, 713,1513,1013, 103,2786,2447,1661, 886,1702, 916, 654,3574,2031,1556, 751, +2178,2821,2179,1498,1538,2176, 271, 914,2251,2080,1325, 638,1953,2937,3877,2432, +2754, 95,3265,1716, 260,1227,4083, 775, 106,1357,3254, 426,1607, 555,2480, 772, +1985, 244,2546, 474, 495,1046,2611,1851,2061, 71,2089,1675,2590, 742,3758,2843, +3222,1433, 267,2180,2576,2826,2233,2092,3913,2435, 956,1745,3075, 856,2113,1116, + 451, 3,1988,2896,1398, 993,2463,1878,2049,1341,2718,2721,2870,2108, 712,2904, +4363,2753,2324, 277,2872,2349,2649, 384, 987, 435, 691,3000, 922, 164,3939, 652, +1500,1184,4153,2482,3373,2165,4848,2335,3775,3508,3154,2806,2830,1554,2102,1664, +2530,1434,2408, 893,1547,2623,3447,2832,2242,2532,3169,2856,3223,2078, 49,3770, +3469, 462, 318, 656,2259,3250,3069, 679,1629,2758, 344,1138,1104,3120,1836,1283, +3115,2154,1437,4448, 934, 759,1999, 794,2862,1038, 533,2560,1722,2342, 855,2626, +1197,1663,4476,3127, 85,4240,2528, 25,1111,1181,3673, 407,3470,4561,2679,2713, + 768,1925,2841,3986,1544,1165, 932, 373,1240,2146,1930,2673, 721,4766, 354,4333, + 391,2963, 187, 61,3364,1442,1102, 330,1940,1767, 341,3809,4118, 393,2496,2062, +2211, 105, 331, 300, 439, 913,1332, 626, 379,3304,1557, 328, 689,3952, 309,1555, + 931, 317,2517,3027, 325, 569, 686,2107,3084, 60,1042,1333,2794, 264,3177,4014, +1628, 258,3712, 7,4464,1176,1043,1778, 683, 114,1975, 78,1492, 383,1886, 510, + 386, 
645,5291,2891,2069,3305,4138,3867,2939,2603,2493,1935,1066,1848,3588,1015, +1282,1289,4609, 697,1453,3044,2666,3611,1856,2412, 54, 719,1330, 568,3778,2459, +1748, 788, 492, 551,1191,1000, 488,3394,3763, 282,1799, 348,2016,1523,3155,2390, +1049, 382,2019,1788,1170, 729,2968,3523, 897,3926,2785,2938,3292, 350,2319,3238, +1718,1717,2655,3453,3143,4465, 161,2889,2980,2009,1421, 56,1908,1640,2387,2232, +1917,1874,2477,4921, 148, 83,3438, 592,4245,2882,1822,1055, 741, 115,1496,1624, + 381,1638,4592,1020, 516,3214, 458, 947,4575,1432, 211,1514,2926,1865,2142, 189, + 852,1221,1400,1486, 882,2299,4036, 351, 28,1122, 700,6479,6480,6481,6482,6483, # last 512 +#Everything below is of no interest for detection purpose +5508,6484,3900,3414,3974,4441,4024,3537,4037,5628,5099,3633,6485,3148,6486,3636, +5509,3257,5510,5973,5445,5872,4941,4403,3174,4627,5873,6276,2286,4230,5446,5874, +5122,6102,6103,4162,5447,5123,5323,4849,6277,3980,3851,5066,4246,5774,5067,6278, +3001,2807,5695,3346,5775,5974,5158,5448,6487,5975,5976,5776,3598,6279,5696,4806, +4211,4154,6280,6488,6489,6490,6281,4212,5037,3374,4171,6491,4562,4807,4722,4827, +5977,6104,4532,4079,5159,5324,5160,4404,3858,5359,5875,3975,4288,4610,3486,4512, +5325,3893,5360,6282,6283,5560,2522,4231,5978,5186,5449,2569,3878,6284,5401,3578, +4415,6285,4656,5124,5979,2506,4247,4449,3219,3417,4334,4969,4329,6492,4576,4828, +4172,4416,4829,5402,6286,3927,3852,5361,4369,4830,4477,4867,5876,4173,6493,6105, +4657,6287,6106,5877,5450,6494,4155,4868,5451,3700,5629,4384,6288,6289,5878,3189, +4881,6107,6290,6495,4513,6496,4692,4515,4723,5100,3356,6497,6291,3810,4080,5561, +3570,4430,5980,6498,4355,5697,6499,4724,6108,6109,3764,4050,5038,5879,4093,3226, +6292,5068,5217,4693,3342,5630,3504,4831,4377,4466,4309,5698,4431,5777,6293,5778, +4272,3706,6110,5326,3752,4676,5327,4273,5403,4767,5631,6500,5699,5880,3475,5039, +6294,5562,5125,4348,4301,4482,4068,5126,4593,5700,3380,3462,5981,5563,3824,5404, 
+4970,5511,3825,4738,6295,6501,5452,4516,6111,5881,5564,6502,6296,5982,6503,4213, +4163,3454,6504,6112,4009,4450,6113,4658,6297,6114,3035,6505,6115,3995,4904,4739, +4563,4942,4110,5040,3661,3928,5362,3674,6506,5292,3612,4791,5565,4149,5983,5328, +5259,5021,4725,4577,4564,4517,4364,6298,5405,4578,5260,4594,4156,4157,5453,3592, +3491,6507,5127,5512,4709,4922,5984,5701,4726,4289,6508,4015,6116,5128,4628,3424, +4241,5779,6299,4905,6509,6510,5454,5702,5780,6300,4365,4923,3971,6511,5161,3270, +3158,5985,4100, 867,5129,5703,6117,5363,3695,3301,5513,4467,6118,6512,5455,4232, +4242,4629,6513,3959,4478,6514,5514,5329,5986,4850,5162,5566,3846,4694,6119,5456, +4869,5781,3779,6301,5704,5987,5515,4710,6302,5882,6120,4392,5364,5705,6515,6121, +6516,6517,3736,5988,5457,5989,4695,2457,5883,4551,5782,6303,6304,6305,5130,4971, +6122,5163,6123,4870,3263,5365,3150,4871,6518,6306,5783,5069,5706,3513,3498,4409, +5330,5632,5366,5458,5459,3991,5990,4502,3324,5991,5784,3696,4518,5633,4119,6519, +4630,5634,4417,5707,4832,5992,3418,6124,5993,5567,4768,5218,6520,4595,3458,5367, +6125,5635,6126,4202,6521,4740,4924,6307,3981,4069,4385,6308,3883,2675,4051,3834, +4302,4483,5568,5994,4972,4101,5368,6309,5164,5884,3922,6127,6522,6523,5261,5460, +5187,4164,5219,3538,5516,4111,3524,5995,6310,6311,5369,3181,3386,2484,5188,3464, +5569,3627,5708,6524,5406,5165,4677,4492,6312,4872,4851,5885,4468,5996,6313,5709, +5710,6128,2470,5886,6314,5293,4882,5785,3325,5461,5101,6129,5711,5786,6525,4906, +6526,6527,4418,5887,5712,4808,2907,3701,5713,5888,6528,3765,5636,5331,6529,6530, +3593,5889,3637,4943,3692,5714,5787,4925,6315,6130,5462,4405,6131,6132,6316,5262, +6531,6532,5715,3859,5716,5070,4696,5102,3929,5788,3987,4792,5997,6533,6534,3920, +4809,5000,5998,6535,2974,5370,6317,5189,5263,5717,3826,6536,3953,5001,4883,3190, +5463,5890,4973,5999,4741,6133,6134,3607,5570,6000,4711,3362,3630,4552,5041,6318, +6001,2950,2953,5637,4646,5371,4944,6002,2044,4120,3429,6319,6537,5103,4833,6538, 
+6539,4884,4647,3884,6003,6004,4758,3835,5220,5789,4565,5407,6540,6135,5294,4697, +4852,6320,6321,3206,4907,6541,6322,4945,6542,6136,6543,6323,6005,4631,3519,6544, +5891,6545,5464,3784,5221,6546,5571,4659,6547,6324,6137,5190,6548,3853,6549,4016, +4834,3954,6138,5332,3827,4017,3210,3546,4469,5408,5718,3505,4648,5790,5131,5638, +5791,5465,4727,4318,6325,6326,5792,4553,4010,4698,3439,4974,3638,4335,3085,6006, +5104,5042,5166,5892,5572,6327,4356,4519,5222,5573,5333,5793,5043,6550,5639,5071, +4503,6328,6139,6551,6140,3914,3901,5372,6007,5640,4728,4793,3976,3836,4885,6552, +4127,6553,4451,4102,5002,6554,3686,5105,6555,5191,5072,5295,4611,5794,5296,6556, +5893,5264,5894,4975,5466,5265,4699,4976,4370,4056,3492,5044,4886,6557,5795,4432, +4769,4357,5467,3940,4660,4290,6141,4484,4770,4661,3992,6329,4025,4662,5022,4632, +4835,4070,5297,4663,4596,5574,5132,5409,5895,6142,4504,5192,4664,5796,5896,3885, +5575,5797,5023,4810,5798,3732,5223,4712,5298,4084,5334,5468,6143,4052,4053,4336, +4977,4794,6558,5335,4908,5576,5224,4233,5024,4128,5469,5225,4873,6008,5045,4729, +4742,4633,3675,4597,6559,5897,5133,5577,5003,5641,5719,6330,6560,3017,2382,3854, +4406,4811,6331,4393,3964,4946,6561,2420,3722,6562,4926,4378,3247,1736,4442,6332, +5134,6333,5226,3996,2918,5470,4319,4003,4598,4743,4744,4485,3785,3902,5167,5004, +5373,4394,5898,6144,4874,1793,3997,6334,4085,4214,5106,5642,4909,5799,6009,4419, +4189,3330,5899,4165,4420,5299,5720,5227,3347,6145,4081,6335,2876,3930,6146,3293, +3786,3910,3998,5900,5300,5578,2840,6563,5901,5579,6147,3531,5374,6564,6565,5580, +4759,5375,6566,6148,3559,5643,6336,6010,5517,6337,6338,5721,5902,3873,6011,6339, +6567,5518,3868,3649,5722,6568,4771,4947,6569,6149,4812,6570,2853,5471,6340,6341, +5644,4795,6342,6012,5723,6343,5724,6013,4349,6344,3160,6150,5193,4599,4514,4493, +5168,4320,6345,4927,3666,4745,5169,5903,5005,4928,6346,5725,6014,4730,4203,5046, +4948,3395,5170,6015,4150,6016,5726,5519,6347,5047,3550,6151,6348,4197,4310,5904, 
+6571,5581,2965,6152,4978,3960,4291,5135,6572,5301,5727,4129,4026,5905,4853,5728, +5472,6153,6349,4533,2700,4505,5336,4678,3583,5073,2994,4486,3043,4554,5520,6350, +6017,5800,4487,6351,3931,4103,5376,6352,4011,4321,4311,4190,5136,6018,3988,3233, +4350,5906,5645,4198,6573,5107,3432,4191,3435,5582,6574,4139,5410,6353,5411,3944, +5583,5074,3198,6575,6354,4358,6576,5302,4600,5584,5194,5412,6577,6578,5585,5413, +5303,4248,5414,3879,4433,6579,4479,5025,4854,5415,6355,4760,4772,3683,2978,4700, +3797,4452,3965,3932,3721,4910,5801,6580,5195,3551,5907,3221,3471,3029,6019,3999, +5908,5909,5266,5267,3444,3023,3828,3170,4796,5646,4979,4259,6356,5647,5337,3694, +6357,5648,5338,4520,4322,5802,3031,3759,4071,6020,5586,4836,4386,5048,6581,3571, +4679,4174,4949,6154,4813,3787,3402,3822,3958,3215,3552,5268,4387,3933,4950,4359, +6021,5910,5075,3579,6358,4234,4566,5521,6359,3613,5049,6022,5911,3375,3702,3178, +4911,5339,4521,6582,6583,4395,3087,3811,5377,6023,6360,6155,4027,5171,5649,4421, +4249,2804,6584,2270,6585,4000,4235,3045,6156,5137,5729,4140,4312,3886,6361,4330, +6157,4215,6158,3500,3676,4929,4331,3713,4930,5912,4265,3776,3368,5587,4470,4855, +3038,4980,3631,6159,6160,4132,4680,6161,6362,3923,4379,5588,4255,6586,4121,6587, +6363,4649,6364,3288,4773,4774,6162,6024,6365,3543,6588,4274,3107,3737,5050,5803, +4797,4522,5589,5051,5730,3714,4887,5378,4001,4523,6163,5026,5522,4701,4175,2791, +3760,6589,5473,4224,4133,3847,4814,4815,4775,3259,5416,6590,2738,6164,6025,5304, +3733,5076,5650,4816,5590,6591,6165,6592,3934,5269,6593,3396,5340,6594,5804,3445, +3602,4042,4488,5731,5732,3525,5591,4601,5196,6166,6026,5172,3642,4612,3202,4506, +4798,6366,3818,5108,4303,5138,5139,4776,3332,4304,2915,3415,4434,5077,5109,4856, +2879,5305,4817,6595,5913,3104,3144,3903,4634,5341,3133,5110,5651,5805,6167,4057, +5592,2945,4371,5593,6596,3474,4182,6367,6597,6168,4507,4279,6598,2822,6599,4777, +4713,5594,3829,6169,3887,5417,6170,3653,5474,6368,4216,2971,5228,3790,4579,6369, 
+5733,6600,6601,4951,4746,4555,6602,5418,5475,6027,3400,4665,5806,6171,4799,6028, +5052,6172,3343,4800,4747,5006,6370,4556,4217,5476,4396,5229,5379,5477,3839,5914, +5652,5807,4714,3068,4635,5808,6173,5342,4192,5078,5419,5523,5734,6174,4557,6175, +4602,6371,6176,6603,5809,6372,5735,4260,3869,5111,5230,6029,5112,6177,3126,4681, +5524,5915,2706,3563,4748,3130,6178,4018,5525,6604,6605,5478,4012,4837,6606,4534, +4193,5810,4857,3615,5479,6030,4082,3697,3539,4086,5270,3662,4508,4931,5916,4912, +5811,5027,3888,6607,4397,3527,3302,3798,2775,2921,2637,3966,4122,4388,4028,4054, +1633,4858,5079,3024,5007,3982,3412,5736,6608,3426,3236,5595,3030,6179,3427,3336, +3279,3110,6373,3874,3039,5080,5917,5140,4489,3119,6374,5812,3405,4494,6031,4666, +4141,6180,4166,6032,5813,4981,6609,5081,4422,4982,4112,3915,5653,3296,3983,6375, +4266,4410,5654,6610,6181,3436,5082,6611,5380,6033,3819,5596,4535,5231,5306,5113, +6612,4952,5918,4275,3113,6613,6376,6182,6183,5814,3073,4731,4838,5008,3831,6614, +4888,3090,3848,4280,5526,5232,3014,5655,5009,5737,5420,5527,6615,5815,5343,5173, +5381,4818,6616,3151,4953,6617,5738,2796,3204,4360,2989,4281,5739,5174,5421,5197, +3132,5141,3849,5142,5528,5083,3799,3904,4839,5480,2880,4495,3448,6377,6184,5271, +5919,3771,3193,6034,6035,5920,5010,6036,5597,6037,6378,6038,3106,5422,6618,5423, +5424,4142,6619,4889,5084,4890,4313,5740,6620,3437,5175,5307,5816,4199,5198,5529, +5817,5199,5656,4913,5028,5344,3850,6185,2955,5272,5011,5818,4567,4580,5029,5921, +3616,5233,6621,6622,6186,4176,6039,6379,6380,3352,5200,5273,2908,5598,5234,3837, +5308,6623,6624,5819,4496,4323,5309,5201,6625,6626,4983,3194,3838,4167,5530,5922, +5274,6381,6382,3860,3861,5599,3333,4292,4509,6383,3553,5481,5820,5531,4778,6187, +3955,3956,4324,4389,4218,3945,4325,3397,2681,5923,4779,5085,4019,5482,4891,5382, +5383,6040,4682,3425,5275,4094,6627,5310,3015,5483,5657,4398,5924,3168,4819,6628, +5925,6629,5532,4932,4613,6041,6630,4636,6384,4780,4204,5658,4423,5821,3989,4683, 
+5822,6385,4954,6631,5345,6188,5425,5012,5384,3894,6386,4490,4104,6632,5741,5053, +6633,5823,5926,5659,5660,5927,6634,5235,5742,5824,4840,4933,4820,6387,4859,5928, +4955,6388,4143,3584,5825,5346,5013,6635,5661,6389,5014,5484,5743,4337,5176,5662, +6390,2836,6391,3268,6392,6636,6042,5236,6637,4158,6638,5744,5663,4471,5347,3663, +4123,5143,4293,3895,6639,6640,5311,5929,5826,3800,6189,6393,6190,5664,5348,3554, +3594,4749,4603,6641,5385,4801,6043,5827,4183,6642,5312,5426,4761,6394,5665,6191, +4715,2669,6643,6644,5533,3185,5427,5086,5930,5931,5386,6192,6044,6645,4781,4013, +5745,4282,4435,5534,4390,4267,6045,5746,4984,6046,2743,6193,3501,4087,5485,5932, +5428,4184,4095,5747,4061,5054,3058,3862,5933,5600,6646,5144,3618,6395,3131,5055, +5313,6396,4650,4956,3855,6194,3896,5202,4985,4029,4225,6195,6647,5828,5486,5829, +3589,3002,6648,6397,4782,5276,6649,6196,6650,4105,3803,4043,5237,5830,6398,4096, +3643,6399,3528,6651,4453,3315,4637,6652,3984,6197,5535,3182,3339,6653,3096,2660, +6400,6654,3449,5934,4250,4236,6047,6401,5831,6655,5487,3753,4062,5832,6198,6199, +6656,3766,6657,3403,4667,6048,6658,4338,2897,5833,3880,2797,3780,4326,6659,5748, +5015,6660,5387,4351,5601,4411,6661,3654,4424,5935,4339,4072,5277,4568,5536,6402, +6662,5238,6663,5349,5203,6200,5204,6201,5145,4536,5016,5056,4762,5834,4399,4957, +6202,6403,5666,5749,6664,4340,6665,5936,5177,5667,6666,6667,3459,4668,6404,6668, +6669,4543,6203,6670,4276,6405,4480,5537,6671,4614,5205,5668,6672,3348,2193,4763, +6406,6204,5937,5602,4177,5669,3419,6673,4020,6205,4443,4569,5388,3715,3639,6407, +6049,4058,6206,6674,5938,4544,6050,4185,4294,4841,4651,4615,5488,6207,6408,6051, +5178,3241,3509,5835,6208,4958,5836,4341,5489,5278,6209,2823,5538,5350,5206,5429, +6675,4638,4875,4073,3516,4684,4914,4860,5939,5603,5389,6052,5057,3237,5490,3791, +6676,6409,6677,4821,4915,4106,5351,5058,4243,5539,4244,5604,4842,4916,5239,3028, +3716,5837,5114,5605,5390,5940,5430,6210,4332,6678,5540,4732,3667,3840,6053,4305, 
+3408,5670,5541,6410,2744,5240,5750,6679,3234,5606,6680,5607,5671,3608,4283,4159, +4400,5352,4783,6681,6411,6682,4491,4802,6211,6412,5941,6413,6414,5542,5751,6683, +4669,3734,5942,6684,6415,5943,5059,3328,4670,4144,4268,6685,6686,6687,6688,4372, +3603,6689,5944,5491,4373,3440,6416,5543,4784,4822,5608,3792,4616,5838,5672,3514, +5391,6417,4892,6690,4639,6691,6054,5673,5839,6055,6692,6056,5392,6212,4038,5544, +5674,4497,6057,6693,5840,4284,5675,4021,4545,5609,6418,4454,6419,6213,4113,4472, +5314,3738,5087,5279,4074,5610,4959,4063,3179,4750,6058,6420,6214,3476,4498,4716, +5431,4960,4685,6215,5241,6694,6421,6216,6695,5841,5945,6422,3748,5946,5179,3905, +5752,5545,5947,4374,6217,4455,6423,4412,6218,4803,5353,6696,3832,5280,6219,4327, +4702,6220,6221,6059,4652,5432,6424,3749,4751,6425,5753,4986,5393,4917,5948,5030, +5754,4861,4733,6426,4703,6697,6222,4671,5949,4546,4961,5180,6223,5031,3316,5281, +6698,4862,4295,4934,5207,3644,6427,5842,5950,6428,6429,4570,5843,5282,6430,6224, +5088,3239,6060,6699,5844,5755,6061,6431,2701,5546,6432,5115,5676,4039,3993,3327, +4752,4425,5315,6433,3941,6434,5677,4617,4604,3074,4581,6225,5433,6435,6226,6062, +4823,5756,5116,6227,3717,5678,4717,5845,6436,5679,5846,6063,5847,6064,3977,3354, +6437,3863,5117,6228,5547,5394,4499,4524,6229,4605,6230,4306,4500,6700,5951,6065, +3693,5952,5089,4366,4918,6701,6231,5548,6232,6702,6438,4704,5434,6703,6704,5953, +4168,6705,5680,3420,6706,5242,4407,6066,3812,5757,5090,5954,4672,4525,3481,5681, +4618,5395,5354,5316,5955,6439,4962,6707,4526,6440,3465,4673,6067,6441,5682,6708, +5435,5492,5758,5683,4619,4571,4674,4804,4893,4686,5493,4753,6233,6068,4269,6442, +6234,5032,4705,5146,5243,5208,5848,6235,6443,4963,5033,4640,4226,6236,5849,3387, +6444,6445,4436,4437,5850,4843,5494,4785,4894,6709,4361,6710,5091,5956,3331,6237, +4987,5549,6069,6711,4342,3517,4473,5317,6070,6712,6071,4706,6446,5017,5355,6713, +6714,4988,5436,6447,4734,5759,6715,4735,4547,4456,4754,6448,5851,6449,6450,3547, 
+5852,5318,6451,6452,5092,4205,6716,6238,4620,4219,5611,6239,6072,4481,5760,5957, +5958,4059,6240,6453,4227,4537,6241,5761,4030,4186,5244,5209,3761,4457,4876,3337, +5495,5181,6242,5959,5319,5612,5684,5853,3493,5854,6073,4169,5613,5147,4895,6074, +5210,6717,5182,6718,3830,6243,2798,3841,6075,6244,5855,5614,3604,4606,5496,5685, +5118,5356,6719,6454,5960,5357,5961,6720,4145,3935,4621,5119,5962,4261,6721,6455, +4786,5963,4375,4582,6245,6246,6247,6076,5437,4877,5856,3376,4380,6248,4160,6722, +5148,6456,5211,6457,6723,4718,6458,6724,6249,5358,4044,3297,6459,6250,5857,5615, +5497,5245,6460,5498,6725,6251,6252,5550,3793,5499,2959,5396,6461,6462,4572,5093, +5500,5964,3806,4146,6463,4426,5762,5858,6077,6253,4755,3967,4220,5965,6254,4989, +5501,6464,4352,6726,6078,4764,2290,5246,3906,5438,5283,3767,4964,2861,5763,5094, +6255,6256,4622,5616,5859,5860,4707,6727,4285,4708,4824,5617,6257,5551,4787,5212, +4965,4935,4687,6465,6728,6466,5686,6079,3494,4413,2995,5247,5966,5618,6729,5967, +5764,5765,5687,5502,6730,6731,6080,5397,6467,4990,6258,6732,4538,5060,5619,6733, +4719,5688,5439,5018,5149,5284,5503,6734,6081,4607,6259,5120,3645,5861,4583,6260, +4584,4675,5620,4098,5440,6261,4863,2379,3306,4585,5552,5689,4586,5285,6735,4864, +6736,5286,6082,6737,4623,3010,4788,4381,4558,5621,4587,4896,3698,3161,5248,4353, +4045,6262,3754,5183,4588,6738,6263,6739,6740,5622,3936,6741,6468,6742,6264,5095, +6469,4991,5968,6743,4992,6744,6083,4897,6745,4256,5766,4307,3108,3968,4444,5287, +3889,4343,6084,4510,6085,4559,6086,4898,5969,6746,5623,5061,4919,5249,5250,5504, +5441,6265,5320,4878,3242,5862,5251,3428,6087,6747,4237,5624,5442,6266,5553,4539, +6748,2585,3533,5398,4262,6088,5150,4736,4438,6089,6267,5505,4966,6749,6268,6750, +6269,5288,5554,3650,6090,6091,4624,6092,5690,6751,5863,4270,5691,4277,5555,5864, +6752,5692,4720,4865,6470,5151,4688,4825,6753,3094,6754,6471,3235,4653,6755,5213, +5399,6756,3201,4589,5865,4967,6472,5866,6473,5019,3016,6757,5321,4756,3957,4573, 
+6093,4993,5767,4721,6474,6758,5625,6759,4458,6475,6270,6760,5556,4994,5214,5252, +6271,3875,5768,6094,5034,5506,4376,5769,6761,2120,6476,5253,5770,6762,5771,5970, +3990,5971,5557,5558,5772,6477,6095,2787,4641,5972,5121,6096,6097,6272,6763,3703, +5867,5507,6273,4206,6274,4789,6098,6764,3619,3646,3833,3804,2394,3788,4936,3978, +4866,4899,6099,6100,5559,6478,6765,3599,5868,6101,5869,5870,6275,6766,4527,6767) + diff --git a/fanficdownloader/chardet/gb2312prober.py b/fanficdownloader/chardet/gb2312prober.py new file mode 100644 index 00000000..91eb3925 --- /dev/null +++ b/fanficdownloader/chardet/gb2312prober.py @@ -0,0 +1,41 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is mozilla.org code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
+# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from mbcharsetprober import MultiByteCharSetProber +from codingstatemachine import CodingStateMachine +from chardistribution import GB2312DistributionAnalysis +from mbcssm import GB2312SMModel + +class GB2312Prober(MultiByteCharSetProber): + def __init__(self): + MultiByteCharSetProber.__init__(self) + self._mCodingSM = CodingStateMachine(GB2312SMModel) + self._mDistributionAnalyzer = GB2312DistributionAnalysis() + self.reset() + + def get_charset_name(self): + return "GB2312" diff --git a/fanficdownloader/chardet/hebrewprober.py b/fanficdownloader/chardet/hebrewprober.py new file mode 100644 index 00000000..a2b1eaa9 --- /dev/null +++ b/fanficdownloader/chardet/hebrewprober.py @@ -0,0 +1,269 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Universal charset detector code. +# +# The Initial Developer of the Original Code is +# Shy Shalom +# Portions created by the Initial Developer are Copyright (C) 2005 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
+# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from charsetprober import CharSetProber +import constants + +# This prober doesn't actually recognize a language or a charset. +# It is a helper prober for the use of the Hebrew model probers + +### General ideas of the Hebrew charset recognition ### +# +# Four main charsets exist in Hebrew: +# "ISO-8859-8" - Visual Hebrew +# "windows-1255" - Logical Hebrew +# "ISO-8859-8-I" - Logical Hebrew +# "x-mac-hebrew" - ?? Logical Hebrew ?? +# +# Both "ISO" charsets use a completely identical set of code points, whereas +# "windows-1255" and "x-mac-hebrew" are two different proper supersets of +# these code points. windows-1255 defines additional characters in the range +# 0x80-0x9F as some misc punctuation marks as well as some Hebrew-specific +# diacritics and additional 'Yiddish' ligature letters in the range 0xc0-0xd6. +# x-mac-hebrew defines similar additional code points but with a different +# mapping. +# +# As far as an average Hebrew text with no diacritics is concerned, all four +# charsets are identical with respect to code points. Meaning that for the +# main Hebrew alphabet, all four map the same values to all 27 Hebrew letters +# (including final letters). +# +# The dominant difference between these charsets is their directionality. +# "Visual" directionality means that the text is ordered as if the renderer is +# not aware of a BIDI rendering algorithm. The renderer sees the text and +# draws it from left to right. The text itself when ordered naturally is read +# backwards. 
A buffer of Visual Hebrew generally looks like so: +# "[last word of first line spelled backwards] [whole line ordered backwards +# and spelled backwards] [first word of first line spelled backwards] +# [end of line] [last word of second line] ... etc' " +# adding punctuation marks, numbers and English text to visual text is +# naturally also "visual" and from left to right. +# +# "Logical" directionality means the text is ordered "naturally" according to +# the order it is read. It is the responsibility of the renderer to display +# the text from right to left. A BIDI algorithm is used to place general +# punctuation marks, numbers and English text in the text. +# +# Texts in x-mac-hebrew are almost impossible to find on the Internet. From +# what little evidence I could find, it seems that its general directionality +# is Logical. +# +# To sum up all of the above, the Hebrew probing mechanism knows about two +# charsets: +# Visual Hebrew - "ISO-8859-8" - backwards text - Words and sentences are +# backwards while line order is natural. For charset recognition purposes +# the line order is unimportant (In fact, for this implementation, even +# word order is unimportant). +# Logical Hebrew - "windows-1255" - normal, naturally ordered text. +# +# "ISO-8859-8-I" is a subset of windows-1255 and doesn't need to be +# specifically identified. +# "x-mac-hebrew" is also identified as windows-1255. A text in x-mac-hebrew +# that contain special punctuation marks or diacritics is displayed with +# some unconverted characters showing as question marks. This problem might +# be corrected using another model prober for x-mac-hebrew. Due to the fact +# that x-mac-hebrew texts are so rare, writing another model prober isn't +# worth the effort and performance hit. +# +#### The Prober #### +# +# The prober is divided between two SBCharSetProbers and a HebrewProber, +# all of which are managed, created, fed data, inquired and deleted by the +# SBCSGroupProber. 
The two SBCharSetProbers identify that the text is in +# fact some kind of Hebrew, Logical or Visual. The final decision about which +# one is it is made by the HebrewProber by combining final-letter scores +# with the scores of the two SBCharSetProbers to produce a final answer. +# +# The SBCSGroupProber is responsible for stripping the original text of HTML +# tags, English characters, numbers, low-ASCII punctuation characters, spaces +# and new lines. It reduces any sequence of such characters to a single space. +# The buffer fed to each prober in the SBCS group prober is pure text in +# high-ASCII. +# The two SBCharSetProbers (model probers) share the same language model: +# Win1255Model. +# The first SBCharSetProber uses the model normally as any other +# SBCharSetProber does, to recognize windows-1255, upon which this model was +# built. The second SBCharSetProber is told to make the pair-of-letter +# lookup in the language model backwards. This in practice exactly simulates +# a visual Hebrew model using the windows-1255 logical Hebrew model. +# +# The HebrewProber is not using any language model. All it does is look for +# final-letter evidence suggesting the text is either logical Hebrew or visual +# Hebrew. Disjointed from the model probers, the results of the HebrewProber +# alone are meaningless. HebrewProber always returns 0.00 as confidence +# since it never identifies a charset by itself. Instead, the pointer to the +# HebrewProber is passed to the model probers as a helper "Name Prober". +# When the Group prober receives a positive identification from any prober, +# it asks for the name of the charset identified. If the prober queried is a +# Hebrew model prober, the model prober forwards the call to the +# HebrewProber to make the final decision. In the HebrewProber, the +# decision is made according to the final-letters scores maintained and Both +# model probers scores. 
The answer is returned in the form of the name of the +# charset identified, either "windows-1255" or "ISO-8859-8". + +# windows-1255 / ISO-8859-8 code points of interest +FINAL_KAF = '\xea' +NORMAL_KAF = '\xeb' +FINAL_MEM = '\xed' +NORMAL_MEM = '\xee' +FINAL_NUN = '\xef' +NORMAL_NUN = '\xf0' +FINAL_PE = '\xf3' +NORMAL_PE = '\xf4' +FINAL_TSADI = '\xf5' +NORMAL_TSADI = '\xf6' + +# Minimum Visual vs Logical final letter score difference. +# If the difference is below this, don't rely solely on the final letter score distance. +MIN_FINAL_CHAR_DISTANCE = 5 + +# Minimum Visual vs Logical model score difference. +# If the difference is below this, don't rely at all on the model score distance. +MIN_MODEL_DISTANCE = 0.01 + +VISUAL_HEBREW_NAME = "ISO-8859-8" +LOGICAL_HEBREW_NAME = "windows-1255" + +class HebrewProber(CharSetProber): + def __init__(self): + CharSetProber.__init__(self) + self._mLogicalProber = None + self._mVisualProber = None + self.reset() + + def reset(self): + self._mFinalCharLogicalScore = 0 + self._mFinalCharVisualScore = 0 + # The two last characters seen in the previous buffer, + # mPrev and mBeforePrev are initialized to space in order to simulate a word + # delimiter at the beginning of the data + self._mPrev = ' ' + self._mBeforePrev = ' ' + # These probers are owned by the group prober. + + def set_model_probers(self, logicalProber, visualProber): + self._mLogicalProber = logicalProber + self._mVisualProber = visualProber + + def is_final(self, c): + return c in [FINAL_KAF, FINAL_MEM, FINAL_NUN, FINAL_PE, FINAL_TSADI] + + def is_non_final(self, c): + # The normal Tsadi is not a good Non-Final letter due to words like + # 'lechotet' (to chat) containing an apostrophe after the tsadi. This + # apostrophe is converted to a space in FilterWithoutEnglishLetters causing + # the Non-Final tsadi to appear at an end of a word even though this is not + # the case in the original text. 
+ # The letters Pe and Kaf rarely display a related behavior of not being a + # good Non-Final letter. Words like 'Pop', 'Winamp' and 'Mubarak' for + # example legally end with a Non-Final Pe or Kaf. However, the benefit of + # these letters as Non-Final letters outweighs the damage since these words + # are quite rare. + return c in [NORMAL_KAF, NORMAL_MEM, NORMAL_NUN, NORMAL_PE] + + def feed(self, aBuf): + # Final letter analysis for logical-visual decision. + # Look for evidence that the received buffer is either logical Hebrew or + # visual Hebrew. + # The following cases are checked: + # 1) A word longer than 1 letter, ending with a final letter. This is an + # indication that the text is laid out "naturally" since the final letter + # really appears at the end. +1 for logical score. + # 2) A word longer than 1 letter, ending with a Non-Final letter. In normal + # Hebrew, words ending with Kaf, Mem, Nun, Pe or Tsadi, should not end with + # the Non-Final form of that letter. Exceptions to this rule are mentioned + # above in isNonFinal(). This is an indication that the text is laid out + # backwards. +1 for visual score + # 3) A word longer than 1 letter, starting with a final letter. Final letters + # should not appear at the beginning of a word. This is an indication that + # the text is laid out backwards. +1 for visual score. + # + # The visual score and logical score are accumulated throughout the text and + # are finally checked against each other in GetCharSetName(). + # No checking for final letters in the middle of words is done since that case + # is not an indication for either Logical or Visual text. + # + # We automatically filter out all 7-bit characters (replace them with spaces) + # so the word boundary detection works properly. [MAP] + + if self.get_state() == constants.eNotMe: + # Both model probers say it's not them. No reason to continue. 
+ return constants.eNotMe + + aBuf = self.filter_high_bit_only(aBuf) + + for cur in aBuf: + if cur == ' ': + # We stand on a space - a word just ended + if self._mBeforePrev != ' ': + # next-to-last char was not a space so self._mPrev is not a 1 letter word + if self.is_final(self._mPrev): + # case (1) [-2:not space][-1:final letter][cur:space] + self._mFinalCharLogicalScore += 1 + elif self.is_non_final(self._mPrev): + # case (2) [-2:not space][-1:Non-Final letter][cur:space] + self._mFinalCharVisualScore += 1 + else: + # Not standing on a space + if (self._mBeforePrev == ' ') and (self.is_final(self._mPrev)) and (cur != ' '): + # case (3) [-2:space][-1:final letter][cur:not space] + self._mFinalCharVisualScore += 1 + self._mBeforePrev = self._mPrev + self._mPrev = cur + + # Forever detecting, till the end or until both model probers return eNotMe (handled above) + return constants.eDetecting + + def get_charset_name(self): + # Make the decision: is it Logical or Visual? + # If the final letter score distance is dominant enough, rely on it. + finalsub = self._mFinalCharLogicalScore - self._mFinalCharVisualScore + if finalsub >= MIN_FINAL_CHAR_DISTANCE: + return LOGICAL_HEBREW_NAME + if finalsub <= -MIN_FINAL_CHAR_DISTANCE: + return VISUAL_HEBREW_NAME + + # It's not dominant enough, try to rely on the model scores instead. + modelsub = self._mLogicalProber.get_confidence() - self._mVisualProber.get_confidence() + if modelsub > MIN_MODEL_DISTANCE: + return LOGICAL_HEBREW_NAME + if modelsub < -MIN_MODEL_DISTANCE: + return VISUAL_HEBREW_NAME + + # Still no good, back to final letter distance, maybe it'll save the day. + if finalsub < 0.0: + return VISUAL_HEBREW_NAME + + # (finalsub > 0 - Logical) or (don't know what to do) default to Logical. + return LOGICAL_HEBREW_NAME + + def get_state(self): + # Remain active as long as any of the model probers are active. 
+ if (self._mLogicalProber.get_state() == constants.eNotMe) and \ + (self._mVisualProber.get_state() == constants.eNotMe): + return constants.eNotMe + return constants.eDetecting diff --git a/fanficdownloader/chardet/jisfreq.py b/fanficdownloader/chardet/jisfreq.py new file mode 100644 index 00000000..5fe4a5c3 --- /dev/null +++ b/fanficdownloader/chardet/jisfreq.py @@ -0,0 +1,567 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Communicator client code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +# Sampling from about 20M text materials include literature and computer technology +# +# Japanese frequency table, applied to both S-JIS and EUC-JP +# They are sorted in order. 
+ +# 128 --> 0.77094 +# 256 --> 0.85710 +# 512 --> 0.92635 +# 1024 --> 0.97130 +# 2048 --> 0.99431 +# +# Ideal Distribution Ratio = 0.92635 / (1-0.92635) = 12.58 +# Random Distribution Ration = 512 / (2965+62+83+86-512) = 0.191 +# +# Typical Distribution Ratio, 25% of IDR + +JIS_TYPICAL_DISTRIBUTION_RATIO = 3.0 + +# Char to FreqOrder table , +JIS_TABLE_SIZE = 4368 + +JISCharToFreqOrder = ( \ + 40, 1, 6, 182, 152, 180, 295,2127, 285, 381,3295,4304,3068,4606,3165,3510, # 16 +3511,1822,2785,4607,1193,2226,5070,4608, 171,2996,1247, 18, 179,5071, 856,1661, # 32 +1262,5072, 619, 127,3431,3512,3230,1899,1700, 232, 228,1294,1298, 284, 283,2041, # 48 +2042,1061,1062, 48, 49, 44, 45, 433, 434,1040,1041, 996, 787,2997,1255,4305, # 64 +2108,4609,1684,1648,5073,5074,5075,5076,5077,5078,3687,5079,4610,5080,3927,3928, # 80 +5081,3296,3432, 290,2285,1471,2187,5082,2580,2825,1303,2140,1739,1445,2691,3375, # 96 +1691,3297,4306,4307,4611, 452,3376,1182,2713,3688,3069,4308,5083,5084,5085,5086, # 112 +5087,5088,5089,5090,5091,5092,5093,5094,5095,5096,5097,5098,5099,5100,5101,5102, # 128 +5103,5104,5105,5106,5107,5108,5109,5110,5111,5112,4097,5113,5114,5115,5116,5117, # 144 +5118,5119,5120,5121,5122,5123,5124,5125,5126,5127,5128,5129,5130,5131,5132,5133, # 160 +5134,5135,5136,5137,5138,5139,5140,5141,5142,5143,5144,5145,5146,5147,5148,5149, # 176 +5150,5151,5152,4612,5153,5154,5155,5156,5157,5158,5159,5160,5161,5162,5163,5164, # 192 +5165,5166,5167,5168,5169,5170,5171,5172,5173,5174,5175,1472, 598, 618, 820,1205, # 208 +1309,1412,1858,1307,1692,5176,5177,5178,5179,5180,5181,5182,1142,1452,1234,1172, # 224 +1875,2043,2149,1793,1382,2973, 925,2404,1067,1241, 960,1377,2935,1491, 919,1217, # 240 +1865,2030,1406,1499,2749,4098,5183,5184,5185,5186,5187,5188,2561,4099,3117,1804, # 256 +2049,3689,4309,3513,1663,5189,3166,3118,3298,1587,1561,3433,5190,3119,1625,2998, # 272 +3299,4613,1766,3690,2786,4614,5191,5192,5193,5194,2161, 26,3377, 2,3929, 20, # 288 +3691, 47,4100, 50, 17, 16, 35, 268, 27, 
243, 42, 155, 24, 154, 29, 184, # 304 + 4, 91, 14, 92, 53, 396, 33, 289, 9, 37, 64, 620, 21, 39, 321, 5, # 320 + 12, 11, 52, 13, 3, 208, 138, 0, 7, 60, 526, 141, 151,1069, 181, 275, # 336 +1591, 83, 132,1475, 126, 331, 829, 15, 69, 160, 59, 22, 157, 55,1079, 312, # 352 + 109, 38, 23, 25, 10, 19, 79,5195, 61, 382,1124, 8, 30,5196,5197,5198, # 368 +5199,5200,5201,5202,5203,5204,5205,5206, 89, 62, 74, 34,2416, 112, 139, 196, # 384 + 271, 149, 84, 607, 131, 765, 46, 88, 153, 683, 76, 874, 101, 258, 57, 80, # 400 + 32, 364, 121,1508, 169,1547, 68, 235, 145,2999, 41, 360,3027, 70, 63, 31, # 416 + 43, 259, 262,1383, 99, 533, 194, 66, 93, 846, 217, 192, 56, 106, 58, 565, # 432 + 280, 272, 311, 256, 146, 82, 308, 71, 100, 128, 214, 655, 110, 261, 104,1140, # 448 + 54, 51, 36, 87, 67,3070, 185,2618,2936,2020, 28,1066,2390,2059,5207,5208, # 464 +5209,5210,5211,5212,5213,5214,5215,5216,4615,5217,5218,5219,5220,5221,5222,5223, # 480 +5224,5225,5226,5227,5228,5229,5230,5231,5232,5233,5234,5235,5236,3514,5237,5238, # 496 +5239,5240,5241,5242,5243,5244,2297,2031,4616,4310,3692,5245,3071,5246,3598,5247, # 512 +4617,3231,3515,5248,4101,4311,4618,3808,4312,4102,5249,4103,4104,3599,5250,5251, # 528 +5252,5253,5254,5255,5256,5257,5258,5259,5260,5261,5262,5263,5264,5265,5266,5267, # 544 +5268,5269,5270,5271,5272,5273,5274,5275,5276,5277,5278,5279,5280,5281,5282,5283, # 560 +5284,5285,5286,5287,5288,5289,5290,5291,5292,5293,5294,5295,5296,5297,5298,5299, # 576 +5300,5301,5302,5303,5304,5305,5306,5307,5308,5309,5310,5311,5312,5313,5314,5315, # 592 +5316,5317,5318,5319,5320,5321,5322,5323,5324,5325,5326,5327,5328,5329,5330,5331, # 608 +5332,5333,5334,5335,5336,5337,5338,5339,5340,5341,5342,5343,5344,5345,5346,5347, # 624 +5348,5349,5350,5351,5352,5353,5354,5355,5356,5357,5358,5359,5360,5361,5362,5363, # 640 +5364,5365,5366,5367,5368,5369,5370,5371,5372,5373,5374,5375,5376,5377,5378,5379, # 656 +5380,5381, 363, 642,2787,2878,2788,2789,2316,3232,2317,3434,2011, 165,1942,3930, # 672 
+3931,3932,3933,5382,4619,5383,4620,5384,5385,5386,5387,5388,5389,5390,5391,5392, # 688 +5393,5394,5395,5396,5397,5398,5399,5400,5401,5402,5403,5404,5405,5406,5407,5408, # 704 +5409,5410,5411,5412,5413,5414,5415,5416,5417,5418,5419,5420,5421,5422,5423,5424, # 720 +5425,5426,5427,5428,5429,5430,5431,5432,5433,5434,5435,5436,5437,5438,5439,5440, # 736 +5441,5442,5443,5444,5445,5446,5447,5448,5449,5450,5451,5452,5453,5454,5455,5456, # 752 +5457,5458,5459,5460,5461,5462,5463,5464,5465,5466,5467,5468,5469,5470,5471,5472, # 768 +5473,5474,5475,5476,5477,5478,5479,5480,5481,5482,5483,5484,5485,5486,5487,5488, # 784 +5489,5490,5491,5492,5493,5494,5495,5496,5497,5498,5499,5500,5501,5502,5503,5504, # 800 +5505,5506,5507,5508,5509,5510,5511,5512,5513,5514,5515,5516,5517,5518,5519,5520, # 816 +5521,5522,5523,5524,5525,5526,5527,5528,5529,5530,5531,5532,5533,5534,5535,5536, # 832 +5537,5538,5539,5540,5541,5542,5543,5544,5545,5546,5547,5548,5549,5550,5551,5552, # 848 +5553,5554,5555,5556,5557,5558,5559,5560,5561,5562,5563,5564,5565,5566,5567,5568, # 864 +5569,5570,5571,5572,5573,5574,5575,5576,5577,5578,5579,5580,5581,5582,5583,5584, # 880 +5585,5586,5587,5588,5589,5590,5591,5592,5593,5594,5595,5596,5597,5598,5599,5600, # 896 +5601,5602,5603,5604,5605,5606,5607,5608,5609,5610,5611,5612,5613,5614,5615,5616, # 912 +5617,5618,5619,5620,5621,5622,5623,5624,5625,5626,5627,5628,5629,5630,5631,5632, # 928 +5633,5634,5635,5636,5637,5638,5639,5640,5641,5642,5643,5644,5645,5646,5647,5648, # 944 +5649,5650,5651,5652,5653,5654,5655,5656,5657,5658,5659,5660,5661,5662,5663,5664, # 960 +5665,5666,5667,5668,5669,5670,5671,5672,5673,5674,5675,5676,5677,5678,5679,5680, # 976 +5681,5682,5683,5684,5685,5686,5687,5688,5689,5690,5691,5692,5693,5694,5695,5696, # 992 +5697,5698,5699,5700,5701,5702,5703,5704,5705,5706,5707,5708,5709,5710,5711,5712, # 1008 +5713,5714,5715,5716,5717,5718,5719,5720,5721,5722,5723,5724,5725,5726,5727,5728, # 1024 
+5729,5730,5731,5732,5733,5734,5735,5736,5737,5738,5739,5740,5741,5742,5743,5744, # 1040 +5745,5746,5747,5748,5749,5750,5751,5752,5753,5754,5755,5756,5757,5758,5759,5760, # 1056 +5761,5762,5763,5764,5765,5766,5767,5768,5769,5770,5771,5772,5773,5774,5775,5776, # 1072 +5777,5778,5779,5780,5781,5782,5783,5784,5785,5786,5787,5788,5789,5790,5791,5792, # 1088 +5793,5794,5795,5796,5797,5798,5799,5800,5801,5802,5803,5804,5805,5806,5807,5808, # 1104 +5809,5810,5811,5812,5813,5814,5815,5816,5817,5818,5819,5820,5821,5822,5823,5824, # 1120 +5825,5826,5827,5828,5829,5830,5831,5832,5833,5834,5835,5836,5837,5838,5839,5840, # 1136 +5841,5842,5843,5844,5845,5846,5847,5848,5849,5850,5851,5852,5853,5854,5855,5856, # 1152 +5857,5858,5859,5860,5861,5862,5863,5864,5865,5866,5867,5868,5869,5870,5871,5872, # 1168 +5873,5874,5875,5876,5877,5878,5879,5880,5881,5882,5883,5884,5885,5886,5887,5888, # 1184 +5889,5890,5891,5892,5893,5894,5895,5896,5897,5898,5899,5900,5901,5902,5903,5904, # 1200 +5905,5906,5907,5908,5909,5910,5911,5912,5913,5914,5915,5916,5917,5918,5919,5920, # 1216 +5921,5922,5923,5924,5925,5926,5927,5928,5929,5930,5931,5932,5933,5934,5935,5936, # 1232 +5937,5938,5939,5940,5941,5942,5943,5944,5945,5946,5947,5948,5949,5950,5951,5952, # 1248 +5953,5954,5955,5956,5957,5958,5959,5960,5961,5962,5963,5964,5965,5966,5967,5968, # 1264 +5969,5970,5971,5972,5973,5974,5975,5976,5977,5978,5979,5980,5981,5982,5983,5984, # 1280 +5985,5986,5987,5988,5989,5990,5991,5992,5993,5994,5995,5996,5997,5998,5999,6000, # 1296 +6001,6002,6003,6004,6005,6006,6007,6008,6009,6010,6011,6012,6013,6014,6015,6016, # 1312 +6017,6018,6019,6020,6021,6022,6023,6024,6025,6026,6027,6028,6029,6030,6031,6032, # 1328 +6033,6034,6035,6036,6037,6038,6039,6040,6041,6042,6043,6044,6045,6046,6047,6048, # 1344 +6049,6050,6051,6052,6053,6054,6055,6056,6057,6058,6059,6060,6061,6062,6063,6064, # 1360 +6065,6066,6067,6068,6069,6070,6071,6072,6073,6074,6075,6076,6077,6078,6079,6080, # 1376 
+6081,6082,6083,6084,6085,6086,6087,6088,6089,6090,6091,6092,6093,6094,6095,6096, # 1392 +6097,6098,6099,6100,6101,6102,6103,6104,6105,6106,6107,6108,6109,6110,6111,6112, # 1408 +6113,6114,2044,2060,4621, 997,1235, 473,1186,4622, 920,3378,6115,6116, 379,1108, # 1424 +4313,2657,2735,3934,6117,3809, 636,3233, 573,1026,3693,3435,2974,3300,2298,4105, # 1440 + 854,2937,2463, 393,2581,2417, 539, 752,1280,2750,2480, 140,1161, 440, 708,1569, # 1456 + 665,2497,1746,1291,1523,3000, 164,1603, 847,1331, 537,1997, 486, 508,1693,2418, # 1472 +1970,2227, 878,1220, 299,1030, 969, 652,2751, 624,1137,3301,2619, 65,3302,2045, # 1488 +1761,1859,3120,1930,3694,3516, 663,1767, 852, 835,3695, 269, 767,2826,2339,1305, # 1504 + 896,1150, 770,1616,6118, 506,1502,2075,1012,2519, 775,2520,2975,2340,2938,4314, # 1520 +3028,2086,1224,1943,2286,6119,3072,4315,2240,1273,1987,3935,1557, 175, 597, 985, # 1536 +3517,2419,2521,1416,3029, 585, 938,1931,1007,1052,1932,1685,6120,3379,4316,4623, # 1552 + 804, 599,3121,1333,2128,2539,1159,1554,2032,3810, 687,2033,2904, 952, 675,1467, # 1568 +3436,6121,2241,1096,1786,2440,1543,1924, 980,1813,2228, 781,2692,1879, 728,1918, # 1584 +3696,4624, 548,1950,4625,1809,1088,1356,3303,2522,1944, 502, 972, 373, 513,2827, # 1600 + 586,2377,2391,1003,1976,1631,6122,2464,1084, 648,1776,4626,2141, 324, 962,2012, # 1616 +2177,2076,1384, 742,2178,1448,1173,1810, 222, 102, 301, 445, 125,2420, 662,2498, # 1632 + 277, 200,1476,1165,1068, 224,2562,1378,1446, 450,1880, 659, 791, 582,4627,2939, # 1648 +3936,1516,1274, 555,2099,3697,1020,1389,1526,3380,1762,1723,1787,2229, 412,2114, # 1664 +1900,2392,3518, 512,2597, 427,1925,2341,3122,1653,1686,2465,2499, 697, 330, 273, # 1680 + 380,2162, 951, 832, 780, 991,1301,3073, 965,2270,3519, 668,2523,2636,1286, 535, # 1696 +1407, 518, 671, 957,2658,2378, 267, 611,2197,3030,6123, 248,2299, 967,1799,2356, # 1712 + 850,1418,3437,1876,1256,1480,2828,1718,6124,6125,1755,1664,2405,6126,4628,2879, # 1728 +2829, 499,2179, 676,4629, 
557,2329,2214,2090, 325,3234, 464, 811,3001, 992,2342, # 1744 +2481,1232,1469, 303,2242, 466,1070,2163, 603,1777,2091,4630,2752,4631,2714, 322, # 1760 +2659,1964,1768, 481,2188,1463,2330,2857,3600,2092,3031,2421,4632,2318,2070,1849, # 1776 +2598,4633,1302,2254,1668,1701,2422,3811,2905,3032,3123,2046,4106,1763,1694,4634, # 1792 +1604, 943,1724,1454, 917, 868,2215,1169,2940, 552,1145,1800,1228,1823,1955, 316, # 1808 +1080,2510, 361,1807,2830,4107,2660,3381,1346,1423,1134,4108,6127, 541,1263,1229, # 1824 +1148,2540, 545, 465,1833,2880,3438,1901,3074,2482, 816,3937, 713,1788,2500, 122, # 1840 +1575, 195,1451,2501,1111,6128, 859, 374,1225,2243,2483,4317, 390,1033,3439,3075, # 1856 +2524,1687, 266, 793,1440,2599, 946, 779, 802, 507, 897,1081, 528,2189,1292, 711, # 1872 +1866,1725,1167,1640, 753, 398,2661,1053, 246, 348,4318, 137,1024,3440,1600,2077, # 1888 +2129, 825,4319, 698, 238, 521, 187,2300,1157,2423,1641,1605,1464,1610,1097,2541, # 1904 +1260,1436, 759,2255,1814,2150, 705,3235, 409,2563,3304, 561,3033,2005,2564, 726, # 1920 +1956,2343,3698,4109, 949,3812,3813,3520,1669, 653,1379,2525, 881,2198, 632,2256, # 1936 +1027, 778,1074, 733,1957, 514,1481,2466, 554,2180, 702,3938,1606,1017,1398,6129, # 1952 +1380,3521, 921, 993,1313, 594, 449,1489,1617,1166, 768,1426,1360, 495,1794,3601, # 1968 +1177,3602,1170,4320,2344, 476, 425,3167,4635,3168,1424, 401,2662,1171,3382,1998, # 1984 +1089,4110, 477,3169, 474,6130,1909, 596,2831,1842, 494, 693,1051,1028,1207,3076, # 2000 + 606,2115, 727,2790,1473,1115, 743,3522, 630, 805,1532,4321,2021, 366,1057, 838, # 2016 + 684,1114,2142,4322,2050,1492,1892,1808,2271,3814,2424,1971,1447,1373,3305,1090, # 2032 +1536,3939,3523,3306,1455,2199, 336, 369,2331,1035, 584,2393, 902, 718,2600,6131, # 2048 +2753, 463,2151,1149,1611,2467, 715,1308,3124,1268, 343,1413,3236,1517,1347,2663, # 2064 +2093,3940,2022,1131,1553,2100,2941,1427,3441,2942,1323,2484,6132,1980, 872,2368, # 2080 +2441,2943, 320,2369,2116,1082, 679,1933,3941,2791,3815, 
625,1143,2023, 422,2200, # 2096 +3816,6133, 730,1695, 356,2257,1626,2301,2858,2637,1627,1778, 937, 883,2906,2693, # 2112 +3002,1769,1086, 400,1063,1325,3307,2792,4111,3077, 456,2345,1046, 747,6134,1524, # 2128 + 884,1094,3383,1474,2164,1059, 974,1688,2181,2258,1047, 345,1665,1187, 358, 875, # 2144 +3170, 305, 660,3524,2190,1334,1135,3171,1540,1649,2542,1527, 927, 968,2793, 885, # 2160 +1972,1850, 482, 500,2638,1218,1109,1085,2543,1654,2034, 876, 78,2287,1482,1277, # 2176 + 861,1675,1083,1779, 724,2754, 454, 397,1132,1612,2332, 893, 672,1237, 257,2259, # 2192 +2370, 135,3384, 337,2244, 547, 352, 340, 709,2485,1400, 788,1138,2511, 540, 772, # 2208 +1682,2260,2272,2544,2013,1843,1902,4636,1999,1562,2288,4637,2201,1403,1533, 407, # 2224 + 576,3308,1254,2071, 978,3385, 170, 136,1201,3125,2664,3172,2394, 213, 912, 873, # 2240 +3603,1713,2202, 699,3604,3699, 813,3442, 493, 531,1054, 468,2907,1483, 304, 281, # 2256 +4112,1726,1252,2094, 339,2319,2130,2639, 756,1563,2944, 748, 571,2976,1588,2425, # 2272 +2715,1851,1460,2426,1528,1392,1973,3237, 288,3309, 685,3386, 296, 892,2716,2216, # 2288 +1570,2245, 722,1747,2217, 905,3238,1103,6135,1893,1441,1965, 251,1805,2371,3700, # 2304 +2601,1919,1078, 75,2182,1509,1592,1270,2640,4638,2152,6136,3310,3817, 524, 706, # 2320 +1075, 292,3818,1756,2602, 317, 98,3173,3605,3525,1844,2218,3819,2502, 814, 567, # 2336 + 385,2908,1534,6137, 534,1642,3239, 797,6138,1670,1529, 953,4323, 188,1071, 538, # 2352 + 178, 729,3240,2109,1226,1374,2000,2357,2977, 731,2468,1116,2014,2051,6139,1261, # 2368 +1593, 803,2859,2736,3443, 556, 682, 823,1541,6140,1369,2289,1706,2794, 845, 462, # 2384 +2603,2665,1361, 387, 162,2358,1740, 739,1770,1720,1304,1401,3241,1049, 627,1571, # 2400 +2427,3526,1877,3942,1852,1500, 431,1910,1503, 677, 297,2795, 286,1433,1038,1198, # 2416 +2290,1133,1596,4113,4639,2469,1510,1484,3943,6141,2442, 108, 712,4640,2372, 866, # 2432 +3701,2755,3242,1348, 834,1945,1408,3527,2395,3243,1811, 824, 994,1179,2110,1548, # 2448 +1453, 
790,3003, 690,4324,4325,2832,2909,3820,1860,3821, 225,1748, 310, 346,1780, # 2464 +2470, 821,1993,2717,2796, 828, 877,3528,2860,2471,1702,2165,2910,2486,1789, 453, # 2480 + 359,2291,1676, 73,1164,1461,1127,3311, 421, 604, 314,1037, 589, 116,2487, 737, # 2496 + 837,1180, 111, 244, 735,6142,2261,1861,1362, 986, 523, 418, 581,2666,3822, 103, # 2512 + 855, 503,1414,1867,2488,1091, 657,1597, 979, 605,1316,4641,1021,2443,2078,2001, # 2528 +1209, 96, 587,2166,1032, 260,1072,2153, 173, 94, 226,3244, 819,2006,4642,4114, # 2544 +2203, 231,1744, 782, 97,2667, 786,3387, 887, 391, 442,2219,4326,1425,6143,2694, # 2560 + 633,1544,1202, 483,2015, 592,2052,1958,2472,1655, 419, 129,4327,3444,3312,1714, # 2576 +1257,3078,4328,1518,1098, 865,1310,1019,1885,1512,1734, 469,2444, 148, 773, 436, # 2592 +1815,1868,1128,1055,4329,1245,2756,3445,2154,1934,1039,4643, 579,1238, 932,2320, # 2608 + 353, 205, 801, 115,2428, 944,2321,1881, 399,2565,1211, 678, 766,3944, 335,2101, # 2624 +1459,1781,1402,3945,2737,2131,1010, 844, 981,1326,1013, 550,1816,1545,2620,1335, # 2640 +1008, 371,2881, 936,1419,1613,3529,1456,1395,2273,1834,2604,1317,2738,2503, 416, # 2656 +1643,4330, 806,1126, 229, 591,3946,1314,1981,1576,1837,1666, 347,1790, 977,3313, # 2672 + 764,2861,1853, 688,2429,1920,1462, 77, 595, 415,2002,3034, 798,1192,4115,6144, # 2688 +2978,4331,3035,2695,2582,2072,2566, 430,2430,1727, 842,1396,3947,3702, 613, 377, # 2704 + 278, 236,1417,3388,3314,3174, 757,1869, 107,3530,6145,1194, 623,2262, 207,1253, # 2720 +2167,3446,3948, 492,1117,1935, 536,1838,2757,1246,4332, 696,2095,2406,1393,1572, # 2736 +3175,1782, 583, 190, 253,1390,2230, 830,3126,3389, 934,3245,1703,1749,2979,1870, # 2752 +2545,1656,2204, 869,2346,4116,3176,1817, 496,1764,4644, 942,1504, 404,1903,1122, # 2768 +1580,3606,2945,1022, 515, 372,1735, 955,2431,3036,6146,2797,1110,2302,2798, 617, # 2784 +6147, 441, 762,1771,3447,3607,3608,1904, 840,3037, 86, 939,1385, 572,1370,2445, # 2800 +1336, 114,3703, 898, 294, 203,3315, 703,1583,2274, 
429, 961,4333,1854,1951,3390, # 2816 +2373,3704,4334,1318,1381, 966,1911,2322,1006,1155, 309, 989, 458,2718,1795,1372, # 2832 +1203, 252,1689,1363,3177, 517,1936, 168,1490, 562, 193,3823,1042,4117,1835, 551, # 2848 + 470,4645, 395, 489,3448,1871,1465,2583,2641, 417,1493, 279,1295, 511,1236,1119, # 2864 + 72,1231,1982,1812,3004, 871,1564, 984,3449,1667,2696,2096,4646,2347,2833,1673, # 2880 +3609, 695,3246,2668, 807,1183,4647, 890, 388,2333,1801,1457,2911,1765,1477,1031, # 2896 +3316,3317,1278,3391,2799,2292,2526, 163,3450,4335,2669,1404,1802,6148,2323,2407, # 2912 +1584,1728,1494,1824,1269, 298, 909,3318,1034,1632, 375, 776,1683,2061, 291, 210, # 2928 +1123, 809,1249,1002,2642,3038, 206,1011,2132, 144, 975, 882,1565, 342, 667, 754, # 2944 +1442,2143,1299,2303,2062, 447, 626,2205,1221,2739,2912,1144,1214,2206,2584, 760, # 2960 +1715, 614, 950,1281,2670,2621, 810, 577,1287,2546,4648, 242,2168, 250,2643, 691, # 2976 + 123,2644, 647, 313,1029, 689,1357,2946,1650, 216, 771,1339,1306, 808,2063, 549, # 2992 + 913,1371,2913,2914,6149,1466,1092,1174,1196,1311,2605,2396,1783,1796,3079, 406, # 3008 +2671,2117,3949,4649, 487,1825,2220,6150,2915, 448,2348,1073,6151,2397,1707, 130, # 3024 + 900,1598, 329, 176,1959,2527,1620,6152,2275,4336,3319,1983,2191,3705,3610,2155, # 3040 +3706,1912,1513,1614,6153,1988, 646, 392,2304,1589,3320,3039,1826,1239,1352,1340, # 3056 +2916, 505,2567,1709,1437,2408,2547, 906,6154,2672, 384,1458,1594,1100,1329, 710, # 3072 + 423,3531,2064,2231,2622,1989,2673,1087,1882, 333, 841,3005,1296,2882,2379, 580, # 3088 +1937,1827,1293,2585, 601, 574, 249,1772,4118,2079,1120, 645, 901,1176,1690, 795, # 3104 +2207, 478,1434, 516,1190,1530, 761,2080, 930,1264, 355, 435,1552, 644,1791, 987, # 3120 + 220,1364,1163,1121,1538, 306,2169,1327,1222, 546,2645, 218, 241, 610,1704,3321, # 3136 +1984,1839,1966,2528, 451,6155,2586,3707,2568, 907,3178, 254,2947, 186,1845,4650, # 3152 + 745, 432,1757, 428,1633, 888,2246,2221,2489,3611,2118,1258,1265, 956,3127,1784, # 3168 
+4337,2490, 319, 510, 119, 457,3612, 274,2035,2007,4651,1409,3128, 970,2758, 590, # 3184 +2800, 661,2247,4652,2008,3950,1420,1549,3080,3322,3951,1651,1375,2111, 485,2491, # 3200 +1429,1156,6156,2548,2183,1495, 831,1840,2529,2446, 501,1657, 307,1894,3247,1341, # 3216 + 666, 899,2156,1539,2549,1559, 886, 349,2208,3081,2305,1736,3824,2170,2759,1014, # 3232 +1913,1386, 542,1397,2948, 490, 368, 716, 362, 159, 282,2569,1129,1658,1288,1750, # 3248 +2674, 276, 649,2016, 751,1496, 658,1818,1284,1862,2209,2087,2512,3451, 622,2834, # 3264 + 376, 117,1060,2053,1208,1721,1101,1443, 247,1250,3179,1792,3952,2760,2398,3953, # 3280 +6157,2144,3708, 446,2432,1151,2570,3452,2447,2761,2835,1210,2448,3082, 424,2222, # 3296 +1251,2449,2119,2836, 504,1581,4338, 602, 817, 857,3825,2349,2306, 357,3826,1470, # 3312 +1883,2883, 255, 958, 929,2917,3248, 302,4653,1050,1271,1751,2307,1952,1430,2697, # 3328 +2719,2359, 354,3180, 777, 158,2036,4339,1659,4340,4654,2308,2949,2248,1146,2232, # 3344 +3532,2720,1696,2623,3827,6158,3129,1550,2698,1485,1297,1428, 637, 931,2721,2145, # 3360 + 914,2550,2587, 81,2450, 612, 827,2646,1242,4655,1118,2884, 472,1855,3181,3533, # 3376 +3534, 569,1353,2699,1244,1758,2588,4119,2009,2762,2171,3709,1312,1531,6159,1152, # 3392 +1938, 134,1830, 471,3710,2276,1112,1535,3323,3453,3535, 982,1337,2950, 488, 826, # 3408 + 674,1058,1628,4120,2017, 522,2399, 211, 568,1367,3454, 350, 293,1872,1139,3249, # 3424 +1399,1946,3006,1300,2360,3324, 588, 736,6160,2606, 744, 669,3536,3828,6161,1358, # 3440 + 199, 723, 848, 933, 851,1939,1505,1514,1338,1618,1831,4656,1634,3613, 443,2740, # 3456 +3829, 717,1947, 491,1914,6162,2551,1542,4121,1025,6163,1099,1223, 198,3040,2722, # 3472 + 370, 410,1905,2589, 998,1248,3182,2380, 519,1449,4122,1710, 947, 928,1153,4341, # 3488 +2277, 344,2624,1511, 615, 105, 161,1212,1076,1960,3130,2054,1926,1175,1906,2473, # 3504 + 414,1873,2801,6164,2309, 315,1319,3325, 318,2018,2146,2157, 963, 631, 223,4342, # 3520 +4343,2675, 
479,3711,1197,2625,3712,2676,2361,6165,4344,4123,6166,2451,3183,1886, # 3536 +2184,1674,1330,1711,1635,1506, 799, 219,3250,3083,3954,1677,3713,3326,2081,3614, # 3552 +1652,2073,4657,1147,3041,1752, 643,1961, 147,1974,3955,6167,1716,2037, 918,3007, # 3568 +1994, 120,1537, 118, 609,3184,4345, 740,3455,1219, 332,1615,3830,6168,1621,2980, # 3584 +1582, 783, 212, 553,2350,3714,1349,2433,2082,4124, 889,6169,2310,1275,1410, 973, # 3600 + 166,1320,3456,1797,1215,3185,2885,1846,2590,2763,4658, 629, 822,3008, 763, 940, # 3616 +1990,2862, 439,2409,1566,1240,1622, 926,1282,1907,2764, 654,2210,1607, 327,1130, # 3632 +3956,1678,1623,6170,2434,2192, 686, 608,3831,3715, 903,3957,3042,6171,2741,1522, # 3648 +1915,1105,1555,2552,1359, 323,3251,4346,3457, 738,1354,2553,2311,2334,1828,2003, # 3664 +3832,1753,2351,1227,6172,1887,4125,1478,6173,2410,1874,1712,1847, 520,1204,2607, # 3680 + 264,4659, 836,2677,2102, 600,4660,3833,2278,3084,6174,4347,3615,1342, 640, 532, # 3696 + 543,2608,1888,2400,2591,1009,4348,1497, 341,1737,3616,2723,1394, 529,3252,1321, # 3712 + 983,4661,1515,2120, 971,2592, 924, 287,1662,3186,4349,2700,4350,1519, 908,1948, # 3728 +2452, 156, 796,1629,1486,2223,2055, 694,4126,1259,1036,3392,1213,2249,2742,1889, # 3744 +1230,3958,1015, 910, 408, 559,3617,4662, 746, 725, 935,4663,3959,3009,1289, 563, # 3760 + 867,4664,3960,1567,2981,2038,2626, 988,2263,2381,4351, 143,2374, 704,1895,6175, # 3776 +1188,3716,2088, 673,3085,2362,4352, 484,1608,1921,2765,2918, 215, 904,3618,3537, # 3792 + 894, 509, 976,3043,2701,3961,4353,2837,2982, 498,6176,6177,1102,3538,1332,3393, # 3808 +1487,1636,1637, 233, 245,3962, 383, 650, 995,3044, 460,1520,1206,2352, 749,3327, # 3824 + 530, 700, 389,1438,1560,1773,3963,2264, 719,2951,2724,3834, 870,1832,1644,1000, # 3840 + 839,2474,3717, 197,1630,3394, 365,2886,3964,1285,2133, 734, 922, 818,1106, 732, # 3856 + 480,2083,1774,3458, 923,2279,1350, 221,3086, 85,2233,2234,3835,1585,3010,2147, # 3872 +1387,1705,2382,1619,2475, 133, 
239,2802,1991,1016,2084,2383, 411,2838,1113, 651, # 3888 +1985,1160,3328, 990,1863,3087,1048,1276,2647, 265,2627,1599,3253,2056, 150, 638, # 3904 +2019, 656, 853, 326,1479, 680,1439,4354,1001,1759, 413,3459,3395,2492,1431, 459, # 3920 +4355,1125,3329,2265,1953,1450,2065,2863, 849, 351,2678,3131,3254,3255,1104,1577, # 3936 + 227,1351,1645,2453,2193,1421,2887, 812,2121, 634, 95,2435, 201,2312,4665,1646, # 3952 +1671,2743,1601,2554,2702,2648,2280,1315,1366,2089,3132,1573,3718,3965,1729,1189, # 3968 + 328,2679,1077,1940,1136, 558,1283, 964,1195, 621,2074,1199,1743,3460,3619,1896, # 3984 +1916,1890,3836,2952,1154,2112,1064, 862, 378,3011,2066,2113,2803,1568,2839,6178, # 4000 +3088,2919,1941,1660,2004,1992,2194, 142, 707,1590,1708,1624,1922,1023,1836,1233, # 4016 +1004,2313, 789, 741,3620,6179,1609,2411,1200,4127,3719,3720,4666,2057,3721, 593, # 4032 +2840, 367,2920,1878,6180,3461,1521, 628,1168, 692,2211,2649, 300, 720,2067,2571, # 4048 +2953,3396, 959,2504,3966,3539,3462,1977, 701,6181, 954,1043, 800, 681, 183,3722, # 4064 +1803,1730,3540,4128,2103, 815,2314, 174, 467, 230,2454,1093,2134, 755,3541,3397, # 4080 +1141,1162,6182,1738,2039, 270,3256,2513,1005,1647,2185,3837, 858,1679,1897,1719, # 4096 +2954,2324,1806, 402, 670, 167,4129,1498,2158,2104, 750,6183, 915, 189,1680,1551, # 4112 + 455,4356,1501,2455, 405,1095,2955, 338,1586,1266,1819, 570, 641,1324, 237,1556, # 4128 +2650,1388,3723,6184,1368,2384,1343,1978,3089,2436, 879,3724, 792,1191, 758,3012, # 4144 +1411,2135,1322,4357, 240,4667,1848,3725,1574,6185, 420,3045,1546,1391, 714,4358, # 4160 +1967, 941,1864, 863, 664, 426, 560,1731,2680,1785,2864,1949,2363, 403,3330,1415, # 4176 +1279,2136,1697,2335, 204, 721,2097,3838, 90,6186,2085,2505, 191,3967, 124,2148, # 4192 +1376,1798,1178,1107,1898,1405, 860,4359,1243,1272,2375,2983,1558,2456,1638, 113, # 4208 +3621, 578,1923,2609, 880, 386,4130, 784,2186,2266,1422,2956,2172,1722, 497, 263, # 4224 +2514,1267,2412,2610, 177,2703,3542, 774,1927,1344, 616,1432,1595,1018, 
172,4360, # 4240 +2325, 911,4361, 438,1468,3622, 794,3968,2024,2173,1681,1829,2957, 945, 895,3090, # 4256 + 575,2212,2476, 475,2401,2681, 785,2744,1745,2293,2555,1975,3133,2865, 394,4668, # 4272 +3839, 635,4131, 639, 202,1507,2195,2766,1345,1435,2572,3726,1908,1184,1181,2457, # 4288 +3727,3134,4362, 843,2611, 437, 916,4669, 234, 769,1884,3046,3047,3623, 833,6187, # 4304 +1639,2250,2402,1355,1185,2010,2047, 999, 525,1732,1290,1488,2612, 948,1578,3728, # 4320 +2413,2477,1216,2725,2159, 334,3840,1328,3624,2921,1525,4132, 564,1056, 891,4363, # 4336 +1444,1698,2385,2251,3729,1365,2281,2235,1717,6188, 864,3841,2515, 444, 527,2767, # 4352 +2922,3625, 544, 461,6189, 566, 209,2437,3398,2098,1065,2068,3331,3626,3257,2137, # 4368 #last 512 +#Everything below is of no interest for detection purpose +2138,2122,3730,2888,1995,1820,1044,6190,6191,6192,6193,6194,6195,6196,6197,6198, # 4384 +6199,6200,6201,6202,6203,6204,6205,4670,6206,6207,6208,6209,6210,6211,6212,6213, # 4400 +6214,6215,6216,6217,6218,6219,6220,6221,6222,6223,6224,6225,6226,6227,6228,6229, # 4416 +6230,6231,6232,6233,6234,6235,6236,6237,3187,6238,6239,3969,6240,6241,6242,6243, # 4432 +6244,4671,6245,6246,4672,6247,6248,4133,6249,6250,4364,6251,2923,2556,2613,4673, # 4448 +4365,3970,6252,6253,6254,6255,4674,6256,6257,6258,2768,2353,4366,4675,4676,3188, # 4464 +4367,3463,6259,4134,4677,4678,6260,2267,6261,3842,3332,4368,3543,6262,6263,6264, # 4480 +3013,1954,1928,4135,4679,6265,6266,2478,3091,6267,4680,4369,6268,6269,1699,6270, # 4496 +3544,4136,4681,6271,4137,6272,4370,2804,6273,6274,2593,3971,3972,4682,6275,2236, # 4512 +4683,6276,6277,4684,6278,6279,4138,3973,4685,6280,6281,3258,6282,6283,6284,6285, # 4528 +3974,4686,2841,3975,6286,6287,3545,6288,6289,4139,4687,4140,6290,4141,6291,4142, # 4544 +6292,6293,3333,6294,6295,6296,4371,6297,3399,6298,6299,4372,3976,6300,6301,6302, # 4560 +4373,6303,6304,3843,3731,6305,4688,4374,6306,6307,3259,2294,6308,3732,2530,4143, # 4576 
+6309,4689,6310,6311,6312,3048,6313,6314,4690,3733,2237,6315,6316,2282,3334,6317, # 4592 +6318,3844,6319,6320,4691,6321,3400,4692,6322,4693,6323,3049,6324,4375,6325,3977, # 4608 +6326,6327,6328,3546,6329,4694,3335,6330,4695,4696,6331,6332,6333,6334,4376,3978, # 4624 +6335,4697,3979,4144,6336,3980,4698,6337,6338,6339,6340,6341,4699,4700,4701,6342, # 4640 +6343,4702,6344,6345,4703,6346,6347,4704,6348,4705,4706,3135,6349,4707,6350,4708, # 4656 +6351,4377,6352,4709,3734,4145,6353,2506,4710,3189,6354,3050,4711,3981,6355,3547, # 4672 +3014,4146,4378,3735,2651,3845,3260,3136,2224,1986,6356,3401,6357,4712,2594,3627, # 4688 +3137,2573,3736,3982,4713,3628,4714,4715,2682,3629,4716,6358,3630,4379,3631,6359, # 4704 +6360,6361,3983,6362,6363,6364,6365,4147,3846,4717,6366,6367,3737,2842,6368,4718, # 4720 +2628,6369,3261,6370,2386,6371,6372,3738,3984,4719,3464,4720,3402,6373,2924,3336, # 4736 +4148,2866,6374,2805,3262,4380,2704,2069,2531,3138,2806,2984,6375,2769,6376,4721, # 4752 +4722,3403,6377,6378,3548,6379,6380,2705,3092,1979,4149,2629,3337,2889,6381,3338, # 4768 +4150,2557,3339,4381,6382,3190,3263,3739,6383,4151,4723,4152,2558,2574,3404,3191, # 4784 +6384,6385,4153,6386,4724,4382,6387,6388,4383,6389,6390,4154,6391,4725,3985,6392, # 4800 +3847,4155,6393,6394,6395,6396,6397,3465,6398,4384,6399,6400,6401,6402,6403,6404, # 4816 +4156,6405,6406,6407,6408,2123,6409,6410,2326,3192,4726,6411,6412,6413,6414,4385, # 4832 +4157,6415,6416,4158,6417,3093,3848,6418,3986,6419,6420,3849,6421,6422,6423,4159, # 4848 +6424,6425,4160,6426,3740,6427,6428,6429,6430,3987,6431,4727,6432,2238,6433,6434, # 4864 +4386,3988,6435,6436,3632,6437,6438,2843,6439,6440,6441,6442,3633,6443,2958,6444, # 4880 +6445,3466,6446,2364,4387,3850,6447,4388,2959,3340,6448,3851,6449,4728,6450,6451, # 4896 +3264,4729,6452,3193,6453,4389,4390,2706,3341,4730,6454,3139,6455,3194,6456,3051, # 4912 +2124,3852,1602,4391,4161,3853,1158,3854,4162,3989,4392,3990,4731,4732,4393,2040, # 4928 
+4163,4394,3265,6457,2807,3467,3855,6458,6459,6460,3991,3468,4733,4734,6461,3140, # 4944 +2960,6462,4735,6463,6464,6465,6466,4736,4737,4738,4739,6467,6468,4164,2403,3856, # 4960 +6469,6470,2770,2844,6471,4740,6472,6473,6474,6475,6476,6477,6478,3195,6479,4741, # 4976 +4395,6480,2867,6481,4742,2808,6482,2493,4165,6483,6484,6485,6486,2295,4743,6487, # 4992 +6488,6489,3634,6490,6491,6492,6493,6494,6495,6496,2985,4744,6497,6498,4745,6499, # 5008 +6500,2925,3141,4166,6501,6502,4746,6503,6504,4747,6505,6506,6507,2890,6508,6509, # 5024 +6510,6511,6512,6513,6514,6515,6516,6517,6518,6519,3469,4167,6520,6521,6522,4748, # 5040 +4396,3741,4397,4749,4398,3342,2125,4750,6523,4751,4752,4753,3052,6524,2961,4168, # 5056 +6525,4754,6526,4755,4399,2926,4169,6527,3857,6528,4400,4170,6529,4171,6530,6531, # 5072 +2595,6532,6533,6534,6535,3635,6536,6537,6538,6539,6540,6541,6542,4756,6543,6544, # 5088 +6545,6546,6547,6548,4401,6549,6550,6551,6552,4402,3405,4757,4403,6553,6554,6555, # 5104 +4172,3742,6556,6557,6558,3992,3636,6559,6560,3053,2726,6561,3549,4173,3054,4404, # 5120 +6562,6563,3993,4405,3266,3550,2809,4406,6564,6565,6566,4758,4759,6567,3743,6568, # 5136 +4760,3744,4761,3470,6569,6570,6571,4407,6572,3745,4174,6573,4175,2810,4176,3196, # 5152 +4762,6574,4177,6575,6576,2494,2891,3551,6577,6578,3471,6579,4408,6580,3015,3197, # 5168 +6581,3343,2532,3994,3858,6582,3094,3406,4409,6583,2892,4178,4763,4410,3016,4411, # 5184 +6584,3995,3142,3017,2683,6585,4179,6586,6587,4764,4412,6588,6589,4413,6590,2986, # 5200 +6591,2962,3552,6592,2963,3472,6593,6594,4180,4765,6595,6596,2225,3267,4414,6597, # 5216 +3407,3637,4766,6598,6599,3198,6600,4415,6601,3859,3199,6602,3473,4767,2811,4416, # 5232 +1856,3268,3200,2575,3996,3997,3201,4417,6603,3095,2927,6604,3143,6605,2268,6606, # 5248 +3998,3860,3096,2771,6607,6608,3638,2495,4768,6609,3861,6610,3269,2745,4769,4181, # 5264 +3553,6611,2845,3270,6612,6613,6614,3862,6615,6616,4770,4771,6617,3474,3999,4418, # 5280 
+4419,6618,3639,3344,6619,4772,4182,6620,2126,6621,6622,6623,4420,4773,6624,3018, # 5296 +6625,4774,3554,6626,4183,2025,3746,6627,4184,2707,6628,4421,4422,3097,1775,4185, # 5312 +3555,6629,6630,2868,6631,6632,4423,6633,6634,4424,2414,2533,2928,6635,4186,2387, # 5328 +6636,4775,6637,4187,6638,1891,4425,3202,3203,6639,6640,4776,6641,3345,6642,6643, # 5344 +3640,6644,3475,3346,3641,4000,6645,3144,6646,3098,2812,4188,3642,3204,6647,3863, # 5360 +3476,6648,3864,6649,4426,4001,6650,6651,6652,2576,6653,4189,4777,6654,6655,6656, # 5376 +2846,6657,3477,3205,4002,6658,4003,6659,3347,2252,6660,6661,6662,4778,6663,6664, # 5392 +6665,6666,6667,6668,6669,4779,4780,2048,6670,3478,3099,6671,3556,3747,4004,6672, # 5408 +6673,6674,3145,4005,3748,6675,6676,6677,6678,6679,3408,6680,6681,6682,6683,3206, # 5424 +3207,6684,6685,4781,4427,6686,4782,4783,4784,6687,6688,6689,4190,6690,6691,3479, # 5440 +6692,2746,6693,4428,6694,6695,6696,6697,6698,6699,4785,6700,6701,3208,2727,6702, # 5456 +3146,6703,6704,3409,2196,6705,4429,6706,6707,6708,2534,1996,6709,6710,6711,2747, # 5472 +6712,6713,6714,4786,3643,6715,4430,4431,6716,3557,6717,4432,4433,6718,6719,6720, # 5488 +6721,3749,6722,4006,4787,6723,6724,3644,4788,4434,6725,6726,4789,2772,6727,6728, # 5504 +6729,6730,6731,2708,3865,2813,4435,6732,6733,4790,4791,3480,6734,6735,6736,6737, # 5520 +4436,3348,6738,3410,4007,6739,6740,4008,6741,6742,4792,3411,4191,6743,6744,6745, # 5536 +6746,6747,3866,6748,3750,6749,6750,6751,6752,6753,6754,6755,3867,6756,4009,6757, # 5552 +4793,4794,6758,2814,2987,6759,6760,6761,4437,6762,6763,6764,6765,3645,6766,6767, # 5568 +3481,4192,6768,3751,6769,6770,2174,6771,3868,3752,6772,6773,6774,4193,4795,4438, # 5584 +3558,4796,4439,6775,4797,6776,6777,4798,6778,4799,3559,4800,6779,6780,6781,3482, # 5600 +6782,2893,6783,6784,4194,4801,4010,6785,6786,4440,6787,4011,6788,6789,6790,6791, # 5616 +6792,6793,4802,6794,6795,6796,4012,6797,6798,6799,6800,3349,4803,3483,6801,4804, # 5632 
+4195,6802,4013,6803,6804,4196,6805,4014,4015,6806,2847,3271,2848,6807,3484,6808, # 5648 +6809,6810,4441,6811,4442,4197,4443,3272,4805,6812,3412,4016,1579,6813,6814,4017, # 5664 +6815,3869,6816,2964,6817,4806,6818,6819,4018,3646,6820,6821,4807,4019,4020,6822, # 5680 +6823,3560,6824,6825,4021,4444,6826,4198,6827,6828,4445,6829,6830,4199,4808,6831, # 5696 +6832,6833,3870,3019,2458,6834,3753,3413,3350,6835,4809,3871,4810,3561,4446,6836, # 5712 +6837,4447,4811,4812,6838,2459,4448,6839,4449,6840,6841,4022,3872,6842,4813,4814, # 5728 +6843,6844,4815,4200,4201,4202,6845,4023,6846,6847,4450,3562,3873,6848,6849,4816, # 5744 +4817,6850,4451,4818,2139,6851,3563,6852,6853,3351,6854,6855,3352,4024,2709,3414, # 5760 +4203,4452,6856,4204,6857,6858,3874,3875,6859,6860,4819,6861,6862,6863,6864,4453, # 5776 +3647,6865,6866,4820,6867,6868,6869,6870,4454,6871,2869,6872,6873,4821,6874,3754, # 5792 +6875,4822,4205,6876,6877,6878,3648,4206,4455,6879,4823,6880,4824,3876,6881,3055, # 5808 +4207,6882,3415,6883,6884,6885,4208,4209,6886,4210,3353,6887,3354,3564,3209,3485, # 5824 +2652,6888,2728,6889,3210,3755,6890,4025,4456,6891,4825,6892,6893,6894,6895,4211, # 5840 +6896,6897,6898,4826,6899,6900,4212,6901,4827,6902,2773,3565,6903,4828,6904,6905, # 5856 +6906,6907,3649,3650,6908,2849,3566,6909,3567,3100,6910,6911,6912,6913,6914,6915, # 5872 +4026,6916,3355,4829,3056,4457,3756,6917,3651,6918,4213,3652,2870,6919,4458,6920, # 5888 +2438,6921,6922,3757,2774,4830,6923,3356,4831,4832,6924,4833,4459,3653,2507,6925, # 5904 +4834,2535,6926,6927,3273,4027,3147,6928,3568,6929,6930,6931,4460,6932,3877,4461, # 5920 +2729,3654,6933,6934,6935,6936,2175,4835,2630,4214,4028,4462,4836,4215,6937,3148, # 5936 +4216,4463,4837,4838,4217,6938,6939,2850,4839,6940,4464,6941,6942,6943,4840,6944, # 5952 +4218,3274,4465,6945,6946,2710,6947,4841,4466,6948,6949,2894,6950,6951,4842,6952, # 5968 +4219,3057,2871,6953,6954,6955,6956,4467,6957,2711,6958,6959,6960,3275,3101,4843, # 5984 
+6961,3357,3569,6962,4844,6963,6964,4468,4845,3570,6965,3102,4846,3758,6966,4847, # 6000 +3878,4848,4849,4029,6967,2929,3879,4850,4851,6968,6969,1733,6970,4220,6971,6972, # 6016 +6973,6974,6975,6976,4852,6977,6978,6979,6980,6981,6982,3759,6983,6984,6985,3486, # 6032 +3487,6986,3488,3416,6987,6988,6989,6990,6991,6992,6993,6994,6995,6996,6997,4853, # 6048 +6998,6999,4030,7000,7001,3211,7002,7003,4221,7004,7005,3571,4031,7006,3572,7007, # 6064 +2614,4854,2577,7008,7009,2965,3655,3656,4855,2775,3489,3880,4222,4856,3881,4032, # 6080 +3882,3657,2730,3490,4857,7010,3149,7011,4469,4858,2496,3491,4859,2283,7012,7013, # 6096 +7014,2365,4860,4470,7015,7016,3760,7017,7018,4223,1917,7019,7020,7021,4471,7022, # 6112 +2776,4472,7023,7024,7025,7026,4033,7027,3573,4224,4861,4034,4862,7028,7029,1929, # 6128 +3883,4035,7030,4473,3058,7031,2536,3761,3884,7032,4036,7033,2966,2895,1968,4474, # 6144 +3276,4225,3417,3492,4226,2105,7034,7035,1754,2596,3762,4227,4863,4475,3763,4864, # 6160 +3764,2615,2777,3103,3765,3658,3418,4865,2296,3766,2815,7036,7037,7038,3574,2872, # 6176 +3277,4476,7039,4037,4477,7040,7041,4038,7042,7043,7044,7045,7046,7047,2537,7048, # 6192 +7049,7050,7051,7052,7053,7054,4478,7055,7056,3767,3659,4228,3575,7057,7058,4229, # 6208 +7059,7060,7061,3660,7062,3212,7063,3885,4039,2460,7064,7065,7066,7067,7068,7069, # 6224 +7070,7071,7072,7073,7074,4866,3768,4867,7075,7076,7077,7078,4868,3358,3278,2653, # 6240 +7079,7080,4479,3886,7081,7082,4869,7083,7084,7085,7086,7087,7088,2538,7089,7090, # 6256 +7091,4040,3150,3769,4870,4041,2896,3359,4230,2930,7092,3279,7093,2967,4480,3213, # 6272 +4481,3661,7094,7095,7096,7097,7098,7099,7100,7101,7102,2461,3770,7103,7104,4231, # 6288 +3151,7105,7106,7107,4042,3662,7108,7109,4871,3663,4872,4043,3059,7110,7111,7112, # 6304 +3493,2988,7113,4873,7114,7115,7116,3771,4874,7117,7118,4232,4875,7119,3576,2336, # 6320 +4876,7120,4233,3419,4044,4877,4878,4482,4483,4879,4484,4234,7121,3772,4880,1045, # 6336 
+3280,3664,4881,4882,7122,7123,7124,7125,4883,7126,2778,7127,4485,4486,7128,4884, # 6352 +3214,3887,7129,7130,3215,7131,4885,4045,7132,7133,4046,7134,7135,7136,7137,7138, # 6368 +7139,7140,7141,7142,7143,4235,7144,4886,7145,7146,7147,4887,7148,7149,7150,4487, # 6384 +4047,4488,7151,7152,4888,4048,2989,3888,7153,3665,7154,4049,7155,7156,7157,7158, # 6400 +7159,7160,2931,4889,4890,4489,7161,2631,3889,4236,2779,7162,7163,4891,7164,3060, # 6416 +7165,1672,4892,7166,4893,4237,3281,4894,7167,7168,3666,7169,3494,7170,7171,4050, # 6432 +7172,7173,3104,3360,3420,4490,4051,2684,4052,7174,4053,7175,7176,7177,2253,4054, # 6448 +7178,7179,4895,7180,3152,3890,3153,4491,3216,7181,7182,7183,2968,4238,4492,4055, # 6464 +7184,2990,7185,2479,7186,7187,4493,7188,7189,7190,7191,7192,4896,7193,4897,2969, # 6480 +4494,4898,7194,3495,7195,7196,4899,4495,7197,3105,2731,7198,4900,7199,7200,7201, # 6496 +4056,7202,3361,7203,7204,4496,4901,4902,7205,4497,7206,7207,2315,4903,7208,4904, # 6512 +7209,4905,2851,7210,7211,3577,7212,3578,4906,7213,4057,3667,4907,7214,4058,2354, # 6528 +3891,2376,3217,3773,7215,7216,7217,7218,7219,4498,7220,4908,3282,2685,7221,3496, # 6544 +4909,2632,3154,4910,7222,2337,7223,4911,7224,7225,7226,4912,4913,3283,4239,4499, # 6560 +7227,2816,7228,7229,7230,7231,7232,7233,7234,4914,4500,4501,7235,7236,7237,2686, # 6576 +7238,4915,7239,2897,4502,7240,4503,7241,2516,7242,4504,3362,3218,7243,7244,7245, # 6592 +4916,7246,7247,4505,3363,7248,7249,7250,7251,3774,4506,7252,7253,4917,7254,7255, # 6608 +3284,2991,4918,4919,3219,3892,4920,3106,3497,4921,7256,7257,7258,4922,7259,4923, # 6624 +3364,4507,4508,4059,7260,4240,3498,7261,7262,4924,7263,2992,3893,4060,3220,7264, # 6640 +7265,7266,7267,7268,7269,4509,3775,7270,2817,7271,4061,4925,4510,3776,7272,4241, # 6656 +4511,3285,7273,7274,3499,7275,7276,7277,4062,4512,4926,7278,3107,3894,7279,7280, # 6672 +4927,7281,4513,7282,7283,3668,7284,7285,4242,4514,4243,7286,2058,4515,4928,4929, # 6688 
+4516,7287,3286,4244,7288,4517,7289,7290,7291,3669,7292,7293,4930,4931,4932,2355, # 6704 +4933,7294,2633,4518,7295,4245,7296,7297,4519,7298,7299,4520,4521,4934,7300,4246, # 6720 +4522,7301,7302,7303,3579,7304,4247,4935,7305,4936,7306,7307,7308,7309,3777,7310, # 6736 +4523,7311,7312,7313,4248,3580,7314,4524,3778,4249,7315,3581,7316,3287,7317,3221, # 6752 +7318,4937,7319,7320,7321,7322,7323,7324,4938,4939,7325,4525,7326,7327,7328,4063, # 6768 +7329,7330,4940,7331,7332,4941,7333,4526,7334,3500,2780,1741,4942,2026,1742,7335, # 6784 +7336,3582,4527,2388,7337,7338,7339,4528,7340,4250,4943,7341,7342,7343,4944,7344, # 6800 +7345,7346,3020,7347,4945,7348,7349,7350,7351,3895,7352,3896,4064,3897,7353,7354, # 6816 +7355,4251,7356,7357,3898,7358,3779,7359,3780,3288,7360,7361,4529,7362,4946,4530, # 6832 +2027,7363,3899,4531,4947,3222,3583,7364,4948,7365,7366,7367,7368,4949,3501,4950, # 6848 +3781,4951,4532,7369,2517,4952,4252,4953,3155,7370,4954,4955,4253,2518,4533,7371, # 6864 +7372,2712,4254,7373,7374,7375,3670,4956,3671,7376,2389,3502,4065,7377,2338,7378, # 6880 +7379,7380,7381,3061,7382,4957,7383,7384,7385,7386,4958,4534,7387,7388,2993,7389, # 6896 +3062,7390,4959,7391,7392,7393,4960,3108,4961,7394,4535,7395,4962,3421,4536,7396, # 6912 +4963,7397,4964,1857,7398,4965,7399,7400,2176,3584,4966,7401,7402,3422,4537,3900, # 6928 +3585,7403,3782,7404,2852,7405,7406,7407,4538,3783,2654,3423,4967,4539,7408,3784, # 6944 +3586,2853,4540,4541,7409,3901,7410,3902,7411,7412,3785,3109,2327,3903,7413,7414, # 6960 +2970,4066,2932,7415,7416,7417,3904,3672,3424,7418,4542,4543,4544,7419,4968,7420, # 6976 +7421,4255,7422,7423,7424,7425,7426,4067,7427,3673,3365,4545,7428,3110,2559,3674, # 6992 +7429,7430,3156,7431,7432,3503,7433,3425,4546,7434,3063,2873,7435,3223,4969,4547, # 7008 +4548,2898,4256,4068,7436,4069,3587,3786,2933,3787,4257,4970,4971,3788,7437,4972, # 7024 +3064,7438,4549,7439,7440,7441,7442,7443,4973,3905,7444,2874,7445,7446,7447,7448, # 7040 
+3021,7449,4550,3906,3588,4974,7450,7451,3789,3675,7452,2578,7453,4070,7454,7455, # 7056 +7456,4258,3676,7457,4975,7458,4976,4259,3790,3504,2634,4977,3677,4551,4260,7459, # 7072 +7460,7461,7462,3907,4261,4978,7463,7464,7465,7466,4979,4980,7467,7468,2213,4262, # 7088 +7469,7470,7471,3678,4981,7472,2439,7473,4263,3224,3289,7474,3908,2415,4982,7475, # 7104 +4264,7476,4983,2655,7477,7478,2732,4552,2854,2875,7479,7480,4265,7481,4553,4984, # 7120 +7482,7483,4266,7484,3679,3366,3680,2818,2781,2782,3367,3589,4554,3065,7485,4071, # 7136 +2899,7486,7487,3157,2462,4072,4555,4073,4985,4986,3111,4267,2687,3368,4556,4074, # 7152 +3791,4268,7488,3909,2783,7489,2656,1962,3158,4557,4987,1963,3159,3160,7490,3112, # 7168 +4988,4989,3022,4990,4991,3792,2855,7491,7492,2971,4558,7493,7494,4992,7495,7496, # 7184 +7497,7498,4993,7499,3426,4559,4994,7500,3681,4560,4269,4270,3910,7501,4075,4995, # 7200 +4271,7502,7503,4076,7504,4996,7505,3225,4997,4272,4077,2819,3023,7506,7507,2733, # 7216 +4561,7508,4562,7509,3369,3793,7510,3590,2508,7511,7512,4273,3113,2994,2616,7513, # 7232 +7514,7515,7516,7517,7518,2820,3911,4078,2748,7519,7520,4563,4998,7521,7522,7523, # 7248 +7524,4999,4274,7525,4564,3682,2239,4079,4565,7526,7527,7528,7529,5000,7530,7531, # 7264 +5001,4275,3794,7532,7533,7534,3066,5002,4566,3161,7535,7536,4080,7537,3162,7538, # 7280 +7539,4567,7540,7541,7542,7543,7544,7545,5003,7546,4568,7547,7548,7549,7550,7551, # 7296 +7552,7553,7554,7555,7556,5004,7557,7558,7559,5005,7560,3795,7561,4569,7562,7563, # 7312 +7564,2821,3796,4276,4277,4081,7565,2876,7566,5006,7567,7568,2900,7569,3797,3912, # 7328 +7570,7571,7572,4278,7573,7574,7575,5007,7576,7577,5008,7578,7579,4279,2934,7580, # 7344 +7581,5009,7582,4570,7583,4280,7584,7585,7586,4571,4572,3913,7587,4573,3505,7588, # 7360 +5010,7589,7590,7591,7592,3798,4574,7593,7594,5011,7595,4281,7596,7597,7598,4282, # 7376 +5012,7599,7600,5013,3163,7601,5014,7602,3914,7603,7604,2734,4575,4576,4577,7605, # 7392 
+7606,7607,7608,7609,3506,5015,4578,7610,4082,7611,2822,2901,2579,3683,3024,4579, # 7408 +3507,7612,4580,7613,3226,3799,5016,7614,7615,7616,7617,7618,7619,7620,2995,3290, # 7424 +7621,4083,7622,5017,7623,7624,7625,7626,7627,4581,3915,7628,3291,7629,5018,7630, # 7440 +7631,7632,7633,4084,7634,7635,3427,3800,7636,7637,4582,7638,5019,4583,5020,7639, # 7456 +3916,7640,3801,5021,4584,4283,7641,7642,3428,3591,2269,7643,2617,7644,4585,3592, # 7472 +7645,4586,2902,7646,7647,3227,5022,7648,4587,7649,4284,7650,7651,7652,4588,2284, # 7488 +7653,5023,7654,7655,7656,4589,5024,3802,7657,7658,5025,3508,4590,7659,7660,7661, # 7504 +1969,5026,7662,7663,3684,1821,2688,7664,2028,2509,4285,7665,2823,1841,7666,2689, # 7520 +3114,7667,3917,4085,2160,5027,5028,2972,7668,5029,7669,7670,7671,3593,4086,7672, # 7536 +4591,4087,5030,3803,7673,7674,7675,7676,7677,7678,7679,4286,2366,4592,4593,3067, # 7552 +2328,7680,7681,4594,3594,3918,2029,4287,7682,5031,3919,3370,4288,4595,2856,7683, # 7568 +3509,7684,7685,5032,5033,7686,7687,3804,2784,7688,7689,7690,7691,3371,7692,7693, # 7584 +2877,5034,7694,7695,3920,4289,4088,7696,7697,7698,5035,7699,5036,4290,5037,5038, # 7600 +5039,7700,7701,7702,5040,5041,3228,7703,1760,7704,5042,3229,4596,2106,4089,7705, # 7616 +4597,2824,5043,2107,3372,7706,4291,4090,5044,7707,4091,7708,5045,3025,3805,4598, # 7632 +4292,4293,4294,3373,7709,4599,7710,5046,7711,7712,5047,5048,3806,7713,7714,7715, # 7648 +5049,7716,7717,7718,7719,4600,5050,7720,7721,7722,5051,7723,4295,3429,7724,7725, # 7664 +7726,7727,3921,7728,3292,5052,4092,7729,7730,7731,7732,7733,7734,7735,5053,5054, # 7680 +7736,7737,7738,7739,3922,3685,7740,7741,7742,7743,2635,5055,7744,5056,4601,7745, # 7696 +7746,2560,7747,7748,7749,7750,3923,7751,7752,7753,7754,7755,4296,2903,7756,7757, # 7712 +7758,7759,7760,3924,7761,5057,4297,7762,7763,5058,4298,7764,4093,7765,7766,5059, # 7728 +3925,7767,7768,7769,7770,7771,7772,7773,7774,7775,7776,3595,7777,4299,5060,4094, # 7744 
+7778,3293,5061,7779,7780,4300,7781,7782,4602,7783,3596,7784,7785,3430,2367,7786, # 7760 +3164,5062,5063,4301,7787,7788,4095,5064,5065,7789,3374,3115,7790,7791,7792,7793, # 7776 +7794,7795,7796,3597,4603,7797,7798,3686,3116,3807,5066,7799,7800,5067,7801,7802, # 7792 +4604,4302,5068,4303,4096,7803,7804,3294,7805,7806,5069,4605,2690,7807,3026,7808, # 7808 +7809,7810,7811,7812,7813,7814,7815,7816,7817,7818,7819,7820,7821,7822,7823,7824, # 7824 +7825,7826,7827,7828,7829,7830,7831,7832,7833,7834,7835,7836,7837,7838,7839,7840, # 7840 +7841,7842,7843,7844,7845,7846,7847,7848,7849,7850,7851,7852,7853,7854,7855,7856, # 7856 +7857,7858,7859,7860,7861,7862,7863,7864,7865,7866,7867,7868,7869,7870,7871,7872, # 7872 +7873,7874,7875,7876,7877,7878,7879,7880,7881,7882,7883,7884,7885,7886,7887,7888, # 7888 +7889,7890,7891,7892,7893,7894,7895,7896,7897,7898,7899,7900,7901,7902,7903,7904, # 7904 +7905,7906,7907,7908,7909,7910,7911,7912,7913,7914,7915,7916,7917,7918,7919,7920, # 7920 +7921,7922,7923,7924,3926,7925,7926,7927,7928,7929,7930,7931,7932,7933,7934,7935, # 7936 +7936,7937,7938,7939,7940,7941,7942,7943,7944,7945,7946,7947,7948,7949,7950,7951, # 7952 +7952,7953,7954,7955,7956,7957,7958,7959,7960,7961,7962,7963,7964,7965,7966,7967, # 7968 +7968,7969,7970,7971,7972,7973,7974,7975,7976,7977,7978,7979,7980,7981,7982,7983, # 7984 +7984,7985,7986,7987,7988,7989,7990,7991,7992,7993,7994,7995,7996,7997,7998,7999, # 8000 +8000,8001,8002,8003,8004,8005,8006,8007,8008,8009,8010,8011,8012,8013,8014,8015, # 8016 +8016,8017,8018,8019,8020,8021,8022,8023,8024,8025,8026,8027,8028,8029,8030,8031, # 8032 +8032,8033,8034,8035,8036,8037,8038,8039,8040,8041,8042,8043,8044,8045,8046,8047, # 8048 +8048,8049,8050,8051,8052,8053,8054,8055,8056,8057,8058,8059,8060,8061,8062,8063, # 8064 +8064,8065,8066,8067,8068,8069,8070,8071,8072,8073,8074,8075,8076,8077,8078,8079, # 8080 +8080,8081,8082,8083,8084,8085,8086,8087,8088,8089,8090,8091,8092,8093,8094,8095, # 8096 
+8096,8097,8098,8099,8100,8101,8102,8103,8104,8105,8106,8107,8108,8109,8110,8111, # 8112 +8112,8113,8114,8115,8116,8117,8118,8119,8120,8121,8122,8123,8124,8125,8126,8127, # 8128 +8128,8129,8130,8131,8132,8133,8134,8135,8136,8137,8138,8139,8140,8141,8142,8143, # 8144 +8144,8145,8146,8147,8148,8149,8150,8151,8152,8153,8154,8155,8156,8157,8158,8159, # 8160 +8160,8161,8162,8163,8164,8165,8166,8167,8168,8169,8170,8171,8172,8173,8174,8175, # 8176 +8176,8177,8178,8179,8180,8181,8182,8183,8184,8185,8186,8187,8188,8189,8190,8191, # 8192 +8192,8193,8194,8195,8196,8197,8198,8199,8200,8201,8202,8203,8204,8205,8206,8207, # 8208 +8208,8209,8210,8211,8212,8213,8214,8215,8216,8217,8218,8219,8220,8221,8222,8223, # 8224 +8224,8225,8226,8227,8228,8229,8230,8231,8232,8233,8234,8235,8236,8237,8238,8239, # 8240 +8240,8241,8242,8243,8244,8245,8246,8247,8248,8249,8250,8251,8252,8253,8254,8255, # 8256 +8256,8257,8258,8259,8260,8261,8262,8263,8264,8265,8266,8267,8268,8269,8270,8271) # 8272 diff --git a/fanficdownloader/chardet/jpcntx.py b/fanficdownloader/chardet/jpcntx.py new file mode 100644 index 00000000..93db4a9c --- /dev/null +++ b/fanficdownloader/chardet/jpcntx.py @@ -0,0 +1,210 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Communicator client code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. 
+# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +import constants + +NUM_OF_CATEGORY = 6 +DONT_KNOW = -1 +ENOUGH_REL_THRESHOLD = 100 +MAX_REL_THRESHOLD = 1000 +MINIMUM_DATA_THRESHOLD = 4 + +# This is hiragana 2-char sequence table, the number in each cell represents its frequency category +jp2CharContext = ( \ +(0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1), +(2,4,0,4,0,3,0,4,0,3,4,4,4,2,4,3,3,4,3,2,3,3,4,2,3,3,3,2,4,1,4,3,3,1,5,4,3,4,3,4,3,5,3,0,3,5,4,2,0,3,1,0,3,3,0,3,3,0,1,1,0,4,3,0,3,3,0,4,0,2,0,3,5,5,5,5,4,0,4,1,0,3,4), +(0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2), +(0,4,0,5,0,5,0,4,0,4,5,4,4,3,5,3,5,1,5,3,4,3,4,4,3,4,3,3,4,3,5,4,4,3,5,5,3,5,5,5,3,5,5,3,4,5,5,3,1,3,2,0,3,4,0,4,2,0,4,2,1,5,3,2,3,5,0,4,0,2,0,5,4,4,5,4,5,0,4,0,0,4,4), +(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), +(0,3,0,4,0,3,0,3,0,4,5,4,3,3,3,3,4,3,5,4,4,3,5,4,4,3,4,3,4,4,4,4,5,3,4,4,3,4,5,5,4,5,5,1,4,5,4,3,0,3,3,1,3,3,0,4,4,0,3,3,1,5,3,3,3,5,0,4,0,3,0,4,4,3,4,3,3,0,4,1,1,3,4), 
+(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), +(0,4,0,3,0,3,0,4,0,3,4,4,3,2,2,1,2,1,3,1,3,3,3,3,3,4,3,1,3,3,5,3,3,0,4,3,0,5,4,3,3,5,4,4,3,4,4,5,0,1,2,0,1,2,0,2,2,0,1,0,0,5,2,2,1,4,0,3,0,1,0,4,4,3,5,4,3,0,2,1,0,4,3), +(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), +(0,3,0,5,0,4,0,2,1,4,4,2,4,1,4,2,4,2,4,3,3,3,4,3,3,3,3,1,4,2,3,3,3,1,4,4,1,1,1,4,3,3,2,0,2,4,3,2,0,3,3,0,3,1,1,0,0,0,3,3,0,4,2,2,3,4,0,4,0,3,0,4,4,5,3,4,4,0,3,0,0,1,4), +(1,4,0,4,0,4,0,4,0,3,5,4,4,3,4,3,5,4,3,3,4,3,5,4,4,4,4,3,4,2,4,3,3,1,5,4,3,2,4,5,4,5,5,4,4,5,4,4,0,3,2,2,3,3,0,4,3,1,3,2,1,4,3,3,4,5,0,3,0,2,0,4,5,5,4,5,4,0,4,0,0,5,4), +(0,5,0,5,0,4,0,3,0,4,4,3,4,3,3,3,4,0,4,4,4,3,4,3,4,3,3,1,4,2,4,3,4,0,5,4,1,4,5,4,4,5,3,2,4,3,4,3,2,4,1,3,3,3,2,3,2,0,4,3,3,4,3,3,3,4,0,4,0,3,0,4,5,4,4,4,3,0,4,1,0,1,3), +(0,3,1,4,0,3,0,2,0,3,4,4,3,1,4,2,3,3,4,3,4,3,4,3,4,4,3,2,3,1,5,4,4,1,4,4,3,5,4,4,3,5,5,4,3,4,4,3,1,2,3,1,2,2,0,3,2,0,3,1,0,5,3,3,3,4,3,3,3,3,4,4,4,4,5,4,2,0,3,3,2,4,3), +(0,2,0,3,0,1,0,1,0,0,3,2,0,0,2,0,1,0,2,1,3,3,3,1,2,3,1,0,1,0,4,2,1,1,3,3,0,4,3,3,1,4,3,3,0,3,3,2,0,0,0,0,1,0,0,2,0,0,0,0,0,4,1,0,2,3,2,2,2,1,3,3,3,4,4,3,2,0,3,1,0,3,3), +(0,4,0,4,0,3,0,3,0,4,4,4,3,3,3,3,3,3,4,3,4,2,4,3,4,3,3,2,4,3,4,5,4,1,4,5,3,5,4,5,3,5,4,0,3,5,5,3,1,3,3,2,2,3,0,3,4,1,3,3,2,4,3,3,3,4,0,4,0,3,0,4,5,4,4,5,3,0,4,1,0,3,4), +(0,2,0,3,0,3,0,0,0,2,2,2,1,0,1,0,0,0,3,0,3,0,3,0,1,3,1,0,3,1,3,3,3,1,3,3,3,0,1,3,1,3,4,0,0,3,1,1,0,3,2,0,0,0,0,1,3,0,1,0,0,3,3,2,0,3,0,0,0,0,0,3,4,3,4,3,3,0,3,0,0,2,3), +(2,3,0,3,0,2,0,1,0,3,3,4,3,1,3,1,1,1,3,1,4,3,4,3,3,3,0,0,3,1,5,4,3,1,4,3,2,5,5,4,4,4,4,3,3,4,4,4,0,2,1,1,3,2,0,1,2,0,0,1,0,4,1,3,3,3,0,3,0,1,0,4,4,4,5,5,3,0,2,0,0,4,4), 
+(0,2,0,1,0,3,1,3,0,2,3,3,3,0,3,1,0,0,3,0,3,2,3,1,3,2,1,1,0,0,4,2,1,0,2,3,1,4,3,2,0,4,4,3,1,3,1,3,0,1,0,0,1,0,0,0,1,0,0,0,0,4,1,1,1,2,0,3,0,0,0,3,4,2,4,3,2,0,1,0,0,3,3), +(0,1,0,4,0,5,0,4,0,2,4,4,2,3,3,2,3,3,5,3,3,3,4,3,4,2,3,0,4,3,3,3,4,1,4,3,2,1,5,5,3,4,5,1,3,5,4,2,0,3,3,0,1,3,0,4,2,0,1,3,1,4,3,3,3,3,0,3,0,1,0,3,4,4,4,5,5,0,3,0,1,4,5), +(0,2,0,3,0,3,0,0,0,2,3,1,3,0,4,0,1,1,3,0,3,4,3,2,3,1,0,3,3,2,3,1,3,0,2,3,0,2,1,4,1,2,2,0,0,3,3,0,0,2,0,0,0,1,0,0,0,0,2,2,0,3,2,1,3,3,0,2,0,2,0,0,3,3,1,2,4,0,3,0,2,2,3), +(2,4,0,5,0,4,0,4,0,2,4,4,4,3,4,3,3,3,1,2,4,3,4,3,4,4,5,0,3,3,3,3,2,0,4,3,1,4,3,4,1,4,4,3,3,4,4,3,1,2,3,0,4,2,0,4,1,0,3,3,0,4,3,3,3,4,0,4,0,2,0,3,5,3,4,5,2,0,3,0,0,4,5), +(0,3,0,4,0,1,0,1,0,1,3,2,2,1,3,0,3,0,2,0,2,0,3,0,2,0,0,0,1,0,1,1,0,0,3,1,0,0,0,4,0,3,1,0,2,1,3,0,0,0,0,0,0,3,0,0,0,0,0,0,0,4,2,2,3,1,0,3,0,0,0,1,4,4,4,3,0,0,4,0,0,1,4), +(1,4,1,5,0,3,0,3,0,4,5,4,4,3,5,3,3,4,4,3,4,1,3,3,3,3,2,1,4,1,5,4,3,1,4,4,3,5,4,4,3,5,4,3,3,4,4,4,0,3,3,1,2,3,0,3,1,0,3,3,0,5,4,4,4,4,4,4,3,3,5,4,4,3,3,5,4,0,3,2,0,4,4), +(0,2,0,3,0,1,0,0,0,1,3,3,3,2,4,1,3,0,3,1,3,0,2,2,1,1,0,0,2,0,4,3,1,0,4,3,0,4,4,4,1,4,3,1,1,3,3,1,0,2,0,0,1,3,0,0,0,0,2,0,0,4,3,2,4,3,5,4,3,3,3,4,3,3,4,3,3,0,2,1,0,3,3), +(0,2,0,4,0,3,0,2,0,2,5,5,3,4,4,4,4,1,4,3,3,0,4,3,4,3,1,3,3,2,4,3,0,3,4,3,0,3,4,4,2,4,4,0,4,5,3,3,2,2,1,1,1,2,0,1,5,0,3,3,2,4,3,3,3,4,0,3,0,2,0,4,4,3,5,5,0,0,3,0,2,3,3), +(0,3,0,4,0,3,0,1,0,3,4,3,3,1,3,3,3,0,3,1,3,0,4,3,3,1,1,0,3,0,3,3,0,0,4,4,0,1,5,4,3,3,5,0,3,3,4,3,0,2,0,1,1,1,0,1,3,0,1,2,1,3,3,2,3,3,0,3,0,1,0,1,3,3,4,4,1,0,1,2,2,1,3), +(0,1,0,4,0,4,0,3,0,1,3,3,3,2,3,1,1,0,3,0,3,3,4,3,2,4,2,0,1,0,4,3,2,0,4,3,0,5,3,3,2,4,4,4,3,3,3,4,0,1,3,0,0,1,0,0,1,0,0,0,0,4,2,3,3,3,0,3,0,0,0,4,4,4,5,3,2,0,3,3,0,3,5), +(0,2,0,3,0,0,0,3,0,1,3,0,2,0,0,0,1,0,3,1,1,3,3,0,0,3,0,0,3,0,2,3,1,0,3,1,0,3,3,2,0,4,2,2,0,2,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,2,1,2,0,1,0,1,0,0,0,1,3,1,2,0,0,0,1,0,0,1,4), 
+(0,3,0,3,0,5,0,1,0,2,4,3,1,3,3,2,1,1,5,2,1,0,5,1,2,0,0,0,3,3,2,2,3,2,4,3,0,0,3,3,1,3,3,0,2,5,3,4,0,3,3,0,1,2,0,2,2,0,3,2,0,2,2,3,3,3,0,2,0,1,0,3,4,4,2,5,4,0,3,0,0,3,5), +(0,3,0,3,0,3,0,1,0,3,3,3,3,0,3,0,2,0,2,1,1,0,2,0,1,0,0,0,2,1,0,0,1,0,3,2,0,0,3,3,1,2,3,1,0,3,3,0,0,1,0,0,0,0,0,2,0,0,0,0,0,2,3,1,2,3,0,3,0,1,0,3,2,1,0,4,3,0,1,1,0,3,3), +(0,4,0,5,0,3,0,3,0,4,5,5,4,3,5,3,4,3,5,3,3,2,5,3,4,4,4,3,4,3,4,5,5,3,4,4,3,4,4,5,4,4,4,3,4,5,5,4,2,3,4,2,3,4,0,3,3,1,4,3,2,4,3,3,5,5,0,3,0,3,0,5,5,5,5,4,4,0,4,0,1,4,4), +(0,4,0,4,0,3,0,3,0,3,5,4,4,2,3,2,5,1,3,2,5,1,4,2,3,2,3,3,4,3,3,3,3,2,5,4,1,3,3,5,3,4,4,0,4,4,3,1,1,3,1,0,2,3,0,2,3,0,3,0,0,4,3,1,3,4,0,3,0,2,0,4,4,4,3,4,5,0,4,0,0,3,4), +(0,3,0,3,0,3,1,2,0,3,4,4,3,3,3,0,2,2,4,3,3,1,3,3,3,1,1,0,3,1,4,3,2,3,4,4,2,4,4,4,3,4,4,3,2,4,4,3,1,3,3,1,3,3,0,4,1,0,2,2,1,4,3,2,3,3,5,4,3,3,5,4,4,3,3,0,4,0,3,2,2,4,4), +(0,2,0,1,0,0,0,0,0,1,2,1,3,0,0,0,0,0,2,0,1,2,1,0,0,1,0,0,0,0,3,0,0,1,0,1,1,3,1,0,0,0,1,1,0,1,1,0,0,0,0,0,2,0,0,0,0,0,0,0,0,1,1,2,2,0,3,4,0,0,0,1,1,0,0,1,0,0,0,0,0,1,1), +(0,1,0,0,0,1,0,0,0,0,4,0,4,1,4,0,3,0,4,0,3,0,4,0,3,0,3,0,4,1,5,1,4,0,0,3,0,5,0,5,2,0,1,0,0,0,2,1,4,0,1,3,0,0,3,0,0,3,1,1,4,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0), +(1,4,0,5,0,3,0,2,0,3,5,4,4,3,4,3,5,3,4,3,3,0,4,3,3,3,3,3,3,2,4,4,3,1,3,4,4,5,4,4,3,4,4,1,3,5,4,3,3,3,1,2,2,3,3,1,3,1,3,3,3,5,3,3,4,5,0,3,0,3,0,3,4,3,4,4,3,0,3,0,2,4,3), +(0,1,0,4,0,0,0,0,0,1,4,0,4,1,4,2,4,0,3,0,1,0,1,0,0,0,0,0,2,0,3,1,1,1,0,3,0,0,0,1,2,1,0,0,1,1,1,1,0,1,0,0,0,1,0,0,3,0,0,0,0,3,2,0,2,2,0,1,0,0,0,2,3,2,3,3,0,0,0,0,2,1,0), +(0,5,1,5,0,3,0,3,0,5,4,4,5,1,5,3,3,0,4,3,4,3,5,3,4,3,3,2,4,3,4,3,3,0,3,3,1,4,4,3,4,4,4,3,4,5,5,3,2,3,1,1,3,3,1,3,1,1,3,3,2,4,5,3,3,5,0,4,0,3,0,4,4,3,5,3,3,0,3,4,0,4,3), +(0,5,0,5,0,3,0,2,0,4,4,3,5,2,4,3,3,3,4,4,4,3,5,3,5,3,3,1,4,0,4,3,3,0,3,3,0,4,4,4,4,5,4,3,3,5,5,3,2,3,1,2,3,2,0,1,0,0,3,2,2,4,4,3,1,5,0,4,0,3,0,4,3,1,3,2,1,0,3,3,0,3,3), 
+(0,4,0,5,0,5,0,4,0,4,5,5,5,3,4,3,3,2,5,4,4,3,5,3,5,3,4,0,4,3,4,4,3,2,4,4,3,4,5,4,4,5,5,0,3,5,5,4,1,3,3,2,3,3,1,3,1,0,4,3,1,4,4,3,4,5,0,4,0,2,0,4,3,4,4,3,3,0,4,0,0,5,5), +(0,4,0,4,0,5,0,1,1,3,3,4,4,3,4,1,3,0,5,1,3,0,3,1,3,1,1,0,3,0,3,3,4,0,4,3,0,4,4,4,3,4,4,0,3,5,4,1,0,3,0,0,2,3,0,3,1,0,3,1,0,3,2,1,3,5,0,3,0,1,0,3,2,3,3,4,4,0,2,2,0,4,4), +(2,4,0,5,0,4,0,3,0,4,5,5,4,3,5,3,5,3,5,3,5,2,5,3,4,3,3,4,3,4,5,3,2,1,5,4,3,2,3,4,5,3,4,1,2,5,4,3,0,3,3,0,3,2,0,2,3,0,4,1,0,3,4,3,3,5,0,3,0,1,0,4,5,5,5,4,3,0,4,2,0,3,5), +(0,5,0,4,0,4,0,2,0,5,4,3,4,3,4,3,3,3,4,3,4,2,5,3,5,3,4,1,4,3,4,4,4,0,3,5,0,4,4,4,4,5,3,1,3,4,5,3,3,3,3,3,3,3,0,2,2,0,3,3,2,4,3,3,3,5,3,4,1,3,3,5,3,2,0,0,0,0,4,3,1,3,3), +(0,1,0,3,0,3,0,1,0,1,3,3,3,2,3,3,3,0,3,0,0,0,3,1,3,0,0,0,2,2,2,3,0,0,3,2,0,1,2,4,1,3,3,0,0,3,3,3,0,1,0,0,2,1,0,0,3,0,3,1,0,3,0,0,1,3,0,2,0,1,0,3,3,1,3,3,0,0,1,1,0,3,3), +(0,2,0,3,0,2,1,4,0,2,2,3,1,1,3,1,1,0,2,0,3,1,2,3,1,3,0,0,1,0,4,3,2,3,3,3,1,4,2,3,3,3,3,1,0,3,1,4,0,1,1,0,1,2,0,1,1,0,1,1,0,3,1,3,2,2,0,1,0,0,0,2,3,3,3,1,0,0,0,0,0,2,3), +(0,5,0,4,0,5,0,2,0,4,5,5,3,3,4,3,3,1,5,4,4,2,4,4,4,3,4,2,4,3,5,5,4,3,3,4,3,3,5,5,4,5,5,1,3,4,5,3,1,4,3,1,3,3,0,3,3,1,4,3,1,4,5,3,3,5,0,4,0,3,0,5,3,3,1,4,3,0,4,0,1,5,3), +(0,5,0,5,0,4,0,2,0,4,4,3,4,3,3,3,3,3,5,4,4,4,4,4,4,5,3,3,5,2,4,4,4,3,4,4,3,3,4,4,5,5,3,3,4,3,4,3,3,4,3,3,3,3,1,2,2,1,4,3,3,5,4,4,3,4,0,4,0,3,0,4,4,4,4,4,1,0,4,2,0,2,4), +(0,4,0,4,0,3,0,1,0,3,5,2,3,0,3,0,2,1,4,2,3,3,4,1,4,3,3,2,4,1,3,3,3,0,3,3,0,0,3,3,3,5,3,3,3,3,3,2,0,2,0,0,2,0,0,2,0,0,1,0,0,3,1,2,2,3,0,3,0,2,0,4,4,3,3,4,1,0,3,0,0,2,4), +(0,0,0,4,0,0,0,0,0,0,1,0,1,0,2,0,0,0,0,0,1,0,2,0,1,0,0,0,0,0,3,1,3,0,3,2,0,0,0,1,0,3,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,4,0,2,0,0,0,0,0,0,2), +(0,2,1,3,0,2,0,2,0,3,3,3,3,1,3,1,3,3,3,3,3,3,4,2,2,1,2,1,4,0,4,3,1,3,3,3,2,4,3,5,4,3,3,3,3,3,3,3,0,1,3,0,2,0,0,1,0,0,1,0,0,4,2,0,2,3,0,3,3,0,3,3,4,2,3,1,4,0,1,2,0,2,3), 
+(0,3,0,3,0,1,0,3,0,2,3,3,3,0,3,1,2,0,3,3,2,3,3,2,3,2,3,1,3,0,4,3,2,0,3,3,1,4,3,3,2,3,4,3,1,3,3,1,1,0,1,1,0,1,0,1,0,1,0,0,0,4,1,1,0,3,0,3,1,0,2,3,3,3,3,3,1,0,0,2,0,3,3), +(0,0,0,0,0,0,0,0,0,0,3,0,2,0,3,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,3,0,3,0,3,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,2,0,2,3,0,0,0,0,0,0,0,0,3), +(0,2,0,3,1,3,0,3,0,2,3,3,3,1,3,1,3,1,3,1,3,3,3,1,3,0,2,3,1,1,4,3,3,2,3,3,1,2,2,4,1,3,3,0,1,4,2,3,0,1,3,0,3,0,0,1,3,0,2,0,0,3,3,2,1,3,0,3,0,2,0,3,4,4,4,3,1,0,3,0,0,3,3), +(0,2,0,1,0,2,0,0,0,1,3,2,2,1,3,0,1,1,3,0,3,2,3,1,2,0,2,0,1,1,3,3,3,0,3,3,1,1,2,3,2,3,3,1,2,3,2,0,0,1,0,0,0,0,0,0,3,0,1,0,0,2,1,2,1,3,0,3,0,0,0,3,4,4,4,3,2,0,2,0,0,2,4), +(0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,2,2,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,3,1,0,0,0,0,0,0,0,3), +(0,3,0,3,0,2,0,3,0,3,3,3,2,3,2,2,2,0,3,1,3,3,3,2,3,3,0,0,3,0,3,2,2,0,2,3,1,4,3,4,3,3,2,3,1,5,4,4,0,3,1,2,1,3,0,3,1,1,2,0,2,3,1,3,1,3,0,3,0,1,0,3,3,4,4,2,1,0,2,1,0,2,4), +(0,1,0,3,0,1,0,2,0,1,4,2,5,1,4,0,2,0,2,1,3,1,4,0,2,1,0,0,2,1,4,1,1,0,3,3,0,5,1,3,2,3,3,1,0,3,2,3,0,1,0,0,0,0,0,0,1,0,0,0,0,4,0,1,0,3,0,2,0,1,0,3,3,3,4,3,3,0,0,0,0,2,3), +(0,0,0,1,0,0,0,0,0,0,2,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,1,0,0,1,0,0,0,0,0,3), +(0,1,0,3,0,4,0,3,0,2,4,3,1,0,3,2,2,1,3,1,2,2,3,1,1,1,2,1,3,0,1,2,0,1,3,2,1,3,0,5,5,1,0,0,1,3,2,1,0,3,0,0,1,0,0,0,0,0,3,4,0,1,1,1,3,2,0,2,0,1,0,2,3,3,1,2,3,0,1,0,1,0,4), +(0,0,0,1,0,3,0,3,0,2,2,1,0,0,4,0,3,0,3,1,3,0,3,0,3,0,1,0,3,0,3,1,3,0,3,3,0,0,1,2,1,1,1,0,1,2,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,2,2,1,2,0,0,2,0,0,0,0,2,3,3,3,3,0,0,0,0,1,4), +(0,0,0,3,0,3,0,0,0,0,3,1,1,0,3,0,1,0,2,0,1,0,0,0,0,0,0,0,1,0,3,0,2,0,2,3,0,0,2,2,3,1,2,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,2,0,0,0,0,2,3), 
+(2,4,0,5,0,5,0,4,0,3,4,3,3,3,4,3,3,3,4,3,4,4,5,4,5,5,5,2,3,0,5,5,4,1,5,4,3,1,5,4,3,4,4,3,3,4,3,3,0,3,2,0,2,3,0,3,0,0,3,3,0,5,3,2,3,3,0,3,0,3,0,3,4,5,4,5,3,0,4,3,0,3,4), +(0,3,0,3,0,3,0,3,0,3,3,4,3,2,3,2,3,0,4,3,3,3,3,3,3,3,3,0,3,2,4,3,3,1,3,4,3,4,4,4,3,4,4,3,2,4,4,1,0,2,0,0,1,1,0,2,0,0,3,1,0,5,3,2,1,3,0,3,0,1,2,4,3,2,4,3,3,0,3,2,0,4,4), +(0,3,0,3,0,1,0,0,0,1,4,3,3,2,3,1,3,1,4,2,3,2,4,2,3,4,3,0,2,2,3,3,3,0,3,3,3,0,3,4,1,3,3,0,3,4,3,3,0,1,1,0,1,0,0,0,4,0,3,0,0,3,1,2,1,3,0,4,0,1,0,4,3,3,4,3,3,0,2,0,0,3,3), +(0,3,0,4,0,1,0,3,0,3,4,3,3,0,3,3,3,1,3,1,3,3,4,3,3,3,0,0,3,1,5,3,3,1,3,3,2,5,4,3,3,4,5,3,2,5,3,4,0,1,0,0,0,0,0,2,0,0,1,1,0,4,2,2,1,3,0,3,0,2,0,4,4,3,5,3,2,0,1,1,0,3,4), +(0,5,0,4,0,5,0,2,0,4,4,3,3,2,3,3,3,1,4,3,4,1,5,3,4,3,4,0,4,2,4,3,4,1,5,4,0,4,4,4,4,5,4,1,3,5,4,2,1,4,1,1,3,2,0,3,1,0,3,2,1,4,3,3,3,4,0,4,0,3,0,4,4,4,3,3,3,0,4,2,0,3,4), +(1,4,0,4,0,3,0,1,0,3,3,3,1,1,3,3,2,2,3,3,1,0,3,2,2,1,2,0,3,1,2,1,2,0,3,2,0,2,2,3,3,4,3,0,3,3,1,2,0,1,1,3,1,2,0,0,3,0,1,1,0,3,2,2,3,3,0,3,0,0,0,2,3,3,4,3,3,0,1,0,0,1,4), +(0,4,0,4,0,4,0,0,0,3,4,4,3,1,4,2,3,2,3,3,3,1,4,3,4,0,3,0,4,2,3,3,2,2,5,4,2,1,3,4,3,4,3,1,3,3,4,2,0,2,1,0,3,3,0,0,2,0,3,1,0,4,4,3,4,3,0,4,0,1,0,2,4,4,4,4,4,0,3,2,0,3,3), +(0,0,0,1,0,4,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,3,2,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,2), +(0,2,0,3,0,4,0,4,0,1,3,3,3,0,4,0,2,1,2,1,1,1,2,0,3,1,1,0,1,0,3,1,0,0,3,3,2,0,1,1,0,0,0,0,0,1,0,2,0,2,2,0,3,1,0,0,1,0,1,1,0,1,2,0,3,0,0,0,0,1,0,0,3,3,4,3,1,0,1,0,3,0,2), +(0,0,0,3,0,5,0,0,0,0,1,0,2,0,3,1,0,1,3,0,0,0,2,0,0,0,1,0,0,0,1,1,0,0,4,0,0,0,2,3,0,1,4,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,1,0,0,0,0,0,0,0,2,0,0,3,0,0,0,0,0,3), +(0,2,0,5,0,5,0,1,0,2,4,3,3,2,5,1,3,2,3,3,3,0,4,1,2,0,3,0,4,0,2,2,1,1,5,3,0,0,1,4,2,3,2,0,3,3,3,2,0,2,4,1,1,2,0,1,1,0,3,1,0,1,3,1,2,3,0,2,0,0,0,1,3,5,4,4,4,0,3,0,0,1,3), 
+(0,4,0,5,0,4,0,4,0,4,5,4,3,3,4,3,3,3,4,3,4,4,5,3,4,5,4,2,4,2,3,4,3,1,4,4,1,3,5,4,4,5,5,4,4,5,5,5,2,3,3,1,4,3,1,3,3,0,3,3,1,4,3,4,4,4,0,3,0,4,0,3,3,4,4,5,0,0,4,3,0,4,5), +(0,4,0,4,0,3,0,3,0,3,4,4,4,3,3,2,4,3,4,3,4,3,5,3,4,3,2,1,4,2,4,4,3,1,3,4,2,4,5,5,3,4,5,4,1,5,4,3,0,3,2,2,3,2,1,3,1,0,3,3,3,5,3,3,3,5,4,4,2,3,3,4,3,3,3,2,1,0,3,2,1,4,3), +(0,4,0,5,0,4,0,3,0,3,5,5,3,2,4,3,4,0,5,4,4,1,4,4,4,3,3,3,4,3,5,5,2,3,3,4,1,2,5,5,3,5,5,2,3,5,5,4,0,3,2,0,3,3,1,1,5,1,4,1,0,4,3,2,3,5,0,4,0,3,0,5,4,3,4,3,0,0,4,1,0,4,4), +(1,3,0,4,0,2,0,2,0,2,5,5,3,3,3,3,3,0,4,2,3,4,4,4,3,4,0,0,3,4,5,4,3,3,3,3,2,5,5,4,5,5,5,4,3,5,5,5,1,3,1,0,1,0,0,3,2,0,4,2,0,5,2,3,2,4,1,3,0,3,0,4,5,4,5,4,3,0,4,2,0,5,4), +(0,3,0,4,0,5,0,3,0,3,4,4,3,2,3,2,3,3,3,3,3,2,4,3,3,2,2,0,3,3,3,3,3,1,3,3,3,0,4,4,3,4,4,1,1,4,4,2,0,3,1,0,1,1,0,4,1,0,2,3,1,3,3,1,3,4,0,3,0,1,0,3,1,3,0,0,1,0,2,0,0,4,4), +(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), +(0,3,0,3,0,2,0,3,0,1,5,4,3,3,3,1,4,2,1,2,3,4,4,2,4,4,5,0,3,1,4,3,4,0,4,3,3,3,2,3,2,5,3,4,3,2,2,3,0,0,3,0,2,1,0,1,2,0,0,0,0,2,1,1,3,1,0,2,0,4,0,3,4,4,4,5,2,0,2,0,0,1,3), +(0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,0,0,0,4,2,1,1,0,1,0,3,2,0,0,3,1,1,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,1,0,0,0,2,0,0,0,1,4,0,4,2,1,0,0,0,0,0,1), +(0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,3,1,0,0,0,2,0,2,1,0,0,1,2,1,0,1,1,0,0,3,0,0,0,0,0,0,0,0,0,0,0,1,3,1,0,0,0,0,0,1,0,0,2,1,0,0,0,0,0,0,0,0,2), +(0,4,0,4,0,4,0,3,0,4,4,3,4,2,4,3,2,0,4,4,4,3,5,3,5,3,3,2,4,2,4,3,4,3,1,4,0,2,3,4,4,4,3,3,3,4,4,4,3,4,1,3,4,3,2,1,2,1,3,3,3,4,4,3,3,5,0,4,0,3,0,4,3,3,3,2,1,0,3,0,0,3,3), +(0,4,0,3,0,3,0,3,0,3,5,5,3,3,3,3,4,3,4,3,3,3,4,4,4,3,3,3,3,4,3,5,3,3,1,3,2,4,5,5,5,5,4,3,4,5,5,3,2,2,3,3,3,3,2,3,3,1,2,3,2,4,3,3,3,4,0,4,0,2,0,4,3,2,2,1,2,0,3,0,0,4,1), +) + +class JapaneseContextAnalysis: + def __init__(self): + self.reset() + + def reset(self): + self._mTotalRel = 0 # total 
sequence received + self._mRelSample = [0] * NUM_OF_CATEGORY # category counters, each interger counts sequence in its category + self._mNeedToSkipCharNum = 0 # if last byte in current buffer is not the last byte of a character, we need to know how many bytes to skip in next buffer + self._mLastCharOrder = -1 # The order of previous char + self._mDone = constants.False # If this flag is set to constants.True, detection is done and conclusion has been made + + def feed(self, aBuf, aLen): + if self._mDone: return + + # The buffer we got is byte oriented, and a character may span in more than one + # buffers. In case the last one or two byte in last buffer is not complete, we + # record how many byte needed to complete that character and skip these bytes here. + # We can choose to record those bytes as well and analyse the character once it + # is complete, but since a character will not make much difference, by simply skipping + # this character will simply our logic and improve performance. + i = self._mNeedToSkipCharNum + while i < aLen: + order, charLen = self.get_order(aBuf[i:i+2]) + i += charLen + if i > aLen: + self._mNeedToSkipCharNum = i - aLen + self._mLastCharOrder = -1 + else: + if (order != -1) and (self._mLastCharOrder != -1): + self._mTotalRel += 1 + if self._mTotalRel > MAX_REL_THRESHOLD: + self._mDone = constants.True + break + self._mRelSample[jp2CharContext[self._mLastCharOrder][order]] += 1 + self._mLastCharOrder = order + + def got_enough_data(self): + return self._mTotalRel > ENOUGH_REL_THRESHOLD + + def get_confidence(self): + # This is just one way to calculate confidence. It works well for me. 
+ if self._mTotalRel > MINIMUM_DATA_THRESHOLD: + return (self._mTotalRel - self._mRelSample[0]) / self._mTotalRel + else: + return DONT_KNOW + + def get_order(self, aStr): + return -1, 1 + +class SJISContextAnalysis(JapaneseContextAnalysis): + def get_order(self, aStr): + if not aStr: return -1, 1 + # find out current char's byte length + if ((aStr[0] >= '\x81') and (aStr[0] <= '\x9F')) or \ + ((aStr[0] >= '\xE0') and (aStr[0] <= '\xFC')): + charLen = 2 + else: + charLen = 1 + + # return its order if it is hiragana + if len(aStr) > 1: + if (aStr[0] == '\202') and \ + (aStr[1] >= '\x9F') and \ + (aStr[1] <= '\xF1'): + return ord(aStr[1]) - 0x9F, charLen + + return -1, charLen + +class EUCJPContextAnalysis(JapaneseContextAnalysis): + def get_order(self, aStr): + if not aStr: return -1, 1 + # find out current char's byte length + if (aStr[0] == '\x8E') or \ + ((aStr[0] >= '\xA1') and (aStr[0] <= '\xFE')): + charLen = 2 + elif aStr[0] == '\x8F': + charLen = 3 + else: + charLen = 1 + + # return its order if it is hiragana + if len(aStr) > 1: + if (aStr[0] == '\xA4') and \ + (aStr[1] >= '\xA1') and \ + (aStr[1] <= '\xF3'): + return ord(aStr[1]) - 0xA1, charLen + + return -1, charLen diff --git a/fanficdownloader/chardet/langbulgarianmodel.py b/fanficdownloader/chardet/langbulgarianmodel.py new file mode 100644 index 00000000..bf5641e7 --- /dev/null +++ b/fanficdownloader/chardet/langbulgarianmodel.py @@ -0,0 +1,228 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Communicator client code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. 
+# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +import constants + +# 255: Control characters that usually does not exist in any text +# 254: Carriage/Return +# 253: symbol (punctuation) that does not belong to word +# 252: 0 - 9 + +# Character Mapping Table: +# this table is modified base on win1251BulgarianCharToOrderMap, so +# only number <64 is sure valid + +Latin5_BulgarianCharToOrderMap = ( \ +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 +253, 77, 90, 99,100, 72,109,107,101, 79,185, 81,102, 76, 94, 82, # 40 +110,186,108, 91, 74,119, 84, 96,111,187,115,253,253,253,253,253, # 50 +253, 65, 69, 70, 66, 63, 68,112,103, 92,194,104, 95, 86, 87, 71, # 60 +116,195, 85, 93, 97,113,196,197,198,199,200,253,253,253,253,253, # 70 +194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209, # 80 +210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225, # 90 + 81,226,227,228,229,230,105,231,232,233,234,235,236, 45,237,238, 
# a0 + 31, 32, 35, 43, 37, 44, 55, 47, 40, 59, 33, 46, 38, 36, 41, 30, # b0 + 39, 28, 34, 51, 48, 49, 53, 50, 54, 57, 61,239, 67,240, 60, 56, # c0 + 1, 18, 9, 20, 11, 3, 23, 15, 2, 26, 12, 10, 14, 6, 4, 13, # d0 + 7, 8, 5, 19, 29, 25, 22, 21, 27, 24, 17, 75, 52,241, 42, 16, # e0 + 62,242,243,244, 58,245, 98,246,247,248,249,250,251, 91,252,253, # f0 +) + +win1251BulgarianCharToOrderMap = ( \ +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 +253, 77, 90, 99,100, 72,109,107,101, 79,185, 81,102, 76, 94, 82, # 40 +110,186,108, 91, 74,119, 84, 96,111,187,115,253,253,253,253,253, # 50 +253, 65, 69, 70, 66, 63, 68,112,103, 92,194,104, 95, 86, 87, 71, # 60 +116,195, 85, 93, 97,113,196,197,198,199,200,253,253,253,253,253, # 70 +206,207,208,209,210,211,212,213,120,214,215,216,217,218,219,220, # 80 +221, 78, 64, 83,121, 98,117,105,222,223,224,225,226,227,228,229, # 90 + 88,230,231,232,233,122, 89,106,234,235,236,237,238, 45,239,240, # a0 + 73, 80,118,114,241,242,243,244,245, 62, 58,246,247,248,249,250, # b0 + 31, 32, 35, 43, 37, 44, 55, 47, 40, 59, 33, 46, 38, 36, 41, 30, # c0 + 39, 28, 34, 51, 48, 49, 53, 50, 54, 57, 61,251, 67,252, 60, 56, # d0 + 1, 18, 9, 20, 11, 3, 23, 15, 2, 26, 12, 10, 14, 6, 4, 13, # e0 + 7, 8, 5, 19, 29, 25, 22, 21, 27, 24, 17, 75, 52,253, 42, 16, # f0 +) + +# Model Table: +# total sequences: 100% +# first 512 sequences: 96.9392% +# first 1024 sequences:3.0618% +# rest sequences: 0.2992% +# negative sequences: 0.0020% +BulgarianLangModel = ( \ +0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,3,3,3,3,3, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,2,2,3,2,2,1,2,2, +3,1,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,3,3,0,3,0,1, +0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, 
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,3,3,3,3,3,0,3,1,0, +0,1,0,0,0,0,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +3,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,2,3,3,3,3,3,3,3,3,0,3,0,0, +0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,2,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,1,3,2,3,3,3,3,3,3,3,3,0,3,0,0, +0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,1,3,3,3,3,2,2,2,1,1,2,0,1,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,3,3,2,3,2,2,3,3,1,1,2,3,3,2,3,3,3,3,2,1,2,0,2,0,3,0,0, +0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,3,3,1,3,3,3,3,3,2,3,2,3,3,3,3,3,2,3,3,1,3,0,3,0,2,0,0, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,3,3,3,1,3,3,2,3,3,3,1,3,3,2,3,2,2,2,0,0,2,0,2,0,2,0,0, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,3,3,3,3,0,3,3,3,2,2,3,3,3,1,2,2,3,2,1,1,2,0,2,0,0,0,0, +1,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,3,3,2,3,3,1,2,3,2,2,2,3,3,3,3,3,2,2,3,1,2,0,2,1,2,0,0, +0,0,0,0,0,0,0,0,0,0,3,0,0,1,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,1,3,3,3,3,3,2,3,3,3,2,3,3,2,3,2,2,2,3,1,2,0,1,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,3,3,3,3,3,3,1,1,1,2,2,1,3,1,3,2,2,3,0,0,1,0,1,0,1,0,0, +0,0,0,1,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,2,2,3,2,2,3,1,2,1,1,1,2,3,1,3,1,2,2,0,1,1,1,1,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,1,3,2,2,3,3,1,2,3,1,1,3,3,3,3,1,2,2,1,1,1,0,2,0,2,0,1, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,2,2,3,3,3,2,2,1,1,2,0,2,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, +3,0,1,2,1,3,3,2,3,3,3,3,3,2,3,2,1,0,3,1,2,1,2,1,2,3,2,1,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
+1,1,1,2,3,3,3,3,3,3,3,3,3,3,3,3,0,0,3,1,3,3,2,3,3,2,2,2,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,3,3,3,3,0,3,3,3,3,3,2,1,1,2,1,3,3,0,3,1,1,1,1,3,2,0,1,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, +3,3,2,2,2,3,3,3,3,3,3,3,3,3,3,3,1,1,3,1,3,3,2,3,2,2,2,3,0,2,0,0, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,2,3,3,2,2,3,2,1,1,1,1,1,3,1,3,1,1,0,0,0,1,0,0,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,2,3,2,0,3,2,0,3,0,2,0,0,2,1,3,1,0,0,1,0,0,0,1,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,2,1,1,1,1,2,1,1,2,1,1,1,2,2,1,2,1,1,1,0,1,1,0,1,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,2,1,3,1,1,2,1,3,2,1,1,0,1,2,3,2,1,1,1,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,3,3,3,3,2,2,1,0,1,0,0,1,0,0,0,2,1,0,3,0,0,1,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,2,3,2,3,3,1,3,2,1,1,1,2,1,1,2,1,3,0,1,0,0,0,1,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,1,1,2,2,3,3,2,3,2,2,2,3,1,2,2,1,1,2,1,1,2,2,0,1,1,0,1,0,2,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,2,1,3,1,0,2,2,1,3,2,1,0,0,2,0,2,0,1,0,0,0,0,0,0,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,3,1,2,0,2,3,1,2,3,2,0,1,3,1,2,1,1,1,0,0,1,0,0,2,2,2,3, +2,2,2,2,1,2,1,1,2,2,1,1,2,0,1,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,0,1, +3,3,3,3,3,2,1,2,2,1,2,0,2,0,1,0,1,2,1,2,1,1,0,0,0,1,0,1,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1, +3,3,2,3,3,1,1,3,1,0,3,2,1,0,0,0,1,2,0,2,0,1,0,0,0,1,0,1,2,1,2,2, +1,1,1,1,1,1,1,2,2,2,1,1,1,1,1,1,1,0,1,2,1,1,1,0,0,0,0,0,1,1,0,0, +3,1,0,1,0,2,3,2,2,2,3,2,2,2,2,2,1,0,2,1,2,1,1,1,0,1,2,1,2,2,2,1, +1,1,2,2,2,2,1,2,1,1,0,1,2,1,2,2,2,1,1,1,0,1,1,1,1,2,0,1,0,0,0,0, 
+2,3,2,3,3,0,0,2,1,0,2,1,0,0,0,0,2,3,0,2,0,0,0,0,0,1,0,0,2,0,1,2, +2,1,2,1,2,2,1,1,1,2,1,1,1,0,1,2,2,1,1,1,1,1,0,1,1,1,0,0,1,2,0,0, +3,3,2,2,3,0,2,3,1,1,2,0,0,0,1,0,0,2,0,2,0,0,0,1,0,1,0,1,2,0,2,2, +1,1,1,1,2,1,0,1,2,2,2,1,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,0,1,1,0,0, +2,3,2,3,3,0,0,3,0,1,1,0,1,0,0,0,2,2,1,2,0,0,0,0,0,0,0,0,2,0,1,2, +2,2,1,1,1,1,1,2,2,2,1,0,2,0,1,0,1,0,0,1,0,1,0,0,1,0,0,0,0,1,0,0, +3,3,3,3,2,2,2,2,2,0,2,1,1,1,1,2,1,2,1,1,0,2,0,1,0,1,0,0,2,0,1,2, +1,1,1,1,1,1,1,2,2,1,1,0,2,0,1,0,2,0,0,1,1,1,0,0,2,0,0,0,1,1,0,0, +2,3,3,3,3,1,0,0,0,0,0,0,0,0,0,0,2,0,0,1,1,0,0,0,0,0,0,1,2,0,1,2, +2,2,2,1,1,2,1,1,2,2,2,1,2,0,1,1,1,1,1,1,0,1,1,1,1,0,0,1,1,1,0,0, +2,3,3,3,3,0,2,2,0,2,1,0,0,0,1,1,1,2,0,2,0,0,0,3,0,0,0,0,2,0,2,2, +1,1,1,2,1,2,1,1,2,2,2,1,2,0,1,1,1,0,1,1,1,1,0,2,1,0,0,0,1,1,0,0, +2,3,3,3,3,0,2,1,0,0,2,0,0,0,0,0,1,2,0,2,0,0,0,0,0,0,0,0,2,0,1,2, +1,1,1,2,1,1,1,1,2,2,2,0,1,0,1,1,1,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0, +3,3,2,2,3,0,1,0,1,0,0,0,0,0,0,0,1,1,0,3,0,0,0,0,0,0,0,0,1,0,2,2, +1,1,1,1,1,2,1,1,2,2,1,2,2,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,1,0,0, +3,1,0,1,0,2,2,2,2,3,2,1,1,1,2,3,0,0,1,0,2,1,1,0,1,1,1,1,2,1,1,1, +1,2,2,1,2,1,2,2,1,1,0,1,2,1,2,2,1,1,1,0,0,1,1,1,2,1,0,1,0,0,0,0, +2,1,0,1,0,3,1,2,2,2,2,1,2,2,1,1,1,0,2,1,2,2,1,1,2,1,1,0,2,1,1,1, +1,2,2,2,2,2,2,2,1,2,0,1,1,0,2,1,1,1,1,1,0,0,1,1,1,1,0,1,0,0,0,0, +2,1,1,1,1,2,2,2,2,1,2,2,2,1,2,2,1,1,2,1,2,3,2,2,1,1,1,1,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,2,2,3,2,0,1,2,0,1,2,1,1,0,1,0,1,2,1,2,0,0,0,1,1,0,0,0,1,0,0,2, +1,1,0,0,1,1,0,1,1,1,1,0,2,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0, +2,0,0,0,0,1,2,2,2,2,2,2,2,1,2,1,1,1,1,1,1,1,0,1,1,1,1,1,2,1,1,1, +1,2,2,2,2,1,1,2,1,2,1,1,1,0,2,1,2,1,1,1,0,2,1,1,1,1,0,1,0,0,0,0, +3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0, +1,1,0,1,0,1,1,1,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,2,2,3,2,0,0,0,0,1,0,0,0,0,0,0,1,1,0,2,0,0,0,0,0,0,0,0,1,0,1,2, +1,1,1,1,1,1,0,0,2,2,2,2,2,0,1,1,0,1,1,1,1,1,0,0,1,0,0,0,1,1,0,1, 
+2,3,1,2,1,0,1,1,0,2,2,2,0,0,1,0,0,1,1,1,1,0,0,0,0,0,0,0,1,0,1,2, +1,1,1,1,2,1,1,1,1,1,1,1,1,0,1,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,0, +2,2,2,2,2,0,0,2,0,0,2,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,2,0,2,2, +1,1,1,1,1,0,0,1,2,1,1,0,1,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0, +1,2,2,2,2,0,0,2,0,1,1,0,0,0,1,0,0,2,0,2,0,0,0,0,0,0,0,0,0,0,1,1, +0,0,0,1,1,1,1,1,1,1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0, +1,2,2,3,2,0,0,1,0,0,1,0,0,0,0,0,0,1,0,2,0,0,0,1,0,0,0,0,0,0,0,2, +1,1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0, +2,1,2,2,2,1,2,1,2,2,1,1,2,1,1,1,0,1,1,1,1,2,0,1,0,1,1,1,1,0,1,1, +1,1,2,1,1,1,1,1,1,0,0,1,2,1,1,1,1,1,1,0,0,1,1,1,0,0,0,0,0,0,0,0, +1,0,0,1,3,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,2,2,2,1,0,0,1,0,2,0,0,0,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,2,0,0,1, +0,2,0,1,0,0,1,1,2,0,1,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0, +1,2,2,2,2,0,1,1,0,2,1,0,1,1,1,0,0,1,0,2,0,1,0,0,0,0,0,0,0,0,0,1, +0,1,0,0,1,0,0,0,1,1,0,0,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0, +2,2,2,2,2,0,0,1,0,0,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1, +0,1,0,1,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0, +2,0,1,0,0,1,2,1,1,1,1,1,1,2,2,1,0,0,1,0,1,0,0,0,0,1,1,1,1,0,0,0, +1,1,2,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,2,1,2,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1, +0,0,0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,0,0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0, +0,1,1,0,1,1,1,0,0,1,0,0,1,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0, +1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,1,0,0,1,0,2,0,0,2,0,1,0,0,1,0,0,1, +1,1,0,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0, +1,1,1,1,1,1,1,2,0,0,0,0,0,0,2,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0, 
+2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,1,1,0,1,1,1,1,1,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +) + +Latin5BulgarianModel = { \ + 'charToOrderMap': Latin5_BulgarianCharToOrderMap, + 'precedenceMatrix': BulgarianLangModel, + 'mTypicalPositiveRatio': 0.969392, + 'keepEnglishLetter': constants.False, + 'charsetName': "ISO-8859-5" +} + +Win1251BulgarianModel = { \ + 'charToOrderMap': win1251BulgarianCharToOrderMap, + 'precedenceMatrix': BulgarianLangModel, + 'mTypicalPositiveRatio': 0.969392, + 'keepEnglishLetter': constants.False, + 'charsetName': "windows-1251" +} diff --git a/fanficdownloader/chardet/langcyrillicmodel.py b/fanficdownloader/chardet/langcyrillicmodel.py new file mode 100644 index 00000000..e604cc73 --- /dev/null +++ b/fanficdownloader/chardet/langcyrillicmodel.py @@ -0,0 +1,329 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Communicator client code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
+# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +import constants + +# KOI8-R language model +# Character Mapping Table: +KOI8R_CharToOrderMap = ( \ +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 +253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40 +155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50 +253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60 + 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70 +191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206, # 80 +207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222, # 90 +223,224,225, 68,226,227,228,229,230,231,232,233,234,235,236,237, # a0 +238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253, # b0 + 27, 3, 21, 28, 13, 2, 39, 19, 26, 4, 23, 11, 8, 12, 5, 1, # c0 + 15, 16, 9, 7, 6, 14, 24, 10, 17, 18, 20, 25, 30, 29, 22, 54, # d0 + 59, 37, 44, 58, 41, 48, 53, 46, 55, 42, 60, 36, 49, 38, 31, 34, # e0 + 35, 43, 45, 32, 40, 52, 56, 33, 61, 62, 51, 57, 47, 63, 50, 70, # f0 +) + +win1251_CharToOrderMap = ( \ +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 +253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40 +155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50 +253, 71,172, 
66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60 + 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70 +191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206, +207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222, +223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238, +239,240,241,242,243,244,245,246, 68,247,248,249,250,251,252,253, + 37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35, + 45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43, + 3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15, + 9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16, +) + +latin5_CharToOrderMap = ( \ +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 +253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40 +155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50 +253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60 + 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70 +191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206, +207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222, +223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238, + 37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35, + 45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43, + 3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15, + 9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16, +239, 68,240,241,242,243,244,245,246,247,248,249,250,251,252,255, +) + +macCyrillic_CharToOrderMap = ( \ +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 
+253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 +253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40 +155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50 +253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60 + 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70 + 37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35, + 45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43, +191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206, +207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222, +223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238, +239,240,241,242,243,244,245,246,247,248,249,250,251,252, 68, 16, + 3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15, + 9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27,255, +) + +IBM855_CharToOrderMap = ( \ +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 +253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40 +155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50 +253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60 + 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70 +191,192,193,194, 68,195,196,197,198,199,200,201,202,203,204,205, +206,207,208,209,210,211,212,213,214,215,216,217, 27, 59, 54, 70, + 3, 37, 21, 44, 28, 58, 13, 41, 2, 48, 39, 53, 19, 46,218,219, +220,221,222,223,224, 26, 55, 4, 42,225,226,227,228, 23, 60,229, +230,231,232,233,234,235, 11, 36,236,237,238,239,240,241,242,243, + 8, 49, 12, 38, 5, 31, 1, 34, 15,244,245,246,247, 35, 16,248, + 43, 9, 45, 7, 32, 6, 40, 14, 52, 24, 56, 10, 33, 17, 61,249, 
+250, 18, 62, 20, 51, 25, 57, 30, 47, 29, 63, 22, 50,251,252,255, +) + +IBM866_CharToOrderMap = ( \ +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 +253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40 +155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50 +253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60 + 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70 + 37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35, + 45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43, + 3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15, +191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206, +207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222, +223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238, + 9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16, +239, 68,240,241,242,243,244,245,246,247,248,249,250,251,252,255, +) + +# Model Table: +# total sequences: 100% +# first 512 sequences: 97.6601% +# first 1024 sequences: 2.3389% +# rest sequences: 0.1237% +# negative sequences: 0.0009% +RussianLangModel = ( \ +0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,1,3,3,3,3,1,3,3,3,2,3,2,3,3, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,2,2,2,2,2,0,0,2, +3,3,3,2,3,3,3,3,3,3,3,3,3,3,2,3,3,0,0,3,3,3,3,3,3,3,3,3,2,3,2,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,2,2,3,3,3,3,3,3,3,3,3,2,3,3,0,0,3,3,3,3,3,3,3,3,2,3,3,1,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,2,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,3,3,3,3,3,3,3,3,3,3,3,2,1, +0,0,0,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,0,0,3,3,3,3,3,3,3,3,3,3,3,2,1, 
+0,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,2,2,2,3,1,3,3,1,3,3,3,3,2,2,3,0,2,2,2,3,3,2,1,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,2,3,3,3,3,3,2,2,3,2,3,3,3,2,1,2,2,0,1,2,2,2,2,2,2,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,3,0,2,2,3,3,2,1,2,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,2,3,3,1,2,3,2,2,3,2,3,3,3,3,2,2,3,0,3,2,2,3,1,1,1,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,2,3,3,3,3,2,2,2,0,3,3,3,2,2,2,2,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,3,3,3,2,3,2,2,0,1,3,2,1,2,2,1,0, +0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,3,3,3,2,1,1,3,0,1,1,1,1,2,1,1,0,2,2,2,1,2,0,1,0, +0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,2,3,3,2,2,2,2,1,3,2,3,2,3,2,1,2,2,0,1,1,2,1,2,1,2,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,2,3,3,3,2,2,2,2,0,2,2,2,2,3,1,1,0, +0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, +3,2,3,2,2,3,3,3,3,3,3,3,3,3,1,3,2,0,0,3,3,3,3,2,3,3,3,3,2,3,2,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,3,3,3,3,3,2,2,3,3,0,2,1,0,3,2,3,2,3,0,0,1,2,0,0,1,0,1,2,1,1,0, +0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,3,0,2,3,3,3,3,2,3,3,3,3,1,2,2,0,0,2,3,2,2,2,3,2,3,2,2,3,0,0, +0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,2,3,0,2,3,2,3,0,1,2,3,3,2,0,2,3,0,0,2,3,2,2,0,1,3,1,3,2,2,1,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,1,3,0,2,3,3,3,3,3,3,3,3,2,1,3,2,0,0,2,2,3,3,3,2,3,3,0,2,2,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,2,2,3,3,2,2,2,3,3,0,0,1,1,1,1,1,2,0,0,1,1,1,1,0,1,0, 
+0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,2,2,3,3,3,3,3,3,3,0,3,2,3,3,2,3,2,0,2,1,0,1,1,0,1,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,2,3,3,3,2,2,2,2,3,1,3,2,3,1,1,2,1,0,2,2,2,2,1,3,1,0, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, +2,2,3,3,3,3,3,1,2,2,1,3,1,0,3,0,0,3,0,0,0,1,1,0,1,2,1,0,0,0,0,0, +0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,2,2,1,1,3,3,3,2,2,1,2,2,3,1,1,2,0,0,2,2,1,3,0,0,2,1,1,2,1,1,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,2,3,3,3,3,1,2,2,2,1,2,1,3,3,1,1,2,1,2,1,2,2,0,2,0,0,1,1,0,1,0, +0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,3,3,3,3,3,2,1,3,2,2,3,2,0,3,2,0,3,0,1,0,1,1,0,0,1,1,1,1,0,1,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,2,3,3,3,2,2,2,3,3,1,2,1,2,1,0,1,0,1,1,0,1,0,0,2,1,1,1,0,1,0, +0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0, +3,1,1,2,1,2,3,3,2,2,1,2,2,3,0,2,1,0,0,2,2,3,2,1,2,2,2,2,2,3,1,0, +0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,1,1,0,1,1,2,2,1,1,3,0,0,1,3,1,1,1,0,0,0,1,0,1,1,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,1,3,3,3,2,0,0,0,2,1,0,1,0,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,0,1,0,0,2,3,2,2,2,1,2,2,2,1,2,1,0,0,1,1,1,0,2,0,1,1,1,0,0,1,1, +1,0,0,0,0,0,1,2,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0, +2,3,3,3,3,0,0,0,0,1,0,0,0,0,3,0,1,2,1,0,0,0,0,0,0,0,1,1,0,0,1,1, +1,0,1,0,1,2,0,0,1,1,2,1,0,1,1,1,1,0,1,1,1,1,0,1,0,0,1,0,0,1,1,0, +2,2,3,2,2,2,3,1,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,0,1,0,1,1,1,0,2,1, +1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,0,1,0,1,1,0,1,1,1,0,1,1,0, +3,3,3,2,2,2,2,3,2,2,1,1,2,2,2,2,1,1,3,1,2,1,2,0,0,1,1,0,1,0,2,1, +1,1,1,1,1,2,1,0,1,1,1,1,0,1,0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,1,1,0, +2,0,0,1,0,3,2,2,2,2,1,2,1,2,1,2,0,0,0,2,1,2,2,1,1,2,2,0,1,1,0,2, 
+1,1,1,1,1,0,1,1,1,2,1,1,1,2,1,0,1,2,1,1,1,1,0,1,1,1,0,0,1,0,0,1, +1,3,2,2,2,1,1,1,2,3,0,0,0,0,2,0,2,2,1,0,0,0,0,0,0,1,0,0,0,0,1,1, +1,0,1,1,0,1,0,1,1,0,1,1,0,2,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0, +2,3,2,3,2,1,2,2,2,2,1,0,0,0,2,0,0,1,1,0,0,0,0,0,0,0,1,1,0,0,2,1, +1,1,2,1,0,2,0,0,1,0,1,0,0,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,0,0,0, +3,0,0,1,0,2,2,2,3,2,2,2,2,2,2,2,0,0,0,2,1,2,1,1,1,2,2,0,0,0,1,2, +1,1,1,1,1,0,1,2,1,1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,0,1, +2,3,2,3,3,2,0,1,1,1,0,0,1,0,2,0,1,1,3,1,0,0,0,0,0,0,0,1,0,0,2,1, +1,1,1,1,1,1,1,0,1,0,1,1,1,1,0,1,1,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0, +2,3,3,3,3,1,2,2,2,2,0,1,1,0,2,1,1,1,2,1,0,1,1,0,0,1,0,1,0,0,2,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,3,3,3,2,0,0,1,1,2,2,1,0,0,2,0,1,1,3,0,0,1,0,0,0,0,0,1,0,1,2,1, +1,1,2,0,1,1,1,0,1,0,1,1,0,1,0,1,1,1,1,0,1,0,0,0,0,0,0,1,0,1,1,0, +1,3,2,3,2,1,0,0,2,2,2,0,1,0,2,0,1,1,1,0,1,0,0,0,3,0,1,1,0,0,2,1, +1,1,1,0,1,1,0,0,0,0,1,1,0,1,0,0,2,1,1,0,1,0,0,0,1,0,1,0,0,1,1,0, +3,1,2,1,1,2,2,2,2,2,2,1,2,2,1,1,0,0,0,2,2,2,0,0,0,1,2,1,0,1,0,1, +2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,2,1,1,1,0,1,0,1,1,0,1,1,1,0,0,1, +3,0,0,0,0,2,0,1,1,1,1,1,1,1,0,1,0,0,0,1,1,1,0,1,0,1,1,0,0,1,0,1, +1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,0,0,0,1, +1,3,3,2,2,0,0,0,2,2,0,0,0,1,2,0,1,1,2,0,0,0,0,0,0,0,0,1,0,0,2,1, +0,1,1,0,0,1,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0, +2,3,2,3,2,0,0,0,0,1,1,0,0,0,2,0,2,0,2,0,0,0,0,0,1,0,0,1,0,0,1,1, +1,1,2,0,1,2,1,0,1,1,2,1,1,1,1,1,2,1,1,0,1,0,0,1,1,1,1,1,0,1,1,0, +1,3,2,2,2,1,0,0,2,2,1,0,1,2,2,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1, +0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0, +1,0,0,1,0,2,3,1,2,2,2,2,2,2,1,1,0,0,0,1,0,1,0,2,1,1,1,0,0,0,0,1, +1,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0, +2,0,2,0,0,1,0,3,2,1,2,1,2,2,0,1,0,0,0,2,1,0,0,2,1,1,1,1,0,2,0,2, +2,1,1,1,1,1,1,1,1,1,1,1,1,2,1,0,1,1,1,1,0,0,0,1,1,1,1,0,1,0,0,1, +1,2,2,2,2,1,0,0,1,0,0,0,0,0,2,0,1,1,1,1,0,0,0,0,1,0,1,2,0,0,2,0, 
+1,0,1,1,1,2,1,0,1,0,1,1,0,0,1,0,1,1,1,0,1,0,0,0,1,0,0,1,0,1,1,0, +2,1,2,2,2,0,3,0,1,1,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +0,0,0,1,1,1,0,0,1,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0, +1,2,2,3,2,2,0,0,1,1,2,0,1,2,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1, +0,1,1,0,0,1,1,0,0,1,1,0,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0, +2,2,1,1,2,1,2,2,2,2,2,1,2,2,0,1,0,0,0,1,2,2,2,1,2,1,1,1,1,1,2,1, +1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1,0,1,1,0,0,1, +1,2,2,2,2,0,1,0,2,2,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0, +0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0, +0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,2,2,2,2,0,0,0,2,2,2,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1, +0,1,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,2,2,2,2,0,0,0,0,1,0,0,1,1,2,0,0,0,0,1,0,1,0,0,1,0,0,2,0,0,0,1, +0,0,1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0, +1,2,2,2,1,1,2,0,2,1,1,1,1,0,2,2,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,1, +0,0,1,0,1,1,0,0,0,0,1,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0, +1,0,2,1,2,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0, +0,0,1,0,1,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0, +1,0,0,0,0,2,0,1,2,1,0,1,1,1,0,1,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,1, +0,0,0,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1, +2,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +1,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0, +2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +1,1,1,0,1,0,1,0,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0, +1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +1,1,0,1,1,0,1,0,1,0,0,0,0,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,1,0,0,0, +0,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0, +) + +Koi8rModel = { \ + 'charToOrderMap': KOI8R_CharToOrderMap, + 
'precedenceMatrix': RussianLangModel, + 'mTypicalPositiveRatio': 0.976601, + 'keepEnglishLetter': constants.False, + 'charsetName': "KOI8-R" +} + +Win1251CyrillicModel = { \ + 'charToOrderMap': win1251_CharToOrderMap, + 'precedenceMatrix': RussianLangModel, + 'mTypicalPositiveRatio': 0.976601, + 'keepEnglishLetter': constants.False, + 'charsetName': "windows-1251" +} + +Latin5CyrillicModel = { \ + 'charToOrderMap': latin5_CharToOrderMap, + 'precedenceMatrix': RussianLangModel, + 'mTypicalPositiveRatio': 0.976601, + 'keepEnglishLetter': constants.False, + 'charsetName': "ISO-8859-5" +} + +MacCyrillicModel = { \ + 'charToOrderMap': macCyrillic_CharToOrderMap, + 'precedenceMatrix': RussianLangModel, + 'mTypicalPositiveRatio': 0.976601, + 'keepEnglishLetter': constants.False, + 'charsetName': "MacCyrillic" +}; + +Ibm866Model = { \ + 'charToOrderMap': IBM866_CharToOrderMap, + 'precedenceMatrix': RussianLangModel, + 'mTypicalPositiveRatio': 0.976601, + 'keepEnglishLetter': constants.False, + 'charsetName': "IBM866" +} + +Ibm855Model = { \ + 'charToOrderMap': IBM855_CharToOrderMap, + 'precedenceMatrix': RussianLangModel, + 'mTypicalPositiveRatio': 0.976601, + 'keepEnglishLetter': constants.False, + 'charsetName': "IBM855" +} diff --git a/fanficdownloader/chardet/langgreekmodel.py b/fanficdownloader/chardet/langgreekmodel.py new file mode 100644 index 00000000..ec6d49e8 --- /dev/null +++ b/fanficdownloader/chardet/langgreekmodel.py @@ -0,0 +1,225 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Communicator client code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. 
+# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +import constants + +# 255: Control characters that usually does not exist in any text +# 254: Carriage/Return +# 253: symbol (punctuation) that does not belong to word +# 252: 0 - 9 + +# Character Mapping Table: +Latin7_CharToOrderMap = ( \ +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 +253, 82,100,104, 94, 98,101,116,102,111,187,117, 92, 88,113, 85, # 40 + 79,118,105, 83, 67,114,119, 95, 99,109,188,253,253,253,253,253, # 50 +253, 72, 70, 80, 81, 60, 96, 93, 89, 68,120, 97, 77, 86, 69, 55, # 60 + 78,115, 65, 66, 58, 76,106,103, 87,107,112,253,253,253,253,253, # 70 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 80 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 90 +253,233, 90,253,253,253,253,253,253,253,253,253,253, 74,253,253, # a0 +253,253,253,253,247,248, 61, 36, 46, 71, 73,253, 54,253,108,123, # b0 +110, 31, 51, 43, 41, 34, 91, 40, 52, 
47, 44, 53, 38, 49, 59, 39, # c0 + 35, 48,250, 37, 33, 45, 56, 50, 84, 57,120,121, 17, 18, 22, 15, # d0 +124, 1, 29, 20, 21, 3, 32, 13, 25, 5, 11, 16, 10, 6, 30, 4, # e0 + 9, 8, 14, 7, 2, 12, 28, 23, 42, 24, 64, 75, 19, 26, 27,253, # f0 +) + +win1253_CharToOrderMap = ( \ +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 +253, 82,100,104, 94, 98,101,116,102,111,187,117, 92, 88,113, 85, # 40 + 79,118,105, 83, 67,114,119, 95, 99,109,188,253,253,253,253,253, # 50 +253, 72, 70, 80, 81, 60, 96, 93, 89, 68,120, 97, 77, 86, 69, 55, # 60 + 78,115, 65, 66, 58, 76,106,103, 87,107,112,253,253,253,253,253, # 70 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 80 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 90 +253,233, 61,253,253,253,253,253,253,253,253,253,253, 74,253,253, # a0 +253,253,253,253,247,253,253, 36, 46, 71, 73,253, 54,253,108,123, # b0 +110, 31, 51, 43, 41, 34, 91, 40, 52, 47, 44, 53, 38, 49, 59, 39, # c0 + 35, 48,250, 37, 33, 45, 56, 50, 84, 57,120,121, 17, 18, 22, 15, # d0 +124, 1, 29, 20, 21, 3, 32, 13, 25, 5, 11, 16, 10, 6, 30, 4, # e0 + 9, 8, 14, 7, 2, 12, 28, 23, 42, 24, 64, 75, 19, 26, 27,253, # f0 +) + +# Model Table: +# total sequences: 100% +# first 512 sequences: 98.2851% +# first 1024 sequences:1.7001% +# rest sequences: 0.0359% +# negative sequences: 0.0148% +GreekLangModel = ( \ +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,3,2,2,3,3,3,3,3,3,3,3,1,3,3,3,0,2,2,3,3,0,3,0,3,2,0,3,3,3,0, +3,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,3,0,3,3,0,3,2,3,3,0,3,2,3,3,3,0,0,3,0,3,0,3,3,2,0,0,0, +2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0, 
+0,2,3,2,2,3,3,3,3,3,3,3,3,0,3,3,3,3,0,2,3,3,0,3,3,3,3,2,3,3,3,0, +2,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,2,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,0,2,1,3,3,3,3,2,3,3,2,3,3,2,0, +0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,0,3,3,3,3,3,3,0,3,3,0,3,3,3,3,3,3,3,3,3,3,0,3,2,3,3,0, +2,0,1,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, +0,3,3,3,3,3,2,3,0,0,0,0,3,3,0,3,1,3,3,3,0,3,3,0,3,3,3,3,0,0,0,0, +2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,3,0,3,0,3,3,3,3,3,0,3,2,2,2,3,0,2,3,3,3,3,3,2,3,3,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,3,3,2,2,2,3,3,3,3,0,3,1,3,3,3,3,2,3,3,3,3,3,3,3,2,2,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,3,2,0,3,0,0,0,3,3,2,3,3,3,3,3,0,0,3,2,3,0,2,3,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,0,3,3,3,3,0,0,3,3,0,2,3,0,3,0,3,3,3,0,0,3,0,3,0,2,2,3,3,0,0, +0,0,1,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,3,2,0,3,2,3,3,3,3,0,3,3,3,3,3,0,3,3,2,3,2,3,3,2,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,2,3,2,3,3,3,3,3,3,0,2,3,2,3,2,2,2,3,2,3,3,2,3,0,2,2,2,3,0, +2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,3,0,0,0,3,3,3,2,3,3,0,0,3,0,3,0,0,0,3,2,0,3,0,3,0,0,2,0,2,0, +0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,0,3,3,3,3,3,3,0,3,3,0,3,0,0,0,3,3,0,3,3,3,0,0,1,2,3,0, +3,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,3,2,0,0,3,2,2,3,3,0,3,3,3,3,3,2,1,3,0,3,2,3,3,2,1,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,3,3,0,2,3,3,3,3,3,3,0,0,3,0,3,0,0,0,3,3,0,3,2,3,0,0,3,3,3,0, +3,0,0,0,2,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
+0,3,3,3,3,0,3,3,3,3,3,3,0,0,3,0,3,0,0,0,3,2,0,3,2,3,0,0,3,2,3,0, +2,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,3,1,2,2,3,3,3,3,3,3,0,2,3,0,3,0,0,0,3,3,0,3,0,2,0,0,2,3,1,0, +2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,0,3,3,3,3,0,3,0,3,3,2,3,0,3,3,3,3,3,3,0,3,3,3,0,2,3,0,0,3,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,0,3,3,3,0,0,3,0,0,0,3,3,0,3,0,2,3,3,0,0,3,0,3,0,3,3,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,3,0,0,0,3,3,3,3,3,3,0,0,3,0,2,0,0,0,3,3,0,3,0,3,0,0,2,0,2,0, +0,0,0,0,1,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,3,3,0,3,0,2,0,3,2,0,3,2,3,2,3,0,0,3,2,3,2,3,3,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,3,0,0,2,3,3,3,3,3,0,0,0,3,0,2,1,0,0,3,2,2,2,0,3,0,0,2,2,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,0,3,3,3,2,0,3,0,3,0,3,3,0,2,1,2,3,3,0,0,3,0,3,0,3,3,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,2,3,3,3,0,3,3,3,3,3,3,0,2,3,0,3,0,0,0,2,1,0,2,2,3,0,0,2,2,2,0, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,3,0,0,2,3,3,3,2,3,0,0,1,3,0,2,0,0,0,0,3,0,1,0,2,0,0,1,1,1,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,3,1,0,3,0,0,0,3,2,0,3,2,3,3,3,0,0,3,0,3,2,2,2,1,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,0,3,3,3,0,0,3,0,0,0,0,2,0,2,3,3,2,2,2,2,3,0,2,0,2,2,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,2,0,0,0,0,0,0,2,3,0,2,0,2,3,2,0,0,3,0,3,0,3,1,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,3,2,3,3,2,2,3,0,2,0,3,0,0,0,2,0,0,0,0,1,2,0,2,0,2,0, +0,2,0,2,0,2,2,0,0,1,0,2,2,2,0,2,2,2,0,2,2,2,0,0,2,0,0,1,0,0,0,0, +0,2,0,3,3,2,0,0,0,0,0,0,1,3,0,2,0,2,2,2,0,0,2,0,3,0,0,2,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
+0,3,0,2,3,2,0,2,2,0,2,0,2,2,0,2,0,2,2,2,0,0,0,0,0,0,2,3,0,0,0,2, +0,1,2,0,0,0,0,2,2,0,0,0,2,1,0,2,2,0,0,0,0,0,0,1,0,2,0,0,0,0,0,0, +0,0,2,1,0,2,3,2,2,3,2,3,2,0,0,3,3,3,0,0,3,2,0,0,0,1,1,0,2,0,2,2, +0,2,0,2,0,2,2,0,0,2,0,2,2,2,0,2,2,2,2,0,0,2,0,0,0,2,0,1,0,0,0,0, +0,3,0,3,3,2,2,0,3,0,0,0,2,2,0,2,2,2,1,2,0,0,1,2,2,0,0,3,0,0,0,2, +0,1,2,0,0,0,1,2,0,0,0,0,0,0,0,2,2,0,1,0,0,2,0,0,0,2,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,2,3,3,2,2,0,0,0,2,0,2,3,3,0,2,0,0,0,0,0,0,2,2,2,0,2,2,0,2,0,2, +0,2,2,0,0,2,2,2,2,1,0,0,2,2,0,2,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0, +0,2,0,3,2,3,0,0,0,3,0,0,2,2,0,2,0,2,2,2,0,0,2,0,0,0,0,0,0,0,0,2, +0,0,2,2,0,0,2,2,2,0,0,0,0,0,0,2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,2,0,0,3,2,0,2,2,2,2,2,0,0,0,2,0,0,0,0,2,0,1,0,0,2,0,1,0,0,0, +0,2,2,2,0,2,2,0,1,2,0,2,2,2,0,2,2,2,2,1,2,2,0,0,2,0,0,0,0,0,0,0, +0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, +0,2,0,2,0,2,2,0,0,0,0,1,2,1,0,0,2,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,3,2,3,0,0,2,0,0,0,2,2,0,2,0,0,0,1,0,0,2,0,2,0,2,2,0,0,0,0, +0,0,2,0,0,0,0,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0, +0,2,2,3,2,2,0,0,0,0,0,0,1,3,0,2,0,2,2,0,0,0,1,0,2,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,2,0,2,0,3,2,0,2,0,0,0,0,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +0,0,2,0,0,0,0,1,1,0,0,2,1,2,0,2,2,0,1,0,0,1,0,0,0,2,0,0,0,0,0,0, +0,3,0,2,2,2,0,0,2,0,0,0,2,0,0,0,2,3,0,2,0,0,0,0,0,0,2,2,0,0,0,2, +0,1,2,0,0,0,1,2,2,1,0,0,0,2,0,0,2,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,2,1,2,0,2,2,0,2,0,0,2,0,0,0,0,1,2,1,0,2,1,0,0,0,0,0,0,0,0,0,0, +0,0,2,0,0,0,3,1,2,2,0,2,0,0,0,0,2,0,0,0,2,0,0,3,0,0,0,0,2,2,2,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,2,1,0,2,0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,0,0,0,0,2, +0,2,2,0,0,2,2,2,2,2,0,1,2,0,0,0,2,2,0,1,0,2,0,0,2,2,0,0,0,0,0,0, 
+0,0,0,0,1,0,0,0,0,0,0,0,3,0,0,2,0,0,0,0,0,0,0,0,2,0,2,0,0,0,0,2, +0,1,2,0,0,0,0,2,2,1,0,1,0,1,0,2,2,2,1,0,0,0,0,0,0,1,0,0,0,0,0,0, +0,2,0,1,2,0,0,0,0,0,0,0,0,0,0,2,0,0,2,2,0,0,0,0,1,0,0,0,0,0,0,2, +0,2,2,0,0,0,0,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,2,0,0,0, +0,2,2,2,2,0,0,0,3,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,1, +0,0,2,0,0,0,0,1,2,0,0,0,0,0,0,2,2,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0, +0,2,0,2,2,2,0,0,2,0,0,0,0,0,0,0,2,2,2,0,0,0,2,0,0,0,0,0,0,0,0,2, +0,0,1,0,0,0,0,2,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0, +0,3,0,2,0,0,0,0,0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,2, +0,0,2,0,0,0,0,2,2,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,2,0,2,2,1,0,0,0,0,0,0,2,0,0,2,0,2,2,2,0,0,0,0,0,0,2,0,0,0,0,2, +0,0,2,0,0,2,0,2,2,0,0,0,0,2,0,2,0,0,0,0,0,2,0,0,0,2,0,0,0,0,0,0, +0,0,3,0,0,0,2,2,0,2,2,0,0,0,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,2,0,0,0,0,0, +0,2,2,2,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1, +0,0,0,0,0,0,0,2,1,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,2,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, +0,2,0,0,0,2,0,0,0,0,0,1,0,0,0,0,2,2,0,0,0,1,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,2,0,0,0, +0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,2,0,2,0,0,0, +0,0,0,0,0,0,0,0,2,1,0,0,0,0,0,0,2,0,0,0,1,2,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +) + +Latin7GreekModel = { \ + 'charToOrderMap': Latin7_CharToOrderMap, + 'precedenceMatrix': GreekLangModel, + 'mTypicalPositiveRatio': 0.982851, + 'keepEnglishLetter': constants.False, + 'charsetName': "ISO-8859-7" +} + +Win1253GreekModel = { \ + 'charToOrderMap': win1253_CharToOrderMap, + 'precedenceMatrix': GreekLangModel, + 'mTypicalPositiveRatio': 0.982851, + 'keepEnglishLetter': constants.False, + 'charsetName': "windows-1253" +} diff --git a/fanficdownloader/chardet/langhebrewmodel.py b/fanficdownloader/chardet/langhebrewmodel.py new file mode 100644 index 00000000..a8bcc65b --- /dev/null +++ b/fanficdownloader/chardet/langhebrewmodel.py @@ -0,0 +1,201 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Universal charset detector code. +# +# The Initial Developer of the Original Code is +# Simon Montagu +# Portions created by the Initial Developer are Copyright (C) 2005 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# Shy Shalom - original C code +# Shoshannah Forbes - original C code (?) +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
+# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +import constants + +# 255: Control characters that usually does not exist in any text +# 254: Carriage/Return +# 253: symbol (punctuation) that does not belong to word +# 252: 0 - 9 + +# Windows-1255 language model +# Character Mapping Table: +win1255_CharToOrderMap = ( \ +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 +253, 69, 91, 79, 80, 92, 89, 97, 90, 68,111,112, 82, 73, 95, 85, # 40 + 78,121, 86, 71, 67,102,107, 84,114,103,115,253,253,253,253,253, # 50 +253, 50, 74, 60, 61, 42, 76, 70, 64, 53,105, 93, 56, 65, 54, 49, # 60 + 66,110, 51, 43, 44, 63, 81, 77, 98, 75,108,253,253,253,253,253, # 70 +124,202,203,204,205, 40, 58,206,207,208,209,210,211,212,213,214, +215, 83, 52, 47, 46, 72, 32, 94,216,113,217,109,218,219,220,221, + 34,116,222,118,100,223,224,117,119,104,125,225,226, 87, 99,227, +106,122,123,228, 55,229,230,101,231,232,120,233, 48, 39, 57,234, + 30, 59, 41, 88, 33, 37, 36, 31, 29, 35,235, 62, 28,236,126,237, +238, 38, 45,239,240,241,242,243,127,244,245,246,247,248,249,250, + 9, 8, 20, 16, 3, 2, 24, 14, 22, 1, 25, 15, 4, 11, 6, 23, + 12, 19, 13, 26, 18, 27, 21, 17, 7, 10, 5,251,252,128, 96,253, +) + +# Model Table: +# total sequences: 100% +# first 512 sequences: 98.4004% +# first 1024 sequences: 1.5981% +# rest sequences: 0.087% +# negative sequences: 0.0015% +HebrewLangModel = ( \ +0,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,3,2,1,2,0,1,0,0, +3,0,3,1,0,0,1,3,2,0,1,1,2,0,2,2,2,1,1,1,1,2,1,1,1,2,0,0,2,2,0,1, 
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2, +1,2,1,2,1,2,0,0,2,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2, +1,2,1,3,1,1,0,0,2,0,0,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,0,1,2,2,1,3, +1,2,1,1,2,2,0,0,2,2,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,1,0,1,1,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,2,2,2,3,2, +1,2,1,2,2,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,2,2,3,2,2,2,1,2,2,2,2, +1,2,1,1,2,2,0,1,2,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,2,2,2,2,2, +0,2,0,2,2,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,0,2,2,2, +0,2,1,2,2,2,0,0,2,1,0,0,0,0,1,0,1,0,0,0,0,0,0,2,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,2,1,2,3,2,2,2, +1,2,1,2,2,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0, +3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,1,0,2,0,2, +0,2,1,2,2,2,0,0,1,2,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,2,0,0,1,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,2,2,3,2,1,2,1,1,1, +0,1,1,1,1,1,3,0,1,0,0,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, +3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0, +0,0,1,0,0,0,0,0,2,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,2, +0,2,0,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,3,3,3,2,3,3,3,2,1,2,3,3,2,3,3,3,3,2,3,2,1,2,0,2,1,2, +0,2,0,2,2,2,0,0,1,2,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0, +3,3,3,3,3,3,3,3,3,2,3,3,3,1,2,2,3,3,2,3,2,3,2,2,3,1,2,2,0,2,2,2, +0,2,1,2,2,2,0,0,1,2,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,1,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,2,2,2,3,3,3,3,1,3,2,2,2, +0,2,0,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, 
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,2,3,2,2,2,1,2,2,0,2,2,2,2, +0,2,0,2,2,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,1,3,2,3,3,2,3,3,2,2,1,2,2,2,2,2,2, +0,2,1,2,1,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,2,3,2,3,3,2,3,3,3,3,2,3,2,3,3,3,3,3,2,2,2,2,2,2,2,1, +0,2,0,1,2,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,3,3,3,2,1,2,3,3,3,3,3,3,3,2,3,2,3,2,1,2,3,0,2,1,2,2, +0,2,1,1,2,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,2,0, +3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,1,3,1,2,2,2,1,2,3,3,1,2,1,2,2,2,2, +0,1,1,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,3,3,0,2,3,3,3,1,3,3,3,1,2,2,2,2,1,1,2,2,2,2,2,2, +0,2,0,1,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,2,3,3,3,2,2,3,3,3,2,1,2,3,2,3,2,2,2,2,1,2,1,1,1,2,2, +0,2,1,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, +3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,1,0,0,0,0,0, +1,0,1,0,0,0,0,0,2,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,2,3,3,2,3,1,2,2,2,2,3,2,3,1,1,2,2,1,2,2,1,1,0,2,2,2,2, +0,1,0,1,2,2,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0, +3,0,0,1,1,0,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,2,0, +0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,1,0,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,0,1,0,0,0,0,0, +0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,0,0,1,1,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, +3,2,2,1,2,2,2,2,2,2,2,1,2,2,1,2,2,1,1,1,1,1,1,1,1,2,1,1,0,3,3,3, +0,3,0,2,2,2,2,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, +2,2,2,3,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,2,2,1,2,2,2,1,1,1,2,0,1, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,2,2,2,2,2,2,2,2,2,2,1,2,2,2,2,2,2,2,2,2,2,2,0,2,2,0,0,0,0,0,0, +0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
+2,3,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,2,1,0,2,1,0, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0, +0,3,1,1,2,2,2,2,2,1,2,2,2,1,1,2,2,2,2,2,2,2,1,2,2,1,0,1,1,1,1,0, +0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,2,1,1,1,1,2,1,1,2,1,0,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0,0, +0,0,2,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1,0,0, +2,1,1,2,2,2,2,2,2,2,2,2,2,2,1,2,2,2,2,2,1,2,1,2,1,1,1,1,0,0,0,0, +0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,2,1,2,2,2,2,2,2,2,2,2,2,1,2,1,2,1,1,2,1,1,1,2,1,2,1,2,0,1,0,1, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,1,2,2,2,1,2,2,2,2,2,2,2,2,1,2,1,1,1,1,1,1,2,1,2,1,1,0,1,0,1, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,1,2,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2, +0,2,0,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, +3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,1,1,1,1,1,1,1,0,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,2,0,1,1,1,0,1,0,0,0,1,1,0,1,1,0,0,0,0,0,1,1,0,0, +0,1,1,1,2,1,2,2,2,0,2,0,2,0,1,1,2,1,1,1,1,2,1,0,1,1,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,0,1,0,0,0,0,0,1,0,1,2,2,0,1,0,0,1,1,2,2,1,2,0,2,0,0,0,1,2,0,1, +2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,2,0,2,1,2,0,2,0,0,1,1,1,1,1,1,0,1,0,0,0,1,0,0,1, +2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,1,0,0,0,0,0,1,0,2,1,1,0,1,0,0,1,1,1,2,2,0,0,1,0,0,0,1,0,0,1, +1,1,2,1,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,2,2,1, +0,2,0,1,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
+2,1,0,0,1,0,1,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,1,1,1,1,1,1,1,1,2,1,0,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1, +2,0,1,0,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,1,0,1,1,1,0,1,0,0,1,1,2,1,1,2,0,1,0,0,0,1,1,0,1, +1,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,1,0,1,1,2,0,1,0,0,0,0,2,1,1,2,0,2,0,0,0,1,1,0,1, +1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,1,0,2,1,1,0,1,0,0,2,2,1,2,1,1,0,1,0,0,0,1,1,0,1, +2,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,1,2,2,0,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,1,0,1, +1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,1,2,2,0,0,0,0,2,1,1,1,0,2,1,1,0,0,0,2,1,0,1, +1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,1,0,1,1,2,0,1,0,0,1,1,0,2,1,1,0,1,0,0,0,1,1,0,1, +2,2,1,1,1,0,1,1,0,1,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,1,0,2,1,1,0,1,0,0,1,1,0,1,2,1,0,2,0,0,0,1,1,0,1, +2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0, +0,1,0,0,2,0,2,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,1,0,1,1,2,0,1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1, 
+1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,0,0,0,0,0,0,0,1,0,1,1,0,0,1,0,0,2,1,1,1,1,1,0,1,0,0,0,0,1,0,1, +0,1,1,1,2,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,1,2,1,0,0,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,0, +) + +Win1255HebrewModel = { \ + 'charToOrderMap': win1255_CharToOrderMap, + 'precedenceMatrix': HebrewLangModel, + 'mTypicalPositiveRatio': 0.984004, + 'keepEnglishLetter': constants.False, + 'charsetName': "windows-1255" +} diff --git a/fanficdownloader/chardet/langhungarianmodel.py b/fanficdownloader/chardet/langhungarianmodel.py new file mode 100644 index 00000000..d635f03c --- /dev/null +++ b/fanficdownloader/chardet/langhungarianmodel.py @@ -0,0 +1,225 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Communicator client code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
+# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +import constants + +# 255: Control characters that usually does not exist in any text +# 254: Carriage/Return +# 253: symbol (punctuation) that does not belong to word +# 252: 0 - 9 + +# Character Mapping Table: +Latin2_HungarianCharToOrderMap = ( \ +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 +253, 28, 40, 54, 45, 32, 50, 49, 38, 39, 53, 36, 41, 34, 35, 47, + 46, 71, 43, 33, 37, 57, 48, 64, 68, 55, 52,253,253,253,253,253, +253, 2, 18, 26, 17, 1, 27, 12, 20, 9, 22, 7, 6, 13, 4, 8, + 23, 67, 10, 5, 3, 21, 19, 65, 62, 16, 11,253,253,253,253,253, +159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174, +175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190, +191,192,193,194,195,196,197, 75,198,199,200,201,202,203,204,205, + 79,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220, +221, 51, 81,222, 78,223,224,225,226, 44,227,228,229, 61,230,231, +232,233,234, 58,235, 66, 59,236,237,238, 60, 69, 63,239,240,241, + 82, 14, 74,242, 70, 80,243, 72,244, 15, 83, 77, 84, 30, 76, 85, +245,246,247, 25, 73, 42, 24,248,249,250, 31, 56, 29,251,252,253, +) + +win1250HungarianCharToOrderMap = ( \ +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 +253, 28, 40, 54, 45, 32, 50, 49, 38, 39, 53, 36, 41, 34, 
35, 47, + 46, 72, 43, 33, 37, 57, 48, 64, 68, 55, 52,253,253,253,253,253, +253, 2, 18, 26, 17, 1, 27, 12, 20, 9, 22, 7, 6, 13, 4, 8, + 23, 67, 10, 5, 3, 21, 19, 65, 62, 16, 11,253,253,253,253,253, +161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176, +177,178,179,180, 78,181, 69,182,183,184,185,186,187,188,189,190, +191,192,193,194,195,196,197, 76,198,199,200,201,202,203,204,205, + 81,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220, +221, 51, 83,222, 80,223,224,225,226, 44,227,228,229, 61,230,231, +232,233,234, 58,235, 66, 59,236,237,238, 60, 70, 63,239,240,241, + 84, 14, 75,242, 71, 82,243, 73,244, 15, 85, 79, 86, 30, 77, 87, +245,246,247, 25, 74, 42, 24,248,249,250, 31, 56, 29,251,252,253, +) + +# Model Table: +# total sequences: 100% +# first 512 sequences: 94.7368% +# first 1024 sequences:5.2623% +# rest sequences: 0.8894% +# negative sequences: 0.0009% +HungarianLangModel = ( \ +0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, +3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,2,3,3,1,1,2,2,2,2,2,1,2, +3,2,2,3,3,3,3,3,2,3,3,3,3,3,3,1,2,3,3,3,3,2,3,3,1,1,3,3,0,1,1,1, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0, +3,2,1,3,3,3,3,3,2,3,3,3,3,3,1,1,2,3,3,3,3,3,3,3,1,1,3,2,0,1,1,1, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,3,3,3,3,3,1,1,2,3,3,3,1,3,3,3,3,3,1,3,3,2,2,0,3,2,3, +0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0, +3,3,3,3,3,3,2,3,3,3,2,3,3,2,3,3,3,3,3,2,3,3,2,2,3,2,3,2,0,3,2,2, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0, +3,3,3,3,3,3,2,3,3,3,3,3,2,3,3,3,1,2,3,2,2,3,1,2,3,3,2,2,0,3,3,3, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,3,2,3,3,3,3,2,3,3,3,3,0,2,3,2, +0,0,0,1,1,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,3,3,3,1,1,1,3,3,2,1,3,2,2,3,2,1,3,2,2,1,0,3,3,1, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, 
+3,2,2,3,3,3,3,3,1,2,3,3,3,3,1,2,1,3,3,3,3,2,2,3,1,1,3,2,0,1,1,1, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,2,1,3,3,3,3,3,2,2,1,3,3,3,0,1,1,2, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,2,3,3,3,2,0,3,2,3, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,1,0, +3,3,3,3,3,3,2,3,3,3,2,3,2,3,3,3,1,3,2,2,2,3,1,1,3,3,1,1,0,3,3,2, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,2,3,3,3,2,3,2,3,3,3,2,3,3,3,3,3,1,2,3,2,2,0,2,2,2, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +3,3,3,2,2,2,3,1,3,3,2,2,1,3,3,3,1,1,3,1,2,3,2,3,2,2,2,1,0,2,2,2, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0, +3,1,1,3,3,3,3,3,1,2,3,3,3,3,1,2,1,3,3,3,2,2,3,2,1,0,3,2,0,1,1,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,1,1,3,3,3,3,3,1,2,3,3,3,3,1,1,0,3,3,3,3,0,2,3,0,0,2,1,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,2,2,3,3,2,2,2,2,3,3,0,1,2,3,2,3,2,2,3,2,1,2,0,2,2,2, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0, +3,3,3,3,3,3,1,2,3,3,3,2,1,2,3,3,2,2,2,3,2,3,3,1,3,3,1,1,0,2,3,2, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +3,3,3,1,2,2,2,2,3,3,3,1,1,1,3,3,1,1,3,1,1,3,2,1,2,3,1,1,0,2,2,2, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +3,3,3,2,1,2,1,1,3,3,1,1,1,1,3,3,1,1,2,2,1,2,1,1,2,2,1,1,0,2,2,1, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +3,3,3,1,1,2,1,1,3,3,1,0,1,1,3,3,2,0,1,1,2,3,1,0,2,2,1,0,0,1,3,2, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +3,2,1,3,3,3,3,3,1,2,3,2,3,3,2,1,1,3,2,3,2,1,2,2,0,1,2,1,0,0,1,1, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, +3,3,3,3,2,2,2,2,3,1,2,2,1,1,3,3,0,3,2,1,2,3,2,1,3,3,1,1,0,2,1,3, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, 
+3,3,3,2,2,2,3,2,3,3,3,2,1,1,3,3,1,1,1,2,2,3,2,3,2,2,2,1,0,2,2,1, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +1,0,0,3,3,3,3,3,0,0,3,3,2,3,0,0,0,2,3,3,1,0,1,2,0,0,1,1,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,1,2,3,3,3,3,3,1,2,3,3,2,2,1,1,0,3,3,2,2,1,2,2,1,0,2,2,0,1,1,1, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,2,2,1,3,1,2,3,3,2,2,1,1,2,2,1,1,1,1,3,2,1,1,1,1,2,1,0,1,2,1, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0, +2,3,3,1,1,1,1,1,3,3,3,0,1,1,3,3,1,1,1,1,1,2,2,0,3,1,1,2,0,2,1,1, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +3,1,0,1,2,1,2,2,0,1,2,3,1,2,0,0,0,2,1,1,1,1,1,2,0,0,1,1,0,0,0,0, +1,2,1,2,2,2,1,2,1,2,0,2,0,2,2,1,1,2,1,1,2,1,1,1,0,1,0,0,0,1,1,0, +1,1,1,2,3,2,3,3,0,1,2,2,3,1,0,1,0,2,1,2,2,0,1,1,0,0,1,1,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,0,0,3,3,2,2,1,0,0,3,2,3,2,0,0,0,1,1,3,0,0,1,1,0,0,2,1,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,1,1,2,2,3,3,1,0,1,3,2,3,1,1,1,0,1,1,1,1,1,3,1,0,0,2,2,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,1,1,1,2,2,2,1,0,1,2,3,3,2,0,0,0,2,1,1,1,2,1,1,1,0,1,1,1,0,0,0, +1,2,2,2,2,2,1,1,1,2,0,2,1,1,1,1,1,2,1,1,1,1,1,1,0,1,1,1,0,0,1,1, +3,2,2,1,0,0,1,1,2,2,0,3,0,1,2,1,1,0,0,1,1,1,0,1,1,1,1,0,2,1,1,1, +2,2,1,1,1,2,1,2,1,1,1,1,1,1,1,2,1,1,1,2,3,1,1,1,1,1,1,1,1,1,0,1, +2,3,3,0,1,0,0,0,3,3,1,0,0,1,2,2,1,0,0,0,0,2,0,0,1,1,1,0,2,1,1,1, +2,1,1,1,1,1,1,2,1,1,0,1,1,0,1,1,1,0,1,2,1,1,0,1,1,1,1,1,1,1,0,1, +2,3,3,0,1,0,0,0,2,2,0,0,0,0,1,2,2,0,0,0,0,1,0,0,1,1,0,0,2,0,1,0, +2,1,1,1,1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,2,0,1,1,1,1,1,0,1, +3,2,2,0,1,0,1,0,2,3,2,0,0,1,2,2,1,0,0,1,1,1,0,0,2,1,0,1,2,2,1,1, +2,1,1,1,1,1,1,2,1,1,1,1,1,1,0,2,1,0,1,1,0,1,1,1,0,1,1,2,1,1,0,1, +2,2,2,0,0,1,0,0,2,2,1,1,0,0,2,1,1,0,0,0,1,2,0,0,2,1,0,0,2,1,1,1, +2,1,1,1,1,2,1,2,1,1,1,2,2,1,1,2,1,1,1,2,1,1,1,1,1,1,1,1,1,1,0,1, 
+1,2,3,0,0,0,1,0,3,2,1,0,0,1,2,1,1,0,0,0,0,2,1,0,1,1,0,0,2,1,2,1, +1,1,0,0,0,1,0,1,1,1,1,1,2,0,0,1,0,0,0,2,0,0,1,1,1,1,1,1,1,1,0,1, +3,0,0,2,1,2,2,1,0,0,2,1,2,2,0,0,0,2,1,1,1,0,1,1,0,0,1,1,2,0,0,0, +1,2,1,2,2,1,1,2,1,2,0,1,1,1,1,1,1,1,1,1,2,1,1,0,0,1,1,1,1,0,0,1, +1,3,2,0,0,0,1,0,2,2,2,0,0,0,2,2,1,0,0,0,0,3,1,1,1,1,0,0,2,1,1,1, +2,1,0,1,1,1,0,1,1,1,1,1,1,1,0,2,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1, +2,3,2,0,0,0,1,0,2,2,0,0,0,0,2,1,1,0,0,0,0,2,1,0,1,1,0,0,2,1,1,0, +2,1,1,1,1,2,1,2,1,2,0,1,1,1,0,2,1,1,1,2,1,1,1,1,0,1,1,1,1,1,0,1, +3,1,1,2,2,2,3,2,1,1,2,2,1,1,0,1,0,2,2,1,1,1,1,1,0,0,1,1,0,1,1,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,2,2,0,0,0,0,0,2,2,0,0,0,0,2,2,1,0,0,0,1,1,0,0,1,2,0,0,2,1,1,1, +2,2,1,1,1,2,1,2,1,1,0,1,1,1,1,2,1,1,1,2,1,1,1,1,0,1,2,1,1,1,0,1, +1,0,0,1,2,3,2,1,0,0,2,0,1,1,0,0,0,1,1,1,1,0,1,1,0,0,1,0,0,0,0,0, +1,2,1,2,1,2,1,1,1,2,0,2,1,1,1,0,1,2,0,0,1,1,1,0,0,0,0,0,0,0,0,0, +2,3,2,0,0,0,0,0,1,1,2,1,0,0,1,1,1,0,0,0,0,2,0,0,1,1,0,0,2,1,1,1, +2,1,1,1,1,1,1,2,1,0,1,1,1,1,0,2,1,1,1,1,1,1,0,1,0,1,1,1,1,1,0,1, +1,2,2,0,1,1,1,0,2,2,2,0,0,0,3,2,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0, +1,1,0,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,2,1,1,1,0,0,1,1,1,0,1,0,1, +2,1,0,2,1,1,2,2,1,1,2,1,1,1,0,0,0,1,1,0,1,1,1,1,0,0,1,1,1,0,0,0, +1,2,2,2,2,2,1,1,1,2,0,2,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,0,1,0, +1,2,3,0,0,0,1,0,2,2,0,0,0,0,2,2,0,0,0,0,0,1,0,0,1,0,0,0,2,0,1,0, +2,1,1,1,1,1,0,2,0,0,0,1,2,1,1,1,1,0,1,2,0,1,0,1,0,1,1,1,0,1,0,1, +2,2,2,0,0,0,1,0,2,1,2,0,0,0,1,1,2,0,0,0,0,1,0,0,1,1,0,0,2,1,0,1, +2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,0,1,1,1,1,1,0,1, +1,2,2,0,0,0,1,0,2,2,2,0,0,0,1,1,0,0,0,0,0,1,1,0,2,0,0,1,1,1,0,1, +1,0,1,1,1,1,1,1,0,1,1,1,1,0,0,1,0,0,1,1,0,1,0,1,1,1,1,1,0,0,0,1, +1,0,0,1,0,1,2,1,0,0,1,1,1,2,0,0,0,1,1,0,1,0,1,1,0,0,1,0,0,0,0,0, +0,2,1,2,1,1,1,1,1,2,0,2,0,1,1,0,1,2,1,0,1,1,1,0,0,0,0,0,0,1,0,0, +2,1,1,0,1,2,0,0,1,1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,2,1,0,1, +2,2,1,1,1,1,1,2,1,1,0,1,1,1,1,2,1,1,1,2,1,1,0,1,0,1,1,1,1,1,0,1, 
+1,2,2,0,0,0,0,0,1,1,0,0,0,0,2,1,0,0,0,0,0,2,0,0,2,2,0,0,2,0,0,1, +2,1,1,1,1,1,1,1,0,1,1,0,1,1,0,1,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1, +1,1,2,0,0,3,1,0,2,1,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,1,0,0,1,0,1,0, +1,2,1,0,1,1,1,2,1,1,0,1,1,1,1,1,0,0,0,1,1,1,1,1,0,1,0,0,0,1,0,0, +2,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,2,0,0,0, +2,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,2,1,1,0,0,1,1,1,1,1,0,1, +2,1,1,1,2,1,1,1,0,1,1,2,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,1,0,1,1,1,1,1,0,0,1,1,2,1,0,0,0,1,1,0,0,0,1,1,0,0,1,0,1,0,0,0, +1,2,1,1,1,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,0,1,0,0, +2,0,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,1,1,1,2,0,0,1,0,0,1,0,1,0,0,0, +0,1,1,1,1,1,1,1,1,2,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0, +1,0,0,1,1,1,1,1,0,0,2,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0, +0,1,1,1,1,1,1,0,1,1,0,1,0,1,1,0,1,1,0,0,1,1,1,0,0,0,0,0,0,0,0,0, +1,0,0,1,1,1,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, +0,1,1,1,1,1,0,0,1,1,0,1,0,1,0,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0, +0,0,0,1,0,0,0,0,0,0,1,1,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,1,1,1,0,1,0,0,1,1,0,1,0,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0, +2,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,0,0,1,1,1,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0, +0,1,1,1,1,1,1,0,1,1,0,1,0,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0, +) + +Latin2HungarianModel = { \ + 'charToOrderMap': Latin2_HungarianCharToOrderMap, + 'precedenceMatrix': HungarianLangModel, + 'mTypicalPositiveRatio': 0.947368, + 'keepEnglishLetter': constants.True, + 'charsetName': "ISO-8859-2" +} + +Win1250HungarianModel = { \ + 'charToOrderMap': win1250HungarianCharToOrderMap, + 'precedenceMatrix': HungarianLangModel, + 'mTypicalPositiveRatio': 0.947368, + 'keepEnglishLetter': constants.True, + 'charsetName': "windows-1250" +} diff --git a/fanficdownloader/chardet/langthaimodel.py 
b/fanficdownloader/chardet/langthaimodel.py new file mode 100644 index 00000000..96ec054f --- /dev/null +++ b/fanficdownloader/chardet/langthaimodel.py @@ -0,0 +1,200 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Communicator client code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +import constants + +# 255: Control characters that usually does not exist in any text +# 254: Carriage/Return +# 253: symbol (punctuation) that does not belong to word +# 252: 0 - 9 + +# The following result for thai was collected from a limited sample (1M). 
+ +# Character Mapping Table: +TIS620CharToOrderMap = ( \ +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 +253,182,106,107,100,183,184,185,101, 94,186,187,108,109,110,111, # 40 +188,189,190, 89, 95,112,113,191,192,193,194,253,253,253,253,253, # 50 +253, 64, 72, 73,114, 74,115,116,102, 81,201,117, 90,103, 78, 82, # 60 + 96,202, 91, 79, 84,104,105, 97, 98, 92,203,253,253,253,253,253, # 70 +209,210,211,212,213, 88,214,215,216,217,218,219,220,118,221,222, +223,224, 99, 85, 83,225,226,227,228,229,230,231,232,233,234,235, +236, 5, 30,237, 24,238, 75, 8, 26, 52, 34, 51,119, 47, 58, 57, + 49, 53, 55, 43, 20, 19, 44, 14, 48, 3, 17, 25, 39, 62, 31, 54, + 45, 9, 16, 2, 61, 15,239, 12, 42, 46, 18, 21, 76, 4, 66, 63, + 22, 10, 1, 36, 23, 13, 40, 27, 32, 35, 86,240,241,242,243,244, + 11, 28, 41, 29, 33,245, 50, 37, 6, 7, 67, 77, 38, 93,246,247, + 68, 56, 59, 65, 69, 60, 70, 80, 71, 87,248,249,250,251,252,253, +) + +# Model Table: +# total sequences: 100% +# first 512 sequences: 92.6386% +# first 1024 sequences:7.3177% +# rest sequences: 1.0230% +# negative sequences: 0.0436% +ThaiLangModel = ( \ +0,1,3,3,3,3,0,0,3,3,0,3,3,0,3,3,3,3,3,3,3,3,0,0,3,3,3,0,3,3,3,3, +0,3,3,0,0,0,1,3,0,3,3,2,3,3,0,1,2,3,3,3,3,0,2,0,2,0,0,3,2,1,2,2, +3,0,3,3,2,3,0,0,3,3,0,3,3,0,3,3,3,3,3,3,3,3,3,0,3,2,3,0,2,2,2,3, +0,2,3,0,0,0,0,1,0,1,2,3,1,1,3,2,2,0,1,1,0,0,1,0,0,0,0,0,0,0,1,1, +3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,2,3,3,2,3,2,3,3,2,2,2, +3,1,2,3,0,3,3,2,2,1,2,3,3,1,2,0,1,3,0,1,0,0,1,0,0,0,0,0,0,0,1,1, +3,3,2,2,3,3,3,3,1,2,3,3,3,3,3,2,2,2,2,3,3,2,2,3,3,2,2,3,2,3,2,2, +3,3,1,2,3,1,2,2,3,3,1,0,2,1,0,0,3,1,2,1,0,0,1,0,0,0,0,0,0,1,0,1, +3,3,3,3,3,3,2,2,3,3,3,3,2,3,2,2,3,3,2,2,3,2,2,2,2,1,1,3,1,2,1,1, +3,2,1,0,2,1,0,1,0,1,1,0,1,1,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0, 
+3,3,3,2,3,2,3,3,2,2,3,2,3,3,2,3,1,1,2,3,2,2,2,3,2,2,2,2,2,1,2,1, +2,2,1,1,3,3,2,1,0,1,2,2,0,1,3,0,0,0,1,1,0,0,0,0,0,2,3,0,0,2,1,1, +3,3,2,3,3,2,0,0,3,3,0,3,3,0,2,2,3,1,2,2,1,1,1,0,2,2,2,0,2,2,1,1, +0,2,1,0,2,0,0,2,0,1,0,0,1,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,1,0, +3,3,2,3,3,2,0,0,3,3,0,2,3,0,2,1,2,2,2,2,1,2,0,0,2,2,2,0,2,2,1,1, +0,2,1,0,2,0,0,2,0,1,1,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0, +3,3,2,3,2,3,2,0,2,2,1,3,2,1,3,2,1,2,3,2,2,3,0,2,3,2,2,1,2,2,2,2, +1,2,2,0,0,0,0,2,0,1,2,0,1,1,1,0,1,0,3,1,1,0,0,0,0,0,0,0,0,0,1,0, +3,3,2,3,3,2,3,2,2,2,3,2,2,3,2,2,1,2,3,2,2,3,1,3,2,2,2,3,2,2,2,3, +3,2,1,3,0,1,1,1,0,2,1,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,0,0,0,2,0,0, +1,0,0,3,0,3,3,3,3,3,0,0,3,0,2,2,3,3,3,3,3,0,0,0,1,1,3,0,0,0,0,2, +0,0,1,0,0,0,0,0,0,0,2,3,0,0,0,3,0,2,0,0,0,0,0,3,0,0,0,0,0,0,0,0, +2,0,3,3,3,3,0,0,2,3,0,0,3,0,3,3,2,3,3,3,3,3,0,0,3,3,3,0,0,0,3,3, +0,0,3,0,0,0,0,2,0,0,2,1,1,3,0,0,1,0,0,2,3,0,1,0,0,0,0,0,0,0,1,0, +3,3,3,3,2,3,3,3,3,3,3,3,1,2,1,3,3,2,2,1,2,2,2,3,1,1,2,0,2,1,2,1, +2,2,1,0,0,0,1,1,0,1,0,1,1,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0, +3,0,2,1,2,3,3,3,0,2,0,2,2,0,2,1,3,2,2,1,2,1,0,0,2,2,1,0,2,1,2,2, +0,1,1,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,2,1,3,3,1,1,3,0,2,3,1,1,3,2,1,1,2,0,2,2,3,2,1,1,1,1,1,2, +3,0,0,1,3,1,2,1,2,0,3,0,0,0,1,0,3,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0, +3,3,1,1,3,2,3,3,3,1,3,2,1,3,2,1,3,2,2,2,2,1,3,3,1,2,1,3,1,2,3,0, +2,1,1,3,2,2,2,1,2,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2, +3,3,2,3,2,3,3,2,3,2,3,2,3,3,2,1,0,3,2,2,2,1,2,2,2,1,2,2,1,2,1,1, +2,2,2,3,0,1,3,1,1,1,1,0,1,1,0,2,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,2,3,2,2,1,1,3,2,3,2,3,2,0,3,2,2,1,2,0,2,2,2,1,2,2,2,2,1, +3,2,1,2,2,1,0,2,0,1,0,0,1,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,2,3,1,2,3,3,2,2,3,0,1,1,2,0,3,3,2,2,3,0,1,1,3,0,0,0,0, +3,1,0,3,3,0,2,0,2,1,0,0,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,2,3,2,3,3,0,1,3,1,1,2,1,2,1,1,3,1,1,0,2,3,1,1,1,1,1,1,1,1, +3,1,1,2,2,2,2,1,1,1,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, 
+3,2,2,1,1,2,1,3,3,2,3,2,2,3,2,2,3,1,2,2,1,2,0,3,2,1,2,2,2,2,2,1, +3,2,1,2,2,2,1,1,1,1,0,0,1,1,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,1,3,3,0,2,1,0,3,2,0,0,3,1,0,1,1,0,1,0,0,0,0,0,1, +1,0,0,1,0,3,2,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,2,2,2,3,0,0,1,3,0,3,2,0,3,2,2,3,3,3,3,3,1,0,2,2,2,0,2,2,1,2, +0,2,3,0,0,0,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +3,0,2,3,1,3,3,2,3,3,0,3,3,0,3,2,2,3,2,3,3,3,0,0,2,2,3,0,1,1,1,3, +0,0,3,0,0,0,2,2,0,1,3,0,1,2,2,2,3,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1, +3,2,3,3,2,0,3,3,2,2,3,1,3,2,1,3,2,0,1,2,2,0,2,3,2,1,0,3,0,0,0,0, +3,0,0,2,3,1,3,0,0,3,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,1,3,2,2,2,1,2,0,1,3,1,1,3,1,3,0,0,2,1,1,1,1,2,1,1,1,0,2,1,0,1, +1,2,0,0,0,3,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,3,1,0,0,0,1,0, +3,3,3,3,2,2,2,2,2,1,3,1,1,1,2,0,1,1,2,1,2,1,3,2,0,0,3,1,1,1,1,1, +3,1,0,2,3,0,0,0,3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,2,3,0,3,3,0,2,0,0,0,0,0,0,0,3,0,0,1,0,0,0,0,0,0,0,0,0,0,0, +0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,2,3,1,3,0,0,1,2,0,0,2,0,3,3,2,3,3,3,2,3,0,0,2,2,2,0,0,0,2,2, +0,0,1,0,0,0,0,3,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, +0,0,0,3,0,2,0,0,0,0,0,0,0,0,0,0,1,2,3,1,3,3,0,0,1,0,3,0,0,0,0,0, +0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,1,2,3,1,2,3,1,0,3,0,2,2,1,0,2,1,1,2,0,1,0,0,1,1,1,1,0,1,0,0, +1,0,0,0,0,1,1,0,3,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,2,1,0,1,1,1,3,1,2,2,2,2,2,2,1,1,1,1,0,3,1,0,1,3,1,1,1,1, +1,1,0,2,0,1,3,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,1, +3,0,2,2,1,3,3,2,3,3,0,1,1,0,2,2,1,2,1,3,3,1,0,0,3,2,0,0,0,0,2,1, +0,1,0,0,0,0,1,2,0,1,1,3,1,1,2,2,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, +0,0,3,0,0,1,0,0,0,3,0,0,3,0,3,1,0,1,1,1,3,2,0,0,0,3,0,0,0,0,2,0, +0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0, +3,3,1,3,2,1,3,3,1,2,2,0,1,2,1,0,1,2,0,0,0,0,0,3,0,0,0,3,0,0,0,0, +3,0,0,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
+3,0,1,2,0,3,3,3,2,2,0,1,1,0,1,3,0,0,0,2,2,0,0,0,0,3,1,0,1,0,0,0, +0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,2,3,1,2,0,0,2,1,0,3,1,0,1,2,0,1,1,1,1,3,0,0,3,1,1,0,2,2,1,1, +0,2,0,0,0,0,0,1,0,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,0,3,1,2,0,0,2,2,0,1,2,0,1,0,1,3,1,2,1,0,0,0,2,0,3,0,0,0,1,0, +0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,1,1,2,2,0,0,0,2,0,2,1,0,1,1,0,1,1,1,2,1,0,0,1,1,1,0,2,1,1,1, +0,1,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1, +0,0,0,2,0,1,3,1,1,1,1,0,0,0,0,3,2,0,1,0,0,0,1,2,0,0,0,1,0,0,0,0, +0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,3,3,3,3,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,0,2,3,2,2,0,0,0,1,0,0,0,0,2,3,2,1,2,2,3,0,0,0,2,3,1,0,0,0,1,1, +0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0, +3,3,2,2,0,1,0,0,0,0,2,0,2,0,1,0,0,0,1,1,0,0,0,2,1,0,1,0,1,1,0,0, +0,1,0,2,0,0,1,0,3,0,1,0,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,1,0,0,1,0,0,0,0,0,1,1,2,0,0,0,0,1,0,0,1,3,1,0,0,0,0,1,1,0,0, +0,1,0,0,0,0,3,0,0,0,0,0,0,3,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0, +3,3,1,1,1,1,2,3,0,0,2,1,1,1,1,1,0,2,1,1,0,0,0,2,1,0,1,2,1,1,0,1, +2,1,0,3,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,3,1,0,0,0,0,0,0,0,3,0,0,0,3,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1, +0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,2,0,0,0,0,0,0,1,2,1,0,1,1,0,2,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,2,0,0,0,1,3,0,1,0,0,0,2,0,0,0,0,0,0,0,1,2,0,0,0,0,0, +3,3,0,0,1,1,2,0,0,1,2,1,0,1,1,1,0,1,1,0,0,2,1,1,0,1,0,0,1,1,1,0, +0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,2,2,1,0,0,0,0,1,0,0,0,0,3,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0, +2,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,3,0,0,1,1,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
+3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,1,0,1,2,0,1,2,0,0,1,1,0,2,0,1,0,0,1,0,0,0,0,1,0,0,0,2,0,0,0,0, +1,0,0,1,0,1,1,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,1,0,0,0,0,0,0,0,1,1,0,1,1,0,2,1,3,0,0,0,0,1,1,0,0,0,0,0,0,0,3, +1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,0,1,0,1,0,0,2,0,0,2,0,0,1,1,2,0,0,1,1,0,0,0,1,0,0,0,1,1,0,0,0, +1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, +1,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,1,0,0,0, +2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,0,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,3,0,0,0, +2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,0,0, +1,0,0,0,0,0,0,0,0,1,0,0,0,0,2,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,1,1,0,0,2,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +) + +TIS620ThaiModel = { \ + 'charToOrderMap': TIS620CharToOrderMap, + 'precedenceMatrix': ThaiLangModel, + 'mTypicalPositiveRatio': 0.926386, + 
'keepEnglishLetter': constants.False, + 'charsetName': "TIS-620" +} diff --git a/fanficdownloader/chardet/latin1prober.py b/fanficdownloader/chardet/latin1prober.py new file mode 100644 index 00000000..b46129ba --- /dev/null +++ b/fanficdownloader/chardet/latin1prober.py @@ -0,0 +1,136 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Universal charset detector code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 2001 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# Shy Shalom - original C code +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
+# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from charsetprober import CharSetProber +import constants +import operator + +FREQ_CAT_NUM = 4 + +UDF = 0 # undefined +OTH = 1 # other +ASC = 2 # ascii capital letter +ASS = 3 # ascii small letter +ACV = 4 # accent capital vowel +ACO = 5 # accent capital other +ASV = 6 # accent small vowel +ASO = 7 # accent small other +CLASS_NUM = 8 # total classes + +Latin1_CharToClass = ( \ + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 00 - 07 + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 08 - 0F + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 10 - 17 + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 18 - 1F + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 20 - 27 + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 28 - 2F + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 30 - 37 + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 38 - 3F + OTH, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 40 - 47 + ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 48 - 4F + ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 50 - 57 + ASC, ASC, ASC, OTH, OTH, OTH, OTH, OTH, # 58 - 5F + OTH, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 60 - 67 + ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 68 - 6F + ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 70 - 77 + ASS, ASS, ASS, OTH, OTH, OTH, OTH, OTH, # 78 - 7F + OTH, UDF, OTH, ASO, OTH, OTH, OTH, OTH, # 80 - 87 + OTH, OTH, ACO, OTH, ACO, UDF, ACO, UDF, # 88 - 8F + UDF, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 90 - 97 + OTH, OTH, ASO, OTH, ASO, UDF, ASO, ACO, # 98 - 9F + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # A0 - A7 + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # A8 - AF + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # B0 - B7 + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # B8 - BF + ACV, ACV, ACV, ACV, ACV, ACV, ACO, ACO, # C0 - C7 + ACV, ACV, ACV, ACV, 
ACV, ACV, ACV, ACV, # C8 - CF + ACO, ACO, ACV, ACV, ACV, ACV, ACV, OTH, # D0 - D7 + ACV, ACV, ACV, ACV, ACV, ACO, ACO, ACO, # D8 - DF + ASV, ASV, ASV, ASV, ASV, ASV, ASO, ASO, # E0 - E7 + ASV, ASV, ASV, ASV, ASV, ASV, ASV, ASV, # E8 - EF + ASO, ASO, ASV, ASV, ASV, ASV, ASV, OTH, # F0 - F7 + ASV, ASV, ASV, ASV, ASV, ASO, ASO, ASO, # F8 - FF +) + +# 0 : illegal +# 1 : very unlikely +# 2 : normal +# 3 : very likely +Latin1ClassModel = ( \ +# UDF OTH ASC ASS ACV ACO ASV ASO + 0, 0, 0, 0, 0, 0, 0, 0, # UDF + 0, 3, 3, 3, 3, 3, 3, 3, # OTH + 0, 3, 3, 3, 3, 3, 3, 3, # ASC + 0, 3, 3, 3, 1, 1, 3, 3, # ASS + 0, 3, 3, 3, 1, 2, 1, 2, # ACV + 0, 3, 3, 3, 3, 3, 3, 3, # ACO + 0, 3, 1, 3, 1, 1, 1, 3, # ASV + 0, 3, 1, 3, 1, 1, 3, 3, # ASO +) + +class Latin1Prober(CharSetProber): + def __init__(self): + CharSetProber.__init__(self) + self.reset() + + def reset(self): + self._mLastCharClass = OTH + self._mFreqCounter = [0] * FREQ_CAT_NUM + CharSetProber.reset(self) + + def get_charset_name(self): + return "windows-1252" + + def feed(self, aBuf): + aBuf = self.filter_with_english_letters(aBuf) + for c in aBuf: + charClass = Latin1_CharToClass[ord(c)] + freq = Latin1ClassModel[(self._mLastCharClass * CLASS_NUM) + charClass] + if freq == 0: + self._mState = constants.eNotMe + break + self._mFreqCounter[freq] += 1 + self._mLastCharClass = charClass + + return self.get_state() + + def get_confidence(self): + if self.get_state() == constants.eNotMe: + return 0.01 + + total = reduce(operator.add, self._mFreqCounter) + if total < 0.01: + confidence = 0.0 + else: + confidence = (self._mFreqCounter[3] / total) - (self._mFreqCounter[1] * 20.0 / total) + if confidence < 0.0: + confidence = 0.0 + # lower the confidence of latin1 so that other more accurate detector + # can take priority. 
+ confidence = confidence * 0.5 + return confidence diff --git a/fanficdownloader/chardet/mbcharsetprober.py b/fanficdownloader/chardet/mbcharsetprober.py new file mode 100644 index 00000000..a8131445 --- /dev/null +++ b/fanficdownloader/chardet/mbcharsetprober.py @@ -0,0 +1,82 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Universal charset detector code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 2001 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# Shy Shalom - original C code +# Proofpoint, Inc. +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
+# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +import constants, sys +from constants import eStart, eError, eItsMe +from charsetprober import CharSetProber + +class MultiByteCharSetProber(CharSetProber): + def __init__(self): + CharSetProber.__init__(self) + self._mDistributionAnalyzer = None + self._mCodingSM = None + self._mLastChar = ['\x00', '\x00'] + + def reset(self): + CharSetProber.reset(self) + if self._mCodingSM: + self._mCodingSM.reset() + if self._mDistributionAnalyzer: + self._mDistributionAnalyzer.reset() + self._mLastChar = ['\x00', '\x00'] + + def get_charset_name(self): + pass + + def feed(self, aBuf): + aLen = len(aBuf) + for i in range(0, aLen): + codingState = self._mCodingSM.next_state(aBuf[i]) + if codingState == eError: + if constants._debug: + sys.stderr.write(self.get_charset_name() + ' prober hit error at byte ' + str(i) + '\n') + self._mState = constants.eNotMe + break + elif codingState == eItsMe: + self._mState = constants.eFoundIt + break + elif codingState == eStart: + charLen = self._mCodingSM.get_current_charlen() + if i == 0: + self._mLastChar[1] = aBuf[0] + self._mDistributionAnalyzer.feed(self._mLastChar, charLen) + else: + self._mDistributionAnalyzer.feed(aBuf[i-1:i+1], charLen) + + self._mLastChar[0] = aBuf[aLen - 1] + + if self.get_state() == constants.eDetecting: + if self._mDistributionAnalyzer.got_enough_data() and \ + (self.get_confidence() > constants.SHORTCUT_THRESHOLD): + self._mState = constants.eFoundIt + + return self.get_state() + + def get_confidence(self): + return self._mDistributionAnalyzer.get_confidence() diff --git a/fanficdownloader/chardet/mbcsgroupprober.py b/fanficdownloader/chardet/mbcsgroupprober.py new file mode 100644 index 00000000..941cc3e3 --- /dev/null 
+++ b/fanficdownloader/chardet/mbcsgroupprober.py @@ -0,0 +1,50 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Universal charset detector code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 2001 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# Shy Shalom - original C code +# Proofpoint, Inc. +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
+# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from charsetgroupprober import CharSetGroupProber +from utf8prober import UTF8Prober +from sjisprober import SJISProber +from eucjpprober import EUCJPProber +from gb2312prober import GB2312Prober +from euckrprober import EUCKRProber +from big5prober import Big5Prober +from euctwprober import EUCTWProber + +class MBCSGroupProber(CharSetGroupProber): + def __init__(self): + CharSetGroupProber.__init__(self) + self._mProbers = [ \ + UTF8Prober(), + SJISProber(), + EUCJPProber(), + GB2312Prober(), + EUCKRProber(), + Big5Prober(), + EUCTWProber()] + self.reset() diff --git a/fanficdownloader/chardet/mbcssm.py b/fanficdownloader/chardet/mbcssm.py new file mode 100644 index 00000000..e46c1ffe --- /dev/null +++ b/fanficdownloader/chardet/mbcssm.py @@ -0,0 +1,514 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is mozilla.org code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
+# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from constants import eStart, eError, eItsMe + +# BIG5 + +BIG5_cls = ( \ + 1,1,1,1,1,1,1,1, # 00 - 07 #allow 0x00 as legal value + 1,1,1,1,1,1,0,0, # 08 - 0f + 1,1,1,1,1,1,1,1, # 10 - 17 + 1,1,1,0,1,1,1,1, # 18 - 1f + 1,1,1,1,1,1,1,1, # 20 - 27 + 1,1,1,1,1,1,1,1, # 28 - 2f + 1,1,1,1,1,1,1,1, # 30 - 37 + 1,1,1,1,1,1,1,1, # 38 - 3f + 2,2,2,2,2,2,2,2, # 40 - 47 + 2,2,2,2,2,2,2,2, # 48 - 4f + 2,2,2,2,2,2,2,2, # 50 - 57 + 2,2,2,2,2,2,2,2, # 58 - 5f + 2,2,2,2,2,2,2,2, # 60 - 67 + 2,2,2,2,2,2,2,2, # 68 - 6f + 2,2,2,2,2,2,2,2, # 70 - 77 + 2,2,2,2,2,2,2,1, # 78 - 7f + 4,4,4,4,4,4,4,4, # 80 - 87 + 4,4,4,4,4,4,4,4, # 88 - 8f + 4,4,4,4,4,4,4,4, # 90 - 97 + 4,4,4,4,4,4,4,4, # 98 - 9f + 4,3,3,3,3,3,3,3, # a0 - a7 + 3,3,3,3,3,3,3,3, # a8 - af + 3,3,3,3,3,3,3,3, # b0 - b7 + 3,3,3,3,3,3,3,3, # b8 - bf + 3,3,3,3,3,3,3,3, # c0 - c7 + 3,3,3,3,3,3,3,3, # c8 - cf + 3,3,3,3,3,3,3,3, # d0 - d7 + 3,3,3,3,3,3,3,3, # d8 - df + 3,3,3,3,3,3,3,3, # e0 - e7 + 3,3,3,3,3,3,3,3, # e8 - ef + 3,3,3,3,3,3,3,3, # f0 - f7 + 3,3,3,3,3,3,3,0) # f8 - ff + +BIG5_st = ( \ + eError,eStart,eStart, 3,eError,eError,eError,eError,#00-07 + eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,#08-0f + eError,eStart,eStart,eStart,eStart,eStart,eStart,eStart)#10-17 + +Big5CharLenTable = (0, 1, 1, 2, 0) + +Big5SMModel = {'classTable': BIG5_cls, + 'classFactor': 5, + 'stateTable': BIG5_st, + 'charLenTable': Big5CharLenTable, + 'name': 'Big5'} + +# EUC-JP + +EUCJP_cls = ( \ + 4,4,4,4,4,4,4,4, # 00 - 07 + 4,4,4,4,4,4,5,5, # 08 - 0f + 4,4,4,4,4,4,4,4, # 10 - 17 + 4,4,4,5,4,4,4,4, # 18 - 1f + 4,4,4,4,4,4,4,4, # 20 - 27 + 4,4,4,4,4,4,4,4, # 28 - 2f + 4,4,4,4,4,4,4,4, # 30 - 37 + 4,4,4,4,4,4,4,4, # 38 - 3f + 4,4,4,4,4,4,4,4, # 40 - 
47 + 4,4,4,4,4,4,4,4, # 48 - 4f + 4,4,4,4,4,4,4,4, # 50 - 57 + 4,4,4,4,4,4,4,4, # 58 - 5f + 4,4,4,4,4,4,4,4, # 60 - 67 + 4,4,4,4,4,4,4,4, # 68 - 6f + 4,4,4,4,4,4,4,4, # 70 - 77 + 4,4,4,4,4,4,4,4, # 78 - 7f + 5,5,5,5,5,5,5,5, # 80 - 87 + 5,5,5,5,5,5,1,3, # 88 - 8f + 5,5,5,5,5,5,5,5, # 90 - 97 + 5,5,5,5,5,5,5,5, # 98 - 9f + 5,2,2,2,2,2,2,2, # a0 - a7 + 2,2,2,2,2,2,2,2, # a8 - af + 2,2,2,2,2,2,2,2, # b0 - b7 + 2,2,2,2,2,2,2,2, # b8 - bf + 2,2,2,2,2,2,2,2, # c0 - c7 + 2,2,2,2,2,2,2,2, # c8 - cf + 2,2,2,2,2,2,2,2, # d0 - d7 + 2,2,2,2,2,2,2,2, # d8 - df + 0,0,0,0,0,0,0,0, # e0 - e7 + 0,0,0,0,0,0,0,0, # e8 - ef + 0,0,0,0,0,0,0,0, # f0 - f7 + 0,0,0,0,0,0,0,5) # f8 - ff + +EUCJP_st = ( \ + 3, 4, 3, 5,eStart,eError,eError,eError,#00-07 + eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f + eItsMe,eItsMe,eStart,eError,eStart,eError,eError,eError,#10-17 + eError,eError,eStart,eError,eError,eError, 3,eError,#18-1f + 3,eError,eError,eError,eStart,eStart,eStart,eStart)#20-27 + +EUCJPCharLenTable = (2, 2, 2, 3, 1, 0) + +EUCJPSMModel = {'classTable': EUCJP_cls, + 'classFactor': 6, + 'stateTable': EUCJP_st, + 'charLenTable': EUCJPCharLenTable, + 'name': 'EUC-JP'} + +# EUC-KR + +EUCKR_cls = ( \ + 1,1,1,1,1,1,1,1, # 00 - 07 + 1,1,1,1,1,1,0,0, # 08 - 0f + 1,1,1,1,1,1,1,1, # 10 - 17 + 1,1,1,0,1,1,1,1, # 18 - 1f + 1,1,1,1,1,1,1,1, # 20 - 27 + 1,1,1,1,1,1,1,1, # 28 - 2f + 1,1,1,1,1,1,1,1, # 30 - 37 + 1,1,1,1,1,1,1,1, # 38 - 3f + 1,1,1,1,1,1,1,1, # 40 - 47 + 1,1,1,1,1,1,1,1, # 48 - 4f + 1,1,1,1,1,1,1,1, # 50 - 57 + 1,1,1,1,1,1,1,1, # 58 - 5f + 1,1,1,1,1,1,1,1, # 60 - 67 + 1,1,1,1,1,1,1,1, # 68 - 6f + 1,1,1,1,1,1,1,1, # 70 - 77 + 1,1,1,1,1,1,1,1, # 78 - 7f + 0,0,0,0,0,0,0,0, # 80 - 87 + 0,0,0,0,0,0,0,0, # 88 - 8f + 0,0,0,0,0,0,0,0, # 90 - 97 + 0,0,0,0,0,0,0,0, # 98 - 9f + 0,2,2,2,2,2,2,2, # a0 - a7 + 2,2,2,2,2,3,3,3, # a8 - af + 2,2,2,2,2,2,2,2, # b0 - b7 + 2,2,2,2,2,2,2,2, # b8 - bf + 2,2,2,2,2,2,2,2, # c0 - c7 + 2,3,2,2,2,2,2,2, # c8 - cf + 2,2,2,2,2,2,2,2, # d0 - d7 + 
2,2,2,2,2,2,2,2, # d8 - df + 2,2,2,2,2,2,2,2, # e0 - e7 + 2,2,2,2,2,2,2,2, # e8 - ef + 2,2,2,2,2,2,2,2, # f0 - f7 + 2,2,2,2,2,2,2,0) # f8 - ff + +EUCKR_st = ( + eError,eStart, 3,eError,eError,eError,eError,eError,#00-07 + eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,eStart,eStart)#08-0f + +EUCKRCharLenTable = (0, 1, 2, 0) + +EUCKRSMModel = {'classTable': EUCKR_cls, + 'classFactor': 4, + 'stateTable': EUCKR_st, + 'charLenTable': EUCKRCharLenTable, + 'name': 'EUC-KR'} + +# EUC-TW + +EUCTW_cls = ( \ + 2,2,2,2,2,2,2,2, # 00 - 07 + 2,2,2,2,2,2,0,0, # 08 - 0f + 2,2,2,2,2,2,2,2, # 10 - 17 + 2,2,2,0,2,2,2,2, # 18 - 1f + 2,2,2,2,2,2,2,2, # 20 - 27 + 2,2,2,2,2,2,2,2, # 28 - 2f + 2,2,2,2,2,2,2,2, # 30 - 37 + 2,2,2,2,2,2,2,2, # 38 - 3f + 2,2,2,2,2,2,2,2, # 40 - 47 + 2,2,2,2,2,2,2,2, # 48 - 4f + 2,2,2,2,2,2,2,2, # 50 - 57 + 2,2,2,2,2,2,2,2, # 58 - 5f + 2,2,2,2,2,2,2,2, # 60 - 67 + 2,2,2,2,2,2,2,2, # 68 - 6f + 2,2,2,2,2,2,2,2, # 70 - 77 + 2,2,2,2,2,2,2,2, # 78 - 7f + 0,0,0,0,0,0,0,0, # 80 - 87 + 0,0,0,0,0,0,6,0, # 88 - 8f + 0,0,0,0,0,0,0,0, # 90 - 97 + 0,0,0,0,0,0,0,0, # 98 - 9f + 0,3,4,4,4,4,4,4, # a0 - a7 + 5,5,1,1,1,1,1,1, # a8 - af + 1,1,1,1,1,1,1,1, # b0 - b7 + 1,1,1,1,1,1,1,1, # b8 - bf + 1,1,3,1,3,3,3,3, # c0 - c7 + 3,3,3,3,3,3,3,3, # c8 - cf + 3,3,3,3,3,3,3,3, # d0 - d7 + 3,3,3,3,3,3,3,3, # d8 - df + 3,3,3,3,3,3,3,3, # e0 - e7 + 3,3,3,3,3,3,3,3, # e8 - ef + 3,3,3,3,3,3,3,3, # f0 - f7 + 3,3,3,3,3,3,3,0) # f8 - ff + +EUCTW_st = ( \ + eError,eError,eStart, 3, 3, 3, 4,eError,#00-07 + eError,eError,eError,eError,eError,eError,eItsMe,eItsMe,#08-0f + eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,eStart,eError,#10-17 + eStart,eStart,eStart,eError,eError,eError,eError,eError,#18-1f + 5,eError,eError,eError,eStart,eError,eStart,eStart,#20-27 + eStart,eError,eStart,eStart,eStart,eStart,eStart,eStart)#28-2f + +EUCTWCharLenTable = (0, 0, 1, 2, 2, 2, 3) + +EUCTWSMModel = {'classTable': EUCTW_cls, + 'classFactor': 7, + 'stateTable': EUCTW_st, + 'charLenTable': EUCTWCharLenTable, + 'name': 
'x-euc-tw'} + +# GB2312 + +GB2312_cls = ( \ + 1,1,1,1,1,1,1,1, # 00 - 07 + 1,1,1,1,1,1,0,0, # 08 - 0f + 1,1,1,1,1,1,1,1, # 10 - 17 + 1,1,1,0,1,1,1,1, # 18 - 1f + 1,1,1,1,1,1,1,1, # 20 - 27 + 1,1,1,1,1,1,1,1, # 28 - 2f + 3,3,3,3,3,3,3,3, # 30 - 37 + 3,3,1,1,1,1,1,1, # 38 - 3f + 2,2,2,2,2,2,2,2, # 40 - 47 + 2,2,2,2,2,2,2,2, # 48 - 4f + 2,2,2,2,2,2,2,2, # 50 - 57 + 2,2,2,2,2,2,2,2, # 58 - 5f + 2,2,2,2,2,2,2,2, # 60 - 67 + 2,2,2,2,2,2,2,2, # 68 - 6f + 2,2,2,2,2,2,2,2, # 70 - 77 + 2,2,2,2,2,2,2,4, # 78 - 7f + 5,6,6,6,6,6,6,6, # 80 - 87 + 6,6,6,6,6,6,6,6, # 88 - 8f + 6,6,6,6,6,6,6,6, # 90 - 97 + 6,6,6,6,6,6,6,6, # 98 - 9f + 6,6,6,6,6,6,6,6, # a0 - a7 + 6,6,6,6,6,6,6,6, # a8 - af + 6,6,6,6,6,6,6,6, # b0 - b7 + 6,6,6,6,6,6,6,6, # b8 - bf + 6,6,6,6,6,6,6,6, # c0 - c7 + 6,6,6,6,6,6,6,6, # c8 - cf + 6,6,6,6,6,6,6,6, # d0 - d7 + 6,6,6,6,6,6,6,6, # d8 - df + 6,6,6,6,6,6,6,6, # e0 - e7 + 6,6,6,6,6,6,6,6, # e8 - ef + 6,6,6,6,6,6,6,6, # f0 - f7 + 6,6,6,6,6,6,6,0) # f8 - ff + +GB2312_st = ( \ + eError,eStart,eStart,eStart,eStart,eStart, 3,eError,#00-07 + eError,eError,eError,eError,eError,eError,eItsMe,eItsMe,#08-0f + eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,eStart,#10-17 + 4,eError,eStart,eStart,eError,eError,eError,eError,#18-1f + eError,eError, 5,eError,eError,eError,eItsMe,eError,#20-27 + eError,eError,eStart,eStart,eStart,eStart,eStart,eStart)#28-2f + +# To be accurate, the length of class 6 can be either 2 or 4. +# But it is not necessary to discriminate between the two since +# it is used for frequency analysis only, and we are validing +# each code range there as well. So it is safe to set it to be +# 2 here. 
+GB2312CharLenTable = (0, 1, 1, 1, 1, 1, 2) + +GB2312SMModel = {'classTable': GB2312_cls, + 'classFactor': 7, + 'stateTable': GB2312_st, + 'charLenTable': GB2312CharLenTable, + 'name': 'GB2312'} + +# Shift_JIS + +SJIS_cls = ( \ + 1,1,1,1,1,1,1,1, # 00 - 07 + 1,1,1,1,1,1,0,0, # 08 - 0f + 1,1,1,1,1,1,1,1, # 10 - 17 + 1,1,1,0,1,1,1,1, # 18 - 1f + 1,1,1,1,1,1,1,1, # 20 - 27 + 1,1,1,1,1,1,1,1, # 28 - 2f + 1,1,1,1,1,1,1,1, # 30 - 37 + 1,1,1,1,1,1,1,1, # 38 - 3f + 2,2,2,2,2,2,2,2, # 40 - 47 + 2,2,2,2,2,2,2,2, # 48 - 4f + 2,2,2,2,2,2,2,2, # 50 - 57 + 2,2,2,2,2,2,2,2, # 58 - 5f + 2,2,2,2,2,2,2,2, # 60 - 67 + 2,2,2,2,2,2,2,2, # 68 - 6f + 2,2,2,2,2,2,2,2, # 70 - 77 + 2,2,2,2,2,2,2,1, # 78 - 7f + 3,3,3,3,3,3,3,3, # 80 - 87 + 3,3,3,3,3,3,3,3, # 88 - 8f + 3,3,3,3,3,3,3,3, # 90 - 97 + 3,3,3,3,3,3,3,3, # 98 - 9f + #0xa0 is illegal in sjis encoding, but some pages does + #contain such byte. We need to be more error forgiven. + 2,2,2,2,2,2,2,2, # a0 - a7 + 2,2,2,2,2,2,2,2, # a8 - af + 2,2,2,2,2,2,2,2, # b0 - b7 + 2,2,2,2,2,2,2,2, # b8 - bf + 2,2,2,2,2,2,2,2, # c0 - c7 + 2,2,2,2,2,2,2,2, # c8 - cf + 2,2,2,2,2,2,2,2, # d0 - d7 + 2,2,2,2,2,2,2,2, # d8 - df + 3,3,3,3,3,3,3,3, # e0 - e7 + 3,3,3,3,3,4,4,4, # e8 - ef + 4,4,4,4,4,4,4,4, # f0 - f7 + 4,4,4,4,4,0,0,0) # f8 - ff + +SJIS_st = ( \ + eError,eStart,eStart, 3,eError,eError,eError,eError,#00-07 + eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f + eItsMe,eItsMe,eError,eError,eStart,eStart,eStart,eStart)#10-17 + +SJISCharLenTable = (0, 1, 1, 2, 0, 0) + +SJISSMModel = {'classTable': SJIS_cls, + 'classFactor': 6, + 'stateTable': SJIS_st, + 'charLenTable': SJISCharLenTable, + 'name': 'Shift_JIS'} + +# UCS2-BE + +UCS2BE_cls = ( \ + 0,0,0,0,0,0,0,0, # 00 - 07 + 0,0,1,0,0,2,0,0, # 08 - 0f + 0,0,0,0,0,0,0,0, # 10 - 17 + 0,0,0,3,0,0,0,0, # 18 - 1f + 0,0,0,0,0,0,0,0, # 20 - 27 + 0,3,3,3,3,3,0,0, # 28 - 2f + 0,0,0,0,0,0,0,0, # 30 - 37 + 0,0,0,0,0,0,0,0, # 38 - 3f + 0,0,0,0,0,0,0,0, # 40 - 47 + 0,0,0,0,0,0,0,0, # 48 - 4f + 
0,0,0,0,0,0,0,0, # 50 - 57 + 0,0,0,0,0,0,0,0, # 58 - 5f + 0,0,0,0,0,0,0,0, # 60 - 67 + 0,0,0,0,0,0,0,0, # 68 - 6f + 0,0,0,0,0,0,0,0, # 70 - 77 + 0,0,0,0,0,0,0,0, # 78 - 7f + 0,0,0,0,0,0,0,0, # 80 - 87 + 0,0,0,0,0,0,0,0, # 88 - 8f + 0,0,0,0,0,0,0,0, # 90 - 97 + 0,0,0,0,0,0,0,0, # 98 - 9f + 0,0,0,0,0,0,0,0, # a0 - a7 + 0,0,0,0,0,0,0,0, # a8 - af + 0,0,0,0,0,0,0,0, # b0 - b7 + 0,0,0,0,0,0,0,0, # b8 - bf + 0,0,0,0,0,0,0,0, # c0 - c7 + 0,0,0,0,0,0,0,0, # c8 - cf + 0,0,0,0,0,0,0,0, # d0 - d7 + 0,0,0,0,0,0,0,0, # d8 - df + 0,0,0,0,0,0,0,0, # e0 - e7 + 0,0,0,0,0,0,0,0, # e8 - ef + 0,0,0,0,0,0,0,0, # f0 - f7 + 0,0,0,0,0,0,4,5) # f8 - ff + +UCS2BE_st = ( \ + 5, 7, 7,eError, 4, 3,eError,eError,#00-07 + eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f + eItsMe,eItsMe, 6, 6, 6, 6,eError,eError,#10-17 + 6, 6, 6, 6, 6,eItsMe, 6, 6,#18-1f + 6, 6, 6, 6, 5, 7, 7,eError,#20-27 + 5, 8, 6, 6,eError, 6, 6, 6,#28-2f + 6, 6, 6, 6,eError,eError,eStart,eStart)#30-37 + +UCS2BECharLenTable = (2, 2, 2, 0, 2, 2) + +UCS2BESMModel = {'classTable': UCS2BE_cls, + 'classFactor': 6, + 'stateTable': UCS2BE_st, + 'charLenTable': UCS2BECharLenTable, + 'name': 'UTF-16BE'} + +# UCS2-LE + +UCS2LE_cls = ( \ + 0,0,0,0,0,0,0,0, # 00 - 07 + 0,0,1,0,0,2,0,0, # 08 - 0f + 0,0,0,0,0,0,0,0, # 10 - 17 + 0,0,0,3,0,0,0,0, # 18 - 1f + 0,0,0,0,0,0,0,0, # 20 - 27 + 0,3,3,3,3,3,0,0, # 28 - 2f + 0,0,0,0,0,0,0,0, # 30 - 37 + 0,0,0,0,0,0,0,0, # 38 - 3f + 0,0,0,0,0,0,0,0, # 40 - 47 + 0,0,0,0,0,0,0,0, # 48 - 4f + 0,0,0,0,0,0,0,0, # 50 - 57 + 0,0,0,0,0,0,0,0, # 58 - 5f + 0,0,0,0,0,0,0,0, # 60 - 67 + 0,0,0,0,0,0,0,0, # 68 - 6f + 0,0,0,0,0,0,0,0, # 70 - 77 + 0,0,0,0,0,0,0,0, # 78 - 7f + 0,0,0,0,0,0,0,0, # 80 - 87 + 0,0,0,0,0,0,0,0, # 88 - 8f + 0,0,0,0,0,0,0,0, # 90 - 97 + 0,0,0,0,0,0,0,0, # 98 - 9f + 0,0,0,0,0,0,0,0, # a0 - a7 + 0,0,0,0,0,0,0,0, # a8 - af + 0,0,0,0,0,0,0,0, # b0 - b7 + 0,0,0,0,0,0,0,0, # b8 - bf + 0,0,0,0,0,0,0,0, # c0 - c7 + 0,0,0,0,0,0,0,0, # c8 - cf + 0,0,0,0,0,0,0,0, # d0 - d7 + 0,0,0,0,0,0,0,0, 
# d8 - df + 0,0,0,0,0,0,0,0, # e0 - e7 + 0,0,0,0,0,0,0,0, # e8 - ef + 0,0,0,0,0,0,0,0, # f0 - f7 + 0,0,0,0,0,0,4,5) # f8 - ff + +UCS2LE_st = ( \ + 6, 6, 7, 6, 4, 3,eError,eError,#00-07 + eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f + eItsMe,eItsMe, 5, 5, 5,eError,eItsMe,eError,#10-17 + 5, 5, 5,eError, 5,eError, 6, 6,#18-1f + 7, 6, 8, 8, 5, 5, 5,eError,#20-27 + 5, 5, 5,eError,eError,eError, 5, 5,#28-2f + 5, 5, 5,eError, 5,eError,eStart,eStart)#30-37 + +UCS2LECharLenTable = (2, 2, 2, 2, 2, 2) + +UCS2LESMModel = {'classTable': UCS2LE_cls, + 'classFactor': 6, + 'stateTable': UCS2LE_st, + 'charLenTable': UCS2LECharLenTable, + 'name': 'UTF-16LE'} + +# UTF-8 + +UTF8_cls = ( \ + 1,1,1,1,1,1,1,1, # 00 - 07 #allow 0x00 as a legal value + 1,1,1,1,1,1,0,0, # 08 - 0f + 1,1,1,1,1,1,1,1, # 10 - 17 + 1,1,1,0,1,1,1,1, # 18 - 1f + 1,1,1,1,1,1,1,1, # 20 - 27 + 1,1,1,1,1,1,1,1, # 28 - 2f + 1,1,1,1,1,1,1,1, # 30 - 37 + 1,1,1,1,1,1,1,1, # 38 - 3f + 1,1,1,1,1,1,1,1, # 40 - 47 + 1,1,1,1,1,1,1,1, # 48 - 4f + 1,1,1,1,1,1,1,1, # 50 - 57 + 1,1,1,1,1,1,1,1, # 58 - 5f + 1,1,1,1,1,1,1,1, # 60 - 67 + 1,1,1,1,1,1,1,1, # 68 - 6f + 1,1,1,1,1,1,1,1, # 70 - 77 + 1,1,1,1,1,1,1,1, # 78 - 7f + 2,2,2,2,3,3,3,3, # 80 - 87 + 4,4,4,4,4,4,4,4, # 88 - 8f + 4,4,4,4,4,4,4,4, # 90 - 97 + 4,4,4,4,4,4,4,4, # 98 - 9f + 5,5,5,5,5,5,5,5, # a0 - a7 + 5,5,5,5,5,5,5,5, # a8 - af + 5,5,5,5,5,5,5,5, # b0 - b7 + 5,5,5,5,5,5,5,5, # b8 - bf + 0,0,6,6,6,6,6,6, # c0 - c7 + 6,6,6,6,6,6,6,6, # c8 - cf + 6,6,6,6,6,6,6,6, # d0 - d7 + 6,6,6,6,6,6,6,6, # d8 - df + 7,8,8,8,8,8,8,8, # e0 - e7 + 8,8,8,8,8,9,8,8, # e8 - ef + 10,11,11,11,11,11,11,11, # f0 - f7 + 12,13,13,13,14,15,0,0) # f8 - ff + +UTF8_st = ( \ + eError,eStart,eError,eError,eError,eError, 12, 10,#00-07 + 9, 11, 8, 7, 6, 5, 4, 3,#08-0f + eError,eError,eError,eError,eError,eError,eError,eError,#10-17 + eError,eError,eError,eError,eError,eError,eError,eError,#18-1f + eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,#20-27 + 
eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,#28-2f + eError,eError, 5, 5, 5, 5,eError,eError,#30-37 + eError,eError,eError,eError,eError,eError,eError,eError,#38-3f + eError,eError,eError, 5, 5, 5,eError,eError,#40-47 + eError,eError,eError,eError,eError,eError,eError,eError,#48-4f + eError,eError, 7, 7, 7, 7,eError,eError,#50-57 + eError,eError,eError,eError,eError,eError,eError,eError,#58-5f + eError,eError,eError,eError, 7, 7,eError,eError,#60-67 + eError,eError,eError,eError,eError,eError,eError,eError,#68-6f + eError,eError, 9, 9, 9, 9,eError,eError,#70-77 + eError,eError,eError,eError,eError,eError,eError,eError,#78-7f + eError,eError,eError,eError,eError, 9,eError,eError,#80-87 + eError,eError,eError,eError,eError,eError,eError,eError,#88-8f + eError,eError, 12, 12, 12, 12,eError,eError,#90-97 + eError,eError,eError,eError,eError,eError,eError,eError,#98-9f + eError,eError,eError,eError,eError, 12,eError,eError,#a0-a7 + eError,eError,eError,eError,eError,eError,eError,eError,#a8-af + eError,eError, 12, 12, 12,eError,eError,eError,#b0-b7 + eError,eError,eError,eError,eError,eError,eError,eError,#b8-bf + eError,eError,eStart,eStart,eStart,eStart,eError,eError,#c0-c7 + eError,eError,eError,eError,eError,eError,eError,eError)#c8-cf + +UTF8CharLenTable = (0, 1, 0, 0, 0, 0, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6) + +UTF8SMModel = {'classTable': UTF8_cls, + 'classFactor': 16, + 'stateTable': UTF8_st, + 'charLenTable': UTF8CharLenTable, + 'name': 'UTF-8'} diff --git a/fanficdownloader/chardet/sbcharsetprober.py b/fanficdownloader/chardet/sbcharsetprober.py new file mode 100644 index 00000000..da071163 --- /dev/null +++ b/fanficdownloader/chardet/sbcharsetprober.py @@ -0,0 +1,106 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Universal charset detector code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. 
+# Portions created by the Initial Developer are Copyright (C) 2001 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# Shy Shalom - original C code +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +import constants, sys +from charsetprober import CharSetProber + +SAMPLE_SIZE = 64 +SB_ENOUGH_REL_THRESHOLD = 1024 +POSITIVE_SHORTCUT_THRESHOLD = 0.95 +NEGATIVE_SHORTCUT_THRESHOLD = 0.05 +SYMBOL_CAT_ORDER = 250 +NUMBER_OF_SEQ_CAT = 4 +POSITIVE_CAT = NUMBER_OF_SEQ_CAT - 1 +#NEGATIVE_CAT = 0 + +class SingleByteCharSetProber(CharSetProber): + def __init__(self, model, reversed=constants.False, nameProber=None): + CharSetProber.__init__(self) + self._mModel = model + self._mReversed = reversed # TRUE if we need to reverse every pair in the model lookup + self._mNameProber = nameProber # Optional auxiliary prober for name decision + self.reset() + + def reset(self): + CharSetProber.reset(self) + self._mLastOrder = 255 # char order of last character + self._mSeqCounters = [0] * NUMBER_OF_SEQ_CAT + self._mTotalSeqs = 0 + self._mTotalChar = 0 + self._mFreqChar = 0 # characters that fall in our sampling range + + def get_charset_name(self): + if self._mNameProber: + return 
self._mNameProber.get_charset_name() + else: + return self._mModel['charsetName'] + + def feed(self, aBuf): + if not self._mModel['keepEnglishLetter']: + aBuf = self.filter_without_english_letters(aBuf) + aLen = len(aBuf) + if not aLen: + return self.get_state() + for c in aBuf: + order = self._mModel['charToOrderMap'][ord(c)] + if order < SYMBOL_CAT_ORDER: + self._mTotalChar += 1 + if order < SAMPLE_SIZE: + self._mFreqChar += 1 + if self._mLastOrder < SAMPLE_SIZE: + self._mTotalSeqs += 1 + if not self._mReversed: + self._mSeqCounters[self._mModel['precedenceMatrix'][(self._mLastOrder * SAMPLE_SIZE) + order]] += 1 + else: # reverse the order of the letters in the lookup + self._mSeqCounters[self._mModel['precedenceMatrix'][(order * SAMPLE_SIZE) + self._mLastOrder]] += 1 + self._mLastOrder = order + + if self.get_state() == constants.eDetecting: + if self._mTotalSeqs > SB_ENOUGH_REL_THRESHOLD: + cf = self.get_confidence() + if cf > POSITIVE_SHORTCUT_THRESHOLD: + if constants._debug: + sys.stderr.write('%s confidence = %s, we have a winner\n' % (self._mModel['charsetName'], cf)) + self._mState = constants.eFoundIt + elif cf < NEGATIVE_SHORTCUT_THRESHOLD: + if constants._debug: + sys.stderr.write('%s confidence = %s, below negative shortcut threshhold %s\n' % (self._mModel['charsetName'], cf, NEGATIVE_SHORTCUT_THRESHOLD)) + self._mState = constants.eNotMe + + return self.get_state() + + def get_confidence(self): + r = 0.01 + if self._mTotalSeqs > 0: +# print self._mSeqCounters[POSITIVE_CAT], self._mTotalSeqs, self._mModel['mTypicalPositiveRatio'] + r = (1.0 * self._mSeqCounters[POSITIVE_CAT]) / self._mTotalSeqs / self._mModel['mTypicalPositiveRatio'] +# print r, self._mFreqChar, self._mTotalChar + r = r * self._mFreqChar / self._mTotalChar + if r >= 1.0: + r = 0.99 + return r diff --git a/fanficdownloader/chardet/sbcsgroupprober.py b/fanficdownloader/chardet/sbcsgroupprober.py new file mode 100644 index 00000000..d19160c8 --- /dev/null +++ 
b/fanficdownloader/chardet/sbcsgroupprober.py @@ -0,0 +1,64 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Universal charset detector code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 2001 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# Shy Shalom - original C code +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
+# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +import constants, sys +from charsetgroupprober import CharSetGroupProber +from sbcharsetprober import SingleByteCharSetProber +from langcyrillicmodel import Win1251CyrillicModel, Koi8rModel, Latin5CyrillicModel, MacCyrillicModel, Ibm866Model, Ibm855Model +from langgreekmodel import Latin7GreekModel, Win1253GreekModel +from langbulgarianmodel import Latin5BulgarianModel, Win1251BulgarianModel +from langhungarianmodel import Latin2HungarianModel, Win1250HungarianModel +from langthaimodel import TIS620ThaiModel +from langhebrewmodel import Win1255HebrewModel +from hebrewprober import HebrewProber + +class SBCSGroupProber(CharSetGroupProber): + def __init__(self): + CharSetGroupProber.__init__(self) + self._mProbers = [ \ + SingleByteCharSetProber(Win1251CyrillicModel), + SingleByteCharSetProber(Koi8rModel), + SingleByteCharSetProber(Latin5CyrillicModel), + SingleByteCharSetProber(MacCyrillicModel), + SingleByteCharSetProber(Ibm866Model), + SingleByteCharSetProber(Ibm855Model), + SingleByteCharSetProber(Latin7GreekModel), + SingleByteCharSetProber(Win1253GreekModel), + SingleByteCharSetProber(Latin5BulgarianModel), + SingleByteCharSetProber(Win1251BulgarianModel), + SingleByteCharSetProber(Latin2HungarianModel), + SingleByteCharSetProber(Win1250HungarianModel), + SingleByteCharSetProber(TIS620ThaiModel), + ] + hebrewProber = HebrewProber() + logicalHebrewProber = SingleByteCharSetProber(Win1255HebrewModel, constants.False, hebrewProber) + visualHebrewProber = SingleByteCharSetProber(Win1255HebrewModel, constants.True, hebrewProber) + hebrewProber.set_model_probers(logicalHebrewProber, visualHebrewProber) + self._mProbers.extend([hebrewProber, logicalHebrewProber, 
visualHebrewProber]) + + self.reset() diff --git a/fanficdownloader/chardet/sjisprober.py b/fanficdownloader/chardet/sjisprober.py new file mode 100644 index 00000000..fea2690c --- /dev/null +++ b/fanficdownloader/chardet/sjisprober.py @@ -0,0 +1,85 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is mozilla.org code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
+# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from mbcharsetprober import MultiByteCharSetProber +from codingstatemachine import CodingStateMachine +from chardistribution import SJISDistributionAnalysis +from jpcntx import SJISContextAnalysis +from mbcssm import SJISSMModel +import constants, sys +from constants import eStart, eError, eItsMe + +class SJISProber(MultiByteCharSetProber): + def __init__(self): + MultiByteCharSetProber.__init__(self) + self._mCodingSM = CodingStateMachine(SJISSMModel) + self._mDistributionAnalyzer = SJISDistributionAnalysis() + self._mContextAnalyzer = SJISContextAnalysis() + self.reset() + + def reset(self): + MultiByteCharSetProber.reset(self) + self._mContextAnalyzer.reset() + + def get_charset_name(self): + return "SHIFT_JIS" + + def feed(self, aBuf): + aLen = len(aBuf) + for i in range(0, aLen): + codingState = self._mCodingSM.next_state(aBuf[i]) + if codingState == eError: + if constants._debug: + sys.stderr.write(self.get_charset_name() + ' prober hit error at byte ' + str(i) + '\n') + self._mState = constants.eNotMe + break + elif codingState == eItsMe: + self._mState = constants.eFoundIt + break + elif codingState == eStart: + charLen = self._mCodingSM.get_current_charlen() + if i == 0: + self._mLastChar[1] = aBuf[0] + self._mContextAnalyzer.feed(self._mLastChar[2 - charLen :], charLen) + self._mDistributionAnalyzer.feed(self._mLastChar, charLen) + else: + self._mContextAnalyzer.feed(aBuf[i + 1 - charLen : i + 3 - charLen], charLen) + self._mDistributionAnalyzer.feed(aBuf[i - 1 : i + 1], charLen) + + self._mLastChar[0] = aBuf[aLen - 1] + + if self.get_state() == constants.eDetecting: + if self._mContextAnalyzer.got_enough_data() and \ + (self.get_confidence() > 
constants.SHORTCUT_THRESHOLD): + self._mState = constants.eFoundIt + + return self.get_state() + + def get_confidence(self): + contxtCf = self._mContextAnalyzer.get_confidence() + distribCf = self._mDistributionAnalyzer.get_confidence() + return max(contxtCf, distribCf) diff --git a/fanficdownloader/chardet/test.py b/fanficdownloader/chardet/test.py new file mode 100644 index 00000000..2ebf3a4d --- /dev/null +++ b/fanficdownloader/chardet/test.py @@ -0,0 +1,20 @@ +import sys, glob +sys.path.insert(0, '..') +from chardet.universaldetector import UniversalDetector + +count = 0 +u = UniversalDetector() +for f in glob.glob(sys.argv[1]): + print f.ljust(60), + u.reset() + for line in file(f, 'rb'): + u.feed(line) + if u.done: break + u.close() + result = u.result + if result['encoding']: + print result['encoding'], 'with confidence', result['confidence'] + else: + print '******** no result' + count += 1 +print count, 'tests' diff --git a/fanficdownloader/chardet/universaldetector.py b/fanficdownloader/chardet/universaldetector.py new file mode 100644 index 00000000..809df227 --- /dev/null +++ b/fanficdownloader/chardet/universaldetector.py @@ -0,0 +1,154 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Universal charset detector code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 2001 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# Shy Shalom - original C code +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. 
+# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +import constants, sys +from latin1prober import Latin1Prober # windows-1252 +from mbcsgroupprober import MBCSGroupProber # multi-byte character sets +from sbcsgroupprober import SBCSGroupProber # single-byte character sets +from escprober import EscCharSetProber # ISO-2122, etc. +import re + +MINIMUM_THRESHOLD = 0.20 +ePureAscii = 0 +eEscAscii = 1 +eHighbyte = 2 + +class UniversalDetector: + def __init__(self): + self._highBitDetector = re.compile(r'[\x80-\xFF]') + self._escDetector = re.compile(r'(\033|~{)') + self._mEscCharSetProber = None + self._mCharSetProbers = [] + self.reset() + + def reset(self): + self.result = {'encoding': None, 'confidence': 0.0} + self.done = constants.False + self._mStart = constants.True + self._mGotData = constants.False + self._mInputState = ePureAscii + self._mLastChar = '' + if self._mEscCharSetProber: + self._mEscCharSetProber.reset() + for prober in self._mCharSetProbers: + prober.reset() + + def feed(self, aBuf): + if self.done: return + + aLen = len(aBuf) + if not aLen: return + + if not self._mGotData: + # If the data starts with BOM, we know it is UTF + if aBuf[:3] == '\xEF\xBB\xBF': + # EF BB BF UTF-8 with BOM + self.result = {'encoding': "UTF-8", 'confidence': 1.0} + elif aBuf[:4] == '\xFF\xFE\x00\x00': + # FF FE 00 00 UTF-32, little-endian BOM + self.result = {'encoding': "UTF-32LE", 'confidence': 1.0} + elif aBuf[:4] == '\x00\x00\xFE\xFF': + # 00 00 FE FF UTF-32, big-endian BOM + 
self.result = {'encoding': "UTF-32BE", 'confidence': 1.0} + elif aBuf[:4] == '\xFE\xFF\x00\x00': + # FE FF 00 00 UCS-4, unusual octet order BOM (3412) + self.result = {'encoding': "X-ISO-10646-UCS-4-3412", 'confidence': 1.0} + elif aBuf[:4] == '\x00\x00\xFF\xFE': + # 00 00 FF FE UCS-4, unusual octet order BOM (2143) + self.result = {'encoding': "X-ISO-10646-UCS-4-2143", 'confidence': 1.0} + elif aBuf[:2] == '\xFF\xFE': + # FF FE UTF-16, little endian BOM + self.result = {'encoding': "UTF-16LE", 'confidence': 1.0} + elif aBuf[:2] == '\xFE\xFF': + # FE FF UTF-16, big endian BOM + self.result = {'encoding': "UTF-16BE", 'confidence': 1.0} + + self._mGotData = constants.True + if self.result['encoding'] and (self.result['confidence'] > 0.0): + self.done = constants.True + return + + if self._mInputState == ePureAscii: + if self._highBitDetector.search(aBuf): + self._mInputState = eHighbyte + elif (self._mInputState == ePureAscii) and self._escDetector.search(self._mLastChar + aBuf): + self._mInputState = eEscAscii + + self._mLastChar = aBuf[-1] + + if self._mInputState == eEscAscii: + if not self._mEscCharSetProber: + self._mEscCharSetProber = EscCharSetProber() + if self._mEscCharSetProber.feed(aBuf) == constants.eFoundIt: + self.result = {'encoding': self._mEscCharSetProber.get_charset_name(), + 'confidence': self._mEscCharSetProber.get_confidence()} + self.done = constants.True + elif self._mInputState == eHighbyte: + if not self._mCharSetProbers: + self._mCharSetProbers = [MBCSGroupProber(), SBCSGroupProber(), Latin1Prober()] + for prober in self._mCharSetProbers: + if prober.feed(aBuf) == constants.eFoundIt: + self.result = {'encoding': prober.get_charset_name(), + 'confidence': prober.get_confidence()} + self.done = constants.True + break + + def close(self): + if self.done: return + if not self._mGotData: + if constants._debug: + sys.stderr.write('no data received!\n') + return + self.done = constants.True + + if self._mInputState == ePureAscii: + self.result = 
{'encoding': 'ascii', 'confidence': 1.0} + return self.result + + if self._mInputState == eHighbyte: + proberConfidence = None + maxProberConfidence = 0.0 + maxProber = None + for prober in self._mCharSetProbers: + if not prober: continue + proberConfidence = prober.get_confidence() + if proberConfidence > maxProberConfidence: + maxProberConfidence = proberConfidence + maxProber = prober + if maxProber and (maxProberConfidence > MINIMUM_THRESHOLD): + self.result = {'encoding': maxProber.get_charset_name(), + 'confidence': maxProber.get_confidence()} + return self.result + + if constants._debug: + sys.stderr.write('no probers hit minimum threshhold\n') + for prober in self._mCharSetProbers[0].mProbers: + if not prober: continue + sys.stderr.write('%s confidence = %s\n' % \ + (prober.get_charset_name(), \ + prober.get_confidence())) diff --git a/fanficdownloader/chardet/utf8prober.py b/fanficdownloader/chardet/utf8prober.py new file mode 100644 index 00000000..c1792bb3 --- /dev/null +++ b/fanficdownloader/chardet/utf8prober.py @@ -0,0 +1,76 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is mozilla.org code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
+# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +import constants, sys +from constants import eStart, eError, eItsMe +from charsetprober import CharSetProber +from codingstatemachine import CodingStateMachine +from mbcssm import UTF8SMModel + +ONE_CHAR_PROB = 0.5 + +class UTF8Prober(CharSetProber): + def __init__(self): + CharSetProber.__init__(self) + self._mCodingSM = CodingStateMachine(UTF8SMModel) + self.reset() + + def reset(self): + CharSetProber.reset(self) + self._mCodingSM.reset() + self._mNumOfMBChar = 0 + + def get_charset_name(self): + return "utf-8" + + def feed(self, aBuf): + for c in aBuf: + codingState = self._mCodingSM.next_state(c) + if codingState == eError: + self._mState = constants.eNotMe + break + elif codingState == eItsMe: + self._mState = constants.eFoundIt + break + elif codingState == eStart: + if self._mCodingSM.get_current_charlen() >= 2: + self._mNumOfMBChar += 1 + + if self.get_state() == constants.eDetecting: + if self.get_confidence() > constants.SHORTCUT_THRESHOLD: + self._mState = constants.eFoundIt + + return self.get_state() + + def get_confidence(self): + unlike = 0.99 + if self._mNumOfMBChar < 6: + for i in range(0, self._mNumOfMBChar): + unlike = unlike * ONE_CHAR_PROB + return 1.0 - unlike + else: + return unlike diff --git a/fanficdownloader/configurable.py b/fanficdownloader/configurable.py new file mode 100644 index 00000000..9b6d35f4 --- /dev/null +++ b/fanficdownloader/configurable.py @@ -0,0 +1,196 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import ConfigParser, re + +# All of the writers(epub,html,txt) and adapters(ffnet,twlt,etc) +# inherit from Configurable. The config file(s) uses ini format: +# [sections] with key:value settings. +# +# [defaults] +# titlepage_entries: category,genre, status +# [www.whofic.com] +# titlepage_entries: category,genre, status,dateUpdated,rating +# [epub] +# titlepage_entries: category,genre, status,datePublished,dateUpdated,dateCreated +# [www.whofic.com:epub] +# titlepage_entries: category,genre, status,datePublished +# [overrides] +# titlepage_entries: category + +class Configuration(ConfigParser.SafeConfigParser): + + def __init__(self, site, fileform): + ConfigParser.SafeConfigParser.__init__(self) + self.sectionslist = ['defaults'] + + if site.startswith("www."): + sitewith = site + sitewithout = site.replace("www.","") + else: + sitewith = "www."+site + sitewithout = site + + self.addConfigSection(sitewith) + self.addConfigSection(sitewithout) + if fileform: + self.addConfigSection(fileform) + self.addConfigSection(sitewith+":"+fileform) + self.addConfigSection(sitewithout+":"+fileform) + self.addConfigSection("overrides") + + self.listTypeEntries = [ + 'category', + 'genre', + 'characters', + 'ships', + 'warnings', + 'extratags', + 'author', + 'authorId', + 'authorUrl', + 'lastupdate', + ] + + self.validEntries = self.listTypeEntries + [ + 'series', + 'seriesUrl', + 'language', + 'status', + 'datePublished', + 'dateUpdated', + 'dateCreated', + 'rating', + 'numChapters', + 'numWords', + 'site', + 'storyId', + 'title', + 'storyUrl', + 'description', 
+ 'formatname', + 'formatext', + 'siteabbrev', + 'version', + # internal stuff. + 'authorHTML', + 'seriesHTML', + 'langcode', + 'output_css', + ] + + def addConfigSection(self,section): + self.sectionslist.insert(0,section) + + def isListType(self,key): + return key in self.listTypeEntries or self.hasConfig("include_in_"+key) + + def isValidMetaEntry(self, key): + return key in self.getValidMetaList() + + def getValidMetaList(self): + return self.validEntries + self.getConfigList("extra_valid_entries") + + # used by adapters & writers, non-convention naming style + def hasConfig(self, key): + return self.has_config(self.sectionslist, key) + + def has_config(self, sections, key): + for section in sections: + try: + self.get(section,key) + #print("found %s in section [%s]"%(key,section)) + return True + except: + try: + self.get(section,"add_to_"+key) + #print("found add_to_%s in section [%s]"%(key,section)) + return True + except: + pass + + return False + + # used by adapters & writers, non-convention naming style + def getConfig(self, key, default=""): + return self.get_config(self.sectionslist,key,default) + + def get_config(self, sections, key, default=""): + val = default + for section in sections: + try: + val = self.get(section,key) + if val and val.lower() == "false": + val = False + #print "getConfig(%s)=[%s]%s" % (key,section,val) + break + except (ConfigParser.NoOptionError, ConfigParser.NoSectionError), e: + pass + + for section in sections[::-1]: + # 'martian smiley' [::-1] reverses list by slicing whole list with -1 step. + try: + val = val + self.get(section,"add_to_"+key) + #print "getConfig(add_to_%s)=[%s]%s" % (key,section,val) + except (ConfigParser.NoOptionError, ConfigParser.NoSectionError), e: + pass + + return val + + # split and strip each. 
+ def get_config_list(self, sections, key): + vlist = re.split(r'(?<!\\),',self.get_config(sections,key)) # don't split on \, + vlist = filter( lambda x : x !='', [ v.strip().replace('\,',',') for v in vlist ]) + #print "vlist("+key+"):"+str(vlist) + return vlist + + # used by adapters & writers, non-convention naming style + def getConfigList(self, key): + return self.get_config_list(self.sectionslist, key) + +# extended by adapter, writer and story for ease of calling configuration. +class Configurable(object): + + def __init__(self, configuration): + self.configuration = configuration + + def isListType(self,key): + return self.configuration.isListType(key) + + def isValidMetaEntry(self, key): + return self.configuration.isValidMetaEntry(key) + + def getValidMetaList(self): + return self.configuration.getValidMetaList() + + def hasConfig(self, key): + return self.configuration.hasConfig(key) + + def has_config(self, sections, key): + return self.configuration.has_config(sections, key) + + def getConfig(self, key, default=""): + return self.configuration.getConfig(key,default) + + def get_config(self, sections, key, default=""): + return self.configuration.get_config(sections,key,default) + + def getConfigList(self, key): + return self.configuration.getConfigList(key) + + def get_config_list(self, sections, key): + return self.configuration.get_config_list(sections,key) diff --git a/fanficdownloader/epubutils.py b/fanficdownloader/epubutils.py new file mode 100644 index 00000000..6d2e6ff0 --- /dev/null +++ b/fanficdownloader/epubutils.py @@ -0,0 +1,194 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai +from __future__ import (unicode_literals, division, absolute_import, + print_function) + +__license__ = 'GPL v3' +__copyright__ = '2014, Jim Miller' +__docformat__ = 'restructuredtext en' + +import logging +logger = logging.getLogger(__name__) + +import re, os, traceback +from zipfile import ZipFile +from xml.dom.minidom import 
parseString + +from . import BeautifulSoup as bs + +def get_dcsource(inputio): + return get_update_data(inputio,getfilecount=False,getsoups=False)[0] + +def get_dcsource_chaptercount(inputio): + return get_update_data(inputio,getfilecount=True,getsoups=False)[:2] # (source,filecount) + +def get_update_data(inputio, + getfilecount=True, + getsoups=True): + epub = ZipFile(inputio, 'r') # works equally well with inputio as a path or a blob + + ## Find the .opf file. + container = epub.read("META-INF/container.xml") + containerdom = parseString(container) + rootfilenodelist = containerdom.getElementsByTagName("rootfile") + rootfilename = rootfilenodelist[0].getAttribute("full-path") + + contentdom = parseString(epub.read(rootfilename)) + firstmetadom = contentdom.getElementsByTagName("metadata")[0] + try: + source=firstmetadom.getElementsByTagName("dc:source")[0].firstChild.data.encode("utf-8") + except: + source=None + + ## Save the path to the .opf file--hrefs inside it are relative to it. + relpath = get_path_part(rootfilename) + + oldcover = None + calibrebookmark = None + logfile = None + # Looking for pre-existing cover. + for item in contentdom.getElementsByTagName("reference"): + if item.getAttribute("type") == "cover": + # there is a cover (x)html file, save the soup for it. + href=relpath+item.getAttribute("href") + oldcoverhtmlhref = href + oldcoverhtmldata = epub.read(href) + oldcoverhtmltype = "application/xhtml+xml" + for item in contentdom.getElementsByTagName("item"): + if( relpath+item.getAttribute("href") == oldcoverhtmlhref ): + oldcoverhtmltype = item.getAttribute("media-type") + break + soup = bs.BeautifulSoup(oldcoverhtmldata.decode("utf-8")) + src = None + # first img or image tag. + imgs = soup.findAll('img') + if imgs: + src = get_path_part(href)+imgs[0]['src'] + else: + imgs = soup.findAll('image') + if imgs: + src=get_path_part(href)+imgs[0]['xlink:href'] + + if not src: + continue + try: + # remove all .. 
and the path part above it, if present. + # Mostly for epubs edited by Sigil. + src = re.sub(r"([^/]+/\.\./)","",src) + #print("epubutils: found pre-existing cover image:%s"%src) + oldcoverimghref = src + oldcoverimgdata = epub.read(src) + for item in contentdom.getElementsByTagName("item"): + if( relpath+item.getAttribute("href") == oldcoverimghref ): + oldcoverimgtype = item.getAttribute("media-type") + break + oldcover = (oldcoverhtmlhref,oldcoverhtmltype,oldcoverhtmldata,oldcoverimghref,oldcoverimgtype,oldcoverimgdata) + except Exception as e: + logger.warn("Cover Image %s not found"%src) + logger.warn("Exception: %s"%(unicode(e))) + traceback.print_exc() + + filecount = 0 + soups = [] # list of xhmtl blocks + images = {} # dict() longdesc->data + if getfilecount: + # spin through the manifest--only place there are item tags. + for item in contentdom.getElementsByTagName("item"): + # First, count the 'chapter' files. FFDL uses file0000.xhtml, + # but can also update epubs downloaded from Twisting the + # Hellmouth, which uses chapter0.html. + if( item.getAttribute("media-type") == "application/xhtml+xml" ): + href=relpath+item.getAttribute("href") + #print("---- item href:%s path part: %s"%(href,get_path_part(href))) + if re.match(r'.*/log_page\.x?html',href): + try: + logfile = epub.read(href).decode("utf-8") + except: + pass # corner case I bumped into while testing. + if re.match(r'.*/(file|chapter)\d+\.x?html',href): + if getsoups: + soup = bs.BeautifulSoup(epub.read(href).decode("utf-8")) + for img in soup.findAll('img'): + newsrc='' + longdesc='' + try: + newsrc=get_path_part(href)+img['src'] + # remove all .. and the path part above it, if present. + # Mostly for epubs edited by Sigil. 
+ newsrc = re.sub(r"([^/]+/\.\./)","",newsrc) + longdesc=img['longdesc'] + data = epub.read(newsrc) + images[longdesc] = data + img['src'] = img['longdesc'] + except Exception as e: + logger.warn("Image %s not found!\n(originally:%s)"%(newsrc,longdesc)) + logger.warn("Exception: %s"%(unicode(e))) + traceback.print_exc() + soup = soup.find('body') + # ffdl epubs have chapter title h3 + h3 = soup.find('h3') + if h3: + h3.extract() + # TtH epubs have chapter title h2 + h2 = soup.find('h2') + if h2: + h2.extract() + + for skip in soup.findAll(attrs={'class':'skip_on_ffdl_update'}): + skip.extract() + + soups.append(soup) + + filecount+=1 + + try: + calibrebookmark = epub.read("META-INF/calibre_bookmarks.txt") + except: + pass + + #for k in images.keys(): + #print("\tlongdesc:%s\n\tData len:%s\n"%(k,len(images[k]))) + return (source,filecount,soups,images,oldcover,calibrebookmark,logfile) + +def get_path_part(n): + relpath = os.path.dirname(n) + if( len(relpath) > 0 ): + relpath=relpath+"/" + return relpath + +def get_story_url_from_html(inputio,_is_good_url=None): + + #print("get_story_url_from_html called") + epub = ZipFile(inputio, 'r') # works equally well with inputio as a path or a blob + + ## Find the .opf file. + container = epub.read("META-INF/container.xml") + containerdom = parseString(container) + rootfilenodelist = containerdom.getElementsByTagName("rootfile") + rootfilename = rootfilenodelist[0].getAttribute("full-path") + + contentdom = parseString(epub.read(rootfilename)) + #firstmetadom = contentdom.getElementsByTagName("metadata")[0] + + ## Save the path to the .opf file--hrefs inside it are relative to it. + relpath = get_path_part(rootfilename) + + # spin through the manifest--only place there are item tags. + for item in contentdom.getElementsByTagName("item"): + # First, count the 'chapter' files. FFDL uses file0000.xhtml, + # but can also update epubs downloaded from Twisting the + # Hellmouth, which uses chapter0.html. 
+ #print("---- item:%s"%item) + if( item.getAttribute("media-type") == "application/xhtml+xml" ): + filehref=relpath+item.getAttribute("href") + soup = bs.BeautifulSoup(epub.read(filehref).decode("utf-8")) + for link in soup.findAll('a',href=re.compile(r'^http.*')): + ahref=link['href'] + #print("href:(%s)"%ahref) + # hack for bad ficsaver ffnet URLs. + m = re.match(r"^http://www.fanfiction.net/s(?P<id>\d+)//$",ahref) + if m != None: + ahref="http://www.fanfiction.net/s/%s/1/"%m.group('id') + if _is_good_url == None or _is_good_url(ahref): + return ahref + return None diff --git a/fanficdownloader/exceptions.py b/fanficdownloader/exceptions.py new file mode 100644 index 00000000..2960add9 --- /dev/null +++ b/fanficdownloader/exceptions.py @@ -0,0 +1,86 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +## A few exceptions for different things for adapters + +class FailedToDownload(Exception): + def __init__(self,error): + self.error=error + + def __str__(self): + return self.error + +class InvalidStoryURL(Exception): + def __init__(self,url,domain,example): + self.url=url + self.domain=domain + self.example=example + + def __str__(self): + return "Bad Story URL: (%s) for site: (%s) Example: (%s)" % (self.url, self.domain, self.example) + +class FailedToLogin(Exception): + def __init__(self,url, username, passwdonly=False): + self.url=url + self.username=username + self.passwdonly=passwdonly + + def __str__(self): + if self.passwdonly: + return "URL Failed, password required: (%s) " % (self.url) + else: + return "Failed to Login for URL: (%s) with username: (%s)" % (self.url, self.username) + +class AdultCheckRequired(Exception): + def __init__(self,url): + self.url=url + + def __str__(self): + return "Story requires confirmation of adult status: (%s)" % self.url + +class StoryDoesNotExist(Exception): + def __init__(self,url): + self.url=url + + def __str__(self): + return "Story does not exist: (%s)" % self.url + +class UnknownSite(Exception): + def __init__(self,url,supported_sites_list): + self.url=url + self.supported_sites_list=supported_sites_list + self.supported_sites_list.sort() + + def __str__(self): + return "Unknown Site(%s). 
Supported sites: (%s)" % (self.url, ", ".join(self.supported_sites_list)) + +class FailedToWriteOutput(Exception): + def __init__(self,error): + self.error=error + + def __str__(self): + return self.error + +class RegularExpresssionFailed(Exception): + def __init__(self,error,regex,line): + self.error=error + self.regex=regex + self.line=line + + def __str__(self): + return "Regular Expression Error '%s' in regex '%s' in line '%s'"%(self.error,self.regex,self.line) + diff --git a/fanficdownloader/geturls.py b/fanficdownloader/geturls.py new file mode 100644 index 00000000..b66202d2 --- /dev/null +++ b/fanficdownloader/geturls.py @@ -0,0 +1,175 @@ +# -*- coding: utf-8 -*- + +# Copyright 2012 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import re +import urlparse +import urllib2 as u2 + +from BeautifulSoup import BeautifulSoup +from gziphttp import GZipProcessor + +import adapters +from configurable import Configuration +from exceptions import UnknownSite + +def get_urls_from_page(url,configuration=None,normalize=False): + + if not configuration: + configuration = Configuration("test1.com","EPUB") + + data = None + adapter = None + try: + adapter = adapters.getAdapter(configuration,url,anyurl=True) + + # special stuff to log into archiveofourown.org, if possible. + # Unlike most that show the links to 'adult' stories, but protect + # them, AO3 doesn't even show them if not logged in. 
Only works + # with saved user/pass--not going to prompt for list. + if 'archiveofourown.org' in url: + if adapter.getConfig("username"): + if adapter.getConfig("is_adult"): + if '?' in url: + addurl = "&view_adult=true" + else: + addurl = "?view_adult=true" + else: + addurl="" + # just to get an authenticity_token. + data = adapter._fetchUrl(url+addurl) + # login the session. + adapter.performLogin(url,data) + # get the list page with logged in session. + + # this way it uses User-Agent or other special settings. Only AO3 + # is doing login. + data = adapter._fetchUrl(url,usecache=False) + except UnknownSite: + # no adapter with anyurl=True, must be a random site. + opener = u2.build_opener(u2.HTTPCookieProcessor(),GZipProcessor()) + data = opener.open(url).read() + + # kludge because I don't see it on enough sites to be worth generalizing yet. + restrictsearch=None + if 'scarvesandcoffee.net' in url: + restrictsearch=('div',{'id':'mainpage'}) + + return get_urls_from_html(data,url,configuration,normalize,restrictsearch) + +def get_urls_from_html(data,url=None,configuration=None,normalize=False,restrictsearch=None): + + normalized = [] # normalized url + retlist = [] # orig urls. + + if not configuration: + configuration = Configuration("test1.com","EPUB") + + soup = BeautifulSoup(data) + if restrictsearch: + soup = soup.find(*restrictsearch) + #print("restrict search:%s"%soup) + + for a in soup.findAll('a'): + if a.has_key('href'): + #print("a['href']:%s"%a['href']) + href = form_url(url,a['href']) + #print("1 urlhref:%s"%href) + # this (should) catch normal story links, some javascript + # 'are you old enough' links, and 'Report This' links. + # The 'normalized' set prevents duplicates. 
+ if 'story.php' in a['href']: + #print("trying:%s"%a['href']) + m = re.search(r"(?P<sid>(view)?story\.php\?(sid|psid|no|story|stid)=\d+)",a['href']) + if m != None: + href = form_url(a['href'] if '//' in a['href'] else url, + m.group('sid')) + + try: + href = href.replace('&index=1','') + #print("2 urlhref:%s"%href) + adapter = adapters.getAdapter(configuration,href) + #print("found adapter") + if adapter.story.getMetadata('storyUrl') not in normalized: + normalized.append(adapter.story.getMetadata('storyUrl')) + retlist.append(href) + except Exception, e: + #print e + pass + + if normalize: + return normalized + else: + return retlist + +def get_urls_from_text(data,configuration=None,normalize=False): + + normalized = [] # normalized url + retlist = [] # orig urls. + data=unicode(data) + + if not configuration: + configuration = Configuration("test1.com","EPUB") + + for href in re.findall('http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', data): + # this (should) catch normal story links, some javascript + # 'are you old enough' links, and 'Report This' links. + # The 'normalized' set prevents duplicates. + if 'story.php' in href: + m = re.search(r"(?P<sid>(view)?story\.php\?(sid|psid|no|story|stid)=\d+)",href) + if m != None: + href = form_url(href,m.group('sid')) + try: + href = href.replace('&index=1','') + adapter = adapters.getAdapter(configuration,href) + if adapter.story.getMetadata('storyUrl') not in normalized: + normalized.append(adapter.story.getMetadata('storyUrl')) + retlist.append(href) + except: + pass + + if normalize: + return normalized + else: + return retlist + +def form_url(parenturl,url): + url = url.strip() # ran across an image with a space in the + # src. Browser handled it, so we'd better, too. 
+ + if "//" in url or parenturl == None: + returl = url + else: + parsedUrl = urlparse.urlparse(parenturl) + if url.startswith("/") : + returl = urlparse.urlunparse( + (parsedUrl.scheme, + parsedUrl.netloc, + url, + '','','')) + else: + toppath="" + if parsedUrl.path.endswith("/"): + toppath = parsedUrl.path + else: + toppath = parsedUrl.path[:parsedUrl.path.rindex('/')] + returl = urlparse.urlunparse( + (parsedUrl.scheme, + parsedUrl.netloc, + toppath + '/' + url, + '','','')) + return returl + diff --git a/fanficdownloader/gziphttp.py b/fanficdownloader/gziphttp.py new file mode 100644 index 00000000..76049eea --- /dev/null +++ b/fanficdownloader/gziphttp.py @@ -0,0 +1,38 @@ +## Borrowed from http://techknack.net/python-urllib2-handlers/ + +import urllib2 +from gzip import GzipFile +from StringIO import StringIO + +class GZipProcessor(urllib2.BaseHandler): + """A handler to add gzip capabilities to urllib2 requests + """ + def http_request(self, req): + req.add_header("Accept-Encoding", "gzip") + return req + https_request = http_request + + def http_response(self, req, resp): + #print("Content-Encoding:%s"%resp.headers.get("Content-Encoding")) + if resp.headers.get("Content-Encoding") == "gzip": + gz = GzipFile( + fileobj=StringIO(resp.read()), + mode="r" + ) +# resp.read = gz.read +# resp.readlines = gz.readlines +# resp.readline = gz.readline +# resp.next = gz.next + old_resp = resp + resp = urllib2.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code) + resp.msg = old_resp.msg + return resp + https_response = http_response + +# brave new world - 1:30 w/o, 1:10 with? 40 chapters, so 20s from sleeps. 
+# with gzip, no sleep: 47.469 +# w/o gzip, no sleep: 47.736 + +# I Am What I Am 67 chapters +# w/o gzip: 57.168 +# w/ gzip: 40.692 diff --git a/fanficdownloader/html.py b/fanficdownloader/html.py new file mode 100644 index 00000000..22fb40af --- /dev/null +++ b/fanficdownloader/html.py @@ -0,0 +1,126 @@ +#!/usr/bin/python +# Copyright(c) 2009 Andrew Chatham and Vijay Pandurangan + +import re +import sys +import StringIO +import urllib + +from BeautifulSoup import BeautifulSoup + +class HtmlProcessor: + WHITESPACE_RE = re.compile(r'\s') + # Look for </blockquote <p> + BAD_TAG_RE = re.compile(r'<[^>]+<', re.MULTILINE) + + def __init__(self, html, unfill=0): + self.unfill = unfill + html = self._ProcessRawHtml(html) + self._soup = BeautifulSoup(html) + if self._soup.title.contents: + self.title = self._soup.title.contents[0] + else: + self.title = None + + def _ProcessRawHtml(self, html): + new_html, count = HtmlProcessor.BAD_TAG_RE.subn('<', html) + if count: + print >>sys.stderr, 'Replaced %d bad tags' % count + return new_html + + def _StubInternalAnchors(self): + '''Replace each internal anchor with a fixed-size filepos anchor. + + Looks for every anchor with <a href="#myanchor"> and replaces that + with <a filepos="00000000050">. Stores anchors in self._anchor_references''' + self._anchor_references = [] + anchor_num = 0 + # anchor links + anchorlist = self._soup.findAll('a', href=re.compile('^#')) + # treat reference tags like a tags for TOCTOP. + anchorlist.extend(self._soup.findAll('reference', href=re.compile('^#'))) + for anchor in anchorlist: + self._anchor_references.append((anchor_num, anchor['href'])) + del anchor['href'] + anchor['filepos'] = '%.10d' % anchor_num + anchor_num += 1 + + def _ReplaceAnchorStubs(self): + # TODO: Browsers allow extra whitespace in the href names. + # use __str__ instead of prettify--it inserts extra spaces. 
+ assembled_text = self._soup.__str__('utf8') + del self._soup # shouldn't touch this anymore + for anchor_num, original_ref in self._anchor_references: + ref = urllib.unquote(original_ref[1:]) # remove leading '#' + # Find the position of ref in the utf-8 document. + # TODO(chatham): Using regexes and looking for name= would be better. + newpos = assembled_text.rfind(ref.encode('utf-8')) + if newpos == -1: + print >>sys.stderr, 'Could not find anchor "%s"' % original_ref + continue + newpos += len(ref) + 2 # don't point into the middle of the <a name> tag + old_filepos = 'filepos="%.10d"' % anchor_num + new_filepos = 'filepos="%.10d"' % newpos + assert assembled_text.find(old_filepos) != -1 + assembled_text = assembled_text.replace(old_filepos, new_filepos, 1) + return assembled_text + + def _FixPreTags(self): + '''Replace <pre> tags with HTML-ified text.''' + pres = self._soup.findAll('pre') + for pre in pres: + pre.replaceWith(self._FixPreContents(str(pre.contents[0]))) + + def _FixPreContents(self, text): + if self.unfill: + line_splitter = '\n\n' + line_joiner = '<p>' + else: + line_splitter = '\n' + line_joiner = '<br>' + lines = [] + for line in text.split(line_splitter): + lines.append(self.WHITESPACE_RE.subn(' ', line)[0]) + return line_joiner.join(lines) + + def _RemoveUnsupported(self): + '''Remove any tags which the kindle cannot handle.''' + # TODO(chatham): <link> tags to script? + unsupported_tags = ('script', 'style') + for tag_type in unsupported_tags: + for element in self._soup.findAll(tag_type): + element.extract() + + def RenameAnchors(self, prefix): + '''Rename every internal anchor to have the given prefix, then + return the contents of the body tag.''' + for anchor in self._soup.findAll('a', href=re.compile('^#')): + anchor['href'] = '#' + prefix + anchor['href'][1:] + for a in self._soup.findAll('a'): + if a.get('name'): + a['name'] = prefix + a['name'] + + # TODO(chatham): figure out how to fix this. 
sometimes body comes out + # as NoneType. + content = [] + if self._soup.body is not None: + content = [unicode(c) for c in self._soup.body.contents] + return '\n'.join(content) + + def CleanHtml(self): + # TODO(chatham): fix_html_br, fix_html + self._RemoveUnsupported() + self._StubInternalAnchors() + self._FixPreTags() + return self._ReplaceAnchorStubs() + + +if __name__ == '__main__': + FILE ='/tmp/documentation.html' + #FILE = '/tmp/multipre.html' + FILE = '/tmp/view.html' + import codecs + d = open(FILE).read() + h = HtmlProcessor(d) + s = h.CleanHtml() + #print s diff --git a/fanficdownloader/html2text.py b/fanficdownloader/html2text.py new file mode 100644 index 00000000..19965276 --- /dev/null +++ b/fanficdownloader/html2text.py @@ -0,0 +1,453 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +"""html2text: Turn HTML into equivalent Markdown-structured text.""" +__version__ = "2.37" +__author__ = "Aaron Swartz (me@aaronsw.com)" +__copyright__ = "(C) 2004-2008 Aaron Swartz. GNU GPL 3." +__contributors__ = ["Martin 'Joey' Schulze", "Ricardo Reyes", "Kevin Jay North"] + +# TODO: +# Support decoded entities with unifiable. + +if not hasattr(__builtins__, 'True'): True, False = 1, 0 +import re, sys, urllib, htmlentitydefs, codecs, StringIO, types +import sgmllib +import urlparse +sgmllib.charref = re.compile('&#([xX]?[0-9a-fA-F]+)[^0-9a-fA-F]') + +try: from textwrap import wrap +except: pass + +# Use Unicode characters instead of their ascii psuedo-replacements +UNICODE_SNOB = 0 + +# Put the links after each paragraph instead of at the end. +LINKS_EACH_PARAGRAPH = 0 + +# Wrap long lines at position. 0 for no wrapping. (Requires Python 2.3.) +BODY_WIDTH = 78 + +# Don't show internal links (href="#local-anchor") -- corresponding link targets +# won't be visible in the plain text file anyway. 
+SKIP_INTERNAL_LINKS = False + +### Entity Nonsense ### + +def name2cp(k): + if k == 'apos': return ord("'") + if hasattr(htmlentitydefs, "name2codepoint"): # requires Python 2.3 + return htmlentitydefs.name2codepoint[k] + else: + k = htmlentitydefs.entitydefs[k] + if k.startswith("&#") and k.endswith(";"): return int(k[2:-1]) # not in latin-1 + return ord(codecs.latin_1_decode(k)[0]) + +unifiable = {'rsquo':"'", 'lsquo':"'", 'rdquo':'"', 'ldquo':'"', +'copy':'(C)', 'mdash':'--', 'nbsp':' ', 'rarr':'->', 'larr':'<-', 'middot':'*', +'ndash':'-', 'oelig':'oe', 'aelig':'ae', +'agrave':'a', 'aacute':'a', 'acirc':'a', 'atilde':'a', 'auml':'a', 'aring':'a', +'egrave':'e', 'eacute':'e', 'ecirc':'e', 'euml':'e', +'igrave':'i', 'iacute':'i', 'icirc':'i', 'iuml':'i', +'ograve':'o', 'oacute':'o', 'ocirc':'o', 'otilde':'o', 'ouml':'o', +'ugrave':'u', 'uacute':'u', 'ucirc':'u', 'uuml':'u'} + +unifiable_n = {} + +for k in unifiable.keys(): + unifiable_n[name2cp(k)] = unifiable[k] + +def charref(name): + if name[0] in ['x','X']: + c = int(name[1:], 16) + else: + c = int(name) + + if not UNICODE_SNOB and c in unifiable_n.keys(): + return unifiable_n[c] + else: + return unichr(c) + +def entityref(c): + if not UNICODE_SNOB and c in unifiable.keys(): + return unifiable[c] + else: + try: name2cp(c) + except KeyError: return "&" + c + else: return unichr(name2cp(c)) + +def replaceEntities(s): + s = s.group(1) + if s[0] == "#": + return charref(s[1:]) + else: return entityref(s) + +r_unescape = re.compile(r"&(#?[xX]?(?:[0-9a-fA-F]+|\w{1,8}));") +def unescape(s): + return r_unescape.sub(replaceEntities, s) + +def fixattrs(attrs): + # Fix bug in sgmllib.py + if not attrs: return attrs + newattrs = [] + for attr in attrs: + newattrs.append((attr[0], unescape(attr[1]))) + return newattrs + +### End Entity Nonsense ### + +def onlywhite(line): + """Return true if the line does only consist of whitespace characters.""" + for c in line: + if c is not ' ' and c is not ' ': + return c is ' ' + 
return line + +def optwrap(text,wrap_width=BODY_WIDTH): + """Wrap all paragraphs in the provided text.""" + + if not wrap_width: + return text + + assert wrap, "Requires Python 2.3." + result = '' + newlines = 0 + for para in text.split("\n"): + if len(para) > 0: + if para[0] is not ' ' and para[0] is not '-' and para[0] is not '*': + for line in wrap(para, wrap_width): + result += line + "\n" + result += "\n" + newlines = 2 + else: + if not onlywhite(para): + result += para + "\n" + newlines = 1 + else: + if newlines < 2: + result += "\n" + newlines += 1 + return result + +def hn(tag): + if tag[0] == 'h' and len(tag) == 2: + try: + n = int(tag[1]) + if n in range(1, 10): return n + except ValueError: return 0 + +class _html2text(sgmllib.SGMLParser): + def __init__(self, out=None, baseurl=''): + sgmllib.SGMLParser.__init__(self) + + if out is None: self.out = self.outtextf + else: self.out = out + self.outtext = u'' + self.quiet = 0 + self.p_p = 0 + self.outcount = 0 + self.start = 1 + self.space = 0 + self.a = [] + self.astack = [] + self.acount = 0 + self.list = [] + self.blockquote = 0 + self.pre = 0 + self.startpre = 0 + self.lastWasNL = 0 + self.abbr_title = None # current abbreviation definition + self.abbr_data = None # last inner HTML (for abbr being defined) + self.abbr_list = {} # stack of abbreviations to write later + self.baseurl = baseurl + + def outtextf(self, s): + self.outtext += s + + def close(self): + sgmllib.SGMLParser.close(self) + + self.pbr() + self.o('', 0, 'end') + + return self.outtext + + def handle_charref(self, c): + self.o(charref(c)) + + def handle_entityref(self, c): + self.o(entityref(c)) + + def unknown_starttag(self, tag, attrs): + self.handle_tag(tag, attrs, 1) + + def unknown_endtag(self, tag): + self.handle_tag(tag, None, 0) + + def previousIndex(self, attrs): + """ returns the index of certain set of attributes (of a link) in the + self.a list + + If the set of attributes is not found, returns None + """ + if not 
attrs.has_key('href'): return None + + i = -1 + for a in self.a: + i += 1 + match = 0 + + if a.has_key('href') and a['href'] == attrs['href']: + if a.has_key('title') or attrs.has_key('title'): + if (a.has_key('title') and attrs.has_key('title') and + a['title'] == attrs['title']): + match = True + else: + match = True + + if match: return i + + def handle_tag(self, tag, attrs, start): + attrs = fixattrs(attrs) + + if hn(tag): + self.p() + if start: self.o(hn(tag)*"#" + ' ') + + if tag in ['p', 'div']: self.p() + + if tag == "br" and start: self.o(" \n") + + if tag == "hr" and start: + self.p() + self.o("* * *") + self.p() + + if tag in ["head", "style", 'script']: + if start: self.quiet += 1 + else: self.quiet -= 1 + + if tag in ["body"]: + self.quiet = 0 # sites like 9rules.com never close <head> + + if tag == "blockquote": + if start: + self.p(); self.o('> ', 0, 1); self.start = 1 + self.blockquote += 1 + else: + self.blockquote -= 1 + self.p() + + if tag in ['em', 'i', 'u']: self.o("_") + if tag in ['strong', 'b']: self.o("**") + if tag == "code" and not self.pre: self.o('`') #TODO: `` `this` `` + if tag == "abbr": + if start: + attrsD = {} + for (x, y) in attrs: attrsD[x] = y + attrs = attrsD + + self.abbr_title = None + self.abbr_data = '' + if attrs.has_key('title'): + self.abbr_title = attrs['title'] + else: + if self.abbr_title != None: + self.abbr_list[self.abbr_data] = self.abbr_title + self.abbr_title = None + self.abbr_data = '' + + if tag == "a": + if start: + attrsD = {} + for (x, y) in attrs: attrsD[x] = y + attrs = attrsD + if attrs.has_key('href') and not (SKIP_INTERNAL_LINKS and attrs['href'].startswith('#')): + self.astack.append(attrs) + self.o("[") + else: + self.astack.append(None) + else: + if self.astack: + a = self.astack.pop() + if a: + i = self.previousIndex(a) + if i is not None: + a = self.a[i] + else: + self.acount += 1 + a['count'] = self.acount + a['outcount'] = self.outcount + self.a.append(a) + self.o("][" + `a['count']` + "]") + 
+ if tag == "img" and start: + attrsD = {} + for (x, y) in attrs: attrsD[x] = y + attrs = attrsD + if attrs.has_key('src'): + attrs['href'] = attrs['src'] + alt = attrs.get('alt', '') + i = self.previousIndex(attrs) + if i is not None: + attrs = self.a[i] + else: + self.acount += 1 + attrs['count'] = self.acount + attrs['outcount'] = self.outcount + self.a.append(attrs) + self.o("![") + self.o(alt) + self.o("]["+`attrs['count']`+"]") + + if tag == 'dl' and start: self.p() + if tag == 'dt' and not start: self.pbr() + if tag == 'dd' and start: self.o(' ') + if tag == 'dd' and not start: self.pbr() + + if tag in ["ol", "ul"]: + if start: + self.list.append({'name':tag, 'num':0}) + else: + if self.list: self.list.pop() + + self.p() + + if tag == 'li': + if start: + self.pbr() + if self.list: li = self.list[-1] + else: li = {'name':'ul', 'num':0} + self.o(" "*len(self.list)) #TODO: line up <ol><li>s > 9 correctly. + if li['name'] == "ul": self.o("* ") + elif li['name'] == "ol": + li['num'] += 1 + self.o(`li['num']`+". 
") + self.start = 1 + else: + self.pbr() + + if tag in ["table", "tr"] and start: self.p() + if tag == 'td': self.pbr() + + if tag == "pre": + if start: + self.startpre = 1 + self.pre = 1 + else: + self.pre = 0 + self.p() + + def pbr(self): + if self.p_p == 0: self.p_p = 1 + + def p(self): self.p_p = 2 + + def o(self, data, puredata=0, force=0): + if self.abbr_data is not None: self.abbr_data += data + + if not self.quiet: + if puredata and not self.pre: + data = re.sub('\s+', ' ', data) + if data and data[0] == ' ': + self.space = 1 + data = data[1:] + if not data and not force: return + + if self.startpre: + #self.out(" :") #TODO: not output when already one there + self.startpre = 0 + + bq = (">" * self.blockquote) + if not (force and data and data[0] == ">") and self.blockquote: bq += " " + + if self.pre: + bq += " " + data = data.replace("\n", "\n"+bq) + + if self.start: + self.space = 0 + self.p_p = 0 + self.start = 0 + + if force == 'end': + # It's the end. + self.p_p = 0 + self.out("\n") + self.space = 0 + + + if self.p_p: + self.out(('\n'+bq)*self.p_p) + self.space = 0 + + if self.space: + if not self.lastWasNL: self.out(' ') + self.space = 0 + + if self.a and ((self.p_p == 2 and LINKS_EACH_PARAGRAPH) or force == "end"): + if force == "end": self.out("\n") + + newa = [] + for link in self.a: + if self.outcount > link['outcount']: + self.out(" ["+`link['count']`+"]: " + urlparse.urljoin(self.baseurl, link['href'])) + if link.has_key('title'): self.out(" ("+link['title']+")") + self.out("\n") + else: + newa.append(link) + + if self.a != newa: self.out("\n") # Don't need an extra line when nothing was done. 
+ + self.a = newa + + if self.abbr_list and force == "end": + for abbr, definition in self.abbr_list.items(): + self.out(" *[" + abbr + "]: " + definition + "\n") + + self.p_p = 0 + self.out(data) + self.lastWasNL = data and data[-1] == '\n' + self.outcount += 1 + + def handle_data(self, data): + if r'\/script>' in data: self.quiet -= 1 + self.o(data, 1) + + def unknown_decl(self, data): pass + +def wrapwrite(text): sys.stdout.write(text.encode('utf8')) + +def html2text_file(html, out=wrapwrite, baseurl=''): + h = _html2text(out, baseurl) + h.feed(html) + h.feed("") + return h.close() + +def html2text(html, baseurl='', wrap_width=BODY_WIDTH): + return optwrap(html2text_file(html, None, baseurl),wrap_width) + +if __name__ == "__main__": + baseurl = '' + if sys.argv[1:]: + arg = sys.argv[1] + if arg.startswith('http://'): + baseurl = arg + j = urllib.urlopen(baseurl) + try: + from feedparser import _getCharacterEncoding as enc + except ImportError: + enc = lambda x, y: ('utf-8', 1) + text = j.read() + encoding = enc(j.headers, text)[0] + if encoding == 'us-ascii': encoding = 'utf-8' + data = text.decode(encoding) + + else: + encoding = 'utf8' + if len(sys.argv) > 2: + encoding = sys.argv[2] + data = open(arg, 'r').read().decode(encoding) + else: + data = sys.stdin.read().decode('utf8') + wrapwrite(html2text(data, baseurl)) diff --git a/fanficdownloader/htmlcleanup.py b/fanficdownloader/htmlcleanup.py new file mode 100644 index 00000000..09d29855 --- /dev/null +++ b/fanficdownloader/htmlcleanup.py @@ -0,0 +1,482 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import logging +logger = logging.getLogger(__name__) + +import re + +def _unirepl(match): + "Return the unicode string for a decimal number" + if match.group(1).startswith('x'): + radix=16 + s = match.group(1)[1:] + else: + radix=10 + s = match.group(1) + try: + value = int(s, radix) + retval = "%s%s"%(unichr(value),match.group(2)) + except: + # This way, at least if there's more of entities out there + # that fail, it doesn't blow the entire download. + logger.warn("Numeric entity translation failed, skipping: &#x%s%s"%(match.group(1),match.group(2))) + retval = "" + return retval + +def _replaceNumberEntities(data): + # The same brokenish entity parsing in SGMLParser that inserts ';' + # after non-entities will also insert ';' incorrectly after number + # entities, including part of the next word if it's a-z. + # "Don't—ever—do—that—again," becomes + # "Don't—e;ver—d;o—that—a;gain," + # Also need to allow for 5 digit decimal entities 法 + # Last expression didn't allow for 2 digit hex correctly: é + p = re.compile(r'&#(x[0-9a-fA-F]{,4}|[0-9]{,5})([0-9a-fA-F]*?);') + return p.sub(_unirepl, data) + +def _replaceNotEntities(data): + # not just \w or \S. 
regexp from c:\Python25\lib\sgmllib.py + # (or equiv), SGMLParser, entityref + p = re.compile(r'&([a-zA-Z][-.a-zA-Z0-9]*);') + return p.sub(r'&\1', data) + +def stripHTML(soup): + return removeAllEntities(re.sub(r'<[^>]+>','',"%s" % soup)).strip() + +def conditionalRemoveEntities(value): + if isinstance(value,str) or isinstance(value,unicode) : + return removeEntities(value).strip() + else: + return value + +def removeAllEntities(text): + # Remove < < and & + return removeEntities(text).replace('<', '<').replace('>', '>').replace('&', '&') + +def removeEntities(text): + + if text is None: + return "" + + if not isinstance(text,basestring): + return unicode(text) + + try: + t = text.decode('utf-8') + except UnicodeEncodeError, e: + try: + t = text.encode ('ascii', 'xmlcharrefreplace') + except UnicodeEncodeError, e: + t = text + text = t + # replace numeric versions of [&<>] with named versions, + # then replace named versions with actual characters, + text = re.sub(r'�*38;','&',text) + text = re.sub(r'�*60;','<',text) + text = re.sub(r'�*62;','>',text) + + # replace remaining � entities with unicode value, such as ' -> ' + text = _replaceNumberEntities(text) + + # replace several named entities with character, such as — -> - + # see constants.py for the list. + # reverse sort will put entities with ; before the same one without, when valid. + for e in reversed(sorted(entities.keys())): + v = entities[e] + try: + text = text.replace(e, v) + except UnicodeDecodeError, ex: + # for the pound symbol in constants.py + text = text.replace(e, v.decode('utf-8')) + + # SGMLParser, and in turn, BeautifulStoneSoup doesn't parse + # entities terribly well and inserts (;) after something that + # it thinks might be an entity. AT&T becomes AT&T; All of my + # attempts to fix this by changing the input to + # BeautifulStoneSoup break something else instead. 
But at + # this point, there should be *no* real entities left, so find + # these not-entities and removing them here should be safe. + text = _replaceNotEntities(text) + + # < < and & are the only html entities allowed in xhtml, put those back. + return text.replace('&', '&').replace('&lt', '<').replace('&gt', '>') + +# entity list from http://code.google.com/p/doctype/wiki/CharacterEntitiesConsistent +entities = { 'á' : 'á', + 'Á' : 'Á', + 'Á' : 'Á', + 'á' : 'á', + 'â' : 'â', + 'Â' : 'Â', + 'Â' : 'Â', + 'â' : 'â', + '´' : '´', + '´' : '´', + 'Æ' : 'Æ', + 'æ' : 'æ', + 'Æ' : 'Æ', + 'æ' : 'æ', + 'à' : 'à', + 'À' : 'À', + 'À' : 'À', + 'à' : 'à', + 'ℵ' : 'ℵ', + 'α' : 'α', + 'Α' : 'Α', + '&' : '&', + '&' : '&', + '&' : '&', + '&' : '&', + '∧' : '∧', + '∠' : '∠', + 'å' : 'å', + 'Å' : 'Å', + 'Å' : 'Å', + 'å' : 'å', + '≈' : '≈', + 'ã' : 'ã', + 'Ã' : 'Ã', + 'Ã' : 'Ã', + 'ã' : 'ã', + 'ä' : 'ä', + 'Ä' : 'Ä', + 'Ä' : 'Ä', + 'ä' : 'ä', + '„' : '„', + 'β' : 'β', + 'Β' : 'Β', + '¦' : '¦', + '¦' : '¦', + '•' : '•', + '∩' : '∩', + 'ç' : 'ç', + 'Ç' : 'Ç', + 'Ç' : 'Ç', + 'ç' : 'ç', + '¸' : '¸', + '¸' : '¸', + '¢' : '¢', + '¢' : '¢', + 'χ' : 'χ', + 'Χ' : 'Χ', + 'ˆ' : 'ˆ', + '♣' : '♣', + '≅' : '≅', + '©' : '©', + '©' : '©', + '©' : '©', + '©' : '©', + '↵' : '↵', + '∪' : '∪', + '¤' : '¤', + '¤' : '¤', + '†' : '†', + '‡' : '‡', + '↓' : '↓', + '⇓' : '⇓', + '°' : '°', + '°' : '°', + 'δ' : 'δ', + 'Δ' : 'Δ', + '♦' : '♦', + '÷' : '÷', + '÷' : '÷', + 'é' : 'é', + 'É' : 'É', + 'É' : 'É', + 'é' : 'é', + 'ê' : 'ê', + 'Ê' : 'Ê', + 'Ê' : 'Ê', + 'ê' : 'ê', + 'è' : 'è', + 'È' : 'È', + 'È' : 'È', + 'è' : 'è', + '∅' : '∅', + ' ' : ' ', + ' ' : ' ', + 'ε' : 'ε', + 'Ε' : 'Ε', + '≡' : '≡', + 'η' : 'η', + 'Η' : 'Η', + 'ð' : 'ð', + 'Ð' : 'Ð', + 'Ð' : 'Ð', + 'ð' : 'ð', + 'ë' : 'ë', + 'Ë' : 'Ë', + 'Ë' : 'Ë', + 'ë' : 'ë', + '€' : '€', + '∃' : '∃', + 'ƒ' : 'ƒ', + '∀' : '∀', + '½' : '½', + '½' : '½', + '¼' : '¼', + '¼' : '¼', + '¾' : '¾', + '¾' : '¾', + '⁄' : '⁄', + 'γ' : 'γ', + 'Γ' : 'Γ', + '≥' : '≥', + #'>' : 
'>', + #'>' : '>', + #'>' : '>', + #'>' : '>', + '↔' : '↔', + '⇔' : '⇔', + '♥' : '♥', + '…' : '…', + 'í' : 'í', + 'Í' : 'Í', + 'Í' : 'Í', + 'í' : 'í', + 'î' : 'î', + 'Î' : 'Î', + 'Î' : 'Î', + 'î' : 'î', + '¡' : '¡', + '¡' : '¡', + 'ì' : 'ì', + 'Ì' : 'Ì', + 'Ì' : 'Ì', + 'ì' : 'ì', + 'ℑ' : 'ℑ', + '∞' : '∞', + '∫' : '∫', + 'ι' : 'ι', + 'Ι' : 'Ι', + '¿' : '¿', + '¿' : '¿', + '∈' : '∈', + 'ï' : 'ï', + 'Ï' : 'Ï', + 'Ï' : 'Ï', + 'ï' : 'ï', + 'κ' : 'κ', + 'Κ' : 'Κ', + 'λ' : 'λ', + 'Λ' : 'Λ', + '«' : '«', + '«' : '«', + '←' : '←', + '⇐' : '⇐', + '⌈' : '⌈', + '“' : '“', + '≤' : '≤', + '⌊' : '⌊', + '∗' : '∗', + '◊' : '◊', + '‎' : '‎', + '‹' : '‹', + '‘' : '‘', + #'<' : '<', + #'<' : '<', + #'<' : '<', + #'<' : '<', + '¯' : '¯', + '¯' : '¯', + '—' : '—', + 'µ' : 'µ', + 'µ' : 'µ', + '·' : '·', + '·' : '·', + '−' : '−', + 'μ' : 'μ', + 'Μ' : 'Μ', + '∇' : '∇', + ' ' : ' ', + ' ' : ' ', + '–' : '–', + '≠' : '≠', + '∋' : '∋', + '¬' : '¬', + '¬' : '¬', + '∉' : '∉', + '⊄' : '⊄', + 'ñ' : 'ñ', + 'Ñ' : 'Ñ', + 'Ñ' : 'Ñ', + 'ñ' : 'ñ', + 'ν' : 'ν', + 'Ν' : 'Ν', + 'ó' : 'ó', + 'Ó' : 'Ó', + 'Ó' : 'Ó', + 'ó' : 'ó', + 'ô' : 'ô', + 'Ô' : 'Ô', + 'Ô' : 'Ô', + 'ô' : 'ô', + 'Œ' : 'Œ', + 'œ' : 'œ', + 'ò' : 'ò', + 'Ò' : 'Ò', + 'Ò' : 'Ò', + 'ò' : 'ò', + '‾' : '‾', + 'ω' : 'ω', + 'Ω' : 'Ω', + 'ο' : 'ο', + 'Ο' : 'Ο', + '⊕' : '⊕', + '∨' : '∨', + 'ª' : 'ª', + 'ª' : 'ª', + 'º' : 'º', + 'º' : 'º', + 'ø' : 'ø', + 'Ø' : 'Ø', + 'Ø' : 'Ø', + 'ø' : 'ø', + 'õ' : 'õ', + 'Õ' : 'Õ', + 'Õ' : 'Õ', + 'õ' : 'õ', + '⊗' : '⊗', + 'ö' : 'ö', + 'Ö' : 'Ö', + 'Ö' : 'Ö', + 'ö' : 'ö', + '¶' : '¶', + '¶' : '¶', + '∂' : '∂', + '‰' : '‰', + '⊥' : '⊥', + 'φ' : 'φ', + 'Φ' : 'Φ', + 'π' : 'π', + 'Π' : 'Π', + 'ϖ' : 'ϖ', + '±' : '±', + '±' : '±', + '£' : '£', + '£' : '£', + '′' : '′', + '″' : '″', + '∏' : '∏', + '∝' : '∝', + 'ψ' : 'ψ', + 'Ψ' : 'Ψ', + '"' : '"', + '"' : '"', + '"' : '"', + '"' : '"', + '√' : '√', + '»' : '»', + '»' : '»', + '→' : '→', + '⇒' : '⇒', + '⌉' : '⌉', + '”' : '”', + 'ℜ' : 'ℜ', + '®' : '®', + '®' : '®', + '®' : 
'®', + '®' : '®', + '⌋' : '⌋', + 'ρ' : 'ρ', + 'Ρ' : 'Ρ', + '‏' : '‏', + '›' : '›', + '’' : '’', + '‚' : '‚', + 'š' : 'š', + 'Š' : 'Š', + '⋅' : '⋅', + '§' : '§', + '§' : '§', + '­' : '­', # strange optional hyphenation control character, not just a dash + '­' : '­', + 'σ' : 'σ', + 'Σ' : 'Σ', + 'ς' : 'ς', + '∼' : '∼', + '♠' : '♠', + '⊂' : '⊂', + '⊆' : '⊆', + '∑' : '∑', + '¹' : '¹', + '¹' : '¹', + '²' : '²', + '²' : '²', + '³' : '³', + '³' : '³', + '⊃' : '⊃', + '⊇' : '⊇', + 'ß' : 'ß', + 'ß' : 'ß', + 'τ' : 'τ', + 'Τ' : 'Τ', + '∴' : '∴', + 'θ' : 'θ', + 'Θ' : 'Θ', + 'ϑ' : 'ϑ', + ' ' : ' ', + 'þ' : 'þ', + 'Þ' : 'Þ', + 'Þ' : 'Þ', + 'þ' : 'þ', + '˜' : '˜', + '×' : '×', + '×' : '×', + '™' : '™', + 'ú' : 'ú', + 'Ú' : 'Ú', + 'Ú' : 'Ú', + 'ú' : 'ú', + '↑' : '↑', + '⇑' : '⇑', + 'û' : 'û', + 'Û' : 'Û', + 'Û' : 'Û', + 'û' : 'û', + 'ù' : 'ù', + 'Ù' : 'Ù', + 'Ù' : 'Ù', + 'ù' : 'ù', + '¨' : '¨', + '¨' : '¨', + 'ϒ' : 'ϒ', + 'υ' : 'υ', + 'Υ' : 'Υ', + 'ü' : 'ü', + 'Ü' : 'Ü', + 'Ü' : 'Ü', + 'ü' : 'ü', + '℘' : '℘', + 'ξ' : 'ξ', + 'Ξ' : 'Ξ', + 'ý' : 'ý', + 'Ý' : 'Ý', + 'Ý' : 'Ý', + 'ý' : 'ý', + '¥' : '¥', + '¥' : '¥', + 'ÿ' : 'ÿ', + 'Ÿ' : 'Ÿ', + 'ÿ' : 'ÿ', + 'ζ' : 'ζ', + 'Ζ' : 'Ζ', + '‍' : '‍', # strange spacing control character, not just a space + '‌' : '‌', # strange spacing control character, not just a space + } diff --git a/fanficdownloader/htmlheuristics.py b/fanficdownloader/htmlheuristics.py new file mode 100644 index 00000000..52a78df4 --- /dev/null +++ b/fanficdownloader/htmlheuristics.py @@ -0,0 +1,348 @@ +# -*- coding: utf-8 -*- + +# Copyright 2013 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import logging +logger = logging.getLogger(__name__) +import re +import codecs +import BeautifulSoup as bs +import HtmlTagStack as stack + +from . import exceptions as exceptions + +def replace_br_with_p(body): + + # Ascii character (and Unicode as well) xA0 is a non-breaking space, ascii code 160. + # However, Python Regex does not recognize it as a whitespace, so we'll be changing it to a reagular space. + body = body.replace(u'\xa0', u' ') + + if body.find('>') == -1 or body.rfind('<') == -1: + return body + + # logger.debug(u'BODY start.: ' + body[:250]) + # logger.debug(u'BODY end...: ' + body[-250:]) + # logger.debug(u'BODY.......: ' + body) + + # clean breaks (<br />), removing whitespaces between them. + body = re.sub(r'\s*<br[^>]*>\s*', r'<br />', body) + + # change surrounding div to a p and remove attrs Top surrounding + # tag in all cases now should be div, to just strip the first and + # last tags. + if is_valid_block(body) and body.find('<div') == 0: + body = body[body.index('>')+1:body.rindex('<')] + + body = soup_up_div(u'<div>' + body + u'</div>') + + body = body[body.index('>')+1:body.rindex('<')] + + # Find all bexisting blocks with p, pre and blockquote tags, we need to shields break tags inside those. + # This is for "lenient" mode, however it is also used to clear break tags before and after the block elements. 
+ blocksRegex = re.compile(r'(\s*<br\ />\s*)*\s*<(pre|p|blockquote|table)([^>]*)>(.+?)</\2>\s*(\s*<br\ />\s*)*', re.DOTALL) + body = blocksRegex.sub(r'\n<\2\3>\4</\2>\n', body) + + # if aggressive mode = true + # blocksRegex = re.compile(r'(\s*<br\ */*>\s*)*\s*<(pre)([^>]*)>(.+?)</\2>\s*(\s*<br\ */*>\s*)*', re.DOTALL) + # In aggressive mode, we also check breakes inside blockquotes, meaning we can get orphaned paragraph tags. + # body = re.sub(r'<blockquote([^>]*)>(.+?)</blockquote>', r'<blockquote\1><p>\2</p></blockquote>', body, re.DOTALL) + # end aggressive mode + + blocks = blocksRegex.finditer(body) + # For our replacements to work, we need to work backwards, so we reverse the iterator. + blocksList = [] + for match in blocks: + blocksList.insert(0, match) + + for match in blocksList: + group4 = match.group(4).replace(u'<br />', u'{br /}') + body = body[:match.start(4)] + group4 + body[match.end(4):] + + # change surrounding div to a p and remove attrs Top surrounding + # tag in all cases now should be div, to just strip the first and + # last tags. + # body = u'<p>' + body + u'</p>' + + # Nuke div tags surrounding a HR tag. + body = re.sub(r'<div[^>]+>\s*<hr[^>]+>\s*</div>', r'\n<hr />\n', body) + + # So many people add formatting to their HR tags, and ePub does not allow those, we are supposed to use css. + # This nukes the hr tag attributes. + body = re.sub(r'\s*<hr[^>]+>\s*', r'\n<hr />\n', body) + + # Remove leading and trailing breaks from HR tags + body = re.sub(r'\s*(<br\ \/>)*\s*<hr\ \/>\s*(<br\ \/>)*\s*', r'\n<hr />\n', body) + # Nuking breaks leading paragraps that may be in the body. They are eventually treated as <p><br /></p> + body = re.sub(r'\s*(<br\ \/>)+\s*<p', r'\n<p></p>\n<p', body) + # Nuking breaks trailing paragraps that may be in the body. 
They are eventually treated as <p><br /></p> + body = re.sub(r'</p>\s*(<br\ \/>)+\s*', r'</p>\n<p></p>\n', body) + + # Because a leading or trailing non break tag will break the following code, we have to mess around rather badly for a few lines. + body = body.replace(u'[',u'&squareBracketStart;') + body = body.replace(u']',u'&squareBracketEnd;') + body = body.replace(u'<br />',u'[br /]') + + breaksRegexp = [ + re.compile(r'([^\]])(\[br\ \/\])([^\[])'), + re.compile(r'([^\]])(\[br\ \/\]){2}([^\[])'), + re.compile(r'([^\]])(\[br\ \/\]){3}([^\[])'), + re.compile(r'([^\]])(\[br\ \/\]){4}([^\[])'), + re.compile(r'([^\]])(\[br\ \/\]){5}([^\[])'), + re.compile(r'([^\]])(\[br\ \/\]){6}([^\[])'), + re.compile(r'([^\]])(\[br\ \/\]){7}([^\[])'), + re.compile(r'([^\]])(\[br\ \/\]){8}([^\[])'), + re.compile(r'(\[br\ \/\]){9,}')] + + breaksCount = [ + len(breaksRegexp[0].findall(body)), + len(breaksRegexp[1].findall(body)), + len(breaksRegexp[2].findall(body)), + len(breaksRegexp[3].findall(body)), + len(breaksRegexp[4].findall(body)), + len(breaksRegexp[5].findall(body)), + len(breaksRegexp[6].findall(body)), + len(breaksRegexp[7].findall(body))] + + breaksMax = 0 + breaksMaxIndex = 0; + + for i in range(1,len(breaksCount)): + if breaksCount[i] >= breaksMax: + breaksMax = breaksCount[i] + breaksMaxIndex = i + + lines = body.split(u'[br /]') + contentLines = 0; + contentLinesSum = 0; + longestLineLength = 0; + averageLineLength = 0; + + for line in lines: + lineLen = len(line.strip()) + if lineLen > 0: + contentLines += 1 + contentLinesSum += lineLen + if lineLen > longestLineLength: + longestLineLength = lineLen + + if contentLines == 0: + contentLines = 1 + + averageLineLength = contentLinesSum/contentLines + + logger.debug(u'---') + logger.debug(u'Lines.............: ' + unicode(len(lines))) + logger.debug(u'contentLines......: ' + unicode(contentLines)) + logger.debug(u'contentLinesSum...: ' + unicode(contentLinesSum)) + logger.debug(u'longestLineLength.: ' + 
unicode(longestLineLength)) + logger.debug(u'averageLineLength.: ' + unicode(averageLineLength)) + + if breaksMaxIndex == len(breaksCount)-1 and breaksMax < 2: + breaksMaxIndex = 0 + breaksMax = breaksCount[0] + + logger.debug(u'---') + logger.debug(u'breaks 1: ' + unicode(breaksCount[0])) + logger.debug(u'breaks 2: ' + unicode(breaksCount[1])) + logger.debug(u'breaks 3: ' + unicode(breaksCount[2])) + logger.debug(u'breaks 4: ' + unicode(breaksCount[3])) + logger.debug(u'breaks 5: ' + unicode(breaksCount[4])) + logger.debug(u'breaks 6: ' + unicode(breaksCount[5])) + logger.debug(u'breaks 7: ' + unicode(breaksCount[6])) + logger.debug(u'breaks 8: ' + unicode(breaksCount[7])) + logger.debug(u'----') + logger.debug(u'max found: ' + unicode(breaksMax)) + logger.debug(u'max Index: ' + unicode(breaksMaxIndex)) + logger.debug(u'----') + + if breaksMaxIndex > 0 and breaksCount[0] > breaksMax and averageLineLength < 90: + body = breaksRegexp[0].sub(r'\1 \n\3', body) + + # Find all instances of consecutive breaks less than otr equal to the max count use most often + # replase those tags to inverted p tag pairs, those with more connsecutive breaks are replaced them with a horisontal line + for i in range(len(breaksCount)): + # if i > 0 or breaksMaxIndex == 0: + if i <= breaksMaxIndex: + logger.debug(unicode(i) + u' <= breaksMaxIndex (' + unicode(breaksMaxIndex) + u')') + body = breaksRegexp[i].sub(r'\1</p>\n<p>\3', body) + elif i == breaksMaxIndex+1: + logger.debug(unicode(i) + u' == breaksMaxIndex+1 (' + unicode(breaksMaxIndex+1) + u')') + body = breaksRegexp[i].sub(r'\1</p>\n<p><br/></p>\n<p>\3', body) + else: + logger.debug(unicode(i) + u' > breaksMaxIndex+1 (' + unicode(breaksMaxIndex+1) + u')') + body = breaksRegexp[i].sub(r'\1</p>\n<hr />\n<p>\3', body) + + body = breaksRegexp[8].sub(r'</p>\n<hr />\n<p>', body) + + # Reverting the square brackets + body = body.replace(u'[', u'<') + body = body.replace(u']', u'>') + body = body.replace(u'&squareBracketStart;', u'[') + 
body = body.replace(u'&squareBracketEnd;', u']') + + body = body.replace(u'{p}', u'<p>') + body = body.replace(u'{/p}', u'</p>') + + # If for some reason, a third break makes its way inside the paragraph, preplace that with the empty paragraph for the additional linespaing. + body = re.sub(r'<p>\s*(<br\ \/>)+', r'<p><br /></p>\n<p>', body) + + # change empty p tags to include a br to force spacing. + body = re.sub(r'<p>\s*</p>', r'<p><br/></p>', body) + + # Clean up hr tags, and add inverted p tag pairs + body = re.sub(r'(<div[^>]+>)*\s*<hr\ \/>\s*(</div>)*', r'\n<hr />\n', body) + + # Clean up hr tags, and add inverted p tag pairs + body = re.sub(r'\s*<hr\ \/>\s*', r'</p>\n<hr />\n<p>', body) + + # Because the previous regexp may cause trouble if the hr tag already had a p tag pair around it, w nee dot repair that. + # Repeated opening p tags are condenced to one. As we added the extra leading opening p tags, we can safely assume that + # the last in such a chain must be the original. Lets keep its attributes if they are there. + body = re.sub(r'\s*(<p[^>]*>\s*)+<p([^>]*)>\s*', r'\n<p\2>', body) + # Repeated closing p tags are condenced to one + body = re.sub(r'\s*(<\/\s*p>\s*){2,}', r'</p>\n', body) + + # superflous cleaning, remove whitespaces traling opening p tags. These does affect formatting. + body = re.sub(r'\s*<p([^>]*)>\s*', r'\n<p\1>', body) + # superflous cleaning, remove whitespaces leading closing p tags. These does not affect formatting. + body = re.sub(r'\s*</p>\s*', r'</p>\n', body) + + # Remove empty tag pairs + body = re.sub(r'\s*<(\S+)[^>]*>\s*</\1>', r'', body) + + body = body.replace(u'{br /}', u'<br />') + body = body.strip() + + # re-wrap in div tag. 
+ body = u'<div>\n' + body + u'</div>\n' + + # return body + return tag_sanitizer(body) + +def is_valid_block(block): + return unicode(block).find('<') == 0 and unicode(block).find('<!') != 0 + +def soup_up_div(body): + blockTags = ['address', 'blockquote', 'del', 'div', 'dl', 'fieldset', 'form', 'ins', 'noscript', 'ol', 'p', 'pre', 'table', 'ul'] + recurseTags = ['blockquote', 'div', 'noscript'] + + tag = body[:body.index('>')+1] + tagend = body[body.rindex('<'):] + + body = body.replace(u'<br />', u'[br /]') + + soup = bs.BeautifulSoup(body) + + body = u'' + lastElement = 1 # 1 = block, 2 = nested, 3 = invalid + + for i in soup.contents[0]: + if unicode(i).strip().__len__() > 0: + s = unicode(i) + if type(i) == bs.Tag: + if i.name in blockTags: + if lastElement > 1: + body = body.strip(r'\s*(\[br\ \/\]\s*)*\s*') + body += u'{/p}' + + lastElement = 1 + + if i.name in recurseTags: + s = soup_up_div(s) + + body += s.strip() + '\n' + else: + if lastElement == 1: + body = body.strip(r'\s*(\[br\ \/\]\s*)*\s*') + body += u'{p}' + + lastElement = 2 + body += s + elif type(i) == bs.Comment: + body += s + else: + if lastElement == 1: + body = body.strip(r'\s*(\[br\ \/\]\s*)*\s*') + body += u'{p}' + + lastElement = 3 + body += s + + if lastElement > 1: + body = body.strip(r'\s*(\[br\ \/\]\s*)*\s*') + body += u'{/p}' + + body = body.replace(u'[br /]', u'<br />') + + return tag + body + tagend + + +def is_end_tag(tag): + return re.match(r'</([^\ >]+)>', tag) != None + +def is_comment_tag(tag): + return re.match(r'<\!\-\-([^>]+)>', tag) != None + +def is_closed_tag(tag): + return re.match(r'<(.+?)/>', tag) != None + +def tag_sanitizer(html): + blockTags = ['address', 'blockquote', 'del', 'div', 'dl', 'fieldset', 'form', 'ins', 'noscript', 'ol', 'pre', 'table', 'ul'] + + body = u'' + tags = re.findall(r'(<[^>]+>)([^<]*)', html) + + for rTag in tags: + name = stack.get_tag_name(rTag[0]) + is_end = is_end_tag(rTag[0]) + is_closed = is_closed_tag(rTag[0]) or 
is_comment_tag(rTag[0]) + + # is_comment = is_comment_tag(rTag[0]) + # logger.debug(u'%s > isEnd: %s > isClosed: %s > isComment: %s'%(name, unicode(is_end), unicode(is_closed), unicode(is_comment))) + # logger.debug(u'> %s%s\n'%(rTag[0], rTag[1])) + + if name in blockTags: + body += rTag[0] + body += rTag[1] + elif name == u'p': + if is_end: + body += stack.spool_end() + body += rTag[0] + body += rTag[1] + elif is_closed: + body += rTag[0] + body += rTag[1] + else: + body += rTag[0] + body += stack.spool_start() + body += rTag[1] + else: + if is_end: + t = stack.get_last() + tn = stack.get_tag_name(t) + rTn = stack.get_tag_name(rTag[0]) + if tn == rTn: + body += rTag[0] + stack.pop() + elif not is_closed: + stack.push(rTag[0]) + body += rTag[0] + else: + body += rTag[0] + + body += rTag[1] + stack.flush() + return body diff --git a/fanficdownloader/mobi.py b/fanficdownloader/mobi.py new file mode 100644 index 00000000..7a527154 --- /dev/null +++ b/fanficdownloader/mobi.py @@ -0,0 +1,386 @@ +#!/usr/bin/python +# Copyright(c) 2009 Andrew Chatham and Vijay Pandurangan + + +import StringIO +import struct +import time +import random +import logging + +logger = logging.getLogger(__name__) + +from html import HtmlProcessor + +# http://wiki.mobileread.com/wiki/MOBI +# http://membres.lycos.fr/microfirst/palm/pdb.html + +encoding = { + 'UTF-8' : 65001, + 'latin-1' : 1252, +} + +languages = {"en-us" : 0x0409, + "sv" : 0x041d, + "fi" : 0x000b, + "en" : 0x0009, + "en-gb" : 0x0809} + +def ToHex(s): + v = ['%.2x' % ord(c) for c in s] + return ' '.join(v) + +class _SubEntry: + def __init__(self, pos, html_data): + self.pos = pos + self.html = HtmlProcessor(html_data) + self.title = self.html.title + self._name = 'mobi_article_%d' % pos + if not self.title: + self.title = 'Article %d' % self.pos + + def TocLink(self): + return '<a href="#%s_MOBI_START">%.80s</a>' % (self._name, self.title) + + def Anchor(self): + return '<a name="%s_MOBI_START">' % self._name + + def Body(self): + 
return self.html.RenameAnchors(self._name + '_') + +class Converter: + def __init__(self, refresh_url='', title='Unknown', author='Unknown', publisher='Unknown'): + self._header = Header() + self._header.SetTitle(title) + self._header.SetAuthor(author) + self._header.SetPublisher(publisher) + self._refresh_url = refresh_url + + def ConvertString(self, s): + out = StringIO.StringIO() + self._ConvertStringToFile(s, out) + return out.getvalue() + + def ConvertStrings(self, html_strs): + out = StringIO.StringIO() + self._ConvertStringsToFile(html_strs, out) + return out.getvalue() + + def ConvertFile(self, html_file, out_file): + self._ConvertStringToFile(open(html_file,'rb').read(), + open(out_file, 'wb')) + + def ConvertFiles(self, html_files, out_file): + html_strs = [open(f,'rb').read() for f in html_files] + self._ConvertStringsToFile(html_strs, open(out_file, 'wb')) + + def MakeOneHTML(self, html_strs): + """This takes a list of HTML strings and returns a big HTML file with + all contents consolidated. It constructs a table of contents and adds + anchors within the text + """ + title_html = [] + toc_html = [] + body_html = [] + + PAGE_BREAK = '<mbp:pagebreak>' + + # pull out the title page, assumed first html_strs. + htmltitle = html_strs[0] + entrytitle = _SubEntry(1, htmltitle) + title_html.append(entrytitle.Body()) + + title_html.append(PAGE_BREAK) + toc_html.append('<a name="TOCTOP"><h3>Table of Contents</h3><br />') + + for pos, html in enumerate(html_strs[1:]): + entry = _SubEntry(pos+1, html) + toc_html.append('%s<br />' % entry.TocLink()) + + # give some space between bodies of work. + body_html.append(PAGE_BREAK) + + body_html.append(entry.Anchor()) + + body_html.append(entry.Body()) + + # TODO: this title can get way too long with RSS feeds. Not sure how to fix + # cheat slightly and use the <a href> code to set filepos in references. 
+ header = '''<html> +<head> +<title>Bibliorize %s GMT + + + + + +''' % time.ctime(time.time()) + + footer = '' + all_html = header + '\n'.join(title_html + toc_html + body_html) + footer + #print "%s" % all_html.encode('utf8') + return all_html + + def _ConvertStringsToFile(self, html_strs, out_file): + try: + tmp = self.MakeOneHTML(html_strs) + self._ConvertStringToFile(tmp, out_file) + except Exception, e: + logger.error('Error %s', e) + #logger.debug('Details: %s' % html_strs) + + def _ConvertStringToFile(self, html_data, out): + html = HtmlProcessor(html_data) + data = html.CleanHtml() + + # collect offsets of '' tags, use to make index list. + # indexlist = [] # list of (offset,length) tuples. + # not in current use. + + # j=0 + # lastj=0 + # while True: + # j=data.find('',lastj+10) # plus a bit so we find the next. + # if j < 0: + # break + # indexlist.append((lastj,j-lastj)) + # print "index offset: %d length: %d" % (lastj,j-lastj) + # lastj=j + + records = [] +# title = html.title +# if title: +# self._header.SetTitle(title) + record_id = 1 + for start_pos in range(0, len(data), Record.MAX_SIZE): + end = min(len(data), start_pos + Record.MAX_SIZE) + record_data = data[start_pos:end] + records.append(self._header.AddRecord(record_data, record_id)) + #print "HTML Record %03d: (size:%d) [[%s ... %s]]" % ( record_id, len(record_data), record_data[:20], record_data[-20:] ) + record_id += 1 + self._header.SetImageRecordIndex(record_id) + records[0:0] = [self._header.MobiHeader()] + + header, rec_offset = self._header.PDBHeader(len(records)) + out.write(header) + for record in records: + record.WriteHeader(out, rec_offset) + #print "rec_offset: %d len(record.data): %d" % (rec_offset,len(record.data)) + rec_offset += (len(record.data)+1) # plus one for trailing null + + # Write to nuls for some reason + out.write('\0\0') + for record in records: + record.WriteData(out) + out.write('\0') + # needs a trailing null, I believe it indicates zero length 'overlap'. 
+ # otherwise, the readers eat the last char of each html record. + # Calibre writes another 6-7 bytes of stuff after that, but we seem + # to be getting along without it. + +class Record: + MAX_SIZE = 4096 + INDEX_LEN = 8 + _unique_id_seed = 28 # should be arbitrary, but taken from MobiHeader + + # TODO(chatham): Record compression doesn't look that hard. + + def __init__(self, data, record_id): + assert len(data) <= self.MAX_SIZE + self.data = data + if record_id != 0: + self._id = record_id + else: + Record._unique_id_seed += 1 + self._id = 0 + + def __repr__(self): + return 'Record: id=%d len=%d' % (self._id, len(self.data)) + + def _SetUniqueId(self): + Record._unique_id_seed += 1 + # TODO(chatham): Wraparound crap + self._id = Record._unique_id_seed + + def WriteData(self, out): + out.write(self.data) + + def WriteHeader(self, out, rec_offset): + attributes = 64 # dirty? + header = struct.pack('>IbbH', + rec_offset, + attributes, + 0, self._id) + assert len(header) == Record.INDEX_LEN + out.write(header) + +EXTH_HEADER_FIELDS = { + 'author' : 100, + 'publisher' : 101, +} + +class Header: + EPOCH_1904 = 2082844800 + + def __init__(self): + self._length = 0 + self._record_count = 0 + self._title = '2008_2_34' + self._author = 'Unknown author' + self._publisher = 'Unknown publisher' + self._first_image_index = 0 + + def SetAuthor(self, author): + self._author = author.encode('ascii','ignore') + + def SetTitle(self, title): + # TODO(chatham): Reevaluate whether this needs to be ASCII. + # maybe just do sys.setdefaultencoding('utf-8')? Problems + # appending self._title with other things. 
+ self._title = title.encode('ascii','ignore') + + def SetPublisher(self, publisher): + self._publisher = publisher.encode('ascii','ignore') + + def AddRecord(self, data, record_id): + self.max_record_size = max(Record.MAX_SIZE, len(data)) + self._record_count += 1 + self._length += len(data) + return Record(data, record_id) + + def _ReplaceWord(self, data, pos, word): + return data[:pos] + struct.pack('>I', word) + data[pos+4:] + + def PalmDocHeader(self): + compression = 1 # no compression + unused = 0 + encryption_type = 0 # no ecryption + records = self._record_count + 1 # the header record itself + palmdoc_header = struct.pack('>HHIHHHH', + compression, + unused, + self._length, + records, + Record.MAX_SIZE, + encryption_type, + unused) + assert len(palmdoc_header) == 16 + return palmdoc_header + + def PDBHeader(self, num_records): + HEADER_LEN = 32+2+2+9*4 + RECORD_INDEX_HEADER_LEN = 6 + RESOURCE_INDEX_LEN = 10 + + index_len = RECORD_INDEX_HEADER_LEN + num_records * Record.INDEX_LEN + rec_offset = HEADER_LEN + index_len + 2 + + short_title = self._title[0:31] + attributes = 0 + version = 0 + ctime = self.EPOCH_1904 + int(time.time()) + mtime = self.EPOCH_1904 + int(time.time()) + backup_time = self.EPOCH_1904 + int(time.time()) + modnum = 0 + appinfo_offset = 0 + sort_offset = 0 + type = 'BOOK' + creator = 'MOBI' + id_seed = 36 + header = struct.pack('>32sHHII', + short_title, attributes, version, + ctime, mtime) + header += struct.pack('>IIII', backup_time, modnum, + appinfo_offset, sort_offset) + header += struct.pack('>4s4sI', + type, creator, id_seed) + next_record = 0 # not used? + header += struct.pack('>IH', next_record, num_records) + return header, rec_offset + + def _GetExthHeader(self): + # They set author, publisher, coveroffset, thumboffset + data = {'author' : self._author, + 'publisher' : self._publisher, + } + # Turn string type names into EXTH typeids. 
+ r = [] + for key, value in data.items(): + typeid = EXTH_HEADER_FIELDS[key] + length_encoding_len = 8 + r.append(struct.pack('>LL', typeid, len(value) + length_encoding_len,) + value) + content = ''.join(r) + + # Pad to word boundary + while len(content) % 4: + content += '\0' + TODO_mysterious = 12 + exth = 'EXTH' + struct.pack('>LL', len(content) + TODO_mysterious, len(data)) + content + return exth + + def SetImageRecordIndex(self, idx): + self._first_image_index = idx + + def MobiHeader(self): + exth_header = self._GetExthHeader(); + palmdoc_header = self.PalmDocHeader() + + fs = 0xffffffff + + # Record 0 + header_len = 0xE4 # TODO + mobi_type = 2 # BOOK + text_encoding = encoding['UTF-8'] + unique_id = random.randint(1, 1<<32) + creator_version = 4 + reserved = '%c' % 0xff * 40 + nonbook_index = fs + full_name_offset = header_len + len(palmdoc_header) + len(exth_header) # put full name after header + language = languages['en-us'] + unused = 0 + mobi_header = struct.pack('>4sIIIII40sIIIIII', + 'MOBI', + header_len, + mobi_type, + text_encoding, + unique_id, + creator_version, + reserved, + nonbook_index, + full_name_offset, + len(self._title), + language, + fs, fs) + assert len(mobi_header) == 104 - 16 + + unknown_fields = chr(0) * 32 + drm_offset = 0 + drm_count = 0 + drm_size = 0 + drm_flags = 0 + exth_flags = 0x50 + header_end = chr(0) * 64 + mobi_header += struct.pack('>IIIIIII', + creator_version, + self._first_image_index, + fs, + unused, + fs, + unused, + exth_flags) + mobi_header += '\0' * 112 # TODO: Why this much padding? + # Set some magic offsets to be 0xFFFFFFF. + for pos in (0x94, 0x98, 0xb0, 0xb8, 0xc0, 0xc8, 0xd0, 0xd8, 0xdc): + mobi_header = self._ReplaceWord(mobi_header, pos, fs) + + # 16 bytes? + padding = '\0' * 48 * 4 # why? 
+ total_header = palmdoc_header + mobi_header + exth_header + self._title + padding + + return self.AddRecord(total_header, 0) + +if __name__ == '__main__': + import sys + m = Converter(title='Testing Mobi', author='Mobi Author', publisher='mobi converter') + m.ConvertFiles(sys.argv[1:], 'test.mobi') + #m.ConvertFile(sys.argv[1], 'test.mobi') diff --git a/fanficdownloader/story.py b/fanficdownloader/story.py new file mode 100644 index 00000000..d3bad278 --- /dev/null +++ b/fanficdownloader/story.py @@ -0,0 +1,873 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import os, re +import urlparse +import string +from math import floor +from functools import partial +import logging +logger = logging.getLogger(__name__) +import urlparse as up + +import exceptions +from htmlcleanup import conditionalRemoveEntities, removeAllEntities +from configurable import Configurable + +SPACE_REPLACE=u'\s' +SPLIT_META=u'\,' + +# Create convert_image method depending on which graphics lib we can +# load. 
Preferred: calibre, PIL, none + +imagetypes = { + 'jpg':'image/jpeg', + 'jpeg':'image/jpeg', + 'png':'image/png', + 'gif':'image/gif', + 'svg':'image/svg+xml', + } + +try: + from calibre.utils.magick import Image + convtype = {'jpg':'JPG', 'png':'PNG'} + + def convert_image(url,data,sizes,grayscale, + removetrans,imgtype="jpg",background='#ffffff'): + export = False + img = Image() + img.load(data) + + owidth, oheight = img.size + nwidth, nheight = sizes + scaled, nwidth, nheight = fit_image(owidth, oheight, nwidth, nheight) + if scaled: + img.size = (nwidth, nheight) + export = True + + if normalize_format_name(img.format) != imgtype: + export = True + + if removetrans and img.has_transparent_pixels(): + canvas = Image() + canvas.create_canvas(int(img.size[0]), int(img.size[1]), str(background)) + canvas.compose(img) + img = canvas + export = True + + if grayscale and img.type != "GrayscaleType": + img.type = "GrayscaleType" + export = True + + if export: + return (img.export(convtype[imgtype]),imgtype,imagetypes[imgtype]) + else: + logger.debug("image used unchanged") + return (data,imgtype,imagetypes[imgtype]) + +except: + + # No calibre routines, try for PIL for CLI. + try: + import Image + from StringIO import StringIO + convtype = {'jpg':'JPEG', 'png':'PNG'} + def convert_image(url,data,sizes,grayscale, + removetrans,imgtype="jpg",background='#ffffff'): + export = False + img = Image.open(StringIO(data)) + + owidth, oheight = img.size + nwidth, nheight = sizes + scaled, nwidth, nheight = fit_image(owidth, oheight, nwidth, nheight) + if scaled: + img = img.resize((nwidth, nheight),Image.ANTIALIAS) + export = True + + if normalize_format_name(img.format) != imgtype: + if img.mode == "P": + # convert pallete gifs to RGB so jpg save doesn't fail. 
+ img = img.convert("RGB") + export = True + + if removetrans and img.mode == "RGBA": + background = Image.new('RGBA', img.size, background) + # Paste the image on top of the background + background.paste(img, img) + img = background.convert('RGB') + export = True + + if grayscale and img.mode != "L": + img = img.convert("L") + export = True + + if export: + outsio = StringIO() + img.save(outsio,convtype[imgtype]) + return (outsio.getvalue(),imgtype,imagetypes[imgtype]) + else: + logger.debug("image used unchanged") + return (data,imgtype,imagetypes[imgtype]) + + except: + # No calibre or PIL, simple pass through with mimetype. + def convert_image(url,data,sizes,grayscale, + removetrans,imgtype="jpg",background='#ffffff'): + return no_convert_image(url,data) + +## also used for explicit no image processing. +def no_convert_image(url,data): + parsedUrl = up.urlparse(url) + + ext=parsedUrl.path[parsedUrl.path.rfind('.')+1:].lower() + + if ext not in imagetypes: + logger.debug("no_convert_image url:%s - no known extension"%url) + # doesn't have extension? use jpg. + ext='jpg' + + return (data,ext,imagetypes[ext]) + +def normalize_format_name(fmt): + if fmt: + fmt = fmt.lower() + if fmt == 'jpeg': + fmt = 'jpg' + return fmt + +def fit_image(width, height, pwidth, pheight): + ''' + Fit image in box of width pwidth and height pheight. + @param width: Width of image + @param height: Height of image + @param pwidth: Width of box + @param pheight: Height of box + @return: scaled, new_width, new_height. scaled is True iff new_width and/or new_height is different from width or height. 
+ ''' + scaled = height > pheight or width > pwidth + if height > pheight: + corrf = pheight/float(height) + width, height = floor(corrf*width), pheight + if width > pwidth: + corrf = pwidth/float(width) + width, height = pwidth, floor(corrf*height) + if height > pheight: + corrf = pheight/float(height) + width, height = floor(corrf*width), pheight + + return scaled, int(width), int(height) + +try: + # doesn't really matter what, just checking for appengine. + from google.appengine.api import apiproxy_stub_map + + is_appengine = True +except: + is_appengine = False + + +# The list comes from ffnet, the only multi-language site we support +# at the time of writing. Values are taken largely from pycountry, +# but with some corrections and guesses. +langs = { + "English":"en", + "Spanish":"es", + "French":"fr", + "German":"de", + "Chinese":"zh", + "Japanese":"ja", + "Dutch":"nl", + "Portuguese":"pt", + "Russian":"ru", + "Italian":"it", + "Bulgarian":"bg", + "Polish":"pl", + "Hungarian":"hu", + "Hebrew":"he", + "Arabic":"ar", + "Swedish":"sv", + "Norwegian":"no", + "Danish":"da", + "Finnish":"fi", + "Filipino":"fil", + "Esperanto":"eo", + "Hindi":"hi", + "Punjabi":"pa", + "Farsi":"fa", + "Greek":"el", + "Romanian":"ro", + "Albanian":"sq", + "Serbian":"sr", + "Turkish":"tr", + "Czech":"cs", + "Indonesian":"id", + "Croatian":"hr", + "Catalan":"ca", + "Latin":"la", + "Korean":"ko", + "Vietnamese":"vi", + "Thai":"th", + "Devanagari":"hi", + } + +def re_compile(regex,line): + try: + return re.compile(regex) + except Exception, e: + raise exceptions.RegularExpresssionFailed(e,regex,line) + +class InExMatch: + keys = [] + regex = None + match = None + negate = False + + def __init__(self,line): + if "=~" in line: + (self.keys,self.match) = line.split("=~") + self.match = self.match.replace(SPACE_REPLACE,' ') + self.regex = re_compile(self.match,line) + elif "!~" in line: + (self.keys,self.match) = line.split("!~") + self.match = self.match.replace(SPACE_REPLACE,' ') + 
self.regex = re_compile(self.match,line) + self.negate = True + elif "==" in line: + (self.keys,self.match) = line.split("==") + self.match = self.match.replace(SPACE_REPLACE,' ') + elif "!=" in line: + (self.keys,self.match) = line.split("!=") + self.match = self.match.replace(SPACE_REPLACE,' ') + self.negate = True + self.keys = map( lambda x: x.strip(), self.keys.split(",") ) + + # For conditional, only one key + def is_key(self,key): + return key == self.keys[0] + + # For conditional, only one key + def key(self): + return self.keys[0] + + def in_keys(self,key): + return key in self.keys + + def is_match(self,value): + retval = False + if self.regex: + if self.regex.search(value): + retval = True + #print(">>>>>>>>>>>>>%s=~%s r: %s,%s=%s"%(self.match,value,self.negate,retval,self.negate != retval)) + else: + retval = self.match == value + #print(">>>>>>>>>>>>>%s==%s r: %s,%s=%s"%(self.match,value,self.negate,retval, self.negate != retval)) + + return self.negate != retval + + def __str__(self): + if self.negate: + f='!' + else: + f='=' + if self.regex: + s='~' + else: + s='=' + return u'InExMatch(%s %s%s %s)'%(self.keys,f,s,self.match) + +class Story(Configurable): + + def __init__(self, configuration): + Configurable.__init__(self, configuration) + try: + ## calibre plugin will set externally to match PI version. + self.metadata = {'version':os.environ['CURRENT_VERSION_ID']} + except: + self.metadata = {'version':'4.4'} + self.replacements = [] + self.in_ex_cludes = {} + self.chapters = [] # chapters will be tuples of (title,html) + self.imgurls = [] + self.imgtuples = [] + + self.cover=None # *href* of new cover image--need to create html. + self.oldcover=None # (oldcoverhtmlhref,oldcoverhtmltype,oldcoverhtmldata,oldcoverimghref,oldcoverimgtype,oldcoverimgdata) + self.calibrebookmark=None # cheesy way to carry calibre bookmark file forward across update. + self.logfile=None # cheesy way to carry log file forward across update. 
+ + ## Look for config parameter, split and add each to metadata field. + for (config,metadata) in [("extracategories","category"), + ("extragenres","genre"), + ("extracharacters","characters"), + ("extraships","ships"), + ("extrawarnings","warnings")]: + for val in self.getConfigList(config): + self.addToList(metadata,val) + + self.setReplace(self.getConfig('replace_metadata')) + + in_ex_clude_list = ['include_metadata_pre','exclude_metadata_pre', + 'include_metadata_post','exclude_metadata_post'] + for ie in in_ex_clude_list: + ies = self.getConfig(ie) + # print("%s %s"%(ie,ies)) + if ies: + iel = [] + self.in_ex_cludes[ie] = self.set_in_ex_clude(ies) + + def join_list(self, key, vallist): + return self.getConfig("join_string_"+key,u", ").replace(SPACE_REPLACE,' ').join(map(unicode, vallist)) + + def setMetadata(self, key, value, condremoveentities=True): + + # keep as list type, but set as only value. + if self.isList(key): + self.addToList(key,value,condremoveentities=condremoveentities,clear=True) + else: + ## still keeps < < and & + if condremoveentities: + self.metadata[key]=conditionalRemoveEntities(value) + else: + self.metadata[key]=value + + if key == "language": + try: + # getMetadata not just self.metadata[] to do replace_metadata. + self.setMetadata('langcode',langs[self.getMetadata(key)]) + except: + self.setMetadata('langcode','en') + + if key == 'dateUpdated' and value: + # Last Update tags for Bill. + self.addToList('lastupdate',value.strftime("Last Update Year/Month: %Y/%m")) + self.addToList('lastupdate',value.strftime("Last Update: %Y/%m/%d")) + + + ## metakey[,metakey]=~pattern + ## metakey[,metakey]==string + ## *for* part lines. 
Effect only when trailing conditional key=~regexp matches + ## metakey[,metakey]=~pattern[&&metakey=~regexp] + ## metakey[,metakey]==string[&&metakey=~regexp] + ## metakey[,metakey]=~pattern[&&metakey==string] + ## metakey[,metakey]==string[&&metakey==string] + def set_in_ex_clude(self,setting): + dest = [] + # print("set_in_ex_clude:"+setting) + for line in setting.splitlines(): + if line: + (match,condmatch)=(None,None) + if "&&" in line: + (line,conditional) = line.split("&&") + condmatch = InExMatch(conditional) + match = InExMatch(line) + dest.append([match,condmatch]) + return dest + + def do_in_ex_clude(self,which,value,key): + if value and which in self.in_ex_cludes: + include = 'include' in which + keyfound = False + found = False + for (match,condmatch) in self.in_ex_cludes[which]: + keyfndnow = False + if match.in_keys(key): + # key in keys and either no conditional, or conditional matched + if condmatch == None or condmatch.is_key(key): + keyfndnow = True + else: + condval = self.getMetadata(condmatch.key()) + keyfndnow = condmatch.is_match(condval) + keyfound |= keyfndnow + # print("match:%s %s\ncondmatch:%s %s\n\tkeyfound:%s\n\tfound:%s"%( + # match,value,condmatch,condval,keyfound,found)) + if keyfndnow: + found = isinstance(value,basestring) and match.is_match(value) + if found: + # print("match:%s %s\n\tkeyfndnow:%s\n\tfound:%s"%( + # match,value,keyfndnow,found)) + if not include: + value = None + break + if include and keyfound and not found: + value = None + return value + + + ## Two or three part lines. Two part effect everything. + ## Three part effect only those key(s) lists. + ## pattern=>replacement + ## metakey,metakey=>pattern=>replacement + ## *Five* part lines. 
Effect only when trailing conditional key=>regexp matches + ## metakey[,metakey]=>pattern=>replacement[&&metakey=>regexp] + def setReplace(self,replace): + for line in replace.splitlines(): + # print("replacement line:%s"%line) + (metakeys,regexp,replacement,condkey,condregexp)=(None,None,None,None,None) + if "&&" in line: + (line,conditional) = line.split("&&") + (condkey,condregexp) = conditional.split("=>") + if "=>" in line: + parts = line.split("=>") + if len(parts) > 2: + metakeys = map( lambda x: x.strip(), parts[0].split(",") ) + (regexp,replacement)=parts[1:] + else: + (regexp,replacement)=parts + + if regexp: + regexp = re_compile(regexp,line) + if condregexp: + condregexp = re_compile(condregexp,line) + # A way to explicitly include spaces in the + # replacement string. The .ini parser eats any + # trailing spaces. + replacement=replacement.replace(SPACE_REPLACE,' ') + self.replacements.append([metakeys,regexp,replacement,condkey,condregexp]) + + def doReplacements(self,value,key,return_list=False,seen_list=[]): + value = self.do_in_ex_clude('include_metadata_pre',value,key) + value = self.do_in_ex_clude('exclude_metadata_pre',value,key) + + retlist = [value] + for replaceline in self.replacements: + if replaceline in seen_list: # recursion on pattern, bail + # print("bailing on %s"%replaceline) + continue + #print("replacement tuple:%s"%replaceline) + (metakeys,regexp,replacement,condkey,condregexp) = replaceline + if (metakeys == None or key in metakeys) \ + and isinstance(value,basestring) \ + and regexp.search(value): + doreplace=True + if condkey and condkey != key: # prevent infinite recursion. + condval = self.getMetadata(condkey) + doreplace = condval != None and condregexp.search(condval) + + if doreplace: + # split into more than one list entry if + # SPLIT_META present in replacement string. Split + # first, then regex sub, then recurse call replace + # on each. Break out of loop, each split element + # handled individually by recursion call. 
+ if SPLIT_META in replacement: + retlist = [] + for splitrepl in replacement.split(SPLIT_META): + retlist.extend(self.doReplacements(regexp.sub(splitrepl,value), + key, + return_list=True, + seen_list=seen_list+[replaceline])) + break + else: + # print("replacement,value:%s,%s->%s"%(replacement,value,regexp.sub(replacement,value))) + value = regexp.sub(replacement,value) + retlist = [value] + + for val in retlist: + retlist = map(partial(self.do_in_ex_clude,'include_metadata_post',key=key),retlist) + retlist = map(partial(self.do_in_ex_clude,'exclude_metadata_post',key=key),retlist) + # value = self.do_in_ex_clude('include_metadata_post',value,key) + # value = self.do_in_ex_clude('exclude_metadata_post',value,key) + + if return_list: + return retlist + else: + return self.join_list(key,retlist) + + def getMetadataRaw(self,key): + if self.isValidMetaEntry(key) and self.metadata.has_key(key): + return self.metadata[key] + + def getMetadata(self, key, + removeallentities=False, + doreplacements=True): + value = None + if not self.isValidMetaEntry(key): + return value + + if self.isList(key): + # join_string = self.getConfig("join_string_"+key,u", ").replace(SPACE_REPLACE,' ') + # value = join_string.join(self.getList(key, removeallentities, doreplacements=True)) + value = self.join_list(key,self.getList(key, removeallentities, doreplacements=True)) + if doreplacements: + value = self.doReplacements(value,key+"_LIST") + return value + elif self.metadata.has_key(key): + value = self.metadata[key] + if value: + if key == "numWords": + value = commaGroups(value) + if key == "numChapters": + value = commaGroups("%d"%value) + if key in ("dateCreated"): + value = value.strftime(self.getConfig(key+"_format","%Y-%m-%d %H:%M:%S")) + if key in ("datePublished","dateUpdated"): + value = value.strftime(self.getConfig(key+"_format","%Y-%m-%d")) + + if doreplacements: + value=self.doReplacements(value,key) + if removeallentities and value != None: + return removeAllEntities(value) 
+ else: + return value + else: #if self.getConfig("default_value_"+key): + return self.getConfig("default_value_"+key) + + def getAllMetadata(self, + removeallentities=False, + doreplacements=True, + keeplists=False): + ''' + All single value *and* list value metadata as strings (unless + keeplists=True, then keep lists). + ''' + allmetadata = {} + + # special handling for authors/authorUrls + linkhtml="%s" + if self.isList('author'): # more than one author, assume multiple authorUrl too. + htmllist=[] + for i, v in enumerate(self.getList('author')): + aurl = self.getList('authorUrl')[i] + auth = v + # make sure doreplacements & removeallentities are honored. + if doreplacements: + aurl=self.doReplacements(aurl,'authorUrl') + auth=self.doReplacements(auth,'author') + if removeallentities: + aurl=removeAllEntities(aurl) + auth=removeAllEntities(auth) + + htmllist.append(linkhtml%('author',aurl,auth)) + # join_string = self.getConfig("join_string_authorHTML",u", ").replace(SPACE_REPLACE,' ') + self.setMetadata('authorHTML',self.join_list("join_string_authorHTML",htmllist)) + else: + self.setMetadata('authorHTML',linkhtml%('author',self.getMetadata('authorUrl', removeallentities, doreplacements), + self.getMetadata('author', removeallentities, doreplacements))) + + self.extendList("extratags",self.getConfigList("extratags")) + + if self.getMetadataRaw('seriesUrl'): + self.setMetadata('seriesHTML',linkhtml%('series',self.getMetadata('seriesUrl', removeallentities, doreplacements), + self.getMetadata('series', removeallentities, doreplacements))) + elif self.getMetadataRaw('series'): + self.setMetadata('seriesHTML',self.getMetadataRaw('series')) + + # logger.debug("make_linkhtml_entries:%s"%self.getConfig('make_linkhtml_entries')) + for k in self.getConfigList('make_linkhtml_entries'): + # Assuming list, because it has to be site specific and + # they are all lists. Bail if kUrl list not the same + # length. 
+ # logger.debug("\nk:%s\nlist:%s\nlistURL:%s"%(k,self.getList(k),self.getList(k+'Url'))) + if len(self.getList(k+'Url')) != len(self.getList(k)): + continue + htmllist=[] + for i, v in enumerate(self.getList(k)): + url = self.getList(k+'Url')[i] + # make sure doreplacements & removeallentities are honored. + if doreplacements: + url=self.doReplacements(url,k+'Url') + v=self.doReplacements(v,k) + if removeallentities: + url=removeAllEntities(url) + v=removeAllEntities(v) + + htmllist.append(linkhtml%(k,url,v)) + # join_string = self.getConfig("join_string_"+k+"HTML",u", ").replace(SPACE_REPLACE,' ') + self.setMetadata(k+'HTML',self.join_list("join_string_"+k+"HTML",htmllist)) + + for k in self.getValidMetaList(): + if self.isList(k) and keeplists: + allmetadata[k] = self.getList(k, removeallentities, doreplacements) + else: + allmetadata[k] = self.getMetadata(k, removeallentities, doreplacements) + + return allmetadata + + # just for less clutter in adapters. + def extendList(self,listname,l): + for v in l: + self.addToList(listname,v.strip()) + + def addToList(self,listname,value,condremoveentities=True,clear=False): + if value==None: + return + if condremoveentities: + value = conditionalRemoveEntities(value) + if clear or not self.isList(listname) or not listname in self.metadata: + # Calling addToList to a non-list meta will overwrite it. + self.metadata[listname]=[] + # prevent duplicates. + if not value in self.metadata[listname]: + self.metadata[listname].append(value) + + if listname == 'category' and self.getConfig('add_genre_when_multi_category') and len(self.metadata[listname]) > 1: + self.addToList('genre',self.getConfig('add_genre_when_multi_category')) + + def isList(self,listname): + 'Everything set with an include_in_* is considered a list.' 
+ return self.isListType(listname) or \ + ( self.isValidMetaEntry(listname) and self.metadata.has_key(listname) \ + and isinstance(self.metadata[listname],list) ) + + def getList(self,listname, + removeallentities=False, + doreplacements=True, + includelist=[]): + #print("getList(%s,%s)"%(listname,includelist)) + retlist = [] + + if not self.isValidMetaEntry(listname): + return retlist + + # includelist prevents infinite recursion of include_in_'s + if self.hasConfig("include_in_"+listname) and listname not in includelist: + for k in self.getConfigList("include_in_"+listname): + retlist.extend(self.getList(k,removeallentities=False, + doreplacements=doreplacements,includelist=includelist+[listname])) + else: + + if not self.isList(listname): + retlist = [self.getMetadata(listname,removeallentities=False, + doreplacements=doreplacements)] + else: + retlist = self.getMetadataRaw(listname) + + if retlist: + if doreplacements: + newretlist = [] + for val in retlist: + newretlist.extend(self.doReplacements(val,listname,return_list=True)) + retlist = newretlist + + if removeallentities: + retlist = map(removeAllEntities,retlist) + + retlist = filter( lambda x : x!=None and x!='' ,retlist) + + # reorder ships so b/a and c/b/a become a/b and a/b/c. Only on '/', + # use replace_metadata to change separator first if needed. + # ships=>[ ]*(/|&|&)[ ]*=>/ + if listname == 'ships' and self.getConfig('sort_ships') and retlist: + retlist = [ '/'.join(sorted(x.split('/'))) for x in retlist ] + + if retlist: + if listname in ('author','authorUrl','authorId') or self.getConfig('keep_in_order_'+listname): + # need to retain order for author & authorUrl so the + # two match up. + return retlist + else: + # remove dups and sort. + return sorted(list(set(retlist))) + else: + return [] + + def getSubjectTags(self, removeallentities=False): + # set to avoid duplicates subject tags. 
+ subjectset = set() + + tags_list = self.getConfigList("include_subject_tags") + self.getConfigList("extra_subject_tags") + + # metadata all go into dc:subject tags, but only if they are configured. + for (name,value) in self.getAllMetadata(removeallentities=removeallentities,keeplists=True).iteritems(): + if name in tags_list: + if isinstance(value,list): + for tag in value: + subjectset.add(tag) + else: + subjectset.add(value) + + if None in subjectset: + subjectset.remove(None) + if '' in subjectset: + subjectset.remove('') + + return list(subjectset | set(self.getConfigList("extratags"))) + + def addChapter(self, url, title, html): + if self.getConfig('strip_chapter_numbers') and \ + self.getConfig('chapter_title_strip_pattern'): + title = re.sub(self.getConfig('chapter_title_strip_pattern'),"",title) + self.chapters.append( (url,title,html) ) + + def getChapters(self,fortoc=False): + "Chapters will be tuples of (title,html)" + retval = [] + ## only add numbers if more than one chapter. + if len(self.chapters) > 1 and \ + (self.getConfig('add_chapter_numbers') == "true" \ + or (self.getConfig('add_chapter_numbers') == "toconly" and fortoc)) \ + and self.getConfig('chapter_title_add_pattern'): + for index, (url,title,html) in enumerate(self.chapters): + retval.append( (url, + string.Template(self.getConfig('chapter_title_add_pattern')).substitute({'index':index+1,'title':title}), + html) ) + else: + retval = self.chapters + + return retval + + def formatFileName(self,template,allowunsafefilename=True): + values = origvalues = self.getAllMetadata() + # fall back default: + if not template: + template="${title}-${siteabbrev}_${storyId}${formatext}" + + if not allowunsafefilename: + values={} + pattern = re_compile(self.getConfig("output_filename_safepattern",r"[^a-zA-Z0-9_\. 
\[\]\(\)&'-]+"),"output_filename_safepattern") + for k in origvalues.keys(): + values[k]=re.sub(pattern,'_', removeAllEntities(self.getMetadata(k))) + + return string.Template(template).substitute(values).encode('utf8') + + # pass fetch in from adapter in case we need the cookies collected + # as well as it's a base_story class method. + def addImgUrl(self,parenturl,url,fetch,cover=False,coverexclusion=None): + + # otherwise it saves the image in the epub even though it + # isn't used anywhere. + if cover and self.getConfig('never_make_cover'): + return (None,None) + + url = url.strip() # ran across an image with a space in the + # src. Browser handled it, so we'd better, too. + + # appengine (web version) isn't allowed to do images--just + # gets too big too fast and breaks things. + if is_appengine: + return (None,None) + + if url.startswith("http") or url.startswith("file") or parenturl == None: + imgurl = url + else: + parsedUrl = urlparse.urlparse(parenturl) + if url.startswith("//") : + imgurl = urlparse.urlunparse( + (parsedUrl.scheme, + '', + url, + '','','')) + elif url.startswith("/") : + imgurl = urlparse.urlunparse( + (parsedUrl.scheme, + parsedUrl.netloc, + url, + '','','')) + else: + toppath="" + if parsedUrl.path.endswith("/"): + toppath = parsedUrl.path + else: + toppath = parsedUrl.path[:parsedUrl.path.rindex('/')] + imgurl = urlparse.urlunparse( + (parsedUrl.scheme, + parsedUrl.netloc, + toppath + '/' + url, + '','','')) + #print("\n===========\nparsedUrl.path:%s\ntoppath:%s\nimgurl:%s\n\n"%(parsedUrl.path,toppath,imgurl)) + + # apply coverexclusion to explicit covers, too. Primarily for ffnet imageu. 
+ if cover and coverexclusion and re.search(coverexclusion,imgurl): + return (None,None) + + prefix='ffdl' + if imgurl not in self.imgurls: + parsedUrl = urlparse.urlparse(imgurl) + + try: + if self.getConfig('no_image_processing'): + (data,ext,mime) = no_convert_image(imgurl, + fetch(imgurl)) + else: + try: + sizes = [ int(x) for x in self.getConfigList('image_max_size') ] + except Exception, e: + raise exceptions.FailedToDownload("Failed to parse image_max_size from personal.ini:%s\nException: %s"%(self.getConfigList('image_max_size'),e)) + grayscale = self.getConfig('grayscale_images') + imgtype = self.getConfig('convert_images_to') + if not imgtype: + imgtype = "jpg" + removetrans = self.getConfig('remove_transparency') + removetrans = removetrans or grayscale or imgtype=="jpg" + (data,ext,mime) = convert_image(imgurl, + fetch(imgurl), + sizes, + grayscale, + removetrans, + imgtype, + background="#"+self.getConfig('background_color')) + except Exception, e: + logger.info("Failed to load or convert image, skipping:\n%s\nException: %s"%(imgurl,e)) + return ("failedtoload","failedtoload") + + # explicit cover, make the first image. + if cover: + if len(self.imgtuples) > 0 and 'cover' in self.imgtuples[0]['newsrc']: + # remove existing cover, if there is one. + del self.imgurls[0] + del self.imgtuples[0] + self.imgurls.insert(0,imgurl) + newsrc = "images/cover.%s"%ext + self.cover=newsrc + self.imgtuples.insert(0,{'newsrc':newsrc,'mime':mime,'data':data}) + else: + self.imgurls.append(imgurl) + # First image, copy not link because calibre will replace with it's cover. 
+ # Only if: No cover already AND + # make_firstimage_cover AND + # NOT never_make_cover AND + # either no coverexclusion OR coverexclusion doesn't match + if self.cover == None and \ + self.getConfig('make_firstimage_cover') and \ + not self.getConfig('never_make_cover') and \ + not (coverexclusion and re.search(coverexclusion,imgurl)): + newsrc = "images/cover.%s"%ext + self.cover=newsrc + self.imgtuples.append({'newsrc':newsrc,'mime':mime,'data':data}) + self.imgurls.append(imgurl) + + newsrc = "images/%s-%s.%s"%( + prefix, + self.imgurls.index(imgurl), + ext) + self.imgtuples.append({'newsrc':newsrc,'mime':mime,'data':data}) + + #logger.debug("\nimgurl:%s\nnewsrc:%s\nimage size:%d\n"%(imgurl,newsrc,len(data))) + else: + newsrc = self.imgtuples[self.imgurls.index(imgurl)]['newsrc'] + + #print("===============\n%s\nimg url:%s\n============"%(newsrc,self.imgurls[-1])) + + return (newsrc, imgurl) + + def getImgUrls(self): + retlist = [] + for i, url in enumerate(self.imgurls): + #parsedUrl = urlparse.urlparse(url) + retlist.append(self.imgtuples[i]) + return retlist + + def __str__(self): + return "Metadata: " +str(self.metadata) + +def commaGroups(s): + groups = [] + while s and s[-1].isdigit(): + groups.append(s[-3:]) + s = s[:-3] + return s + ','.join(reversed(groups)) + diff --git a/fanficdownloader/translit.py b/fanficdownloader/translit.py new file mode 100644 index 00000000..bf205a6d --- /dev/null +++ b/fanficdownloader/translit.py @@ -0,0 +1,57 @@ +#-*-coding:utf-8-*- +# Code taken from http://python.su/forum/viewtopic.php?pid=66946 +import unicodedata +def is_syllable(letter): + syllables = ("A", "E", "I", "O", "U", "a", "e", "i", "o", "u") + if letter in syllables: + return True + return False +def is_consonant(letter): + return not is_syllable(letter) +def romanize(letter): + try: + str(letter) + except UnicodeEncodeError: + pass + else: + return str(letter) + unid = unicodedata.name(letter) + exceptions = {"NUMERO SIGN": "No", "LEFT-POINTING DOUBLE 
ANGLE QUOTATION MARK": "\"", "RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK": "\"", "DASH": "-"} + for name_contains in exceptions: + if unid.find(name_contains)!=-1: + return exceptions[name_contains] + assert(unid.startswith("CYRILLIC"))# Not ready to romanize anything but cyrillics + transformation_pairs = {"CYRILLIC CAPITAL LETTER ": str.capitalize, "CYRILLIC SMALL LETTER ": str.lower} + func = str.lower + for name_contains in transformation_pairs: + if unid.find(name_contains)!=-1: + func = transformation_pairs[name_contains] + unid = unid.replace(name_contains, "") + cyrillic_exceptions = {"YERU": "y", "SHORT I": "y", "HARD SIGN": "\'", "SOFT SIGN": "\'", "BYELORUSSIAN-UKRAINIAN I": "i", "GHE WITH UPTURN": "g", "UKRAINIAN IE": "ie", "YU": "yu", "YA": "ya"} + for name_contains in cyrillic_exceptions: + if unid.find(name_contains)!=-1: + return cyrillic_exceptions[name_contains] + if all(map(is_syllable, unid)): + return func(unid) + else: + return func(filter(is_consonant, unid)) +def translit(text): + output = "" + for letter in text: + output += romanize(letter) + return output +#def main(): + #text = u"русск.: Любя, съешь щипцы, — вздохнёт мэр, — кайф жгуч." + #print translit(text) + #text = u"укр.: Гей, хлопці, не вспію - на ґанку ваша файна їжа знищується бурундучком." + #print translit(text) + #text = u"болг.: Ах, чудна българска земьо, полюшквай цъфтящи жита." + #print translit(text) + #text = u"серб.: Неуредне ноћне даме досађивале су Џеку К." + #print translit(text) + #russk.: Lyubya, s'iesh' shchiptsy, - vzdohniot mer, - kayf zhghuch. + #ukr.: Ghiey, hloptsi, nie vspiyu - na ganku vasha fayna yzha znishchuiet'sya burunduchkom. + #bolgh.: Ah, chudna b'lgharska ziem'o, polyushkvay ts'ftyashchi zhita. + #sierb.: Nieuriednie notshnie damie dosadjivalie su Dzhieku K. 
+if __name__=="__main__": + main() \ No newline at end of file diff --git a/fanficdownloader/writers/__init__.py b/fanficdownloader/writers/__init__.py new file mode 100644 index 00000000..7d9faf64 --- /dev/null +++ b/fanficdownloader/writers/__init__.py @@ -0,0 +1,38 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +## This could (should?) use a dynamic loader like adapters, but for +## now, it's static, since there's so few of them. + +from ..exceptions import FailedToDownload + +from writer_html import HTMLWriter +from writer_txt import TextWriter +from writer_epub import EpubWriter +from writer_mobi import MobiWriter + +def getWriter(type,config,story): + if type == "html": + return HTMLWriter(config,story) + if type == "txt": + return TextWriter(config,story) + if type == "epub": + return EpubWriter(config,story) + if type == "mobi": + return MobiWriter(config,story) + + raise FailedToDownload("(%s) is not a supported download format."%type) diff --git a/fanficdownloader/writers/base_writer.py b/fanficdownloader/writers/base_writer.py new file mode 100644 index 00000000..c26d4095 --- /dev/null +++ b/fanficdownloader/writers/base_writer.py @@ -0,0 +1,285 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import re +import os.path +import datetime +import string +import StringIO +import zipfile +from zipfile import ZipFile, ZIP_DEFLATED +import logging + +from ..configurable import Configurable +from ..htmlcleanup import removeEntities, removeAllEntities, stripHTML + +logger = logging.getLogger(__name__) + +class BaseStoryWriter(Configurable): + + @staticmethod + def getFormatName(): + return 'base' + + @staticmethod + def getFormatExt(): + return '.bse' + + def __init__(self, configuration, adapter): + Configurable.__init__(self, configuration) + + self.adapter = adapter + self.story = adapter.getStoryMetadataOnly() # only cache the metadata initially. + + # fall back labels. 
+ self.titleLabels = { + 'category':'Category', + 'genre':'Genre', + 'language':'Language', + 'status':'Status', + 'series':'Series', + 'characters':'Characters', + 'ships':'Relationships', + 'datePublished':'Published', + 'dateUpdated':'Updated', + 'dateCreated':'Packaged', + 'rating':'Rating', + 'warnings':'Warnings', + 'numChapters':'Chapters', + 'numWords':'Words', + 'site':'Site', + 'storyId':'Story ID', + 'authorId':'Author ID', + 'extratags':'Extra Tags', + 'title':'Title', + 'storyUrl':'Story URL', + 'description':'Summary', + 'author':'Author', + 'authorUrl':'Author URL', + 'formatname':'File Format', + 'formatext':'File Extension', + 'siteabbrev':'Site Abbrev', + 'version':'FFDL Version' + } + self.story.setMetadata('formatname',self.getFormatName()) + self.story.setMetadata('formatext',self.getFormatExt()) + + def getMetadata(self,key, removeallentities=False): + return stripHTML(self.story.getMetadata(key, removeallentities)) + + def getOutputFileName(self): + if self.getConfig('zip_output'): + return self.getZipFileName() + else: + return self.getBaseFileName() + + def getBaseFileName(self): + return self.story.formatFileName(self.getConfig('output_filename'),self.getConfig('allow_unsafe_filename')) + + def getZipFileName(self): + return self.story.formatFileName(self.getConfig('zip_filename'),self.getConfig('allow_unsafe_filename')) + + def _write(self, out, text): + out.write(text.encode('utf8')) + + def writeTitlePage(self, out, START, ENTRY, END, WIDE_ENTRY=None, NO_TITLE_ENTRY=None): + """ + Write the title page, but only include entries that there's + metadata for. START, ENTRY and END are expected to already by + string.Template(). START and END are expected to use the same + names as Story.metadata, but ENTRY should use label and value. 
+ """ + if self.getConfig("include_titlepage"): + + if self.hasConfig("titlepage_start"): + START = string.Template(self.getConfig("titlepage_start")) + + if self.hasConfig("titlepage_entry"): + ENTRY = string.Template(self.getConfig("titlepage_entry")) + + if self.hasConfig("titlepage_end"): + END = string.Template(self.getConfig("titlepage_end")) + + if self.hasConfig("titlepage_wide_entry"): + WIDE_ENTRY = string.Template(self.getConfig("titlepage_wide_entry")) + + if self.hasConfig("titlepage_no_title_entry"): + NO_TITLE_ENTRY = string.Template(self.getConfig("titlepage_no_title_entry")) + + self._write(out,START.substitute(self.story.getAllMetadata())) + + if WIDE_ENTRY==None: + WIDE_ENTRY=ENTRY + + titleEntriesList = self.getConfigList("titlepage_entries") + self.getConfigList("extra_titlepage_entries") + wideTitleEntriesList = self.getConfigList("wide_titlepage_entries") + + for entry in titleEntriesList: + if self.isValidMetaEntry(entry): + if self.story.getMetadata(entry): + if entry in wideTitleEntriesList: + TEMPLATE=WIDE_ENTRY + else: + TEMPLATE=ENTRY + + if self.hasConfig(entry+"_label"): + label=self.getConfig(entry+"_label") + elif entry in self.titleLabels: + logger.debug("Using fallback label for %s_label"%entry) + label=self.titleLabels[entry] + else: + label="%s"%entry.title() + logger.debug("No known label for %s, fallback to '%s'"%(entry,label)) + + # If the label for the title entry is empty, use the + # 'no title' option if there is one. + if label == "" and NO_TITLE_ENTRY: + TEMPLATE= NO_TITLE_ENTRY + + self._write(out,TEMPLATE.substitute({'label':label, + 'id':entry, + 'value':self.story.getMetadata(entry)})) + else: + self._write(out, entry) + + self._write(out,END.substitute(self.story.getAllMetadata())) + + def writeTOCPage(self, out, START, ENTRY, END): + """ + Write the Table of Contents page. START, ENTRY and END are expected to already by + string.Template(). 
START and END are expected to use the same + names as Story.metadata, but ENTRY should use index and chapter. + """ + # Only do TOC if there's more than one chapter and it's configured. + if len(self.story.getChapters()) > 1 and self.getConfig("include_tocpage") and not self.metaonly : + if self.hasConfig("tocpage_start"): + START = string.Template(self.getConfig("tocpage_start")) + + if self.hasConfig("tocpage_entry"): + ENTRY = string.Template(self.getConfig("tocpage_entry")) + + if self.hasConfig("tocpage_end"): + END = string.Template(self.getConfig("tocpage_end")) + + self._write(out,START.substitute(self.story.getAllMetadata())) + + for index, (url,title,html) in enumerate(self.story.getChapters(fortoc=True)): + if html: + self._write(out,ENTRY.substitute({'chapter':title, + 'number':index+1, + 'index':"%04d"%(index+1), + 'url':url})) + + self._write(out,END.substitute(self.story.getAllMetadata())) + + # if no outstream is given, write to file. + def writeStory(self,outstream=None, metaonly=False, outfilename=None, forceOverwrite=False): + + self.metaonly = metaonly + if outfilename == None: + outfilename=self.getOutputFileName() + + self.outfilename = outfilename + + # minor cheat, tucking css into metadata. + if self.getConfig("output_css"): + self.story.setMetadata("output_css", + self.getConfig("output_css"), + condremoveentities=False) + else: + self.story.setMetadata("output_css",'') + + if not outstream: + close=True + logger.info("Save directly to file: %s" % outfilename) + if self.getConfig('make_directories'): + path="" + outputdirs = os.path.dirname(outfilename).split('/') + for dir in outputdirs: + path+=dir+"/" + if not os.path.exists(path): + os.mkdir(path) ## os.makedirs() doesn't work in 2.5.2? + + ## Check for output file date vs updated date here + if not (self.getConfig('always_overwrite') or forceOverwrite): + if os.path.exists(outfilename): + ## date() truncs off time, which files have, but sites don't report. 
+ lastupdated=self.story.getMetadataRaw('dateUpdated').date() + fileupdated=datetime.datetime.fromtimestamp(os.stat(outfilename)[8]).date() + if fileupdated > lastupdated: + logger.warn("File(%s) Updated(%s) more recently than Story(%s) - Skipping" % (outfilename,fileupdated,lastupdated)) + return + if not metaonly: + self.story = self.adapter.getStory() # get full story + # now, just + # before writing. + # Fetch before + # opening file. + outstream = open(outfilename,"wb") + else: + close=False + logger.debug("Save to stream") + + if not metaonly: + self.story = self.adapter.getStory() # get full story now, + # just before + # writing. Okay if + # double called with + # above, it will only + # fetch once. + if self.getConfig('zip_output'): + out = StringIO.StringIO() + self.zipout = ZipFile(outstream, 'w', compression=ZIP_DEFLATED) + self.writeStoryImpl(out) + self.zipout.writestr(self.getBaseFileName(),out.getvalue()) + # declares all the files created by Windows. otherwise, when + # it runs in appengine, windows unzips the files as 000 perms. + for zf in self.zipout.filelist: + zf.create_system = 0 + self.zipout.close() + out.close() + else: + self.writeStoryImpl(outstream) + + if close: + outstream.close() + + def writeFile(self, filename, data): + logger.debug("writeFile:%s"%filename) + + if self.getConfig('zip_output'): + outputdirs = os.path.dirname(self.getBaseFileName()) + if outputdirs: + filename=outputdirs+'/'+filename + self.zipout.writestr(filename,data) + else: + outputdirs = os.path.dirname(self.outfilename) + if outputdirs: + filename=outputdirs+'/'+filename + + dir = os.path.dirname(filename) + if not os.path.exists(dir): + os.mkdir(dir) ## os.makedirs() doesn't work in 2.5.2? + + outstream = open(filename,"wb") + outstream.write(data) + outstream.close() + + def writeStoryImpl(self, out): + "Must be overriden by sub classes." 
+ pass + diff --git a/fanficdownloader/writers/writer_epub.py b/fanficdownloader/writers/writer_epub.py new file mode 100644 index 00000000..fb405a3e --- /dev/null +++ b/fanficdownloader/writers/writer_epub.py @@ -0,0 +1,690 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import logging +import string +import StringIO +import zipfile +from zipfile import ZipFile, ZIP_STORED, ZIP_DEFLATED +import urllib +import re + +## XML isn't as forgiving as HTML, so rather than generate as strings, +## use DOM to generate the XML files. +from xml.dom.minidom import parse, parseString, getDOMImplementation + +from base_writer import * +from ..htmlcleanup import stripHTML + +logger = logging.getLogger(__name__) + +class EpubWriter(BaseStoryWriter): + + @staticmethod + def getFormatName(): + return 'epub' + + @staticmethod + def getFormatExt(): + return '.epub' + + def __init__(self, config, story): + BaseStoryWriter.__init__(self, config, story) + + self.EPUB_CSS = string.Template('''${output_css}''') + + self.EPUB_TITLE_PAGE_START = string.Template(''' + + + +${title} by ${author} + + + +

    ${title} by ${authorHTML}

    +
    +''') + + self.EPUB_TITLE_ENTRY = string.Template(''' +${label}: ${value}
    +''') + + self.EPUB_NO_TITLE_ENTRY = string.Template(''' +${value}
    +''') + + self.EPUB_TITLE_PAGE_END = string.Template(''' +
    + + + +''') + + self.EPUB_TABLE_TITLE_PAGE_START = string.Template(''' + + + +${title} by ${author} + + + +

    ${title} by ${authorHTML}

    + +''') + + self.EPUB_TABLE_TITLE_ENTRY = string.Template(''' + +''') + + self.EPUB_TABLE_TITLE_WIDE_ENTRY = string.Template(''' + +''') + + self.EPUB_TABLE_NO_TITLE_ENTRY = string.Template(''' + +''') + + self.EPUB_TABLE_TITLE_PAGE_END = string.Template(''' +
    ${label}:${value}
    ${label}: ${value}
    ${label}${value}
    + + + +''') + + self.EPUB_TOC_PAGE_START = string.Template(''' + + + +${title} by ${author} + + + +
    +

    Table of Contents

    +''') + + self.EPUB_TOC_ENTRY = string.Template(''' +${chapter}
    +''') + + self.EPUB_TOC_PAGE_END = string.Template(''' +
    + + +''') + + self.EPUB_CHAPTER_START = string.Template(''' + + + +${chapter} + + + +

    ${chapter}

    +''') + + self.EPUB_CHAPTER_END = string.Template(''' + + +''') + + self.EPUB_LOG_PAGE_START = string.Template(''' + + + +Update Log + + + +

    Update Log

    +''') + + self.EPUB_LOG_UPDATE_START = string.Template(''' +

    +''') + + self.EPUB_LOG_ENTRY = string.Template(''' +${label}: ${value} +''') + + self.EPUB_LOG_UPDATE_END = string.Template(''' +


    +''') + + self.EPUB_LOG_PAGE_END = string.Template(''' + + +''') + + self.EPUB_LOG_PAGE_END = string.Template(''' + + +''') + + self.EPUB_COVER = string.Template(''' +Cover
    +cover +
    +''') + + def writeLogPage(self, out): + """ + Write the log page, but only include entries that there's + metadata for. START, ENTRY and END are expected to already be + string.Template(). START and END are expected to use the same + names as Story.metadata, but ENTRY should use id, label and value. + """ + if self.hasConfig("logpage_start"): + START = string.Template(self.getConfig("logpage_start")) + else: + START = self.EPUB_LOG_PAGE_START + + if self.hasConfig("logpage_end"): + END = string.Template(self.getConfig("logpage_end")) + else: + END = self.EPUB_LOG_PAGE_END + + # if there's a self.story.logfile, there's an existing log + # to add to. + if self.story.logfile: + logger.debug("existing logfile found, appending") + logger.debug("existing data:%s"%self._getLastLogData(self.story.logfile)) + replace_string = "" # "" + self._write(out,self.story.logfile.replace(replace_string,self._makeLogEntry(self._getLastLogData(self.story.logfile))+replace_string)) + else: + # otherwise, write a new one. + self._write(out,START.substitute(self.story.getAllMetadata())) + self._write(out,self._makeLogEntry()) + self._write(out,END.substitute(self.story.getAllMetadata())) + + # self parsing instead of Soup because it should be simple and not + # worth the overhead. + def _getLastLogData(self,logfile): + """ + Make a dict() of the most recent(last) log entry for each piece of metadata. + Switch rindex to index to search from top instead of bottom. 
+ """ + values = {} + for entry in self.getConfigList("logpage_entries") + self.getConfigList("extra_logpage_entries"): + try: + # 1975-04-15 + span = ''%entry + idx = logfile.rindex(span)+len(span) + values[entry] = logfile[idx:logfile.index('',idx)] + except Exception, e: + #print("e:%s"%e) + pass + + return values + + def _makeLogEntry(self, oldvalues={}): + if self.hasConfig("logpage_update_start"): + START = string.Template(self.getConfig("logpage_update_start")) + else: + START = self.EPUB_LOG_UPDATE_START + + if self.hasConfig("logpage_entry"): + ENTRY = string.Template(self.getConfig("logpage_entry")) + else: + ENTRY = self.EPUB_LOG_ENTRY + + if self.hasConfig("logpage_update_end"): + END = string.Template(self.getConfig("logpage_update_end")) + else: + END = self.EPUB_LOG_UPDATE_END + + retval = START.substitute(self.story.getAllMetadata()) + + for entry in self.getConfigList("logpage_entries") + self.getConfigList("extra_logpage_entries"): + if self.isValidMetaEntry(entry): + val = self.story.getMetadata(entry) + if val and ( entry not in oldvalues or val != oldvalues[entry] ): + if self.hasConfig(entry+"_label"): + label=self.getConfig(entry+"_label") + elif entry in self.titleLabels: + logger.debug("Using fallback label for %s_label"%entry) + label=self.titleLabels[entry] + else: + label="%s"%entry.title() + logger.debug("No known label for %s, fallback to '%s'"%(entry,label)) + + retval = retval + ENTRY.substitute({'id':entry, + 'label':label, + 'value':val}) + else: + # could be useful for introducing extra text, but + # mostly it makes it easy to tell when you get the + # keyword wrong. + retval = retval + entry + + retval = retval + END.substitute(self.story.getAllMetadata()) + + if self.getConfig('replace_hr'): + retval = retval.replace("
    ","
    * * *
    ") + + return retval + + def writeStoryImpl(self, out): + + ## Python 2.5 ZipFile is rather more primative than later + ## versions. It can operate on a file, or on a StringIO, but + ## not on an open stream. OTOH, I suspect we would have had + ## problems with closing and opening again to change the + ## compression type anyway. + zipio = StringIO.StringIO() + + ## mimetype must be first file and uncompressed. Python 2.5 + ## ZipFile can't change compression type file-by-file, so we + ## have to close and re-open + outputepub = ZipFile(zipio, 'w', compression=ZIP_STORED) + outputepub.debug=3 + outputepub.writestr('mimetype','application/epub+zip') + outputepub.close() + + ## Re-open file for content. + outputepub = ZipFile(zipio, 'a', compression=ZIP_DEFLATED) + outputepub.debug=3 + + ## Create META-INF/container.xml file. The only thing it does is + ## point to content.opf + containerdom = getDOMImplementation().createDocument(None, "container", None) + containertop = containerdom.documentElement + containertop.setAttribute("version","1.0") + containertop.setAttribute("xmlns","urn:oasis:names:tc:opendocument:xmlns:container") + rootfiles = containerdom.createElement("rootfiles") + containertop.appendChild(rootfiles) + rootfiles.appendChild(newTag(containerdom,"rootfile",{"full-path":"content.opf", + "media-type":"application/oebps-package+xml"})) + outputepub.writestr("META-INF/container.xml",containerdom.toxml(encoding='utf-8')) + containerdom.unlink() + del containerdom + + ## Epub has two metadata files with real data. 
We're putting + ## them in content.opf (pointed to by META-INF/container.xml) + ## and toc.ncx (pointed to by content.opf) + + ## content.opf contains metadata, a 'manifest' list of all + ## other included files, and another 'spine' list of the items in the + ## file + + uniqueid= 'fanficdownloader-uid:%s-u%s-s%s' % ( + self.getMetadata('site'), + self.story.getList('authorId')[0], + self.getMetadata('storyId')) + + contentdom = getDOMImplementation().createDocument(None, "package", None) + package = contentdom.documentElement + package.setAttribute("version","2.0") + package.setAttribute("xmlns","http://www.idpf.org/2007/opf") + package.setAttribute("unique-identifier","fanficdownloader-uid") + metadata=newTag(contentdom,"metadata", + attrs={"xmlns:dc":"http://purl.org/dc/elements/1.1/", + "xmlns:opf":"http://www.idpf.org/2007/opf"}) + package.appendChild(metadata) + + metadata.appendChild(newTag(contentdom,"dc:identifier", + text=uniqueid, + attrs={"id":"fanficdownloader-uid"})) + + if self.getMetadata('title'): + metadata.appendChild(newTag(contentdom,"dc:title",text=self.getMetadata('title'))) + + if self.getMetadata('author'): + if self.story.isList('author'): + for auth in self.story.getList('author'): + metadata.appendChild(newTag(contentdom,"dc:creator", + attrs={"opf:role":"aut"}, + text=auth)) + else: + metadata.appendChild(newTag(contentdom,"dc:creator", + attrs={"opf:role":"aut"}, + text=self.getMetadata('author'))) + + metadata.appendChild(newTag(contentdom,"dc:contributor",text="fanficdownloader [http://fanficdownloader.googlecode.com]",attrs={"opf:role":"bkp"})) + metadata.appendChild(newTag(contentdom,"dc:rights",text="")) + if self.story.getMetadata('langcode'): + metadata.appendChild(newTag(contentdom,"dc:language",text=self.story.getMetadata('langcode'))) + else: + metadata.appendChild(newTag(contentdom,"dc:language",text='en')) + + # published, created, updated, calibre + # Leave calling self.story.getMetadataRaw directly in case date format 
changes. + if self.story.getMetadataRaw('datePublished'): + metadata.appendChild(newTag(contentdom,"dc:date", + attrs={"opf:event":"publication"}, + text=self.story.getMetadataRaw('datePublished').strftime("%Y-%m-%d"))) + + if self.story.getMetadataRaw('dateCreated'): + metadata.appendChild(newTag(contentdom,"dc:date", + attrs={"opf:event":"creation"}, + text=self.story.getMetadataRaw('dateCreated').strftime("%Y-%m-%d"))) + + if self.story.getMetadataRaw('dateUpdated'): + metadata.appendChild(newTag(contentdom,"dc:date", + attrs={"opf:event":"modification"}, + text=self.story.getMetadataRaw('dateUpdated').strftime("%Y-%m-%d"))) + metadata.appendChild(newTag(contentdom,"meta", + attrs={"name":"calibre:timestamp", + "content":self.story.getMetadataRaw('dateUpdated').strftime("%Y-%m-%dT%H:%M:%S")})) + + if self.getMetadata('description'): + metadata.appendChild(newTag(contentdom,"dc:description",text= + self.getMetadata('description'))) + + for subject in self.story.getSubjectTags(): + metadata.appendChild(newTag(contentdom,"dc:subject",text=subject)) + + + if self.getMetadata('site'): + metadata.appendChild(newTag(contentdom,"dc:publisher", + text=self.getMetadata('site'))) + + if self.getMetadata('storyUrl'): + metadata.appendChild(newTag(contentdom,"dc:identifier", + attrs={"opf:scheme":"URL"}, + text=self.getMetadata('storyUrl'))) + metadata.appendChild(newTag(contentdom,"dc:source", + text=self.getMetadata('storyUrl'))) + + ## end of metadata, create manifest. + items = [] # list of (id, href, type, title) tuples(all strings) + itemrefs = [] # list of strings -- idrefs from .opfs' spines + items.append(("ncx","toc.ncx","application/x-dtbncx+xml",None)) ## we'll generate the toc.ncx file, + ## but it needs to be in the items manifest. 
+ + guide = None + coverIO = None + + coverimgid = "image0000" + if not self.story.cover and self.story.oldcover: + logger.debug("writer_epub: no new cover, has old cover, write image.") + (oldcoverhtmlhref, + oldcoverhtmltype, + oldcoverhtmldata, + oldcoverimghref, + oldcoverimgtype, + oldcoverimgdata) = self.story.oldcover + outputepub.writestr(oldcoverhtmlhref,oldcoverhtmldata) + outputepub.writestr(oldcoverimghref,oldcoverimgdata) + + coverimgid = "image0" + items.append((coverimgid, + oldcoverimghref, + oldcoverimgtype, + None)) + items.append(("cover",oldcoverhtmlhref,oldcoverhtmltype,None)) + itemrefs.append("cover") + metadata.appendChild(newTag(contentdom,"meta",{"content":"image0", + "name":"cover"})) + guide = newTag(contentdom,"guide") + guide.appendChild(newTag(contentdom,"reference",attrs={"type":"cover", + "title":"Cover", + "href":oldcoverhtmlhref})) + + + + if self.getConfig('include_images'): + imgcount=0 + for imgmap in self.story.getImgUrls(): + imgfile = "OEBPS/"+imgmap['newsrc'] + outputepub.writestr(imgfile,imgmap['data']) + items.append(("image%04d"%imgcount, + imgfile, + imgmap['mime'], + None)) + imgcount+=1 + if 'cover' in imgfile: + # make sure coverimgid is set to the cover, not + # just the first image. + coverimgid = items[-1][0] + + + items.append(("style","OEBPS/stylesheet.css","text/css",None)) + + if self.story.cover: + # Note that the id of the cover xhmtl *must* be 'cover' + # for it to work on Nook. 
+ items.append(("cover","OEBPS/cover.xhtml","application/xhtml+xml",None)) + itemrefs.append("cover") + # + # + metadata.appendChild(newTag(contentdom,"meta",{"content":coverimgid, + "name":"cover"})) + # cover stuff for later: + # at end of : + # + # + # + guide = newTag(contentdom,"guide") + guide.appendChild(newTag(contentdom,"reference",attrs={"type":"cover", + "title":"Cover", + "href":"OEBPS/cover.xhtml"})) + + if self.hasConfig("cover_content"): + COVER = string.Template(self.getConfig("cover_content")) + else: + COVER = self.EPUB_COVER + coverIO = StringIO.StringIO() + coverIO.write(COVER.substitute(dict(self.story.getAllMetadata().items()+{'coverimg':self.story.cover}.items()))) + + if self.getConfig("include_titlepage"): + items.append(("title_page","OEBPS/title_page.xhtml","application/xhtml+xml","Title Page")) + itemrefs.append("title_page") + if len(self.story.getChapters()) > 1 and self.getConfig("include_tocpage") and not self.metaonly : + items.append(("toc_page","OEBPS/toc_page.xhtml","application/xhtml+xml","Table of Contents")) + itemrefs.append("toc_page") + + dologpage = ( self.getConfig("include_logpage") == "smart" and \ + (self.story.logfile or self.story.getMetadataRaw("status") == "In-Progress") ) \ + or self.getConfig("include_logpage") == "true" + + if dologpage: + items.append(("log_page","OEBPS/log_page.xhtml","application/xhtml+xml","Update Log")) + itemrefs.append("log_page") + + for index, (url,title,html) in enumerate(self.story.getChapters(fortoc=True)): + if html: + i=index+1 + items.append(("file%04d"%i, + "OEBPS/file%04d.xhtml"%i, + "application/xhtml+xml", + title)) + itemrefs.append("file%04d"%i) + + manifest = contentdom.createElement("manifest") + package.appendChild(manifest) + for item in items: + (id,href,type,title)=item + manifest.appendChild(newTag(contentdom,"item", + attrs={'id':id, + 'href':href, + 'media-type':type})) + + spine = newTag(contentdom,"spine",attrs={"toc":"ncx"}) + package.appendChild(spine) + for 
itemref in itemrefs: + spine.appendChild(newTag(contentdom,"itemref", + attrs={"idref":itemref, + "linear":"yes"})) + # guide only exists if there's a cover. + if guide: + package.appendChild(guide) + + # write content.opf to zip. + contentxml = contentdom.toxml(encoding='utf-8') + + # tweak for brain damaged Nook STR. Nook insists on name before content. + contentxml = contentxml.replace(''%coverimgid, + ''%coverimgid) + outputepub.writestr("content.opf",contentxml) + + contentdom.unlink() + del contentdom + + ## create toc.ncx file + tocncxdom = getDOMImplementation().createDocument(None, "ncx", None) + ncx = tocncxdom.documentElement + ncx.setAttribute("version","2005-1") + ncx.setAttribute("xmlns","http://www.daisy.org/z3986/2005/ncx/") + head = tocncxdom.createElement("head") + ncx.appendChild(head) + head.appendChild(newTag(tocncxdom,"meta", + attrs={"name":"dtb:uid", "content":uniqueid})) + head.appendChild(newTag(tocncxdom,"meta", + attrs={"name":"dtb:depth", "content":"1"})) + head.appendChild(newTag(tocncxdom,"meta", + attrs={"name":"dtb:totalPageCount", "content":"0"})) + head.appendChild(newTag(tocncxdom,"meta", + attrs={"name":"dtb:maxPageNumber", "content":"0"})) + + docTitle = tocncxdom.createElement("docTitle") + docTitle.appendChild(newTag(tocncxdom,"text",text=self.getMetadata('title'))) + ncx.appendChild(docTitle) + + tocnavMap = tocncxdom.createElement("navMap") + ncx.appendChild(tocnavMap) + + # + # + # + # + # + # + index=0 + for item in items: + (id,href,type,title)=item + # only items to be skipped, cover.xhtml, images, toc.ncx, stylesheet.css, should have no title. + if title : + navPoint = newTag(tocncxdom,"navPoint", + attrs={'id':id, + 'playOrder':str(index)}) + tocnavMap.appendChild(navPoint) + navLabel = newTag(tocncxdom,"navLabel") + navPoint.appendChild(navLabel) + ## the xml library will re-escape as needed. 
+ navLabel.appendChild(newTag(tocncxdom,"text",text=stripHTML(title))) + navPoint.appendChild(newTag(tocncxdom,"content",attrs={"src":href})) + index=index+1 + + # write toc.ncx to zip file + outputepub.writestr("toc.ncx",tocncxdom.toxml(encoding='utf-8')) + tocncxdom.unlink() + del tocncxdom + + # write stylesheet.css file. + outputepub.writestr("OEBPS/stylesheet.css",self.EPUB_CSS.substitute(self.story.getAllMetadata())) + + # write title page. + if self.getConfig("titlepage_use_table"): + TITLE_PAGE_START = self.EPUB_TABLE_TITLE_PAGE_START + TITLE_ENTRY = self.EPUB_TABLE_TITLE_ENTRY + WIDE_TITLE_ENTRY = self.EPUB_TABLE_TITLE_WIDE_ENTRY + NO_TITLE_ENTRY = self.EPUB_TABLE_NO_TITLE_ENTRY + TITLE_PAGE_END = self.EPUB_TABLE_TITLE_PAGE_END + else: + TITLE_PAGE_START = self.EPUB_TITLE_PAGE_START + TITLE_ENTRY = self.EPUB_TITLE_ENTRY + WIDE_TITLE_ENTRY = self.EPUB_TITLE_ENTRY # same, only wide in tables. + NO_TITLE_ENTRY = self.EPUB_NO_TITLE_ENTRY + TITLE_PAGE_END = self.EPUB_TITLE_PAGE_END + + if coverIO: + outputepub.writestr("OEBPS/cover.xhtml",coverIO.getvalue()) + coverIO.close() + + titlepageIO = StringIO.StringIO() + self.writeTitlePage(out=titlepageIO, + START=TITLE_PAGE_START, + ENTRY=TITLE_ENTRY, + WIDE_ENTRY=WIDE_TITLE_ENTRY, + END=TITLE_PAGE_END, + NO_TITLE_ENTRY=NO_TITLE_ENTRY) + if titlepageIO.getvalue(): # will be false if no title page. + outputepub.writestr("OEBPS/title_page.xhtml",titlepageIO.getvalue()) + titlepageIO.close() + + # write toc page. + tocpageIO = StringIO.StringIO() + self.writeTOCPage(tocpageIO, + self.EPUB_TOC_PAGE_START, + self.EPUB_TOC_ENTRY, + self.EPUB_TOC_PAGE_END) + if tocpageIO.getvalue(): # will be false if no toc page. + outputepub.writestr("OEBPS/toc_page.xhtml",tocpageIO.getvalue()) + tocpageIO.close() + + if dologpage: + # write log page. 
+ logpageIO = StringIO.StringIO() + self.writeLogPage(logpageIO) + outputepub.writestr("OEBPS/log_page.xhtml",logpageIO.getvalue()) + logpageIO.close() + + if self.hasConfig('chapter_start'): + CHAPTER_START = string.Template(self.getConfig("chapter_start")) + else: + CHAPTER_START = self.EPUB_CHAPTER_START + + if self.hasConfig('chapter_end'): + CHAPTER_END = string.Template(self.getConfig("chapter_end")) + else: + CHAPTER_END = self.EPUB_CHAPTER_END + + for index, (url,title,html) in enumerate(self.story.getChapters()): + if html: + logger.debug('Writing chapter text for: %s' % title) + vals={'url':url, 'chapter':title, 'index':"%04d"%(index+1), 'number':index+1} + fullhtml = CHAPTER_START.substitute(vals) + html + CHAPTER_END.substitute(vals) + # ffnet(& maybe others) gives the whole chapter text + # as one line. This causes problems for nook(at + # least) when the chapter size starts getting big + # (200k+) + #fullhtml = fullhtml.replace('

    ','

    \n').replace('
    ','
    \n') + # The replaces above added tons of extra newlines + # during *each* epub update. The regexp version adds + # only one and removes any extra. + fullhtml = re.sub(r'(

    |
    )\n*',r'\1\n',fullhtml) + + outputepub.writestr("OEBPS/file%04d.xhtml"%(index+1),fullhtml.encode('utf-8')) + del fullhtml + + if self.story.calibrebookmark: + outputepub.writestr("META-INF/calibre_bookmarks.txt",self.story.calibrebookmark) + + # declares all the files created by Windows. otherwise, when + # it runs in appengine, windows unzips the files as 000 perms. + for zf in outputepub.filelist: + zf.create_system = 0 + outputepub.close() + out.write(zipio.getvalue()) + zipio.close() + +## Utility method for creating new tags. +def newTag(dom,name,attrs=None,text=None): + tag = dom.createElement(name) + if( attrs is not None ): + for attr in attrs.keys(): + tag.setAttribute(attr,attrs[attr]) + if( text is not None ): + tag.appendChild(dom.createTextNode(text)) + return tag + diff --git a/fanficdownloader/writers/writer_html.py b/fanficdownloader/writers/writer_html.py new file mode 100644 index 00000000..41b01754 --- /dev/null +++ b/fanficdownloader/writers/writer_html.py @@ -0,0 +1,144 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import logging +import string + +from base_writer import * + +class HTMLWriter(BaseStoryWriter): + + @staticmethod + def getFormatName(): + return 'html' + + @staticmethod + def getFormatExt(): + return '.html' + + def __init__(self, config, story): + BaseStoryWriter.__init__(self, config, story) + + self.HTML_FILE_START = string.Template(''' + + + +${title} by ${author} + + + +

    ${title} by ${authorHTML}

    +''') + + self.HTML_COVER = string.Template(''' +cover +''') + + self.HTML_TITLE_PAGE_START = string.Template(''' + +''') + + self.HTML_TITLE_ENTRY = string.Template(''' + +''') + + self.HTML_TITLE_PAGE_END = string.Template(''' +
    ${label}:${value}
    +''') + + self.HTML_TOC_PAGE_START = string.Template(''' +

    Table of Contents

    +

    +''') + + self.HTML_TOC_ENTRY = string.Template(''' +${chapter}
    +''') + + self.HTML_TOC_PAGE_END = string.Template(''' +

    +''') + + self.HTML_CHAPTER_START = string.Template(''' +

    ${chapter}

    +''') + + self.HTML_CHAPTER_END = string.Template('') + + self.HTML_FILE_END = string.Template(''' + +''') + + + def writeStoryImpl(self, out): + + if self.hasConfig("cover_content"): + COVER = string.Template(self.getConfig("cover_content")) + else: + COVER = self.HTML_COVER + + if self.hasConfig('file_start'): + FILE_START = string.Template(self.getConfig("file_start")) + else: + FILE_START = self.HTML_FILE_START + + if self.hasConfig('file_end'): + FILE_END = string.Template(self.getConfig("file_end")) + else: + FILE_END = self.HTML_FILE_END + + self._write(out,FILE_START.substitute(self.story.getAllMetadata())) + + if self.getConfig('include_images') and self.story.cover: + self._write(out,COVER.substitute(dict(self.story.getAllMetadata().items()+{'coverimg':self.story.cover}.items()))) + + self.writeTitlePage(out, + self.HTML_TITLE_PAGE_START, + self.HTML_TITLE_ENTRY, + self.HTML_TITLE_PAGE_END) + + self.writeTOCPage(out, + self.HTML_TOC_PAGE_START, + self.HTML_TOC_ENTRY, + self.HTML_TOC_PAGE_END) + + if self.hasConfig('chapter_start'): + CHAPTER_START = string.Template(self.getConfig("chapter_start")) + else: + CHAPTER_START = self.HTML_CHAPTER_START + + if self.hasConfig('chapter_end'): + CHAPTER_END = string.Template(self.getConfig("chapter_end")) + else: + CHAPTER_END = self.HTML_CHAPTER_END + + for index, (url,title,html) in enumerate(self.story.getChapters()): + if html: + logging.debug('Writing chapter text for: %s' % title) + vals={'url':url, 'chapter':title, 'index':"%04d"%(index+1), 'number':index+1} + self._write(out,CHAPTER_START.substitute(vals)) + self._write(out,html) + self._write(out,CHAPTER_END.substitute(vals)) + + self._write(out,FILE_END.substitute(self.story.getAllMetadata())) + + if self.getConfig('include_images'): + for imgmap in self.story.getImgUrls(): + self.writeFile(imgmap['newsrc'],imgmap['data']) + diff --git a/fanficdownloader/writers/writer_mobi.py b/fanficdownloader/writers/writer_mobi.py new file mode 100644 index 
00000000..4396ab08 --- /dev/null +++ b/fanficdownloader/writers/writer_mobi.py @@ -0,0 +1,197 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import logging +import string +import StringIO + +from base_writer import * +from ..htmlcleanup import stripHTML +from ..mobi import Converter +from ..exceptions import FailedToWriteOutput + +logger = logging.getLogger(__name__) + +class MobiWriter(BaseStoryWriter): + + @staticmethod + def getFormatName(): + return 'mobi' + + @staticmethod + def getFormatExt(): + return '.mobi' + + def __init__(self, config, story): + BaseStoryWriter.__init__(self, config, story) + + self.MOBI_TITLE_PAGE_START = string.Template(''' + + + +${title} by ${author} + + +

    ${title} by ${authorHTML}

    +
    +''') + + self.MOBI_TITLE_ENTRY = string.Template(''' +${label}: ${value}
    +''') + + self.MOBI_NO_TITLE_ENTRY = string.Template(''' +${value}
    +''') + + self.MOBI_TITLE_PAGE_END = string.Template(''' +
    + + + +''') + + self.MOBI_TABLE_TITLE_PAGE_START = string.Template(''' + + + +${title} by ${author} + + +

    ${title} by ${authorHTML}

    + +''') + + self.MOBI_TABLE_TITLE_ENTRY = string.Template(''' + +''') + + self.MOBI_TABLE_TITLE_WIDE_ENTRY = string.Template(''' + +''') + + self.MOBI_TABLE_NO_TITLE_WIDE_ENTRY = string.Template(''' + +''') + + self.MOBI_TABLE_TITLE_PAGE_END = string.Template(''' +
    ${label}:${value}
    ${label}: ${value}
    ${value}
    + + + +''') + + self.MOBI_CHAPTER_START = string.Template(''' + + + +${chapter} + + +

    ${chapter}

    +''') + + self.MOBI_CHAPTER_END = string.Template(''' + + +''') + + def writeStoryImpl(self, out): + + files = [] + + # write title page. + if self.getConfig("titlepage_use_table"): + TITLE_PAGE_START = self.MOBI_TABLE_TITLE_PAGE_START + TITLE_ENTRY = self.MOBI_TABLE_TITLE_ENTRY + WIDE_TITLE_ENTRY = self.MOBI_TABLE_TITLE_WIDE_ENTRY + NO_TITLE_ENTRY = self.MOBI_TABLE_NO_TITLE_ENTRY + TITLE_PAGE_END = self.MOBI_TABLE_TITLE_PAGE_END + else: + TITLE_PAGE_START = self.MOBI_TITLE_PAGE_START + TITLE_ENTRY = self.MOBI_TITLE_ENTRY + WIDE_TITLE_ENTRY = self.MOBI_TITLE_ENTRY # same, only wide in tables. + NO_TITLE_ENTRY = self.MOBI_NO_TITLE_ENTRY + TITLE_PAGE_END = self.MOBI_TITLE_PAGE_END + + titlepageIO = StringIO.StringIO() + self.writeTitlePage(out=titlepageIO, + START=TITLE_PAGE_START, + ENTRY=TITLE_ENTRY, + WIDE_ENTRY=WIDE_TITLE_ENTRY, + END=TITLE_PAGE_END, + NO_TITLE_ENTRY=NO_TITLE_ENTRY) + if titlepageIO.getvalue(): # will be false if no title page. + files.append(titlepageIO.getvalue()) + titlepageIO.close() + + ## MOBI always has a TOC injected by mobi.py because there's + ## no meta-data TOC. + # # write toc page. + # tocpageIO = StringIO.StringIO() + # self.writeTOCPage(tocpageIO, + # self.MOBI_TOC_PAGE_START, + # self.MOBI_TOC_ENTRY, + # self.MOBI_TOC_PAGE_END) + # if tocpageIO.getvalue(): # will be false if no toc page. 
+ # files.append(tocpageIO.getvalue()) + # tocpageIO.close() + + if self.hasConfig('chapter_start'): + CHAPTER_START = string.Template(self.getConfig("chapter_start")) + else: + CHAPTER_START = self.MOBI_CHAPTER_START + + if self.hasConfig('chapter_end'): + CHAPTER_END = string.Template(self.getConfig("chapter_end")) + else: + CHAPTER_END = self.MOBI_CHAPTER_END + + for index, (url,title,html) in enumerate(self.story.getChapters()): + if html: + logger.debug('Writing chapter text for: %s' % title) + vals={'url':url, 'chapter':title, 'index':"%04d"%(index+1), 'number':index+1} + fullhtml = CHAPTER_START.substitute(vals) + html + CHAPTER_END.substitute(vals) + # ffnet(& maybe others) gives the whole chapter text + # as one line. This causes problems for nook(at + # least) when the chapter size starts getting big + # (200k+) + fullhtml = fullhtml.replace('

    ','

    \n').replace('
    ','
    \n') + files.append(fullhtml.encode('utf-8')) + del fullhtml + + c = Converter(title=self.getMetadata('title'), + author=self.getMetadata('author'), + publisher=self.getMetadata('site')) + mobidata = c.ConvertStrings(files) + if len(mobidata) < 1: + raise FailedToWriteOutput("Zero length mobi output") + out.write(mobidata) + + del files + del mobidata + +## Utility method for creating new tags. +def newTag(dom,name,attrs=None,text=None): + tag = dom.createElement(name) + if( attrs is not None ): + for attr in attrs.keys(): + tag.setAttribute(attr,attrs[attr]) + if( text is not None ): + tag.appendChild(dom.createTextNode(text)) + return tag + diff --git a/fanficdownloader/writers/writer_txt.py b/fanficdownloader/writers/writer_txt.py new file mode 100644 index 00000000..8705a41f --- /dev/null +++ b/fanficdownloader/writers/writer_txt.py @@ -0,0 +1,190 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import logging +import string +from textwrap import wrap + +from base_writer import * + +from ..html2text import html2text + +## In BaseStoryWriter, we define _write to encode objects +## back into for true output. But txt needs to write the +## title page and TOC to a buffer first to wordwrap. And StringIO +## gets pissy about unicode bytes in its buflist. This decodes the +## unicode containing object passed in back to a +## object so they join up properly. 
Could override _write to not +## encode and do out.write(whatever.encode('utf8') instead. Honestly +## not sure which is uglier. +class KludgeStringIO(): + def __init__(self, buf = ''): + self.buflist=[] + def write(self,s): + try: + s=s.decode('utf-8') + except: + pass + self.buflist.append(s) + def getvalue(self): + return u''.join(self.buflist) + def close(self): + pass + +class TextWriter(BaseStoryWriter): + + @staticmethod + def getFormatName(): + return 'txt' + + @staticmethod + def getFormatExt(): + return '.txt' + + def __init__(self, config, story): + + BaseStoryWriter.__init__(self, config, story) + + self.TEXT_FILE_START = string.Template(u''' + + +${title} + +by ${author} + + +''') + + self.TEXT_TITLE_PAGE_START = string.Template(u''' +''') + + self.TEXT_TITLE_ENTRY = string.Template(u'''${label}: ${value} +''') + + self.TEXT_TITLE_PAGE_END = string.Template(u''' + + +''') + + self.TEXT_TOC_PAGE_START = string.Template(u''' + +TABLE OF CONTENTS + +''') + + self.TEXT_TOC_ENTRY = string.Template(u''' +${chapter} +''') + + self.TEXT_TOC_PAGE_END = string.Template(u''' +''') + + self.TEXT_CHAPTER_START = string.Template(u''' + +\t${chapter} + +''') + self.TEXT_CHAPTER_END = string.Template(u'') + + self.TEXT_FILE_END = string.Template(u''' + +End file. 
+''') + + def writeStoryImpl(self, out): + + self.wrap_width = self.getConfig('wrap_width') + if self.wrap_width == '' or self.wrap_width == '0': + self.wrap_width = None + else: + self.wrap_width = int(self.wrap_width) + + wrapout = KludgeStringIO() + + if self.hasConfig("file_start"): + FILE_START = string.Template(self.getConfig("file_start")) + else: + FILE_START = self.TEXT_FILE_START + + if self.hasConfig("file_end"): + FILE_END = string.Template(self.getConfig("file_end")) + else: + FILE_END = self.TEXT_FILE_END + + wrapout.write(FILE_START.substitute(self.story.getAllMetadata())) + + self.writeTitlePage(wrapout, + self.TEXT_TITLE_PAGE_START, + self.TEXT_TITLE_ENTRY, + self.TEXT_TITLE_PAGE_END) + towrap = wrapout.getvalue() + + self.writeTOCPage(wrapout, + self.TEXT_TOC_PAGE_START, + self.TEXT_TOC_ENTRY, + self.TEXT_TOC_PAGE_END) + + towrap = wrapout.getvalue() + wrapout.close() + towrap = removeAllEntities(towrap) + + self._write(out,self.lineends(self.wraplines(towrap))) + + if self.hasConfig('chapter_start'): + CHAPTER_START = string.Template(self.getConfig("chapter_start")) + else: + CHAPTER_START = self.TEXT_CHAPTER_START + + if self.hasConfig('chapter_end'): + CHAPTER_END = string.Template(self.getConfig("chapter_end")) + else: + CHAPTER_END = self.TEXT_CHAPTER_END + + for index, (url, title,html) in enumerate(self.story.getChapters()): + if html: + logging.debug('Writing chapter text for: %s' % title) + vals={'url':url, 'chapter':title, 'index':"%04d"%(index+1), 'number':index+1} + self._write(out,self.lineends(self.wraplines(removeAllEntities(CHAPTER_START.substitute(vals))))) + self._write(out,self.lineends(html2text(html,wrap_width=self.wrap_width))) + self._write(out,self.lineends(self.wraplines(removeAllEntities(CHAPTER_END.substitute(vals))))) + + self._write(out,self.lineends(self.wraplines(FILE_END.substitute(self.story.getAllMetadata())))) + + def wraplines(self, text): + + if not self.wrap_width: + return text + + result='' + for para in 
text.split("\n"): + first=True + for line in wrap(para, self.wrap_width): + if first: + first=False + else: + result += u"\n" + result += line + result += u"\n" + return result + + ## The appengine will return unix line endings. + def lineends(self, txt): + txt = txt.replace('\r','') + if self.getConfig("windows_eol"): + txt = txt.replace('\n',u'\r\n') + return txt + diff --git a/ffstorage.py b/ffstorage.py new file mode 100644 index 00000000..bad9b4a4 --- /dev/null +++ b/ffstorage.py @@ -0,0 +1,63 @@ +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import pickle, copy +from google.appengine.ext import db + +class ObjectProperty(db.Property): + data_type = db.Blob + + def get_value_for_datastore(self, model_instance): + value = self.__get__(model_instance, model_instance.__class__) + pickled_val = pickle.dumps(value,protocol=pickle.HIGHEST_PROTOCOL) + if value is not None: return db.Blob(pickled_val) + + def make_value_from_datastore(self, value): + if value is not None: return pickle.loads(value) + + def default_value(self): + return copy.copy(self.default) + +class DownloadMeta(db.Model): + user = db.UserProperty() + url = db.StringProperty() + name = db.StringProperty() + title = db.StringProperty() + author = db.StringProperty() + format = db.StringProperty() + failure = db.TextProperty() + completed = db.BooleanProperty(default=False) + date = db.DateTimeProperty(auto_now_add=True) + version = db.StringProperty() + # data_chunks is implicit from DownloadData def. + +class DownloadData(db.Model): + download = db.ReferenceProperty(DownloadMeta, + collection_name='data_chunks') + blob = db.BlobProperty() + index = db.IntegerProperty() + +class UserConfig(db.Model): + user = db.UserProperty() + config = db.BlobProperty() + +class SavedMeta(db.Model): + url = db.StringProperty() + title = db.StringProperty() + author = db.StringProperty() + date = db.DateTimeProperty(auto_now_add=True) + count = db.IntegerProperty() + meta = ObjectProperty() + diff --git a/index-ajax.html b/index-ajax.html new file mode 100644 index 00000000..62eba47c --- /dev/null +++ b/index-ajax.html @@ -0,0 +1,109 @@ + + + + + + + FanFictionDownLoader (fanfiction.net, fictionalley, ficwad to epub and HTML) + + + + + + + + + +
    +

    + FanFictionDownLoader +

    + + +
    +
    + Hi, {{ nickname }}! This is a fan fiction downloader, which makes reading stories from various websites much easier. Please paste a URL of the first chapter in the box to start. Alternatively, see your personal list of previously downloaded fanfics. +
    + +
    + Ebook format   +
    + +
    + +
    + + + +
    + + + +
    +
    + +

    + Login and Password +

    +
    + If the story requires a login and password to download (e.g. marked as Mature on FFA), you may need to provide your credentials to download it, otherwise just leave it empty +
    +
    +
    +
    Login
    +
    +
    + +
    +
    Password
    +
    +
    +
    +
    + + +
    + + +
    + +
    +
    + Few things to know, which will make your life substantially easier: +
      +
    1. Small post written by me — how to read fiction in Stanza or any other ebook reader.
    2. +
    3. Currently we support fanfiction.net, fictionpress.com, fanficauthors.net and ficwad.com
    4. +
    5. Paste a URL of the first chapter of the fanfic, not the index page
    6. +
    7. Fics with a single chapter are not supported (you can just copy and paste it)
    8. +
    9. Stories which are too long may not be downloaded correctly and application will report a time-out error — this is a limitation which is currently imposed by Google AppEngine on a long-running activities
    10. +
    11. FicWad support is somewhat flaky — if you feel it doesn't work for you, send all the details to me
    12. +
    13. You can download fanfics and store them for 'later' by just downloading them and visiting recent downloads section, but in future they will be deleted after 5 days to save the space
    14. +
    15. If Downloader simply opens a download file window rather than saves the fanfic and gives you a link, it means it is too large to save in the database and you need to download it straight away
    16. +
    17. If you think that something that should work in fact doesn't, drop me a mail to sigizmund@gmail.com
    18. +
    + Otherwise, just have fun, and if you want to say thank you — use the email above. +
    +
    + Powered by Google App Engine +

    + This is a web front-end to FanFictionDownLoader
    + Copyright © Fanficdownloader team +
    + +
    + + + + diff --git a/index.html b/index.html new file mode 100644 index 00000000..cf13ea94 --- /dev/null +++ b/index.html @@ -0,0 +1,206 @@ + + + + + FanFictionDownLoader - read fanfiction from twilighted.net, fanfiction.net, fictionpress.com, fictionalley.org, ficwad.com, potionsandsnitches.net, harrypotterfanfiction.com, mediaminer.org on Kindle, Nook, Sony Reader, iPad, iPhone, Android, Aldiko, Stanza + + + + + + + + + +
    +

    + FanFictionDownLoader +

    + +
    + + +
    + + {{yourfile}} + + + {% if authorized %} +
    +
    +
    +

    Hi, {{ nickname }}! This is FanFictionDownLoader, which makes reading stories from various websites + much easier.

    +
    + +

    Changes:

    +

    +

      +
    • New Site: fhsarchive.com -- eFiction Base adapter.
    • +
    • Fixes for storiesonline.net site changes--'codes' are now 'sitetags', thanks Jeff.
    • +
    • Fix for literotica.com HTML.
    • +
    • Known issue: Specific metadata 'eroticatags' for literotica.com doesn't work on all stories.
    • +
    • Known issue: Metadata collection is not as complete for 'Base eFiction' adapters.
    • +
    +

    +

    + Questions? Check out our + FAQs. +

    +

    + If you have any problems with this application, please + report them in + the FanFictionDownLoader Google Group. The + Previous Version is also available for you to use if necessary. +

    +
    + {{ error_message }} +
    +
    + +
    +
    URL:
    +
    +
    Ebook format
    +
    + EPub + HTML + Plain Text + Mobi(Kindle) +
    +
    +
    + +

    For most readers, including Sony Reader, Nook and iPad, use EPub.

    +
    +
    +
    +

    + Customize your User Configuration. +

    +

    + Or see your personal list of previously downloaded fanfics. +

    +

    + See a list of downloaded fanfics by all users by most popular or most recent. +

    +
    +
    + {% else %} +
    +
    +

    + This is a FanFictionDownLoader, which makes reading stories from various websites much easier. Before you + can start downloading fanfics, you need to login, so FanFictionDownLoader can remember your fanfics and store them. +

    +

    Login using Google account

    +
    +
    + {% endif %} + +
    +

    + FanFictionDownLoader calibre Plugin +

    + + There's also a version of this downloader that runs inside + the popular calibre + ebook management package as a plugin. + +

    + + Once you have calibre installed and running, inside + calibre, you can go to 'Get plugins to enhance calibre' or + 'Get new plugins' and + install FanFictionDownLoader. + +

    +
    +
    +

    Supported sites:

    +

    + There's a + Supported + Sites page in our wiki. If you have a site you'd like + to see supported, please check there first. +

    + + {% autoescape off %}{{ supported_sites }}{% endautoescape %} + +

    + A few additional things to know, which will make your life substantially easier: +

    +
      +
    1. + First thing to know: We do not use your Google login and password. In fact, all we know about it is your ID – password + is being verified by Google and is absolutely, totally unknown to anyone but you. +
    2. +
    3. + + Small post written by Roman + — how to read fiction in Stanza or any other ebook reader. +
    4. +
    5. + You can download fanfiction directly from your iPhone, Kindle or (possibly) other ebook reader. +
    6. +
    7. + Downloaded stories are deleted after some time (which should give you enough of time to download it and will keep + Google happy about the app not going over the storage limit). +
    8. +
    9. + If you see some funny characters in downloaded Plain Text file, make sure you choose text file encoding UTF-8 and + not something else. +
    10. +
    11. + If you think that something that should work in fact doesn't, post a message to + our Google Group. we also encourage you to join it so + you will find out about latest updates and fixes as soon as possible +
    12. +
    + Otherwise, just have fun, and if you want to say thank you — use the contacts above. +
    +
    + Powered by Google App Engine +

    + This is a web front-end to FanFictionDownLoader
    + Copyright © FanFictionDownLoader team +
    + +
    + + +
    +
    + + diff --git a/index.yaml b/index.yaml new file mode 100644 index 00000000..a55512f1 --- /dev/null +++ b/index.yaml @@ -0,0 +1,28 @@ +indexes: + +# notAUTOGENERATED + +# This index.yaml is automatically updated whenever the dev_appserver +# detects that a new type of query is run. If you want to manage the +# index.yaml file manually, remove the above marker line (the line +# saying "# AUTOGENERATED"). If you want to manage some indexes +# manually, move them above the marker line. The index.yaml file is +# automatically uploaded to the admin console when you next deploy +# your application using appcfg.py. + +- kind: DownloadData + properties: + - name: download + - name: index + +- kind: DownloadMeta + properties: + - name: user + - name: date + direction: desc + +- kind: SavedMeta + properties: + - name: count + - name: date + direction: desc diff --git a/js/fdownloader.js b/js/fdownloader.js new file mode 100644 index 00000000..8f6ab0a8 --- /dev/null +++ b/js/fdownloader.js @@ -0,0 +1,116 @@ +var g_CurrentKey = null; +var g_Counter = 0; + +var COUNTER_MAX = 50; + + +function setErrorState(error) +{ + olderr = error; + error = error + "
    " + "Complain about this error"; + $('#error').html(error); +} + +function clearErrorState() +{ + $('#error').html(''); +} + +function showFile(data) +{ + $('#yourfile').html('' + data.name + " by " + data.author + ""); + $('#yourfile').show(); +} + +function hideFile() +{ + $('#yourfile').hide(); +} + +function checkResults() +{ + if ( g_Counter >= COUNTER_MAX ) + { + return; + } + + g_Counter+=1; + + $.getJSON('/progress', { 'key' : g_CurrentKey }, function(data) + { + if ( data.result != "Nope") + { + if ( data.result != "OK" ) + { + leaveLoadingState(); + setErrorState(data.result); + } + else + { + showFile(data); + leaveLoadingState(); + // result = data.split("|"); + // showFile(result[1], result[2], result[3]); + } + + $("#progressbar").progressbar('destroy'); + g_Counter = 101; + } + }); + + if ( g_Counter < COUNTER_MAX ) + setTimeout("checkResults()", 1000); + else + { + leaveLoadingState(); + setErrorState("Operation takes too long - terminating by timeout (story too long?)"); + } +} + +function enterLoadingState() +{ + $('#submit_button').hide(); + $('#ajax_loader').show(); +} + +function leaveLoadingState() +{ + $('#submit_button').show(); + $('#ajax_loader').hide(); +} + +function downloadFanfic() +{ + clearErrorState(); + hideFile(); + + + format = $("#format").val(); + alert(format); + + return; + + var url = $('#url').val(); + var login = $('#login').val(); + var password = $('#password').val(); + + if ( url == '' ) + { + setErrorState('URL shouldn\'t be empty'); + return; + } + + if ( (url.indexOf('fanfiction.net') == -1 && url.indexOf('fanficauthors') == -1 && url.indexOf('ficwad') == -1 && url.indexOf('fictionpress') == -1) || (url.indexOf('adultfanfiction.net') != -1) ) + { + setErrorState("This source is not yet supported. 
Ping me if you want it!"); + return; + } + + $.post('/submitDownload', {'url' : url, 'login' : login, 'password' : password, 'format' : format}, function(data) + { + g_CurrentKey = data; + g_Counter = 0; + setTimeout("checkResults()", 1000); + enterLoadingState(); + }) +} \ No newline at end of file diff --git a/js/jquery-1.3.2.js b/js/jquery-1.3.2.js new file mode 100644 index 00000000..92635743 --- /dev/null +++ b/js/jquery-1.3.2.js @@ -0,0 +1,4376 @@ +/*! + * jQuery JavaScript Library v1.3.2 + * http://jquery.com/ + * + * Copyright (c) 2009 John Resig + * Dual licensed under the MIT and GPL licenses. + * http://docs.jquery.com/License + * + * Date: 2009-02-19 17:34:21 -0500 (Thu, 19 Feb 2009) + * Revision: 6246 + */ +(function(){ + +var + // Will speed up references to window, and allows munging its name. + window = this, + // Will speed up references to undefined, and allows munging its name. + undefined, + // Map over jQuery in case of overwrite + _jQuery = window.jQuery, + // Map over the $ in case of overwrite + _$ = window.$, + + jQuery = window.jQuery = window.$ = function( selector, context ) { + // The jQuery object is actually just the init constructor 'enhanced' + return new jQuery.fn.init( selector, context ); + }, + + // A simple way to check for HTML strings or ID strings + // (both of which we optimize for) + quickExpr = /^[^<]*(<(.|\s)+>)[^>]*$|^#([\w-]+)$/, + // Is it a simple selector + isSimple = /^.[^:#\[\.,]*$/; + +jQuery.fn = jQuery.prototype = { + init: function( selector, context ) { + // Make sure that a selection was provided + selector = selector || document; + + // Handle $(DOMElement) + if ( selector.nodeType ) { + this[0] = selector; + this.length = 1; + this.context = selector; + return this; + } + // Handle HTML strings + if ( typeof selector === "string" ) { + // Are we dealing with HTML string or an ID? 
+ var match = quickExpr.exec( selector ); + + // Verify a match, and that no context was specified for #id + if ( match && (match[1] || !context) ) { + + // HANDLE: $(html) -> $(array) + if ( match[1] ) + selector = jQuery.clean( [ match[1] ], context ); + + // HANDLE: $("#id") + else { + var elem = document.getElementById( match[3] ); + + // Handle the case where IE and Opera return items + // by name instead of ID + if ( elem && elem.id != match[3] ) + return jQuery().find( selector ); + + // Otherwise, we inject the element directly into the jQuery object + var ret = jQuery( elem || [] ); + ret.context = document; + ret.selector = selector; + return ret; + } + + // HANDLE: $(expr, [context]) + // (which is just equivalent to: $(content).find(expr) + } else + return jQuery( context ).find( selector ); + + // HANDLE: $(function) + // Shortcut for document ready + } else if ( jQuery.isFunction( selector ) ) + return jQuery( document ).ready( selector ); + + // Make sure that old selector state is passed along + if ( selector.selector && selector.context ) { + this.selector = selector.selector; + this.context = selector.context; + } + + return this.setArray(jQuery.isArray( selector ) ? + selector : + jQuery.makeArray(selector)); + }, + + // Start with an empty selector + selector: "", + + // The current version of jQuery being used + jquery: "1.3.2", + + // The number of elements contained in the matched element set + size: function() { + return this.length; + }, + + // Get the Nth element in the matched element set OR + // Get the whole matched element set as a clean array + get: function( num ) { + return num === undefined ? 
+ + // Return a 'clean' array + Array.prototype.slice.call( this ) : + + // Return just the object + this[ num ]; + }, + + // Take an array of elements and push it onto the stack + // (returning the new matched element set) + pushStack: function( elems, name, selector ) { + // Build a new jQuery matched element set + var ret = jQuery( elems ); + + // Add the old object onto the stack (as a reference) + ret.prevObject = this; + + ret.context = this.context; + + if ( name === "find" ) + ret.selector = this.selector + (this.selector ? " " : "") + selector; + else if ( name ) + ret.selector = this.selector + "." + name + "(" + selector + ")"; + + // Return the newly-formed element set + return ret; + }, + + // Force the current matched set of elements to become + // the specified array of elements (destroying the stack in the process) + // You should use pushStack() in order to do this, but maintain the stack + setArray: function( elems ) { + // Resetting the length to 0, then using the native Array push + // is a super-fast way to populate an object with array-like properties + this.length = 0; + Array.prototype.push.apply( this, elems ); + + return this; + }, + + // Execute a callback for every element in the matched set. + // (You can seed the arguments with an array of args, but this is + // only used internally.) + each: function( callback, args ) { + return jQuery.each( this, callback, args ); + }, + + // Determine the position of an element within + // the matched set of elements + index: function( elem ) { + // Locate the position of the desired element + return jQuery.inArray( + // If it receives a jQuery object, the first element is used + elem && elem.jquery ? 
elem[0] : elem + , this ); + }, + + attr: function( name, value, type ) { + var options = name; + + // Look for the case where we're accessing a style value + if ( typeof name === "string" ) + if ( value === undefined ) + return this[0] && jQuery[ type || "attr" ]( this[0], name ); + + else { + options = {}; + options[ name ] = value; + } + + // Check to see if we're setting style values + return this.each(function(i){ + // Set all the styles + for ( name in options ) + jQuery.attr( + type ? + this.style : + this, + name, jQuery.prop( this, options[ name ], type, i, name ) + ); + }); + }, + + css: function( key, value ) { + // ignore negative width and height values + if ( (key == 'width' || key == 'height') && parseFloat(value) < 0 ) + value = undefined; + return this.attr( key, value, "curCSS" ); + }, + + text: function( text ) { + if ( typeof text !== "object" && text != null ) + return this.empty().append( (this[0] && this[0].ownerDocument || document).createTextNode( text ) ); + + var ret = ""; + + jQuery.each( text || this, function(){ + jQuery.each( this.childNodes, function(){ + if ( this.nodeType != 8 ) + ret += this.nodeType != 1 ? 
+ this.nodeValue : + jQuery.fn.text( [ this ] ); + }); + }); + + return ret; + }, + + wrapAll: function( html ) { + if ( this[0] ) { + // The elements to wrap the target around + var wrap = jQuery( html, this[0].ownerDocument ).clone(); + + if ( this[0].parentNode ) + wrap.insertBefore( this[0] ); + + wrap.map(function(){ + var elem = this; + + while ( elem.firstChild ) + elem = elem.firstChild; + + return elem; + }).append(this); + } + + return this; + }, + + wrapInner: function( html ) { + return this.each(function(){ + jQuery( this ).contents().wrapAll( html ); + }); + }, + + wrap: function( html ) { + return this.each(function(){ + jQuery( this ).wrapAll( html ); + }); + }, + + append: function() { + return this.domManip(arguments, true, function(elem){ + if (this.nodeType == 1) + this.appendChild( elem ); + }); + }, + + prepend: function() { + return this.domManip(arguments, true, function(elem){ + if (this.nodeType == 1) + this.insertBefore( elem, this.firstChild ); + }); + }, + + before: function() { + return this.domManip(arguments, false, function(elem){ + this.parentNode.insertBefore( elem, this ); + }); + }, + + after: function() { + return this.domManip(arguments, false, function(elem){ + this.parentNode.insertBefore( elem, this.nextSibling ); + }); + }, + + end: function() { + return this.prevObject || jQuery( [] ); + }, + + // For internal use only. + // Behaves like an Array's method, not like a jQuery method. 
+ push: [].push, + sort: [].sort, + splice: [].splice, + + find: function( selector ) { + if ( this.length === 1 ) { + var ret = this.pushStack( [], "find", selector ); + ret.length = 0; + jQuery.find( selector, this[0], ret ); + return ret; + } else { + return this.pushStack( jQuery.unique(jQuery.map(this, function(elem){ + return jQuery.find( selector, elem ); + })), "find", selector ); + } + }, + + clone: function( events ) { + // Do the clone + var ret = this.map(function(){ + if ( !jQuery.support.noCloneEvent && !jQuery.isXMLDoc(this) ) { + // IE copies events bound via attachEvent when + // using cloneNode. Calling detachEvent on the + // clone will also remove the events from the orignal + // In order to get around this, we use innerHTML. + // Unfortunately, this means some modifications to + // attributes in IE that are actually only stored + // as properties will not be copied (such as the + // the name attribute on an input). + var html = this.outerHTML; + if ( !html ) { + var div = this.ownerDocument.createElement("div"); + div.appendChild( this.cloneNode(true) ); + html = div.innerHTML; + } + + return jQuery.clean([html.replace(/ jQuery\d+="(?:\d+|null)"/g, "").replace(/^\s*/, "")])[0]; + } else + return this.cloneNode(true); + }); + + // Copy the events from the original to the clone + if ( events === true ) { + var orig = this.find("*").andSelf(), i = 0; + + ret.find("*").andSelf().each(function(){ + if ( this.nodeName !== orig[i].nodeName ) + return; + + var events = jQuery.data( orig[i], "events" ); + + for ( var type in events ) { + for ( var handler in events[ type ] ) { + jQuery.event.add( this, type, events[ type ][ handler ], events[ type ][ handler ].data ); + } + } + + i++; + }); + } + + // Return the cloned set + return ret; + }, + + filter: function( selector ) { + return this.pushStack( + jQuery.isFunction( selector ) && + jQuery.grep(this, function(elem, i){ + return selector.call( elem, i ); + }) || + + jQuery.multiFilter( selector, 
jQuery.grep(this, function(elem){ + return elem.nodeType === 1; + }) ), "filter", selector ); + }, + + closest: function( selector ) { + var pos = jQuery.expr.match.POS.test( selector ) ? jQuery(selector) : null, + closer = 0; + + return this.map(function(){ + var cur = this; + while ( cur && cur.ownerDocument ) { + if ( pos ? pos.index(cur) > -1 : jQuery(cur).is(selector) ) { + jQuery.data(cur, "closest", closer); + return cur; + } + cur = cur.parentNode; + closer++; + } + }); + }, + + not: function( selector ) { + if ( typeof selector === "string" ) + // test special case where just one selector is passed in + if ( isSimple.test( selector ) ) + return this.pushStack( jQuery.multiFilter( selector, this, true ), "not", selector ); + else + selector = jQuery.multiFilter( selector, this ); + + var isArrayLike = selector.length && selector[selector.length - 1] !== undefined && !selector.nodeType; + return this.filter(function() { + return isArrayLike ? jQuery.inArray( this, selector ) < 0 : this != selector; + }); + }, + + add: function( selector ) { + return this.pushStack( jQuery.unique( jQuery.merge( + this.get(), + typeof selector === "string" ? + jQuery( selector ) : + jQuery.makeArray( selector ) + ))); + }, + + is: function( selector ) { + return !!selector && jQuery.multiFilter( selector, this ).length > 0; + }, + + hasClass: function( selector ) { + return !!selector && this.is( "." + selector ); + }, + + val: function( value ) { + if ( value === undefined ) { + var elem = this[0]; + + if ( elem ) { + if( jQuery.nodeName( elem, 'option' ) ) + return (elem.attributes.value || {}).specified ? elem.value : elem.text; + + // We need to handle select boxes special + if ( jQuery.nodeName( elem, "select" ) ) { + var index = elem.selectedIndex, + values = [], + options = elem.options, + one = elem.type == "select-one"; + + // Nothing was selected + if ( index < 0 ) + return null; + + // Loop through all the selected options + for ( var i = one ? 
index : 0, max = one ? index + 1 : options.length; i < max; i++ ) { + var option = options[ i ]; + + if ( option.selected ) { + // Get the specifc value for the option + value = jQuery(option).val(); + + // We don't need an array for one selects + if ( one ) + return value; + + // Multi-Selects return an array + values.push( value ); + } + } + + return values; + } + + // Everything else, we just grab the value + return (elem.value || "").replace(/\r/g, ""); + + } + + return undefined; + } + + if ( typeof value === "number" ) + value += ''; + + return this.each(function(){ + if ( this.nodeType != 1 ) + return; + + if ( jQuery.isArray(value) && /radio|checkbox/.test( this.type ) ) + this.checked = (jQuery.inArray(this.value, value) >= 0 || + jQuery.inArray(this.name, value) >= 0); + + else if ( jQuery.nodeName( this, "select" ) ) { + var values = jQuery.makeArray(value); + + jQuery( "option", this ).each(function(){ + this.selected = (jQuery.inArray( this.value, values ) >= 0 || + jQuery.inArray( this.text, values ) >= 0); + }); + + if ( !values.length ) + this.selectedIndex = -1; + + } else + this.value = value; + }); + }, + + html: function( value ) { + return value === undefined ? + (this[0] ? 
+ this[0].innerHTML.replace(/ jQuery\d+="(?:\d+|null)"/g, "") : + null) : + this.empty().append( value ); + }, + + replaceWith: function( value ) { + return this.after( value ).remove(); + }, + + eq: function( i ) { + return this.slice( i, +i + 1 ); + }, + + slice: function() { + return this.pushStack( Array.prototype.slice.apply( this, arguments ), + "slice", Array.prototype.slice.call(arguments).join(",") ); + }, + + map: function( callback ) { + return this.pushStack( jQuery.map(this, function(elem, i){ + return callback.call( elem, i, elem ); + })); + }, + + andSelf: function() { + return this.add( this.prevObject ); + }, + + domManip: function( args, table, callback ) { + if ( this[0] ) { + var fragment = (this[0].ownerDocument || this[0]).createDocumentFragment(), + scripts = jQuery.clean( args, (this[0].ownerDocument || this[0]), fragment ), + first = fragment.firstChild; + + if ( first ) + for ( var i = 0, l = this.length; i < l; i++ ) + callback.call( root(this[i], first), this.length > 1 || i > 0 ? + fragment.cloneNode(true) : fragment ); + + if ( scripts ) + jQuery.each( scripts, evalScript ); + } + + return this; + + function root( elem, cur ) { + return table && jQuery.nodeName(elem, "table") && jQuery.nodeName(cur, "tr") ? 
+ (elem.getElementsByTagName("tbody")[0] || + elem.appendChild(elem.ownerDocument.createElement("tbody"))) : + elem; + } + } +}; + +// Give the init function the jQuery prototype for later instantiation +jQuery.fn.init.prototype = jQuery.fn; + +function evalScript( i, elem ) { + if ( elem.src ) + jQuery.ajax({ + url: elem.src, + async: false, + dataType: "script" + }); + + else + jQuery.globalEval( elem.text || elem.textContent || elem.innerHTML || "" ); + + if ( elem.parentNode ) + elem.parentNode.removeChild( elem ); +} + +function now(){ + return +new Date; +} + +jQuery.extend = jQuery.fn.extend = function() { + // copy reference to target object + var target = arguments[0] || {}, i = 1, length = arguments.length, deep = false, options; + + // Handle a deep copy situation + if ( typeof target === "boolean" ) { + deep = target; + target = arguments[1] || {}; + // skip the boolean and the target + i = 2; + } + + // Handle case when target is a string or something (possible in deep copy) + if ( typeof target !== "object" && !jQuery.isFunction(target) ) + target = {}; + + // extend jQuery itself if only one argument is passed + if ( length == i ) { + target = this; + --i; + } + + for ( ; i < length; i++ ) + // Only deal with non-null/undefined values + if ( (options = arguments[ i ]) != null ) + // Extend the base object + for ( var name in options ) { + var src = target[ name ], copy = options[ name ]; + + // Prevent never-ending loop + if ( target === copy ) + continue; + + // Recurse if we're merging object values + if ( deep && copy && typeof copy === "object" && !copy.nodeType ) + target[ name ] = jQuery.extend( deep, + // Never move original objects, clone them + src || ( copy.length != null ? 
[ ] : { } ) + , copy ); + + // Don't bring in undefined values + else if ( copy !== undefined ) + target[ name ] = copy; + + } + + // Return the modified object + return target; +}; + +// exclude the following css properties to add px +var exclude = /z-?index|font-?weight|opacity|zoom|line-?height/i, + // cache defaultView + defaultView = document.defaultView || {}, + toString = Object.prototype.toString; + +jQuery.extend({ + noConflict: function( deep ) { + window.$ = _$; + + if ( deep ) + window.jQuery = _jQuery; + + return jQuery; + }, + + // See test/unit/core.js for details concerning isFunction. + // Since version 1.3, DOM methods and functions like alert + // aren't supported. They return false on IE (#2968). + isFunction: function( obj ) { + return toString.call(obj) === "[object Function]"; + }, + + isArray: function( obj ) { + return toString.call(obj) === "[object Array]"; + }, + + // check if an element is in a (or is an) XML document + isXMLDoc: function( elem ) { + return elem.nodeType === 9 && elem.documentElement.nodeName !== "HTML" || + !!elem.ownerDocument && jQuery.isXMLDoc( elem.ownerDocument ); + }, + + // Evalulates a script in a global context + globalEval: function( data ) { + if ( data && /\S/.test(data) ) { + // Inspired by code by Andrea Giammarchi + // http://webreflection.blogspot.com/2007/08/global-scope-evaluation-and-dom.html + var head = document.getElementsByTagName("head")[0] || document.documentElement, + script = document.createElement("script"); + + script.type = "text/javascript"; + if ( jQuery.support.scriptEval ) + script.appendChild( document.createTextNode( data ) ); + else + script.text = data; + + // Use insertBefore instead of appendChild to circumvent an IE6 bug. + // This arises when a base node is used (#2709). 
+ head.insertBefore( script, head.firstChild ); + head.removeChild( script ); + } + }, + + nodeName: function( elem, name ) { + return elem.nodeName && elem.nodeName.toUpperCase() == name.toUpperCase(); + }, + + // args is for internal usage only + each: function( object, callback, args ) { + var name, i = 0, length = object.length; + + if ( args ) { + if ( length === undefined ) { + for ( name in object ) + if ( callback.apply( object[ name ], args ) === false ) + break; + } else + for ( ; i < length; ) + if ( callback.apply( object[ i++ ], args ) === false ) + break; + + // A special, fast, case for the most common use of each + } else { + if ( length === undefined ) { + for ( name in object ) + if ( callback.call( object[ name ], name, object[ name ] ) === false ) + break; + } else + for ( var value = object[0]; + i < length && callback.call( value, i, value ) !== false; value = object[++i] ){} + } + + return object; + }, + + prop: function( elem, value, type, i, name ) { + // Handle executable functions + if ( jQuery.isFunction( value ) ) + value = value.call( elem, i ); + + // Handle passing in a number to a CSS property + return typeof value === "number" && type == "curCSS" && !exclude.test( name ) ? + value + "px" : + value; + }, + + className: { + // internal only, use addClass("class") + add: function( elem, classNames ) { + jQuery.each((classNames || "").split(/\s+/), function(i, className){ + if ( elem.nodeType == 1 && !jQuery.className.has( elem.className, className ) ) + elem.className += (elem.className ? " " : "") + className; + }); + }, + + // internal only, use removeClass("class") + remove: function( elem, classNames ) { + if (elem.nodeType == 1) + elem.className = classNames !== undefined ? 
+ jQuery.grep(elem.className.split(/\s+/), function(className){ + return !jQuery.className.has( classNames, className ); + }).join(" ") : + ""; + }, + + // internal only, use hasClass("class") + has: function( elem, className ) { + return elem && jQuery.inArray( className, (elem.className || elem).toString().split(/\s+/) ) > -1; + } + }, + + // A method for quickly swapping in/out CSS properties to get correct calculations + swap: function( elem, options, callback ) { + var old = {}; + // Remember the old values, and insert the new ones + for ( var name in options ) { + old[ name ] = elem.style[ name ]; + elem.style[ name ] = options[ name ]; + } + + callback.call( elem ); + + // Revert the old values + for ( var name in options ) + elem.style[ name ] = old[ name ]; + }, + + css: function( elem, name, force, extra ) { + if ( name == "width" || name == "height" ) { + var val, props = { position: "absolute", visibility: "hidden", display:"block" }, which = name == "width" ? [ "Left", "Right" ] : [ "Top", "Bottom" ]; + + function getWH() { + val = name == "width" ? elem.offsetWidth : elem.offsetHeight; + + if ( extra === "border" ) + return; + + jQuery.each( which, function() { + if ( !extra ) + val -= parseFloat(jQuery.curCSS( elem, "padding" + this, true)) || 0; + if ( extra === "margin" ) + val += parseFloat(jQuery.curCSS( elem, "margin" + this, true)) || 0; + else + val -= parseFloat(jQuery.curCSS( elem, "border" + this + "Width", true)) || 0; + }); + } + + if ( elem.offsetWidth !== 0 ) + getWH(); + else + jQuery.swap( elem, props, getWH ); + + return Math.max(0, Math.round(val)); + } + + return jQuery.curCSS( elem, name, force ); + }, + + curCSS: function( elem, name, force ) { + var ret, style = elem.style; + + // We need to handle opacity special in IE + if ( name == "opacity" && !jQuery.support.opacity ) { + ret = jQuery.attr( style, "opacity" ); + + return ret == "" ? 
+ "1" : + ret; + } + + // Make sure we're using the right name for getting the float value + if ( name.match( /float/i ) ) + name = styleFloat; + + if ( !force && style && style[ name ] ) + ret = style[ name ]; + + else if ( defaultView.getComputedStyle ) { + + // Only "float" is needed here + if ( name.match( /float/i ) ) + name = "float"; + + name = name.replace( /([A-Z])/g, "-$1" ).toLowerCase(); + + var computedStyle = defaultView.getComputedStyle( elem, null ); + + if ( computedStyle ) + ret = computedStyle.getPropertyValue( name ); + + // We should always get a number back from opacity + if ( name == "opacity" && ret == "" ) + ret = "1"; + + } else if ( elem.currentStyle ) { + var camelCase = name.replace(/\-(\w)/g, function(all, letter){ + return letter.toUpperCase(); + }); + + ret = elem.currentStyle[ name ] || elem.currentStyle[ camelCase ]; + + // From the awesome hack by Dean Edwards + // http://erik.eae.net/archives/2007/07/27/18.54.15/#comment-102291 + + // If we're not dealing with a regular pixel number + // but a number that has a weird ending, we need to convert it to pixels + if ( !/^\d+(px)?$/i.test( ret ) && /^\d/.test( ret ) ) { + // Remember the original values + var left = style.left, rsLeft = elem.runtimeStyle.left; + + // Put in the new values to get a computed value out + elem.runtimeStyle.left = elem.currentStyle.left; + style.left = ret || 0; + ret = style.pixelLeft + "px"; + + // Revert the changed values + style.left = left; + elem.runtimeStyle.left = rsLeft; + } + } + + return ret; + }, + + clean: function( elems, context, fragment ) { + context = context || document; + + // !context.createElement fails in IE with an error but returns typeof 'object' + if ( typeof context.createElement === "undefined" ) + context = context.ownerDocument || context[0] && context[0].ownerDocument || document; + + // If a single string is passed in and it's a single tag + // just do a createElement and skip the rest + if ( !fragment && elems.length === 1 
&& typeof elems[0] === "string" ) { + var match = /^<(\w+)\s*\/?>$/.exec(elems[0]); + if ( match ) + return [ context.createElement( match[1] ) ]; + } + + var ret = [], scripts = [], div = context.createElement("div"); + + jQuery.each(elems, function(i, elem){ + if ( typeof elem === "number" ) + elem += ''; + + if ( !elem ) + return; + + // Convert html string into DOM nodes + if ( typeof elem === "string" ) { + // Fix "XHTML"-style tags in all browsers + elem = elem.replace(/(<(\w+)[^>]*?)\/>/g, function(all, front, tag){ + return tag.match(/^(abbr|br|col|img|input|link|meta|param|hr|area|embed)$/i) ? + all : + front + ">"; + }); + + // Trim whitespace, otherwise indexOf won't work as expected + var tags = elem.replace(/^\s+/, "").substring(0, 10).toLowerCase(); + + var wrap = + // option or optgroup + !tags.indexOf("", "" ] || + + !tags.indexOf("", "" ] || + + tags.match(/^<(thead|tbody|tfoot|colg|cap)/) && + [ 1, "", "
    " ] || + + !tags.indexOf("
    " ] || + + // matched above + (!tags.indexOf("", "" ] || + + !tags.indexOf("", "" ] || + + // IE can't serialize and + + +

    +

    + FanFictionDownLoader +

    + +
    + + +
    + + {% if fic.failure %} +
    + {{ fic.failure }} +
    + {% endif %} +
    + + +
    + + {% if is_login %} + + {% if is_passwdonly %} +

    Password

    +
    + {{ site }} requires a Password for this story.
    + You need to provide the Password for this story + to download it. +
    + {% else %} +

    Login / Password

    +
    + {{ site }} requires a Login/Password for this story.
    + You need to provide your Login/Password for {{ site }} + to download it. +
    +
    +
    Login
    +
    +
    + {% endif %} +
    +
    Password
    +
    +
    + + {% else %} + + + +
    +
    Are you an Adult?
    +
    + + {% endif %} + +
    + +
    + +
    +
    + +
    + Powered by Google App Engine +

    + This is a web front-end to FanFictionDownLoader
    + Copyright © FanFictionDownLoader team +
    + +
    + + +
    +
    + + diff --git a/main.py b/main.py new file mode 100644 index 00000000..ee18b6df --- /dev/null +++ b/main.py @@ -0,0 +1,639 @@ +#!/usr/bin/env python +# +# Copyright 2007 Google Inc. +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import logging +logging.getLogger().setLevel(logging.DEBUG) + +import os +from os.path import dirname, basename, normpath +import re +import sys +import zlib +import urllib +import datetime + +import traceback +from StringIO import StringIO + +## Just to shut up the appengine warning about "You are using the +## default Django version (0.96). The default Django version will +## change in an App Engine release in the near future. Please call +## use_library() to explicitly select a Django version. For more +## information see +## http://code.google.com/appengine/docs/python/tools/libraries.html#Django" +## Note that if you are using the SDK App Engine Launcher and hit an SDK +## Console page first, you will get a django version mismatch error when you +## to go hit one of the application pages. Just change a file again, and +## make sure to hit an app page before the SDK page to clear it. 
+#os.environ['DJANGO_SETTINGS_MODULE'] = 'settings' +#from google.appengine.dist import use_library +#use_library('django', '1.2') + +from google.appengine.ext import db +from google.appengine.api import taskqueue +from google.appengine.api import users +#from google.appengine.ext import webapp +import webapp2 +from google.appengine.ext.webapp import template +#from google.appengine.ext.webapp2 import util +from google.appengine.runtime import DeadlineExceededError + +from ffstorage import * + +from fanficdownloader import adapters, writers, exceptions +from fanficdownloader.configurable import Configuration + +class UserConfigServer(webapp2.RequestHandler): + + def getUserConfig(self,user,url,fileformat): + + configuration = Configuration(adapters.getConfigSectionFor(url),fileformat) + + logging.debug('reading defaults.ini config file') + configuration.read('defaults.ini') + + ## Pull user's config record. + l = UserConfig.all().filter('user =', user).fetch(1) + if l and l[0].config: + uconfig=l[0] + #logging.debug('reading config from UserConfig(%s)'%uconfig.config) + configuration.readfp(StringIO(uconfig.config)) + + return configuration + +class MainHandler(webapp2.RequestHandler): + def get(self): + user = users.get_current_user() + if user: + error = self.request.get('error') + template_values = {'nickname' : user.nickname(), 'authorized': True} + url = self.request.get('url') + template_values['url'] = url + + if error: + if error == 'login_required': + template_values['error_message'] = 'This story (or one of the chapters) requires you to be logged in.' 
+ elif error == 'bad_url': + template_values['error_message'] = 'Unsupported URL: ' + url + elif error == 'custom': + template_values['error_message'] = 'Error happened: ' + self.request.get('errtext') + elif error == 'configsaved': + template_values['error_message'] = 'Configuration Saved' + elif error == 'recentcleared': + template_values['error_message'] = 'Your Recent Downloads List has been Cleared' + + filename = self.request.get('file') + if len(filename) > 1: + template_values['yourfile'] = '''''' % (filename, self.request.get('name'), self.request.get('author')) + + self.response.headers['Content-Type'] = 'text/html' + path = os.path.join(os.path.dirname(__file__), 'index.html') + + else: + logging.debug(users.create_login_url('/')) + url = users.create_login_url(self.request.uri) + template_values = {'login_url' : url, 'authorized': False} + path = os.path.join(os.path.dirname(__file__), 'index.html') + + + template_values['supported_sites'] = '
    \n' + for (site,examples) in adapters.getSiteExamples(): + template_values['supported_sites'] += "
    %s
    \n
    Example Story URLs:
    "%site + for u in examples: + template_values['supported_sites'] += "%s
    \n"%(u,u) + template_values['supported_sites'] += "
    \n" + template_values['supported_sites'] += '
    \n' + + self.response.out.write(template.render(path, template_values)) + + +class EditConfigServer(UserConfigServer): + def get(self): + self.post() + + def post(self): + user = users.get_current_user() + if not user: + self.redirect(users.create_login_url(self.request.uri)) + return + + template_values = {'nickname' : user.nickname(), 'authorized': True} + + ## Pull user's config record. + l = UserConfig.all().filter('user =', user).fetch(1) + if l: + uconfig=l[0] + else: + uconfig=None + + if self.request.get('update'): + if uconfig is None: + uconfig = UserConfig() + uconfig.user = user + uconfig.config = self.request.get('config').encode('utf8')[:10000] ## just in case. + uconfig.put() + try: + # just getting config for testing purposes. + configuration = self.getUserConfig(user,"test1.com","epub") + self.redirect("/?error=configsaved") + except Exception, e: + logging.info("Saved Config Failed:%s"%e) + self.redirect("/?error=custom&errtext=%s"%urlEscape(str(e))) + else: # not update, assume display for edit + if uconfig is not None and uconfig.config: + config = uconfig.config + else: + configfile = open("example.ini","rb") + config = configfile.read() + configfile.close() + template_values['config'] = config + + configfile = open("defaults.ini","rb") + config = configfile.read() + configfile.close() + template_values['defaultsini'] = config + + path = os.path.join(os.path.dirname(__file__), 'editconfig.html') + self.response.headers['Content-Type'] = 'text/html' + self.response.out.write(template.render(path, template_values)) + + +class FileServer(webapp2.RequestHandler): + + def get(self): + fileId = self.request.get('id') + + if fileId == None or len(fileId) < 3: + self.redirect('/') + return + + try: + download = getDownloadMeta(id=fileId) + + name = download.name.encode('utf-8') + + logging.info("Serving file: %s" % name) + + if name.endswith('.epub'): + self.response.headers['Content-Type'] = 'application/epub+zip' + elif name.endswith('.html'): + 
self.response.headers['Content-Type'] = 'text/html' + elif name.endswith('.txt'): + self.response.headers['Content-Type'] = 'text/plain' + elif name.endswith('.mobi'): + self.response.headers['Content-Type'] = 'application/x-mobipocket-ebook' + elif name.endswith('.zip'): + self.response.headers['Content-Type'] = 'application/zip' + else: + self.response.headers['Content-Type'] = 'application/octet-stream' + + self.response.headers['Content-disposition'] = 'attachment; filename="%s"' % name + + data = DownloadData.all().filter("download =", download).order("index") + # epubs are all already compressed. + # Each chunk is compress individually to avoid having + # to hold the whole in memory just for the + # compress/uncompress + if download.format != 'epub': + def dc(data): + try: + return zlib.decompress(data) + # if error, assume it's a chunk from before we started compessing. + except zlib.error: + return data + else: + def dc(data): + return data + + for datum in data: + self.response.out.write(dc(datum.blob)) + + except Exception, e: + fic = DownloadMeta() + fic.failure = unicode(e) + + template_values = dict(fic = fic, + #nickname = user.nickname(), + #escaped_url = escaped_url + ) + path = os.path.join(os.path.dirname(__file__), 'status.html') + self.response.out.write(template.render(path, template_values)) + +class FileStatusServer(webapp2.RequestHandler): + def get(self): + user = users.get_current_user() + if not user: + self.redirect(users.create_login_url(self.request.uri)) + return + + fileId = self.request.get('id') + + if fileId == None or len(fileId) < 3: + self.redirect('/') + + escaped_url=False + + try: + download = getDownloadMeta(id=fileId) + + if download: + logging.info("Status url: %s" % download.url) + if download.completed and download.format=='epub': + escaped_url = urlEscape(self.request.host_url+"/file/"+download.name+"."+download.format+"?id="+fileId+"&fake=file."+download.format) + else: + download = DownloadMeta() + download.failure = 
"Download not found" + + except Exception, e: + download = DownloadMeta() + download.failure = unicode(e) + + template_values = dict(fic = download, + nickname = user.nickname(), + escaped_url = escaped_url + ) + path = os.path.join(os.path.dirname(__file__), 'status.html') + self.response.out.write(template.render(path, template_values)) + +class ClearRecentServer(webapp2.RequestHandler): + def get(self): + user = users.get_current_user() + if not user: + self.redirect(users.create_login_url(self.request.uri)) + return + + logging.info("Clearing Recent List for user: "+user.nickname()) + q = DownloadMeta.all() + q.filter('user =', user) + num=0 + while( True ): + results = q.fetch(100) + if results: + for d in results: + d.delete() + for c in d.data_chunks: + c.delete() + num = num + 1 + logging.debug('Delete '+d.url) + else: + break + logging.info('Deleted %d instances download.' % num) + self.redirect("/?error=recentcleared") + +class RecentFilesServer(webapp2.RequestHandler): + def get(self): + user = users.get_current_user() + if not user: + self.redirect(users.create_login_url(self.request.uri)) + return + + q = DownloadMeta.all() + q.filter('user =', user).order('-date') + fics = q.fetch(100) + logging.info("Recent fetched %d downloads for user %s."%(len(fics),user.nickname())) + + for fic in fics: + if fic.completed and fic.format == 'epub': + fic.escaped_url = urlEscape(self.request.host_url+"/file/"+fic.name+"."+fic.format+"?id="+str(fic.key())+"&fake=file."+fic.format) + + template_values = dict(fics = fics, nickname = user.nickname()) + path = os.path.join(os.path.dirname(__file__), 'recent.html') + self.response.out.write(template.render(path, template_values)) + +class AllRecentFilesServer(webapp2.RequestHandler): + def get(self): + user = users.get_current_user() + if not user: + self.redirect(users.create_login_url(self.request.uri)) + return + + q = SavedMeta.all() + if self.request.get('bydate'): + q.order('-date') + else: + q.order('-count') + + 
fics = q.fetch(200) + logging.info("Recent fetched %d downloads for user %s."%(len(fics),user.nickname())) + + sendslugs = [] + + for fic in fics: + ficslug = FicSlug(fic) + sendslugs.append(ficslug) + + template_values = dict(fics = sendslugs, nickname = user.nickname()) + path = os.path.join(os.path.dirname(__file__), 'allrecent.html') + self.response.out.write(template.render(path, template_values)) + +class FicSlug(): + def __init__(self,savedmeta): + self.url = savedmeta.url + self.count = savedmeta.count + for k, v in savedmeta.meta.iteritems(): + setattr(self,k,v) + +class FanfictionDownloader(UserConfigServer): + def get(self): + self.post() + + def post(self): + logging.getLogger().setLevel(logging.DEBUG) + user = users.get_current_user() + if not user: + self.redirect(users.create_login_url(self.request.uri)) + return + + format = self.request.get('format') + url = self.request.get('url') + + if not url or url.strip() == "": + self.redirect('/') + return + + logging.info("Queuing Download: %s" % url) + login = self.request.get('login') + password = self.request.get('password') + is_adult = self.request.get('is_adult') == "on" + + # use existing record if available. Fetched/Created before + # the adapter can normalize the URL in case we need to record + # an exception. + download = getDownloadMeta(url=url,user=user,format=format,new=True) + + adapter = None + try: + try: + configuration = self.getUserConfig(user,url,format) + except Exception, e: + self.redirect("/?error=custom&errtext=%s"%urlEscape("There's an error in your User Configuration: "+str(e))) + return + + adapter = adapters.getAdapter(configuration,url) + logging.info('Created an adaper: %s' % adapter) + + if login or password: + adapter.username=login + adapter.password=password + adapter.is_adult=is_adult + + ## This scrapes the metadata, which will be + ## duplicated in the queue task, but it + ## detects bad URLs, bad login, bad story, etc + ## without waiting for the queue. 
So I think + ## it's worth the double up. Could maybe save + ## it all in the download object someday. + story = adapter.getStoryMetadataOnly() + + ## Fetch again using normalized story URL. The one + ## fetched/created above, if different, will not be saved. + download = getDownloadMeta(url=story.getMetadata('storyUrl'), + user=user,format=format,new=True) + + download.title = story.getMetadata('title') + download.author = story.getMetadata('author') + download.url = story.getMetadata('storyUrl') + download.put() + + taskqueue.add(url='/fdowntask', + queue_name="download", + params={'id':str(download.key()), + 'format':format, + 'url':download.url, + 'login':login, + 'password':password, + 'user':user.email(), + 'is_adult':is_adult}) + + logging.info("enqueued download key: " + str(download.key())) + + except (exceptions.FailedToLogin,exceptions.AdultCheckRequired), e: + download.failure = unicode(e) + download.put() + logging.info(unicode(e)) + is_login= ( isinstance(e, exceptions.FailedToLogin) ) + is_passwdonly = is_login and e.passwdonly + template_values = dict(nickname = user.nickname(), + url = url, + format = format, + site = adapter.getConfigSection(), + fic = download, + is_login=is_login, + is_passwdonly=is_passwdonly + ) + # thewriterscoffeeshop.com can do adult check *and* user required. 
+ if isinstance(e,exceptions.AdultCheckRequired): + template_values['login']=login + template_values['password']=password + + path = os.path.join(os.path.dirname(__file__), 'login.html') + self.response.out.write(template.render(path, template_values)) + return + except (exceptions.InvalidStoryURL,exceptions.UnknownSite,exceptions.StoryDoesNotExist), e: + logging.warn(unicode(e)) + download.failure = unicode(e) + download.put() + except Exception, e: + logging.error("Failure Queuing Download: url:%s" % url) + logging.exception(e) + download.failure = unicode(e) + download.put() + + self.redirect('/status?id='+str(download.key())) + + return + + +class FanfictionDownloaderTask(UserConfigServer): + + def post(self): + logging.getLogger().setLevel(logging.DEBUG) + fileId = self.request.get('id') + # User object can't pass, just email address + user = users.User(self.request.get('user')) + format = self.request.get('format') + url = self.request.get('url') + login = self.request.get('login') + password = self.request.get('password') + is_adult = self.request.get('is_adult') + + logging.info("Downloading: " + url + " for user: "+user.nickname()) + logging.info("ID: " + fileId) + + adapter = None + writerClass = None + + # use existing record if available. + # fileId should have record from /fdown. + download = getDownloadMeta(id=fileId,url=url,user=user,format=format,new=True) + for c in download.data_chunks: + c.delete() + download.put() + + logging.info('Creating adapter...') + + try: + configuration = self.getUserConfig(user,url,format) + adapter = adapters.getAdapter(configuration,url) + + logging.info('Created an adapter: %s' % adapter) + + if login or password: + adapter.username=login + adapter.password=password + adapter.is_adult=is_adult + + # adapter.getStory() is what does all the heavy lifting. + # adapter.getStoryMetadataOnly() only fetches enough to + # get metadata. writer.writeStory() will call + # adapter.getStory(), too. 
+ writer = writers.getWriter(format,configuration,adapter) + download.name = writer.getOutputFileName() + #logging.debug('output_filename:'+writer.getConfig('output_filename')) + logging.debug('getOutputFileName:'+writer.getOutputFileName()) + download.title = adapter.getStory().getMetadata('title') + download.author = adapter.getStory().getMetadata('author') + download.url = adapter.getStory().getMetadata('storyUrl') + download.put() + + allmeta = adapter.getStory().getAllMetadata(removeallentities=True,doreplacements=False) + + outbuffer = StringIO() + writer.writeStory(outbuffer) + data = outbuffer.getvalue() + outbuffer.close() + del outbuffer + #del writer.adapter + #del writer.story + del writer + #del adapter.story + del adapter + + # epubs are all already compressed. Each chunk is + # compressed individually to avoid having to hold the + # whole in memory just for the compress/uncompress. + if format != 'epub': + def c(data): + return zlib.compress(data) + else: + def c(data): + return data + + # delete existing chunks first + for c in download.data_chunks: + c.delete() + + index=0 + while( len(data) > 0 ): + DownloadData(download=download, + index=index, + blob=c(data[:1000000])).put() + index += 1 + data = data[1000000:] + download.completed=True + download.put() + + smetal = SavedMeta.all().filter('url =', allmeta['storyUrl'] ).fetch(1) + if smetal and smetal[0]: + smeta = smetal[0] + smeta.count += 1 + else: + smeta=SavedMeta() + smeta.count = 1 + + smeta.url = allmeta['storyUrl'] + smeta.title = allmeta['title'] + smeta.author = allmeta['author'] + smeta.meta = allmeta + smeta.date = datetime.datetime.now() + smeta.put() + + logging.info("Download finished OK") + del data + + except Exception, e: + logging.exception(e) + download.failure = unicode(e) + download.put() + return + + return + +def getDownloadMeta(id=None,url=None,user=None,format=None,new=False): + ## try to get download rec from passed id first. 
then fall back + ## to user/url/format + download = None + if id: + try: + download = db.get(db.Key(id)) + logging.info("DownloadMeta found by ID:"+id) + except: + pass + + if not download and url and user and format: + try: + q = DownloadMeta.all().filter('user =', user).filter('url =',url).filter('format =',format).fetch(1) + if( q is not None and len(q) > 0 ): + logging.debug("DownloadMeta found by user:%s url:%s format:%s"%(user,url,format)) + download = q[0] + except: + pass + + if new: + # NOT clearing existing chunks here, because this record may + # never be saved. + if not download: + logging.debug("New DownloadMeta") + download = DownloadMeta() + + download.completed=False + download.failure=None + download.date=datetime.datetime.now() + + download.version = "%s:%s" % (os.environ['APPLICATION_ID'],os.environ['CURRENT_VERSION_ID']) + if user: + download.user = user + if url: + download.url = url + if format: + download.format = format + + return download + +def toPercentDecimal(match): + "Return the %decimal number for the character for url escaping" + s = match.group(1) + return "%%%02x" % ord(s) + +def urlEscape(data): + "Escape text, including unicode, for use in URLs" + p = re.compile(r'([^\w])') + return p.sub(toPercentDecimal, data.encode("utf-8")) + +logging.getLogger().setLevel(logging.DEBUG) +app = webapp2.WSGIApplication([('/', MainHandler), + ('/fdowntask', FanfictionDownloaderTask), + ('/fdown', FanfictionDownloader), + (r'/file.*', FileServer), + ('/status', FileStatusServer), + ('/allrecent', AllRecentFilesServer), + ('/recent', RecentFilesServer), + ('/editconfig', EditConfigServer), + ('/clearrecent', ClearRecentServer), + ], + debug=False) diff --git a/makeplugin.py b/makeplugin.py new file mode 100644 index 00000000..b6e588c7 --- /dev/null +++ b/makeplugin.py @@ -0,0 +1,38 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- + +# epubmerge.py 1.0 + +# Copyright 2011, Jim Miller + +# Licensed under the Apache License, Version 2.0 (the 
"License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +from glob import glob + +from makezip import createZipFile + +if __name__=="__main__": + filename="FanFictionDownLoader.zip" + exclude=['*.pyc','*~','*.xcf','*[0-9].png','*.po','*.pot','*default.mo'] + # from top dir. 'w' for overwrite + createZipFile(filename,"w", + ['plugin-defaults.ini','plugin-example.ini','fanficdownloader','downloader.py','defaults.ini'], + exclude=exclude) + #from calibre-plugin dir. 'a' for append + os.chdir('calibre-plugin') + files=['about.txt','images','translations'] + files.extend(glob('*.py')) + files.extend(glob('plugin-import-name-*.txt')) + createZipFile("../"+filename,"a", + files,exclude=exclude) diff --git a/makezip.py b/makezip.py new file mode 100644 index 00000000..fbab1d4c --- /dev/null +++ b/makezip.py @@ -0,0 +1,52 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- + +# Copyright 2014, Jim Miller + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
def addFolderToZip(myZipFile,folder,exclude=None):
    """
    Recursively add the contents of `folder` to an open ZipFile.

    Files matching any glob pattern in `exclude` (patterns are resolved
    relative to `folder`) are skipped.  Entries are stored deflated
    under their path relative to the current working directory.
    """
    # Mutable-default fix: exclude=[] as a default is a shared object.
    if exclude is None:
        exclude = []
    excludelist=[]
    for pattern in exclude:
        excludelist.extend(glob(folder+"/"+pattern))
    for path in glob(folder+"/*"):
        if path in excludelist:
            continue
        if os.path.isfile(path):
            myZipFile.write(path, path, zipfile.ZIP_DEFLATED)
        elif os.path.isdir(path):
            addFolderToZip(myZipFile,path,exclude=exclude)

def createZipFile(filename,mode,files,exclude=None):
    """
    Create (mode 'w') or append to (mode 'a') the zip archive `filename`.

    `files` may mix plain file names (stored under their basename) and
    directory names (added recursively with relative paths).  `exclude`
    is a list of glob patterns of entries to skip.

    Returns (1, filename).  (Previously the loop shadowed the
    `filename` parameter, so the second element was the basename of the
    last file added instead of the archive name.)
    """
    if exclude is None:
        exclude = []
    excludelist=[]
    for pattern in exclude:
        excludelist.extend(glob(pattern))
    # NOTE: the old .encode('ascii') round-trips were dropped -- they
    # crashed on any non-ASCII path and were a no-op for the plain-str
    # arguments the build scripts pass.
    myZipFile = zipfile.ZipFile( filename, mode )  # open the archive for writing
    try:
        for path in files:
            if path in excludelist:
                continue
            if os.path.isfile(path):
                # Plain files are stored flat, under their basename.
                arcname = os.path.split(path)[1]
                myZipFile.write( path, arcname, zipfile.ZIP_DEFLATED )
            if os.path.isdir(path):
                addFolderToZip(myZipFile,path,exclude=exclude)
    finally:
        # Close even when an add raises, so the handle isn't leaked and
        # a partial archive is flushed to disk.
        myZipFile.close()
    return (1,filename)
Example: + +## [defaults] +## titlepage_entries: category,genre, status +## [www.whofic.com] +## # overrides defaults. +## titlepage_entries: category,genre, status,dateUpdated,rating +## [epub] +## # overrides defaults & site section +## titlepage_entries: category,genre, status,datePublished,dateUpdated,dateCreated +## [www.whofic.com:epub] +## # overrides defaults, site section & format section +## titlepage_entries: category,genre, status,datePublished +## [overrides] +## # overrides all other sections +## titlepage_entries: category + +## Some sites also require the user to confirm they are adult for +## adult content. Uncomment by removing '#' in front of is_adult. +#is_adult:true + +## All available titlepage_entries and the label used for them: +## _label: