Extra metadata feature(refactoring), loosen url check on AO3, fix auth on castlefans.org.

This commit is contained in:
Jim Miller 2012-09-21 12:55:26 -05:00
parent e5b0974d0e
commit 4bb91cd0c5
27 changed files with 596 additions and 303 deletions

View file

@ -1,6 +1,6 @@
# ffd-retief-hrd fanfictiondownloader # ffd-retief-hrd fanfictiondownloader
application: fanfictiondownloader application: fanfictiondownloader
version: 4-4-25 version: 4-4-26
runtime: python27 runtime: python27
api_version: 1 api_version: 1
threadsafe: true threadsafe: true

View file

@ -27,7 +27,7 @@ class FanFictionDownLoaderBase(InterfaceActionBase):
description = 'UI plugin to download FanFiction stories from various sites.' description = 'UI plugin to download FanFiction stories from various sites.'
supported_platforms = ['windows', 'osx', 'linux'] supported_platforms = ['windows', 'osx', 'linux']
author = 'Jim Miller' author = 'Jim Miller'
version = (1, 6, 9) version = (1, 6, 10)
minimum_calibre_version = (0, 8, 57) minimum_calibre_version = (0, 8, 57)
#: This field defines the GUI plugin class that contains all the code #: This field defines the GUI plugin class that contains all the code

View file

@ -64,6 +64,7 @@ default_prefs['countpagesstats'] = []
default_prefs['errorcol'] = '' default_prefs['errorcol'] = ''
default_prefs['custom_cols'] = {} default_prefs['custom_cols'] = {}
default_prefs['custom_cols_newonly'] = {} default_prefs['custom_cols_newonly'] = {}
default_prefs['allow_custcol_from_ini'] = True
default_prefs['std_cols_newonly'] = {} default_prefs['std_cols_newonly'] = {}
@ -258,7 +259,7 @@ class ConfigWidget(QWidget):
# error column # error column
prefs['errorcol'] = unicode(self.cust_columns_tab.errorcol.itemData(self.cust_columns_tab.errorcol.currentIndex()).toString()) prefs['errorcol'] = unicode(self.cust_columns_tab.errorcol.itemData(self.cust_columns_tab.errorcol.currentIndex()).toString())
# cust cols # cust cols tab
colsmap = {} colsmap = {}
for (col,combo) in self.cust_columns_tab.custcol_dropdowns.iteritems(): for (col,combo) in self.cust_columns_tab.custcol_dropdowns.iteritems():
val = unicode(combo.itemData(combo.currentIndex()).toString()) val = unicode(combo.itemData(combo.currentIndex()).toString())
@ -272,6 +273,8 @@ class ConfigWidget(QWidget):
colsnewonly[col] = checkbox.isChecked() colsnewonly[col] = checkbox.isChecked()
prefs['custom_cols_newonly'] = colsnewonly prefs['custom_cols_newonly'] = colsnewonly
prefs['allow_custcol_from_ini'] = self.cust_columns_tab.allow_custcol_from_ini.isChecked()
prefs.save_to_db() prefs.save_to_db()
def edit_shortcuts(self): def edit_shortcuts(self):
@ -437,7 +440,7 @@ class PersonalIniTab(QWidget):
self.ini.setText(prefs['personal.ini']) self.ini.setText(prefs['personal.ini'])
self.l.addWidget(self.ini) self.l.addWidget(self.ini)
self.defaults = QPushButton('View Defaults', self) self.defaults = QPushButton('View Defaults (plugin-defaults.ini)', self)
self.defaults.setToolTip("View all of the plugin's configurable settings\nand their default settings.") self.defaults.setToolTip("View all of the plugin's configurable settings\nand their default settings.")
self.defaults.clicked.connect(self.show_defaults) self.defaults.clicked.connect(self.show_defaults)
self.l.addWidget(self.defaults) self.l.addWidget(self.defaults)
@ -456,7 +459,7 @@ class ShowDefaultsIniDialog(QDialog):
self.resize(600, 500) self.resize(600, 500)
self.l = QVBoxLayout() self.l = QVBoxLayout()
self.setLayout(self.l) self.setLayout(self.l)
self.label = QLabel("Plugin Defaults (Read-Only)") self.label = QLabel("Plugin Defaults (plugin-defaults.ini) (Read-Only)")
self.label.setToolTip("These are all of the plugin's configurable options\nand their default settings.") self.label.setToolTip("These are all of the plugin's configurable options\nand their default settings.")
self.setWindowTitle(_('Plugin Defaults')) self.setWindowTitle(_('Plugin Defaults'))
self.setWindowIcon(icon) self.setWindowIcon(icon)
@ -595,6 +598,8 @@ class GenerateCoverTab(QWidget):
horz.addWidget(dropdown) horz.addWidget(dropdown)
self.sl.addLayout(horz) self.sl.addLayout(horz)
self.sl.insertStretch(-1)
self.gcnewonly = QCheckBox("Run Generate Cover Only on New Books",self) self.gcnewonly = QCheckBox("Run Generate Cover Only on New Books",self)
self.gcnewonly.setToolTip("Default is to run GC any time the calibre metadata is updated.") self.gcnewonly.setToolTip("Default is to run GC any time the calibre metadata is updated.")
self.gcnewonly.setChecked(prefs['gcnewonly']) self.gcnewonly.setChecked(prefs['gcnewonly'])
@ -605,8 +610,6 @@ class GenerateCoverTab(QWidget):
self.allow_gc_from_ini.setChecked(prefs['allow_gc_from_ini']) self.allow_gc_from_ini.setChecked(prefs['allow_gc_from_ini'])
self.l.addWidget(self.allow_gc_from_ini) self.l.addWidget(self.allow_gc_from_ini)
self.l.insertStretch(-1)
class CountPagesTab(QWidget): class CountPagesTab(QWidget):
def __init__(self, parent_dialog, plugin_action): def __init__(self, parent_dialog, plugin_action):
@ -838,11 +841,16 @@ class CustomColumnsTab(QWidget):
self.sl.insertStretch(-1) self.sl.insertStretch(-1)
self.l.addSpacing(5)
self.allow_custcol_from_ini = QCheckBox('Allow custom_columns_settings from personal.ini to override',self)
self.allow_custcol_from_ini.setToolTip("The personal.ini parameter custom_columns_settings allows you to set custom columns to site specific values that aren't common to all sites.<br \>custom_columns_settings is ignored when this is off.")
self.allow_custcol_from_ini.setChecked(prefs['allow_custcol_from_ini'])
self.l.addWidget(self.allow_custcol_from_ini)
self.l.addSpacing(5) self.l.addSpacing(5)
label = QLabel("Special column:") label = QLabel("Special column:")
label.setWordWrap(True) label.setWordWrap(True)
self.l.addWidget(label) self.l.addWidget(label)
self.l.addSpacing(5)
horz = QHBoxLayout() horz = QHBoxLayout()
label = QLabel("Update/Overwrite Error Column:") label = QLabel("Update/Overwrite Error Column:")

View file

@ -8,7 +8,6 @@ __copyright__ = '2012, Jim Miller'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import time, os, copy, threading, re, platform import time, os, copy, threading, re, platform
from ConfigParser import SafeConfigParser
from StringIO import StringIO from StringIO import StringIO
from functools import partial from functools import partial
from datetime import datetime from datetime import datetime
@ -37,7 +36,7 @@ from calibre_plugins.fanfictiondownloader_plugin.common_utils import (set_plugin
create_menu_action_unique, get_library_uuid) create_menu_action_unique, get_library_uuid)
from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader import adapters, writers, exceptions from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader import adapters, writers, exceptions
#from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader.htmlcleanup import stripHTML from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader.configurable import Configuration
from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader.epubutils import get_dcsource, get_dcsource_chaptercount, get_story_url_from_html from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader.epubutils import get_dcsource, get_dcsource_chaptercount, get_story_url_from_html
from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader.geturls import get_urls_from_page from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader.geturls import get_urls_from_page
@ -249,10 +248,13 @@ class FanFictionDownLoaderPlugin(InterfaceAction):
return return
print("get_urls_from_page URL:%s"%d.url.text()) print("get_urls_from_page URL:%s"%d.url.text())
ffdlconfig = SafeConfigParser() if 'archiveofourown.org' in url:
ffdlconfig.readfp(StringIO(get_resources("plugin-defaults.ini"))) configuration = Configuration(adapters.getConfigSectionFor(url),"EPUB")
ffdlconfig.readfp(StringIO(prefs['personal.ini'])) configuration.readfp(StringIO(get_resources("plugin-defaults.ini")))
url_list = get_urls_from_page("%s"%d.url.text(),ffdlconfig) configuration.readfp(StringIO(options['personal.ini']))
else:
configuration = None
url_list = get_urls_from_page("%s"%d.url.text(),configuration)
if url_list: if url_list:
d = ViewLog(_("List of URLs"),"\n".join(url_list),parent=self.gui) d = ViewLog(_("List of URLs"),"\n".join(url_list),parent=self.gui)
@ -442,12 +444,10 @@ keep_summary_html:true
make_firstimage_cover:true make_firstimage_cover:true
''' + options['personal.ini'] ''' + options['personal.ini']
## was self.ffdlconfig, but we need to be able to change it configuration = Configuration(adapters.getConfigSectionFor(url),fileform)
## when doing epub update. configuration.readfp(StringIO(get_resources("plugin-defaults.ini")))
ffdlconfig = SafeConfigParser() configuration.readfp(StringIO(options['personal.ini']))
ffdlconfig.readfp(StringIO(get_resources("plugin-defaults.ini"))) adapter = adapters.getAdapter(configuration,url)
ffdlconfig.readfp(StringIO(options['personal.ini']))
adapter = adapters.getAdapter(ffdlconfig,url,fileform)
## three tries, that's enough if both user/pass & is_adult needed, ## three tries, that's enough if both user/pass & is_adult needed,
## or a couple tries of one or the other ## or a couple tries of one or the other
@ -470,13 +470,13 @@ make_firstimage_cover:true
# let other exceptions percolate up. # let other exceptions percolate up.
story = adapter.getStoryMetadataOnly() story = adapter.getStoryMetadataOnly()
writer = writers.getWriter(options['fileform'],adapter.config,adapter) writer = writers.getWriter(options['fileform'],configuration,adapter)
book['all_metadata'] = story.getAllMetadata(removeallentities=True) book['all_metadata'] = story.getAllMetadata(removeallentities=True)
book['title'] = story.getMetadata("title", removeallentities=True) book['title'] = story.getMetadata("title", removeallentities=True)
book['author_sort'] = book['author'] = story.getList("author", removeallentities=True) book['author_sort'] = book['author'] = story.getList("author", removeallentities=True)
book['publisher'] = story.getMetadata("site") book['publisher'] = story.getMetadata("site")
book['tags'] = writer.getTags(removeallentities=True) # getTags could be moved up into adapter now. Adapter didn't used to know the fileform book['tags'] = story.getSubjectTags(removeallentities=True)
book['comments'] = sanitize_comments_html(story.getMetadata("description")) book['comments'] = sanitize_comments_html(story.getMetadata("description"))
book['series'] = story.getMetadata("series", removeallentities=True) book['series'] = story.getMetadata("series", removeallentities=True)
@ -950,6 +950,52 @@ make_firstimage_cover:true
val = book['all_metadata']['status'] == 'In-Progress' val = book['all_metadata']['status'] == 'In-Progress'
db.set_custom(book_id, val, label=label, commit=False) db.set_custom(book_id, val, label=label, commit=False)
adapter = None
if prefs['allow_custcol_from_ini']:
configuration = Configuration(adapters.getConfigSectionFor(book['url']),options['fileform'])
configuration.readfp(StringIO(get_resources("plugin-defaults.ini")))
configuration.readfp(StringIO(options['personal.ini']))
adapter = adapters.getAdapter(configuration,book['url'])
# meta => custcol[,a|n|r]
# cliches=>\#acolumn,r
for line in adapter.getConfig('custom_columns_settings').splitlines():
if "=>" in line:
(meta,custcol) = map( lambda x: x.strip(), line.split("=>") )
flag='r'
if "," in custcol:
(custcol,flag) = map( lambda x: x.strip(), custcol.split(",") )
#print("meta:(%s) => custcol:(%s), flag(%s) "%(meta,custcol,flag))
if meta not in book['all_metadata']:
print("No value for %s, skipping custom column(%s) update."%(meta,custcol))
continue
if custcol not in custom_columns:
print("No custom column(%s), skipping."%(custcol))
continue
else:
coldef = custom_columns[custcol]
label = coldef['label']
if flag == 'r' or book['added']:
db.set_custom(book_id, book['all_metadata'][meta], label=label, commit=False)
if flag == 'a':
try:
existing=db.get_custom(book_id,label=label,index_is_id=True)
if isinstance(existing,list):
vallist = existing
else :
vallist = [existing]
vallist.append(book['all_metadata'][meta])
except:
vallist = [book['all_metadata'][meta]]
db.set_custom(book_id, ", ".join(vallist), label=label, commit=False)
db.commit() db.commit()
if 'Generate Cover' in self.gui.iactions and (book['added'] or not prefs['gcnewonly']): if 'Generate Cover' in self.gui.iactions and (book['added'] or not prefs['gcnewonly']):
@ -961,10 +1007,11 @@ make_firstimage_cover:true
gc_plugin = self.gui.iactions['Generate Cover'] gc_plugin = self.gui.iactions['Generate Cover']
setting_name = None setting_name = None
if prefs['allow_gc_from_ini']: if prefs['allow_gc_from_ini']:
ffdlconfig = SafeConfigParser() if not adapter: # might already have it from allow_custcol_from_ini
ffdlconfig.readfp(StringIO(get_resources("plugin-defaults.ini"))) configuration = Configuration(adapters.getConfigSectionFor(book['url']),options['fileform'])
ffdlconfig.readfp(StringIO(prefs['personal.ini'])) configuration.readfp(StringIO(get_resources("plugin-defaults.ini")))
adapter = adapters.getAdapter(ffdlconfig,book['url'],options['fileform']) configuration.readfp(StringIO(options['personal.ini']))
adapter = adapters.getAdapter(configuration,book['url'])
# template => regexp to match => GC Setting to use. # template => regexp to match => GC Setting to use.
# generate_cover_settings: # generate_cover_settings:
@ -973,7 +1020,7 @@ make_firstimage_cover:true
if "=>" in line: if "=>" in line:
(template,regexp,setting) = map( lambda x: x.strip(), line.split("=>") ) (template,regexp,setting) = map( lambda x: x.strip(), line.split("=>") )
value = Template(template).safe_substitute(book['all_metadata']).encode('utf8') value = Template(template).safe_substitute(book['all_metadata']).encode('utf8')
print("%s(%s) => %s => %s"%(template,value,regexp,setting)) # print("%s(%s) => %s => %s"%(template,value,regexp,setting))
if re.search(regexp,value): if re.search(regexp,value):
setting_name = setting setting_name = setting
break break
@ -1192,11 +1239,11 @@ make_firstimage_cover:true
return None return None
def _is_good_downloader_url(self,url): def _is_good_downloader_url(self,url):
# this is the accepted way to 'check for existence'? really? # this is the accepted way to 'check for existence of a class variable'? really?
try: try:
self.dummyconfig self.dummyconfig
except AttributeError: except AttributeError:
self.dummyconfig = SafeConfigParser() self.dummyconfig = Configuration("test1.com","EPUB")
# pulling up an adapter is pretty low overhead. If # pulling up an adapter is pretty low overhead. If
# it fails, it's a bad url. # it fails, it's a bad url.
try: try:

View file

@ -10,7 +10,6 @@ __docformat__ = 'restructuredtext en'
import time, os, traceback import time, os, traceback
from ConfigParser import SafeConfigParser
from StringIO import StringIO from StringIO import StringIO
from calibre.utils.ipc.server import Server from calibre.utils.ipc.server import Server
@ -20,6 +19,7 @@ from calibre.utils.logging import Log
from calibre_plugins.fanfictiondownloader_plugin.dialogs import (NotGoingToDownload, from calibre_plugins.fanfictiondownloader_plugin.dialogs import (NotGoingToDownload,
OVERWRITE, OVERWRITEALWAYS, UPDATE, UPDATEALWAYS, ADDNEW, SKIP, CALIBREONLY) OVERWRITE, OVERWRITEALWAYS, UPDATE, UPDATEALWAYS, ADDNEW, SKIP, CALIBREONLY)
from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader import adapters, writers, exceptions from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader import adapters, writers, exceptions
from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader.configurable import Configuration
from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader.epubutils import get_update_data from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader.epubutils import get_update_data
# ------------------------------------------------------------------------------ # ------------------------------------------------------------------------------
@ -114,19 +114,19 @@ def do_download_for_worker(book,options):
book['comment'] = 'Download started...' book['comment'] = 'Download started...'
ffdlconfig = SafeConfigParser() configuration = Configuration(adapters.getConfigSectionFor(book['url']),options['fileform'])
ffdlconfig.readfp(StringIO(get_resources("plugin-defaults.ini"))) configuration.readfp(StringIO(get_resources("plugin-defaults.ini")))
ffdlconfig.readfp(StringIO(options['personal.ini'])) configuration.readfp(StringIO(options['personal.ini']))
if not options['updateepubcover'] and 'epub_for_update' in book and options['collision'] in (UPDATE, UPDATEALWAYS): if not options['updateepubcover'] and 'epub_for_update' in book and options['collision'] in (UPDATE, UPDATEALWAYS):
ffdlconfig.set("overrides","never_make_cover","true") configuration.set("overrides","never_make_cover","true")
# images only for epub, even if the user mistakenly turned it # images only for epub, even if the user mistakenly turned it
# on elsewhere. # on elsewhere.
if options['fileform'] != "epub": if options['fileform'] != "epub":
ffdlconfig.set("overrides","include_images","false") configuration.set("overrides","include_images","false")
adapter = adapters.getAdapter(ffdlconfig,book['url'],options['fileform']) adapter = adapters.getAdapter(configuration,book['url'])
adapter.is_adult = book['is_adult'] adapter.is_adult = book['is_adult']
adapter.username = book['username'] adapter.username = book['username']
adapter.password = book['password'] adapter.password = book['password']
@ -137,7 +137,7 @@ def do_download_for_worker(book,options):
adapter.setSeries(book['calibre_series'][0],book['calibre_series'][1]) adapter.setSeries(book['calibre_series'][0],book['calibre_series'][1])
# else: # else:
# print("no calibre_series") # print("no calibre_series")
writer = writers.getWriter(options['fileform'],adapter.config,adapter) writer = writers.getWriter(options['fileform'],configuration,adapter)
outfile = book['outfile'] outfile = book['outfile']

View file

@ -18,6 +18,10 @@
## [defaults] section applies to all formats and sites but may be ## [defaults] section applies to all formats and sites but may be
## overridden at several levels ## overridden at several levels
## Some sites also require the user to confirm they are adult for
## adult content. Uncomment by removing '#' in front of is_adult.
#is_adult:true
## All available titlepage_entries and the label used for them: ## All available titlepage_entries and the label used for them:
## <entryname>_label:<label> ## <entryname>_label:<label>
## Labels may be customized. ## Labels may be customized.
@ -63,9 +67,15 @@ authorId_label:Author ID
## show up in Calibre as tags. Also carried into mobi when converted. ## show up in Calibre as tags. Also carried into mobi when converted.
extratags_label:Extra Tags extratags_label:Extra Tags
## The version of fanficdownloader ## The version of fanficdownloader
##
version_label:FFDL Version version_label:FFDL Version
## Date formats used by FFDL. Published and Updated dates don't include a time.
## See http://docs.python.org/library/datetime.html#strftime-strptime-behavior
## Note that ini format requires % to be escaped as %%.
dateCreated_format:%%Y-%%m-%%d %%H:%%M:%%S
datePublished_format:%%Y-%%m-%%d
dateUpdated_format:%%Y-%%m-%%d
## items to include in the title page ## items to include in the title page
## Empty metadata entries will *not* appear, even if in the list. ## Empty metadata entries will *not* appear, even if in the list.
## You can include extra text or HTML that will be included as-is in ## You can include extra text or HTML that will be included as-is in
@ -478,6 +488,39 @@ extraships:Draco Malfoy/Hermione Granger
## personal.ini, not defaults.ini. ## personal.ini, not defaults.ini.
#is_adult:true #is_adult:true
## Some adapters collect additional meta information beyond the
## standard ones. They need to be defined in extra_valid_entries to
## tell the rest of the FFDL system about them. They can be used in
## include_subject_tags, titlepage_entries, extra_titlepage_entries,
## logpage_entries, extra_logpage_entries, and include_in_* config
## items. You can also add additional entries here to build up
## composite metadata entries. dramione.org, for example, adds
## 'cliches' and then defines it as the composite of hermiones,dracos in
## include_in_cliches.
extra_valid_entries:themes,hermiones,dracos,timeline,cliches
include_in_cliches:hermiones,dracos
## For another example, you could, by uncommenting this line, include
## themes in with genre metadata. Note, however, that you couldn't
## use cliches. include_in_* can only include the original real
## entries, not other include_in_* values.
#include_in_genre:genre, themes
## You can give each new valid entry a specific label for use on
## titlepage and logpage. If not defined, it will simply be the
## entry name.
themes_label:Themes
hermiones_label:Hermiones
dracos_label:Dracos
timeline_label:Timeline
cliches_label:Character Cliches
## extra_titlepage_entries (and extra_logpage_entries) *add* to
## titlepage_entries (and logpage_entries) so you can add site
## specific entries to titlepage/logpage without having to copy the
## entire titlepage_entries line. (But if you want them higher than
## the end, you will need to copy titlepage_entries.)
extra_titlepage_entries: themes,hermiones,dracos,timeline,cliches
[erosnsappho.sycophanthex.com] [erosnsappho.sycophanthex.com]
## Site dedicated to these categories/characters/ships ## Site dedicated to these categories/characters/ships
extracategories:Harry Potter extracategories:Harry Potter
@ -789,9 +832,13 @@ extraships:Sesshoumaru/Kagome
[www.fanfiction.net] [www.fanfiction.net]
## fanfiction.net's 'cover' images are really just tiny thumbnails. ## fanfiction.net's 'cover' images are really just tiny thumbnails.
## Comment this out or change it to false to use them anyway. ## Change this to false to use them anyway.
never_make_cover: true never_make_cover: true
## Extra metadata that this adapter knows about. See [dramione.org]
## for examples of how to use them.
extra_valid_entries:reviews,favs,follows
[www.fanfiktion.de] [www.fanfiktion.de]
## Some sites require login (or login for some rated stories). The ## Some sites require login (or login for some rated stories). The
## program can prompt you, or you can save it in config. In ## program can prompt you, or you can save it in config. In
@ -818,6 +865,10 @@ output_filename: ${title}-${siteabbrev}_${authorId}_${storyId}${formatext}
## Clear FanFiction from defaults, fictionpress.com is original fiction. ## Clear FanFiction from defaults, fictionpress.com is original fiction.
extratags: extratags:
## Extra metadata that this adapter knows about. See [dramione.org]
## for examples of how to use them.
extra_valid_entries:reviews,favs,follows
[www.ficwad.com] [www.ficwad.com]
## Some sites require login (or login for some rated stories). The ## Some sites require login (or login for some rated stories). The
## program can prompt you, or you can save it in config. In ## program can prompt you, or you can save it in config. In

View file

@ -27,6 +27,7 @@ import ConfigParser
from subprocess import call from subprocess import call
from fanficdownloader import adapters,writers,exceptions from fanficdownloader import adapters,writers,exceptions
from fanficdownloader.configurable import Configuration
from fanficdownloader.epubutils import get_dcsource_chaptercount, get_update_data from fanficdownloader.epubutils import get_dcsource_chaptercount, get_update_data
from fanficdownloader.geturls import get_urls_from_page from fanficdownloader.geturls import get_urls_from_page
@ -90,7 +91,15 @@ def main():
if options.update and options.format != 'epub': if options.update and options.format != 'epub':
parser.error("-u/--update-epub only works with epub") parser.error("-u/--update-epub only works with epub")
config = ConfigParser.SafeConfigParser() ## Attempt to update an existing epub.
if options.update:
(url,chaptercount) = get_dcsource_chaptercount(args[0])
print "Updating %s, URL: %s" % (args[0],url)
output_filename = args[0]
else:
url = args[0]
configuration = Configuration(adapters.getConfigSectionFor(url),options.format)
conflist = [] conflist = []
homepath = join(expanduser("~"),".fanficdownloader") homepath = join(expanduser("~"),".fanficdownloader")
@ -109,46 +118,43 @@ def main():
conflist.extend(options.configfile) conflist.extend(options.configfile)
logging.debug('reading %s config file(s), if present'%conflist) logging.debug('reading %s config file(s), if present'%conflist)
config.read(conflist) configuration.read(conflist)
print("has include_in_tags?%s"%configuration.hasConfig("include_in_tags"))
try: try:
config.add_section("overrides") configuration.add_section("overrides")
except ConfigParser.DuplicateSectionError: except ConfigParser.DuplicateSectionError:
pass pass
if options.force: if options.force:
config.set("overrides","always_overwrite","true") configuration.set("overrides","always_overwrite","true")
if options.update:
configuration.set("overrides","output_filename",args[0])
if options.update and not options.updatecover: if options.update and not options.updatecover:
config.set("overrides","never_make_cover","true") configuration.set("overrides","never_make_cover","true")
# images only for epub, even if the user mistakenly turned it # images only for epub, even if the user mistakenly turned it
# on elsewhere. # on elsewhere.
if options.format != "epub": if options.format != "epub":
config.set("overrides","include_images","false") configuration.set("overrides","include_images","false")
if options.options: if options.options:
for opt in options.options: for opt in options.options:
(var,val) = opt.split('=') (var,val) = opt.split('=')
config.set("overrides",var,val) configuration.set("overrides",var,val)
if options.list: if options.list:
retlist = get_urls_from_page(args[0], config) retlist = get_urls_from_page(args[0], configuration)
print "\n".join(retlist) print "\n".join(retlist)
return return
try: try:
## Attempt to update an existing epub.
if options.update:
(url,chaptercount) = get_dcsource_chaptercount(args[0])
print "Updating %s, URL: %s" % (args[0],url)
output_filename = args[0]
config.set("overrides","output_filename",args[0])
else:
url = args[0]
adapter = adapters.getAdapter(config,url,options.format) adapter = adapters.getAdapter(configuration,url)
## Check for include_images and absence of PIL, give warning. ## Check for include_images and absence of PIL, give warning.
if adapter.getConfig('include_images'): if adapter.getConfig('include_images'):
@ -206,7 +212,7 @@ def main():
adapter.calibrebookmark, adapter.calibrebookmark,
adapter.logfile) = get_update_data(args[0]) adapter.logfile) = get_update_data(args[0])
writeStory(config,adapter,"epub") writeStory(configuration,adapter,"epub")
else: else:
# regular download # regular download
@ -215,7 +221,7 @@ def main():
adapter.setChaptersRange(options.begin,options.end) adapter.setChaptersRange(options.begin,options.end)
output_filename=writeStory(config,adapter,options.format,options.metaonly) output_filename=writeStory(configuration,adapter,options.format,options.metaonly)
if not options.metaonly and adapter.getConfig("post_process_cmd"): if not options.metaonly and adapter.getConfig("post_process_cmd"):
metadata = adapter.story.metadata metadata = adapter.story.metadata

View file

@ -3,10 +3,20 @@
[defaults] [defaults]
## Some sites also require the user to confirm they are adult for ## Some sites also require the user to confirm they are adult for
## adult content. In commandline version, this should go in your ## adult content. Uncomment by removing '#' in front of is_adult. In
## personal.ini, not defaults.ini. ## commandline version, this should go in your personal.ini, not
## defaults.ini.
#is_adult:true #is_adult:true
## Don't like the numbers at the start of chapter titles on some
## sites? You can use strip_chapter_numbers to strip them off. Just
## want to make them all look the same? Strip them off, then add them
## back on with add_chapter_numbers. Don't like the way it strips
## numbers or adds them back? See chapter_title_strip_pattern and
## chapter_title_add_pattern.
#strip_chapter_numbers:true
#add_chapter_numbers:true
[epub] [epub]
## include images from img tags in the body and summary of stories. ## include images from img tags in the body and summary of stories.
## Images will be converted to jpg for size if possible. Images work ## Images will be converted to jpg for size if possible. Images work
@ -34,7 +44,8 @@
## Most common, I expect will be using this to save username/passwords ## Most common, I expect will be using this to save username/passwords
## for different sites. Here are a few examples. ## for different sites. Here are a few examples. See defaults.ini
## for the full list.
[www.twilighted.net] [www.twilighted.net]
#username:YourPenname #username:YourPenname

View file

@ -118,7 +118,7 @@ for x in imports():
#print x #print x
__class_list.append(sys.modules[x].getClass()) __class_list.append(sys.modules[x].getClass())
def getAdapter(config,url,fileform=None): def getDomainURL(url):
## fix up leading protocol. ## fix up leading protocol.
fixedurl = re.sub(r"(?i)^[htps]+[:/]+","http://",url.strip()) fixedurl = re.sub(r"(?i)^[htps]+[:/]+","http://",url.strip())
if not fixedurl.startswith("http"): if not fixedurl.startswith("http"):
@ -135,20 +135,17 @@ def getAdapter(config,url,fileform=None):
if( domain != parsedUrl.netloc ): if( domain != parsedUrl.netloc ):
fixedurl = fixedurl.replace(parsedUrl.netloc,domain) fixedurl = fixedurl.replace(parsedUrl.netloc,domain)
return (domain,fixedurl)
def getAdapter(config,url):
logging.debug("trying url:"+url) logging.debug("trying url:"+url)
cls = getClassFor(domain) (domain,fixedurl) = getDomainURL(url)
if not cls and domain.startswith("www."): cls = getClassFromList(domain)
domain = domain.replace("www.","") logging.debug("fixedurl:"+fixedurl)
logging.debug("trying site:without www: "+domain)
cls = getClassFor(domain)
fixedurl = fixedurl.replace("http://www.","http://")
if not cls:
logging.debug("trying site:www."+domain)
cls = getClassFor("www."+domain)
fixedurl = fixedurl.replace("http://","http://www.")
if cls: if cls:
adapter = cls(config,fixedurl) # raises InvalidStoryURL adapter = cls(config,fixedurl) # raises InvalidStoryURL
adapter.setSectionOrder(adapter.getConfigSection(),fileform)
return adapter return adapter
# No adapter found. # No adapter found.
raise exceptions.UnknownSite( url, [cls.getSiteDomain() for cls in __class_list] ) raise exceptions.UnknownSite( url, [cls.getSiteDomain() for cls in __class_list] )
@ -156,7 +153,28 @@ def getAdapter(config,url,fileform=None):
def getConfigSections(): def getConfigSections():
return [cls.getConfigSection() for cls in __class_list] return [cls.getConfigSection() for cls in __class_list]
def getConfigSectionFor(url):
(domain,fixedurl) = getDomainURL(url)
cls = getClassFromList(domain)
if cls:
return cls.getConfigSection()
# No adapter found.
raise exceptions.UnknownSite( url, [cls.getSiteDomain() for cls in __class_list] )
def getClassFor(domain): def getClassFor(domain):
cls = getClassFromList(domain)
if not cls and domain.startswith("www."):
domain = domain.replace("www.","")
logging.debug("trying site:without www: "+domain)
cls = getClassFromList(domain)
fixedurl = fixedurl.replace("http://www.","http://")
if not cls:
logging.debug("trying site:www."+domain)
cls = getClassFromList("www."+domain)
fixedurl = fixedurl.replace("http://","http://www.")
def getClassFromList(domain):
for cls in __class_list: for cls in __class_list:
if cls.matchesSite(domain): if cls.matchesSite(domain):
return cls return cls

View file

@ -83,7 +83,7 @@ class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter):
def getSiteURLPattern(self):
    """Return the regex source that a story URL for this site must match.

    The named group 'id' captures the numeric work id. An optional "www."
    host prefix and an optional "/collections/<name>" path prefix are
    accepted. The pattern has no trailing anchor, so anything following
    the work id (e.g. "/chapters/123") is accepted by re.match.
    """
    # http://archiveofourown.org/collections/Smallville_Slash_Archive/works/159770
    # The dot in "www." is escaped so it matches a literal '.' only.
    return re.escape("http://")+r"(www\.)?"+re.escape(self.getSiteDomain())+r"(/collections/[^/]+)?/works/(?P<id>\d+)"
## Login ## Login
def needToLoginCheck(self, data): def needToLoginCheck(self, data):

View file

@ -175,12 +175,13 @@ class CastleFansOrgAdapter(BaseSiteAdapter): # XXX
# Now go hunting for all the meta data and the chapter list. # Now go hunting for all the meta data and the chapter list.
pagetitle = soup.find('div',{'id':'pagetitle'})
## Title ## Title
a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) a = pagetitle.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
self.story.setMetadata('title',a.string) self.story.setMetadata('title',a.string)
# Find authorid and URL from... author url. # Find authorid and URL from... author url.
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) a = pagetitle.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
self.story.setMetadata('authorId',a['href'].split('=')[1]) self.story.setMetadata('authorId',a['href'].split('=')[1])
self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href']) self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href'])
self.story.setMetadata('author',a.string) self.story.setMetadata('author',a.string)

View file

@ -161,7 +161,6 @@ class DramioneOrgAdapter(BaseSiteAdapter):
self.story.setMetadata('author',a.string) self.story.setMetadata('author',a.string)
# Use banner as cover if found # Use banner as cover if found
if self.getConfig('include_images'):
coverurl = '' coverurl = ''
img = soup.find('img',{'class':'banner'}) img = soup.find('img',{'class':'banner'})
if img: if img:
@ -173,8 +172,7 @@ class DramioneOrgAdapter(BaseSiteAdapter):
coverurl = a.parent['href'] coverurl = a.parent['href']
#print "Cover: "+coverurl #print "Cover: "+coverurl
if coverurl: if coverurl:
self.story.addImgUrl(self,url,coverurl,self._fetchUrlRaw,cover=True) self.setCoverImage(url,coverurl)
# Find the chapters: # Find the chapters:
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")): for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")):
@ -194,6 +192,22 @@ class DramioneOrgAdapter(BaseSiteAdapter):
for warning in warnings: for warning in warnings:
self.story.addToList('warnings',warning.string) self.story.addToList('warnings',warning.string)
themes=soup.findAll('a', {'class' : "tag-3"})
for theme in themes:
self.story.addToList('themes',theme.string)
hermiones=soup.findAll('a', {'class' : "tag-4"})
for hermione in hermiones:
self.story.addToList('hermiones',hermione.string)
dracos=soup.findAll('a', {'class' : "tag-5"})
for draco in dracos:
self.story.addToList('dracos',draco.string)
timelines=soup.findAll('a', {'class' : "tag-6"})
for timeline in timelines:
self.story.addToList('timeline',timeline.string)
# utility method # utility method
def defaultGetattr(d,k): def defaultGetattr(d,k):
try: try:

View file

@ -186,8 +186,14 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
donechars = False donechars = False
while len(metalist) > 0: while len(metalist) > 0:
if metalist[0].startswith('Reviews') or metalist[0].startswith('Chapters') or metalist[0].startswith('Status') or metalist[0].startswith('id:') or metalist[0].startswith('Favs:') or metalist[0].startswith('Follows:'): if metalist[0].startswith('Chapters') or metalist[0].startswith('Status') or metalist[0].startswith('id:'):
pass pass
elif metalist[0].startswith('Reviews'):
self.story.setMetadata('reviews',metalist[0].split(':')[1].strip())
elif metalist[0].startswith('Favs:'):
self.story.setMetadata('favs',metalist[0].split(':')[1].strip())
elif metalist[0].startswith('Follows:'):
self.story.setMetadata('follows',metalist[0].split(':')[1].strip())
elif metalist[0].startswith('Updated'): elif metalist[0].startswith('Updated'):
self.story.setMetadata('dateUpdated',makeDate(metalist[0].split(':')[1].strip(), '%m-%d-%y')) self.story.setMetadata('dateUpdated',makeDate(metalist[0].split(':')[1].strip(), '%m-%d-%y'))
elif metalist[0].startswith('Published'): elif metalist[0].startswith('Published'):
@ -208,10 +214,9 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
else: else:
self.story.setMetadata('status', 'In-Progress') self.story.setMetadata('status', 'In-Progress')
if self.getConfig('include_images'):
img = soup.find('img',{'class':'cimage'}) img = soup.find('img',{'class':'cimage'})
if img: if img:
self.story.addImgUrl(self,url,img['src'],self._fetchUrlRaw,cover=True) self.setCoverImage(url,img['src'])
# Find the chapter selector # Find the chapter selector
select = soup.find('select', { 'name' : 'chapter' } ) select = soup.find('select', { 'name' : 'chapter' } )

View file

@ -138,14 +138,14 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter):
self.story.setMetadata("numWords", str(storyMetadata["words"])) self.story.setMetadata("numWords", str(storyMetadata["words"]))
# fimfic is the first site with an explicit cover image. # fimfic is the first site with an explicit cover image.
if self.getConfig('include_images') and "image" in storyMetadata.keys(): if "image" in storyMetadata.keys():
if "full_image" in storyMetadata: if "full_image" in storyMetadata:
coverurl = storyMetadata["full_image"] coverurl = storyMetadata["full_image"]
else: else:
coverurl = storyMetadata["image"] coverurl = storyMetadata["image"]
if coverurl.startswith('//static.fimfiction.net'): # fix for img urls missing 'http:' if coverurl.startswith('//static.fimfiction.net'): # fix for img urls missing 'http:'
coverurl = "http:"+coverurl coverurl = "http:"+coverurl
self.story.addImgUrl(self,self.url,coverurl,self._fetchUrlRaw,cover=True) self.setCoverImage(self.url,coverurl)
# the fimfic API gives bbcode for desc, not html. # the fimfic API gives bbcode for desc, not html.

View file

@ -67,10 +67,8 @@ class BaseSiteAdapter(Configurable):
def validateURL(self):
    """Match self.url against the site's URL pattern.

    Returns the re match object on success, or None when the URL does not
    match at its start.
    """
    url_pattern = self.getSiteURLPattern()
    return re.match(url_pattern, self.url)
def __init__(self, config, url): def __init__(self, configuration, url):
self.config = config Configurable.__init__(self, configuration)
Configurable.__init__(self, config)
self.setSectionOrder(self.getConfigSection())
self.username = "NoneGiven" # if left empty, site doesn't return any message at all. self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
self.password = "" self.password = ""
@ -79,7 +77,7 @@ class BaseSiteAdapter(Configurable):
self.opener = u2.build_opener(u2.HTTPCookieProcessor(),GZipProcessor()) self.opener = u2.build_opener(u2.HTTPCookieProcessor(),GZipProcessor())
self.storyDone = False self.storyDone = False
self.metadataDone = False self.metadataDone = False
self.story = Story() self.story = Story(configuration)
self.story.setMetadata('site',self.getConfigSection()) self.story.setMetadata('site',self.getConfigSection())
self.story.setMetadata('dateCreated',datetime.datetime.now()) self.story.setMetadata('dateCreated',datetime.datetime.now())
self.chapterUrls = [] # tuples of (chapter title,chapter url) self.chapterUrls = [] # tuples of (chapter title,chapter url)
@ -138,7 +136,7 @@ class BaseSiteAdapter(Configurable):
logging.debug("try code:"+code) logging.debug("try code:"+code)
return data.decode(code) return data.decode(code)
except: except:
logging.info("code failed:"+code) logging.debug("code failed:"+code)
pass pass
logging.info("Could not decode story, tried:%s Stripping non-ASCII."%decode) logging.info("Could not decode story, tried:%s Stripping non-ASCII."%decode)
return "".join([x for x in data if ord(x) < 128]) return "".join([x for x in data if ord(x) < 128])
@ -199,8 +197,7 @@ class BaseSiteAdapter(Configurable):
if (self.chapterFirst!=None and index < self.chapterFirst) or \ if (self.chapterFirst!=None and index < self.chapterFirst) or \
(self.chapterLast!=None and index > self.chapterLast): (self.chapterLast!=None and index > self.chapterLast):
self.story.addChapter(removeEntities(title), self.story.addChapter(removeEntities(title),
None, None)
self)
else: else:
if self.oldchapters and index < len(self.oldchapters): if self.oldchapters and index < len(self.oldchapters):
data = self.utf8FromSoup(None, data = self.utf8FromSoup(None,
@ -209,16 +206,14 @@ class BaseSiteAdapter(Configurable):
else: else:
data = self.getChapterText(url) data = self.getChapterText(url)
self.story.addChapter(removeEntities(title), self.story.addChapter(removeEntities(title),
removeEntities(data), removeEntities(data))
self)
self.storyDone = True self.storyDone = True
# include image, but no cover from story, add default_cover_image cover. # include image, but no cover from story, add default_cover_image cover.
if self.getConfig('include_images') and \ if self.getConfig('include_images') and \
not self.story.cover and \ not self.story.cover and \
self.getConfig('default_cover_image'): self.getConfig('default_cover_image'):
self.story.addImgUrl(self, self.story.addImgUrl(None,
None,
#self.getConfig('default_cover_image'), #self.getConfig('default_cover_image'),
self.story.formatFileName(self.getConfig('default_cover_image'), self.story.formatFileName(self.getConfig('default_cover_image'),
self.getConfig('allow_unsafe_filename')), self.getConfig('allow_unsafe_filename')),
@ -298,6 +293,10 @@ class BaseSiteAdapter(Configurable):
self.story.setMetadata('description',stripHTML(svalue)) self.story.setMetadata('description',stripHTML(svalue))
#print("\n\ndescription:\n"+self.story.getMetadata('description')+"\n\n") #print("\n\ndescription:\n"+self.story.getMetadata('description')+"\n\n")
def setCoverImage(self,storyurl,imgurl):
    """Register *imgurl* as the story's cover image, if images are enabled.

    Does nothing unless the 'include_images' setting is truthy. Otherwise
    the image is handed to the story with cover=True, using this adapter's
    raw fetcher. Always returns None.
    """
    if not self.getConfig('include_images'):
        return
    self.story.addImgUrl(storyurl,imgurl,self._fetchUrlRaw,cover=True)
# This gives us a unicode object, not just a string containing bytes. # This gives us a unicode object, not just a string containing bytes.
# (I gave soup a unicode string, you'd think it could give it back...) # (I gave soup a unicode string, you'd think it could give it back...)
# Now also does a bunch of other common processing for us. # Now also does a bunch of other common processing for us.
@ -313,7 +312,7 @@ class BaseSiteAdapter(Configurable):
# some pre-existing epubs have img tags that had src stripped off. # some pre-existing epubs have img tags that had src stripped off.
if img.has_key('src'): if img.has_key('src'):
img['longdesc']=img['src'] img['longdesc']=img['src']
img['src']=self.story.addImgUrl(self,url,img['src'],fetch, img['src']=self.story.addImgUrl(url,img['src'],fetch,
coverexclusion=self.getConfig('cover_exclusion_regexp')) coverexclusion=self.getConfig('cover_exclusion_regexp'))
for attr in soup._getAttrMap().keys(): for attr in soup._getAttrMap().keys():

View file

@ -21,10 +21,6 @@ import ConfigParser
# inherit from Configurable. The config file(s) uses ini format: # inherit from Configurable. The config file(s) uses ini format:
# [sections] with key:value settings. # [sections] with key:value settings.
# #
# writer does [defaults], [www.whofic.com], [epub], [www.whofic.com:epub], [overrides]
#
# Until a write is created, the adapter only has [defaults], [www.whofic.com], [overrides]
#
# [defaults] # [defaults]
# titlepage_entries: category,genre, status # titlepage_entries: category,genre, status
# [www.whofic.com] # [www.whofic.com]
@ -36,14 +32,10 @@ import ConfigParser
# [overrides] # [overrides]
# titlepage_entries: category # titlepage_entries: category
class Configuration(ConfigParser.SafeConfigParser):
class Configurable(object): def __init__(self, site, fileform):
ConfigParser.SafeConfigParser.__init__(self)
def __init__(self, config):
self.config = config
self.sectionslist = ['defaults']
def setSectionOrder(self,site,fileform=None):
self.sectionslist = ['defaults'] self.sectionslist = ['defaults']
self.addConfigSection(site) self.addConfigSection(site)
if fileform: if fileform:
@ -51,13 +43,56 @@ class Configurable(object):
self.addConfigSection(site+":"+fileform) self.addConfigSection(site+":"+fileform)
self.addConfigSection("overrides") self.addConfigSection("overrides")
self.validEntries = [
'category',
'genre',
'language',
'characters',
'ships',
'series',
'status',
'datePublished',
'dateUpdated',
'dateCreated',
'rating',
'warnings',
'numChapters',
'numWords',
'site',
'storyId',
'authorId',
'extratags',
'title',
'storyUrl',
'description',
'author',
'authorUrl',
'formatname',
'formatext',
'siteabbrev',
'version',
# internal stuff.
'langcode',
'output_css',
'authorHTML'
]
def addConfigSection(self,section):
    """Prepend *section* to self.sectionslist.

    hasConfig()/getConfig() walk that list in order, so the most recently
    added section is looked at first.
    """
    self.sectionslist[0:0] = [section]
def isValidMetaEntry(self, key):
    """Return True when *key* is one of the names given by getValidMetaList()."""
    valid_keys = self.getValidMetaList()
    return key in valid_keys
def getValidMetaList(self):
    """Return a new list of every valid metadata entry name.

    That is the built-in self.validEntries followed by whatever the
    'extra_valid_entries' setting lists. self.validEntries is not modified.
    """
    extra_entries = self.getConfigList("extra_valid_entries")
    return list(self.validEntries) + list(extra_entries)
def hasConfig(self, key): def hasConfig(self, key):
for section in self.sectionslist: for section in self.sectionslist:
try: try:
self.config.get(section,key) self.get(section,key)
#print("found %s in section [%s]"%(key,section)) #print("found %s in section [%s]"%(key,section))
return True return True
except: except:
@ -65,11 +100,11 @@ class Configurable(object):
return False return False
def getConfig(self, key): def getConfig(self, key, default=""):
val = "" val = default
for section in self.sectionslist: for section in self.sectionslist:
try: try:
val = self.config.get(section,key) val = self.get(section,key)
if val and val.lower() == "false": if val and val.lower() == "false":
val = False val = False
#print "getConfig(%s)=[%s]%s" % (key,section,val) #print "getConfig(%s)=[%s]%s" % (key,section,val)
@ -86,3 +121,24 @@ class Configurable(object):
#print "vlist("+key+"):"+str(vlist) #print "vlist("+key+"):"+str(vlist)
return vlist return vlist
# extended by adapter, writer and story for ease of calling configuration.
class Configurable(object):
    """Base class that forwards configuration queries to a shared object.

    Subclasses hold one *configuration* object and call these thin wrappers
    instead of reaching into it directly. Every method delegates to the
    method of the same name on self.configuration.
    """

    def __init__(self, configuration):
        # The object that actually answers all has/get queries below.
        self.configuration = configuration

    def isValidMetaEntry(self, key):
        """Return whether *key* is an accepted metadata entry name."""
        return self.configuration.isValidMetaEntry(key)

    def getValidMetaList(self):
        """Return the list of accepted metadata entry names."""
        return self.configuration.getValidMetaList()

    def hasConfig(self, key):
        """Return whether *key* is set in any configured section."""
        return self.configuration.hasConfig(key)

    def getConfig(self, key, default=""):
        """Return the setting for *key*, or *default* when it is not set."""
        # Pass the caller's default through; dropping it would make every
        # unset key come back as "" regardless of what the caller asked for.
        return self.configuration.getConfig(key, default)

    def getConfigList(self, key):
        """Return the setting for *key* as a list of values."""
        return self.configuration.getConfigList(key)

View file

@ -18,19 +18,19 @@
import re import re
import urlparse import urlparse
import urllib2 as u2 import urllib2 as u2
import ConfigParser
from BeautifulSoup import BeautifulSoup from BeautifulSoup import BeautifulSoup
from gziphttp import GZipProcessor from gziphttp import GZipProcessor
import adapters import adapters
from configurable import Configuration
def get_urls_from_page(url,config=None): def get_urls_from_page(url,configuration=None):
normalized = set() # normalized url normalized = set() # normalized url
retlist = [] # orig urls. retlist = [] # orig urls.
if not config: if not configuration:
config = ConfigParser.SafeConfigParser() configuration = Configuration("test1.com","EPUB")
data = None data = None
@ -39,7 +39,7 @@ def get_urls_from_page(url,config=None):
# them, AO3 doesn't even show them if not logged in. Only works # them, AO3 doesn't even show them if not logged in. Only works
# with saved user/pass--not going to prompt for list. # with saved user/pass--not going to prompt for list.
if 'archiveofourown.org' in url: if 'archiveofourown.org' in url:
ao3adapter = adapters.getAdapter(config,"http://www.archiveofourown.org/works/0","EPUB") ao3adapter = adapters.getAdapter(configuration,"http://www.archiveofourown.org/works/0","EPUB")
if ao3adapter.getConfig("username"): if ao3adapter.getConfig("username"):
if ao3adapter.getConfig("is_adult"): if ao3adapter.getConfig("is_adult"):
addurl = "?view_adult=true" addurl = "?view_adult=true"
@ -72,7 +72,7 @@ def get_urls_from_page(url,config=None):
try: try:
href = href.replace('&index=1','') href = href.replace('&index=1','')
adapter = adapters.getAdapter(config,href,"EPUB") adapter = adapters.getAdapter(configuration,href,"EPUB")
if adapter.story.getMetadata('storyUrl') not in normalized: if adapter.story.getMetadata('storyUrl') not in normalized:
normalized.add(adapter.story.getMetadata('storyUrl')) normalized.add(adapter.story.getMetadata('storyUrl'))
retlist.append(href) retlist.append(href)

View file

@ -22,6 +22,7 @@ from math import floor
import exceptions import exceptions
from htmlcleanup import conditionalRemoveEntities, removeAllEntities from htmlcleanup import conditionalRemoveEntities, removeAllEntities
from configurable import Configurable
# Create convert_image method depending on which graphics lib we can # Create convert_image method depending on which graphics lib we can
# load. Preferred: calibre, PIL, none # load. Preferred: calibre, PIL, none
@ -186,9 +187,10 @@ langs = {
"Devanagari":"hi", "Devanagari":"hi",
} }
class Story: class Story(Configurable):
def __init__(self): def __init__(self, configuration):
Configurable.__init__(self, configuration)
try: try:
self.metadata = {'version':os.environ['CURRENT_VERSION_ID']} self.metadata = {'version':os.environ['CURRENT_VERSION_ID']}
except: except:
@ -197,35 +199,50 @@ class Story:
self.chapters = [] # chapters will be tuples of (title,html) self.chapters = [] # chapters will be tuples of (title,html)
self.imgurls = [] self.imgurls = []
self.imgtuples = [] self.imgtuples = []
self.listables = {} # some items (extratags, category, warnings & genres) are also kept as lists.
self.cover=None # *href* of new cover image--need to create html. self.cover=None # *href* of new cover image--need to create html.
self.oldcover=None # (oldcoverhtmlhref,oldcoverhtmltype,oldcoverhtmldata,oldcoverimghref,oldcoverimgtype,oldcoverimgdata) self.oldcover=None # (oldcoverhtmlhref,oldcoverhtmltype,oldcoverhtmldata,oldcoverimghref,oldcoverimgtype,oldcoverimgdata)
self.calibrebookmark=None # cheesy way to carry calibre bookmark file forward across update. self.calibrebookmark=None # cheesy way to carry calibre bookmark file forward across update.
self.logfile=None # cheesy way to carry log file forward across update. self.logfile=None # cheesy way to carry log file forward across update.
def setMetadata(self, key, value): self.setReplace(self.getConfig('replace_metadata'))
def setMetadata(self, key, value, condremoveentities=True):
    """Store *value* under *key* in self.metadata.

    When condremoveentities is true the value is first passed through
    conditionalRemoveEntities(); otherwise it is stored untouched.

    Two keys have side effects:
      * 'language' also sets 'langcode' from the langs table, falling back
        to 'en' when the lookup fails.
      * 'dateUpdated' also adds two formatted "Last Update" strings to the
        'lastupdate' list; *value* must provide strftime().
    """
    ## still keeps &lt; &lt; and &amp;
    stored = conditionalRemoveEntities(value) if condremoveentities else value
    self.metadata[key] = stored

    if key == "language":
        try:
            langcode = langs[stored]
        except:
            langcode = 'en'
        self.metadata['langcode'] = langcode

    if key == 'dateUpdated':
        # Last Update tags for Bill.
        for pattern in ("Last Update Year/Month: %Y/%m",
                        "Last Update: %Y/%m/%d"):
            self.addToList('lastupdate',value.strftime(pattern))
def getMetadataRaw(self,key):
    """Return the stored, unformatted value for *key*.

    Returns None when *key* is not a valid metadata entry or has no value
    stored in self.metadata.
    """
    # 'in' instead of the deprecated dict.has_key(); same result.
    if self.isValidMetaEntry(key) and key in self.metadata:
        return self.metadata[key]
    return None
def doReplacments(self,value):
    """Apply each (pattern, replacement) pair in self.replacements to *value*.

    A pair is applied only if the current value is a string and the pattern
    matches at its start (re.match); when it does, every occurrence of the
    pattern is substituted (re.sub). Pairs are applied in order, each seeing
    the result of the previous one. Non-string values are returned unchanged.
    """
    for pattern, replacement in self.replacements:
        if not isinstance(value,basestring):
            continue
        if re.match(pattern,value):
            value = re.sub(pattern,replacement,value)
    return value
def getMetadata(self, key, removeallentities=False, doreplacements=True): def getMetadata(self, key,
removeallentities=False,
doreplacements=True):
value = None value = None
if not self.isValidMetaEntry(key):
return value
if self.isList(key): if self.isList(key):
value = ', '.join(self.getList(key, removeallentities)) value = u', '.join(self.getList(key, removeallentities, doreplacements=True))
elif self.metadata.has_key(key): elif self.metadata.has_key(key):
value = self.metadata[key] value = self.metadata[key]
if value: if value:
@ -233,10 +250,8 @@ class Story:
value = commaGroups(value) value = commaGroups(value)
if key == "numChapters": if key == "numChapters":
value = commaGroups("%d"%value) value = commaGroups("%d"%value)
if key == "dateCreated": if key in ("dateCreated","datePublished","dateUpdated"):
value = value.strftime("%Y-%m-%d %H:%M:%S") value = value.strftime(self.getConfig(key+"_format","%Y-%m-%d"))
if key == "datePublished" or key == "dateUpdated":
value = value.strftime("%Y-%m-%d")
if doreplacements: if doreplacements:
value=self.doReplacments(value) value=self.doReplacments(value)
@ -245,18 +260,21 @@ class Story:
else: else:
return value return value
def getAllMetadata(self, removeallentities=False, doreplacements=True): def getAllMetadata(self,
removeallentities=False,
doreplacements=True,
keeplists=False):
''' '''
All single value *and* list value metadata as strings. All single value *and* list value metadata as strings (unless keeplists=True, then keep lists).
''' '''
allmetadata = {} allmetadata = {}
# special handling for authors/authorUrls # special handling for authors/authorUrls
authlinkhtml="<a class='authorlink' href='%s'>%s</a>" authlinkhtml="<a class='authorlink' href='%s'>%s</a>"
if 'author' in self.listables.keys(): # more than one author, assume multiple authorUrl too. if self.isList('author'): # more than one author, assume multiple authorUrl too.
htmllist=[] htmllist=[]
for i, v in enumerate(self.listables['author']): for i, v in enumerate(self.getList('author')):
aurl = self.listables['authorUrl'][i] aurl = self.getList('authorUrl')[i]
auth = v auth = v
# make sure doreplacements & removeallentities are honored. # make sure doreplacements & removeallentities are honored.
if doreplacements: if doreplacements:
@ -272,10 +290,11 @@ class Story:
self.setMetadata('authorHTML',authlinkhtml%(self.getMetadata('authorUrl', removeallentities, doreplacements), self.setMetadata('authorHTML',authlinkhtml%(self.getMetadata('authorUrl', removeallentities, doreplacements),
self.getMetadata('author', removeallentities, doreplacements))) self.getMetadata('author', removeallentities, doreplacements)))
for k in self.metadata.keys(): for k in self.getValidMetaList():
if self.isList(k) and keeplists:
allmetadata[k] = self.getList(k, removeallentities, doreplacements)
else:
allmetadata[k] = self.getMetadata(k, removeallentities, doreplacements) allmetadata[k] = self.getMetadata(k, removeallentities, doreplacements)
for l in self.listables.keys():
allmetadata[l] = self.getMetadata(l, removeallentities, doreplacements)
return allmetadata return allmetadata
@ -288,18 +307,37 @@ class Story:
if value==None: if value==None:
return return
value = conditionalRemoveEntities(value) value = conditionalRemoveEntities(value)
if not self.isList(listname): if not self.isList(listname) or not listname in self.metadata:
self.listables[listname]=[] # Calling addToList to a non-list meta will overwrite it.
self.metadata[listname]=[]
# prevent duplicates. # prevent duplicates.
if not value in self.listables[listname]: if not value in self.metadata[listname]:
self.listables[listname].append(value) self.metadata[listname].append(value)
def isList(self,listname):
    """Return whether *listname* should be treated as a list-valued entry.

    Everything set with an include_in_* is considered a list. Otherwise
    the entry counts as a list only when it is a valid metadata entry and
    the value stored in self.metadata is an actual list.
    """
    configured = self.hasConfig("include_in_"+listname)
    # .get() yields None for a missing key, which is not a list -- the same
    # answer the deprecated has_key() test gave.
    return configured or (
        self.isValidMetaEntry(listname)
        and isinstance(self.metadata.get(listname),list) )
def getList(self,listname,
removeallentities=False,
doreplacements=True,
doincludein=True):
retlist = [] retlist = []
if not self.isList(listname): if not self.isValidMetaEntry(listname):
retlist = [self.getMetadata(listname,removeallentities=removeallentities)] return retlist
# doincludein prevents recursion of include_in_'s
if doincludein and self.hasConfig("include_in_"+listname):
for k in self.getConfigList("include_in_"+listname):
retlist.extend(self.getList(k,removeallentities,doreplacements,doincludein=False))
else: else:
retlist = self.listables[listname]
if not self.isList(listname):
retlist = [self.getMetadata(listname,removeallentities, doreplacements)]
else:
retlist = self.getMetadataRaw(listname)
if doreplacements: if doreplacements:
retlist = filter( lambda x : x!=None and x!='' , retlist = filter( lambda x : x!=None and x!='' ,
@ -310,30 +348,39 @@ class Story:
return retlist return retlist
def getSubjectTags(self, removeallentities=False):
    """Return the de-duplicated subject tags for this story as a list.

    Only metadata entries named in the 'include_subject_tags' or
    'extra_subject_tags' settings contribute. List-valued entries add each
    of their items; other entries add their single value. None is never
    included. The order of the returned list is unspecified.
    """
    wanted = self.getConfigList("include_subject_tags") + self.getConfigList("extra_subject_tags")
    allmeta = self.getAllMetadata(removeallentities=removeallentities,keeplists=True)

    # a set, so a tag contributed by more than one entry appears only once.
    tags = set()
    # metadata all go into dc:subject tags, but only if they are configured.
    for name in allmeta:
        if name not in wanted:
            continue
        value = allmeta[name]
        if isinstance(value,list):
            tags.update(value)
        else:
            tags.add(value)

    tags.discard(None)
    return list(tags)
def addChapter(self, title, html):
    """Append a (title, html) tuple to self.chapters.

    When both the 'strip_chapter_numbers' and 'chapter_title_strip_pattern'
    settings are truthy, text matching the pattern is removed from the
    title first.
    """
    strip_pattern = self.getConfig('strip_chapter_numbers') and \
        self.getConfig('chapter_title_strip_pattern')
    if strip_pattern:
        title = re.sub(strip_pattern,"",title)
    self.chapters.append( (title,html) )
def getChapters(self, configurable=None): def getChapters(self):
"Chapters will be tuples of (title,html)" "Chapters will be tuples of (title,html)"
retval = [] retval = []
if configurable and \ if self.getConfig('add_chapter_numbers') and \
configurable.getConfig('add_chapter_numbers') and \ self.getConfig('chapter_title_add_pattern'):
configurable.getConfig('chapter_title_add_pattern'):
for index, (title,html) in enumerate(self.chapters): for index, (title,html) in enumerate(self.chapters):
retval.append( (string.Template(configurable.getConfig('chapter_title_add_pattern')).substitute({'index':index+1,'title':title}),html) ) retval.append( (string.Template(self.getConfig('chapter_title_add_pattern')).substitute({'index':index+1,'title':title}),html) )
else: else:
retval = self.chapters retval = self.chapters
@ -355,11 +402,11 @@ class Story:
# pass fetch in from adapter in case we need the cookies collected # pass fetch in from adapter in case we need the cookies collected
# as well as it's a base_story class method. # as well as it's a base_story class method.
def addImgUrl(self,configurable,parenturl,url,fetch,cover=False,coverexclusion=None): def addImgUrl(self,parenturl,url,fetch,cover=False,coverexclusion=None):
# otherwise it saves the image in the epub even though it # otherwise it saves the image in the epub even though it
# isn't used anywhere. # isn't used anywhere.
if cover and configurable.getConfig('never_make_cover'): if cover and self.getConfig('never_make_cover'):
return return
url = url.strip() # ran across an image with a space in the url = url.strip() # ran across an image with a space in the
@ -397,20 +444,20 @@ class Story:
if imgurl not in self.imgurls: if imgurl not in self.imgurls:
parsedUrl = urlparse.urlparse(imgurl) parsedUrl = urlparse.urlparse(imgurl)
try: try:
sizes = [ int(x) for x in configurable.getConfigList('image_max_size') ] sizes = [ int(x) for x in self.getConfigList('image_max_size') ]
except Exception, e: except Exception, e:
raise exceptions.FailedToDownload("Failed to parse image_max_size from personal.ini:%s\nException: %s"%(configurable.getConfigList('image_max_size'),e)) raise exceptions.FailedToDownload("Failed to parse image_max_size from personal.ini:%s\nException: %s"%(self.getConfigList('image_max_size'),e))
try: try:
(data,ext,mime) = convert_image(imgurl, (data,ext,mime) = convert_image(imgurl,
fetch(imgurl), fetch(imgurl),
sizes, sizes,
configurable.getConfig('grayscale_images')) self.getConfig('grayscale_images'))
except Exception, e: except Exception, e:
print("Failed to load or convert image, skipping:\n%s\nException: %s"%(imgurl,e)) print("Failed to load or convert image, skipping:\n%s\nException: %s"%(imgurl,e))
return "failedtoload" return "failedtoload"
# explicit cover, make the first image. # explicit cover, make the first image.
if cover and not configurable.getConfig('never_make_cover'): if cover and not self.getConfig('never_make_cover'):
if len(self.imgtuples) > 0 and 'cover' in self.imgtuples[0]['newsrc']: if len(self.imgtuples) > 0 and 'cover' in self.imgtuples[0]['newsrc']:
# remove existing cover, if there is one. # remove existing cover, if there is one.
del self.imgurls[0] del self.imgurls[0]
@ -427,8 +474,8 @@ class Story:
# NOT never_make_cover AND # NOT never_make_cover AND
# either no coverexclusion OR coverexclusion doesn't match # either no coverexclusion OR coverexclusion doesn't match
if self.cover == None and \ if self.cover == None and \
configurable.getConfig('make_firstimage_cover') and \ self.getConfig('make_firstimage_cover') and \
not configurable.getConfig('never_make_cover') and \ not self.getConfig('never_make_cover') and \
(not coverexclusion or not re.search(coverexclusion,imgurl)): (not coverexclusion or not re.search(coverexclusion,imgurl)):
newsrc = "images/cover.%s"%ext newsrc = "images/cover.%s"%ext
self.cover=newsrc self.cover=newsrc
@ -457,7 +504,7 @@ class Story:
return retlist return retlist
def __str__(self):
    """Return a debugging string showing the story's metadata dict."""
    return "".join(["Metadata: ", str(self.metadata)])
def setReplace(self,replace): def setReplace(self,replace):
for line in replace.splitlines(): for line in replace.splitlines():

View file

@ -36,44 +36,12 @@ class BaseStoryWriter(Configurable):
def getFormatExt(): def getFormatExt():
return '.bse' return '.bse'
def __init__(self, config, adapter): def __init__(self, configuration, adapter):
Configurable.__init__(self, config) Configurable.__init__(self, configuration)
self.setSectionOrder(adapter.getConfigSection(),self.getFormatName())
self.adapter = adapter self.adapter = adapter
self.story = adapter.getStoryMetadataOnly() # only cache the metadata initially. self.story = adapter.getStoryMetadataOnly() # only cache the metadata initially.
self.story.setReplace(self.getConfig('replace_metadata'))
self.validEntries = [
'category',
'genre',
'language',
'characters',
'ships',
'series',
'status',
'datePublished',
'dateUpdated',
'dateCreated',
'rating',
'warnings',
'numChapters',
'numWords',
'site',
'storyId',
'authorId',
'extratags',
'title',
'storyUrl',
'description',
'author',
'authorUrl',
'formatname',
'formatext',
'siteabbrev',
'version']
# fall back labels. # fall back labels.
self.titleLabels = { self.titleLabels = {
'category':'Category', 'category':'Category',
@ -148,11 +116,11 @@ class BaseStoryWriter(Configurable):
if WIDE_ENTRY==None: if WIDE_ENTRY==None:
WIDE_ENTRY=ENTRY WIDE_ENTRY=ENTRY
titleEntriesList = self.getConfigList("titlepage_entries") titleEntriesList = self.getConfigList("titlepage_entries") + self.getConfigList("extra_titlepage_entries")
wideTitleEntriesList = self.getConfigList("wide_titlepage_entries") wideTitleEntriesList = self.getConfigList("wide_titlepage_entries")
for entry in titleEntriesList: for entry in titleEntriesList:
if entry in self.validEntries: if self.isValidMetaEntry(entry):
if self.story.getMetadata(entry): if self.story.getMetadata(entry):
if entry in wideTitleEntriesList: if entry in wideTitleEntriesList:
TEMPLATE=WIDE_ENTRY TEMPLATE=WIDE_ENTRY
@ -161,9 +129,12 @@ class BaseStoryWriter(Configurable):
if self.hasConfig(entry+"_label"): if self.hasConfig(entry+"_label"):
label=self.getConfig(entry+"_label") label=self.getConfig(entry+"_label")
else: elif entry in self.titleLabels:
print("Using fallback label for %s_label"%entry) logging.debug("Using fallback label for %s_label"%entry)
label=self.titleLabels[entry] label=self.titleLabels[entry]
else:
label="%s"%entry.title()
logging.debug("No known label for %s, fallback to '%s'"%(entry,label))
# If the label for the title entry is empty, use the # If the label for the title entry is empty, use the
# 'no title' option if there is one. # 'no title' option if there is one.
@ -184,10 +155,10 @@ class BaseStoryWriter(Configurable):
names as Story.metadata, but ENTRY should use index and chapter. names as Story.metadata, but ENTRY should use index and chapter.
""" """
# Only do TOC if there's more than one chapter and it's configured. # Only do TOC if there's more than one chapter and it's configured.
if len(self.story.getChapters(self)) > 1 and self.getConfig("include_tocpage") and not self.metaonly : if len(self.story.getChapters()) > 1 and self.getConfig("include_tocpage") and not self.metaonly :
self._write(out,START.substitute(self.story.getAllMetadata())) self._write(out,START.substitute(self.story.getAllMetadata()))
for index, (title,html) in enumerate(self.story.getChapters(self)): for index, (title,html) in enumerate(self.story.getChapters()):
if html: if html:
self._write(out,ENTRY.substitute({'chapter':title, 'index':"%04d"%(index+1)})) self._write(out,ENTRY.substitute({'chapter':title, 'index':"%04d"%(index+1)}))
@ -202,9 +173,11 @@ class BaseStoryWriter(Configurable):
# minor cheat, tucking css into metadata. # minor cheat, tucking css into metadata.
if self.getConfig("output_css"): if self.getConfig("output_css"):
self.story.metadata["output_css"] = self.getConfig("output_css") self.story.setMetadata("output_css",
self.getConfig("output_css"),
condremoveentities=False)
else: else:
self.story.metadata["output_css"] = '' self.story.setMetadata("output_css",'')
if not outstream: if not outstream:
close=True close=True
@ -261,29 +234,6 @@ class BaseStoryWriter(Configurable):
if close: if close:
outstream.close() outstream.close()
def getTags(self, removeallentities=False):
# set to avoid duplicates subject tags.
subjectset = set()
if self.story.getMetadataRaw('dateUpdated'):
# Last Update tags for Bill.
self.story.addToList('lastupdate',self.story.getMetadataRaw('dateUpdated').strftime("Last Update Year/Month: %Y/%m"))
self.story.addToList('lastupdate',self.story.getMetadataRaw('dateUpdated').strftime("Last Update: %Y/%m/%d"))
for entry in self.validEntries:
if entry in self.getConfigList("include_subject_tags") and \
entry not in self.story.getLists() and \
self.story.getMetadata(entry):
subjectset.add(self.getMetadata(entry, removeallentities))
# listables all go into dc:subject tags, but only if they are configured.
for (name,lst) in self.story.getLists(removeallentities).iteritems():
if name in self.getConfigList("include_subject_tags"):
for tag in lst:
subjectset.add(tag)
return list(subjectset)
def writeStoryImpl(self, out): def writeStoryImpl(self, out):
"Must be overriden by sub classes." "Must be overriden by sub classes."
pass pass

View file

@ -193,8 +193,7 @@ ${value}<br />
Switch rindex to index to search from top instead of bottom. Switch rindex to index to search from top instead of bottom.
""" """
values = {} values = {}
for entry in self.getConfigList("logpage_entries"): for entry in self.getConfigList("logpage_entries") + self.getConfigList("extra_logpage_entries"):
if entry in self.validEntries:
try: try:
# <span id="dateUpdated">1975-04-15</span> # <span id="dateUpdated">1975-04-15</span>
span = '<span id="%s">'%entry span = '<span id="%s">'%entry
@ -209,15 +208,18 @@ ${value}<br />
def _makeLogEntry(self, oldvalues={}): def _makeLogEntry(self, oldvalues={}):
retval = "<p class='log_entry'>" retval = "<p class='log_entry'>"
for entry in self.getConfigList("logpage_entries"): for entry in self.getConfigList("logpage_entries") + self.getConfigList("extra_logpage_entries"):
if entry in self.validEntries: if self.isValidMetaEntry(entry):
val = self.story.getMetadata(entry) val = self.story.getMetadata(entry)
if val and ( entry not in oldvalues or val != oldvalues[entry] ): if val and ( entry not in oldvalues or val != oldvalues[entry] ):
if self.hasConfig(entry+"_label"): if self.hasConfig(entry+"_label"):
label=self.getConfig(entry+"_label") label=self.getConfig(entry+"_label")
else: elif entry in self.titleLabels:
print("Using fallback label for %s_label"%entry) logging.debug("Using fallback label for %s_label"%entry)
label=self.titleLabels[entry] label=self.titleLabels[entry]
else:
label="%s"%entry.title()
logging.debug("No known label for %s, fallback to '%s'"%(entry,label))
retval = retval + self.EPUB_LOG_ENTRY.substitute({'id':entry, retval = retval + self.EPUB_LOG_ENTRY.substitute({'id':entry,
'label':label, 'label':label,
@ -342,7 +344,7 @@ ${value}<br />
metadata.appendChild(newTag(contentdom,"dc:description",text= metadata.appendChild(newTag(contentdom,"dc:description",text=
self.getMetadata('description'))) self.getMetadata('description')))
for subject in self.getTags(): for subject in self.story.getSubjectTags():
metadata.appendChild(newTag(contentdom,"dc:subject",text=subject)) metadata.appendChild(newTag(contentdom,"dc:subject",text=subject))
@ -441,7 +443,7 @@ div { margin: 0pt; padding: 0pt; }
if self.getConfig("include_titlepage"): if self.getConfig("include_titlepage"):
items.append(("title_page","OEBPS/title_page.xhtml","application/xhtml+xml","Title Page")) items.append(("title_page","OEBPS/title_page.xhtml","application/xhtml+xml","Title Page"))
itemrefs.append("title_page") itemrefs.append("title_page")
if len(self.story.getChapters(self)) > 1 and self.getConfig("include_tocpage") and not self.metaonly : if len(self.story.getChapters()) > 1 and self.getConfig("include_tocpage") and not self.metaonly :
items.append(("toc_page","OEBPS/toc_page.xhtml","application/xhtml+xml","Table of Contents")) items.append(("toc_page","OEBPS/toc_page.xhtml","application/xhtml+xml","Table of Contents"))
itemrefs.append("toc_page") itemrefs.append("toc_page")
@ -449,7 +451,7 @@ div { margin: 0pt; padding: 0pt; }
items.append(("log_page","OEBPS/log_page.xhtml","application/xhtml+xml","Update Log")) items.append(("log_page","OEBPS/log_page.xhtml","application/xhtml+xml","Update Log"))
itemrefs.append("log_page") itemrefs.append("log_page")
for index, (title,html) in enumerate(self.story.getChapters(self)): for index, (title,html) in enumerate(self.story.getChapters()):
if html: if html:
i=index+1 i=index+1
items.append(("file%04d"%i, items.append(("file%04d"%i,
@ -587,7 +589,7 @@ div { margin: 0pt; padding: 0pt; }
outputepub.writestr("OEBPS/log_page.xhtml",logpageIO.getvalue()) outputepub.writestr("OEBPS/log_page.xhtml",logpageIO.getvalue())
logpageIO.close() logpageIO.close()
for index, (title,html) in enumerate(self.story.getChapters(self)): for index, (title,html) in enumerate(self.story.getChapters()):
if html: if html:
logging.debug('Writing chapter text for: %s' % title) logging.debug('Writing chapter text for: %s' % title)
fullhtml = self.EPUB_CHAPTER_START.substitute({'chapter':title, 'index':index+1}) + html + self.EPUB_CHAPTER_END.substitute({'chapter':title, 'index':index+1}) fullhtml = self.EPUB_CHAPTER_START.substitute({'chapter':title, 'index':index+1}) + html + self.EPUB_CHAPTER_END.substitute({'chapter':title, 'index':index+1})

View file

@ -94,7 +94,7 @@ ${output_css}
self.HTML_TOC_ENTRY, self.HTML_TOC_ENTRY,
self.HTML_TOC_PAGE_END) self.HTML_TOC_PAGE_END)
for index, (title,html) in enumerate(self.story.getChapters(self)): for index, (title,html) in enumerate(self.story.getChapters()):
if html: if html:
logging.debug('Writing chapter text for: %s' % title) logging.debug('Writing chapter text for: %s' % title)
self._write(out,self.HTML_CHAPTER_START.substitute({'chapter':title, 'index':"%04d"%(index+1)})) self._write(out,self.HTML_CHAPTER_START.substitute({'chapter':title, 'index':"%04d"%(index+1)}))

View file

@ -169,7 +169,7 @@ ${value}<br />
# files.append(tocpageIO.getvalue()) # files.append(tocpageIO.getvalue())
# tocpageIO.close() # tocpageIO.close()
for index, (title,html) in enumerate(self.story.getChapters(self)): for index, (title,html) in enumerate(self.story.getChapters()):
if html: if html:
logging.debug('Writing chapter text for: %s' % title) logging.debug('Writing chapter text for: %s' % title)
fullhtml = self.MOBI_CHAPTER_START.substitute({'chapter':title, 'index':index+1}) + html + self.MOBI_CHAPTER_END.substitute({'chapter':title, 'index':index+1}) fullhtml = self.MOBI_CHAPTER_START.substitute({'chapter':title, 'index':index+1}) + html + self.MOBI_CHAPTER_END.substitute({'chapter':title, 'index':index+1})

View file

@ -133,7 +133,7 @@ End file.
self._write(out,self.lineends(self.wraplines(towrap))) self._write(out,self.lineends(self.wraplines(towrap)))
for index, (title,html) in enumerate(self.story.getChapters(self)): for index, (title,html) in enumerate(self.story.getChapters()):
if html: if html:
logging.debug('Writing chapter text for: %s' % title) logging.debug('Writing chapter text for: %s' % title)
self._write(out,self.lineends(self.wraplines(removeAllEntities(self.TEXT_CHAPTER_START.substitute({'chapter':title, 'index':index+1}))))) self._write(out,self.lineends(self.wraplines(removeAllEntities(self.TEXT_CHAPTER_START.substitute({'chapter':title, 'index':index+1})))))

View file

@ -58,7 +58,23 @@
<p> <p>
New sites bloodties-fans.com and thehookupzone.net, thanks Marillapm! New sites bloodties-fans.com and thehookupzone.net, thanks Marillapm!
</p> </p>
<h3>New Features and Fixes</h3>
<p> <p>
Added an extra metadata feature for certain sites. This release
contains some metadata features that have been requested,
plus quite a bit of internal refactoring to allow them.
This is primarily intended for calibre plugin users, so
it's <a href="">documented in the plugin forum</a>.
<br /> Also added cover images for dramione.org, thanks
ilovejedd.
</p>
<p>
Fixes for: squidge.org/peja Rating including '[' when
there are no stars; loosen URL check on AO3 to allow more
copy/paste URLs; fix author on castlefans.org due to
site/skin changes.
</p>
<p> <p>
Questions? Check out our Questions? Check out our
<a href="http://code.google.com/p/fanficdownloader/wiki/FanFictionDownloaderFAQs">FAQs</a>. <a href="http://code.google.com/p/fanficdownloader/wiki/FanFictionDownloaderFAQs">FAQs</a>.
@ -67,7 +83,7 @@
If you have any problems with this application, please If you have any problems with this application, please
report them in report them in
the <a href="http://groups.google.com/group/fanfic-downloader">FanFictionDownLoader Google Group</a>. The the <a href="http://groups.google.com/group/fanfic-downloader">FanFictionDownLoader Google Group</a>. The
<a href="http://4-4-24.fanfictiondownloader.appspot.com">Previous Version</a> is also available for you to use if necessary. <a href="http://4-4-25.fanfictiondownloader.appspot.com">Previous Version</a> is also available for you to use if necessary.
</p> </p>
<div id='error'> <div id='error'>
{{ error_message }} {{ error_message }}

27
main.py
View file

@ -29,7 +29,6 @@ import datetime
import traceback import traceback
from StringIO import StringIO from StringIO import StringIO
import ConfigParser
## Just to shut up the appengine warning about "You are using the ## Just to shut up the appengine warning about "You are using the
## default Django version (0.96). The default Django version will ## default Django version (0.96). The default Django version will
@ -57,22 +56,25 @@ from google.appengine.runtime import DeadlineExceededError
from ffstorage import * from ffstorage import *
from fanficdownloader import adapters, writers, exceptions from fanficdownloader import adapters, writers, exceptions
from fanficdownloader.configurable import Configuration
class UserConfigServer(webapp2.RequestHandler): class UserConfigServer(webapp2.RequestHandler):
def getUserConfig(self,user):
config = ConfigParser.SafeConfigParser() def getUserConfig(self,user,url,fileformat):
configuration = Configuration(adapters.getConfigSectionFor(url),fileformat)
logging.debug('reading defaults.ini config file') logging.debug('reading defaults.ini config file')
config.read('defaults.ini') configuration.read('defaults.ini')
## Pull user's config record. ## Pull user's config record.
l = UserConfig.all().filter('user =', user).fetch(1) l = UserConfig.all().filter('user =', user).fetch(1)
if l and l[0].config: if l and l[0].config:
uconfig=l[0] uconfig=l[0]
#logging.debug('reading config from UserConfig(%s)'%uconfig.config) #logging.debug('reading config from UserConfig(%s)'%uconfig.config)
config.readfp(StringIO(uconfig.config)) configuration.readfp(StringIO(uconfig.config))
return config return configuration
class MainHandler(webapp2.RequestHandler): class MainHandler(webapp2.RequestHandler):
def get(self): def get(self):
@ -137,7 +139,8 @@ class EditConfigServer(UserConfigServer):
uconfig.config = self.request.get('config').encode('utf8')[:10000] ## just in case. uconfig.config = self.request.get('config').encode('utf8')[:10000] ## just in case.
uconfig.put() uconfig.put()
try: try:
config = self.getUserConfig(user) # just getting config for testing purposes.
configuration = self.getUserConfig(user,"test1.com","epub")
self.redirect("/?error=configsaved") self.redirect("/?error=configsaved")
except Exception, e: except Exception, e:
logging.info("Saved Config Failed:%s"%e) logging.info("Saved Config Failed:%s"%e)
@ -367,12 +370,12 @@ class FanfictionDownloader(UserConfigServer):
adapter = None adapter = None
try: try:
try: try:
config = self.getUserConfig(user) configuration = self.getUserConfig(user,url,format)
except Exception, e: except Exception, e:
self.redirect("/?error=custom&errtext=%s"%urlEscape("There's an error in your User Configuration: "+str(e))) self.redirect("/?error=custom&errtext=%s"%urlEscape("There's an error in your User Configuration: "+str(e)))
return return
adapter = adapters.getAdapter(config,url,format) adapter = adapters.getAdapter(configuration,url)
logging.info('Created an adaper: %s' % adapter) logging.info('Created an adaper: %s' % adapter)
if len(login) > 1: if len(login) > 1:
@ -474,8 +477,8 @@ class FanfictionDownloaderTask(UserConfigServer):
logging.info('Creating adapter...') logging.info('Creating adapter...')
try: try:
config = self.getUserConfig(user) configuration = self.getUserConfig(user,url,format)
adapter = adapters.getAdapter(config,url,format) adapter = adapters.getAdapter(configuration,url)
logging.info('Created an adapter: %s' % adapter) logging.info('Created an adapter: %s' % adapter)
@ -488,7 +491,7 @@ class FanfictionDownloaderTask(UserConfigServer):
# adapter.getStoryMetadataOnly() only fetches enough to # adapter.getStoryMetadataOnly() only fetches enough to
# get metadata. writer.writeStory() will call # get metadata. writer.writeStory() will call
# adapter.getStory(), too. # adapter.getStory(), too.
writer = writers.getWriter(format,config,adapter) writer = writers.getWriter(format,configuration,adapter)
download.name = writer.getOutputFileName() download.name = writer.getOutputFileName()
#logging.debug('output_filename:'+writer.getConfig('output_filename')) #logging.debug('output_filename:'+writer.getConfig('output_filename'))
logging.debug('getOutputFileName:'+writer.getOutputFileName()) logging.debug('getOutputFileName:'+writer.getOutputFileName())

View file

@ -67,9 +67,14 @@ authorId_label:Author ID
## show up in Calibre as tags. Also carried into mobi when converted. ## show up in Calibre as tags. Also carried into mobi when converted.
extratags_label:Extra Tags extratags_label:Extra Tags
## The version of fanficdownloader ## The version of fanficdownloader
##
version_label:FFDL Version version_label:FFDL Version
## Date formats used by FFDL. Published and Updated dates don't include a time.
## Note that ini format requires % to be escaped as %%.
dateCreated_format:%%Y-%%m-%%d %%H:%%M:%%S
datePublished_format:%%Y-%%m-%%d
dateUpdated_format:%%Y-%%m-%%d
## items to include in the title page ## items to include in the title page
## Empty metadata entries will *not* appear, even if in the list. ## Empty metadata entries will *not* appear, even if in the list.
## You can include extra text or HTML that will be included as-is in ## You can include extra text or HTML that will be included as-is in
@ -463,6 +468,55 @@ extraships:Draco Malfoy/Hermione Granger
## personal.ini, not defaults.ini. ## personal.ini, not defaults.ini.
#is_adult:true #is_adult:true
## Some adapters collect additional meta information beyond the
## standard ones. They need to be defined in extra_valid_entries to
## tell the rest of the FFDL system about them. They can be used in
## include_subject_tags, titlepage_entries, extra_titlepage_entries,
## logpage_entries, extra_logpage_entries, and include_in_* config
## items. You can also add additional entries here to build up
## composite metadata entries. dramione.org, for example, adds
## 'cliches' and then defines it as the composite of hermiones,dracos
## in include_in_cliches.
extra_valid_entries:themes,hermiones,dracos,timeline,cliches
include_in_cliches:hermiones,dracos
## For another example, you could, by uncommenting this line, include
## themes in with genre metadata.
#include_in_genre:genre, themes
## You can give each new valid entry a specific label for use on
## titlepage and logpage. If not defined, it will simply be the
## entry name in title case (e.g. 'themes' becomes 'Themes').
themes_label:Themes
hermiones_label:Hermiones
dracos_label:Dracos
timeline_label:Timeline
cliches_label:Character Cliches
## extra_titlepage_entries (and extra_logpage_entries) *add* to
## titlepage_entries (and logpage_entries) so you can add site
## specific entries to titlepage/logpage without having to copy the
## entire titlepage_entries line. (But if you want them to appear
## anywhere other than at the end, you will need to copy titlepage_entries.)
#extra_titlepage_entries: themes,timeline,cliches
#extra_logpage_entries: themes,timeline,cliches
#extra_subject_tags: themes,timeline,cliches
## (Plugin Only) - You can also populate calibre custom columns with
## the site specific metadata using custom_columns_settings (but only
## if 'Allow custom_columns_settings from personal.ini' is checked in
## the plugin GUI config.) There are three parts, the entry name,
## then the label of the calibre custom column, then (optionally) a
## 'mode'. 'r' to Replace any existing values, 'a' to Add to existing
## value (use with tag-like columns), and 'n' for setting on New books
## only.
## Make sure to keep at least one space at the start of each
## line.
#custom_columns_settings:
# cliches=>#acolumn,r
# themes=>#bcolumn,a
# timeline=>#ccolumn,n
[erosnsappho.sycophanthex.com] [erosnsappho.sycophanthex.com]
## Site dedicated to these categories/characters/ships ## Site dedicated to these categories/characters/ships
extracategories:Harry Potter extracategories:Harry Potter
@ -774,9 +828,13 @@ extraships:Sesshoumaru/Kagome
[www.fanfiction.net] [www.fanfiction.net]
## fanfiction.net's 'cover' images are really just tiny thumbnails. ## fanfiction.net's 'cover' images are really just tiny thumbnails.
## Comment this out or change it to false to use them anyway. ## Change this to false to use them anyway.
never_make_cover: true never_make_cover: true
## Extra metadata that this adapter knows about. See [dramione.org]
## for examples of how to use them.
extra_valid_entries:reviews,favs,follows
[www.fanfiktion.de] [www.fanfiktion.de]
## Some sites require login (or login for some rated stories) The ## Some sites require login (or login for some rated stories) The
## program can prompt you, or you can save it in config. In ## program can prompt you, or you can save it in config. In

View file

@ -42,7 +42,8 @@
## Most common, I expect will be using this to save username/passwords ## Most common, I expect will be using this to save username/passwords
## for different sites. Here are a few examples. ## for different sites. Here are a few examples. See defaults.ini
## for the full list.
[www.twilighted.net] [www.twilighted.net]
#username:YourPenname #username:YourPenname