Mirror of https://github.com/JimmXinu/FanFicFare.git
Synced 2025-12-22 16:54:06 +01:00
Extra metadata feature (refactoring), loosen URL check on AO3, fix auth on castlefans.org.
Commit 4bb91cd0c5 (parent e5b0974d0e)
27 changed files with 596 additions and 303 deletions
app.yaml (2 lines changed)
@@ -1,6 +1,6 @@
# ffd-retief-hrd fanfictiondownloader
application: fanfictiondownloader
version: 4-4-25
version: 4-4-26
runtime: python27
api_version: 1
threadsafe: true

@@ -27,7 +27,7 @@ class FanFictionDownLoaderBase(InterfaceActionBase):
description = 'UI plugin to download FanFiction stories from various sites.'
supported_platforms = ['windows', 'osx', 'linux']
author = 'Jim Miller'
version = (1, 6, 9)
version = (1, 6, 10)
minimum_calibre_version = (0, 8, 57)

#: This field defines the GUI plugin class that contains all the code

@@ -64,6 +64,7 @@ default_prefs['countpagesstats'] = []
default_prefs['errorcol'] = ''
default_prefs['custom_cols'] = {}
default_prefs['custom_cols_newonly'] = {}
default_prefs['allow_custcol_from_ini'] = True

default_prefs['std_cols_newonly'] = {}

@@ -258,7 +259,7 @@ class ConfigWidget(QWidget):
# error column
prefs['errorcol'] = unicode(self.cust_columns_tab.errorcol.itemData(self.cust_columns_tab.errorcol.currentIndex()).toString())

# cust cols
# cust cols tab
colsmap = {}
for (col,combo) in self.cust_columns_tab.custcol_dropdowns.iteritems():
val = unicode(combo.itemData(combo.currentIndex()).toString())

@@ -272,6 +273,8 @@ class ConfigWidget(QWidget):
colsnewonly[col] = checkbox.isChecked()
prefs['custom_cols_newonly'] = colsnewonly

prefs['allow_custcol_from_ini'] = self.cust_columns_tab.allow_custcol_from_ini.isChecked()

prefs.save_to_db()

def edit_shortcuts(self):

@@ -437,7 +440,7 @@ class PersonalIniTab(QWidget):
self.ini.setText(prefs['personal.ini'])
self.l.addWidget(self.ini)

self.defaults = QPushButton('View Defaults', self)
self.defaults = QPushButton('View Defaults (plugin-defaults.ini)', self)
self.defaults.setToolTip("View all of the plugin's configurable settings\nand their default settings.")
self.defaults.clicked.connect(self.show_defaults)
self.l.addWidget(self.defaults)

@@ -456,7 +459,7 @@ class ShowDefaultsIniDialog(QDialog):
self.resize(600, 500)
self.l = QVBoxLayout()
self.setLayout(self.l)
self.label = QLabel("Plugin Defaults (Read-Only)")
self.label = QLabel("Plugin Defaults (plugin-defaults.ini) (Read-Only)")
self.label.setToolTip("These are all of the plugin's configurable options\nand their default settings.")
self.setWindowTitle(_('Plugin Defaults'))
self.setWindowIcon(icon)

@@ -595,6 +598,8 @@ class GenerateCoverTab(QWidget):
horz.addWidget(dropdown)
self.sl.addLayout(horz)

self.sl.insertStretch(-1)

self.gcnewonly = QCheckBox("Run Generate Cover Only on New Books",self)
self.gcnewonly.setToolTip("Default is to run GC any time the calibre metadata is updated.")
self.gcnewonly.setChecked(prefs['gcnewonly'])

@@ -605,8 +610,6 @@ class GenerateCoverTab(QWidget):
self.allow_gc_from_ini.setChecked(prefs['allow_gc_from_ini'])
self.l.addWidget(self.allow_gc_from_ini)

self.l.insertStretch(-1)

class CountPagesTab(QWidget):

def __init__(self, parent_dialog, plugin_action):

@@ -838,11 +841,16 @@ class CustomColumnsTab(QWidget):

self.sl.insertStretch(-1)

self.l.addSpacing(5)
self.allow_custcol_from_ini = QCheckBox('Allow custom_columns_settings from personal.ini to override',self)
self.allow_custcol_from_ini.setToolTip("The personal.ini parameter custom_columns_settings allows you to set custom columns to site specific values that aren't common to all sites.<br \>custom_columns_settings is ignored when this is off.")
self.allow_custcol_from_ini.setChecked(prefs['allow_custcol_from_ini'])
self.l.addWidget(self.allow_custcol_from_ini)

self.l.addSpacing(5)
label = QLabel("Special column:")
label.setWordWrap(True)
self.l.addWidget(label)
self.l.addSpacing(5)

horz = QHBoxLayout()
label = QLabel("Update/Overwrite Error Column:")

@@ -8,7 +8,6 @@ __copyright__ = '2012, Jim Miller'
__docformat__ = 'restructuredtext en'

import time, os, copy, threading, re, platform
from ConfigParser import SafeConfigParser
from StringIO import StringIO
from functools import partial
from datetime import datetime

@@ -37,7 +36,7 @@ from calibre_plugins.fanfictiondownloader_plugin.common_utils import (set_plugin
create_menu_action_unique, get_library_uuid)

from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader import adapters, writers, exceptions
#from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader.htmlcleanup import stripHTML
from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader.configurable import Configuration
from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader.epubutils import get_dcsource, get_dcsource_chaptercount, get_story_url_from_html
from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader.geturls import get_urls_from_page

@@ -249,10 +248,13 @@ class FanFictionDownLoaderPlugin(InterfaceAction):
return
print("get_urls_from_page URL:%s"%d.url.text())

ffdlconfig = SafeConfigParser()
ffdlconfig.readfp(StringIO(get_resources("plugin-defaults.ini")))
ffdlconfig.readfp(StringIO(prefs['personal.ini']))
url_list = get_urls_from_page("%s"%d.url.text(),ffdlconfig)
if 'archiveofourown.org' in url:
configuration = Configuration(adapters.getConfigSectionFor(url),"EPUB")
configuration.readfp(StringIO(get_resources("plugin-defaults.ini")))
configuration.readfp(StringIO(options['personal.ini']))
else:
configuration = None
url_list = get_urls_from_page("%s"%d.url.text(),configuration)

if url_list:
d = ViewLog(_("List of URLs"),"\n".join(url_list),parent=self.gui)

@@ -442,12 +444,10 @@ keep_summary_html:true
make_firstimage_cover:true
''' + options['personal.ini']

## was self.ffdlconfig, but we need to be able to change it
## when doing epub update.
ffdlconfig = SafeConfigParser()
ffdlconfig.readfp(StringIO(get_resources("plugin-defaults.ini")))
ffdlconfig.readfp(StringIO(options['personal.ini']))
adapter = adapters.getAdapter(ffdlconfig,url,fileform)
configuration = Configuration(adapters.getConfigSectionFor(url),fileform)
configuration.readfp(StringIO(get_resources("plugin-defaults.ini")))
configuration.readfp(StringIO(options['personal.ini']))
adapter = adapters.getAdapter(configuration,url)

## three tries, that's enough if both user/pass & is_adult needed,
## or a couple tries of one or the other

@@ -470,13 +470,13 @@ make_firstimage_cover:true

# let other exceptions percolate up.
story = adapter.getStoryMetadataOnly()
writer = writers.getWriter(options['fileform'],adapter.config,adapter)
writer = writers.getWriter(options['fileform'],configuration,adapter)

book['all_metadata'] = story.getAllMetadata(removeallentities=True)
book['title'] = story.getMetadata("title", removeallentities=True)
book['author_sort'] = book['author'] = story.getList("author", removeallentities=True)
book['publisher'] = story.getMetadata("site")
book['tags'] = writer.getTags(removeallentities=True) # getTags could be moved up into adapter now. Adapter didn't used to know the fileform
book['tags'] = story.getSubjectTags(removeallentities=True)
book['comments'] = sanitize_comments_html(story.getMetadata("description"))
book['series'] = story.getMetadata("series", removeallentities=True)

@@ -913,7 +913,7 @@ make_firstimage_cover:true
except AttributeError:
print("AttributeError? %s"%col)
pass

db.set_metadata(book_id,mi)

# do configured column updates here.

@@ -950,6 +950,52 @@ make_firstimage_cover:true
val = book['all_metadata']['status'] == 'In-Progress'
db.set_custom(book_id, val, label=label, commit=False)

adapter = None
if prefs['allow_custcol_from_ini']:
configuration = Configuration(adapters.getConfigSectionFor(book['url']),options['fileform'])
configuration.readfp(StringIO(get_resources("plugin-defaults.ini")))
configuration.readfp(StringIO(options['personal.ini']))
adapter = adapters.getAdapter(configuration,book['url'])

# meta => custcol[,a|n|r]
# cliches=>\#acolumn,r
for line in adapter.getConfig('custom_columns_settings').splitlines():
if "=>" in line:
(meta,custcol) = map( lambda x: x.strip(), line.split("=>") )
flag='r'
if "," in custcol:
(custcol,flag) = map( lambda x: x.strip(), custcol.split(",") )

#print("meta:(%s) => custcol:(%s), flag(%s) "%(meta,custcol,flag))

if meta not in book['all_metadata']:
print("No value for %s, skipping custom column(%s) update."%(meta,custcol))
continue

if custcol not in custom_columns:
print("No custom column(%s), skipping."%(custcol))
continue
else:
coldef = custom_columns[custcol]
label = coldef['label']

if flag == 'r' or book['added']:
db.set_custom(book_id, book['all_metadata'][meta], label=label, commit=False)

if flag == 'a':
try:
existing=db.get_custom(book_id,label=label,index_is_id=True)
if isinstance(existing,list):
vallist = existing
else :
vallist = [existing]
vallist.append(book['all_metadata'][meta])
except:
vallist = [book['all_metadata'][meta]]

db.set_custom(book_id, ", ".join(vallist), label=label, commit=False)

db.commit()

if 'Generate Cover' in self.gui.iactions and (book['added'] or not prefs['gcnewonly']):

@@ -961,10 +1007,11 @@ make_firstimage_cover:true
gc_plugin = self.gui.iactions['Generate Cover']
setting_name = None
if prefs['allow_gc_from_ini']:
ffdlconfig = SafeConfigParser()
ffdlconfig.readfp(StringIO(get_resources("plugin-defaults.ini")))
ffdlconfig.readfp(StringIO(prefs['personal.ini']))
adapter = adapters.getAdapter(ffdlconfig,book['url'],options['fileform'])
if not adapter: # might already have it from allow_custcol_from_ini
configuration = Configuration(adapters.getConfigSectionFor(book['url']),options['fileform'])
configuration.readfp(StringIO(get_resources("plugin-defaults.ini")))
configuration.readfp(StringIO(options['personal.ini']))
adapter = adapters.getAdapter(configuration,book['url'])

# template => regexp to match => GC Setting to use.
# generate_cover_settings:

@@ -973,7 +1020,7 @@ make_firstimage_cover:true
if "=>" in line:
(template,regexp,setting) = map( lambda x: x.strip(), line.split("=>") )
value = Template(template).safe_substitute(book['all_metadata']).encode('utf8')
print("%s(%s) => %s => %s"%(template,value,regexp,setting))
# print("%s(%s) => %s => %s"%(template,value,regexp,setting))
if re.search(regexp,value):
setting_name = setting
break

@@ -1192,11 +1239,11 @@ make_firstimage_cover:true
return None

def _is_good_downloader_url(self,url):
# this is the accepted way to 'check for existance'? really?
# this is the accepted way to 'check for existance of a class variable'? really?
try:
self.dummyconfig
except AttributeError:
self.dummyconfig = SafeConfigParser()
self.dummyconfig = Configuration("test1.com","EPUB")
# pulling up an adapter is pretty low over-head. If
# it fails, it's a bad url.
try:

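The new custom-column block above consumes personal.ini lines of the form "meta => custcol[,a|n|r]". A minimal sketch of that parsing, with a made-up example line (the column name and flag are illustrative, not from any shipped ini); judging from the flag checks above, 'r' replaces on every update, 'a' appends, and 'n' only sets on newly added books:

    # Illustrative only: mirrors the "meta => custcol[,a|n|r]" parsing above.
    line = "cliches=>#acolumn,a"   # hypothetical personal.ini entry
    if "=>" in line:
        (meta, custcol) = map(lambda x: x.strip(), line.split("=>"))
        flag = 'r'                 # default when no flag is given
        if "," in custcol:
            (custcol, flag) = map(lambda x: x.strip(), custcol.split(","))
        print("meta:%s custcol:%s flag:%s" % (meta, custcol, flag))
        # meta:cliches custcol:#acolumn flag:a
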
@@ -10,7 +10,6 @@ __docformat__ = 'restructuredtext en'

import time, os, traceback

from ConfigParser import SafeConfigParser
from StringIO import StringIO

from calibre.utils.ipc.server import Server

@@ -20,6 +19,7 @@ from calibre.utils.logging import Log
from calibre_plugins.fanfictiondownloader_plugin.dialogs import (NotGoingToDownload,
OVERWRITE, OVERWRITEALWAYS, UPDATE, UPDATEALWAYS, ADDNEW, SKIP, CALIBREONLY)
from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader import adapters, writers, exceptions
from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader.configurable import Configuration
from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader.epubutils import get_update_data

# ------------------------------------------------------------------------------

@@ -114,19 +114,19 @@ def do_download_for_worker(book,options):

book['comment'] = 'Download started...'

ffdlconfig = SafeConfigParser()
ffdlconfig.readfp(StringIO(get_resources("plugin-defaults.ini")))
ffdlconfig.readfp(StringIO(options['personal.ini']))
configuration = Configuration(adapters.getConfigSectionFor(book['url']),options['fileform'])
configuration.readfp(StringIO(get_resources("plugin-defaults.ini")))
configuration.readfp(StringIO(options['personal.ini']))

if not options['updateepubcover'] and 'epub_for_update' in book and options['collision'] in (UPDATE, UPDATEALWAYS):
ffdlconfig.set("overrides","never_make_cover","true")
configuration.set("overrides","never_make_cover","true")

# images only for epub, even if the user mistakenly turned it
# on else where.
if options['fileform'] != "epub":
ffdlconfig.set("overrides","include_images","false")
configuration.set("overrides","include_images","false")

adapter = adapters.getAdapter(ffdlconfig,book['url'],options['fileform'])
adapter = adapters.getAdapter(configuration,book['url'])
adapter.is_adult = book['is_adult']
adapter.username = book['username']
adapter.password = book['password']

@@ -137,7 +137,7 @@ def do_download_for_worker(book,options):
adapter.setSeries(book['calibre_series'][0],book['calibre_series'][1])
# else:
# print("no calibre_series")
writer = writers.getWriter(options['fileform'],adapter.config,adapter)
writer = writers.getWriter(options['fileform'],configuration,adapter)

outfile = book['outfile']

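Each worker job above now layers its settings into a single Configuration instead of a bare SafeConfigParser. A rough sketch of that layering, with the ini text inlined here purely for illustration and the site name chosen as an example; the expected result assumes the more-specific [overrides] section wins, as the lookup order suggests:

    from StringIO import StringIO                     # Python 2, as in the code above
    from fanficdownloader.configurable import Configuration

    configuration = Configuration("www.fanfiction.net", "epub")
    configuration.readfp(StringIO("[defaults]\ninclude_images:true\n"))  # stands in for plugin-defaults.ini
    try:
        configuration.add_section("overrides")        # make sure the overrides section exists
    except Exception:
        pass
    configuration.set("overrides", "include_images", "false")           # e.g. non-epub output, as above
    print(configuration.getConfig("include_images"))  # expected: False, since [overrides] is checked first
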
defaults.ini (55 lines changed)
@@ -18,6 +18,10 @@
## [defaults] section applies to all formats and sites but may be
## overridden at several levels

## Some sites also require the user to confirm they are adult for
## adult content. Uncomment by removing '#' in front of is_adult.
#is_adult:true

## All available titlepage_entries and the label used for them:
## <entryname>_label:<label>
## Labels may be customized.

@@ -63,9 +67,15 @@ authorId_label:Author ID
## show up in Calibre as tags. Also carried into mobi when converted.
extratags_label:Extra Tags
## The version of fanficdownloader
##
version_label:FFDL Version

## Date formats used by FFDL. Published and Update don't have time.
## See http://docs.python.org/library/datetime.html#strftime-strptime-behavior
## Note that ini format requires % to be escaped as %%.
dateCreated_format:%%Y-%%m-%%d %%H:%%M:%%S
datePublished_format:%%Y-%%m-%%d
dateUpdated_format:%%Y-%%m-%%d

## items to include in the title page
## Empty metadata entries will *not* appear, even if in the list.
## You can include extra text or HTML that will be included as-is in

@@ -478,6 +488,39 @@ extraships:Draco Malfoy/Hermione Granger
## personal.ini, not defaults.ini.
#is_adult:true

## Some adapters collect additional meta information beyond the
## standard ones. They need to be defined in extra_valid_entries to
## tell the rest of the FFDL system about them. They can be used in
## include_subject_tags, titlepage_entries, extra_titlepage_entries,
## logpage_entries, extra_logpage_entries, and include_in_* config
## items. You can also add additional entries here to build up
## composite metadata entries. dramione.org, for example, adds
## 'cliches' and then defines as the composite of hermiones,dracos in
## include_in_cliches.
extra_valid_entries:themes,hermiones,dracos,timeline,cliches
include_in_cliches:hermiones,dracos

## For another example, you could, by uncommenting this line, include
## themes in with genre metadata. Note, however, that you couldn't
## use cliches. include_in_* can only include the original real
## entries, not other include_in_* values.
#include_in_genre:genre, themes

## You can give each new valid entry a specific label for use on
## titlepage and logpage. If not defined, it will simply be the
themes_label:Themes
hermiones_label:Hermiones
dracos_label:Dracos
timeline_label:Timeline
cliches_label:Character Cliches

## extra_titlepage_entries (and extra_logpage_entries) *add* to
## titlepage_entries (and logpage_entries) so you can add site
## specific entries to titlepage/logpage without having to copy the
## entire titlepage_entries line. (But if you want them higher than
## the end, you will need to copy titlepage_entries.)
extra_titlepage_entries: themes,hermiones,dracos,timeline,cliches

[erosnsappho.sycophanthex.com]
## Site dedicated to these categories/characters/ships
extracategories:Harry Potter

@@ -789,9 +832,13 @@ extraships:Sesshoumaru/Kagome

[www.fanfiction.net]
## fanfiction.net's 'cover' images are really just tiny thumbnails.
## Comment this out or change it to false to use them anyway.
## Change this to false to use them anyway.
never_make_cover: true

## Extra metadata that this adapter knows about. See [dramione.org]
## for examples of how to use them.
extra_valid_entries:reviews,favs,follows

[www.fanfiktion.de]
## Some sites require login (or login for some rated stories) The
## program can prompt you, or you can save it in config. In

@@ -818,6 +865,10 @@ output_filename: ${title}-${siteabbrev}_${authorId}_${storyId}${formatext}
## Clear FanFiction from defaults, fictionpress.com is original fiction.
extratags:

## Extra metadata that this adapter knows about. See [dramione.org]
## for examples of how to use them.
extra_valid_entries:reviews,favs,follows

[www.ficwad.com]
## Some sites require login (or login for some rated stories) The
## program can prompt you, or you can save it in config. In

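The extra_valid_entries lines added above are what make site-specific names like reviews, favs and follows acceptable metadata. A minimal sketch of how such a declaration is picked up by the Configuration introduced in this commit (the ini text is inlined only for illustration; paths and import layout are assumptions about running from the repo):

    from StringIO import StringIO
    from fanficdownloader.configurable import Configuration

    ini = "[www.fanfiction.net]\nextra_valid_entries:reviews,favs,follows\n"
    configuration = Configuration("www.fanfiction.net", "epub")
    configuration.readfp(StringIO(ini))
    print(configuration.isValidMetaEntry("favs"))          # expected: True
    print("reviews" in configuration.getValidMetaList())   # expected: True
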
@@ -27,6 +27,7 @@ import ConfigParser
from subprocess import call

from fanficdownloader import adapters,writers,exceptions
from fanficdownloader.configurable import Configuration
from fanficdownloader.epubutils import get_dcsource_chaptercount, get_update_data
from fanficdownloader.geturls import get_urls_from_page

@@ -90,7 +91,15 @@ def main():
if options.update and options.format != 'epub':
parser.error("-u/--update-epub only works with epub")

config = ConfigParser.SafeConfigParser()
## Attempt to update an existing epub.
if options.update:
(url,chaptercount) = get_dcsource_chaptercount(args[0])
print "Updating %s, URL: %s" % (args[0],url)
output_filename = args[0]
else:
url = args[0]

configuration = Configuration(adapters.getConfigSectionFor(url),options.format)

conflist = []
homepath = join(expanduser("~"),".fanficdownloader")

@@ -109,46 +118,43 @@ def main():
conflist.extend(options.configfile)

logging.debug('reading %s config file(s), if present'%conflist)
config.read(conflist)
configuration.read(conflist)

print("has include_in_tags?%s"%configuration.hasConfig("include_in_tags"))

try:
config.add_section("overrides")
configuration.add_section("overrides")
except ConfigParser.DuplicateSectionError:
pass

if options.force:
config.set("overrides","always_overwrite","true")
configuration.set("overrides","always_overwrite","true")

if options.update:
configuration.set("overrides","output_filename",args[0])

if options.update and not options.updatecover:
config.set("overrides","never_make_cover","true")
configuration.set("overrides","never_make_cover","true")

# images only for epub, even if the user mistakenly turned it
# on else where.
if options.format != "epub":
config.set("overrides","include_images","false")
configuration.set("overrides","include_images","false")

if options.options:
for opt in options.options:
(var,val) = opt.split('=')
config.set("overrides",var,val)
configuration.set("overrides",var,val)

if options.list:
retlist = get_urls_from_page(args[0], config)
retlist = get_urls_from_page(args[0], configuration)
print "\n".join(retlist)

return

try:
## Attempt to update an existing epub.
if options.update:
(url,chaptercount) = get_dcsource_chaptercount(args[0])
print "Updating %s, URL: %s" % (args[0],url)
output_filename = args[0]
config.set("overrides","output_filename",args[0])
else:
url = args[0]

adapter = adapters.getAdapter(config,url,options.format)
adapter = adapters.getAdapter(configuration,url)

## Check for include_images and absence of PIL, give warning.
if adapter.getConfig('include_images'):

@@ -206,7 +212,7 @@ def main():
adapter.calibrebookmark,
adapter.logfile) = get_update_data(args[0])

writeStory(config,adapter,"epub")
writeStory(configuration,adapter,"epub")

else:
# regular download

@@ -215,7 +221,7 @@ def main():

adapter.setChaptersRange(options.begin,options.end)

output_filename=writeStory(config,adapter,options.format,options.metaonly)
output_filename=writeStory(configuration,adapter,options.format,options.metaonly)

if not options.metaonly and adapter.getConfig("post_process_cmd"):
metadata = adapter.story.metadata

example.ini (17 lines changed)
@@ -3,10 +3,20 @@

[defaults]
## Some sites also require the user to confirm they are adult for
## adult content. In commandline version, this should go in your
## personal.ini, not defaults.ini.
## adult content. Uncomment by removing '#' in front of is_adult. In
## commandline version, this should go in your personal.ini, not
## defaults.ini.
#is_adult:true

## Don't like the numbers at the start of chapter titles on some
## sites? You can use strip_chapter_numbers to strip them off. Just
## want to make them all look the same? Strip them off, then add them
## back on with add_chapter_numbers. Don't like the way it strips
## numbers or adds them back? See chapter_title_strip_pattern and
## chapter_title_add_pattern.
#strip_chapter_numbers:true
#add_chapter_numbers:true

[epub]
## include images from img tags in the body and summary of stories.
## Images will be converted to jpg for size if possible. Images work

@@ -34,7 +44,8 @@


## Most common, I expect will be using this to save username/passwords
## for different sites. Here are a few examples.
## for different sites. Here are a few examples. See defaults.ini
## for the full list.

[www.twilighted.net]
#username:YourPenname

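The strip_chapter_numbers and add_chapter_numbers options documented above are applied in Story.addChapter and Story.getChapters (see the story.py hunks later in this diff). A standalone sketch of the two transformations; the pattern values below are assumptions for illustration, not the shipped defaults:

    import re, string

    chapter_title_strip_pattern = r"^[0-9]+[\.: -]+"     # assumed example pattern
    chapter_title_add_pattern = "${index}. ${title}"     # assumed example pattern

    title = "12. The Duel"
    stripped = re.sub(chapter_title_strip_pattern, "", title)            # "The Duel"
    renumbered = string.Template(chapter_title_add_pattern).substitute(
        {'index': 3, 'title': stripped})                                  # "3. The Duel"
    print(stripped)
    print(renumbered)
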
@@ -117,8 +117,8 @@ for x in imports():
if "fanficdownloader.adapters.adapter_" in x:
#print x
__class_list.append(sys.modules[x].getClass())

def getAdapter(config,url,fileform=None):

def getDomainURL(url):
## fix up leading protocol.
fixedurl = re.sub(r"(?i)^[htps]+[:/]+","http://",url.strip())
if not fixedurl.startswith("http"):

@@ -135,20 +135,17 @@ def getAdapter(config,url,fileform=None):
if( domain != parsedUrl.netloc ):
fixedurl = fixedurl.replace(parsedUrl.netloc,domain)

return (domain,fixedurl)


def getAdapter(config,url):

logging.debug("trying url:"+url)
cls = getClassFor(domain)
if not cls and domain.startswith("www."):
domain = domain.replace("www.","")
logging.debug("trying site:without www: "+domain)
cls = getClassFor(domain)
fixedurl = fixedurl.replace("http://www.","http://")
if not cls:
logging.debug("trying site:www."+domain)
cls = getClassFor("www."+domain)
fixedurl = fixedurl.replace("http://","http://www.")
(domain,fixedurl) = getDomainURL(url)
cls = getClassFromList(domain)
logging.debug("fixedurl:"+fixedurl)
if cls:
adapter = cls(config,fixedurl) # raises InvalidStoryURL
adapter.setSectionOrder(adapter.getConfigSection(),fileform)
return adapter
# No adapter found.
raise exceptions.UnknownSite( url, [cls.getSiteDomain() for cls in __class_list] )

@@ -156,7 +153,28 @@ def getAdapter(config,url,fileform=None):
def getConfigSections():
return [cls.getConfigSection() for cls in __class_list]

def getConfigSectionFor(url):
(domain,fixedurl) = getDomainURL(url)
cls = getClassFromList(domain)
if cls:
return cls.getConfigSection()

# No adapter found.
raise exceptions.UnknownSite( url, [cls.getSiteDomain() for cls in __class_list] )

def getClassFor(domain):
cls = getClassFromList(domain)
if not cls and domain.startswith("www."):
domain = domain.replace("www.","")
logging.debug("trying site:without www: "+domain)
cls = getClassFromList(domain)
fixedurl = fixedurl.replace("http://www.","http://")
if not cls:
logging.debug("trying site:www."+domain)
cls = getClassFromList("www."+domain)
fixedurl = fixedurl.replace("http://","http://www.")

def getClassFromList(domain):
for cls in __class_list:
if cls.matchesSite(domain):
return cls

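With the refactor above, callers first resolve a config section for the URL, build a Configuration from it, and then ask for the adapter; getAdapter no longer takes a file format argument. A usage sketch, where the URL and site are examples only and assume the corresponding adapter is installed:

    from fanficdownloader import adapters
    from fanficdownloader.configurable import Configuration

    url = "http://www.whofic.com/viewstory.php?sid=1234"   # example story URL
    section = adapters.getConfigSectionFor(url)             # e.g. "www.whofic.com"
    configuration = Configuration(section, "epub")
    adapter = adapters.getAdapter(configuration, url)       # raises UnknownSite / InvalidStoryURL on bad input
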
@@ -83,7 +83,7 @@ class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter):

def getSiteURLPattern(self):
# http://archiveofourown.org/collections/Smallville_Slash_Archive/works/159770
return re.escape("http://")+"(www.)?"+re.escape(self.getSiteDomain())+r"(/collections/[^/]+)?/works/(?P<id>\d+)(/chapters/\d+)?/?$"
return re.escape("http://")+"(www.)?"+re.escape(self.getSiteDomain())+r"(/collections/[^/]+)?/works/(?P<id>\d+)"

## Login
def needToLoginCheck(self, data):

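The loosened pattern above drops the trailing "(/chapters/\d+)?/?$" anchor, so the plain re.match in validateURL now accepts chapter URLs and other trailing path parts. A quick check of the new pattern, with the site domain hard-coded here for the sketch:

    import re

    site = "archiveofourown.org"
    pattern = re.escape("http://") + "(www.)?" + re.escape(site) + \
        r"(/collections/[^/]+)?/works/(?P<id>\d+)"

    for url in ("http://archiveofourown.org/works/159770",
                "http://www.archiveofourown.org/works/159770/chapters/230443",
                "http://archiveofourown.org/collections/Smallville_Slash_Archive/works/159770"):
        m = re.match(pattern, url)
        print("%s -> %s" % (url, m.group('id') if m else None))   # all three match id 159770
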
@@ -174,13 +174,14 @@ class CastleFansOrgAdapter(BaseSiteAdapter): # XXX
# print data

# Now go hunting for all the meta data and the chapter list.

pagetitle = soup.find('div',{'id':'pagetitle'})
## Title
a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
a = pagetitle.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
self.story.setMetadata('title',a.string)

# Find authorid and URL from... author url.
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
a = pagetitle.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
self.story.setMetadata('authorId',a['href'].split('=')[1])
self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href'])
self.story.setMetadata('author',a.string)

@@ -161,20 +161,18 @@ class DramioneOrgAdapter(BaseSiteAdapter):
self.story.setMetadata('author',a.string)

# Use banner as cover if found
if self.getConfig('include_images'):
coverurl = ''
img = soup.find('img',{'class':'banner'})
if img:
coverurl = img['src']
#print "Cover: "+coverurl
a = soup.find(text="This story has a banner; click to view.")
if a:
#print "A: "+ ', '.join("(%s, %s)" %tup for tup in a.parent.attrs)
coverurl = a.parent['href']
#print "Cover: "+coverurl
if coverurl:
self.story.addImgUrl(self,url,coverurl,self._fetchUrlRaw,cover=True)

coverurl = ''
img = soup.find('img',{'class':'banner'})
if img:
coverurl = img['src']
#print "Cover: "+coverurl
a = soup.find(text="This story has a banner; click to view.")
if a:
#print "A: "+ ', '.join("(%s, %s)" %tup for tup in a.parent.attrs)
coverurl = a.parent['href']
#print "Cover: "+coverurl
if coverurl:
self.setCoverImage(url,coverurl)

# Find the chapters:
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")):

@@ -194,6 +192,22 @@ class DramioneOrgAdapter(BaseSiteAdapter):
for warning in warnings:
self.story.addToList('warnings',warning.string)

themes=soup.findAll('a', {'class' : "tag-3"})
for theme in themes:
self.story.addToList('themes',theme.string)

hermiones=soup.findAll('a', {'class' : "tag-4"})
for hermione in hermiones:
self.story.addToList('hermiones',hermione.string)

dracos=soup.findAll('a', {'class' : "tag-5"})
for draco in dracos:
self.story.addToList('dracos',draco.string)

timelines=soup.findAll('a', {'class' : "tag-6"})
for timeline in timelines:
self.story.addToList('timeline',timeline.string)

# utility method
def defaultGetattr(d,k):
try:

@@ -186,8 +186,14 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):

donechars = False
while len(metalist) > 0:
if metalist[0].startswith('Reviews') or metalist[0].startswith('Chapters') or metalist[0].startswith('Status') or metalist[0].startswith('id:') or metalist[0].startswith('Favs:') or metalist[0].startswith('Follows:'):
if metalist[0].startswith('Chapters') or metalist[0].startswith('Status') or metalist[0].startswith('id:'):
pass
elif metalist[0].startswith('Reviews'):
self.story.setMetadata('reviews',metalist[0].split(':')[1].strip())
elif metalist[0].startswith('Favs:'):
self.story.setMetadata('favs',metalist[0].split(':')[1].strip())
elif metalist[0].startswith('Follows:'):
self.story.setMetadata('follows',metalist[0].split(':')[1].strip())
elif metalist[0].startswith('Updated'):
self.story.setMetadata('dateUpdated',makeDate(metalist[0].split(':')[1].strip(), '%m-%d-%y'))
elif metalist[0].startswith('Published'):

@@ -208,10 +214,9 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
else:
self.story.setMetadata('status', 'In-Progress')

if self.getConfig('include_images'):
img = soup.find('img',{'class':'cimage'})
if img:
self.story.addImgUrl(self,url,img['src'],self._fetchUrlRaw,cover=True)
img = soup.find('img',{'class':'cimage'})
if img:
self.setCoverImage(url,img['src'])

# Find the chapter selector
select = soup.find('select', { 'name' : 'chapter' } )

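The fanfiction.net change above pulls reviews, favs and follows out of the same metadata strip it already walks, by splitting each fragment on the colon. A tiny sketch of that parsing with a made-up metadata fragment:

    # Hypothetical fragment of the ffnet metadata line, already split into pieces.
    metalist = ['Reviews: 1,234', 'Favs: 567', 'Follows: 89']
    parsed = {}
    for item in metalist:
        if item.startswith('Reviews'):
            parsed['reviews'] = item.split(':')[1].strip()
        elif item.startswith('Favs:'):
            parsed['favs'] = item.split(':')[1].strip()
        elif item.startswith('Follows:'):
            parsed['follows'] = item.split(':')[1].strip()
    print(parsed)   # e.g. {'reviews': '1,234', 'favs': '567', 'follows': '89'}
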
@@ -138,14 +138,14 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter):
self.story.setMetadata("numWords", str(storyMetadata["words"]))

# fimfic is the first site with an explicit cover image.
if self.getConfig('include_images') and "image" in storyMetadata.keys():
if "image" in storyMetadata.keys():
if "full_image" in storyMetadata:
coverurl = storyMetadata["full_image"]
else:
coverurl = storyMetadata["image"]
if coverurl.startswith('//static.fimfiction.net'): # fix for img urls missing 'http:'
coverurl = "http:"+coverurl
self.story.addImgUrl(self,self.url,coverurl,self._fetchUrlRaw,cover=True)
self.setCoverImage(self.url,coverurl)

# the fimfic API gives bbcode for desc, not html.

@@ -67,10 +67,8 @@ class BaseSiteAdapter(Configurable):
def validateURL(self):
return re.match(self.getSiteURLPattern(), self.url)

def __init__(self, config, url):
self.config = config
Configurable.__init__(self, config)
self.setSectionOrder(self.getConfigSection())
def __init__(self, configuration, url):
Configurable.__init__(self, configuration)

self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
self.password = ""

@@ -79,7 +77,7 @@ class BaseSiteAdapter(Configurable):
self.opener = u2.build_opener(u2.HTTPCookieProcessor(),GZipProcessor())
self.storyDone = False
self.metadataDone = False
self.story = Story()
self.story = Story(configuration)
self.story.setMetadata('site',self.getConfigSection())
self.story.setMetadata('dateCreated',datetime.datetime.now())
self.chapterUrls = [] # tuples of (chapter title,chapter url)

@@ -138,7 +136,7 @@ class BaseSiteAdapter(Configurable):
logging.debug("try code:"+code)
return data.decode(code)
except:
logging.info("code failed:"+code)
logging.debug("code failed:"+code)
pass
logging.info("Could not decode story, tried:%s Stripping non-ASCII."%decode)
return "".join([x for x in data if ord(x) < 128])

@@ -199,8 +197,7 @@ class BaseSiteAdapter(Configurable):
if (self.chapterFirst!=None and index < self.chapterFirst) or \
(self.chapterLast!=None and index > self.chapterLast):
self.story.addChapter(removeEntities(title),
None,
self)
None)
else:
if self.oldchapters and index < len(self.oldchapters):
data = self.utf8FromSoup(None,

@@ -209,16 +206,14 @@ class BaseSiteAdapter(Configurable):
else:
data = self.getChapterText(url)
self.story.addChapter(removeEntities(title),
removeEntities(data),
self)
removeEntities(data))
self.storyDone = True

# include image, but no cover from story, add default_cover_image cover.
if self.getConfig('include_images') and \
not self.story.cover and \
self.getConfig('default_cover_image'):
self.story.addImgUrl(self,
None,
self.story.addImgUrl(None,
#self.getConfig('default_cover_image'),
self.story.formatFileName(self.getConfig('default_cover_image'),
self.getConfig('allow_unsafe_filename')),

@@ -298,6 +293,10 @@ class BaseSiteAdapter(Configurable):
self.story.setMetadata('description',stripHTML(svalue))
#print("\n\ndescription:\n"+self.story.getMetadata('description')+"\n\n")

def setCoverImage(self,storyurl,imgurl):
if self.getConfig('include_images'):
self.story.addImgUrl(storyurl,imgurl,self._fetchUrlRaw,cover=True)

# This gives us a unicode object, not just a string containing bytes.
# (I gave soup a unicode string, you'd think it could give it back...)
# Now also does a bunch of other common processing for us.

@@ -313,7 +312,7 @@ class BaseSiteAdapter(Configurable):
# some pre-existing epubs have img tags that had src stripped off.
if img.has_key('src'):
img['longdesc']=img['src']
img['src']=self.story.addImgUrl(self,url,img['src'],fetch,
img['src']=self.story.addImgUrl(url,img['src'],fetch,
coverexclusion=self.getConfig('cover_exclusion_regexp'))

for attr in soup._getAttrMap().keys():

@@ -21,10 +21,6 @@ import ConfigParser
# inherit from Configurable. The config file(s) uses ini format:
# [sections] with key:value settings.
#
# writer does [defaults], [www.whofic.com], [epub], [www.whofic.com:epub], [overrides]
#
# Until a write is created, the adapter only has [defaults], [www.whofic.com], [overrides]
#
# [defaults]
# titlepage_entries: category,genre, status
# [www.whofic.com]

@@ -36,28 +32,67 @@ import ConfigParser
# [overrides]
# titlepage_entries: category

class Configuration(ConfigParser.SafeConfigParser):

class Configurable(object):

def __init__(self, config):
self.config = config
self.sectionslist = ['defaults']

def setSectionOrder(self,site,fileform=None):
def __init__(self, site, fileform):
ConfigParser.SafeConfigParser.__init__(self)
self.sectionslist = ['defaults']
self.addConfigSection(site)
if fileform:
self.addConfigSection(fileform)
self.addConfigSection(site+":"+fileform)
self.addConfigSection("overrides")

self.validEntries = [
'category',
'genre',
'language',
'characters',
'ships',
'series',
'status',
'datePublished',
'dateUpdated',
'dateCreated',
'rating',
'warnings',
'numChapters',
'numWords',
'site',
'storyId',
'authorId',
'extratags',
'title',
'storyUrl',
'description',
'author',
'authorUrl',
'formatname',
'formatext',
'siteabbrev',
'version',
# internal stuff.
'langcode',
'output_css',
'authorHTML'
]

def addConfigSection(self,section):
self.sectionslist.insert(0,section)

def isValidMetaEntry(self, key):
return key in self.getValidMetaList()

def getValidMetaList(self):
vl = []
vl.extend(self.validEntries)
vl.extend(self.getConfigList("extra_valid_entries"))
return vl

def hasConfig(self, key):
for section in self.sectionslist:
try:
self.config.get(section,key)
self.get(section,key)
#print("found %s in section [%s]"%(key,section))
return True
except:

@@ -65,11 +100,11 @@ class Configurable(object):

return False

def getConfig(self, key):
val = ""
def getConfig(self, key, default=""):
val = default
for section in self.sectionslist:
try:
val = self.config.get(section,key)
val = self.get(section,key)
if val and val.lower() == "false":
val = False
#print "getConfig(%s)=[%s]%s" % (key,section,val)

@@ -84,5 +119,26 @@ class Configurable(object):
vlist = self.getConfig(key).split(',')
vlist = [ v.strip() for v in vlist ]
#print "vlist("+key+"):"+str(vlist)
return vlist
return vlist

# extended by adapter, writer and story for ease of calling configuration.
class Configurable(object):

def __init__(self, configuration):
self.configuration = configuration

def isValidMetaEntry(self, key):
return self.configuration.isValidMetaEntry(key)

def getValidMetaList(self):
return self.configuration.getValidMetaList()

def hasConfig(self, key):
return self.configuration.hasConfig(key)

def getConfig(self, key, default=""):
return self.configuration.getConfig(key)

def getConfigList(self, key):
return self.configuration.getConfigList(key)

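The split above leaves Configuration as the SafeConfigParser subclass that knows the section lookup order, while Configurable becomes a thin delegating mixin for adapters, writers and Story. A sketch of the section order that __init__ builds (derived from the insert(0) calls above; the site and format are examples):

    from fanficdownloader.configurable import Configuration

    c = Configuration("www.whofic.com", "epub")
    print(c.sectionslist)
    # expected, most specific first:
    # ['overrides', 'www.whofic.com:epub', 'epub', 'www.whofic.com', 'defaults']
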
@@ -18,19 +18,19 @@
import re
import urlparse
import urllib2 as u2
import ConfigParser

from BeautifulSoup import BeautifulSoup
from gziphttp import GZipProcessor

import adapters
from configurable import Configuration

def get_urls_from_page(url,config=None):
def get_urls_from_page(url,configuration=None):

normalized = set() # normalized url
retlist = [] # orig urls.
if not config:
config = ConfigParser.SafeConfigParser()
if not configuration:
configuration = Configuration("test1.com","EPUB")

data = None

@@ -39,7 +39,7 @@ def get_urls_from_page(url,config=None):
# them, AO3 doesn't even show them if not logged in. Only works
# with saved user/pass--not going to prompt for list.
if 'archiveofourown.org' in url:
ao3adapter = adapters.getAdapter(config,"http://www.archiveofourown.org/works/0","EPUB")
ao3adapter = adapters.getAdapter(configuration,"http://www.archiveofourown.org/works/0","EPUB")
if ao3adapter.getConfig("username"):
if ao3adapter.getConfig("is_adult"):
addurl = "?view_adult=true"

@@ -72,7 +72,7 @@ def get_urls_from_page(url,config=None):

try:
href = href.replace('&index=1','')
adapter = adapters.getAdapter(config,href,"EPUB")
adapter = adapters.getAdapter(configuration,href,"EPUB")
if adapter.story.getMetadata('storyUrl') not in normalized:
normalized.add(adapter.story.getMetadata('storyUrl'))
retlist.append(href)

@@ -22,6 +22,7 @@ from math import floor

import exceptions
from htmlcleanup import conditionalRemoveEntities, removeAllEntities
from configurable import Configurable

# Create convert_image method depending on which graphics lib we can
# load. Preferred: calibre, PIL, none

@@ -186,9 +187,10 @@ langs = {
"Devanagari":"hi",
}

class Story:
class Story(Configurable):

def __init__(self):
def __init__(self, configuration):
Configurable.__init__(self, configuration)
try:
self.metadata = {'version':os.environ['CURRENT_VERSION_ID']}
except:

@@ -197,35 +199,50 @@ class Story:
self.chapters = [] # chapters will be tuples of (title,html)
self.imgurls = []
self.imgtuples = []
self.listables = {} # some items (extratags, category, warnings & genres) are also kept as lists.

self.cover=None # *href* of new cover image--need to create html.
self.oldcover=None # (oldcoverhtmlhref,oldcoverhtmltype,oldcoverhtmldata,oldcoverimghref,oldcoverimgtype,oldcoverimgdata)
self.calibrebookmark=None # cheesy way to carry calibre bookmark file forward across update.
self.logfile=None # cheesy way to carry log file forward across update.

def setMetadata(self, key, value):
self.setReplace(self.getConfig('replace_metadata'))

def setMetadata(self, key, value, condremoveentities=True):
## still keeps < < and &
self.metadata[key]=conditionalRemoveEntities(value)
if condremoveentities:
self.metadata[key]=conditionalRemoveEntities(value)
else:
self.metadata[key]=value
if key == "language":
try:
self.metadata['langcode'] = langs[self.metadata[key]]
except:
self.metadata['langcode'] = 'en'
if key == 'dateUpdated':
# Last Update tags for Bill.
self.addToList('lastupdate',value.strftime("Last Update Year/Month: %Y/%m"))
self.addToList('lastupdate',value.strftime("Last Update: %Y/%m/%d"))

def getMetadataRaw(self,key):
if self.metadata.has_key(key):
if self.isValidMetaEntry(key) and self.metadata.has_key(key):
return self.metadata[key]

def doReplacments(self,value):
for (p,v) in self.replacements:
if (isinstance(value,str) or isinstance(value,unicode)) and re.match(p,value):
if (isinstance(value,basestring)) and re.match(p,value):
value = re.sub(p,v,value)
return value

def getMetadata(self, key, removeallentities=False, doreplacements=True):
def getMetadata(self, key,
removeallentities=False,
doreplacements=True):
value = None
if not self.isValidMetaEntry(key):
return value

if self.isList(key):
value = ', '.join(self.getList(key, removeallentities))
value = u', '.join(self.getList(key, removeallentities, doreplacements=True))
elif self.metadata.has_key(key):
value = self.metadata[key]
if value:

@@ -233,10 +250,8 @@ class Story:
value = commaGroups(value)
if key == "numChapters":
value = commaGroups("%d"%value)
if key == "dateCreated":
value = value.strftime("%Y-%m-%d %H:%M:%S")
if key == "datePublished" or key == "dateUpdated":
value = value.strftime("%Y-%m-%d")
if key in ("dateCreated","datePublished","dateUpdated"):
value = value.strftime(self.getConfig(key+"_format","%Y-%m-%d"))

if doreplacements:
value=self.doReplacments(value)

@@ -244,19 +259,22 @@ class Story:
return removeAllEntities(value)
else:
return value

def getAllMetadata(self, removeallentities=False, doreplacements=True):

def getAllMetadata(self,
removeallentities=False,
doreplacements=True,
keeplists=False):
'''
All single value *and* list value metadata as strings.
All single value *and* list value metadata as strings (unless keeplists=True, then keep lists).
'''
allmetadata = {}

# special handling for authors/authorUrls
authlinkhtml="<a class='authorlink' href='%s'>%s</a>"
if 'author' in self.listables.keys(): # more than one author, assume multiple authorUrl too.
if self.isList('author'): # more than one author, assume multiple authorUrl too.
htmllist=[]
for i, v in enumerate(self.listables['author']):
aurl = self.listables['authorUrl'][i]
for i, v in enumerate(self.getList('author')):
aurl = self.getList('authorUrl')[i]
auth = v
# make sure doreplacements & removeallentities are honored.
if doreplacements:

@@ -271,11 +289,12 @@ class Story:
else:
self.setMetadata('authorHTML',authlinkhtml%(self.getMetadata('authorUrl', removeallentities, doreplacements),
self.getMetadata('author', removeallentities, doreplacements)))

for k in self.metadata.keys():
allmetadata[k] = self.getMetadata(k, removeallentities, doreplacements)
for l in self.listables.keys():
allmetadata[l] = self.getMetadata(l, removeallentities, doreplacements)

for k in self.getValidMetaList():
if self.isList(k) and keeplists:
allmetadata[k] = self.getList(k, removeallentities, doreplacements)
else:
allmetadata[k] = self.getMetadata(k, removeallentities, doreplacements)

return allmetadata

@@ -288,52 +307,80 @@ class Story:
if value==None:
return
value = conditionalRemoveEntities(value)
if not self.isList(listname):
self.listables[listname]=[]
if not self.isList(listname) or not listname in self.metadata:
# Calling addToList to a non-list meta will overwrite it.
self.metadata[listname]=[]
# prevent duplicates.
if not value in self.listables[listname]:
self.listables[listname].append(value)
if not value in self.metadata[listname]:
self.metadata[listname].append(value)

def getList(self,listname, removeallentities=False, doreplacements=True):
def isList(self,listname):
'Everything set with an include_in_* is considered a list.'
return self.hasConfig("include_in_"+listname) or \
( self.isValidMetaEntry(listname) and self.metadata.has_key(listname) \
and isinstance(self.metadata[listname],list) )

def getList(self,listname,
removeallentities=False,
doreplacements=True,
doincludein=True):
retlist = []
if not self.isList(listname):
retlist = [self.getMetadata(listname,removeallentities=removeallentities)]
if not self.isValidMetaEntry(listname):
return retlist

# doincludein prevents recursion of include_in_'s
if doincludein and self.hasConfig("include_in_"+listname):
for k in self.getConfigList("include_in_"+listname):
retlist.extend(self.getList(k,removeallentities,doreplacements,doincludein=False))
else:
retlist = self.listables[listname]

if not self.isList(listname):
retlist = [self.getMetadata(listname,removeallentities, doreplacements)]
else:
retlist = self.getMetadataRaw(listname)

if doreplacements:
retlist = filter( lambda x : x!=None and x!='' ,
map(self.doReplacments,retlist) )
if removeallentities:
retlist = filter( lambda x : x!=None and x!='' ,
map(removeAllEntities,retlist) )
if doreplacements:
retlist = filter( lambda x : x!=None and x!='' ,
map(self.doReplacments,retlist) )
if removeallentities:
retlist = filter( lambda x : x!=None and x!='' ,
map(removeAllEntities,retlist) )

return retlist

def getLists(self, removeallentities=False):
lsts = {}
for ln in self.listables.keys():
lsts[ln] = self.getList(ln, removeallentities)
return lsts
def getSubjectTags(self, removeallentities=False):
# set to avoid duplicates subject tags.
subjectset = set()

tags_list = self.getConfigList("include_subject_tags") + self.getConfigList("extra_subject_tags")

# metadata all go into dc:subject tags, but only if they are configured.
for (name,value) in self.getAllMetadata(removeallentities=removeallentities,keeplists=True).iteritems():
if name in tags_list:
if isinstance(value,list):
for tag in value:
subjectset.add(tag)
else:
subjectset.add(value)

def isList(self,listname):
return self.listables.has_key(listname)

def addChapter(self, title, html, configurable=None):
if configurable and \
configurable.getConfig('strip_chapter_numbers') and \
configurable.getConfig('chapter_title_strip_pattern'):
title = re.sub(configurable.getConfig('chapter_title_strip_pattern'),"",title)
if None in subjectset:
subjectset.remove(None)

return list(subjectset)

def addChapter(self, title, html):
if self.getConfig('strip_chapter_numbers') and \
self.getConfig('chapter_title_strip_pattern'):
title = re.sub(self.getConfig('chapter_title_strip_pattern'),"",title)
self.chapters.append( (title,html) )

def getChapters(self, configurable=None):
def getChapters(self):
"Chapters will be tuples of (title,html)"
retval = []
if configurable and \
configurable.getConfig('add_chapter_numbers') and \
configurable.getConfig('chapter_title_add_pattern'):
if self.getConfig('add_chapter_numbers') and \
self.getConfig('chapter_title_add_pattern'):
for index, (title,html) in enumerate(self.chapters):
retval.append( (string.Template(configurable.getConfig('chapter_title_add_pattern')).substitute({'index':index+1,'title':title}),html) )
retval.append( (string.Template(self.getConfig('chapter_title_add_pattern')).substitute({'index':index+1,'title':title}),html) )
else:
retval = self.chapters

@@ -355,11 +402,11 @@ class Story:

# pass fetch in from adapter in case we need the cookies collected
# as well as it's a base_story class method.
def addImgUrl(self,configurable,parenturl,url,fetch,cover=False,coverexclusion=None):
def addImgUrl(self,parenturl,url,fetch,cover=False,coverexclusion=None):

# otherwise it saves the image in the epub even though it
# isn't used anywhere.
if cover and configurable.getConfig('never_make_cover'):
if cover and self.getConfig('never_make_cover'):
return

url = url.strip() # ran across an image with a space in the

@@ -397,20 +444,20 @@ class Story:
if imgurl not in self.imgurls:
parsedUrl = urlparse.urlparse(imgurl)
try:
sizes = [ int(x) for x in configurable.getConfigList('image_max_size') ]
sizes = [ int(x) for x in self.getConfigList('image_max_size') ]
except Exception, e:
raise exceptions.FailedToDownload("Failed to parse image_max_size from personal.ini:%s\nException: %s"%(configurable.getConfigList('image_max_size'),e))
raise exceptions.FailedToDownload("Failed to parse image_max_size from personal.ini:%s\nException: %s"%(self.getConfigList('image_max_size'),e))
try:
(data,ext,mime) = convert_image(imgurl,
fetch(imgurl),
sizes,
configurable.getConfig('grayscale_images'))
self.getConfig('grayscale_images'))
except Exception, e:
print("Failed to load or convert image, skipping:\n%s\nException: %s"%(imgurl,e))
return "failedtoload"

# explicit cover, make the first image.
if cover and not configurable.getConfig('never_make_cover'):
if cover and not self.getConfig('never_make_cover'):
if len(self.imgtuples) > 0 and 'cover' in self.imgtuples[0]['newsrc']:
# remove existing cover, if there is one.
del self.imgurls[0]

@@ -427,8 +474,8 @@ class Story:
# NOT never_make_cover AND
# either no coverexclusion OR coverexclusion doesn't match
if self.cover == None and \
configurable.getConfig('make_firstimage_cover') and \
not configurable.getConfig('never_make_cover') and \
self.getConfig('make_firstimage_cover') and \
not self.getConfig('never_make_cover') and \
(not coverexclusion or not re.search(coverexclusion,imgurl)):
newsrc = "images/cover.%s"%ext
self.cover=newsrc

@@ -457,7 +504,7 @@ class Story:
return retlist

def __str__(self):
return "Metadata: " +str(self.metadata) + "\nListables: " +str(self.listables) #+ "\nChapters: "+str(self.chapters)
return "Metadata: " +str(self.metadata)

def setReplace(self,replace):
for line in replace.splitlines():

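getMetadata above now formats the three date entries with the <entry>_format strings from the config (falling back to %Y-%m-%d) instead of hard-coded formats. A minimal strftime sketch plus a note on the ini escaping; the date value is illustrative:

    from datetime import datetime

    # In the ini, '%' must be doubled, e.g.  dateCreated_format:%%Y-%%m-%%d %%H:%%M:%%S
    # After ConfigParser interpolation that arrives in getConfig as:
    fmt = "%Y-%m-%d %H:%M:%S"
    print(datetime(2012, 9, 1, 13, 5, 0).strftime(fmt))   # -> 2012-09-01 13:05:00
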
@ -36,44 +36,12 @@ class BaseStoryWriter(Configurable):
def getFormatExt():
return '.bse'
def __init__(self, config, adapter):
Configurable.__init__(self, config)
self.setSectionOrder(adapter.getConfigSection(),self.getFormatName())
def __init__(self, configuration, adapter):
Configurable.__init__(self, configuration)
self.adapter = adapter
self.story = adapter.getStoryMetadataOnly() # only cache the metadata initially.
self.story.setReplace(self.getConfig('replace_metadata'))
self.validEntries = [
'category',
'genre',
'language',
'characters',
'ships',
'series',
'status',
'datePublished',
'dateUpdated',
'dateCreated',
'rating',
'warnings',
'numChapters',
'numWords',
'site',
'storyId',
'authorId',
'extratags',
'title',
'storyUrl',
'description',
'author',
'authorUrl',
'formatname',
'formatext',
'siteabbrev',
'version']
# fall back labels.
self.titleLabels = {
'category':'Category',

@ -148,11 +116,11 @@ class BaseStoryWriter(Configurable):
if WIDE_ENTRY==None:
WIDE_ENTRY=ENTRY
titleEntriesList = self.getConfigList("titlepage_entries")
titleEntriesList = self.getConfigList("titlepage_entries") + self.getConfigList("extra_titlepage_entries")
wideTitleEntriesList = self.getConfigList("wide_titlepage_entries")
for entry in titleEntriesList:
if entry in self.validEntries:
if self.isValidMetaEntry(entry):
if self.story.getMetadata(entry):
if entry in wideTitleEntriesList:
TEMPLATE=WIDE_ENTRY

@ -161,9 +129,12 @@ class BaseStoryWriter(Configurable):
if self.hasConfig(entry+"_label"):
label=self.getConfig(entry+"_label")
else:
print("Using fallback label for %s_label"%entry)
elif entry in self.titleLabels:
logging.debug("Using fallback label for %s_label"%entry)
label=self.titleLabels[entry]
else:
label="%s"%entry.title()
logging.debug("No known label for %s, fallback to '%s'"%(entry,label))
# If the label for the title entry is empty, use the
# 'no title' option if there is one.
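
The new label lookup above falls back in three steps: an explicit <entry>_label config value, then the built-in titleLabels table, then a title-cased entry name. A quick sketch of that chain with invented entries and labels:

import logging

config = {'themes_label': 'Themes'}          # pretend personal.ini set this
titleLabels = {'numWords': 'Words'}          # stand-in for the writer's fallback table

def label_for(entry):
    if entry + '_label' in config:                       # 1) explicit config label
        return config[entry + '_label']
    elif entry in titleLabels:                           # 2) built-in fallback
        logging.debug("Using fallback label for %s_label" % entry)
        return titleLabels[entry]
    else:                                                # 3) last resort
        label = entry.title()
        logging.debug("No known label for %s, fallback to '%s'" % (entry, label))
        return label

print(label_for('themes'))     # Themes
print(label_for('numWords'))   # Words
print(label_for('cliches'))    # Cliches
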
@ -184,10 +155,10 @@ class BaseStoryWriter(Configurable):
names as Story.metadata, but ENTRY should use index and chapter.
"""
# Only do TOC if there's more than one chapter and it's configured.
if len(self.story.getChapters(self)) > 1 and self.getConfig("include_tocpage") and not self.metaonly :
if len(self.story.getChapters()) > 1 and self.getConfig("include_tocpage") and not self.metaonly :
self._write(out,START.substitute(self.story.getAllMetadata()))
for index, (title,html) in enumerate(self.story.getChapters(self)):
for index, (title,html) in enumerate(self.story.getChapters()):
if html:
self._write(out,ENTRY.substitute({'chapter':title, 'index':"%04d"%(index+1)}))

@ -202,9 +173,11 @@ class BaseStoryWriter(Configurable):
# minor cheat, tucking css into metadata.
if self.getConfig("output_css"):
self.story.metadata["output_css"] = self.getConfig("output_css")
self.story.setMetadata("output_css",
self.getConfig("output_css"),
condremoveentities=False)
else:
self.story.metadata["output_css"] = ''
self.story.setMetadata("output_css",'')
if not outstream:
close=True

@ -261,29 +234,6 @@ class BaseStoryWriter(Configurable):
if close:
outstream.close()
def getTags(self, removeallentities=False):
# set to avoid duplicates subject tags.
subjectset = set()
if self.story.getMetadataRaw('dateUpdated'):
# Last Update tags for Bill.
self.story.addToList('lastupdate',self.story.getMetadataRaw('dateUpdated').strftime("Last Update Year/Month: %Y/%m"))
self.story.addToList('lastupdate',self.story.getMetadataRaw('dateUpdated').strftime("Last Update: %Y/%m/%d"))
for entry in self.validEntries:
if entry in self.getConfigList("include_subject_tags") and \
entry not in self.story.getLists() and \
self.story.getMetadata(entry):
subjectset.add(self.getMetadata(entry, removeallentities))
# listables all go into dc:subject tags, but only if they are configured.
for (name,lst) in self.story.getLists(removeallentities).iteritems():
if name in self.getConfigList("include_subject_tags"):
for tag in lst:
subjectset.add(tag)
return list(subjectset)
def writeStoryImpl(self, out):
"Must be overriden by sub classes."
pass
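
The removed getTags() shows the subject-tag logic that is now reached through story.getSubjectTags() (see the dc:subject hunk below): configured listables are collected into a set so duplicates collapse. A small sketch with made-up lists and config:

include_subject_tags = ['genre', 'characters', 'lastupdate']
listables = {'genre': ['Drama', 'Humor'],
             'characters': ['Hermione G.', 'Draco M.'],
             'ships': ['Draco Malfoy/Hermione Granger']}   # not configured, so skipped

subjectset = set()
for name, lst in listables.items():
    if name in include_subject_tags:
        for tag in lst:
            subjectset.add(tag)
print(sorted(subjectset))   # ['Draco M.', 'Drama', 'Hermione G.', 'Humor']
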
@ -193,31 +193,33 @@ ${value}<br />
Switch rindex to index to search from top instead of bottom.
"""
values = {}
for entry in self.getConfigList("logpage_entries"):
if entry in self.validEntries:
try:
# <span id="dateUpdated">1975-04-15</span>
span = '<span id="%s">'%entry
idx = logfile.rindex(span)+len(span)
values[entry] = logfile[idx:logfile.index('</span>',idx)]
except Exception, e:
#print("e:%s"%e)
pass
for entry in self.getConfigList("logpage_entries") + self.getConfigList("extra_logpage_entries"):
try:
# <span id="dateUpdated">1975-04-15</span>
span = '<span id="%s">'%entry
idx = logfile.rindex(span)+len(span)
values[entry] = logfile[idx:logfile.index('</span>',idx)]
except Exception, e:
#print("e:%s"%e)
pass
return values
def _makeLogEntry(self, oldvalues={}):
retval = "<p class='log_entry'>"
for entry in self.getConfigList("logpage_entries"):
if entry in self.validEntries:
for entry in self.getConfigList("logpage_entries") + self.getConfigList("extra_logpage_entries"):
if self.isValidMetaEntry(entry):
val = self.story.getMetadata(entry)
if val and ( entry not in oldvalues or val != oldvalues[entry] ):
if self.hasConfig(entry+"_label"):
label=self.getConfig(entry+"_label")
else:
print("Using fallback label for %s_label"%entry)
elif entry in self.titleLabels:
logging.debug("Using fallback label for %s_label"%entry)
label=self.titleLabels[entry]
else:
label="%s"%entry.title()
logging.debug("No known label for %s, fallback to '%s'"%(entry,label))
retval = retval + self.EPUB_LOG_ENTRY.substitute({'id':entry,
'label':label,
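
The loop above recovers the previous log-page values by slicing out the last <span id="entry">...</span> for each configured entry; rindex means the most recent occurrence wins. A self-contained sketch with an invented logfile string and entry list:

logfile = ('<p class="log_entry">'
           '<span id="dateUpdated">1975-04-15</span><br />'
           '<span id="numWords">1234</span><br /></p>')

values = {}
for entry in ['dateUpdated', 'numWords', 'themes']:
    try:
        span = '<span id="%s">' % entry
        idx = logfile.rindex(span) + len(span)          # rindex: last occurrence wins
        values[entry] = logfile[idx:logfile.index('</span>', idx)]
    except ValueError:
        pass                                            # entry absent from the old log
print(values)   # dateUpdated and numWords found, themes skipped
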
@ -342,7 +344,7 @@ ${value}<br />
metadata.appendChild(newTag(contentdom,"dc:description",text=
self.getMetadata('description')))
for subject in self.getTags():
for subject in self.story.getSubjectTags():
metadata.appendChild(newTag(contentdom,"dc:subject",text=subject))

@ -441,7 +443,7 @@ div { margin: 0pt; padding: 0pt; }
if self.getConfig("include_titlepage"):
items.append(("title_page","OEBPS/title_page.xhtml","application/xhtml+xml","Title Page"))
itemrefs.append("title_page")
if len(self.story.getChapters(self)) > 1 and self.getConfig("include_tocpage") and not self.metaonly :
if len(self.story.getChapters()) > 1 and self.getConfig("include_tocpage") and not self.metaonly :
items.append(("toc_page","OEBPS/toc_page.xhtml","application/xhtml+xml","Table of Contents"))
itemrefs.append("toc_page")

@ -449,7 +451,7 @@ div { margin: 0pt; padding: 0pt; }
items.append(("log_page","OEBPS/log_page.xhtml","application/xhtml+xml","Update Log"))
itemrefs.append("log_page")
for index, (title,html) in enumerate(self.story.getChapters(self)):
for index, (title,html) in enumerate(self.story.getChapters()):
if html:
i=index+1
items.append(("file%04d"%i,

@ -587,7 +589,7 @@ div { margin: 0pt; padding: 0pt; }
outputepub.writestr("OEBPS/log_page.xhtml",logpageIO.getvalue())
logpageIO.close()
for index, (title,html) in enumerate(self.story.getChapters(self)):
for index, (title,html) in enumerate(self.story.getChapters()):
if html:
logging.debug('Writing chapter text for: %s' % title)
fullhtml = self.EPUB_CHAPTER_START.substitute({'chapter':title, 'index':index+1}) + html + self.EPUB_CHAPTER_END.substitute({'chapter':title, 'index':index+1})
@ -94,7 +94,7 @@ ${output_css}
self.HTML_TOC_ENTRY,
self.HTML_TOC_PAGE_END)
for index, (title,html) in enumerate(self.story.getChapters(self)):
for index, (title,html) in enumerate(self.story.getChapters()):
if html:
logging.debug('Writing chapter text for: %s' % title)
self._write(out,self.HTML_CHAPTER_START.substitute({'chapter':title, 'index':"%04d"%(index+1)}))

@ -169,7 +169,7 @@ ${value}<br />
# files.append(tocpageIO.getvalue())
# tocpageIO.close()
for index, (title,html) in enumerate(self.story.getChapters(self)):
for index, (title,html) in enumerate(self.story.getChapters()):
if html:
logging.debug('Writing chapter text for: %s' % title)
fullhtml = self.MOBI_CHAPTER_START.substitute({'chapter':title, 'index':index+1}) + html + self.MOBI_CHAPTER_END.substitute({'chapter':title, 'index':index+1})

@ -133,7 +133,7 @@ End file.
self._write(out,self.lineends(self.wraplines(towrap)))
for index, (title,html) in enumerate(self.story.getChapters(self)):
for index, (title,html) in enumerate(self.story.getChapters()):
if html:
logging.debug('Writing chapter text for: %s' % title)
self._write(out,self.lineends(self.wraplines(removeAllEntities(self.TEXT_CHAPTER_START.substitute({'chapter':title, 'index':index+1})))))
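
All of the writers above share the same chapter loop: iterate story.getChapters() and substitute the title and index into a per-format string.Template. A minimal sketch with an invented template and chapter list:

from string import Template

CHAPTER_START = Template('<h2 id="chapter${index}">${chapter}</h2>')   # invented template
chapters = [('Chapter 1', '<p>text one</p>'), ('Missing Chapter', None)]

for index, (title, html) in enumerate(chapters):
    if html:    # chapters without content are skipped, as in the hunks above
        print(CHAPTER_START.substitute({'chapter': title, 'index': index + 1}) + html)
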
18
index.html
18
index.html

@ -58,7 +58,23 @@
<p>
New sites bloodties-fans.com and thehookupzone.net, thanks Marillapm!
</p>
<h3>New Features and Fixes</h3>
<p>
Added extra metadata feature for certain sites. This one
contains some metadata features that have been requested,
plus quite a bit of internal refactoring to allow them.
This is primarily intended for calibre plugin users, so
it's <a href="">documented in the plugin forum</a>.
<br /> Also added cover images for dramione.org, thanks
ilovejedd.
</p>
<p>
Fixes for: squidge.org/peja Rating including '[' when
there are no stars; loosen url check on AO3 to allow more
copy/paste URLs; fix author on castlefans.org due to
site/skin changes.
</p>
<p>
Questions? Check out our
<a href="http://code.google.com/p/fanficdownloader/wiki/FanFictionDownloaderFAQs">FAQs</a>.

@ -67,7 +83,7 @@
If you have any problems with this application, please
report them in
the <a href="http://groups.google.com/group/fanfic-downloader">FanFictionDownLoader Google Group</a>. The
<a href="http://4-4-24.fanfictiondownloader.appspot.com">Previous Version</a> is also available for you to use if necessary.
<a href="http://4-4-25.fanfictiondownloader.appspot.com">Previous Version</a> is also available for you to use if necessary.
</p>
<div id='error'>
{{ error_message }}
27
main.py
27
main.py

@ -29,7 +29,6 @@ import datetime
import traceback
from StringIO import StringIO
import ConfigParser
## Just to shut up the appengine warning about "You are using the
## default Django version (0.96). The default Django version will

@ -57,22 +56,25 @@ from google.appengine.runtime import DeadlineExceededError
from ffstorage import *
from fanficdownloader import adapters, writers, exceptions
from fanficdownloader.configurable import Configuration
class UserConfigServer(webapp2.RequestHandler):
def getUserConfig(self,user):
config = ConfigParser.SafeConfigParser()
def getUserConfig(self,user,url,fileformat):
configuration = Configuration(adapters.getConfigSectionFor(url),fileformat)
logging.debug('reading defaults.ini config file')
config.read('defaults.ini')
configuration.read('defaults.ini')
## Pull user's config record.
l = UserConfig.all().filter('user =', user).fetch(1)
if l and l[0].config:
uconfig=l[0]
#logging.debug('reading config from UserConfig(%s)'%uconfig.config)
config.readfp(StringIO(uconfig.config))
configuration.readfp(StringIO(uconfig.config))
return config
return configuration
class MainHandler(webapp2.RequestHandler):
def get(self):

@ -137,7 +139,8 @@ class EditConfigServer(UserConfigServer):
uconfig.config = self.request.get('config').encode('utf8')[:10000] ## just in case.
uconfig.put()
try:
config = self.getUserConfig(user)
# just getting config for testing purposes.
configuration = self.getUserConfig(user,"test1.com","epub")
self.redirect("/?error=configsaved")
except Exception, e:
logging.info("Saved Config Failed:%s"%e)

@ -367,12 +370,12 @@ class FanfictionDownloader(UserConfigServer):
adapter = None
try:
try:
config = self.getUserConfig(user)
configuration = self.getUserConfig(user,url,format)
except Exception, e:
self.redirect("/?error=custom&errtext=%s"%urlEscape("There's an error in your User Configuration: "+str(e)))
return
adapter = adapters.getAdapter(config,url,format)
adapter = adapters.getAdapter(configuration,url)
logging.info('Created an adaper: %s' % adapter)
if len(login) > 1:

@ -474,8 +477,8 @@ class FanfictionDownloaderTask(UserConfigServer):
logging.info('Creating adapter...')
try:
config = self.getUserConfig(user)
adapter = adapters.getAdapter(config,url,format)
configuration = self.getUserConfig(user,url,format)
adapter = adapters.getAdapter(configuration,url)
logging.info('Created an adapter: %s' % adapter)

@ -488,7 +491,7 @@ class FanfictionDownloaderTask(UserConfigServer):
# adapter.getStoryMetadataOnly() only fetches enough to
# get metadata. writer.writeStory() will call
# adapter.getStory(), too.
writer = writers.getWriter(format,config,adapter)
writer = writers.getWriter(format,configuration,adapter)
download.name = writer.getOutputFileName()
#logging.debug('output_filename:'+writer.getConfig('output_filename'))
logging.debug('getOutputFileName:'+writer.getOutputFileName())
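
The main.py changes above boil down to one call sequence: build a Configuration from the url's site section and the output format, layer defaults.ini and the user's stored config on top, then hand that single object to getAdapter and getWriter. A sketch of that flow (error handling and App Engine glue omitted; assumes the fanficdownloader package is importable and the url is a supported story):

from StringIO import StringIO
from fanficdownloader import adapters, writers
from fanficdownloader.configurable import Configuration

url, fileformat = 'http://test1.com?sid=1234', 'epub'   # hypothetical story url
user_ini = '[defaults]\n'                               # stand-in for the stored UserConfig

configuration = Configuration(adapters.getConfigSectionFor(url), fileformat)
configuration.read('defaults.ini')               # application defaults first
configuration.readfp(StringIO(user_ini))         # then the user's overrides

adapter = adapters.getAdapter(configuration, url)             # format no longer passed here
writer = writers.getWriter(fileformat, configuration, adapter)
print(writer.getOutputFileName())
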
@ -67,9 +67,14 @@ authorId_label:Author ID
## show up in Calibre as tags. Also carried into mobi when converted.
extratags_label:Extra Tags
## The version of fanficdownloader
##
version_label:FFDL Version
## Date formats used by FFDL. Published and Update don't have time.
## Note that ini format requires % to be escaped as %%.
dateCreated_format:%%Y-%%m-%%d %%H:%%M:%%S
datePublished_format:%%Y-%%m-%%d
dateUpdated_format:%%Y-%%m-%%d
## items to include in the title page
## Empty metadata entries will *not* appear, even if in the list.
## You can include extra text or HTML that will be included as-is in
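
The doubled %% in the new *_format lines is ini escaping: ConfigParser interpolation turns %% back into a single %, leaving a normal strftime pattern. A quick check (assuming these options live in the [defaults] section):

import datetime
import ConfigParser
from StringIO import StringIO

ini = "[defaults]\ndatePublished_format:%%Y-%%m-%%d\n"
cp = ConfigParser.SafeConfigParser()
cp.readfp(StringIO(ini))
fmt = cp.get('defaults', 'datePublished_format')        # %% unescapes to %
print(fmt)                                              # %Y-%m-%d
print(datetime.date(1975, 4, 15).strftime(fmt))         # 1975-04-15
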
@ -463,6 +468,55 @@ extraships:Draco Malfoy/Hermione Granger
## personal.ini, not defaults.ini.
#is_adult:true
## Some adapters collect additional meta information beyond the
## standard ones. They need to be defined in extra_valid_entries to
## tell the rest of the FFDL system about them. They can be used in
## include_subject_tags, titlepage_entries, extra_titlepage_entries,
## logpage_entries, extra_logpage_entries, and include_in_* config
## items. You can also add additional entries here to build up
## composite metadata entries. dramione.org, for example, adds
## 'cliches' and then defines it as the composite of hermiones,dracos in
## include_in_cliches.
extra_valid_entries:themes,hermiones,dracos,timeline,cliches
include_in_cliches:hermiones,dracos
## For another example, you could, by uncommenting this line, include
## themes in with genre metadata.
#include_in_genre:genre, themes
## You can give each new valid entry a specific label for use on
## titlepage and logpage. If not defined, it will simply be the
themes_label:Themes
hermiones_label:Hermiones
dracos_label:Dracos
timeline_label:Timeline
cliches_label:Character Cliches
## extra_titlepage_entries (and extra_logpage_entries) *add* to
## titlepage_entries (and logpage_entries) so you can add site
## specific entries to titlepage/logpage without having to copy the
## entire titlepage_entries line. (But if you want them higher than
## the end, you will need to copy titlepage_entries.)
#extra_titlepage_entries: themes,timeline,cliches
#extra_logpage_entries: themes,timeline,cliches
#extra_subject_tags: themes,timeline,cliches
## (Plugin Only) - You can also populate calibre custom columns with
## the site specific metadata using custom_columns_settings (but only
## if 'Allow custom_columns_settings from personal.ini' is checked in
## the plugin GUI config.) There are three parts, the entry name,
## then the label of the calibre custom column, then (optionally) a
## 'mode'. 'r' to Replace any existing values, 'a' to Add to existing
## value (use with tag-like columns), and 'n' for setting on New books
## only.
## Make sure to keep at least one space at the start of each
## line.
#custom_columns_settings:
# cliches=>#acolumn,r
# themes=>#bcolumn,a
# timeline=>#ccolumn,n

[erosnsappho.sycophanthex.com]
## Site dedicated to these categories/characters/ships
extracategories:Harry Potter
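
The custom_columns_settings block above is three fields per line: entry name, calibre custom column label, and an optional mode. An illustration of how such lines break apart (a sketch of the format only, not the plugin's parser; the default mode is an assumption):

sample = """
 cliches=>#acolumn,r
 themes=>#bcolumn,a
 timeline=>#ccolumn,n
"""

for line in sample.splitlines():
    line = line.strip()
    if not line:
        continue
    entry, rest = line.split('=>', 1)
    parts = rest.split(',')
    column = parts[0]
    mode = parts[1] if len(parts) > 1 else 'r'      # assumed default when mode is omitted
    print((entry, column, mode))
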
@ -774,9 +828,13 @@ extraships:Sesshoumaru/Kagome
[www.fanfiction.net]
## fanfiction.net's 'cover' images are really just tiny thumbnails.
## Comment this out or change it to false to use them anyway.
## Change this to false to use them anyway.
never_make_cover: true
## Extra metadata that this adapter knows about. See [dramione.org]
## for examples of how to use them.
extra_valid_entries:reviews,favs,follows
[www.fanfiktion.de]
## Some sites require login (or login for some rated stories) The
## program can prompt you, or you can save it in config. In
@ -42,7 +42,8 @@
## Most common, I expect will be using this to save username/passwords
## for different sites. Here are a few examples.
## for different sites. Here are a few examples. See defaults.ini
## for the full list.
[www.twilighted.net]
#username:YourPenname