Extra metadata feature(refactoring), loosen url check on AO3, fix auth on castlefans.org.

This commit is contained in:
Jim Miller 2012-09-21 12:55:26 -05:00
parent e5b0974d0e
commit 4bb91cd0c5
27 changed files with 596 additions and 303 deletions

View file

@ -1,6 +1,6 @@
# ffd-retief-hrd fanfictiondownloader
application: fanfictiondownloader
version: 4-4-25
version: 4-4-26
runtime: python27
api_version: 1
threadsafe: true

View file

@ -27,7 +27,7 @@ class FanFictionDownLoaderBase(InterfaceActionBase):
description = 'UI plugin to download FanFiction stories from various sites.'
supported_platforms = ['windows', 'osx', 'linux']
author = 'Jim Miller'
version = (1, 6, 9)
version = (1, 6, 10)
minimum_calibre_version = (0, 8, 57)
#: This field defines the GUI plugin class that contains all the code

View file

@ -64,6 +64,7 @@ default_prefs['countpagesstats'] = []
default_prefs['errorcol'] = ''
default_prefs['custom_cols'] = {}
default_prefs['custom_cols_newonly'] = {}
default_prefs['allow_custcol_from_ini'] = True
default_prefs['std_cols_newonly'] = {}
@ -258,7 +259,7 @@ class ConfigWidget(QWidget):
# error column
prefs['errorcol'] = unicode(self.cust_columns_tab.errorcol.itemData(self.cust_columns_tab.errorcol.currentIndex()).toString())
# cust cols
# cust cols tab
colsmap = {}
for (col,combo) in self.cust_columns_tab.custcol_dropdowns.iteritems():
val = unicode(combo.itemData(combo.currentIndex()).toString())
@ -272,6 +273,8 @@ class ConfigWidget(QWidget):
colsnewonly[col] = checkbox.isChecked()
prefs['custom_cols_newonly'] = colsnewonly
prefs['allow_custcol_from_ini'] = self.cust_columns_tab.allow_custcol_from_ini.isChecked()
prefs.save_to_db()
def edit_shortcuts(self):
@ -437,7 +440,7 @@ class PersonalIniTab(QWidget):
self.ini.setText(prefs['personal.ini'])
self.l.addWidget(self.ini)
self.defaults = QPushButton('View Defaults', self)
self.defaults = QPushButton('View Defaults (plugin-defaults.ini)', self)
self.defaults.setToolTip("View all of the plugin's configurable settings\nand their default settings.")
self.defaults.clicked.connect(self.show_defaults)
self.l.addWidget(self.defaults)
@ -456,7 +459,7 @@ class ShowDefaultsIniDialog(QDialog):
self.resize(600, 500)
self.l = QVBoxLayout()
self.setLayout(self.l)
self.label = QLabel("Plugin Defaults (Read-Only)")
self.label = QLabel("Plugin Defaults (plugin-defaults.ini) (Read-Only)")
self.label.setToolTip("These are all of the plugin's configurable options\nand their default settings.")
self.setWindowTitle(_('Plugin Defaults'))
self.setWindowIcon(icon)
@ -595,6 +598,8 @@ class GenerateCoverTab(QWidget):
horz.addWidget(dropdown)
self.sl.addLayout(horz)
self.sl.insertStretch(-1)
self.gcnewonly = QCheckBox("Run Generate Cover Only on New Books",self)
self.gcnewonly.setToolTip("Default is to run GC any time the calibre metadata is updated.")
self.gcnewonly.setChecked(prefs['gcnewonly'])
@ -605,8 +610,6 @@ class GenerateCoverTab(QWidget):
self.allow_gc_from_ini.setChecked(prefs['allow_gc_from_ini'])
self.l.addWidget(self.allow_gc_from_ini)
self.l.insertStretch(-1)
class CountPagesTab(QWidget):
def __init__(self, parent_dialog, plugin_action):
@ -838,11 +841,16 @@ class CustomColumnsTab(QWidget):
self.sl.insertStretch(-1)
self.l.addSpacing(5)
self.allow_custcol_from_ini = QCheckBox('Allow custom_columns_settings from personal.ini to override',self)
self.allow_custcol_from_ini.setToolTip("The personal.ini parameter custom_columns_settings allows you to set custom columns to site specific values that aren't common to all sites.<br \>custom_columns_settings is ignored when this is off.")
self.allow_custcol_from_ini.setChecked(prefs['allow_custcol_from_ini'])
self.l.addWidget(self.allow_custcol_from_ini)
self.l.addSpacing(5)
label = QLabel("Special column:")
label.setWordWrap(True)
self.l.addWidget(label)
self.l.addSpacing(5)
horz = QHBoxLayout()
label = QLabel("Update/Overwrite Error Column:")

View file

@ -8,7 +8,6 @@ __copyright__ = '2012, Jim Miller'
__docformat__ = 'restructuredtext en'
import time, os, copy, threading, re, platform
from ConfigParser import SafeConfigParser
from StringIO import StringIO
from functools import partial
from datetime import datetime
@ -37,7 +36,7 @@ from calibre_plugins.fanfictiondownloader_plugin.common_utils import (set_plugin
create_menu_action_unique, get_library_uuid)
from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader import adapters, writers, exceptions
#from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader.htmlcleanup import stripHTML
from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader.configurable import Configuration
from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader.epubutils import get_dcsource, get_dcsource_chaptercount, get_story_url_from_html
from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader.geturls import get_urls_from_page
@ -249,10 +248,13 @@ class FanFictionDownLoaderPlugin(InterfaceAction):
return
print("get_urls_from_page URL:%s"%d.url.text())
ffdlconfig = SafeConfigParser()
ffdlconfig.readfp(StringIO(get_resources("plugin-defaults.ini")))
ffdlconfig.readfp(StringIO(prefs['personal.ini']))
url_list = get_urls_from_page("%s"%d.url.text(),ffdlconfig)
if 'archiveofourown.org' in url:
configuration = Configuration(adapters.getConfigSectionFor(url),"EPUB")
configuration.readfp(StringIO(get_resources("plugin-defaults.ini")))
configuration.readfp(StringIO(options['personal.ini']))
else:
configuration = None
url_list = get_urls_from_page("%s"%d.url.text(),configuration)
if url_list:
d = ViewLog(_("List of URLs"),"\n".join(url_list),parent=self.gui)
@ -442,12 +444,10 @@ keep_summary_html:true
make_firstimage_cover:true
''' + options['personal.ini']
## was self.ffdlconfig, but we need to be able to change it
## when doing epub update.
ffdlconfig = SafeConfigParser()
ffdlconfig.readfp(StringIO(get_resources("plugin-defaults.ini")))
ffdlconfig.readfp(StringIO(options['personal.ini']))
adapter = adapters.getAdapter(ffdlconfig,url,fileform)
configuration = Configuration(adapters.getConfigSectionFor(url),fileform)
configuration.readfp(StringIO(get_resources("plugin-defaults.ini")))
configuration.readfp(StringIO(options['personal.ini']))
adapter = adapters.getAdapter(configuration,url)
## three tries, that's enough if both user/pass & is_adult needed,
## or a couple tries of one or the other
@ -470,13 +470,13 @@ make_firstimage_cover:true
# let other exceptions percolate up.
story = adapter.getStoryMetadataOnly()
writer = writers.getWriter(options['fileform'],adapter.config,adapter)
writer = writers.getWriter(options['fileform'],configuration,adapter)
book['all_metadata'] = story.getAllMetadata(removeallentities=True)
book['title'] = story.getMetadata("title", removeallentities=True)
book['author_sort'] = book['author'] = story.getList("author", removeallentities=True)
book['publisher'] = story.getMetadata("site")
book['tags'] = writer.getTags(removeallentities=True) # getTags could be moved up into adapter now. Adapter didn't used to know the fileform
book['tags'] = story.getSubjectTags(removeallentities=True)
book['comments'] = sanitize_comments_html(story.getMetadata("description"))
book['series'] = story.getMetadata("series", removeallentities=True)
@ -913,7 +913,7 @@ make_firstimage_cover:true
except AttributeError:
print("AttributeError? %s"%col)
pass
db.set_metadata(book_id,mi)
# do configured column updates here.
@ -950,6 +950,52 @@ make_firstimage_cover:true
val = book['all_metadata']['status'] == 'In-Progress'
db.set_custom(book_id, val, label=label, commit=False)
adapter = None
if prefs['allow_custcol_from_ini']:
configuration = Configuration(adapters.getConfigSectionFor(book['url']),options['fileform'])
configuration.readfp(StringIO(get_resources("plugin-defaults.ini")))
configuration.readfp(StringIO(options['personal.ini']))
adapter = adapters.getAdapter(configuration,book['url'])
# meta => custcol[,a|n|r]
# cliches=>\#acolumn,r
for line in adapter.getConfig('custom_columns_settings').splitlines():
if "=>" in line:
(meta,custcol) = map( lambda x: x.strip(), line.split("=>") )
flag='r'
if "," in custcol:
(custcol,flag) = map( lambda x: x.strip(), custcol.split(",") )
#print("meta:(%s) => custcol:(%s), flag(%s) "%(meta,custcol,flag))
if meta not in book['all_metadata']:
print("No value for %s, skipping custom column(%s) update."%(meta,custcol))
continue
if custcol not in custom_columns:
print("No custom column(%s), skipping."%(custcol))
continue
else:
coldef = custom_columns[custcol]
label = coldef['label']
if flag == 'r' or book['added']:
db.set_custom(book_id, book['all_metadata'][meta], label=label, commit=False)
if flag == 'a':
try:
existing=db.get_custom(book_id,label=label,index_is_id=True)
if isinstance(existing,list):
vallist = existing
else :
vallist = [existing]
vallist.append(book['all_metadata'][meta])
except:
vallist = [book['all_metadata'][meta]]
db.set_custom(book_id, ", ".join(vallist), label=label, commit=False)
db.commit()
if 'Generate Cover' in self.gui.iactions and (book['added'] or not prefs['gcnewonly']):
@ -961,10 +1007,11 @@ make_firstimage_cover:true
gc_plugin = self.gui.iactions['Generate Cover']
setting_name = None
if prefs['allow_gc_from_ini']:
ffdlconfig = SafeConfigParser()
ffdlconfig.readfp(StringIO(get_resources("plugin-defaults.ini")))
ffdlconfig.readfp(StringIO(prefs['personal.ini']))
adapter = adapters.getAdapter(ffdlconfig,book['url'],options['fileform'])
if not adapter: # might already have it from allow_custcol_from_ini
configuration = Configuration(adapters.getConfigSectionFor(book['url']),options['fileform'])
configuration.readfp(StringIO(get_resources("plugin-defaults.ini")))
configuration.readfp(StringIO(options['personal.ini']))
adapter = adapters.getAdapter(configuration,book['url'])
# template => regexp to match => GC Setting to use.
# generate_cover_settings:
@ -973,7 +1020,7 @@ make_firstimage_cover:true
if "=>" in line:
(template,regexp,setting) = map( lambda x: x.strip(), line.split("=>") )
value = Template(template).safe_substitute(book['all_metadata']).encode('utf8')
print("%s(%s) => %s => %s"%(template,value,regexp,setting))
# print("%s(%s) => %s => %s"%(template,value,regexp,setting))
if re.search(regexp,value):
setting_name = setting
break
@ -1192,11 +1239,11 @@ make_firstimage_cover:true
return None
def _is_good_downloader_url(self,url):
# this is the accepted way to 'check for existance'? really?
# this is the accepted way to 'check for existence of a class variable'? really?
try:
self.dummyconfig
except AttributeError:
self.dummyconfig = SafeConfigParser()
self.dummyconfig = Configuration("test1.com","EPUB")
# pulling up an adapter is pretty low over-head. If
# it fails, it's a bad url.
try:

View file

@ -10,7 +10,6 @@ __docformat__ = 'restructuredtext en'
import time, os, traceback
from ConfigParser import SafeConfigParser
from StringIO import StringIO
from calibre.utils.ipc.server import Server
@ -20,6 +19,7 @@ from calibre.utils.logging import Log
from calibre_plugins.fanfictiondownloader_plugin.dialogs import (NotGoingToDownload,
OVERWRITE, OVERWRITEALWAYS, UPDATE, UPDATEALWAYS, ADDNEW, SKIP, CALIBREONLY)
from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader import adapters, writers, exceptions
from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader.configurable import Configuration
from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader.epubutils import get_update_data
# ------------------------------------------------------------------------------
@ -114,19 +114,19 @@ def do_download_for_worker(book,options):
book['comment'] = 'Download started...'
ffdlconfig = SafeConfigParser()
ffdlconfig.readfp(StringIO(get_resources("plugin-defaults.ini")))
ffdlconfig.readfp(StringIO(options['personal.ini']))
configuration = Configuration(adapters.getConfigSectionFor(book['url']),options['fileform'])
configuration.readfp(StringIO(get_resources("plugin-defaults.ini")))
configuration.readfp(StringIO(options['personal.ini']))
if not options['updateepubcover'] and 'epub_for_update' in book and options['collision'] in (UPDATE, UPDATEALWAYS):
ffdlconfig.set("overrides","never_make_cover","true")
configuration.set("overrides","never_make_cover","true")
# images only for epub, even if the user mistakenly turned it
# on elsewhere.
if options['fileform'] != "epub":
ffdlconfig.set("overrides","include_images","false")
configuration.set("overrides","include_images","false")
adapter = adapters.getAdapter(ffdlconfig,book['url'],options['fileform'])
adapter = adapters.getAdapter(configuration,book['url'])
adapter.is_adult = book['is_adult']
adapter.username = book['username']
adapter.password = book['password']
@ -137,7 +137,7 @@ def do_download_for_worker(book,options):
adapter.setSeries(book['calibre_series'][0],book['calibre_series'][1])
# else:
# print("no calibre_series")
writer = writers.getWriter(options['fileform'],adapter.config,adapter)
writer = writers.getWriter(options['fileform'],configuration,adapter)
outfile = book['outfile']

View file

@ -18,6 +18,10 @@
## [defaults] section applies to all formats and sites but may be
## overridden at several levels
## Some sites also require the user to confirm they are adult for
## adult content. Uncomment by removing '#' in front of is_adult.
#is_adult:true
## All available titlepage_entries and the label used for them:
## <entryname>_label:<label>
## Labels may be customized.
@ -63,9 +67,15 @@ authorId_label:Author ID
## show up in Calibre as tags. Also carried into mobi when converted.
extratags_label:Extra Tags
## The version of fanficdownloader
##
version_label:FFDL Version
## Date formats used by FFDL. Published and Updated dates don't include a time.
## See http://docs.python.org/library/datetime.html#strftime-strptime-behavior
## Note that ini format requires % to be escaped as %%.
dateCreated_format:%%Y-%%m-%%d %%H:%%M:%%S
datePublished_format:%%Y-%%m-%%d
dateUpdated_format:%%Y-%%m-%%d
## items to include in the title page
## Empty metadata entries will *not* appear, even if in the list.
## You can include extra text or HTML that will be included as-is in
@ -478,6 +488,39 @@ extraships:Draco Malfoy/Hermione Granger
## personal.ini, not defaults.ini.
#is_adult:true
## Some adapters collect additional meta information beyond the
## standard ones. They need to be defined in extra_valid_entries to
## tell the rest of the FFDL system about them. They can be used in
## include_subject_tags, titlepage_entries, extra_titlepage_entries,
## logpage_entries, extra_logpage_entries, and include_in_* config
## items. You can also add additional entries here to build up
## composite metadata entries. dramione.org, for example, adds
## 'cliches' and then defines it as the composite of hermiones and
## dracos in include_in_cliches.
extra_valid_entries:themes,hermiones,dracos,timeline,cliches
include_in_cliches:hermiones,dracos
## For another example, you could, by uncommenting this line, include
## themes in with genre metadata. Note, however, that you couldn't
## use cliches. include_in_* can only include the original real
## entries, not other include_in_* values.
#include_in_genre:genre, themes
## You can give each new valid entry a specific label for use on
## titlepage and logpage. If not defined, it will simply be the
## entry name.
themes_label:Themes
hermiones_label:Hermiones
dracos_label:Dracos
timeline_label:Timeline
cliches_label:Character Cliches
## extra_titlepage_entries (and extra_logpage_entries) *add* to
## titlepage_entries (and logpage_entries) so you can add site
## specific entries to titlepage/logpage without having to copy the
## entire titlepage_entries line. (But if you want them higher than
## the end, you will need to copy titlepage_entries.)
extra_titlepage_entries: themes,hermiones,dracos,timeline,cliches
[erosnsappho.sycophanthex.com]
## Site dedicated to these categories/characters/ships
extracategories:Harry Potter
@ -789,9 +832,13 @@ extraships:Sesshoumaru/Kagome
[www.fanfiction.net]
## fanfiction.net's 'cover' images are really just tiny thumbnails.
## Comment this out or change it to false to use them anyway.
## Change this to false to use them anyway.
never_make_cover: true
## Extra metadata that this adapter knows about. See [dramione.org]
## for examples of how to use them.
extra_valid_entries:reviews,favs,follows
[www.fanfiktion.de]
## Some sites require login (or login for some rated stories) The
## program can prompt you, or you can save it in config. In
@ -818,6 +865,10 @@ output_filename: ${title}-${siteabbrev}_${authorId}_${storyId}${formatext}
## Clear FanFiction from defaults, fictionpress.com is original fiction.
extratags:
## Extra metadata that this adapter knows about. See [dramione.org]
## for examples of how to use them.
extra_valid_entries:reviews,favs,follows
[www.ficwad.com]
## Some sites require login (or login for some rated stories) The
## program can prompt you, or you can save it in config. In

View file

@ -27,6 +27,7 @@ import ConfigParser
from subprocess import call
from fanficdownloader import adapters,writers,exceptions
from fanficdownloader.configurable import Configuration
from fanficdownloader.epubutils import get_dcsource_chaptercount, get_update_data
from fanficdownloader.geturls import get_urls_from_page
@ -90,7 +91,15 @@ def main():
if options.update and options.format != 'epub':
parser.error("-u/--update-epub only works with epub")
config = ConfigParser.SafeConfigParser()
## Attempt to update an existing epub.
if options.update:
(url,chaptercount) = get_dcsource_chaptercount(args[0])
print "Updating %s, URL: %s" % (args[0],url)
output_filename = args[0]
else:
url = args[0]
configuration = Configuration(adapters.getConfigSectionFor(url),options.format)
conflist = []
homepath = join(expanduser("~"),".fanficdownloader")
@ -109,46 +118,43 @@ def main():
conflist.extend(options.configfile)
logging.debug('reading %s config file(s), if present'%conflist)
config.read(conflist)
configuration.read(conflist)
print("has include_in_tags?%s"%configuration.hasConfig("include_in_tags"))
try:
config.add_section("overrides")
configuration.add_section("overrides")
except ConfigParser.DuplicateSectionError:
pass
if options.force:
config.set("overrides","always_overwrite","true")
configuration.set("overrides","always_overwrite","true")
if options.update:
configuration.set("overrides","output_filename",args[0])
if options.update and not options.updatecover:
config.set("overrides","never_make_cover","true")
configuration.set("overrides","never_make_cover","true")
# images only for epub, even if the user mistakenly turned it
# on elsewhere.
if options.format != "epub":
config.set("overrides","include_images","false")
configuration.set("overrides","include_images","false")
if options.options:
for opt in options.options:
(var,val) = opt.split('=')
config.set("overrides",var,val)
configuration.set("overrides",var,val)
if options.list:
retlist = get_urls_from_page(args[0], config)
retlist = get_urls_from_page(args[0], configuration)
print "\n".join(retlist)
return
try:
## Attempt to update an existing epub.
if options.update:
(url,chaptercount) = get_dcsource_chaptercount(args[0])
print "Updating %s, URL: %s" % (args[0],url)
output_filename = args[0]
config.set("overrides","output_filename",args[0])
else:
url = args[0]
adapter = adapters.getAdapter(config,url,options.format)
adapter = adapters.getAdapter(configuration,url)
## Check for include_images and absence of PIL, give warning.
if adapter.getConfig('include_images'):
@ -206,7 +212,7 @@ def main():
adapter.calibrebookmark,
adapter.logfile) = get_update_data(args[0])
writeStory(config,adapter,"epub")
writeStory(configuration,adapter,"epub")
else:
# regular download
@ -215,7 +221,7 @@ def main():
adapter.setChaptersRange(options.begin,options.end)
output_filename=writeStory(config,adapter,options.format,options.metaonly)
output_filename=writeStory(configuration,adapter,options.format,options.metaonly)
if not options.metaonly and adapter.getConfig("post_process_cmd"):
metadata = adapter.story.metadata

View file

@ -3,10 +3,20 @@
[defaults]
## Some sites also require the user to confirm they are adult for
## adult content. In commandline version, this should go in your
## personal.ini, not defaults.ini.
## adult content. Uncomment by removing '#' in front of is_adult. In
## commandline version, this should go in your personal.ini, not
## defaults.ini.
#is_adult:true
## Don't like the numbers at the start of chapter titles on some
## sites? You can use strip_chapter_numbers to strip them off. Just
## want to make them all look the same? Strip them off, then add them
## back on with add_chapter_numbers. Don't like the way it strips
## numbers or adds them back? See chapter_title_strip_pattern and
## chapter_title_add_pattern.
#strip_chapter_numbers:true
#add_chapter_numbers:true
[epub]
## include images from img tags in the body and summary of stories.
## Images will be converted to jpg for size if possible. Images work
@ -34,7 +44,8 @@
## Most common, I expect will be using this to save username/passwords
## for different sites. Here are a few examples.
## for different sites. Here are a few examples. See defaults.ini
## for the full list.
[www.twilighted.net]
#username:YourPenname

View file

@ -117,8 +117,8 @@ for x in imports():
if "fanficdownloader.adapters.adapter_" in x:
#print x
__class_list.append(sys.modules[x].getClass())
def getAdapter(config,url,fileform=None):
def getDomainURL(url):
## fix up leading protocol.
fixedurl = re.sub(r"(?i)^[htps]+[:/]+","http://",url.strip())
if not fixedurl.startswith("http"):
@ -135,20 +135,17 @@ def getAdapter(config,url,fileform=None):
if( domain != parsedUrl.netloc ):
fixedurl = fixedurl.replace(parsedUrl.netloc,domain)
return (domain,fixedurl)
def getAdapter(config,url):
logging.debug("trying url:"+url)
cls = getClassFor(domain)
if not cls and domain.startswith("www."):
domain = domain.replace("www.","")
logging.debug("trying site:without www: "+domain)
cls = getClassFor(domain)
fixedurl = fixedurl.replace("http://www.","http://")
if not cls:
logging.debug("trying site:www."+domain)
cls = getClassFor("www."+domain)
fixedurl = fixedurl.replace("http://","http://www.")
(domain,fixedurl) = getDomainURL(url)
cls = getClassFromList(domain)
logging.debug("fixedurl:"+fixedurl)
if cls:
adapter = cls(config,fixedurl) # raises InvalidStoryURL
adapter.setSectionOrder(adapter.getConfigSection(),fileform)
return adapter
# No adapter found.
raise exceptions.UnknownSite( url, [cls.getSiteDomain() for cls in __class_list] )
@ -156,7 +153,28 @@ def getAdapter(config,url,fileform=None):
def getConfigSections():
return [cls.getConfigSection() for cls in __class_list]
def getConfigSectionFor(url):
    """Return the config section name of the adapter class that handles *url*.

    The domain is extracted with getDomainURL() and looked up with
    getClassFromList().  Raises exceptions.UnknownSite, listing every known
    site domain, when no adapter class matches.
    """
    domain, _fixedurl = getDomainURL(url)
    adapter_class = getClassFromList(domain)
    if not adapter_class:
        # No adapter found.
        known_domains = [c.getSiteDomain() for c in __class_list]
        raise exceptions.UnknownSite( url, known_domains )
    return adapter_class.getConfigSection()
def getClassFor(domain):
    """Return the adapter class for *domain*, tolerating a missing or extra "www.".

    The domain is tried as given first.  If nothing matches and it starts
    with "www.", the "www." text is removed and the lookup is retried.  If
    there is still no match, "www." is prepended and tried once more.

    Returns the matching class, or whatever getClassFromList() returns for
    the last attempt when nothing matches.
    """
    cls = getClassFromList(domain)
    if not cls and domain.startswith("www."):
        domain = domain.replace("www.","")
        logging.debug("trying site:without www: "+domain)
        cls = getClassFromList(domain)
    if not cls:
        logging.debug("trying site:www."+domain)
        cls = getClassFromList("www."+domain)
    # The previous version tried to rewrite a 'fixedurl' local that is never
    # defined in this function (UnboundLocalError on either fallback path)
    # and discarded the class it found.  Only the domain is known here, so
    # just hand back the lookup result.
    return cls
def getClassFromList(domain):
for cls in __class_list:
if cls.matchesSite(domain):
return cls

View file

@ -83,7 +83,7 @@ class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter):
def getSiteURLPattern(self):
# http://archiveofourown.org/collections/Smallville_Slash_Archive/works/159770
return re.escape("http://")+"(www.)?"+re.escape(self.getSiteDomain())+r"(/collections/[^/]+)?/works/(?P<id>\d+)(/chapters/\d+)?/?$"
return re.escape("http://")+"(www.)?"+re.escape(self.getSiteDomain())+r"(/collections/[^/]+)?/works/(?P<id>\d+)"
## Login
def needToLoginCheck(self, data):

View file

@ -174,13 +174,14 @@ class CastleFansOrgAdapter(BaseSiteAdapter): # XXX
# print data
# Now go hunting for all the meta data and the chapter list.
pagetitle = soup.find('div',{'id':'pagetitle'})
## Title
a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
a = pagetitle.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
self.story.setMetadata('title',a.string)
# Find authorid and URL from... author url.
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
a = pagetitle.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
self.story.setMetadata('authorId',a['href'].split('=')[1])
self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href'])
self.story.setMetadata('author',a.string)

View file

@ -161,20 +161,18 @@ class DramioneOrgAdapter(BaseSiteAdapter):
self.story.setMetadata('author',a.string)
# Use banner as cover if found
if self.getConfig('include_images'):
coverurl = ''
img = soup.find('img',{'class':'banner'})
if img:
coverurl = img['src']
#print "Cover: "+coverurl
a = soup.find(text="This story has a banner; click to view.")
if a:
#print "A: "+ ', '.join("(%s, %s)" %tup for tup in a.parent.attrs)
coverurl = a.parent['href']
#print "Cover: "+coverurl
if coverurl:
self.story.addImgUrl(self,url,coverurl,self._fetchUrlRaw,cover=True)
coverurl = ''
img = soup.find('img',{'class':'banner'})
if img:
coverurl = img['src']
#print "Cover: "+coverurl
a = soup.find(text="This story has a banner; click to view.")
if a:
#print "A: "+ ', '.join("(%s, %s)" %tup for tup in a.parent.attrs)
coverurl = a.parent['href']
#print "Cover: "+coverurl
if coverurl:
self.setCoverImage(url,coverurl)
# Find the chapters:
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")):
@ -194,6 +192,22 @@ class DramioneOrgAdapter(BaseSiteAdapter):
for warning in warnings:
self.story.addToList('warnings',warning.string)
themes=soup.findAll('a', {'class' : "tag-3"})
for theme in themes:
self.story.addToList('themes',theme.string)
hermiones=soup.findAll('a', {'class' : "tag-4"})
for hermione in hermiones:
self.story.addToList('hermiones',hermione.string)
dracos=soup.findAll('a', {'class' : "tag-5"})
for draco in dracos:
self.story.addToList('dracos',draco.string)
timelines=soup.findAll('a', {'class' : "tag-6"})
for timeline in timelines:
self.story.addToList('timeline',timeline.string)
# utility method
def defaultGetattr(d,k):
try:

View file

@ -186,8 +186,14 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
donechars = False
while len(metalist) > 0:
if metalist[0].startswith('Reviews') or metalist[0].startswith('Chapters') or metalist[0].startswith('Status') or metalist[0].startswith('id:') or metalist[0].startswith('Favs:') or metalist[0].startswith('Follows:'):
if metalist[0].startswith('Chapters') or metalist[0].startswith('Status') or metalist[0].startswith('id:'):
pass
elif metalist[0].startswith('Reviews'):
self.story.setMetadata('reviews',metalist[0].split(':')[1].strip())
elif metalist[0].startswith('Favs:'):
self.story.setMetadata('favs',metalist[0].split(':')[1].strip())
elif metalist[0].startswith('Follows:'):
self.story.setMetadata('follows',metalist[0].split(':')[1].strip())
elif metalist[0].startswith('Updated'):
self.story.setMetadata('dateUpdated',makeDate(metalist[0].split(':')[1].strip(), '%m-%d-%y'))
elif metalist[0].startswith('Published'):
@ -208,10 +214,9 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
else:
self.story.setMetadata('status', 'In-Progress')
if self.getConfig('include_images'):
img = soup.find('img',{'class':'cimage'})
if img:
self.story.addImgUrl(self,url,img['src'],self._fetchUrlRaw,cover=True)
img = soup.find('img',{'class':'cimage'})
if img:
self.setCoverImage(url,img['src'])
# Find the chapter selector
select = soup.find('select', { 'name' : 'chapter' } )

View file

@ -138,14 +138,14 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter):
self.story.setMetadata("numWords", str(storyMetadata["words"]))
# fimfic is the first site with an explicit cover image.
if self.getConfig('include_images') and "image" in storyMetadata.keys():
if "image" in storyMetadata.keys():
if "full_image" in storyMetadata:
coverurl = storyMetadata["full_image"]
else:
coverurl = storyMetadata["image"]
if coverurl.startswith('//static.fimfiction.net'): # fix for img urls missing 'http:'
coverurl = "http:"+coverurl
self.story.addImgUrl(self,self.url,coverurl,self._fetchUrlRaw,cover=True)
self.setCoverImage(self.url,coverurl)
# the fimfic API gives bbcode for desc, not html.

View file

@ -67,10 +67,8 @@ class BaseSiteAdapter(Configurable):
def validateURL(self):
return re.match(self.getSiteURLPattern(), self.url)
def __init__(self, config, url):
self.config = config
Configurable.__init__(self, config)
self.setSectionOrder(self.getConfigSection())
def __init__(self, configuration, url):
Configurable.__init__(self, configuration)
self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
self.password = ""
@ -79,7 +77,7 @@ class BaseSiteAdapter(Configurable):
self.opener = u2.build_opener(u2.HTTPCookieProcessor(),GZipProcessor())
self.storyDone = False
self.metadataDone = False
self.story = Story()
self.story = Story(configuration)
self.story.setMetadata('site',self.getConfigSection())
self.story.setMetadata('dateCreated',datetime.datetime.now())
self.chapterUrls = [] # tuples of (chapter title,chapter url)
@ -138,7 +136,7 @@ class BaseSiteAdapter(Configurable):
logging.debug("try code:"+code)
return data.decode(code)
except:
logging.info("code failed:"+code)
logging.debug("code failed:"+code)
pass
logging.info("Could not decode story, tried:%s Stripping non-ASCII."%decode)
return "".join([x for x in data if ord(x) < 128])
@ -199,8 +197,7 @@ class BaseSiteAdapter(Configurable):
if (self.chapterFirst!=None and index < self.chapterFirst) or \
(self.chapterLast!=None and index > self.chapterLast):
self.story.addChapter(removeEntities(title),
None,
self)
None)
else:
if self.oldchapters and index < len(self.oldchapters):
data = self.utf8FromSoup(None,
@ -209,16 +206,14 @@ class BaseSiteAdapter(Configurable):
else:
data = self.getChapterText(url)
self.story.addChapter(removeEntities(title),
removeEntities(data),
self)
removeEntities(data))
self.storyDone = True
# include image, but no cover from story, add default_cover_image cover.
if self.getConfig('include_images') and \
not self.story.cover and \
self.getConfig('default_cover_image'):
self.story.addImgUrl(self,
None,
self.story.addImgUrl(None,
#self.getConfig('default_cover_image'),
self.story.formatFileName(self.getConfig('default_cover_image'),
self.getConfig('allow_unsafe_filename')),
@ -298,6 +293,10 @@ class BaseSiteAdapter(Configurable):
self.story.setMetadata('description',stripHTML(svalue))
#print("\n\ndescription:\n"+self.story.getMetadata('description')+"\n\n")
def setCoverImage(self,storyurl,imgurl):
    """Register *imgurl* as the story's cover image when images are enabled.

    Does nothing unless the 'include_images' config value is truthy;
    otherwise hands the URL to the story with cover=True, using this
    adapter's raw fetch function to retrieve it.
    """
    if not self.getConfig('include_images'):
        return
    self.story.addImgUrl(storyurl,imgurl,self._fetchUrlRaw,cover=True)
# This gives us a unicode object, not just a string containing bytes.
# (I gave soup a unicode string, you'd think it could give it back...)
# Now also does a bunch of other common processing for us.
@ -313,7 +312,7 @@ class BaseSiteAdapter(Configurable):
# some pre-existing epubs have img tags that had src stripped off.
if img.has_key('src'):
img['longdesc']=img['src']
img['src']=self.story.addImgUrl(self,url,img['src'],fetch,
img['src']=self.story.addImgUrl(url,img['src'],fetch,
coverexclusion=self.getConfig('cover_exclusion_regexp'))
for attr in soup._getAttrMap().keys():

View file

@ -21,10 +21,6 @@ import ConfigParser
# inherit from Configurable. The config file(s) uses ini format:
# [sections] with key:value settings.
#
# writer does [defaults], [www.whofic.com], [epub], [www.whofic.com:epub], [overrides]
#
# Until a write is created, the adapter only has [defaults], [www.whofic.com], [overrides]
#
# [defaults]
# titlepage_entries: category,genre, status
# [www.whofic.com]
@ -36,28 +32,67 @@ import ConfigParser
# [overrides]
# titlepage_entries: category
class Configuration(ConfigParser.SafeConfigParser):
class Configurable(object):
def __init__(self, config):
self.config = config
self.sectionslist = ['defaults']
def setSectionOrder(self,site,fileform=None):
def __init__(self, site, fileform):
ConfigParser.SafeConfigParser.__init__(self)
self.sectionslist = ['defaults']
self.addConfigSection(site)
if fileform:
self.addConfigSection(fileform)
self.addConfigSection(site+":"+fileform)
self.addConfigSection("overrides")
self.validEntries = [
'category',
'genre',
'language',
'characters',
'ships',
'series',
'status',
'datePublished',
'dateUpdated',
'dateCreated',
'rating',
'warnings',
'numChapters',
'numWords',
'site',
'storyId',
'authorId',
'extratags',
'title',
'storyUrl',
'description',
'author',
'authorUrl',
'formatname',
'formatext',
'siteabbrev',
'version',
# internal stuff.
'langcode',
'output_css',
'authorHTML'
]
def addConfigSection(self,section):
self.sectionslist.insert(0,section)
def isValidMetaEntry(self, key):
return key in self.getValidMetaList()
def getValidMetaList(self):
    """Return a new list of every metadata entry name considered valid.

    The result is the built-in validEntries followed by whatever names
    the 'extra_valid_entries' config setting lists.  A fresh list is
    built on each call, so callers may modify it freely.
    """
    builtin_entries = list(self.validEntries)
    configured_extras = list(self.getConfigList("extra_valid_entries"))
    return builtin_entries + configured_extras
def hasConfig(self, key):
for section in self.sectionslist:
try:
self.config.get(section,key)
self.get(section,key)
#print("found %s in section [%s]"%(key,section))
return True
except:
@ -65,11 +100,11 @@ class Configurable(object):
return False
def getConfig(self, key):
val = ""
def getConfig(self, key, default=""):
val = default
for section in self.sectionslist:
try:
val = self.config.get(section,key)
val = self.get(section,key)
if val and val.lower() == "false":
val = False
#print "getConfig(%s)=[%s]%s" % (key,section,val)
@ -84,5 +119,26 @@ class Configurable(object):
vlist = self.getConfig(key).split(',')
vlist = [ v.strip() for v in vlist ]
#print "vlist("+key+"):"+str(vlist)
return vlist
return vlist
# extended by adapter, writer and story for ease of calling configuration.
class Configurable(object):
    """Mixin giving an object direct access to a shared configuration.

    Every method simply forwards to the configuration object passed to
    the constructor, so subclasses can call self.getConfig(...) etc.
    without reaching through self.configuration each time.
    """

    def __init__(self, configuration):
        # configuration must provide isValidMetaEntry, getValidMetaList,
        # hasConfig, getConfig(key, default) and getConfigList.
        self.configuration = configuration

    def isValidMetaEntry(self, key):
        """Return whether key is a recognised metadata entry name."""
        return self.configuration.isValidMetaEntry(key)

    def getValidMetaList(self):
        """Return the list of all recognised metadata entry names."""
        return self.configuration.getValidMetaList()

    def hasConfig(self, key):
        """Return whether key is set in any active config section."""
        return self.configuration.hasConfig(key)

    def getConfig(self, key, default=""):
        """Return the configured value for key, or default if unset.

        default must be forwarded: callers such as the date formatting
        code pass a fallback format and depend on getting it back when
        the key is not configured.
        """
        return self.configuration.getConfig(key, default)

    def getConfigList(self, key):
        """Return the configured value for key as a list of entries."""
        return self.configuration.getConfigList(key)

View file

@ -18,19 +18,19 @@
import re
import urlparse
import urllib2 as u2
import ConfigParser
from BeautifulSoup import BeautifulSoup
from gziphttp import GZipProcessor
import adapters
from configurable import Configuration
def get_urls_from_page(url,config=None):
def get_urls_from_page(url,configuration=None):
normalized = set() # normalized url
retlist = [] # orig urls.
if not config:
config = ConfigParser.SafeConfigParser()
if not configuration:
configuration = Configuration("test1.com","EPUB")
data = None
@ -39,7 +39,7 @@ def get_urls_from_page(url,config=None):
# them, AO3 doesn't even show them if not logged in. Only works
# with saved user/pass--not going to prompt for list.
if 'archiveofourown.org' in url:
ao3adapter = adapters.getAdapter(config,"http://www.archiveofourown.org/works/0","EPUB")
ao3adapter = adapters.getAdapter(configuration,"http://www.archiveofourown.org/works/0","EPUB")
if ao3adapter.getConfig("username"):
if ao3adapter.getConfig("is_adult"):
addurl = "?view_adult=true"
@ -72,7 +72,7 @@ def get_urls_from_page(url,config=None):
try:
href = href.replace('&index=1','')
adapter = adapters.getAdapter(config,href,"EPUB")
adapter = adapters.getAdapter(configuration,href,"EPUB")
if adapter.story.getMetadata('storyUrl') not in normalized:
normalized.add(adapter.story.getMetadata('storyUrl'))
retlist.append(href)

View file

@ -22,6 +22,7 @@ from math import floor
import exceptions
from htmlcleanup import conditionalRemoveEntities, removeAllEntities
from configurable import Configurable
# Create convert_image method depending on which graphics lib we can
# load. Preferred: calibre, PIL, none
@ -186,9 +187,10 @@ langs = {
"Devanagari":"hi",
}
class Story:
class Story(Configurable):
def __init__(self):
def __init__(self, configuration):
Configurable.__init__(self, configuration)
try:
self.metadata = {'version':os.environ['CURRENT_VERSION_ID']}
except:
@ -197,35 +199,50 @@ class Story:
self.chapters = [] # chapters will be tuples of (title,html)
self.imgurls = []
self.imgtuples = []
self.listables = {} # some items (extratags, category, warnings & genres) are also kept as lists.
self.cover=None # *href* of new cover image--need to create html.
self.oldcover=None # (oldcoverhtmlhref,oldcoverhtmltype,oldcoverhtmldata,oldcoverimghref,oldcoverimgtype,oldcoverimgdata)
self.calibrebookmark=None # cheesy way to carry calibre bookmark file forward across update.
self.logfile=None # cheesy way to carry log file forward across update.
def setMetadata(self, key, value):
self.setReplace(self.getConfig('replace_metadata'))
def setMetadata(self, key, value, condremoveentities=True):
## still keeps &lt; &lt; and &amp;
self.metadata[key]=conditionalRemoveEntities(value)
if condremoveentities:
self.metadata[key]=conditionalRemoveEntities(value)
else:
self.metadata[key]=value
if key == "language":
try:
self.metadata['langcode'] = langs[self.metadata[key]]
except:
self.metadata['langcode'] = 'en'
if key == 'dateUpdated':
# Last Update tags for Bill.
self.addToList('lastupdate',value.strftime("Last Update Year/Month: %Y/%m"))
self.addToList('lastupdate',value.strftime("Last Update: %Y/%m/%d"))
def getMetadataRaw(self,key):
if self.metadata.has_key(key):
if self.isValidMetaEntry(key) and self.metadata.has_key(key):
return self.metadata[key]
def doReplacments(self,value):
for (p,v) in self.replacements:
if (isinstance(value,str) or isinstance(value,unicode)) and re.match(p,value):
if (isinstance(value,basestring)) and re.match(p,value):
value = re.sub(p,v,value)
return value
def getMetadata(self, key, removeallentities=False, doreplacements=True):
def getMetadata(self, key,
removeallentities=False,
doreplacements=True):
value = None
if not self.isValidMetaEntry(key):
return value
if self.isList(key):
value = ', '.join(self.getList(key, removeallentities))
value = u', '.join(self.getList(key, removeallentities, doreplacements=True))
elif self.metadata.has_key(key):
value = self.metadata[key]
if value:
@ -233,10 +250,8 @@ class Story:
value = commaGroups(value)
if key == "numChapters":
value = commaGroups("%d"%value)
if key == "dateCreated":
value = value.strftime("%Y-%m-%d %H:%M:%S")
if key == "datePublished" or key == "dateUpdated":
value = value.strftime("%Y-%m-%d")
if key in ("dateCreated","datePublished","dateUpdated"):
value = value.strftime(self.getConfig(key+"_format","%Y-%m-%d"))
if doreplacements:
value=self.doReplacments(value)
@ -244,19 +259,22 @@ class Story:
return removeAllEntities(value)
else:
return value
def getAllMetadata(self, removeallentities=False, doreplacements=True):
def getAllMetadata(self,
removeallentities=False,
doreplacements=True,
keeplists=False):
'''
All single value *and* list value metadata as strings.
All single value *and* list value metadata as strings (unless keeplists=True, then keep lists).
'''
allmetadata = {}
# special handling for authors/authorUrls
authlinkhtml="<a class='authorlink' href='%s'>%s</a>"
if 'author' in self.listables.keys(): # more than one author, assume multiple authorUrl too.
if self.isList('author'): # more than one author, assume multiple authorUrl too.
htmllist=[]
for i, v in enumerate(self.listables['author']):
aurl = self.listables['authorUrl'][i]
for i, v in enumerate(self.getList('author')):
aurl = self.getList('authorUrl')[i]
auth = v
# make sure doreplacements & removeallentities are honored.
if doreplacements:
@ -271,11 +289,12 @@ class Story:
else:
self.setMetadata('authorHTML',authlinkhtml%(self.getMetadata('authorUrl', removeallentities, doreplacements),
self.getMetadata('author', removeallentities, doreplacements)))
for k in self.metadata.keys():
allmetadata[k] = self.getMetadata(k, removeallentities, doreplacements)
for l in self.listables.keys():
allmetadata[l] = self.getMetadata(l, removeallentities, doreplacements)
for k in self.getValidMetaList():
if self.isList(k) and keeplists:
allmetadata[k] = self.getList(k, removeallentities, doreplacements)
else:
allmetadata[k] = self.getMetadata(k, removeallentities, doreplacements)
return allmetadata
@ -288,52 +307,80 @@ class Story:
if value==None:
return
value = conditionalRemoveEntities(value)
if not self.isList(listname):
self.listables[listname]=[]
if not self.isList(listname) or not listname in self.metadata:
# Calling addToList to a non-list meta will overwrite it.
self.metadata[listname]=[]
# prevent duplicates.
if not value in self.listables[listname]:
self.listables[listname].append(value)
if not value in self.metadata[listname]:
self.metadata[listname].append(value)
def getList(self,listname, removeallentities=False, doreplacements=True):
def isList(self,listname):
    """Tell whether listname should be treated as list-valued metadata.

    Everything set with an include_in_* is considered a list.
    Otherwise the entry counts as a list only when it is a valid
    metadata entry that is currently stored with a list value.
    """
    configured = self.hasConfig("include_in_"+listname)
    if configured:
        return configured
    valid = self.isValidMetaEntry(listname)
    if not valid:
        return valid
    if listname not in self.metadata:
        return False
    return isinstance(self.metadata[listname],list)
def getList(self,listname,
removeallentities=False,
doreplacements=True,
doincludein=True):
retlist = []
if not self.isList(listname):
retlist = [self.getMetadata(listname,removeallentities=removeallentities)]
if not self.isValidMetaEntry(listname):
return retlist
# doincludein prevents recursion of include_in_'s
if doincludein and self.hasConfig("include_in_"+listname):
for k in self.getConfigList("include_in_"+listname):
retlist.extend(self.getList(k,removeallentities,doreplacements,doincludein=False))
else:
retlist = self.listables[listname]
if not self.isList(listname):
retlist = [self.getMetadata(listname,removeallentities, doreplacements)]
else:
retlist = self.getMetadataRaw(listname)
if doreplacements:
retlist = filter( lambda x : x!=None and x!='' ,
map(self.doReplacments,retlist) )
if removeallentities:
retlist = filter( lambda x : x!=None and x!='' ,
map(removeAllEntities,retlist) )
if doreplacements:
retlist = filter( lambda x : x!=None and x!='' ,
map(self.doReplacments,retlist) )
if removeallentities:
retlist = filter( lambda x : x!=None and x!='' ,
map(removeAllEntities,retlist) )
return retlist
def getLists(self, removeallentities=False):
lsts = {}
for ln in self.listables.keys():
lsts[ln] = self.getList(ln, removeallentities)
return lsts
def getSubjectTags(self, removeallentities=False):
# set to avoid duplicates subject tags.
subjectset = set()
tags_list = self.getConfigList("include_subject_tags") + self.getConfigList("extra_subject_tags")
# metadata all go into dc:subject tags, but only if they are configured.
for (name,value) in self.getAllMetadata(removeallentities=removeallentities,keeplists=True).iteritems():
if name in tags_list:
if isinstance(value,list):
for tag in value:
subjectset.add(tag)
else:
subjectset.add(value)
def isList(self,listname):
return self.listables.has_key(listname)
def addChapter(self, title, html, configurable=None):
if configurable and \
configurable.getConfig('strip_chapter_numbers') and \
configurable.getConfig('chapter_title_strip_pattern'):
title = re.sub(configurable.getConfig('chapter_title_strip_pattern'),"",title)
if None in subjectset:
subjectset.remove(None)
return list(subjectset)
def addChapter(self, title, html):
    """Append a (title, html) chapter tuple to self.chapters.

    When 'strip_chapter_numbers' is enabled and a
    'chapter_title_strip_pattern' regexp is configured, every match of
    that pattern is removed from the title before it is stored.
    """
    if self.getConfig('strip_chapter_numbers'):
        strip_pattern = self.getConfig('chapter_title_strip_pattern')
        if strip_pattern:
            title = re.sub(strip_pattern,"",title)
    chapter = (title,html)
    self.chapters.append(chapter)
def getChapters(self, configurable=None):
def getChapters(self):
"Chapters will be tuples of (title,html)"
retval = []
if configurable and \
configurable.getConfig('add_chapter_numbers') and \
configurable.getConfig('chapter_title_add_pattern'):
if self.getConfig('add_chapter_numbers') and \
self.getConfig('chapter_title_add_pattern'):
for index, (title,html) in enumerate(self.chapters):
retval.append( (string.Template(configurable.getConfig('chapter_title_add_pattern')).substitute({'index':index+1,'title':title}),html) )
retval.append( (string.Template(self.getConfig('chapter_title_add_pattern')).substitute({'index':index+1,'title':title}),html) )
else:
retval = self.chapters
@ -355,11 +402,11 @@ class Story:
# pass fetch in from adapter in case we need the cookies collected
# as well as it's a base_story class method.
def addImgUrl(self,configurable,parenturl,url,fetch,cover=False,coverexclusion=None):
def addImgUrl(self,parenturl,url,fetch,cover=False,coverexclusion=None):
# otherwise it saves the image in the epub even though it
# isn't used anywhere.
if cover and configurable.getConfig('never_make_cover'):
if cover and self.getConfig('never_make_cover'):
return
url = url.strip() # ran across an image with a space in the
@ -397,20 +444,20 @@ class Story:
if imgurl not in self.imgurls:
parsedUrl = urlparse.urlparse(imgurl)
try:
sizes = [ int(x) for x in configurable.getConfigList('image_max_size') ]
sizes = [ int(x) for x in self.getConfigList('image_max_size') ]
except Exception, e:
raise exceptions.FailedToDownload("Failed to parse image_max_size from personal.ini:%s\nException: %s"%(configurable.getConfigList('image_max_size'),e))
raise exceptions.FailedToDownload("Failed to parse image_max_size from personal.ini:%s\nException: %s"%(self.getConfigList('image_max_size'),e))
try:
(data,ext,mime) = convert_image(imgurl,
fetch(imgurl),
sizes,
configurable.getConfig('grayscale_images'))
self.getConfig('grayscale_images'))
except Exception, e:
print("Failed to load or convert image, skipping:\n%s\nException: %s"%(imgurl,e))
return "failedtoload"
# explicit cover, make the first image.
if cover and not configurable.getConfig('never_make_cover'):
if cover and not self.getConfig('never_make_cover'):
if len(self.imgtuples) > 0 and 'cover' in self.imgtuples[0]['newsrc']:
# remove existing cover, if there is one.
del self.imgurls[0]
@ -427,8 +474,8 @@ class Story:
# NOT never_make_cover AND
# either no coverexclusion OR coverexclusion doesn't match
if self.cover == None and \
configurable.getConfig('make_firstimage_cover') and \
not configurable.getConfig('never_make_cover') and \
self.getConfig('make_firstimage_cover') and \
not self.getConfig('never_make_cover') and \
(not coverexclusion or not re.search(coverexclusion,imgurl)):
newsrc = "images/cover.%s"%ext
self.cover=newsrc
@ -457,7 +504,7 @@ class Story:
return retlist
def __str__(self):
return "Metadata: " +str(self.metadata) + "\nListables: " +str(self.listables) #+ "\nChapters: "+str(self.chapters)
return "Metadata: " +str(self.metadata)
def setReplace(self,replace):
for line in replace.splitlines():

View file

@ -36,44 +36,12 @@ class BaseStoryWriter(Configurable):
def getFormatExt():
return '.bse'
def __init__(self, config, adapter):
Configurable.__init__(self, config)
self.setSectionOrder(adapter.getConfigSection(),self.getFormatName())
def __init__(self, configuration, adapter):
Configurable.__init__(self, configuration)
self.adapter = adapter
self.story = adapter.getStoryMetadataOnly() # only cache the metadata initially.
self.story.setReplace(self.getConfig('replace_metadata'))
self.validEntries = [
'category',
'genre',
'language',
'characters',
'ships',
'series',
'status',
'datePublished',
'dateUpdated',
'dateCreated',
'rating',
'warnings',
'numChapters',
'numWords',
'site',
'storyId',
'authorId',
'extratags',
'title',
'storyUrl',
'description',
'author',
'authorUrl',
'formatname',
'formatext',
'siteabbrev',
'version']
# fall back labels.
self.titleLabels = {
'category':'Category',
@ -148,11 +116,11 @@ class BaseStoryWriter(Configurable):
if WIDE_ENTRY==None:
WIDE_ENTRY=ENTRY
titleEntriesList = self.getConfigList("titlepage_entries")
titleEntriesList = self.getConfigList("titlepage_entries") + self.getConfigList("extra_titlepage_entries")
wideTitleEntriesList = self.getConfigList("wide_titlepage_entries")
for entry in titleEntriesList:
if entry in self.validEntries:
if self.isValidMetaEntry(entry):
if self.story.getMetadata(entry):
if entry in wideTitleEntriesList:
TEMPLATE=WIDE_ENTRY
@ -161,9 +129,12 @@ class BaseStoryWriter(Configurable):
if self.hasConfig(entry+"_label"):
label=self.getConfig(entry+"_label")
else:
print("Using fallback label for %s_label"%entry)
elif entry in self.titleLabels:
logging.debug("Using fallback label for %s_label"%entry)
label=self.titleLabels[entry]
else:
label="%s"%entry.title()
logging.debug("No known label for %s, fallback to '%s'"%(entry,label))
# If the label for the title entry is empty, use the
# 'no title' option if there is one.
@ -184,10 +155,10 @@ class BaseStoryWriter(Configurable):
names as Story.metadata, but ENTRY should use index and chapter.
"""
# Only do TOC if there's more than one chapter and it's configured.
if len(self.story.getChapters(self)) > 1 and self.getConfig("include_tocpage") and not self.metaonly :
if len(self.story.getChapters()) > 1 and self.getConfig("include_tocpage") and not self.metaonly :
self._write(out,START.substitute(self.story.getAllMetadata()))
for index, (title,html) in enumerate(self.story.getChapters(self)):
for index, (title,html) in enumerate(self.story.getChapters()):
if html:
self._write(out,ENTRY.substitute({'chapter':title, 'index':"%04d"%(index+1)}))
@ -202,9 +173,11 @@ class BaseStoryWriter(Configurable):
# minor cheat, tucking css into metadata.
if self.getConfig("output_css"):
self.story.metadata["output_css"] = self.getConfig("output_css")
self.story.setMetadata("output_css",
self.getConfig("output_css"),
condremoveentities=False)
else:
self.story.metadata["output_css"] = ''
self.story.setMetadata("output_css",'')
if not outstream:
close=True
@ -261,29 +234,6 @@ class BaseStoryWriter(Configurable):
if close:
outstream.close()
def getTags(self, removeallentities=False):
# set to avoid duplicates subject tags.
subjectset = set()
if self.story.getMetadataRaw('dateUpdated'):
# Last Update tags for Bill.
self.story.addToList('lastupdate',self.story.getMetadataRaw('dateUpdated').strftime("Last Update Year/Month: %Y/%m"))
self.story.addToList('lastupdate',self.story.getMetadataRaw('dateUpdated').strftime("Last Update: %Y/%m/%d"))
for entry in self.validEntries:
if entry in self.getConfigList("include_subject_tags") and \
entry not in self.story.getLists() and \
self.story.getMetadata(entry):
subjectset.add(self.getMetadata(entry, removeallentities))
# listables all go into dc:subject tags, but only if they are configured.
for (name,lst) in self.story.getLists(removeallentities).iteritems():
if name in self.getConfigList("include_subject_tags"):
for tag in lst:
subjectset.add(tag)
return list(subjectset)
def writeStoryImpl(self, out):
"Must be overriden by sub classes."
pass

View file

@ -193,31 +193,33 @@ ${value}<br />
Switch rindex to index to search from top instead of bottom.
"""
values = {}
for entry in self.getConfigList("logpage_entries"):
if entry in self.validEntries:
try:
# <span id="dateUpdated">1975-04-15</span>
span = '<span id="%s">'%entry
idx = logfile.rindex(span)+len(span)
values[entry] = logfile[idx:logfile.index('</span>',idx)]
except Exception, e:
#print("e:%s"%e)
pass
for entry in self.getConfigList("logpage_entries") + self.getConfigList("extra_logpage_entries"):
try:
# <span id="dateUpdated">1975-04-15</span>
span = '<span id="%s">'%entry
idx = logfile.rindex(span)+len(span)
values[entry] = logfile[idx:logfile.index('</span>',idx)]
except Exception, e:
#print("e:%s"%e)
pass
return values
def _makeLogEntry(self, oldvalues={}):
retval = "<p class='log_entry'>"
for entry in self.getConfigList("logpage_entries"):
if entry in self.validEntries:
for entry in self.getConfigList("logpage_entries") + self.getConfigList("extra_logpage_entries"):
if self.isValidMetaEntry(entry):
val = self.story.getMetadata(entry)
if val and ( entry not in oldvalues or val != oldvalues[entry] ):
if self.hasConfig(entry+"_label"):
label=self.getConfig(entry+"_label")
else:
print("Using fallback label for %s_label"%entry)
elif entry in self.titleLabels:
logging.debug("Using fallback label for %s_label"%entry)
label=self.titleLabels[entry]
else:
label="%s"%entry.title()
logging.debug("No known label for %s, fallback to '%s'"%(entry,label))
retval = retval + self.EPUB_LOG_ENTRY.substitute({'id':entry,
'label':label,
@ -342,7 +344,7 @@ ${value}<br />
metadata.appendChild(newTag(contentdom,"dc:description",text=
self.getMetadata('description')))
for subject in self.getTags():
for subject in self.story.getSubjectTags():
metadata.appendChild(newTag(contentdom,"dc:subject",text=subject))
@ -441,7 +443,7 @@ div { margin: 0pt; padding: 0pt; }
if self.getConfig("include_titlepage"):
items.append(("title_page","OEBPS/title_page.xhtml","application/xhtml+xml","Title Page"))
itemrefs.append("title_page")
if len(self.story.getChapters(self)) > 1 and self.getConfig("include_tocpage") and not self.metaonly :
if len(self.story.getChapters()) > 1 and self.getConfig("include_tocpage") and not self.metaonly :
items.append(("toc_page","OEBPS/toc_page.xhtml","application/xhtml+xml","Table of Contents"))
itemrefs.append("toc_page")
@ -449,7 +451,7 @@ div { margin: 0pt; padding: 0pt; }
items.append(("log_page","OEBPS/log_page.xhtml","application/xhtml+xml","Update Log"))
itemrefs.append("log_page")
for index, (title,html) in enumerate(self.story.getChapters(self)):
for index, (title,html) in enumerate(self.story.getChapters()):
if html:
i=index+1
items.append(("file%04d"%i,
@ -587,7 +589,7 @@ div { margin: 0pt; padding: 0pt; }
outputepub.writestr("OEBPS/log_page.xhtml",logpageIO.getvalue())
logpageIO.close()
for index, (title,html) in enumerate(self.story.getChapters(self)):
for index, (title,html) in enumerate(self.story.getChapters()):
if html:
logging.debug('Writing chapter text for: %s' % title)
fullhtml = self.EPUB_CHAPTER_START.substitute({'chapter':title, 'index':index+1}) + html + self.EPUB_CHAPTER_END.substitute({'chapter':title, 'index':index+1})

View file

@ -94,7 +94,7 @@ ${output_css}
self.HTML_TOC_ENTRY,
self.HTML_TOC_PAGE_END)
for index, (title,html) in enumerate(self.story.getChapters(self)):
for index, (title,html) in enumerate(self.story.getChapters()):
if html:
logging.debug('Writing chapter text for: %s' % title)
self._write(out,self.HTML_CHAPTER_START.substitute({'chapter':title, 'index':"%04d"%(index+1)}))

View file

@ -169,7 +169,7 @@ ${value}<br />
# files.append(tocpageIO.getvalue())
# tocpageIO.close()
for index, (title,html) in enumerate(self.story.getChapters(self)):
for index, (title,html) in enumerate(self.story.getChapters()):
if html:
logging.debug('Writing chapter text for: %s' % title)
fullhtml = self.MOBI_CHAPTER_START.substitute({'chapter':title, 'index':index+1}) + html + self.MOBI_CHAPTER_END.substitute({'chapter':title, 'index':index+1})

View file

@ -133,7 +133,7 @@ End file.
self._write(out,self.lineends(self.wraplines(towrap)))
for index, (title,html) in enumerate(self.story.getChapters(self)):
for index, (title,html) in enumerate(self.story.getChapters()):
if html:
logging.debug('Writing chapter text for: %s' % title)
self._write(out,self.lineends(self.wraplines(removeAllEntities(self.TEXT_CHAPTER_START.substitute({'chapter':title, 'index':index+1})))))

View file

@ -58,7 +58,23 @@
<p>
New sites bloodties-fans.com and thehookupzone.net, thanks Marillapm!
</p>
<h3>New Features and Fixes</h3>
<p>
Added an extra metadata feature for certain sites. This release
contains some metadata features that have been requested,
plus quite a bit of internal refactoring to allow them.
This is primarily intended for calibre plugin users, so
it's <a href="">documented in the plugin forum</a>.
<br /> Also added cover images for dramione.org, thanks
ilovejedd.
</p>
<p>
Fixes for: squidge.org/peja Rating including '[' when
there's no stars; loosen url check on AO3 to allow more
copy/paste URLs; fix author on castlefans.org due to
site/skin changes.
</p>
<p>
Questions? Check out our
<a href="http://code.google.com/p/fanficdownloader/wiki/FanFictionDownloaderFAQs">FAQs</a>.
@ -67,7 +83,7 @@
If you have any problems with this application, please
report them in
the <a href="http://groups.google.com/group/fanfic-downloader">FanFictionDownLoader Google Group</a>. The
<a href="http://4-4-24.fanfictiondownloader.appspot.com">Previous Version</a> is also available for you to use if necessary.
<a href="http://4-4-25.fanfictiondownloader.appspot.com">Previous Version</a> is also available for you to use if necessary.
</p>
<div id='error'>
{{ error_message }}

27
main.py
View file

@ -29,7 +29,6 @@ import datetime
import traceback
from StringIO import StringIO
import ConfigParser
## Just to shut up the appengine warning about "You are using the
## default Django version (0.96). The default Django version will
@ -57,22 +56,25 @@ from google.appengine.runtime import DeadlineExceededError
from ffstorage import *
from fanficdownloader import adapters, writers, exceptions
from fanficdownloader.configurable import Configuration
class UserConfigServer(webapp2.RequestHandler):
def getUserConfig(self,user):
config = ConfigParser.SafeConfigParser()
def getUserConfig(self,user,url,fileformat):
configuration = Configuration(adapters.getConfigSectionFor(url),fileformat)
logging.debug('reading defaults.ini config file')
config.read('defaults.ini')
configuration.read('defaults.ini')
## Pull user's config record.
l = UserConfig.all().filter('user =', user).fetch(1)
if l and l[0].config:
uconfig=l[0]
#logging.debug('reading config from UserConfig(%s)'%uconfig.config)
config.readfp(StringIO(uconfig.config))
configuration.readfp(StringIO(uconfig.config))
return config
return configuration
class MainHandler(webapp2.RequestHandler):
def get(self):
@ -137,7 +139,8 @@ class EditConfigServer(UserConfigServer):
uconfig.config = self.request.get('config').encode('utf8')[:10000] ## just in case.
uconfig.put()
try:
config = self.getUserConfig(user)
# just getting config for testing purposes.
configuration = self.getUserConfig(user,"test1.com","epub")
self.redirect("/?error=configsaved")
except Exception, e:
logging.info("Saved Config Failed:%s"%e)
@ -367,12 +370,12 @@ class FanfictionDownloader(UserConfigServer):
adapter = None
try:
try:
config = self.getUserConfig(user)
configuration = self.getUserConfig(user,url,format)
except Exception, e:
self.redirect("/?error=custom&errtext=%s"%urlEscape("There's an error in your User Configuration: "+str(e)))
return
adapter = adapters.getAdapter(config,url,format)
adapter = adapters.getAdapter(configuration,url)
logging.info('Created an adaper: %s' % adapter)
if len(login) > 1:
@ -474,8 +477,8 @@ class FanfictionDownloaderTask(UserConfigServer):
logging.info('Creating adapter...')
try:
config = self.getUserConfig(user)
adapter = adapters.getAdapter(config,url,format)
configuration = self.getUserConfig(user,url,format)
adapter = adapters.getAdapter(configuration,url)
logging.info('Created an adapter: %s' % adapter)
@ -488,7 +491,7 @@ class FanfictionDownloaderTask(UserConfigServer):
# adapter.getStoryMetadataOnly() only fetches enough to
# get metadata. writer.writeStory() will call
# adapter.getStory(), too.
writer = writers.getWriter(format,config,adapter)
writer = writers.getWriter(format,configuration,adapter)
download.name = writer.getOutputFileName()
#logging.debug('output_filename:'+writer.getConfig('output_filename'))
logging.debug('getOutputFileName:'+writer.getOutputFileName())

View file

@ -67,9 +67,14 @@ authorId_label:Author ID
## show up in Calibre as tags. Also carried into mobi when converted.
extratags_label:Extra Tags
## The version of fanficdownloader
##
version_label:FFDL Version
## Date formats used by FFDL. datePublished and dateUpdated don't include a time.
## Note that ini format requires % to be escaped as %%.
dateCreated_format:%%Y-%%m-%%d %%H:%%M:%%S
datePublished_format:%%Y-%%m-%%d
dateUpdated_format:%%Y-%%m-%%d
## items to include in the title page
## Empty metadata entries will *not* appear, even if in the list.
## You can include extra text or HTML that will be included as-is in
@ -463,6 +468,55 @@ extraships:Draco Malfoy/Hermione Granger
## personal.ini, not defaults.ini.
#is_adult:true
## Some adapters collect additional meta information beyond the
## standard ones. They need to be defined in extra_valid_entries to
## tell the rest of the FFDL system about them. They can be used in
## include_subject_tags, titlepage_entries, extra_titlepage_entries,
## logpage_entries, extra_logpage_entries, and include_in_* config
## items. You can also add additional entries here to build up
## composite metadata entries. dramione.org, for example, adds
## 'cliches' and then defines it as the composite of hermiones,dracos in
## include_in_cliches.
extra_valid_entries:themes,hermiones,dracos,timeline,cliches
include_in_cliches:hermiones,dracos
## For another example, you could, by uncommenting this line, include
## themes in with genre metadata.
#include_in_genre:genre, themes
## You can give each new valid entry a specific label for use on
## titlepage and logpage. If not defined, it will simply be the
## entry name in title case (e.g. 'themes' becomes 'Themes').
themes_label:Themes
hermiones_label:Hermiones
dracos_label:Dracos
timeline_label:Timeline
cliches_label:Character Cliches
## extra_titlepage_entries (and extra_logpage_entries) *add* to
## titlepage_entries (and logpage_entries) so you can add site
## specific entries to titlepage/logpage without having to copy the
## entire titlepage_entries line. (But if you want them higher than
## the end, you will need to copy titlepage_entries.)
#extra_titlepage_entries: themes,timeline,cliches
#extra_logpage_entries: themes,timeline,cliches
#extra_subject_tags: themes,timeline,cliches
## (Plugin Only) - You can also populate calibre custom columns with
## the site specific metadata using custom_columns_settings (but only
## if 'Allow custom_columns_settings from personal.ini' is checked in
## the plugin GUI config.) There are three parts, the entry name,
## then the label of the calibre custom column, then (optionally) a
## 'mode'. 'r' to Replace any existing values, 'a' to Add to existing
## value (use with tag-like columns), and 'n' for setting on New books
## only.
## Make sure to keep at least one space at the start of each
## line.
#custom_columns_settings:
# cliches=>#acolumn,r
# themes=>#bcolumn,a
# timeline=>#ccolumn,n
[erosnsappho.sycophanthex.com]
## Site dedicated to these categories/characters/ships
extracategories:Harry Potter
@ -774,9 +828,13 @@ extraships:Sesshoumaru/Kagome
[www.fanfiction.net]
## fanfiction.net's 'cover' images are really just tiny thumbnails.
## Comment this out or change it to false to use them anyway.
## Change this to false to use them anyway.
never_make_cover: true
## Extra metadata that this adapter knows about. See [dramione.org]
## for examples of how to use them.
extra_valid_entries:reviews,favs,follows
[www.fanfiktion.de]
## Some sites require login (or login for some rated stories) The
## program can prompt you, or you can save it in config. In

View file

@ -42,7 +42,8 @@
## Most common, I expect will be using this to save username/passwords
## for different sites. Here are a few examples.
## for different sites. Here are a few examples. See defaults.ini
## for the full list.
[www.twilighted.net]
#username:YourPenname