mirror of
https://github.com/JimmXinu/FanFicFare.git
synced 2026-05-08 12:36:11 +02:00
Merging changes from trunk.
commit 6a83131a99
30 changed files with 324 additions and 108 deletions
app.yaml
@@ -1,6 +1,6 @@
# ffd-retief-hrd fanfictiondownloader
application: fanfictiondownloader
version: 4-3-2
application: ffd-retief-hrd
version: 4-3-3
runtime: python27
api_version: 1
threadsafe: true

@@ -15,6 +15,10 @@ from datetime import datetime
from PyQt4.Qt import (QApplication, QMenu, QToolButton)
from PyQt4.Qt import QPixmap, Qt
from PyQt4.QtCore import QBuffer
from calibre.ptempfile import PersistentTemporaryFile, PersistentTemporaryDirectory, remove_dir
from calibre.ebooks.metadata import MetaInformation, authors_to_string
from calibre.ebooks.metadata.meta import get_metadata

@@ -30,6 +34,7 @@ from calibre_plugins.fanfictiondownloader_plugin.common_utils import (set_plugin
create_menu_action_unique, get_library_uuid)
from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader import adapters, writers, exceptions
from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader.htmlcleanup import stripHTML
from calibre_plugins.fanfictiondownloader_plugin.epubmerge import doMerge
from calibre_plugins.fanfictiondownloader_plugin.dcsource import get_dcsource

@@ -93,6 +98,8 @@ class FanFictionDownLoaderPlugin(InterfaceAction):
# are not found in the zip file will result in null QIcons.
icon = get_icon('images/icon.png')
#self.qaction.setText('FFDL')
# The qaction is automatically created from the action_spec defined
# above
self.qaction.setIcon(icon)

@@ -408,7 +415,7 @@ class FanFictionDownLoaderPlugin(InterfaceAction):
ffdlconfig = SafeConfigParser()
ffdlconfig.readfp(StringIO(get_resources("plugin-defaults.ini")))
ffdlconfig.readfp(StringIO(prefs['personal.ini']))
adapter = adapters.getAdapter(ffdlconfig,url)
adapter = adapters.getAdapter(ffdlconfig,url,fileform)
options['personal.ini'] = prefs['personal.ini']

@@ -440,7 +447,7 @@ class FanFictionDownLoaderPlugin(InterfaceAction):
book['author_sort'] = book['author'] = story.getMetadata("author", removeallentities=True)
book['publisher'] = story.getMetadata("site")
book['tags'] = writer.getTags()
book['comments'] = story.getMetadata("description") #, removeallentities=True) comments handles entities better.
book['comments'] = stripHTML(story.getMetadata("description")) #, removeallentities=True) comments handles entities better.
book['series'] = story.getMetadata("series")
# adapter.opener is the element with a threadlock. But del

@@ -110,7 +110,7 @@ def do_download_for_worker(book,options):
ffdlconfig.readfp(StringIO(get_resources("plugin-defaults.ini")))
ffdlconfig.readfp(StringIO(options['personal.ini']))
adapter = adapters.getAdapter(ffdlconfig,book['url'])
adapter = adapters.getAdapter(ffdlconfig,book['url'],options['fileform'])
adapter.is_adult = book['is_adult']
adapter.username = book['username']
adapter.password = book['password']

@@ -64,7 +64,7 @@ for x in imports():
#print x
__class_list.append(sys.modules[x].getClass())
def getAdapter(config,url):
def getAdapter(config,url,fileform=None):
## fix up leading protocol.
fixedurl = re.sub(r"(?i)^[htp]+[:/]+","http://",url.strip())
if not fixedurl.startswith("http"):

@@ -89,6 +89,7 @@ def getAdapter(config,url):
fixedurl = fixedurl.replace("http://","http://www.")
if cls:
adapter = cls(config,fixedurl) # raises InvalidStoryURL
adapter.setSectionOrder(adapter.getSiteDomain(),fileform)
return adapter
# No adapter found.
raise exceptions.UnknownSite( url, [cls.getSiteDomain() for cls in __class_list] )

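For illustration, a minimal sketch of calling the updated getAdapter(), which now takes an optional fileform and seeds the adapter's config-section order through setSectionOrder(). The import path, config contents and story URL below are assumptions, not shown in this commit:

from StringIO import StringIO
from ConfigParser import SafeConfigParser
from fanficdownloader import adapters   # assumed standalone package layout

config = SafeConfigParser()
config.readfp(StringIO("[defaults]\n"))  # hypothetical minimal config
url = "http://www.whofic.com/viewstory.php?sid=12345"  # hypothetical story URL

# With a fileform, the adapter's lookups consult [defaults], [www.whofic.com],
# [epub], [www.whofic.com:epub] and [overrides], most specific section first.
adapter = adapters.getAdapter(config, url, "epub")
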
@@ -25,7 +25,7 @@ from .. import BeautifulSoup as bs
from ..htmlcleanup import stripHTML
from .. import exceptions as exceptions
from base_adapter import BaseSiteAdapter, utf8FromSoup, makeDate
from base_adapter import BaseSiteAdapter, makeDate
class AdAstraFanficComSiteAdapter(BaseSiteAdapter):

@@ -133,7 +133,8 @@ class AdAstraFanficComSiteAdapter(BaseSiteAdapter):
# sometimes poorly formated desc (<p> w/o </p>) leads
# to all labels being included.
svalue=svalue[:svalue.find('<span class="label">')]
self.story.setMetadata('description',stripHTML(svalue))
self.setDescription(url,svalue)
#self.story.setMetadata('description',stripHTML(svalue))
if 'Rated' in label:
self.story.setMetadata('rating', value)

@@ -220,7 +221,7 @@ class AdAstraFanficComSiteAdapter(BaseSiteAdapter):
if None == span:
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
return utf8FromSoup(span)
return self.utf8FromSoup(url,span)
def getClass():
return AdAstraFanficComSiteAdapter

@@ -24,7 +24,7 @@ from .. import BeautifulSoup as bs
from ..htmlcleanup import stripHTML
from .. import exceptions as exceptions
from base_adapter import BaseSiteAdapter, utf8FromSoup, makeDate
from base_adapter import BaseSiteAdapter, makeDate
def getClass():
return ArchiveOfOurOwnOrgAdapter

@@ -126,7 +126,8 @@ class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter):
a = metasoup.find('blockquote',{'class':'userstuff'})
if a != None:
self.story.setMetadata('description',a.text)
self.setDescription(url,a.text)
#self.story.setMetadata('description',a.text)
a = metasoup.find('dd',{'class':"rating tags"})
if a != None:

@@ -213,10 +214,11 @@ class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
print('Getting chapter text from: %s' % url)
chapter=bs.BeautifulSoup('<div class="story"></div>')
soup = bs.BeautifulSoup(self._fetchUrl(url),selfClosingTags=('br','hr'))
data = self._fetchUrl(url)
soup = bs.BeautifulSoup(data,selfClosingTags=('br','hr'))
headnotes = soup.find('div', {'class' : "preface group"}).find('div', {'class' : "notes module"})
if headnotes != None:

@@ -257,5 +259,5 @@ class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter):
if None == soup:
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
return utf8FromSoup(chapter)
return self.utf8FromSoup(url,chapter)

@@ -24,7 +24,7 @@ from .. import BeautifulSoup as bs
from ..htmlcleanup import stripHTML
from .. import exceptions as exceptions
from base_adapter import BaseSiteAdapter, utf8FromSoup, makeDate
from base_adapter import BaseSiteAdapter, makeDate
# By virtue of being recent and requiring both is_adult and user/pass,
# adapter_fanficcastletvnet.py is the best choice for learning to

@@ -218,7 +218,8 @@ class CastleFansOrgAdapter(BaseSiteAdapter): # XXX
while not defaultGetattr(value,'class') == 'label':
svalue += str(value)
value = value.nextSibling
self.story.setMetadata('description',stripHTML(svalue))
self.setDescription(url,svalue)
#self.story.setMetadata('description',stripHTML(svalue))
if 'Rated' in label:
self.story.setMetadata('rating', value)

@@ -305,4 +306,4 @@ class CastleFansOrgAdapter(BaseSiteAdapter): # XXX
if None == div:
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
return utf8FromSoup(div)
return self.utf8FromSoup(url,div)

@@ -24,7 +24,7 @@ import time
from .. import BeautifulSoup as bs
from .. import exceptions as exceptions
from base_adapter import BaseSiteAdapter, utf8FromSoup, makeDate
from base_adapter import BaseSiteAdapter, makeDate
class FanFictionNetSiteAdapter(BaseSiteAdapter):

@@ -153,7 +153,8 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
if 'title_t' in var:
self.story.setMetadata('title', value)
if 'summary' in var:
self.story.setMetadata('description', value)
self.setDescription(url,value)
#self.story.setMetadata('description', value)
if 'datep' in var:
self.story.setMetadata('datePublished',makeDate(value, '%m-%d-%y'))
if 'dateu' in var:

@@ -270,7 +271,7 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
logging.debug('div id=storytext not found. data:%s'%data)
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
return utf8FromSoup(div)
return self.utf8FromSoup(url,div)
def getClass():
return FanFictionNetSiteAdapter

@@ -26,7 +26,7 @@ from .. import BeautifulSoup as bs
from ..htmlcleanup import stripHTML
from .. import exceptions as exceptions
from base_adapter import BaseSiteAdapter, utf8FromSoup, makeDate
from base_adapter import BaseSiteAdapter, makeDate
def getClass():

@@ -201,7 +201,8 @@ class FicBookNetAdapter(BaseSiteAdapter):
break
summary=soup.find('span', {'class' : 'urlize'})
self.story.setMetadata('description', summary.text)
self.setDescription(url,summary.text)
#self.story.setMetadata('description', summary.text)
# grab the text for an individual chapter.
def getChapterText(self, url):

@@ -218,4 +219,4 @@ class FicBookNetAdapter(BaseSiteAdapter):
if None == chapter:
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
return utf8FromSoup(chapter)
return self.utf8FromSoup(url,chapter)

@@ -25,7 +25,7 @@ from .. import BeautifulSoup as bs
from ..htmlcleanup import stripHTML
from .. import exceptions as exceptions
from base_adapter import BaseSiteAdapter, utf8FromSoup, makeDate
from base_adapter import BaseSiteAdapter, makeDate
class FictionAlleyOrgSiteAdapter(BaseSiteAdapter):

@@ -187,7 +187,8 @@ class FictionAlleyOrgSiteAdapter(BaseSiteAdapter):
for small in storydd.findAll('small'):
small.extract() ## removes the <small> tags, leaving only the summary.
self.story.setMetadata('description',stripHTML(storydd))
self.setDescription(url,storydd)
#self.story.setMetadata('description',stripHTML(storydd))
return

@@ -223,7 +224,7 @@ class FictionAlleyOrgSiteAdapter(BaseSiteAdapter):
if not data or not text:
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
return utf8FromSoup(text)
return self.utf8FromSoup(url,text)
def getClass():
return FictionAlleyOrgSiteAdapter

@@ -26,7 +26,7 @@ from .. import BeautifulSoup as bs
from .. import exceptions as exceptions
from ..htmlcleanup import stripHTML
from base_adapter import BaseSiteAdapter, utf8FromSoup, makeDate
from base_adapter import BaseSiteAdapter, makeDate
class FicwadComSiteAdapter(BaseSiteAdapter):

@@ -124,7 +124,8 @@ class FicwadComSiteAdapter(BaseSiteAdapter):
# description
storydiv = soup.find("div",{"id":"story"})
self.story.setMetadata('description', storydiv.find("blockquote",{'class':'summary'}).p.string)
self.setDescription(url,storydiv.find("blockquote",{'class':'summary'}).p.string)
#self.story.setMetadata('description', storydiv.find("blockquote",{'class':'summary'}).p.string)
# most of the meta data is here:
metap = storydiv.find("p",{"class":"meta"})

@@ -209,7 +210,7 @@ class FicwadComSiteAdapter(BaseSiteAdapter):
if None == span:
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
return utf8FromSoup(span)
return self.utf8FromSoup(url,span)
def getClass():
return FicwadComSiteAdapter

@@ -26,7 +26,7 @@ from .. import BeautifulSoup as bs
from ..htmlcleanup import stripHTML
from .. import exceptions as exceptions
from base_adapter import BaseSiteAdapter, utf8FromSoup, makeDate
from base_adapter import BaseSiteAdapter, makeDate
def getClass():
return FimFictionNetSiteAdapter

@@ -141,7 +141,15 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter):
description_soup.find('a', {"class":"more"}).extract()
except:
pass
self.story.setMetadata('description', description_soup.text)
story_img = soup.find('img',{'class':'story_image'})
if self.getConfig('keep_summary_html') and \
self.getConfig('include_images') and \
story_img:
self.setDescription(self.url,"%s<br/>%s"%(story_img,description_soup.text))
else:
self.setDescription(self.url,description_soup.text)
#self.story.setMetadata('description', description_soup.text)
# Unfortunately, nowhere on the page is the year mentioned.
# Best effort to deal with this:

@@ -171,5 +179,5 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter):
soup = bs.BeautifulSoup(self._fetchUrl(url),selfClosingTags=('br','hr')).find('div', {'id' : 'chapter_container'})
if soup == None:
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
return utf8FromSoup(soup)
return self.utf8FromSoup(url,soup)

@@ -25,7 +25,7 @@ from .. import BeautifulSoup as bs
from ..htmlcleanup import stripHTML
from .. import exceptions as exceptions
from base_adapter import BaseSiteAdapter, utf8FromSoup, makeDate
from base_adapter import BaseSiteAdapter, makeDate
class HarryPotterFanFictionComSiteAdapter(BaseSiteAdapter):

@@ -125,7 +125,8 @@ class HarryPotterFanFictionComSiteAdapter(BaseSiteAdapter):
## Finding the metadata is a bit of a pain. Desc is the only thing this color.
desctable= soup.find('table',{'bgcolor':'#f0e8e8'})
self.story.setMetadata('description',stripHTML(desctable))
self.setDescription(url,desctable)
#self.story.setMetadata('description',stripHTML(desctable))
## Finding the metadata is a bit of a pain. Most of the meta
## data is in a center.table without a bgcolor.

@@ -193,7 +194,7 @@ class HarryPotterFanFictionComSiteAdapter(BaseSiteAdapter):
if None == div:
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
return utf8FromSoup(div)
return self.utf8FromSoup(url,div)
def getClass():
return HarryPotterFanFictionComSiteAdapter

@@ -25,7 +25,7 @@ from .. import BeautifulSoup as bs
from ..htmlcleanup import stripHTML
from .. import exceptions as exceptions
from base_adapter import BaseSiteAdapter, utf8FromSoup, makeDate
from base_adapter import BaseSiteAdapter, makeDate
class MediaMinerOrgSiteAdapter(BaseSiteAdapter):

@@ -174,7 +174,8 @@ class MediaMinerOrgSiteAdapter(BaseSiteAdapter):
# Summary: ....
m = re.match(r".*?Summary: (.*)$",metastr)
if m:
self.story.setMetadata('description', m.group(1))
self.setDescription(url, m.group(1))
#self.story.setMetadata('description', m.group(1))
# completed
m = re.match(r".*?Status: Completed.*?",metastr)

@@ -210,7 +211,7 @@ class MediaMinerOrgSiteAdapter(BaseSiteAdapter):
del div['style']
del div['align']
anchor.name='div'
return utf8FromSoup(anchor)
return self.utf8FromSoup(url,anchor)
else:
logging.debug('Using kludgey text find for older mediaminer story.')

@@ -226,7 +227,7 @@ class MediaMinerOrgSiteAdapter(BaseSiteAdapter):
soup.findAll('table',{'class':'tbbrdr'}):
tag.extract() # remove tag from soup.
return utf8FromSoup(soup)
return self.utf8FromSoup(url,soup)
def getClass():

@@ -25,7 +25,7 @@ from .. import BeautifulSoup as bs
from ..htmlcleanup import stripHTML
from .. import exceptions as exceptions
from base_adapter import BaseSiteAdapter, utf8FromSoup, makeDate
from base_adapter import BaseSiteAdapter, makeDate
class PotionsAndSnitchesNetSiteAdapter(BaseSiteAdapter):

@@ -131,7 +131,8 @@ class PotionsAndSnitchesNetSiteAdapter(BaseSiteAdapter):
while not defaultGetattr(value,'class') == 'listbox':
svalue += str(value)
value = value.nextSibling
self.story.setMetadata('description',stripHTML(svalue))
self.setDescription(url,svalue)
#self.story.setMetadata('description',stripHTML(svalue))
if 'Rated' in label:
self.story.setMetadata('rating', value)

@@ -209,7 +210,7 @@ class PotionsAndSnitchesNetSiteAdapter(BaseSiteAdapter):
if None == div:
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
return utf8FromSoup(div)
return self.utf8FromSoup(url,div)
def getClass():
return PotionsAndSnitchesNetSiteAdapter

@@ -24,7 +24,7 @@ from .. import BeautifulSoup as bs
from ..htmlcleanup import stripHTML
from .. import exceptions as exceptions
from base_adapter import BaseSiteAdapter, utf8FromSoup, makeDate
from base_adapter import BaseSiteAdapter, makeDate
# This function is called by the downloader in all adapter_*.py files
# in this dir to register the adapter class. So it needs to be

@@ -227,7 +227,8 @@ class SiyeCoUkAdapter(BaseSiteAdapter): # XXX
if part.startswith("Summary:"):
part = part[part.find(':')+1:]
self.story.setMetadata('description',part)
self.setDescription(url,part)
#self.story.setMetadata('description',part)
# want to get the next tr of the table.
#print("%s"%titlea.parent.parent.findNextSibling('tr'))

@@ -295,4 +296,4 @@ class SiyeCoUkAdapter(BaseSiteAdapter): # XXX
if None == story:
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
return utf8FromSoup(story)
return self.utf8FromSoup(url,story)

@@ -25,7 +25,7 @@ from .. import BeautifulSoup as bs
from ..htmlcleanup import stripHTML
from .. import exceptions as exceptions
from base_adapter import BaseSiteAdapter, utf8FromSoup, makeDate
from base_adapter import BaseSiteAdapter, makeDate
class TenhawkPresentsComSiteAdapter(BaseSiteAdapter):

@@ -164,7 +164,8 @@ class TenhawkPresentsComSiteAdapter(BaseSiteAdapter):
while not defaultGetattr(value,'class') == 'label':
svalue += str(value)
value = value.nextSibling
self.story.setMetadata('description',stripHTML(svalue))
self.setDescription(url,svalue)
#self.story.setMetadata('description',stripHTML(svalue))
if 'Rated' in label:
self.story.setMetadata('rating', value)

@@ -238,7 +239,7 @@ class TenhawkPresentsComSiteAdapter(BaseSiteAdapter):
if None == span:
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
return utf8FromSoup(span)
return self.utf8FromSoup(url,span)
def getClass():
return TenhawkPresentsComSiteAdapter

@@ -22,7 +22,7 @@ import logging
from .. import BeautifulSoup as bs
from .. import exceptions
from base_adapter import BaseSiteAdapter, utf8FromSoup, makeDate
from base_adapter import BaseSiteAdapter, makeDate
class TestSiteAdapter(BaseSiteAdapter):

@@ -191,7 +191,7 @@ horizontal rules
</div>
'''
soup = bs.BeautifulStoneSoup(text,selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
return utf8FromSoup(soup)
return self.utf8FromSoup(url,soup)
def getClass():
return TestSiteAdapter

@@ -25,7 +25,7 @@ from .. import BeautifulSoup as bs
from ..htmlcleanup import stripHTML
from .. import exceptions as exceptions
from base_adapter import BaseSiteAdapter, utf8FromSoup, makeDate
from base_adapter import BaseSiteAdapter, makeDate
class TheWritersCoffeeShopComSiteAdapter(BaseSiteAdapter):

@@ -166,7 +166,7 @@ class TheWritersCoffeeShopComSiteAdapter(BaseSiteAdapter):
while not defaultGetattr(value,'class') == 'label':
svalue += str(value)
value = value.nextSibling
self.story.setMetadata('description',stripHTML(svalue))
self.setDescription(url,svalue)
if 'Rated' in label:
self.story.setMetadata('rating', value)

@@ -245,7 +245,7 @@ class TheWritersCoffeeShopComSiteAdapter(BaseSiteAdapter):
if None == span:
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
return utf8FromSoup(span)
return self.utf8FromSoup(url,span)
def getClass():
return TheWritersCoffeeShopComSiteAdapter

@@ -25,7 +25,7 @@ from .. import BeautifulSoup as bs
from ..htmlcleanup import stripHTML
from .. import exceptions as exceptions
from base_adapter import BaseSiteAdapter, utf8FromSoup, makeDate
from base_adapter import BaseSiteAdapter, makeDate
class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):

@@ -127,6 +127,8 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):
else:
raise e
descurl = url
if "<h2>Story Not Found</h2>" in data:
raise exceptions.StoryDoesNotExist(url)

@@ -154,12 +156,14 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):
# going to pull part of the meta data from author list page.
logging.debug("**AUTHOR** URL: "+self.story.getMetadata('authorUrl'))
authordata = self._fetchUrl(self.story.getMetadata('authorUrl'))
descurl=self.story.getMetadata('authorUrl')
authorsoup = bs.BeautifulSoup(authordata)
# author can have several pages, scan until we find it.
while( not authorsoup.find('a', href=re.compile(r"^/Story-"+self.story.getMetadata('storyId'))) ):
nextpage = 'http://'+self.host+authorsoup.find('a', {'class':'arrowf'})['href']
logging.debug("**AUTHOR** nextpage URL: "+nextpage)
authordata = self._fetchUrl(nextpage)
descurl=nextpage
authorsoup = bs.BeautifulSoup(authordata)
except urllib2.HTTPError, e:
if e.code == 404:

@@ -168,7 +172,8 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):
raise e
storydiv = authorsoup.find('div', {'id':'st'+self.story.getMetadata('storyId'), 'class':re.compile(r"storylistitem")})
self.story.setMetadata('description',stripHTML(storydiv.find('div',{'class':'storydesc'})))
self.setDescription(descurl,storydiv.find('div',{'class':'storydesc'}))
#self.story.setMetadata('description',stripHTML(storydiv.find('div',{'class':'storydesc'})))
self.story.setMetadata('title',stripHTML(storydiv.find('a',{'class':'storylink'})))
verticaltable = soup.find('table', {'class':'verticaltable'})

@@ -238,7 +243,7 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):
div.find('h3').extract()
except:
pass
return utf8FromSoup(div)
return self.utf8FromSoup(url,div)
def getClass():
return TwistingTheHellmouthSiteAdapter

@@ -25,7 +25,7 @@ from .. import BeautifulSoup as bs
from ..htmlcleanup import stripHTML
from .. import exceptions as exceptions
from base_adapter import BaseSiteAdapter, utf8FromSoup, makeDate
from base_adapter import BaseSiteAdapter, makeDate
class TwilightedNetSiteAdapter(BaseSiteAdapter):

@@ -162,7 +162,7 @@ class TwilightedNetSiteAdapter(BaseSiteAdapter):
while not defaultGetattr(value,'class') == 'label':
svalue += str(value)
value = value.nextSibling
self.story.setMetadata('description',stripHTML(svalue))
self.setDescription(url,svalue)
if 'Rated' in label:
self.story.setMetadata('rating', value)

@@ -243,7 +243,7 @@ class TwilightedNetSiteAdapter(BaseSiteAdapter):
if None == span:
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
return utf8FromSoup(span)
return self.utf8FromSoup(url,span)
def getClass():
return TwilightedNetSiteAdapter

@@ -25,7 +25,7 @@ from .. import BeautifulSoup as bs
from ..htmlcleanup import stripHTML
from .. import exceptions as exceptions
from base_adapter import BaseSiteAdapter, utf8FromSoup, makeDate
from base_adapter import BaseSiteAdapter, makeDate
class TwiwriteNetSiteAdapter(BaseSiteAdapter):

@@ -169,7 +169,8 @@ class TwiwriteNetSiteAdapter(BaseSiteAdapter):
while not defaultGetattr(value,'class') == 'label':
svalue += str(value)
value = value.nextSibling
self.story.setMetadata('description',stripHTML(svalue))
self.setDescription(url,svalue)
#self.story.setMetadata('description',stripHTML(svalue))
if 'Rated' in label:
self.story.setMetadata('rating', value)

@@ -255,7 +256,7 @@ class TwiwriteNetSiteAdapter(BaseSiteAdapter):
if None == span:
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
return utf8FromSoup(span)
return self.utf8FromSoup(url,span)
def getClass():
return TwiwriteNetSiteAdapter

@@ -23,7 +23,7 @@ import urllib2
from .. import BeautifulSoup as bs
from .. import exceptions as exceptions
from base_adapter import BaseSiteAdapter, utf8FromSoup, makeDate
from base_adapter import BaseSiteAdapter, makeDate
class WhoficComSiteAdapter(BaseSiteAdapter):

@@ -120,9 +120,10 @@ class WhoficComSiteAdapter(BaseSiteAdapter):
# link instead to find the appropriate metadata.
a = soup.find('a', href=re.compile(r'reviews.php\?sid='+self.story.getMetadata('storyId')))
metadata = a.findParent('td')
metadatachunks = utf8FromSoup(metadata).split('<br />')
metadatachunks = self.utf8FromSoup(None,metadata).split('<br />')
# process metadata for this story.
self.story.setMetadata('description', metadatachunks[1])
self.setDescription(url,metadatachunks[1])
#self.story.setMetadata('description', metadatachunks[1])
# First line of the stuff with ' - ' separators
moremeta = metadatachunks[2]

@@ -224,7 +225,7 @@ class WhoficComSiteAdapter(BaseSiteAdapter):
if None == span:
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
return utf8FromSoup(span)
return self.utf8FromSoup(url,span)
def getClass():
return WhoficComSiteAdapter

@@ -23,6 +23,9 @@ import urllib
import urllib2 as u2
import urlparse as up
from .. import BeautifulSoup as bs
from ..htmlcleanup import stripHTML
try:
from google.appengine.api import apiproxy_stub_map
def urlfetch_timeout_hook(service, call, request, response):

@@ -66,8 +69,9 @@ class BaseSiteAdapter(Configurable):
def __init__(self, config, url):
self.config = config
Configurable.__init__(self, config)
self.addConfigSection(self.getSiteDomain())
self.addConfigSection("overrides")
self.setSectionOrder(self.getSiteDomain())
# self.addConfigSection(self.getSiteDomain())
# self.addConfigSection("overrides")
self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
self.password = ""

@@ -150,6 +154,12 @@ class BaseSiteAdapter(Configurable):
headers=headers)
return self._decode(self.opener.open(req).read())
def _fetchUrlRaw(self, url, parameters=None):
if parameters != None:
return self.opener.open(url,urllib.urlencode(parameters)).read()
else:
return self.opener.open(url).read()
# parameters is a dict()
def _fetchUrl(self, url, parameters=None):
if self.getConfig('slow_down_sleep_time'):

@@ -159,10 +169,7 @@ class BaseSiteAdapter(Configurable):
for sleeptime in [0, 0.5, 4, 9]:
time.sleep(sleeptime)
try:
if parameters:
return self._decode(self.opener.open(url,urllib.urlencode(parameters)).read())
else:
return self._decode(self.opener.open(url).read())
return self._decode(self._fetchUrlRaw(url,parameters))
except Exception, e:
excpt=e
logging.warn("Caught an exception reading URL: %s Exception %s."%(unicode(url),unicode(e)))

@@ -235,6 +242,49 @@ class BaseSiteAdapter(Configurable):
if self.getConfig('collect_series'):
self.story.setMetadata('series','%s [%s]'%(name, num))
def setDescription(self,url,svalue):
#print("\n\nsvalue:\n%s\n"%svalue)
if self.getConfig('keep_summary_html'):
if isinstance(svalue,str) or isinstance(svalue,unicode):
svalue = bs.BeautifulSoup(svalue)
self.story.setMetadata('description',self.utf8FromSoup(url,svalue))
else:
self.story.setMetadata('description',stripHTML(svalue))
#print("\n\ndescription:\n"+self.story.getMetadata('description')+"\n\n")
# this gives us a unicode object, not just a string containing bytes.
# (I gave soup a unicode string, you'd think it could give it back...)
def utf8FromSoup(self,url,soup):
acceptable_attributes = ['href','name']
#print("include_images:"+self.getConfig('include_images'))
if self.getConfig('include_images'):
acceptable_attributes.extend(('src','alt'))
for img in soup.findAll('img'):
img['src']=self.story.addImgUrl(self,url,img['src'],self._fetchUrlRaw)
for attr in soup._getAttrMap().keys():
if attr not in acceptable_attributes:
del soup[attr] ## strip all tag attributes except href and name
for t in soup.findAll(recursive=True):
for attr in t._getAttrMap().keys():
if attr not in acceptable_attributes:
del t[attr] ## strip all tag attributes except href and name
# these are not acceptable strict XHTML. But we do already have
# CSS classes of the same names defined in constants.py
if t.name in ('u'):
t['class']=t.name
t.name='span'
if t.name in ('center'):
t['class']=t.name
t.name='div'
# removes paired, but empty tags.
if t.string != None and len(t.string.strip()) == 0 :
t.extract()
return soup.__str__('utf8').decode('utf-8')
fullmon = {"January":"01", "February":"02", "March":"03", "April":"04", "May":"05",
"June":"06","July":"07", "August":"08", "September":"09", "October":"10",
"November":"11", "December":"12" }

@@ -245,7 +295,9 @@ def makeDate(string,format):
# fudge english month names for people who's locale is set to
# non-english. All our current sites date in english, even if
# there's non-english content.
# there's non-english content. -- ficbook.net now makes that a
# lie. It has to do something even more complicated to get
# Russian month names correct everywhere.
do_abbrev = "%b" in format
if "%B" in format or do_abbrev:

@@ -259,24 +311,3 @@ def makeDate(string,format):
return datetime.datetime.strptime(string,format)
acceptable_attributes = ['href','name']
# this gives us a unicode object, not just a string containing bytes.
# (I gave soup a unicode string, you'd think it could give it back...)
def utf8FromSoup(soup):
for t in soup.findAll(recursive=True):
for attr in t._getAttrMap().keys():
if attr not in acceptable_attributes:
del t[attr] ## strip all tag attributes except href and name
# these are not acceptable strict XHTML. But we do already have
# CSS classes of the same names defined in constants.py
if t.name in ('u'):
t['class']=t.name
t.name='span'
if t.name in ('center'):
t['class']=t.name
t.name='div'
# removes paired, but empty tags.
if t.string != None and len(t.string.strip()) == 0 :
t.extract()
return soup.__str__('utf8').decode('utf-8')

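A small sketch of the two behaviours the new setDescription() helper above switches between, depending on keep_summary_html; the summary string and the absolute import paths are illustrative assumptions:

from fanficdownloader import BeautifulSoup as bs        # assumed bundled module
from fanficdownloader.htmlcleanup import stripHTML      # assumed module path

svalue = '<p>An <b>example</b> summary.</p>'             # hypothetical site summary
# keep_summary_html:false -> the description metadata is reduced to plain text
print stripHTML(svalue)
# keep_summary_html:true -> the summary is parsed and kept as (sanitized) markup
print bs.BeautifulSoup(svalue)
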
@@ -21,16 +21,21 @@ import ConfigParser
# inherit from Configurable. The config file(s) uses ini format:
# [sections] with key:value settings.
#
# There's a [defaults] section which is overriden by the writer's
# section [epub], which is overriden by the adapter's section for each
# site.
# writer does [defaults], [www.whofic.com], [epub], [www.whofic.com:epub], [overrides]
#
# Until a write is created, the adapter only has [defaults], [www.whofic.com], [overrides]
#
# [defaults]
# titlepage_entries: category,genre, status
# [epub]
# titlepage_entries: category,genre, status,datePublished,dateUpdated,dateCreated
# [www.whofic.com]
# titlepage_entries: category,genre, status,dateUpdated,rating
# [epub]
# titlepage_entries: category,genre, status,datePublished,dateUpdated,dateCreated
# [www.whofic.com:epub]
# titlepage_entries: category,genre, status,datePublished
# [overrides]
# titlepage_entries: category
class Configurable(object):

@@ -38,6 +43,14 @@ class Configurable(object):
self.config = config
self.sectionslist = ['defaults']
def setSectionOrder(self,site,fileform=None):
self.sectionslist = ['defaults']
self.addConfigSection(site)
if fileform:
self.addConfigSection(fileform)
self.addConfigSection(site+":"+fileform)
self.addConfigSection("overrides")
def addConfigSection(self,section):
self.sectionslist.insert(0,section)

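As a rough illustration of the ordering setSectionOrder() builds: addConfigSection() inserts at the front of sectionslist, so lookups consult the most specific section first. The site and format values are examples and the module path is an assumption:

from fanficdownloader.configurable import Configurable   # assumed module path

c = Configurable(config=None)   # sketch only; normally a parsed SafeConfigParser
c.setSectionOrder("www.whofic.com", "epub")
# Most specific first:
# ['overrides', 'www.whofic.com:epub', 'epub', 'www.whofic.com', 'defaults']
print c.sectionslist
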
@@ -16,9 +16,27 @@
#
import os, re
import urlparse
from base64 import b64encode
from htmlcleanup import conditionalRemoveEntities, removeAllEntities
# Create convert_image method depending on which graphics lib we can
# load. Preferred: calibre, PIL, none
try:
from calibre.utils.magick.draw import minify_image
def convert_image(data,sizes,grayscale):
img = minify_image(data, minify_to=sizes)
if grayscale:
img.type = "GrayscaleType"
return img.export('JPG')
except:
# Problem: writer_epub assumes image is jpg.
def convert_image(data,sizes,grayscale):
return data
# The list comes from ffnet, the only multi-language site we support
# at the time of writing. Values are taken largely from pycountry,
# but with some corrections and guesses.

@@ -72,6 +90,8 @@ class Story:
self.metadata = {'version':'4.3'}
self.replacements = []
self.chapters = [] # chapters will be tuples of (title,html)
self.imgurls = []
self.imgurldata = []
self.listables = {} # some items (extratags, category, warnings & genres) are also kept as lists.
def setMetadata(self, key, value):

@@ -153,6 +173,57 @@ class Story:
def getChapters(self):
"Chapters will be tuples of (title,html)"
return self.chapters
# pass fetch in from adapter in case we need the cookies collected
# as well as it's a base_story class method.
def addImgUrl(self,configurable,parenturl,url,fetch):
if url.startswith("http") :
imgurl = url
elif parenturl != None:
parsedUrl = urlparse.urlparse(parenturl)
if url.startswith("/") :
imgurl = urlparse.urlunparse(
(parsedUrl.scheme,
parsedUrl.netloc,
url,
'','',''))
else:
imgurl = urlparse.urlunparse(
(parsedUrl.scheme,
parsedUrl.netloc,
parsedUrl.path + url,
'','',''))
# using b64 encode of the url means that the same image ends
# up with the same name both now, in different chapters, and
# later with new update chapters. Numbering them didn't do
# that.
newsrc = "images/%s.jpg"%(b64encode(imgurl))
if imgurl not in self.imgurls:
self.imgurls.append(imgurl)
parsedUrl = urlparse.urlparse(imgurl)
# newsrc = "images/%s.jpg"%(
# self.imgurls.index(imgurl))
sizes = [ int(x) for x in configurable.getConfigList('image_max_size') ]
data = convert_image(fetch(imgurl),
sizes,
configurable.getConfig('grayscale_images'))
#print("\nimgurl\nimage size:%d\n"%len(data))
self.imgurldata.append((newsrc,data))
# else:
# newsrc = "images/%s.jpg"%(
# self.imgurls.index(imgurl))
#print("===============\n%s\nimg url:%s\n============"%(newsrc,self.imgurls[-1]))
return newsrc
def getImgUrls(self):
retlist = []
for i, url in enumerate(self.imgurls):
parsedUrl = urlparse.urlparse(url)
retlist.append(self.imgurldata[i])
return retlist
def __str__(self):
return "Metadata: " +str(self.metadata) + "\nListables: " +str(self.listables) #+ "\nChapters: "+str(self.chapters)

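A short sketch of the URL resolution and base64 naming that Story.addImgUrl() uses above, which makes the same remote image always map to the same epub-internal file name; the page and image URLs are made up:

import urlparse
from base64 import b64encode

parenturl = "http://example.com/stories/view.php?sid=1"   # hypothetical chapter URL
src = "/images/cover.png"                                  # hypothetical img src from the page

parsed = urlparse.urlparse(parenturl)
if src.startswith("/"):
    # site-absolute path: keep scheme and host, replace the path
    imgurl = urlparse.urlunparse((parsed.scheme, parsed.netloc, src, '', '', ''))
else:
    # page-relative path: append to the parent page's path
    imgurl = urlparse.urlunparse((parsed.scheme, parsed.netloc, parsed.path + src, '', '', ''))

newsrc = "images/%s.jpg" % b64encode(imgurl)
print imgurl    # http://example.com/images/cover.png
print newsrc    # images/<base64 of imgurl>.jpg
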
@@ -39,10 +39,11 @@ class BaseStoryWriter(Configurable):
def __init__(self, config, adapter):
Configurable.__init__(self, config)
self.addConfigSection(adapter.getSiteDomain())
self.addConfigSection(self.getFormatName())
self.addConfigSection(adapter.getSiteDomain()+":"+self.getFormatName())
self.addConfigSection("overrides")
self.setSectionOrder(adapter.getSiteDomain(),self.getFormatName())
# self.addConfigSection(adapter.getSiteDomain())
# self.addConfigSection(self.getFormatName())
# self.addConfigSection(adapter.getSiteDomain()+":"+self.getFormatName())
# self.addConfigSection("overrides")
self.adapter = adapter
self.story = adapter.getStoryMetadataOnly() # only cache the metadata initially.

@@ -144,7 +145,7 @@ class BaseStoryWriter(Configurable):
def _write(self, out, text):
out.write(text.encode('utf8'))
def writeTitlePage(self, out, START, ENTRY, END, WIDE_ENTRY=None):
def writeTitlePage(self, out, START, ENTRY, END, WIDE_ENTRY=None, NO_TITLE_ENTRY=None):
"""
Write the title page, but only include entries that there's
metadata for. START, ENTRY and END are expected to already by

@@ -171,6 +172,12 @@ class BaseStoryWriter(Configurable):
label=self.getConfig(entry+"_label")
else:
label=self.titleLabels[entry]
# If the label for the title entry is empty, use the
# 'no title' option if there is one.
if label == "" and NO_TITLE_ENTRY:
TEMPLATE= NO_TITLE_ENTRY
self._write(out,TEMPLATE.substitute({'label':label,
'value':self.story.getMetadata(entry)}))

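For example, a sketch of what the NO_TITLE_ENTRY fallback produces when a titlepage label has been blanked in the config; the templates mirror the EPUB ones added below and the label/value pair is made up:

import string

ENTRY = string.Template('<b>${label}:</b> ${value}<br />\n')
NO_TITLE_ENTRY = string.Template('${value}<br />\n')

label, value = "", "Complete"   # e.g. a status entry whose label was blanked in personal.ini
TEMPLATE = ENTRY
if label == "" and NO_TITLE_ENTRY:
    TEMPLATE = NO_TITLE_ENTRY
# Writes "Complete<br />" instead of "<b>:</b> Complete<br />".
print TEMPLATE.substitute({'label': label, 'value': value})
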
@@ -20,6 +20,7 @@ import string
import StringIO
import zipfile
from zipfile import ZipFile, ZIP_STORED, ZIP_DEFLATED
import urllib
## XML isn't as forgiving as HTML, so rather than generate as strings,
## use DOM to generate the XML files.

@@ -57,6 +58,10 @@ class EpubWriter(BaseStoryWriter):
self.EPUB_TITLE_ENTRY = string.Template('''
<b>${label}:</b> ${value}<br />
''')
self.EPUB_NO_TITLE_ENTRY = string.Template('''
${value}<br />
''')
self.EPUB_TITLE_PAGE_END = string.Template('''

@@ -84,6 +89,10 @@ class EpubWriter(BaseStoryWriter):
self.EPUB_TABLE_TITLE_WIDE_ENTRY = string.Template('''
<tr><td colspan="2"><b>${label}:</b> ${value}</td></tr>
''')
self.EPUB_TABLE_NO_TITLE_ENTRY = string.Template('''
<tr><td colspan="2">${label}${value}</td></tr>
''')
self.EPUB_TABLE_TITLE_PAGE_END = string.Template('''

@@ -268,6 +277,24 @@ class EpubWriter(BaseStoryWriter):
title))
itemrefs.append("file%04d"%i)
if self.getConfig('include_images'):
#from calibre.utils.magick.draw import minify_image
imgcount=0
sizes = [ int(x) for x in self.getConfigList('image_max_size') ]
for (newsrc,data) in self.story.getImgUrls():
imgfile = "OEBPS/"+newsrc
# saveimg = minify_image(data, minify_to=sizes)
# if self.getConfig('grayscale_images'):
# saveimg.type = "GrayscaleType"
# outputepub.writestr(imgfile,saveimg.export('JPG'))
outputepub.writestr(imgfile,data)
items.append(("image%04d"%imgcount,
imgfile,
"image/jpeg",
None))
imgcount+=1
manifest = contentdom.createElement("manifest")
package.appendChild(manifest)
for item in items:

@@ -346,11 +373,13 @@ class EpubWriter(BaseStoryWriter):
TITLE_PAGE_START = self.EPUB_TABLE_TITLE_PAGE_START
TITLE_ENTRY = self.EPUB_TABLE_TITLE_ENTRY
WIDE_TITLE_ENTRY = self.EPUB_TABLE_TITLE_WIDE_ENTRY
NO_TITLE_ENTRY = self.EPUB_TABLE_NO_TITLE_ENTRY
TITLE_PAGE_END = self.EPUB_TABLE_TITLE_PAGE_END
else:
TITLE_PAGE_START = self.EPUB_TITLE_PAGE_START
TITLE_ENTRY = self.EPUB_TITLE_ENTRY
WIDE_TITLE_ENTRY = self.EPUB_TITLE_ENTRY # same, only wide in tables.
NO_TITLE_ENTRY = self.EPUB_NO_TITLE_ENTRY
TITLE_PAGE_END = self.EPUB_TITLE_PAGE_END
titlepageIO = StringIO.StringIO()

@@ -358,7 +387,8 @@ class EpubWriter(BaseStoryWriter):
START=TITLE_PAGE_START,
ENTRY=TITLE_ENTRY,
WIDE_ENTRY=WIDE_TITLE_ENTRY,
END=TITLE_PAGE_END)
END=TITLE_PAGE_END,
NO_TITLE_ENTRY=NO_TITLE_ENTRY)
if titlepageIO.getvalue(): # will be false if no title page.
outputepub.writestr("OEBPS/title_page.xhtml",titlepageIO.getvalue())
titlepageIO.close()

@@ -384,7 +414,7 @@ class EpubWriter(BaseStoryWriter):
fullhtml = fullhtml.replace('</p>','</p>\n').replace('<br />','<br />\n')
outputepub.writestr("OEBPS/file%04d.xhtml"%(index+1),fullhtml.encode('utf-8'))
del fullhtml
# declares all the files created by Windows. otherwise, when
# it runs in appengine, windows unzips the files as 000 perms.
for zf in outputepub.filelist:

@@ -49,6 +49,10 @@ class MobiWriter(BaseStoryWriter):
self.MOBI_TITLE_ENTRY = string.Template('''
<b>${label}:</b> ${value}<br />
''')
self.MOBI_NO_TITLE_ENTRY = string.Template('''
${value}<br />
''')
self.MOBI_TITLE_PAGE_END = string.Template('''

@@ -75,6 +79,10 @@ class MobiWriter(BaseStoryWriter):
self.MOBI_TABLE_TITLE_WIDE_ENTRY = string.Template('''
<tr><td colspan="2"><b>${label}:</b> ${value}</td></tr>
''')
self.MOBI_TABLE_NO_TITLE_WIDE_ENTRY = string.Template('''
<tr><td colspan="2">${value}</td></tr>
''')
self.MOBI_TABLE_TITLE_PAGE_END = string.Template('''

@@ -129,11 +137,13 @@ class MobiWriter(BaseStoryWriter):
TITLE_PAGE_START = self.MOBI_TABLE_TITLE_PAGE_START
TITLE_ENTRY = self.MOBI_TABLE_TITLE_ENTRY
WIDE_TITLE_ENTRY = self.MOBI_TABLE_TITLE_WIDE_ENTRY
NO_TITLE_ENTRY = self.MOBI_TABLE_NO_TITLE_ENTRY
TITLE_PAGE_END = self.MOBI_TABLE_TITLE_PAGE_END
else:
TITLE_PAGE_START = self.MOBI_TITLE_PAGE_START
TITLE_ENTRY = self.MOBI_TITLE_ENTRY
WIDE_TITLE_ENTRY = self.MOBI_TITLE_ENTRY # same, only wide in tables.
NO_TITLE_ENTRY = self.MOBI_NO_TITLE_ENTRY
TITLE_PAGE_END = self.MOBI_TITLE_PAGE_END
titlepageIO = StringIO.StringIO()

@@ -141,7 +151,8 @@ class MobiWriter(BaseStoryWriter):
START=TITLE_PAGE_START,
ENTRY=TITLE_ENTRY,
WIDE_ENTRY=WIDE_TITLE_ENTRY,
END=TITLE_PAGE_END)
END=TITLE_PAGE_END,
NO_TITLE_ENTRY=NO_TITLE_ENTRY)
if titlepageIO.getvalue(): # will be false if no title page.
files.append(titlepageIO.getvalue())
titlepageIO.close()

@@ -213,6 +213,23 @@ output_css:
.u {text-decoration: underline;}
.bold {font-weight: bold;}
## include images from img tags in the body and summary of
## stories
#include_images:false
## Resize images down to width, height, preserving aspect ratio.
## Nook size, with margin.
#image_max_size: 580, 725
## Change image to grayscale, if graphics library allows, to save
## space.
#grayscale_images: false
## If not set, the summary will have all html stripped for safety.
## Both this and include_images must be true to get images in the
## summary.
#keep_summary_html:false
[mobi]
## mobi TOC cannot be turned off right now.
#include_tocpage: true

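To actually turn the new image handling on, a user would override these commented-out defaults, for example in a personal.ini; the section choice and values below are examples, not part of this commit:

[epub]
include_images: true
keep_summary_html: true
## shrink to the Nook-sized default and convert to grayscale
image_max_size: 580, 725
grayscale_images: true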