Add a BBCode lib for summaries on fimfic (had to be modified for unicode),

Default keep_summary_html to true now,
Changes to the AO3, ficbook.net, ficwad.com adapters for summary HTML.
Add options to strip and add chapter numbers to chapter titles.
This commit is contained in:
Jim Miller 2012-09-12 18:03:30 -05:00
parent ab521ac093
commit 0ca71a6455
24 changed files with 1328 additions and 44 deletions

View file

@ -4,7 +4,7 @@
'<a href="http://www.mobileread.com/forums/showthread.php?t=134856">Reading List</a>',
'<a href="http://www.mobileread.com/forums/showthread.php?t=126727">Extract ISBN</a>' and
'<a href="http://www.mobileread.com/forums/showthread.php?t=134000">Count Pages</a>'
plugins.</p>
plugins. bbcodeutils code contributed by Pau Sanchez.</p>
<p>
Calibre officially distributes plugins from the mobileread.com forum site.

View file

@ -28,6 +28,7 @@ from calibre.gui2 import error_dialog, warning_dialog, question_dialog, info_dia
from calibre.gui2.dialogs.message_box import ViewLog
from calibre.gui2.dialogs.confirm_delete import confirm
from calibre.utils.date import local_tz
from calibre.library.comments import sanitize_comments_html
# The class that all interface action plugins must inherit from
from calibre.gui2.actions import InterfaceAction
@ -36,7 +37,7 @@ from calibre_plugins.fanfictiondownloader_plugin.common_utils import (set_plugin
create_menu_action_unique, get_library_uuid)
from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader import adapters, writers, exceptions
from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader.htmlcleanup import stripHTML
#from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader.htmlcleanup import stripHTML
from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader.epubutils import get_dcsource, get_dcsource_chaptercount, get_story_url_from_html
from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader.geturls import get_urls_from_page
@ -432,13 +433,6 @@ class FanFictionDownLoaderPlugin(InterfaceAction):
print("url:%s"%url)
skip_date_update = False
## was self.ffdlconfig, but we need to be able to change it
## when doing epub update.
ffdlconfig = SafeConfigParser()
ffdlconfig.readfp(StringIO(get_resources("plugin-defaults.ini")))
ffdlconfig.readfp(StringIO(prefs['personal.ini']))
adapter = adapters.getAdapter(ffdlconfig,url,fileform)
options['personal.ini'] = prefs['personal.ini']
if prefs['includeimages']:
# this is a cheat to make it easier for users.
@ -448,6 +442,13 @@ keep_summary_html:true
make_firstimage_cover:true
''' + options['personal.ini']
## was self.ffdlconfig, but we need to be able to change it
## when doing epub update.
ffdlconfig = SafeConfigParser()
ffdlconfig.readfp(StringIO(get_resources("plugin-defaults.ini")))
ffdlconfig.readfp(StringIO(options['personal.ini']))
adapter = adapters.getAdapter(ffdlconfig,url,fileform)
## three tries, that's enough if both user/pass & is_adult needed,
## or a couple tries of one or the other
for x in range(0,2):
@ -476,7 +477,7 @@ make_firstimage_cover:true
book['author_sort'] = book['author'] = story.getList("author", removeallentities=True)
book['publisher'] = story.getMetadata("site")
book['tags'] = writer.getTags(removeallentities=True) # getTags could be moved up into adapter now. Adapter didn't used to know the fileform
book['comments'] = stripHTML(story.getMetadata("description")) #, removeallentities=True) comments handles entities better.
book['comments'] = sanitize_comments_html(story.getMetadata("description"))
book['series'] = story.getMetadata("series", removeallentities=True)
# adapter.opener is the element with a threadlock. But del

View file

@ -164,6 +164,45 @@ extratags: FanFiction
## doesn't work on some devices either.)
#replace_hr: false
## If set false, the summary will have all html stripped.
## Both this and include_images must be true to get images in the
## summary.
keep_summary_html:true
## Don't like the numbers at the start of chapter titles on some
## sites? You can use strip_chapter_numbers to strip them off. Just
## want to make them all look the same? Strip them off, then add them
## back on with add_chapter_numbers. Don't like the way it strips
## numbers or adds them back? See chapter_title_strip_pattern and
## chapter_title_add_pattern.
strip_chapter_numbers:false
add_chapter_numbers:false
## (Two versions of chapter_title_strip_pattern are shown below. You
## should only have one uncommented.)
## This version will remove the leading number from:
## "1." => ""
## "1. The Beginning" => "The Beginning"
## "1: Start" => "Start"
## "2, Chapter the second" => "Chapter the second"
## etc
chapter_title_strip_pattern:^[0-9]+[\.: -]+
## This version will strip all of the above *plus* remove 'Chapter 1':
## "Chapter 1" => ""
## "1. Chapter 1" => ""
## "1. Chapter 1, Bob's First Clue" => "Bob's First Clue"
## "Chapter 2 - Pirates Place" => "Pirates Place"
## etc
#chapter_title_strip_pattern:^([0-9]+[\.: -]+)?(Chapter *[0-9]+[\.:, -]*)?
## Uses a python template substitution. The ${index} is the 'chapter'
## number and ${title} is the chapter title, after applying
## chapter_title_strip_pattern. Those are the only variables available.
## "The Beginning" => "1. The Beginning"
chapter_title_add_pattern:${index}. ${title}
## Each output format has a section that overrides [defaults]
[html]
@ -271,11 +310,6 @@ output_css:
## stories. Images will be converted to jpg for size if possible.
#include_images:false
## If not set, the summary will have all html stripped for safety.
## Both this and include_images must be true to get images in the
## summary.
#keep_summary_html:false
## If set, the first image found will be made the cover image. If
## keep_summary_html is true, any images in summary will be before any
## in chapters.

View file

@ -191,7 +191,7 @@ class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter):
a = metasoup.find('blockquote',{'class':'userstuff'})
if a != None:
self.setDescription(url,a.text)
self.setDescription(url,a)
#self.story.setMetadata('description',a.text)
a = metasoup.find('dd',{'class':"rating tags"})

View file

@ -201,7 +201,7 @@ class FicBookNetAdapter(BaseSiteAdapter):
break
summary=soup.find('span', {'class' : 'urlize'})
self.setDescription(url,summary.text)
self.setDescription(url,summary)
#self.story.setMetadata('description', summary.text)
# grab the text for an individual chapter.

View file

@ -124,7 +124,7 @@ class FicwadComSiteAdapter(BaseSiteAdapter):
# description
storydiv = soup.find("div",{"id":"story"})
self.setDescription(url,storydiv.find("blockquote",{'class':'summary'}).p.string)
self.setDescription(url,storydiv.find("blockquote",{'class':'summary'}).p)
#self.story.setMetadata('description', storydiv.find("blockquote",{'class':'summary'}).p.string)
# most of the meta data is here:

View file

@ -27,6 +27,8 @@ from .. import BeautifulSoup as bs
from ..htmlcleanup import stripHTML
from .. import exceptions as exceptions
from ..bbcodeutils.bbcodeparser import bbcodeparser
from base_adapter import BaseSiteAdapter, makeDate
def getClass():
@ -137,12 +139,19 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter):
# fimfic is the first site with an explicit cover image.
if self.getConfig('include_images') and "image" in storyMetadata.keys():
coverurl = storyMetadata["image"]
if "full_image" in storyMetadata:
coverurl = storyMetadata["full_image"]
else:
coverurl = storyMetadata["image"]
if coverurl.startswith('//static.fimfiction.net'): # fix for img urls missing 'http:'
coverurl = "http:"+coverurl
self.story.addImgUrl(self,self.url,coverurl,self._fetchUrlRaw,cover=True)
self.setDescription(self.url, storyMetadata["description"])
# the fimfic API gives bbcode for desc, not html.
# btw, bbcode honors newlines, html doesn't. change newlines to br tags.
self.setDescription(self.url,
bbcodeparser().parse(storyMetadata["description"]).html(doDeepCopy=False).replace('\r','').replace('\n','<br />'))
# Dates are in Unix time
# Take the publish date from the first chapter posted
@ -152,8 +161,18 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter):
self.story.setMetadata("dateUpdated", datetime.fromtimestamp(rawDateUpdated))
soup = bs.BeautifulSoup(data).find("div", {"class":"story"})
for character in [character_icon["title"] for character_icon in soup.findAll("a", {"class":"character_icon"})]:
self.story.addToList("characters", character)
# fimfic stopped putting the char name on or around the char
# icon now for some reason. Pull it from the image name with
# some heuristics.
for character in [character_icon["src"] for character_icon in soup.findAll("img", {"class":"character_icon"})]:
# //static.fimfiction.net/images/characters/twilight_sparkle.png
# 5th split /, remove last four, replace _, capitalize every word (title())
char = character.split('/')[5][:-4].replace('_',' ').title()
if char == 'Oc':
char = "OC"
if char == 'Cmc':
char = "Cutie Mark Crusaders"
self.story.addToList("characters", char)
def getChapterText(self, url):

View file

@ -73,8 +73,8 @@ class TestSiteAdapter(BaseSiteAdapter):
self.story.setMetadata(u'title',"Test Story Title "+self.story.getMetadata('storyId'))
self.story.setMetadata('author','Test Author aa')
self.story.setMetadata('storyUrl',self.url)
self.story.setMetadata('description',u'Description '+self.crazystring+u''' Done
self.setDescription(self.url,u'Description '+self.crazystring+u''' Done
<p>
Some more longer description. "I suck at summaries!" "Better than it sounds!" "My first fic"
''')
self.story.setMetadata('datePublished',makeDate("1975-03-15","%Y-%m-%d"))

View file

@ -199,7 +199,8 @@ class BaseSiteAdapter(Configurable):
if (self.chapterFirst!=None and index < self.chapterFirst) or \
(self.chapterLast!=None and index > self.chapterLast):
self.story.addChapter(removeEntities(title),
None)
None,
self)
else:
if self.oldchapters and index < len(self.oldchapters):
data = self.utf8FromSoup(None,
@ -208,7 +209,8 @@ class BaseSiteAdapter(Configurable):
else:
data = self.getChapterText(url)
self.story.addChapter(removeEntities(title),
removeEntities(data))
removeEntities(data),
self)
self.storyDone = True
# include image, but no cover from story, add default_cover_image cover.
@ -289,7 +291,7 @@ class BaseSiteAdapter(Configurable):
def setDescription(self,url,svalue):
#print("\n\nsvalue:\n%s\n"%svalue)
if self.getConfig('keep_summary_html'):
if isinstance(svalue,str) or isinstance(svalue,unicode):
if isinstance(svalue,basestring):
svalue = bs.BeautifulSoup(svalue)
self.story.setMetadata('description',self.utf8FromSoup(url,svalue))
else:

View file

View file

@ -0,0 +1,325 @@
#!/usr/bin/python
# -*- coding: UTF-8 -*-
#
# Author: Pau Sanchez (contact@pausanchez.com)
# Version: v1.0
# Last Modified: 2010/09/15
#
# For the latest version check out:
# http://www.codigomanso.com/en/projects
#
# My blog:
# http://www.codigomanso.com/en/ - English Version
# http://www.codigomanso.com/es/ - Spanish Version
#
import sys
import os
import re
import urllib
class bbcode2html:
    '''
    Turn a parsed BBCode token list into HTML.

    A token list mixes plain text strings with tag dictionaries of the form
    {'tag': 'b'}, {'tag': '/b'} or {'tag': 'url', 'args': {...}}, as produced
    by bbcodeparser.tokenize().

    Useful functions of this class:
      html
      convertToHTML

    Example:
      > parser = bbcodeparser ()
      > parser.parse ('[b]bold[/b]')
      > bbcode2html (parser).html()
      <b>bold</b>

      # This is faster for huge strings but changes the parser object internally
      > bbcode2html (parser).html(doDeepCopy = False)
      <b>bold</b>
    '''

    def __init__(self, parser):
        # Either a parser object exposing getTokens() or a raw token list.
        self._parser = parser

    def html(self, allowClassAttr=False, doDeepCopy=True, parser=None):
        '''
        Convert the parsed code to HTML.

        @allowClassAttr
          True to let a 'class' attribute through on every tag.
        @doDeepCopy
          See convertToHTML.
        @parser
          Overrides the parser given to the constructor. May be an object
          with a getTokens() method or a plain token list.

        Example:
          bbcode2html (bbcodeparser ('[b]bold[/b]')).html() -> '<b>bold</b>'
        '''
        if parser is None:
            parser = self._parser

        # Duck-typed on purpose: this module does not import bbcodeparser, so
        # anything offering getTokens() is treated as a parser and everything
        # else as a ready-made token list.
        tokens = parser
        if hasattr(parser, 'getTokens'):
            tokens = parser.getTokens()

        return bbcode2html.convertToHTML(
            tokens, allowClassAttr=allowClassAttr, doDeepCopy=doDeepCopy)

    @staticmethod
    def htmlString(string):
        '''
        Escape the characters & < > " so the string is safe as HTML text or
        inside a double-quoted attribute value.
        '''
        # '&' has to be handled first, otherwise the ampersands introduced by
        # the other entities would be escaped a second time.
        replacements = (
            (u'&', u'&amp;'),
            (u'<', u'&lt;'),
            (u'>', u'&gt;'),
            (u'"', u'&quot;'),
        )
        for raw, entity in replacements:
            string = string.replace(raw, entity)
        return string

    @staticmethod
    def getValidTags():
        '''
        Return a new dict mapping every accepted tag name to its accepted
        arguments. Each argument maps to the attribute name it is renamed to.
        '''
        simpleTags = ['b', 'u', 'i', 'sup', 'sub', 'ul', 'ol', 'li', 'table',
                      'tr', 'th', 'td', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6']
        validTags = {
            'p'         : { 'color' : 'color', 'size' : 'size', 'font' : 'font' },
            'color'     : { 'color' : 'color' },
            'size'      : { 'size' : 'size' },
            'font'      : { 'font' : 'font' },
            'img'       : { 'alt' : 'alt', 'title' : 'title', 'width' : 'width' , 'height' : 'height', 'img' : 'img'},
            'url'       : { 'href' : 'href', 'url' : 'href', 'link' : 'href', 'title' : 'title' },
            's'         : { },
            'code'      : { },
            'quote'     : { },
            'list'      : { 'list' : 'type' },
            'email'     : { 'email': 'href'},
            'google'    : { 'google': 'google'},
            'wikipedia' : { 'wikipedia' : 'wikipedia', 'language' : 'language', 'lang' : 'lang'}
        }
        for tag in simpleTags:
            validTags[tag] = { }
        return validTags

    @staticmethod
    def convertToHTML(tokens, allowClassAttr=False, validTags=None, doDeepCopy=True):
        '''
        Convert internally parsed BBCode to XHTML and return it as a string.

        @tokens
          Token list (text strings and tag dictionaries).
        @allowClassAttr
          True to let a 'class' attribute through on every tag.
        @validTags
          Tag/argument whitelist in the getValidTags() format; defaults to
          getValidTags().
        @doDeepCopy
          True:  works on a deep copy, so the given list remains unchanged
          False: the 'args' of the given tokens are replaced by their
                 filtered version; avoids the cost of the copy

        Text is HTML-escaped, attribute values are escaped by xml(), unknown
        tags are dropped and closing tags never carry attributes.
        '''
        if doDeepCopy:
            import copy
            tokens = copy.deepcopy(tokens)

        if validTags is None:
            validTags = bbcode2html.getValidTags()

        bbcode2html._filterInvalidTagsAndAttributes(tokens, validTags, allowClassAttr)

        xml = bbcode2html.xml
        escape = bbcode2html.htmlString

        index = 0
        tokenLength = len(tokens)

        # pieces are collected in a list and joined once at the end
        htmlList = []
        lastListOpener = []  # stack of 'ul' / 'ol' for the lists still open

        while index < tokenLength:
            token = tokens[index]

            # anything that is not a tag dictionary is text
            if not isinstance(token, dict):
                htmlList.append(escape(token))
                index += 1
                continue

            tag = token['tag']  # e.g: 'b', '/b', '/u', ...
            isClosing = (tag[0] == '/')
            tagName = (tag[1:] if isClosing else tag)
            tagOpener = (u'/' if isClosing else u'')

            # A closing tag never takes attributes. Opening tags get a private
            # copy so the edits below do not leak into the token itself.
            tokenArgs = ({} if isClosing else dict(token.get('args', {})))

            # Text directly following this tag, when there is any.
            nextText = None
            if (index + 1 < tokenLength) and not isinstance(tokens[index + 1], dict):
                nextText = tokens[index + 1]
            # True when a third token (normally the closing tag) exists.
            hasCloser = (index + 2 < tokenLength)

            # P / COLOR / SIZE / FONT: <p> or <span> with an inline style
            if tagName in ('p', 'color', 'size', 'font'):
                htmlTag = (u'p' if tagName == 'p' else u'span')
                if isClosing:
                    htmlList.append(u'</' + htmlTag + u'>')
                else:
                    styles = []
                    if 'color' in tokenArgs:
                        styles.append(u'color: ' + tokenArgs['color'] + u';')
                    if 'size' in tokenArgs:
                        styles.append(u'font-size: ' + tokenArgs['size'] + u'pt;')
                    if 'font' in tokenArgs:
                        styles.append(u'font-family: ' + tokenArgs['font'] + u';')

                    pArgs = {}
                    if styles:
                        pArgs['style'] = u' '.join(styles)
                    if 'class' in tokenArgs:
                        pArgs['class'] = tokenArgs['class']

                    # An opening tag is always emitted, even without any
                    # style, so that it balances the closing tag.
                    htmlList.append(xml(htmlTag, pArgs))

            # IMG tag: [img]http://www.whatever.com/pic.jpg[/img]
            elif tag == 'img' and (nextText is not None) and hasCloser:
                if 'img' in tokenArgs:
                    spec = tokenArgs.pop('img')
                    # has the form of <width>x<height> ?
                    sizeMatch = re.match(r'^\s*(\d+)[xX](\d+)\s*$', spec)
                    if sizeMatch is not None:
                        tokenArgs['width'] = sizeMatch.group(1)
                        tokenArgs['height'] = sizeMatch.group(2)
                    else:
                        # otherwise it is the alternative text
                        tokenArgs['alt'] = spec

                tokenArgs['src'] = nextText
                htmlList.append(xml('img', tokenArgs, soloTag=True))
                index += 2  # skip the source text and the closing tag

            # URL tag
            elif tag == 'url':
                if ('args' not in token) and (nextText is not None) and hasCloser:
                    # [url]http://www.google.com[/url]; the text token is not
                    # skipped, so it also becomes the link text
                    htmlList.append(xml('a', {'href': nextText}))
                else:
                    # [url=http://www.google.com]Google[/url]
                    # [url link=http://www.google.com title="This is Google"]Google[/url]
                    htmlList.append(xml('a', tokenArgs))

            # URL / email closing tag
            elif (tag == '/url') or (tag == '/email'):
                htmlList.append(u'</a>')

            # Email tag
            elif tag == 'email':
                if ('args' not in token) and (nextText is not None) and hasCloser:
                    # [email]asdf@asdf.com[/email]
                    htmlList.append(xml('a', {'href': u'mailto:' + nextText.strip()}))
                else:
                    # [email=asdf@asfd.com]john smith[/email]
                    if 'href' in tokenArgs:
                        tokenArgs['href'] = u'mailto:' + tokenArgs['href']
                    htmlList.append(xml('a', tokenArgs))

            elif tagName == 'list':
                if isClosing:
                    # a [/list] without a matching [list] is ignored
                    if lastListOpener:
                        htmlList.append(u'</' + lastListOpener.pop() + u'>')
                else:
                    # [list=1] / [list=a] carry a 'type' and become ordered
                    listTag = (u'ol' if 'type' in tokenArgs else u'ul')
                    htmlList.append(xml(listTag, tokenArgs))
                    lastListOpener.append(listTag)

            elif tagName == '*':
                htmlList.append(xml(tagOpener + u'li', tokenArgs))

            elif tagName == 's':
                if not isClosing:
                    tokenArgs['style'] = 'text-decoration: line-through;'
                htmlList.append(xml(tagOpener + u'span', tokenArgs))

            elif tagName == 'code':
                htmlList.append(xml(tagOpener + u'pre', tokenArgs))

            elif tagName == 'quote':
                htmlList.append(xml(tagOpener + u'blockquote', tokenArgs))

            elif tagName == 'google':
                # The opening tag emits the complete link and skips its text
                # and closing tag, so a closing tag reaching this point (or an
                # opening tag without text) has nothing to produce.
                if (not isClosing) and (nextText is not None):
                    try:
                        from urllib import quote_plus        # Python 2
                    except ImportError:
                        from urllib.parse import quote_plus  # Python 3
                    query = nextText
                    if not isinstance(query, bytes):
                        # quote bytes so non-ASCII search terms work too
                        query = query.encode('utf-8')
                    htmlList.append(xml(
                        u'a',
                        {'href': 'http://www.google.com/search?q=' + quote_plus(query)},
                        escape(nextText)
                    ))
                    index += 2

            elif tagName == 'wikipedia':
                # same skipping scheme as [google]
                if (not isClosing) and (nextText is not None):
                    subdomain = 'www'
                    for arg in ('lang', 'language', 'wikipedia'):
                        if arg in tokenArgs:
                            subdomain = tokenArgs[arg]
                    htmlList.append(xml(
                        u'a',
                        {'href': 'http://' + subdomain + '.wikipedia.org/wiki/' + nextText.replace(' ', '_')},
                        escape(nextText)
                    ))
                    index += 2

            elif tagName in validTags:
                # simple tags map one to one: [b] -> <b>, [/b] -> </b>
                htmlList.append(xml(tag, tokenArgs))

            else:
                # ignore this tag
                pass

            index += 1

        return ''.join(htmlList)

    @staticmethod
    def _filterInvalidTagsAndAttributes(tokens, validTags, allowClassAttr):
        '''
        Helper function that replaces the 'args' of every tag token by the
        whitelisted (and renamed) subset given in validTags.

        Tags missing from validTags keep none of their arguments. When
        allowClassAttr is True a 'class' argument is kept on any tag.
        validTags itself is not modified.
        '''
        for token in tokens:
            if not isinstance(token, dict) or ('args' not in token):
                continue

            allowed = validTags.get(token['tag'], {})
            filteredArgs = {}
            for arg, value in token['args'].items():
                if allowClassAttr and arg == 'class':
                    filteredArgs['class'] = value
                elif arg in allowed:
                    # rename the argument
                    filteredArgs[allowed[arg]] = value
                # anything else is left out

            token['args'] = filteredArgs
        return

    @staticmethod
    def xml(tag, attrs=None, text=None, soloTag=False):
        '''
        Helper function to produce one XML tag.

        @tag      tag name, lower-cased in the output ('/b' gives '</b>')
        @attrs    dict of attributes; values are escaped with htmlString and
                  written in alphabetical order for deterministic output
        @text     when given, it is written verbatim followed by the closing
                  tag; the caller is responsible for escaping it
        @soloTag  when True (and no text is given) the tag is self-closed
        '''
        tagName = tag.lower()
        pieces = [u'<' + tagName]

        if attrs:
            for key in sorted(attrs):
                pieces.append(u' ' + key + u'="' + bbcode2html.htmlString(attrs[key]) + u'"')

        if text is not None:
            pieces.append(u'>' + text + u'</' + tagName + u'>')
        elif soloTag:
            pieces.append(u' />')
        else:
            pieces.append(u'>')

        return u''.join(pieces)

View file

@ -0,0 +1,77 @@
#!/usr/bin/python
# -*- coding: UTF-8 -*-
#
# Author: Pau Sanchez (contact@pausanchez.com)
# Version: v1.0
# Last Modified: 2010/09/15
#
# For the latest version check out:
# http://www.codigomanso.com/en/projects
#
# My blog:
# http://www.codigomanso.com/en/ - English Version
# http://www.codigomanso.com/es/ - Spanish Version
#
import sys
import os
import re
import hashlib
class bbcodebuilder:
    '''
    This class helps to build BBCode programmatically.

    Any attribute name is used as a tag name: the attribute is a function
    whose first parameter is the string that goes inside the tags, and any
    extra parameters are joined with commas and appended as the tag value.

    Examples:
      > bbcode = bbcodebuilder() # create a instance!
      > print bbcode.b ('bold')
      [b]bold[/b]

      > print bbcode.color ('this goes in red', 'red')
      [color=red]this goes in red[/color]

      > print bbcode.url ('Google', 'http://www.google.com')
      [url=http://www.google.com]Google[/url]

      > print bbcode.alist('item 1', 'item 2')
      [list=a]
       [*]item 1
       [*]item 2
      [/list]

    This solution is based on the recipe found on:
      http://code.activestate.com/recipes/576831-simple-bbcode-support/
    '''

    def __getattr__(self, name):
        '''
        Generic getter: returns a function that wraps its first parameter in
        [name]...[/name] and appends any extra parameters as '=a,b,...'.

        Raises AttributeError for double-underscore names so that protocol
        lookups (hasattr, copy, repr, ...) are not answered with a tag
        builder.
        '''
        if name.startswith('__') and name.endswith('__'):
            raise AttributeError(name)

        def buildTag(string, *args):
            value = u''
            if args:
                # formatted through a unicode template so that unicode
                # arguments are accepted as well as numbers
                value = u'=' + u','.join(u'{0}'.format(arg) for arg in args)
            return u'[{0}{1}]{2}[/{0}]'.format(name, value, string)

        return buildTag

    @staticmethod
    def _renderList(opener, items):
        ''' Build a [list] block: opener, one " [*]item" line per item, [/list] '''
        return opener + u''.join(u"\n [*]" + item for item in items) + u"\n[/list]"

    def list(self, *items):
        ''' Unordered list of the given items '''
        return bbcodebuilder._renderList(u'[list]', items)

    def nlist(self, *items):
        ''' Numbered list ([list=1]) of the given items '''
        return bbcodebuilder._renderList(u'[list=1]', items)

    def alist(self, *items):
        ''' Alphabetic list ([list=a]) of the given items '''
        return bbcodebuilder._renderList(u'[list=a]', items)

View file

@ -0,0 +1,251 @@
#!/usr/bin/python
# -*- coding: UTF-8 -*-
#
# Author: Pau Sanchez (contact@pausanchez.com)
# Version: v1.0
# Last Modified: 2010/09/15
#
# For the latest version check out:
# http://www.codigomanso.com/en/projects
#
# My blog:
# http://www.codigomanso.com/en/ - English Version
# http://www.codigomanso.com/es/ - Spanish Version
#
import sys
import os
import re
import hashlib
class bbcodeparser:
    '''
    This class parses BBCode into a internal structure to allow later processing and
    conversion to HTML.

    The parser tries to fix invalid code (like unclosed tags)

    Useful URLs:
      http://en.wikipedia.org/wiki/BBCode
      http://www.bbcode.org/reference.php

    Example:
      > bbcode = bbcodeparser ()
      > bbcode.parse ('[b]text in bold[/b]').html()
      <b>text in bold</b>

      # dump HTML
      > bbcode.parse ('[p][color=red]text in red').html()
      <p><span style="color: red;">text in red</span></p>

      # dump fixed BBCode
      > bbcode.parse ('[p][color=red]text in red').bbcode()
      [p][color=red]text in red[/color][/p]

      > bbcode.parse ('This [b][i]code[/b] will be fixed[/invalid]').bbcode()
      This [b][i]code[/i][/b] will be fixed

      # dump fixed bbcode
      > str (bbcodeparser ('This [b][i]code[/b] will be fixed[/invalid]'))
      This [b][i]code[/i][/b] will be fixed
    '''

    # Class-level defaults only; parse() rebinds both on the instance.
    _bbcode = ''
    _tokens = []

    def __init__(self, bbcode='', fixInvalidCode=True):
        ''' Initialize and parse bbcode string (if any is given)
        '''
        self.parse(bbcode, fixInvalidCode)
        return

    def __str__(self):
        return self.bbcode()

    def parse(self, bbcode=None, fixInvalidCode=True):
        '''
        Tokenize the given BBCode and, when fixInvalidCode is True, repair
        badly nested or unclosed tags. With bbcode=None the previously parsed
        tokens are kept.

        Returns the current object to allow chaining.

        Example:
          code = bbcodeparser()
          code.parse ('[b]bold[/b]', True)
          code.parse ('[b]bold[i]italics[/b]', True)  -> internally will add the missing '[/i]'
        '''
        if bbcode is not None:
            self._bbcode = bbcode
            self._tokens = self.tokenize(bbcode)

        if fixInvalidCode:
            self._tokens = self.fixWrongTags(self._tokens)

        return self

    # return ALL tokens
    def getTokens(self):
        return self._tokens

    def bbcode(self):
        '''
        Dump BBCode again. This is useful for dumping valid BBCode.

        Brackets in text are written backslash-escaped, and the arguments of
        a tag are written in alphabetical order.
        '''
        pieces = []
        for token in self._tokens:
            if token is None:
                continue

            # anything that is not a tag dictionary is text
            if not isinstance(token, dict):
                pieces.append(token.replace(u'[', u'\\[').replace(u']', u'\\]'))
                continue

            tag = token['tag']  # e.g: 'b', '/b', '/u', ...
            if (tag[0] == u'/') or ('args' not in token):
                pieces.append(u'[' + tag + u']')
                continue

            args = token['args']
            argstr = u''

            # the arg with the same name as the tag represents the '=whatever'
            if tag in args:
                value = args[tag]
                # An unquoted value ends at the first whitespace when it is
                # tokenized again, so whitespace anywhere forces quoting.
                if (re.search(r'\s', value) is None) and not value.startswith(u'"'):
                    argstr = u'=' + value
                else:
                    argstr = u'="' + value + u'"'

            # NOTE: embedded double quotes are written as they are; the
            # tokenizer has no escape syntax for them.
            for key in sorted(args):
                if key == tag:  # already processed
                    continue
                argstr += u' ' + key + u'="' + args[key] + u'"'

            pieces.append(u'[' + tag + argstr + u']')

        return u''.join(pieces)

    def html(self, allowClassAttr=False, doDeepCopy=True):
        '''
        Convert current parsed code to HTML

        @allowClassAttr
          Is something like [b class="asdf"] allowed?

        @doDeepCopy
          True:  it does a deep copy of tokens so this list will remain unchanged
          False: tokens will be modified internally, which avoids the copy;
                 it's a good idea to use False when the string parsed is huge
                 and this is the last operation on the string

        Example:
          code = bbcodeparser ('[b]bold[/b]')
          code.html() -> '<b>bold</b>'
        '''
        # imported here rather than at module level
        from bbcode2html import bbcode2html
        return bbcode2html.convertToHTML(self._tokens, allowClassAttr=allowClassAttr, doDeepCopy=doDeepCopy)

    @staticmethod
    def fixWrongTags(inTokenList):
        '''
        Return a new token list where every opened tag gets closed.

        - a closing tag first closes every tag opened after its own opener
        - a closing tag left without any open tag is dropped
        - opening the tag that is currently the innermost open one closes the
          previous occurrence first
        - tags still open at the end are closed, innermost first
        '''
        opened = []  # stack of the tag names currently open
        outTokenList = []

        for token in inTokenList:
            # normal string... do nothing
            if not isinstance(token, dict):
                outTokenList.append(token)
                continue

            tag = token['tag']

            if tag[0] == '/':
                # close whatever was opened after the tag being closed
                while opened and (opened[-1] != tag[1:]):
                    outTokenList.append({'tag': '/' + opened.pop()})

                if opened:
                    opened.pop()
                    outTokenList.append(token)
            else:
                if opened and (tag == opened[-1]):
                    # same tag opened twice in a row: close it, open it again
                    # (it stays on the stack, so nothing is pushed)
                    outTokenList.append({'tag': '/' + tag})
                else:
                    opened.append(tag)
                outTokenList.append(token)

        # close all elements that have not been closed
        while opened:
            outTokenList.append({'tag': '/' + opened.pop()})

        return outTokenList

    @staticmethod
    def tokenize(code):
        '''
        Tokenize BBCode tags and parameters.

        Returns the token list using a internal format. See the example:
          [
            { 'tag' : 'p', 'args' : { 'font' : 'arial' } },
            'This is ',
            { 'tag' : 'url', 'args' : {'url' : 'http://www.google.com'} },
            'a link to google',
            { 'tag' : '/url' },
            { 'tag' : '/p' }
          ]

        Tag and argument names are lower-cased. Bracketed text that is not a
        tag (for instance an opening bracket followed by a space) is kept as
        plain text. Backslash-escaped brackets become literal brackets.
        '''
        re_tags = re.compile(r'(\[[^]]+\])', re.DOTALL | re.UNICODE)
        re_tagName = re.compile(r'\[([^]=\s]+)([^]]*)\]', re.DOTALL | re.UNICODE)
        re_tagArgs = re.compile(r'\s*([\w]*)=(("([^"]+)")|([^\s]+))', re.DOTALL | re.UNICODE)

        # Escaped braces are swapped for placeholders that cannot collide
        # with the text. md5 needs bytes, so text is encoded first.
        hashInput = (code if isinstance(code, bytes) else code.encode('utf-8'))
        unique = hashlib.md5(hashInput).hexdigest()
        openMark = unique + '_OPEN_BRACE'
        closeMark = unique + '_CLOSE_BRACE'
        code = code.replace('\\[', openMark).replace('\\]', closeMark)

        def restore(text):
            # put the escaped braces back as literal braces
            return text.replace(openMark, '[').replace(closeMark, ']')

        outTokenList = []
        for piece in re_tags.split(code):
            if len(piece) == 0:
                continue

            match = (re_tagName.match(piece) if piece[0] == '[' else None)
            if match is None:
                # plain text, or something in brackets that is not a tag
                outTokenList.append(restore(piece))
                continue

            tagName = match.group(1)
            rawArgs = match.group(2)
            tagToken = {'tag': tagName.lower()}

            # parse arguments (if any)
            if len(rawArgs) > 0:
                tagArgs = {}
                for arg in re_tagArgs.findall(rawArgs):
                    # if the argument has no name, use the tagName itself
                    argName = (arg[0] if arg[0] != '' else tagName)
                    # group 3 is the quoted form, group 4 the bare one
                    argValue = (arg[3] if (arg[1][0] == '"') else arg[4])
                    tagArgs[argName.lower()] = restore(argValue)
                tagToken['args'] = tagArgs

            outTokenList.append(tagToken)

        return outTokenList

View file

@ -0,0 +1,81 @@
AUTHOR
Pau Sanchez
http://www.codigomanso.com/
VERSION:
bbcodeutils v1.0
LICENSE
This code is licensed under Creative Commons Attribution 3.0
http://creativecommons.org/licenses/by/3.0/
You can use this python module or any part of the code you want as long as you add
my name as a contributor to your project.
DESCRIPTION
This module can be used to produce HTML from BBCode, to generate BBCode or to fix invalid BBCode.
The classes are:
- bbcodeparser
- bbcodebuilder
- bbcode2html
You can use bbcodeparser to parse BBCode and to produce output in any format you want.
Open each Python file for more information and usage examples of its class.
test.py is also a good source of examples.
To run the unit tests:
> python test.py
To run the performance test:
> python test.py BBCodeTests.performanceTest
EXAMPLES OF BBCode:
[b] -> bold
[u] -> underline
[i] -> italic
[center] -> center the text inside
[color=XXX] -> change color of text
[size=XXX] -> change size of text
Lists:
[ul] -> unordered list
[ol] -> ordered list
[li] -> list item
[list] -> start unordered list
[*] -> list item
[list=1] -> start a list of numbers
[list=a] -> start a list of alphabetic characters
Advanced:
[url] -> link to url
[url=http://link/url/]text[/url]
[url link=http://link/url/ title="This is the title"]text[/url]
[img]http://to/image[/img]
[img=230x330]http://to/image[/img]
[img="Alt text here"]http://to/image[/img]
[img="Alt text here" width=320 height=240]http://to/image[/img]
[email]asdf@asdf.com[/email]
[email=john@asdf.com]John Smith[/email]
[google]search this[/google]
[wikipedia]Tom Hanks[/wikipedia]
[wikipedia lang=es]Tom Hanks[/wikipedia]
Tables:
[table]
[tr]
[th]
[td]
Advanced:
[google]
[wikipedia]

View file

@ -0,0 +1,420 @@
#!/usr/bin/python
# -*- coding: UTF-8 -*-
#
# Author: Pau Sanchez (contact@pausanchez.com)
# Version: v1.0
# Last Modified: 2010/09/15
#
# For the latest version check out:
# http://www.codigomanso.com/en/projects
#
# My blog:
# http://www.codigomanso.com/en/ - English Version
# http://www.codigomanso.com/es/ - Spanish Version
#
from bbcodeparser import bbcodeparser
from bbcodebuilder import bbcodebuilder
import random
import unittest
class BBCodeTests(unittest.TestCase):
def setUp (self):
self.bbcode = bbcodeparser()
return
def testConstructor (self):
self.assertEqual (bbcodeparser ('whatever').html(), 'whatever')
self.assertEqual (bbcodeparser ('[b]bold[/b]').html(), '<b>bold</b>')
self.assertEqual (str (bbcodeparser ('[b]bold[/b]')), '[b]bold[/b]')
return
def testBold (self):
self.assertEqual (self.bbcode.parse ('whatever').html(), 'whatever')
self.assertEqual (self.bbcode.parse ('[b]bold[/b]').html(), '<b>bold</b>')
self.assertEqual (self.bbcode.parse ('[B]bold[/b]').html(), '<b>bold</b>')
self.assertEqual (self.bbcode.parse ('this is [B]bold[/B]').html(), 'this is <b>bold</b>')
return
def testItalic (self):
self.assertEqual (self.bbcode.parse ('[i]italic[/i]').html(), '<i>italic</i>')
return
def testUnderline (self):
self.assertEqual (self.bbcode.parse ('[u]italic[/u]').html(), '<u>italic</u>')
return
def testURLs (self):
self.assertEqual (
self.bbcode.parse ('[url]http://www.google.com[/url]').html(),
'<a href="http://www.google.com">http://www.google.com</a>'
)
self.assertEqual (
self.bbcode.parse ('[url=http://www.google.com]Google[/url]').html(),
'<a href="http://www.google.com">Google</a>'
)
self.assertEqual (
self.bbcode.parse ('[url="http://www.google.com"]Google[/url]').html(),
'<a href="http://www.google.com">Google</a>'
)
self.assertEqual (
self.bbcode.parse ('[url="http://www.google.com" title="Search Engine"]Google[/url]').html(),
'<a href="http://www.google.com" title="Search Engine">Google</a>'
)
self.assertEqual (
self.bbcode.parse ('[url link="http://www.google.com"]Google[/url]').html(),
'<a href="http://www.google.com">Google</a>'
)
return
def testPTag (self):
self.assertEqual (
self.bbcode.parse ('[p color=#0000ff]blue[/p]').html(),
u'<p style="color: #0000ff;">blue</p>'
)
self.assertEqual (
self.bbcode.parse ('[p size=12]12pt font[/p]').html(),
u'<p style="font-size: 12pt;">12pt font</p>'
)
self.assertEqual (
self.bbcode.parse ('[p font=arial]arial[/p]').html(),
u'<p style="font-family: arial;">arial</p>'
)
self.assertEqual (
self.bbcode.parse ('[p font=arial color=blue size=14]blue 14pt arial').html(),
u'<p style="color: blue; font-size: 14pt; font-family: arial;">blue 14pt arial</p>'
)
self.assertEqual (
self.bbcode.parse ('[p class=whatever]text[/p]').html(),
u'<p>text</p>'
)
return
def testColorTag(self):
    # [color=...] is expected to become a <span> with an inline color style.
    rendered = self.bbcode.parse('[color=#0000ff]blue[/color]').html()
    self.assertEqual(rendered, u'<span style="color: #0000ff;">blue</span>')
    return
def testSizeTag(self):
    # [size=N] is expected to become a <span> with font-size in points.
    rendered = self.bbcode.parse('[size=12]12pt font[/size]').html()
    self.assertEqual(rendered, u'<span style="font-size: 12pt;">12pt font</span>')
    return
def testEmail(self):
    # Each pair is (BBCode input, expected HTML): the bare form uses the
    # content as the address, the [email=...] form takes it from the tag.
    expectations = (
        ('[email]asdf@asdf.com[/email]',
         u'<a href="mailto:asdf@asdf.com">asdf@asdf.com</a>'),
        ('[email=john@smith.com]John Smith[/email]',
         u'<a href="mailto:john@smith.com">John Smith</a>'),
    )
    for markup, expected in expectations:
        self.assertEqual(self.bbcode.parse(markup).html(), expected)
    return
def testImgTag(self):
    # Each pair is (BBCode input, expected HTML). The tag value is
    # expected to be read either as alt text or as WIDTHxHEIGHT, and
    # explicit width/height/title attributes are expected to be kept.
    expectations = (
        ('[img]http://www.codigomanso.com/image.jpg[/img]',
         u'<img src="http://www.codigomanso.com/image.jpg" />'),
        ('[img="This is the ALT of the image"]http://www.codigomanso.com/image.jpg[/img]',
         u'<img alt="This is the ALT of the image" src="http://www.codigomanso.com/image.jpg" />'),
        ('[img=320x200]http://www.codigomanso.com/image.jpg[/img]',
         u'<img height="200" src="http://www.codigomanso.com/image.jpg" width="320" />'),
        ('[img=320x200 title="Image Test"]http://www.codigomanso.com/image.jpg[/img]',
         u'<img height="200" src="http://www.codigomanso.com/image.jpg" title="Image Test" width="320" />'),
        ('[img="whatever" width=320 height="212" title="Image Test"]http://www.codigomanso.com/image.jpg[/img]',
         u'<img alt="whatever" height="212" src="http://www.codigomanso.com/image.jpg" title="Image Test" width="320" />'),
    )
    for markup, expected in expectations:
        self.assertEqual(self.bbcode.parse(markup).html(), expected)
    return
def testGoogleURL(self):
    # [google]terms[/google] is expected to link to a Google search,
    # with spaces in the query encoded as '+'.
    expectations = (
        ('[google]asdf[/google]',
         u'<a href="http://www.google.com/search?q=asdf">asdf</a>'),
        ('[google]Tom Hanks[/google]',
         u'<a href="http://www.google.com/search?q=Tom+Hanks">Tom Hanks</a>'),
    )
    for markup, expected in expectations:
        self.assertEqual(self.bbcode.parse(markup).html(), expected)
    return
def testWikipediaURL(self):
    # Each pair is (BBCode input, expected HTML). The language is
    # expected to be selectable via language=, lang= or the tag value,
    # with 'www' as the host prefix when none is given.
    expectations = (
        ('[wikipedia]Tom Hanks[/wikipedia]',
         u'<a href="http://www.wikipedia.org/wiki/Tom_Hanks">Tom Hanks</a>'),
        ('[wikipedia language=en]Tom Hanks[/wikipedia]',
         u'<a href="http://en.wikipedia.org/wiki/Tom_Hanks">Tom Hanks</a>'),
        ('[wikipedia lang=es]Tom Hanks[/wikipedia]',
         u'<a href="http://es.wikipedia.org/wiki/Tom_Hanks">Tom Hanks</a>'),
        ('[wikipedia=es]Tom Hanks[/wikipedia]',
         u'<a href="http://es.wikipedia.org/wiki/Tom_Hanks">Tom Hanks</a>'),
    )
    for markup, expected in expectations:
        self.assertEqual(self.bbcode.parse(markup).html(), expected)
    return
def testListTags(self):
    # Each pair is (BBCode input, expected HTML). Covers [ul], [ol],
    # [list] with [li] or [*] items, and [list=1] as a typed <ol>.
    expectations = (
        ('[ul][li]item 1[/li][li]item 2[/li][/ul]',
         u'<ul><li>item 1</li><li>item 2</li></ul>'),
        ('[ol][li]item 1[/li][li]item 2[/li][/ol]',
         u'<ol><li>item 1</li><li>item 2</li></ol>'),
        ('[list][li]item 1[/li][li]item 2[/li][/list]',
         u'<ul><li>item 1</li><li>item 2</li></ul>'),
        ('[list][*]item 1[*]item 2[/list]',
         u'<ul><li>item 1</li><li>item 2</li></ul>'),
        ('[list=1][li]item 1[/li][li]item 2[/li][/list]',
         u'<ol type="1"><li>item 1</li><li>item 2</li></ol>'),
    )
    for markup, expected in expectations:
        self.assertEqual(self.bbcode.parse(markup).html(), expected)
    return
def testInvalidCode(self):
    # Each pair is (malformed BBCode, expected HTML). Unknown tags are
    # expected to be dropped, stray closers ignored, unclosed tags
    # closed, and literal '<' / '>' HTML-escaped.
    expectations = (
        ('[invalid]valid text[/invalid]', 'valid text'),
        ('[b]bold and [i]italics[/b]', '<b>bold and <i>italics</i></b>'),
        ('[/b]invalid[/b][/p]', 'invalid'),
        ('[p][b]bold', '<p><b>bold</b></p>'),
        ('[p][b]a <b>', '<p><b>a &lt;b&gt;</b></p>'),
        ('[ol][li]item 1[li]item 2[/li][/ol]',
         u'<ol><li>item 1</li><li>item 2</li></ol>'),
        # Raw string: the input contains literal backslashes before the
        # brackets. '\[' in a non-raw literal is an invalid escape
        # sequence, although it produces the same characters.
        (r'[b]\[b\] stands for [b]bold[/b]',
         u'<b>[b] stands for </b><b>bold</b>'),
    )
    for markup, expected in expectations:
        self.assertEqual(self.bbcode.parse(markup).html(), expected)
    return
def testEscapedBrackets(self):
    # Each pair is (BBCode input, expected HTML). Backslash-escaped
    # brackets are expected to come out as literal brackets, not tags.
    # Inputs are raw strings because they contain literal backslashes;
    # '\[' in a non-raw literal is an invalid escape sequence, although
    # it produces the same characters.
    expectations = (
        (r'\[b\]not bold\[/b\]',
         u'[b]not bold[/b]'),
        (r'[b]\[b\] stands for bold[/b]',
         u'<b>[b] stands for bold</b>'),
        (r'\[b\][b]stands for bold[/b]',
         u'[b]<b>stands for bold</b>'),
        (r'\[b\][b]stands for bold[/b] just like <b> in HTML',
         u'[b]<b>stands for bold</b> just like &lt;b&gt; in HTML'),
    )
    for markup, expected in expectations:
        self.assertEqual(self.bbcode.parse(markup).html(), expected)
def testBigExample(self):
    # End-to-end check on a document mixing headings, styled paragraphs,
    # mis-nested inline tags, images, links, lists, a table and an
    # unknown tag, rendered with class attributes allowed.
    inputText = """check this out
[h1 class=circle]heading[/h1]
[p size=14 color=blue font="verdana, Times New Roman"]This is [b] bold [/b] and this [i]italic[/i] and this is [color=red]red[/color] and this is [color="red"]also red[/color].
[/p]
fix [b][i]bold [font=verdana][size=12]and[/size][/font] italic[/b]
[img]http://www.codigomanso.com/b.jpg[/img]
[url]http://www.codigomanso.com/[/url]
[url=http://www.codigomanso.com/]Codigo Manso[/url]
[uRl link=http://www.codigomanso.com title="Codigo Manso Blog"]Codigo Manso[/url]
[ul]
[Li]item 1[/Li]
[li]item 2[/LI]
[/UL]
[list=1 ]
[*]item 1
[*]item 2
[/list]
[table class="big"]
[tr]
[th]big[/th]
[/tr]
[/table]
[invalid class="extra"]whatever[/invalid]"""
    out = self.bbcode.parse(inputText).html(allowClassAttr = True)
    # assertEqual rather than the deprecated assertEquals alias, which
    # also matches the other tests in this file.
    self.assertEqual(out, '''check this out
<h1 class="circle">heading</h1>
<p style="color: blue; font-size: 14pt; font-family: verdana, Times New Roman;">This is <b> bold </b> and this <i>italic</i> and this is <span style="color: red;">red</span> and this is <span style="color: red;">also red</span>.
</p>
fix <b><i>bold <span style="font-family: verdana;"><span style="font-size: 12pt;">and</span></span> italic</i></b>
<img src="http://www.codigomanso.com/b.jpg" />
<a href="http://www.codigomanso.com/">http://www.codigomanso.com/</a>
<a href="http://www.codigomanso.com/">Codigo Manso</a>
<a href="http://www.codigomanso.com" title="Codigo Manso Blog">Codigo Manso</a>
<ul>
<li>item 1</li>
<li>item 2</li>
</ul>
<ol type="1">
<li>item 1
</li><li>item 2
</li></ol>
<table class="big">
<tr>
<th>big</th>
</tr>
</table>
whatever''')
def testBBCodeDumper(self):
    # Each pair is (BBCode input, expected BBCode written back out by
    # bbcode()). Well-formed input is expected to round-trip, unclosed
    # tags to be closed, and unknown closers to be dropped.
    # assertEqual replaces the deprecated assertEquals alias, and the
    # strings containing literal backslashes are raw to avoid the
    # invalid '\[' escape sequence (the characters are the same).
    expectations = (
        ('[b]bold[/b]',
         '[b]bold[/b]'),
        ('[color=red]text in red[/color]',
         '[color=red]text in red[/color]'),
        ('[p][color=red]text in red',
         '[p][color=red]text in red[/color][/p]'),
        ('This [b][i]code[/b] will be fixed[/invalid]',
         'This [b][i]code[/i][/b] will be fixed'),
        (r'\[[url]http://www.codigomanso.com/en[/url]\]',
         r"\[[url]http://www.codigomanso.com/en[/url]\]"),
    )
    for markup, expected in expectations:
        self.assertEqual(self.bbcode.parse(markup).bbcode(), expected)
def performanceTest(self):
    '''
This test checks the performance of parse and html operations
To run this test type:
> python test.py BBCodeTests.performanceTest
'''
    inputText = """check this out
[h1 class=circle]heading[/h1]
[p size=14 color=blue font="verdana, Times New Roman"]This is [b] bold [/b] and this [i]italic[/i] and this is [color=red]red[/color] and this is [color="red"]also red[/color].
[/p]
fix [b][i]bold [font=verdana][size=12]and[/size][/font] italic[/b]
[img]http://www.codigomanso.com/b.jpg[/img]
[url]http://www.codigomanso.com/[/url]
[url=http://www.codigomanso.com/]Codigo Manso[/url]
[uRl link=http://www.codigomanso.com title="Codigo Manso Blog"]Codigo Manso[/url]
[ul]
[Li]item 1[/Li]
[li]item 2[/LI]
[/UL]
[list=1 ]
[*]item 1
[*]item 2
[/list]
[table class="big"]
[tr]
[th]big[/th]
[/tr]
[/table]
[invalid class="extra"]whatever[/invalid]"""
    import time

    # Double the sample 12 times (4096 copies) to get a large input.
    start = time.time()
    for i in range(0, 12):
        inputText += inputText
    # Each print takes one pre-formatted string in parentheses, which
    # prints the same text whether print is a statement or a function.
    print("len(inputText) = %.2f MB (took %.2f seconds)" % (len(inputText)/(1024.0*1024.0), time.time() - start))

    bbcode = bbcodeparser()

    # Time the parse step on its own.
    start = time.time()
    bbcode.parse (inputText)
    total = (time.time() - start)
    print("time (bbcode.parse()) = %f" % total)
    print(" >> %.2f chars/second" % (len(inputText) / total))

    # Time HTML generation from the already-parsed input.
    start = time.time()
    bbcode.html(doDeepCopy = False)
    total = (time.time() - start)
    print("time (bbcode.html()) = %f" % total)
    print(" >> %.2f chars/second" % (len(inputText) / total))
    return
def testCodeBuilder(self):
    # Check the BBCode strings produced by the bbcodebuilder helpers.
    # assertEqual replaces the deprecated assertEquals alias and matches
    # the other tests in this file.
    bbcode = bbcodebuilder ()
    self.assertEqual(bbcode.b ('bold'), u'[b]bold[/b]')
    self.assertEqual(bbcode.color ('this goes in red', 'red'), u'[color=red]this goes in red[/color]')
    self.assertEqual(bbcode.url ('Google', 'http://www.google.com'), u'[url=http://www.google.com]Google[/url]')
    self.assertEqual(bbcode.alist('item 1', 'item 2'), u"[list=a]\n [*]item 1\n [*]item 2\n[/list]")
    return
# Run the unittest command-line runner when this module is executed as a
# script rather than imported.
if __name__ == '__main__':
    unittest.main()

View file

@ -319,12 +319,25 @@ class Story:
def isList(self,listname):
    """Return True if `listname` is a key of self.listables."""
    # The 'in' operator replaces dict.has_key(), which exists only on
    # Python 2 dicts; 'in' gives the same answer and works on both.
    return listname in self.listables
def addChapter(self, title, html, configurable=None):
    """Append a (title, html) tuple to self.chapters.

    If `configurable` is given and both its 'strip_chapter_numbers' and
    'chapter_title_strip_pattern' settings are truthy, every match of
    that regular expression is removed from `title` before it is stored.
    """
    if configurable and configurable.getConfig('strip_chapter_numbers'):
        # Look the pattern up once and reuse it for the substitution.
        strip_pattern = configurable.getConfig('chapter_title_strip_pattern')
        if strip_pattern:
            title = re.sub(strip_pattern, "", title)
    self.chapters.append( (title,html) )
def getChapters(self, configurable=None):
    """Chapters will be tuples of (title,html)

    If `configurable` is given and both its 'add_chapter_numbers' and
    'chapter_title_add_pattern' settings are truthy, a new list is
    returned in which each title is the pattern (a string.Template)
    filled in with ${index} (counting from 1) and ${title}. Otherwise
    self.chapters itself is returned.
    """
    if not (configurable and
            configurable.getConfig('add_chapter_numbers') and
            configurable.getConfig('chapter_title_add_pattern')):
        return self.chapters
    # Build the template once rather than once per chapter.
    add_pattern = string.Template(configurable.getConfig('chapter_title_add_pattern'))
    return [(add_pattern.substitute({'index':index,'title':title}),html)
            for index, (title,html) in enumerate(self.chapters, 1)]
def formatFileName(self,template,allowunsafefilename=True):
values = origvalues = self.getAllMetadata()

View file

@ -184,10 +184,10 @@ class BaseStoryWriter(Configurable):
names as Story.metadata, but ENTRY should use index and chapter.
"""
# Only do TOC if there's more than one chapter and it's configured.
if len(self.story.getChapters()) > 1 and self.getConfig("include_tocpage") and not self.metaonly :
if len(self.story.getChapters(self)) > 1 and self.getConfig("include_tocpage") and not self.metaonly :
self._write(out,START.substitute(self.story.getAllMetadata()))
for index, (title,html) in enumerate(self.story.getChapters()):
for index, (title,html) in enumerate(self.story.getChapters(self)):
if html:
self._write(out,ENTRY.substitute({'chapter':title, 'index':"%04d"%(index+1)}))

View file

@ -441,7 +441,7 @@ div { margin: 0pt; padding: 0pt; }
if self.getConfig("include_titlepage"):
items.append(("title_page","OEBPS/title_page.xhtml","application/xhtml+xml","Title Page"))
itemrefs.append("title_page")
if len(self.story.getChapters()) > 1 and self.getConfig("include_tocpage") and not self.metaonly :
if len(self.story.getChapters(self)) > 1 and self.getConfig("include_tocpage") and not self.metaonly :
items.append(("toc_page","OEBPS/toc_page.xhtml","application/xhtml+xml","Table of Contents"))
itemrefs.append("toc_page")
@ -449,7 +449,7 @@ div { margin: 0pt; padding: 0pt; }
items.append(("log_page","OEBPS/log_page.xhtml","application/xhtml+xml","Update Log"))
itemrefs.append("log_page")
for index, (title,html) in enumerate(self.story.getChapters()):
for index, (title,html) in enumerate(self.story.getChapters(self)):
if html:
i=index+1
items.append(("file%04d"%i,
@ -587,7 +587,7 @@ div { margin: 0pt; padding: 0pt; }
outputepub.writestr("OEBPS/log_page.xhtml",logpageIO.getvalue())
logpageIO.close()
for index, (title,html) in enumerate(self.story.getChapters()):
for index, (title,html) in enumerate(self.story.getChapters(self)):
if html:
logging.debug('Writing chapter text for: %s' % title)
fullhtml = self.EPUB_CHAPTER_START.substitute({'chapter':title, 'index':index+1}) + html + self.EPUB_CHAPTER_END.substitute({'chapter':title, 'index':index+1})

View file

@ -94,7 +94,7 @@ ${output_css}
self.HTML_TOC_ENTRY,
self.HTML_TOC_PAGE_END)
for index, (title,html) in enumerate(self.story.getChapters()):
for index, (title,html) in enumerate(self.story.getChapters(self)):
if html:
logging.debug('Writing chapter text for: %s' % title)
self._write(out,self.HTML_CHAPTER_START.substitute({'chapter':title, 'index':"%04d"%(index+1)}))

View file

@ -169,7 +169,7 @@ ${value}<br />
# files.append(tocpageIO.getvalue())
# tocpageIO.close()
for index, (title,html) in enumerate(self.story.getChapters()):
for index, (title,html) in enumerate(self.story.getChapters(self)):
if html:
logging.debug('Writing chapter text for: %s' % title)
fullhtml = self.MOBI_CHAPTER_START.substitute({'chapter':title, 'index':index+1}) + html + self.MOBI_CHAPTER_END.substitute({'chapter':title, 'index':index+1})

View file

@ -133,7 +133,7 @@ End file.
self._write(out,self.lineends(self.wraplines(towrap)))
for index, (title,html) in enumerate(self.story.getChapters()):
for index, (title,html) in enumerate(self.story.getChapters(self)):
if html:
logging.debug('Writing chapter text for: %s' % title)
self._write(out,self.lineends(self.wraplines(removeAllEntities(self.TEXT_CHAPTER_START.substitute({'chapter':title, 'index':index+1})))))

View file

@ -152,6 +152,45 @@ extratags: FanFiction
# ${category} => Buffy:? [tT]he Vampire Slayer => BuffyCover
# ${category} => Star Trek => StarTrekCover
## If set false, the summary will have all html stripped.
## Both this and include_images must be true to get images in the
## summary.
keep_summary_html:true
## Don't like the numbers at the start of chapter titles on some
## sites? You can use strip_chapter_numbers to strip them off. Just
## want to make them all look the same? Strip them off, then add them
## back on with add_chapter_numbers. Don't like the way it strips
## numbers or adds them back? See chapter_title_strip_pattern and
## chapter_title_add_pattern.
strip_chapter_numbers:false
add_chapter_numbers:false
## (Two versions of chapter_title_strip_pattern are shown below. You
## should only have one uncommented.)
## This version will remove the leading number from:
## "1." => ""
## "1. The Beginning" => "The Beginning"
## "1: Start" => "Start"
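## "2 - Chapter the second" => "Chapter the second"
## (A comma directly after the number, as in "2, Chapter the second",
## is not matched by this pattern; add ',' inside the brackets if you
## need it.)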
## etc
chapter_title_strip_pattern:^[0-9]+[\.: -]+
## This version will strip all of the above *plus* remove 'Chapter 1':
## "Chapter 1" => ""
## "1. Chapter 1" => ""
## "1. Chapter 1, Bob's First Clue" => "Bob's First Clue"
## "Chapter 2 - Pirates Place" => "Pirates Place"
## etc
#chapter_title_strip_pattern:^([0-9]+[\.: -]+)?(Chapter *[0-9]+[\.:, -]*)?
## Uses Python string.Template substitution. ${index} is the chapter
## number (counting from 1) and ${title} is the chapter title after
## chapter_title_strip_pattern has been applied. Those are the only
## variables available.
## "The Beginning" => "1. The Beginning"
chapter_title_add_pattern:${index}. ${title}
## Each output format has a section that overrides [defaults]
[html]
@ -256,11 +295,6 @@ output_css:
## stories. Images will be converted to jpg for size if possible.
#include_images:false
## If not set, the summary will have all html stripped for safety.
## Both this and include_images must be true to get images in the
## summary.
#keep_summary_html:false
## If set, the first image found will be made the cover image. If
## keep_summary_html is true, any images in summary will be before any
## in chapters.

View file

@ -6,6 +6,15 @@
## adult content. Uncomment by removing '#' in front of is_adult.
#is_adult:true
## Don't like the numbers at the start of chapter titles on some
## sites? You can use strip_chapter_numbers to strip them off. Just
## want to make them all look the same? Strip them off, then add them
## back on with add_chapter_numbers. Don't like the way it strips
## numbers or adds them back? See chapter_title_strip_pattern and
## chapter_title_add_pattern.
#strip_chapter_numbers:true
#add_chapter_numbers:true
[epub]
## include images from img tags in the body and summary of stories.
## Images will be converted to jpg for size if possible. Images work

View file

@ -1,3 +1,21 @@
# -*- coding: utf-8 -*-
# Copyright 2011 Fanficdownloader team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Other code contributed by Pau Sanchez(bbcodeutils).
To use, do:
python downloader.py [-f (epub|html|txt)] <url>