From 0ca71a6455b5311300a6a178055b648a31725645 Mon Sep 17 00:00:00 2001
From: Jim Miller
Calibre officially distributes plugins from the mobileread.com forum site.
diff --git a/calibre-plugin/ffdl_plugin.py b/calibre-plugin/ffdl_plugin.py
index 25ec2dd3..daad8d29 100644
--- a/calibre-plugin/ffdl_plugin.py
+++ b/calibre-plugin/ffdl_plugin.py
@@ -28,6 +28,7 @@ from calibre.gui2 import error_dialog, warning_dialog, question_dialog, info_dia
from calibre.gui2.dialogs.message_box import ViewLog
from calibre.gui2.dialogs.confirm_delete import confirm
from calibre.utils.date import local_tz
+from calibre.library.comments import sanitize_comments_html
# The class that all interface action plugins must inherit from
from calibre.gui2.actions import InterfaceAction
@@ -36,7 +37,7 @@ from calibre_plugins.fanfictiondownloader_plugin.common_utils import (set_plugin
create_menu_action_unique, get_library_uuid)
from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader import adapters, writers, exceptions
-from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader.htmlcleanup import stripHTML
+#from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader.htmlcleanup import stripHTML
from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader.epubutils import get_dcsource, get_dcsource_chaptercount, get_story_url_from_html
from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader.geturls import get_urls_from_page
@@ -432,13 +433,6 @@ class FanFictionDownLoaderPlugin(InterfaceAction):
print("url:%s"%url)
skip_date_update = False
- ## was self.ffdlconfig, but we need to be able to change it
- ## when doing epub update.
- ffdlconfig = SafeConfigParser()
- ffdlconfig.readfp(StringIO(get_resources("plugin-defaults.ini")))
- ffdlconfig.readfp(StringIO(prefs['personal.ini']))
- adapter = adapters.getAdapter(ffdlconfig,url,fileform)
-
options['personal.ini'] = prefs['personal.ini']
if prefs['includeimages']:
# this is a cheat to make it easier for users.
@@ -448,6 +442,13 @@ keep_summary_html:true
make_firstimage_cover:true
''' + options['personal.ini']
+ ## was self.ffdlconfig, but we need to be able to change it
+ ## when doing epub update.
+ ffdlconfig = SafeConfigParser()
+ ffdlconfig.readfp(StringIO(get_resources("plugin-defaults.ini")))
+ ffdlconfig.readfp(StringIO(options['personal.ini']))
+ adapter = adapters.getAdapter(ffdlconfig,url,fileform)
+
## three tries, that's enough if both user/pass & is_adult needed,
## or a couple tries of one or the other
for x in range(0,2):
@@ -476,7 +477,7 @@ make_firstimage_cover:true
book['author_sort'] = book['author'] = story.getList("author", removeallentities=True)
book['publisher'] = story.getMetadata("site")
book['tags'] = writer.getTags(removeallentities=True) # getTags could be moved up into adapter now. Adapter didn't used to know the fileform
- book['comments'] = stripHTML(story.getMetadata("description")) #, removeallentities=True) comments handles entities better.
+ book['comments'] = sanitize_comments_html(story.getMetadata("description"))
book['series'] = story.getMetadata("series", removeallentities=True)
# adapter.opener is the element with a threadlock. But del
diff --git a/defaults.ini b/defaults.ini
index e73bf2cd..5726210d 100644
--- a/defaults.ini
+++ b/defaults.ini
@@ -164,6 +164,45 @@ extratags: FanFiction
## doesn't work on some devices either.)
#replace_hr: false
+## If set false, the summary will have all html stripped.
+## Both this and include_images must be true to get images in the
+## summary.
+keep_summary_html:true
+
+## Don't like the numbers at the start of chapter titles on some
+## sites? You can use strip_chapter_numbers to strip them off. Just
+## want to make them all look the same? Strip them off, then add them
+## back on with add_chapter_numbers. Don't like the way it strips
+## numbers or adds them back? See chapter_title_strip_pattern and
+## chapter_title_add_pattern.
+strip_chapter_numbers:false
+add_chapter_numbers:false
+
+## (Two versions of chapter_title_strip_pattern are shown below. You
+## should only have one uncommented.)
+## This version will remove the leading number from:
+## "1." => ""
+## "1. The Beginning" => "The Beginning"
+## "1: Start" => "Start"
+## "2, Chapter the second" => "Chapter the second"
+## etc
+chapter_title_strip_pattern:^[0-9]+[\.: -]+
+
+## This version will strip all of the above *plus* remove 'Chapter 1':
+## "Chapter 1" => ""
+## "1. Chapter 1" => ""
+## "1. Chapter 1, Bob's First Clue" => "Bob's First Clue"
+## "Chapter 2 - Pirates Place" => "Pirates Place"
+## etc
+#chapter_title_strip_pattern:^([0-9]+[\.: -]+)?(Chapter *[0-9]+[\.:, -]*)?
+
+## Uses a python template substitution. The ${index} is the 'chapter'
+## number and ${title} is the chapter title, after applying
+## chapter_title_strip_pattern. Those are the only variables available.
+## "The Beginning" => "1. The Beginning"
+chapter_title_add_pattern:${index}. ${title}
+
+
## Each output format has a section that overrides [defaults]
[html]
@@ -271,11 +310,6 @@ output_css:
## stories. Images will be converted to jpg for size if possible.
#include_images:false
-## If not set, the summary will have all html stripped for safety.
-## Both this and include_images must be true to get images in the
-## summary.
-#keep_summary_html:false
-
## If set, the first image found will be made the cover image. If
## keep_summary_html is true, any images in summary will be before any
## in chapters.
diff --git a/fanficdownloader/adapters/adapter_archiveofourownorg.py b/fanficdownloader/adapters/adapter_archiveofourownorg.py
index 25322bea..1d7fddfb 100644
--- a/fanficdownloader/adapters/adapter_archiveofourownorg.py
+++ b/fanficdownloader/adapters/adapter_archiveofourownorg.py
@@ -191,7 +191,7 @@ class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter):
a = metasoup.find('blockquote',{'class':'userstuff'})
if a != None:
- self.setDescription(url,a.text)
+ self.setDescription(url,a)
#self.story.setMetadata('description',a.text)
a = metasoup.find('dd',{'class':"rating tags"})
diff --git a/fanficdownloader/adapters/adapter_ficbooknet.py b/fanficdownloader/adapters/adapter_ficbooknet.py
index 4874db0c..de6ea953 100644
--- a/fanficdownloader/adapters/adapter_ficbooknet.py
+++ b/fanficdownloader/adapters/adapter_ficbooknet.py
@@ -201,7 +201,7 @@ class FicBookNetAdapter(BaseSiteAdapter):
break
summary=soup.find('span', {'class' : 'urlize'})
- self.setDescription(url,summary.text)
+ self.setDescription(url,summary)
#self.story.setMetadata('description', summary.text)
# grab the text for an individual chapter.
diff --git a/fanficdownloader/adapters/adapter_ficwadcom.py b/fanficdownloader/adapters/adapter_ficwadcom.py
index 2adfeae4..964fc31d 100644
--- a/fanficdownloader/adapters/adapter_ficwadcom.py
+++ b/fanficdownloader/adapters/adapter_ficwadcom.py
@@ -124,7 +124,7 @@ class FicwadComSiteAdapter(BaseSiteAdapter):
# description
storydiv = soup.find("div",{"id":"story"})
- self.setDescription(url,storydiv.find("blockquote",{'class':'summary'}).p.string)
+ self.setDescription(url,storydiv.find("blockquote",{'class':'summary'}).p)
#self.story.setMetadata('description', storydiv.find("blockquote",{'class':'summary'}).p.string)
# most of the meta data is here:
diff --git a/fanficdownloader/adapters/adapter_fimfictionnet.py b/fanficdownloader/adapters/adapter_fimfictionnet.py
index 4b61d91f..77d90642 100644
--- a/fanficdownloader/adapters/adapter_fimfictionnet.py
+++ b/fanficdownloader/adapters/adapter_fimfictionnet.py
@@ -27,6 +27,8 @@ from .. import BeautifulSoup as bs
from ..htmlcleanup import stripHTML
from .. import exceptions as exceptions
+from ..bbcodeutils.bbcodeparser import bbcodeparser
+
from base_adapter import BaseSiteAdapter, makeDate
def getClass():
@@ -137,12 +139,19 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter):
# fimfic is the first site with an explicit cover image.
if self.getConfig('include_images') and "image" in storyMetadata.keys():
- coverurl = storyMetadata["image"]
+ if "full_image" in storyMetadata:
+ coverurl = storyMetadata["full_image"]
+ else:
+ coverurl = storyMetadata["image"]
if coverurl.startswith('//static.fimfiction.net'): # fix for img urls missing 'http:'
coverurl = "http:"+coverurl
self.story.addImgUrl(self,self.url,coverurl,self._fetchUrlRaw,cover=True)
-
- self.setDescription(self.url, storyMetadata["description"])
+
+
+ # the fimfic API gives bbcode for desc, not html.
+ # btw, bbcode honors newlines, html doesn't. change newlines to br tags.
+ self.setDescription(self.url,
+ bbcodeparser().parse(storyMetadata["description"]).html(doDeepCopy=False).replace('\r','').replace('\n','<br />'))
# Dates are in Unix time
# Take the publish date from the first chapter posted
@@ -152,8 +161,18 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter):
self.story.setMetadata("dateUpdated", datetime.fromtimestamp(rawDateUpdated))
soup = bs.BeautifulSoup(data).find("div", {"class":"story"})
- for character in [character_icon["title"] for character_icon in soup.findAll("a", {"class":"character_icon"})]:
- self.story.addToList("characters", character)
+ # fimfic stopped putting the char name on or around the char
+ # icon now for some reason. Pull it from the image name with
+ # some heuristics.
+ for character in [character_icon["src"] for character_icon in soup.findAll("img", {"class":"character_icon"})]:
+ # //static.fimfiction.net/images/characters/twilight_sparkle.png
+ # take element [5] of the '/' split, drop the 4-char extension, replace _ with space, capitalize every word (title())
+ char = character.split('/')[5][:-4].replace('_',' ').title()
+ if char == 'Oc':
+ char = "OC"
+ if char == 'Cmc':
+ char = "Cutie Mark Crusaders"
+ self.story.addToList("characters", char)
def getChapterText(self, url):
diff --git a/fanficdownloader/adapters/adapter_test1.py b/fanficdownloader/adapters/adapter_test1.py
index 10d5420b..1e0a1905 100644
--- a/fanficdownloader/adapters/adapter_test1.py
+++ b/fanficdownloader/adapters/adapter_test1.py
@@ -73,8 +73,8 @@ class TestSiteAdapter(BaseSiteAdapter):
self.story.setMetadata(u'title',"Test Story Title "+self.story.getMetadata('storyId'))
self.story.setMetadata('author','Test Author aa')
self.story.setMetadata('storyUrl',self.url)
- self.story.setMetadata('description',u'Description '+self.crazystring+u''' Done
-
+ self.setDescription(self.url,u'Description '+self.crazystring+u''' Done
+
Some more longer description. "I suck at summaries!" "Better than it sounds!" "My first fic"
''')
self.story.setMetadata('datePublished',makeDate("1975-03-15","%Y-%m-%d"))
diff --git a/fanficdownloader/adapters/base_adapter.py b/fanficdownloader/adapters/base_adapter.py
index cbe938d4..9ed89eb5 100644
--- a/fanficdownloader/adapters/base_adapter.py
+++ b/fanficdownloader/adapters/base_adapter.py
@@ -199,7 +199,8 @@ class BaseSiteAdapter(Configurable):
if (self.chapterFirst!=None and index < self.chapterFirst) or \
(self.chapterLast!=None and index > self.chapterLast):
self.story.addChapter(removeEntities(title),
- None)
+ None,
+ self)
else:
if self.oldchapters and index < len(self.oldchapters):
data = self.utf8FromSoup(None,
@@ -208,7 +209,8 @@ class BaseSiteAdapter(Configurable):
else:
data = self.getChapterText(url)
self.story.addChapter(removeEntities(title),
- removeEntities(data))
+ removeEntities(data),
+ self)
self.storyDone = True
# include image, but no cover from story, add default_cover_image cover.
@@ -289,7 +291,7 @@ class BaseSiteAdapter(Configurable):
def setDescription(self,url,svalue):
#print("\n\nsvalue:\n%s\n"%svalue)
if self.getConfig('keep_summary_html'):
- if isinstance(svalue,str) or isinstance(svalue,unicode):
+ if isinstance(svalue,basestring):
svalue = bs.BeautifulSoup(svalue)
self.story.setMetadata('description',self.utf8FromSoup(url,svalue))
else:
diff --git a/fanficdownloader/bbcodeutils/__init__.py b/fanficdownloader/bbcodeutils/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/fanficdownloader/bbcodeutils/bbcode2html.py b/fanficdownloader/bbcodeutils/bbcode2html.py
new file mode 100644
index 00000000..6f3e5352
--- /dev/null
+++ b/fanficdownloader/bbcodeutils/bbcode2html.py
@@ -0,0 +1,325 @@
+#!/usr/bin/python
+# -*- coding: UTF-8 -*-
+#
+# Author: Pau Sanchez (contact@pausanchez.com)
+# Version: v1.0
+# Last Modified: 2010/09/15
+#
+# For the latest version check out:
+# http://www.codigomanso.com/en/projects
+#
+# My blog:
+# http://www.codigomanso.com/en/ - English Version
+# http://www.codigomanso.com/es/ - Spanish Version
+#
+
+import sys
+import os
+import re
+import urllib
+
+class bbcode2html:
+ '''
+ This class gets a parsed BBCode and transforms it to valid HTML
+
+ Useful functions of this class:
+ html
+ convertToHTML
+
+ Example:
+ > parser = bbcodeparser ()
+ > parser.parse ('[b]bold[/b]')
+ > bbcode2html (parser).html()
+ <b>bold</b>
+
+ # This is faster for huge strings but changes the parser object internally
+ > bbcode2html (parser).html(doDeepCopy = False)
+ <b>bold</b>
+ '''
+ def __init__ (self, parser):
+ '''
+ Store the parser whose tokens html() converts when it is called without
+ an explicit parser argument.
+ '''
+ self._parser = parser
+ return
+
+ def html (self, allowClassAttr = False, doDeepCopy = True, parser = None):
+   '''
+   Convert parsed BBCode to HTML.
+
+   @allowClassAttr
+     forwarded unchanged to convertToHTML
+   @doDeepCopy
+     forwarded unchanged to convertToHTML (False is faster but lets the
+     token list be modified in place)
+   @parser
+     optional parser object (anything exposing getTokens()) or an already
+     extracted token list; defaults to the parser given to the constructor
+
+   Example:
+     parser = bbcodeparser ()
+     parser.parse ('[b]bold[/b]')
+     bbcode2html (parser).html() -> '<b>bold</b>'
+   '''
+   if parser is None:
+     parser = self._parser
+
+   # The original test was `instanceof (parser, bbcodeparser)`: `instanceof`
+   # is not a Python builtin and `bbcodeparser` is not imported by this
+   # module, so it raised NameError. Duck-type on getTokens() instead, which
+   # needs neither name and still lets a plain token list pass through.
+   tokens = parser
+   if hasattr (parser, 'getTokens'):
+     tokens = parser.getTokens()
+
+   return bbcode2html.convertToHTML (tokens, allowClassAttr = allowClassAttr, doDeepCopy = doDeepCopy)
+
+ @staticmethod
+ def htmlString (string):
+   '''
+   Return string with the HTML-special characters &, <, > and " replaced by
+   their character entities, so the text can be embedded in HTML output.
+   '''
+   # Ordered pairs rather than a dict: '&' has to be escaped first, otherwise
+   # the ampersands introduced by the other entities would be escaped again
+   # (e.g. '<' -> '&lt;' -> '&amp;lt;'). Dict iteration order gives no such
+   # guarantee.
+   toReplace = (
+     (u'&', u'&amp;'),
+     (u'<', u'&lt;'),
+     (u'>', u'&gt;'),
+     (u'"', u'&quot;'),
+   )
+   for char, entity in toReplace:
+     string = string.replace (char, entity)
+   return string
+
+ @staticmethod
+ def getValidTags ():
+ '''
+ Build the default table of accepted BBCode tags.
+
+ Returns a dict keyed by tag name; each value is a dict of the attribute
+ names accepted for that tag. Tags listed in simpleTags accept no
+ attributes. convertToHTML uses this table when no validTags is passed.
+ '''
+ # NOTE(review): each inner dict presumably maps a BBCode attribute name to
+ # the attribute name used on output (e.g. url/link -> href); the consumer,
+ # _filterInvalidTagsAndAttributes, is not visible here - confirm.
+ simpleTags = ['b', 'u', 'i', 'sup', 'sub', 'ul', 'ol', 'li', 'table', 'tr', 'th', 'td', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6']
+ validTags = {
+ 'p' : { 'color' : 'color', 'size' : 'size', 'font' : 'font' },
+ 'color' : { 'color' : 'color' },
+ 'size' : { 'size' : 'size' },
+ 'font' : { 'font' : 'font' },
+ 'img' : { 'alt' : 'alt', 'title' : 'title', 'width' : 'width' , 'height' : 'height', 'img' : 'img'},
+ 'url' : { 'href' : 'href', 'url' : 'href', 'link' : 'href', 'title' : 'title' },
+ 's' : { },
+ 'code' : { },
+ 'quote' : { },
+ 'list' : { 'list' : 'type' },
+ 'email' : { 'email': 'href'},
+ 'google' : { 'google': 'google'},
+ 'wikipedia' : { 'wikipedia' : 'wikipedia', 'language' : 'language', 'lang' : 'lang'}
+ }
+
+ # simple tags are valid but take no attributes
+ for tag in simpleTags:
+ validTags[tag] = { }
+ return validTags
+
+ @staticmethod
+ def convertToHTML (tokens, allowClassAttr = False, validTags = None, doDeepCopy = True):
+ '''
+ Convert internally parsed BBCode to XHTML
+
+ @doDeepCopy
+ True: it does a deep copy of tokens so this list will remain unchanged
+ False: tokens will be modified internally, but the output will be produced like 5x faster
+ it's a good idea to use False only when this is the last operation
+ '''
+ # do a deep copy
+ if doDeepCopy:
+ import copy
+ tokens = copy.deepcopy (tokens)
+
+ # filter invalid tags and attributes
+ if validTags is None:
+ validTags = bbcode2html.getValidTags()
+
+ bbcode2html._filterInvalidTagsAndAttributes (tokens, validTags, allowClassAttr)
+
+ # Start to convert
+ index = 0
+ tokenLength = len (tokens)
+
+ # use a list for the output (an order of magnitude faster than using string concatenation)
+ htmlList = []
+ lastListOpener = []
+
+ while index < tokenLength:
+
+ if isinstance (tokens [index], basestring):
+ htmlList.append (bbcode2html.htmlString (tokens [index]))
+ index += 1
+ continue
+
+ token = tokens[index]
+ tag = token['tag'] # opening or closing simple tag. e.g: 'b', '/b', '/u', ...
+ tagName = (tag[1:] if tag[0] == '/' else tag)
+ tagOpener = (u'/' if tag[0] == '/' else u'')
+ tokenArgs = (token['args'] if 'args' in token else {})
+
+ # opening or closing simple tag COLOR / SIZE
+ if (tagName in ['p', 'color', 'size', 'font']):
+ style = ''
+ style += ((u' color: ' + tokenArgs['color'] + u';') if ('color' in tokenArgs) else '')
+ style += ((u' font-size: ' + tokenArgs['size'] + u'pt;') if ('size' in tokenArgs) else '')
+ style += ((u' font-family: ' + tokenArgs['font'] + u';') if ('font' in tokenArgs) else '')
+ style = style.strip()
+
+ pArgs = {}
+ if style != '':
+ pArgs ['style'] = style
+
+ if 'class' in tokenArgs:
+ pArgs ['class'] = tokenArgs['class']
+
+ if ('args' not in token) and (tagName != 'p'):
+ if (tagOpener == '/'): # if closing tag, close it
+ htmlList.append (u'')
+ index += 1
+ continue
+
+ if tagName != 'p':
+ tag = tagOpener + u'span'
+
+ htmlList.append (bbcode2html.xml (tag, pArgs))
+
+ # IMG tag
+ elif tag == 'img' and (index+2 < tokenLength):
+ if 'img' in tokenArgs:
+ # has the form of text in red blue 12pt font arial blue 14pt arial text bold a <b> This is bold and this italic and this is red and this is also red.
+
'
+ )
+
+ self.assertEqual (
+ self.bbcode.parse ('[img="This is the ALT of the image"]http://www.codigomanso.com/image.jpg[/img]').html(),
+ u'
'
+ )
+
+ self.assertEqual (
+ self.bbcode.parse ('[img=320x200]http://www.codigomanso.com/image.jpg[/img]').html(),
+ u'
'
+ )
+
+ self.assertEqual (
+ self.bbcode.parse ('[img=320x200 title="Image Test"]http://www.codigomanso.com/image.jpg[/img]').html(),
+ u'
'
+ )
+
+ self.assertEqual (
+ self.bbcode.parse ('[img="whatever" width=320 height="212" title="Image Test"]http://www.codigomanso.com/image.jpg[/img]').html(),
+ u'
'
+ )
+ return
+
+ def testGoogleURL (self):
+ self.assertEqual (
+ self.bbcode.parse ('[google]asdf[/google]').html(),
+ u'asdf'
+ )
+ self.assertEqual (
+ self.bbcode.parse ('[google]Tom Hanks[/google]').html(),
+ u'Tom Hanks'
+ )
+ return
+
+ def testWikipediaURL (self):
+ self.assertEqual (
+ self.bbcode.parse ('[wikipedia]Tom Hanks[/wikipedia]').html(),
+ u'Tom Hanks'
+ )
+
+ self.assertEqual (
+ self.bbcode.parse ('[wikipedia language=en]Tom Hanks[/wikipedia]').html(),
+ u'Tom Hanks'
+ )
+
+ self.assertEqual (
+ self.bbcode.parse ('[wikipedia lang=es]Tom Hanks[/wikipedia]').html(),
+ u'Tom Hanks'
+ )
+
+ self.assertEqual (
+ self.bbcode.parse ('[wikipedia=es]Tom Hanks[/wikipedia]').html(),
+ u'Tom Hanks'
+ )
+ return
+
+ def testListTags (self):
+ self.assertEqual (
+ self.bbcode.parse ('[ul][li]item 1[/li][li]item 2[/li][/ul]').html(),
+ u'
'
+ )
+
+ self.assertEqual (
+ self.bbcode.parse ('[ol][li]item 1[/li][li]item 2[/li][/ol]').html(),
+ u'
'
+ )
+
+ self.assertEqual (
+ self.bbcode.parse ('[list][li]item 1[/li][li]item 2[/li][/list]').html(),
+ u'
'
+ )
+
+ self.assertEqual (
+ self.bbcode.parse ('[list][*]item 1[*]item 2[/list]').html(),
+ u'
'
+ )
+
+ self.assertEqual (
+ self.bbcode.parse ('[list=1][li]item 1[/li][li]item 2[/li][/list]').html(),
+ u'
'
+ )
+ return
+
+ def testInvalidCode (self):
+ self.assertEqual (self.bbcode.parse ('[invalid]valid text[/invalid]').html(), 'valid text')
+ self.assertEqual (
+ self.bbcode.parse ('[b]bold and [i]italics[/b]').html(),
+ 'bold and italics'
+ )
+ self.assertEqual (
+ self.bbcode.parse ('[/b]invalid[/b][/p]').html(),
+ 'invalid'
+ )
+ self.assertEqual (
+ self.bbcode.parse ('[p][b]bold').html(),
+ '
'
+ )
+
+ self.assertEqual (
+ self.bbcode.parse ('[b]\[b\] stands for [b]bold[/b]').html(),
+ u'[b] stands for bold'
+ )
+ return
+
+ def testEscapedBrackets (self):
+ self.assertEqual (
+ self.bbcode.parse ('\[b\]not bold\[/b\]').html(),
+ u'[b]not bold[/b]'
+ )
+
+ self.assertEqual (
+ self.bbcode.parse ('[b]\[b\] stands for bold[/b]').html(),
+ u'[b] stands for bold'
+ )
+
+ self.assertEqual (
+ self.bbcode.parse ('\[b\][b]stands for bold[/b]').html(),
+ u'[b]stands for bold'
+ )
+
+ self.assertEqual (
+ self.bbcode.parse ('\[b\][b]stands for bold[/b] just like in HTML').html(),
+ u'[b]stands for bold just like <b> in HTML'
+ )
+
+ def testBigExample (self):
+ inputText = """check this out
+
+ [h1 class=circle]heading[/h1]
+
+ [p size=14 color=blue font="verdana, Times New Roman"]This is [b] bold [/b] and this [i]italic[/i] and this is [color=red]red[/color] and this is [color="red"]also red[/color].
+ [/p]
+
+ fix [b][i]bold [font=verdana][size=12]and[/size][/font] italic[/b]
+ [img]http://www.codigomanso.com/b.jpg[/img]
+ [url]http://www.codigomanso.com/[/url]
+ [url=http://www.codigomanso.com/]Codigo Manso[/url]
+ [uRl link=http://www.codigomanso.com title="Codigo Manso Blog"]Codigo Manso[/url]
+
+ [ul]
+ [Li]item 1[/Li]
+ [li]item 2[/LI]
+ [/UL]
+
+ [list=1 ]
+ [*]item 1
+ [*]item 2
+ [/list]
+
+ [table class="big"]
+ [tr]
+ [th]big[/th]
+ [/tr]
+ [/table]
+ [invalid class="extra"]whatever[/invalid]"""
+
+ out = self.bbcode.parse (inputText).html(allowClassAttr = True)
+ self.assertEquals (out, '''check this out
+
+ heading
+
+
+ http://www.codigomanso.com/
+ Codigo Manso
+ Codigo Manso
+
+
+
+
+
+
+
+
+
+ whatever''')
+
+
+ def testBBCodeDumper (self):
+ '''
+ Check that parse(...).bbcode() dumps the parsed input back as BBCode.
+ '''
+ # well-formed input comes back unchanged
+ self.assertEquals (
+ self.bbcode.parse ('[b]bold[/b]').bbcode(),
+ '[b]bold[/b]'
+ )
+
+ self.assertEquals (
+ self.bbcode.parse ('[color=red]text in red[/color]').bbcode(),
+ '[color=red]text in red[/color]'
+ )
+ # tags left open in the input are expected to be closed in the dump
+ self.assertEquals (
+ self.bbcode.parse ('[p][color=red]text in red').bbcode(),
+ '[p][color=red]text in red[/color][/p]'
+ )
+
+ # mis-nested tags are expected to be re-closed in order and the unmatched
+ # [/invalid] closer dropped
+ self.assertEquals (
+ self.bbcode.parse ('This [b][i]code[/b] will be fixed[/invalid]').bbcode(),
+ 'This [b][i]code[/i][/b] will be fixed'
+ )
+
+ # backslash-escaped brackets are expected to stay escaped in the dump
+ self.assertEquals (
+ self.bbcode.parse ('\[[url]http://www.codigomanso.com/en[/url]\]').bbcode(),
+ "\[[url]http://www.codigomanso.com/en[/url]\]"
+ )
+
+ def performanceTest(self):
+ '''
+ This test checks the performance of parse and html operations
+
+ To run this test type:
+ > python test.py BBCodeTests.performanceTest
+ '''
+ # Base sample mixing headings, styled paragraphs, mis-nested tags, images,
+ # links, lists, a table and an unknown tag.
+ inputText = """check this out
+
+ [h1 class=circle]heading[/h1]
+
+ [p size=14 color=blue font="verdana, Times New Roman"]This is [b] bold [/b] and this [i]italic[/i] and this is [color=red]red[/color] and this is [color="red"]also red[/color].
+ [/p]
+
+ fix [b][i]bold [font=verdana][size=12]and[/size][/font] italic[/b]
+ [img]http://www.codigomanso.com/b.jpg[/img]
+ [url]http://www.codigomanso.com/[/url]
+ [url=http://www.codigomanso.com/]Codigo Manso[/url]
+ [uRl link=http://www.codigomanso.com title="Codigo Manso Blog"]Codigo Manso[/url]
+
+ [ul]
+ [Li]item 1[/Li]
+ [li]item 2[/LI]
+ [/UL]
+
+ [list=1 ]
+ [*]item 1
+ [*]item 2
+ [/list]
+
+ [table class="big"]
+ [tr]
+ [th]big[/th]
+ [/tr]
+ [/table]
+ [invalid class="extra"]whatever[/invalid]"""
+
+ import time
+ start = time.time()
+
+ # Double the text 12 times (2**12 = 4096 copies of the sample) so the
+ # timings below run on a large input.
+ for i in range(0, 12):
+ inputText += inputText
+
+ # the elapsed time reported here only covers building the input string
+ print "len(inputText) = %.2f MB (took %.2f seconds)" % (len(inputText)/(1024.0*1024.0), time.time() - start)
+
+ # time the parse step on its own
+ bbcode = bbcodeparser()
+ start = time.time()
+ bbcode.parse (inputText)
+ total = (time.time() - start)
+ print "time (bbcode.parse()) = %f" % total
+ print " >> %.2f chars/second" % (len(inputText) / total)
+
+ # time the HTML conversion; doDeepCopy = False skips the token copy, which
+ # is acceptable because the parser is not used again afterwards
+ start = time.time()
+ bbcode.html(doDeepCopy = False)
+ total = (time.time() - start)
+ print "time (bbcode.html()) = %f" % total
+ print " >> %.2f chars/second" % (len(inputText) / total)
+ return
+
+ def testCodeBuilder (self):
+ '''
+ Check that the bbcodebuilder helpers b, color, url and alist return the
+ expected BBCode strings.
+ '''
+ bbcode = bbcodebuilder ()
+ self.assertEquals (bbcode.b ('bold'), u'[b]bold[/b]')
+ self.assertEquals (bbcode.color ('this goes in red', 'red'), u'[color=red]this goes in red[/color]')
+ self.assertEquals (bbcode.url ('Google', 'http://www.google.com'), u'[url=http://www.google.com]Google[/url]')
+ self.assertEquals (bbcode.alist('item 1', 'item 2'), u"[list=a]\n [*]item 1\n [*]item 2\n[/list]")
+ return
+
+if __name__ == '__main__':
+ unittest.main()
+
+
+
diff --git a/fanficdownloader/story.py b/fanficdownloader/story.py
index 0451d558..73008008 100644
--- a/fanficdownloader/story.py
+++ b/fanficdownloader/story.py
@@ -319,12 +319,25 @@ class Story:
def isList(self,listname):
return self.listables.has_key(listname)
- def addChapter(self, title, html):
+ def addChapter(self, title, html, configurable=None):
+ if configurable and \
+ configurable.getConfig('strip_chapter_numbers') and \
+ configurable.getConfig('chapter_title_strip_pattern'):
+ title = re.sub(configurable.getConfig('chapter_title_strip_pattern'),"",title)
self.chapters.append( (title,html) )
- def getChapters(self):
+ def getChapters(self, configurable=None):
"Chapters will be tuples of (title,html)"
- return self.chapters
+ retval = []
+ if configurable and \
+ configurable.getConfig('add_chapter_numbers') and \
+ configurable.getConfig('chapter_title_add_pattern'):
+ for index, (title,html) in enumerate(self.chapters):
+ retval.append( (string.Template(configurable.getConfig('chapter_title_add_pattern')).substitute({'index':index+1,'title':title}),html) )
+ else:
+ retval = self.chapters
+
+ return retval
def formatFileName(self,template,allowunsafefilename=True):
values = origvalues = self.getAllMetadata()
diff --git a/fanficdownloader/writers/base_writer.py b/fanficdownloader/writers/base_writer.py
index 7d72120f..25cca2fe 100644
--- a/fanficdownloader/writers/base_writer.py
+++ b/fanficdownloader/writers/base_writer.py
@@ -184,10 +184,10 @@ class BaseStoryWriter(Configurable):
names as Story.metadata, but ENTRY should use index and chapter.
"""
# Only do TOC if there's more than one chapter and it's configured.
- if len(self.story.getChapters()) > 1 and self.getConfig("include_tocpage") and not self.metaonly :
+ if len(self.story.getChapters(self)) > 1 and self.getConfig("include_tocpage") and not self.metaonly :
self._write(out,START.substitute(self.story.getAllMetadata()))
- for index, (title,html) in enumerate(self.story.getChapters()):
+ for index, (title,html) in enumerate(self.story.getChapters(self)):
if html:
self._write(out,ENTRY.substitute({'chapter':title, 'index':"%04d"%(index+1)}))
diff --git a/fanficdownloader/writers/writer_epub.py b/fanficdownloader/writers/writer_epub.py
index 6db5ef56..312c937b 100644
--- a/fanficdownloader/writers/writer_epub.py
+++ b/fanficdownloader/writers/writer_epub.py
@@ -441,7 +441,7 @@ div { margin: 0pt; padding: 0pt; }
if self.getConfig("include_titlepage"):
items.append(("title_page","OEBPS/title_page.xhtml","application/xhtml+xml","Title Page"))
itemrefs.append("title_page")
- if len(self.story.getChapters()) > 1 and self.getConfig("include_tocpage") and not self.metaonly :
+ if len(self.story.getChapters(self)) > 1 and self.getConfig("include_tocpage") and not self.metaonly :
items.append(("toc_page","OEBPS/toc_page.xhtml","application/xhtml+xml","Table of Contents"))
itemrefs.append("toc_page")
@@ -449,7 +449,7 @@ div { margin: 0pt; padding: 0pt; }
items.append(("log_page","OEBPS/log_page.xhtml","application/xhtml+xml","Update Log"))
itemrefs.append("log_page")
- for index, (title,html) in enumerate(self.story.getChapters()):
+ for index, (title,html) in enumerate(self.story.getChapters(self)):
if html:
i=index+1
items.append(("file%04d"%i,
@@ -587,7 +587,7 @@ div { margin: 0pt; padding: 0pt; }
outputepub.writestr("OEBPS/log_page.xhtml",logpageIO.getvalue())
logpageIO.close()
- for index, (title,html) in enumerate(self.story.getChapters()):
+ for index, (title,html) in enumerate(self.story.getChapters(self)):
if html:
logging.debug('Writing chapter text for: %s' % title)
fullhtml = self.EPUB_CHAPTER_START.substitute({'chapter':title, 'index':index+1}) + html + self.EPUB_CHAPTER_END.substitute({'chapter':title, 'index':index+1})
diff --git a/fanficdownloader/writers/writer_html.py b/fanficdownloader/writers/writer_html.py
index 85ba1fa5..5caf1bca 100644
--- a/fanficdownloader/writers/writer_html.py
+++ b/fanficdownloader/writers/writer_html.py
@@ -94,7 +94,7 @@ ${output_css}
self.HTML_TOC_ENTRY,
self.HTML_TOC_PAGE_END)
- for index, (title,html) in enumerate(self.story.getChapters()):
+ for index, (title,html) in enumerate(self.story.getChapters(self)):
if html:
logging.debug('Writing chapter text for: %s' % title)
self._write(out,self.HTML_CHAPTER_START.substitute({'chapter':title, 'index':"%04d"%(index+1)}))
diff --git a/fanficdownloader/writers/writer_mobi.py b/fanficdownloader/writers/writer_mobi.py
index 8a443b88..07d8704c 100644
--- a/fanficdownloader/writers/writer_mobi.py
+++ b/fanficdownloader/writers/writer_mobi.py
@@ -169,7 +169,7 @@ ${value}
+
+ big
+
# files.append(tocpageIO.getvalue())
# tocpageIO.close()
- for index, (title,html) in enumerate(self.story.getChapters()):
+ for index, (title,html) in enumerate(self.story.getChapters(self)):
if html:
logging.debug('Writing chapter text for: %s' % title)
fullhtml = self.MOBI_CHAPTER_START.substitute({'chapter':title, 'index':index+1}) + html + self.MOBI_CHAPTER_END.substitute({'chapter':title, 'index':index+1})
diff --git a/fanficdownloader/writers/writer_txt.py b/fanficdownloader/writers/writer_txt.py
index 6b9f35b0..47545402 100644
--- a/fanficdownloader/writers/writer_txt.py
+++ b/fanficdownloader/writers/writer_txt.py
@@ -133,7 +133,7 @@ End file.
self._write(out,self.lineends(self.wraplines(towrap)))
- for index, (title,html) in enumerate(self.story.getChapters()):
+ for index, (title,html) in enumerate(self.story.getChapters(self)):
if html:
logging.debug('Writing chapter text for: %s' % title)
self._write(out,self.lineends(self.wraplines(removeAllEntities(self.TEXT_CHAPTER_START.substitute({'chapter':title, 'index':index+1})))))
diff --git a/plugin-defaults.ini b/plugin-defaults.ini
index 49ca8e46..713d3560 100644
--- a/plugin-defaults.ini
+++ b/plugin-defaults.ini
@@ -152,6 +152,45 @@ extratags: FanFiction
# ${category} => Buffy:? [tT]he Vampire Slayer => BuffyCover
# ${category} => Star Trek => StarTrekCover
+## If set false, the summary will have all html stripped.
+## Both this and include_images must be true to get images in the
+## summary.
+keep_summary_html:true
+
+## Don't like the numbers at the start of chapter titles on some
+## sites? You can use strip_chapter_numbers to strip them off. Just
+## want to make them all look the same? Strip them off, then add them
+## back on with add_chapter_numbers. Don't like the way it strips
+## numbers or adds them back? See chapter_title_strip_pattern and
+## chapter_title_add_pattern.
+strip_chapter_numbers:false
+add_chapter_numbers:false
+
+## (Two versions of chapter_title_strip_pattern are shown below. You
+## should only have one uncommented.)
+## This version will remove the leading number from:
+## "1." => ""
+## "1. The Beginning" => "The Beginning"
+## "1: Start" => "Start"
+## "2, Chapter the second" => "Chapter the second"
+## etc
+chapter_title_strip_pattern:^[0-9]+[\.: -]+
+
+## This version will strip all of the above *plus* remove 'Chapter 1':
+## "Chapter 1" => ""
+## "1. Chapter 1" => ""
+## "1. Chapter 1, Bob's First Clue" => "Bob's First Clue"
+## "Chapter 2 - Pirates Place" => "Pirates Place"
+## etc
+#chapter_title_strip_pattern:^([0-9]+[\.: -]+)?(Chapter *[0-9]+[\.:, -]*)?
+
+## Uses a python template substitution. The ${index} is the 'chapter'
+## number and ${title} is the chapter title, after applying
+## chapter_title_strip_pattern. Those are the only variables available.
+## "The Beginning" => "1. The Beginning"
+chapter_title_add_pattern:${index}. ${title}
+
+
## Each output format has a section that overrides [defaults]
[html]
@@ -256,11 +295,6 @@ output_css:
## stories. Images will be converted to jpg for size if possible.
#include_images:false
-## If not set, the summary will have all html stripped for safety.
-## Both this and include_images must be true to get images in the
-## summary.
-#keep_summary_html:false
-
## If set, the first image found will be made the cover image. If
## keep_summary_html is true, any images in summary will be before any
## in chapters.
diff --git a/plugin-example.ini b/plugin-example.ini
index 1c92afc3..11839b22 100644
--- a/plugin-example.ini
+++ b/plugin-example.ini
@@ -6,6 +6,15 @@
## adult content. Uncomment by removing '#' in front of is_adult.
#is_adult:true
+## Don't like the numbers at the start of chapter titles on some
+## sites? You can use strip_chapter_numbers to strip them off. Just
+## want to make them all look the same? Strip them off, then add them
+## back on with add_chapter_numbers. Don't like the way it strips
+## numbers or adds them back? See chapter_title_strip_pattern and
+## chapter_title_add_pattern.
+#strip_chapter_numbers:true
+#add_chapter_numbers:true
+
[epub]
## include images from img tags in the body and summary of stories.
## Images will be converted to jpg for size if possible. Images work
diff --git a/readme.txt b/readme.txt
index b602c53d..e21a5b1b 100644
--- a/readme.txt
+++ b/readme.txt
@@ -1,3 +1,21 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2011 Fanficdownloader team
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Other code contributed by Pau Sanchez(bbcodeutils).
+
To use, do:
python downloader.py [-f (epub|html|txt)]