diff --git a/calibre-plugin/about.txt b/calibre-plugin/about.txt index 9ea9cd05..6fca52ef 100644 --- a/calibre-plugin/about.txt +++ b/calibre-plugin/about.txt @@ -4,7 +4,7 @@ 'Reading List', 'Extract ISBN' and 'Count Pages' -plugins.

+plugins. bbcodeutils code contributed by Pau Sanchez.

Calibre officially distributes plugins from the mobileread.com forum site. diff --git a/calibre-plugin/ffdl_plugin.py b/calibre-plugin/ffdl_plugin.py index 25ec2dd3..daad8d29 100644 --- a/calibre-plugin/ffdl_plugin.py +++ b/calibre-plugin/ffdl_plugin.py @@ -28,6 +28,7 @@ from calibre.gui2 import error_dialog, warning_dialog, question_dialog, info_dia from calibre.gui2.dialogs.message_box import ViewLog from calibre.gui2.dialogs.confirm_delete import confirm from calibre.utils.date import local_tz +from calibre.library.comments import sanitize_comments_html # The class that all interface action plugins must inherit from from calibre.gui2.actions import InterfaceAction @@ -36,7 +37,7 @@ from calibre_plugins.fanfictiondownloader_plugin.common_utils import (set_plugin create_menu_action_unique, get_library_uuid) from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader import adapters, writers, exceptions -from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader.htmlcleanup import stripHTML +#from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader.htmlcleanup import stripHTML from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader.epubutils import get_dcsource, get_dcsource_chaptercount, get_story_url_from_html from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader.geturls import get_urls_from_page @@ -432,13 +433,6 @@ class FanFictionDownLoaderPlugin(InterfaceAction): print("url:%s"%url) skip_date_update = False - ## was self.ffdlconfig, but we need to be able to change it - ## when doing epub update. - ffdlconfig = SafeConfigParser() - ffdlconfig.readfp(StringIO(get_resources("plugin-defaults.ini"))) - ffdlconfig.readfp(StringIO(prefs['personal.ini'])) - adapter = adapters.getAdapter(ffdlconfig,url,fileform) - options['personal.ini'] = prefs['personal.ini'] if prefs['includeimages']: # this is a cheat to make it easier for users. 
@@ -448,6 +442,13 @@ keep_summary_html:true make_firstimage_cover:true ''' + options['personal.ini'] + ## was self.ffdlconfig, but we need to be able to change it + ## when doing epub update. + ffdlconfig = SafeConfigParser() + ffdlconfig.readfp(StringIO(get_resources("plugin-defaults.ini"))) + ffdlconfig.readfp(StringIO(options['personal.ini'])) + adapter = adapters.getAdapter(ffdlconfig,url,fileform) + ## three tries, that's enough if both user/pass & is_adult needed, ## or a couple tries of one or the other for x in range(0,2): @@ -476,7 +477,7 @@ make_firstimage_cover:true book['author_sort'] = book['author'] = story.getList("author", removeallentities=True) book['publisher'] = story.getMetadata("site") book['tags'] = writer.getTags(removeallentities=True) # getTags could be moved up into adapter now. Adapter didn't used to know the fileform - book['comments'] = stripHTML(story.getMetadata("description")) #, removeallentities=True) comments handles entities better. + book['comments'] = sanitize_comments_html(story.getMetadata("description")) book['series'] = story.getMetadata("series", removeallentities=True) # adapter.opener is the element with a threadlock. But del diff --git a/defaults.ini b/defaults.ini index e73bf2cd..5726210d 100644 --- a/defaults.ini +++ b/defaults.ini @@ -164,6 +164,45 @@ extratags: FanFiction ## doesn't work on some devices either.) #replace_hr: false +## If set false, the summary will have all html stripped. +## Both this and include_images must be true to get images in the +## summary. +keep_summary_html:true + +## Don't like the numbers at the start of chapter titles on some +## sites? You can use strip_chapter_numbers to strip them off. Just +## want to make them all look the same? Strip them off, then add them +## back on with add_chapter_numbers. Don't like the way it strips +## numbers or adds them back? See chapter_title_strip_pattern and +## chapter_title_add_pattern. 
+strip_chapter_numbers:false +add_chapter_numbers:false + +## (Two versions of chapter_title_strip_pattern are shown below. You +## should only have one uncommented.) +## This version will remove the leading number from: +## "1." => "" +## "1. The Beginning" => "The Beginning" +## "1: Start" => "Start" +## "2, Chapter the second" => "Chapter the second" +## etc +chapter_title_strip_pattern:^[0-9]+[\.: -]+ + +## This version will strip all of the above *plus* remove 'Chapter 1': +## "Chapter 1" => "" +## "1. Chapter 1" => "" +## "1. Chapter 1, Bob's First Clue" => "Bob's First Clue" +## "Chapter 2 - Pirates Place" => "Pirates Place" +## etc +#chapter_title_strip_pattern:^([0-9]+[\.: -]+)?(Chapter *[0-9]+[\.:, -]*)? + +## Uses a python template substitution. The ${index} is the 'chapter' +## number and ${title} is the chapter title, after applying +## chapter_title_strip_pattern. Those are the only variables available. +## "The Beginning" => "1. The Beginning" +chapter_title_add_pattern:${index}. ${title} + + ## Each output format has a section that overrides [defaults] [html] @@ -271,11 +310,6 @@ output_css: ## stories. Images will be converted to jpg for size if possible. #include_images:false -## If not set, the summary will have all html stripped for safety. -## Both this and include_images must be true to get images in the -## summary. -#keep_summary_html:false - ## If set, the first image found will be made the cover image. If ## keep_summary_html is true, any images in summary will be before any ## in chapters. 
diff --git a/fanficdownloader/adapters/adapter_archiveofourownorg.py b/fanficdownloader/adapters/adapter_archiveofourownorg.py index 25322bea..1d7fddfb 100644 --- a/fanficdownloader/adapters/adapter_archiveofourownorg.py +++ b/fanficdownloader/adapters/adapter_archiveofourownorg.py @@ -191,7 +191,7 @@ class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter): a = metasoup.find('blockquote',{'class':'userstuff'}) if a != None: - self.setDescription(url,a.text) + self.setDescription(url,a) #self.story.setMetadata('description',a.text) a = metasoup.find('dd',{'class':"rating tags"}) diff --git a/fanficdownloader/adapters/adapter_ficbooknet.py b/fanficdownloader/adapters/adapter_ficbooknet.py index 4874db0c..de6ea953 100644 --- a/fanficdownloader/adapters/adapter_ficbooknet.py +++ b/fanficdownloader/adapters/adapter_ficbooknet.py @@ -201,7 +201,7 @@ class FicBookNetAdapter(BaseSiteAdapter): break summary=soup.find('span', {'class' : 'urlize'}) - self.setDescription(url,summary.text) + self.setDescription(url,summary) #self.story.setMetadata('description', summary.text) # grab the text for an individual chapter. 
diff --git a/fanficdownloader/adapters/adapter_ficwadcom.py b/fanficdownloader/adapters/adapter_ficwadcom.py index 2adfeae4..964fc31d 100644 --- a/fanficdownloader/adapters/adapter_ficwadcom.py +++ b/fanficdownloader/adapters/adapter_ficwadcom.py @@ -124,7 +124,7 @@ class FicwadComSiteAdapter(BaseSiteAdapter): # description storydiv = soup.find("div",{"id":"story"}) - self.setDescription(url,storydiv.find("blockquote",{'class':'summary'}).p.string) + self.setDescription(url,storydiv.find("blockquote",{'class':'summary'}).p) #self.story.setMetadata('description', storydiv.find("blockquote",{'class':'summary'}).p.string) # most of the meta data is here: diff --git a/fanficdownloader/adapters/adapter_fimfictionnet.py b/fanficdownloader/adapters/adapter_fimfictionnet.py index 4b61d91f..77d90642 100644 --- a/fanficdownloader/adapters/adapter_fimfictionnet.py +++ b/fanficdownloader/adapters/adapter_fimfictionnet.py @@ -27,6 +27,8 @@ from .. import BeautifulSoup as bs from ..htmlcleanup import stripHTML from .. import exceptions as exceptions +from ..bbcodeutils.bbcodeparser import bbcodeparser + from base_adapter import BaseSiteAdapter, makeDate def getClass(): @@ -137,12 +139,19 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter): # fimfic is the first site with an explicit cover image. if self.getConfig('include_images') and "image" in storyMetadata.keys(): - coverurl = storyMetadata["image"] + if "full_image" in storyMetadata: + coverurl = storyMetadata["full_image"] + else: + coverurl = storyMetadata["image"] if coverurl.startswith('//static.fimfiction.net'): # fix for img urls missing 'http:' coverurl = "http:"+coverurl self.story.addImgUrl(self,self.url,coverurl,self._fetchUrlRaw,cover=True) - - self.setDescription(self.url, storyMetadata["description"]) + + + # the fimfic API gives bbcode for desc, not html. + # btw, bbcode honors newlines, html doesn't. change newlines to br tags. 
+ self.setDescription(self.url, + bbcodeparser().parse(storyMetadata["description"]).html(doDeepCopy=False).replace('\r','').replace('\n','
')) # Dates are in Unix time # Take the publish date from the first chapter posted @@ -152,8 +161,18 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter): self.story.setMetadata("dateUpdated", datetime.fromtimestamp(rawDateUpdated)) soup = bs.BeautifulSoup(data).find("div", {"class":"story"}) - for character in [character_icon["title"] for character_icon in soup.findAll("a", {"class":"character_icon"})]: - self.story.addToList("characters", character) + # fimfic stopped putting the char name on or around the char + # icon now for some reason. Pull it from the image name with + # some heuristics. + for character in [character_icon["src"] for character_icon in soup.findAll("img", {"class":"character_icon"})]: + # //static.fimfiction.net/images/characters/twilight_sparkle.png + # 5th split /, remove last four, replace _, capitolize every word(title()) + char = character.split('/')[5][:-4].replace('_',' ').title() + if char == 'Oc': + char = "OC" + if char == 'Cmc': + char = "Cutie Mark Crusaders" + self.story.addToList("characters", char) def getChapterText(self, url): diff --git a/fanficdownloader/adapters/adapter_test1.py b/fanficdownloader/adapters/adapter_test1.py index 10d5420b..1e0a1905 100644 --- a/fanficdownloader/adapters/adapter_test1.py +++ b/fanficdownloader/adapters/adapter_test1.py @@ -73,8 +73,8 @@ class TestSiteAdapter(BaseSiteAdapter): self.story.setMetadata(u'title',"Test Story Title "+self.story.getMetadata('storyId')) self.story.setMetadata('author','Test Author aa') self.story.setMetadata('storyUrl',self.url) - self.story.setMetadata('description',u'Description '+self.crazystring+u''' Done - + self.setDescription(self.url,u'Description '+self.crazystring+u''' Done +

Some more longer description. "I suck at summaries!" "Better than it sounds!" "My first fic" ''') self.story.setMetadata('datePublished',makeDate("1975-03-15","%Y-%m-%d")) diff --git a/fanficdownloader/adapters/base_adapter.py b/fanficdownloader/adapters/base_adapter.py index cbe938d4..9ed89eb5 100644 --- a/fanficdownloader/adapters/base_adapter.py +++ b/fanficdownloader/adapters/base_adapter.py @@ -199,7 +199,8 @@ class BaseSiteAdapter(Configurable): if (self.chapterFirst!=None and index < self.chapterFirst) or \ (self.chapterLast!=None and index > self.chapterLast): self.story.addChapter(removeEntities(title), - None) + None, + self) else: if self.oldchapters and index < len(self.oldchapters): data = self.utf8FromSoup(None, @@ -208,7 +209,8 @@ class BaseSiteAdapter(Configurable): else: data = self.getChapterText(url) self.story.addChapter(removeEntities(title), - removeEntities(data)) + removeEntities(data), + self) self.storyDone = True # include image, but no cover from story, add default_cover_image cover. 
@@ -289,7 +291,7 @@ class BaseSiteAdapter(Configurable): def setDescription(self,url,svalue): #print("\n\nsvalue:\n%s\n"%svalue) if self.getConfig('keep_summary_html'): - if isinstance(svalue,str) or isinstance(svalue,unicode): + if isinstance(svalue,basestring): svalue = bs.BeautifulSoup(svalue) self.story.setMetadata('description',self.utf8FromSoup(url,svalue)) else: diff --git a/fanficdownloader/bbcodeutils/__init__.py b/fanficdownloader/bbcodeutils/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/fanficdownloader/bbcodeutils/bbcode2html.py b/fanficdownloader/bbcodeutils/bbcode2html.py new file mode 100644 index 00000000..6f3e5352 --- /dev/null +++ b/fanficdownloader/bbcodeutils/bbcode2html.py @@ -0,0 +1,325 @@ +#!/usr/bin/python +# -*- coding: UTF-8 -*- +# +# Author: Pau Sanchez (contact@pausanchez.com) +# Version: v1.0 +# Last Modified: 2010/09/15 +# +# For the latest version check out: +# http://www.codigomanso.com/en/projects +# +# My blog: +# http://www.codigomanso.com/en/ - English Version +# http://www.codigomanso.com/es/ - Spanish Version +# + +import sys +import os +import re +import urllib + +class bbcode2html: + ''' + This class gets a parsed BBCode and transforms it to valid HTML + + Useful functions of this class: + html + convertToHTML + + Example: + > parser = bbcodeparser () + > parser.parse ('[b]bold[/b]') + > bbcode2html (parser).html() + bold + + # This is faster for huge strings but changes the parser object internally + > bbcode2html (parser).html(doDeepCopy = False) + bold + ''' + def __init__ (self, parser): + self._parser = parser + return + + def html (self, allowClassAttr = False, doDeepCopy = True, parser = None): + ''' + Convert current parsed code to HTML + + Example: + code = bbcodeparser ('[b]bold[/b]') + code.html() -> 'bold' + ''' + if parser is None: + parser = self._parser + + tokens = parser + if instanceof (parser, bbcodeparser): + tokens = parser.getTokens() + + return bbcode2html.convertToHTML (tokens, 
allowClassAttr = allowClassAttr, doDeepCopy = doDeepCopy) + + @staticmethod + def htmlString (string): + toReplace = { + u'<' : '<', + u'>' : '>', + u'"' : """, + u'&' : "&" + } + for entity in toReplace: + string = string.replace(entity, toReplace[entity]) + return string + + @staticmethod + def getValidTags (): + simpleTags = ['b', 'u', 'i', 'sup', 'sub', 'ul', 'ol', 'li', 'table', 'tr', 'th', 'td', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6'] + validTags = { + 'p' : { 'color' : 'color', 'size' : 'size', 'font' : 'font' }, + 'color' : { 'color' : 'color' }, + 'size' : { 'size' : 'size' }, + 'font' : { 'font' : 'font' }, + 'img' : { 'alt' : 'alt', 'title' : 'title', 'width' : 'width' , 'height' : 'height', 'img' : 'img'}, + 'url' : { 'href' : 'href', 'url' : 'href', 'link' : 'href', 'title' : 'title' }, + 's' : { }, + 'code' : { }, + 'quote' : { }, + 'list' : { 'list' : 'type' }, + 'email' : { 'email': 'href'}, + 'google' : { 'google': 'google'}, + 'wikipedia' : { 'wikipedia' : 'wikipedia', 'language' : 'language', 'lang' : 'lang'} + } + + for tag in simpleTags: + validTags[tag] = { } + return validTags + + @staticmethod + def convertToHTML (tokens, allowClassAttr = False, validTags = None, doDeepCopy = True): + ''' + Convert internally parsed BBCode to XHTML + + @doDeepCopy + True: it does a deep copy of tokens so this list will remain unchanged + False: tokens will be modified internally, but the output will be produced like 5x faster + it's a good idea to use False only when this is the last operation + ''' + # do a deep copy + if doDeepCopy: + import copy + tokens = copy.deepcopy (tokens) + + # filter invalid tags and attributes + if validTags is None: + validTags = bbcode2html.getValidTags() + + bbcode2html._filterInvalidTagsAndAttributes (tokens, validTags, allowClassAttr) + + # Start to convert + index = 0 + tokenLength = len (tokens) + + # use a list for the output (an order of magnitude faster than using string concatenation) + htmlList = [] + lastListOpener = [] 
+ + while index < tokenLength: + + if isinstance (tokens [index], basestring): + htmlList.append (bbcode2html.htmlString (tokens [index])) + index += 1 + continue + + token = tokens[index] + tag = token['tag'] # opening or closing simple tag. e.g: 'b', '/b', '/u', ... + tagName = (tag[1:] if tag[0] == '/' else tag) + tagOpener = (u'/' if tag[0] == '/' else u'') + tokenArgs = (token['args'] if 'args' in token else {}) + + # opening or closing simple tag COLOR / SIZE + if (tagName in ['p', 'color', 'size', 'font']): + style = '' + style += ((u' color: ' + tokenArgs['color'] + u';') if ('color' in tokenArgs) else '') + style += ((u' font-size: ' + tokenArgs['size'] + u'pt;') if ('size' in tokenArgs) else '') + style += ((u' font-family: ' + tokenArgs['font'] + u';') if ('font' in tokenArgs) else '') + style = style.strip() + + pArgs = {} + if style != '': + pArgs ['style'] = style + + if 'class' in tokenArgs: + pArgs ['class'] = tokenArgs['class'] + + if ('args' not in token) and (tagName != 'p'): + if (tagOpener == '/'): # if closing tag, close it + htmlList.append (u'') + index += 1 + continue + + if tagName != 'p': + tag = tagOpener + u'span' + + htmlList.append (bbcode2html.xml (tag, pArgs)) + + # IMG tag + elif tag == 'img' and (index+2 < tokenLength): + if 'img' in tokenArgs: + # has the form of x ? 
+ sizeMatch = re.match (u'^\s*(\d+)[xX](\d+)\s*$', tokenArgs['img']) + if sizeMatch is not None: + tokenArgs['width'] = sizeMatch.group(1) + tokenArgs['height'] = sizeMatch.group(2) + # then assume is the alternative text + else: + tokenArgs['alt'] = tokenArgs['img'] + del tokenArgs['img'] + + # add the source of the image + tokenArgs ['src'] = tokens[index+1] + + # [img]http://www.whatever.com/pic.jpg[/img] + htmlList.append ( + bbcode2html.xml ('img', tokenArgs, soloTag=True) + ) + index += 2 # skip next token and closing tag + + # URL tag + elif tag == 'url': + if ('args' not in token) and (index+2 < tokenLength): + # [url]http://www.google.com[/url] + htmlList.append (bbcode2html.xml ('a', { 'href' : tokens[index+1] })) + else: + # [url=http://www.google.com]Google[/url] + # [url link=http://www.google.com title="This is Google"]Google[/url] + htmlList.append (bbcode2html.xml ('a', tokenArgs)) + + # URL closing tag (sometimes needed) + elif (tag == '/url') or (tag == '/email'): + htmlList.append (u'') + + # Email tag + elif tag == 'email': + if ('args' not in token) and (index+2 < tokenLength): + # [email]asdf@asdf.com] + htmlList.append (bbcode2html.xml ('a', { 'href' : u'mailto:' + tokens[index+1].strip() })) + else: + # [email=asdf@asfd.com]john smith[/email] + if 'href' in tokenArgs: + tokenArgs['href'] = u'mailto:' + tokenArgs['href'] + htmlList.append (bbcode2html.xml ('a', tokenArgs)) + + elif tagName == 'list': + if tagOpener == '/': + htmlList.append (bbcode2html.xml (u'/' + lastListOpener.pop())) + else: + if ('type' not in tokenArgs): + htmlList.append (bbcode2html.xml (tagOpener + u'ul', tokenArgs)) + lastListOpener.append ('ul') + else: + htmlList.append (bbcode2html.xml (tagOpener + u'ol', tokenArgs)) + lastListOpener.append ('ol') + + elif tagName == '*': + htmlList.append (bbcode2html.xml (tagOpener + u'li', tokenArgs)) + + elif (tagName == 's'): + tokenArgs['style'] = 'text-decoration: line-through;' + htmlList.append (bbcode2html.xml 
(tagOpener + u'span', tokenArgs)) + + elif (tagName == 'code'): + htmlList.append (bbcode2html.xml (tagOpener + u'pre', tokenArgs)) + + elif (tagName == 'quote'): + htmlList.append (bbcode2html.xml (tagOpener + u'blockquote', tokenArgs)) + + elif (tagName == 'google'): + htmlList.append ( + bbcode2html.xml ( + tagOpener + u'a', + {'href' : 'http://www.google.com/search?q=' + urllib.quote_plus (tokens[index+1])}, + tokens[index+1] + ) + ) + index += 2 + + elif (tagName == 'wikipedia'): + subdomain = 'www' + for arg in ['lang', 'language', 'wikipedia']: + if arg in tokenArgs: + subdomain = tokenArgs[arg] + + htmlList.append ( + bbcode2html.xml ( + tagOpener + u'a', + {'href' : 'http://' + subdomain + '.wikipedia.org/wiki/' + tokens[index+1].replace (' ', '_')}, + tokens[index+1] + ) + ) + index += 2 + + elif (tagName in validTags): + htmlList.append ( + bbcode2html.xml (tag, tokenArgs) + ) + + else: + # ignore this tag + pass + + index += 1 + + return ''.join (htmlList) + + @staticmethod + def _filterInvalidTagsAndAttributes (tokens, validTags, allowClassAttr): + ''' + Helper function to filter out invalid attributes from the tokens list + ''' + # add 'class' attribute as valid (mapping 'class' itself) + if allowClassAttr: + for attr in validTags: + validTags[attr]['class'] = 'class' + + # remove invalid attributes from tokens + for tindex in range(0, len(tokens)): + if isinstance (tokens[tindex], dict) and ('args' in tokens[tindex]) and (tokens[tindex]['tag'] in validTags): + validList = validTags[tokens[tindex]['tag']] + + filteredArgs = {} + for arg in tokens[tindex]['args']: + if arg in validList: + # rename the argument + filteredArgs[validList[arg]] = tokens[tindex]['args'][arg] + else: + pass # do not include this arg in the filteredArgs + + tokens[tindex]['args'] = filteredArgs + + return + + @staticmethod + def xml (tag, attrs = {}, text = None, soloTag = False): + ''' + Helper function to produce valid XML output + ''' + xml = u'<' + tag.lower() + + # make 
sure we sort attributes alphabetically (for deterministic output) + # Faster but non-deterministic: + # for (key, value) in attrs.iteritems(): + # xml += u' ' + key + u'="' + value + u'"' + for key in sorted (attrs.keys()): + xml += u' ' + key + u'="' + attrs[key] + u'"' + + # close tag + if text is None: + if soloTag: + xml += u' />' + else: + xml += u'>' + else: + xml += u'>' + text + u'' + + return xml + + + diff --git a/fanficdownloader/bbcodeutils/bbcodebuilder.py b/fanficdownloader/bbcodeutils/bbcodebuilder.py new file mode 100644 index 00000000..ada940fd --- /dev/null +++ b/fanficdownloader/bbcodeutils/bbcodebuilder.py @@ -0,0 +1,77 @@ +#!/usr/bin/python +# -*- coding: UTF-8 -*- +# +# Author: Pau Sanchez (contact@pausanchez.com) +# Version: v1.0 +# Last Modified: 2010/09/15 +# +# For the latest version check out: +# http://www.codigomanso.com/en/projects +# +# My blog: +# http://www.codigomanso.com/en/ - English Version +# http://www.codigomanso.com/es/ - Spanish Version +# + +import sys +import os +import re +import hashlib + +class bbcodebuilder: + ''' + This class helps to build BBCode programmatically. + + The function names are used as the tag name, then the first parameter + is the string that goes inside the tags and any extra parameter is + appended as a parameter to the tag + + Examples: + > bbcode = bbcodebuilder() # create a instance! 
+ + > print bbcode.b ('bold') + [b]bold[/b] + + > print bbcode.color ('this goes in red', 'red') + [color=red]this goes in red[/color] + + > print bbcode.url ('Google', 'http://www.google.com') + [url=http://www.google.com]Google[/url] + + > print bbcode.alist('item 1', 'item 2') + [list=a] + [*]item 1 + [*]item 2 + [/list] + + + This solution is based on the recipe found on: + http://code.activestate.com/recipes/576831-simple-bbcode-support/ + ''' + + def __getattr__(self, name): + ''' + This is a generic getter that returns a function which gets the first parameter + as the string that goes between the tags, and extra parameters as tag parameters. + + The name of the attribute is used as the tag name + ''' + class bbcodebuilder_helper: + def __init__(self, name): + self._name = name + + def __call__(self, string, *args): + return u'[{0}{1}]{2}[/{0}]'.format(self._name, (u'=' + u','.join(map(str, args))) if args else u'', string) + + return bbcodebuilder_helper (name) + + def list(self, *items): + return u'[list]' + u''.join(map(lambda item: u"\n [*]" + item, items)) + u"\n[/list]" + + def nlist(self, *items): + return u'[list=1]' + u''.join(map(lambda item: u"\n [*]" + item, items)) + u"\n[/list]" + + def alist(self, *items): + return u'[list=a]' + u''.join(map(lambda item: u"\n [*]" + item, items)) + u"\n[/list]" + + diff --git a/fanficdownloader/bbcodeutils/bbcodeparser.py b/fanficdownloader/bbcodeutils/bbcodeparser.py new file mode 100644 index 00000000..2d59dd56 --- /dev/null +++ b/fanficdownloader/bbcodeutils/bbcodeparser.py @@ -0,0 +1,251 @@ +#!/usr/bin/python +# -*- coding: UTF-8 -*- +# +# Author: Pau Sanchez (contact@pausanchez.com) +# Version: v1.0 +# Last Modified: 2010/09/15 +# +# For the latest version check out: +# http://www.codigomanso.com/en/projects +# +# My blog: +# http://www.codigomanso.com/en/ - English Version +# http://www.codigomanso.com/es/ - Spanish Version +# + +import sys +import os +import re +import hashlib + +class bbcodeparser: + 
''' + This class parses BBCode into a internal structure to allow later processing and + conversion to HTML. + + The parser tries to fix invalid code (like unclosed tags) + + Useful URLs: + http://en.wikipedia.org/wiki/BBCode + http://www.bbcode.org/reference.php + + Example: + > bbcode = bbcodeparser () + > bbcode.parse ('[b]text in bold[/b]').html() + text in bold + + # dump HTML + > bbcode.parse ('[p][color=red]text in red').html() +

text in red

+ + # dump fixed BBCode + > bbcode.parse ('[p][color=red]text in red').bbcode() + [p][color=red]text in red[/color][/p] + + > bbcode.parse ('This [b][i]code[/b] will be fixed[/invalid]').bbcode() + This [b][i]code[/i][/b] will be fixed + + # dump fixed bbcode + > str (bbcodeparse ('This [b][i]code[/b] will be fixed[/invalid]')) + This [b][i]code[/i][/b] will be fixed + ''' + _bbcode = '' + _tokens = [] + + def __init__ (self, bbcode = '', fixInvalidCode = True): + ''' Initialize and parse bbcode string (if any is given) + ''' + self.parse (bbcode, fixInvalidCode) + return + + def __str__ (self): + return self.bbcode() + + def parse (self, bbcode = None, fixInvalidCode = True): + ''' + It will parse and return the token list, trying to fix tags if + fixInvalidCode is True + + It will return the current object to allow chaining + + Example: + code = bbcode() + code.parse ('bold', True) -> + code.parse ('bolditalics', True) -> internally will add the missing '' + ''' + if bbcode is not None: + self._bbcode = bbcode + self._tokens = self.tokenize (bbcode) + if fixInvalidCode: + self._tokens = self.fixWrongTags (self._tokens) + + return self + + # return ALL tokens + def getTokens (self): + return self._tokens + + def bbcode (self): + ''' + Dump BBCode again. This is useful for dumping valid BBCode + ''' + bbcode = [] + for token in self._tokens: + if token is None: + continue + + if isinstance (token, basestring): + bbcode.append (token.replace (u'[', u'\[').replace (u']', u'\]')) + continue + + tag = token['tag'] # opening or closing simple tag. e.g: 'b', '/b', '/u', ... 
+ tagOpener = (u'/' if tag[0] == u'/' else u'') + + if (tagOpener == '/') or ('args' not in token): + bbcode.append (u'[' + tag + u']') + else: + # process args + argstr = '' + + # the arg with the same name as the tag repersents the '=whatever' + if tag in token['args']: + if re.match ('\s|"', token['args'][tag]) is None: + argstr = u'=' + token['args'][tag] + else: + argstr = u'="' + token['args'][tag].replace (u'"', u'\"') + u'"' + + for (k,v) in token['args'].iteritems(): + if k == tag: # already processed + continue + argstr += ' ' + k + u'="' + v.replace (u'"', u'\"') + u'"' + + bbcode.append (u'[' + tag + argstr + ']') + + return u''.join (bbcode) + + def html (self, allowClassAttr = False, doDeepCopy = True): + ''' + Convert current parsed code to HTML + + @allowClassAttr + Is something like [b class="asdf"] allowed? + + @doDeepCopy + True: it does a deep copy of tokens so this list will remain unchanged + False: tokens will be modified internally, but the output will be produced like 5x faster + it's a good idea to use False when the string parsed is huge and this is the + last operation on the string + + Example: + code = bbcode ('[b]bold[/b]') + code.html() -> 'bold' + ''' + from bbcode2html import bbcode2html + return bbcode2html.convertToHTML (self._tokens, allowClassAttr = allowClassAttr, doDeepCopy = doDeepCopy) + + + @staticmethod + def fixWrongTags (inTokenList): + ''' Add missing tokens that have not been closed properly and try to fix some scenarios + ''' + opened = [] + outTokenList = [] + for token in inTokenList: + # normal string... 
do nothing + if isinstance(token, basestring): + outTokenList.append (token) + else: + # if starts with '/' is closing a tag + if token['tag'][0] == '/': + while (len (opened) > 0) and (opened[-1] != token['tag'][1:]): + outTokenList.append ({'tag' : '/' + opened[-1] }) + del opened[-1] + + if len(opened): + del opened[-1] + outTokenList.append (token) + + # opening tag + else: + # if I open the same tag I opened before, close it, and open it again + if (len(opened) > 0) and (token['tag'] == opened[-1]): + outTokenList.append ({'tag' : '/' + opened[-1] }) + else: + opened.append (token['tag']) + outTokenList.append (token) + + # close all elements that have not been closed + while len(opened): + outTokenList.append ({'tag' : '/' + opened[-1] }) + del opened[-1] + + return outTokenList + + @staticmethod + def tokenize(code): + ''' + Tokenize BBCode tags and parameters + + Return the token list using a internal format. See the example: + [ + { 'tag' : 'p', 'args' : { 'font' : 'arial' } }, + 'This is ', + { 'tag' : 'url', 'args' : {'url' : 'http://www.google.com'} }, + 'a link to google', + { 'tag' : '/url' }, + { 'tag' : '/p' } + ] + ''' + re_tags = re.compile (r'(\[[^]]+\])', re.DOTALL | re.UNICODE) + re_tagName = re.compile (r'\[([^]=\s]+)([^]]*)\]', re.DOTALL | re.UNICODE) + #re_tagArgs = re.compile (r'\s*([^=]*)=(("([^"]+)")|([^\s]+))', re.DOTALL | re.UNICODE) + re_tagArgs = re.compile (r'\s*([\w]*)=(("([^"]+)")|([^\s]+))', re.DOTALL | re.UNICODE) + + # get a unique name and replace escaped braces + unique = hashlib.md5(code).hexdigest() + code = code.replace ('\[', unique+'_OPEN_BRACE') + code = code.replace ('\]', unique+'_CLOSE_BRACE') + + splitted = re_tags.split(code) + + outTokenList = [] + for token in splitted: + if len(token) == 0: + continue + + if token[0] == '[': + match = re_tagName.match (token) + tagName = match.group(1) + tagArgs = match.group(2) + + tagToken = { 'tag' : tagName.lower() } + + # parse arguments (if any) + if len(tagArgs) > 0: + 
allArgs = re_tagArgs.findall(tagArgs) + + tagArgs = {} + for arg in allArgs: + # if the argument has no name, use the tagName itself + argName = (arg[0] if arg[0] != '' else tagName) + argValue = (arg[3] if (arg[1][0] == '"') else arg[4]) + + tagArgs[argName.lower()] = argValue.replace ('\"', '"') + + tagToken['args'] = tagArgs + + outTokenList.append (tagToken) + + # append the text as it is + else: + outTokenList.append (token) + + # restore escaped braces back (once code is parsed) + restoredTokenList = [] + for token in outTokenList: + if isinstance (token, basestring): + token = token.replace (unique+'_OPEN_BRACE', '[').replace (unique+'_CLOSE_BRACE', ']') + restoredTokenList.append (token) + + return restoredTokenList + diff --git a/fanficdownloader/bbcodeutils/readme.txt b/fanficdownloader/bbcodeutils/readme.txt new file mode 100644 index 00000000..e8a59494 --- /dev/null +++ b/fanficdownloader/bbcodeutils/readme.txt @@ -0,0 +1,81 @@ +AUTHOR + Pau Sanchez + http://www.codigomanso.com/ + +VERSION: + bbcodeutils v1.0 + +LICENSE + This code is licensed under Creative Commons Attribution 3.0 + http://creativecommons.org/licenses/by/3.0/ + + You can use this python module or any part of the code you want as long as you add + my name as a contributor to your project. + +DESCRIPTION + This module can be used to produce HTML from BBCode, to generate BBCode or to fix invalid BBCode. + + The classes are: + - bbcodeparser + - bbcodebuilder + - bbcode2html + + You can use bbcodeparser to parse BBCode and to produce output in any format you want. + + Open the python file to find more information and examples of use of each class. 
It can + be a good idea to check the test.py for examples + + To run the unit tests: + > python test.py + + To run the performance test: + > python test.py BBCodeTests.performanceTest + + +EXAMPLES OF BBCode: + + [b] -> bold + [u] -> underline + [i] -> italic + + [center] -> center the text inside + [color=XXX] -> change color of text + [size=XXX] -> change size of text + + Lists: + [ul] -> unordered list + [ol] -> ordered list + [li] -> list item + + [list] -> start unordered list + [*] -> list item + [list=1] -> start a list of numbers + [list=a] -> start a list of alphabetic characters + + Advanced: + [url] -> link to url + [url=http://link/url/]text[/url] + [url link=http://link/url/ title="This is the title"]text[/url] + + [img]http://to/image[/img] + [img=230x330]http://to/image[/img] + [img="Alt text here"]http://to/image[/img] + [img="Alt text here" width=320 height=240]http://to/image[/img] + + [email]asdf@asdf.com[/email] + [email=john@asdf.com]John Smith[/email] + + [google]search this[/google] + [wikipedia]Tom Hanks[/wikipedia] + [wikipedia lang=es]Tom Hanks[/wikipedia] + + Tables: + [table] + [tr] + [th] + [td] + + Advanced: + [google] + [wikipedia] + diff --git a/fanficdownloader/bbcodeutils/test.py b/fanficdownloader/bbcodeutils/test.py new file mode 100644 index 00000000..15e1093f --- /dev/null +++ b/fanficdownloader/bbcodeutils/test.py @@ -0,0 +1,420 @@ +#!/usr/bin/python +# -*- coding: UTF-8 -*- +# +# Author: Pau Sanchez (contact@pausanchez.com) +# Version: v1.0 +# Last Modified: 2010/09/15 +# +# For the latest version check out: +# http://www.codigomanso.com/en/projects +# +# My blog: +# http://www.codigomanso.com/en/ - English Version +# http://www.codigomanso.com/es/ - Spanish Version +# + +from bbcodeparser import bbcodeparser +from bbcodebuilder import bbcodebuilder + +import random +import unittest + +class BBCodeTests(unittest.TestCase): + def setUp (self): + self.bbcode = bbcodeparser() + return + + def testConstructor (self): + 
self.assertEqual (bbcodeparser ('whatever').html(), 'whatever') + self.assertEqual (bbcodeparser ('[b]bold[/b]').html(), 'bold') + self.assertEqual (str (bbcodeparser ('[b]bold[/b]')), '[b]bold[/b]') + return + + def testBold (self): + self.assertEqual (self.bbcode.parse ('whatever').html(), 'whatever') + self.assertEqual (self.bbcode.parse ('[b]bold[/b]').html(), 'bold') + self.assertEqual (self.bbcode.parse ('[B]bold[/b]').html(), 'bold') + self.assertEqual (self.bbcode.parse ('this is [B]bold[/B]').html(), 'this is bold') + return + + def testItalic (self): + self.assertEqual (self.bbcode.parse ('[i]italic[/i]').html(), 'italic') + return + + def testUnderline (self): + self.assertEqual (self.bbcode.parse ('[u]italic[/u]').html(), 'italic') + return + + def testURLs (self): + self.assertEqual ( + self.bbcode.parse ('[url]http://www.google.com[/url]').html(), + 'http://www.google.com' + ) + self.assertEqual ( + self.bbcode.parse ('[url=http://www.google.com]Google[/url]').html(), + 'Google' + ) + self.assertEqual ( + self.bbcode.parse ('[url="http://www.google.com"]Google[/url]').html(), + 'Google' + ) + self.assertEqual ( + self.bbcode.parse ('[url="http://www.google.com" title="Search Engine"]Google[/url]').html(), + 'Google' + ) + self.assertEqual ( + self.bbcode.parse ('[url link="http://www.google.com"]Google[/url]').html(), + 'Google' + ) + return + + def testPTag (self): + self.assertEqual ( + self.bbcode.parse ('[p color=#0000ff]blue[/p]').html(), + u'

blue

' + ) + self.assertEqual ( + self.bbcode.parse ('[p size=12]12pt font[/p]').html(), + u'

12pt font

' + ) + + self.assertEqual ( + self.bbcode.parse ('[p font=arial]arial[/p]').html(), + u'

arial

' + ) + + self.assertEqual ( + self.bbcode.parse ('[p font=arial color=blue size=14]blue 14pt arial').html(), + u'

blue 14pt arial

' + ) + + self.assertEqual ( + self.bbcode.parse ('[p class=whatever]text[/p]').html(), + u'

text

' + ) + return + + def testColorTag (self): + self.assertEqual ( + self.bbcode.parse ('[color=#0000ff]blue[/color]').html(), + u'blue' + ) + return + + def testSizeTag (self): + self.assertEqual ( + self.bbcode.parse ('[size=12]12pt font[/size]').html(), + u'12pt font' + ) + return + + def testEmail(self): + self.assertEqual ( + self.bbcode.parse ('[email]asdf@asdf.com[/email]').html(), + u'asdf@asdf.com' + ) + + self.assertEqual ( + self.bbcode.parse ('[email=john@smith.com]John Smith[/email]').html(), + u'John Smith' + ) + return + + def testImgTag (self): + self.assertEqual ( + self.bbcode.parse ('[img]http://www.codigomanso.com/image.jpg[/img]').html(), + u'' + ) + + self.assertEqual ( + self.bbcode.parse ('[img="This is the ALT of the image"]http://www.codigomanso.com/image.jpg[/img]').html(), + u'This is the ALT of the image' + ) + + self.assertEqual ( + self.bbcode.parse ('[img=320x200]http://www.codigomanso.com/image.jpg[/img]').html(), + u'' + ) + + self.assertEqual ( + self.bbcode.parse ('[img=320x200 title="Image Test"]http://www.codigomanso.com/image.jpg[/img]').html(), + u'' + ) + + self.assertEqual ( + self.bbcode.parse ('[img="whatever" width=320 height="212" title="Image Test"]http://www.codigomanso.com/image.jpg[/img]').html(), + u'whatever' + ) + return + + def testGoogleURL (self): + self.assertEqual ( + self.bbcode.parse ('[google]asdf[/google]').html(), + u'asdf' + ) + self.assertEqual ( + self.bbcode.parse ('[google]Tom Hanks[/google]').html(), + u'Tom Hanks' + ) + return + + def testWikipediaURL (self): + self.assertEqual ( + self.bbcode.parse ('[wikipedia]Tom Hanks[/wikipedia]').html(), + u'Tom Hanks' + ) + + self.assertEqual ( + self.bbcode.parse ('[wikipedia language=en]Tom Hanks[/wikipedia]').html(), + u'Tom Hanks' + ) + + self.assertEqual ( + self.bbcode.parse ('[wikipedia lang=es]Tom Hanks[/wikipedia]').html(), + u'Tom Hanks' + ) + + self.assertEqual ( + self.bbcode.parse ('[wikipedia=es]Tom Hanks[/wikipedia]').html(), + u'Tom Hanks' + 
) + return + + def testListTags (self): + self.assertEqual ( + self.bbcode.parse ('[ul][li]item 1[/li][li]item 2[/li][/ul]').html(), + u'' + ) + + self.assertEqual ( + self.bbcode.parse ('[ol][li]item 1[/li][li]item 2[/li][/ol]').html(), + u'
  1. item 1
  2. item 2
' + ) + + self.assertEqual ( + self.bbcode.parse ('[list][li]item 1[/li][li]item 2[/li][/list]').html(), + u'' + ) + + self.assertEqual ( + self.bbcode.parse ('[list][*]item 1[*]item 2[/list]').html(), + u'' + ) + + self.assertEqual ( + self.bbcode.parse ('[list=1][li]item 1[/li][li]item 2[/li][/list]').html(), + u'
  1. item 1
  2. item 2
' + ) + return + + def testInvalidCode (self): + self.assertEqual (self.bbcode.parse ('[invalid]valid text[/invalid]').html(), 'valid text') + self.assertEqual ( + self.bbcode.parse ('[b]bold and [i]italics[/b]').html(), + 'bold and italics' + ) + self.assertEqual ( + self.bbcode.parse ('[/b]invalid[/b][/p]').html(), + 'invalid' + ) + self.assertEqual ( + self.bbcode.parse ('[p][b]bold').html(), + '

bold

' + ) + self.assertEqual ( + self.bbcode.parse ('[p][b]a ').html(), + '

a <b>

' + ) + + self.assertEqual ( + self.bbcode.parse ('[ol][li]item 1[li]item 2[/li][/ol]').html(), + u'
  1. item 1
  2. item 2
' + ) + + self.assertEqual ( + self.bbcode.parse ('[b]\[b\] stands for [b]bold[/b]').html(), + u'[b] stands for bold' + ) + return + + def testEscapedBrackets (self): + self.assertEqual ( + self.bbcode.parse ('\[b\]not bold\[/b\]').html(), + u'[b]not bold[/b]' + ) + + self.assertEqual ( + self.bbcode.parse ('[b]\[b\] stands for bold[/b]').html(), + u'[b] stands for bold' + ) + + self.assertEqual ( + self.bbcode.parse ('\[b\][b]stands for bold[/b]').html(), + u'[b]stands for bold' + ) + + self.assertEqual ( + self.bbcode.parse ('\[b\][b]stands for bold[/b] just like in HTML').html(), + u'[b]stands for bold just like <b> in HTML' + ) + + def testBigExample (self): + inputText = """check this out + + [h1 class=circle]heading[/h1] + + [p size=14 color=blue font="verdana, Times New Roman"]This is [b] bold [/b] and this [i]italic[/i] and this is [color=red]red[/color] and this is [color="red"]also red[/color]. + [/p] + + fix [b][i]bold [font=verdana][size=12]and[/size][/font] italic[/b] + [img]http://www.codigomanso.com/b.jpg[/img] + [url]http://www.codigomanso.com/[/url] + [url=http://www.codigomanso.com/]Codigo Manso[/url] + [uRl link=http://www.codigomanso.com title="Codigo Manso Blog"]Codigo Manso[/url] + + [ul] + [Li]item 1[/Li] + [li]item 2[/LI] + [/UL] + + [list=1 ] + [*]item 1 + [*]item 2 + [/list] + + [table class="big"] + [tr] + [th]big[/th] + [/tr] + [/table] + [invalid class="extra"]whatever[/invalid]""" + + out = self.bbcode.parse (inputText).html(allowClassAttr = True) + self.assertEquals (out, '''check this out + +

heading

+ +

This is bold and this italic and this is red and this is also red. +

+ + fix bold and italic + + http://www.codigomanso.com/ + Codigo Manso + Codigo Manso + + + +
    +
  1. item 1 +
  2. item 2 +
+ + + + + +
big
+ whatever''') + + + def testBBCodeDumper (self): + self.assertEquals ( + self.bbcode.parse ('[b]bold[/b]').bbcode(), + '[b]bold[/b]' + ) + + self.assertEquals ( + self.bbcode.parse ('[color=red]text in red[/color]').bbcode(), + '[color=red]text in red[/color]' + ) + self.assertEquals ( + self.bbcode.parse ('[p][color=red]text in red').bbcode(), + '[p][color=red]text in red[/color][/p]' + ) + + self.assertEquals ( + self.bbcode.parse ('This [b][i]code[/b] will be fixed[/invalid]').bbcode(), + 'This [b][i]code[/i][/b] will be fixed' + ) + + self.assertEquals ( + self.bbcode.parse ('\[[url]http://www.codigomanso.com/en[/url]\]').bbcode(), + "\[[url]http://www.codigomanso.com/en[/url]\]" + ) + + def performanceTest(self): + ''' + This test checks the performance of parse and html operations + + To run this test type: + > python test.py BBCodeTests.performanceTest + ''' + inputText = """check this out + + [h1 class=circle]heading[/h1] + + [p size=14 color=blue font="verdana, Times New Roman"]This is [b] bold [/b] and this [i]italic[/i] and this is [color=red]red[/color] and this is [color="red"]also red[/color]. 
+ [/p] + + fix [b][i]bold [font=verdana][size=12]and[/size][/font] italic[/b] + [img]http://www.codigomanso.com/b.jpg[/img] + [url]http://www.codigomanso.com/[/url] + [url=http://www.codigomanso.com/]Codigo Manso[/url] + [uRl link=http://www.codigomanso.com title="Codigo Manso Blog"]Codigo Manso[/url] + + [ul] + [Li]item 1[/Li] + [li]item 2[/LI] + [/UL] + + [list=1 ] + [*]item 1 + [*]item 2 + [/list] + + [table class="big"] + [tr] + [th]big[/th] + [/tr] + [/table] + [invalid class="extra"]whatever[/invalid]""" + + import time + start = time.time() + + for i in range(0, 12): + inputText += inputText + + print "len(inputText) = %.2f MB (took %.2f seconds)" % (len(inputText)/(1024.0*1024.0), time.time() - start) + + bbcode = bbcodeparser() + start = time.time() + bbcode.parse (inputText) + total = (time.time() - start) + print "time (bbcode.parse()) = %f" % total + print " >> %.2f chars/second" % (len(inputText) / total) + + start = time.time() + bbcode.html(doDeepCopy = False) + total = (time.time() - start) + print "time (bbcode.html()) = %f" % total + print " >> %.2f chars/second" % (len(inputText) / total) + return + + def testCodeBuilder (self): + bbcode = bbcodebuilder () + self.assertEquals (bbcode.b ('bold'), u'[b]bold[/b]') + self.assertEquals (bbcode.color ('this goes in red', 'red'), u'[color=red]this goes in red[/color]') + self.assertEquals (bbcode.url ('Google', 'http://www.google.com'), u'[url=http://www.google.com]Google[/url]') + self.assertEquals (bbcode.alist('item 1', 'item 2'), u"[list=a]\n [*]item 1\n [*]item 2\n[/list]") + return + +if __name__ == '__main__': + unittest.main() + + + diff --git a/fanficdownloader/story.py b/fanficdownloader/story.py index 0451d558..73008008 100644 --- a/fanficdownloader/story.py +++ b/fanficdownloader/story.py @@ -319,12 +319,25 @@ class Story: def isList(self,listname): return self.listables.has_key(listname) - def addChapter(self, title, html): + def addChapter(self, title, html, configurable=None): + if 
configurable and \ + configurable.getConfig('strip_chapter_numbers') and \ + configurable.getConfig('chapter_title_strip_pattern'): + title = re.sub(configurable.getConfig('chapter_title_strip_pattern'),"",title) self.chapters.append( (title,html) ) - def getChapters(self): + def getChapters(self, configurable=None): "Chapters will be tuples of (title,html)" - return self.chapters + retval = [] + if configurable and \ + configurable.getConfig('add_chapter_numbers') and \ + configurable.getConfig('chapter_title_add_pattern'): + for index, (title,html) in enumerate(self.chapters): + retval.append( (string.Template(configurable.getConfig('chapter_title_add_pattern')).substitute({'index':index+1,'title':title}),html) ) + else: + retval = self.chapters + + return retval def formatFileName(self,template,allowunsafefilename=True): values = origvalues = self.getAllMetadata() diff --git a/fanficdownloader/writers/base_writer.py b/fanficdownloader/writers/base_writer.py index 7d72120f..25cca2fe 100644 --- a/fanficdownloader/writers/base_writer.py +++ b/fanficdownloader/writers/base_writer.py @@ -184,10 +184,10 @@ class BaseStoryWriter(Configurable): names as Story.metadata, but ENTRY should use index and chapter. """ # Only do TOC if there's more than one chapter and it's configured. 
- if len(self.story.getChapters()) > 1 and self.getConfig("include_tocpage") and not self.metaonly : + if len(self.story.getChapters(self)) > 1 and self.getConfig("include_tocpage") and not self.metaonly : self._write(out,START.substitute(self.story.getAllMetadata())) - for index, (title,html) in enumerate(self.story.getChapters()): + for index, (title,html) in enumerate(self.story.getChapters(self)): if html: self._write(out,ENTRY.substitute({'chapter':title, 'index':"%04d"%(index+1)})) diff --git a/fanficdownloader/writers/writer_epub.py b/fanficdownloader/writers/writer_epub.py index 6db5ef56..312c937b 100644 --- a/fanficdownloader/writers/writer_epub.py +++ b/fanficdownloader/writers/writer_epub.py @@ -441,7 +441,7 @@ div { margin: 0pt; padding: 0pt; } if self.getConfig("include_titlepage"): items.append(("title_page","OEBPS/title_page.xhtml","application/xhtml+xml","Title Page")) itemrefs.append("title_page") - if len(self.story.getChapters()) > 1 and self.getConfig("include_tocpage") and not self.metaonly : + if len(self.story.getChapters(self)) > 1 and self.getConfig("include_tocpage") and not self.metaonly : items.append(("toc_page","OEBPS/toc_page.xhtml","application/xhtml+xml","Table of Contents")) itemrefs.append("toc_page") @@ -449,7 +449,7 @@ div { margin: 0pt; padding: 0pt; } items.append(("log_page","OEBPS/log_page.xhtml","application/xhtml+xml","Update Log")) itemrefs.append("log_page") - for index, (title,html) in enumerate(self.story.getChapters()): + for index, (title,html) in enumerate(self.story.getChapters(self)): if html: i=index+1 items.append(("file%04d"%i, @@ -587,7 +587,7 @@ div { margin: 0pt; padding: 0pt; } outputepub.writestr("OEBPS/log_page.xhtml",logpageIO.getvalue()) logpageIO.close() - for index, (title,html) in enumerate(self.story.getChapters()): + for index, (title,html) in enumerate(self.story.getChapters(self)): if html: logging.debug('Writing chapter text for: %s' % title) fullhtml = 
self.EPUB_CHAPTER_START.substitute({'chapter':title, 'index':index+1}) + html + self.EPUB_CHAPTER_END.substitute({'chapter':title, 'index':index+1}) diff --git a/fanficdownloader/writers/writer_html.py b/fanficdownloader/writers/writer_html.py index 85ba1fa5..5caf1bca 100644 --- a/fanficdownloader/writers/writer_html.py +++ b/fanficdownloader/writers/writer_html.py @@ -94,7 +94,7 @@ ${output_css} self.HTML_TOC_ENTRY, self.HTML_TOC_PAGE_END) - for index, (title,html) in enumerate(self.story.getChapters()): + for index, (title,html) in enumerate(self.story.getChapters(self)): if html: logging.debug('Writing chapter text for: %s' % title) self._write(out,self.HTML_CHAPTER_START.substitute({'chapter':title, 'index':"%04d"%(index+1)})) diff --git a/fanficdownloader/writers/writer_mobi.py b/fanficdownloader/writers/writer_mobi.py index 8a443b88..07d8704c 100644 --- a/fanficdownloader/writers/writer_mobi.py +++ b/fanficdownloader/writers/writer_mobi.py @@ -169,7 +169,7 @@ ${value}
# files.append(tocpageIO.getvalue()) # tocpageIO.close() - for index, (title,html) in enumerate(self.story.getChapters()): + for index, (title,html) in enumerate(self.story.getChapters(self)): if html: logging.debug('Writing chapter text for: %s' % title) fullhtml = self.MOBI_CHAPTER_START.substitute({'chapter':title, 'index':index+1}) + html + self.MOBI_CHAPTER_END.substitute({'chapter':title, 'index':index+1}) diff --git a/fanficdownloader/writers/writer_txt.py b/fanficdownloader/writers/writer_txt.py index 6b9f35b0..47545402 100644 --- a/fanficdownloader/writers/writer_txt.py +++ b/fanficdownloader/writers/writer_txt.py @@ -133,7 +133,7 @@ End file. self._write(out,self.lineends(self.wraplines(towrap))) - for index, (title,html) in enumerate(self.story.getChapters()): + for index, (title,html) in enumerate(self.story.getChapters(self)): if html: logging.debug('Writing chapter text for: %s' % title) self._write(out,self.lineends(self.wraplines(removeAllEntities(self.TEXT_CHAPTER_START.substitute({'chapter':title, 'index':index+1}))))) diff --git a/plugin-defaults.ini b/plugin-defaults.ini index 49ca8e46..713d3560 100644 --- a/plugin-defaults.ini +++ b/plugin-defaults.ini @@ -152,6 +152,45 @@ extratags: FanFiction # ${category} => Buffy:? [tT]he Vampire Slayer => BuffyCover # ${category} => Star Trek => StarTrekCover +## If set false, the summary will have all html stripped. +## Both this and include_images must be true to get images in the +## summary. +keep_summary_html:true + +## Don't like the numbers at the start of chapter titles on some +## sites? You can use strip_chapter_numbers to strip them off. Just +## want to make them all look the same? Strip them off, then add them +## back on with add_chapter_numbers. Don't like the way it strips +## numbers or adds them back? See chapter_title_strip_pattern and +## chapter_title_add_pattern. +strip_chapter_numbers:false +add_chapter_numbers:false + +## (Two versions of chapter_title_strip_pattern are shown below. 
You +## should only have one uncommented.) +## This version will remove the leading number from: +## "1." => "" +## "1. The Beginning" => "The Beginning" +## "1: Start" => "Start" +## "2 - Chapter the second" => "Chapter the second" +## etc +chapter_title_strip_pattern:^[0-9]+[\.: -]+ + +## This version will strip all of the above *plus* remove 'Chapter 1': +## "Chapter 1" => "" +## "1. Chapter 1" => "" +## "1. Chapter 1, Bob's First Clue" => "Bob's First Clue" +## "Chapter 2 - Pirates Place" => "Pirates Place" +## etc +#chapter_title_strip_pattern:^([0-9]+[\.: -]+)?(Chapter *[0-9]+[\.:, -]*)? + +## Uses a python template substitution. The ${index} is the 'chapter' +## number and ${title} is the chapter title, after applying +## chapter_title_strip_pattern. Those are the only variables available. +## "The Beginning" => "1. The Beginning" +chapter_title_add_pattern:${index}. ${title} + + ## Each output format has a section that overrides [defaults] [html] @@ -256,11 +295,6 @@ output_css: ## stories. Images will be converted to jpg for size if possible. #include_images:false -## If not set, the summary will have all html stripped for safety. -## Both this and include_images must be true to get images in the -## summary. -#keep_summary_html:false - ## If set, the first image found will be made the cover image. If ## keep_summary_html is true, any images in summary will be before any ## in chapters. diff --git a/plugin-example.ini b/plugin-example.ini index 1c92afc3..11839b22 100644 --- a/plugin-example.ini +++ b/plugin-example.ini @@ -6,6 +6,15 @@ ## adult content. Uncomment by removing '#' in front of is_adult. #is_adult:true +## Don't like the numbers at the start of chapter titles on some +## sites? You can use strip_chapter_numbers to strip them off. Just +## want to make them all look the same? Strip them off, then add them +## back on with add_chapter_numbers. Don't like the way it strips +## numbers or adds them back? 
See chapter_title_strip_pattern and +## chapter_title_add_pattern. +#strip_chapter_numbers:true +#add_chapter_numbers:true + [epub] ## include images from img tags in the body and summary of stories. ## Images will be converted to jpg for size if possible. Images work diff --git a/readme.txt b/readme.txt index b602c53d..e21a5b1b 100644 --- a/readme.txt +++ b/readme.txt @@ -1,3 +1,21 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Other code contributed by Pau Sanchez (bbcodeutils). + To use, do: python downloader.py [-f (epub|html|txt)]