diff --git a/resources/recipes/nytimes.recipe b/resources/recipes/nytimes.recipe
index eaa428e731..6f80f4f85f 100644
--- a/resources/recipes/nytimes.recipe
+++ b/resources/recipes/nytimes.recipe
@@ -685,3 +685,28 @@ def postprocess_html(self,soup, True):
divTag.replaceWith(tag)
return soup
+
+    def populate_article_metadata(self, article, soup, first):
+        """Fill in a missing article summary from the article body text.
+
+        Does nothing when ``article.text_summary`` is already non-blank.
+        Otherwise walks the ``<p>`` tags inside every
+        ``<div class="articleBody">`` of ``soup`` and stores the first
+        paragraph longer than 70 characters in both ``article.summary`` and
+        ``article.text_summary``.  The most recent shorter paragraph, if
+        any, is used as a prefix (followed by "- " when it is a single word
+        that does not end with a colon).
+
+        :param article: object whose ``summary``/``text_summary`` are set
+        :param soup: parsed article document providing ``findAll``
+        :param first: not used by this method
+        """
+        shortparagraph = ""
+        try:
+            if article.text_summary.strip():
+                return
+            articlebodies = soup.findAll('div', attrs={'class': 'articleBody'})
+            for articlebody in articlebodies or ():
+                if not articlebody:
+                    continue
+                for p in articlebody.findAll('p'):
+                    refparagraph = self.massageNCXText(self.tag_to_string(p, use_alt=False)).strip()
+                    # account for blank paragraphs and short paragraphs by appending them to longer ones
+                    if not refparagraph:
+                        continue
+                    if len(refparagraph) > 70:  # approximately one line of text
+                        article.summary = article.text_summary = shortparagraph + refparagraph
+                        return
+                    shortparagraph = refparagraph + " "
+                    if " " not in refparagraph and not refparagraph.endswith(":"):
+                        shortparagraph = shortparagraph + "- "
+        except Exception:
+            # Best effort: a failure here must not abort the download, but
+            # KeyboardInterrupt/SystemExit are deliberately not swallowed.
+            self.log("Error creating article descriptions")
+            return
+
diff --git a/resources/recipes/nytimes_sub.recipe b/resources/recipes/nytimes_sub.recipe
index e56fd9cdec..8ac7c735f7 100644
--- a/resources/recipes/nytimes_sub.recipe
+++ b/resources/recipes/nytimes_sub.recipe
@@ -685,4 +685,27 @@ def postprocess_html(self,soup, True):
divTag.replaceWith(tag)
return soup
+    def populate_article_metadata(self, article, soup, first):
+        """Fill in a missing article summary from the article body text.
+
+        Does nothing when ``article.text_summary`` is already non-blank.
+        Otherwise walks the ``<p>`` tags inside every
+        ``<div class="articleBody">`` of ``soup`` and stores the first
+        paragraph longer than 70 characters in both ``article.summary`` and
+        ``article.text_summary``.  The most recent shorter paragraph, if
+        any, is used as a prefix (followed by "- " when it is a single word
+        that does not end with a colon).
+
+        :param article: object whose ``summary``/``text_summary`` are set
+        :param soup: parsed article document providing ``findAll``
+        :param first: not used by this method
+        """
+        shortparagraph = ""
+        try:
+            if article.text_summary.strip():
+                return
+            articlebodies = soup.findAll('div', attrs={'class': 'articleBody'})
+            for articlebody in articlebodies or ():
+                if not articlebody:
+                    continue
+                for p in articlebody.findAll('p'):
+                    refparagraph = self.massageNCXText(self.tag_to_string(p, use_alt=False)).strip()
+                    # account for blank paragraphs and short paragraphs by appending them to longer ones
+                    if not refparagraph:
+                        continue
+                    if len(refparagraph) > 70:  # approximately one line of text
+                        article.summary = article.text_summary = shortparagraph + refparagraph
+                        return
+                    shortparagraph = refparagraph + " "
+                    if " " not in refparagraph and not refparagraph.endswith(":"):
+                        shortparagraph = shortparagraph + "- "
+        except Exception:
+            # Best effort: a failure here must not abort the download, but
+            # KeyboardInterrupt/SystemExit are deliberately not swallowed.
+            self.log("Error creating article descriptions")
+            return
diff --git a/resources/recipes/tyzden.recipe b/resources/recipes/tyzden.recipe
index c206244ff6..b8d7389fbe 100644
--- a/resources/recipes/tyzden.recipe
+++ b/resources/recipes/tyzden.recipe
@@ -28,7 +28,7 @@ class TyzdenRecipe(BasicNewsRecipe):
if (weeknum > 1):
weeknum -= 1
- title = u'.tyzden ' + str(weeknum) + '/' + str(year)
+ title = u'tyzden'
base_url_path = 'http://www.tyzden.sk/casopis/' + str(year) + '/' + str(weeknum)
base_url = base_url_path + '.html'
diff --git a/src/calibre/devices/nook/driver.py b/src/calibre/devices/nook/driver.py
index 987b90c748..ca05885645 100644
--- a/src/calibre/devices/nook/driver.py
+++ b/src/calibre/devices/nook/driver.py
@@ -91,3 +91,19 @@ class NOOK_COLOR(NOOK):
EBOOK_DIR_MAIN = 'My Files/Books'
+ '''
+ def create_upload_path(self, path, mdata, fname, create_dirs=True):
+ filepath = NOOK.create_upload_path(self, path, mdata, fname,
+ create_dirs=create_dirs)
+ edm = self.EBOOK_DIR_MAIN.replace('/', os.sep)
+ npath = os.path.join(edm, _('News')) + os.sep
+ if npath in filepath:
+ filepath = filepath.replace(npath, os.sep.join('My Files',
+ 'Magazines')+os.sep)
+ filedir = os.path.dirname(filepath)
+ if create_dirs and not os.path.exists(filedir):
+ os.makedirs(filedir)
+
+ return filepath
+ '''
+
diff --git a/src/calibre/ebooks/conversion/plumber.py b/src/calibre/ebooks/conversion/plumber.py
index b1d760ea2d..9b22fb46ec 100644
--- a/src/calibre/ebooks/conversion/plumber.py
+++ b/src/calibre/ebooks/conversion/plumber.py
@@ -88,6 +88,7 @@ def __init__(self, input, output, log, report_progress=DummyReporter(),
self.ui_reporter = report_progress
self.abort_after_input_dump = abort_after_input_dump
+ # Pipeline options {{{
# Initialize the conversion options that are independent of input and
# output formats. The input and output plugins can still disable these
# options via recommendations.
@@ -527,6 +528,7 @@ def __init__(self, input, output, log, report_progress=DummyReporter(),
help=_('Set the book timestamp (used by the date column in calibre).')),
]
+ # }}}
input_fmt = os.path.splitext(self.input)[1]
if not input_fmt:
diff --git a/src/calibre/ebooks/fb2/fb2ml.py b/src/calibre/ebooks/fb2/fb2ml.py
index 4dd6e7c7ae..796a94533a 100644
--- a/src/calibre/ebooks/fb2/fb2ml.py
+++ b/src/calibre/ebooks/fb2/fb2ml.py
@@ -16,7 +16,6 @@
from lxml import etree
-from calibre import guess_type
from calibre import prepare_string_for_xml
from calibre.constants import __appname__, __version__
from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace
@@ -41,7 +40,7 @@ def reset_state(self):
# in different directories. FB2 images are all in a flat layout so we rename all images
# into a sequential numbering system to ensure there are no collisions between image names.
self.image_hrefs = {}
- # Mapping of toc items and their
+ # Mapping of toc items and their
self.toc = {}
# Used to see whether a new \s*
', '
\n\n', text) - + text = re.sub(r'(?miu)
some textile
' + """ + self.html_type = html_type + + # text = unicode(text) + text = _normalize_newlines(text) + + if self.restricted: + text = self.encode_html(text, quotes=False) + + if rel: + self.rel = ' rel="%s"' % rel + + text = self.getRefs(text) + + text = self.block(text, int(head_offset)) + + text = self.retrieve(text) + + return text + + def pba(self, input, element=None): + """ + Parse block attributes. + + >>> t = Textile() + >>> t.pba(r'\3') + '' + >>> t.pba(r'\\3', element='td') + ' colspan="3"' + >>> t.pba(r'/4', element='td') + ' rowspan="4"' + >>> t.pba(r'\\3/4', element='td') + ' colspan="3" rowspan="4"' + + >>> t.vAlign('^') + 'top' + + >>> t.pba('^', element='td') + ' style="vertical-align:top;"' + + >>> t.pba('{line-height:18px}') + ' style="line-height:18px;"' + + >>> t.pba('(foo-bar)') + ' class="foo-bar"' + + >>> t.pba('(#myid)') + ' id="myid"' + + >>> t.pba('(foo-bar#myid)') + ' class="foo-bar" id="myid"' + + >>> t.pba('((((') + ' style="padding-left:4em;"' + + >>> t.pba(')))') + ' style="padding-right:3em;"' + + >>> t.pba('[fr]') + ' lang="fr"' + + """ + style = [] + aclass = '' + lang = '' + colspan = '' + rowspan = '' + id = '' + + if not input: + return '' + + matched = input + if element == 'td': + m = re.search(r'\\(\d+)', matched) + if m: + colspan = m.group(1) + + m = re.search(r'/(\d+)', matched) + if m: + rowspan = m.group(1) + + if element == 'td' or element == 'tr': + m = re.search(r'(%s)' % self.vlgn, matched) + if m: + style.append("vertical-align:%s;" % self.vAlign(m.group(1))) + + m = re.search(r'\{([^}]*)\}', matched) + if m: + style.append(m.group(1).rstrip(';') + ';') + matched = matched.replace(m.group(0), '') + + m = re.search(r'\[([^\]]+)\]', matched, re.U) + if m: + lang = m.group(1) + matched = matched.replace(m.group(0), '') + + m = re.search(r'\(([^()]+)\)', matched, re.U) + if m: + aclass = m.group(1) + matched = matched.replace(m.group(0), '') + + m = re.search(r'([(]+)', matched) + if m: + style.append("padding-left:%sem;" 
% len(m.group(1))) + matched = matched.replace(m.group(0), '') + + m = re.search(r'([)]+)', matched) + if m: + style.append("padding-right:%sem;" % len(m.group(1))) + matched = matched.replace(m.group(0), '') + + m = re.search(r'(%s)' % self.hlgn, matched) + if m: + style.append("text-align:%s;" % self.hAlign(m.group(1))) + + m = re.search(r'^(.*)#(.*)$', aclass) + if m: + id = m.group(2) + aclass = m.group(1) + + if self.restricted: + if lang: + return ' lang="%s"' + else: + return '' + + result = [] + if style: + result.append(' style="%s"' % "".join(style)) + if aclass: + result.append(' class="%s"' % aclass) + if lang: + result.append(' lang="%s"' % lang) + if id: + result.append(' id="%s"' % id) + if colspan: + result.append(' colspan="%s"' % colspan) + if rowspan: + result.append(' rowspan="%s"' % rowspan) + return ''.join(result) + + def hasRawText(self, text): + """ + checks whether the text has text not already enclosed by a block tag + + >>> t = Textile() + >>> t.hasRawText('foo bar biz baz
') + False + + >>> t.hasRawText(' why yes, yes it does') + True + + """ + r = re.compile(r'<(p|blockquote|div|form|table|ul|ol|pre|h\d)[^>]*?>.*\1>', re.S).sub('', text.strip()).strip() + r = re.compile(r'<(hr|br)[^>]*?/>').sub('', r) + return '' != r + + def table(self, text): + r""" + >>> t = Textile() + >>> t.table('|one|two|three|\n|a|b|c|') + '\t| one | \n\t\t\ttwo | \n\t\t\tthree | \n\t\t
| a | \n\t\t\tb | \n\t\t\tc | \n\t\t
\\n', '\\t\\t') + + >>> t.fBlock("bq", "", None, "http://google.com", "Hello BlockQuote") + ('\\t', 'Hello BlockQuote', '
', '\\n\\t
\\n', '\\t\\t') + + >>> t.fBlock("bc", "", None, "", 'printf "Hello, World";') # doctest: +ELLIPSIS + ('', 'Hello BlockQuote', '
', '\\n\\t
', '', ..., '', '')
+
+ >>> t.fBlock("h1", "", None, "", "foobar")
+ ('', '\\t\n" % (cite, atts) + o2 = "\t\t" + + elif tag == 'bc': + o1 = "" % atts + c2 = "
" + c1 = "\n\t
" % atts
+ o2 = "" % atts
+ c2 = ""
+ c1 = ""
+ content = self.shelve(self.encode_html(content.rstrip("\n") + "\n"))
+
+ elif tag == 'notextile':
+ content = self.shelve(content)
+ o1 = o2 = ''
+ c1 = c2 = ''
+
+ elif tag == 'pre':
+ content = self.shelve(self.encode_html(content.rstrip("\n") + "\n"))
+ o1 = "" % atts + o2 = c2 = '' + c1 = '' + + else: + o2 = "\t<%s%s>" % (tag, atts) + c2 = "%s>" % tag + + content = self.graf(content) + return o1, o2, content, c2, c1 + + def footnoteRef(self, text): + """ + >>> t = Textile() + >>> t.footnoteRef('foo[1] ') # doctest: +ELLIPSIS + 'foo1 ' + """ + return re.sub(r'\b\[([0-9]+)\](\s)?', self.footnoteID, text) + + def footnoteID(self, match): + id, t = match.groups() + if id not in self.fn: + self.fn[id] = str(uuid.uuid4()) + fnid = self.fn[id] + if not t: + t = '' + return '%s%s' % (fnid, id, t) + + def glyphs(self, text): + """ + >>> t = Textile() + + >>> t.glyphs("apostrophe's") + 'apostrophe’s' + + >>> t.glyphs("back in '88") + 'back in ’88' + + >>> t.glyphs('foo ...') + 'foo …' + + >>> t.glyphs('--') + '—' + + >>> t.glyphs('FooBar[tm]') + 'FooBar™' + + >>> t.glyphs("
Cat's Cradle by Vonnegut
") + 'Cat’s Cradle by Vonnegut
' + + """ + # fix: hackish + text = re.sub(r'"\Z', '\" ', text) + + glyph_search = ( + re.compile(r"(\w)\'(\w)"), # apostrophe's + re.compile(r'(\s)\'(\d+\w?)\b(?!\')'), # back in '88 + re.compile(r'(\S)\'(?=\s|'+self.pnct+'|<|$)'), # single closing + re.compile(r'\'/'), # single opening + re.compile(r'(\S)\"(?=\s|'+self.pnct+'|<|$)'), # double closing + re.compile(r'"'), # double opening + re.compile(r'\b([A-Z][A-Z0-9]{2,})\b(?:[(]([^)]*)[)])'), # 3+ uppercase acronym + re.compile(r'\b([A-Z][A-Z\'\-]+[A-Z])(?=[\s.,\)>])'), # 3+ uppercase + re.compile(r'\b(\s{0,1})?\.{3}'), # ellipsis + re.compile(r'(\s?)--(\s?)'), # em dash + re.compile(r'\s-(?:\s|$)'), # en dash + re.compile(r'(\d+)( ?)x( ?)(?=\d+)'), # dimension sign + re.compile(r'\b ?[([]TM[])]', re.I), # trademark + re.compile(r'\b ?[([]R[])]', re.I), # registered + re.compile(r'\b ?[([]C[])]', re.I), # copyright + ) + + glyph_replace = [x % dict(self.glyph_defaults) for x in ( + r'\1%(txt_apostrophe)s\2', # apostrophe's + r'\1%(txt_apostrophe)s\2', # back in '88 + r'\1%(txt_quote_single_close)s', # single closing + r'%(txt_quote_single_open)s', # single opening + r'\1%(txt_quote_double_close)s', # double closing + r'%(txt_quote_double_open)s', # double opening + r'\1', # 3+ uppercase acronym + r'\1', # 3+ uppercase + r'\1%(txt_ellipsis)s', # ellipsis + r'\1%(txt_emdash)s\2', # em dash + r' %(txt_endash)s ', # en dash + r'\1\2%(txt_dimension)s\3', # dimension sign + r'%(txt_trademark)s', # trademark + r'%(txt_registered)s', # registered + r'%(txt_copyright)s', # copyright + )] + + result = [] + for line in re.compile(r'(<.*?>)', re.U).split(text): + if not re.search(r'<.*>', line): + for s, r in zip(glyph_search, glyph_replace): + line = s.sub(r, line) + result.append(line) + return ''.join(result) + + def vAlign(self, input): + d = {'^':'top', '-':'middle', '~':'bottom'} + return d.get(input, '') + + def hAlign(self, input): + d = {'<':'left', '=':'center', '>':'right', '<>': 'justify'} + return d.get(input, 
'') + + def getRefs(self, text): + """ + what is this for? + """ + pattern = re.compile(r'(?:(?<=^)|(?<=\s))\[(.+)\]((?:http(?:s?):\/\/|\/)\S+)(?=\s|$)', re.U) + text = pattern.sub(self.refs, text) + return text + + def refs(self, match): + flag, url = match.groups() + self.urlrefs[flag] = url + return '' + + def checkRefs(self, url): + return self.urlrefs.get(url, url) + + def isRelURL(self, url): + """ + Identify relative urls. + + >>> t = Textile() + >>> t.isRelURL("http://www.google.com/") + False + >>> t.isRelURL("/foo") + True + + """ + (scheme, netloc) = urlparse(url)[0:2] + return not scheme and not netloc + + def relURL(self, url): + scheme = urlparse(url)[0] + if self.restricted and scheme and scheme not in self.url_schemes: + return '#' + return url + + def shelve(self, text): + id = str(uuid.uuid4()) + self.shelf[id] = text + return id + + def retrieve(self, text): + """ + >>> t = Textile() + >>> id = t.shelve("foobar") + >>> t.retrieve(id) + 'foobar' + """ + while True: + old = text + for k, v in self.shelf.items(): + text = text.replace(k, v) + if text == old: + break + return text + + def encode_html(self, text, quotes=True): + a = ( + ('&', '&'), + ('<', '<'), + ('>', '>') + ) + + if quotes: + a = a + ( + ("'", '''), + ('"', '"') + ) + + for k, v in a: + text = text.replace(k, v) + return text + + def graf(self, text): + if not self.lite: + text = self.noTextile(text) + text = self.code(text) + + text = self.links(text) + + if not self.noimage: + text = self.image(text) + + if not self.lite: + text = self.lists(text) + text = self.table(text) + + text = self.span(text) + text = self.footnoteRef(text) + text = self.glyphs(text) + + return text.rstrip('\n') + + def links(self, text): + """ + >>> t = Textile() + >>> t.links('fooobar "Google":http://google.com/foobar/ and hello world "flickr":http://flickr.com/photos/jsamsa/ ') # doctest: +ELLIPSIS + 'fooobar ... and hello world ...' 
+ """ + + punct = '!"#$%&\'*+,-./:;=?@\\^_`|~' + + pattern = r''' + (?P [\s\[{(]|[%s] )?
+ " # start
+ (?P %s )
+ (?P [^"]+? )
+ \s?
+ (?: \(([^)]+?)\)(?=") )? # $title
+ ":
+ (?P (?:ftp|https?)? (?: :// )? [-A-Za-z0-9+&@#/?=~_()|!:,.;]*[-A-Za-z0-9+&@#/=~_()|] )
+ (?P [^\w\/;]*? )
+ (?=<|\s|$)
+ ''' % (re.escape(punct), self.c)
+
+ text = re.compile(pattern, re.X).sub(self.fLink, text)
+
+ return text
+
+    def fLink(self, match):
+        """
+        Substitution callback that renders one textile link as an anchor tag.
+
+        ``match`` must provide six groups, in order: pre, atts, text, title,
+        url, post.  The link text is passed through image() (unless
+        ``self.noimage``), span() and glyphs(); the finished tag is shelved
+        so later passes do not touch it.
+
+        Returns ``pre`` + the shelf placeholder + ``post``.
+        """
+        pre, atts, text, title, url, post = match.groups()
+
+        if pre is None:
+            pre = ''
+
+        # assume ) at the end of the url is not actually part of the url
+        # unless the url also contains a (
+        if url.endswith(')') and '(' not in url:
+            post = url[-1] + post
+            url = url[:-1]
+
+        url = self.checkRefs(url)
+
+        atts = self.pba(atts)
+        if title:
+            atts = atts + ' title="%s"' % self.encode_html(title)
+
+        if not self.noimage:
+            text = self.image(text)
+
+        text = self.span(text)
+        text = self.glyphs(text)
+
+        url = self.relURL(url)
+        # Four values are interpolated: href, block attributes, rel and text.
+        out = '<a href="%s"%s%s>%s</a>' % (self.encode_html(url), atts, self.rel, text)
+        out = self.shelve(out)
+        return ''.join([pre, out, post])
+
+    def span(self, text):
+        """
+        Convert inline textile phrase modifiers in ``text`` to HTML tags.
+
+        Every delimiter in ``qtags`` is tried in turn; each match is
+        rendered by fSpan().
+
+        >>> t = Textile()
+        >>> t.span(r"hello %(bob)span *strong* and **bold**% goodbye")
+        'hello <span class="bob">span <strong>strong</strong> and <b>bold</b></span> goodbye'
+        """
+        qtags = (r'\*\*', r'\*', r'\?\?', r'\-', r'__', r'_', r'%', r'\+', r'~', r'\^')
+        pnct = ".,\"'?!;:"
+
+        for qtag in qtags:
+            pattern = re.compile(r"""
+                (?:^|(?<=[\s>%(pnct)s])|([\]}]))
+                (%(qtag)s)(?!%(qtag)s)
+                (%(c)s)
+                (?::(\S+))?
+                ([^\s%(qtag)s]+|\S[^%(qtag)s\n]*[^\s%(qtag)s\n])
+                ([%(pnct)s]*)
+                %(qtag)s
+                (?:$|([\]}])|(?=%(selfpnct)s{1,2}|\s))
+            """ % {'qtag':qtag, 'c':self.c, 'pnct':pnct,
+                'selfpnct':self.pnct}, re.X)
+            text = pattern.sub(self.fSpan, text)
+        return text
+
+    def fSpan(self, match):
+        """
+        Substitution callback for span(): build the HTML element for one
+        phrase modifier, recursing into its content for nested modifiers.
+        """
+        # The seven-way unpack assumes self.c adds no capturing groups of
+        # its own -- TODO confirm against the definition of self.c.
+        _pre, tag, atts, cite, content, end, _post = match.groups()
+
+        qtags = {
+            '*': 'strong',
+            '**': 'b',
+            '??': 'cite',
+            '_' : 'em',
+            '__': 'i',
+            '-' : 'del',
+            '%' : 'span',
+            '+' : 'ins',
+            '~' : 'sub',
+            '^' : 'sup'
+        }
+        tag = qtags[tag]
+        atts = self.pba(atts)
+        if cite:
+            # leading space keeps cite separate from any preceding attribute
+            atts = atts + ' cite="%s"' % cite
+
+        content = self.span(content)
+
+        out = "<%s%s>%s%s</%s>" % (tag, atts, content, end, tag)
+        return out
+
+    def image(self, text):
+        """
+        Replace textile image markup (``!src(title)!:href``) in ``text``
+        with HTML produced by fImage().
+
+        >>> t = Textile()
+        >>> t.image('!/imgs/myphoto.jpg!:http://jsamsa.com')
+        '<a href="http://jsamsa.com" class="img"><img src="/imgs/myphoto.jpg" alt="" /></a>'
+        """
+        pattern = re.compile(r"""
+            (?:[\[{])?          # pre
+            \!                  # opening !
+            (%s)                # optional style,class atts
+            (?:\. )?            # optional dot-space
+            ([^\s(!]+)          # presume this is the src
+            \s?                 # optional space
+            (?:\(([^\)]+)\))?   # optional title
+            \!                  # closing
+            (?::(\S+))?         # optional href
+            (?:[\]}]|(?=\s|$))  # lookahead: space or end of string
+        """ % self.c, re.U|re.X)
+        return pattern.sub(self.fImage, text)
+
+    def fImage(self, match):
+        """
+        Substitution callback for image(): build the ``<img>`` tag for one
+        match, wrapped in an anchor when an href was given.
+
+        The tag is self-closed (`` />``) unless ``self.html_type`` is
+        ``'html'``.
+        """
+        # groups: block attributes, image src, optional title, optional href
+        atts, url, title, href = match.groups()
+        atts = self.pba(atts)
+
+        # NOTE(review): title is interpolated unescaped here, whereas fLink
+        # escapes its title -- confirm whether that is intended.
+        if title:
+            atts = atts + ' title="%s" alt="%s"' % (title, title)
+        else:
+            atts = atts + ' alt=""'
+
+        if not self.isRelURL(url) and self.get_sizes:
+            size = getimagesize(url)
+            if size:
+                atts += " %s" % size
+
+        if href:
+            href = self.checkRefs(href)
+
+        url = self.checkRefs(url)
+        url = self.relURL(url)
+
+        out = []
+        if href:
+            out.append('<a href="%s" class="img">' % href)
+        if self.html_type == 'html':
+            out.append('<img src="%s"%s>' % (url, atts))
+        else:
+            out.append('<img src="%s"%s />' % (url, atts))
+        if href:
+            out.append('</a>')
+
+        return ''.join(out)
+
+    def code(self, text):
+        """
+        Protect inline code (``<code>...</code>`` or ``@...@``) and
+        preformatted ``<pre>...</pre>`` spans in ``text`` by shelving them.
+        """
+        text = self.doSpecial(text, '<code>', '</code>', self.fCode)
+        text = self.doSpecial(text, '@', '@', self.fCode)
+        text = self.doSpecial(text, '<pre>', '</pre>', self.fPre)
+        return text
+
+    def fCode(self, match):
+        """
+        doSpecial() callback: wrap the matched text in a shelved ``<code>``
+        element, keeping the characters matched before and after it.
+        """
+        before, text, after = match.groups()
+        if after is None:
+            after = ''
+        # text needs to be escaped
+        if not self.restricted:
+            text = self.encode_html(text)
+        return ''.join([before, self.shelve('<code>%s</code>' % text), after])
+
+    def fPre(self, match):
+        """
+        doSpecial() callback: re-emit a ``<pre>`` element whose content is
+        shelved, keeping the characters matched before and after it.
+        """
+        before, text, after = match.groups()
+        if after is None:
+            after = ''
+        # text needs to be escaped
+        if not self.restricted:
+            text = self.encode_html(text)
+        return ''.join([before, '<pre>', self.shelve(text), '</pre>', after])
+
+    def doSpecial(self, text, start, end, method=None):
+        """
+        Run ``method`` over every ``start``...``end`` delimited span of ``text``.
+
+        ``start`` and ``end`` are literal delimiters (they are regex-escaped
+        here).  ``method`` is a substitution callback receiving a match with
+        three groups: the character before the span (or ''), the enclosed
+        text, and the optional character after it (None when absent).
+        Defaults to ``self.fSpecial``.
+        """
+        if method is None:
+            method = self.fSpecial
+        pattern = re.compile(r'(^|\s|[\[({>])%s(.*?)%s(\s|$|[\])}])?' % (re.escape(start), re.escape(end)), re.M|re.S)
+        return pattern.sub(method, text)
+
+    def fSpecial(self, match):
+        """
+        special blocks like notextile or code
+
+        Default doSpecial() callback: HTML-encode the matched text, shelve
+        it, and return it between the characters matched before and after.
+        """
+        before, text, after = match.groups()
+        if after is None:
+            after = ''
+        return ''.join([before, self.shelve(self.encode_html(text)), after])
+
+    def noTextile(self, text):
+        """
+        Shelve every ``<notextile>...</notextile>`` and ``==...==`` span of
+        ``text`` unchanged so later passes leave it alone.
+        """
+        text = self.doSpecial(text, '<notextile>', '</notextile>', self.fTextile)
+        return self.doSpecial(text, '==', '==', self.fTextile)
+
+    def fTextile(self, match):
+        """
+        doSpecial() callback: shelve the matched text as-is (no escaping),
+        keeping the characters matched before and after it.
+        """
+        before, notextile, after = match.groups()
+        if after is None:
+            after = ''
+        return ''.join([before, self.shelve(notextile), after])
+
+
+def textile(text, head_offset=0, html_type='xhtml', encoding=None, output=None):
+    """
+    Convert textile markup in ``text`` using a Textile instance created
+    with default arguments.
+
+    this function takes additional parameters:
+    head_offset - offset to apply to heading levels (default: 0)
+    html_type - 'xhtml' or 'html' style tags (default: 'xhtml')
+
+    ``encoding`` and ``output`` are accepted but not used here.
+    """
+    converter = Textile()
+    return converter.textile(text, head_offset=head_offset, html_type=html_type)
+
+def textile_restricted(text, lite=True, noimage=True, html_type='xhtml'):
+    """
+    Restricted version of Textile designed for weblog comments and other
+    untrusted input.
+
+    Raw HTML is escaped.
+    Style attributes are disabled.
+    rel='nofollow' is added to external links.
+
+    When lite=True is set (the default):
+    Block tags are restricted to p, bq, and bc.
+    Lists and tables are disabled.
+
+    When noimage=True is set (the default):
+    Image tags are disabled.
+
+    """
+    converter = Textile(restricted=True, lite=lite, noimage=noimage)
+    return converter.textile(text, rel='nofollow', html_type=html_type)
+
diff --git a/src/calibre/ebooks/txt/input.py b/src/calibre/ebooks/txt/input.py
index aaff8b55c0..0b0bd6d570 100644
--- a/src/calibre/ebooks/txt/input.py
+++ b/src/calibre/ebooks/txt/input.py
@@ -12,7 +12,7 @@
from calibre.ebooks.txt.processor import convert_basic, convert_markdown, \
separate_paragraphs_single_line, separate_paragraphs_print_formatted, \
preserve_spaces, detect_paragraph_type, detect_formatting_type, \
- convert_heuristic, normalize_line_endings
+ convert_heuristic, normalize_line_endings, convert_textile
from calibre import _ent_pat, xml_entity_to_unicode
class TXTInput(InputFormatPlugin):
@@ -41,6 +41,7 @@ class TXTInput(InputFormatPlugin):
'paragraph and no styling is applied.\n'
'* heuristic: Process using heuristics to determine formatting such '
'as chapter headings and italic text.\n'
+ '* textile: Processing using textile formatting.\n'
'* markdown: Processing using markdown formatting. '
'To learn more about markdown see')+' http://daringfireball.net/projects/markdown/'),
OptionRecommendation(name='preserve_spaces', recommended_value=False,
@@ -91,6 +92,9 @@ def convert(self, stream, options, file_ext, log,
except RuntimeError:
raise ValueError('This txt file has malformed markup, it cannot be'
' converted by calibre. See http://daringfireball.net/projects/markdown/syntax')
+ elif options.formatting_type == 'textile':
+ log.debug('Running text though textile conversion...')
+ html = convert_textile(txt)
else:
# Determine the paragraph type of the document.
if options.paragraph_type == 'auto':
diff --git a/src/calibre/ebooks/txt/processor.py b/src/calibre/ebooks/txt/processor.py
index 6a1a106681..3702bbfabe 100644
--- a/src/calibre/ebooks/txt/processor.py
+++ b/src/calibre/ebooks/txt/processor.py
@@ -7,7 +7,6 @@
import os, re
from calibre import prepare_string_for_xml, isbytestring
-from calibre.ebooks.markdown import markdown
from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.ebooks.txt.heuristicprocessor import TXTHeuristicProcessor
from calibre.ebooks.conversion.preprocess import DocAnalysis
@@ -37,7 +36,7 @@ def clean_txt(txt):
chars = list(range(8)) + [0x0B, 0x0E, 0x0F] + list(range(0x10, 0x19))
illegal_chars = re.compile(u'|'.join(map(unichr, chars)))
txt = illegal_chars.sub('', txt)
-
+
return txt
def split_txt(txt, epub_split_size_kb=0):
@@ -74,12 +73,18 @@ def convert_heuristic(txt, title='', epub_split_size_kb=0):
return tp.convert(txt, title, epub_split_size_kb)
def convert_markdown(txt, title='', disable_toc=False):
+ from calibre.ebooks.markdown import markdown
md = markdown.Markdown(
extensions=['footnotes', 'tables', 'toc'],
extension_configs={"toc": {"disable_toc": disable_toc}},
safe_mode=False)
return HTML_TEMPLATE % (title, md.convert(txt))
+def convert_textile(txt, title=''):
+    '''
+    Run txt through the textile converter and wrap the result, together
+    with title, in HTML_TEMPLATE.
+    '''
+    # Imported lazily, like markdown in convert_markdown.
+    from calibre.ebooks.textile import textile
+    body = textile(txt, encoding='utf-8')
+    return HTML_TEMPLATE % (title, body)
+
def normalize_line_endings(txt):
txt = txt.replace('\r\n', '\n')
txt = txt.replace('\r', '\n')
@@ -115,66 +120,75 @@ def split_string_separator(txt, size) :
def detect_paragraph_type(txt):
'''
Tries to determine the formatting of the document.
-
+
block: Paragraphs are separated by a blank line.
single: Each line is a paragraph.
print: Each paragraph starts with a 2+ spaces or a tab
and ends when a new paragraph is reached.
unformatted: most lines have hard line breaks, few/no blank lines or indents
-
+
returns block, single, print, unformatted
'''
txt = txt.replace('\r\n', '\n')
txt = txt.replace('\r', '\n')
txt_line_count = len(re.findall('(?mu)^\s*.+$', txt))
-
+
# Check for hard line breaks - true if 55% of the doc breaks in the same region
docanalysis = DocAnalysis('txt', txt)
hardbreaks = docanalysis.line_histogram(.55)
-
+
if hardbreaks:
# Determine print percentage
tab_line_count = len(re.findall('(?mu)^(\t|\s{2,}).+$', txt))
print_percent = tab_line_count / float(txt_line_count)
-
+
# Determine block percentage
empty_line_count = len(re.findall('(?mu)^\s*$', txt))
block_percent = empty_line_count / float(txt_line_count)
-
+
# Compare the two types - the type with the larger number of instances wins
# in cases where only one or the other represents the vast majority of the document neither wins
if print_percent >= block_percent:
if .15 <= print_percent <= .75:
return 'print'
elif .15 <= block_percent <= .75:
- return 'block'
+ return 'block'
- # Assume unformatted text with hardbreaks if nothing else matches
+ # Assume unformatted text with hardbreaks if nothing else matches
return 'unformatted'
-
+
# return single if hardbreaks is false
return 'single'
def detect_formatting_type(txt):
+ markdown_count = 0
+ textile_count = 0
+
# Check for markdown
# Headings
- if len(re.findall('(?mu)^#+', txt)) >= 5:
- return 'markdown'
- if len(re.findall('(?mu)^=+$', txt)) >= 5:
- return 'markdown'
- if len(re.findall('(?mu)^-+$', txt)) >= 5:
- return 'markdown'
+ markdown_count += len(re.findall('(?mu)^#+', txt))
+ markdown_count += len(re.findall('(?mu)^=+$', txt))
+ markdown_count += len(re.findall('(?mu)^-+$', txt))
# Images
- if len(re.findall('(?u)!\[.*?\]\(.+?\)', txt)) >= 5:
- return 'markdown'
+ markdown_count += len(re.findall('(?u)!\[.*?\]\(.+?\)', txt))
# Links
- if len(re.findall('(?u)(^|(?P[^!]))\[.*?\]\([^)]+\)', txt)) >= 5:
- return 'markdown'
- # Escaped characters
- md_escapted_characters = ['\\', '`', '*', '_', '{', '}', '[', ']', '(', ')', '#', '+', '-', '.', '!']
- for c in md_escapted_characters:
- if txt.count('\\'+c) > 10:
+ markdown_count += len(re.findall('(?u)(^|(?P[^!]))\[.*?\]\([^)]+\)', txt))
+
+ # Check for textile
+ # Headings
+ textile_count += len(re.findall(r'(?mu)^h[1-6]\.', txt))
+ # Block quote.
+ textile_count += len(re.findall(r'(?mu)^bq\.', txt))
+ # Images
+ textile_count += len(re.findall(r'\![^\s]+(:[^\s]+)*', txt))
+ # Links
+ textile_count += len(re.findall(r'"(\(.+?\))*[^\(]+?(\(.+?\))*":[^\s]+', txt))
+
+ if markdown_count > 5 or textile_count > 5:
+ if markdown_count > textile_count:
return 'markdown'
-
+ else:
+ return 'textile'
+
return 'heuristic'
diff --git a/src/calibre/gui2/dialogs/user_profiles.py b/src/calibre/gui2/dialogs/user_profiles.py
index 71c9ebcd04..04c41f0c5e 100644
--- a/src/calibre/gui2/dialogs/user_profiles.py
+++ b/src/calibre/gui2/dialogs/user_profiles.py
@@ -4,7 +4,7 @@
import time, os
from PyQt4.Qt import SIGNAL, QUrl, QAbstractListModel, Qt, \
- QVariant, QInputDialog
+ QVariant
from calibre.web.feeds.recipes import compile_recipe
from calibre.web.feeds.news import AutomaticNewsRecipe
@@ -256,24 +256,61 @@ def add_profile(self, clicked):
def add_builtin_recipe(self):
from calibre.web.feeds.recipes.collection import \
- get_builtin_recipe_by_title, get_builtin_recipe_titles
- items = sorted(get_builtin_recipe_titles(), key=sort_key)
+ get_builtin_recipe_collection, get_builtin_recipe_by_id
+ from PyQt4.Qt import QDialog, QVBoxLayout, QListWidgetItem, \
+ QListWidget, QDialogButtonBox, QSize
+ d = QDialog(self)
+ d.l = QVBoxLayout()
+ d.setLayout(d.l)
+ d.list = QListWidget(d)
+ d.list.doubleClicked.connect(lambda x: d.accept())
+ d.l.addWidget(d.list)
+ d.bb = QDialogButtonBox(QDialogButtonBox.Ok|QDialogButtonBox.Cancel,
+ Qt.Horizontal, d)
+ d.bb.accepted.connect(d.accept)
+ d.bb.rejected.connect(d.reject)
+ d.l.addWidget(d.bb)
+ d.setWindowTitle(_('Choose builtin recipe'))
+ items = []
+ for r in get_builtin_recipe_collection():
+ id_ = r.get('id', '')
+ title = r.get('title', '')
+ lang = r.get('language', '')
+ if id_ and title:
+ items.append((title + ' [%s]'%lang, id_))
- title, ok = QInputDialog.getItem(self, _('Pick recipe'), _('Pick the recipe to customize'),
- items, 0, False)
- if ok:
- title = unicode(title)
- profile = get_builtin_recipe_by_title(title)
- if self._model.has_title(title):
- if question_dialog(self, _('Replace recipe?'),
- _('A custom recipe named %s already exists. Do you want to '
- 'replace it?')%title):
- self._model.replace_by_title(title, profile)
- else:
- return
+ items.sort(key=lambda x:sort_key(x[0]))
+ for title, id_ in items:
+ item = QListWidgetItem(title)
+ item.setData(Qt.UserRole, id_)
+ d.list.addItem(item)
+
+ d.resize(QSize(450, 400))
+ ret = d.exec_()
+ d.list.doubleClicked.disconnect()
+ if ret != d.Accepted:
+ return
+
+ items = list(d.list.selectedItems())
+ if not items:
+ return
+ item = items[-1]
+ id_ = unicode(item.data(Qt.UserRole).toString())
+ title = unicode(item.data(Qt.DisplayRole).toString()).rpartition(' [')[0]
+ profile = get_builtin_recipe_by_id(id_)
+ if profile is None:
+ raise Exception('Something weird happened')
+
+ if self._model.has_title(title):
+ if question_dialog(self, _('Replace recipe?'),
+ _('A custom recipe named %s already exists. Do you want to '
+ 'replace it?')%title):
+ self._model.replace_by_title(title, profile)
else:
- self.model.add(title, profile)
+ return
+ else:
+ self.model.add(title, profile)
self.clear()
diff --git a/src/calibre/gui2/preferences/plugins.ui b/src/calibre/gui2/preferences/plugins.ui
index 18f0786a66..83a904eb08 100644
--- a/src/calibre/gui2/preferences/plugins.ui
+++ b/src/calibre/gui2/preferences/plugins.ui
@@ -76,6 +76,10 @@
&Add a new plugin
+
+
+ :/images/plugins.png :/images/plugins.png
+
diff --git a/src/calibre/gui2/shortcuts.py b/src/calibre/gui2/shortcuts.py
index bdd699a69d..5e56435e10 100644
--- a/src/calibre/gui2/shortcuts.py
+++ b/src/calibre/gui2/shortcuts.py
@@ -150,7 +150,7 @@ def setModelData(self, editor, model, index):
custom = []
if editor.custom.isChecked():
for x in ('1', '2'):
- sc = getattr(editor, 'shortcut'+x)
+ sc = getattr(editor, 'shortcut'+x, None)
if sc is not None:
custom.append(sc)
@@ -266,6 +266,11 @@ def scrollTo(self, index):
self.view.scrollTo(index)
+    @property
+    def is_editing(self):
+        '''True while the view's state() equals its EditingState.'''
+        view = self.view
+        return view.state() == view.EditingState
+
+
if __name__ == '__main__':
from calibre.gui2 import is_ok_to_use_qt
from calibre.gui2.viewer.keys import SHORTCUTS
diff --git a/src/calibre/gui2/viewer/documentview.py b/src/calibre/gui2/viewer/documentview.py
index 55abae0392..4485e63373 100644
--- a/src/calibre/gui2/viewer/documentview.py
+++ b/src/calibre/gui2/viewer/documentview.py
@@ -120,6 +120,13 @@ def __init__(self, shortcuts, parent=None):
def accept(self, *args):
+ if self.shortcut_config.is_editing:
+ from calibre.gui2 import info_dialog
+ info_dialog(self, _('Still editing'),
+ _('You are in the middle of editing a keyboard shortcut'
+ ' first complete that, by clicking outside the '
+ ' shortcut editing box.'), show=True)
+ return
c = config()
c.set('serif_family', unicode(self.serif_family.currentFont().family()))
c.set('sans_family', unicode(self.sans_family.currentFont().family()))
diff --git a/src/calibre/utils/wmf/__init__.py b/src/calibre/utils/wmf/__init__.py
index 68dfb8d2b5..cb7736e06a 100644
--- a/src/calibre/utils/wmf/__init__.py
+++ b/src/calibre/utils/wmf/__init__.py
@@ -5,5 +5,52 @@
__copyright__ = '2011, Kovid Goyal '
__docformat__ = 'restructuredtext en'
+import glob
+
+from calibre.constants import plugins, iswindows, filesystem_encoding
+from calibre.ptempfile import TemporaryDirectory
+from calibre import CurrentDir
+from calibre.utils.magick import Image, PixelWand
+
+class Unavailable(Exception):
+    '''Raised by extract_raster_image when the wmf plugin is missing or failed to load.'''
+
+class NoRaster(Exception):
+    '''Raised by extract_raster_image when rendering produced no PNG file.'''
+
+def extract_raster_image(wmf_data):
+    '''
+    Render wmf_data with the wmf plugin inside a temporary directory and
+    return the first PNG it writes (in sorted filename order), rotated by
+    180 degrees and re-exported as PNG data.
+
+    :raises Unavailable: if the wmf plugin is missing or reported a load error
+    :raises NoRaster: if rendering wrote no ``*.png`` file
+    '''
+    try:
+        wmf, wmf_err = plugins['wmf']
+    except KeyError:
+        raise Unavailable('libwmf not available on this platform')
+    if wmf_err:
+        raise Unavailable(wmf_err)
+
+    if iswindows:
+        import sys
+        import os
+        appdir = sys.app_dir
+        if isinstance(appdir, unicode):
+            appdir = appdir.encode(filesystem_encoding)
+        fdir = os.path.join(appdir, 'wmffonts')
+        wmf.set_font_dir(fdir)
+
+    with TemporaryDirectory('wmf2png') as tdir:
+        with CurrentDir(tdir):
+            wmf.render(wmf_data)
+
+            # The glob is relative, so it must run while tdir is the cwd.
+            images = sorted(glob.glob('*.png'))
+            if not images:
+                raise NoRaster('No raster images in WMF')
+            # Close the file before the temporary directory is removed; an
+            # open handle can prevent deletion on Windows.
+            with open(images[0], 'rb') as f:
+                data = f.read()
+
+    im = Image()
+    im.load(data)
+    pw = PixelWand()
+    pw.color = '#ffffff'
+    im.rotate(pw, 180)
+
+    return im.export('png')
diff --git a/src/calibre/utils/wmf/wmf.c b/src/calibre/utils/wmf/wmf.c
index 1f8e8a27f3..74d3ca813f 100644
--- a/src/calibre/utils/wmf/wmf.c
+++ b/src/calibre/utils/wmf/wmf.c
@@ -4,6 +4,7 @@
#include
#include
+//#include
typedef struct {
char *data;
@@ -13,7 +14,7 @@ typedef struct {
//This code is taken mostly from the Abiword wmf plugin
-
+// Buffer read {{{
// returns unsigned char cast to int, or EOF
static int wmf_WMF_read(void * context) {
char c;
@@ -22,11 +23,11 @@ static int wmf_WMF_read(void * context) {
if (info->pos == info->len)
return EOF;
- c = info->data[pos];
+ c = info->data[info->pos];
info->pos++;
- return (int)c;
+ return (int)((unsigned char)c);
}
// returns (-1) on error, else 0
@@ -44,8 +45,17 @@ static long wmf_WMF_tell(void * context) {
return (long) info->pos;
}
+// }}}
+// Storage for the name returned by wmf_png_name(). It is shared by all
+// calls, so every call overwrites the previously returned name.
+char _png_name_buf[100];
+
+// Image naming callback. ctxt must point at an int counter: the counter is
+// incremented and the new value is formatted as "NNNN.png" (zero padded to
+// at least four digits). Returns a pointer to the shared buffer above.
+char *wmf_png_name(void *ctxt) {
+    int *counter = (int*)ctxt;
+
+    *counter += 1;
+    snprintf(_png_name_buf, 90, "%04d.png", *counter);
+    return _png_name_buf;
+}
+
#define CLEANUP if(API) { if (stream) wmf_free(API, stream); wmf_api_destroy(API); };
static PyObject *
@@ -66,9 +76,9 @@ wmf_render(PyObject *self, PyObject *args) {
unsigned int max_width = 1600;
unsigned int max_height = 1200;
- unsigned long max_flags = 0;
static const char* Default_Description = "wmf2svg";
+ int fname_counter = 0;
wmf_error_t err;
@@ -125,6 +135,8 @@ wmf_render(PyObject *self, PyObject *args) {
ddata->Description = (char *)Default_Description;
ddata->bbox = bbox;
+ ddata->image.context = (void *)&fname_counter;
+ ddata->image.name = wmf_png_name;
wmf_display_size(API, &disp_width, &disp_height, 96, 96);
@@ -156,9 +168,9 @@ wmf_render(PyObject *self, PyObject *args) {
ddata->height = (unsigned int) ceil ((double) wmf_height);
}
- ddata->flags |= WMF_SVG_INLINE_IMAGES;
-
- ddata->flags |= WMF_GD_OUTPUT_MEMORY | WMF_GD_OWN_BUFFER;
+ // Needs GD
+ //ddata->flags |= WMF_SVG_INLINE_IMAGES;
+ //ddata->flags |= WMF_GD_OUTPUT_MEMORY | WMF_GD_OWN_BUFFER;
err = wmf_play(API, 0, &(bbox));
@@ -178,11 +190,32 @@ wmf_render(PyObject *self, PyObject *args) {
return ans;
}
+#ifdef _WIN32
+void set_libwmf_fontdir(const char *);
+
+// Python entry point for set_font_dir(path): parses a single string argument
+// and hands it to set_libwmf_fontdir(). Returns None on success, or NULL when
+// the arguments could not be parsed as one string.
+static PyObject *
+wmf_setfontdir(PyObject *self, PyObject *args) {
+    char *font_dir = NULL;
+
+    if (PyArg_ParseTuple(args, "s", &font_dir)) {
+        set_libwmf_fontdir(font_dir);
+        Py_RETURN_NONE;
+    }
+    return NULL;
+}
+#endif
+
+
+
static PyMethodDef wmf_methods[] = {
{"render", wmf_render, METH_VARARGS,
- "render(path) -> Render wmf as svg."
+ "render(data) -> Render wmf as svg."
},
+#ifdef _WIN32
+ {"set_font_dir", wmf_setfontdir, METH_VARARGS,
+ "set_font_dir(path) -> Set the path to the fonts dir on windows, must be called at least once before using render()"
+ },
+#endif
{NULL} /* Sentinel */
};
diff --git a/src/calibre/web/feeds/recipes/collection.py b/src/calibre/web/feeds/recipes/collection.py
index a513cf3880..5dd360213b 100644
--- a/src/calibre/web/feeds/recipes/collection.py
+++ b/src/calibre/web/feeds/recipes/collection.py
@@ -108,7 +108,6 @@ def download_builtin_recipe(urn):
br = browser()
return br.open_novisit('http://status.calibre-ebook.com/recipe/'+urn).read()
-
def get_builtin_recipe_by_title(title, log=None, download_recipe=False):
for x in get_builtin_recipe_collection():
if x.get('title') == title:
@@ -127,6 +126,24 @@ def get_builtin_recipe_by_title(title, log=None, download_recipe=False):
'Failed to download recipe, using builtin version')
return P('recipes/%s.recipe'%urn, data=True)
+def get_builtin_recipe_by_id(id_, log=None, download_recipe=False):
+    '''
+    Return the source of the builtin recipe whose ``id`` equals ``id_``.
+
+    :param id_: The recipe id to look for in the builtin recipe collection.
+    :param log: Optional log object. When given it is called with progress
+        messages and its ``exception()`` method reports a failed download;
+        when ``None`` the traceback of a failed download is printed instead.
+    :param download_recipe: If True, first try ``download_builtin_recipe()``
+        and fall back to the bundled copy if that raises an error.
+    :return: The recipe data, or ``None`` if no builtin recipe has this id.
+    '''
+    for x in get_builtin_recipe_collection():
+        if x.get('id') == id_:
+            # Drop the first 8 characters of the id to get the urn
+            # (presumably a 'builtin:' prefix - verify against the
+            # collection format).
+            urn = id_[8:]
+            if download_recipe:
+                try:
+                    if log is not None:
+                        log('Trying to get latest version of recipe:', urn)
+                    return download_builtin_recipe(urn)
+                except Exception:
+                    # Best effort only: any ordinary failure falls through
+                    # to the bundled recipe. KeyboardInterrupt and SystemExit
+                    # are deliberately not swallowed.
+                    if log is None:
+                        import traceback
+                        traceback.print_exc()
+                    else:
+                        log.exception(
+                            'Failed to download recipe, using builtin version')
+            return P('recipes/%s.recipe'%urn, data=True)
+
class SchedulerConfig(object):
def __init__(self):