diff --git a/manual/conversion.rst b/manual/conversion.rst index 817821a9b1..bf451d0980 100644 --- a/manual/conversion.rst +++ b/manual/conversion.rst @@ -434,6 +434,18 @@ a number of older formats either do not support a metadata based Table of Conten documents do not have one. In these cases, the options in this section can help you automatically generate a Table of Contents in the converted ebook, based on the actual content in the input document. +.. note:: Using these options can be a little challenging to get exactly right. + If you prefer creating/editing the Table of Contents by hand, convert to + the EPUB or AZW3 formats and select the checkbox at the bottom of the + screen that says + :guilabel:`Manually fine-tune the Table of Contents after conversion`. + This will launch the ToC Editor tool after the conversion. It allows you to + create entries in the Table of Contents by simply clicking the place in the + book where you want the entry to point. You can also use the ToC Editor by + itself, without doing a conversion. Go to :guilabel:`Preferences->Toolbars` + and add the ToC Editor to the main toolbar. Then just select the book you + want to edit and click the ToC Editor button. + The first option is :guilabel:`Force use of auto-generated Table of Contents`. By checking this option you can have |app| override any Table of Contents found in the metadata of the input document with the auto generated one. @@ -456,7 +468,7 @@ For example, to remove all entries titles "Next" or "Previous" use:: Next|Previous -Finally, the :guilabel:`Level 1,2,3 TOC` options allow you to create a sophisticated multi-level Table of Contents. +The :guilabel:`Level 1,2,3 TOC` options allow you to create a sophisticated multi-level Table of Contents. They are XPath expressions that match tags in the intermediate XHTML produced by the conversion pipeline. See the :ref:`conversion-introduction` for how to get access to this XHTML. 
Also read the :ref:`xpath-tutorial`, to learn how to construct XPath expressions. Next to each option is a button that launches a wizard to help with the creation diff --git a/manual/faq.rst b/manual/faq.rst index ba11c865f3..7ef0f20a95 100644 --- a/manual/faq.rst +++ b/manual/faq.rst @@ -87,7 +87,9 @@ this bug. How do I convert a collection of HTML files in a specific order? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -In order to convert a collection of HTML files in a specific oder, you have to create a table of contents file. That is, another HTML file that contains links to all the other files in the desired order. Such a file looks like:: +In order to convert a collection of HTML files in a specific order, you have to +create a table of contents file. That is, another HTML file that contains links +to all the other files in the desired order. Such a file looks like::
@@ -102,18 +104,35 @@ In order to convert a collection of HTML files in a specific oder, you have to c -Then just add this HTML file to the GUI and use the convert button to create your ebook. +Then, just add this HTML file to the GUI and use the convert button to create +your ebook. You can use the option in the Table of Contents section in the +conversion dialog to control how the Table of Contents is generated. -.. note:: By default, when adding HTML files, |app| follows links in the files in *depth first* order. This means that if file A.html links to B.html and C.html and D.html, but B.html also links to D.html, then the files will be in the order A.html, B.html, D.html, C.html. If instead you want the order to be A.html, B.html, C.html, D.html then you must tell |app| to add your files in *breadth first* order. Do this by going to Preferences->Plugins and customizing the HTML to ZIP plugin. +.. note:: By default, when adding HTML files, |app| follows links in the files + in *depth first* order. This means that if file A.html links to B.html and + C.html and D.html, but B.html also links to D.html, then the files will be + in the order A.html, B.html, D.html, C.html. If instead you want the order + to be A.html, B.html, C.html, D.html then you must tell |app| to add your + files in *breadth first* order. Do this by going to Preferences->Plugins + and customizing the HTML to ZIP plugin. The EPUB I produced with |app| is not valid? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -|app| does not guarantee that an EPUB produced by it is valid. The only guarantee it makes is that if you feed it valid XHTML 1.1 + CSS 2.1 it will output a valid EPUB. |app| is designed for ebook consumers, not producers. It tries hard to ensure that EPUBs it produces actually work as intended on a wide variety of devices, a goal that is incompatible with producing valid EPUBs, and one that is far more important to the vast majority of its users. 
If you need a tool that always produces valid EPUBs, |app| is not for you. +|app| does not guarantee that an EPUB produced by it is valid. The only +guarantee it makes is that if you feed it valid XHTML 1.1 + CSS 2.1 it will +output a valid EPUB. |app| is designed for ebook consumers, not producers. It +tries hard to ensure that EPUBs it produces actually work as intended on a wide +variety of devices, a goal that is incompatible with producing valid EPUBs, and +one that is far more important to the vast majority of its users. If you need a +tool that always produces valid EPUBs, |app| is not for you. How do I use some of the advanced features of the conversion tools? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - You can get help on any individual feature of the converters by mousing over it in the GUI or running ``ebook-convert dummy.html .epub -h`` at a terminal. A good place to start is to look at the following demo files that demonstrate some of the advanced features: + You can get help on any individual feature of the converters by mousing over + it in the GUI or running ``ebook-convert dummy.html .epub -h`` at a terminal. + A good place to start is to look at the following demo files that demonstrate + some of the advanced features: * `html-demo.zip', '
').replace('
', '') - - def postprocess_html(self, soup, first_fetch): - for t in soup.findAll(['table', 'tr', 'td','center']): - t.name = 'div' - return soup - def parse_index(self): - today = time.strftime('%Y-%m-%d') - soup = self.index_to_soup( - 'http://www.thehindu.com/todays-paper/tp-index/?date=' + today) - div = soup.find(id='left-column') - feeds = [] + soup = self.index_to_soup('http://www.thehindu.com/todays-paper/') + div = soup.find('div', attrs={'id':'left-column'}) + soup.find(id='subnav-tpbar').extract() + + + current_section = None current_articles = [] - for x in div.findAll(['h3', 'div']): - if current_section and x.get('class', '') == 'tpaper': - a = x.find('a', href=True) - if a is not None: - title = self.tag_to_string(a) - self.log('\tFound article:', title) - current_articles.append({'url':a['href']+'?css=print', - 'title':title, 'date': '', - 'description':''}) - if x.name == 'h3': - if current_section and current_articles: + feeds = [] + for x in div.findAll(['a', 'span']): + if x.name == 'span' and x['class'] == 's-link': + # Section heading found + if current_articles and current_section: feeds.append((current_section, current_articles)) current_section = self.tag_to_string(x) - self.log('Found section:', current_section) current_articles = [] + self.log('\tFound section:', current_section) + elif x.name == 'a': + + title = self.tag_to_string(x) + url = x.get('href', False) + if not url or not title: + continue + self.log('\t\tFound article:', title) + self.log('\t\t\t', url) + current_articles.append({'title': title, 'url':url, + 'description':'', 'date':''}) + + if current_articles and current_section: + feeds.append((current_section, current_articles)) + return feeds - diff --git a/recipes/interia_fakty.recipe b/recipes/interia_fakty.recipe index baedd35d0c..053a264e7e 100644 --- a/recipes/interia_fakty.recipe +++ b/recipes/interia_fakty.recipe @@ -5,7 +5,7 @@ ''' fakty.interia.pl ''' - +import re from calibre.web.feeds.news import 
BasicNewsRecipe class InteriaFakty(BasicNewsRecipe): diff --git a/recipes/irish_times.recipe b/recipes/irish_times.recipe index 31ccd306e4..075d3dd4dd 100644 --- a/recipes/irish_times.recipe +++ b/recipes/irish_times.recipe @@ -1,65 +1,62 @@ __license__ = 'GPL v3' -__copyright__ = "2008, Derry FitzGerald. 2009 Modified by Ray Kinsella and David O'Callaghan, 2011 Modified by Phil Burns" +__copyright__ = "2008, Derry FitzGerald. 2009 Modified by Ray Kinsella and David O'Callaghan, 2011 Modified by Phil Burns, 2013 Tom Scholl" ''' irishtimes.com ''' -import re +import urlparse, re from calibre.web.feeds.news import BasicNewsRecipe +from calibre.ptempfile import PersistentTemporaryFile + class IrishTimes(BasicNewsRecipe): title = u'The Irish Times' - encoding = 'ISO-8859-15' - __author__ = "Derry FitzGerald, Ray Kinsella, David O'Callaghan and Phil Burns" + __author__ = "Derry FitzGerald, Ray Kinsella, David O'Callaghan and Phil Burns, Tom Scholl" language = 'en_IE' - timefmt = ' (%A, %B %d, %Y)' + masthead_url = 'http://www.irishtimes.com/assets/images/generic/website/logo_theirishtimes.png' + encoding = 'utf-8' oldest_article = 1.0 - max_articles_per_feed = 100 + max_articles_per_feed = 100 + remove_empty_feeds = True no_stylesheets = True - simultaneous_downloads= 5 - - r = re.compile('.*(?P'+_('No matches for %(text)s found in the current file [%(current)s].'
+ ' Do you want to search in the %(which)s file [%(next)s]?')
+ msg = msg%dict(text=text, current=current, next=next,
+ which=_('next') if forwards else _('previous'))
+ if question_dialog(self, _('No match found'), msg):
+ self.pending_search = self.find_next if forwards else self.find_previous
+ d.setCurrentRow(next_index)
+
+    def find_next(self):
+        """Run the search via ``self.find()`` with its default arguments.
+
+        Returns whatever ``self.find()`` returns. This method is also stored
+        as ``self.pending_search`` when a forwards search has to continue in
+        another file.
+        """
+        return self.find()
+
+    def find_previous(self):
+        """Run the search via ``self.find(forwards=False)``.
+
+        Returns whatever ``self.find()`` returns. This method is also stored
+        as ``self.pending_search`` when a non-forwards search has to continue
+        in another file.
+        """
+        return self.find(forwards=False)
+
def load(self, container):
self.container = container
spine_names = [container.abspath_to_name(p) for p in
@@ -175,6 +222,10 @@ def current_changed(self, item):
self.view.load_js()
self.dest_label.setText(self.base_msg + '
' + _('File:') + ' ' +
name + '
' + _('Top of the file'))
+ if hasattr(self, 'pending_search'):
+ f = self.pending_search
+ del self.pending_search
+ f()
def __call__(self, item, where):
self.current_item, self.current_where = item, where
diff --git a/src/calibre/gui2/tools.py b/src/calibre/gui2/tools.py
index 98a59ccdd5..eda60a4fec 100644
--- a/src/calibre/gui2/tools.py
+++ b/src/calibre/gui2/tools.py
@@ -82,8 +82,13 @@ def convert_single_ebook(parent, db, book_ids, auto_conversion=False, # {{{
args = [in_file.name, out_file.name, recs]
temp_files.append(out_file)
func = 'gui_convert_override'
+ parts = []
+ if not auto_conversion and d.manually_fine_tune_toc:
+ parts.append('manually_fine_tune_toc')
if same_fmt:
- func += ':same_fmt'
+ parts.append('same_fmt')
+ if parts:
+ func += ':%s'%(';'.join(parts))
jobs.append((func, args, desc, d.output_format.upper(), book_id, temp_files))
changed = True