Input plugin for recipes

2026-05-09 11:14:02 +02:00 · 2009-04-27 15:41:10 -07:00 · 2009-04-27 15:41:10 -07:00 · 2da5589964
commit 2da5589964
parent 996dda3ffe
8 changed files with 108 additions and 31 deletions
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@ -287,6 +287,7 @@ def set_metadata(self, stream, mi, type):
 from calibre.ebooks.rtf.input import RTFInput
 from calibre.ebooks.html.input import HTMLInput
 from calibre.ebooks.comic.input import ComicInput
+from calibre.web.feeds.input import RecipeInput
 from calibre.ebooks.oeb.output import OEBOutput
 from calibre.ebooks.epub.output import EPUBOutput
 from calibre.ebooks.txt.output import TXTOutput
@ -296,7 +297,7 @@ def set_metadata(self, stream, mi, type):

 plugins = [HTML2ZIP, EPUBInput, MOBIInput, PDBInput, PDFInput, HTMLInput,
        TXTInput, OEBOutput, TXTOutput, PDFOutput, LITInput, ComicInput,
-        FB2Input, ODTInput, RTFInput, EPUBOutput, EREADEROutput]
+        FB2Input, ODTInput, RTFInput, EPUBOutput, EREADEROutput, RecipeInput]
 plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
                                        x.__name__.endswith('MetadataReader')]
 plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
--- a/src/calibre/ebooks/conversion/cli.py
+++ b/src/calibre/ebooks/conversion/cli.py
@ -52,7 +52,7 @@ def check_command_line_options(parser, args, log):
        raise SystemExit(1)

    input = os.path.abspath(args[1])
-    if not os.access(input, os.R_OK):
+    if not input.endswith('.recipe') and not os.access(input, os.R_OK):
        log.error('Cannot read from', input)
        raise SystemExit(1)

@ -169,6 +169,9 @@ def add_pipeline_options(parser, plumber):
            if rec.level < rec.HIGH:
                option_recommendation_to_cli_option(add_option, rec)

+    option_recommendation_to_cli_option(parser.add_option,
+            plumber.get_option_by_name('list_recipes'))
+
 def option_parser():
    return OptionParser(usage=USAGE)

--- a/src/calibre/ebooks/conversion/plumber.py
+++ b/src/calibre/ebooks/conversion/plumber.py
@ -360,6 +360,10 @@ def __init__(self, input, output, log):
 OptionRecommendation(name='language',
    recommended_value=None, level=OptionRecommendation.LOW,
    help=_('Set the language.')),
+
+OptionRecommendation(name='list_recipes',
+    recommended_value=False, help=_('List available recipes.')),
+
 ]

        input_fmt = os.path.splitext(self.input)[1]
@ -525,6 +529,13 @@ def run(self):
        self.setup_options()
        if self.opts.verbose:
            self.log.filter_level = self.log.DEBUG
+        if self.opts.list_recipes:
+            from calibre.web.feeds.recipes import titles
+            self.log('Available recipes:')
+            for title in sorted(titles):
+                self.log('\t'+title)
+            self.log('%d recipes available'%len(titles))
+            raise SystemExit(0)

        # Run any preprocess plugins
        from calibre.customize.ui import run_plugins_on_preprocess
@ -535,8 +546,13 @@ def run(self):
        accelerators = {}

        tdir = PersistentTemporaryDirectory('_plumber')
+        stream = self.input if self.input_fmt == 'recipe' else \
+                open(self.input, 'rb')

-        self.oeb = self.input_plugin(open(self.input, 'rb'), self.opts,
+        if hasattr(self.opts, 'lrf') and self.output_plugin.file_type == 'lrf':
+            self.opts.lrf = True
+
+        self.oeb = self.input_plugin(stream, self.opts,
                                    self.input_fmt, self.log,
                                    accelerators, tdir)
        if self.opts.debug_input is not None:
--- a/src/calibre/ebooks/oeb/base.py
+++ b/src/calibre/ebooks/oeb/base.py
@ -1578,15 +1578,15 @@ def decode(self, data):
                return data.decode('utf-16')
            except UnicodeDecodeError:
                pass
-        try:
-            return data.decode('utf-8')
-        except UnicodeDecodeError:
-            pass
        if self.encoding is not None:
            try:
                return data.decode(self.encoding)
            except UnicodeDecodeError:
                pass
+        try:
+            return data.decode('utf-8')
+        except UnicodeDecodeError:
+            pass
        data, _ = xml_to_unicode(data)
        data = data.replace('\r\n', '\n')
        data = data.replace('\r', '\n')
--- a/src/calibre/ebooks/oeb/transforms/split.py
+++ b/src/calibre/ebooks/oeb/transforms/split.py
@ -59,6 +59,7 @@ def __call__(self, oeb, context):
        self.fix_links()

    def split_item(self, item):
+        page_breaks, page_break_ids = [], []
        if self.split_on_page_breaks:
            page_breaks, page_break_ids = self.find_page_breaks(item)

--- a/src/calibre/web/__init__.py
+++ b/src/calibre/web/__init__.py
@ -2,5 +2,6 @@
 __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'


+class Recipe(object):  # empty base class; BasicNewsRecipe inherits from it
+    pass

- 
--- a/src/calibre/web/feeds/input.py
+++ b/src/calibre/web/feeds/input.py
@ -0,0 +1,65 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import with_statement
+
+__license__   = 'GPL v3'
+__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+import os
+
+from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
+
+class RecipeInput(InputFormatPlugin):
+    '''Input plugin for the ``recipe`` file type; fetches content via a recipe.'''
+
+    name        = 'Recipe Input'
+    author      = 'Kovid Goyal'
+    description = _('Download periodical content from the internet')
+    file_types  = set(['recipe'])
+
+    recommendations = set([
+        ('chapter_mark', 'none', OptionRecommendation.HIGH),
+        ('dont_split_on_page_breaks', True, OptionRecommendation.HIGH),
+        ('use_auto_toc', False, OptionRecommendation.HIGH),
+        ])
+
+    options = set([
+        OptionRecommendation(name='test', recommended_value=False,
+            help=_('Useful for recipe development. Forces '
+            'max_articles_per_feed to 2 and downloads at most 2 feeds.')),
+        OptionRecommendation(name='username', recommended_value=None,
+            help=_('Username for sites that require a login to access '
+                'content.')),
+        OptionRecommendation(name='password', recommended_value=None,
+            help=_('Password for sites that require a login to access '
+                'content.')),
+        OptionRecommendation(name='lrf', recommended_value=False,
+            help='Optimize fetching for subsequent conversion to LRF.'),
+        ])
+
+    def convert(self, recipe_or_file, opts, file_ext, log,
+            accelerators, progress=lambda x, y: x):
+        '''
+        Download the recipe named by ``recipe_or_file`` and return the
+        absolute path of an ``.opf`` file found in the current directory
+        (``None`` when there is none). A readable path is read and compiled
+        as recipe source; otherwise the basename, up to its last ``.``, is
+        looked up as a builtin recipe. Raises ValueError if no recipe results.
+        '''
+        from calibre.web.feeds.recipes import get_builtin_recipe, compile_recipe
+        if os.access(recipe_or_file, os.R_OK):
+            with open(recipe_or_file, 'rb') as src:  # close the handle promptly
+                recipe = compile_recipe(src.read())
+        else:
+            title = os.path.basename(recipe_or_file).rpartition('.')[0]
+            recipe = get_builtin_recipe(title)
+        if recipe is None:
+            raise ValueError('%s is not a valid recipe file or builtin recipe' %
+                    recipe_or_file)
+        ro = recipe(opts, log, progress)
+        ro.download()
+        opts.output_profile.flow_size = 0  # NOTE(review): presumably disables flow-size splitting; confirm
+        for f in os.listdir('.'):
+            if f.endswith('.opf'):
+                return os.path.abspath(f)
--- a/src/calibre/web/feeds/news.py
+++ b/src/calibre/web/feeds/news.py
@ -20,6 +20,7 @@
 from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString, CData, Tag
 from calibre.ebooks.metadata.opf2 import OPFCreator
 from calibre.ebooks.lrf import entity_to_unicode
+from calibre.web import Recipe
 from calibre.ebooks import render_html
 from calibre.ebooks.metadata.toc import TOC
 from calibre.ebooks.metadata import MetaInformation
@ -27,12 +28,11 @@
 from calibre.web.fetch.simple import option_parser as web2disk_option_parser
 from calibre.web.fetch.simple import RecursiveFetcher
 from calibre.utils.threadpool import WorkRequest, ThreadPool, NoResultsPending
-from calibre.utils.logging import Log
 from calibre.ptempfile import PersistentTemporaryFile, \
                              PersistentTemporaryDirectory


-class BasicNewsRecipe(object):
+class BasicNewsRecipe(Recipe):
    '''
    Abstract base class that contains logic needed in all feed fetchers.
    '''
@ -443,40 +443,34 @@ def get_obfuscated_article(self, url):
        '''
        raise NotImplementedError

-    def __init__(self, options, parser, progress_reporter):
+    def __init__(self, options, log, progress_reporter):
        '''
        Initialize the recipe.
        :param options: Parsed commandline options
        :param log:     Log object; stored as ``self.log``.
        :param progress_reporter: A Callable that takes two arguments: progress (a number between 0 and 1) and a string message. The message should be optional.
        '''
-        self.log = Log()
-        if options.verbose:
-            self.log.filter_level = self.log.DEBUG
+        self.log = log
        if not isinstance(self.title, unicode):
            self.title = unicode(self.title, 'utf-8', 'replace')

-        for attr in ('username', 'password', 'lrf', 'output_dir', 'verbose', 'debug', 'test'):
-            setattr(self, attr, getattr(options, attr))
+        self.debug = options.verbose > 1
+        self.output_dir = os.getcwd()
+        self.verbose = options.verbose
+        self.test = options.test
+        self.username = options.username
+        self.password = options.password
+        self.lrf = options.lrf
+
        self.output_dir = os.path.abspath(self.output_dir)
        if options.test:
            self.max_articles_per_feed = 2
            self.simultaneous_downloads = min(4, self.simultaneous_downloads)

-
        if self.debug:
            self.verbose = True
        self.report_progress = progress_reporter

-        self.username = self.password = None
-        #: If True optimize downloading for eventual conversion to LRF
-        self.lrf = False
-        defaults = parser.get_default_values()
-
-        for opt in options.__dict__.keys():
-            if getattr(options, opt) != getattr(defaults, opt, None):
-                setattr(self, opt, getattr(options, opt))
-
        if isinstance(self.feeds, basestring):
            self.feeds = eval(self.feeds)
            if isinstance(self.feeds, basestring):
@ -493,7 +487,6 @@ def __init__(self, options, parser, progress_reporter):
            '--timeout', str(self.timeout),
            '--max-recursions', str(self.recursions),
            '--delay', str(self.delay),
-            '--timeout', str(self.timeout),
            ]
        if self.encoding is not None:
            web2disk_cmdline.extend(['--encoding', self.encoding])
@ -520,9 +513,6 @@ def __init__(self, options, parser, progress_reporter):
            self.simultaneous_downloads = 1

        self.navbar = templates.NavBarTemplate()
-        self.html2lrf_options.extend(['--page-break-before', '$', '--use-spine', '--header', '--encoding', 'utf-8'])
-        if '--base-font-size' not in self.html2lrf_options:
-            self.html2lrf_options.extend(['--base-font-size', '12'])
        self.failed_downloads = []
        self.partial_failures = []

@ -557,7 +547,7 @@ def _postprocess_html(self, soup, first_fetch, job_info):
        return self.postprocess_html(soup, first_fetch)


-    def download(self, for_lrf=False):
+    def download(self):
        '''
        Download and pre-process all articles from the feeds in this recipe.
        This method should be called only once on a particular Recipe instance.