Sync to trunk.

2026-01-29 14:35:30 +01:00 · 2011-02-24 18:46:46 -05:00 · 2011-02-24 18:46:46 -05:00 · 6d568aee69
commit 6d568aee69
parent deea9f48bc e82f006873
56 changed files with 1830 additions and 483 deletions
--- a/resources/images/news/20minutos.png
+++ b/resources/images/news/20minutos.png
--- a/resources/recipes/20minutos.recipe
+++ b/resources/recipes/20minutos.recipe
@ -0,0 +1,68 @@
+__license__   = 'GPL v3'
+__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
+'''
+www.20minutos.es
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class t20Minutos(BasicNewsRecipe):
+    # News recipe for www.20minutos.es
+    title                 = '20 Minutos'
+    __author__            = 'Darko Miletic'
+    description           = 'Diario de informacion general y local mas leido de Espania, noticias de ultima hora de Espania, el mundo, local, deportes, noticias curiosas y mas'
+    publisher             = '20 Minutos Online SL'
+    category              = 'news, politics, Spain'
+    oldest_article        = 2
+    max_articles_per_feed = 200
+    no_stylesheets        = True
+    encoding              = 'utf8'
+    use_embedded_content  = True
+    language              = 'es'
+    remove_empty_feeds    = True
+    publication_type      = 'newspaper'
+    masthead_url          = 'http://estaticos.20minutos.es/css4/img/ui/logo-301x54.png'
+    extra_css             = """
+                               body{font-family: Arial,Helvetica,sans-serif }
+                               img{margin-bottom: 0.4em; display:block}
+                            """
+
+    conversion_options = {
+                          'comment'   : description
+                        , 'tags'      : category
+                        , 'publisher' : publisher
+                        , 'language'  : language
+                        }
+
+    remove_tags = [dict(attrs={'class':'mf-viral'})]
+    remove_attributes=['border']
+
+    feeds = [
+              (u'Principal'      , u'http://20minutos.feedsportal.com/c/32489/f/478284/index.rss')
+             ,(u'Cine'           , u'http://20minutos.feedsportal.com/c/32489/f/478285/index.rss')
+             ,(u'Internacional'  , u'http://20minutos.feedsportal.com/c/32489/f/492689/index.rss')
+             ,(u'Deportes'       , u'http://20minutos.feedsportal.com/c/32489/f/478286/index.rss')
+             ,(u'Nacional'       , u'http://20minutos.feedsportal.com/c/32489/f/492688/index.rss')
+             ,(u'Economia'       , u'http://20minutos.feedsportal.com/c/32489/f/492690/index.rss')
+             ,(u'Tecnologia'     , u'http://20minutos.feedsportal.com/c/32489/f/478292/index.rss')
+            ]
+
+    def preprocess_html(self, soup):
+        '''
+        Clean up the article markup and return the modified soup.
+
+        Inline ``style`` attributes are removed, links are unwrapped and
+        every image is given an ``alt`` attribute.
+        '''
+        for item in soup.findAll(style=True):
+            del item['style']
+        for item in soup.findAll('a'):
+            if item.string is not None:
+                # The link holds a single text node: keep just that text.
+                item.replaceWith(item.string)
+            elif item.find('img'):
+                # The link wraps an image: turn it into an attribute-less
+                # div so the image itself is preserved.
+                item.name = 'div'
+                item.attrs = []
+            else:
+                # Anything else is flattened to its text content.
+                item.replaceWith(self.tag_to_string(item))
+        for item in soup.findAll('img'):
+            if not item.has_key('alt'):
+                item['alt'] = 'image'
+        return soup
+
--- a/resources/recipes/adevarul.recipe
+++ b/resources/recipes/adevarul.recipe
@ -32,16 +32,25 @@ class Adevarul(BasicNewsRecipe):
                         }

    keep_only_tags = [  dict(name='div', attrs={'class':'article_header'})
-                       ,dict(name='div', attrs={'class':'bd'})
+                       ,dict(name='div', attrs={'class':'bb-tu first-t bb-article-body'})
                     ]


-    remove_tags = [  dict(name='div', attrs={'class':'bb-wg-article_related_attachements'})
+    remove_tags = [ 
+					 dict(name='li', attrs={'class':'author'})
+					,dict(name='li', attrs={'class':'date'})
+					,dict(name='li', attrs={'class':'comments'})
+					,dict(name='div', attrs={'class':'bb-wg-article_related_attachements'})
                    ,dict(name='div', attrs={'class':'bb-md bb-md-article_comments'})
-                ,dict(name='form', attrs={'id':'bb-comment-create-form'})
-              ]
+                    ,dict(name='form', attrs={'id':'bb-comment-create-form'})
+				    ,dict(name='div', attrs={'id':'mediatag'})
+					,dict(name='div', attrs={'id':'ft'})	
+					,dict(name='div', attrs={'id':'comment_wrapper'})
+                 ]

-    remove_tags_after = [ dict(name='form', attrs={'id':'bb-comment-create-form'}) ]
+    remove_tags_after = [ 
+							dict(name='div', attrs={'id':'comment_wrapper'}),											
+						]

    feeds = [ (u'\u0218tiri', u'http://www.adevarul.ro/rss/latest') ]

--- a/resources/recipes/espn.recipe
+++ b/resources/recipes/espn.recipe
@ -41,7 +41,8 @@ class ESPN(BasicNewsRecipe):
                '''


-    feeds = [('Top Headlines', 'http://sports.espn.go.com/espn/rss/news'),
+    feeds = [
+            ('Top Headlines', 'http://sports.espn.go.com/espn/rss/news'),
             'http://sports.espn.go.com/espn/rss/nfl/news',
             'http://sports.espn.go.com/espn/rss/nba/news',
             'http://sports.espn.go.com/espn/rss/mlb/news',
@ -107,10 +108,11 @@ def print_version(self, url):
        if match and 'soccernet'  not in url and 'bassmaster' not in url:
            return 'http://sports.espn.go.com/espn/print?'+match.group(1)+'&type=story'
        else:
-            if match and 'soccernet' in url:
-                splitlist = url.split("&", 5)
-                newurl =  'http://soccernet.espn.go.com/print?'+match.group(1)+'&type=story' + '&' + str(splitlist[2] )
-                return newurl
+            if 'soccernet' in url:
+                match = re.search(r'/id/(\d+)/', url)
+                if match:
+                    return \
+                        'http://soccernet.espn.go.com/print?id=%s&type=story' % match.group(1)
            #else:
            #    if 'bassmaster' in url:
            #        return url
--- a/resources/recipes/flickr.recipe
+++ b/resources/recipes/flickr.recipe
@ -0,0 +1,48 @@
+__license__   = 'GPL v3'
+__author__    = 'Ricardo Jurado'
+__copyright__ = 'Ricardo Jurado'
+__version__     = 'v0.1'
+__date__        = '22 February 2011'
+
+'''
+http://blog.flickr.net/
+'''
+
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AdvancedUserRecipe1297031650(BasicNewsRecipe):
+    # Recipe for the Flickr blog, read from its FeedBurner feed.
+
+    # Identification
+    title       = u'Flickr Blog'
+    __author__  = 'Ricardo Jurado'
+    description = 'Pictures Blog'
+    category    = 'Blog,Pictures'
+    publisher   = u''
+    language    = 'en'
+
+    # The same logo image is used for both the masthead and the cover.
+    masthead_url = 'http://flickrtheblog.files.wordpress.com/2008/11/flickblog_logo.gif'
+    cover_url    = masthead_url
+
+    # Download settings
+    oldest_article        = 120
+    max_articles_per_feed = 10
+    use_embedded_content  = False
+    encoding              = 'UTF-8'
+    no_stylesheets        = True
+    remove_javascript     = True
+
+    extra_css = """
+                               p{text-align: justify; font-size: 100%}
+                               body{ text-align: left; font-size:100% }
+                               h2{font-family: sans-serif; font-size:130%; font-weight:bold; text-align: justify; }
+                               .published{font-family:Arial,Helvetica,sans-serif; font-size:80%; }
+                               .posted{font-family:Arial,Helvetica,sans-serif; font-size:80%; }
+                                 """
+
+    # Only the div with class "entry" is kept from each page.
+    keep_only_tags = [dict(name='div', attrs={'class':'entry'})]
+
+    feeds = [
+        (u'BLOG', u'http://feeds.feedburner.com/Flickrblog'),
+        # Alternative feed, currently disabled:
+        #(u'BLOG', u'http://blog.flickr.net/es/feed/atom/')
+    ]
--- a/resources/recipes/flickr_es.recipe
+++ b/resources/recipes/flickr_es.recipe
@ -0,0 +1,47 @@
+__license__   = 'GPL v3'
+__author__    = 'Ricardo Jurado'
+__copyright__ = 'Ricardo Jurado'
+__version__     = 'v0.1'
+__date__        = '22 February 2011'
+
+'''
+http://blog.flickr.net/
+'''
+
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AdvancedUserRecipe1297031650(BasicNewsRecipe):
+    # Recipe for the Flickr blog, read from the /es/ Atom feed.
+
+    # Identification
+    title       = u'Flickr Blog'
+    __author__  = 'Ricardo Jurado'
+    description = 'Pictures Blog'
+    category    = 'Blog,Pictures'
+    publisher   = u''
+    language    = 'es'
+
+    # The same logo image is used for both the masthead and the cover.
+    masthead_url = 'http://flickrtheblog.files.wordpress.com/2008/11/flickblog_logo.gif'
+    cover_url    = masthead_url
+
+    # Download settings
+    oldest_article        = 120
+    max_articles_per_feed = 10
+    use_embedded_content  = False
+    encoding              = 'UTF-8'
+    no_stylesheets        = True
+    remove_javascript     = True
+
+    extra_css = """
+                               p{text-align: justify; font-size: 100%}
+                               body{ text-align: left; font-size:100% }
+                               h2{font-family: sans-serif; font-size:130%; font-weight:bold; text-align: justify; }
+                               .published{font-family:Arial,Helvetica,sans-serif; font-size:80%; }
+                               .posted{font-family:Arial,Helvetica,sans-serif; font-size:80%; }
+                                 """
+
+    # Only the div with class "entry" is kept from each page.
+    keep_only_tags = [dict(name='div', attrs={'class':'entry'})]
+
+    feeds = [(u'BLOG', u'http://blog.flickr.net/es/feed/atom/')]
--- a/resources/recipes/gizmodo.recipe
+++ b/resources/recipes/gizmodo.recipe
@ -17,10 +17,9 @@ class Gizmodo(BasicNewsRecipe):
    max_articles_per_feed = 100
    no_stylesheets        = True
    encoding              = 'utf-8'
-    use_embedded_content  = False
+    use_embedded_content  = True
    language              = 'en'
    masthead_url          = 'http://cache.gawkerassets.com/assets/gizmodo.com/img/logo.png'
-    extra_css             = ' body{font-family: "Lucida Grande",Helvetica,Arial,sans-serif} img{margin-bottom: 1em} '

    conversion_options = {
                          'comment'   : description
@ -29,13 +28,12 @@ class Gizmodo(BasicNewsRecipe):
                        , 'language'  : language
                        }

-    remove_attributes  = ['width','height']
-    keep_only_tags     = [dict(attrs={'class':'content permalink'})]
-    remove_tags_before = dict(name='h1')
-    remove_tags        = [dict(attrs={'class':'contactinfo'})]
-    remove_tags_after  = dict(attrs={'class':'contactinfo'})
+    feeds = [(u'Articles', u'http://feeds.gawker.com/gizmodo/vip?format=xml')]
+
+    remove_tags = [
+            {'class': 'feedflare'},
+    ]

-    feeds = [(u'Articles', u'http://feeds.gawker.com/gizmodo/full')]

    def preprocess_html(self, soup):
        return self.adeify_images(soup)
--- a/resources/recipes/gsp.recipe
+++ b/resources/recipes/gsp.recipe
@ -1,20 +1,43 @@
+# -*- coding: utf-8 -*-
+#!/usr/bin/env  python
+
+__license__   = 'GPL v3'
+__copyright__ = u'2011, Silviu Cotoar\u0103'
+'''
+gsp.ro
+'''
+
 from calibre.web.feeds.news import BasicNewsRecipe

-class AdvancedUserRecipe1286351181(BasicNewsRecipe):
-    title          = u'gsp.ro'
-    __author__   = 'bucsie'
-    oldest_article = 2
+class GSP(BasicNewsRecipe):
+    title                 = u'Gazeta Sporturilor'
+    language              = 'ro'
+    __author__            = u'Silviu Cotoar\u0103'
+    description           = u'Gazeta Sporturilor'
+    publisher             = u'Gazeta Sporturilor'
+    category              = 'Ziare,Sport,Stiri,Romania'
+    oldest_article        = 5
    max_articles_per_feed = 100
-    language='ro'
-    cover_url ='http://www.gsp.ro/images/sigla_rosu.jpg'
+    no_stylesheets        = True
+    use_embedded_content  = False
+    encoding              = 'utf-8'
+    remove_javascript     = True
+    cover_url             = 'http://www.gsp.ro/images/logo.jpg'

-    remove_tags = [
-                       dict(name='div', attrs={'class':['related_articles', 'articol_noteaza straight_line dotted_line_top', 'comentarii','mai_multe_articole']}),
-                       dict(name='div', attrs={'id':'icons'})
-                        ]
-    remove_tags_after = dict(name='div', attrs={'id':'adoceanintactrovccmgpmnyt'})
+    conversion_options = {
+                             'comments'   : description
+                            ,'tags'       : category
+                            ,'language'   : language
+							,'publisher'  : publisher
+                         }

-    feeds          = [(u'toate stirile', u'http://www.gsp.ro/index.php?section=section&screen=rss')]
+    keep_only_tags = [  dict(name='h1', attrs={'class':'serif title_2'})
+                       ,dict(name='div', attrs={'id':'only_text'})
+					   ,dict(name='span', attrs={'class':'block poza_principala'})
+                     ]
+					 
+    feeds = [ (u'\u0218tiri', u'http://www.gsp.ro/rss.xml') ]
+
+    def preprocess_html(self, soup):
+        return self.adeify_images(soup)

-    def print_version(self, url):
-         return 'http://www1.gsp.ro/print/' + url[(url.rindex('/')+1):]
--- a/resources/recipes/lifehacker.recipe
+++ b/resources/recipes/lifehacker.recipe
@ -16,15 +16,9 @@ class Lifehacker(BasicNewsRecipe):
    max_articles_per_feed = 100
    no_stylesheets        = True
    encoding              = 'utf-8'
-    use_embedded_content  = False
+    use_embedded_content  = True
    language              = 'en'
    masthead_url          = 'http://cache.gawkerassets.com/assets/lifehacker.com/img/logo.png'
-    extra_css             = '''
-	body{font-family: "Lucida Grande",Helvetica,Arial,sans-serif}
-	img{margin-bottom: 1em}
-	h1{font-family :Arial,Helvetica,sans-serif; font-size:large}
-	h2{font-family :Arial,Helvetica,sans-serif; font-size:x-small}
-	              '''
    conversion_options = {
                          'comment'   : description
                        , 'tags'      : category
@ -32,20 +26,12 @@ class Lifehacker(BasicNewsRecipe):
                        , 'language'  : language
                        }

-    remove_attributes  = ['width', 'height', 'style']
-    remove_tags_before = dict(name='h1')
-    keep_only_tags = [dict(id='container')]
-    remove_tags_after  = dict(attrs={'class':'post-body'})
    remove_tags = [
-            dict(id="sharemenu"),
-            {'class': 'related'},
+            {'class': 'feedflare'},
    ]

-    feeds = [(u'Articles', u'http://feeds.gawker.com/lifehacker/full')]
+    feeds = [(u'Articles', u'http://feeds.gawker.com/lifehacker/vip?format=xml')]

    def preprocess_html(self, soup):
        return self.adeify_images(soup)

-    def print_version(self, url):
-        return url.replace('#!', '?_escaped_fragment_=')
-
--- a/resources/recipes/nytimes.recipe
+++ b/resources/recipes/nytimes.recipe
@ -88,8 +88,8 @@ class NYTimes(BasicNewsRecipe):

    if headlinesOnly:
        title='New York Times Headlines'
-        description = 'Headlines from the New York Times'
-        needs_subscription = False
+        description = 'Headlines from the New York Times. Needs a subscription from http://www.nytimes.com'
+        needs_subscription = 'optional'
    elif webEdition:
        title='New York Times (Web)'
        description = 'New York Times on the Web'
--- a/resources/recipes/nytimes_sub.recipe
+++ b/resources/recipes/nytimes_sub.recipe
@ -96,18 +96,18 @@ class NYTimes(BasicNewsRecipe):
    if headlinesOnly:
        title='New York Times Headlines'
        description = 'Headlines from the New York Times'
-        needs_subscription = False
+        needs_subscription = True
    elif webEdition:
        title='New York Times (Web)'
        description = 'New York Times on the Web'
        needs_subscription = True
    elif replaceKindleVersion:
-	title='The New York Times'
+        title='The New York Times'
        description = 'Today\'s New York Times'
        needs_subscription = True
    else:
        title='New York Times'
-        description = 'Today\'s New York Times'
+        description = 'Today\'s New York Times. Needs subscription from http://www.nytimes.com'
        needs_subscription = True


@ -676,7 +676,7 @@ def preprocess_html(self, soup):
                        if hlines:
                            for hline in hlines:
                                hline.extract()
-                                
+
                        #find all section headers
                        hlines = runAround.findAll('h6')
                        if hlines:
--- a/resources/template-functions.json
+++ b/resources/template-functions.json
@ -15,6 +15,7 @@
    "template": "def evaluate(self, formatter, kwargs, mi, locals, template):\n    template = template.replace('[[', '{').replace(']]', '}')\n    return formatter.__class__().safe_format(template, kwargs, 'TEMPLATE', mi)\n", 
    "print": "def evaluate(self, formatter, kwargs, mi, locals, *args):\n    print args\n    return None\n", 
    "titlecase": "def evaluate(self, formatter, kwargs, mi, locals, val):\n    return titlecase(val)\n", 
+    "sublist": "def evaluate(self, formatter, kwargs, mi, locals, val, start_index, end_index, sep):\n    if not val:\n        return ''\n    si = int(start_index)\n    ei = int(end_index)\n    val = val.split(sep)\n    try:\n        if ei == 0:\n            return sep.join(val[si:])\n        else:\n            return sep.join(val[si:ei])\n    except:\n        return ''\n", 
    "test": "def evaluate(self, formatter, kwargs, mi, locals, val, value_if_set, value_not_set):\n    if val:\n        return value_if_set\n    else:\n        return value_not_set\n", 
    "eval": "def evaluate(self, formatter, kwargs, mi, locals, template):\n    from formatter import eval_formatter\n    template = template.replace('[[', '{').replace(']]', '}')\n    return eval_formatter.safe_format(template, locals, 'EVAL', None)\n", 
    "multiply": "def evaluate(self, formatter, kwargs, mi, locals, x, y):\n    x = float(x if x else 0)\n    y = float(y if y else 0)\n    return unicode(x * y)\n", 
--- a/resources/templates/html.css
+++ b/resources/templates/html.css
@ -391,11 +391,6 @@ noembed, param, link {
   display: none;
 }

-/* Page breaks at body tags, to help out with LIT-generation */
-body {
-  page-break-before: always;
-}
-
 /* Explicit line-breaks are blocks, sure... */
 br {
  display: block;
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@ -571,7 +571,7 @@ def set_metadata(self, stream, mi, type):
 from calibre.devices.hanvon.driver import N516, EB511, ALEX, AZBOOKA, THEBOOK
 from calibre.devices.edge.driver import EDGE
 from calibre.devices.teclast.driver import TECLAST_K3, NEWSMY, IPAPYRUS, \
-        SOVOS, PICO, SUNSTECH_EB700, ARCHOS7O, STASH
+        SOVOS, PICO, SUNSTECH_EB700, ARCHOS7O, STASH, WEXLER
 from calibre.devices.sne.driver import SNE
 from calibre.devices.misc import PALMPRE, AVANT, SWEEX, PDNOVEL, \
        GEMEI, VELOCITYMICRO, PDNOVEL_KOBO, LUMIREAD, ALURATEK_COLOR, \
@ -679,7 +679,7 @@ def set_metadata(self, stream, mi, type):
    ELONEX,
    TECLAST_K3,
    NEWSMY,
-    PICO, SUNSTECH_EB700, ARCHOS7O, SOVOS, STASH,
+    PICO, SUNSTECH_EB700, ARCHOS7O, SOVOS, STASH, WEXLER,
    IPAPYRUS,
    EDGE,
    SNE,
--- a/src/calibre/customize/ui.py
+++ b/src/calibre/customize/ui.py
@ -594,7 +594,7 @@ def main(args=sys.argv):
        if remove_plugin(opts.remove_plugin):
            print 'Plugin removed'
        else:
-            print 'No custom pluginnamed', opts.remove_plugin
+            print 'No custom plugin named', opts.remove_plugin
    if opts.customize_plugin is not None:
        name, custom = opts.customize_plugin.split(',')
        plugin = find_plugin(name.strip())
--- a/src/calibre/devices/android/driver.py
+++ b/src/calibre/devices/android/driver.py
@ -74,6 +74,9 @@ class ANDROID(USBMS):
            # T-Mobile
            0x0408 : { 0x03ba : [0x0109], },

+            # Xperia
+            0x13d3 : { 0x3304 : [0x0001, 0x0002] },
+
            }
    EBOOK_DIR_MAIN = ['eBooks/import', 'wordplayer/calibretransfer', 'Books']
    EXTRA_CUSTOMIZATION_MESSAGE = _('Comma separated list of directories to '
@ -83,7 +86,7 @@ class ANDROID(USBMS):

    VENDOR_NAME      = ['HTC', 'MOTOROLA', 'GOOGLE_', 'ANDROID', 'ACER',
            'GT-I5700', 'SAMSUNG', 'DELL', 'LINUX', 'GOOGLE', 'ARCHOS',
-            'TELECHIP', 'HUAWEI', 'T-MOBILE', ]
+            'TELECHIP', 'HUAWEI', 'T-MOBILE', 'SEMC']
    WINDOWS_MAIN_MEM = ['ANDROID_PHONE', 'A855', 'A853', 'INC.NEXUS_ONE',
            '__UMS_COMPOSITE', '_MB200', 'MASS_STORAGE', '_-_CARD', 'SGH-I897',
            'GT-I9000', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID',
--- a/src/calibre/devices/kobo/driver.py
+++ b/src/calibre/devices/kobo/driver.py
@ -78,9 +78,13 @@ def books(self, oncard=None, end_session=True):
                 else self._main_prefix

        # Determine the firmware version
-        f = open(self.normalize_path(self._main_prefix + '.kobo/version'), 'r')
-        self.fwversion = f.readline().split(',')[2]
-        f.close()
+        try:
+            with open(self.normalize_path(self._main_prefix + '.kobo/version'),
+                    'rb') as f:
+                self.fwversion = f.readline().split(',')[2]
+        except:
+            self.fwversion = 'unknown'
+
        if self.fwversion != '1.0' and self.fwversion != '1.4':
            self.has_kepubs = True
        debug_print('Version of firmware: ', self.fwversion, 'Has kepubs:', self.has_kepubs)
@ -161,7 +165,7 @@ def update_booklist(prefix, path, title, authors, mime, date, ContentType, Image
            return changed

        connection = sqlite.connect(self.normalize_path(self._main_prefix + '.kobo/KoboReader.sqlite'))
-        
+
        # return bytestrings if the content cannot the decoded as unicode
        connection.text_factory = lambda x: unicode(x, "utf-8", "ignore")

@ -234,7 +238,7 @@ def delete_via_sql(self, ContentID, ContentType):

        debug_print('delete_via_sql: ContentID: ', ContentID, 'ContentType: ', ContentType)
        connection = sqlite.connect(self.normalize_path(self._main_prefix + '.kobo/KoboReader.sqlite'))
-        
+
        # return bytestrings if the content cannot the decoded as unicode
        connection.text_factory = lambda x: unicode(x, "utf-8", "ignore")

@ -511,7 +515,7 @@ def update_device_database_collections(self, booklists, collections_attributes,
        # the last book from the collection the list of books is empty
        # and the removal of the last book would not occur
        connection = sqlite.connect(self.normalize_path(self._main_prefix + '.kobo/KoboReader.sqlite'))
-        
+
        # return bytestrings if the content cannot the decoded as unicode
        connection.text_factory = lambda x: unicode(x, "utf-8", "ignore")

--- a/src/calibre/devices/teclast/driver.py
+++ b/src/calibre/devices/teclast/driver.py
@ -104,3 +104,14 @@ class STASH(TECLAST_K3):
    VENDOR_NAME = 'STASH'
    WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = 'W950'

+class WEXLER(TECLAST_K3):
+    # Sets the identification strings and supported formats for the Wexler
+    # reader; everything else is inherited from TECLAST_K3.
+
+    name        = 'Wexler device interface'
+    gui_name    = 'Wexler'
+    description = _('Communicate with the Wexler reader.')
+
+    FORMATS = ['epub', 'fb2', 'pdf', 'txt']
+
+    VENDOR_NAME = 'WEXLER'
+    # Main memory and card A share the same identification string.
+    WINDOWS_MAIN_MEM   = 'T7001'
+    WINDOWS_CARD_A_MEM = WINDOWS_MAIN_MEM
+
--- a/src/calibre/ebooks/comic/input.py
+++ b/src/calibre/ebooks/comic/input.py
@ -304,6 +304,10 @@ class ComicInput(InputFormatPlugin):
            help=_('Specify the image size as widthxheight pixels. Normally,'
                ' an image size is automatically calculated from the output '
                'profile, this option overrides it.')),
+        OptionRecommendation(name='dont_add_comic_pages_to_toc', recommended_value=False,
+            help=_('When converting a CBC do not add links to each page to'
+                ' the TOC. Note this only applies if the TOC has more than one'
+                ' section')),
        ])

    recommendations = set([
@ -449,10 +453,11 @@ def href(x):
                wrappers = comic[2]
                stoc = toc.add_item(href(wrappers[0]),
                        None, comic[0], play_order=po)
-                for i, x in enumerate(wrappers):
-                    stoc.add_item(href(x), None,
-                            _('Page')+' %d'%(i+1), play_order=po)
-                    po += 1
+                if not opts.dont_add_comic_pages_to_toc:
+                    for i, x in enumerate(wrappers):
+                        stoc.add_item(href(x), None,
+                                _('Page')+' %d'%(i+1), play_order=po)
+                        po += 1
        opf.set_toc(toc)
        m, n = open('metadata.opf', 'wb'), open('toc.ncx', 'wb')
        opf.render(m, n, 'toc.ncx')
--- a/src/calibre/ebooks/conversion/plumber.py
+++ b/src/calibre/ebooks/conversion/plumber.py
@ -984,7 +984,9 @@ def run(self):
        flattener = CSSFlattener(fbase=fbase, fkey=fkey,
                lineh=line_height,
                untable=self.output_plugin.file_type in ('mobi','lit'),
-                unfloat=self.output_plugin.file_type in ('mobi', 'lit'))
+                unfloat=self.output_plugin.file_type in ('mobi', 'lit'),
+                page_break_on_body=self.output_plugin.file_type in ('mobi',
+                    'lit'))
        flattener(self.oeb, self.opts)
        self.opts.insert_blank_line = oibl
        self.opts.remove_paragraph_spacing = orps
--- a/src/calibre/ebooks/lit/output.py
+++ b/src/calibre/ebooks/lit/output.py
@ -22,7 +22,8 @@ def convert(self, oeb, output_path, input_plugin, opts, log):
        from calibre.ebooks.oeb.transforms.htmltoc import HTMLTOCAdder
        from calibre.ebooks.lit.writer import LitWriter
        from calibre.ebooks.oeb.transforms.split import Split
-        split = Split(split_on_page_breaks=True, max_flow_size=0)
+        split = Split(split_on_page_breaks=True, max_flow_size=0,
+                remove_css_pagebreaks=False)
        split(self.oeb, self.opts)


--- a/src/calibre/ebooks/metadata/book/init.py
+++ b/src/calibre/ebooks/metadata/book/init.py
@ -83,6 +83,10 @@
    'application_id',   # An application id, currently set to the db_id.
    'db_id',            # the calibre primary key of the item.
    'formats',          # list of formats (extensions) for this book
+    # a dict of user category names, where the value is a list of item names
+    # from the book that are in that category
+    'user_categories',
+
    ]
 )

--- a/src/calibre/ebooks/metadata/book/base.py
+++ b/src/calibre/ebooks/metadata/book/base.py
@ -30,6 +30,7 @@
                'author_sort_map': {},
                'authors'      : [_('Unknown')],
                'title'        : _('Unknown'),
+                'user_categories' : {},
                'language'     : 'und'
 }

--- a/src/calibre/ebooks/metadata/opf2.py
+++ b/src/calibre/ebooks/metadata/opf2.py
@ -470,6 +470,13 @@ def serialize_user_metadata(metadata_elem, all_user_metadata, tail='\n'+(' '*8))
        metadata_elem.append(meta)


+def dump_user_categories(cats):
+    '''
+    Serialize the user categories value ``cats`` to a JSON string.
+
+    Any false value (``None``, an empty dict, ...) is serialized as an
+    empty JSON object. Non-ASCII characters are emitted unescaped and keys
+    that are not basic JSON types are skipped.
+    '''
+    from calibre.ebooks.metadata.book.json_codec import object_to_unicode
+    normalized = object_to_unicode(cats if cats else {})
+    return json.dumps(normalized, ensure_ascii=False, skipkeys=True)
+
 class OPF(object): # {{{

    MIMETYPE         = 'application/oebps-package+xml'
@ -524,6 +531,9 @@ class OPF(object): # {{{
    publication_type = MetadataField('publication_type', is_dc=False)
    timestamp       = MetadataField('timestamp', is_dc=False,
                                    formatter=parse_date, renderer=isoformat)
+    user_categories = MetadataField('user_categories', is_dc=False,
+                                    formatter=json.loads,
+                                    renderer=dump_user_categories)


    def __init__(self, stream, basedir=os.getcwdu(), unquote_urls=True,
@ -994,7 +1004,7 @@ def smart_update(self, mi, replace_metadata=False):
        for attr in ('title', 'authors', 'author_sort', 'title_sort',
                     'publisher', 'series', 'series_index', 'rating',
                     'isbn', 'tags', 'category', 'comments',
-                     'pubdate'):
+                     'pubdate', 'user_categories'):
            val = getattr(mi, attr, None)
            if val is not None and val != [] and val != (None, None):
                setattr(self, attr, val)
@ -1175,6 +1185,10 @@ def CAL_ELEM(name, content):
            a(CAL_ELEM('calibre:timestamp', self.timestamp.isoformat()))
        if self.publication_type is not None:
            a(CAL_ELEM('calibre:publication_type', self.publication_type))
+        if self.user_categories:
+            from calibre.ebooks.metadata.book.json_codec import object_to_unicode
+            a(CAL_ELEM('calibre:user_categories',
+                       json.dumps(object_to_unicode(self.user_categories))))
        manifest = E.manifest()
        if self.manifest is not None:
            for ref in self.manifest:
@ -1299,6 +1313,8 @@ def factory(tag, text=None, sort=None, role=None, scheme=None, name=None,
        meta('publication_type', mi.publication_type)
    if mi.title_sort:
        meta('title_sort', mi.title_sort)
+    if mi.user_categories:
+        meta('user_categories', dump_user_categories(mi.user_categories))

    serialize_user_metadata(metadata, mi.get_all_user_metadata(False))

--- a/src/calibre/ebooks/metadata/sources/base.py
+++ b/src/calibre/ebooks/metadata/sources/base.py
@ -7,7 +7,7 @@
 __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

-import re
+import re, threading

 from calibre.customize import Plugin
 from calibre.utils.logging import ThreadSafeLog, FileStream
@ -30,7 +30,21 @@ class Source(Plugin):

    touched_fields = frozenset()

+    def __init__(self, *args, **kwargs):
+        '''
+        Initialize the plugin and create the empty ISBN -> identifier
+        cache together with the re-entrant lock that guards it.
+        '''
+        Plugin.__init__(self, *args, **kwargs)
+        # Create the lock first, then the cache it guards; both start empty
+        # and are only touched via the cache_*/cached_* helper methods.
+        self.cache_lock = threading.RLock()
+        self._isbn_to_identifier_cache = {}
+
    # Utility functions {{{
+
+    def cache_isbn_to_identifier(self, isbn, identifier):
+        '''
+        Remember that ``isbn`` maps to ``identifier``, replacing any
+        previous entry for the same ISBN. The cache lock is held while the
+        entry is stored.
+        '''
+        with self.cache_lock:
+            cache = self._isbn_to_identifier_cache
+            cache[isbn] = identifier
+
+    def cached_isbn_to_identifier(self, isbn):
+        '''
+        Return the identifier previously cached for ``isbn``, or ``None``
+        if nothing has been cached for it. The cache lock is held during
+        the lookup.
+        '''
+        with self.cache_lock:
+            identifier = self._isbn_to_identifier_cache.get(isbn)
+        return identifier
+
    def get_author_tokens(self, authors, only_first_author=True):
        '''
        Take a list of authors and return a list of tokens useful for an
--- a/src/calibre/ebooks/metadata/sources/google.py
+++ b/src/calibre/ebooks/metadata/sources/google.py
@ -13,6 +13,7 @@

 from lxml import etree

+from calibre.ebooks.metadata import check_isbn
 from calibre.ebooks.metadata.sources.base import Source
 from calibre.ebooks.metadata.book.base import Metadata
 from calibre.ebooks.chardet import xml_to_unicode
@ -69,6 +70,7 @@ def get_text(extra, x):


    id_url = entry_id(entry_)[0].text
+    google_id = id_url.split('/')[-1]
    title_ = ': '.join([x.text for x in title(entry_)]).strip()
    authors = [x.text.strip() for x in creator(entry_) if x.text]
    if not authors:
@ -78,6 +80,7 @@ def get_text(extra, x):
        return None

    mi = Metadata(title_, authors)
+    mi.identifiers = {'google':google_id}
    try:
        raw = get_details(browser, id_url, timeout)
        feed = etree.fromstring(xml_to_unicode(clean_ascii_chars(raw),
@ -103,9 +106,12 @@ def get_text(extra, x):
        t = str(x.text).strip()
        if t[:5].upper() in ('ISBN:', 'LCCN:', 'OCLC:'):
            if t[:5].upper() == 'ISBN:':
-                isbns.append(t[5:])
+                t = check_isbn(t[5:])
+                if t:
+                    isbns.append(t)
    if isbns:
        mi.isbn = sorted(isbns, key=len)[-1]
+    mi.all_isbns = isbns

    # Tags
    try:
@ -133,20 +139,6 @@ def get_text(extra, x):
    return mi


-def get_all_details(br, log, entries, abort, result_queue, timeout):
-    for i in entries:
-        try:
-            ans = to_metadata(br, log, i, timeout)
-            if isinstance(ans, Metadata):
-                result_queue.put(ans)
-        except:
-            log.exception(
-                'Failed to get metadata for identify entry:',
-                etree.tostring(i))
-        if abort.is_set():
-            break
-
-
 class GoogleBooks(Source):

    name = 'Google Books'
@ -185,6 +177,36 @@ def build_term(prefix, parts):
            'min-viewability':'none',
            })

+    def cover_url_from_identifiers(self, identifiers):
+        '''
+        Build the Google Books front cover image URL for a book.
+
+        `identifiers` is a dict; its 'google' entry is used when present.
+        Otherwise its 'isbn' entry (possibly None) is looked up in the
+        ISBN cache to find a google identifier.
+
+        Returns the URL as a string, or None (implicitly) when no google
+        identifier could be determined.
+        '''
+        goog = identifiers.get('google', None)
+        if goog is None:
+            # Fall back to an identifier cached for this ISBN, if any
+            isbn = identifiers.get('isbn', None)
+            goog = self.cached_isbn_to_identifier(isbn)
+        if goog is not None:
+            return ('http://books.google.com/books?id=%s&printsec=frontcover&img=1' %
+                goog)
+
+    def is_cover_image_valid(self, raw):
+        '''
+        Heuristically decide whether downloaded cover data is a real cover.
+
+        The result is truthy only when `raw` is non-empty, longer than
+        17000 and raw[1:4] is not 'PNG'. Because the checks are chained
+        with `and`, a falsy `raw` (e.g. None or an empty string) is
+        returned as is rather than as False.
+        '''
+        # When no cover is present, returns a PNG saying image not available
+        # Try for example google identifier llNqPwAACAAJ
+        # I have yet to see an actual cover in PNG format
+        # NOTE(review): 17000 is presumably just above the size of that
+        # placeholder image -- confirm
+        return raw and len(raw) > 17000 and raw[1:4] != 'PNG'
+
+    def get_all_details(self, br, log, entries, abort, result_queue, timeout):
+        '''
+        Convert each entry in `entries` to metadata, one after the other.
+
+        Every entry is passed to to_metadata() together with `br`, `log`
+        and `timeout`. When the result is a Metadata object it is put on
+        `result_queue` and each of its all_isbns is cached against its
+        google identifier. Any exception raised while handling an entry is
+        logged via log.exception and the loop moves on to the next entry.
+
+        `abort` is checked after each entry; once it is set no further
+        entries are processed.
+        '''
+        for i in entries:
+            try:
+                ans = to_metadata(br, log, i, timeout)
+                if isinstance(ans, Metadata):
+                    result_queue.put(ans)
+                    # Remember isbn -> google id for later ISBN based
+                    # lookups via cached_isbn_to_identifier()
+                    for isbn in ans.all_isbns:
+                        self.cache_isbn_to_identifier(isbn,
+                                ans.identifiers['google'])
+            except:
+                log.exception(
+                    'Failed to get metadata for identify entry:',
+                    etree.tostring(i))
+            if abort.is_set():
+                break

    def identify(self, log, result_queue, abort, title=None, authors=None,
            identifiers={}, timeout=5):
@ -207,8 +229,8 @@ def identify(self, log, result_queue, abort, title=None, authors=None,
            return as_unicode(e)

        # There is no point running these queries in threads as google
-        # throttles requests returning Forbidden errors
-        get_all_details(br, log, entries, abort, result_queue, timeout)
+        # throttles requests returning 403 Forbidden errors
+        self.get_all_details(br, log, entries, abort, result_queue, timeout)

        return None

@ -218,8 +240,14 @@ def identify(self, log, result_queue, abort, title=None, authors=None,
            title_test)
    test_identify_plugin(GoogleBooks.name,
        [
+
            (
-                {'title': 'Great Expectations', 'authors':['Charles Dickens']},
-                [title_test('Great Expectations', exact=True)]
+                {'identifiers':{'isbn': '0743273567'}},
+                [title_test('The great gatsby', exact=True)]
            ),
+
+            #(
+            #    {'title': 'Great Expectations', 'authors':['Charles Dickens']},
+            #    [title_test('Great Expectations', exact=True)]
+            #),
    ])
--- a/src/calibre/ebooks/mobi/reader.py
+++ b/src/calibre/ebooks/mobi/reader.py
@ -242,9 +242,11 @@ def __init__(self, filename_or_stream, log, user_encoding=None, debug=None,
        self.debug = debug
        self.embedded_mi = None
        self.base_css_rules = textwrap.dedent('''
-                blockquote { margin: 0em 0em 0em 2em; text-align: justify }
+                body { text-align: justify }

-                p { margin: 0em; text-align: justify; text-indent: 1.5em }
+                blockquote { margin: 0em 0em 0em 2em; }
+
+                p { margin: 0em; text-indent: 1.5em }

                .bold { font-weight: bold }

--- a/src/calibre/ebooks/mobi/writer.py
+++ b/src/calibre/ebooks/mobi/writer.py
@ -2256,22 +2256,22 @@ def _generate_section_indices(self, child, currentSection, myPeriodical, myDoc )
        return sectionIndices, sectionParents

    def _generate_section_article_indices(self, i, section, entries, sectionIndices, sectionParents):
-                sectionArticles = list(section.iter())[1:]
-                # Iterate over the section's articles
+        sectionArticles = list(section.iter())[1:]
+        # Iterate over the section's articles

-                for (j, article) in enumerate(sectionArticles):
-                    # Recompute offset and length for each article
-                    offset, length = self._compute_offset_length(i, article, entries)
-                    if self.opts.verbose > 2 :
-                        self._oeb.logger.info( "article %02d: offset = 0x%06X length = 0x%06X" % (j, offset, length) )
+        for (j, article) in enumerate(sectionArticles):
+            # Recompute offset and length for each article
+            offset, length = self._compute_offset_length(i, article, entries)
+            if self.opts.verbose > 2 :
+                self._oeb.logger.info( "article %02d: offset = 0x%06X length = 0x%06X" % (j, offset, length) )

-                    ctoc_map_index = i + j + 1
+            ctoc_map_index = i + j + 1

-                    #hasAuthor = self._ctoc_map[ctoc_map_index].get('authorOffset')
-                    #hasDescription = self._ctoc_map[ctoc_map_index].get('descriptionOffset')
-                    mySectionParent = sectionParents[sectionIndices[i-1]]
-                    myNewArticle = MobiArticle(mySectionParent, offset, length, ctoc_map_index )
-                    mySectionParent.addArticle( myNewArticle )
+            #hasAuthor = self._ctoc_map[ctoc_map_index].get('authorOffset')
+            #hasDescription = self._ctoc_map[ctoc_map_index].get('descriptionOffset')
+            mySectionParent = sectionParents[sectionIndices[i-1]]
+            myNewArticle = MobiArticle(mySectionParent, offset, length, ctoc_map_index )
+            mySectionParent.addArticle( myNewArticle )

    def _add_book_chapters(self, myDoc, indxt, indices):
        chapterCount = myDoc.documentStructure.chapterCount()
--- a/src/calibre/ebooks/oeb/output.py
+++ b/src/calibre/ebooks/oeb/output.py
@ -32,6 +32,12 @@ def convert(self, oeb_book, output_path, input_plugin, opts, log):
            for key in (OPF_MIME, NCX_MIME, PAGE_MAP_MIME):
                href, root = results.pop(key, [None, None])
                if root is not None:
+                    if key == OPF_MIME:
+                        try:
+                            self.workaround_nook_cover_bug(root)
+                        except:
+                            self.log.exception('Something went wrong while trying to'
+                                    ' workaround Nook cover bug, ignoring')
                    raw = etree.tostring(root, pretty_print=True,
                            encoding='utf-8', xml_declaration=True)
                    if key == OPF_MIME:
@ -49,3 +55,24 @@ def convert(self, oeb_book, output_path, input_plugin, opts, log):
                with open(path, 'wb') as f:
                    f.write(str(item))
                item.unload_data_from_memory(memory=path)
+
+    def workaround_nook_cover_bug(self, root): # {{{
+        '''
+        Rename the cover image's manifest id to "cover" in the OPF tree
+        `root`, modifying the tree in place.
+
+        The rename happens only when all of the following hold:
+          * there is exactly one <meta name="cover"> element whose content
+            attribute is not already "cover"
+          * exactly one manifest item has that content value as its id
+            (and has a media-type attribute)
+          * no manifest item with id "cover" (and a media-type attribute)
+            exists
+          * the matched item's media-type starts with "image/"
+
+        Only the manifest item's id and the meta element's content
+        attribute are changed by this method.
+        '''
+        cov = root.xpath('//*[local-name() = "meta" and @name="cover" and'
+                ' @content != "cover"]')
+        if len(cov) == 1:
+            # Template selecting manifest items by id, %s is the id
+            manpath = ('//*[local-name() = "manifest"]/*[local-name() = "item" '
+                ' and @id="%s" and @media-type]')
+            cov = cov[0]
+            covid = cov.get('content')
+            manifest_item = root.xpath(manpath%covid)
+            # Non-empty when the id "cover" is already taken
+            has_cover = root.xpath(manpath%'cover')
+            if len(manifest_item) == 1 and not has_cover and \
+                    manifest_item[0].get('media-type',
+                            '').startswith('image/'):
+                self.log.warn('The cover image has an id != "cover". Renaming'
+                        ' to work around Nook Color bug')
+                manifest_item = manifest_item[0]
+                manifest_item.set('id', 'cover')
+                cov.set('content', 'cover')
+    # }}}
+
--- a/src/calibre/ebooks/oeb/transforms/flatcss.py
+++ b/src/calibre/ebooks/oeb/transforms/flatcss.py
@ -100,12 +100,13 @@ def FontMapper(sbase=None, dbase=None, dkey=None):

 class CSSFlattener(object):
    def __init__(self, fbase=None, fkey=None, lineh=None, unfloat=False,
-                 untable=False):
+                 untable=False, page_break_on_body=False):
        self.fbase = fbase
        self.fkey = fkey
        self.lineh = lineh
        self.unfloat = unfloat
        self.untable = untable
+        self.page_break_on_body = page_break_on_body

    @classmethod
    def config(cls, cfg):
@ -139,6 +140,8 @@ def stylize_spine(self):
            bs.append('margin-right : %fpt'%\
                    float(self.context.margin_right))
            bs.extend(['padding-left: 0pt', 'padding-right: 0pt'])
+            if self.page_break_on_body:
+                bs.extend(['page-break-before: always'])
            if self.context.change_justification != 'original':
                bs.append('text-align: '+ self.context.change_justification)
            body.set('style', '; '.join(bs))
--- a/src/calibre/ebooks/oeb/transforms/split.py
+++ b/src/calibre/ebooks/oeb/transforms/split.py
@ -38,11 +38,12 @@ def __init__(self, path, root):
 class Split(object):

    def __init__(self, split_on_page_breaks=True, page_breaks_xpath=None,
-            max_flow_size=0):
+            max_flow_size=0, remove_css_pagebreaks=True):
        self.split_on_page_breaks = split_on_page_breaks
        self.page_breaks_xpath = page_breaks_xpath
        self.max_flow_size = max_flow_size
        self.page_break_selectors = None
+        self.remove_css_pagebreaks = remove_css_pagebreaks
        if self.page_breaks_xpath is not None:
            self.page_break_selectors = [(XPath(self.page_breaks_xpath), False)]

@ -83,12 +84,16 @@ def find_page_breaks(self, item):
                    if before and before != 'avoid':
                        self.page_break_selectors.add((CSSSelector(rule.selectorText),
                            True))
+                        if self.remove_css_pagebreaks:
+                            rule.style.removeProperty('page-break-before')
                except:
                    pass
                try:
                    if after and after != 'avoid':
                        self.page_break_selectors.add((CSSSelector(rule.selectorText),
                            False))
+                        if self.remove_css_pagebreaks:
+                            rule.style.removeProperty('page-break-after')
                except:
                    pass
        page_breaks = set([])
--- a/src/calibre/gui2/convert/comic_input.py
+++ b/src/calibre/gui2/convert/comic_input.py
@ -22,7 +22,8 @@ def __init__(self, parent, get_option, get_help, db=None, book_id=None):
                ['colors', 'dont_normalize', 'keep_aspect_ratio', 'right2left',
                    'despeckle', 'no_sort', 'no_process', 'landscape',
                    'dont_sharpen', 'disable_trim', 'wide', 'output_format',
-                    'dont_grayscale', 'comic_image_size']
+                    'dont_grayscale', 'comic_image_size',
+                    'dont_add_comic_pages_to_toc']
                )
        self.db, self.book_id = db, book_id
        for x in get_option('output_format').option.choices:
--- a/src/calibre/gui2/convert/comic_input.ui
+++ b/src/calibre/gui2/convert/comic_input.ui
@ -14,7 +14,7 @@
   <string>Form</string>
  </property>
  <layout class="QGridLayout" name="gridLayout">
-   <item row="2" column="0">
+   <item row="3" column="0">
    <widget class="QLabel" name="label_3">
     <property name="text">
      <string>&amp;Number of Colors:</string>
@ -24,7 +24,7 @@
     </property>
    </widget>
   </item>
-   <item row="2" column="1">
+   <item row="3" column="1">
    <widget class="QSpinBox" name="opt_colors">
     <property name="minimum">
      <number>8</number>
@ -37,70 +37,70 @@
     </property>
    </widget>
   </item>
-   <item row="4" column="0">
+   <item row="5" column="0">
    <widget class="QCheckBox" name="opt_dont_normalize">
     <property name="text">
      <string>Disable &amp;normalize</string>
     </property>
    </widget>
   </item>
-   <item row="5" column="0">
+   <item row="6" column="0">
    <widget class="QCheckBox" name="opt_keep_aspect_ratio">
     <property name="text">
      <string>Keep &amp;aspect ratio</string>
     </property>
    </widget>
   </item>
-   <item row="6" column="0">
+   <item row="7" column="0">
    <widget class="QCheckBox" name="opt_dont_sharpen">
     <property name="text">
      <string>Disable &amp;Sharpening</string>
     </property>
    </widget>
   </item>
-   <item row="7" column="0">
+   <item row="8" column="0">
    <widget class="QCheckBox" name="opt_disable_trim">
     <property name="text">
      <string>Disable &amp;Trimming</string>
     </property>
    </widget>
   </item>
-   <item row="8" column="0">
+   <item row="9" column="0">
    <widget class="QCheckBox" name="opt_wide">
     <property name="text">
      <string>&amp;Wide</string>
     </property>
    </widget>
   </item>
-   <item row="9" column="0">
+   <item row="10" column="0">
    <widget class="QCheckBox" name="opt_landscape">
     <property name="text">
      <string>&amp;Landscape</string>
     </property>
    </widget>
   </item>
-   <item row="10" column="0">
+   <item row="11" column="0">
    <widget class="QCheckBox" name="opt_right2left">
     <property name="text">
      <string>&amp;Right to left</string>
     </property>
    </widget>
   </item>
-   <item row="11" column="0">
+   <item row="12" column="0">
    <widget class="QCheckBox" name="opt_no_sort">
     <property name="text">
      <string>Don't so&amp;rt</string>
     </property>
    </widget>
   </item>
-   <item row="12" column="0">
+   <item row="13" column="0">
    <widget class="QCheckBox" name="opt_despeckle">
     <property name="text">
      <string>De&amp;speckle</string>
     </property>
    </widget>
   </item>
-   <item row="14" column="0">
+   <item row="15" column="0">
    <spacer name="verticalSpacer">
     <property name="orientation">
      <enum>Qt::Vertical</enum>
@ -120,7 +120,7 @@
     </property>
    </widget>
   </item>
-   <item row="13" column="0">
+   <item row="14" column="0">
    <widget class="QLabel" name="label">
     <property name="text">
      <string>&amp;Output format:</string>
@ -130,7 +130,7 @@
     </property>
    </widget>
   </item>
-   <item row="13" column="1">
+   <item row="14" column="1">
    <widget class="QComboBox" name="opt_output_format"/>
   </item>
   <item row="1" column="0">
@ -140,7 +140,7 @@
     </property>
    </widget>
   </item>
-   <item row="3" column="0">
+   <item row="4" column="0">
    <widget class="QLabel" name="label_2">
     <property name="text">
      <string>Override image  &amp;size:</string>
@ -150,9 +150,16 @@
     </property>
    </widget>
   </item>
-   <item row="3" column="1">
+   <item row="4" column="1">
    <widget class="QLineEdit" name="opt_comic_image_size"/>
   </item>
+   <item row="2" column="0" colspan="2">
+    <widget class="QCheckBox" name="opt_dont_add_comic_pages_to_toc">
+     <property name="text">
+      <string>Don't add links to &amp;pages to the Table of Contents for CBC files</string>
+     </property>
+    </widget>
+   </item>
  </layout>
 </widget>
 <resources/>
--- a/src/calibre/gui2/dialogs/drm_error.ui
+++ b/src/calibre/gui2/dialogs/drm_error.ui
@ -44,7 +44,8 @@
    <widget class="QLabel" name="msg">
     <property name="text">
      <string>&lt;p&gt;This book is locked by &lt;b&gt;DRM&lt;/b&gt;. To learn more about DRM and why you cannot read or convert this book in calibre, 
-&lt;a href=&quot;http://bugs.calibre-ebook.com/wiki/DRM&quot;&gt;click here&lt;/a&gt;.</string>
+          &lt;a href=&quot;http://drmfree.calibre-ebook.com/about#drm&quot;&gt;click here&lt;/a&gt;.&lt;p&gt;A large number of recent, DRM free releases are 
+          available at &lt;a href=&quot;http://drmfree.calibre-ebook.com&quot;&gt;Open Books&lt;/a&gt;.</string>
     </property>
     <property name="wordWrap">
      <bool>true</bool>
--- a/src/calibre/gui2/dialogs/tag_categories.py
+++ b/src/calibre/gui2/dialogs/tag_categories.py
@ -73,16 +73,17 @@ def __init__(self, window, db, on_category=None):
            if idx == 0:
                continue
            for n in category_values[idx]():
-                t = Item(name=n, label=label, index=len(self.all_items),icon=category_icons[idx], exists=True)
+                t = Item(name=n, label=label, index=len(self.all_items),
+                         icon=category_icons[idx], exists=True)
                self.all_items.append(t)
-                self.all_items_dict[label+':'+n] = t
+                self.all_items_dict[icu_lower(label+':'+n)] = t

        self.categories = dict.copy(db.prefs.get('user_categories', {}))
        if self.categories is None:
            self.categories = {}
        for cat in self.categories:
            for item,l in enumerate(self.categories[cat]):
-                key = ':'.join([l[1], l[0]])
+                key = icu_lower(':'.join([l[1], l[0]]))
                t = self.all_items_dict.get(key, None)
                if l[1] in self.category_labels:
                    if t is None:
@ -231,6 +232,12 @@ def select_category(self, idx):

    def accept(self):
        self.save_category()
+        # Make sure every dotted prefix of each category name is itself a
+        # category: for 'a.b.c' the entries 'a' and 'a.b' are created with
+        # an empty item list when they do not exist yet. Iterating over
+        # the sorted() copy of the keys makes it safe to add entries to
+        # self.categories inside the loop.
+        for cat in sorted(self.categories.keys(), key=sort_key):
+            components = cat.split('.')
+            for i in range(0,len(components)):
+                c = '.'.join(components[0:i+1])
+                if c not in self.categories:
+                    self.categories[c] = []
        QDialog.accept(self)

    def save_category(self):
--- a/src/calibre/gui2/dialogs/tag_list_editor.py
+++ b/src/calibre/gui2/dialogs/tag_list_editor.py
@ -58,10 +58,12 @@ def __init__(self, window, tag_to_match, data, key):

        self.to_rename = {}
        self.to_delete = set([])
+        self.original_names = {}
        self.all_tags = {}

        for k,v in data:
            self.all_tags[v] = k
+            self.original_names[k] = v
        for tag in sorted(self.all_tags.keys(), key=key):
            item = ListWidgetItem(tag)
            item.setData(Qt.UserRole, self.all_tags[tag])
--- a/src/calibre/gui2/email.py
+++ b/src/calibre/gui2/email.py
@ -209,7 +209,6 @@ class EmailMixin(object): # {{{

    def __init__(self):
        self.emailer = Emailer(self.job_manager)
-        self.emailer.start()

    def send_by_mail(self, to, fmts, delete_from_library, send_ids=None,
            do_auto_convert=True, specific_format=None):
@ -255,6 +254,8 @@ def send_by_mail(self, to, fmts, delete_from_library, send_ids=None,

        to_s = list(repeat(to, len(attachments)))
        if attachments:
+            if not self.emailer.is_alive():
+                self.emailer.start()
            self.emailer.send_mails(jobnames,
                    Dispatcher(partial(self.email_sent, remove=remove)),
                    attachments, to_s, subjects, texts, attachment_names)
@ -325,6 +326,8 @@ def get_fmts(fmts):
            files, auto = self.library_view.model().\
                    get_preferred_formats_from_ids([id_], fmts)
            return files
+        if not self.emailer.is_alive():
+            self.emailer.start()
        sent_mails = self.emailer.email_news(mi, remove,
                get_fmts, self.email_sent)
        if sent_mails:
--- a/src/calibre/gui2/preferences/look_feel.py
+++ b/src/calibre/gui2/preferences/look_feel.py
@ -7,17 +7,19 @@

 from PyQt4.Qt import QApplication, QFont, QFontInfo, QFontDialog

-from calibre.gui2.preferences import ConfigWidgetBase, test_widget
+from calibre.gui2.preferences import ConfigWidgetBase, test_widget, CommaSeparatedList
 from calibre.gui2.preferences.look_feel_ui import Ui_Form
 from calibre.gui2 import config, gprefs, qt_app
 from calibre.utils.localization import available_translations, \
    get_language, get_lang
 from calibre.utils.config import prefs
+from calibre.utils.icu import sort_key

 class ConfigWidget(ConfigWidgetBase, Ui_Form):

    def genesis(self, gui):
        self.gui = gui
+        db = gui.library_view.model().db

        r = self.register

@ -61,6 +63,15 @@ def genesis(self, gui):
        r('tags_browser_partition_method', gprefs, choices=choices)
        r('tags_browser_collapse_at', gprefs)

+        choices = set([k for k in db.field_metadata.all_field_keys()
+                    if db.field_metadata[k]['is_category'] and
+                       db.field_metadata[k]['datatype'] in ['text', 'series', 'enumeration']])
+        choices -= set(['authors', 'publisher', 'formats', 'news'])
+        self.opt_categories_using_hierarchy.update_items_cache(choices)
+        r('categories_using_hierarchy', db.prefs, setting=CommaSeparatedList,
+          choices=sorted(list(choices), key=sort_key))
+
+
        self.current_font = None
        self.change_font_button.clicked.connect(self.change_font)

--- a/src/calibre/gui2/preferences/look_feel.ui
+++ b/src/calibre/gui2/preferences/look_feel.ui
@ -7,7 +7,7 @@
    <x>0</x>
    <y>0</y>
    <width>670</width>
-    <height>392</height>
+    <height>422</height>
   </rect>
  </property>
  <property name="windowTitle">
@ -136,7 +136,7 @@
     <item>
      <widget class="QLabel" name="label_6">
       <property name="text">
-        <string>Tags browser category partitioning method:</string>
+        <string>Tags browser category &amp;partitioning method:</string>
       </property>
       <property name="buddy">
        <cstring>opt_tags_browser_partition_method</cstring>
@ -157,7 +157,7 @@ if you never want subcategories</string>
     <item>
      <widget class="QLabel" name="label_6">
       <property name="text">
-        <string>Collapse when more items than:</string>
+        <string>&amp;Collapse when more items than:</string>
       </property>
       <property name="buddy">
        <cstring>opt_tags_browser_collapse_at</cstring>
@ -190,6 +190,28 @@ up into sub-categories. If the partition method is set to disable, this value is
     </item>
    </layout>
   </item>
+   <item row="8" column="0">
+    <widget class="QLabel" name="label_81">
+     <property name="text">
+      <string>Categories with &amp;hierarchical items:</string>
+     </property>
+     <property name="buddy">
+      <cstring>opt_categories_using_hierarchy</cstring>
+     </property>
+    </widget>
+   </item>
+   <item row="8" column="1">
+    <widget class="MultiCompleteLineEdit" name="opt_categories_using_hierarchy">
+     <property name="toolTip">
+      <string>A comma-separated list of columns in which items containing
+periods are displayed in the tag browser trees. For example, if
+this box contains 'tags' then tags of the form 'Mystery.English'
+and 'Mystery.Thriller' will be displayed with English and Thriller
+both under 'Mystery'. If 'tags' is not in this box,
+then the tags will be displayed each on their own line.</string>
+     </property>
+    </widget>
+   </item>
   <item row="15" column="0" colspan="2">
    <widget class="QGroupBox" name="groupBox_2">
     <property name="title">
@ -275,6 +297,13 @@ up into sub-categories. If the partition method is set to disable, this value is
   </item>
  </layout>
 </widget>
+ <customwidgets>
+  <customwidget>
+   <class>MultiCompleteLineEdit</class>
+   <extends>QLineEdit</extends>
+   <header>calibre/gui2/complete.h</header>
+  </customwidget>
+ </customwidgets>
 <resources/>
 <connections/>
 </ui>
--- a/src/calibre/gui2/store_download.py
+++ b/src/calibre/gui2/store_download.py
@ -167,9 +167,10 @@ class StoreDownloadMixin(object):
    
    def __init__(self):
        self.store_downloader = StoreDownloader(self.job_manager)
-        self.store_downloader.start()
    
    def download_from_store(self, url='', save_as_loc='', add_to_lib=True):
+        if not self.store_downloader.is_alive():
+            self.store_downloader.start()
        self.store_downloader.download_from_store(Dispatcher(self.downloaded_from_store), self.library_view.model().db, url, save_as_loc, add_to_lib)
        self.status_bar.show_message(_('Downloading') + ' ' + url, 3000)
    
--- a/src/calibre/gui2/tag_view.py
+++ b/src/calibre/gui2/tag_view.py
--- a/src/calibre/gui2/ui.py
+++ b/src/calibre/gui2/ui.py
@ -635,7 +635,8 @@ def shutdown(self, write_settings=True):
            mb.stop()

        self.hide_windows()
-        self.emailer.stop()
+        if self.emailer.is_alive():
+            self.emailer.stop()
        try:
            try:
                if self.content_server is not None:
--- a/src/calibre/library/caches.py
+++ b/src/calibre/library/caches.py
@ -124,9 +124,16 @@ def _match(query, value, matchkind):
    for t in value:
        t = icu_lower(t)
        try:     ### ignore regexp exceptions, required because search-ahead tries before typing is finished
-            if ((matchkind == EQUALS_MATCH and query == t) or
-                (matchkind == REGEXP_MATCH and re.search(query, t, re.I)) or ### search unanchored
-                (matchkind == CONTAINS_MATCH and query in t)):
+            if (matchkind == EQUALS_MATCH):
+                if query[0] == '.':
+                    if t.startswith(query[1:]):
+                        ql = len(query) - 1
+                        if (len(t) == ql) or (t[ql:ql+1] == '.'):
+                            return True
+                elif query == t:
+                    return True
+            elif ((matchkind == REGEXP_MATCH and re.search(query, t, re.I)) or ### search unanchored
+                  (matchkind == CONTAINS_MATCH and query in t)):
                    return True
        except re.error:
            pass
@ -415,13 +422,25 @@ def get_user_category_matches(self, location, query, candidates):
        if self.db_prefs is None:
            return  res
        user_cats = self.db_prefs.get('user_categories', [])
-        if location not in user_cats:
-            return res
        c = set(candidates)
-        for (item, category, ign) in user_cats[location]:
-            s = self.get_matches(category, '=' + item, candidates=c)
-            c -= s
-            res |= s
+        l = location.rfind('.')
+        if l > 0:
+            alt_loc = location[0:l]
+            alt_item = location[l+1:]
+        else:
+            alt_loc = None
+        for key in user_cats:
+            if key == location or key.startswith(location + '.'):
+                for (item, category, ign) in user_cats[key]:
+                    s = self.get_matches(category, '=' + item, candidates=c)
+                    c -= s
+                    res |= s
+            elif key == alt_loc:
+                for (item, category, ign) in user_cats[key]:
+                    if item == alt_item:
+                        s = self.get_matches(category, '=' + item, candidates=c)
+                        c -= s
+                        res |= s
        if query == 'false':
            return candidates - res
        return res
--- a/src/calibre/library/database2.py
+++ b/src/calibre/library/database2.py
@ -174,6 +174,7 @@ def initialize_dynamic(self):
        self.prefs = DBPrefs(self)
        defs = self.prefs.defaults
        defs['gui_restriction'] = defs['cs_restriction'] = ''
+        defs['categories_using_hierarchy'] = []

        # Migrate saved search and user categories to db preference scheme
        def migrate_preference(key, default):
@ -812,6 +813,21 @@ def get_metadata(self, idx, index_is_id=False, get_cover=False):
                                            index_is_id=index_is_id),
                        extra=self.get_custom_extra(idx, label=meta['label'],
                                                    index_is_id=index_is_id))
+
+        user_cats = self.prefs['user_categories']
+        user_cat_vals = {}
+        for ucat in user_cats:
+            res = []
+            for name,cat,ign in user_cats[ucat]:
+                v = mi.get(cat, None)
+                if isinstance(v, list):
+                    if name in v:
+                        res.append([name,cat])
+                elif name == v:
+                    res.append([name,cat])
+            user_cat_vals[ucat] = res
+        mi.user_categories = user_cat_vals
+
        if get_cover:
            mi.cover = self.cover(id, index_is_id=True, as_path=True)
        return mi
@ -1406,7 +1422,7 @@ def get_categories(self, sort='name', ids=None, icon_map=None):
        # temporarily duplicating the categories lists.
        taglist = {}
        for c in categories.keys():
-            taglist[c] = dict(map(lambda t:(t.name, t), categories[c]))
+            taglist[c] = dict(map(lambda t:(icu_lower(t.name), t), categories[c]))

        muc = self.prefs.get('grouped_search_make_user_categories', [])
        gst = self.prefs.get('grouped_search_terms', {})
@ -1422,8 +1438,9 @@ def get_categories(self, sort='name', ids=None, icon_map=None):
        for user_cat in sorted(user_categories.keys(), key=sort_key):
            items = []
            for (name,label,ign) in user_categories[user_cat]:
-                if label in taglist and name in taglist[label]:
-                    items.append(taglist[label][name])
+                n = icu_lower(name)
+                if label in taglist and n in taglist[label]:
+                    items.append(taglist[label][n])
                # else: do nothing, to not include nodes w zero counts
            cat_name = '@' + user_cat # add the '@' to avoid name collision
            # Not a problem if we accumulate entries in the icon map
@ -2434,7 +2451,8 @@ def add_news(self, path, arg):
        stream.seek(0)
        mi = get_metadata(stream, format, use_libprs_metadata=False)
        stream.seek(0)
-        mi.series_index = 1.0
+        if not mi.series_index:
+            mi.series_index = 1.0
        mi.tags = [_('News')]
        if arg['add_title_tag']:
            mi.tags += [arg['title']]
--- a/src/calibre/library/field_metadata.py
+++ b/src/calibre/library/field_metadata.py
@ -32,7 +32,7 @@ def __init__(self, icon_dict):
                    'news'      : 'news.png',
                    'tags'      : 'tags.png',
                    'custom:'   : 'column.png',
-                    'user:'     : 'drawer.png',
+                    'user:'     : 'tb_folder.png',
                    'search'    : 'search.png'
            }

--- a/src/calibre/manual/gui.rst
+++ b/src/calibre/manual/gui.rst
@ -413,6 +413,27 @@ The Book Details display shows you extra information and the cover for the curre

 .. _jobs:

+.. _tag_browser:
+
+Tag Browser
+-------------
+.. image:: images/tag_browser.png
+
+The Tag Browser allows you to easily browse your collection by Author/Tags/Series/etc. If you click on any item in the Tag Browser, for example, the author name, Isaac Asimov, then the list of books to the right is restricted to books by that author. Clicking once again on Isaac Asimov will restrict the list of books to books not by Isaac Asimov. A third click will remove the restriction. If you hold down the Ctrl or Shift keys and click on multiple items, then restrictions based on multiple items are created. For example, you could hold Ctrl and click on the tags History and Europe to find books on European history. The Tag Browser works by constructing search expressions that are automatically entered into the Search bar. It is a good way to learn how to construct basic search expressions.
+
+There is a search bar at the top of the Tag Browser that allows you to easily find any item in the Tag Browser. In addition, you can right click on any item and choose to hide it or rename it or open a "Manage x" dialog that allows you to manage items of that kind. For example the "Manage Authors" dialog allows you to rename authors and control how their names are sorted. 
+
+For convenience, you can drag and drop books from the book list to items in the Tag Browser and that item will be automatically applied to the dropped books. For example, dragging a book to Isaac Asimov will set the author of that book to Isaac Asimov or dragging it to the tag History will add the tag History to its tags.
+
+The outer-level items in the tag browser such as Authors and Series are called categories. You can create your own categories, called User Categories, which are useful for organizing items. For example, you can use the user categories editor (push the Manage User Categories button) to create a user category called Favorite Authors, then put the items for your favorites into the category. User categories act like built-in categories; you can click on items to search for them. You can search for all items in a category by right-clicking on the category name and choosing "Search for books in ...".
+
+User categories can have sub-categories. For example, the user category Favorites.Authors is a sub-category of Favorites. You might also have Favorites.Series, in which case there will be two sub-categories under Favorites. Sub-categories can be created using Manage User Categories by entering names like the Favorites example. They can also be created by right-clicking on a user category, choosing "Add sub-category to ...", and entering the category name.
+
+It is also possible to create hierarchies inside some of the built-in categories (the text categories). These hierarchies are shown with a small triangle that lets the sub-items be hidden or revealed. To use hierarchies in a category, you must first go to Preferences / Look & Feel and enter the category name(s) into the "Categories with hierarchical items" box. Once this is done, items in that category that contain periods will be shown using the small triangle. For example, assume you create a custom column called "Genre" and indicate that it contains hierarchical items. Once done, items such as Mystery.Thriller and Mystery.English will display as Mystery with the small triangle next to it. Clicking on the triangle will show Thriller and English as sub-items.
+
+You can drag and drop items in the Tag Browser onto user categories to add them to that category.
+
+
 Jobs
 -----
 .. image:: images/jobs.png
--- a/src/calibre/manual/images/tag_browser.png
+++ b/src/calibre/manual/images/tag_browser.png
--- a/src/calibre/utils/formatter_functions.py
+++ b/src/calibre/utils/formatter_functions.py
@ -396,6 +396,34 @@ def evaluate(self, formatter, kwargs, mi, locals, val, index, sep):
        except:
            return ''

+class BuiltinSublist(BuiltinFormatterFunction):
+    # Template function that returns a slice of a separator-delimited list value.
+    name = 'sublist'
+    arg_count = 4
+    doc = _('sublist(val, start_index, end_index, separator) -- interpret the '
+            'value as a list of items separated by `separator`, returning a '
+            'new list made from the `start_index`th to the `end_index`th item. '
+            'The first item is number zero. If an index is negative, then it '
+            'counts from the end of the list. As a special case, an end_index '
+            'of zero is assumed to be the length of the list. Examples using '
+            'basic template mode and assuming a #genre value of A.B.C: '
+            '{#genre:sublist(-1,0,.)} returns C<br/>'
+            '{#genre:sublist(0,1,.)} returns A<br/>'
+            '{#genre:sublist(0,-1,.)} returns A.B')
+
+    def evaluate(self, formatter, kwargs, mi, locals, val, start_index, end_index, sep):
+        # Split val on sep, take the slice [start_index:end_index] and join it
+        # back together with sep. An empty val yields an empty string.
+        if not val:
+            return ''
+        # The index conversion happens outside the try below, so an index that
+        # int() cannot convert raises to the caller instead of returning ''.
+        si = int(start_index)
+        ei = int(end_index)
+        val = val.split(sep)
+        try:
+            if ei == 0:
+                # An end index of zero means "up to the end of the list"
+                return sep.join(val[si:])
+            else:
+                return sep.join(val[si:ei])
+        except:
+            return ''
+
 class BuiltinUppercase(BuiltinFormatterFunction):
    name = 'uppercase'
    arg_count = 1
@ -447,6 +475,7 @@ def evaluate(self, formatter, kwargs, mi, locals, val):
 builtin_shorten     = BuiltinShorten()
 builtin_strcat      = BuiltinStrcat()
 builtin_strcmp      = BuiltinStrcmp()
+builtin_sublist     = BuiltinSublist()
 builtin_substr      = BuiltinSubstr()
 builtin_subtract    = BuiltinSubtract()
 builtin_switch      = BuiltinSwitch()
--- a/src/calibre/web/feeds/templates.py
+++ b/src/calibre/web/feeds/templates.py
@ -136,7 +136,7 @@ def _generate(self, f, feeds, cutoff, extra_css=None, style=None):
            head.append(STYLE(style, type='text/css'))
        if extra_css:
            head.append(STYLE(extra_css, type='text/css'))
-        body = BODY(style='page-break-before:always')
+        body = BODY()
        body.append(self.get_navbar(f, feeds))

        div = DIV(
@ -322,7 +322,7 @@ def trim_title(title,clip=18):
            head.append(STYLE(style, type='text/css'))
        if extra_css:
            head.append(STYLE(extra_css, type='text/css'))
-        body = BODY(style='page-break-before:always')
+        body = BODY()
        div = DIV(
                top_navbar,
                H2(feed.title, CLASS('feed_title'))
--- a/src/odf/attrconverters.py
+++ b/src/odf/attrconverters.py
@ -1,5 +1,5 @@
 # -*- coding: utf-8 -*-
-# Copyright (C) 2006-2008 Søren Roug, European Environment Agency
+# Copyright (C) 2006-2010 Søren Roug, European Environment Agency
 #
 # This library is free software; you can redistribute it and/or
 # modify it under the terms of the GNU Lesser General Public
@ -40,6 +40,9 @@ def cnv_boolean(attribute, arg, element):

 # Potentially accept color values
 def cnv_color(attribute, arg, element):
+    """ A RGB color in conformance with §5.9.11 of [XSL], that is a RGB color in notation “#rrggbb”, where
+        rr, gg and bb are 8-bit hexadecimal digits.
+    """
    return str(arg)

 def cnv_configtype(attribute, arg, element):
@ -55,9 +58,15 @@ def cnv_data_source_has_labels(attribute, arg, element):

 # Understand different date formats
 def cnv_date(attribute, arg, element):
+    """ A dateOrDateTime value is either an [xmlschema-2] date value or an [xmlschema-2] dateTime
+        value.
+    """
    return str(arg)

 def cnv_dateTime(attribute, arg, element):
+    """ A dateOrDateTime value is either an [xmlschema-2] date value or an [xmlschema-2] dateTime
+        value.
+    """
    return str(arg)

 def cnv_double(attribute, arg, element):
@ -67,11 +76,31 @@ def cnv_duration(attribute, arg, element):
    return str(arg)

 def cnv_family(attribute, arg, element):
+    """ A style family """
    if str(arg) not in ("text", "paragraph", "section", "ruby", "table", "table-column", "table-row", "table-cell",
      "graphic", "presentation", "drawing-page", "chart"):
        raise ValueError, "'%s' not allowed" % str(arg)
    return str(arg)

+def __save_prefix(attribute, arg, element):
+    """ Handle an attribute value that may start with a namespace prefix ("prefix:rest").
+        If the prefix resolves to a namespace known to odfpy, the element is asked
+        for a prefix for that namespace. The value itself is always returned
+        unchanged, as a unicode string.
+    """
+    prefix = arg.split(':',1)[0]
+    if prefix == arg:
+        # No ':' in the value, so there is no prefix to resolve
+        return unicode(arg)
+    namespace = element.get_knownns(prefix)
+    if namespace is None:
+        # Unknown prefixes are deliberately passed through instead of rejected
+        #raise ValueError, "'%s' is an unknown prefix" % str(prefix)
+        return unicode(arg)
+    # Called only for its effect on element (presumably records the namespace
+    # so it gets declared on output -- confirm in Element.get_nsprefix); the
+    # prefix it returns is not needed here.
+    element.get_nsprefix(namespace)
+    return unicode(arg)
+
+def cnv_formula(attribute, arg, element):
+    """ A string containing a formula. Formulas do not have a predefined syntax, but the string should
+        begin with a namespace prefix, followed by a “:” (COLON, U+003A) separator, followed by the text
+        of the formula. The namespace bound to the prefix determines the syntax and semantics of the
+        formula.
+
+        The formula text is not validated here; the value is handed to __save_prefix,
+        which returns it as a unicode string.
+    """
+    return __save_prefix(attribute, arg, element)
+
 def cnv_ID(attribute, arg, element):
    return str(arg)

@ -89,6 +118,9 @@ def cnv_legend_position(attribute, arg, element):
 pattern_length = re.compile(r'-?([0-9]+(\.[0-9]*)?|\.[0-9]+)((cm)|(mm)|(in)|(pt)|(pc)|(px))')

 def cnv_length(attribute, arg, element):
+    """ A (positive or negative) physical length, consisting of magnitude and unit, in conformance with the
+        Units of Measure defined in §5.9.13 of [XSL].
+    """
    global pattern_length
    if not pattern_length.match(arg):
        raise ValueError, "'%s' is not a valid length" % arg
@ -120,12 +152,12 @@ def cnv_namespacedToken(attribute, arg, element):

    if not pattern_namespacedToken.match(arg):
        raise ValueError, "'%s' is not a valid namespaced token" % arg
-    return arg
+    return __save_prefix(attribute, arg, element)

-# Must accept string as argument
-# NCName is defined in http://www.w3.org/TR/REC-xml-names/#NT-NCName
-# Essentially an XML name minus ':'
 def cnv_NCName(attribute, arg, element):
+    """ NCName is defined in http://www.w3.org/TR/REC-xml-names/#NT-NCName
+        Essentially an XML name minus ':'
+    """
    if type(arg) in types.StringTypes:
        return make_NCName(arg)
    else:
@ -226,6 +258,7 @@ def cnv_xlinkshow(attribute, arg, element):
 	((ANIMNS,u'name'), None): cnv_string,
 	((ANIMNS,u'sub-item'), None): cnv_string,
 	((ANIMNS,u'value'), None): cnv_string,
+#	((DBNS,u'type'), None): cnv_namespacedToken,
 	((CHARTNS,u'attached-axis'), None): cnv_string,
 	((CHARTNS,u'class'), (CHARTNS,u'grid')): cnv_major_minor,
 	((CHARTNS,u'class'), None): cnv_namespacedToken,
@ -288,7 +321,7 @@ def cnv_xlinkshow(attribute, arg, element):
 	((CHARTNS,u'values-cell-range-address'), None): cnv_string,
 	((CHARTNS,u'vertical'), None): cnv_boolean,
 	((CHARTNS,u'visible'), None): cnv_boolean,
-	((CONFIGNS,u'name'), None): cnv_string,
+	((CONFIGNS,u'name'), None): cnv_formula,
 	((CONFIGNS,u'type'), None): cnv_configtype,
 	((DR3DNS,u'ambient-color'), None): cnv_string,
 	((DR3DNS,u'back-scale'), None): cnv_string,
@ -369,11 +402,11 @@ def cnv_xlinkshow(attribute, arg, element):
 	((DRAWNS,u'decimal-places'), None): cnv_string,
 	((DRAWNS,u'display'), None): cnv_string,
 	((DRAWNS,u'display-name'), None): cnv_string,
-	((DRAWNS,u'distance'), None): cnv_string,
+	((DRAWNS,u'distance'), None): cnv_lengthorpercent,
 	((DRAWNS,u'dots1'), None): cnv_integer,
-	((DRAWNS,u'dots1-length'), None): cnv_length,
+	((DRAWNS,u'dots1-length'), None): cnv_lengthorpercent,
 	((DRAWNS,u'dots2'), None): cnv_integer,
-	((DRAWNS,u'dots2-length'), None): cnv_length,
+	((DRAWNS,u'dots2-length'), None): cnv_lengthorpercent,
 	((DRAWNS,u'end-angle'), None): cnv_double,
 	((DRAWNS,u'end'), None): cnv_string,
 	((DRAWNS,u'end-color'), None): cnv_string,
@ -383,7 +416,7 @@ def cnv_xlinkshow(attribute, arg, element):
 	((DRAWNS,u'end-line-spacing-horizontal'), None): cnv_string,
 	((DRAWNS,u'end-line-spacing-vertical'), None): cnv_string,
 	((DRAWNS,u'end-shape'), None): cnv_IDREF,
-	((DRAWNS,u'engine'), None): cnv_string,
+	((DRAWNS,u'engine'), None): cnv_namespacedToken,
 	((DRAWNS,u'enhanced-path'), None): cnv_string,
 	((DRAWNS,u'escape-direction'), None): cnv_string,
 	((DRAWNS,u'extrusion-allowed'), None): cnv_boolean,
@ -604,7 +637,7 @@ def cnv_xlinkshow(attribute, arg, element):
 	((FORMNS,u'button-type'), None): cnv_string,
 	((FORMNS,u'command'), None): cnv_string,
 	((FORMNS,u'command-type'), None): cnv_string,
-	((FORMNS,u'control-implementation'), None): cnv_string,
+	((FORMNS,u'control-implementation'), None): cnv_namespacedToken,
 	((FORMNS,u'convert-empty-to-null'), None): cnv_boolean,
 	((FORMNS,u'current-selected'), None): cnv_boolean,
 	((FORMNS,u'current-state'), None): cnv_string,
@ -800,8 +833,8 @@ def cnv_xlinkshow(attribute, arg, element):
 	((PRESENTATIONNS,u'user-transformed'), None): cnv_boolean,
 	((PRESENTATIONNS,u'verb'), None): cnv_nonNegativeInteger,
 	((PRESENTATIONNS,u'visibility'), None): cnv_string,
-	((SCRIPTNS,u'event-name'), None): cnv_string,
-	((SCRIPTNS,u'language'), None): cnv_string,
+	((SCRIPTNS,u'event-name'), None): cnv_formula,
+	((SCRIPTNS,u'language'), None): cnv_formula,
 	((SCRIPTNS,u'macro-name'), None): cnv_string,
 	((SMILNS,u'accelerate'), None): cnv_double,
 	((SMILNS,u'accumulate'), None): cnv_string,
@ -1087,7 +1120,7 @@ def cnv_xlinkshow(attribute, arg, element):
 	((SVGNS,u'y2'), None): cnv_lengthorpercent,
 	((TABLENS,u'acceptance-state'), None): cnv_string,
 	((TABLENS,u'add-empty-lines'), None): cnv_boolean,
-	((TABLENS,u'algorithm'), None): cnv_string,
+	((TABLENS,u'algorithm'), None): cnv_formula,
 	((TABLENS,u'align'), None): cnv_string,
 	((TABLENS,u'allow-empty-cell'), None): cnv_boolean,
 	((TABLENS,u'application-data'), None): cnv_string,
@ -1106,7 +1139,7 @@ def cnv_xlinkshow(attribute, arg, element):
 	((TABLENS,u'cell-range'), None): cnv_string,
 	((TABLENS,u'column'), None): cnv_integer,
 	((TABLENS,u'comment'), None): cnv_string,
-	((TABLENS,u'condition'), None): cnv_string,
+	((TABLENS,u'condition'), None): cnv_formula,
 	((TABLENS,u'condition-source'), None): cnv_string,
 	((TABLENS,u'condition-source-range-address'), None): cnv_string,
 	((TABLENS,u'contains-error'), None): cnv_boolean,
@ -1144,13 +1177,13 @@ def cnv_xlinkshow(attribute, arg, element):
 	((TABLENS,u'end-x'), None): cnv_length,
 	((TABLENS,u'end-y'), None): cnv_length,
 	((TABLENS,u'execute'), None): cnv_boolean,
-	((TABLENS,u'expression'), None): cnv_string,
+	((TABLENS,u'expression'), None): cnv_formula,
 	((TABLENS,u'field-name'), None): cnv_string,
 	((TABLENS,u'field-number'), None): cnv_nonNegativeInteger,
 	((TABLENS,u'field-number'), None): cnv_string,
 	((TABLENS,u'filter-name'), None): cnv_string,
 	((TABLENS,u'filter-options'), None): cnv_string,
-	((TABLENS,u'formula'), None): cnv_string,
+	((TABLENS,u'formula'), None): cnv_formula,
 	((TABLENS,u'function'), None): cnv_string,
 	((TABLENS,u'function'), None): cnv_string,
 	((TABLENS,u'grand-total'), None): cnv_string,
@ -1290,7 +1323,7 @@ def cnv_xlinkshow(attribute, arg, element):
 	((TEXTNS,u'combine-entries-with-pp'), None): cnv_boolean,
 	((TEXTNS,u'comma-separated'), None): cnv_boolean,
 	((TEXTNS,u'cond-style-name'), None): cnv_StyleNameRef,
-	((TEXTNS,u'condition'), None): cnv_string,
+	((TEXTNS,u'condition'), None): cnv_formula,
 	((TEXTNS,u'connection-name'), None): cnv_string,
 	((TEXTNS,u'consecutive-numbering'), None): cnv_boolean,
 	((TEXTNS,u'continue-numbering'), None): cnv_boolean,
@ -1321,7 +1354,7 @@ def cnv_xlinkshow(attribute, arg, element):
 	((TEXTNS,u'first-row-start-column'), None): cnv_string,
 	((TEXTNS,u'fixed'), None): cnv_boolean,
 	((TEXTNS,u'footnotes-position'), None): cnv_string,
-	((TEXTNS,u'formula'), None): cnv_string,
+	((TEXTNS,u'formula'), None): cnv_formula,
 	((TEXTNS,u'global'), None): cnv_boolean,
 	((TEXTNS,u'howpublished'), None): cnv_string,
 	((TEXTNS,u'id'), None): cnv_ID,
@ -1437,7 +1470,10 @@ def cnv_xlinkshow(attribute, arg, element):

 class AttrConverters:
    def convert(self, attribute, value, element):
-        conversion = attrconverters.get((attribute,element), None)
+        """ Based on the element, figures out how to check/convert the attribute value
+            All values are converted to string
+        """
+        conversion = attrconverters.get((attribute, element.qname), None)
        if conversion is not None:
            return conversion(attribute, value, element)
        else:
--- a/src/odf/element.py
+++ b/src/odf/element.py
@ -1,6 +1,6 @@
 #!/usr/bin/python
 # -*- coding: utf-8 -*-
-# Copyright (C) 2007-2008 Søren Roug, European Environment Agency
+# Copyright (C) 2007-2010 Søren Roug, European Environment Agency
 #
 # This library is free software; you can redistribute it and/or
 # modify it under the terms of the GNU Lesser General Public
@ -112,6 +112,9 @@ def _get_lastChild(self):
            return self.childNodes[-1]

    def insertBefore(self, newChild, refChild):
+        """ Inserts the node newChild before the existing child node refChild.
+            If refChild is null, insert newChild at the end of the list of children.
+        """
        if newChild.nodeType not in self._child_node_types:
            raise IllegalChild, "%s cannot be child of %s" % (newChild.tagName, self.tagName)
        if newChild.parentNode is not None:
@ -135,21 +138,26 @@ def insertBefore(self, newChild, refChild):
            newChild.parentNode = self
        return newChild

-    def appendChild(self, node):
-        if node.nodeType == self.DOCUMENT_FRAGMENT_NODE:
-            for c in tuple(node.childNodes):
+    def appendChild(self, newChild):
+        """ Adds the node newChild to the end of the list of children of this node.
+            If the newChild is already in the tree, it is first removed.
+        """
+        if newChild.nodeType == self.DOCUMENT_FRAGMENT_NODE:
+            for c in tuple(newChild.childNodes):
                self.appendChild(c)
            ### The DOM does not clearly specify what to return in this case
-            return node
-        if node.nodeType not in self._child_node_types:
-            raise IllegalChild, "<%s> is not allowed in %s" % ( node.tagName, self.tagName)
-        if node.parentNode is not None:
-            node.parentNode.removeChild(node)
-        _append_child(self, node)
-        node.nextSibling = None
-        return node
+            return newChild
+        if newChild.nodeType not in self._child_node_types:
+            raise IllegalChild, "<%s> is not allowed in %s" % ( newChild.tagName, self.tagName)
+        if newChild.parentNode is not None:
+            newChild.parentNode.removeChild(newChild)
+        _append_child(self, newChild)
+        newChild.nextSibling = None
+        return newChild

    def removeChild(self, oldChild):
+        """ Removes the child node indicated by oldChild from the list of children, and returns it.
+        """
        #FIXME: update ownerDocument.element_dict or find other solution
        try:
            self.childNodes.remove(oldChild)
@ -191,8 +199,8 @@ def _append_child(self, node):
    node.__dict__["parentNode"] = self

 class Childless:
-    """Mixin that makes childless-ness easy to implement and avoids
-    the complexity of the Node methods that deal with children.
+    """ Mixin that makes childless-ness easy to implement and avoids
+        the complexity of the Node methods that deal with children.
    """

    attributes = None
@ -207,6 +215,7 @@ def _get_lastChild(self):
        return None

    def appendChild(self, node):
+        """ Raises an error """
        raise xml.dom.HierarchyRequestErr(
            self.tagName + " nodes cannot have children")

@ -214,14 +223,17 @@ def hasChildNodes(self):
        return False

    def insertBefore(self, newChild, refChild):
+        """ Raises an error """
        raise xml.dom.HierarchyRequestErr(
            self.tagName + " nodes do not have children")

    def removeChild(self, oldChild):
+        """ Raises an error """
        raise xml.dom.NotFoundErr(
            self.tagName + " nodes do not have children")

    def replaceChild(self, newChild, oldChild):
+        """ Raises an error """
        raise xml.dom.HierarchyRequestErr(
            self.tagName + " nodes do not have children")

@ -247,8 +259,12 @@ class CDATASection(Childless, Text):
    nodeType = Node.CDATA_SECTION_NODE

    def toXml(self,level,f):
+        """ Generate XML output of the node. If the text contains "]]>", then
+            escape it by going out of CDATA mode (]]>), then write the string
+            and then go into CDATA mode again. (<![CDATA[)
+        """
        if self.data:
-            f.write('<![CDATA[%s]]>' % self.data)
+            f.write('<![CDATA[%s]]>' % self.data.replace(']]>',']]>]]><![CDATA['))

 class Element(Node):
    """ Creates a arbitrary element and is intended to be subclassed not used on its own.
@ -310,7 +326,19 @@ def __init__(self, attributes=None, text=None, cdata=None, qname=None, qattribut
                if self.getAttrNS(r[0],r[1]) is None:
                    raise AttributeError, "Required attribute missing: %s in <%s>" % (r[1].lower().replace('-',''), self.tagName)

+    def get_knownns(self, prefix):
+        """ Resolve a prefix through odfpy's table of known namespaces (nsdict).
+            Returns the namespace registered for the prefix, or None when the
+            prefix is not in the table.
+        """
+        for namespace, known_prefix in nsdict.items():
+            if known_prefix == prefix:
+                return namespace
+        return None
+        
    def get_nsprefix(self, namespace):
+        """ Odfpy maintains a list of known namespaces. In some cases we have a namespace URL,
+            and needs to look up or assign the prefix for it.
+        """
        if namespace is None: namespace = ""
        prefix = _nsassign(namespace)
        if not self.namespaces.has_key(namespace):
@ -339,6 +367,9 @@ def addElement(self, element, check_grammar=True):
            self.ownerDocument.rebuild_caches(element)

    def addText(self, text, check_grammar=True):
+        """ Adds text to an element
+            Setting check_grammar=False turns off grammar checking
+        """
        if check_grammar and self.qname not in grammar.allows_text:
            raise IllegalText, "The <%s> element does not allow text" % self.tagName
        else:
@ -346,6 +377,9 @@ def addText(self, text, check_grammar=True):
                self.appendChild(Text(text))

    def addCDATA(self, cdata, check_grammar=True):
+        """ Adds CDATA to an element
+            Setting check_grammar=False turns off grammar checking
+        """
        if check_grammar and self.qname not in grammar.allows_text:
            raise IllegalText, "The <%s> element does not allow text" % self.tagName
        else:
@ -403,17 +437,18 @@ def setAttrNS(self, namespace, localpart, value):
 #       if allowed_attrs and (namespace, localpart) not in allowed_attrs:
 #           raise AttributeError, "Attribute %s:%s is not allowed in element <%s>" % ( prefix, localpart, self.tagName)
        c = AttrConverters()
-        self.attributes[prefix + ":" + localpart] = c.convert((namespace, localpart), value, self.qname)
+        self.attributes[(namespace, localpart)] = c.convert((namespace, localpart), value, self)

    def getAttrNS(self, namespace, localpart):
        prefix = self.get_nsprefix(namespace)
-        return self.attributes.get(prefix + ":" + localpart)
+        return self.attributes.get((namespace, localpart))

    def removeAttrNS(self, namespace, localpart):
-        prefix = self.get_nsprefix(namespace)
-        del self.attributes[prefix + ":" + localpart]
+        del self.attributes[(namespace, localpart)]

    def getAttribute(self, attr):
+        """ Get an attribute value. The method knows which namespace the attribute is in
+        """
        allowed_attrs = self.allowed_attributes()
        if allowed_attrs is None:
            if type(attr) == type(()):
@ -432,8 +467,9 @@ def write_open_tag(self, level, f):
        if level == 0:
            for namespace, prefix in self.namespaces.items():
                f.write(' xmlns:' + prefix + '="'+ _escape(str(namespace))+'"')
-        for attkey in self.attributes.keys():
-            f.write(' '+_escape(str(attkey))+'='+_quoteattr(unicode(self.attributes[attkey]).encode('utf-8')))
+        for qname in self.attributes.keys():
+            prefix = self.get_nsprefix(qname[0])
+            f.write(' '+_escape(str(prefix+':'+qname[1]))+'='+_quoteattr(unicode(self.attributes[qname]).encode('utf-8')))
        f.write('>')

    def write_close_tag(self, level, f):
@ -445,8 +481,9 @@ def toXml(self, level, f):
        if level == 0:
            for namespace, prefix in self.namespaces.items():
                f.write(' xmlns:' + prefix + '="'+ _escape(str(namespace))+'"')
-        for attkey in self.attributes.keys():
-            f.write(' '+_escape(str(attkey))+'='+_quoteattr(unicode(self.attributes[attkey]).encode('utf-8')))
+        for qname in self.attributes.keys():
+            prefix = self.get_nsprefix(qname[0])
+            f.write(' '+_escape(str(prefix+':'+qname[1]))+'='+_quoteattr(unicode(self.attributes[qname]).encode('utf-8')))
        if self.childNodes:
            f.write('>')
            for element in self.childNodes:
@ -464,6 +501,7 @@ def _getElementsByObj(self, obj, accumulator):
        return accumulator

    def getElementsByType(self, element):
+        """ Gets elements based on the type, which is function from text.py, draw.py etc. """
        obj = element(check_grammar=False)
        return self._getElementsByObj(obj,[])

--- a/src/odf/grammar.py
+++ b/src/odf/grammar.py
@ -1,5 +1,5 @@
 # -*- coding: utf-8 -*-
-# Copyright (C) 2006-2009 Søren Roug, European Environment Agency
+# Copyright (C) 2006-2010 Søren Roug, European Environment Agency
 #
 # This library is free software; you can redistribute it and/or
 # modify it under the terms of the GNU Lesser General Public
--- a/src/odf/load.py
+++ b/src/odf/load.py
@ -63,8 +63,8 @@ def startElementNS(self, tag, qname, attrs):

        self.level = self.level + 1
        # Add any accumulated text content
-        content = ''.join(self.data).strip()
-        if len(content) > 0:
+        content = ''.join(self.data)
+        if len(content.strip()) > 0:
            self.parent.addText(content, check_grammar=False)
            self.data = []
        # Create the element
--- a/src/odf/namespaces.py
+++ b/src/odf/namespaces.py
@ -1,5 +1,5 @@
 # -*- coding: utf-8 -*-
-# Copyright (C) 2006-2009 Søren Roug, European Environment Agency
+# Copyright (C) 2006-2010 Søren Roug, European Environment Agency
 #
 # This library is free software; you can redistribute it and/or
 # modify it under the terms of the GNU Lesser General Public
@ -17,7 +17,7 @@
 #
 # Contributor(s):
 #
-TOOLSVERSION = u"ODFPY/0.9.2dev"
+TOOLSVERSION = u"ODFPY/0.9.4dev"

 ANIMNS         = u"urn:oasis:names:tc:opendocument:xmlns:animation:1.0"
 DBNS           = u"urn:oasis:names:tc:opendocument:xmlns:database:1.0"
@ -28,19 +28,23 @@
 DOMNS          = u"http://www.w3.org/2001/xml-events"
 DR3DNS         = u"urn:oasis:names:tc:opendocument:xmlns:dr3d:1.0"
 DRAWNS         = u"urn:oasis:names:tc:opendocument:xmlns:drawing:1.0"
+FIELDNS        = u"urn:openoffice:names:experimental:ooo-ms-interop:xmlns:field:1.0"
 FONS           = u"urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0"
 FORMNS         = u"urn:oasis:names:tc:opendocument:xmlns:form:1.0"
+GRDDLNS        = u"http://www.w3.org/2003/g/data-view#"
 KOFFICENS      = u"http://www.koffice.org/2005/"
 MANIFESTNS     = u"urn:oasis:names:tc:opendocument:xmlns:manifest:1.0"
 MATHNS         = u"http://www.w3.org/1998/Math/MathML"
 METANS         = u"urn:oasis:names:tc:opendocument:xmlns:meta:1.0"
 NUMBERNS       = u"urn:oasis:names:tc:opendocument:xmlns:datastyle:1.0"
 OFFICENS       = u"urn:oasis:names:tc:opendocument:xmlns:office:1.0"
+OFNS           = u"urn:oasis:names:tc:opendocument:xmlns:of:1.2"
 OOONS          = u"http://openoffice.org/2004/office"
 OOOWNS         = u"http://openoffice.org/2004/writer"
 OOOCNS         = u"http://openoffice.org/2004/calc"
 PRESENTATIONNS = u"urn:oasis:names:tc:opendocument:xmlns:presentation:1.0"
 RDFANS         = u"http://docs.oasis-open.org/opendocument/meta/rdfa#"
+RPTNS          = u"http://openoffice.org/2005/report"
 SCRIPTNS       = u"urn:oasis:names:tc:opendocument:xmlns:script:1.0"
 SMILNS         = u"urn:oasis:names:tc:opendocument:xmlns:smil-compatible:1.0"
 STYLENS        = u"urn:oasis:names:tc:opendocument:xmlns:style:1.0"
@ -50,7 +54,8 @@
 XFORMSNS       = u"http://www.w3.org/2002/xforms"
 XLINKNS        = u"http://www.w3.org/1999/xlink"
 XMLNS          = u"http://www.w3.org/XML/1998/namespace"
-
+XSDNS          = u"http://www.w3.org/2001/XMLSchema"
+XSINS          = u"http://www.w3.org/2001/XMLSchema-instance"

 nsdict = {
   ANIMNS: u'anim',
@ -61,19 +66,23 @@
   DOMNS: u'dom',
   DR3DNS: u'dr3d',
   DRAWNS: u'draw',
+   FIELDNS: u'field',
   FONS: u'fo',
   FORMNS: u'form',
+   GRDDLNS: u'grddl',
   KOFFICENS: u'koffice',
   MANIFESTNS: u'manifest',
   MATHNS: u'math',
   METANS: u'meta',
   NUMBERNS: u'number',
   OFFICENS: u'office',
+   OFNS: u'of',
   OOONS: u'ooo',
   OOOWNS: u'ooow',
   OOOCNS: u'oooc',
   PRESENTATIONNS: u'presentation',
   RDFANS: u'rdfa',
+   RPTNS:  u'rpt',
   SCRIPTNS: u'script',
   SMILNS: u'smil',
   STYLENS: u'style',
@ -83,4 +92,6 @@
   XFORMSNS: u'xforms',
   XLINKNS: u'xlink',
   XMLNS: u'xml',
+   XSDNS: u'xsd',
+   XSINS: u'xsi',
 }
--- a/src/odf/odf2xhtml.py
+++ b/src/odf/odf2xhtml.py
@ -1,6 +1,6 @@
 #!/usr/bin/python
 # -*- coding: utf-8 -*-
-# Copyright (C) 2006-2007 Søren Roug, European Environment Agency
+# Copyright (C) 2006-2010 Søren Roug, European Environment Agency
 #
 # This library is free software; you can redistribute it and/or
 # modify it under the terms of the GNU Lesser General Public
@ -20,15 +20,18 @@
 #
 #import pdb
 #pdb.set_trace()
-import zipfile
-from xml.sax import handler, expatreader
-from xml.sax.xmlreader import InputSource
+from xml.sax import handler
 from xml.sax.saxutils import escape, quoteattr
-from cStringIO import StringIO
+from xml.dom import Node

-from namespaces import DCNS, DRAWNS, FONS, \
-  METANS, NUMBERNS, OFFICENS, PRESENTATIONNS, \
-  STYLENS, SVGNS, TABLENS, TEXTNS, XLINKNS
+from opendocument import load
+
+from namespaces import ANIMNS, CHARTNS, CONFIGNS, DCNS, DR3DNS, DRAWNS, FONS, \
+  FORMNS, MATHNS, METANS, NUMBERNS, OFFICENS, PRESENTATIONNS, SCRIPTNS, \
+  SMILNS, STYLENS, SVGNS, TABLENS, TEXTNS, XLINKNS
+
+if False: # Added by Kovid
+    DR3DNS, MATHNS, CHARTNS, CONFIGNS, ANIMNS, FORMNS, SMILNS, SCRIPTNS

 # Handling of styles
 #
@ -72,8 +75,8 @@ def __init__(self):
            (FONS,u"border-left"): self.c_fo,
            (FONS,u"border-right"): self.c_fo,
            (FONS,u"border-top"): self.c_fo,
-            (FONS,u"break-after"): self.c_break,
-            (FONS,u"break-before"): self.c_break,
+            (FONS,u"break-after"): self.c_break, # Added by Kovid
+            (FONS,u"break-before"): self.c_break,# Added by Kovid
            (FONS,u"color"): self.c_fo,
            (FONS,u"font-family"): self.c_fo,
            (FONS,u"font-size"): self.c_fo,
@ -136,7 +139,7 @@ def c_fo(self, ruleset, sdict, rule, val):
        selector = rule[1]
        sdict[selector] = val

-    def c_break(self, ruleset, sdict, rule, val):
+    def c_break(self, ruleset, sdict, rule, val): # Added by Kovid
        property = 'page-' + rule[1]
        values = {'auto': 'auto', 'column': 'always', 'page': 'always',
                  'even-page': 'left', 'odd-page': 'right',
@ -346,13 +349,16 @@ def __init__(self, generate_css=True, embedable=False):
        self.elements = {
        (DCNS, 'title'): (self.s_processcont, self.e_dc_title),
        (DCNS, 'language'): (self.s_processcont, self.e_dc_contentlanguage),
-        (DCNS, 'creator'): (self.s_processcont, self.e_dc_metatag),
+        (DCNS, 'creator'): (self.s_processcont, self.e_dc_creator),
        (DCNS, 'description'): (self.s_processcont, self.e_dc_metatag),
        (DCNS, 'date'): (self.s_processcont, self.e_dc_metatag),
+        (DRAWNS, 'custom-shape'): (self.s_custom_shape, self.e_custom_shape),
        (DRAWNS, 'frame'): (self.s_draw_frame, self.e_draw_frame),
        (DRAWNS, 'image'): (self.s_draw_image, None),
        (DRAWNS, 'fill-image'): (self.s_draw_fill_image, None),
        (DRAWNS, "layer-set"):(self.s_ignorexml, None),
+        (DRAWNS, 'object'): (self.s_draw_object, None),
+        (DRAWNS, 'object-ole'): (self.s_draw_object_ole, None),
        (DRAWNS, 'page'): (self.s_draw_page, self.e_draw_page),
        (DRAWNS, 'text-box'): (self.s_draw_textbox, self.e_draw_textbox),
        (METANS, 'creation-date'):(self.s_processcont, self.e_dc_metatag),
@ -364,7 +370,9 @@ def __init__(self, generate_css=True, embedable=False):
        (NUMBERNS, "date-style"):(self.s_ignorexml, None),
        (NUMBERNS, "number-style"):(self.s_ignorexml, None),
        (NUMBERNS, "text-style"):(self.s_ignorexml, None),
+        (OFFICENS, "annotation"):(self.s_ignorexml, None),
        (OFFICENS, "automatic-styles"):(self.s_office_automatic_styles, None),
+        (OFFICENS, "document"):(self.s_office_document_content, self.e_office_document_content),
        (OFFICENS, "document-content"):(self.s_office_document_content, self.e_office_document_content),
        (OFFICENS, "forms"):(self.s_ignorexml, None),
        (OFFICENS, "master-styles"):(self.s_office_master_styles, None),
@ -374,6 +382,7 @@ def __init__(self, generate_css=True, embedable=False):
        (OFFICENS, "styles"):(self.s_office_styles, None),
        (OFFICENS, "text"):(self.s_office_text, self.e_office_text),
        (OFFICENS, "scripts"):(self.s_ignorexml, None),
+        (OFFICENS, "settings"):(self.s_ignorexml, None),
        (PRESENTATIONNS, "notes"):(self.s_ignorexml, None),
 #       (STYLENS, "default-page-layout"):(self.s_style_default_page_layout, self.e_style_page_layout),
        (STYLENS, "default-page-layout"):(self.s_ignorexml, None),
@ -389,8 +398,8 @@ def __init__(self, generate_css=True, embedable=False):
 #       (STYLENS, "header-style"):(self.s_style_header_style, None),
        (STYLENS, "master-page"):(self.s_style_master_page, None),
        (STYLENS, "page-layout-properties"):(self.s_style_handle_properties, None),
-#       (STYLENS, "page-layout"):(self.s_style_page_layout, self.e_style_page_layout),
-        (STYLENS, "page-layout"):(self.s_ignorexml, None),
+        (STYLENS, "page-layout"):(self.s_style_page_layout, self.e_style_page_layout),
+#       (STYLENS, "page-layout"):(self.s_ignorexml, None),
        (STYLENS, "paragraph-properties"):(self.s_style_handle_properties, None),
        (STYLENS, "style"):(self.s_style_style, self.e_style_style),
        (STYLENS, "table-cell-properties"):(self.s_style_handle_properties, None),
@ -407,6 +416,10 @@ def __init__(self, generate_css=True, embedable=False):
        (TEXTNS, "alphabetical-index-source"):(self.s_text_x_source, self.e_text_x_source),
        (TEXTNS, "bibliography-configuration"):(self.s_ignorexml, None),
        (TEXTNS, "bibliography-source"):(self.s_text_x_source, self.e_text_x_source),
+        (TEXTNS, 'bookmark'): (self.s_text_bookmark, None),
+        (TEXTNS, 'bookmark-start'): (self.s_text_bookmark, None),
+        (TEXTNS, 'bookmark-ref'): (self.s_text_bookmark_ref, self.e_text_a),
+        (TEXTNS, 'bookmark-ref-start'): (self.s_text_bookmark_ref, None),
        (TEXTNS, 'h'): (self.s_text_h, self.e_text_h),
        (TEXTNS, "illustration-index-source"):(self.s_text_x_source, self.e_text_x_source),
        (TEXTNS, 'line-break'):(self.s_text_line_break, None),
@ -430,10 +443,66 @@ def __init__(self, generate_css=True, embedable=False):
        (TEXTNS, "user-index-source"):(self.s_text_x_source, self.e_text_x_source),
        }
        if embedable:
-            self.elements[(OFFICENS, u"text")] = (None,None)
-            self.elements[(OFFICENS, u"spreadsheet")] = (None,None)
-            self.elements[(OFFICENS, u"presentation")] = (None,None)
-            self.elements[(OFFICENS, u"document-content")] = (None,None)
+            self.make_embedable()
+        self._resetobject()
+
+    def set_plain(self):
+        """ Tell the parser to not generate CSS.
+            Clears the generate_css flag that handlers such as
+            s_custom_shape check before adding class/style attributes.
+        """
+        self.generate_css = False
+
+    def set_embedable(self):
+        """ Tells the converter to only output the parts inside the <body>.
+            Replaces the handlers of the document-level wrapper elements
+            in self.elements with (None, None) pairs.
+        """
+        for wrapper in (u"text", u"spreadsheet", u"presentation", u"document-content"):
+            self.elements[(OFFICENS, wrapper)] = (None,None)
+
+    # __init__ calls self.make_embedable() when embedable=True, but no method
+    # of that name is visible in this change; keep both spellings usable so
+    # that constructor path does not raise AttributeError.
+    make_embedable = set_embedable
+
+
+    def add_style_file(self, stylefilename, media=None):
+        """ Add a link to an external style file.
+            Also turns off the embedding of styles in the HTML.
+
+            stylefilename -- value for the href attribute of the <link>
+            media -- optional value for the media attribute
+        """
+        self.use_internal_css = False
+        self.stylefilename = stylefilename
+        # quoteattr() escapes &, < and > and picks a safe quote character, so
+        # an unusual file name cannot break the generated markup.
+        link = '<link rel="stylesheet" type="text/css" href=%s' % quoteattr(stylefilename)
+        if media:
+            link += ' media=%s' % quoteattr(media)
+        self.metatags.append(link + '/>\n')
+
+    def _resetfootnotes(self):
+        """ Reset the footnote/endnote bookkeeping to its empty state """
+        # Footnotes and endnotes
+        self.notedict = {}
+        self.currentnote = 0
+        self.notebody = ''
+
+    def _resetobject(self):
+        """ Initialise the per-conversion state: output buffer, metadata
+            fields, parser stacks, style tables, footnotes and meta tags.
+        """
+        # Output buffer; _wlines appends to self.lines
+        self.lines = []
+        self._wfunc = self._wlines
+        self.xmlfile = ''
+        # Metadata picked up by the e_dc_* handlers
+        self.title = ''
+        self.language = ''
+        self.creator = ''
+        # Pending character data and the parser/HTML stacks
+        self.data = []
+        self.tagstack = TagStack()
+        self.htmlstack = []
+        self.pstack = []
+        self.processelem = True
+        self.processcont = True
+        self.listtypes = {}
+        self.headinglevels = [0, 0,0,0,0,0, 0,0,0,0,0] # level 0 to 10
+        self.use_internal_css = True
+        self.cs = StyleToCSS()
+        self.anchors = {}
+
+        # Style declarations
+        self.stylestack = []
+        self.styledict = {}
+        self.currentstyle = None
+
+        self._resetfootnotes()
+
+        # Tags from meta.xml
+        self.metatags = []


    def writeout(self, s):
@ -447,6 +516,7 @@ def writedata(self):

    def opentag(self, tag, attrs={}, block=False):
        """ Create an open HTML tag """
+        self.htmlstack.append((tag,attrs,block))
        a = []
        for key,val in attrs.items():
            a.append('''%s=%s''' % (key, quoteattr(val)))
@ -458,6 +528,8 @@ def opentag(self, tag, attrs={}, block=False):
            self.writeout("\n")

    def closetag(self, tag, block=True):
+        """ Close an open HTML tag """
+        self.htmlstack.pop()
        self.writeout("</%s>" % tag)
        if block == True:
            self.writeout("\n")
@ -468,17 +540,13 @@ def emptytag(self, tag, attrs={}):
            a.append('''%s=%s''' % (key, quoteattr(val)))
        self.writeout("<%s %s/>\n" % (tag, " ".join(a)))

+#--------------------------------------------------
+# Interface to parser
 #--------------------------------------------------
    def characters(self, data):
        if self.processelem and self.processcont:
            self.data.append(data)

-    def handle_starttag(self, tag, method, attrs):
-        method(tag,attrs)
-
-    def handle_endtag(self, tag, attrs, method):
-        method(tag, attrs)
-
    def startElementNS(self, tag, qname, attrs):
        self.pstack.append( (self.processelem, self.processcont) )
        if self.processelem:
@ -499,6 +567,13 @@ def endElementNS(self, tag, qname):
                self.unknown_endtag(tag, attrs)
        self.processelem, self.processcont = self.pstack.pop()

+#--------------------------------------------------
+    def handle_starttag(self, tag, method, attrs):
+        """ Invoke a start handler. Note the argument order: tag, method, attrs """
+        method(tag,attrs)
+
+    def handle_endtag(self, tag, attrs, method):
+        """ Invoke an end handler. Note the argument order: tag, attrs, method """
+        method(tag, attrs)
+
    def unknown_starttag(self, tag, attrs):
        pass

@ -512,18 +587,21 @@ def s_ignorexml(self, tag, attrs):
        self.processelem = False

    def s_ignorecont(self, tag, attrs):
+        """ Stop processing the text nodes """
        self.processcont = False

    def s_processcont(self, tag, attrs):
+        """ Start processing the text nodes """
        self.processcont = True

    def classname(self, attrs):
        """ Generate a class name from a style name """
-        c = attrs[(TEXTNS,'style-name')]
+        c = attrs.get((TEXTNS,'style-name'),'')
        c = c.replace(".","_")
        return c

    def get_anchor(self, name):
+        """ Create a unique anchor id for a href name """
        if not self.anchors.has_key(name):
            # Changed by Kovid
            self.anchors[name] = "anchor%d" % (len(self.anchors) + 1)
@ -543,8 +621,8 @@ def purgedata(self):
    def e_dc_title(self, tag, attrs):
        """ Get the title from the meta data and create a HTML <title>
        """
-        self.metatags.append('<title>%s</title>\n' % escape(''.join(self.data)))
        self.title = ''.join(self.data)
+        #self.metatags.append('<title>%s</title>\n' % escape(self.title))
        self.data = []

    def e_dc_metatag(self, tag, attrs):
@ -556,13 +634,57 @@ def e_dc_metatag(self, tag, attrs):
    def e_dc_contentlanguage(self, tag, attrs):
        """ Set the content language. Identifies the targeted audience
        """
-        self.metatags.append('<meta http-equiv="content-language" content="%s"/>\n' % ''.join(self.data))
+        self.language = ''.join(self.data)
+        self.metatags.append('<meta http-equiv="content-language" content="%s"/>\n' % escape(self.language))
        self.data = []

+    def e_dc_creator(self, tag, attrs):
+        """ Set the content creator from the collected character data and
+            record it as a <meta> tag.
+        """
+        self.creator = ''.join(self.data)
+        # quoteattr() also copes with quote characters in the name, which
+        # escape() alone would leave free to end the attribute early.
+        self.metatags.append('<meta http-equiv="creator" content=%s/>\n' % quoteattr(self.creator))
+        self.data = []
+
+    def s_custom_shape(self, tag, attrs):
+        """ Open the <div> that stands in for a <draw:custom-shape>.
+            The class name comes from the draw style name or, when that is
+            empty, the presentation style name; position and size are
+            copied from the svg:* attributes into an inline style.
+        """
+        anchor_type = attrs.get((TEXTNS,'anchor-type'),'notfound')
+        classname = "G-" + attrs.get( (DRAWNS,'style-name'), "")
+        if classname == 'G-':
+            classname = "PR-" + attrs.get( (PRESENTATIONNS,'style-name'), "")
+        classname = classname.replace(".","_")
+        # Only an as-char anchor stays in the text flow
+        if anchor_type == 'as-char':
+            css = ''
+        elif anchor_type in ("paragraph", 'char'):
+            css = 'position:absolute;'
+        else:
+            css = "position: absolute;"
+        # svg attribute -> CSS property, in the order they are emitted
+        for svgattr, cssprop in (("width","width"), ("height","height"), ("x","left"), ("y","top")):
+            if attrs.has_key( (SVGNS,svgattr) ):
+                css = css + cssprop + ":" + attrs[(SVGNS,svgattr)] + ";"
+        if not self.generate_css:
+            self.opentag('div')
+        else:
+            self.opentag('div', {'class': classname, 'style': css})
+
+    def e_custom_shape(self, tag, attrs):
+        """ End the <draw:custom-shape>: close the <div> opened by s_custom_shape
+        """
+        self.closetag('div')
+
    def s_draw_frame(self, tag, attrs):
        """ A <draw:frame> is made into a <div> in HTML which is then styled
        """
-        anchor_type = attrs.get((TEXTNS,'anchor-type'),'char')
+        anchor_type = attrs.get((TEXTNS,'anchor-type'),'notfound')
        htmltag = 'div'
        name = "G-" + attrs.get( (DRAWNS,'style-name'), "")
        if name == 'G-':
@ -576,7 +698,7 @@ def s_draw_frame(self, tag, attrs):
            htmltag = 'div'
            style = ''
        else:
-            style = "position: absolute;"
+            style = "position:absolute;"
        if attrs.has_key( (SVGNS,"width") ):
            style = style + "width:" + attrs[(SVGNS,"width")] + ";"
        if attrs.has_key( (SVGNS,"height") ):
@ -620,6 +742,30 @@ def s_draw_image(self, tag, attrs):
                htmlattrs['style'] = "display: block;"
        self.emptytag('img', htmlattrs)

+    def s_draw_object(self, tag, attrs):
+        """ A <draw:object> is an embedded object in the document (e.g. spreadsheet in presentation).
+            Currently a no-op: the unconditional return below disables the
+            walk into the embedded object, so the rest of the body is
+            unreachable.
+        """
+        return # Added by Kovid
+        objhref = attrs[(XLINKNS,"href")]
+        # Remove leading "./": from "./Object 1" to "Object 1"
+#       objhref = objhref [2:]
+
+        # Not using os.path.join since it fails to find the file on Windows.
+#       objcontentpath = '/'.join([objhref, 'content.xml'])
+
+        for c in self.document.childnodes:
+            if c.folder == objhref:
+                self._walknode(c.topnode)
+
+    def s_draw_object_ole(self, tag, attrs):
+        """ A <draw:object-ole> is an embedded OLE object in the document (e.g. MS Graph).
+            For a Microsoft Graph 97 chart an empty named <a> placeholder
+            is written; every other OLE object produces no output.
+        """
+        # The class id may be absent; .get() lets the check below skip such
+        # objects instead of raising KeyError.
+        class_id = attrs.get((DRAWNS,"class-id"))
+        if class_id and class_id.lower() == "00020803-0000-0000-c000-000000000046": ## Microsoft Graph 97 Chart
+            tagattrs = { 'name':'object_ole_graph', 'class':'ole-graph' }
+            self.opentag('a', tagattrs)
+            # closetag() takes a block flag, not the attributes; False keeps
+            # the output free of a trailing newline, as before.
+            self.closetag('a', False)
+
    def s_draw_page(self, tag, attrs):
        """ A <draw:page> is a slide in a presentation. We use a <fieldset> element in HTML.
            Therefore if you convert a ODP file, you get a series of <fieldset>s.
@ -655,13 +801,9 @@ def e_draw_textbox(self, tag, attrs):

    def html_body(self, tag, attrs):
        self.writedata()
-        if self.generate_css:
+        if self.generate_css and self.use_internal_css:
            self.opentag('style', {'type':"text/css"}, True)
            self.writeout('/*<![CDATA[*/\n')
-            self.writeout('\nimg { width: 100%; height: 100%; }\n')
-            self.writeout('* { padding: 0; margin: 0;  background-color:white; }\n')
-            self.writeout('body { margin: 0 1em; }\n')
-            self.writeout('ol, ul { padding-left: 2em; }\n')
            self.generate_stylesheet()
            self.writeout('/*]]>*/\n')
            self.closetag('style')
@ -669,6 +811,16 @@ def html_body(self, tag, attrs):
        self.closetag('head')
        self.opentag('body', block=True)

+    # background-color: white removed by Kovid for #9118
+    # Specifying an explicit bg color prevents ebook readers
+    # from successfully inverting colors
+    default_styles = """
+img { width: 100%; height: 100%; }
+* { padding: 0; margin: 0; }
+body { margin: 0 1em; }
+ol, ul { padding-left: 2em; }
+"""
+
    def generate_stylesheet(self):
        for name in self.stylestack:
            styles = self.styledict.get(name)
@ -688,6 +840,7 @@ def generate_stylesheet(self):
                styles = parentstyle
            self.styledict[name] = styles
        # Write the styles to HTML
+        self.writeout(self.default_styles)
        for name in self.stylestack:
            styles = self.styledict.get(name)
            css2 = self.cs.convert_styles(styles)
@ -729,6 +882,7 @@ def s_office_document_content(self, tag, attrs):
        self.emptytag('meta', { 'http-equiv':"Content-Type", 'content':"text/html;charset=UTF-8"})
        for metaline in self.metatags:
            self.writeout(metaline)
+        self.writeout('<title>%s</title>\n' % escape(self.title))

    def e_office_document_content(self, tag, attrs):
        """ Last tag """
@ -773,7 +927,7 @@ def s_style_handle_properties(self, tag, attrs):
        """ Copy all attributes to a struct.
            We will later convert them to CSS2
        """
-        if self.currentstyle is None:
+        if self.currentstyle is None: # Added by Kovid
            return
        for key,attr in attrs.items():
            self.styledict[self.currentstyle][key] = attr
@ -799,7 +953,7 @@ def e_style_default_style(self, tag, attrs):
    def s_style_font_face(self, tag, attrs):
        """ It is possible that the HTML browser doesn't know how to
            show a particular font. Luckily ODF provides generic fallbacks
-            Unluckily they are not the same as CSS2.
+            Unfortunately they are not the same as CSS2.
            CSS2: serif, sans-serif, cursive, fantasy, monospace
            ODF: roman, swiss, modern, decorative, script, system
        """
@ -850,7 +1004,7 @@ def s_style_page_layout(self, tag, attrs):
        """
        name = attrs[(STYLENS,'name')]
        name = name.replace(".","_")
-        self.currentstyle = "@page " + name
+        self.currentstyle = ".PL-" + name
        self.stylestack.append(self.currentstyle)
        self.styledict[self.currentstyle] = {}

@ -881,7 +1035,7 @@ def s_style_master_page(self, tag, attrs):
        self.s_ignorexml(tag, attrs)

    # Short prefixes for class selectors
-    familyshort = {'drawing-page':'DP', 'paragraph':'P', 'presentation':'PR',
+    _familyshort = {'drawing-page':'DP', 'paragraph':'P', 'presentation':'PR',
        'text':'S', 'section':'D',
         'table':'T', 'table-cell':'TD', 'table-column':'TC',
         'table-row':'TR', 'graphic':'G' }
@ -897,7 +1051,7 @@ def s_style_style(self, tag, attrs):
        name = name.replace(".","_")
        family = attrs[(STYLENS,'family')]
        htmlfamily = self.familymap.get(family,'unknown')
-        sfamily = self.familyshort.get(family,'X')
+        sfamily = self._familyshort.get(family,'X')
        name = "%s%s-%s" % (self.autoprefix, sfamily, name)
        parent = attrs.get( (STYLENS,'parent-style-name') )
        self.currentstyle = special_styles.get(name,"."+name)
@ -942,6 +1096,7 @@ def e_table_table(self, tag, attrs):
        self.purgedata()

    def s_table_table_cell(self, tag, attrs):
+        """ Start a table cell """
        #FIXME: number-columns-repeated § 8.1.3
        #repeated = int(attrs.get( (TABLENS,'number-columns-repeated'), 1))
        htmlattrs = {}
@ -959,11 +1114,13 @@ def s_table_table_cell(self, tag, attrs):
        self.purgedata()

    def e_table_table_cell(self, tag, attrs):
+        """ End a table cell """
        self.writedata()
        self.closetag('td')
        self.purgedata()

    def s_table_table_column(self, tag, attrs):
+        """ Start a table column """
        c = attrs.get( (TABLENS,'style-name'), None)
        repeated = int(attrs.get( (TABLENS,'number-columns-repeated'), 1))
        htmlattrs = {}
@ -974,6 +1131,7 @@ def s_table_table_column(self, tag, attrs):
        self.purgedata()

    def s_table_table_row(self, tag, attrs):
+        """ Start a table row """
        #FIXME: table:number-rows-repeated
        c = attrs.get( (TABLENS,'style-name'), None)
        htmlattrs = {}
@ -983,6 +1141,7 @@ def s_table_table_row(self, tag, attrs):
        self.purgedata()

    def e_table_table_row(self, tag, attrs):
+        """ End a table row """
        self.writedata()
        self.closetag('tr')
        self.purgedata()
@ -997,10 +1156,28 @@ def s_text_a(self, tag, attrs):
        self.purgedata()

    def e_text_a(self, tag, attrs):
+        """ End an anchor or bookmark reference """
        self.writedata()
        self.closetag('a', False)
        self.purgedata()

+    def s_text_bookmark(self, tag, attrs):
+        """ Bookmark definition: emit an empty <span> carrying the anchor id """
+        anchor = self.get_anchor(attrs[(TEXTNS,'name')])
+        self.writedata()
+        self.opentag('span', {'id': anchor})
+        self.closetag('span', False)
+        self.purgedata()
+
+    def s_text_bookmark_ref(self, tag, attrs):
+        """ Bookmark reference: open an <a> pointing at the bookmark's anchor """
+        target = self.get_anchor(attrs[(TEXTNS,'ref-name')])
+        self.writedata()
+        self.opentag('a', {'href': "#" + target})
+        self.purgedata()
+
    def s_text_h(self, tag, attrs):
        """ Headings start """
        level = int(attrs[(TEXTNS,'outline-level')])
@ -1018,13 +1195,19 @@ def s_text_h(self, tag, attrs):
        self.purgedata()

    def e_text_h(self, tag, attrs):
-        """ Headings end """
+        """ Headings end
+            Side-effect: If there is no title in the metadata, then it is taken
+            from the first heading of any level.
+        """
        self.writedata()
        level = int(attrs[(TEXTNS,'outline-level')])
        if level > 6: level = 6 # Heading levels go only to 6 in XHTML
        if level < 1: level = 1
        lev = self.headinglevels[1:level+1]
        outline = '.'.join(map(str,lev) )
+        heading = ''.join(self.data)
+        if self.title == '': self.title = heading
+        # Changed by Kovid
        tail = ''.join(self.data)
        anchor = self.get_anchor("%s.%s" % ( outline, tail))
        anchor2 = self.get_anchor(tail) # Added by kovid to fix #7506
@ -1036,12 +1219,14 @@ def e_text_h(self, tag, attrs):
        self.purgedata()

    def s_text_line_break(self, tag, attrs):
+        """ Force a line break (<br/>) """
        self.writedata()
        self.emptytag('br')
        self.purgedata()

    def s_text_list(self, tag, attrs):
-        """ To know which level we're at, we have to count the number
+        """ Start a list (<ul> or <ol>)
+            To know which level we're at, we have to count the number
            of <text:list> elements on the tagstack.
        """
        name = attrs.get( (TEXTNS,'style-name') )
@ -1055,12 +1240,13 @@ def s_text_list(self, tag, attrs):
            name = self.tagstack.rfindattr( (TEXTNS,'style-name') )
        list_class = "%s_%d" % (name, level)
        if self.generate_css:
-            self.opentag('%s' % self.listtypes.get(list_class,'UL'), {'class': list_class })
+            self.opentag('%s' % self.listtypes.get(list_class,'ul'), {'class': list_class })
        else:
-            self.opentag('%s' % self.listtypes.get(list_class,'UL'))
+            self.opentag('%s' % self.listtypes.get(list_class,'ul'))
        self.purgedata()

    def e_text_list(self, tag, attrs):
+        """ End a list """
        self.writedata()
        name = attrs.get( (TEXTNS,'style-name') )
        level = self.tagstack.count_tags(tag) + 1
@ -1072,14 +1258,16 @@ def e_text_list(self, tag, attrs):
            # textbox itself may be nested within another list.
            name = self.tagstack.rfindattr( (TEXTNS,'style-name') )
        list_class = "%s_%d" % (name, level)
-        self.closetag(self.listtypes.get(list_class,'UL'))
+        self.closetag(self.listtypes.get(list_class,'ul'))
        self.purgedata()

    def s_text_list_item(self, tag, attrs):
+        """ Start list item """
        self.opentag('li')
        self.purgedata()

    def e_text_list_item(self, tag, attrs):
+        """ End list item """
        self.writedata()
        self.closetag('li')
        self.purgedata()
@ -1191,7 +1379,7 @@ def e_text_p(self, tag, attrs):
            if specialtag is None:
                specialtag = 'p'
        self.writedata()
-        if not self.data:
+        if not self.data: # Added by Kovid
            # Give substance to empty paragraphs, as rendered by OOo
            self.writeout('&#160;')
        self.closetag(specialtag)
@ -1254,55 +1442,30 @@ def e_text_x_source(self, tag, attrs):
 #-----------------------------------------------------------------------------

    def load(self, odffile):
-        self._odffile = odffile
+        """ Loads a document into the parser and parses it.
+            The argument can either be a filename or a document in memory.
+        """
+        self.lines = []
+        self._wfunc = self._wlines
+        if isinstance(odffile, basestring) \
+                or hasattr(odffile, 'read'): # Added by Kovid
+            self.document = load(odffile)
+        else:
+            self.document = odffile
+        self._walknode(self.document.topnode)

-    def parseodf(self):
-        self.xmlfile = ''
-        self.title = ''
-        self.data = []
-        self.tagstack = TagStack()
-        self.pstack = []
-        self.processelem = True
-        self.processcont = True
-        self.listtypes = {}
-        self.headinglevels = [0, 0,0,0,0,0, 0,0,0,0,0] # level 0 to 10
-        self.cs = StyleToCSS()
-        self.anchors = {}
+    def _walknode(self, node):
+        """ Replay a node and its subtree through the SAX-style callbacks:
+            elements go to startElementNS/endElementNS around their
+            children, text and CDATA nodes go to characters().
+        """
+        kind = node.nodeType
+        if kind == Node.ELEMENT_NODE:
+            self.startElementNS(node.qname, node.tagName, node.attributes)
+            for child in node.childNodes:
+                self._walknode(child)
+            self.endElementNS(node.qname, node.tagName)
+        elif kind in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
+            self.characters(unicode(node))

-        # Style declarations
-        self.stylestack = []
-        self.styledict = {}
-        self.currentstyle = None
-
-        # Footnotes and endnotes
-        self.notedict = {}
-        self.currentnote = 0
-        self.notebody = ''
-
-        # Tags from meta.xml
-        self.metatags = []
-
-        # Extract the interesting files
-        z = zipfile.ZipFile(self._odffile)
-
-        # For some reason Trac has trouble when xml.sax.make_parser() is used.
-        # Could it be because PyXML is installed, and therefore a different parser
-        # might be chosen? By calling expatreader directly we avoid this issue
-        parser = expatreader.create_parser()
-        parser.setFeature(handler.feature_namespaces, 1)
-        parser.setContentHandler(self)
-        parser.setErrorHandler(handler.ErrorHandler())
-        inpsrc = InputSource()
-
-        for xmlfile in ('meta.xml', 'styles.xml', 'content.xml'):
-            self.xmlfile = xmlfile
-            content = z.read(xmlfile)
-            inpsrc.setByteStream(StringIO(content))
-            parser.parse(inpsrc)
-        z.close()

    def odf2xhtml(self, odffile):
-        """ Load a file and return XHTML
+        """ Load a file and return the XHTML
        """
        self.load(odffile)
        return self.xhtml()
@ -1311,9 +1474,8 @@ def _wlines(self,s):
        if s != '': self.lines.append(s)

    def xhtml(self):
-        self.lines = []
-        self._wfunc = self._wlines
-        self.parseodf()
+        """ Returns the xhtml
+        """
        return ''.join(self.lines)

    def _writecss(self, s):
@ -1323,11 +1485,127 @@ def _writenothing(self, s):
        pass

    def css(self):
-        self._wfunc = self._writenothing
-        self.parseodf()
+        """ Returns the CSS content """
        self._csslines = []
        self._wfunc = self._writecss
        self.generate_stylesheet()
        res = ''.join(self._csslines)
+        self._wfunc = self._wlines
        del self._csslines
        return res
+
+    def save(self, outputfile, addsuffix=False):
+        """ Save the HTML under the filename.
+            If the filename is '-' then save to stdout
+            We have the last style filename in self.stylefilename
+
+            outputfile -- path to write, or '-' for stdout
+            addsuffix -- when true, ".html" is appended to outputfile
+        """
+        xhtml = self.xhtml().encode('us-ascii','xmlcharrefreplace')
+        if outputfile == '-':
+            import sys # Added by Kovid
+            # stdout belongs to the process: write and flush, never close it
+            sys.stdout.write(xhtml)
+            sys.stdout.flush()
+            return
+        if addsuffix:
+            outputfile = outputfile + ".html"
+        outputfp = open(outputfile, "w")
+        try:
+            outputfp.write(xhtml)
+        finally:
+            # release the handle even if the write fails
+            outputfp.close()
+
+
+class ODF2XHTMLembedded(ODF2XHTML):
+    """ Variant of ODF2XHTML that writes into a caller-supplied list of
+        lines. Its element table leaves the metadata, style and most
+        office: structure handlers commented out.
+    """
+
+    def __init__(self, lines, generate_css=True, embedable=False):
+        """ lines -- list assigned to self.lines (the list _wlines appends to)
+            generate_css -- stored as self.generate_css
+            embedable -- accepted but not used by this constructor
+        """
+        # Reset first: _resetobject() installs a fresh self.lines, which is
+        # then replaced by the caller's list.
+        self._resetobject()
+        self.lines = lines
+
+        # Tags
+        self.generate_css = generate_css
+        self.elements = {
+#        (DCNS, 'title'): (self.s_processcont, self.e_dc_title),
+#        (DCNS, 'language'): (self.s_processcont, self.e_dc_contentlanguage),
+#        (DCNS, 'creator'): (self.s_processcont, self.e_dc_metatag),
+#        (DCNS, 'description'): (self.s_processcont, self.e_dc_metatag),
+#        (DCNS, 'date'): (self.s_processcont, self.e_dc_metatag),
+        (DRAWNS, 'frame'): (self.s_draw_frame, self.e_draw_frame),
+        (DRAWNS, 'image'): (self.s_draw_image, None),
+        (DRAWNS, 'fill-image'): (self.s_draw_fill_image, None),
+        (DRAWNS, "layer-set"):(self.s_ignorexml, None),
+        (DRAWNS, 'page'): (self.s_draw_page, self.e_draw_page),
+        (DRAWNS, 'object'): (self.s_draw_object, None),
+        (DRAWNS, 'object-ole'): (self.s_draw_object_ole, None),
+        (DRAWNS, 'text-box'): (self.s_draw_textbox, self.e_draw_textbox),
+#        (METANS, 'creation-date'):(self.s_processcont, self.e_dc_metatag),
+#        (METANS, 'generator'):(self.s_processcont, self.e_dc_metatag),
+#        (METANS, 'initial-creator'): (self.s_processcont, self.e_dc_metatag),
+#        (METANS, 'keyword'): (self.s_processcont, self.e_dc_metatag),
+        (NUMBERNS, "boolean-style"):(self.s_ignorexml, None),
+        (NUMBERNS, "currency-style"):(self.s_ignorexml, None),
+        (NUMBERNS, "date-style"):(self.s_ignorexml, None),
+        (NUMBERNS, "number-style"):(self.s_ignorexml, None),
+        (NUMBERNS, "text-style"):(self.s_ignorexml, None),
+#        (OFFICENS, "automatic-styles"):(self.s_office_automatic_styles, None),
+#        (OFFICENS, "document-content"):(self.s_office_document_content, self.e_office_document_content),
+        (OFFICENS, "forms"):(self.s_ignorexml, None),
+#        (OFFICENS, "master-styles"):(self.s_office_master_styles, None),
+        (OFFICENS, "meta"):(self.s_ignorecont, None),
+#        (OFFICENS, "presentation"):(self.s_office_presentation, self.e_office_presentation),
+#        (OFFICENS, "spreadsheet"):(self.s_office_spreadsheet, self.e_office_spreadsheet),
+#        (OFFICENS, "styles"):(self.s_office_styles, None),
+#        (OFFICENS, "text"):(self.s_office_text, self.e_office_text),
+        (OFFICENS, "scripts"):(self.s_ignorexml, None),
+        (PRESENTATIONNS, "notes"):(self.s_ignorexml, None),
+##       (STYLENS, "default-page-layout"):(self.s_style_default_page_layout, self.e_style_page_layout),
+#        (STYLENS, "default-page-layout"):(self.s_ignorexml, None),
+#        (STYLENS, "default-style"):(self.s_style_default_style, self.e_style_default_style),
+#        (STYLENS, "drawing-page-properties"):(self.s_style_handle_properties, None),
+#        (STYLENS, "font-face"):(self.s_style_font_face, None),
+##       (STYLENS, "footer"):(self.s_style_footer, self.e_style_footer),
+##       (STYLENS, "footer-style"):(self.s_style_footer_style, None),
+#        (STYLENS, "graphic-properties"):(self.s_style_handle_properties, None),
+#        (STYLENS, "handout-master"):(self.s_ignorexml, None),
+##       (STYLENS, "header"):(self.s_style_header, self.e_style_header),
+##       (STYLENS, "header-footer-properties"):(self.s_style_handle_properties, None),
+##       (STYLENS, "header-style"):(self.s_style_header_style, None),
+#        (STYLENS, "master-page"):(self.s_style_master_page, None),
+#        (STYLENS, "page-layout-properties"):(self.s_style_handle_properties, None),
+##       (STYLENS, "page-layout"):(self.s_style_page_layout, self.e_style_page_layout),
+#        (STYLENS, "page-layout"):(self.s_ignorexml, None),
+#        (STYLENS, "paragraph-properties"):(self.s_style_handle_properties, None),
+#        (STYLENS, "style"):(self.s_style_style, self.e_style_style),
+#        (STYLENS, "table-cell-properties"):(self.s_style_handle_properties, None),
+#        (STYLENS, "table-column-properties"):(self.s_style_handle_properties, None),
+#        (STYLENS, "table-properties"):(self.s_style_handle_properties, None),
+#        (STYLENS, "text-properties"):(self.s_style_handle_properties, None),
+        (SVGNS, 'desc'): (self.s_ignorexml, None),
+        (TABLENS, 'covered-table-cell'): (self.s_ignorexml, None),
+        (TABLENS, 'table-cell'): (self.s_table_table_cell, self.e_table_table_cell),
+        (TABLENS, 'table-column'): (self.s_table_table_column, None),
+        (TABLENS, 'table-row'): (self.s_table_table_row, self.e_table_table_row),
+        (TABLENS, 'table'): (self.s_table_table, self.e_table_table),
+        (TEXTNS, 'a'): (self.s_text_a, self.e_text_a),
+        (TEXTNS, "alphabetical-index-source"):(self.s_text_x_source, self.e_text_x_source),
+        (TEXTNS, "bibliography-configuration"):(self.s_ignorexml, None),
+        (TEXTNS, "bibliography-source"):(self.s_text_x_source, self.e_text_x_source),
+        (TEXTNS, 'h'): (self.s_text_h, self.e_text_h),
+        (TEXTNS, "illustration-index-source"):(self.s_text_x_source, self.e_text_x_source),
+        (TEXTNS, 'line-break'):(self.s_text_line_break, None),
+        (TEXTNS, "linenumbering-configuration"):(self.s_ignorexml, None),
+        (TEXTNS, "list"):(self.s_text_list, self.e_text_list),
+        (TEXTNS, "list-item"):(self.s_text_list_item, self.e_text_list_item),
+        (TEXTNS, "list-level-style-bullet"):(self.s_text_list_level_style_bullet, self.e_text_list_level_style_bullet),
+        (TEXTNS, "list-level-style-number"):(self.s_text_list_level_style_number, self.e_text_list_level_style_number),
+        (TEXTNS, "list-style"):(None, None),
+        (TEXTNS, "note"):(self.s_text_note, None),
+        (TEXTNS, "note-body"):(self.s_text_note_body, self.e_text_note_body),
+        (TEXTNS, "note-citation"):(None, self.e_text_note_citation),
+        (TEXTNS, "notes-configuration"):(self.s_ignorexml, None),
+        (TEXTNS, "object-index-source"):(self.s_text_x_source, self.e_text_x_source),
+        (TEXTNS, 'p'): (self.s_text_p, self.e_text_p),
+        (TEXTNS, 's'): (self.s_text_s, None),
+        (TEXTNS, 'span'): (self.s_text_span, self.e_text_span),
+        (TEXTNS, 'tab'): (self.s_text_tab, None),
+        (TEXTNS, "table-index-source"):(self.s_text_x_source, self.e_text_x_source),
+        (TEXTNS, "table-of-content-source"):(self.s_text_x_source, self.e_text_x_source),
+        (TEXTNS, "user-index-source"):(self.s_text_x_source, self.e_text_x_source),
+        (TEXTNS, "page-number"):(None, None),
+        }
+
--- a/src/odf/opendocument.py
+++ b/src/odf/opendocument.py
@ -1,5 +1,5 @@
 # -*- coding: utf-8 -*-
-# Copyright (C) 2006-2009 Søren Roug, European Environment Agency
+# Copyright (C) 2006-2010 Søren Roug, European Environment Agency
 #
 # This library is free software; you can redistribute it and/or
 # modify it under the terms of the GNU Lesser General Public
@ -41,7 +41,7 @@
 # We need at least Python 2.2
 assert sys.version_info[0]>=2 and sys.version_info[1] >= 2

-sys.setrecursionlimit=50
+#sys.setrecursionlimit(100)
 #The recursion limit is set conservative so mistakes like
 # s=content() s.addElement(s) won't eat up too much processor time.

@ -128,12 +128,12 @@ def build_caches(self, element):
            self.element_dict[element.qname] = []
        self.element_dict[element.qname].append(element)
        if element.qname == (STYLENS, u'style'):
-            self._register_stylename(element) # Add to style dictionary
+            self.__register_stylename(element) # Add to style dictionary
        styleref = element.getAttrNS(TEXTNS,u'style-name')
        if styleref is not None and self._styles_ooo_fix.has_key(styleref):
            element.setAttrNS(TEXTNS,u'style-name', self._styles_ooo_fix[styleref])

-    def _register_stylename(self, element):
+    def __register_stylename(self, element):
        ''' Register a style. But there are three style dictionaries:
            office:styles, office:automatic-styles and office:master-styles
            Chapter 14
@ -165,7 +165,7 @@ def xml(self):
        """ Generates the full document as an XML file
            Always written as a bytestream in UTF-8 encoding
        """
-        self._replaceGenerator()
+        self.__replaceGenerator()
        xml=StringIO()
        xml.write(_XMLPROLOGUE)
        self.topnode.toXml(0, xml)
@ -197,8 +197,10 @@ def contentxml(self):
        x.write_close_tag(0, xml)
        return xml.getvalue()

-    def manifestxml(self):
-        """ Generates the manifest.xml file """
+    def __manifestxml(self):
+        """ Generates the manifest.xml file
+            self.manifest is only available while the document is being saved
+        """
        xml=StringIO()
        xml.write(_XMLPROLOGUE)
        self.manifest.toXml(0,xml)
@ -206,7 +208,7 @@ def manifestxml(self):

    def metaxml(self):
        """ Generates the meta.xml file """
-        self._replaceGenerator()
+        self.__replaceGenerator()
        x = DocumentMeta()
        x.addElement(self.meta)
        xml=StringIO()
@ -344,7 +346,7 @@ def addThumbnail(self, filecontent=None):
            self.thumbnail = filecontent

    def addObject(self, document, objectname=None):
-        """ Add an object. The object must be an OpenDocument class
+        """ Adds an object (subdocument). The object must be an OpenDocument class
            The return value will be the folder in the zipfile the object is stored in
        """
        self.childobjects.append(document)
@ -367,15 +369,16 @@ def _savePictures(self, object, folder):
                zi.compress_type = zipfile.ZIP_STORED
                zi.external_attr = UNIXPERMS
                self._z.writestr(zi, fileobj)
-        if hasPictures:
-            self.manifest.addElement(manifest.FileEntry(fullpath="%sPictures/" % folder,mediatype=""))
+        # According to section 17.7.3 in ODF 1.1, the pictures folder should not have a manifest entry
+#       if hasPictures:
+#           self.manifest.addElement(manifest.FileEntry(fullpath="%sPictures/" % folder, mediatype=""))
        # Look in subobjects
        subobjectnum = 1
        for subobject in object.childobjects:
            self._savePictures(subobject,'%sObject %d/' % (folder, subobjectnum))
            subobjectnum += 1

-    def _replaceGenerator(self):
+    def __replaceGenerator(self):
        """ Section 3.1.1: The application MUST NOT export the original identifier
            belonging to the application that created the document.
        """
@ -385,22 +388,29 @@ def _replaceGenerator(self):
        self.meta.addElement(meta.Generator(text=TOOLSVERSION))

    def save(self, outputfile, addsuffix=False):
-        """ Save the document under the filename """
+        """ Save the document under the filename.
+            If the filename is '-' then save to stdout
+        """
        if outputfile == '-':
            outputfp = zipfile.ZipFile(sys.stdout,"w")
        else:
            if addsuffix:
                outputfile = outputfile + odmimetypes.get(self.mimetype,'.xxx')
            outputfp = zipfile.ZipFile(outputfile, "w")
-        self._zipwrite(outputfp)
+        self.__zipwrite(outputfp)
        outputfp.close()

    def write(self, outputfp):
+        """ User API to write the ODF file to an open file object
+            Writes the ZIP format
+        """
        zipoutputfp = zipfile.ZipFile(outputfp,"w")
-        self._zipwrite(zipoutputfp)
+        self.__zipwrite(zipoutputfp)

-    def _zipwrite(self, outputfp):
-        """ Write the document to an open file pointer """
+    def __zipwrite(self, outputfp):
+        """ Write the document to an open file pointer
+            This is where the real work is done
+        """
        self._z = outputfp
        self._now = time.localtime()[:6]
        self.manifest = manifest.Manifest()
@ -438,7 +448,7 @@ def _zipwrite(self, outputfp):
        zi = zipfile.ZipInfo("META-INF/manifest.xml", self._now)
        zi.compress_type = zipfile.ZIP_DEFLATED
        zi.external_attr = UNIXPERMS
-        self._z.writestr(zi, self.manifestxml() )
+        self._z.writestr(zi, self.__manifestxml() )
        del self._z
        del self._now
        del self.manifest
@ -464,8 +474,8 @@ def _saveXmlObjects(self, object, folder):
        self._z.writestr(zi, object.contentxml() )

        # Write settings
-        if self == object and self.settings.hasChildNodes():
-            self.manifest.addElement(manifest.FileEntry(fullpath="settings.xml",mediatype="text/xml"))
+        if object.settings.hasChildNodes():
+            self.manifest.addElement(manifest.FileEntry(fullpath="%ssettings.xml" % folder, mediatype="text/xml"))
            zi = zipfile.ZipInfo("%ssettings.xml" % folder, self._now)
            zi.compress_type = zipfile.ZIP_DEFLATED
            zi.external_attr = UNIXPERMS
@ -473,7 +483,7 @@ def _saveXmlObjects(self, object, folder):

        # Write meta
        if self == object:
-            self.manifest.addElement(manifest.FileEntry(fullpath="meta.xml",mediatype="text/xml"))
+            self.manifest.addElement(manifest.FileEntry(fullpath="meta.xml", mediatype="text/xml"))
            zi = zipfile.ZipInfo("meta.xml", self._now)
            zi.compress_type = zipfile.ZIP_DEFLATED
            zi.external_attr = UNIXPERMS
@ -497,6 +507,7 @@ def createTextNode(self, data):
        return element.Text(data)

    def createCDATASection(self, data):
+        """ Method to create a CDATA section """
        return element.CDATASection(cdata)

    def getMediaType(self):
@ -504,12 +515,14 @@ def getMediaType(self):
        return self.mimetype

    def getStyleByName(self, name):
+        """ Finds a style object based on the name """
        ncname = make_NCName(name)
        if self._styles_dict == {}:
            self.rebuild_caches()
        return self._styles_dict.get(ncname, None)

    def getElementsByType(self, element):
+        """ Gets elements based on the type, which is a function from text.py, draw.py etc. """
        obj = element(check_grammar=False)
        if self.element_dict == {}:
            self.rebuild_caches()
@ -517,53 +530,59 @@ def getElementsByType(self, element):

 # Convenience functions
 def OpenDocumentChart():
+    """ Creates a chart document """
    doc = OpenDocument('application/vnd.oasis.opendocument.chart')
    doc.chart = Chart()
    doc.body.addElement(doc.chart)
    return doc

 def OpenDocumentDrawing():
+    """ Creates a drawing document """
    doc = OpenDocument('application/vnd.oasis.opendocument.graphics')
    doc.drawing = Drawing()
    doc.body.addElement(doc.drawing)
    return doc

 def OpenDocumentImage():
+    """ Creates an image document """
    doc = OpenDocument('application/vnd.oasis.opendocument.image')
    doc.image = Image()
    doc.body.addElement(doc.image)
    return doc

 def OpenDocumentPresentation():
+    """ Creates a presentation document """
    doc = OpenDocument('application/vnd.oasis.opendocument.presentation')
    doc.presentation = Presentation()
    doc.body.addElement(doc.presentation)
    return doc

 def OpenDocumentSpreadsheet():
+    """ Creates a spreadsheet document """
    doc = OpenDocument('application/vnd.oasis.opendocument.spreadsheet')
    doc.spreadsheet = Spreadsheet()
    doc.body.addElement(doc.spreadsheet)
    return doc

 def OpenDocumentText():
+    """ Creates a text document """
    doc = OpenDocument('application/vnd.oasis.opendocument.text')
    doc.text = Text()
    doc.body.addElement(doc.text)
    return doc

+def OpenDocumentTextMaster():
+    """ Creates a text master document """
+    doc = OpenDocument('application/vnd.oasis.opendocument.text-master')
+    doc.text = Text()
+    doc.body.addElement(doc.text)
+    return doc

-def load(odffile):
+def __loadxmlparts(z, manifest, doc, objectpath):
    from load import LoadParser
    from xml.sax import make_parser, handler
-    z = zipfile.ZipFile(odffile)
-    mimetype = z.read('mimetype')
-    doc = OpenDocument(mimetype, add_generator=False)

-    # Look in the manifest file to see if which of the four files there are
-    manifestpart = z.read('META-INF/manifest.xml')
-    manifest =  manifestlist(manifestpart)
-    for xmlfile in ('settings.xml', 'meta.xml', 'content.xml', 'styles.xml'):
+    for xmlfile in (objectpath+'settings.xml', objectpath+'meta.xml', objectpath+'content.xml', objectpath+'styles.xml'):
        if not manifest.has_key(xmlfile):
            continue
        try:
@ -580,7 +599,19 @@ def load(odffile):
            parser.parse(inpsrc)
            del doc._parsing
        except KeyError, v: pass
-    # FIXME: Add subobjects correctly here
+
+def load(odffile):
+    """ Load an ODF file into memory
+        Returns a reference to the structure
+    """
+    z = zipfile.ZipFile(odffile)
+    mimetype = z.read('mimetype')
+    doc = OpenDocument(mimetype, add_generator=False)
+
+    # Look in the manifest file to see which of the four XML files are present
+    manifestpart = z.read('META-INF/manifest.xml')
+    manifest =  manifestlist(manifestpart)
+    __loadxmlparts(z, manifest, doc, '')
    for mentry,mvalue in manifest.items():
        if mentry[:9] == "Pictures/" and len(mentry) > 9:
            doc.addPicture(mvalue['full-path'], mvalue['media-type'], z.read(mentry))
@ -588,6 +619,13 @@ def load(odffile):
            doc.addThumbnail(z.read(mentry))
        elif mentry in ('settings.xml', 'meta.xml', 'content.xml', 'styles.xml'):
            pass
+        # Load subobjects into structure
+        elif mentry[:7] == "Object " and len(mentry) < 11 and mentry[-1] == "/":
+            subdoc = OpenDocument(mvalue['media-type'], add_generator=False)
+            doc.addObject(subdoc, "/" + mentry[:-1])
+            __loadxmlparts(z, manifest, subdoc, mentry)
+        elif mentry[:7] == "Object ":
+            pass # Don't load subobjects as opaque objects
        else:
            if mvalue['full-path'][-1] == '/':
                doc._extra.append(OpaqueObject(mvalue['full-path'], mvalue['media-type'], None))
@ -612,4 +650,5 @@ def load(odffile):
    elif mimetype[:42] == 'application/vnd.oasis.opendocument.formula':
        doc.formula = b[0].firstChild
    return doc
+
 # vim: set expandtab sw=4 :