From 7ceeef2a3be52e944ffecaddba6c2adff246a856 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Wed, 2 Feb 2011 20:05:50 -0500
Subject: [PATCH 1/7] TXT Output: Fix stylizer so it works. Fix handling soft
 scene breaks created by top margins. FB2 Output: Handle soft scene breaks
 created by empty paragraphs and top margins. Stylizer: Enable use of
 lineHeight property.

---
 src/calibre/ebooks/fb2/fb2ml.py    | 56 +++++++++++++++++-------------
 src/calibre/ebooks/oeb/stylizer.py |  8 ++---
 src/calibre/ebooks/txt/txtml.py    | 18 ++++++----
 3 files changed, 47 insertions(+), 35 deletions(-)
diff --git a/src/calibre/ebooks/fb2/fb2ml.py b/src/calibre/ebooks/fb2/fb2ml.py
index 515bdee9df..dedfe963f6 100644
--- a/src/calibre/ebooks/fb2/fb2ml.py
+++ b/src/calibre/ebooks/fb2/fb2ml.py
@@ -71,19 +71,28 @@ def fb2mlize_spine(self):
             return u'<?xml version="1.0" encoding="UTF-8"?>' + output
 
     def clean_text(self, text):
+        # Condense empty paragraphs into a line break. 
+        text = re.sub(r'(?miu)(<p>\s*</p>\s*){3,}', '<p><empty-line /></p>', text)
+        # Remove empty paragraphs.
         text = re.sub(r'(?miu)<p>\s*</p>', '', text)
+        # Clean up paragraph endings.
         text = re.sub(r'(?miu)\s*</p>', '</p>', text)
+        # Put paragraphs following a paragraph on a separate line.
         text = re.sub(r'(?miu)</p>\s*<p>', '</p>\n\n<p>', text)
 
+        # Remove empty title elements.
         text = re.sub(r'(?miu)<title>\s*</title>', '', text)
         text = re.sub(r'(?miu)\s+</title>', '</title>', text)
 
+        # Remove empty sections.
         text = re.sub(r'(?miu)<section>\s*</section>', '', text)
+        # Clean up section starts and ends.
         text = re.sub(r'(?miu)\s*</section>', '\n</section>', text)
         text = re.sub(r'(?miu)</section>\s*', '</section>\n\n', text)
         text = re.sub(r'(?miu)\s*<section>', '\n<section>', text)
         text = re.sub(r'(?miu)<section>\s*', '<section>\n', text)
-        text = re.sub(r'(?miu)</section><section>', '</section>\n\n<section>', text)
+        # Put sections following a section on a separate line.
+        text = re.sub(r'(?miu)</section>\s*<section>', '</section>\n\n<section>', text)
 
         if self.opts.insert_blank_line:
             text = re.sub(r'(?miu)</p>', '</p><empty-line />', text)
@@ -338,6 +347,11 @@ def dump_text(self, elem_tree, stylizer, page, tag_stack=[]):
         tags = []
         # First tag in tree
         tag = barename(elem_tree.tag)
+        # Number of blank lines above tag
+        try:
+            ems = int(round((float(style.marginTop) / style.fontSize) - 1))
+        except:
+            ems = 0
 
         # Convert TOC entries to <title>s and add <section>s
         if self.opts.sectionize == 'toc':
@@ -370,7 +384,9 @@ def dump_text(self, elem_tree, stylizer, page, tag_stack=[]):
                 fb2_out.append('<section>')
                 self.section_level += 1
 
-        # Process the XHTML tag if it needs to be converted to an FB2 tag.
+        # Process the XHTML tag and its styles, converting them to FB2 tags.
+        # Use independent if statements rather than an if/elif chain: there
+        # can be only one XHTML tag, but it can carry multiple styles.
         if tag == 'img':
             if elem_tree.attrib.get('src', None):
                 # Only write the image tag if it is in the manifest.
@@ -381,7 +397,11 @@ def dump_text(self, elem_tree, stylizer, page, tag_stack=[]):
                     fb2_out += p_txt
                     tags += p_tag
                     fb2_out.append('<image xlink:href="#%s" />' % self.image_hrefs[page.abshref(elem_tree.attrib['src'])])
-        elif tag == 'br':
+        if tag in ('br', 'hr') or ems:
+            if not ems:
+                multiplier = 1
+            else:
+                multiplier = ems
             if self.in_p:
                 closed_tags = []
                 open_tags = tag_stack+tags
@@ -391,52 +411,38 @@ def dump_text(self, elem_tree, stylizer, page, tag_stack=[]):
                     closed_tags.append(t)
                     if t == 'p':
                         break
-                fb2_out.append('<empty-line />')
+                fb2_out.append('<empty-line />' * multiplier)
                 closed_tags.reverse()
                 for t in closed_tags:
                     fb2_out.append('<%s>' % t)
             else:
-                fb2_out.append('<empty-line />')
-        elif tag in ('div', 'li', 'p'):
+                fb2_out.append('<empty-line />' * multiplier)
+        if tag in ('div', 'li', 'p'):
             p_text, added_p = self.close_open_p(tag_stack+tags)
             fb2_out += p_text
             if added_p:
                 tags.append('p')
-        elif tag == 'b':
+        if tag == 'b' or style['font-weight'] in ('bold', 'bolder'):
             s_out, s_tags = self.handle_simple_tag('strong', tag_stack+tags)
             fb2_out += s_out
             tags += s_tags
-        elif tag == 'i':
+        if tag == 'i' or style['font-style'] == 'italic':
             s_out, s_tags = self.handle_simple_tag('emphasis', tag_stack+tags)
             fb2_out += s_out
             tags += s_tags
-        elif tag in ('del', 'strike'):
+        if tag in ('del', 'strike') or style['text-decoration'] == 'line-through':
             s_out, s_tags = self.handle_simple_tag('strikethrough', tag_stack+tags)
             fb2_out += s_out
             tags += s_tags
-        elif tag == 'sub':
+        if tag == 'sub':
             s_out, s_tags = self.handle_simple_tag('sub', tag_stack+tags)
             fb2_out += s_out
             tags += s_tags
-        elif tag == 'sup':
+        if tag == 'sup':
             s_out, s_tags = self.handle_simple_tag('sup', tag_stack+tags)
             fb2_out += s_out
             tags += s_tags
 
-        # Processes style information.
-        if style['font-style'] == 'italic':
-            s_out, s_tags = self.handle_simple_tag('emphasis', tag_stack+tags)
-            fb2_out += s_out
-            tags += s_tags
-        elif style['font-weight'] in ('bold', 'bolder'):
-            s_out, s_tags = self.handle_simple_tag('strong', tag_stack+tags)
-            fb2_out += s_out
-            tags += s_tags
-        elif style['text-decoration'] == 'line-through':
-            s_out, s_tags = self.handle_simple_tag('strikethrough', tag_stack+tags)
-            fb2_out += s_out
-            tags += s_tags
-
         # Process element text.
         if hasattr(elem_tree, 'text') and elem_tree.text:
             if not self.in_p:
diff --git a/src/calibre/ebooks/oeb/stylizer.py b/src/calibre/ebooks/oeb/stylizer.py
index 40b82514c1..abad5afcb3 100644
--- a/src/calibre/ebooks/oeb/stylizer.py
+++ b/src/calibre/ebooks/oeb/stylizer.py
@@ -633,7 +633,7 @@ def height(self):
     def lineHeight(self):
         if self._lineHeight is None:
             result = None
-            parent = self._getparent()
+            #parent = self._getparent()
             if 'line-height' in self._style:
                 lineh = self._style['line-height']
                 if lineh == 'normal':
@@ -642,9 +642,9 @@ def lineHeight(self):
                     result = float(lineh) * self.fontSize
                 except ValueError:
                     result = self._unit_convert(lineh, base=self.fontSize)
-            elif parent is not None:
-                # TODO: proper inheritance
-                result = parent.lineHeight
+            #elif parent is not None:
+            #    # TODO: proper inheritance
+            #    result = parent.lineHeight
             else:
                 result = 1.2 * self.fontSize
             self._lineHeight = result
diff --git a/src/calibre/ebooks/txt/txtml.py b/src/calibre/ebooks/txt/txtml.py
index 660fd9d38a..6654e70475 100644
--- a/src/calibre/ebooks/txt/txtml.py
+++ b/src/calibre/ebooks/txt/txtml.py
@@ -67,10 +67,11 @@ def mlize_spine(self):
         output.append(self.get_toc())
         for item in self.oeb_book.spine:
             self.log.debug('Converting %s to TXT...' % item.href)
-            stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts, self.opts.output_profile)
-            content = unicode(etree.tostring(item.data.find(XHTML('body')), encoding=unicode))
+            content = unicode(etree.tostring(item.data, encoding=unicode))
             content = self.remove_newlines(content)
-            output += self.dump_text(etree.fromstring(content), stylizer, item)
+            content = etree.fromstring(content)
+            stylizer = Stylizer(content, item.href, self.oeb_book, self.opts, self.opts.output_profile)
+            output += self.dump_text(content.find(XHTML('body')), stylizer, item)
             output += '\n\n\n\n\n\n'
         output = u''.join(output)
         output = u'\n'.join(l.rstrip() for l in output.splitlines())
@@ -219,11 +220,16 @@ def dump_text(self, elem, stylizer, page):
         if tag in SPACE_TAGS:
             text.append(u' ')
 
-        # Scene breaks.
+        # Hard scene breaks.
         if tag == 'hr':
             text.append('\n\n* * *\n\n')
-        elif style['margin-top']:
-            text.append('\n\n' + '\n' * round(style['margin-top']))
+        # Soft scene breaks.
+        try:
+            ems = int(round((float(style.marginTop) / style.fontSize) - 1))
+            if ems:
+                text.append('\n' * ems)
+        except:
+            pass
 
         # Process tags that contain text.
         if hasattr(elem, 'text') and elem.text:

From 0e483f5f32c62bca0984e392a4c9799a2d701787 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Wed, 2 Feb 2011 20:51:58 -0500
Subject: [PATCH 2/7] Add format documentation files.

---
 format_docs/compression/palmdoc.txt |   54 +
 format_docs/compression/zip.txt     | 3217 +++++++++++++++++++++++++++
 format_docs/pdb/ereader.txt         |  309 +++
 format_docs/pdb/mbp.txt             |  414 ++++
 format_docs/pdb/mobi.txt            |  341 +++
 format_docs/pdb/palmdoc.txt         |   25 +
 format_docs/pdb/pdb_format.txt      |  104 +
 format_docs/pdb/pdb_types.txt       |   34 +
 format_docs/pdb/plucker.html        | 2122 ++++++++++++++++++
 format_docs/pdb/pml.txt             |  936 ++++++++
 format_docs/pdb/ztxt.txt            |  226 ++
 format_docs/rb.txt                  |  303 +++
 format_docs/tcr.txt                 |   56 +
 13 files changed, 8141 insertions(+)
 create mode 100644 format_docs/compression/palmdoc.txt
 create mode 100644 format_docs/compression/zip.txt
 create mode 100644 format_docs/pdb/ereader.txt
 create mode 100644 format_docs/pdb/mbp.txt
 create mode 100644 format_docs/pdb/mobi.txt
 create mode 100644 format_docs/pdb/palmdoc.txt
 create mode 100644 format_docs/pdb/pdb_format.txt
 create mode 100644 format_docs/pdb/pdb_types.txt
 create mode 100644 format_docs/pdb/plucker.html
 create mode 100644 format_docs/pdb/pml.txt
 create mode 100644 format_docs/pdb/ztxt.txt
 create mode 100644 format_docs/rb.txt
 create mode 100644 format_docs/tcr.txt

diff --git a/format_docs/compression/palmdoc.txt b/format_docs/compression/palmdoc.txt
new file mode 100644
index 0000000000..e5bcc95150
--- /dev/null
+++ b/format_docs/compression/palmdoc.txt
@@ -0,0 +1,54 @@
+About
+-----
+
+PalmDOC uses LZ77 compression techniques. DOC files can contain only compressed
+text. The format does not allow for any text formatting. This keeps files
+small, in keeping with the Palm philosophy. However, extensions to the format
+can use tags, such as HTML or PML, to include formatting within text. These
+extensions to PalmDoc are not interchangeable and are the basis for most eBook
+Reader formats on Palm devices.
+
+LZ77 algorithms achieve compression by replacing portions of the data with
+references to matching data that has already passed through both encoder and
+decoder. A match is encoded by a pair of numbers called a length-distance pair,
+which is equivalent to the statement "each of the next length characters is
+equal to the character exactly distance characters behind it in the
+uncompressed stream." (The "distance" is sometimes called the "offset" instead.)
+
+In the PalmDoc format, a length-distance pair is always encoded by a two-byte
+sequence. Of the 16 bits that make up these two bytes, 11 bits go to encoding
+the distance, 3 go to encoding the length, and the remaining two are used to
+make sure the decoder can identify the first byte as the beginning of such a
+two-byte sequence.
+
+PalmDoc combines LZ77 with a simple kind of byte pair compression.
+
+
+PalmDoc files are decoded as follows:
+-------------------------------------
+
+Read a byte from the compressed stream. If the byte is
+
+0x00: "1 literal" copy that byte unmodified to the decompressed stream.
+
+0x09 to 0x7f: "1 literal" copy that byte unmodified to the decompressed stream.
+
+0x01 to 0x08: "literals": the byte is interpreted as a count from 1 to 8, and
+that many literals are copied unmodified from the compressed stream to the
+decompressed stream.
+
+0x80 to 0xbf: "length, distance" pair: the 2 leftmost bits of this byte ('10')
+are discarded, and the following 6 bits are combined with the 8 bits of the
+next byte to make a 14 bit "distance, length" item. Those 14 bits are broken
+into 11 bits of distance backwards from the current location in the
+uncompressed text, and 3 bits of length to copy from that point
+(copying n+3 bytes, 3 to 10 bytes).
+
+0xc0 to 0xff: "byte pair": this byte is decoded into 2 characters: a space
+character, and a letter formed from this byte XORed with 0x80.
+
+Repeat from the beginning until there are no more bytes in the compressed file.
+
+PalmDOC data is always divided into 4096 byte blocks and the blocks are acted
+upon independently. 
+
diff --git a/format_docs/compression/zip.txt b/format_docs/compression/zip.txt
new file mode 100644
index 0000000000..e658f9582b
--- /dev/null
+++ b/format_docs/compression/zip.txt
@@ -0,0 +1,3217 @@
+File:    APPNOTE.TXT - .ZIP File Format Specification
+Version: 6.3.2 
+Revised: September 28, 2007
+Copyright (c) 1989 - 2007 PKWARE Inc., All Rights Reserved.
+
+The use of certain technological aspects disclosed in the current
+APPNOTE is available pursuant to the below section entitled
+"Incorporating PKWARE Proprietary Technology into Your Product".
+
+I. Purpose
+----------
+
+This specification is intended to define a cross-platform,
+interoperable file storage and transfer format.  Since its 
+first publication in 1989, PKWARE has remained committed to 
+ensuring the interoperability of the .ZIP file format through 
+publication and maintenance of this specification.  We trust that 
+all .ZIP compatible vendors and application developers that have 
+adopted and benefited from this format will share and support 
+this commitment to interoperability.
+
+II. Contacting PKWARE
+---------------------
+
+     PKWARE, Inc.
+     648 N. Plankinton Avenue, Suite 220
+     Milwaukee, WI 53203
+     +1-414-289-9788
+     +1-414-289-9789 FAX
+     zipformat@pkware.com
+
+III. Disclaimer
+---------------
+
+Although PKWARE will attempt to supply current and accurate
+information relating to its file formats, algorithms, and the
+subject programs, the possibility of error or omission cannot 
+be eliminated. PKWARE therefore expressly disclaims any warranty 
+that the information contained in the associated materials relating 
+to the subject programs and/or the format of the files created or
+accessed by the subject programs and/or the algorithms used by
+the subject programs, or any other matter, is current, correct or
+accurate as delivered.  Any risk of damage due to any possible
+inaccurate information is assumed by the user of the information.
+Furthermore, the information relating to the subject programs
+and/or the file formats created or accessed by the subject
+programs and/or the algorithms used by the subject programs is
+subject to change without notice.
+
+If the version of this file is marked as a NOTIFICATION OF CHANGE,
+the content defines an Early Feature Specification (EFS) change 
+to the .ZIP file format that may be subject to modification prior 
+to publication of the Final Feature Specification (FFS).  This
+document may also contain information on Planned Feature 
+Specifications (PFS) defining recognized future extensions.
+
+IV. Change Log
+--------------
+
+Version       Change Description                        Date
+-------       ------------------                       ----------
+5.2           -Single Password Symmetric Encryption    06/02/2003
+               storage
+
+6.1.0         -Smartcard compatibility                 01/20/2004
+              -Documentation on certificate storage
+
+6.2.0         -Introduction of Central Directory       04/26/2004
+               Encryption for encrypting metadata
+              -Added OS/X to Version Made By values
+
+6.2.1         -Added Extra Field placeholder for       04/01/2005
+               POSZIP using ID 0x4690
+
+              -Clarified size field on 
+               "zip64 end of central directory record"
+
+6.2.2         -Documented Final Feature Specification  01/06/2006
+               for Strong Encryption
+
+              -Clarifications and typographical 
+               corrections
+
+6.3.0         -Added tape positioning storage          09/29/2006
+               parameters
+
+              -Expanded list of supported hash algorithms
+
+              -Expanded list of supported compression
+               algorithms
+
+              -Expanded list of supported encryption
+               algorithms
+
+              -Added option for Unicode filename 
+               storage
+
+              -Clarifications for consistent use
+               of Data Descriptor records
+
+              -Added additional "Extra Field" 
+               definitions
+
+6.3.1         -Corrected standard hash values for      04/11/2007
+               SHA-256/384/512
+
+6.3.2         -Added compression method 97             09/28/2007
+
+              -Documented InfoZIP "Extra Field"
+               values for UTF-8 file name and
+               file comment storage
+
+V. General Format of a .ZIP file
+--------------------------------
+
+  Files stored in arbitrary order.  Large .ZIP files can span multiple
+  volumes or be split into user-defined segment sizes. All values
+  are stored in little-endian byte order unless otherwise specified. 
+
+  Overall .ZIP file format:
+
+    [local file header 1]
+    [file data 1]
+    [data descriptor 1]
+    . 
+    .
+    .
+    [local file header n]
+    [file data n]
+    [data descriptor n]
+    [archive decryption header] 
+    [archive extra data record] 
+    [central directory]
+    [zip64 end of central directory record]
+    [zip64 end of central directory locator] 
+    [end of central directory record]
+
+
+  A.  Local file header:
+
+        local file header signature     4 bytes  (0x04034b50)
+        version needed to extract       2 bytes
+        general purpose bit flag        2 bytes
+        compression method              2 bytes
+        last mod file time              2 bytes
+        last mod file date              2 bytes
+        crc-32                          4 bytes
+        compressed size                 4 bytes
+        uncompressed size               4 bytes
+        file name length                2 bytes
+        extra field length              2 bytes
+
+        file name (variable size)
+        extra field (variable size)
+
+  B.  File data
+
+      Immediately following the local header for a file
+      is the compressed or stored data for the file. 
+      The series of [local file header][file data][data
+      descriptor] repeats for each file in the .ZIP archive. 
+
+  C.  Data descriptor:
+
+        crc-32                          4 bytes
+        compressed size                 4 bytes
+        uncompressed size               4 bytes
+
+      This descriptor exists only if bit 3 of the general
+      purpose bit flag is set (see below).  It is byte aligned
+      and immediately follows the last byte of compressed data.
+      This descriptor is used only when it was not possible to
+      seek in the output .ZIP file, e.g., when the output .ZIP file
+      was standard output or a non-seekable device.  For ZIP64(tm) format
+      archives, the compressed and uncompressed sizes are 8 bytes each.
+
+      When compressing files, compressed and uncompressed sizes 
+      should be stored in ZIP64 format (as 8 byte values) when a 
+      file's size exceeds 0xFFFFFFFF.   However ZIP64 format may be 
+      used regardless of the size of a file.  When extracting, if 
+      the zip64 extended information extra field is present for 
+      the file the compressed and uncompressed sizes will be 8
+      byte values.  
+
+      Although not originally assigned a signature, the value 
+      0x08074b50 has commonly been adopted as a signature value 
+      for the data descriptor record.  Implementers should be 
+      aware that ZIP files may be encountered with or without this 
+      signature marking data descriptors and should account for
+      either case when reading ZIP files to ensure compatibility.
+      When writing ZIP files, it is recommended to include the
+      signature value marking the data descriptor record.  When
+      the signature is used, the fields currently defined for
+      the data descriptor record will immediately follow the
+      signature.
+
+      An extensible data descriptor will be released in a future
+      version of this APPNOTE.  This new record is intended to
+      resolve conflicts with the use of this record going forward,
+      and to provide better support for streamed file processing.
+
+      When the Central Directory Encryption method is used, the data
+      descriptor record is not required, but may be used.  If present,
+      and bit 3 of the general purpose bit field is set to indicate
+      its presence, the values in fields of the data descriptor
+      record should be set to binary zeros.
+
+  D.  Archive decryption header:  
+
+      The Archive Decryption Header is introduced in version 6.2
+      of the ZIP format specification.  This record exists in support
+      of the Central Directory Encryption Feature implemented as part of 
+      the Strong Encryption Specification as described in this document.
+      When the Central Directory Structure is encrypted, this decryption
+      header will precede the encrypted data segment.  The encrypted
+      data segment will consist of the Archive extra data record (if
+      present) and the encrypted Central Directory Structure data.
+      The format of this data record is identical to the Decryption
+      header record preceding compressed file data.  If the central 
+      directory structure is encrypted, the location of the start of
+      this data record is determined using the Start of Central Directory
+      field in the Zip64 End of Central Directory record.  Refer to the 
+      section on the Strong Encryption Specification for information
+      on the fields used in the Archive Decryption Header record.
+
+
+  E.  Archive extra data record: 
+
+        archive extra data signature    4 bytes  (0x08064b50)
+        extra field length              4 bytes
+        extra field data                (variable size)
+
+      The Archive Extra Data Record is introduced in version 6.2
+      of the ZIP format specification.  This record exists in support
+      of the Central Directory Encryption Feature implemented as part of 
+      the Strong Encryption Specification as described in this document.
+      When present, this record immediately precedes the central 
+      directory data structure.  The size of this data record will be
+      included in the Size of the Central Directory field in the
+      End of Central Directory record.  If the central directory structure
+      is compressed, but not encrypted, the location of the start of
+      this data record is determined using the Start of Central Directory
+      field in the Zip64 End of Central Directory record.  
+
+
+  F.  Central directory structure:
+
+      [file header 1]
+      .
+      .
+      . 
+      [file header n]
+      [digital signature] 
+
+      File header:
+
+        central file header signature   4 bytes  (0x02014b50)
+        version made by                 2 bytes
+        version needed to extract       2 bytes
+        general purpose bit flag        2 bytes
+        compression method              2 bytes
+        last mod file time              2 bytes
+        last mod file date              2 bytes
+        crc-32                          4 bytes
+        compressed size                 4 bytes
+        uncompressed size               4 bytes
+        file name length                2 bytes
+        extra field length              2 bytes
+        file comment length             2 bytes
+        disk number start               2 bytes
+        internal file attributes        2 bytes
+        external file attributes        4 bytes
+        relative offset of local header 4 bytes
+
+        file name (variable size)
+        extra field (variable size)
+        file comment (variable size)
+
+      Digital signature:
+
+        header signature                4 bytes  (0x05054b50)
+        size of data                    2 bytes
+        signature data (variable size)
+
+      With the introduction of the Central Directory Encryption 
+      feature in version 6.2 of this specification, the Central 
+      Directory Structure may be stored both compressed and encrypted. 
+      Although not required, it is assumed when encrypting the
+      Central Directory Structure, that it will be compressed
+      for greater storage efficiency.  Information on the
+      Central Directory Encryption feature can be found in the section
+      describing the Strong Encryption Specification. The Digital 
+      Signature record will be neither compressed nor encrypted.
+
+  G.  Zip64 end of central directory record
+
+        zip64 end of central dir 
+        signature                       4 bytes  (0x06064b50)
+        size of zip64 end of central
+        directory record                8 bytes
+        version made by                 2 bytes
+        version needed to extract       2 bytes
+        number of this disk             4 bytes
+        number of the disk with the 
+        start of the central directory  4 bytes
+        total number of entries in the
+        central directory on this disk  8 bytes
+        total number of entries in the
+        central directory               8 bytes
+        size of the central directory   8 bytes
+        offset of start of central
+        directory with respect to
+        the starting disk number        8 bytes
+        zip64 extensible data sector    (variable size)
+
+        The value stored into the "size of zip64 end of central
+        directory record" should be the size of the remaining
+        record and should not include the leading 12 bytes.
+  
+        Size = SizeOfFixedFields + SizeOfVariableData - 12.
+
+        The above record structure defines Version 1 of the 
+        zip64 end of central directory record. Version 1 was 
+        implemented in versions of this specification preceding 
+        6.2 in support of the ZIP64 large file feature. The 
+        introduction of the Central Directory Encryption feature 
+        implemented in version 6.2 as part of the Strong Encryption 
+        Specification defines Version 2 of this record structure. 
+        Refer to the section describing the Strong Encryption 
+        Specification for details on the version 2 format for 
+        this record.
+
+        Special purpose data may reside in the zip64 extensible data
+        sector field following either a V1 or V2 version of this
+        record.  To ensure identification of this special purpose data
+        it must include an identifying header block consisting of the
+        following:
+
+           Header ID  -  2 bytes
+           Data Size  -  4 bytes
+
+        The Header ID field indicates the type of data that is in the 
+        data block that follows.
+
+        Data Size identifies the number of bytes that follow for this
+        data block type.
+
+        Multiple special purpose data blocks may be present, but each
+        must be preceded by a Header ID and Data Size field.  Current
+        mappings of Header ID values supported in this field are as
+        defined in APPENDIX C.
+
+  H.  Zip64 end of central directory locator
+
+        zip64 end of central dir locator 
+        signature                       4 bytes  (0x07064b50)
+        number of the disk with the
+        start of the zip64 end of 
+        central directory               4 bytes
+        relative offset of the zip64
+        end of central directory record 8 bytes
+        total number of disks           4 bytes
+        
+  I.  End of central directory record:
+
+        end of central dir signature    4 bytes  (0x06054b50)
+        number of this disk             2 bytes
+        number of the disk with the
+        start of the central directory  2 bytes
+        total number of entries in the
+        central directory on this disk  2 bytes
+        total number of entries in
+        the central directory           2 bytes
+        size of the central directory   4 bytes
+        offset of start of central
+        directory with respect to
+        the starting disk number        4 bytes
+        .ZIP file comment length        2 bytes
+        .ZIP file comment       (variable size)
+
+  J.  Explanation of fields:
+
+      version made by (2 bytes)
+
+          The upper byte indicates the compatibility of the file
+          attribute information.  If the external file attributes 
+          are compatible with MS-DOS and can be read by PKZIP for 
+          DOS version 2.04g then this value will be zero.  If these 
+          attributes are not compatible, then this value will 
+          identify the host system on which the attributes are 
+          compatible.  Software can use this information to determine
+          the line record format for text files etc.  The current
+          mappings are:
+
+          0 - MS-DOS and OS/2 (FAT / VFAT / FAT32 file systems)
+          1 - Amiga                     2 - OpenVMS
+          3 - UNIX                      4 - VM/CMS
+          5 - Atari ST                  6 - OS/2 H.P.F.S.
+          7 - Macintosh                 8 - Z-System
+          9 - CP/M                     10 - Windows NTFS
+         11 - MVS (OS/390 - Z/OS)      12 - VSE
+         13 - Acorn Risc               14 - VFAT
+         15 - alternate MVS            16 - BeOS
+         17 - Tandem                   18 - OS/400
+         19 - OS/X (Darwin)            20 thru 255 - unused
+
+          The lower byte indicates the ZIP specification version 
+          (the version of this document) supported by the software 
+          used to encode the file.  The value/10 indicates the major 
+          version number, and the value mod 10 is the minor version 
+          number.  
+
+      version needed to extract (2 bytes)
+
+          The minimum supported ZIP specification version needed to 
+          extract the file, mapped as above.  This value is based on 
+          the specific format features a ZIP program must support to 
+          be able to extract the file.  If multiple features are
+          applied to a file, the minimum version should be set to the 
+          feature having the highest value. New features or feature 
+          changes affecting the published format specification will be 
+          implemented using higher version numbers than the last 
+          published value to avoid conflict.
+
+          Current minimum feature versions are as defined below:
+
+          1.0 - Default value
+          1.1 - File is a volume label
+          2.0 - File is a folder (directory)
+          2.0 - File is compressed using Deflate compression
+          2.0 - File is encrypted using traditional PKWARE encryption
+          2.1 - File is compressed using Deflate64(tm)
+          2.5 - File is compressed using PKWARE DCL Implode 
+          2.7 - File is a patch data set 
+          4.5 - File uses ZIP64 format extensions
+          4.6 - File is compressed using BZIP2 compression*
+          5.0 - File is encrypted using DES
+          5.0 - File is encrypted using 3DES
+          5.0 - File is encrypted using original RC2 encryption
+          5.0 - File is encrypted using RC4 encryption
+          5.1 - File is encrypted using AES encryption
+          5.1 - File is encrypted using corrected RC2 encryption**
+          5.2 - File is encrypted using corrected RC2-64 encryption**
+          6.1 - File is encrypted using non-OAEP key wrapping***
+          6.2 - Central directory encryption
+          6.3 - File is compressed using LZMA
+          6.3 - File is compressed using PPMd+
+          6.3 - File is encrypted using Blowfish
+          6.3 - File is encrypted using Twofish
+
+
+          * Early 7.x (pre-7.2) versions of PKZIP incorrectly set the
+          version needed to extract for BZIP2 compression to be 50
+          when it should have been 46.
+
+          ** Refer to the section on Strong Encryption Specification
+          for additional information regarding RC2 corrections.
+
+          *** Certificate encryption using non-OAEP key wrapping is the
+          intended mode of operation for all versions beginning with 6.1.
+          Support for OAEP key wrapping should only be used for
+          backward compatibility when sending ZIP files to be opened by
+          versions of PKZIP older than 6.1 (5.0 or 6.0).
+
+          + Files compressed using PPMd should set the version
+          needed to extract field to 6.3, however, not all ZIP 
+          programs enforce this and may be unable to decompress 
+          data files compressed using PPMd if this value is set.
+
+          When using ZIP64 extensions, the corresponding value in the
+          zip64 end of central directory record should also be set.  
+          This field should be set appropriately to indicate whether 
+          Version 1 or Version 2 format is in use. 
+
+      general purpose bit flag: (2 bytes)
+
+          Bit 0: If set, indicates that the file is encrypted.
+
+          (For Method 6 - Imploding)
+          Bit 1: If the compression method used was type 6,
+                 Imploding, then this bit, if set, indicates
+                 an 8K sliding dictionary was used.  If clear,
+                 then a 4K sliding dictionary was used.
+          Bit 2: If the compression method used was type 6,
+                 Imploding, then this bit, if set, indicates
+                 3 Shannon-Fano trees were used to encode the
+                 sliding dictionary output.  If clear, then 2
+                 Shannon-Fano trees were used.
+
+          (For Methods 8 and 9 - Deflating)
+          Bit 2  Bit 1
+            0      0    Normal (-en) compression option was used.
+            0      1    Maximum (-exx/-ex) compression option was used.
+            1      0    Fast (-ef) compression option was used.
+            1      1    Super Fast (-es) compression option was used.
+
+          (For Method 14 - LZMA)
+          Bit 1: If the compression method used was type 14,
+                 LZMA, then this bit, if set, indicates
+                 an end-of-stream (EOS) marker is used to
+                 mark the end of the compressed data stream.
+                 If clear, then an EOS marker is not present
+                 and the compressed data size must be known
+                 to extract.
+
+          Note:  Bits 1 and 2 are undefined if the compression
+                 method is any other.
+
+          Bit 3: If this bit is set, the fields crc-32, compressed 
+                 size and uncompressed size are set to zero in the 
+                 local header.  The correct values are put in the 
+                 data descriptor immediately following the compressed
+                 data.  (Note: PKZIP version 2.04g for DOS only 
+                 recognizes this bit for method 8 compression, newer 
+                 versions of PKZIP recognize this bit for any 
+                 compression method.)
+
+          Bit 4: Reserved for use with method 8, for enhanced
+                 deflating. 
+
+          Bit 5: If this bit is set, this indicates that the file is 
+                 compressed patched data.  (Note: Requires PKZIP 
+                 version 2.70 or greater)
+
+          Bit 6: Strong encryption.  If this bit is set, you should
+                 set the version needed to extract value to at least
+                 50 and you must also set bit 0.  If AES encryption
+                 is used, the version needed to extract value must 
+                 be at least 51.
+
+          Bit 7: Currently unused.
+
+          Bit 8: Currently unused.
+
+          Bit 9: Currently unused.
+
+          Bit 10: Currently unused.
+
+          Bit 11: Language encoding flag (EFS).  If this bit is set,
+                  the filename and comment fields for this file
+                  must be encoded using UTF-8. (see APPENDIX D)
+
+          Bit 12: Reserved by PKWARE for enhanced compression.
+
+          Bit 13: Used when encrypting the Central Directory to indicate 
+                  selected data values in the Local Header are masked to
+                  hide their actual values.  See the section describing 
+                  the Strong Encryption Specification for details.
+
+          Bit 14: Reserved by PKWARE.
+
+          Bit 15: Reserved by PKWARE.
+
+      compression method: (2 bytes)
+
+          (see accompanying documentation for algorithm
+          descriptions)
+
+          0 - The file is stored (no compression)
+          1 - The file is Shrunk
+          2 - The file is Reduced with compression factor 1
+          3 - The file is Reduced with compression factor 2
+          4 - The file is Reduced with compression factor 3
+          5 - The file is Reduced with compression factor 4
+          6 - The file is Imploded
+          7 - Reserved for Tokenizing compression algorithm
+          8 - The file is Deflated
+          9 - Enhanced Deflating using Deflate64(tm)
+         10 - PKWARE Data Compression Library Imploding (old IBM TERSE)
+         11 - Reserved by PKWARE
+         12 - File is compressed using BZIP2 algorithm
+         13 - Reserved by PKWARE
+         14 - LZMA (EFS)
+         15 - Reserved by PKWARE
+         16 - Reserved by PKWARE
+         17 - Reserved by PKWARE
+         18 - File is compressed using IBM TERSE (new)
+         19 - IBM LZ77 z Architecture (PFS)
+         97 - WavPack compressed data
+         98 - PPMd version I, Rev 1
+
+      date and time fields: (2 bytes each)
+
+          The date and time are encoded in standard MS-DOS format.
+          If input came from standard input, the date and time are
+          those at which compression was started for this data. 
+          If encrypting the central directory and general purpose bit 
+          flag 13 is set indicating masking, the value stored in the 
+          Local Header will be zero. 
+
+      CRC-32: (4 bytes)
+
+          The CRC-32 algorithm was generously contributed by
+          David Schwaderer and can be found in his excellent
+          book "C Programmers Guide to NetBIOS" published by
+          Howard W. Sams & Co. Inc.  The 'magic number' for
+          the CRC is 0xdebb20e3.  The proper CRC pre and post
+          conditioning is used, meaning that the CRC register
+          is pre-conditioned with all ones (a starting value
+          of 0xffffffff) and the value is post-conditioned by
+          taking the one's complement of the CRC residual.
+          If bit 3 of the general purpose flag is set, this
+          field is set to zero in the local header and the correct
+          value is put in the data descriptor and in the central
+          directory. When encrypting the central directory, if the
+          local header is not in ZIP64 format and general purpose 
+          bit flag 13 is set indicating masking, the value stored 
+          in the Local Header will be zero. 
+
+      compressed size: (4 bytes)
+      uncompressed size: (4 bytes)
+
+          The size of the file compressed and uncompressed,
+          respectively.  When a decryption header is present it will
+          be placed in front of the file data and the value of the
+          compressed file size will include the bytes of the decryption
+          header.  If bit 3 of the general purpose bit flag is set, 
+          these fields are set to zero in the local header and the 
+          correct values are put in the data descriptor and
+          in the central directory.  If an archive is in ZIP64 format
+          and the value in this field is 0xFFFFFFFF, the size will be
+          in the corresponding 8 byte ZIP64 extended information 
+          extra field.  When encrypting the central directory, if the
+          local header is not in ZIP64 format and general purpose bit 
+          flag 13 is set indicating masking, the value stored for the 
+          uncompressed size in the Local Header will be zero. 
+
+      file name length: (2 bytes)
+      extra field length: (2 bytes)
+      file comment length: (2 bytes)
+
+          The length of the file name, extra field, and comment
+          fields respectively.  The combined length of any
+          directory record and these three fields should not
+          generally exceed 65,535 bytes.  If input came from standard
+          input, the file name length is set to zero.  
+
+      disk number start: (2 bytes)
+
+          The number of the disk on which this file begins.  If an 
+          archive is in ZIP64 format and the value in this field is 
+          0xFFFF, the value will be in the corresponding 4 byte zip64 
+          extended information extra field.
+
+      internal file attributes: (2 bytes)
+
+          Bits 1 and 2 are reserved for use by PKWARE.
+
+          The lowest bit of this field indicates, if set, that
+          the file is apparently an ASCII or text file.  If not
+          set, that the file apparently contains binary data.
+          The remaining bits are unused in version 1.0.
+
+          The 0x0002 bit of this field indicates, if set, that a 
+          4 byte variable record length control field precedes each 
+          logical record indicating the length of the record. The 
+          record length control field is stored in little-endian byte
+          order.  This flag is independent of text control characters, 
+          and if used in conjunction with text data, includes any 
+          control characters in the total length of the record. This 
+          value is provided for mainframe data transfer support.
+
+      external file attributes: (4 bytes)
+
+          The mapping of the external attributes is
+          host-system dependent (see 'version made by').  For
+          MS-DOS, the low order byte is the MS-DOS directory
+          attribute byte.  If input came from standard input, this
+          field is set to zero.
+
+      relative offset of local header: (4 bytes)
+
+          This is the offset from the start of the first disk on
+          which this file appears, to where the local header should
+          be found.  If an archive is in ZIP64 format and the value
+          in this field is 0xFFFFFFFF, the offset will be in the 
+          corresponding 8 byte zip64 extended information extra field.
+
+      file name: (Variable)
+
+          The name of the file, with optional relative path.
+          The path stored should not contain a drive or
+          device letter, or a leading slash.  All slashes
+          should be forward slashes '/' as opposed to
+          backwards slashes '\' for compatibility with Amiga
+          and UNIX file systems etc.  If input came from standard
+          input, there is no file name field.  If encrypting
+          the central directory and general purpose bit flag 13 is set 
+          indicating masking, the file name stored in the Local Header 
+          will not be the actual file name.  A masking value consisting 
+          of a unique hexadecimal value will be stored.  This value will 
+          be sequentially incremented for each file in the archive. See
+          the section on the Strong Encryption Specification for details 
+          on retrieving the encrypted file name. 
+
+      extra field: (Variable)
+
+          This is for expansion.  If additional information
+          needs to be stored for special needs or for specific 
+          platforms, it should be stored here.  Earlier versions 
+          of the software can then safely skip this field, and 
+          find the next file or header.  This field will be 0 
+          length in version 1.0.
+
+          In order to allow different programs and different types
+          of information to be stored in the 'extra' field in .ZIP
+          files, the following structure should be used for all
+          programs storing data in this field:
+
+          header1+data1 + header2+data2 . . .
+
+          Each header should consist of:
+
+            Header ID - 2 bytes
+            Data Size - 2 bytes
+
+          Note: all fields stored in Intel low-byte/high-byte order.
+
+          The Header ID field indicates the type of data that is in
+          the following data block.
+
+          Header ID's of 0 thru 31 are reserved for use by PKWARE.
+          The remaining ID's can be used by third party vendors for
+          proprietary usage.
+
+          The current Header ID mappings defined by PKWARE are:
+
+          0x0001        Zip64 extended information extra field
+          0x0007        AV Info
+          0x0008        Reserved for extended language encoding data (PFS)
+                        (see APPENDIX D)
+          0x0009        OS/2
+          0x000a        NTFS 
+          0x000c        OpenVMS
+          0x000d        UNIX
+          0x000e        Reserved for file stream and fork descriptors
+          0x000f        Patch Descriptor
+          0x0014        PKCS#7 Store for X.509 Certificates
+          0x0015        X.509 Certificate ID and Signature for 
+                        individual file
+          0x0016        X.509 Certificate ID for Central Directory
+          0x0017        Strong Encryption Header
+          0x0018        Record Management Controls
+          0x0019        PKCS#7 Encryption Recipient Certificate List
+          0x0065        IBM S/390 (Z390), AS/400 (I400) attributes 
+                        - uncompressed
+          0x0066        Reserved for IBM S/390 (Z390), AS/400 (I400) 
+                        attributes - compressed
+          0x4690        POSZIP 4690 (reserved) 
+
+          Third party mappings commonly used are:
+
+
+          0x07c8        Macintosh
+          0x2605        ZipIt Macintosh
+          0x2705        ZipIt Macintosh 1.3.5+
+          0x2805        ZipIt Macintosh 1.3.5+
+          0x334d        Info-ZIP Macintosh
+          0x4341        Acorn/SparkFS 
+          0x4453        Windows NT security descriptor (binary ACL)
+          0x4704        VM/CMS
+          0x470f        MVS
+          0x4b46        FWKCS MD5 (see below)
+          0x4c41        OS/2 access control list (text ACL)
+          0x4d49        Info-ZIP OpenVMS
+          0x4f4c        Xceed original location extra field
+          0x5356        AOS/VS (ACL)
+          0x5455        extended timestamp
+          0x554e        Xceed unicode extra field
+          0x5855        Info-ZIP UNIX (original, also OS/2, NT, etc)
+          0x6375        Info-ZIP Unicode Comment Extra Field
+          0x6542        BeOS/BeBox
+          0x7075        Info-ZIP Unicode Path Extra Field
+          0x756e        ASi UNIX
+          0x7855        Info-ZIP UNIX (new)
+          0xa220        Microsoft Open Packaging Growth Hint
+          0xfd4a        SMS/QDOS
+
+          Detailed descriptions of Extra Fields defined by third 
+          party mappings will be documented as information on
+          these data structures is made available to PKWARE.  
+          PKWARE does not guarantee the accuracy of any published
+          third party data.
+
+          The Data Size field indicates the size of the following
+          data block. Programs can use this value to skip to the
+          next header block, passing over any data blocks that are
+          not of interest.
+
+          Note: As stated above, the size of the entire .ZIP file
+                header, including the file name, comment, and extra
+                field should not exceed 64K in size.
+
+          In case two different programs should appropriate the same
+          Header ID value, it is strongly recommended that each
+          program place a unique signature of at least two bytes in
+          size (and preferably 4 bytes or bigger) at the start of
+          each data area.  Every program should verify that its
+          unique signature is present, in addition to the Header ID
+          value being correct, before assuming that it is a block of
+          known type.
+
+         -Zip64 Extended Information Extra Field (0x0001):
+
+          The following is the layout of the zip64 extended 
+          information "extra" block. If one of the size or
+          offset fields in the Local or Central directory
+          record is too small to hold the required data,
+          a Zip64 extended information record is created.
+          The order of the fields in the zip64 extended 
+          information record is fixed, but the fields will
+          only appear if the corresponding Local or Central
+          directory record field is set to 0xFFFF or 0xFFFFFFFF.
+
+          Note: all fields stored in Intel low-byte/high-byte order.
+
+          Value      Size       Description
+          -----      ----       -----------
+  (ZIP64) 0x0001     2 bytes    Tag for this "extra" block type
+          Size       2 bytes    Size of this "extra" block
+          Original 
+          Size       8 bytes    Original uncompressed file size
+          Compressed
+          Size       8 bytes    Size of compressed data
+          Relative Header
+          Offset     8 bytes    Offset of local header record
+          Disk Start
+          Number     4 bytes    Number of the disk on which
+                                this file starts 
+
+          This entry in the Local header must include BOTH original
+          and compressed file size fields. If encrypting the 
+          central directory and bit 13 of the general purpose bit
+          flag is set indicating masking, the value stored in the
+          Local Header for the original file size will be zero.
+
+
+         -OS/2 Extra Field (0x0009):
+
+          The following is the layout of the OS/2 attributes "extra" 
+          block.  (Last Revision  09/05/95)
+
+          Note: all fields stored in Intel low-byte/high-byte order.
+
+          Value       Size          Description
+          -----       ----          -----------
+  (OS/2)  0x0009      2 bytes       Tag for this "extra" block type
+          TSize       2 bytes       Size for the following data block
+          BSize       4 bytes       Uncompressed Block Size
+          CType       2 bytes       Compression type
+          EACRC       4 bytes       CRC value for uncompressed block
+          (var)       variable      Compressed block
+
+          The OS/2 extended attribute structure (FEA2LIST) is 
+          compressed and then stored in its entirety within this 
+          structure.  There will only ever be one "block" of data in 
+          VarFields[].
+
+         -NTFS Extra Field (0x000a):
+
+          The following is the layout of the NTFS attributes 
+          "extra" block. (Note: At this time the Mtime, Atime
+          and Ctime values may be used on any WIN32 system.)  
+
+          Note: all fields stored in Intel low-byte/high-byte order.
+
+          Value      Size       Description
+          -----      ----       -----------
+  (NTFS)  0x000a     2 bytes    Tag for this "extra" block type
+          TSize      2 bytes    Size of the total "extra" block
+          Reserved   4 bytes    Reserved for future use
+          Tag1       2 bytes    NTFS attribute tag value #1
+          Size1      2 bytes    Size of attribute #1, in bytes
+          (var.)     Size1      Attribute #1 data
+          .
+          .
+          .
+          TagN       2 bytes    NTFS attribute tag value #N
+          SizeN      2 bytes    Size of attribute #N, in bytes
+          (var.)     SizeN      Attribute #N data
+
+          For NTFS, values for Tag1 through TagN are as follows:
+          (currently only one set of attributes is defined for NTFS)
+
+          Tag        Size       Description
+          -----      ----       -----------
+          0x0001     2 bytes    Tag for attribute #1 
+          Size1      2 bytes    Size of attribute #1, in bytes
+          Mtime      8 bytes    File last modification time
+          Atime      8 bytes    File last access time
+          Ctime      8 bytes    File creation time
+
+         -OpenVMS Extra Field (0x000c):
+
+          The following is the layout of the OpenVMS attributes 
+          "extra" block.
+
+          Note: all fields stored in Intel low-byte/high-byte order.
+
+          Value      Size       Description
+          -----      ----       -----------
+  (VMS)   0x000c     2 bytes    Tag for this "extra" block type
+          TSize      2 bytes    Size of the total "extra" block
+          CRC        4 bytes    32-bit CRC for remainder of the block
+          Tag1       2 bytes    OpenVMS attribute tag value #1
+          Size1      2 bytes    Size of attribute #1, in bytes
+          (var.)     Size1      Attribute #1 data
+          .
+          .
+          .
+          TagN       2 bytes    OpenVMS attribute tag value #N
+          SizeN      2 bytes    Size of attribute #N, in bytes
+          (var.)     SizeN      Attribute #N data
+
+          Rules:
+
+          1. There will be one or more attributes present, which 
+             will each be preceded by the above TagX & SizeX values.  
+             These values are identical to the ATR$C_XXXX and 
+             ATR$S_XXXX constants which are defined in ATR.H under 
+             OpenVMS C.  Neither of these values will ever be zero.
+
+          2. No word alignment or padding is performed.
+
+          3. A well-behaved PKZIP/OpenVMS program should never produce
+             more than one sub-block with the same TagX value.  Also,
+             there will never be more than one "extra" block of type
+             0x000c in a particular directory record.
+
+         -UNIX Extra Field (0x000d):
+
+          The following is the layout of the UNIX "extra" block.
+          Note: all fields are stored in Intel low-byte/high-byte 
+          order.
+
+          Value       Size          Description
+          -----       ----          -----------
+  (UNIX)  0x000d      2 bytes       Tag for this "extra" block type
+          TSize       2 bytes       Size for the following data block
+          Atime       4 bytes       File last access time
+          Mtime       4 bytes       File last modification time
+          Uid         2 bytes       File user ID
+          Gid         2 bytes       File group ID
+          (var)       variable      Variable length data field
+
+          The variable length data field will contain file type 
+          specific data.  Currently the only values allowed are
+          the original "linked to" file names for hard or symbolic 
+          links, and the major and minor device node numbers for
+          character and block device nodes.  Since device nodes
+          cannot be either symbolic or hard links, only one set of
+          variable length data is stored.  Link files will have the
+          name of the original file stored.  This name is NOT NULL
+          terminated.  Its size can be determined by checking TSize -
+          12.  Device entries will have eight bytes stored as two 4
+          byte entries (in little endian format).  The first entry
+          will be the major device number, and the second the minor
+          device number.
+          
+         -PATCH Descriptor Extra Field (0x000f):
+
+          The following is the layout of the Patch Descriptor "extra"
+          block.
+
+          Note: all fields stored in Intel low-byte/high-byte order.
+
+          Value     Size     Description
+          -----     ----     -----------
+  (Patch) 0x000f    2 bytes  Tag for this "extra" block type
+          TSize     2 bytes  Size of the total "extra" block
+          Version   2 bytes  Version of the descriptor
+          Flags     4 bytes  Actions and reactions (see below) 
+          OldSize   4 bytes  Size of the file about to be patched 
+          OldCRC    4 bytes  32-bit CRC of the file to be patched 
+          NewSize   4 bytes  Size of the resulting file 
+          NewCRC    4 bytes  32-bit CRC of the resulting file 
+
+          Actions and reactions
+
+          Bits          Description
+          ----          ----------------
+          0             Use for auto detection
+          1             Treat as a self-patch
+          2-3           RESERVED
+          4-5           Action (see below)
+          6-7           RESERVED
+          8-9           Reaction (see below) to absent file 
+          10-11         Reaction (see below) to newer file
+          12-13         Reaction (see below) to unknown file
+          14-15         RESERVED
+          16-31         RESERVED
+
+          Actions
+
+          Action       Value
+          ------       ----- 
+          none         0
+          add          1
+          delete       2
+          patch        3
+
+          Reactions
+ 
+          Reaction     Value
+          --------     -----
+          ask          0
+          skip         1
+          ignore       2
+          fail         3
+
+          Patch support is provided by PKPatchMaker(tm) technology and is 
+          covered under U.S. Patents and Patents Pending. The use or 
+          implementation in a product of certain technological aspects set
+          forth in the current APPNOTE, including those with regard to 
+          strong encryption, patching, or extended tape operations requires
+          a license from PKWARE.  Please contact PKWARE with regard to 
+          acquiring a license. 
+
+         -PKCS#7 Store for X.509 Certificates (0x0014):
+
+          This field contains information about each of the certificates 
+          files may be signed with. When the Central Directory Encryption 
+          feature is enabled for a ZIP file, this record will appear in 
+          the Archive Extra Data Record, otherwise it will appear in the 
+          first central directory record and will be ignored in any 
+          other record.
+          
+          Note: all fields stored in Intel low-byte/high-byte order.
+
+          Value     Size     Description
+          -----     ----     -----------
+  (Store) 0x0014    2 bytes  Tag for this "extra" block type
+          TSize     2 bytes  Size of the store data
+          TData     TSize    Data about the store
+
+
+         -X.509 Certificate ID and Signature for individual file (0x0015):
+
+          This field contains the information about which certificate in 
+          the PKCS#7 store was used to sign a particular file. It also 
+          contains the signature data. This field can appear multiple 
+          times, but can only appear once per certificate.
+
+          Note: all fields stored in Intel low-byte/high-byte order.
+
+          Value     Size     Description
+          -----     ----     -----------
+  (CID)   0x0015    2 bytes  Tag for this "extra" block type
+          TSize     2 bytes  Size of data that follows
+          TData     TSize    Signature Data
+
+         -X.509 Certificate ID and Signature for central directory (0x0016):
+
+          This field contains the information about which certificate in 
+          the PKCS#7 store was used to sign the central directory structure.
+          When the Central Directory Encryption feature is enabled for a 
+          ZIP file, this record will appear in the Archive Extra Data Record, 
+          otherwise it will appear in the first central directory record.
+
+          Note: all fields stored in Intel low-byte/high-byte order.
+
+          Value     Size     Description
+          -----     ----     -----------
+  (CDID)  0x0016    2 bytes  Tag for this "extra" block type
+          TSize     2 bytes  Size of data that follows
+          TData     TSize    Data
+
+         -Strong Encryption Header (0x0017):
+
+          Value     Size     Description
+          -----     ----     -----------
+          0x0017    2 bytes  Tag for this "extra" block type
+          TSize     2 bytes  Size of data that follows
+          Format    2 bytes  Format definition for this record
+          AlgID     2 bytes  Encryption algorithm identifier
+          Bitlen    2 bytes  Bit length of encryption key
+          Flags     2 bytes  Processing flags
+          CertData  TSize-8  Certificate decryption extra field data
+                             (refer to the explanation for CertData
+                              in the section describing the 
+                              Certificate Processing Method under 
+                              the Strong Encryption Specification)
+
+
+         -Record Management Controls (0x0018):
+
+          Value     Size     Description
+          -----     ----     -----------
+(Rec-CTL) 0x0018    2 bytes  Tag for this "extra" block type
+          CSize     2 bytes  Size of total extra block data
+          Tag1      2 bytes  Record control attribute 1
+          Size1     2 bytes  Size of attribute 1, in bytes
+          Data1     Size1    Attribute 1 data
+            .
+            .
+            .
+          TagN      2 bytes  Record control attribute N
+          SizeN     2 bytes  Size of attribute N, in bytes
+          DataN     SizeN    Attribute N data
+
+
+         -PKCS#7 Encryption Recipient Certificate List (0x0019): 
+
+          This field contains information about each of the certificates
+          used in encryption processing and it can be used to identify who is
+          allowed to decrypt encrypted files.  This field should only appear 
+          in the archive extra data record. This field is not required and 
+          serves only to aid archive modifications by preserving public 
+          encryption key data. Individual security requirements may dictate 
+          that this data be omitted to deter information exposure.
+
+          Note: all fields stored in Intel low-byte/high-byte order.
+
+          Value     Size     Description
+          -----     ----     -----------
+ (CStore) 0x0019    2 bytes  Tag for this "extra" block type
+          TSize     2 bytes  Size of the store data
+          TData     TSize    Data about the store
+
+          TData:
+
+          Value     Size     Description
+          -----     ----     -----------
+          Version   2 bytes  Format version number - must be 0x0001 at this time
+          CStore    (var)    PKCS#7 data blob
+
+
+         -MVS Extra Field (0x0065):
+
+          The following is the layout of the MVS "extra" block.
+          Note: Some fields are stored in Big Endian format.
+          All text is in EBCDIC format unless otherwise specified.
+
+          Value       Size          Description
+          -----       ----          -----------
+  (MVS)   0x0065      2 bytes       Tag for this "extra" block type
+          TSize       2 bytes       Size for the following data block
+          ID          4 bytes       EBCDIC "Z390" 0xE9F3F9F0 or
+                                    "T4MV" for TargetFour
+          (var)       TSize-4       Attribute data (see APPENDIX B)
+
+
+         -OS/400 Extra Field (0x0065):
+
+          The following is the layout of the OS/400 "extra" block.
+          Note: Some fields are stored in Big Endian format.
+          All text is in EBCDIC format unless otherwise specified.
+
+          Value       Size          Description
+          -----       ----          -----------
+  (OS400) 0x0065      2 bytes       Tag for this "extra" block type
+          TSize       2 bytes       Size for the following data block
+          ID          4 bytes       EBCDIC "I400" 0xC9F4F0F0 or
+                                    "T4MV" for TargetFour
+          (var)       TSize-4       Attribute data (see APPENDIX A)
+
+
+          Third-party Mappings:
+          
+         -ZipIt Macintosh Extra Field (long) (0x2605):
+
+          The following is the layout of the ZipIt extra block 
+          for Macintosh. The local-header and central-header versions 
+          are identical. This block must be present if the file is 
+          stored MacBinary-encoded and it should not be used if the file 
+          is not stored MacBinary-encoded.
+
+          Value         Size        Description
+          -----         ----        -----------
+  (Mac2)  0x2605        Short       tag for this extra block type
+          TSize         Short       total data size for this block
+          "ZPIT"        beLong      extra-field signature
+          FnLen         Byte        length of FileName
+          FileName      variable    full Macintosh filename
+          FileType      Byte[4]     four-byte Mac file type string
+          Creator       Byte[4]     four-byte Mac creator string
+
+
+         -ZipIt Macintosh Extra Field (short, for files) (0x2705):
+
+          The following is the layout of a shortened variant of the
+          ZipIt extra block for Macintosh (without "full name" entry).
+          This variant is used by ZipIt 1.3.5 and newer for entries of
+          files (not directories) that do not have a MacBinary encoded
+          file. The local-header and central-header versions are identical.
+
+          Value         Size        Description
+          -----         ----        -----------
+  (Mac2b) 0x2705        Short       tag for this extra block type
+          TSize         Short       total data size for this block (12)
+          "ZPIT"        beLong      extra-field signature
+          FileType      Byte[4]     four-byte Mac file type string
+          Creator       Byte[4]     four-byte Mac creator string
+          fdFlags       beShort     attributes from FInfo.fdFlags,
+                                    may be omitted
+          0x0000        beShort     reserved, may be omitted
+
+
+         -ZipIt Macintosh Extra Field (short, for directories) (0x2805):
+
+          The following is the layout of a shortened variant of the
+          ZipIt extra block for Macintosh used only for directory
+          entries. This variant is used by ZipIt 1.3.5 and newer to 
+          save some optional Mac-specific information about directories.
+          The local-header and central-header versions are identical.
+
+          Value         Size        Description
+          -----         ----        -----------
+  (Mac2c) 0x2805        Short       tag for this extra block type
+          TSize         Short       total data size for this block (12)
+          "ZPIT"        beLong      extra-field signature
+          frFlags       beShort     attributes from DInfo.frFlags, may
+                                    be omitted
+          View          beShort     ZipIt view flag, may be omitted
+
+
+          The View field specifies ZipIt-internal settings as follows:
+
+          Bits of the Flags:
+              bit 0           if set, the folder is shown expanded (open)
+                              when the archive contents are viewed in ZipIt.
+              bits 1-15       reserved, zero;
+
+
+         -FWKCS MD5 Extra Field (0x4b46):
+
+          The FWKCS Contents_Signature System, used in
+          automatically identifying files independent of file name,
+          optionally adds and uses an extra field to support the
+          rapid creation of an enhanced contents_signature:
+
+              Header ID = 0x4b46
+              Data Size = 0x0013
+              Preface   = 'M','D','5'
+              followed by 16 bytes containing the uncompressed file's
+              128_bit MD5 hash(1), low byte first.
+
+          When FWKCS revises a .ZIP file central directory to add
+          this extra field for a file, it also replaces the
+          central directory entry for that file's uncompressed
+          file length with a measured value.
+
+          FWKCS provides an option to strip this extra field, if
+          present, from a .ZIP file central directory. In adding
+          this extra field, FWKCS preserves .ZIP file Authenticity
+          Verification; if stripping this extra field, FWKCS
+          preserves all versions of AV through PKZIP version 2.04g.
+
+          FWKCS, and FWKCS Contents_Signature System, are
+          trademarks of Frederick W. Kantor.
+
+          (1) R. Rivest, RFC1321.TXT, MIT Laboratory for Computer
+              Science and RSA Data Security, Inc., April 1992.
+              ll.76-77: "The MD5 algorithm is being placed in the
+              public domain for review and possible adoption as a
+              standard."
+
+
+         -Info-ZIP Unicode Comment Extra Field (0x6375):
+
+          Stores the UTF-8 version of the file comment as stored in the
+          central directory header. (Last Revision 20070912)
+
+          Value         Size        Description
+          -----         ----        -----------
+   (UCom) 0x6375        Short       tag for this extra block type ("uc")
+          TSize         Short       total data size for this block
+          Version       1 byte      version of this extra field, currently 1
+          ComCRC32      4 bytes     Comment Field CRC32 Checksum
+          UnicodeCom    Variable    UTF-8 version of the entry comment
+
+          Currently Version is set to the number 1.  If there is a need
+          to change this field, the version will be incremented.  Changes
+          may not be backward compatible so this extra field should not be
+          used if the version is not recognized.
+
+          The ComCRC32 is the standard zip CRC32 checksum of the File Comment
+          field in the central directory header.  This is used to verify that
+          the comment field has not changed since the Unicode Comment extra field
+          was created.  This can happen if a utility changes the File Comment 
+          field but does not update the UTF-8 Comment extra field.  If the CRC 
+          check fails, this Unicode Comment extra field should be ignored and 
+          the File Comment field in the header should be used instead.
+
+          The UnicodeCom field is the UTF-8 version of the File Comment field
+          in the header.  As UnicodeCom is defined to be UTF-8, no UTF-8 byte
+          order mark (BOM) is used.  The length of this field is determined by
+          subtracting the size of the previous fields from TSize.  If both the
+          File Name and Comment fields are UTF-8, the new General Purpose Bit
+          Flag, bit 11 (Language encoding flag (EFS)), can be used to indicate
+          both the header File Name and Comment fields are UTF-8 and, in this
+          case, the Unicode Path and Unicode Comment extra fields are not
+          needed and should not be created.  Note that, for backward
+          compatibility, bit 11 should only be used if the native character set
+          of the paths and comments being zipped up is already in UTF-8. It is
+          expected that the same file comment storage method, either general
+          purpose bit 11 or extra fields, be used in both the Local and Central
+          Directory Header for a file.
+
+
+         -Info-ZIP Unicode Path Extra Field (0x7075):
+
+          Stores the UTF-8 version of the file name field as stored in the
+          local header and central directory header. (Last Revision 20070912)
+
+          Value         Size        Description
+          -----         ----        -----------
+  (UPath) 0x7075        Short       tag for this extra block type ("up")
+          TSize         Short       total data size for this block
+          Version       1 byte      version of this extra field, currently 1
+          NameCRC32     4 bytes     File Name Field CRC32 Checksum
+          UnicodeName   Variable    UTF-8 version of the entry File Name
+
+          Currently Version is set to the number 1.  If there is a need
+          to change this field, the version will be incremented.  Changes
+          may not be backward compatible so this extra field should not be
+          used if the version is not recognized.
+
+          The NameCRC32 is the standard zip CRC32 checksum of the File Name
+          field in the header.  This is used to verify that the header
+          File Name field has not changed since the Unicode Path extra field
+          was created.  This can happen if a utility renames the File Name but
+          does not update the UTF-8 path extra field.  If the CRC check fails,
+          this UTF-8 Path Extra Field should be ignored and the File Name field
+          in the header should be used instead.
+
+          The UnicodeName is the UTF-8 version of the contents of the File Name
+          field in the header.  As UnicodeName is defined to be UTF-8, no UTF-8
+          byte order mark (BOM) is used.  The length of this field is determined
+          by subtracting the size of the previous fields from TSize.  If both
+          the File Name and Comment fields are UTF-8, the new General Purpose
+          Bit Flag, bit 11 (Language encoding flag (EFS)), can be used to
+          indicate that both the header File Name and Comment fields are UTF-8
+          and, in this case, the Unicode Path and Unicode Comment extra fields
+          are not needed and should not be created.  Note that, for backward
+          compatibility, bit 11 should only be used if the native character set
+          of the paths and comments being zipped up is already in UTF-8. It is
+          expected that the same file name storage method, either general
+          purpose bit 11 or extra fields, be used in both the Local and Central
+          Directory Header for a file.
+ 
+
+        -Microsoft Open Packaging Growth Hint (0xa220):
+
+          Value         Size        Description
+          -----         ----        -----------
+          0xa220        Short       tag for this extra block type
+          TSize         Short       size of Sig + PadVal + Padding
+          Sig           Short       verification signature (A028)
+          PadVal        Short       Initial padding value
+          Padding       variable    filled with NULL characters
+
+
+      file comment: (Variable)
+
+          The comment for this file.
+
+      number of this disk: (2 bytes)
+
+          The number of this disk, which contains the central
+          directory end record. If an archive is in ZIP64 format
+          and the value in this field is 0xFFFF, the actual value
+          will be in the corresponding 4 byte zip64 end of central
+          directory field.
+
+
+      number of the disk with the start of the central
+      directory: (2 bytes)
+
+          The number of the disk on which the central
+          directory starts. If an archive is in ZIP64 format
+          and the value in this field is 0xFFFF, the actual value
+          will be in the corresponding 4 byte zip64 end of central
+          directory field.
+
+      total number of entries in the central dir on 
+      this disk: (2 bytes)
+
+          The number of central directory entries on this disk.
+          If an archive is in ZIP64 format and the value in
+          this field is 0xFFFF, the actual count will be in the
+          corresponding 8 byte zip64 end of central
+          directory field.
+
+      total number of entries in the central dir: (2 bytes)
+
+          The total number of files in the .ZIP file. If an
+          archive is in ZIP64 format and the value in this field
+          is 0xFFFF, the actual count will be in the corresponding
+          8 byte zip64 end of central directory field.
+
+      size of the central directory: (4 bytes)
+
+          The size (in bytes) of the entire central directory.
+          If an archive is in ZIP64 format and the value in 
+          this field is 0xFFFFFFFF, the size will be in the 
+          corresponding 8 byte zip64 end of central 
+          directory field.
+
+      offset of start of central directory with respect to
+      the starting disk number:  (4 bytes)
+
+          Offset of the start of the central directory on the
+          disk on which the central directory starts. If an 
+          archive is in ZIP64 format and the value in this 
+          field is 0xFFFFFFFF, the offset will be in the 
+          corresponding 8 byte zip64 end of central 
+          directory field.
+
+      .ZIP file comment length: (2 bytes)
+
+          The length of the comment for this .ZIP file.
+
+      .ZIP file comment: (Variable)
+
+          The comment for this .ZIP file.  ZIP file comment data
+          is stored unsecured.  No encryption or data authentication
+          is applied to this area at this time.  Confidential information
+          should not be stored in this section.
+
+      zip64 extensible data sector    (variable size)
+
+          (currently reserved for use by PKWARE)
+
+
+  K.  Splitting and Spanning ZIP files
+
+          Spanning is the process of segmenting a ZIP file across 
+          multiple removable media. This support has typically only 
+          been provided for DOS formatted floppy diskettes. 
+
+          File splitting is a newer derivative of spanning.  
+          Splitting follows the same segmentation process as
+          spanning; however, it does not require writing each
+          segment to a unique removable medium and instead supports
+          placing all pieces onto local or non-removable locations
+          such as file systems, local drives, folders, etc.
+
+          A key difference between spanned and split ZIP files is
+          that all pieces of a spanned ZIP file have the same name.  
+          Since each piece is written to a separate volume, no name 
+          collisions occur and each segment can reuse the original 
+          .ZIP file name given to the archive.
+
+          Sequence ordering for DOS spanned archives uses the DOS 
+          volume label to determine segment numbers.  Volume labels
+          for each segment are written using the form PKBACK#xxx, 
+          where xxx is the segment number written as a decimal 
+          value from 001 - nnn.
+
+          Split ZIP files are typically written to the same location
+          and are subject to name collisions if the spanned name
+          format is used since each segment will reside on the same 
+          drive. To avoid name collisions, split archives are named 
+          as follows.
+
+          Segment 1   = filename.z01
+          Segment n-1 = filename.z(n-1)
+          Segment n   = filename.zip
+
+          The .ZIP extension is used on the last segment to support
+          quickly reading the central directory.  The segment number
+          n should be a decimal value.
+
+          Spanned ZIP files may be PKSFX Self-extracting ZIP files.
+          PKSFX files may also be split, however, in this case
+          the first segment must be named filename.exe.  The first
+          segment of a split PKSFX archive must be large enough to
+          include the entire executable program.
+
+          Capacities for split archives are as follows.
+
+          Maximum number of segments = 4,294,967,295 - 1
+          Maximum .ZIP segment size = 4,294,967,295 bytes
+          Minimum segment size = 64K
+          Maximum PKSFX segment size = 2,147,483,647 bytes
+          
+          Segment sizes may be different; however, by convention, all
+          segment sizes should be the same with the exception of the
+          last, which may be smaller.  Local and central directory
+          header records must never be split across a segment boundary. 
+          When writing a header record, if the number of bytes remaining 
+          within a segment is less than the size of the header record,
+          end the current segment and write the header at the start
+          of the next segment.  The central directory may span segment
+          boundaries, but no single record in the central directory
+          should be split across segments.
+
+          Spanned/Split archives created using PKZIP for Windows
+          (V2.50 or greater), PKZIP Command Line (V2.50 or greater),
+          or PKZIP Explorer will include a special spanning 
+          signature as the first 4 bytes of the first segment of
+          the archive.  This signature (0x08074b50) will be 
+          followed immediately by the local header signature for
+          the first file in the archive.  
+
+          A special spanning marker may also appear in spanned/split 
+          archives if the spanning or splitting process starts but 
+          only requires one segment.  In this case the 0x08074b50 
+          signature will be replaced with the temporary spanning 
+          marker signature of 0x30304b50.  Split archives can
+          only be uncompressed by other versions of PKZIP that
+          know how to create a split archive.
+
+          The signature value 0x08074b50 is also used by some
+          ZIP implementations as a marker for the Data Descriptor 
+          record.  Conflict in this alternate assignment can be
+          avoided by checking the position of the signature
+          within the ZIP file to determine the use for which it
+          is intended.
+
+  L.  General notes:
+
+      1)  All fields unless otherwise noted are unsigned and stored
+          in Intel low-byte:high-byte, low-word:high-word order.
+
+      2)  String fields are not null terminated, since the
+          length is given explicitly.
+
+      3)  The entries in the central directory may not necessarily
+          be in the same order that files appear in the .ZIP file.
+
+      4)  If one of the fields in the end of central directory
+          record is too small to hold required data, the field
+          should be set to -1 (0xFFFF or 0xFFFFFFFF) and the
+          ZIP64 format record should be created.
+
+      5)  The end of central directory record and the
+          Zip64 end of central directory locator record must
+          reside on the same disk when splitting or spanning
+          an archive.
+
+VI. Explanation of compression methods
+--------------------------------------
+
+UnShrinking - Method 1
+----------------------
+
+Shrinking is a Dynamic Ziv-Lempel-Welch compression algorithm
+with partial clearing.  The initial code size is 9 bits, and
+the maximum code size is 13 bits.  Shrinking differs from
+conventional Dynamic Ziv-Lempel-Welch implementations in several
+respects:
+
+1)  The code size is controlled by the compressor, and is not
+    automatically increased when codes larger than the current
+    code size are created (but not necessarily used).  When
+    the decompressor encounters the code sequence 256
+    (decimal) followed by 1, it should increase the code size
+    read from the input stream to the next bit size.  No
+    blocking of the codes is performed, so the next code at
+    the increased size should be read from the input stream
+    immediately after where the previous code at the smaller
+    bit size was read.  Again, the decompressor should not
+    increase the code size used until the sequence 256,1 is
+    encountered.
+
+2)  When the table becomes full, total clearing is not
+    performed.  Rather, when the compressor emits the code
+    sequence 256,2 (decimal), the decompressor should clear
+    all leaf nodes from the Ziv-Lempel tree, and continue to
+    use the current code size.  The nodes that are cleared
+    from the Ziv-Lempel tree are then re-used, with the lowest
+    code value re-used first, and the highest code value
+    re-used last.  The compressor can emit the sequence 256,2
+    at any time.
+
+Expanding - Methods 2-5
+-----------------------
+
+The Reducing algorithm is actually a combination of two
+distinct algorithms.  The first algorithm compresses repeated
+byte sequences, and the second algorithm takes the compressed
+stream from the first algorithm and applies a probabilistic
+compression method.
+
+The probabilistic compression stores an array of 'follower
+sets' S(j), for j=0 to 255, corresponding to each possible
+ASCII character.  Each set contains between 0 and 32
+characters, to be denoted as S(j)[0],...,S(j)[m], where m<32.
+The sets are stored at the beginning of the data area for a
+Reduced file, in reverse order, with S(255) first, and S(0)
+last.
+
+The sets are encoded as { N(j), S(j)[0],...,S(j)[N(j)-1] },
+where N(j) is the size of set S(j).  N(j) can be 0, in which
+case the follower set for S(j) is empty.  Each N(j) value is
+encoded in 6 bits, followed by N(j) eight bit character values
+corresponding to S(j)[0] to S(j)[N(j)-1] respectively.  If
+N(j) is 0, then no values for S(j) are stored, and the value
+for N(j-1) immediately follows.
+
+Immediately after the follower sets is the compressed data
+stream.  The compressed data stream can be interpreted for the
+probabilistic decompression as follows:
+
+let Last-Character <- 0.
+loop until done
+    if the follower set S(Last-Character) is empty then
+        read 8 bits from the input stream, and copy this
+        value to the output stream.
+    otherwise if the follower set S(Last-Character) is non-empty then
+        read 1 bit from the input stream.
+        if this bit is not zero then
+            read 8 bits from the input stream, and copy this
+            value to the output stream.
+        otherwise if this bit is zero then
+            read B(N(Last-Character)) bits from the input
+            stream, and assign this value to I.
+            Copy the value of S(Last-Character)[I] to the
+            output stream.
+
+    assign the last value placed on the output stream to
+    Last-Character.
+end loop
+
+B(N(j)) is defined as the minimal number of bits required to
+encode the value N(j)-1.
+
+The decompressed stream from above can then be expanded to
+re-create the original file as follows:
+
+let State <- 0.
+
+loop until done
+    read 8 bits from the input stream into C.
+    case State of
+        0:  if C is not equal to DLE (144 decimal) then
+                copy C to the output stream.
+            otherwise if C is equal to DLE then
+                let State <- 1.
+
+        1:  if C is non-zero then
+                let V <- C.
+                let Len <- L(V)
+                let State <- F(Len).
+            otherwise if C is zero then
+                copy the value 144 (decimal) to the output stream.
+                let State <- 0
+
+        2:  let Len <- Len + C
+            let State <- 3.
+
+        3:  move backwards D(V,C) bytes in the output stream
+            (if this position is before the start of the output
+            stream, then assume that all the data before the
+            start of the output stream is filled with zeros).
+            copy Len+3 bytes from this position to the output stream.
+            let State <- 0.
+    end case
+end loop
+
+The functions F, L, and D are dependent on the 'compression
+factor', 1 through 4, and are defined as follows:
+
+For compression factor 1:
+    L(X) equals the lower 7 bits of X.
+    F(X) equals 2 if X equals 127 otherwise F(X) equals 3.
+    D(X,Y) equals the (upper 1 bit of X) * 256 + Y + 1.
+For compression factor 2:
+    L(X) equals the lower 6 bits of X.
+    F(X) equals 2 if X equals 63 otherwise F(X) equals 3.
+    D(X,Y) equals the (upper 2 bits of X) * 256 + Y + 1.
+For compression factor 3:
+    L(X) equals the lower 5 bits of X.
+    F(X) equals 2 if X equals 31 otherwise F(X) equals 3.
+    D(X,Y) equals the (upper 3 bits of X) * 256 + Y + 1.
+For compression factor 4:
+    L(X) equals the lower 4 bits of X.
+    F(X) equals 2 if X equals 15 otherwise F(X) equals 3.
+    D(X,Y) equals the (upper 4 bits of X) * 256 + Y + 1.
+
+Imploding - Method 6
+--------------------
+
+The Imploding algorithm is actually a combination of two distinct
+algorithms.  The first algorithm compresses repeated byte
+sequences using a sliding dictionary.  The second algorithm is
+used to compress the encoding of the sliding dictionary output,
+using multiple Shannon-Fano trees.
+
+The Imploding algorithm can use a 4K or 8K sliding dictionary
+size. The dictionary size used can be determined by bit 1 in the
+general purpose flag word; a 0 bit indicates a 4K dictionary
+while a 1 bit indicates an 8K dictionary.
+
+The Shannon-Fano trees are stored at the start of the compressed
+file. The number of trees stored is defined by bit 2 in the
+general purpose flag word; a 0 bit indicates two trees stored, a
+1 bit indicates three trees are stored.  If 3 trees are stored,
+the first Shannon-Fano tree represents the encoding of the
+Literal characters, the second tree represents the encoding of
+the Length information, the third represents the encoding of the
+Distance information.  When 2 Shannon-Fano trees are stored, the
+Length tree is stored first, followed by the Distance tree.
+
+The Literal Shannon-Fano tree, if present, is used to represent
+the entire ASCII character set, and contains 256 values.  This
+tree is used to compress any data not compressed by the sliding
+dictionary algorithm.  When this tree is present, the Minimum
+Match Length for the sliding dictionary is 3.  If this tree is
+not present, the Minimum Match Length is 2.
+
+The Length Shannon-Fano tree is used to compress the Length part
+of the (length,distance) pairs from the sliding dictionary
+output.  The Length tree contains 64 values, ranging from the
+Minimum Match Length, to 63 plus the Minimum Match Length.
+
+The Distance Shannon-Fano tree is used to compress the Distance
+part of the (length,distance) pairs from the sliding dictionary
+output. The Distance tree contains 64 values, ranging from 0 to
+63, representing the upper 6 bits of the distance value.  The
+distance values themselves will be between 0 and the sliding
+dictionary size, either 4K or 8K.
+
+The Shannon-Fano trees themselves are stored in a compressed
+format. The first byte of the tree data represents the number of
+bytes of data representing the (compressed) Shannon-Fano tree
+minus 1.  The remaining bytes represent the Shannon-Fano tree
+data encoded as:
+
+    High 4 bits: Number of values at this bit length + 1. (1 - 16)
+    Low  4 bits: Bit Length needed to represent value + 1. (1 - 16)
+
+The Shannon-Fano codes can be constructed from the bit lengths
+using the following algorithm:
+
+1)  Sort the Bit Lengths in ascending order, while retaining the
+    order of the original lengths stored in the file.
+
+2)  Generate the Shannon-Fano trees:
+
+    Code <- 0
+    CodeIncrement <- 0
+    LastBitLength <- 0
+    i <- number of Shannon-Fano codes - 1   (either 255 or 63)
+
+    loop while i >= 0
+        Code = Code + CodeIncrement
+        if BitLength(i) <> LastBitLength then
+            LastBitLength=BitLength(i)
+            CodeIncrement = 1 shifted left (16 - LastBitLength)
+        ShannonCode(i) = Code
+        i <- i - 1
+    end loop
+
+3)  Reverse the order of all the bits in the above ShannonCode()
+    vector, so that the most significant bit becomes the least
+    significant bit.  For example, the value 0x1234 (hex) would
+    become 0x2C48 (hex).
+
+4)  Restore the order of Shannon-Fano codes as originally stored
+    within the file.
+
+Example:
+
+    This example will show the encoding of a Shannon-Fano tree
+    of size 8.  Notice that the actual Shannon-Fano trees used
+    for Imploding are either 64 or 256 entries in size.
+
+Example:   0x02, 0x42, 0x01, 0x13
+
+    The first byte indicates 3 values in this table.  Decoding the
+    bytes:
+            0x42 = 5 codes of 3 bits long
+            0x01 = 1 code  of 2 bits long
+            0x13 = 2 codes of 4 bits long
+
+    This would generate the original bit length array of:
+    (3, 3, 3, 3, 3, 2, 4, 4)
+
+    There are 8 codes in this table for the values 0 thru 7.  Using 
+    the algorithm to obtain the Shannon-Fano codes produces:
+
+                                  Reversed     Order     Original
+Val  Sorted   Constructed Code      Value     Restored    Length
+---  ------   -----------------   --------    --------    ------
+0:     2      1100000000000000        11       101          3
+1:     3      1010000000000000       101       001          3
+2:     3      1000000000000000       001       110          3
+3:     3      0110000000000000       110       010          3
+4:     3      0100000000000000       010       100          3
+5:     3      0010000000000000       100        11          2
+6:     4      0001000000000000      1000      1000          4
+7:     4      0000000000000000      0000      0000          4
+
+The values in the Val, Order Restored and Original Length columns
+now represent the Shannon-Fano encoding tree that can be used for
+decoding the Shannon-Fano encoded data.  How to parse the
+variable length Shannon-Fano values from the data stream is beyond
+the scope of this document.  (See the references listed at the end of
+this document for more information.)  However, traditional decoding
+schemes used for Huffman variable length decoding, such as the
+Greenlaw algorithm, can be successfully applied.
+
+The compressed data stream begins immediately after the
+compressed Shannon-Fano data.  The compressed data stream can be
+interpreted as follows:
+
+loop until done
+    read 1 bit from input stream.
+
+    if this bit is non-zero then       (encoded data is literal data)
+        if Literal Shannon-Fano tree is present
+            read and decode character using Literal Shannon-Fano tree.
+        otherwise
+            read 8 bits from input stream.
+        copy character to the output stream.
+    otherwise              (encoded data is sliding dictionary match)
+        if 8K dictionary size
+            read 7 bits for offset Distance (lower 7 bits of offset).
+        otherwise
+            read 6 bits for offset Distance (lower 6 bits of offset).
+
+        using the Distance Shannon-Fano tree, read and decode the
+          upper 6 bits of the Distance value.
+
+        using the Length Shannon-Fano tree, read and decode
+          the Length value.
+
+        Length <- Length + Minimum Match Length
+
+        if Length = 63 + Minimum Match Length
+            read 8 bits from the input stream,
+            add this value to Length.
+
+        move backwards Distance+1 bytes in the output stream, and
+        copy Length characters from this position to the output
+        stream.  (if this position is before the start of the output
+        stream, then assume that all the data before the start of
+        the output stream is filled with zeros).
+end loop
+
+Tokenizing - Method 7
+---------------------
+
+This method is not used by PKZIP.
+
+Deflating - Method 8
+--------------------
+
+The Deflate algorithm is similar to the Implode algorithm using
+a sliding dictionary of up to 32K with secondary compression
+from Huffman/Shannon-Fano codes.
+
+The compressed data is stored in blocks with a header describing
+the block and the Huffman codes used in the data block.  The header
+format is as follows:
+
+   Bit 0: Last Block bit     This bit is set to 1 if this is the last
+                             compressed block in the data.
+   Bits 1-2: Block type
+      00 (0) - Block is stored - All stored data is byte aligned.
+               Skip bits until next byte, then next word = block 
+               length, followed by the ones complement of the block
+               length word. Remaining data in block is the stored 
+               data.
+
+      01 (1) - Use fixed Huffman codes for literal and distance codes.
+               Lit Code    Bits             Dist Code   Bits
+               ---------   ----             ---------   ----
+                 0 - 143    8                 0 - 31      5
+               144 - 255    9
+               256 - 279    7
+               280 - 287    8
+
+               Literal codes 286-287 and distance codes 30-31 are 
+               never used but participate in the Huffman construction.
+
+      10 (2) - Dynamic Huffman codes.  (See expanding Huffman codes)
+
+      11 (3) - Reserved - Flag a "Error in compressed data" if seen.
+
+Expanding Huffman Codes
+-----------------------
+If the data block is stored with dynamic Huffman codes, the Huffman
+codes are sent in the following compressed format:
+
+   5 Bits: # of Literal codes sent - 257 (257 - 286)
+           All other codes are never sent.
+   5 Bits: # of Dist codes - 1           (1 - 32)
+   4 Bits: # of Bit Length codes - 4     (4 - 19)
+
+The Huffman codes are sent as bit lengths and the codes are built as
+described in the implode algorithm.  The bit lengths themselves are
+compressed with Huffman codes.  There are 19 bit length codes:
+
+   0 - 15: Represent bit lengths of 0 - 15
+       16: Copy the previous bit length 3 - 6 times.
+           The next 2 bits indicate repeat length (0 = 3, ... ,3 = 6)
+              Example:  Codes 8, 16 (+2 bits 11), 16 (+2 bits 10) will
+                        expand to 12 bit lengths of 8 (1 + 6 + 5)
+       17: Repeat a bit length of 0 for 3 - 10 times. (3 bits of length)
+       18: Repeat a bit length of 0 for 11 - 138 times (7 bits of length)
+
+The lengths of the bit length codes are sent packed 3 bits per value
+(0 - 7) in the following order:
+
+   16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
+
+The Huffman codes should be built as described in the Implode algorithm
+except codes are assigned starting at the shortest bit length, i.e. the
+shortest code should be all 0's rather than all 1's.  Also, codes with
+a bit length of zero do not participate in the tree construction.  The
+codes are then used to decode the bit lengths for the literal and 
+distance tables.
+
+The bit lengths for the literal tables are sent first with the number
+of entries sent described by the 5 bits sent earlier.  There are up
+to 286 literal characters; the first 256 represent the respective 8
+bit character, code 256 represents the End-Of-Block code, the remaining
+29 codes represent copy lengths of 3 thru 258.  There are up to 30
+distance codes representing distances from 1 thru 32k as described
+below.
+
+                             Length Codes
+                             ------------
+      Extra             Extra              Extra              Extra
+ Code Bits Length  Code Bits Lengths  Code Bits Lengths  Code Bits Length(s)
+ ---- ---- ------  ---- ---- -------  ---- ---- -------  ---- ---- ---------
+  257   0     3     265   1   11,12    273   3   35-42    281   5  131-162
+  258   0     4     266   1   13,14    274   3   43-50    282   5  163-194
+  259   0     5     267   1   15,16    275   3   51-58    283   5  195-226
+  260   0     6     268   1   17,18    276   3   59-66    284   5  227-257
+  261   0     7     269   2   19-22    277   4   67-82    285   0    258
+  262   0     8     270   2   23-26    278   4   83-98
+  263   0     9     271   2   27-30    279   4   99-114
+  264   0    10     272   2   31-34    280   4  115-130
+
+                            Distance Codes
+                            --------------
+      Extra           Extra             Extra               Extra
+ Code Bits Dist  Code Bits  Dist   Code Bits Distance  Code Bits Distance
+ ---- ---- ----  ---- ---- ------  ---- ---- --------  ---- ---- --------
+   0   0    1      8   3   17-24    16    7  257-384    24   11  4097-6144
+   1   0    2      9   3   25-32    17    7  385-512    25   11  6145-8192
+   2   0    3     10   4   33-48    18    8  513-768    26   12  8193-12288
+   3   0    4     11   4   49-64    19    8  769-1024   27   12 12289-16384
+   4   1   5,6    12   5   65-96    20    9 1025-1536   28   13 16385-24576
+   5   1   7,8    13   5   97-128   21    9 1537-2048   29   13 24577-32768
+   6   2   9-12   14   6  129-192   22   10 2049-3072
+   7   2  13-16   15   6  193-256   23   10 3073-4096
+
+The compressed data stream begins immediately after the
+compressed header data.  The compressed data stream can be
+interpreted as follows:
+
+do
+   read header from input stream.
+
+   if stored block
+      skip bits until byte aligned
+      read count and 1's complement of count
+      copy count bytes data block
+   otherwise
+      loop until end of block code sent
+         decode literal character from input stream
+         if literal < 256
+            copy character to the output stream
+         otherwise
+            if literal = end of block
+               break from loop
+            otherwise
+               decode distance from input stream
+
+               move backwards distance bytes in the output stream, and
+               copy length characters from this position to the output
+               stream.
+      end loop
+while not last block
+
+if data descriptor exists
+   skip bits until byte aligned
+   read crc and sizes
+endif
+
+Enhanced Deflating - Method 9
+-----------------------------
+
+The Enhanced Deflating algorithm is similar to Deflate but
+uses a sliding dictionary of up to 64K. Deflate64(tm) is supported
+by the Deflate extractor. 
+
+BZIP2 - Method 12
+-----------------
+
+BZIP2 is an open-source data compression algorithm developed by 
+Julian Seward.  Information and source code for this algorithm
+can be found on the internet.
+
+LZMA - Method 14 (EFS)
+----------------------
+
+LZMA is a block-oriented, general purpose data compression algorithm  
+developed and maintained by Igor Pavlov.  It is a derivative of LZ77
+that utilizes Markov chains and a range coder.  Information and 
+source code for this algorithm can be found on the internet.  Consult 
+with the author of this algorithm for information on terms or 
+restrictions on use.
+
+Support for LZMA within the ZIP format is defined as follows:   
+
+The Compression method field within the ZIP Local and Central 
+Header records will be set to the value 14 to indicate data was
+compressed using LZMA. 
+
+The Version needed to extract field within the ZIP Local and 
+Central Header records will be set to 6.3 to indicate the 
+minimum ZIP format version supporting this feature.
+
+File data compressed using the LZMA algorithm must be placed 
+immediately following the Local Header for the file.  If a 
+standard ZIP encryption header is required, it will follow 
+the Local Header and will precede the LZMA compressed file 
+data segment.  The location of LZMA compressed data segment 
+within the ZIP format will be as shown:
+
+    [local header file 1]
+    [encryption header file 1]
+    [LZMA compressed data segment for file 1]
+    [data descriptor 1]
+    [local header file 2]
+
+The encryption header and data descriptor records may
+be conditionally present.  The LZMA Compressed Data Segment 
+will consist of an LZMA Properties Header followed by the 
+LZMA Compressed Data as shown:
+
+    [LZMA properties header for file 1]
+    [LZMA compressed data for file 1]
+
+The LZMA Compressed Data will be stored as provided by the 
+LZMA compression library.  Compressed size, uncompressed 
+size and other file characteristics about the file being 
+compressed must be stored in standard ZIP storage format.
+
+The LZMA Properties Header will store specific data required to 
+decompress the LZMA compressed Data.  This data is set by the 
+LZMA compression engine using the function WriteCoderProperties() 
+as documented within the LZMA SDK. 
+ 
+Storage fields for the property information within the LZMA 
+Properties Header are as follows:
+
+     LZMA Version Information 2 bytes
+     LZMA Properties Size 2 bytes
+     LZMA Properties Data variable, defined by "LZMA Properties Size"
+
+LZMA Version Information - this field identifies which version of 
+     the LZMA SDK was used to compress a file.  The first byte will 
+     store the major version number of the LZMA SDK and the second 
+     byte will store the minor number.  
+
+LZMA Properties Size - this field defines the size of the remaining 
+     property data.  Typically this size should be determined by the 
+     version of the SDK.  This size field is included as a convenience
+     and to help avoid any ambiguity should it arise in the future due
+     to changes in this compression algorithm. 
+
+LZMA Property Data - this variable sized field records the required 
+     values for the decompressor as defined by the LZMA SDK.  The 
+     data stored in this field should be obtained using the 
+     WriteCoderProperties() in the version of the SDK defined by 
+     the "LZMA Version Information" field.  
+
+The layout of the "LZMA Properties Data" field is a function of the
+LZMA compression algorithm.  It is possible that this layout may be
+changed by the author over time.  The data layout in version 4.32 
+of the LZMA SDK defines a 5 byte array that uses 4 bytes to store 
+the dictionary size in little-endian order. This is preceded by a 
+single packed byte as the first element of the array that contains
+the following fields:
+
+     PosStateBits
+     LiteralPosStateBits
+     LiteralContextBits
+
+Refer to the LZMA documentation for a more detailed explanation of 
+these fields.  
+
+Data compressed with method 14, LZMA, may include an end-of-stream
+(EOS) marker ending the compressed data stream.  This marker is not
+required, but its use is highly recommended to facilitate processing
+and implementers should include the EOS marker whenever possible.
+When the EOS marker is used, general purpose bit 1 must be set.  If
+general purpose bit 1 is not set, the EOS marker is not present.
+
+WavPack - Method 97
+-------------------
+
+Information describing the use of compression method 97 is 
+provided by WinZIP International, LLC.  This method relies on the
+open source WavPack audio compression utility developed by David Bryant.  
+Information on WavPack is available at www.wavpack.com.  Please consult 
+with the author of this algorithm for information on terms and 
+restrictions on use.
+
+WavPack data for a file begins immediately after the end of the
+local header data.  This data is the output from WavPack compression
+routines.  Within the ZIP file, the use of WavPack compression is
+indicated by setting the compression method field to a value of 97 
+in both the local header and the central directory header.  The Version 
+needed to extract and version made by fields use the same values as are 
+used for data compressed using the Deflate algorithm.
+
+An implementation note for storing digital sample data when using 
+WavPack compression within ZIP files is that all of the bytes of
+the sample data should be compressed.  This includes any unused
+bits up to the byte boundary.  An example is a 2 byte sample that
+uses only 12 bits for the sample data with 4 unused bits.  If only
+12 bits are passed as the sample size to the WavPack routines, the 4 
+unused bits will be set to 0 on extraction regardless of their original 
+state.  To avoid this, the full 16 bits of the sample data size
+should be provided. 
+
+PPMd - Method 98
+----------------
+
+PPMd is a data compression algorithm developed by Dmitry Shkarin
+which includes a carryless rangecoder developed by Dmitry Subbotin.
+This algorithm is based on prediction by partial matching (PPM) on
+multiple order contexts.  Information and source code for this algorithm
+can be found on the internet. Consult with the author of this
+algorithm for information on terms or restrictions on use.
+
+Support for PPMd within the ZIP format currently is provided only 
+for version I, revision 1 of the algorithm.  Storage requirements
+for using this algorithm are as follows:
+
+Parameters needed to control the algorithm are stored in the two
+bytes immediately preceding the compressed data.  These bytes are
+used to store the following fields:
+
+Model order - sets the maximum model order, default is 8, possible
+              values are from 2 to 16 inclusive
+
+Sub-allocator size - sets the size of sub-allocator in MB, default is 50,
+            possible values are from 1MB to 256MB inclusive
+
+Model restoration method - sets the method used to restart context
+            model at memory insufficiency, values are:
+
+            0 - restarts model from scratch - default
+            1 - cut off model - decreases performance by as much as 2x
+            2 - freeze context tree - not recommended
+
+An example for packing these fields into the 2 byte storage field is
+illustrated below.  These values are stored in Intel low-byte/high-byte
+order.
+
+wPPMd = (Model order - 1) + 
+        ((Sub-allocator size - 1) << 4) + 
+        (Model restoration method << 12)
+
+
+VII. Traditional PKWARE Encryption
+----------------------------------
+
+The following information discusses the decryption steps
+required to support traditional PKWARE encryption.  This
+form of encryption is considered weak by today's standards
+and its use is recommended only for situations with
+low security needs or for compatibility with older .ZIP 
+applications.
+
+Decryption
+----------
+
+PKWARE is grateful to Mr. Roger Schlafly for his expert contribution 
+towards the development of PKWARE's traditional encryption.
+
+PKZIP encrypts the compressed data stream.  Encrypted files must
+be decrypted before they can be extracted.
+
+Each encrypted file has an extra 12 bytes stored at the start of
+the data area defining the encryption header for that file.  The
+encryption header is originally set to random values, and then
+itself encrypted, using three 32-bit keys.  The key values are
+initialized using the supplied encryption password.  After each byte
+is encrypted, the keys are then updated using pseudo-random number
+generation techniques in combination with the same CRC-32 algorithm
+used in PKZIP and described elsewhere in this document.
+
+The following are the basic steps required to decrypt a file:
+
+1) Initialize the three 32-bit keys with the password.
+2) Read and decrypt the 12-byte encryption header, further
+   initializing the encryption keys.
+3) Read and decrypt the compressed data stream using the
+   encryption keys.
+
+Step 1 - Initializing the encryption keys
+-----------------------------------------
+
+Key(0) <- 305419896
+Key(1) <- 591751049
+Key(2) <- 878082192
+
+loop for i <- 0 to length(password)-1
+    update_keys(password(i))
+end loop
+
+Where update_keys() is defined as:
+
+update_keys(char):
+  Key(0) <- crc32(Key(0),char)
+  Key(1) <- Key(1) + (Key(0) & 000000ffH)
+  Key(1) <- Key(1) * 134775813 + 1
+  Key(2) <- crc32(Key(2),Key(1) >> 24)
+end update_keys
+
+Where crc32(old_crc,char) is a routine that given a CRC value and a
+character, returns an updated CRC value after applying the CRC-32
+algorithm described elsewhere in this document.
+
+Step 2 - Decrypting the encryption header
+-----------------------------------------
+
+The purpose of this step is to further initialize the encryption
+keys, based on random data, to render a plaintext attack on the
+data ineffective.
+
+Read the 12-byte encryption header into Buffer, in locations
+Buffer(0) thru Buffer(11).
+
+loop for i <- 0 to 11
+    C <- Buffer(i) ^ decrypt_byte()
+    update_keys(C)
+    Buffer(i) <- C
+end loop
+
+Where decrypt_byte() is defined as:
+
+unsigned char decrypt_byte()
+    local unsigned short temp
+    temp <- Key(2) | 2
+    decrypt_byte <- (temp * (temp ^ 1)) >> 8
+end decrypt_byte
+
+After the header is decrypted,  the last 1 or 2 bytes in Buffer
+should be the high-order word/byte of the CRC for the file being
+decrypted, stored in Intel low-byte/high-byte order.  Versions of
+PKZIP prior to 2.0 used a 2 byte CRC check; a 1 byte CRC check is
+used on versions after 2.0.  This can be used to test if the password
+supplied is correct or not.
+
+Step 3 - Decrypting the compressed data stream
+----------------------------------------------
+
+The compressed data stream can be decrypted as follows:
+
+loop until done
+    read a character into C
+    Temp <- C ^ decrypt_byte()
+    update_keys(Temp)
+    output Temp
+end loop
+
+
+VIII. Strong Encryption Specification
+-------------------------------------
+
+The Strong Encryption technology defined in this specification is 
+covered under a pending patent application. The use or implementation
+in a product of certain technological aspects set forth in the current
+APPNOTE, including those with regard to strong encryption, patching, 
+or extended tape operations requires a license from PKWARE. Portions
+of this Strong Encryption technology are available for use at no charge.
+Contact PKWARE for licensing terms and conditions. Refer to section II
+of this APPNOTE (Contacting PKWARE) for information on how to 
+contact PKWARE. 
+
+Version 5.x of this specification introduced support for strong 
+encryption algorithms.  These algorithms can be used with either 
+a password or an X.509v3 digital certificate to encrypt each file. 
+This format specification supports either password or certificate 
+based encryption to meet the security needs of today, to enable 
+interoperability between users within both PKI and non-PKI 
+environments, and to ensure interoperability between different 
+computing platforms that are running a ZIP program.  
+
+Password based encryption is the most common form of encryption 
+people are familiar with.  However, inherent weaknesses with 
+passwords (e.g. susceptibility to dictionary/brute force attack) 
+as well as password management and support issues make certificate 
+based encryption a more secure and scalable option.  Industry 
+efforts and support are defining and moving towards more advanced 
+security solutions built around X.509v3 digital certificates and 
+Public Key Infrastructures (PKI) because of the greater scalability, 
+administrative options, and more robust security over traditional 
+password based encryption. 
+
+Most standard encryption algorithms are supported with this
+specification. Reference implementations for many of these 
+algorithms are available from either commercial or open source 
+distributors.  Readily available cryptographic toolkits make
+implementation of the encryption features straightforward.  
+This document is not intended to provide a treatise on data 
+encryption principles or theory.  Its purpose is to document the 
+data structures required for implementing interoperable data 
+encryption within the .ZIP format.  It is strongly recommended that 
+you have a good understanding of data encryption before reading 
+further.
+
+The algorithms introduced in Version 5.0 of this specification 
+include:
+
+    RC2 40 bit, 64 bit, and 128 bit
+    RC4 40 bit, 64 bit, and 128 bit
+    DES
+    3DES 112 bit and 168 bit
+  
+Version 5.1 adds support for the following:
+
+    AES 128 bit, 192 bit, and 256 bit
+
+
+Version 6.1 introduces encryption data changes to support 
+interoperability with Smartcard and USB Token certificate storage 
+methods which do not support the OAEP strengthening standard.
+
+Version 6.2 introduces support for encrypting metadata by compressing 
+and encrypting the central directory data structure to reduce information 
+leakage.   Information leakage can occur in legacy ZIP applications 
+through exposure of information about a file even though that file is 
+stored encrypted.  The information exposed consists of file 
+characteristics stored within the records and fields defined by this 
+specification.  This includes data such as a file's name, its original 
+size, timestamp and CRC32 value. 
+
+Version 6.3 introduces support for encrypting data using the Blowfish
+and Twofish algorithms.  These are symmetric block ciphers developed 
+by Bruce Schneier.  Blowfish supports using a variable length key from 
+32 to 448 bits.  Block size is 64 bits.  Implementations should use 16
+rounds and the only mode supported within ZIP files is CBC. Twofish 
+supports key sizes 128, 192 and 256 bits.  Block size is 128 bits.  
+Implementations should use 16 rounds and the only mode supported within
+ZIP files is CBC.  Information and source code for both Blowfish and 
+Twofish algorithms can be found on the internet.  Consult with the author
+of these algorithms for information on terms or restrictions on use.
+
+Central Directory Encryption provides greater protection against 
+information leakage by encrypting the Central Directory structure and 
+by masking key values that are replicated in the unencrypted Local 
+Header.   ZIP compatible programs that cannot interpret an encrypted 
+Central Directory structure cannot rely on the data in the corresponding 
+Local Header for decompression information.  
+
+Extra Field records that may contain information about a file that should 
+not be exposed should not be stored in the Local Header and should only 
+be written to the Central Directory where they can be encrypted.  This 
+design currently does not support streaming.  Information in the End of 
+Central Directory record, the Zip64 End of Central Directory Locator, 
+and the Zip64 End of Central Directory record is not encrypted.  Access 
+to view data on files within a ZIP file with an encrypted Central Directory
+requires the appropriate password or private key for decryption prior to 
+viewing any files, or any information about the files, in the archive.  
+
+Older ZIP compatible programs not familiar with the Central Directory 
+Encryption feature will no longer be able to recognize the Central 
+Directory and may assume the ZIP file is corrupt.  Programs that 
+attempt streaming access using Local Headers will see invalid 
+information for each file.  Central Directory Encryption need not be 
+used for every ZIP file.  Its use is recommended for greater security.  
+ZIP files not using Central Directory Encryption should operate as 
+in the past. 
+
+This strong encryption feature specification is intended to provide for 
+scalable, cross-platform encryption needs ranging from simple password
+encryption to authenticated public/private key encryption.  
+
+Encryption provides data confidentiality and privacy.  It is 
+recommended that you combine X.509 digital signing with encryption 
+to add authentication and non-repudiation.
+
+
+Single Password Symmetric Encryption Method:
+-------------------------------------------
+
+The Single Password Symmetric Encryption Method using strong 
+encryption algorithms operates similarly to the traditional 
+PKWARE encryption defined in this format.  Additional data 
+structures are added to support the processing needs of the 
+strong algorithms.
+
+The Strong Encryption data structures are:
+
+1. General Purpose Bits - Bits 0 and 6 of the General Purpose bit 
+flag in both local and central header records.  Both bits set 
+indicates strong encryption.  Bit 13, when set, indicates the Central
+Directory is encrypted and that selected fields in the Local Header
+are masked to hide their actual value.
+
+
+2. Extra Field 0x0017 in central header only.
+
+     Fields to consider in this record are:
+
+     Format - the data format identifier for this record.  The only
+     value allowed at this time is the integer value 2.
+
+     AlgId - integer identifier of the encryption algorithm from the
+     following range
+
+         0x6601 - DES
+         0x6602 - RC2 (version needed to extract < 5.2)
+         0x6603 - 3DES 168
+         0x6609 - 3DES 112
+         0x660E - AES 128 
+         0x660F - AES 192 
+         0x6610 - AES 256 
+         0x6702 - RC2 (version needed to extract >= 5.2)
+         0x6720 - Blowfish
+         0x6721 - Twofish
+         0x6801 - RC4
+         0xFFFF - Unknown algorithm
+
+     Bitlen - Explicit bit length of key
+
+         32 - 448 bits
+   
+     Flags - Processing flags needed for decryption
+
+         0x0001 - Password is required to decrypt
+         0x0002 - Certificates only
+         0x0003 - Password or certificate required to decrypt
+
+         Values > 0x0003 reserved for certificate processing
+
+
+3. Decryption header record preceding compressed file data.
+
+         -Decryption Header:
+
+          Value     Size     Description
+          -----     ----     -----------
+          IVSize    2 bytes  Size of initialization vector (IV)
+          IVData    IVSize   Initialization vector for this file
+          Size      4 bytes  Size of remaining decryption header data
+          Format    2 bytes  Format definition for this record
+          AlgID     2 bytes  Encryption algorithm identifier
+          Bitlen    2 bytes  Bit length of encryption key
+          Flags     2 bytes  Processing flags
+          ErdSize   2 bytes  Size of Encrypted Random Data
+          ErdData   ErdSize  Encrypted Random Data
+          Reserved1 4 bytes  Reserved certificate processing data
+          Reserved2 (var)    Reserved for certificate processing data
+          VSize     2 bytes  Size of password validation data
+          VData     VSize-4  Password validation data
+          VCRC32    4 bytes  Standard ZIP CRC32 of password validation data
+
+     IVData - The size of the IV should match the algorithm block size.
+              The IVData can be completely random data.  If the size of
+              the randomly generated data does not match the block size
+              it should be padded with zeros or truncated as
+              necessary.  If IVSize is 0, then IV = CRC32 + Uncompressed
+              File Size (as a 64 bit little-endian, unsigned integer value).
+
+     Format - the data format identifier for this record.  The only
+     value allowed at this time is the integer value 3.
+
+     AlgId - integer identifier of the encryption algorithm from the
+     following range
+
+         0x6601 - DES
+         0x6602 - RC2 (version needed to extract < 5.2)
+         0x6603 - 3DES 168
+         0x6609 - 3DES 112
+         0x660E - AES 128 
+         0x660F - AES 192 
+         0x6610 - AES 256 
+         0x6702 - RC2 (version needed to extract >= 5.2)
+         0x6720 - Blowfish
+         0x6721 - Twofish
+         0x6801 - RC4
+         0xFFFF - Unknown algorithm
+
+     Bitlen - Explicit bit length of key
+
+         32 - 448 bits
+   
+     Flags - Processing flags needed for decryption
+
+         0x0001 - Password is required to decrypt
+         0x0002 - Certificates only
+         0x0003 - Password or certificate required to decrypt
+
+         Values > 0x0003 reserved for certificate processing
+
+     ErdData - Encrypted random data is used to store random data that
+               is used to generate a file session key for encrypting 
+               each file.  SHA1 is used to calculate hash data used to 
+               derive keys.  File session keys are derived from a master 
+               session key generated from the user-supplied password.
+               If the Flags field in the decryption header contains 
+               the value 0x4000, then the ErdData field must be 
+               decrypted using 3DES. If the value 0x4000 is not set,
+               then the ErdData field must be decrypted using AlgId.
+
+
+     Reserved1 - Reserved for certificate processing, if value is
+               zero, then Reserved2 data is absent.  See the explanation
+               under the Certificate Processing Method for details on
+               this data structure.
+
+     Reserved2 - If present, the size of the Reserved2 data structure 
+               is located by skipping the first 4 bytes of this field 
+               and using the next 2 bytes as the remaining size.  See
+               the explanation under the Certificate Processing Method
+               for details on this data structure.
+
+     VSize - This size value will always include the 4 bytes of the
+             VCRC32 data and will be greater than 4 bytes.
+
+     VData - Random data for password validation.  This data is VSize-4
+             bytes in length and VSize must be a multiple of the encryption
+             block size.  VCRC32 is a checksum value of VData.  
+             VData and VCRC32 are stored encrypted and start the
+             stream of encrypted data for a file.
+
+
+4. Useful Tips
+
+Strong Encryption is always applied to a file after compression. The
+block oriented algorithms all operate in Cipher Block Chaining (CBC) 
+mode.  The block size used for AES encryption is 16.  All other block
+algorithms use a block size of 8.  Two ID's are defined for RC2 to 
+account for a discrepancy found in the implementation of the RC2
+algorithm in the cryptographic library on Windows XP SP1 and all 
+earlier versions of Windows.  It is recommended that zero length files
+not be encrypted, however programs should be prepared to extract them
+if they are found within a ZIP file.
+
+A pseudo-code representation of the encryption process is as follows:
+
+Password = GetUserPassword()
+MasterSessionKey = DeriveKey(SHA1(Password)) 
+RD = CryptographicStrengthRandomData() 
+For Each File
+   IV = CryptographicStrengthRandomData() 
+   VData = CryptographicStrengthRandomData()
+   VCRC32 = CRC32(VData)
+   FileSessionKey = DeriveKey(SHA1(IV + RD))
+   ErdData = Encrypt(RD,MasterSessionKey,IV) 
+   Encrypt(VData + VCRC32 + FileData, FileSessionKey,IV)
+Done
+
+The function names and parameter requirements will depend on
+the choice of the cryptographic toolkit selected.  Almost any
+toolkit supporting the reference implementations for each
+algorithm can be used.  The RSA BSAFE(r), OpenSSL, and Microsoft
+CryptoAPI libraries are all known to work well.  
+
+
+Single Password - Central Directory Encryption:
+-----------------------------------------------
+
+Central Directory Encryption is achieved within the .ZIP format by 
+encrypting the Central Directory structure.  This encapsulates the metadata 
+most often used for processing .ZIP files.  Additional metadata is stored for 
+redundancy in the Local Header for each file.  The process of concealing 
+metadata by encrypting the Central Directory does not protect the data within 
+the Local Header.  To avoid information leakage from the exposed metadata 
+in the Local Header, the fields containing information about a file are masked.  
+
+Local Header:
+
+Masking replaces the true content of the fields for a file in the Local 
+Header with false information.  When masked, the Local Header is not 
+suitable for streaming access and the options for data recovery of damaged
+archives are reduced.  Extra Data fields that may contain confidential
+data should not be stored within the Local Header.  The value set into
+the Version needed to extract field should be the correct value needed to
+extract the file without regard to Central Directory Encryption. The fields 
+within the Local Header targeted for masking when the Central Directory is 
+encrypted are:
+
+        Field Name                     Mask Value
+        ------------------             ---------------------------
+        compression method              0
+        last mod file time              0
+        last mod file date              0
+        crc-32                          0
+        compressed size                 0
+        uncompressed size               0
+        file name (variable size)       Base 16 value from the
+                                        range 1 - 0xFFFFFFFFFFFFFFFF
+                                        represented as a string whose
+                                        size will be set into the
+                                        file name length field
+
+The Base 16 value assigned as a masked file name is simply a sequentially
+incremented value for each file starting with 1 for the first file.  
+Modifications to a ZIP file may cause different values to be stored for 
+each file.  For compatibility, the file name field in the Local Header 
+should never be left blank.  As of Version 6.2 of this specification, 
+the Compression Method and Compressed Size fields are not yet masked.
+Fields having a value of 0xFFFF or 0xFFFFFFFF for the ZIP64 format
+should not be masked.  
+
+Encrypting the Central Directory:
+
+Encryption of the Central Directory does not include encryption of the 
+Central Directory Signature data, the Zip64 End of Central Directory
+record, the Zip64 End of Central Directory Locator, or the End
+of Central Directory record.  The ZIP file comment data is never
+encrypted.
+
+Before encrypting the Central Directory, it may optionally be compressed.
+Compression is not required, but for storage efficiency it is assumed
+this structure will be compressed before encrypting.  Similarly, this 
+specification supports compressing the Central Directory without
+requiring that it also be encrypted.  Early implementations of this
+feature will assume the encryption method applied to files matches the 
+encryption applied to the Central Directory.
+
+Encryption of the Central Directory is done in a manner similar to
+that of file encryption.  The encrypted data is preceded by a 
+decryption header.  The decryption header is known as the Archive
+Decryption Header.  The fields of this record are identical to
+the decryption header preceding each encrypted file.  The location
+of the Archive Decryption Header is determined by the value in the
+Start of the Central Directory field in the Zip64 End of Central
+Directory record.  When the Central Directory is encrypted, the
+Zip64 End of Central Directory record will always be present.
+
+The layout of the Zip64 End of Central Directory record for all
+versions starting with 6.2 of this specification will follow the
+Version 2 format.  The Version 2 format is as follows:
+
+The leading fixed size fields within the Version 1 format for this
+record remain unchanged.  The record signature for both Version 1 
+and Version 2 will be 0x06064b50.  Immediately following the last
+byte of the field known as the Offset of Start of Central 
+Directory With Respect to the Starting Disk Number will begin the 
+new fields defining Version 2 of this record.  
+
+New fields for Version 2:
+
+Note: all fields stored in Intel low-byte/high-byte order.
+
+          Value                 Size       Description
+          -----                 ----       -----------
+          Compression Method    2 bytes    Method used to compress the
+                                           Central Directory
+          Compressed Size       8 bytes    Size of the compressed data
+          Original   Size       8 bytes    Original uncompressed size
+          AlgId                 2 bytes    Encryption algorithm ID
+          BitLen                2 bytes    Encryption key length
+          Flags                 2 bytes    Encryption flags
+          HashID                2 bytes    Hash algorithm identifier
+          Hash Length           2 bytes    Length of hash data
+          Hash Data             (variable) Hash data
+
+The Compression Method accepts the same range of values as the 
+corresponding field in the Central Header.
+
+The Compressed Size and Original Size values will not include the
+data of the Central Directory Signature which is compressed or
+encrypted.
+
+The AlgId, BitLen, and Flags fields accept the same range of values
+as the corresponding fields within the 0x0017 record.
+
+Hash ID identifies the algorithm used to hash the Central Directory 
+data.  This data does not have to be hashed, in which case the
+values for both the HashID and Hash Length will be 0.  Possible 
+values for HashID are:
+
+      Value         Algorithm
+     ------         ---------
+     0x0000          none
+     0x0001          CRC32
+     0x8003          MD5
+     0x8004          SHA1
+     0x8007          RIPEMD160
+     0x800C          SHA256
+     0x800D          SHA384
+     0x800E          SHA512
+
+When the Central Directory data is signed, the same hash algorithm
+used to hash the Central Directory for signing should be used.
+This is recommended for processing efficiency, however, it is 
+permissible for any of the above algorithms to be used independent 
+of the signing process.
+
+The Hash Data will contain the hash data for the Central Directory.
+The length of this data will vary depending on the algorithm used.
+
+The Version Needed to Extract should be set to 62.
+
+The value for the Total Number of Entries on the Current Disk will
+be 0.  These records will no longer support random access when
+encrypting the Central Directory.
+
+When the Central Directory is compressed and/or encrypted, the
+End of Central Directory record will store the value 0xFFFFFFFF
+as the value for the Total Number of Entries in the Central
+Directory.  The value stored in the Total Number of Entries in
+the Central Directory on this Disk field will be 0.  The actual
+values will be stored in the equivalent fields of the Zip64
+End of Central Directory record.
+
+Decrypting and decompressing the Central Directory is accomplished
+in the same manner as decrypting and decompressing a file.
+
+Certificate Processing Method:
+-----------------------------
+
+The Certificate Processing Method for ZIP file encryption 
+defines the following additional data fields:
+
+1. Certificate Flag Values
+
+Additional processing flags that can be present in the Flags field of both 
+the 0x0017 field of the central directory Extra Field and the Decryption 
+header record preceding compressed file data are:
+
+         0x0007 - reserved for future use
+         0x000F - reserved for future use
+         0x0100 - Indicates non-OAEP key wrapping was used.  If this
+                  field is set, the version needed to extract must
+                  be at least 61.  This means OAEP key wrapping is not
+                  used when generating a Master Session Key using
+                  ErdData.
+         0x4000 - ErdData must be decrypted using 3DES-168, otherwise use the
+                  same algorithm used for encrypting the file contents.
+         0x8000 - reserved for future use
+
+
+2. CertData - Extra Field 0x0017 record certificate data structure
+
+The data structure used to store certificate data within the section
+of the Extra Field defined by the CertData field of the 0x0017
+record are as shown:
+
+          Value     Size     Description
+          -----     ----     -----------
+          RCount    4 bytes  Number of recipients.  
+          HashAlg   2 bytes  Hash algorithm identifier
+          HSize     2 bytes  Hash size
+          SRList    (var)    Simple list of recipients hashed public keys
+
+          
+     RCount    This defines the number of intended recipients whose 
+               public keys were used for encryption.  This identifies
+               the number of elements in the SRList.
+
+     HashAlg   This defines the hash algorithm used to calculate
+               the public key hash of each public key used
+               for encryption. This field currently supports
+               only the following value for SHA-1
+
+               0x8004 - SHA1
+
+     HSize     This defines the size of a hashed public key.
+
+     SRList    This is a variable length list of the hashed 
+               public keys for each intended recipient.  Each 
+               element in this list is HSize.  The total size of 
+               SRList is determined using RCount * HSize.
+
+
+3. Reserved1 - Certificate Decryption Header Reserved1 Data:
+
+          Value     Size     Description
+          -----     ----     -----------
+          RCount    4 bytes  Number of recipients.  
+          
+     RCount    This defines the number of intended recipients whose 
+               public keys were used for encryption.  This defines
+               the number of elements in the REList field defined below.
+
+
+4. Reserved2 - Certificate Decryption Header Reserved2 Data Structures:
+
+
+          Value     Size     Description
+          -----     ----     -----------
+          HashAlg   2 bytes  Hash algorithm identifier
+          HSize     2 bytes  Hash size
+          REList    (var)    List of recipient data elements
+
+
+     HashAlg   This defines the hash algorithm used to calculate
+               the public key hash of each public key used
+               for encryption. This field currently supports
+               only the following value for SHA-1
+
+               0x8004 - SHA1
+
+     HSize     This defines the size of a hashed public key
+               defined in REHData.
+
+     REList    This is a variable length list of recipient data.  
+               Each element in this list consists of a Recipient
+               Element data structure as follows:
+
+
+    Recipient Element (REList) Data Structure:
+
+          Value     Size     Description
+          -----     ----     -----------
+          RESize    2 bytes  Size of REHData + REKData
+          REHData   HSize    Hash of recipients public key
+          REKData   (var)    Simple key blob
+
+
+     RESize    This defines the size of an individual REList 
+               element.  This value is the combined size of the
+               REHData field + REKData field.  REHData is defined by
+               HSize.  REKData is variable and can be calculated
+               for each REList element using RESize and HSize.
+
+     REHData   Hashed public key for this recipient.
+
+     REKData   Simple Key Blob.  The format of this data structure
+               is identical to that defined in the Microsoft
+               CryptoAPI and generated using the CryptExportKey()
+               function.  The version of the Simple Key Blob
+               supported at this time is 0x02 as defined by
+               Microsoft.
+
+Certificate Processing - Central Directory Encryption:
+------------------------------------------------------
+
+Central Directory Encryption using Digital Certificates will 
+operate in a manner similar to that of Single Password Central
+Directory Encryption.  This record will only be present when there 
+is data to place into it.  Currently, data is placed into this
+record when digital certificates are used for either encrypting 
+or signing the files within a ZIP file.  When only password 
+encryption is used with no certificate encryption or digital 
+signing, this record is not currently needed. When present, this 
+record will appear before the start of the actual Central Directory 
+data structure and will be located immediately after the Archive 
+Decryption Header if the Central Directory is encrypted.
+
+The Archive Extra Data record will be used to store the following
+information.  Additional data may be added in future versions.
+
+Extra Data Fields:
+
+0x0014 - PKCS#7 Store for X.509 Certificates
+0x0016 - X.509 Certificate ID and Signature for central directory
+0x0019 - PKCS#7 Encryption Recipient Certificate List
+
+The 0x0014 and 0x0016 Extra Data records would otherwise be 
+located in the first record of the Central Directory for digital 
+certificate processing. When encrypting or compressing the Central 
+Directory, the 0x0014 and 0x0016 records must be located in the 
+Archive Extra Data record and they should not remain in the first 
+Central Directory record.  The Archive Extra Data record will also 
+be used to store the 0x0019 data. 
+
+When present, the size of the Archive Extra Data record will be
+included in the size of the Central Directory.  The data of the
+Archive Extra Data record will also be compressed and encrypted
+along with the Central Directory data structure.
+
+Certificate Processing Differences:
+
+The Certificate Processing Method of encryption differs from the
+Single Password Symmetric Encryption Method as follows.  Instead
+of using a user-defined password to generate a master session key,
+cryptographically random data is used.  The key material is then
+wrapped using standard key-wrapping techniques.  This key material
+is wrapped using the public key of each recipient that will need
+to decrypt the file using their corresponding private key.
+
+This specification currently assumes digital certificates will follow
+the X.509 V3 format for 1024 bit and higher RSA format digital
+certificates.  Implementation of this Certificate Processing Method
+requires supporting logic for key access and management.  This logic
+is outside the scope of this specification.
+
+OAEP Processing with Certificate-based Encryption:
+
+OAEP stands for Optimal Asymmetric Encryption Padding.  It is a
+strengthening technique used for small encoded items such as decryption
+keys.  This is commonly applied in cryptographic key-wrapping techniques
+and is supported by PKCS #1.  Versions 5.0 and 6.0 of this specification 
+were designed to support OAEP key-wrapping for certificate-based 
+decryption keys for additional security.  
+
+Support for private keys stored on Smartcards or Tokens introduced
+a conflict with this OAEP logic.  Most card and token products do 
+not support the additional strengthening applied to OAEP key-wrapped 
+data.  In order to resolve this conflict, versions 6.1 and above of this 
+specification will no longer support OAEP when encrypting using 
+digital certificates. 
+
+Versions of PKZIP available during initial development of the 
+certificate processing method set a value of 61 into the 
+version needed to extract field for a file.  This indicates that 
+non-OAEP key wrapping is used.  This affects certificate encryption 
+only, and password encryption functions should not be affected by 
+this value.  This means values of 61 may be found on files encrypted
+with certificates only, or on files encrypted with both password
+encryption and certificate encryption.  Files encrypted with both
+methods can safely be decrypted using the password methods documented.
+
+IX. Change Process
+------------------
+
+In order for the .ZIP file format to remain a viable definition, this
+specification should be considered as open for periodic review and
+revision.  Although this format was originally designed with a 
+certain level of extensibility, not all changes in technology
+(present or future) were or will be necessarily considered in its
+design.  If your application requires new definitions to the
+extensible sections in this format, or if you would like to 
+submit new data structures, please forward your request to
+zipformat@pkware.com.  All submissions will be reviewed by the
+ZIP File Specification Committee for possible inclusion into
+future versions of this specification.  Periodic revisions
+to this specification will be published to ensure interoperability. 
+We encourage comments and feedback that may help improve clarity 
+or content.
+
+X. Incorporating PKWARE Proprietary Technology into Your Product
+----------------------------------------------------------------
+
+PKWARE is committed to the interoperability and advancement of the
+.ZIP format.  PKWARE offers a free license for certain technological
+aspects described above under certain restrictions and conditions.
+However, the use or implementation in a product of certain technological
+aspects set forth in the current APPNOTE, including those with regard to
+strong encryption, patching, or extended tape operations requires a 
+license from PKWARE.  Please contact PKWARE with regard to acquiring
+a license.
+
+XI. Acknowledgements
+---------------------
+
+In addition to the above mentioned contributors to PKZIP and PKUNZIP,
+I would like to extend special thanks to Robert Mahoney for suggesting
+the extension .ZIP for this software.
+
+XII. References
+---------------
+
+    Fiala, Edward R., and Greene, Daniel H., "Data compression with
+       finite windows",  Communications of the ACM, Volume 32, Number 4,
+       April 1989, pages 490-505.
+
+    Held, Gilbert, "Data Compression, Techniques and Applications,
+       Hardware and Software Considerations", John Wiley & Sons, 1987.
+
+    Huffman, D.A., "A method for the construction of minimum-redundancy
+       codes", Proceedings of the IRE, Volume 40, Number 9, September 1952,
+       pages 1098-1101.
+
+    Nelson, Mark, "LZW Data Compression", Dr. Dobbs Journal, Volume 14,
+       Number 10, October 1989, pages 29-37.
+
+    Nelson, Mark, "The Data Compression Book",  M&T Books, 1991.
+
+    Storer, James A., "Data Compression, Methods and Theory",
+       Computer Science Press, 1988
+
+    Welch, Terry, "A Technique for High-Performance Data Compression",
+       IEEE Computer, Volume 17, Number 6, June 1984, pages 8-19.
+
+    Ziv, J. and Lempel, A., "A universal algorithm for sequential data
+       compression", Communications of the ACM, Volume 30, Number 6,
+       June 1987, pages 520-540.
+
+    Ziv, J. and Lempel, A., "Compression of individual sequences via
+       variable-rate coding", IEEE Transactions on Information Theory,
+       Volume 24, Number 5, September 1978, pages 530-536.
+
+
+APPENDIX A - AS/400 Extra Field (0x0065) Attribute Definitions
+--------------------------------------------------------------
+
+Field Definition Structure:
+
+   a. field length including length             2 bytes
+   b. field code                                2 bytes
+   c. data                                      x bytes
+
+Field Code  Description
+   4001     Source type i.e. CLP etc
+   4002     The text description of the library 
+   4003     The text description of the file
+   4004     The text description of the member
+   4005     x'F0' or 0 is PF-DTA,  x'F1' or 1 is PF_SRC
+   4007     Database Type Code                  1 byte
+   4008     Database file and fields definition
+   4009     GZIP file type                      2 bytes
+   400B     IFS code page                       2 bytes
+   400C     IFS Creation Time                   4 bytes
+   400D     IFS Access Time                     4 bytes
+   400E     IFS Modification time               4 bytes
+   005C     Length of the records in the file   2 bytes
+   0068     GZIP two words                      8 bytes
+
+APPENDIX B - z/OS Extra Field (0x0065) Attribute Definitions
+------------------------------------------------------------
+
+Field Definition Structure:
+
+   a. field length including length             2 bytes
+   b. field code                                2 bytes
+   c. data                                      x bytes
+
+Field Code  Description
+   0001     File Type                           2 bytes 
+   0002     NonVSAM Record Format               1 byte
+   0003     Reserved		
+   0004     NonVSAM Block Size                  2 bytes Big Endian
+   0005     Primary Space Allocation            3 bytes Big Endian
+   0006     Secondary Space Allocation          3 bytes Big Endian
+   0007     Space Allocation Type               1 byte flag
+   0008     Modification Date                   Retired with PKZIP 5.0 +
+   0009     Expiration Date                     Retired with PKZIP 5.0 +
+   000A     PDS Directory Block Allocation      3 bytes Big Endian binary value
+   000B     NonVSAM Volume List                 variable		
+   000C     UNIT Reference                      Retired with PKZIP 5.0 +
+   000D     DF/SMS Management Class             8 bytes EBCDIC Text Value
+   000E     DF/SMS Storage Class                8 bytes EBCDIC Text Value
+   000F     DF/SMS Data Class                   8 bytes EBCDIC Text Value
+   0010     PDS/PDSE Member Info.               30 bytes	
+   0011     VSAM sub-filetype                   2 bytes		
+   0012     VSAM LRECL                          13 bytes EBCDIC "(num_avg num_max)"
+   0013     VSAM Cluster Name                   Retired with PKZIP 5.0 +
+   0014     VSAM KSDS Key Information           13 bytes EBCDIC "(num_length num_position)"
+   0015     VSAM Average LRECL                  5 bytes EBCDIC num_value padded with blanks
+   0016     VSAM Maximum LRECL                  5 bytes EBCDIC num_value padded with blanks
+   0017     VSAM KSDS Key Length                5 bytes EBCDIC num_value padded with blanks
+   0018     VSAM KSDS Key Position              5 bytes EBCDIC num_value padded with blanks
+   0019     VSAM Data Name                      1-44 bytes EBCDIC text string
+   001A     VSAM KSDS Index Name                1-44 bytes EBCDIC text string
+   001B     VSAM Catalog Name                   1-44 bytes EBCDIC text string
+   001C     VSAM Data Space Type                9 bytes EBCDIC text string
+   001D     VSAM Data Space Primary             9 bytes EBCDIC num_value left-justified
+   001E     VSAM Data Space Secondary           9 bytes EBCDIC num_value left-justified
+   001F     VSAM Data Volume List               variable EBCDIC text list of 6-character Volume IDs
+   0020     VSAM Data Buffer Space              8 bytes EBCDIC num_value left-justified
+   0021     VSAM Data CISIZE                    5 bytes EBCDIC num_value left-justified
+   0022     VSAM Erase Flag                     1 byte flag		
+   0023     VSAM Free CI %                      3 bytes EBCDIC num_value left-justified
+   0024     VSAM Free CA %                      3 bytes EBCDIC num_value left-justified
+   0025     VSAM Index Volume List              variable EBCDIC text list of 6-character Volume IDs
+   0026     VSAM Ordered Flag                   1 byte flag		
+   0027     VSAM REUSE Flag                     1 byte flag		
+   0028     VSAM SPANNED Flag                   1 byte flag		
+   0029     VSAM Recovery Flag                  1 byte flag		
+   002A     VSAM  WRITECHK  Flag                1 byte flag		
+   002B     VSAM Cluster/Data SHROPTS           3 bytes EBCDIC "n,y"	
+   002C     VSAM Index SHROPTS                  3 bytes EBCDIC "n,y"	
+   002D     VSAM Index Space Type               9 bytes EBCDIC text string
+   002E     VSAM Index Space Primary            9 bytes EBCDIC num_value left-justified
+   002F     VSAM Index Space Secondary          9 bytes EBCDIC num_value left-justified
+   0030     VSAM Index CISIZE                   5 bytes EBCDIC num_value left-justified
+   0031     VSAM Index IMBED                    1 byte flag		
+   0032     VSAM Index Ordered Flag             1 byte flag		
+   0033     VSAM REPLICATE Flag                 1 byte flag		
+   0034     VSAM Index REUSE Flag               1 byte flag		
+   0035     VSAM Index WRITECHK Flag            1 byte flag Retired with PKZIP 5.0 +
+   0036     VSAM Owner                          8 bytes EBCDIC text string
+   0037     VSAM Index Owner                    8 bytes EBCDIC text string
+   0038     Reserved
+   0039     Reserved
+   003A     Reserved
+   003B     Reserved
+   003C     Reserved
+   003D     Reserved
+   003E     Reserved
+   003F     Reserved
+   0040     Reserved
+   0041     Reserved
+   0042     Reserved
+   0043     Reserved
+   0044     Reserved
+   0045     Reserved
+   0046     Reserved
+   0047     Reserved
+   0048     Reserved
+   0049     Reserved
+   004A     Reserved
+   004B     Reserved
+   004C     Reserved
+   004D     Reserved
+   004E     Reserved
+   004F     Reserved
+   0050     Reserved
+   0051     Reserved
+   0052     Reserved
+   0053     Reserved
+   0054     Reserved
+   0055     Reserved
+   0056     Reserved
+   0057     Reserved
+   0058     PDS/PDSE Member TTR Info.           6 bytes  Big Endian
+   0059     PDS 1st LMOD Text TTR               3 bytes  Big Endian
+   005A     PDS LMOD EP Rec #                   4 bytes  Big Endian
+   005B     Reserved
+   005C     Max Length of records               2 bytes  Big Endian
+   005D     PDSE Flag                           1 byte flag
+   005E     Reserved
+   005F     Reserved
+   0060     Reserved
+   0061     Reserved
+   0062     Reserved
+   0063     Reserved
+   0064     Reserved
+   0065     Last Date Referenced                4 bytes  Packed Hex "yyyymmdd"
+   0066     Date Created                        4 bytes  Packed Hex "yyyymmdd"
+   0068     GZIP two words                      8 bytes
+   0071     Extended NOTE Location              12 bytes Big Endian
+   0072     Archive device UNIT                 6 bytes  EBCDIC
+   0073     Archive 1st Volume                  6 bytes  EBCDIC
+   0074     Archive 1st VOL File Seq#           2 bytes  Binary
+
+APPENDIX C - Zip64 Extensible Data Sector Mappings (EFS)
+--------------------------------------------------------
+
+          -Z390   Extra Field:
+
+          The following is the general layout of the attributes for the 
+          ZIP 64 "extra" block for extended tape operations. Portions of 
+          this extended tape processing technology are covered under a 
+          pending patent application. The use or implementation in a 
+          product of certain technological aspects set forth in the 
+          current APPNOTE, including those with regard to strong encryption,
+          patching or extended tape operations, requires a license from
+          PKWARE.  Please contact PKWARE with regard to acquiring a license. 
+ 
+
+          Note: some fields stored in Big Endian format.  All text is 
+	  in EBCDIC format unless otherwise specified.
+
+          Value       Size          Description
+          -----       ----          -----------
+  (Z390)  0x0065      2 bytes       Tag for this "extra" block type
+          Size        4 bytes       Size for the following data block
+          Tag         4 bytes       EBCDIC "Z390"
+          Length71    2 bytes       Big Endian
+          Subcode71   2 bytes       Enote type code
+          FMEPos      1 byte
+          Length72    2 bytes       Big Endian
+          Subcode72   2 bytes       Unit type code
+          Unit        1 byte        Unit
+          Length73    2 bytes       Big Endian
+          Subcode73   2 bytes       Volume1 type code
+          FirstVol    1 byte        Volume
+          Length74    2 bytes       Big Endian
+          Subcode74   2 bytes       FirstVol file sequence
+          FileSeq     2 bytes       Sequence 
+
+APPENDIX D - Language Encoding (EFS)
+------------------------------------
+
+The ZIP format has historically supported only the original IBM PC character 
+encoding set, commonly referred to as IBM Code Page 437.  This limits storing 
+file name characters to only those within the original MS-DOS range of values 
+and does not properly support file names in other character encodings, or 
+languages. To address this limitation, this specification will support the 
+following change. 
+
+If general purpose bit 11 is unset, the file name and comment should conform 
+to the original ZIP character encoding.  If general purpose bit 11 is set, the 
+filename and comment must support The Unicode Standard, Version 4.1.0 or 
+greater using the character encoding form defined by the UTF-8 storage 
+specification.  The Unicode Standard is published by The Unicode
+Consortium (www.unicode.org).  UTF-8 encoded data stored within ZIP files 
+is expected to not include a byte order mark (BOM). 
+
+Applications may choose to supplement this file name storage through the use 
+of the 0x0008 Extra Field.  Storage for this optional field is currently 
+undefined, however it will be used to allow storing extended information 
+on source or target encoding that may further assist applications with file 
+name, or file content encoding tasks.  Please contact PKWARE with any
+requirements on how this field should be used.
+
+The 0x0008 Extra Field storage may be used with either setting for general 
+purpose bit 11.  Examples of the intended usage for this field are to store 
+whether "modified-UTF-8" (JAVA) is used, or UTF-8-MAC.  Similarly, other 
+commonly used character encoding (code page) designations can be indicated 
+through this field.  Formalized values for use of the 0x0008 record remain 
+undefined at this time.  The definition for the layout of the 0x0008 field
+will be published when available.  Use of the 0x0008 Extra Field provides
+for storing data within a ZIP file in an encoding other than IBM Code
+Page 437 or UTF-8.
+
+General purpose bit 11 will not imply any encoding of file content or
+password.  Values defining character encoding for file content or 
+password must be stored within the 0x0008 Extended Language Encoding 
+Extra Field.
+
+Ed Gordon of the Info-ZIP group has defined a pair of "extra field" records 
+that can be used to store UTF-8 file name and file comment fields.  These
+records can be used for cases when the general purpose bit 11 method
+for storing UTF-8 data in the standard file name and comment fields is
+not desirable.  A common case for this alternate method is if backward
+compatibility with older programs is required.
+
+Definitions for the record structure of these fields are included above 
+in the section on 3rd party mappings for "extra field" records.  These
+records are identified by Header ID's 0x6375 (Info-ZIP Unicode Comment 
+Extra Field) and 0x7075 (Info-ZIP Unicode Path Extra Field).
+
+The choice of which storage method to use when writing a ZIP file is left
+to the implementation.  Developers should expect that a ZIP file may 
+contain either method and should provide support for reading data in 
+either format. Use of general purpose bit 11 reduces storage requirements 
+for file name data by not requiring additional "extra field" data for
+each file, but can result in older ZIP programs not being able to extract 
+files.  Use of the 0x6375 and 0x7075 records will result in a ZIP file 
+that should always be readable by older ZIP programs, but requires more 
+storage per file to write file name and/or file comment fields.
+
+ 
+
+
diff --git a/format_docs/pdb/ereader.txt b/format_docs/pdb/ereader.txt
new file mode 100644
index 0000000000..5770c37e65
--- /dev/null
+++ b/format_docs/pdb/ereader.txt
@@ -0,0 +1,309 @@
+About
+-----
+
+The eReader format has evolved and changed over time. Consequently, there are
+multiple versions of the eReader format. There are also two different tools
+that can create eReader files. The official tools are Makebook and Dropbook.
+Dropbook is the newer official tool that has replaced Makebook. However,
+Makebook is still in wide use because it supports a wider range of platforms
+than Dropbook. Dropbook is a GUI application that only runs on Windows and
+Apple’s OS X.
+
+
+PDB Identity
+------------
+
+PNRdPPrs
+
+
+202 and 132 headers
+-----------------------------------------
+
+Older files have a record 0 size of 202 and occasionally 116. Newer files have
+a record 0 size of 132. As of this writing the 202 files only support text and
+images. The image format in the 202 files is the same as the 132 files. The 132
+files support a number of additional features.
+
+
+Record 0, eReader header (202)
+------------------
+
+Note all values are in 2 byte increments. Like values are condensed into a
+range. The range can be broken into 2 byte sections which represent the actual
+stored values.
+
+bytes       content             comments
+
+0-2         Version             Non-DRM books 2 and 4.
+2-8         Garbage
+8-10        Non-Text Offset     Start of Non text area (images) will run to the
+                                end of the section list.
+10-14       Unknown
+14-24       Garbage
+24-28       Unknown
+28-98       Garbage
+98-100      Unknown
+100-110     Garbage
+110-114     Unknown
+114-116     Garbage
+116-202     Unknown
+
+* Garbage: Intentionally random values.
+
+
+Text Records (202)
+------------------
+
+Text starts with section 1 and continues until the section indicated by the
+Non-Text Offset. All text records are PalmDoc compressed.
+
+Each character in the compressed data is xored with 0xA5.
+
+A decompression example in pseudo Python:
+
+for num in range(1, Non-Text Offset):
+    text += decompress_palmdoc(''.join([chr(ord(x) ^ 0xA5) for x in section_data(num)])).decode('cp1252', 'replace')
+
+
+Dropbook 132 files
+------------------
+
+The following sections apply to the newer Dropbook created files.
+
+
+Record 0, eReader header (132)
+----------------------------
+
+This is only for 132 byte header files created by Dropbook.
+
+bytes   content                     comments
+
+0-2     compression                 Specifies compression and drm. 2 = palmdoc,
+                                    10 = zlib. 260 and 272 = DRM
+2-6     unknown                     Value of 0 is used
+6-8     encoding                    Always 25152 (0x6240). All text must be
+                                    encoded as Latin-1 cp1252
+8-10    Number of small pages       The number of small font pages. If page
+                                    index is not built in then 0.
+10-12   Number of large pages       The number of large font pages. If page
+                                    index is not built in then 0.
+12-14   Non-Text record start       The location of the first non text records.
+                                    record 1 to this value minus 1 are all text
+                                    records
+14-16   Number of chapters          The number of chapter index records
+                                    contained in the file
+16-18   Number of small index       The number of small font page index records
+                                    contained in the file
+18-20   Number of large index       The number of large font page index records
+                                    contained in the file
+20-22   Number of images            The number of images contained in the file
+22-24   Number of links             The number of links contained in the file
+24-26   Metadata available          Is there a metadata record in the file?
+                                    0 = None, 1 = There is a metadata record
+26-28   Unknown                     Value of 0 is used
+28-30   Number of Footnotes         The number of footnote records in the file
+30-32   Number of Sidebars          The number of sidebar records in the file
+32-34   Chapter index record start  The location of chapter index records. If
+                                    there are no chapters use the value for the
+                                    Last data record.
+34-36   2560                        Magic value that must be set to 2560
+36-38   Small page index start      The location of small font page index
+                                    records. If page table is not built in use
+                                    the value for the Last data record.
+38-40   Large page index start      The location of large font page index
+                                    records. If page table is not built in use
+                                    the value for the Last data record.
+40-42   Image data record start     The location of the first image record. If
+                                    there are no images use the value for the
+                                    Last data record.
+42-44   Links record start          The location of the first link index
+                                    record. If there are no links use the value
+                                    for the Last data record.
+44-46   Metadata record start       The location of the metadata record. If
+                                    there is no metadata use the value for the
+                                    Last data record.
+46-48   Unknown                     Value of 0 is used
+48-50   Footnote record start       The location of the first footnote record.
+                                    If there are no footnotes use the value for
+                                    the Last data record.
+50-52   Sidebar record start        The location of the first sidebar record.
+                                    If there are no sidebars use the value for
+                                    the Last data record.
+52-54   Last data record            The location of the last data record
+54-132  Unknown                     Value of 0 is used
+
+Note: All values are in 2 byte increments. All bytes in the table that have a
+range larger than 2 can be broken into 2 byte segments and have different
+values set for each grouping.
+
+
+Records Order
+-------------
+
+Though the order of these sections is described in the eReader header,
+DropBook makes the following order:
+
+   1. eReader Header
+   2. Compressed text
+   3. Small font page index
+   4. Large font page index
+   5. Chapter index
+   6. Links index
+   7. Images
+   8. (Extrapolation: there should be one more record type here though it has
+       not yet been uncovered what it might be).
+   9. Metadata
+  10. Sidebar records
+  11. Footnote records
+  12. Text block size record
+  13. "MeTaInFo\x00" word record 
+
+
+Text Records
+------------
+
+All text records use cp1252 encoding (although eReader documents talk about
+UTF-8 as well). Their total compressed size is unknown; however, anything below
+3560 Bytes is known to work. The text will be either zlib or palmdoc
+compressed. Use the compression value from the eReader header to determine
+which. All text utilizes the Palm Markup Language (PML) for formatting.
+
+Starting with DropBook 1.6.0 text is divided into 8KB (8192 bytes) blocks
+trimming the end to the closest space character and then being compressed.
+Earlier versions, such as DropBook 1.5.2, try to behave the same way, though
+sometimes they trim the block in an unexpected place.
+
+
+Chapter Index Records
+---------------------
+
+Each chapter record corresponds to 1 chapter and points at the place in the
+book. Chapter record takes a form of 'offset name\x00'. First 4 bytes are offset
+of the original pml file where the chapter index points to (offset of
+the \x|\X?|\C? tags). Then without a space goes a name of a chapter in chapter
+index. It should contain only text, all formatting tags should be removed.
+\U and \a tags are not permitted in chapter name. To maintain sub-chapters
+4*n spaces (\x20) are added to the beginning of the name, where "n" is level of
+chapter: 0 for \x tag and N for \CN="" and \XN tags. And then an ending
+\x00 symbol.
+
+
+Image Records
+-------------
+
+Image records must be smaller than 65505 Bytes. They must also be 8bit PNG
+images.
+
+An image record takes the form 'PNG name\x00... image_data'
+
+bytes   content         comments
+
+0-4     PNG             There must be a space after PNG.
+4-36    image name.     The image name must be exactly 32 Bytes long. Pad
+                        the right side of the name with \x00 characters for
+                        names shorter than 32 characters.
+36-58   Unknown	
+58-60   width           Width of an image
+60-62   height          Height of an image
+62-?    The image data  raw image data in 8 bit PNG format
+
+Note: DropBooks seems to change something in png raw data. Like reencoding or
+something, but plain insertion of png image there still works. 
+
+
+Links Records
+-------------
+
+Links records are constructed the same way as chapter ones. Each link anchor
+record corresponds to 1 link anchor and points at the place in the book. Link
+record takes a form of 'offset name\x00' First 4 bytes are offset of the
+original pml file where the link anchor points to (offset of the \Q tag). Then
+without a space goes a name of a link anchor. It should contain only text, all
+formatting tags should be removed. \U and \a tags are not permitted in link
+anchor name. And then an ending \x00 symbol.
+
+
+Footnote Records
+----------------
+
+The first footnote record is a \x00 separated list of footnote ids. All
+subsequent footnote records are the footnote text corresponding to the id's
+position in the list. Footnote text is compressed in the same manner as normal
+text records
+
+E.G.
+
+footnote section 1 = 'notice1\x00notice2\x00notice3\x00'
+footnote section 2 = 'Text for notice 1'
+footnote section 3 = 'Text for notice 2'
+footnote section 4 = 'Text for notice 3'
+
+Starting with Dropbook 1.5.2 the first record looks a bit different. It is a sequence
+of \x00\x01 then 1 byte of footnote id length, then footnote id then \x00.
+
+E.G.
+
+footnote section 1 = '\x00\x01\x07notice1\x00\x00\x01\x0Afootnote10\x00'
+
+
+Sidebar Records
+---------------
+
+The first sidebar record is a \x00 separated list of sidebar ids. All
+subsequent sidebar records are the sidebar text corresponding to the id's
+position in the list. Sidebar text is compressed in the same manner as normal
+text records
+
+E.G.
+
+sidebar section 1 = 'notice1\x00notice2\x00notice3\x00'
+sidebar section 2 = 'Text for notice 1'
+sidebar section 3 = 'Text for notice 2'
+sidebar section 4 = 'Text for notice 3'
+
+Starting with Dropbook 1.5.2 the first record looks a bit different. It is a sequence
+of \x00\x01 then 1 byte of sidebar's id length, then sidebar's id then \x00.
+
+E.G.
+
+sidebar section 1 = '\x00\x01\x07notice1\x00\x00\x01\x09sidebar10\x00'
+
+
+Metadata Record
+---------------
+
+\x00 separated list of strings.
+
+Metadata takes the form:
+
+  title\x00
+  author\x00
+  copyright\x00
+  publisher\x00
+  isbn\x00
+
+E.G.
+
+Gibraltar Earth\x00Michael McCollum\x001999\x00Sci Fi Arizona\x001929381255\x00
+
+The metadata record is always followed by a record which contains 'MeTaInFo\x00'
+
+Note: Starting with DropBook 1.5.2 'MeTaInFo\x00' is not following Metadata
+Record. It is a separate record that ends the file and there are some more
+records between Metadata record and 'MeTaInFo\x00' record.
+
+
+Text Sizes Record
+-----------------
+
+There is a special record that contains the initial size of all text blocks
+before compression. It is just a sequence of 2-byte blocks which are containing
+the sizes.
+
+E.G.
+
+\x1F\xFB\x20\x00\x20\x00\x1F\xFE\x1F\xFD\x09\x46
+
+Note: By this we can judge that theoretical maximum of initial block size is
+65535 bytes. 
+
diff --git a/format_docs/pdb/mbp.txt b/format_docs/pdb/mbp.txt
new file mode 100644
index 0000000000..61e1d2d9ee
--- /dev/null
+++ b/format_docs/pdb/mbp.txt
@@ -0,0 +1,414 @@
+// BEGINNING OF FILE
+//   NOTES:
+//   1* Numeric data stored as big endian, 32 bits.
+//   2* Data padded to 16 bits limits. (Sometimes to 32 bits limits?)
+//   3* Text stored seems to be an 8 bit encoding padded to 16 bits
+//    (may be "ISO-8859-1"?, or may be just a local machine character set?)
+//   4* I initially used the term "MARK" where I should have used "HIGHLIGHT", 
+//     bear that in mind (it was a bad choice of name when I started reversing)
+
+<0x 31 bytes = book_title_PAR + 0x00 PAD if (book_title_PAR < 31) >
+<0x 00>
+<0x 00 00 00 00>
+...4
+...4
+<0x 00 00 00 00>
+<0x 00 00 00 00>
+<0x 00 00 00 00>
+<0x 00 00 00 00>
+BPAR
+MOBI
+<0x 4 bytes = Next free pointer identifier>
+	// Note: pointer identifiers aren't always consecutive,
+	// so this number is usually bigger than the # of index entries
+<0x 00 00>
+<0x 4 bytes = Number of index entries>
+<0x 4 bytes = Position of BPAR>
+<0x 00 00 00 00>	// BPAR pointer identifier = 0x0
+
+
+// INDEXES:
+// Order of Indexes: from the beginning of this MBP file, 
+// forward to the end of the file.
+// Nevertheless, see these comments for order relative to: 
+//   "BEGINING OF USER DATA": order of Data marks.
+//   "FINAL GROUP OF MARKS": order of final marks.
+[for each {NOTE,MARK,CORRECTION,DRAWING,BOOKMARK,
+		AUTHOR,TITLE,CATEGORY,GENRE,ABSTRACT,COVER,PUBLISHER,
+		...} 
+	  || "last DATA"]
+// Note: Pointer identifiers to DATA's assigned so the number
+// shrinks as the table grows down.
+[if NOTE || CORRECTION]
+	<0x 4 bytes = Position of DATA....EBVS>
+	<0x 4 bytes = Pointer identifier, used by BKMK blocks>
+[fi NOTE || CORRECTION]
+<0x 4 bytes = Position of DATA>
+<0x 4 bytes = Pointer identifier, used by BKMK blocks>
+[if NOTE || CORRECTION]
+	<0x 4 bytes = Position of DATA>
+	<0x 4 bytes = Pointer identifier, used by BKMK blocks>
+[fi NOTE || CORRECTION]
+[if MARK || DRAWING || BOOKMARK]
+	<0x 4 bytes = Position of DATA....EBVS>
+	<0x 4 bytes = Pointer identifier, used by BKMK blocks>
+[fi MARK || DRAWING || BOOKMARK]
+[if AUTHOR || TITLE || CATEGORY || GENRE || ABSTRACT || COVER || PUBLISHER]
+	<0x 4 bytes = Position of [AUTH || TITL || CATE || GENR || ABST || COVE || PUBL] >
+	<0x 4 bytes = Pointer identifier>
+[fi AUTHOR || TITLE || CATEGORY || GENRE || ABSTRACT || COVER || PUBLISHER]
+[if last DATA] // there's always a last piece of DATA (not user data?)
+	<0x 4 bytes = Position of last DATA>
+	<0x 4 bytes = Pointer identifier>	// usually <0x 00 00 00 01>
+[fi last DATA]
+[next {NOTE,MARK,CORRECTION,DRAWING,BOOKMARK,
+		AUTHOR,TITLE,CATEGORY,GENRE,ABSTRACT,COVER,PUBLISHER,
+		...} 
+      || "last DATA"]
+
+
+[for each {NOTE,MARK,CORRECTION,DRAWING}]
+<0x 4 bytes = Position of BKMK>
+<0x 4 bytes = Pointer identifier>
+	// Note: pointer identifiers for BKMK's are usually the minor
+	// of all the identifiers associated to an annotation. All
+	// other DATA references in INDEXES table associated to this
+	// BKMK, have bigger pointer identifiers.
+	// Note: Pointer identifiers to BKMK's assigned so the number
+	// grows as the table grows down.
+[next {NOTE,MARK,CORRECTION,DRAWING}]
+
+
+<0x 2 bytes random PAD>
+BPAR
+<0x 4 bytes = size of BPAR block>
+<0x FF FF FF FF>
+...4	<-- 'position of last read' related
+...4	<-- 'position of last read' related
+...4
+<0x FF FF FF FF>
+...4
+...4
+...4	<-- 'position of last read' related
+...(rest of size of BPAR block, if bigger than 0x20)
+[if (size of BPAR block) mod 32 != 0]
+<0x FF FF FF FF>
+[fi]
+
+// BEGINING OF USER DATA:
+// Order of {NOTE,MARK,CORRECTION,DRAWING} : 
+// starts with user data at the end of the file, 
+// going backwards to the beginning of the file:
+//--------------------------------------------------------------------
+[for each {NOTE,MARK,CORRECTION,DRAWING}]
+//-------------------------------
+[if NOTE]
+DATA
+<0x 4 bytes = size of DATA block>
+[if EBAR]	// this block can appear, or not... ???
+	EBAR
+	...various {4 x byte} ???
+[fi EBAR]
+EBVS
+<0x 00 00 00 03> ???
+<0x 4 bytes = IDENTIFIER> ???
+[<0x 00 00 00 01>, or nothing at all] ???
+<0x 00 00 00 08>
+<0x FF FF FF FF>
+<0x 00 00 00 00>
+<0x 00 00 00 10>
+...(rest of size of DATA block)
+<0x FD EA = PAD? (ýê)>
+DATA
+<0x 4 bytes = size of <marked text (see 3rd note)> >
+<marked text (see 3rd note)>
+[if (size of <marked text (see 3rd note)>) mod 4 !=0]
+<0x random PAD until (size of <marked text (see 3rd note)>) mod 4 ==0>
+[fi]
+DATA
+<0x 4 bytes = size of <note text (see 3rd note)> >
+<note text (see 3rd note)>
+[if (size of <note text (see 3rd note)>) mod 4 !=0]
+<0x random PAD until (size of <note text (see 3rd note)>) mod 4 ==0>
+[fi]
+[fi NOTE]
+//-------------------------------
+[if MARK || BOOKMARK]
+DATA
+<0x 4 bytes = size of <marked text (see 3rd note)> >
+<marked text (see 3rd note)>
+[if (size of <marked text (see 3rd note)>) mod 4 !=0]
+<0x random PAD until (size of <marked text (see 3rd note)>) mod 4 ==0>
+[fi]
+DATA
+<0x 4 bytes = size of DATA block>
+[if EBAR]	// this block can appear, or not... ???
+	EBAR
+	...various {4 x byte} ???
+[fi EBAR]
+EBVS
+<0x 00 00 00 03> ???
+<0x 4 bytes = IDENTIFIER> ???
+[<0x 00 00 00 01>, or nothing at all] ???
+<0x 00 00 00 08>
+<0x FF FF FF FF>
+<0x 00 00 00 00>
+<0x 00 00 00 10>
+...(rest of size of DATA block)
+<0x FD EA = PAD? (ýê)>
+[fi MARK || BOOKMARK]
+//-------------------------------
+[if CORRECTION]
+DATA
+<0x 4 bytes = size of DATA block>
+[if EBAR]	// this block can appear, or not... ???
+	EBAR
+	...various {4 x byte} ???
+[fi EBAR]
+EBVS
+<0x 00 00 00 03> ???
+<0x 4 bytes = IDENTIFIER> ???
+[<0x 00 00 00 01>, or nothing at all] ???
+<0x 00 00 00 08>
+<0x FF FF FF FF>
+<0x 00 00 00 00>
+<0x 00 00 00 10>
+...(rest of size of DATA block)
+<0x FD EA = PAD? (ýê)>
+DATA
+<0x 4 bytes = size of <marked text (see 3rd note)> >
+<marked text (see 3rd note)>
+[if (size of <marked text (see 3rd note)>) mod 4 !=0]
+<0x random PAD until (size of <marked text (see 3rd note)>) mod 4 ==0>
+[fi]
+DATA
+<0x 4 bytes = size of <note text (see 3rd note)> >
+<note text (see 3rd note)>
+[if (size of <note text (see 3rd note)>) mod 4 !=0]
+<0x random PAD until (size of <note text (see 3rd note)>) mod 4 ==0>
+[fi]
+[fi CORRECTION]
+//-------------------------------
+[if DRAWING]
+DATA
+<0x 4 bytes = size of raw data>
+ADQM
+	// NOTE: background color is stored in corresponding BKMK.
+	[begin DRAWING format]
+		...4 = <0x 00 00 00 01> ???
+		<0x 4 bytes = X POSITION OF UPPER LEFT CORNER??? > 
+		<0x 4 bytes = Y POSITION OF UPPER LEFT CORNER??? > 
+		<0x 4 bytes = X SIZE in pixels > 
+		<0x 4 bytes = Y SIZE in pixels > 
+		...4 = <0x 00 00 00 00> ???
+		<0x 4 bytes = number of STROKES>
+		[if "number of STROKES" == 0]
+			<0x 00 00 00 00>
+			[end DRAWING format]	
+		[fi]
+		[for each STROKE]
+			<0x 00 00 00 01> ???
+			<0x 4 bytes> = 
+				Stroke's beginning position in list of coordinates.
+			<0x 4 bytes> = 
+				Stroke's ending position in list of coordinates.
+			<0x 00 RR GG BB> = RRGGBB color of stroke.
+		[next STROKE]
+		<0x 4 bytes> = number of coordinate pairs in array of coordinates.
+		// NOTE: each stroke is formed out of at least three 
+		// coordinate pairs: begin, {next point}(1-n), end point.
+		[for each COORDINATE]
+			<0x 4 bytes> = X coordinate
+			<0x 4 bytes> = Y coordinate
+		[next COORDINATE]
+	[end DRAWING format]
+[if (size of <marked text (see 3rd note)>) mod 4 !=0]
+<0x random PAD until (size of <marked text (see 3rd note)>) mod 4 ==0>
+[fi]
+DATA
+<0x 4 bytes = size of <marked text (see 3rd note)> >
+<marked text (see 3rd note)>
+[if (size of <marked text (see 3rd note)>) mod 4 !=0]
+<0x random PAD until (size of <marked text (see 3rd note)>) mod 4 ==0>
+[fi]
+DATA
+<0x 4 bytes = size of DATA block>
+[if EBAR]	// this block can appear, or not... ???
+	EBAR
+	...various {4 x byte} ???
+[fi EBAR]
+EBVS
+<0x 00 00 00 03>
+<0x 4 bytes = IDENTIFIER>
+[<0x 00 00 00 01>, or nothing at all] ???
+<0x 00 00 00 08>
+<0x FF FF FF FF>
+<0x 00 00 00 00>
+<0x 00 00 00 10>
+...(size of DATA block - 30)
+<0x FD EA = PAD? (ýê)>
+[fi DRAWING]
+//-------------------------------
+[next {NOTE,MARK,CORRECTION,DRAWING}]
+
+// AUTHOR (if any)
+//--------------------------------------------------------------------
+[if AUTHOR]
+AUTH
+<0x 4 bytes = size of AUTHOR block>
+<text (see 3rd note)>
+[fi AUTHOR]
+//--------------------------------------------------------------------
+// TITLE (if any)
+//--------------------------------------------------------------------
+[if TITLE]
+TITL
+<0x 4 bytes = size of TITLE block>
+<text (see 3rd note)>
+[fi TITLE]
+//--------------------------------------------------------------------
+// GENRE (if any)
+//--------------------------------------------------------------------
+[if GENRE]
+GENR
+<0x 4 bytes = size of GENRE block>
+<text (see 3rd note)>
+[fi GENRE]
+//--------------------------------------------------------------------
+// ABSTRACT (if any)
+//--------------------------------------------------------------------
+[if ABSTRACT]
+ABST
+<0x 4 bytes = size of ABSTRACT block>
+<text (see 3rd note)>
+[fi ABSTRACT]
+//--------------------------------------------------------------------
+
+// FINAL DATA
+// Note: 'FINAL DATA' can occur anytime between these marks: 
+//   AUTHOR,TITLE,CATEGORY,GENRE,ABSTRACT,COVER,PUBLISHER,...
+//--------------------------------------------------------------------
+DATA
+<0x 4 bytes = size of EBVS block>
+[if EBAR]	// this block can appear, or not... ???
+	EBAR
+	...various {4 x byte} ???
+[fi EBAR]
+EBVS
+<0x 00 00 00 03> || <0x 00 00 00 04> 
+<0x 4 bytes || 8 bytes = IDENTIFIER>
+<0x 00 00 00 08>
+<0x FF FF FF FF>
+<0x 00 00 00 00>
+<0x 00 00 00 10>
+...(size of EBVS block - 30) :
+	...4	<-- 'position of last read' related
+	...various {4 x byte} ???
+	...4	<-- 'position of last read' related
+	...4
+	...4
+	...4
+<0x FD EA = PAD? (ýê)>
+//--------------------------------------------------------------------
+
+// CATEGORY (if any)
+//--------------------------------------------------------------------
+[if CATEGORY]
+CATE
+<0x 4 bytes = size of CATEGORY block>
+<text (see 3rd note)>
+[fi CATEGORY]
+//--------------------------------------------------------------------
+// COVER (if any)
+//--------------------------------------------------------------------
+[if COVER]
+COVE
+<0x 4 bytes = size of COVER block>
+<text (see 3rd note)>
+[fi COVER]
+//--------------------------------------------------------------------
+// PUBLISHER (if any)
+//--------------------------------------------------------------------
+[if PUBLISHER]
+PUBL
+<0x 4 bytes = size of PUBLISHER block>
+<text (see 3rd note)>
+[fi PUBLISHER]
+//--------------------------------------------------------------------
+
+
+// FINAL GROUP OF MARKS
+// Order of {NOTE,MARK,CORRECTION} : 
+// starts with user data at the beginning of the file, 
+// going forwards to the end:
+//--------------------------------------------------------------------
+[for each {NOTE,MARK,CORRECTION,DRAWING,BOOKMARK}]
+BKMK
+<0x 4 bytes = size of BKMK>
+<0x 4 bytes = TEXT position of the beginning of {NOTE,MARK,CORRECTION,DRAWING,BOOKMARK}>
+//-------------------------------
+[if DRAWING]
+<0x FF FF FF FF>
+[else]
+<0x 4 bytes = TEXT position of the end of {NOTE,MARK,CORRECTION,BOOKMARK}>
+[fi DRAWING]
+...4
+...4
+//-------------------------------
+[if NOTE]
+	<0x xx xx xx (20)?>, xxxxxx=>RRGGBB color ???
+	<0x 00 00 00 02>
+[fi NOTE]
+[if MARK]
+	<0x xx xx xx (0F/00)??>, xxxxxx=>RRGGBB color ???
+	<0x 00 00 00 04>
+[fi MARK]
+[if CORRECTION]
+	<0x xx xx xx (6F)?>, xxxxxx=>RRGGBB color ???
+	<0x 00 00 00 02>
+[fi CORRECTION]
+[if DRAWING]
+	<0x xx xx xx (0F)?>, xxxxxx=>RRGGBB DRAWING's background color.
+	<0x 00 00 00 08>
+[fi DRAWING]
+[if BOOKMARK]
+	<0x xx xx xx 00>
+	<0x 00 00 00 01>
+[fi BOOKMARK]
+	// this one is a strange type of mark, of yet not identified use:
+	[if UNKNOWN_TYPE_YET_1]
+		<0x xx xx xx 00>
+		<0x 00 00 40 00>
+	[fi UNKNOWN_TYPE_YET_1]
+
+//-------------------------------
+[if BOOKMARK || (NOTE "without stored marked text")]
+	<0x FF FF FF FF>
+[else]
+	<0x 4 bytes = DATA pointer in INDEXES>
+[fi BOOKMARK]
+[if DRAWING || MARK]
+	<0x FF FF FF FF>
+[else]
+	<0x 4 bytes = DATA pointer in INDEXES>
+[fi]
+<0x 4 bytes = DATA pointer in INDEXES>
+[if DRAWING]
+	<0x 4 bytes = DATA pointer in INDEXES>
+[else]
+	<0x FF FF FF FF>
+[fi]
+//-------------------------------
+<0x FF FF FF FF>
+<0x FF FF FF FF>
+[next {NOTE,MARK,CORRECTION,DRAWING,BOOKMARK}]
+//--------------------------------------------------------------------
+
+[if length % 32 bit != 0] ???
+	<0x FF FF FF FF>
+[fi]
+
+// END OF FILE
+
+// by idleloop@yahoo.com, v0.2.e, 12/2009
+// http://www.angelfire.com/ego2/idleloop
\ No newline at end of file
diff --git a/format_docs/pdb/mobi.txt b/format_docs/pdb/mobi.txt
new file mode 100644
index 0000000000..e378e1622b
--- /dev/null
+++ b/format_docs/pdb/mobi.txt
@@ -0,0 +1,341 @@
+from (http://wiki.mobileread.com/wiki/MOBI)
+
+About
+-----
+
+MOBI is the format used by the MobiPocket Reader. It may have a .mobi
+extension or it may have a .prc extension. The extension can be changed by the
+user to either of the accepted forms. In either case it may be DRM protected or
+non-DRM. The .prc extension is used because the PalmOS doesn't support any file
+extensions except .prc or .pdb. Note that Mobipocket prohibits their DRM format
+from being used on dedicated eBook readers that support other DRM formats.
+
+
+Description
+-----------
+
+MOBI format was originally an extension of the PalmDOC format by adding
+certain HTML-like tags to the data. Many MOBI formatted documents still use
+this form. However there is also a high compression version of this file format
+that compresses data to a larger degree in a proprietary manner. There are some
+third party programs that can read the eBooks in the original MOBI format but
+there are only a few third party programs that can read the eBooks in the new
+compressed form. The higher compression mode is using a Huffman coding scheme
+that has been called the Huff/cdic algorithm.
+
+From time to time features have been added to the format so new files may have
+problems if you try to read them with a down-level reader. Currently the
+source files follow the guidelines in the Open eBook format.
+
+Note that AZW for the Amazon Kindle is the same format as MOBI except that it
+uses a slightly different DRM scheme.
+
+
+Format
+------
+
+Like PalmDOC, the Mobipocket file format is that of a standard Palm Database
+Format file. The header of that format includes the name of the database
+(usually the book title and sometimes a portion of the author's name) which is
+up to 31 bytes of data. The files are identified by a Creator ID of MOBI and a
+Type of BOOK.
+
+
+PalmDOC Header
+--------------
+
+The first record in the Palm Database Format gives more information about the
+Mobipocket file. The first 16 bytes are almost identical to the first sixteen
+bytes of a PalmDOC format file.
+
+bytes   content             comments
+2       Compression         1 == no compression, 2 = PalmDOC compression,
+                            17480 = HUFF/CDIC compression.
+2       Unused              Always zero
+4       text length         Uncompressed length of the entire text of the book
+2       record count        Number of PDB records used for the text of the book.
+2       record size         Maximum size of each record containing text, always
+                            4096.
+4       Current Position    Current reading position, as an offset into the
+                            uncompressed text
+
+There are two differences from a Palm DOC file. There's an additional
+compression type (17480), and the Current Position bytes are used for a
+different purpose:
+
+bytes   content             comments
+2       Encryption Type     0 == no encryption, 1 = Old Mobipocket Encryption,
+                            2 = Mobipocket Encryption.
+2       Unknown             Usually zero
+
+The old Mobipocket Encryption scheme only allows the file to be registered
+with one PID, unlike the current encryption scheme that allows multiple PIDs to
+be used in a single file. Unless specifically mentioned, all the encryption
+information on this page refers to the current scheme.
+
+
+MOBI Header
+-----------
+
+Most Mobipocket files also have a MOBI header in record 0 that follows these
+16 bytes, and newer formats also have an EXTH header following the MOBI header,
+again all in record 0 of the PDB file format.
+
+The MOBI header is of variable length and is not documented. Some fields have
+been tentatively identified as follows:
+
+offset  bytes   content                 comments
+16      4       identifier              The characters M O B I
+20      4       header length           The length of the MOBI header, including
+                                        the previous 4 bytes
+24      4       Mobi type               The kind of Mobipocket file this is
+                                            2 Mobipocket Book
+                                            3 PalmDoc Book
+                                            4 Audio
+                                            257 News
+                                            258 News_Feed
+                                            259 News_Magazine
+                                            513 PICS
+                                            514 WORD
+                                            515 XLS
+                                            516 PPT
+                                            517 TEXT
+                                            518 HTML
+28      4       text Encoding           1252 = CP1252 (WinLatin1); 65001 = UTF-8
+32      4       Unique-ID               Some kind of unique ID number (random?)
+36      4       Generator version       Potentially the version of the
+                                        Mobipocket-generation tool. Always >=
+                                        the value of the "format version" field
+                                        and <= the version of mobigen used to
+                                        produce the file.
+40      40      Reserved                All 0xFF. In case of a dictionary, or
+                                        some newer file formats, a few bytes are
+                                        used from this range of 40 0xFFs
+80      4       First Non-book index?   First record number (starting with 0)
+                                        that's not the book's text
+84      4       Full Name Offset        Offset in record 0 (not from start of
+                                        file) of the full name of the book
+88      4       Full Name Length        Length in bytes of the full name of the
+                                        book
+92      4       Language                Book language code. Low byte is main
+                                        language 09= English, next byte is
+                                        dialect, 08 = British, 04 = US
+96      4       Input Language          Input language for a dictionary
+100     4       Output Language         Output language for a dictionary
+104     4       Format version          Potentially the version of the
+                                        Mobipocket format used in this file.
+                                        Always >= 1 and <= the value of the
+                                        "generator version" field.
+108     4       First Image record      First record number (starting with 0)
+                                        that contains an image. Image records
+                                        should be sequential. If there are
+                                        no images this will be 0xffffffff.
+112     4       HUFF record             Record containing Huff information
+                                        used in HUFF/CDIC decompression.
+116     4       HUFF count              Number of Huff records.
+120     4       DATP record             Unknown: Record starts with DATP.
+124     4       DATP count              Number of DATP records.
+128     4       EXTH flags              Bitfield. if bit 6, 0x40 is set, then
+                                        there's an EXTH record
+The following records are only present if the mobi header is long enough.
+132     36      ?                       36 unknown bytes, if MOBI is long enough
+168     4       DRM Offset              Offset to DRM key info in DRMed files.
+                                        0xFFFFFFFF if no DRM
+172     4       DRM Count               Number of entries in DRM info.
+176     4       DRM Size                Number of bytes in DRM info.
+180     4       DRM Flags               Some flags concerning the DRM info.
+184     2       ?
+186     2       Last Image record       Possible value of the last image
+                                        record. If there are no images in the
+                                        book this will be 0xffff.
+188     4       ?
+192     4       FCIS record             Unknown. Record starts with FCIS.
+196     4       ?
+200     4       FLIS record             Unknown. Record starts with FLIS.
+204     ?       ?                       Bytes to the end of the MOBI header,
+                                        including the following if the header
+                                        length >= 228. ( 244 from start of
+                                        record)
+242     2       Extra Data Flags        A set of binary flags, some of which
+                                        indicate extra data at the end of each
+                                        text block. This only seems to be valid
+                                        for Mobipocket format version 5 and 6
+                                        (and higher?), when the header length
+                                        is 228 (0xE4) or 232 (0xE8).
+
+
+EXTH Header
+-----------
+
+If the MOBI header indicates that there's an EXTH header, it follows immediately
+after the MOBI header. Since the MOBI header is of variable length, this isn't
+at any fixed offset in record 0. Note that some readers will ignore any EXTH
+header info if the mobipocket version number specified in the MOBI header is 2
+or less (perhaps 3 or less).
+
+The EXTH header is also undocumented, so some of this is guesswork.
+
+bytes   content             comments
+4       identifier          the characters E X T H
+4       header length       the length of the EXTH header, including the previous 4 bytes
+4       record Count        The number of records in the EXTH header. the rest of the EXTH header consists of repeated EXTH records to the end of the EXTH length.
+        EXTH record start   Repeat until done.
+4       record type         Exth Record type. Just a number identifying what's stored in the record
+4       record length       length of EXTH record = L , including the 8 bytes in the type and length fields
+L-8     record data         Data.
+        EXTH record end     Repeat until done.
+
+There are lots of different EXTH Records types. Ones found so far in Mobipocket
+files are listed here, with possible meanings. Hopefully the table will be
+filled in as more information comes to light.
+
+record type    usual length     name             comments
+1                               drm_server_id
+2                               drm_commerce_id
+3                               drm_ebookbase_book_id
+100                             author
+101                             publisher
+102                             imprint
+103                             description
+104                             isbn
+105                             subject
+106                             publishingdate
+107                             review
+108                             contributor
+109                             rights
+110                             subjectcode
+111                             type
+112                             source
+113                             asin
+114                             versionnumber
+115                             sample
+116                             startreading
+118                             retail price (as text)
+119                             retail price currency (as text)
+201                             coveroffset
+202                             thumboffset
+203                             hasfakecover
+204                             204 Unknown
+205                             205 Unknown
+206                             206 Unknown
+207                             207 Unknown
+208                             208 Unknown
+300                             300 Unknown
+401                             clippinglimit
+402                             publisherlimit
+403                             403 Unknown
+404                             404 ttsflag
+501            4                cdetype          PDOC - Personal Doc;
+                                                 EBOK - ebook;
+502                             lastupdatetime
+503                             updatedtitle
+
+And now, at the end of Record 0 of the PDB file format, we usually get the full
+file name, the offset of which is given in the MOBI header.
+
+
+Variable-width integers
+-----------------------
+
+Some parts of the Mobipocket format encode data as variable-width integers.
+These integers are represented big-endian with 7 bits per byte in bits 1-7. They
+may be either forward-encoded, in which case only the LSB has bit 8 set, or
+backward-encoded, in which case only the MSB has bit 8 set. For example, the
+number 0x11111 would be represented forward-encoded as:
+
+    0x04 0x22 0x91
+
+And backward-encoded as: 
+
+    0x84 0x22 0x11
+
+
+Trailing entries
+----------------
+
+The Extra Data Flags field of the MOBI header indicates which, if any, trailing
+entries are appended to the end of each text record. Each set bit in the field
+indicates a trailing entry. The entries appear to occur in bit-order; e.g.,
+trailing entry 1 immediately follows the text content and entry 16 occurs at
+the very end of the record. The effect and exact details of most of these
+entries is unknown. The trailing entries indicated by bits 2-16 appear to
+follow a common format. That format is:
+
+    <data><size>
+
+Where <size> is the size of the entire trailing entry (including the size of
+<size>) as a backward-encoded Mobipocket variable-width integer.
+
+Only a few bits have been identified:
+
+bit     Data at end of records
+0x0001  Multi-byte character overlaps
+0x0002  Some data to help with indexing
+0x0004  Some data about uncrossable breaks
+
+
+Multibyte character overlap
+---------------------------
+
+When bit 1 of the Extra Data Flags field is set, each record is followed by a
+trailing entry containing any extra bytes necessary to complete a multibyte
+character which crosses the record boundary. The bytes do not participate in
+compression regardless of which compression scheme is used for the file. However,
+unlike the trailing data bytes, the multibytes (including the count byte) do
+get included in any encryption. The overlapping bytes then re-appear as normal
+content at the beginning of the following record. The trailing entry ends with
+a byte containing a count of the overlapping bytes plus additional flags.
+
+offset  bytes   content         comments
+0       0-3     N terminal bytes
+                of a multibyte
+                character
+N       1       Size & flags    bits 1-2 encode N, use of bits 3-8 is unknown 
+
+
+PalmDOC Compression
+-------------------
+
+PalmDOC uses LZ77 compression techniques. DOC files can contain only compressed
+text. The format does not allow for any text formatting. This keeps files small,
+in keeping with the Palm philosophy. However, extensions to the format can use
+tags, such as HTML or PML, to include formatting within text. These extensions
+to PalmDoc are not interchangeable and are the basis for most eBook Reader
+formats on Palm devices.
+
+LZ77 algorithms achieve compression by replacing portions of the data with
+references to matching data that has already passed through both encoder and
+decoder. A match is encoded by a pair of numbers called a length-distance pair,
+which is equivalent to the statement "each of the next length characters is
+equal to the character exactly distance characters behind it in the uncompressed
+stream." (The "distance" is sometimes called the "offset" instead.)
+
+In the PalmDoc format, a length-distance pair is always encoded by a two-byte
+sequence. Of the 16 bits that make up these two bytes, 11 bits go to encoding
+the distance, 3 go to encoding the length, and the remaining two are used to
+make sure the decoder can identify the first byte as the beginning of such a
+two-byte sequence. The exact algorithm needed to decode the compressed text can
+be found on the PalmDOC page.
+
+PalmDOC data is always divided into 4096 byte blocks and the blocks are acted
+upon independently.
+
+PalmDOC does have support for bookmarks. These pointers are named and refer to
+an offset location in a file. If the file is edited these locations may no
+longer refer to the correct locations. Some reading programs allow the user to
+enter or edit these bookmarks while others treat them as a TOC. Some reading
+programs may ignore them entirely. They are stored at the end of the file itself
+so the full file needs to be scanned when loaded to find them. 
+
+
+MBP
+---
+
+This is the extension used on a side file (auxiliary) for MOBI formatted eBooks.
+It is used to store metadata used by the library software and also to store
+user entered data like bookmarks, annotations, last read position. This file is
+created automatically by the reader program when the eBook is first opened and
+has a .mbp extension. The Library management software in MobiPocket uses this
+file to get information displayed in the library window such as title and author
+so that it won't have to open the larger eBook file.
+
diff --git a/format_docs/pdb/palmdoc.txt b/format_docs/pdb/palmdoc.txt
new file mode 100644
index 0000000000..0df62ae2e2
--- /dev/null
+++ b/format_docs/pdb/palmdoc.txt
@@ -0,0 +1,25 @@
+PalmDoc Format
+--------------
+
+The format is that of a standard Palm Database Format file. The header of that
+format includes the name of the database (usually the book title and sometimes
+a portion of the author's name) which is up to 31 bytes of data. This string of
+characters is terminated with a 0 in the C style. The files are identified by a
+Creator ID of REAd and a Type of TEXt.
+
+
+Record 0
+--------
+
+The first record in the Palm Database Format gives more information about the
+PalmDOC file, and contains 16 bytes.
+
+bytes   content             comments 
+
+2       Compression         1 == no compression, 2 = PalmDOC compression (see below)
+2       Unused              Always zero
+4       text length         Uncompressed length of the entire text of the book
+2       record count        Number of PDB records used for the text of the book.
+2       record size         Maximum size of each record containing text, always 4096
+4       Current Position    Current reading position, as an offset into the uncompressed text
+
diff --git a/format_docs/pdb/pdb_format.txt b/format_docs/pdb/pdb_format.txt
new file mode 100644
index 0000000000..e6837ac2ad
--- /dev/null
+++ b/format_docs/pdb/pdb_format.txt
@@ -0,0 +1,104 @@
+Format
+------
+
+A PDB file can be broken into multiple parts: the header, record 0 and data.
+Values stored within the various parts are in big-endian byte order. The data
+part is broken down into multiple sections. The section count and offsets
+are referenced in the PDB header. Sections can be no more than 65505 bytes in
+length.
+
+
+Layout
+------
+
+PDB files take the format: DB header followed by record 0, which
+contains format-specific information, followed by data.
+
+    DB Header
+0   Record 0
+.
+.   Data (broken down into sections)
+.
+
+
+Palm Database Header Format
+
+bytes   content             comments 
+
+32      name                database name. This name is 0 terminated in the
+                            field and will be used as the file name on a
+                            computer. For eBooks this usually contains the
+                            title and may have the author depending on the
+                            length available.
+
+2       attributes          bit field.
+                            0x0002 Read-Only
+                            0x0004 Dirty AppInfoArea
+                            0x0008 Backup this database (i.e. no conduit exists)
+                            0x0010 (16 decimal) Okay to install newer over
+                                    existing copy, if present on PalmPilot
+                            0x0020 (32 decimal) Force the PalmPilot to reset
+                                    after this database is installed
+                            0x0040 (64 decimal) Don't allow copy of file to be
+                                    beamed to other Pilot.
+
+2       version             file version
+
+4       creation date       No. of seconds since start of January 1, 1904.
+
+4       modification date   No. of seconds since start of January 1, 1904.
+
+4       last backup date    No. of seconds since start of January 1, 1904.
+
+4       modificationNumber
+
+4       appInfoID           offset to start of Application Info (if present)
+                            or null
+
+4       sortInfoID          offset to start of Sort Info (if present) or null
+
+4       type                See above table. (For Applications this data will
+                            be 'appl')
+
+4       creator             See above table. This program will be launched if
+                            the file is tapped
+
+4       uniqueIDseed        used internally to identify record
+
+4       nextRecordListID    Only used when in-memory on Palm OS. Always set to
+                            zero in stored files.
+
+2       number of Records   number of records in the file - N
+
+8N      record Info List
+
+        start of record
+        info entry          Repeat N times to end of record info entry
+
+4       record Data Offset  the offset from the start of the PDB of this record
+
+1       record Attributes   bit field. The least significant four bits are used
+                            to represent the category values. These are the
+                            categories used to split the databases for viewing
+                            on the screen. A few of the 16 categories are
+                            pre-defined but the user can add their own. There
+                            is an undefined category for use if the user or
+                            programmer hasn't set this.
+                            0x10 (16 decimal) Secret record bit.
+                            0x20 (32 decimal) Record in use (busy bit).
+                            0x40 (64 decimal) Dirty record bit.
+                            0x80 (128, unsigned decimal) Delete record on
+                                  next HotSync.
+
+3       UniqueID            The unique ID for this record. Often just a
+                            sequential count from 0
+
+        end of record
+        info entry
+
+2?      Gap to data        traditionally 2 zero bytes to Info or raw data
+
+?       Records            The actual data in the file. AppInfoArea (if
+                           present), SortInfoArea (if present) and then
+                           records sequentially
+
diff --git a/format_docs/pdb/pdb_types.txt b/format_docs/pdb/pdb_types.txt
new file mode 100644
index 0000000000..5d6d39c897
--- /dev/null
+++ b/format_docs/pdb/pdb_types.txt
@@ -0,0 +1,34 @@
+Palm Database File Code
+-----------------------
+
+Reader                      Type Code
+
+Adobe Reader                .pdfADBE
+PalmDOC                     TEXtREAd
+BDicty                      BVokBDIC
+DB (Database program)       DB99DBOS
+eReader                     PNRdPPrs
+eReader                     DataPPrs
+FireViewer (ImageViewer)    vIMGView
+HanDBase                    PmDBPmDB
+InfoView                    InfoINDB
+iSilo                       ToGoToGo
+iSilo 3                     SDocSilX
+JFile                       JbDbJBas
+JFile Pro                   JfDbJFil
+LIST                        DATALSdb
+MobileDB                    Mdb1Mdb1
+MobiPocket                  BOOKMOBI
+Plucker                     DataPlkr
+QuickSheet                  DataSprd
+SuperMemo                   SM01SMem
+TealDoc                     TEXtTlDc
+TealInfo                    InfoTlIf
+TealMeal                    DataTlMl
+TealPaint                   DataTlPt
+ThinkDB                     dataTDBP
+Tides                       TdatTide
+TomeRaider                  ToRaTRPW
+Weasel                      zTXTGPlm
+WordSmith                   BDOCWrdS 
+
diff --git a/format_docs/pdb/plucker.html b/format_docs/pdb/plucker.html
new file mode 100644
index 0000000000..07f7b926ca
--- /dev/null
+++ b/format_docs/pdb/plucker.html
@@ -0,0 +1,2122 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<HTML>
+<HEAD>
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+<TITLE>The Plucker Document Format</TITLE>
+</HEAD>
+<BODY>
+<H1>The Plucker Document Format</H1>
+
+<H2>Introduction</H2>
+
+<p>
+This document is the official description of the
+<a href="http://www.plkr.org/">Plucker</a> format.</p>
+
+<HR><H2>Overview</H2>
+<p>
+The Plucker document format supports a multi-page (in the Web sense of 'page') hyperlinked information structure containing both 'rich' text and images.  Links may be internal to the document or link to other documents. External links, in standard URL form, may be included and displayed, but not followed.  Images may either be embedded in a text page, as with HTML, or be included as separate stand-alone pages.</p>
+<p>
+Plucker documents are structured so that they can be used both with a file-system-oriented operating system such as Unix or Windows, and with the PalmOS, a non-file-system-oriented OS.  To this end, they always begin with a standard PalmOS record database prefix, which consists of four parts:  the <i><A HREF="#database_header">database header</a></i>, a <i><a href="#record_id_list">record-id list</a></i>, an <i><a href="#appinfo_block">AppInfo block</a></i>, and a <i>SortInfo block</i>.  The Plucker format does not use the SortInfo block, which is therefore null, and consequently occupies no space in the document prefix.</p>
+<p>
+The record database prefix is then followed by a sequence of application-specific records.  In a Plucker document, this sequence consists of one <a href="#index_record">index record</a>, followed by a series of <a href="#data_records">data records</a>.  The index record contains information about the data records, along with some global information, such as the type of compression used.  Each data record contains either a page, an image, or data about the document, such as bookmarks or URL data.</p>
+<p>
+The format is big-endian; any multi-byte numeric values specified in this document are big-endian.  Images are stored in the Palm image format; for more information on this format please consult <A HREF="http://www.palmos.com/dev/tech/docs">http://www.palmos.com/dev/tech/docs/</A>.</p>
+<HR><H2>The Database Prefix</H2>
+
+<H3><a name="database_header">The Database Header</a></H3>
+<p>
+The database header is a fixed-size structure of 72 bytes.  It contains the name of the database, the Plucker version number, various timestamps (creation, modification, last backup), and several flags.  All timestamps are given using the PalmOS standard, seconds since 12:00 AM on January 1, 1904.</p>
+<p>
+<table BORDER CELLSPACING=0 CELLPADDING=2 WIDTH="80%" >
+
+<tr BGCOLOR="#99CCCC">
+<td><b><font color="#000000">Field</font></b></td>
+<td align="center"><b><font color="#000000">Bytes</font></b></td>
+<td><b><font color="#000000">Type</font></b></td>
+<td><b><font color="#000000">Notes</font></b></td>
+</tr>
+
+<tr>
+<td><b>docName</b></td>
+<td align="center">32</td>
+<td>String</td>
+<td>Must contain a NUL-terminated 7-bit ASCII string (only character codes 0x20-0x7E are valid) giving the name of the document.  Because of the terminating NUL character at end, only 31 bytes can actually be used for the name of the document.  The first 26 bytes of this string are used by Plucker as a unique ID for the document; names should be unique in the first 26 characters.</td>
+</tr>
+
+<tr>
+<td><b>flags</b></td>
+<td align="center">2</td>
+<td>Bitfield</td>
+<td>Most bits in this field are unused.  Unused bits should be set to zero on document creation, but reader software should not expect them to stay at this value.
+
+<p>Valid bits are as follows.  All numeric values given are big-endian.</p>
+<table BORDER CELLSPACING=0 CELLPADDING=2 WIDTH="100%" >
+<tr BGCOLOR="#BBDDDD">
+<td><b><font color="#000000">Name</font></b></td>
+<td><b><font color="#000000">Value</font></b></td>
+<td><b><font color="#000000">Meaning</font></b></td>
+</tr>
+
+<tr>
+<td>CopyPrevention</td>
+<td>0x0040</td>
+<td>Indicates that system should not allow copying of this document.</td>
+</tr>
+
+<tr>
+<td>Launchable</td>
+<td>0x0200</td>
+<td>Indicates that this document should be presented as a first-class object on desktop renderings.  If this bit is set, an AppInfo block must be included.</td>
+</tr>
+
+<tr>
+<td>Backup</td>
+<td>0x0008</td>
+<td>Indicates that this document should be backed up, if the system includes such a capability.</td>
+</tr>
+
+</table>
+</td>
+</tr>
+
+<tr>
+<td><b>version</b></td>
+<td align="center">2</td>
+<td>Numeric</td>
+<td>Version of the Plucker format used in this document.  Must have the value 1.</td>
+</tr>
+
+<tr>
+<td><b>creationDate</b></td>
+<td align="center">4</td>
+<td>Timestamp</td>
+<td>Time of document creation</td>
+</tr>
+
+<tr>
+<td><b>modificationDate</b></td>
+<td align="center">4</td>
+<td>Timestamp</td>
+<td>Time document last modified</td>
+</tr>
+
+<tr>
+<td><b>unused1</b></td>
+<td align="center">8</td>
+<td>Numeric</td>
+<td>Must be zero at document creation, but any specific value should not be relied upon.</td>
+</tr>
+
+<tr>
+<td><b>appInfoOffset</b></td>
+<td align="center">4</td>
+<td>Numeric</td>
+<td>Either zero, if no appInfo is present, or the offset from the beginning of the document to the start of the appInfo block.</td>
+</tr>
+
+<tr>
+<td><b>sortInfoId</b></td>
+<td align="center">4</td>
+<td>Numeric</td>
+<td>Must be zero.</td>
+</tr>
+
+<tr>
+<td><b>magic</b></td>
+<td align="center">8</td>
+<td>String</td>
+<td>Must be the 8 ISO Latin-1 characters "DataPlkr".  No terminating NUL character.</td>
+</tr>
+
+<tr>
+<td><b>unused2</b></td>
+<td align="center">4</td>
+<td>Numeric</td>
+<td>Must be zero at document creation, but any specific value should not be relied upon.</td>
+</tr>
+
+</table>
+
+<H3><a name="record_id_list">The Record-ID List</a></H3>
+<p>
+This list consists of a six-byte list header, followed by one ID entry for each data record in the document.  The list header has the structure:</p>
+<table BORDER CELLSPACING=0 CELLPADDING=2 WIDTH="80%" >
+<tr BGCOLOR="#99CCCC">
+<td><b><font color="#000000">Field</font></b></td>
+<td align="center"><b><font color="#000000">Bytes</font></b></td>
+<td><b><font color="#000000">Type</font></b></td>
+<td><b><font color="#000000">Notes</font></b></td>
+</tr>
+
+<tr>
+<td><b>nextRecordListID</b></td>
+<td align="center">4</td>
+<td>Numeric</td>
+<td>Must be zero.</td>
+</tr>
+
+<tr>
+<td><b>numRecords</b></td>
+<td align="center">2</td>
+<td>Numeric</td>
+<td>Number of records in the document, including the index record.</td>
+</tr>
+</table>
+
+<p>This is then followed by <b>numRecords</b> entries of the following structure:</p>
+<table BORDER CELLSPACING=0 CELLPADDING=2 WIDTH="80%" >
+<tr BGCOLOR="#99CCCC">
+<td><b><font color="#000000">Field</font></b></td>
+<td align="center"><b><font color="#000000">Bytes</font></b></td>
+<td><b><font color="#000000">Type</font></b></td>
+<td><b><font color="#000000">Notes</font></b></td>
+</tr>
+
+<tr>
+<td><b>recordOffset</b></td>
+<td align="center">4</td>
+<td>Numeric</td>
+<td>Number of bytes from the start of the document to the beginning of the record</td>
+</tr>
+
+<tr>
+<td><b>attributes</b></td>
+<td align="center">1</td>
+<td>Bitfield</td>
+<td>Record attributes -- should be zero.</td>
+</tr>
+
+<tr>
+<td><b>uniqueID</b></td>
+<td align="center">3</td>
+<td>Numeric</td>
+<td>A local (document-specific) unique ID for the record.  This is not used by Plucker (because it is not preserved by PalmOS through beaming of a document), but must still be different for each record.</td>
+</tr>
+</table>
+
+<p>
+Finally, there are two bytes of zero-padding to bring the structure alignment back to 4 bytes.</p>
+
+<H3><a name="appinfo_block">The AppInfo Block</a></H3>
+<p>
+Typically, this is only present when the <i>launchable</i> flag is set in the flags field of the database header.  No Plucker data aside from icon display information and a versioning string is stored in this block.  This block has the following structure:</p>
+<table BORDER CELLSPACING=0 CELLPADDING=2 WIDTH="80%" >
+<tr BGCOLOR="#99CCCC">
+<td><b><font color="#000000">Field</font></b></td>
+<td align="center"><b><font color="#000000">Bytes</font></b></td>
+<td><b><font color="#000000">Type</font></b></td>
+<td><b><font color="#000000">Notes</font></b></td>
+</tr>
+
+<tr>
+<td><b>signature</b></td>
+<td align="center">4</td>
+<td>Numeric</td>
+<td>Must contain the value 0x6C6E6368.</td>
+</tr>
+
+<tr>
+<td><b>hdrVersion</b></td>
+<td align="center">2</td>
+<td>Numeric</td>
+<td>Must have the value 3.</td>
+</tr>
+
+<tr>
+<td><b>hdrEncoding</b></td>
+<td align="center">2</td>
+<td>Numeric</td>
+<td>Must have the value 0.</td>
+</tr>
+
+<tr>
+<td><b>verStrWords</b></td>
+<td align="center">2</td>
+<td>Numeric</td>
+<td>The number of two-byte words following, containing the version string.</td>
+</tr>
+
+<tr>
+<td><b>verStr</b></td>
+<td align="center">2 * <b>verStrWords</b></td>
+<td>String</td>
+<td>NUL-terminated ISO Latin-1 string, padded at end if necessary with a zero byte to an even-byte boundary, containing a version string to display to the user containing version information for the document.</td>
+</tr>
+
+<tr>
+<td><b>pqaTitleWords</b></td>
+<td align="center">2</td>
+<td>Numeric</td>
+<td>The number of two-byte words in the following <b>pqaTitleStr</b>.</td>
+</tr>
+
+<tr>
+<td><b>pqaTitleStr</b></td>
+<td align="center">2 * <b>pqaTitleWords</b></td>
+<td>String</td>
+<td>NUL-terminated ISO Latin-1 string, padded at end if necessary with a zero byte to an even-byte boundary, containing a title string for iconic display of the document.</td>
+</tr>
+
+<tr>
+<td><b>iconWords</b></td>
+<td align="center">2</td>
+<td>Numeric</td>
+<td>Number of two-byte words in the following icon image.</td>
+</tr>
+
+<tr>
+<td><b>icon</b></td>
+<td align="center">2 * <b>iconWords</b></td>
+<td>Image</td>
+<td>Image (32x32) in Palm image format to be used as an icon to represent the document on a desktop-style display.  The image may not use a custom color map.</td>
+</tr>
+
+<tr>
+<td><b>smIconWords</b></td>
+<td align="center">2</td>
+<td>Numeric</td>
+<td>Number of two-byte words in the following icon image.</td>
+</tr>
+
+<tr>
+<td><b>smIcon</b></td>
+<td align="center">2 * <b>smIconWords</b></td>
+<td>Image</td>
+<td>Small image (15x9) in Palm image format to be used as an icon to represent the document on a desktop-style display.  The image may not use a custom color map.</td>
+</tr>
+</table>
+
+<HR><H2><a name="index_record">The Index Record</a></H2>
+<p>
+This record includes info about the compression type used
+for the Plucker document and also what IDs the reserved records use.
+The viewer will use this record to know where to look for the
+reserved records and whether it must have support for ZLib
+compression. This record should always be the first record in
+the Plucker document (i.e. at index 0).</p>
+
+<table BORDER CELLSPACING=0 CELLPADDING=2 WIDTH="80%" >
+<tr BGCOLOR="#99CCCC">
+<td><b><font color="#000000">Field</font></b></td>
+<td align="center"><b><font color="#000000">Bytes</font></b></td>
+<td><b><font color="#000000">Type</font></b></td>
+<td><b><font color="#000000">Notes</font></b></td>
+</tr>
+
+<tr>
+<td><b>uid</b></td>
+
+<td align="center">2</td>
+
+<td>Numeric</td>
+
+<td>unique ID for record, always 0x0001</td>
+</tr>
+
+<tr BGCOLOR="#F0F0F0">
+<td><b>version</b></td>
+
+<td align="center">2</td>
+
+<td>Numeric</td>
+
+<td>0x0002 if data is ZLib compressed, 0x0001 if DOC compressed</td>
+</tr>
+
+<tr>
+<td><b>records</b></td>
+
+<td align="center">2</td>
+
+<td>Numeric</td>
+
+<td>number of reserved records</td>
+</tr>
+
+<tr BGCOLOR="#F0F0F0">
+<td><b>reserved</b></td>
+
+<td align="center">4*records</td>
+
+<td>Numeric</td>
+
+<td>reserved ID array</td>
+</tr>
+</table>
+
+<p>
+The reserved ID array consists of a series of name/ID pairs,
+where the <strong>ID</strong> is the unique ID (2 bytes) for
+the record and the <strong>name</strong> is a value (2 bytes)
+from the following list.</p>
+
+<ul>
+<li>home.html = 0</li>
+<li>external bookmarks = 1</li>
+<li>URL handling = 2</li>
+<li>default category = 3</li>
+<li>additional metadata = 4</li>
+<li>page list metadata = 5</li>
+<li>sorted URL name data = 6</li>
+<li>external anchor name data = 7</li>
+
+</ul>
+
+<HR><H2><a name="data_records">The Data Records</a></H2>
+<p>
+There are several different types of data records.</p>
+
+<ul>
+<li><A HREF="#text_dr">text data (compressed/uncompressed)</A></li>
+<li><A HREF="#image_dr">image data (compressed/uncompressed)</A></li>
+<li><A HREF="#mailto_dr">mailto data</A></li>
+<li><A HREF="#url_handling_dr">URL handling data</A></li>
+<li><A HREF="#url_dr">URL data (compressed/uncompressed)</A></li>
+<li><A HREF="#external_bookmarks_dr">external bookmarks</A></li>
+<li><A HREF="#default_category_dr">default category</A></li>
+<li><a href="#index_extension_dr">metadata</a></li>
+<li><a href="#style_sheet_dr">style sheets (not yet implemented)</a></li>
+<li><a href="#font_page_dr">font pages (not yet implemented)</a></li>
+<li><a href="#table_dr">table data (compressed/uncompressed)</a></li>
+<li><a href="#pagelist_dr">page list metadata</a></li>
+<li><a href="#sorted_url_handling_dr">sorted URL handling data</a></li>
+<li><a href="#sorted_url_dr">sorted URL data (compressed/uncompressed)</a></li>
+<li><a href="#extanchor_handling_dr">external anchor name handling data</a></li>
+<li><a href="#extanchor_dr">external anchor name data (compressed/uncompressed)</a></li>
+
+</ul>
+
+
+<p>
+Each data record starts with a header, having the following structure:</p>
+
+<table BORDER CELLSPACING=0 CELLPADDING=2 WIDTH="80%" >
+<tr BGCOLOR="#99CCCC">
+<td><b><font color="#000000">Field</font></b></td>
+<td align="center"><b><font color="#000000">Bytes</font></b></td>
+<td><b><font color="#000000">Type</font></b></td>
+<td><b><font color="#000000">Notes</font></b></td>
+</tr>
+
+<tr>
+<td><b>uid</b></td>
+<td align="center">2</td>
+<td>Numeric</td>
+<td>Unique ID for record.  IDs must be sorted in increasing order.
+Currently the ID is not
+allowed to be 0xFFFF.  Moreover, some earlier versions of the viewer had a bug
+that crashed on records numbered 0x8000-0xFFFE.</td>
+</tr>
+
+<tr>
+<td><b>paragraphs</b></td>
+<td align="center">2</td>
+<td>Numeric</td>
+<td>number of paragraphs</td>
+</tr>
+
+<tr>
+<td><b>size</b></td>
+<td align="center">2</td>
+<td>Numeric</td>
+<td>total length of data before compression</td>
+</tr>
+
+<tr>
+<td><b>type</b></td>
+<td align="center">1</td>
+<td>Numeric</td>
+<td>Data type.  Must be one of the following:
+<table BORDER CELLSPACING=0 CELLPADDING=2 WIDTH="100%" >
+<tr BGCOLOR="#AADDDD">
+<td><b><font color="#000000">Data type</font></b></td>
+<td align="center"><b><font color="#000000">Value</font></b></td>
+</tr>
+
+<tr>
+<td><b>DATATYPE_PHTML</b></td>
+
+<td align="center">0</td>
+</tr>
+
+<tr BGCOLOR="#F0F0F0">
+<td><b>DATATYPE_PHTML_COMPRESSED</b></td>
+
+<td align="center">1</td>
+</tr>
+
+<tr>
+<td><b>DATATYPE_TBMP</b></td>
+
+<td align="center">2</td>
+</tr>
+
+<tr BGCOLOR="#F0F0F0">
+<td><b>DATATYPE_TBMP_COMPRESSED</b></td>
+
+<td align="center">3</td>
+</tr>
+
+<tr>
+<td><b>DATATYPE_MAILTO</b></td>
+
+<td align="center">4</td>
+</tr>
+
+<tr BGCOLOR="#F0F0F0">
+<td><b>DATATYPE_LINK_INDEX</b></td>
+
+<td align="center">5</td>
+</tr>
+
+<tr>
+<td><b>DATATYPE_LINKS</b></td>
+
+<td align="center">6</td>
+</tr>
+
+<tr BGCOLOR="#F0F0F0">
+<td><b>DATATYPE_LINKS_COMPRESSED</b></td>
+
+<td align="center">7</td>
+</tr>
+
+<tr>
+<td><b>DATATYPE_BOOKMARKS</b></td>
+
+<td align="center">8</td>
+</tr>
+
+<tr BGCOLOR="#F0F0F0">
+<td><b>DATATYPE_CATEGORY</b></td>
+
+<td align="center">9</td>
+</tr>
+
+<tr>
+<td><b>DATATYPE_METADATA</b></td>
+
+<td align="center">10</td>
+</tr>
+
+<tr BGCOLOR="#F0F0F0">
+<td><b>DATATYPE_STYLE_SHEET</b></td>
+
+<td align="center">11</td>
+</tr>
+
+<tr>
+<td><b>DATATYPE_FONT_PAGE</b></td>
+
+<td align="center">12</td>
+</tr>
+<tr BGCOLOR="#F0F0F0">
+<td><b>DATATYPE_TABLE</b></td>
+
+<td align="center">13</td>
+</tr>
+
+<tr>
+<td><b>DATATYPE_TABLE_COMPRESSED</b></td>
+
+<td align="center">14</td>
+</tr>
+
+<tr BGCOLOR="#F0F0F0">
+<td><b>DATATYPE_COMPOSITE_IMAGE</b></td>
+
+<td align="center">15</td>
+</tr>
+
+<tr>
+<td><b>DATATYPE_PAGELIST_METADATA</b></td>
+
+<td align="center">16</td>
+</tr>
+
+<tr BGCOLOR="#F0F0F0">
+<td><b>DATATYPE_SORTED_URL_INDEX</b></td>
+
+<td align="center">17</td>
+</tr>
+
+<tr>
+<td><b>DATATYPE_SORTED_URL</b></td>
+
+<td align="center">18</td>
+</tr>
+
+<tr BGCOLOR="#F0F0F0">
+<td><b>DATATYPE_SORTED_URL_COMPRESSED</b></td>
+
+<td align="center">19</td>
+</tr>
+
+<tr>
+<td><b>DATATYPE_EXT_ANCHOR_INDEX</b></td>
+
+<td align="center">20</td>
+</tr>
+
+<tr BGCOLOR="#F0F0F0">
+<td><b>DATATYPE_EXT_ANCHOR</b></td>
+
+<td align="center">21</td>
+</tr>
+
+<tr>
+<td><b>DATATYPE_EXT_ANCHOR_COMPRESSED</b></td>
+
+<td align="center">22</td>
+</tr>
+
+</table>
+</td>
+</tr>
+
+<tr>
+<td><b>flags</b></td>
+<td align="center">1</td>
+<td>Bitfield</td>
+<td>Bit-mapped record flags. Valid bits are as follows (all numeric values given are big-endian); unused bits should be set to zero.
+<table border="1" cellpadding="2" cellspacing="0" width="811">
+<tr bgcolor="#bbdddd">
+<td width="147"><b><font color="#000000">Name</font></b></td>
+<td width="39"><b><font color="#000000">Value</font></b></td>
+<td width="605"><b><font color="#000000">Meaning</font></b></td>
+</tr>
+
+<tr>
+<td bgcolor="#ffffff" width="147">Continued Record</td>
+<td bgcolor="#ffffff" width="39">&nbsp;0x01</td>
+<td bgcolor="#ffffff" width="605">A value of one indicates that the record is
+continued by the fragment in the next sequential record of the same type.
+This value is applicable to the following data types:
+<ul>
+  <li>DATATYPE_PHTML</li>
+  <li>DATATYPE_PHTML_COMPRESSED</li>
+</ul>
+<p>A value of zero indicates that the record is not to be continued (i.e. there are no fragments beyond this one, or this is the last one).</p>
+</td>
+</tr>
+
+<tr>
+<td bgcolor="#ffffff" width="147">Navigation Metadata</td>
+<td bgcolor="#ffffff" width="39">0x02</td>
+<td bgcolor="#ffffff" width="605">A value of one indicates that the text or image data in this record is followed by additional navigation metadata.</td>
+</tr>
+
+</table>
+</td>
+</tr>
+</table>
+
+<H3><a name="compression">Compression and the Owner-ID</a></H3>
+<p>This data format supports two forms of compression, DOC and ZLIB.  That part of a data record that occurs after the header is compressed as a single chunk.  All compressed records in a single document must use the same compression format.  Compressed records may be mixed with uncompressed records.  In a compressed record, the length of the compressed data <b>must</b> be less than its uncompressed length.</p>
+<p>DOC compression is <A HREF="http://www.pyrite.org/doc_format.php">the format invented for early Palm usage</a>.</p>
+<p>ZLIB compression uses the ZLib format documented in Internet RFCs <a href="http://www.ietf.org/rfc/rfc1950.txt?number=1950">1950</a> and <a href="http://www.ietf.org/rfc/rfc1951.txt?number=1951">1951</a>.  See also <a href="http://www.gzip.org/zlib/manual.html">http://www.gzip.org/zlib/manual.html</a> for a description of the library used to perform the compression and decompression.</p>
+<p>Plucker documents may be keyed to a specific string of 40 or fewer ASCII characters, called the <i>owner-id</i>.  When such a key is specified, zlib compression must be used in the document.  When an owner-id is specified, the beginning of each zlib-compressed data segment is XOR'ed with a value derived from the key, after compression, and must be XOR'ed again with the derived value before being decompressed.  If an owner-id is specified for a document, the metadata record must exist, and must contain an <TT>OwnerID</TT> subrecord giving the CRC-32 of the owner-id string.</p>
+<p>The <i>derived value</i> mentioned above is a 40-byte value constructed by forming 10 strings by concatenating the owner-id string with itself 2, 3, 4, 5, 6, 7, 8, 9, 10, and 11 times, then taking the CRC-32 value of each of these concatenations, then packing those 32-bit values in big-endian order into a 40-byte buffer.</p>
+<p></p>
+
+<p><HR width="60%" align=left><P>
+<H3><A NAME="text_dr">Text data records</A></H3>
+<p>
+For text data the data record header is followed by a series of paragraph
+headers, each representing a paragraph block in the text data.  This series of paragraph headers is then followed by the compressed or uncompressed text data.  Each paragraph header has the form:</p>
+
+<table BORDER CELLSPACING=0 CELLPADDING=2 WIDTH="80%" >
+<tr BGCOLOR="#99CCCC">
+<td><b><font color="#000000">Field</font></b></td>
+<td align="center"><b><font color="#000000">Bytes</font></b></td>
+<td><b><font color="#000000">Type</font></b></td>
+<td><b><font color="#000000">Notes</font></b></td>
+</tr>
+
+<tr>
+<td><b>size</b></td>
+
+<td align="center">2</td>
+
+<td>Numeric</td>
+
+<td>Total length of paragraph before compression. <font color="#FF0000">NOTE: No text data should be larger than 
+32k. If the original document is larger than 32k, then the 
+parser must split it into several records.</font>
+</td>
+</tr>
+
+<tr BGCOLOR="#F0F0F0">
+<td><b>attributes</b></td>
+
+<td align="center">2</td>
+
+<td>Bitfield</td>
+
+<td>Paragraph info.  The high-order 13 bits are reserved for future use and should be set to zero; the 3 low-order bits contain a numeric value in the range [0..7] giving the
+amount of extra paragraph spacing (2*value pixels).</td>
+</tr>
+</table>
+
+<p>
+The (uncompressed) text data contains a character stream of ISO Latin-1 characters, interspersed with 'functions'.</p>
+<P>
+A function is introduced in the text stream by a NUL character (0x00), followed by a one-byte function code
+and up to 7 bytes of data. The 3 LSB of the function code represent the
+remaining function data length; the 5 MSB denote the actual function
+code.  The following functions are valid:</p>
+
+<table BORDER CELLSPACING=0 CELLPADDING=2 WIDTH="80%" >
+<tr BGCOLOR="#99CCCC">
+<td><b><font color="#000000">Function Code</font></b></td>
+<td><b><font color="#000000">Description</font></b></td>
+<td align="center"><b><font color="#000000">Bytes</font></b></td>
+<td><b><font color="#000000">Arguments</font></b></td>
+</tr>
+
+<tr>
+<td><b>0x0A</b></td>
+<td>Page link begins</td>
+<td align="center">2</td>
+<td>record ID</td>
+</tr>
+
+<tr BGCOLOR="#F0F0F0">
+<td><b>0x0B</b></td>
+<td>Targeted page link begins</td>
+<td align="center">3</td>
+<td>record ID, target</td>
+</tr>
+
+<tr>
+<td><b>0x0C</b></td>
+<td>Paragraph link begins</td>
+<td align="center">4</td>
+<td>record ID, paragraph number</td>
+</tr>
+
+<tr BGCOLOR="#F0F0F0">
+<td><b>0x0D</b></td>
+<td>Targeted paragraph link begins</td>
+<td align="center">5</td>
+<td>record ID, paragraph number, target</td>
+</tr>
+
+<tr>
+<td><b>0x08</b></td>
+<td>Link ends</td>
+<td align="center">0</td>
+<td>no data</td>
+</tr>
+
+<tr BGCOLOR="#F0F0F0">
+<td><b>0x11</b></td>
+<td>Set font</td>
+<td align="center">1</td>
+<td>font specifier</td>
+</tr>
+
+<tr>
+<td><b>0x1A</b></td>
+<td>Embedded image</td>
+<td align="center">2</td>
+<td>image record ID</td>
+</tr>
+
+<tr BGCOLOR="#F0F0F0">
+<td><b>0x22</b></td>
+<td>Set margin</td>
+<td align="center">2</td>
+<td>left margin, right margin</td>
+</tr>
+
+<tr>
+<td><b>0x29</b></td>
+<td>Alignment of text</td>
+<td align="center">1</td>
+<td>alignment</td>
+</tr>
+
+<tr BGCOLOR="#F0F0F0">
+<td><b>0x33</b></td>
+<td>Horizontal rule</td>
+<td align="center">3</td>
+<td>8-bit height, 8-bit width (pixels), 8-bit width (%, 1-100)</td>
+</tr>
+
+<tr>
+<td><b>0x38</b></td>
+<td>New line</td>
+<td align="center">0</td>
+<td>no data</td>
+</tr>
+
+<tr BGCOLOR="#F0F0F0">
+<td><b>0x40</b></td>
+<td>Italic text begins</td>
+<td align="center">0</td>
+<td>no data</td>
+</tr>
+
+<tr>
+<td><b>0x48</b></td>
+<td>Italic text ends</td>
+<td align="center">0</td>
+<td>no data</td>
+</tr>
+
+<tr BGCOLOR="#F0F0F0">
+<td><b>0x53</b></td>
+<td>Set text color</td>
+<td align="center">3</td>
+<td>8-bit red, 8-bit green, 8-bit blue</td>
+</tr>
+
+<tr>
+<td><b>0x5C</b></td>
+<td>Multiple embedded image</td>
+<td align="center">4</td>
+<td>alternate image record ID, image record ID</td>
+</tr>
+
+<tr BGCOLOR="#F0F0F0">
+<td><b>0x60</b></td>
+<td>Underline text begins</td>
+<td align="center">0</td>
+<td>no data</td>
+</tr>
+
+<tr>
+<td><b>0x68</b></td>
+<td>Underline text ends</td>
+<td align="center">0</td>
+<td>no data</td>
+</tr>
+
+<tr BGCOLOR="#F0F0F0">
+<td><b>0x70</b></td>
+<td>Strike-through text begins</td>
+<td align="center">0</td>
+<td>no data</td>
+</tr>
+
+<tr>
+<td><b>0x78</b></td>
+<td>Strike-through text ends</td>
+<td align="center">0</td>
+<td>no data</td>
+</tr>
+
+<tr BGCOLOR="#F0F0F0">
+<td><b>0x83</b></td>
+<td>16-bit Unicode character</td>
+<td align="center">3</td>
+<td>alternate text length, 16-bit unicode character</td>
+</tr>
+
+<tr>
+<td><b>0x85</b></td>
+<td>32-bit Unicode character</td>
+<td align="center">5</td>
+<td>alternate text length, 32-bit unicode character</td>
+</tr>
+
+<tr BGCOLOR="#F0F0F0">
+<td><b>0x8E</b></td>
+<td>Begin custom font span</td>
+<td align="center">6</td>
+<td>font page record ID, X page position, Y page position</td>
+</tr>
+
+<tr>
+<td><b>0x8C</b></td>
+<td>Adjust custom font glyph position</td>
+<td align="center">4</td>
+<td>X page position, Y page position</td>
+</tr>
+
+<tr BGCOLOR="#F0F0F0">
+<td><b>0x8A</b></td>
+<td>Change font page</td>
+<td align="center">2</td>
+<td>font record ID</td>
+</tr>
+
+<tr>
+<td><b>0x88</b></td>
+<td>End custom font span</td>
+<td align="center">0</td>
+<td>no data</td>
+</tr>
+
+<tr BGCOLOR="#F0F0F0">
+<td><b>0x90</b></td>
+<td>Begin new table row</td>
+<td align="center">0</td>
+<td>no data</td>
+</tr>
+
+<tr>
+<td><b>0x92</b></td>
+<td>Insert table (or table link)</td>
+<td align="center">2</td>
+<td>table record ID</td>
+</tr>
+
+<tr BGCOLOR="#F0F0F0">
+<td><b>0x97</b></td>
+<td>Table cell data</td>
+<td align="center">7</td>
+<td>8-bit alignment, 16-bit image record ID, 8-bit columns, 8-bit rows, 16-bit text length</td>
+</tr>
+
+<tr>
+<td><b>0x9A</b></td>
+<td>Exact link modifier</td>
+<td align="center">2</td>
+<td>Paragraph Offset (The Exact Link Modifier modifies a Paragraph Link or Targeted Paragraph Link function to specify an exact byte offset within the paragraph. This function must be followed immediately by the function it modifies).</td>
+</tr>
+
+</table>
+<p>
+The function arguments have the following definitions:</p>
+
+<table BORDER CELLSPACING=0 CELLPADDING=2 WIDTH="80%" >
+<tr BGCOLOR="#99CCCC">
+<td><b><font color="#000000">Argument</font></b></td>
+<td align="center"><b><font color="#000000">Bytes</font></b></td>
+<td><b><font color="#000000">Notes</font></b></td>
+</tr>
+
+<tr>
+<td><b>record ID</b></td>
+
+<td align="center">2</td>
+
+<td>This is <em>either</em> a reference to a record in the Plucker document (that is, a real record ID), or an index into the list of URLs, for URLs which have not been included in the document.</td>
+</tr>
+
+<tr BGCOLOR="#F0F0F0">
+<td><b>image record ID</b></td>
+
+<td align="center">2</td>
+
+<td>reference to image in Plucker document</td>
+</tr>
+
+<tr>
+<td><b>paragraph number</b></td>
+
+<td align="center">2</td>
+
+<td>paragraph number (starting from 0) to jump to, or an index into the <a href="#extanchor_dr">external anchor name data</a> if the record ID is a pseudo-record ID for a URL which has not been included in the document.</td>
+</tr>
+
+<tr BGCOLOR="#F0F0F0">
+<td><b>font specifier</b></td>
+
+<td align="center">1</td>
+
+<td>
+The font concept used in Plucker is that of a 'standard' font, along with bold and italic versions of that font.  There is no font notion corresponding to HTML's &lt;BIG&gt; or &lt;SMALL&gt;.  In this markup, boldness and size are specified with a font specifier; italic is specified with a separate function code.  There are currently 12 font specification values, with the following meanings (the actual PalmOS fonts used by the Palm viewer are also given):
+<table BORDER=0 CELLSPACING=0 CELLPADDING=2 WIDTH="100%" >
+<tr>
+<td><b><font color="#000000">Value</font></b></td>
+<td><b><font color="#000000">Description</font></b></td>
+<td><b><font color="#000000">PalmOS 2.x</font></b></td>
+<td><b><font color="#000000">PalmOS 3.x</font></b></td>
+</tr>
+<tr>
+<td>0</td>
+<td>Regular text.</td>
+<td>stdFont</td>
+<td>stdFont</td>
+</tr>
+<tr>
+<td>1</td>
+<td>Suitable for &lt;H1&gt; HTML tags.</td>
+<td>boldFont</td>
+<td>largeBoldFont</td>
+</tr>
+<tr>
+<td>2</td>
+<td>Suitable for &lt;H2&gt; HTML tags.</td>
+<td>boldFont</td>
+<td>largeBoldFont</td>
+</tr>
+<tr>
+<td>3</td>
+<td>Suitable for &lt;H3&gt; HTML tags.</td>
+<td>boldFont</td>
+<td>largeFont</td>
+</tr>
+<tr>
+<td>4</td>
+<td>Suitable for &lt;H4&gt; HTML tags.</td>
+<td>boldFont</td>
+<td>largeFont</td>
+</tr>
+<tr>
+<td>5</td>
+<td>Suitable for &lt;H5&gt; HTML tags.</td>
+<td>stdFont</td>
+<td>boldFont</td>
+</tr>
+<tr>
+<td>6</td>
+<td>Suitable for &lt;H6&gt; HTML tags.</td>
+<td>stdFont</td>
+<td>boldFont</td>
+</tr>
+<tr>
+<td>7</td>
+<td>Regular text, but bold.</td>
+<td>stdFont</td>
+<td>boldFont</td>
+</tr>
+<tr>
+<td>8</td>
+<td>Fixed-width text, suitable for &lt;TT&gt; HTML tags.</td>
+<td>stdFont</td>
+<td>fixedWidthFont</td>
+</tr>
+<tr>
+<td>9</td>
+<td>Small normal text, suitable for &lt;SMALL&gt; HTML tags.</td>
+<td>stdFont</td>
+<td>stdFont</td>
+</tr>
+<tr>
+<td>10</td>
+<td>Small subscript text, suitable for &lt;SUB&gt; HTML tags.</td>
+<td>stdFont</td>
+<td>stdFont</td>
+</tr>
+<tr>
+<td>11</td>
+<td>Small superscript text, suitable for &lt;SUP&gt; HTML tags.</td>
+<td>stdFont</td>
+<td>stdFont</td>
+</tr>
+</table>
+</td>
+</tr>
+
+<tr>
+<td><b>left margin</b></td>
+
+<td align="center">1</td>
+
+<td>left margin in pixels</td>
+</tr>
+
+<tr BGCOLOR="#F0F0F0">
+<td><b>right margin</b></td>
+
+<td align="center">1</td>
+
+<td>right margin in pixels</td>
+</tr>
+
+<tr>
+<td><b>alignment</b></td>
+
+<td align="center">1</td>
+
+<td>alignment code (left = 0, right = 1, center = 2, justify = 3)</td>
+</tr>
+
+<tr BGCOLOR="#F0F0F0">
+<td><b>height</b></td>
+
+<td align="center">1</td>
+
+<td>height of horizontal rule in pixels; if not given, a default value
+of 2 pixels will be used</td>
+</tr>
+
+<tr>
+<td><b>width (pixels)</b></td>
+
+<td align="center">1</td>
+
+<td>width in pixels, should be 0 if percentage value should be used</td>
+</tr>
+
+<tr BGCOLOR="#F0F0F0">
+<td><b>width (%)</b></td>
+
+<td align="center">1</td>
+
+<td>width as the percentage between the current left and right margins.
+The default is 100%</td>
+</tr>
+
+<tr>
+<td><b>alternate text length</b></td>
+
+<td align="center">1</td>
+
+<td>When a Unicode character not representable in ISO-Latin-1 is encountered in an HTML document, a Unicode-character function code is inserted, with the 16-bit or 32-bit value of the character.  This is followed by an "alternate representation" of the character in ISO-Latin-1 text.  This parameter gives the length, in bytes, of the alternate text span.  If the viewer can present the Unicode character directly, display of the alternate text should be suppressed.</td>
+</tr>
+<tr BGCOLOR="#F0F0F0">
+<td><b>16 or 32 bit Unicode character</b></td>
+
+<td align="center">2, 4</td>
+
+<td>When a Unicode character not representable in ISO-Latin-1 is encountered in an HTML document, a Unicode-character function code is inserted, with the 16-bit or 32-bit Unicode character code for the character, which this parameter supplies.  This is followed by an "alternate representation" of the character in ISO-Latin-1 text.  If the viewer can present the Unicode character directly, display of the alternate text should be suppressed.</td>
+</tr>
+
+<tr>
+<td><b>target</b></td>
+
+<td align="center">1</td>
+
+<td>The target parameter of a link function allows an alternate default target view for a link
+to be specified. By default, a link will always open in the same view as the current content
+location. Valid link targets are as follows:
+<table border="0" cellpadding="2" cellspacing="0" width="787">
+<tr>
+<td><b>Value</b></td>
+<td><b>Description</b></td>
+</tr>
+<tr>
+<td>0</td>
+<td>Default View. If specified, the link will be opened in the default view as
+determined by the reader. This value causes a Targeted Paragraph or Page
+Link to behave identically to a standard Paragraph or Page Link.</td>
+</tr>
+<tr>
+<td>1</td>
+<td>Primary View. Specifies that the link will be opened in the primary window
+regardless of current content location.</td>
+</tr>
+<tr>
+<td>2</td>
+<td>Secondary View/Popup View. Specifies that the link will be opened in the
+secondary or popup view regardless of current content location.</td>
+</tr>
+</table>
+</td>
+</tr>
+
+<tr BGCOLOR="#F0F0F0">
+<td><b>Paragraph Offset</b></td>
+
+<td align="center">2</td>
+
+<td>specifies an exact byte offset within a paragraph relative to the beginning of the paragraph.</td>
+</tr>
+
+</table>
+
+<p><HR width="60%" align=left><P>
+<H3><A NAME="image_dr">Image data records</A></H3>
+<p>The image data consists of an image in Palm image format, compressed or uncompressed as specified in the document's index record.  The image may in addition be internally compressed, via any of the compression techniques allowed in the Palm image format.  The <i>fundamental size</i> of an image must be less than 480,000; this size is calculated by multiplying the width (in pixels) by the height (in pixels) by the depth (in bits).</p>
+
+<p>If the <i>fundamental size</i> is greater than 480,000, most parsers can be told to create a Multi-image group. This is a group of image records consisting of parts of the image which the viewer displays as one image. The parts are standard Image data records and the Multi-image record tells how many columns and rows the image has, and the record numbers of the parts.</p>
+
+<table BORDER CELLSPACING=0 CELLPADDING=2 WIDTH="80%" >
+<tr BGCOLOR="#99CCCC">
+<td><b><font color="#000000">Field</font></b></td>
+<td align="center"><b><font color="#000000">Bytes</font></b></td>
+<td><b><font color="#000000">Type</font></b></td>
+<td><b><font color="#000000">Notes</font></b></td>
+</tr>
+
+<tr>
+<td><b>columns</b></td>
+
+<td align="center">2</td>
+
+<td>Numeric</td>
+
+<td>number of columns in this image</td>
+</tr>
+
+<tr BGCOLOR="#F0F0F0">
+<td><b>rows</b></td>
+
+<td align="center">2</td>
+
+<td>Numeric</td>
+
+<td>number of rows in this image</td>
+</tr>
+
+<tr>
+<td><b>image record IDs</b></td>
+
+<td align="center">2 * columns * rows</td>
+
+<td>Numeric</td>
+
+<td>References to images in Plucker document. There are (columns * rows) images listed here</td>
+</tr>
+
+</table>
+
+<br>
+<p><HR width="60%" align=left><P>
+
+<h3><a name="navigation_dr">Navigation metadata</a></h3>
+<p>This data is optionally appended to the end of a text or image data record based on the 
+setting of the Navigation Metadata flag in the record header. If the Navigation Metadata flag
+is set to one, the image or text data is immediately followed by the following data structures.
+</p>
+<p>
+<font color="#FF0000">NOTE: If navigation data is appended to a record then the last two bytes
+in the record shall contain the byte offset from the beginning of the record to the start of the
+navigation data.</font>
+</p>
+
+<table BORDER CELLSPACING=0 CELLPADDING=2 WIDTH="80%" >
+<tr BGCOLOR="#99CCCC">
+<td><b><font color="#000000">Field</font></b></td>
+<td align="center"><b><font color="#000000">Bytes</font></b></td>
+<td><b><font color="#000000">Type</font></b></td>
+<td><b><font color="#000000">Notes</font></b></td>
+</tr>
+
+<tr>
+<td><b>anchor name offset</b></td>
+
+<td align="center">2</td>
+
+<td>Numeric</td>
+
+<td>Byte offset from the beginning of the metadata to the anchor name table for this record or 0xffff if there is no anchor name data.</td>
+</tr>
+
+<tr bgcolor="#f0f0f0">
+<td><b>pagelist offset</b></td>
+
+<td align="center">2</td>
+
+<td>Numeric</td>
+
+<td>Byte offset from the beginning of the metadata to the page list table for this record or 0xffff if there is no page list data.</td>
+</tr>
+
+<tr>
+<td><b>hierarchy offset</b></td>
+
+<td align="center">2</td>
+
+<td>Numeric</td>
+
+<td>Byte offset from the beginning of the metadata to the hierarchy table for this record or 0xffff if there is no hierarchy data.</td>
+</tr>
+
+<tr bgcolor="#f0f0f0">
+<td><b>topic offset</b></td>
+
+<td align="center">2</td>
+
+<td>Numeric</td>
+
+<td>Byte offset from the beginning of the metadata to the list of topics associated with this record or 0xffff if there is no topic data.</td>
+</tr>
+
+<tr>
+<td><b>Title Strings</b></td>
+
+<td align="center">2+</td>
+
+<td>String sequence</td>
+
+<td>A series of concatenated NUL-terminated strings in the following order:
+<ol>
+  <li><b>Long Record Title - </b>The title of the record. This title should allow the record to be identified out of context.</li>
+  <li><b>Short Record Title - </b>A title string which allows the record to be identified in context.</li>
+</ol>
+<p>If a given string in the list is not defined, an empty string (NUL) must still be entered 
+in the appropriate location in the string sequence.</p></td>
+</tr>
+
+</table>
+
+<p>The offsets block and title strings are followed by a series of tables.</p>
+
+<p>The <b>anchor name table</b> specifies the offset of the anchor names within the text record.</p>
+
+<table BORDER CELLSPACING=0 CELLPADDING=2 WIDTH="80%" >
+<tr BGCOLOR="#99CCCC">
+<td><b><font color="#000000">Field</font></b></td>
+<td align="center"><b><font color="#000000">Bytes</font></b></td>
+<td><b><font color="#000000">Type</font></b></td>
+<td><b><font color="#000000">Notes</font></b></td>
+</tr>
+
+<tr bgcolor="#f0f0f0">
+<td><b>anchor names</b></td>
+
+<td align="center">2</td>
+
+<td>Numeric</td>
+
+<td>number of anchor names</td>
+</tr>
+
+<tr>
+<td><b>anchor name data</b></td>
+
+<td align="center">2*anchor names</td>
+
+<td>Numeric Array</td>
+
+<td>This field is an array of 2 byte offsets, each representing the corresponding 
+offset associated with an anchor name relative to the beginning of the text 
+record. The order of each offset corresponds to the order of the string for that anchor name 
+within the anchor name string sequence below.</td>
+</tr>
+
+<tr bgcolor="#f0f0f0">
+<td><b>anchor name strings</b></td>
+
+<td align="center">variable size</td>
+
+<td>String sequence</td>
+
+<td>A concatenated sequence of NUL-terminated strings, each an anchor name. The relative
+location of an anchor name string in the string sequence represents its index into the anchor
+name data.</td>
+</tr>
+
+</table>
+
+<p>
+The <b>page list table</b> contains 
+the uid of the previous and next records relative to this record for one or more 
+page lists. Each page list, combined with the page lists from other 
+Incoming Navigation data records, defines one or more unique linear navigation
+schemes for the document. The default scheme is always associated with a 
+list id of 0.</p>
+
+<table BORDER CELLSPACING=0 CELLPADDING=2 WIDTH="80%" >
+<tr BGCOLOR="#99CCCC">
+<td><b><font color="#000000">Field</font></b></td>
+<td align="center"><b><font color="#000000">Bytes</font></b></td>
+<td><b><font color="#000000">Type</font></b></td>
+<td><b><font color="#000000">Notes</font></b></td>
+</tr>
+
+<tr bgcolor="#f0f0f0">
+<td><b>pagelists</b></td>
+
+<td align="center">2</td>
+
+<td>Numeric</td>
+
+<td>number of page list entries for this record.</td>
+</tr>
+
+<tr>
+<td><b>pagelist data</b></td>
+
+<td align="center">6*pagelists</td>
+
+<td>Page List Data</td>
+
+<td>Block of data containing an array of Page List Data (described below).</td>
+</tr>
+
+</table>
+
+<p>
+The page list data consists of a series of structures containing a list id followed by the 
+unique ID of the previous and next records associated with that list id.</p>
+
+<table BORDER CELLSPACING=0 CELLPADDING=2 WIDTH="80%" >
+<tr BGCOLOR="#99CCCC">
+<td><b><font color="#000000">Field</font></b></td>
+<td align="center"><b><font color="#000000">Bytes</font></b></td>
+<td><b><font color="#000000">Type</font></b></td>
+<td><b><font color="#000000">Notes</font></b></td>
+</tr>
+
+<tr bgcolor="#f0f0f0">
+<td><b>list id</b></td>
+
+<td align="center">2</td>
+
+<td>Numeric</td>
+
+<td>the list id for this list. A list id of 0 should be used for the default linear ordering.</td>
+</tr>
+
+<tr>
+<td><b>prev uid</b></td>
+
+<td align="center">2</td>
+
+<td>Numeric</td>
+
+<td>the uid of the previous record in the series for this list id or 0xffff if this is the 
+first record in the series.</td>
+</tr>
+
+<tr bgcolor="#f0f0f0">
+<td><b>next uid</b></td>
+
+<td align="center">2</td>
+
+<td>Numeric</td>
+
+<td>the uid of the next record in the series for the list id or 0xffff if this is the 
+last record in the series.</td>
+</tr>
+
+</table>
+
+<p>The <b>hierarchy table</b> specifies the unique ID of each text record above this record in
+the document hierarchy that serves as an index leading to the current record.</p>
+
+<table BORDER CELLSPACING=0 CELLPADDING=2 WIDTH="80%" >
+<tr BGCOLOR="#99CCCC">
+<td><b><font color="#000000">Field</font></b></td>
+<td align="center"><b><font color="#000000">Bytes</font></b></td>
+<td><b><font color="#000000">Type</font></b></td>
+<td><b><font color="#000000">Notes</font></b></td>
+</tr>
+
+<tr bgcolor="#f0f0f0">
+<td><b>levels</b></td>
+
+<td align="center">2</td>
+
+<td>Numeric</td>
+
+<td>number of levels above the current record in the hierarchy.</td>
+</tr>
+
+<tr>
+<td><b>hierarchy data</b></td>
+
+<td align="center">2*levels</td>
+
+<td>Numeric Array</td>
+
+<td>This field is an array of 2 byte uids, each corresponding to the text record 
+that serves as the index at a given level in the document hierarchy relative to 
+the current record. The order of each uid in the array corresponds to the 
+order of its corresponding string description in the string sequence below.</td>
+</tr>
+
+<tr bgcolor="#f0f0f0">
+<td><b>hierarchy strings</b></td>
+
+<td align="center">variable size</td>
+
+<td>String sequence</td>
+
+<td>An abbreviated string that identifies the level index. This string should be 
+as short as possible, ideally only a few characters.</td>
+</tr>
+
+</table>
+
+<p>The <b>topic table</b> provides a list of topics associated with the record.</p>
+
+<table BORDER CELLSPACING=0 CELLPADDING=2 WIDTH="80%" >
+<tr BGCOLOR="#99CCCC">
+<td><b><font color="#000000">Field</font></b></td>
+<td align="center"><b><font color="#000000">Bytes</font></b></td>
+<td><b><font color="#000000">Type</font></b></td>
+<td><b><font color="#000000">Notes</font></b></td>
+</tr>
+
+<tr bgcolor="#f0f0f0">
+<td><b>topics</b></td>
+
+<td align="center">2</td>
+
+<td>Numeric</td>
+
+<td>number of topics in the topic string sequence.</td>
+</tr>
+
+<tr>
+<td><b>topic strings</b></td>
+
+<td align="center">0+</td>
+
+<td>String sequence</td>
+
+<td>A concatenated sequence of one or more NUL-terminated 
+ISO Latin-1 strings. Each string represents a topic associated with this 
+text record.</td>
+</tr>
+
+</table>
+
+<p><HR width="60%" align=left><P>
+<H3><A NAME="mailto_dr">Mailto data records</A></H3>
+The mailto data contains info about e-mail addresses that are
+referenced by the mailto anchors. All the offsets are counted
+from the end of the header.
+
+<table BORDER CELLSPACING=0 CELLPADDING=2 WIDTH="80%" >
+<tr BGCOLOR="#99CCCC">
+<td><b><font color="#000000">Field</font></b></td>
+<td align="center"><b><font color="#000000">Bytes</font></b></td>
+<td><b><font color="#000000">Type</font></b></td>
+<td><b><font color="#000000">Notes</font></b></td>
+</tr>
+
+<tr>
+<td><b>to offset</b></td>
+
+<td align="center">2</td>
+
+<td>Numeric</td>
+
+<td>offset to TO string</td>
+</tr>
+
+<tr BGCOLOR="#F0F0F0">
+<td><b>cc offset</b></td>
+
+<td align="center">2</td>
+
+<td>Numeric</td>
+
+<td>offset to CC string</td>
+</tr>
+
+<tr>
+<td><b>subject offset</b></td>
+
+<td align="center">2</td>
+
+<td>Numeric</td>
+
+<td>offset to SUBJECT string</td>
+</tr>
+
+<tr BGCOLOR="#F0F0F0">
+<td><b>body offset</b></td>
+
+<td align="center">2</td>
+
+<td>Numeric</td>
+
+<td>offset to BODY string</td>
+</tr>
+
+<tr>
+<td><b>strings</b></td>
+
+<td align="center">0+</td>
+
+<td>String sequence</td>
+
+<td>A concatenated sequence of one or more NUL-terminated US-ASCII strings.  Each contains a header-value, which follows the constraints on header values laid down in IETF RFC 2822.  Header folding is not allowed.  Any of the four headers shown above may be absent; header values should be accessed via the above offsets.</td>
+</tr>
+
+</table>
+
+<p><HR width="60%" align=left><P>
+<H3><A NAME="url_handling_dr">URL handling data records</a></H3>
+Optionally, URL information for the records in the document may be stored.  This information includes URL strings both for the pages actually included in the document, and for those pages excluded from the document.  This information is conceptually stored as a sequence of strings, where the position of the URL in the sequence corresponds to the record ID of its page in the document.  In the case of a page which is not actually included in the document, a pseudo-record-ID is assigned, greater than any actual record IDs in the document, and the URL of that page is associated with that pseudo-record-ID.
+<p>
+In practice, there are two kinds of records used to store the URL strings, the <i>URL handling data record</i>, which serves as an index into the sequence of strings, and the <i>URL data record</i>, one or more of which contain the actual strings.  
+<p>
+<p>
+For cross-document linking support, the URL strings must be of the format "doc://[<i>external doc name</i>]:[<i>url</i>]" where <i>external doc name</i> is the name of the external document and <i>url</i> is the URL string associated with a given record.
+</p>
+The URL handling data is used to find the record ID of the record which contains the correct URL string. It
+contains a series of 2 byte number pairs.
+
+<table BORDER CELLSPACING=0 CELLPADDING=2 WIDTH="80%" >
+<tr BGCOLOR="#99CCCC">
+<td><b><font color="#000000">Field</font></b></td>
+<td align="center"><b><font color="#000000">Bytes</font></b></td>
+<td><b><font color="#000000">Type</font></b></td>
+<td><b><font color="#000000">Notes</font></b></td>
+</tr>
+
+<tr>
+<td><b>last url</b></td>
+
+<td align="center">2</td>
+
+<td>Numeric</td>
+
+<td>the ordinal number of the last URL in record</td>
+</tr>
+
+<tr BGCOLOR="#F0F0F0">
+<td><b>id</b></td>
+
+<td align="center">2</td>
+
+<td>Numeric</td>
+
+<td>record ID for record</td>
+</tr>
+
+</table>
+
+<p><HR width="60%" align=left><P>
+<H3><A NAME="url_dr">URL data records</A></h3>
+The URL data contains a list of the URLs. Additional records
+are created if needed and contain up to 200 URLs.
+
+<table BORDER CELLSPACING=0 CELLPADDING=2 WIDTH="80%" >
+<tr BGCOLOR="#99CCCC">
+<td><b><font color="#000000">Field</font></b></td>
+<td align="center"><b><font color="#000000">Bytes</font></b></td>
+<td><b><font color="#000000">Type</font></b></td>
+<td><b><font color="#000000">Notes</font></b></td>
+</tr>
+
+<tr>
+<td><b>URLs</b></td>
+
+<td align="center">1+</td>
+
+<td>String sequence</td>
+
+<td>a concatenated sequence of NUL-terminated URL strings following the constraints of IETF RFC 1738.  The list may contain up to 200 URLs (only text and image records are included,
+other records are represented only by the presence of a NUL; that is, by an empty string)</td>
+</tr>
+
+</table>
+<p>
+These records may or may not be compressed. This is indicated
+by the type in the header. These records are used by the Details
+form to display the URL of the current record and by the External
+Reference form to display the URL of pages that were not collected. From
+either form you can copy the URL to a Memo to remind you to pluck
+it at a later date.  For inter-document links, a paragraph link
+function may be specified to contain a pseudo-Record ID in place
+of an actual-Record ID, and an index into the
+<a href="#extanchor_handling_dr">external anchor names</a> record
+in place of the paragraph number.
+</p>
+
+<p><HR width="60%" align=left><P>
+<h3><A NAME="external_bookmarks_dr">External Bookmarks data records</A></h3>
+The external bookmarks data contains a list of bookmarks added by the
+parser. It works similarly to named anchors.
+
+<table BORDER CELLSPACING=0 CELLPADDING=2 WIDTH="80%" >
+<tr BGCOLOR="#99CCCC">
+<td><b><font color="#000000">Field</font></b></td>
+<td align="center"><b><font color="#000000">Bytes</font></b></td>
+<td><b><font color="#000000">Type</font></b></td>
+<td><b><font color="#000000">Notes</font></b></td>
+</tr>
+
+<tr BGCOLOR="#F0F0F0">
+<td><b>bookmarks</b></td>
+
+<td align="center">2</td>
+
+<td>Numeric</td>
+
+<td>number of bookmarks</td>
+</tr>
+
+<tr>
+<td><b>offset</b></td>
+
+<td align="center">2</td>
+
+<td>Numeric</td>
+
+<td>offset to the start of the bookmark data (counting from the beginning
+of the record)</td>
+</tr>
+
+<tr BGCOLOR="#F0F0F0">
+<td><b>names</b></td>
+
+<td align="center">variable size</td>
+
+<td>String sequence</td>
+
+<td>A concatenated sequence of NUL-terminated strings, each a bookmark name</td>
+</tr>
+
+<tr>
+<td><b>bookmark data</b></td>
+
+<td align="center">4*bookmarks</td>
+
+<td>Bookmark Data</td>
+
+<td>block of data for the location of the external bookmarks (see below)</td>
+</tr>
+
+</table>
+
+<p>
+The bookmark data is a series of uid/offset pairs.</p>
+
+<table BORDER CELLSPACING=0 CELLPADDING=2 WIDTH="80%" >
+<tr BGCOLOR="#99CCCC">
+<td><b><font color="#000000">Field</font></b></td>
+<td align="center"><b><font color="#000000">Bytes</font></b></td>
+<td><b><font color="#000000">Type</font></b></td>
+<td><b><font color="#000000">Notes</font></b></td>
+</tr>
+
+<tr>
+<td><b>uid</b></td>
+
+<td align="center">2</td>
+
+<td>Numeric</td>
+
+<td>unique ID for record</td>
+</tr>
+
+<tr BGCOLOR="#F0F0F0">
+<td><b>offset</b></td>
+
+<td align="center">2</td>
+
+<td>Numeric</td>
+
+<td>paragraph offset</td>
+</tr>
+
+</table>
+
+<p><HR width="60%" align=left><P>
+<H3><A NAME="default_category_dr">Default Category data records</A></h3>
+<p>Each Plucker document can be assigned to a number of named categories.  This record stores the names of default categories for the document.  The data consists of a concatenated series of NUL-terminated strings that
+should be used as the default category/categories for this document.</p>
+
+<p><HR width="60%" align=left><P>
+<H3><A NAME="index_extension_dr">Metadata data records</A></h3>
+<p>There should only be one of these per document.  This record begins with a two byte numeric value, giving the number of subrecords that follow, followed by that number of subrecords.  The subrecords are a sequence of tagged variable length items.  Each subrecord consists of three fields:
+
+<P>
+<P><table BORDER CELLSPACING=0 CELLPADDING=2 WIDTH="80%" >
+<tr BGCOLOR="#99CCCC">
+<td><b><font color="#000000">Field</font></b></td>
+<td align="center"><b><font color="#000000">Bytes</font></b></td>
+<td><b><font color="#000000">Type</font></b></td>
+<td><b><font color="#000000">Description</font></b></td>
+</tr>
+
+<tr>
+<td><b>type code</b></td>
+<td align="center">2</td>
+<td>Numeric</td>
+<td>Specifies what piece of extra information is in this subrecord</td>
+</tr>
+
+<tr>
+<td><b>length</b></td>
+<td align="center">2</td>
+<td>Numeric</td>
+<td>Number of 2-byte words in the <b>argument</b></td>
+</tr>
+
+<tr>
+<td><b>argument</b></td>
+<td align="center">2 * <b>length</b></td>
+<td>(type code specific)</td>
+<td>Data</td>
+</tr>
+
+</table>
+
+<P>The following table describes the valid subrecord type codes, and describes the structure of the associated data for each subrecord type.  Subrecords with unknown type codes should be ignored.
+
+<P><table BORDER CELLSPACING=0 CELLPADDING=2 WIDTH="80%" >
+<tr BGCOLOR="#99CCCC">
+<td><b><font color="#000000">Type code</font></b></td>
+<td><b><font color="#000000">Name</font></b></td>
+<td><b><font color="#000000">Description</font></b></td>
+<td><b><font color="#000000">Argument</font></b></td>
+</tr>
+
+<tr>
+<td align="center">1</td>
+<td><b>CharSet</b></td>
+<td>This is the character set and encoding used by text records in this document, unless otherwise specified for particular records.</td>
+<td>a two-byte numeric value, specifying the IETF IANA MIBenum value for the character set.  See <a href="http://www.iana.org/assignments/character-sets">the IANA registry of character sets</a> for valid values.</td>
+</tr>
+
+<tr>
+<td align="center">2</td>
+<td><b>ExceptionalCharSets</b></td>
+<td>This is a list of text records which use a charset other than that specified by the default CharSet.  Note that if no default CharSet is specified, the default charset should be thought of as "unknown".</td>
+<td>a sequence of (<b>length</b> / 2) record-ID, IANA-MIBenum pairs, where MIBenum values are as specified for <B>CharSet</B>.  The invalid MIBenum value of 0 (zero) is used for records which have an unknown charset, if necessary.<br>
+<table BORDER CELLSPACING=0 CELLPADDING=2 WIDTH="100%" >
+<tr BGCOLOR="#BBDDDD">
+<td><b><font color="#000000">Field</font></b></td>
+<td align="center"><b><font color="#000000">Bytes</font></b></td>
+<td><b><font color="#000000">Type</font></b></td>
+<td><b><font color="#000000">Notes</font></b></td>
+</tr>
+
+<tr>
+<td><b>record ID</b></td>
+
+<td align="center">2</td>
+
+<td>Numeric</td>
+
+<td>unique ID for record</td>
+</tr>
+
+<tr>
+<td><b>MIBenum</b></td>
+
+<td align="center">2</td>
+
+<td>Numeric</td>
+
+<td>IANA MIBenum for the character set used in this record</td>
+</tr>
+
+</table>
+</td>
+</tr>
+
+<tr>
+<td align="center">3</td>
+<td><b>OwnerID</b></td>
+<td>This is the CRC-32 of the specified owner-id for the document, if any.  Note that associating an owner-id with a document also affects the calculation of <a href="#compression">zlib compression</a>.</td>
+<td>a four-byte numeric value giving the CRC-32 of the owner-id string.</td>
+</tr>
+
+<tr>
+<td align="center">4</td>
+<td><b>Author</b></td>
+<td>The name of the author of the document.</td>
+<td>A string value in the document's default character set, padded at the end with NUL characters to an even number of bytes.</td>
+</tr>
+
+<tr>
+<td align="center">5</td>
+<td><b>Title</b></td>
+<td>The full title of the document.</td>
+<td>A string value in the document's default character set, padded at the end with NUL characters to an even number of bytes.</td>
+</tr>
+
+<tr>
+<td align="center">6</td>
+<td><b>PublicationDate</b></td>
+<td>The date and time this document was created.</td>
+<td>A 4-byte unsigned integer giving the number of seconds from 12:00 AM on January 1, 1904, to the time when this document was created.</td>
+</tr>
+
+<tr>
+<td align="center">7</td>
+<td><b>Linked Documents</b></td>
+<td>The list of external documents that this document links to.</td>
+<td>A concatenated sequence of NUL-terminated strings representing the document names for each external document linked to within this document. The string sequence should be padded at the end with NUL characters to an even number of bytes.</td>
+</tr>
+
+</table>
+
+<p><HR width="60%" align=left><P>
+<H3><A NAME="style_sheet_dr">Style Sheet data records</A></h3>
+<p>TBD</p>
+
+<p><HR width="60%" align=left><P>
+<H3><A NAME="font_page_dr">Font Page data records</A></h3>
+<p>TBD</p>
+
+<p><HR width="60%" align=left><P>
+<H3><A NAME="table_dr">Table data records</A></h3>
+<p>The Table Record describes an HTML table. It begins with a structure with the following format.</p>
+
+<P><table BORDER CELLSPACING=0 CELLPADDING=2 WIDTH="80%" >
+<tr BGCOLOR="#99CCCC">
+<td><b><font color="#000000">Field</font></b></td>
+<td align="center"><b><font color="#000000">Bytes</font></b></td>
+<td><b><font color="#000000">Type</font></b></td>
+<td><b><font color="#000000">Description</font></b></td>
+</tr>
+
+<tr>
+<td><b>size</b></td>
+<td align="center">2</td>
+<td>Numeric</td>
+<td>Size of the following data</td>
+</tr>
+
+<tr BGCOLOR="#F0F0F0">
+<td><b>columns</b></td>
+<td align="center">2</td>
+<td>Numeric</td>
+<td>Number of columns the table contains</td>
+</tr>
+
+<tr>
+<td><b>rows</b></td>
+<td align="center">2</td>
+<td>Numeric</td>
+<td>Number of rows the table contains</td>
+</tr>
+
+<tr BGCOLOR="#F0F0F0">
+<td><b>depth</b></td>
+<td align="center">1</td>
+<td>Numeric</td>
+<td>Bits per pixel (BPP) needed to render the table</td>
+</tr>
+
+<tr>
+<td><b>border</b></td>
+<td align="center">1</td>
+<td>Numeric</td>
+<td>Draw table borders (0 = no, any other value = yes, 1 pixel wide)</td>
+</tr>
+
+<tr BGCOLOR="#F0F0F0">
+<td><b>border color</b></td>
+<td align="center">4</td>
+<td>Numeric</td>
+<td>RGB value of border color</td>
+</tr>
+
+<tr>
+<td><b>link color</b></td>
+<td align="center">4</td>
+<td>Numeric</td>
+<td>RGB value of link color</td>
+</tr>
+
+</table>
+
+<p>This is followed by table row and table cell functions (their ends are implied). Each table cell function is followed by 'text length' (from the function) text, containing text and/or formatting functions. (Such as links, style, underline, strike through, italic, etc.)</p>
+
+<p><hr align="left" width="60%">
+<h3><a name="pagelist_dr">Page List Metadata data record</a></h3>
+<p>
+There should only be one of these per document. This record is used to assign the name and initial record associated with each page list in the document. Page lists are used to define the default ordering of pages within the document. More than one page list can be specified, which can be useful for defining tours through a document.
+</p>
+
+<P><table BORDER CELLSPACING=0 CELLPADDING=2 WIDTH="80%" >
+<tr BGCOLOR="#99CCCC">
+<td><b><font color="#000000">Field</font></b></td>
+<td align="center"><b><font color="#000000">Bytes</font></b></td>
+<td><b><font color="#000000">Type</font></b></td>
+<td><b><font color="#000000">Description</font></b></td>
+</tr>
+
+<tr>
+<td><b>lists</b></td>
+
+<td align="center">2</td>
+
+<td>Numeric</td>
+
+<td>The number of page lists or tours in the document.</td>
+</tr>
+
+<tr>
+<td><b>first record</b></td>
+
+<td align="center">2 * lists</td>
+
+<td>Numeric Array</td>
+
+<td>An array of uids corresponding to the first record in each page sequence. The zero
+based index into this array represents the sequence id or tour id. The first entry should be
+considered the default page ordering for the document.</td>
+</tr>
+
+<tr>
+<td><b>list name</b></td>
+
+<td align="center">1+</td>
+
+<td>String sequence</td>
+
+<td>a concatenated sequence of NUL-terminated strings, each representing the name of a
+page list. The first entry in the list corresponds to the default page ordering. For
+unnamed page lists, a NUL character should still be specified.</td>
+</tr>
+
+</table>
+
+<p>Page lists can be thought of as linked lists of records. The first record field in the
+Page List Metadata record is equivalent to the head pointer of the list.
+Each text record contains a previous/next record pointer within its navigation
+metadata.</p>
+
+<p><hr align="left" width="60%">
+<h3><a name="sorted_url_handling_dr">Sorted URL Handling data record</a></h3>
+<p>
+The Sorted URL handling record is used to find the record ID of the Sorted URL data record
+containing data for a given URL string. It contains a series of 2 byte number pairs.
+</p>
+
+<P><table BORDER CELLSPACING=0 CELLPADDING=2 WIDTH="80%" >
+<tr BGCOLOR="#99CCCC">
+<td><b><font color="#000000">Field</font></b></td>
+<td align="center"><b><font color="#000000">Bytes</font></b></td>
+<td><b><font color="#000000">Type</font></b></td>
+<td><b><font color="#000000">Description</font></b></td>
+</tr>
+
+<tr>
+<td><b>last URL</b></td>
+
+<td align="center">2</td>
+
+<td>Numeric</td>
+
+<td>the ordinal number of the last URL in record</td>
+</tr>
+
+<tr bgcolor="#f0f0f0">
+<td><b>id</b></td>
+
+<td align="center">2</td>
+
+<td>Numeric</td>
+
+<td>record ID for record</td>
+</tr>
+
+</table>
+
+<p><hr align="left" width="60%">
+
+<h3><a name="sorted_url_dr">Sorted URL data records</a></h3>
+<p>The sorted URL data record contains a list of URL/UID pointers sorted according to the
+lexicographical order of the URL strings pointed to by the url uid and
+url offset fields. This data is used in cross-document linking to
+facilitate a binary search of the URL strings in order to lookup the record ID
+for an incoming URL string. Only URLs for records actually contained in the document
+should be included in the Sorted URL data records. URLs for external
+records should be omitted.</p>
+
+<P><table BORDER CELLSPACING=0 CELLPADDING=2 WIDTH="80%" >
+<tr BGCOLOR="#99CCCC">
+<td><b><font color="#000000">Field</font></b></td>
+<td align="center"><b><font color="#000000">Bytes</font></b></td>
+<td><b><font color="#000000">Type</font></b></td>
+<td><b><font color="#000000">Description</font></b></td>
+</tr>
+
+<tr>
+<td><b>url uid</b></td>
+
+<td align="center">2</td>
+
+<td>Numeric</td>
+
+<td>unique ID for URL data record that contains the sorted URL string.</td>
+</tr>
+
+<tr bgcolor="#f0f0f0">
+<td><b>url offset</b></td>
+
+<td align="center">2</td>
+
+<td>Numeric</td>
+
+<td>byte offset of the first character of the sorted URL string in the URL data record.</td>
+</tr>
+
+<tr>
+<td><b>record uid</b></td>
+
+<td align="center">2</td>
+
+<td>Numeric</td>
+
+<td>unique ID of the text or image record that pertains to the sorted URL string.</td>
+</tr>
+
+</table>
+
+<p><hr align="left" width="60%">
+
+<h3><a name="extanchor_handling_dr">External Anchor Name Handling data record</a></h3>
+<p>The External Anchor handling record is used to find the record ID of the External
+Anchor data record containing a given external anchor string. It contains a series of
+2 byte number pairs.
+</p>
+
+<P><table BORDER CELLSPACING=0 CELLPADDING=2 WIDTH="80%" >
+<tr BGCOLOR="#99CCCC">
+<td><b><font color="#000000">Field</font></b></td>
+<td align="center"><b><font color="#000000">Bytes</font></b></td>
+<td><b><font color="#000000">Type</font></b></td>
+<td><b><font color="#000000">Description</font></b></td>
+</tr>
+
+<tr>
+<td><b>last anchor</b></td>
+
+<td align="center">2</td>
+
+<td>Numeric</td>
+
+<td>the ordinal number of the last anchor in record</td>
+</tr>
+
+<tr bgcolor="#f0f0f0">
+<td><b>id</b></td>
+
+<td align="center">2</td>
+
+<td>Numeric</td>
+
+<td>record ID for record</td>
+</tr>
+
+</table>
+
+<p><hr align="left" width="60%">
+
+<h3><a name="extanchor_dr">External Anchor Name data record</a></h3>
+<p>The External Anchor data record is a string table containing the unique names for all
+external anchor name strings referenced in this document. These strings
+are used to query the record specific anchor name tables in a target book to
+determine a paragraph offset for cross-document linking. This information is
+conceptually stored as a sequence of strings, where the position of the anchor
+name in the sequence corresponds to its index.</p>
+
+<P><table BORDER CELLSPACING=0 CELLPADDING=2 WIDTH="80%" >
+<tr BGCOLOR="#99CCCC">
+<td><b><font color="#000000">Field</font></b></td>
+<td align="center"><b><font color="#000000">Bytes</font></b></td>
+<td><b><font color="#000000">Type</font></b></td>
+<td><b><font color="#000000">Description</font></b></td>
+</tr>
+
+<tr>
+<td><b>anchor name list</b></td>
+
+<td align="center">1+</td>
+
+<td>String sequence</td>
+
+<td>a concatenated sequence of unique NUL-terminated strings, each representing an anchor
+name from an external link found within this document.</td>
+</tr>
+
+</table>
+
+<p>
+These records may or may not be compressed. This is indicated by the type in the header. These
+records are used in conjunction with the Sorted URL Data records and record specific anchor
+name tables to facilitate cross-document linking.</p>
+
+<p><HR><P>
+<table BORDER=0 CELLSPACING=0 CELLPADDING=2 WIDTH="100%" BGCOLOR="#EEEEEE" >
+<tr>
+<td BGCOLOR="#EEEEEE"><font size=-2>&copy; Copyright 2000 Michael
+Nordstr&ouml;m
+&lt;<a href="mailto:micke@sslug.dk">micke@sslug.dk</a>&gt; &middot; Copyright 2001 Bill Janssen &lt;<a href="mailto:bill@janssen.org">bill@janssen.org</a></font></td>
+
+<td ALIGN=RIGHT BGCOLOR="#EEEEEE"><font size=-2>$Id: DBFormat.html,v 1.27 2005/10/29 14:14:21 nordstrom Exp $</font></td>
+</tr>
+</table>
+</BODY>
+</HTML>
diff --git a/format_docs/pdb/pml.txt b/format_docs/pdb/pml.txt
new file mode 100644
index 0000000000..b5b357f381
--- /dev/null
+++ b/format_docs/pdb/pml.txt
@@ -0,0 +1,936 @@
+Palm Markup Language
+--------------------
+
+This page explains how to use the Palm Markup Language (PML) to specify
+formatting and other information in a text file for later reading using the
+eReader.
+
+PML commands start with a backslash, "\", and usually consist of a single
+character after that. Some PML commands are paired, such as those that specify
+italicized text. Other commands are directives, such as the "\p", which
+specifies a page break. PML is not meant to be an industrial-strength markup
+language, but it is easy to understand, easy to parse, and creates high-quality
+electronic books.
+
+Since PML and Palm DropBook are not without flaws, there is a page of Tips and
+Pitfalls.
+
+
+Let's Dive Right In
+-------------------
+
+palmsample.txt contains examples of formatting text, specifying chapters, etc.
+Use it to start from, or just as an example when making your own books.
+
+The following table specifies the Palm Markup Language commands, and what
+they do.
+
+\p                              New page
+\x                              New chapter; also causes a new page break.
+                                Enclose chapter title (and any style codes)
+                                with \x and \x
+\Xn                             New chapter, indented n levels (n between 0 and
+                                4 inclusive) in the Chapter dialog; doesn't
+                                cause a page break. Enclose chapter title (and
+                                any style codes) with \Xn and \Xn
+\Cn="Chapter title"             Insert "Chapter title" into the chapter
+                                listing, with level n (like \Xn). The text is
+                                not shown on the page and does not force a page
+                                break. This can sometimes be useful to insert a
+                                chapter mark at the beginning of an
+                                introduction to the chapter, for example.
+\c                              Center this block of text; close with \c on
+                                beginning of line
+\r                              Right justify text block; close with \r on
+                                beginning of line
+\i                              Italicize block; close with \i
+\u                              Underline block; close with \u
+\o                              Overstrike block; close with \o
+\v                              Invisible text; close with \v (can be used for
+                                comments)
+\t                              Indent block. Start at beginning of a line,
+                                close with \t at end of a line
+\T="50%"                        Indents the specified percentage of the screen
+                                width, 50% in this case. If the current drawing
+                                position is already past the specified screen
+                                location, this tag is ignored.
+\w="50%"                        Embed a horizontal rule of a given percentage
+                                width of the screen, in this case 50%. This tag
+                                causes a line break before and after it. The
+                                rule is centered. The percent sign is mandatory.
+\n                              Switch to the "normal" font, which is specified
+                                by the user
+\s                              Switch to stdFont; close with \s to revert to
+                                normal font
+\b                              Switch to boldFont; close with \b to revert to
+                                normal font (deprecated; use \B instead)
+\l                              Switch to largeFont; close with \l to revert to
+                                normal font
+\B                              Mark text as bold. Unlike the \b tag, \B
+                                doesn't change the font, so you can have large
+                                bold text. You cannot mix \b and \B in the same
+                                PML file.
+\Sp                             Mark text as superscript. Should not be mixed
+                                with other styles such as bold, italic, etc.
+                                Enclose superscripted text with \Sp.
+\Sb                             Mark text as subscript. Should not be mixed
+                                with other styles such as bold, italic, etc.
+                                Enclose subscripted text with \Sb.
+\k                              Make enclosed text into small-caps; close with
+                                \k. Any characters enclosed in \k tags
+                                (including those with accents) are made
+                                uppercase and are rendered at a smaller point
+                                size than a regular uppercase character.
+\\                              Represents a single backslash
+\aXXX                           Insert non-ASCII character whose Windows 1252
+                                code is decimal XXX. See the PML character
+                                table for details.
+\UXXXX                          Insert non-ASCII character whose Unicode code
+                                is hexadecimal XXXX. See the Extended PML
+                                character table for details.
+\m="imagename.png"              Insert the named image. See the section on
+                                Images below.
+\q="#linkanchor"Some text\q     Reference a link anchor which is at another
+                                spot in the document. The string after the
+                                anchor specification and before the trailing \q
+                                is underlined or otherwise shown to be a link
+                                when viewing the document.
+\Q="linkanchor"                 Specify a link anchor in the document.
+\-                              Insert a soft hyphen. A soft hyphen shows up
+                                only if it is necessary to break a word across
+                                a line.
+\Fn="footnote1"1\Fn             Link the "1" to a footnote whose name is
+                                footnote1, tagged at the end of the PML
+                                document. See the section on Footnotes and
+                                Sidebars below.
+\Sd="sidebar1"Sidebar\Sd        Link the "Sidebar" text to a sidebar whose name
+                                is sidebar1, tagged at the end of the PML
+                                document. See the section on Footnotes and
+                                Sidebars below.
+\I                              Mark as a reference index item. Enclose index
+                                item (and any style codes) with \I and \I. See
+                                Creating Dictionaries for more information.
+
+
+Examples
+--------
+
+\pThis is a new page
+
+\xChapter III\x
+
+\X1Chapter III, part A\X1
+
+\p\C="Introduction"The following story is one of my favorites...
+
+\cProperty of
+Gateway Senior High School
+\c
+
+\rJustify my love
+\r
+
+This stuff is \ireally\i cool.
+
+I just read \uMoby Dick.\u
+
+This is a \obig\o mistake.
+
+Copyright 1917\v Date of magazine serialization \v
+
+\tOnce upon a time
+there was a wicked queen
+called Esmerelda.\t
+
+Mammals:\T="40%"Lions
+\T="40%"Tigers
+\T="40%"Bears
+
+He walked away.
+\w="80%"
+Later that day, he ran into an old friend.
+
+\nIn the normal ways...
+
+The \stitle page\s should be formatted...
+
+I just \bcan't\b believe that you...
+
+This \lREALLY\l is a large tiger...
+
+This \Bbold\B text can be either \l\Blarge bold\B\l or \s\Bsmall bold\B\s.
+
+e\Spx + 2\Sp = 9
+
+C\Sb2\SbH\Sb3\SbO\Sb2\Sb should be used in moderation.
+
+See also \kanteater\k.
+
+The DOS prompt said "C:\\windows\\"
+
+The man said \a147Yeah.\a148
+
+Arrows can point \U2190 left or right \U2192.
+
+A Yield sign looks like this: \m="yieldsign.png".
+
+See the \q="#detailedinstructions"Detailed Instructions\q for how to install your eBook.
+
+\Q="detailedinstructions"\bDetailed Instructions\b - This section
+describes how to install an eBook to your handheld device.
+
+Very long words like anti\-dis\-establish\-ment\-arian\-ism may benefit from
+the use of soft hyphens.
+
+The Emerson case\Fn="emerson"[1]\Fn will be very important...
+
+For more information, see the \Sd="moreinfo"sidebar\Sd.
+
+\I\Baardvark\B\I \in.\i a large burrowing nocturnal mammal that feeds especially on termites and ants
+
+
+Footnotes and Sidebars
+----------------------
+
+Footnotes and Sidebars are specified with an XML-like syntax at the end of the
+PML document. For example,
+
+<sidebar id="sidebar1">
+Here's some \itext\i for a sidebar.
+</sidebar>
+
+would specify the sidebar to be displayed when the user taps on a sidebar link
+in the text that was specified using the \Sd tag.
+
+Any text or PML placed after the first footnote or sidebar is ignored as part
+of the book text.
+
+Sidebars and footnotes can include most PML features, but there are some PML
+tags that cannot be used inside of a sidebar or footnote.
+
+These include
+Chapters       \x, \X, \C
+Links       \q, \Q
+Footnotes           \Fn
+Sidebars    \Sd
+
+See the palmsample.txt file for examples of how to use many of the PML tags.
+
+
+Images
+------
+
+The following rules are intended to guarantee that images in your eBook will be
+viewable on all platforms that eReader runs on.
+
+On low-resolution Palm OS handhelds, an image wider than 158 pixels or taller
+than 148 pixels will be represented in the text by a thumbnail that the user
+can tap to view the entire image. Images smaller than 158 x 148 will be
+presented in-line with the text.
+
+On high-resolution Palm OS handhelds (those having screens of 320x320 pixels or
+more), images smaller than 158 by 148 pixels will be pixel-doubled. Images
+larger than 158x148 may be shown in-line with the text, if they will fit on
+the screen.
+
+On non-Palm OS platforms, small images will be scaled up appropriately. Large
+images will be scaled down to fit on the page; in this case the user can tap on
+the image to view the entire image and zoom in or out.
+
+For DropBook to find the image, it must be present in a directory whose name
+matches that of the PML text file. For example, if "pmlsample.txt" contains a
+reference to an image called "intro.png", then there must be a directory called
+"pmlsample_img" that contains intro.png. The directory's name is the name of
+the PML file (without the .txt extension) with "_img" appended.
+
+Images must be in PNG format and cannot be filtered or interlaced. Image depth
+must be 8 bits or less. Any color table may be used for color images.
+
+Image files must be less than or equal to 65505 bytes in size, since they are
+embedded into the .pdb format of the book; Palm database records are limited to
+65505 bytes in length. Since images are compressed, the actual image displayed
+by the reader may be much larger than 64K.
+
+Any or all of these restrictions may eventually be removed.
+
+
+Adding a Title, Cover Art, and Other Meta-information to Your eBook
+-------------------------------------------------------------------
+
+DropBook normally presents a dialog in which the title and other information
+for the eBook may be specified. This information may be embedded in the PML
+file instead.
+
+To specify the eBook title as it will appear in the Open dialog on the
+handheld, place a block of invisible comment text at the beginning of the file
+using \v tags. Inside this comment block, put the string TITLE="My eBook",
+where "My eBook" is replaced with the name of your eBook. It should look
+something like this:
+
+\vTITLE="Palm Sample Document"\v
+
+You can also specify the author using the AUTHOR meta-tag, the publisher with
+PUBLISHER, copyright information with COPYRIGHT, and the eBook ISBN with EISBN.
+A fully-specified set of meta-information might appear in PML as:
+
+\vTITLE="Palm Sample Document" AUTHOR="Sam Morgenstern" PUBLISHER="eReader.com"
+EISBN="X-XXXX-XXXX" COPYRIGHT="Copyright \a169 2004 by Sam Morgenstern"\v
+
+Cover art: If an image named "cover.png" is present in the eBook, it is assumed
+to be the cover art for the eBook. See the rules for images for sizing and
+other information.
+
+Some or all of this information may appear in the book information dialog in
+eReader, and may be used for other purposes in future products.
+
+
+Creating Dictionaries
+---------------------
+
+The \I PML tag is used to delimit an index item. Example: \Iaardvark\I
+
+Each entry must start in the normal font. If DropBook shows an error beginning
+with "No styles permitted before...", there is probably a missing end style tag
+before the text shown in the error message.
+
+Links, chapters and other PML structures are not permitted in dictionaries.
+Images, however, are.
+
+A special dictionary entry, "(Front matter)" is shown before other entries in
+the list of entries, and should be used to include pronunciation symbols and
+other front matter.
+
+Note that use of dictionaries requires eReader Pro.
+
+
+Tips and Pitfalls
+-----------------
+
+This page explains some common mistakes, some bugs in DropBook and/or the
+eReader, and some techniques that will allow you to create quality electronic
+books for the eReader.
+
+    * Check out the Converting to Palm eBooks page for some pointers on
+      converting text from various formats into the Palm Markup Language.
+    * Use a return at the end of each paragraph, not each line.
+    * Using an extra return between paragraphs reads easier than paragraph
+      indentation.
+    * The eReader doesn't display empty lines at the top of a page. If you need
+      to have some "empty" lines at the top of a page, put a space on each line.
+    * Don't use tables if you can possibly avoid it.
+
+      None of the fonts that the eReader supports are monospaced, so tables can
+      be difficult to represent. Break out the information in another way, or
+      use the \T tag, but beware of tables that look great on a Palm OS
+      handheld but not on a Pocket PC or vice versa.
+
+    * The Reader breaks lines on spaces, dashes or underscores. This has
+      several implications.
+
+         1. Don't fill more than a line with spaces, dashes or underscores.
+            There's a bug (which will be fixed in a future release) which
+            causes MakeBook to hang on such a line. Note that in the large
+            font, the number of spaces, dashes or underscores will be much
+            smaller than in the small font.
+         2. A string such as He shouted "Wait!--" may place the last quote on
+            the beginning of a line, since the line would break after the
+            second dash. Prevent this by using the PML string: He shouted
+            "Wait!\a150\a150". The non-breaking dash, code 150, will not break
+            a line. Use \a160 for a non-breaking space. Even better: use \a151,
+            a long dash, instead of two short dashes.
+
+    * The justification codes \c and \r (center and right justification) must
+      have closing codes on the beginning of the line following the justified
+      text.
+    * The indentation tag \t must have a closing tag at the end of a line of
+      the indented text.
+    * Use \s (small font) in the title page(s) of books to force the page(s) to
+      format nicely. Other than that, \n, \s and \l should rarely be necessary;
+      the font size used for most text display should be chosen by the user.
+
+
+Converting Uncommon Characters to PML
+-------------------------------------
+
+Use this chart to convert uncommon characters to their Palm Markup Language
+(PML) equivalent. Most characters are simply represented as themselves in PML
+and don't require this chart. But some uncommon characters can only be
+represented in PML by their "\aXXX" syntax. Use this chart to look up that
+"\aXXX" syntax.
+
+For example, if you wanted to write the following phrase in PML:
+
+    Copyright © 1999 by Samuel Morgenstern
+
+In PML, you would write it as:
+
+    Copyright \a169 1999 by Samuel Morgenstern
+
+Char    HTML # Code HTML Char Code  PML Char Code  Description
+
+        &#32;       -               Normal space
+!       &#33;       -       !       Exclamation
+"       &#34;       &quot;  "       Double quote
+#       &#35;       -       #       Hash
+$       &#36;       -       $       Dollar
+%       &#37;       -       %       Percent
+&       &#38;       &amp;   &       Ampersand
+'       &#39;       -       '       Apostrophe
+(       &#40;       -       (       Open bracket
+)       &#41;       -       )       Close bracket
+*       &#42;       -       *       Asterisk
++       &#43;       -       +       Plus sign
+,       &#44;       -       ,       Comma
+-       &#45;       -       -       Minus sign
+.       &#46;       -       .       Period
+/       &#47;       -       /       Forward slash
+0       &#48;       -       0       Digit 0
+1       &#49;       -       1       Digit 1
+2       &#50;       -       2       Digit 2
+3       &#51;       -       3       Digit 3
+4       &#52;       -       4       Digit 4
+5       &#53;       -       5       Digit 5
+6       &#54;       -       6       Digit 6
+7       &#55;       -       7       Digit 7
+8       &#56;       -       8       Digit 8
+9       &#57;       -       9       Digit 9
+:       &#58;       -       :       Colon
+;       &#59;       -       ;       Semicolon
+        &#60;       &lt;    <       Less than
+=       &#61;       -       =       Equals
+        &#62;       &gt;    >       Greater than
+?       &#63;       -       ?       Question mark
+@       &#64;       -       @       At sign
+A       &#65;       -       A       A
+B       &#66;       -       B       B
+C       &#67;       -       C       C
+D       &#68;       -       D       D
+E       &#69;       -       E       E
+F       &#70;       -       F       F
+G       &#71;       -       G       G
+H       &#72;       -       H       H
+I       &#73;       -       I       I
+J       &#74;       -       J       J
+K       &#75;       -       K       K
+L       &#76;       -       L       L
+M       &#77;       -       M       M
+N       &#78;       -       N       N
+O       &#79;       -       O       O
+P       &#80;       -       P       P
+Q       &#81;       -       Q       Q
+R       &#82;       -       R       R
+S       &#83;       -       S       S
+T       &#84;       -       T       T
+U       &#85;       -       U       U
+V       &#86;       -       V       V
+W       &#87;       -       W       W
+X       &#88;       -       X       X
+Y       &#89;       -       Y       Y
+Z       &#90;       -       Z       Z
+[       &#91;       -       [       Open square bracket
+\       &#92;       -       \\       Backslash
+]       &#93;       -       ]       Close square bracket
+^       &#94;       -       ^       Caret
+_       &#95;       -       _       Underscore
+`       &#96;       -       `       Grave accent
+a       &#97;       -       a       a
+b       &#98;       -       b       b
+c       &#99;       -       c       c
+d       &#100;       -       d       d
+e       &#101;       -       e       e
+f       &#102;       -       f       f
+g       &#103;       -       g       g
+h       &#104;       -       h       h
+i       &#105;       -       i       i
+j       &#106;       -       j       j
+k       &#107;       -       k       k
+l       &#108;       -       l       l
+m       &#109;       -       m       m
+n       &#110;       -       n       n
+o       &#111;       -       o       o
+p       &#112;       -       p       p
+q       &#113;       -       q       q
+r       &#114;       -       r       r
+s       &#115;       -       s       s
+t       &#116;       -       t       t
+u       &#117;       -       u       u
+v       &#118;       -       v       v
+w       &#119;       -       w       w
+x       &#120;       -       x       x
+y       &#121;       -       y       y
+z       &#122;       -       z       z
+{       &#123;       -       {       Left brace
+|       &#124;       -       |       Vertical bar
+}       &#125;       -       }       Right brace
+~       &#126;       -       ~       Tilde
+
+        &#160;       &nbsp;     \a160       Non-breaking space
+        &#161;       &iexcl;    \a161       Inverted exclamation
+        &#162;       &cent;     \a162       Cent sign
+        &#163;       &pound;    \a163       Pound sign
+        &#164;       &curren;   \a164       Currency sign
+        &#165;       &yen;      \a165       Yen sign
+        &#166;       &brvbar;   \a166       Broken bar
+        &#167;       &sect;     \a167       Section sign
+        &#168;       &uml;      \a168       Umlaut or diaeresis
+        &#169;       &copy;     \a169       Copyright sign
+        &#170;       &ordf;     \a170       Feminine ordinal
+        &#171;       &laquo;    \a171       Left angle quotes
+        &#172;       &not;      \a172       Logical not sign
+        &#173;       &shy;      \a173       Soft hyphen
+        &#174;       &reg;      \a174       Registered trademark
+        &#175;       &macr;     \a175       Spacing macron
+        &#176;       &deg;      \a176       Degree sign
+        &#177;       &plusmn;   \a177       Plus-minus sign
+        &#178;       &sup2;     \a178       Superscript 2
+        &#179;       &sup3;     \a179       Superscript 3
+        &#180;       &acute;    \a180       Spacing acute
+        &#181;       &micro;    \a181       Micro sign
+        &#182;       &para;     \a182       Paragraph sign
+        &#183;       &middot;   \a183       Middle dot
+        &#184;       &cedil;    \a184       Spacing cedilla
+        &#185;       &sup1;     \a185       Superscript 1
+        &#186;       &ordm;     \a186       Masculine ordinal
+        &#187;       &raquo;    \a187       Right angle quotes
+        &#188;       &frac14;   \a188       One quarter
+        &#189;       &frac12;   \a189       One half
+        &#190;       &frac34;   \a190       Three quarters
+        &#191;       &iquest;   \a191       Inverted question mark
+        &#192;       &Agrave;   \a192       A grave
+        &#193;       &Aacute;   \a193       A acute
+        &#194;       &Acirc;    \a194       A circumflex
+        &#195;       &Atilde;   \a195       A tilde
+        &#196;       &Auml;     \a196       A diaeresis
+        &#197;       &Aring;    \a197       A ring
+        &#198;       &AElig;    \a198       AE ligature
+        &#199;       &Ccedil;   \a199       C cedilla
+        &#200;       &Egrave;   \a200       E grave
+        &#201;       &Eacute;   \a201       E acute
+        &#202;       &Ecirc;    \a202       E circumflex
+        &#203;       &Euml;     \a203       E diaeresis
+        &#204;       &Igrave;   \a204       I grave
+        &#205;       &Iacute;   \a205       I acute
+        &#206;       &Icirc;    \a206       I circumflex
+        &#207;       &Iuml;     \a207       I diaeresis
+        &#208;       &ETH;      \a208       Eth
+        &#209;       &Ntilde;   \a209       N tilde
+        &#210;       &Ograve;   \a210       O grave
+        &#211;       &Oacute;   \a211       O acute
+        &#212;       &Ocirc;    \a212       O circumflex
+        &#213;       &Otilde;   \a213       O tilde
+        &#214;       &Ouml;     \a214       O diaeresis
+        &#215;       &times;    \a215       Multiplication sign
+        &#216;       &Oslash;   \a216       O slash
+        &#217;       &Ugrave;   \a217       U grave
+        &#218;       &Uacute;   \a218       U acute
+        &#219;       &Ucirc;    \a219       U circumflex
+        &#220;       &Uuml;     \a220       U diaeresis
+        &#221;       &Yacute;   \a221       Y acute
+        &#222;       &THORN;    \a222       THORN
+        &#223;       &szlig;    \a223       sharp s
+        &#224;       &agrave;   \a224       a grave
+        &#225;       &aacute;   \a225       a acute
+        &#226;       &acirc;    \a226       a circumflex
+        &#227;       &atilde;   \a227       a tilde
+        &#228;       &auml;     \a228       a diaeresis
+        &#229;       &aring;    \a229       a ring
+        &#230;       &aelig;    \a230       ae ligature
+        &#231;       &ccedil;   \a231       c cedilla
+        &#232;       &egrave;   \a232       e grave
+        &#233;       &eacute;   \a233       e acute
+        &#234;       &ecirc;    \a234       e circumflex
+        &#235;       &euml;     \a235       e diaeresis
+        &#236;       &igrave;   \a236       i grave
+        &#237;       &iacute;   \a237       i acute
+        &#238;       &icirc;    \a238       i circumflex
+        &#239;       &iuml;     \a239       i diaeresis
+        &#240;       &eth;      \a240       eth
+        &#241;       &ntilde;   \a241       n tilde
+        &#242;       &ograve;   \a242       o grave
+        &#243;       &oacute;   \a243       o acute
+        &#244;       &ocirc;    \a244       o circumflex
+        &#245;       &otilde;   \a245       o tilde
+        &#246;       &ouml;     \a246       o diaeresis
+        &#247;       &divide;   \a247       division sign
+        &#248;       &oslash;   \a248       o slash
+        &#249;       &ugrave;   \a249       u grave
+        &#250;       &uacute;   \a250       u acute
+        &#251;       &ucirc;    \a251       u circumflex
+        &#252;       &uuml;     \a252       u diaeresis
+        &#253;       &yacute;   \a253       y acute
+        &#254;       &thorn;    \a254       thorn
+        &#255;       &yuml;     \a255       y diaeresis
+,       &#8218;      &sbquo;    \a130       single low quote
+        &#402;       &fnof;     \a131       Scripted f
+        &#8222;      &bdquo;    \a132       low quote
+        &#8230;      &hellip;   \a133       Ellipsis
+        &#8224;      &dagger;   \a134       Dagger
+        &#8225;      &Dagger;   \a135       Double dagger
+        &#352;       &Scaron;   \a138       Large S w/inverted caret
+<       &#8249;      &lsaquo;   \a139       single left angle quote
+        &#338;       &OElig;    \a140       Large combined oe
+        &#8216;      &lsquo;    \a145       Open single smart quote
+        &#8217;      &rsquo;    \a146       Close single smart quote
+        &#8220;      &ldquo;    \a147       Open double smart quote
+        &#8221;      &rdquo;    \a148       Close double smart quote
+        &#8226;      &bull;     \a149       Bullet
+        &#8211;      &ndash;    \a150       Small dash (en dash)
+        &#8212;      &mdash;    \a151       Large dash (em dash)
+        &#8482;      &trade;    \a153       Trademark
+        &#353;       &scaron;   \a154       Small S w/inverted caret
+>       &#8250;      &rsaquo;   \a155       single right angle quote
+        &#339;       &oelig;    \a156       Small combined oe
+        &#376;       &Yuml;     \a159       Large Y with diaeresis
+
+
+Extended Character Set
+----------------------
+
+In addition to the special characters supported by earlier versions of eReader
+(which can be accessed using the \a### tag), all versions of eReader Pro and
+eReader version 2.4 and later include support for additional special characters
+and symbols. These symbols can be accessed using the \U#### tag, where #### are
+four hexadecimal digits giving the Unicode encoding of the special character.
+
+Only the limited subset of Unicode characters given in the table below are
+supported. In addition, some of the characters that are included in the table
+are not present in eReader Pro versions prior to 2.4. To ensure that the
+characters are displayed correctly, books using these tags should be read using
+eReader or eReader Pro version 2.4 or later.
+
+On Palm OS handhelds these special symbols are only available in one size,
+matching the "Small" font. For best results on Palm OS handhelds the \U tag
+should only be used inside blocks set to the "Small" font by way of \s tags.
+On Palm OS handhelds these special characters are not affected by the font tags
+(\s, \l, \b and \n), the bold style tag (\B), or the small caps style tag (\k).
+
+If the \U characters are not showing up correctly using eReader on your Windows
+desktop or laptop this problem is a result of the fonts for eReader not being
+installed properly. The solution is to go to the directory C:\Windows\Fonts\
+and "double click" on each font that starts with "Maynard". This will open each
+font and allow the system to register it. Close the windows that were opened as
+a result of the mouse clicks and the problem should be resolved.
+
+Char     HTML Code     PML Code     Description
+
+Latin Extended-A
+Ā     &#256;     \U0100     LATIN CAPITAL LETTER A WITH MACRON
+ā     &#257;     \U0101     LATIN SMALL LETTER A WITH MACRON
+Ă     &#258;     \U0102     LATIN CAPITAL LETTER A WITH BREVE
+ă     &#259;     \U0103     LATIN SMALL LETTER A WITH BREVE
+ą     &#261;     \U0105     LATIN SMALL LETTER A WITH OGONEK
+ć     &#263;     \U0107     LATIN SMALL LETTER C WITH ACUTE
+Č     &#268;     \U010C     LATIN CAPITAL LETTER C WITH CARON
+č     &#269;     \U010D     LATIN SMALL LETTER C WITH CARON
+Ē     &#274;     \U0112     LATIN CAPITAL LETTER E WITH MACRON
+ē     &#275;     \U0113     LATIN SMALL LETTER E WITH MACRON
+ĕ     &#277;     \U0115     LATIN SMALL LETTER E WITH BREVE
+ė     &#279;     \U0117     LATIN SMALL LETTER E WITH DOT ABOVE
+ę     &#281;     \U0119     LATIN SMALL LETTER E WITH OGONEK
+ě     &#283;     \U011B     LATIN SMALL LETTER E WITH CARON
+ĝ     &#285;     \U011D     LATIN SMALL LETTER G WITH CIRCUMFLEX
+ğ     &#287;     \U011F     LATIN SMALL LETTER G WITH BREVE
+Ī     &#298;     \U012A     LATIN CAPITAL LETTER I WITH MACRON
+ī     &#299;     \U012B     LATIN SMALL LETTER I WITH MACRON
+ĭ     &#301;     \U012D     LATIN SMALL LETTER I WITH BREVE
+į     &#303;     \U012F     LATIN SMALL LETTER I WITH OGONEK
+ı     &#305;     \U0131     LATIN SMALL LETTER DOTLESS I
+Ł     &#321;     \U0141     LATIN CAPITAL LETTER L WITH STROKE
+ł     &#322;     \U0142     LATIN SMALL LETTER L WITH STROKE
+ń     &#324;     \U0144     LATIN SMALL LETTER N WITH ACUTE
+ň     &#328;     \U0148     LATIN SMALL LETTER N WITH CARON
+ŋ     &#331;     \U014B     LATIN SMALL LETTER ENG
+Ō     &#332;     \U014C     LATIN CAPITAL LETTER O WITH MACRON
+ō     &#333;     \U014D     LATIN SMALL LETTER O WITH MACRON
+ŏ     &#335;     \U014F     LATIN SMALL LETTER O WITH BREVE
+ő     &#337;     \U0151     LATIN SMALL LETTER O WITH DOUBLE ACUTE
+ŕ     &#341;     \U0155     LATIN SMALL LETTER R WITH ACUTE
+ř     &#345;     \U0159     LATIN SMALL LETTER R WITH CARON
+Ś     &#346;     \U015A     LATIN CAPITAL LETTER S WITH ACUTE
+ś     &#347;     \U015B     LATIN SMALL LETTER S WITH ACUTE
+ş     &#351;     \U015F     LATIN SMALL LETTER S WITH CEDILLA
+ţ     &#355;     \U0163     LATIN SMALL LETTER T WITH CEDILLA
+ũ     &#361;     \U0169     LATIN SMALL LETTER U WITH TILDE
+ū     &#363;     \U016B     LATIN SMALL LETTER U WITH MACRON
+ŭ     &#365;     \U016D     LATIN SMALL LETTER U WITH BREVE
+ŷ     &#375;     \U0177     LATIN SMALL LETTER Y WITH CIRCUMFLEX
+ź     &#378;     \U017A     LATIN SMALL LETTER Z WITH ACUTE
+Ž     &#381;     \U017D     LATIN CAPITAL LETTER Z WITH CARON
+ž     &#382;     \U017E     LATIN SMALL LETTER Z WITH CARON
+Latin Extended-B
+    &#447;     \U01BF     LATIN LETTER WYNN
+    &#462;     \U01CE     LATIN SMALL LETTER A WITH CARON
+    &#464;     \U01D0     LATIN SMALL LETTER I WITH CARON
+    &#466;     \U01D2     LATIN SMALL LETTER O WITH CARON
+    &#468;     \U01D4     LATIN SMALL LETTER U WITH CARON
+    &#481;     \U01E1     LATIN SMALL LETTER A WITH DOT ABOVE AND MACRON
+    &#483;     \U01E3     LATIN SMALL LETTER AE WITH MACRON
+    &#487;     \U01E7     LATIN SMALL LETTER G WITH CARON
+    &#491;     \U01EB     LATIN SMALL LETTER O WITH OGONEK
+    &#496;     \U01F0     LATIN SMALL LETTER J WITH CARON
+    &#519;     \U0207     LATIN SMALL LETTER E WITH INVERTED BREVE
+    &#541;     \U021D     LATIN SMALL LETTER YOGH
+    &#551;     \U0227     LATIN SMALL LETTER A WITH DOT ABOVE
+    &#559;     \U022F     LATIN SMALL LETTER O WITH DOT ABOVE
+    &#563;     \U0233     LATIN SMALL LETTER Y WITH MACRON
+IPA Extensions
+    &#593;     \U0251     LATIN SMALL LETTER SCRIPT A
+    &#594;     \U0252     LATIN SMALL LETTER TURNED SCRIPT A
+    &#596;     \U0254     LATIN SMALL LETTER OPEN O
+    &#601;     \U0259     LATIN SMALL LETTER SCHWA
+    &#604;     \U025C     LATIN SMALL LETTER REVERSED OPEN E
+    &#613;     \U0265     LATIN SMALL LETTER TURNED H
+    &#618;     \U026A     LATIN LETTER SMALL CAPITAL I
+    &#626;     \U0272     LATIN SMALL LETTER N WITH LEFT HOOK
+    &#643;     \U0283     LATIN SMALL LETTER ESH
+    &#649;     \U0289     LATIN SMALL LETTER U BAR
+    &#650;     \U028A     LATIN SMALL LETTER UPSILON
+    &#652;     \U028C     LATIN SMALL LETTER TURNED V
+    &#655;     \U028F     LATIN LETTER SMALL CAPITAL Y
+    &#658;     \U0292     LATIN SMALL LETTER EZH
+    &#660;     \U0294     LATIN LETTER GLOTTAL STOP
+    &#668;     \U029C     LATIN LETTER SMALL CAPITAL H
+Spacing Modifier Letters
+    &#702;     \U02BE     MODIFIER LETTER RIGHT HALF RING
+    &#703;     \U02BF     MODIFIER LETTER LEFT HALF RING
+ˇ   &#711;     \U02C7     CARON
+    &#712;     \U02C8     MODIFIER LETTER VERTICAL LINE
+    &#716;     \U02CC     MODIFIER LETTER LOW VERTICAL LINE
+    &#720;     \U02D0     MODIFIER LETTER TRIANGULAR COLON
+˘   &#728;     \U02D8     BREVE
+˙   &#729;     \U02D9     DOT ABOVE
+Greek and Coptic
+Α     &#913;     \U0391     GREEK CAPITAL LETTER ALPHA
+Β     &#914;     \U0392     GREEK CAPITAL LETTER BETA
+Γ     &#915;     \U0393     GREEK CAPITAL LETTER GAMMA
+Δ     &#916;     \U0394     GREEK CAPITAL LETTER DELTA
+Ε     &#917;     \U0395     GREEK CAPITAL LETTER EPSILON
+Ζ     &#918;     \U0396     GREEK CAPITAL LETTER ZETA
+Η     &#919;     \U0397     GREEK CAPITAL LETTER ETA
+Θ     &#920;     \U0398     GREEK CAPITAL LETTER THETA
+Ι     &#921;     \U0399     GREEK CAPITAL LETTER IOTA
+Κ     &#922;     \U039A     GREEK CAPITAL LETTER KAPPA
+Λ     &#923;     \U039B     GREEK CAPITAL LETTER LAMBDA
+Μ     &#924;     \U039C     GREEK CAPITAL LETTER MU
+Ν     &#925;     \U039D     GREEK CAPITAL LETTER NU
+Ξ     &#926;     \U039E     GREEK CAPITAL LETTER XI
+Ο     &#927;     \U039F     GREEK CAPITAL LETTER OMICRON
+Π     &#928;     \U03A0     GREEK CAPITAL LETTER PI
+Ρ     &#929;     \U03A1     GREEK CAPITAL LETTER RHO
+Σ     &#931;     \U03A3     GREEK CAPITAL LETTER SIGMA
+Τ     &#932;     \U03A4     GREEK CAPITAL LETTER TAU
+Υ     &#933;     \U03A5     GREEK CAPITAL LETTER UPSILON
+Φ     &#934;     \U03A6     GREEK CAPITAL LETTER PHI
+Χ     &#935;     \U03A7     GREEK CAPITAL LETTER CHI
+Ψ     &#936;     \U03A8     GREEK CAPITAL LETTER PSI
+Ω     &#937;     \U03A9     GREEK CAPITAL LETTER OMEGA
+α     &#945;     \U03B1     GREEK SMALL LETTER ALPHA
+β     &#946;     \U03B2     GREEK SMALL LETTER BETA
+γ     &#947;     \U03B3     GREEK SMALL LETTER GAMMA
+δ     &#948;     \U03B4     GREEK SMALL LETTER DELTA
+ε     &#949;     \U03B5     GREEK SMALL LETTER EPSILON
+ζ     &#950;     \U03B6     GREEK SMALL LETTER ZETA
+η     &#951;     \U03B7     GREEK SMALL LETTER ETA
+θ     &#952;     \U03B8     GREEK SMALL LETTER THETA
+ι     &#953;     \U03B9     GREEK SMALL LETTER IOTA
+κ     &#954;     \U03BA     GREEK SMALL LETTER KAPPA
+λ     &#955;     \U03BB     GREEK SMALL LETTER LAMBDA
+μ     &#956;     \U03BC     GREEK SMALL LETTER MU
+ν     &#957;     \U03BD     GREEK SMALL LETTER NU
+ξ     &#958;     \U03BE     GREEK SMALL LETTER XI
+ο     &#959;     \U03BF     GREEK SMALL LETTER OMICRON
+π     &#960;     \U03C0     GREEK SMALL LETTER PI
+ρ     &#961;     \U03C1     GREEK SMALL LETTER RHO
+ς     &#962;     \U03C2     GREEK SMALL LETTER FINAL SIGMA
+σ     &#963;     \U03C3     GREEK SMALL LETTER SIGMA
+τ     &#964;     \U03C4     GREEK SMALL LETTER TAU
+υ     &#965;     \U03C5     GREEK SMALL LETTER UPSILON
+φ     &#966;     \U03C6     GREEK SMALL LETTER PHI
+χ     &#967;     \U03C7     GREEK SMALL LETTER CHI
+ψ     &#968;     \U03C8     GREEK SMALL LETTER PSI
+ω     &#969;     \U03C9     GREEK SMALL LETTER OMEGA
+      &#977;     \U03D1     GREEK THETA SYMBOL
+      &#989;     \U03DD     GREEK SMALL LETTER DIGAMMA
+Hebrew
+א     &#1488;     \U05D0     HEBREW LETTER ALEPH
+ב     &#1489;     \U05D1     HEBREW LETTER BET
+ג     &#1490;     \U05D2     HEBREW LETTER GIMEL
+ד     &#1491;     \U05D3     HEBREW LETTER DALET
+ה     &#1492;     \U05D4     HEBREW LETTER HE
+ו     &#1493;     \U05D5     HEBREW LETTER VAV
+ז     &#1494;     \U05D6     HEBREW LETTER ZAYIN
+ח     &#1495;     \U05D7     HEBREW LETTER HET
+ט     &#1496;     \U05D8     HEBREW LETTER TET
+י     &#1497;     \U05D9     HEBREW LETTER YOD
+ך     &#1498;     \U05DA     HEBREW LETTER FINAL KAF
+כ     &#1499;     \U05DB     HEBREW LETTER KAF
+ל     &#1500;     \U05DC     HEBREW LETTER LAMED
+ם     &#1501;     \U05DD     HEBREW LETTER FINAL MEM
+מ     &#1502;     \U05DE     HEBREW LETTER MEM
+ן     &#1503;     \U05DF     HEBREW LETTER FINAL NUN
+נ     &#1504;     \U05E0     HEBREW LETTER NUN
+ס     &#1505;     \U05E1     HEBREW LETTER SAMEKH
+ע     &#1506;     \U05E2     HEBREW LETTER AYIN
+ף     &#1507;     \U05E3     HEBREW LETTER FINAL PE
+פ     &#1508;     \U05E4     HEBREW LETTER PE
+ץ     &#1509;     \U05E5     HEBREW LETTER FINAL TSADI
+צ     &#1510;     \U05E6     HEBREW LETTER TSADI
+ק     &#1511;     \U05E7     HEBREW LETTER QOF
+ר     &#1512;     \U05E8     HEBREW LETTER RESH
+ת     &#1514;     \U05EA     HEBREW LETTER TAV
+Latin Extended Additional
+    &#7691;     \U1E0B     LATIN SMALL LETTER D WITH DOT ABOVE
+    &#7693;     \U1E0D     LATIN SMALL LETTER D WITH DOT BELOW
+    &#7703;     \U1E17     LATIN SMALL LETTER E WITH MACRON AND ACUTE
+    &#7714;     \U1E22     LATIN CAPITAL LETTER H WITH DOT ABOVE
+    &#7716;     \U1E24     LATIN CAPITAL LETTER H WITH DOT BELOW
+    &#7717;     \U1E25     LATIN SMALL LETTER H WITH DOT BELOW
+    &#7723;     \U1E2B     LATIN SMALL LETTER H WITH BREVE BELOW
+    &#7731;     \U1E33     LATIN SMALL LETTER K WITH DOT BELOW
+    &#7735;     \U1E37     LATIN SMALL LETTER L WITH DOT BELOW
+    &#7745;     \U1E41     LATIN SMALL LETTER M WITH DOT ABOVE
+    &#7747;     \U1E43     LATIN SMALL LETTER M WITH DOT BELOW
+    &#7749;     \U1E45     LATIN SMALL LETTER N WITH DOT ABOVE
+    &#7751;     \U1E47     LATIN SMALL LETTER N WITH DOT BELOW
+    &#7763;     \U1E53     LATIN SMALL LETTER O WITH MACRON AND ACUTE
+    &#7769;     \U1E59     LATIN SMALL LETTER R WITH DOT ABOVE
+    &#7770;     \U1E5A     LATIN CAPITAL LETTER R WITH DOT BELOW
+    &#7771;     \U1E5B     LATIN SMALL LETTER R WITH DOT BELOW
+    &#7777;     \U1E61     LATIN SMALL LETTER S WITH DOT ABOVE
+    &#7779;     \U1E63     LATIN SMALL LETTER S WITH DOT BELOW
+    &#7787;     \U1E6B     LATIN SMALL LETTER T WITH DOT ABOVE
+    &#7789;     \U1E6D     LATIN SMALL LETTER T WITH DOT BELOW
+    &#7791;     \U1E6F     LATIN SMALL LETTER T WITH LINE BELOW
+    &#7825;     \U1E91     LATIN SMALL LETTER Z WITH CIRCUMFLEX
+    &#7827;     \U1E93     LATIN SMALL LETTER Z WITH DOT BELOW
+    &#7830;     \U1E96     LATIN SMALL LETTER H WITH LINE BELOW
+    &#7841;     \U1EA1     LATIN SMALL LETTER A WITH DOT BELOW
+    &#7885;     \U1ECD     LATIN SMALL LETTER O WITH DOT BELOW
+    &#7929;     \U1EF9     LATIN SMALL LETTER Y WITH TILDE
+General Punctuation
+-   &#8209;     \U2011     NON-BREAKING HYPHEN
+    &#8248;     \U2038     CARET
+    &#8253;     \U203D     INTERROBANG
+    &#8258;     \U2042     ASTERISM
+Arrows
+←   &#8592;     \U2190     LEFTWARDS ARROW
+→   &#8594;     \U2192     RIGHTWARDS ARROW
+Mathematical Operators
+∂   &#8706;     \U2202     PARTIAL DIFFERENTIAL
+√   &#8730;     \U221A     SQUARE ROOT
+∞   &#8734;     \U221E     INFINITY
+∥   &#8741;     \U2225     PARALLEL TO
+∫   &#8747;     \U222B     INTEGRAL
+≠   &#8800;     \U2260     NOT EQUAL TO
+    &#8852;     \U2294     SQUARE CUP
+    &#8853;     \U2295     CIRCLED PLUS
+    &#8942;     \U22EE     VERTICAL ELLIPSIS
+Enclosed Alphanumerics
+    &#9418;     \U24CA     CIRCLED LATIN CAPITAL LETTER U
+Miscellaneous Symbols
+☜   &#9756;     \U261C     WHITE LEFT POINTING INDEX
+☞   &#9758;     \U261E     WHITE RIGHT POINTING INDEX
+    &#9791;     \U263F     MERCURY
+    &#9792;     \U2640     FEMALE SIGN
+    &#9794;     \U2642     MALE SIGN
+    &#9795;     \U2643     JUPITER
+    &#9796;     \U2644     SATURN
+    &#9797;     \U2645     URANUS
+    &#9798;     \U2646     NEPTUNE
+    &#9799;     \U2647     PLUTO
+    &#9824;     \U2660     BLACK SPADE SUIT
+    &#9825;     \U2661     WHITE HEART SUIT
+    &#9826;     \U2662     WHITE DIAMOND SUIT
+    &#9827;     \U2663     BLACK CLUB SUIT
+    &#9837;     \U266D     MUSIC FLAT SIGN
+    &#9838;     \U266E     MUSIC NATURAL SIGN
+    &#9839;     \U266F     MUSIC SHARP SIGN
+Dingbats
+    &#10003;     \U2713     CHECK MARK
+    &#10016;     \U2720     MALTESE CROSS
+Private Use Area
+    -     \UE000     LATIN SMALL LETTER A WITH MACRON AND ACUTE
+    -     \UE001     LATIN SMALL LETTER A WITH MACRON AND TILDE
+    -     \UE002     LATIN SMALL LETTER A WITH VERTICAL LINE ABOVE
+    -     \UE003     LATIN CAPITAL LETTER C WITH MACRON
+    -     \UE004     LATIN SMALL LETTER C WITH MACRON
+    -     \UE005     LATIN SMALL LETTER C WITH BREVE
+    -     \UE006     LATIN SMALL LETTER C WITH DOT BELOW
+    -     \UE007     LATIN SMALL LIGATURE CH
+    -     \UE008     LATIN CAPITAL LETTER D WITH MACRON
+    -     \UE009     LATIN SMALL LETTER E WITH BAR BELOW
+    -     \UE00A     LATIN SMALL LETTER E WITH TILDE
+    -     \UE00B     LATIN SMALL LETTER E WITH MACRON AND BREVE
+    -     \UE00C     LATIN SMALL LETTER E WITH TILDE AND DOT ABOVE
+    -     \UE00D     LATIN SMALL LETTER E WITH HOOK RIGHT BELOW
+    -     \UE00E     LATIN SMALL LETTER G WITH INVERTED BREVE
+    -     \UE00F     LATIN SMALL LETTER I WITH INVERTED BREVE BELOW
+    -     \UE010     LATIN SMALL LETTER I WITH MACRON AND ACUTE
+    -     \UE011     LATIN SMALL LETTER K WITH CIRCUMFLEX
+    -     \UE012     LATIN SMALL LETTER K WITH BREVE
+    -     \UE013     LATIN SMALL LETTER K WITH INVERTED BREVE
+    -     \UE014     LATIN SMALL LIGATURE KH
+    -     \UE015     LATIN CAPITAL LETTER L WITH MACRON
+    -     \UE016     LATIN SMALL LETTER L WITH TILDE
+    -     \UE017     LATIN SMALL LETTER L WITH INVERTED BREVE
+    -     \UE018     LATIN CAPITAL LETTER M WITH MACRON
+    -     \UE019     LATIN SMALL LETTER M WITH MACRON
+    -     \UE01A     LATIN SMALL LETTER M WITH TILDE
+    -     \UE01B     LATIN SMALL LETTER O WITH CEDILLA
+    -     \UE01C     LATIN SMALL LETTER O WITH MACRON AND CIRCUMFLEX
+    -     \UE01E     LATIN SMALL LIGATURE OI
+    -     \UE01F     LATIN SMALL LIGATURE OO
+    -     \UE020     LATIN SMALL LIGATURE OO WITH MACRON
+    -     \UE021     LATIN SMALL LIGATURE OU
+    -     \UE022     LATIN SMALL LETTER OPEN O WITH ACUTE
+    -     \UE023     LATIN SMALL LETTER R WITH DIAERESIS
+    -     \UE024     LATIN SMALL LETTER R WITH CIRCUMFLEX
+    -     \UE025     LATIN SMALL LETTER R WITH RING BELOW
+    -     \UE026     LATIN SMALL LETTER S WITH VERTICAL LINE ABOVE
+    -     \UE027     LATIN SMALL LETTER S WITH OGONEK
+    -     \UE028     LATIN SMALL LETTER S WITH COMMA
+    -     \UE02A     LATIN SMALL LETTER S WITH BREVE
+    -     \UE02B     LATIN SMALL LIGATURE SH
+    -     \UE02C     LATIN SMALL LIGATURE TH
+    -     \UE02D     LATIN SMALL LETTER U WITH MACRON AND ACUTE
+    -     \UE02E     LATIN CAPITAL LETTER V WITH MACRON
+    -     \UE02F     LATIN CAPITAL LETTER X WITH MACRON
+    -     \UE030     LATIN SMALL LETTER X WITH CIRCUMFLEX
+    -     \UE031     LATIN SMALL LETTER Y WITH BREVE
+    -     \UE032     LATIN SMALL LIGATURE ZH
+    -     \UE033     LATIN SMALL LETTER TURNED E WITH ACUTE
+    -     \UE034     LATIN SMALL LETTER TURNED E WITH CIRCUMFLEX
+    -     \UE035     GREEK SMALL LETTER ALPHA WITH GRAVE
+    -     \UE036     MUSICAL SYMBOL SEGNO
+    -     \UE037     MUSICAL SYMBOL FERMATA
+    -     \UE038     MUSICAL SYMBOL CRESCENDO
+    -     \UE039     MUSICAL SYMBOL DECRESCENDO
+    -     \UE03A     MUSICAL SYMBOL DOUBLE SHARP
+    -     \UE03B     MUSICAL SYMBOL BREVE
+    -     \UE03C     MUSICAL SYMBOL DOWN BOW
+    -     \UE03D     MUSICAL SYMBOL UP BOW
+    -     \UE03E     MUSICAL SYMBOL BREVE ALTERNATE
+    -     \UE03F     PRINTING SYMBOL DELE
+    -     \UE040     PRINTING SYMBOL FRACTIONAL EM
+    -     \UE041     INVERTED ASTERISM
+    -     \UE042     LATIN SMALL LETTER SCHWA SUPERSCRIPT
+    -     \UE043     LATIN SMALL LETTER TURNED Y
+    -     \UE044     LATIN SMALL LIGATURE OE WITH MACRON
+    -     \UE045     SQUARE ROOT WITH BAR
+    -     \UE046     LATIN SMALL LETTER U WITH DOT ABOVE
+    -     \UE047     LATIN SMALL LIGATURE UE
+    -     \UE048     LATIN SMALL LIGATURE UE WITH MACRON
+    -     \UE049     LATIN SMALL LETTER OPEN O WITH TILDE
+    -     \UE04A     LATIN SMALL LETTER T WITH CARON BELOW
+    -     \UE04B     LATIN SMALL LETTER SCRIPT A WITH TILDE
+    -     \UE04C     GREEK SMALL LETTER EPSILON WITH TILDE
+    -     \UE04D     LATIN SMALL LIGATURE OE WITH TILDE
+    -     \UE04E     MODIFIER LETTER DOUBLE VERTICAL LINE
+    -     \UE04F     DOUBLE HYPHEN
+    -     \UE050     LATIN SMALL LETTER SCHWA WITH DOT ABOVE
+    -     \UE051     LATIN SMALL LETTER SCHWA WITH MACRON
+Alphabetic Presentation Forms
+ﬂ     &#64258;     \UFB02     LATIN SMALL LIGATURE FL
+שׁ     &#64298;     \UFB2A     HEBREW LETTER SHIN WITH SHIN DOT
+שׂ     &#64299;     \UFB2B     HEBREW LETTER SHIN WITH SIN DOT
+
diff --git a/format_docs/pdb/ztxt.txt b/format_docs/pdb/ztxt.txt
new file mode 100644
index 0000000000..98fb6bae3e
--- /dev/null
+++ b/format_docs/pdb/ztxt.txt
@@ -0,0 +1,226 @@
+The zTXT Format
+---------------
+
+The zTXT format is relatively straightforward. The simplest zTXT contains a
+Palm database header, followed by zTXT record #0, followed by the compressed
+data. The compressed data can be in one of two formats: one long data stream,
+or split into chunks for random access. If there are any bookmarks, they occupy
+the record immediately after the compressed data. If there are any annotations,
+the annotation index occupies the record immediately after the bookmarks with
+each annotation in the index having a record immediately after the annotation
+index. Here are diagrams of a simple zTXT and a full featured zTXT:
+
+    DB Header
+0   Record 0
+1
+2
+3
+... Compressed Data
+36
+37
+38
+
+    DB Header
+0   Record 0
+1
+2
+3
+... Compressed Data
+36
+37
+38
+39  Bookmarks
+40  Annotation Index
+41  Annotation 1
+42  Annotation 2
+43  Annotation 3
+
+
+Compression Modes
+-----------------
+
+zTXT version 1.40 and later supports two modes of compression. Mode 1 is a
+random access mode, and mode 2 consists of one long data stream. Both modes
+work on 8K (the default record size) blocks of text.
+
+Please note, however, that as of Weasel Reader version 1.60 the old style
+(mode 2) zTXT format is no longer supported. makeztxt and libztxt still support
+creating these documents for backwards compatibility, but you should not use
+mode 2 if possible.
+
+
+Mode 1
+------
+
+In mode one, 8K blocks of text are compressed into an equal number of blocks of
+compressed data. Using the Z_FULL_FLUSH flush mode with zLib allows for random
+access among the blocks of data. In order for this to function, the first block
+must be decompressed first, and after that any block in the file may be
+decompressed in any order. In mode 1, the blocks of compressed data will likely
+not all have the same size.
+
+
+Mode 2
+------
+
+In zTXT versions before 1.40, this was the only method of compression. This
+mode involves compressing the entire input buffer into a single output buffer
+and then splitting the resulting buffer into 8K segments. This mode requires
+that all of the compressed data be decompressed in one pass. Since there are no
+real 'blocks' of data, the resulting output can be of any blocksize, though
+typically the default of 8K should be fine. The advantage to mode 2 is that it
+will give about 10% - 15% more compression.
+
+
+zTXT Record #0 Definition (version 1.44)
+----------------------------------------
+
+Record 0 provides all of the information about the zTXT contents. Be sure it is
+correct, lest fiery death rain down upon your program.
+
+typedef struct zTXT_record0Type {
+  UInt16        version;
+  UInt16        numRecords;
+  UInt32        size;
+  UInt16        recordSize;
+  UInt16        numBookmarks;
+  UInt16        bookmarkRecord;
+  UInt16        numAnnotations;
+  UInt16        annotationRecord;
+  UInt8         flags;
+  UInt8         reserved;
+  UInt32        crc32;
+  UInt8         padding[0x20 - 24];
+} zTXT_record0;
+
+
+Structure Elements
+------------------
+
+UInt16        version;
+
+This is mostly just informational. Your program can figure out what features
+might be available from the version. However, the remaining parts of the
+structure are designed such that their value will be 0 if that particular
+feature is not present, so that is the correct way to test. The version is
+stored as two 8 bit integers. For example, version 1.42 is 0x012A.
+
+UInt16        numRecords;
+
+This is the number of DATA records only and does not include record 0,
+bookmarks, or annotations. With compression mode 1, this is also the number of
+uncompressed text records. With mode 2, you must decompress the file to figure
+out how many text records there will be.
+
+UInt32        size;
+
+The size in bytes of the uncompressed data in the zTXT. Check this value with
+the amount of free storage memory on the Palm to make sure there's enough room
+to decompress the data in full or in part.
+
+UInt16        recordSize;
+
+recordSize is the size in bytes of a text record. This field is important, as
+the size of text and decompression buffers is based on this value. It is used
+by Weasel to navigate through the text so it can map absolute offsets to record
+numbers. 8192 is the default. With compression mode 1, this is the amount of
+data inside each compressed record (except maybe the last one), but the actual
+compressed records will likely have varying sizes. In mode 2, both compressed
+records and the resulting text records are all of this size (except, again, the
+last record).
+
+UInt16        numBookmarks;
+
+The definitive count of how many bookmarks are stored in the bookmark index
+record. See the section on bookmarks below.
+
+UInt16        bookmarkRecord;
+
+If there are any bookmarks, this is set to the record index number that
+contains the bookmark listing, otherwise it is 0.
+
+UInt16        numAnnotations;
+
+Like the bookmark count, this is the definitive count of how many annotations
+are in the annotation index and how many annotation records follow it. See the
+section on annotations below.
+
+UInt16        annotationRecord;
+
+If there are any annotations, this is set to the record index number that
+contains the annotation index, otherwise it is 0.
+
+UInt8         flags;
+
+These flags indicate various features of the zTXT database. flags is a bitmask
+and at present the only two defined bits are:
+
+ZTXT_RANDOMACCESS (0x01)
+    If the zTXT was compressed according to the method in mode 1, then it
+    supports random access and this should be set.
+ZTXT_NONUNIFORM (0x02)
+    Setting this bit indicates that the text records within the zTXT database
+    are not of uniform length. That is, when the blocks of text are
+    decompressed they will not have identical block sizes. If this is not set,
+    the compressed blocks are assumed to all have the same size when
+    decompressed (typically 8K) except for the last block which can be smaller.
+
+UInt32        crc32;
+
+A CRC32 value for checking data integrity. This value is computed over all text
+data records only and does not include record 0 nor any bookmark/annotation
+records. The current implementation in makeztxt/Weasel computes this value
+using the crc32 function in zLib which should be the standard CRC32 definition.
+
+UInt8         padding[0x20 - 24];
+
+zTXT record zero is 32 bytes in length, so the unused portion is padded.
+
+
+zTXT Bookmarks
+--------------
+
+zTXT bookmarks are stored in a simple array in a record at the end of a zTXT.
+The format is as follows:
+
+#define MAX_BMRK_LENGTH         20
+
+typedef struct GPlmMarkType {
+  UInt32        offset;
+  Char          title[MAX_BMRK_LENGTH];
+} GPlmMark;
+
+In the structure, offset is counted as an absolute offset into the text. The
+bookmarks must be sorted in ascending order.
+
+If there are no bookmarks, then the bookmark index does not exist. When the
+user creates the first bookmark, the record containing the index will then be
+created. If there are annotations, when the bookmark record is created it must
+go before the annotation index. This will require incrementing annotationRecord
+in record 0 to point to the new record index.
+
+Similarly, when all bookmarks are deleted the bookmark index record is also
+deleted. If there are annotations, annotationRecord in record 0 must be
+decremented to point to the new index.
+
+
+zTXT Annotations
+----------------
+
+zTXT annotations have a format almost identical to that of the bookmark index:
+
+typedef struct GPlmAnnotationType {
+  UInt32        offset;
+  Char          title[MAX_BMRK_LENGTH];
+} GPlmAnnotation;
+
+Like the bookmarks, offset is an absolute offset into the text. The annotation
+index is organized just as the bookmarks are, as a single array in a record.
+Note that this structure does NOT store the actual annotation text.
+
+The text of each annotation is stored in its own record immediately following
+the index. So, the first annotation in the index will occupy the first record
+following the index, and the second annotation will be in the second record
+following the index, and so on. The text of each annotation is limited to
+4096 bytes.
+
diff --git a/format_docs/rb.txt b/format_docs/rb.txt
new file mode 100644
index 0000000000..2eb1992afb
--- /dev/null
+++ b/format_docs/rb.txt
@@ -0,0 +1,303 @@
+Rocket eBook File Format
+------------------------
+
+from http://rbmake.sourceforge.net/rb_format.html
+
+
+Overview
+--------
+
+This document attempts to describe the format of a .rb file -- the book
+format that is downloaded into NuvoMedia's <http://www.nuvomedia.com>
+hand-held wonder, the Rocket eBook
+<http://www.rocket-ebook.com/enter.html>.
+
+*Note:* All multi-byte integers are stored in Vax/Intel order (the
+opposite of network byte order). Most integers are 4 bytes (an int32),
+but there are some minor exceptions (as detailed below).
+
+Also, the following document refers to the .rb file sections as "pages".
+
+
+Details
+-------
+
+The first 4 bytes of the file seem to be a magic number (in hex): B0 0C
+B0 0C. I like to think of this as a hexadecimal pun on the word "book"
+(repeated). [Matt Greenwood has reported seeing a magic number of "B0 0C
+F0 0D" in another type of ReB-related file -- i.e. "book food".]
+
+The next two bytes appear to be a version number, currently "02 00". I
+assume this means major version 2, minor version 0.
+
+The next 4 bytes are the string "NUVO", followed by 4 bytes of 00h. (I
+have also seen an old title that had 0s in place of the "NUVO".)
+
+This brings us up to offset 0Eh, at which point we have a 4-byte
+representation of the date the book was created (Matt Greenwood pointed
+this out to me -- thanks!). The year is encoded as an int16. An older
+version of the RocketLibrary was encoding the year's full value (e.g.
+1999 was "CF 07" and 2000 was "D0 07"), but a more recent version is now
+using the tm_year value verbatim -- i.e. it's storing 100 for the year
+2000 ("64 00"). The year is followed by an int8 for the 1-relative month
+number, and an int8 for the day of the month.
+
+After that is 6 bytes of 00h. These may be reserved for setting the time
+of creation (at a guess).
+
+Then, at offset 18h, we have an int32 that contains the absolute offset
+of the "Table of Contents" (the directory of the pages contained within
+this .rb file). In all of the .rb files I've seen, this remains
+constant with a value of 128h. However, I have tested an atypical .rb
+file where I placed the ToC at the end of the file (after all the file
+contents), and it worked fine. (I've chosen not to build any books in
+such a non-standard format, however.)
+
+Immediately following this is an int32 with the length of the .rb file
+(so we can check if the file is complete or not).
+
+All the bytes from here (offset 20h) up to offset 128h appear to only be
+used by an encrypted title. In a non-encrypted title, they are always 0.
+
+The table of contents typically comes next (at offset 128h). It starts
+with an int32 count of the number of "page" entries (.rb-file sections)
+in the ToC. Each entry consists of a name (zero-padded to 32 bytes),
+followed by 3 int32s: the length of this entry's data segment, the
+absolute offset of the data in the .rb file, and a flag. The known flag
+values are: 1 (encrypted), 2 (info page), and 8 (deflated). The names
+are tweaked as needed to ensure that they are all unique. The current
+RocketWriter software uses a unique 6-digit number, a dash, up to 8
+characters from the filename, and then the re-mapped suffix for the data
+(.html, .hidx, .png, .info, etc.). My rbmake library simply ensures that
+the names are no longer than 15 characters (not counting the suffix) and
+are all unique.
+
+Often the first item in the ToC is the info page, but it doesn't have to
+be. This page of information contains NAME=VALUE pairs that note the
+author, title, what the root-page's name is, etc. (See appendix A). This
+data is never encrypted nor compressed, so this entry's flag value is
+always "2".
+
+An image page is always stored as a B&W image in PNG format. Since it
+has its own compression, it is stored without any additional attempt at
+deflation. I have also never seen an encrypted image, so its flag value
+is always 0.
+
+An HTML page contains the tags and text that were re-written into a
+consistent syntax (this presumably makes the HTML renderer in the ReB
+itself simpler). HTML pages are typically compressed (See appendix B).
+Every HTML page appears to use the suffix .html no matter what the file
+name was on import (but I have seen older files with .htm used as the
+suffix, so the rocket appears to support both).
+
+For every HTML page there is a corresponding .hidx page that contains a
+summary of the paragraph formatting and the position of the anchor names
+in the associated .html page (See appendix C). This page is sometimes
+compressed, depending on length (See appendix B).
+
+There are also reference titles that have a .hkey page that contains a
+list of words that can be looked up in the associated .html page (See
+appendix D).
+
+Immediately following the ToC is the data for each piece mentioned in
+the ToC, in the same order as it appeared in the ToC.
+
+Finally, the end of the file appears to be padded with 20 bytes of 01h.
+
+
+Appendix A: Info Page Format
+----------------------------
+
+The info page consists of a series of lines that contain "NAME=VALUE"
+strings. Each line is terminated by a single newline. Here are the
+values that the RocketWriter generates:
+
+    COMMENT=Info file for <title>
+    TYPE=2
+    TITLE=<title>
+    AUTHOR=<author>
+    URL=ebook:<long, unique string used for the file's name by the librarian>
+    GENERATOR=<e.g. RocketLibrarian 1.3.216>
+    PARSE=1
+    OUTPUT=1
+    BODY=<name of root HTML page (as it appears in the ToC)>
+    MENUMARK=menumark.html
+    SuggestedRetailPrice=<usually empty>
+
+Encrypted titles have a few more entries (including those listed above):
+
+    ISBN=<ISBN number, including dashes>
+    REVISION=<digits>
+    TITLE_LANGUAGE=<en-us>
+    PUB_NAME=<Publisher's name>
+    PUBSERVER_ID=<digits>
+    GENERATOR=<e.g. RocketPress 1.3.121>
+    VERSION=<digits>
+    USERNAME=<rocket-ID>
+    COPY_ID=<digits>
+    COPYRIGHT=<copyright>
+    COPYTITLE=<another copyright?>
+
+A reference title also has an indication that there is a .hkey page
+present, and may also have a GENRE of "Reference":
+
+    HKEY=1
+    GENRE=Reference
+
+
+Appendix B: The format of compressed data
+-----------------------------------------
+
+Compressed pages have a data section in the .rb file with the following
+format:
+
+The first int32 is a count of the number of 4096-byte chunks of data we
+broke the uncompressed page into (the last chunk can be shorter than
+4096 bytes, of course).
+
+This is immediately followed by an int32 with the length of the entire
+uncompressed data.
+
+After this there are <count> int32s that indicate the size of each
+chunk's compressed data.
+
+Following these length int32s is the output from a deflation (the
+algorithm used in gzip) for each 4096-byte chunk of the original data.
+It appears that you must use a window-bit size of 13 and a compression
+level of "best" to be compatible with the Rocket eBook's system software.
+
+
+Appendix C: HTML-index Page Format
+----------------------------------
+
+The .hidx page's purpose is to allow the renderer to quickly look up the
+format of each paragraph (useful for random access to the data), and the
+position of the anchor names.
+
+The first section lists the various paragraph-producing tags. It is
+headed by a line of "[tags <count>]", where <count> is the number of
+tags that follow this header. The tags are listed one per line, and have
+an implied enumeration from 0 to N-1 (which the other tags and the
+upcoming paragraph sections reference).
+
+The first tag is typically (always?) "<HTML> -1". The number trailing
+the tag indicates what other tag (or sequence of tags, one per line) in
+which we are nested. So, if we have a <BR> nested inside a <P
+ALIGN="center">, it would be listed separately from a <BR> that was
+nested inside a normal paragraph, and each one would have a different
+trailing index number.
+
+Following the tag section is the paragraph section. The heading is
+"[paragraphs <count>]", and is followed by a line for each paragraph.
+These lines consist of a character offset into the .html page for the
+start of the paragraph followed by a 0-relative offset into the tag
+section (indicating what kind of formatting to use for the indicated
+paragraph).
+
+The paragraph-section character offsets point to the first bit of text
+after the associated tag.
+
+The last section details the anchor names. The heading is
+"[names <count>]", and each item that follows is a quoted string of the
+anchor name, followed by a character offset into the .html page where
+we'll find that name. If there are no names in the associated HTML
+section, the heading is included with a 0 count (i.e. "[names 0]").
+
+The name-section character offsets point to the start of the anchor tag
+(not after the tag, like the offsets in the "paragraphs" section).
+
+The lines are terminated by newlines (in standard unix fashion).
+
+For example:
+
+    [tags 10]
+    <HTML> -1
+    <BODY> 0
+    <P ALIGN="right"> 1
+    <P ALIGN="left"> 1
+    <P> 1
+    <H3 ALIGN="center"> 1
+    <P ALIGN="center"> 1
+    <BR> 6
+    <H2 ALIGN="center"> 1
+    <BR> 1
+
+    [paragraphs 42]
+    160 9
+    164 9
+    184 8
+    220 8
+    261 6
+    316 5
+    359 1
+    379 6
+    410 6
+    460 7
+    511 7
+    564 7
+    616 7
+    668 7
+    720 7
+    773 7
+    827 7
+    880 7
+    933 7
+    988 7
+    1043 7
+    1100 7
+    1157 7
+    1214 7
+    1270 7
+    1328 7
+    1385 7
+    1442 7
+    1497 7
+    1556 7
+    1561 7
+    1635 1
+    1656 5
+    1690 6
+    1737 7
+    1773 5
+    1798 4
+    1826 3
+    2663 1
+    2668 4
+    2689 2
+    2730 8
+
+    [names 1]
+    "ch1" 2689
+
+
+Appendix D: HTML-key Page Format
+--------------------------------
+
+The .hkey page contains a list of words, one per line, sorted in a
+strict ASCII sequence, each one followed by a tab and the offset in the
+.html page of the word's data. I presume that the .hkey page must share
+the same name prefix as its related .html page.
+
+If the names contain high-bit characters, they are translated into
+regular ASCII in the .hkey file, since this allows the user to search
+for the words using unaccented characters.
+
+The lines are terminated with a newline (in standard unix fashion).
+
+An example:
+
+    a	5
+    apple	38
+    b	84
+    book	104
+
+Each of these offsets points to a paragraph tag in the associated .html
+page. I have only seen this sequence of tags used so far:
+
+    <P><BIG><B>word</B></BIG> other stuff</P>
+
+I have seen multiple <B>...</B> tags in the middle of the single set of
+<BIG>...</BIG> tags, but this is the basic tag format.
+
+The offset in the .hkey page points to the start of the <P> tag.
+
diff --git a/format_docs/tcr.txt b/format_docs/tcr.txt
new file mode 100644
index 0000000000..dbbbbaa869
--- /dev/null
+++ b/format_docs/tcr.txt
@@ -0,0 +1,56 @@
+About
+-----
+
+Text compression format that can be decompressed starting at any point.
+Little-endian byte ordering is used.
+
+
+Header
+------
+
+TCR files always start with:
+
+!!8-Bit!!
+
+
+Layout
+------
+
+Header
+256 key dictionary
+compressed text
+
+
+Dictionary
+----------
+
+A dictionary mapping each key to its replacement string. There are 256 keys in
+total, 0 - 255. Each string is preceded by one byte that gives the length of
+the string.
+
+
+Compressed text
+---------------
+
+The compressed text is a series of values 0-255, each of which corresponds to a
+key and thus a string. The original text is reassembled by replacing each key
+in the compressed text with its corresponding string.
+
+
+Compressor
+----------
+
+From Andrew Giddings' TCR.c (http://www.cix.co.uk/~gidds/Software/TCR.html):
+
+The TCR compression format is easy to describe: after the fixed header is a
+dictionary of 256 strings, each preceded by a length byte.  The rest of the
+file is a list of codes from this dictionary.
+
+The compressor works by starting with each code defined as itself.  While
+there's an unused code, it finds the most common two-code combination, and
+creates a new code for it, replacing all occurrences in the text with the
+new code.
+
+It also searches for codes that are always followed by another, which it can
+merge, possibly freeing up some.
+

From cbb668b6ddfb362e58b505a475bc41893c8f01ac Mon Sep 17 00:00:00 2001
From: Hiroshi Miura <miurahr@linux.com>
Date: Thu, 3 Feb 2011 13:21:44 +0900
Subject: [PATCH 3/7] recipe: msn sankei news changes its charcode.

---
 resources/recipes/msnsankei.recipe | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/resources/recipes/msnsankei.recipe b/resources/recipes/msnsankei.recipe
index ae195559d5..59664d055f 100644
--- a/resources/recipes/msnsankei.recipe
+++ b/resources/recipes/msnsankei.recipe
@@ -13,15 +13,12 @@ class MSNSankeiNewsProduct(BasicNewsRecipe):
     description     = 'Products release from Japan'
     oldest_article = 7
     max_articles_per_feed = 100
-    encoding       = 'Shift_JIS'
+    encoding       = 'utf-8'
     language       = 'ja'
     cover_url       = 'http://sankei.jp.msn.com/images/common/sankeShinbunLogo.jpg'
     masthead_url = 'http://sankei.jp.msn.com/images/common/sankeiNewsLogo.gif'
 
     feeds          = [(u'\u65b0\u5546\u54c1', u'http://sankei.jp.msn.com/rss/news/release.xml')]
 
-    remove_tags_before = dict(id="__r_article_title__")
-    remove_tags_after  = dict(id="ajax_release_news")
-    remove_tags = [{'class':"parent chromeCustom6G"},
-                              dict(id="RelatedImg")
-                            ]
+    remove_tags_before = dict(id="NewsTitle")
+    remove_tags_after  = dict(id="RelatedTitle")

From c7da3c8c1f35b7fa07ab4ff21e15106330ce62a3 Mon Sep 17 00:00:00 2001
From: GRiker <griker@hotmail.com>
Date: Thu, 3 Feb 2011 06:41:40 -0700
Subject: [PATCH 4/7] GwR updates to catalog css

---
 resources/catalog/stylesheet.css | 37 ++++++++++++++++++----------
 src/calibre/library/catalog.py   | 41 +++++++++++++++-----------------
 2 files changed, 43 insertions(+), 35 deletions(-)

diff --git a/resources/catalog/stylesheet.css b/resources/catalog/stylesheet.css
index 336d015e44..4b32056400 100644
--- a/resources/catalog/stylesheet.css
+++ b/resources/catalog/stylesheet.css
@@ -52,6 +52,17 @@ p.formats {
 	text-indent: 0.0in;
 	}
 
+/*
+* 	Minimize widows and orphans by logically grouping chunks
+*   Some reports of problems with Sony (ADE) ereaders
+*	   ADE: page-break-inside:avoid;
+*	iBooks: display:inline-block;
+*		    width:100%;
+*/
+div.author_logical_group {
+	page-break-inside:avoid;
+	}
+
 div.description > p:first-child {
 	margin: 0 0 0 0;
 	text-indent: 0em;
@@ -62,27 +73,19 @@ div.description {
 	text-indent: 1em;
 	}
 
-/*
-* 	Attempt to minimize widows and orphans by logically grouping chunks
-* 	Recommend enabling for iPad
-*   Some reports of problems with Sony ereaders, presumably ADE engines
-*/
-/*
-div.logical_group {
-	display:inline-block;
-	width:100%;
+div.initial_letter {
+	page-break-before:always;
 	}
-*/
 
-p.date_index {
+p.author_title_letter_index {
 	font-size:x-large;
 	text-align:center;
 	font-weight:bold;
-	margin-top:1em;
+	margin-top:0px;
 	margin-bottom:0px;
 	}
 
-p.letter_index {
+p.date_index {
 	font-size:x-large;
 	text-align:center;
 	font-weight:bold;
@@ -99,6 +102,14 @@ p.series {
 	text-indent:-2em;
 	}
 
+p.series_letter_index {
+	font-size:x-large;
+	text-align:center;
+	font-weight:bold;
+	margin-top:1em;
+	margin-bottom:0px;
+	}
+
 p.read_book {
 	text-align:left;
 	margin-top:0px;
diff --git a/src/calibre/library/catalog.py b/src/calibre/library/catalog.py
index 8ad64c8cdd..092cc66ff9 100644
--- a/src/calibre/library/catalog.py
+++ b/src/calibre/library/catalog.py
@@ -1832,8 +1832,6 @@ def generateHTMLByTitle(self):
                 body.insert(btc,pTag)
                 btc += 1
 
-            # <p class="letter_index">
-            # <p class="book_title">
             divTag = Tag(soup, "div")
             dtc = 0
             current_letter = ""
@@ -1861,11 +1859,12 @@ def generateHTMLByTitle(self):
                         divTag.insert(dtc, divRunningTag)
                         dtc += 1
                     divRunningTag = Tag(soup, 'div')
-                    divRunningTag['class'] = "logical_group"
+                    if dtc > 0:
+                        divRunningTag['class'] = "initial_letter"
                     drtc = 0
                     current_letter = self.letter_or_symbol(book['title_sort'][0])
                     pIndexTag = Tag(soup, "p")
-                    pIndexTag['class'] = "letter_index"
+                    pIndexTag['class'] = "author_title_letter_index"
                     aTag = Tag(soup, "a")
                     aTag['name'] = "%s" % self.letter_or_symbol(book['title_sort'][0])
                     pIndexTag.insert(0,aTag)
@@ -1973,8 +1972,6 @@ def generateHTMLByAuthor(self):
             body.insert(btc, aTag)
             btc += 1
 
-            # <p class="letter_index">
-            # <p class="author_index">
             divTag = Tag(soup, "div")
             dtc = 0
             divOpeningTag = None
@@ -2008,10 +2005,11 @@ def generateHTMLByAuthor(self):
                     current_letter = self.letter_or_symbol(book['author_sort'][0].upper())
                     author_count = 0
                     divOpeningTag = Tag(soup, 'div')
-                    divOpeningTag['class'] = "logical_group"
+                    if dtc > 0:
+                        divOpeningTag['class'] = "initial_letter"
                     dotc = 0
                     pIndexTag = Tag(soup, "p")
-                    pIndexTag['class'] = "letter_index"
+                    pIndexTag['class'] = "author_title_letter_index"
                     aTag = Tag(soup, "a")
                     aTag['name'] = "%sauthors" % self.letter_or_symbol(current_letter)
                     pIndexTag.insert(0,aTag)
@@ -2023,16 +2021,21 @@ def generateHTMLByAuthor(self):
                     # Start a new author
                     current_author = book['author']
                     author_count += 1
-                    if author_count == 2:
+                    if author_count >= 2:
                         # Add divOpeningTag to divTag, kill divOpeningTag
-                        divTag.insert(dtc, divOpeningTag)
-                        dtc += 1
-                        divOpeningTag = None
-                        dotc = 0
+                        if divOpeningTag:
+                            divTag.insert(dtc, divOpeningTag)
+                            dtc += 1
+                            divOpeningTag = None
+                            dotc = 0
+
+                        # Create a divRunningTag for the next author
+                        if author_count > 2:
+                            divTag.insert(dtc, divRunningTag)
+                            dtc += 1
 
-                        # Create a divRunningTag for the rest of the authors in this letter
                         divRunningTag = Tag(soup, 'div')
-                        divRunningTag['class'] = "logical_group"
+                        divRunningTag['class'] = "author_logical_group"
                         drtc = 0
 
                     non_series_books = 0
@@ -2364,8 +2367,6 @@ def add_books_to_HTML_by_date_range(date_range_list, date_range, dtc):
                 body.insert(btc,pTag)
                 btc += 1
 
-            # <p class="letter_index">
-            # <p class="author_index">
             divTag = Tag(soup, "div")
             dtc = 0
 
@@ -2549,8 +2550,6 @@ def add_books_to_HTML_by_date_range(date_range_list, date_range, dtc):
             body.insert(btc, aTag)
             btc += 1
 
-            # <p class="letter_index">
-            # <p class="author_index">
             divTag = Tag(soup, "div")
             dtc = 0
 
@@ -2652,8 +2651,6 @@ def generateHTMLBySeries(self):
             body.insert(btc, aTag)
             btc += 1
 
-            # <p class="letter_index">
-            # <p class="author_index">
             divTag = Tag(soup, "div")
             dtc = 0
             current_letter = ""
@@ -2668,7 +2665,7 @@ def generateHTMLBySeries(self):
                     # Start a new letter with Index letter
                     current_letter = self.letter_or_symbol(sort_title[0].upper())
                     pIndexTag = Tag(soup, "p")
-                    pIndexTag['class'] = "letter_index"
+                    pIndexTag['class'] = "series_letter_index"
                     aTag = Tag(soup, "a")
                     aTag['name'] = "%s_series" % self.letter_or_symbol(current_letter)
                     pIndexTag.insert(0,aTag)

From be76d5b41e2fb298be7e2f63cf11274d2345e66d Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Thu, 3 Feb 2011 07:57:53 -0700
Subject: [PATCH 5/7] Fix #8735 (Updated recipe for The Onion)

---
 resources/recipes/theonion.recipe | 78 ++++++++++++++++++++++---------
 1 file changed, 57 insertions(+), 21 deletions(-)

diff --git a/resources/recipes/theonion.recipe b/resources/recipes/theonion.recipe
index 3be4ae4e04..b0eacbb5e0 100644
--- a/resources/recipes/theonion.recipe
+++ b/resources/recipes/theonion.recipe
@@ -1,7 +1,5 @@
-#!/usr/bin/env  python
-
 __license__   = 'GPL v3'
-__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2009-2011, Darko Miletic <darko.miletic at gmail.com>'
 
 '''
 theonion.com
@@ -12,35 +10,73 @@
 class TheOnion(BasicNewsRecipe):
     title                 = 'The Onion'
     __author__            = 'Darko Miletic'
-    description           = "America's finest news source"    
-    oldest_article        = 2    
+    description           = "America's finest news source"
+    oldest_article        = 2
     max_articles_per_feed = 100
-    publisher             = u'Onion, Inc.'
-    category              = u'humor, news, USA'    
-    language = 'en'
-
+    publisher             = 'Onion, Inc.'
+    category              = 'humor, news, USA'
+    language              = 'en'
     no_stylesheets        = True
     use_embedded_content  = False
     encoding              = 'utf-8'
-    remove_javascript     = True
-    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' 
-     
-    html2lrf_options = [
-                          '--comment'       , description
-                        , '--category'      , category
-                        , '--publisher'     , publisher
-                        ]
+    publication_type      = 'newsportal'
+    masthead_url          = 'http://o.onionstatic.com/img/headers/onion_190.png'
+    extra_css             = """
+                                body{font-family: Helvetica,Arial,sans-serif}
+                                .section_title{color: gray; text-transform: uppercase}
+                                .title{font-family: Georgia,serif}
+                                .meta{color: gray; display: inline}
+                                .has_caption{display: block}
+                                .caption{font-size: x-small; color: gray; margin-bottom: 0.8em}
+                            """
 
-    keep_only_tags = [dict(name='div', attrs={'id':'main'})]
-    
+    conversion_options = {
+                          'comment'  : description
+                        , 'tags'     : category
+                        , 'publisher': publisher
+                        , 'language' : language
+                        }
+
+    keep_only_tags = [
+                         dict(name='h2', attrs={'class':['section_title','title']})
+                        ,dict(attrs={'class':['main_image','meta','article_photo_lead','article_body']})
+                        ,dict(attrs={'id':['entries']})
+                     ]
+    remove_attributes=['lang','rel']
+    remove_tags_after = dict(attrs={'class':['article_body','feature_content']})
     remove_tags = [
-                     dict(name=['object','link','iframe','base'])
+                     dict(name=['object','link','iframe','base','meta'])
                     ,dict(name='div', attrs={'class':['toolbar_side','graphical_feature','toolbar_bottom']})
                     ,dict(name='div', attrs={'id':['recent_slider','sidebar','pagination','related_media']})
                   ]
 
-                            
+
     feeds = [
               (u'Daily'  , u'http://feeds.theonion.com/theonion/daily' )
              ,(u'Sports' , u'http://feeds.theonion.com/theonion/sports' )
             ]
+
+    def get_article_url(self, article):
+        artl = BasicNewsRecipe.get_article_url(self, article)
+        if artl.startswith('http://www.theonion.com/audio/'):
+           artl = None
+        return artl
+
+    def preprocess_html(self, soup):
+        for item in soup.findAll(style=True):
+            del item['style']
+        for item in soup.findAll('a'):
+            limg = item.find('img')
+            if item.string is not None:
+               str = item.string
+               item.replaceWith(str)
+            else:
+               if limg:
+                  item.name  = 'div'
+                  item.attrs = []
+                  if not limg.has_key('alt'):
+                     limg['alt'] = 'image'
+               else:
+                   str = self.tag_to_string(item)
+                   item.replaceWith(str)
+        return soup

From 57efe4fb061c92a00db48cadc169fd05f6d8d833 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Thu, 3 Feb 2011 10:18:00 -0700
Subject: [PATCH 6/7] Fix #8739 (get_matches() got multiple values for keyword
 argument 'allow_recursion')

---
 src/calibre/library/caches.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/calibre/library/caches.py b/src/calibre/library/caches.py
index dd4509acea..e818e6a3c0 100644
--- a/src/calibre/library/caches.py
+++ b/src/calibre/library/caches.py
@@ -420,7 +420,8 @@ def get_user_category_matches(self, location, query, candidates):
             return candidates - res
         return res
 
-    def get_matches(self, location, query, allow_recursion=True, candidates=None):
+    def get_matches(self, location, query, candidates=None,
+            allow_recursion=True):
         matches = set([])
         if candidates is None:
             candidates = self.universal_set()
@@ -434,8 +435,8 @@ def get_matches(self, location, query, allow_recursion=True, candidates=None):
             if isinstance(location, list):
                 if allow_recursion:
                     for loc in location:
-                        matches |= self.get_matches(loc, query, candidates,
-                                                    allow_recursion=False)
+                        matches |= self.get_matches(loc, query,
+                                candidates=candidates, allow_recursion=False)
                     return matches
                 raise ParseException(query, len(query), 'Recursive query group detected', self)
 

From 8749611440861d79f53a3a43d19d1b276fbf13f6 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Thu, 3 Feb 2011 11:33:19 -0700
Subject: [PATCH 7/7] Nook Color driver: Send downloaded news to the My
 Files/Magazines folder on the Nook Color. Also when getting the list of books
 on the device look at all folders in My Files, not just My Files/Books.

---
 src/calibre/devices/nook/driver.py | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/src/calibre/devices/nook/driver.py b/src/calibre/devices/nook/driver.py
index ca05885645..39d0763735 100644
--- a/src/calibre/devices/nook/driver.py
+++ b/src/calibre/devices/nook/driver.py
@@ -89,21 +89,21 @@ class NOOK_COLOR(NOOK):
     BCD         = [0x216]
     WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = 'EBOOK_DISK'
 
-    EBOOK_DIR_MAIN = 'My Files/Books'
+    EBOOK_DIR_MAIN = 'My Files'
 
-    '''
     def create_upload_path(self, path, mdata, fname, create_dirs=True):
         filepath = NOOK.create_upload_path(self, path, mdata, fname,
-                create_dirs=create_dirs)
-        edm = self.EBOOK_DIR_MAIN.replace('/', os.sep)
-        npath = os.path.join(edm, _('News')) + os.sep
-        if npath in filepath:
-            filepath = filepath.replace(npath, os.sep.join('My Files',
-                'Magazines')+os.sep)
-            filedir = os.path.dirname(filepath)
-            if create_dirs and not os.path.exists(filedir):
-                os.makedirs(filedir)
+                create_dirs=False)
+        edm = self.EBOOK_DIR_MAIN
+        subdir = 'Books'
+        if mdata.tags:
+            if _('News') in mdata.tags:
+                subdir = 'Magazines'
+        filepath = filepath.replace(os.sep+edm+os.sep,
+                os.sep+edm+os.sep+subdir+os.sep)
+        filedir = os.path.dirname(filepath)
+        if create_dirs and not os.path.exists(filedir):
+            os.makedirs(filedir)
 
         return filepath
-    '''
 

Field	Bytes	Type	Notes
nextRecordListID	4	Numeric	Must be zero.
numRecords	2	Numeric	Number of records in the document, including the index record.
Function Code	Description	Bytes	Arguments
0x0A	Page link begins	2	record ID
0x0B	Targeted page link begins	3	record ID, target
0x0C	Paragraph link begins	4	record ID, paragraph number
0x0D	Targeted paragraph link begins	5	record ID, paragraph number, target
0x08	Link ends	0	no data
0x11	Set font	1	font specifier
0x1A	Embedded image	2	image record ID
0x22	Set margin	2	left margin, right margin
0x29	Alignment of text	1	alignment
0x33	Horizontal rule	3	8-bit height, 8-bit width (pixels), 8-bit width (%, 1-100)
0x38	New line	0	no data
0x40	Italic text begins	0	no data
0x48	Italic text ends	0	no data
0x53	Set text color	3	8-bit red, 8-bit green, 8-bit blue
0x5C	Multiple embedded image	4	alternate image record ID, image record ID
0x60	Underline text begins	0	no data
0x68	Underline text ends	0	no data
0x70	Strike-through text begins	0	no data
0x78	Strike-through text ends	0	no data
0x83	16-bit Unicode character	3	alternate text length, 16-bit unicode character
0x85	32-bit Unicode character	5	alternate text length, 32-bit unicode character
0x8E	Begin custom font span	6	font page record ID, X page position, Y page position
0x8C	Adjust custom font glyph position	4	X page position, Y page position
0x8A	Change font page	2	font record ID
0x88	End custom font span	0	no data
0x90	Begin new table row	0	no data
0x92	Insert table (or table link)	2	table record ID
0x97	Table cell data	7	8-bit alignment, 16-bit image record ID, 8-bit columns, 8-bit rows, 16-bit text length
0x9A	Exact link modifier	2	Paragraph Offset (The Exact Link Modifier modifies a Paragraph Link or Targeted Paragraph Link function to specify an exact byte offset within the paragraph. This function must be followed immediately by the function it modifies).