Correction of hard line breaking bug in RTF (introduce a <br/> for \line)

This commit is contained in:
Sengian 2010-07-26 01:26:39 +02:00
parent c1776406e5
commit 8cb95f343b
4 changed files with 48 additions and 35 deletions

View file

@ -412,6 +412,10 @@
<xsl:attribute name="style">page-break-after:always</xsl:attribute>
</xsl:element>
</xsl:template>
<xsl:template match="rtf:hardline-break">
<xsl:element name="br"/>
</xsl:template>
<xsl:template match="rtf:rtf-definition|rtf:font-table|rtf:color-table|rtf:style-table|rtf:page-definition|rtf:list-table|rtf:override-table|rtf:override-list|rtf:list-text"/>

View file

@ -90,8 +90,8 @@ def __init__(self,
out_file = '',
out_dir = None,
dtd = '',
debug = 0,
deb_dir=None,
#debug = 0, #why? calibre
deb_dir = None,
convert_symbol = None,
convert_wingdings = None,
convert_zapf = None,
@ -132,7 +132,7 @@ def __init__(self,
self.__dtd_path = dtd
self.__check_file(in_file,"file_to_parse")
self.__char_data = char_data
self.__debug_dir = debug
self.__debug_dir = deb_dir #self.__debug_dir = debug calibre
self.__check_dir(self.__temp_dir)
self.__copy = self.__check_dir(self.__debug_dir)
self.__convert_caps = convert_caps

View file

@ -1,4 +1,4 @@
import sys, os, tempfile
import sys, os, tempfile
from calibre.ebooks.rtf2xml import copy
"""
States.
@ -51,6 +51,7 @@ def __initiate_values(self):
'tx<ut<__________' : self.__found_text_func,
'mi<mk<inline-fld' : self.__found_text_func,
'text' : self.__found_text_func,
'cw<nu<hard-lineb' : self.__found_text_func, #calibre
'cb<nu<clos-brack' : self.__close_bracket_func,
'mi<mk<par-end___' : self.__end_para_func,
'mi<mk<footnt-ope' : self.__end_para_func,
@ -62,6 +63,7 @@ def __initiate_values(self):
'tx<hx<__________' : self.__found_text_func,
'tx<ut<__________' : self.__found_text_func,
'text' : self.__found_text_func,
'cw<nu<hard-lineb' : self.__found_text_func, #calibre
'mi<mk<inline-fld' : self.__found_text_func,
'ob<nu<open-brack': self.__found_open_bracket_func,
'mi<mk<par-end___' : self.__end_para_func,
@ -80,32 +82,32 @@ def __initiate_values(self):
self.__inline_list = self.__body_inline_list
self.__in_para = 0 # not in paragraph
self.__char_dict = {
# character info => ci
'annotation' : 'annotation',
'blue______' : 'blue',
'bold______' : 'bold',
'caps______' : 'caps',
'char-style' : 'character-style',
'dbl-strike' : 'double-strike-through',
'emboss____' : 'emboss',
'engrave___' : 'engrave',
'font-color' : 'font-color',
'font-down_' : 'subscript',
'font-size_' : 'font-size',
'font-style' : 'font-style',
'font-up___' : 'superscript',
'footnot-mk' : 'footnote-marker',
'green_____' : 'green',
'hidden____' : 'hidden',
'italics___' : 'italics',
'outline___' : 'outline',
'red_______' : 'red',
'shadow____' : 'shadow',
'small-caps' : 'small-caps',
'strike-thr' : 'strike-through',
'subscript_' : 'subscript',
'superscrip' : 'superscript',
'underlined' : 'underlined',
# character info => ci
'annotation' : 'annotation',
'blue______' : 'blue',
'bold______' : 'bold',
'caps______' : 'caps',
'char-style' : 'character-style',
'dbl-strike' : 'double-strike-through',
'emboss____' : 'emboss',
'engrave___' : 'engrave',
'font-color' : 'font-color',
'font-down_' : 'subscript',
'font-size_' : 'font-size',
'font-style' : 'font-style',
'font-up___' : 'superscript',
'footnot-mk' : 'footnote-marker',
'green_____' : 'green',
'hidden____' : 'hidden',
'italics___' : 'italics',
'outline___' : 'outline',
'red_______' : 'red',
'shadow____' : 'shadow',
'small-caps' : 'small-caps',
'strike-thr' : 'strike-through',
'subscript_' : 'subscript',
'superscrip' : 'superscript',
'underlined' : 'underlined',
}
self.__caps_list = ['false']
def __set_list_func(self, line):
@ -133,11 +135,13 @@ def __default_func(self, line):
Returns:
nothing
Logic:
Write if not hardline break
"""
action = self.__default_dict.get(self.__token_info)
if action:
action(line)
self.__write_obj.write(line)
if self.__token_info != 'cw<nu<hard-lineb': #calibre
self.__write_obj.write(line)
def __found_open_bracket_func(self, line):
"""
Requires:
@ -164,7 +168,7 @@ def __after_open_bracket_func(self, line):
Use the dictionary to get the appropriate function.
Always print out the line.
"""
if line[0:2] == 'cw':
if line[0:5] == 'cw<ci': #calibre: bug in original function no diff between cw<ci and cw<pf
self.__handle_control_word(line)
else:
action = self.__after_open_bracket_dict.get(self.__token_info)
@ -247,12 +251,13 @@ def __found_text_func(self, line):
Return:
nothing
Logic:
Two cases:
Three cases:
1. in a list. Simply write inline
2. Not in a list
Text can mark the start of a paragraph.
If already in a paragraph, check to see if any groups are waiting
to be added. If so, use another method to write these groups.
3. Otherwise, if the token is a hard line break, write an empty hardline-break tag
"""
if self.__place == 'in_list':
self.__write_inline()
@ -261,8 +266,11 @@ def __found_text_func(self, line):
self.__in_para = 1
self.__start_para_func(line)
else:
if self.__token_info == 'cw<nu<hard-lineb': #calibre
self.__write_obj.write('mi<tg<empty_____<hardline-break\n')
if self.__groups_in_waiting[0] != 0:
self.__write_inline()
def __write_inline(self):
"""
Required:
@ -279,7 +287,7 @@ def __write_inline(self):
Get the keys in each dictionary. If 'font-style' is in the keys,
write a marker tag. (I will use this marker tag later when converting
hex text to utf8.)
Write a tag for the inline vaues.
Write a tag for the inline values.
"""
if self.__groups_in_waiting[0] != 0:
last_index = -1 * self.__groups_in_waiting[0]

View file

@ -73,7 +73,8 @@ def initiate_token_dict(self):
'backslash' : ('nu', '\\', self.text_func),
'ob' : ('nu', '{', self.text_func),
'cb' : ('nu', '}', self.text_func),
'line' : ('nu', ' ', self.text_func),
'line' : ('nu', 'hard-lineb', self.default_func), #calibre
#'line' : ('nu', ' ', self.text_func), calibre
# paragraph formatting => pf
'page' : ('pf', 'page-break', self.default_func),
'par' : ('pf', 'par-end___', self.default_func),