From b03bc9691212caf7dc3f524e4163b2cb73a6a9f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Matox?= Date: Sun, 22 Jul 2007 18:20:22 +0000 Subject: [PATCH] Fix bug 2049 git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/trunk@19166 a592a061-630c-0410-9148-cb99ea01b6c8 --- lib/lyx2lyx/lyx_1_5.py | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/lib/lyx2lyx/lyx_1_5.py b/lib/lyx2lyx/lyx_1_5.py index f8bcd6e4d2..4b75701688 100644 --- a/lib/lyx2lyx/lyx_1_5.py +++ b/lib/lyx2lyx/lyx_1_5.py @@ -310,8 +310,7 @@ necessary parsing in modern formats than in ancient ones. # be used when writing to the file. document.body[i] = orig.decode(document.encoding) except: - last_char = document.body[i-1][-1] - mod_line, last_char = revert_unicode_line(document, i, last_char, insets, spec_chars) + mod_line = revert_unicode_line(document, i, insets, spec_chars) document.body[i:i+1] = mod_line.split('\n') i += len(mod_line.split('\n')) - 1 i += 1 @@ -355,7 +354,7 @@ def read_unicodesymbols(): return spec_chars -def revert_unicode_line(document, i, last_char, insets, spec_chars, replacement_character = '???'): +def revert_unicode_line(document, i, insets, spec_chars, replacement_character = '???'): # Define strings to start and end ERT and math insets ert_intro='\n\n\\begin_inset ERT\nstatus collapsed\n\\begin_layout %s\n\\backslash\n' % document.default_layout ert_outro='\n\\end_layout\n\n\\end_inset\n' @@ -363,6 +362,11 @@ def revert_unicode_line(document, i, last_char, insets, spec_chars, replacement_ math_outro='$\n\\end_inset' mod_line = u'' + if i and document.body[i - 1][:1] != '\\': + last_char = document.body[i - 1][-1:] + else: + last_char = '' + line = document.body[i] for character in line: try: @@ -420,7 +424,7 @@ def revert_unicode_line(document, i, last_char, insets, spec_chars, replacement_ else: # Replace with replacement string mod_line += replacement_character - return mod_line, last_char + return mod_line def revert_unicode(document): @@ -431,7 +435,6 @@ an replacement string. Flags other than 'combined' are currently not implemented.''' spec_chars = read_unicodesymbols() insets = [] # list of active insets - last_char = '' # to store the previous character # Go through the document to capture all combining characters i = 0 @@ -447,11 +450,10 @@ implemented.''' try: # If all goes well the line is written here dummy = line.encode(document.encoding) - last_char = line[-1] i += 1 except: # Error, some character(s) in the line need to be replaced - mod_line, last_char = revert_unicode_line(document, i, last_char, insets, spec_chars) + mod_line = revert_unicode_line(document, i, insets, spec_chars) document.body[i:i+1] = mod_line.split('\n') i += len(mod_line.split('\n')) @@ -1163,20 +1165,19 @@ def revert_accent(document): # words before unicode normalization. # We do this only if the next line starts with an accent, otherwise we # would create things like '\begin_inset ERTstatus'. - numberoflines = len(document.body) - for i in range(numberoflines-1): + for i in range(len(document.body) - 1): if document.body[i] == '' or document.body[i+1] == '' or document.body[i][-1] == ' ': continue - if (document.body[i+1][0] in inverse_accent_map): + if (document.body[i+1][0] in inverse_accent_map and document.body[i][:1] != '\\'): # the last character of this line and the first of the next line - # form probably a surrogate pair. + # form probably a surrogate pair, inline insets are excluded (second part of the test) while (len(document.body[i+1]) > 0 and document.body[i+1][0] != ' '): document.body[i] += document.body[i+1][0] document.body[i+1] = document.body[i+1][1:] # Normalize to "Normal form D" (NFD, also known as canonical decomposition). # This is needed to catch all accented characters. - for i in range(numberoflines): + for i in range(len(document.body)): # Unfortunately we have a mixture of unicode strings and plain strings, # because we never use u'xxx' for string literals, but 'xxx'. # Therefore we may have to try two times to normalize the data. @@ -1190,9 +1191,9 @@ def revert_accent(document): # encoding. encoding_stack = [get_encoding(document.language, document.inputencoding, 248, document.cjk_encoding)] lang_re = re.compile(r"^\\lang\s(\S+)") + i = 0 while i < len(document.body): - if (document.inputencoding == "auto" or document.inputencoding == "default") and document.cjk_encoding != '': # Track the encoding of the current line result = lang_re.match(document.body[i]) @@ -1257,7 +1258,7 @@ def revert_accent(document): i = i + 1 # Normalize to "Normal form C" (NFC, pre-composed characters) again - for i in range(numberoflines): + for i in range(len(document.body)): document.body[i] = unicodedata.normalize("NFC", document.body[i])