From ca6246e4dacdad838688f35413d527c7c930d693 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrgen=20Spitzm=C3=BCller?= Date: Mon, 25 Feb 2008 13:32:08 +0000 Subject: [PATCH] * lib/lyx2lyx/lyx_1_5.py: - implement wrapper around normalize that handles specific cases that must not be normalized (rest of bug 3313). git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/trunk@23227 a592a061-630c-0410-9148-cb99ea01b6c8 --- lib/lyx2lyx/lyx_1_5.py | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/lib/lyx2lyx/lyx_1_5.py b/lib/lyx2lyx/lyx_1_5.py index 762d92be4d..05fd93fdc2 100644 --- a/lib/lyx2lyx/lyx_1_5.py +++ b/lib/lyx2lyx/lyx_1_5.py @@ -1158,6 +1158,25 @@ def is_inset_line(document, i): return last_tokens.find('\\') != -1 +# A wrapper around normalize that handles special cases (cf. bug 3313) +def normalize(form, text): + # do not normalize OHM, ANGSTROM + keep_characters = [0x2126,0x212b] + result = '' + convert = '' + for i in text: + if ord(i) in keep_characters: + if len(convert) > 0: + result = result + unicodedata.normalize(form, convert) + convert = '' + result = result + i + else: + convert = convert + i + if len(convert) > 0: + result = result + unicodedata.normalize(form, convert) + return result + + def revert_accent(document): inverse_accent_map = {} for k in accent_map: @@ -1190,9 +1209,9 @@ def revert_accent(document): # because we never use u'xxx' for string literals, but 'xxx'. # Therefore we may have to try two times to normalize the data. try: - document.body[i] = unicodedata.normalize("NFD", document.body[i]) + document.body[i] = normalize("NFD", document.body[i]) except TypeError: - document.body[i] = unicodedata.normalize("NFD", unicode(document.body[i], 'utf-8')) + document.body[i] = normalize("NFD", unicode(document.body[i], 'utf-8')) # Replace accented characters with InsetLaTeXAccent # Do not convert characters that can be represented in the chosen @@ -1247,7 +1266,7 @@ def revert_accent(document): accented_char = inverse_accented_map[accented_char] accent = document.body[i][j] try: - dummy = unicodedata.normalize("NFC", accented_char + accent).encode(encoding_stack[-1]) + dummy = normalize("NFC", accented_char + accent).encode(encoding_stack[-1]) except UnicodeEncodeError: # Insert the rest of the line as new line if j < len(document.body[i]) - 1: @@ -1261,7 +1280,7 @@ def revert_accent(document): # Normalize to "Normal form C" (NFC, pre-composed characters) again for i in range(len(document.body)): - document.body[i] = unicodedata.normalize("NFC", document.body[i]) + document.body[i] = normalize("NFC", document.body[i]) def normalize_font_whitespace_259(document):