mirror of
https://git.lyx.org/repos/lyx.git
synced 2024-12-22 21:21:32 +00:00
Improve revert unicode (bug 3958) from Anders Ekberg
git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/trunk@19113 a592a061-630c-0410-9148-cb99ea01b6c8
This commit is contained in:
parent
1007714d2e
commit
8397a051fa
@ -1769,6 +1769,7 @@ def convert_ext_font_sizes(document):
|
||||
else:
|
||||
del document.header[i]
|
||||
|
||||
|
||||
def revert_separator_layout(document):
|
||||
r'''Revert --Separator-- to a lyx note
|
||||
From
|
||||
@ -1817,6 +1818,7 @@ something
|
||||
r'\end_layout'
|
||||
]
|
||||
|
||||
|
||||
def convert_arabic (document):
|
||||
if document.language == "arabic":
|
||||
document.language = "arabic_arabtex"
|
||||
@ -1831,6 +1833,7 @@ def convert_arabic (document):
|
||||
document.body[i] = '\lang arabic_arabtex'
|
||||
i = i + 1
|
||||
|
||||
|
||||
def revert_arabic (document):
|
||||
if document.language == "arabic_arabtex":
|
||||
document.language = "arabic"
|
||||
@ -1845,13 +1848,11 @@ def revert_arabic (document):
|
||||
document.body[i] = '\lang arabic'
|
||||
i = i + 1
|
||||
|
||||
def revert_unicode(document):
|
||||
'''Transform unicode symbols according to the unicode list.
|
||||
Preamble flags are not implemented.
|
||||
Combination characters are currently ignored.
|
||||
Forced output is currently not enforced'''
|
||||
pathname = os.path.dirname(sys.argv[0])
|
||||
fp = open(pathname.strip('lyx2lyx') + 'unicodesymbols','r')
|
||||
|
||||
def read_unicodesymbols():
|
||||
" Read the unicodesymbols list of unicode characters and corresponding commands."
|
||||
pathname = os.path.abspath(os.path.dirname(sys.argv[0]))
|
||||
fp = open(os.path.join(pathname.strip('lyx2lyx'), 'unicodesymbols'))
|
||||
spec_chars = {}
|
||||
for line in fp.readlines():
|
||||
if line[0] != '#':
|
||||
@ -1859,60 +1860,109 @@ Forced output is currently not enforced'''
|
||||
line=line.replace('" ',' ') # remove all quotation marks with spaces after
|
||||
line=line.replace(r'\"','"') # replace \" by " (for characters with diaeresis)
|
||||
try:
|
||||
# flag1 and flag2 are preamble & flags
|
||||
# currently NOT implemented
|
||||
# flag1 and flag2 are preamble and other flags
|
||||
[ucs4,command,flag1,flag2] =line.split(None,3)
|
||||
spec_chars[unichr(eval(ucs4))] = [command, flag1, flag2]
|
||||
except:
|
||||
pass
|
||||
fp.close()
|
||||
|
||||
return spec_chars
|
||||
|
||||
|
||||
def revert_unicode(document):
|
||||
'''Transform unicode characters that can not be written using the
|
||||
document encoding to commands according to the unicodesymbols
|
||||
file. Characters that can not be replaced by commands are replaced by
|
||||
an replacement string. Flags other than 'combined' are currently not
|
||||
implemented.'''
|
||||
|
||||
replacement_character = '???'
|
||||
spec_chars = read_unicodesymbols()
|
||||
|
||||
# Define strings to start and end ERT and math insets
|
||||
ert_intro='\n\n\\begin_inset ERT\nstatus collapsed\n\\begin_layout Standard\n\\backslash\n'
|
||||
ert_outro='\n\\end_layout\n\n\\end_inset\n\n'
|
||||
math_intro='\n\\begin_inset Formula $'
|
||||
math_outro='$\n\\end_inset\n'
|
||||
# Find unicode characters and replace them
|
||||
in_ert = 0 # flag set to 1 if in ERT inset
|
||||
in_math = 0 # flag set to 1 if in math inset
|
||||
in_ert = False # flag set to 1 if in ERT inset
|
||||
in_math = False # flag set to 1 if in math inset
|
||||
temp_file = os.tmpfile()
|
||||
insets = [] # list of active insets
|
||||
for i, current_line in enumerate(document.body):
|
||||
if current_line.find('\\begin_inset') > -1:
|
||||
mod_body = u'' # to store the modified document body
|
||||
|
||||
# Go through the file to capture all combining characters
|
||||
last_char = '' # to store the previous character
|
||||
body_string = u'' # store the document temporarily as a string
|
||||
for line in document.body:
|
||||
body_string = body_string + line +'\n'
|
||||
[body_string, apa] = body_string.rsplit('\n',1)
|
||||
|
||||
body = body_string.split('\n')
|
||||
for line in body:
|
||||
# Check for insets
|
||||
if line.find('\\begin_inset') > -1:
|
||||
# check which inset to start
|
||||
if current_line.find('\\begin_inset ERT') > -1:
|
||||
in_ert = 1
|
||||
if line.find('\\begin_inset ERT') > -1:
|
||||
in_ert = True
|
||||
insets.append('ert')
|
||||
elif current_line.find('\\begin_inset Formula') > -1:
|
||||
in_math = 1
|
||||
elif line.find('\\begin_inset Formula') > -1:
|
||||
in_math = True
|
||||
insets.append('math')
|
||||
else:
|
||||
insets.append('other')
|
||||
if current_line.find('\\end_inset') > -1:
|
||||
if line.find('\\end_inset') > -1:
|
||||
# check which inset to end
|
||||
try:
|
||||
cur_inset = insets.pop()
|
||||
if cur_inset == 'ert':
|
||||
in_ert = 0
|
||||
in_ert = False
|
||||
elif cur_inset == 'math':
|
||||
in_math = 0
|
||||
in_math = False
|
||||
else:
|
||||
pass # end of other inset
|
||||
except:
|
||||
pass # inset list was empty (for some reason)
|
||||
current_line=''; # clear to have as container for modified line
|
||||
for j in range(len(document.body[i])):
|
||||
if spec_chars.has_key(document.body[i][j]):
|
||||
flags = spec_chars[document.body[i][j]][1] + spec_chars[document.body[i][j]][2]
|
||||
if flags.find('combining') > -1:
|
||||
command = ''
|
||||
|
||||
# Try to write the line
|
||||
try:
|
||||
# If all goes well the line is written here
|
||||
temp_file.write(line.encode(document.encoding) + '\n')
|
||||
mod_body = mod_body + line + '\n'
|
||||
last_char = line[-1]
|
||||
except:
|
||||
# Error, some character(s) in the line need to be replaced
|
||||
for character in line:
|
||||
try:
|
||||
# Try to write the character
|
||||
temp_file.write(character.encode(document.encoding))
|
||||
mod_body = mod_body + character
|
||||
last_char = character
|
||||
except:
|
||||
# Try to replace with ERT/math inset
|
||||
if spec_chars.has_key(character):
|
||||
command = spec_chars[character][0]; # the command to replace unicode
|
||||
flag1 = spec_chars[character][1]
|
||||
flag2 = spec_chars[character][2]
|
||||
if flag1.find('combining') > -1 or flag2.find('combining') > -1:
|
||||
# We have a character that should be combined with the previous
|
||||
command = command + '{' +last_char + '}'
|
||||
# Remove the last character. Ignore if it is whitespace
|
||||
if len(last_char.rstrip()) > 0:
|
||||
# last_char was found and is not whitespace
|
||||
[mod_body, apa] = mod_body.rsplit(last_char,1)
|
||||
else:
|
||||
command = spec_chars[document.body[i][j]][0]; # the command to replace unicode
|
||||
# The last character was replaced by a command. For now it is
|
||||
# ignored. This could be handled better.
|
||||
pass
|
||||
if command[0:2] == '\\\\':
|
||||
if command[2:12]=='ensuremath':
|
||||
if in_ert == 1:
|
||||
if in_ert == True:
|
||||
# math in ERT
|
||||
command = command.replace('\\\\ensuremath{\\\\', '$\n\\backslash\n')
|
||||
command = command.replace('}', '$\n')
|
||||
elif in_math == 0:
|
||||
elif in_math == False:
|
||||
# add a math inset with the replacement character
|
||||
command = command.replace('\\\\ensuremath{\\', math_intro)
|
||||
command = command.replace('}', math_outro)
|
||||
@ -1921,20 +1971,24 @@ Forced output is currently not enforced'''
|
||||
command = command.replace('\\\\ensuremath{\\', '')
|
||||
command = command.replace('}', '')
|
||||
else:
|
||||
if in_math == 1:
|
||||
if in_math == True:
|
||||
# avoid putting an ERT in a math; instead put command as text
|
||||
command = command.replace('\\\\', '\mathrm{')
|
||||
command = command + '}'
|
||||
elif in_ert == 0:
|
||||
elif in_ert == False:
|
||||
# add an ERT inset with the replacement character
|
||||
command = command.replace('\\\\', ert_intro)
|
||||
command = command + ert_outro
|
||||
else:
|
||||
command = command.replace('\\\\', '\n\\backslash\n')
|
||||
current_line = current_line + command
|
||||
last_char = '' # indicate that the character should not be removed
|
||||
mod_body = mod_body + command
|
||||
else:
|
||||
current_line = current_line + document.body[i][j]
|
||||
document.body[i] = current_line
|
||||
# Replace with replacement string
|
||||
mod_body = mod_body + replacement_character
|
||||
[mod_body, apa] = mod_body.rsplit('\n',1)
|
||||
document.body = mod_body.split('\n')
|
||||
temp_file.close()
|
||||
|
||||
|
||||
##
|
||||
|
Loading…
Reference in New Issue
Block a user