mirror of
https://git.lyx.org/repos/lyx.git
synced 2024-12-23 05:25:26 +00:00
Improve revert unicode (bug 3958) from Anders Ekberg
git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/trunk@19113 a592a061-630c-0410-9148-cb99ea01b6c8
This commit is contained in:
parent
1007714d2e
commit
8397a051fa
@ -1769,6 +1769,7 @@ def convert_ext_font_sizes(document):
|
|||||||
else:
|
else:
|
||||||
del document.header[i]
|
del document.header[i]
|
||||||
|
|
||||||
|
|
||||||
def revert_separator_layout(document):
|
def revert_separator_layout(document):
|
||||||
r'''Revert --Separator-- to a lyx note
|
r'''Revert --Separator-- to a lyx note
|
||||||
From
|
From
|
||||||
@ -1817,6 +1818,7 @@ something
|
|||||||
r'\end_layout'
|
r'\end_layout'
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
def convert_arabic (document):
|
def convert_arabic (document):
|
||||||
if document.language == "arabic":
|
if document.language == "arabic":
|
||||||
document.language = "arabic_arabtex"
|
document.language = "arabic_arabtex"
|
||||||
@ -1831,6 +1833,7 @@ def convert_arabic (document):
|
|||||||
document.body[i] = '\lang arabic_arabtex'
|
document.body[i] = '\lang arabic_arabtex'
|
||||||
i = i + 1
|
i = i + 1
|
||||||
|
|
||||||
|
|
||||||
def revert_arabic (document):
|
def revert_arabic (document):
|
||||||
if document.language == "arabic_arabtex":
|
if document.language == "arabic_arabtex":
|
||||||
document.language = "arabic"
|
document.language = "arabic"
|
||||||
@ -1845,13 +1848,11 @@ def revert_arabic (document):
|
|||||||
document.body[i] = '\lang arabic'
|
document.body[i] = '\lang arabic'
|
||||||
i = i + 1
|
i = i + 1
|
||||||
|
|
||||||
def revert_unicode(document):
|
|
||||||
'''Transform unicode symbols according to the unicode list.
|
def read_unicodesymbols():
|
||||||
Preamble flags are not implemented.
|
" Read the unicodesymbols list of unicode characters and corresponding commands."
|
||||||
Combination characters are currently ignored.
|
pathname = os.path.abspath(os.path.dirname(sys.argv[0]))
|
||||||
Forced output is currently not enforced'''
|
fp = open(os.path.join(pathname.strip('lyx2lyx'), 'unicodesymbols'))
|
||||||
pathname = os.path.dirname(sys.argv[0])
|
|
||||||
fp = open(pathname.strip('lyx2lyx') + 'unicodesymbols','r')
|
|
||||||
spec_chars = {}
|
spec_chars = {}
|
||||||
for line in fp.readlines():
|
for line in fp.readlines():
|
||||||
if line[0] != '#':
|
if line[0] != '#':
|
||||||
@ -1859,82 +1860,135 @@ Forced output is currently not enforced'''
|
|||||||
line=line.replace('" ',' ') # remove all quotation marks with spaces after
|
line=line.replace('" ',' ') # remove all quotation marks with spaces after
|
||||||
line=line.replace(r'\"','"') # replace \" by " (for characters with diaeresis)
|
line=line.replace(r'\"','"') # replace \" by " (for characters with diaeresis)
|
||||||
try:
|
try:
|
||||||
# flag1 and flag2 are preamble & flags
|
# flag1 and flag2 are preamble and other flags
|
||||||
# currently NOT implemented
|
|
||||||
[ucs4,command,flag1,flag2] =line.split(None,3)
|
[ucs4,command,flag1,flag2] =line.split(None,3)
|
||||||
spec_chars[unichr(eval(ucs4))] = [command, flag1, flag2]
|
spec_chars[unichr(eval(ucs4))] = [command, flag1, flag2]
|
||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
fp.close()
|
fp.close()
|
||||||
|
|
||||||
|
return spec_chars
|
||||||
|
|
||||||
|
|
||||||
|
def revert_unicode(document):
|
||||||
|
'''Transform unicode characters that can not be written using the
|
||||||
|
document encoding to commands according to the unicodesymbols
|
||||||
|
file. Characters that can not be replaced by commands are replaced by
|
||||||
|
an replacement string. Flags other than 'combined' are currently not
|
||||||
|
implemented.'''
|
||||||
|
|
||||||
|
replacement_character = '???'
|
||||||
|
spec_chars = read_unicodesymbols()
|
||||||
|
|
||||||
# Define strings to start and end ERT and math insets
|
# Define strings to start and end ERT and math insets
|
||||||
ert_intro='\n\n\\begin_inset ERT\nstatus collapsed\n\\begin_layout Standard\n\\backslash\n'
|
ert_intro='\n\n\\begin_inset ERT\nstatus collapsed\n\\begin_layout Standard\n\\backslash\n'
|
||||||
ert_outro='\n\\end_layout\n\n\\end_inset\n\n'
|
ert_outro='\n\\end_layout\n\n\\end_inset\n\n'
|
||||||
math_intro='\n\\begin_inset Formula $'
|
math_intro='\n\\begin_inset Formula $'
|
||||||
math_outro='$\n\\end_inset\n'
|
math_outro='$\n\\end_inset\n'
|
||||||
# Find unicode characters and replace them
|
# Find unicode characters and replace them
|
||||||
in_ert = 0 # flag set to 1 if in ERT inset
|
in_ert = False # flag set to 1 if in ERT inset
|
||||||
in_math = 0 # flag set to 1 if in math inset
|
in_math = False # flag set to 1 if in math inset
|
||||||
|
temp_file = os.tmpfile()
|
||||||
insets = [] # list of active insets
|
insets = [] # list of active insets
|
||||||
for i, current_line in enumerate(document.body):
|
mod_body = u'' # to store the modified document body
|
||||||
if current_line.find('\\begin_inset') > -1:
|
|
||||||
|
# Go through the file to capture all combining characters
|
||||||
|
last_char = '' # to store the previous character
|
||||||
|
body_string = u'' # store the document temporarily as a string
|
||||||
|
for line in document.body:
|
||||||
|
body_string = body_string + line +'\n'
|
||||||
|
[body_string, apa] = body_string.rsplit('\n',1)
|
||||||
|
|
||||||
|
body = body_string.split('\n')
|
||||||
|
for line in body:
|
||||||
|
# Check for insets
|
||||||
|
if line.find('\\begin_inset') > -1:
|
||||||
# check which inset to start
|
# check which inset to start
|
||||||
if current_line.find('\\begin_inset ERT') > -1:
|
if line.find('\\begin_inset ERT') > -1:
|
||||||
in_ert = 1
|
in_ert = True
|
||||||
insets.append('ert')
|
insets.append('ert')
|
||||||
elif current_line.find('\\begin_inset Formula') > -1:
|
elif line.find('\\begin_inset Formula') > -1:
|
||||||
in_math = 1
|
in_math = True
|
||||||
insets.append('math')
|
insets.append('math')
|
||||||
else:
|
else:
|
||||||
insets.append('other')
|
insets.append('other')
|
||||||
if current_line.find('\\end_inset') > -1:
|
if line.find('\\end_inset') > -1:
|
||||||
# check which inset to end
|
# check which inset to end
|
||||||
try:
|
try:
|
||||||
cur_inset = insets.pop()
|
cur_inset = insets.pop()
|
||||||
if cur_inset == 'ert':
|
if cur_inset == 'ert':
|
||||||
in_ert = 0
|
in_ert = False
|
||||||
elif cur_inset == 'math':
|
elif cur_inset == 'math':
|
||||||
in_math = 0
|
in_math = False
|
||||||
else:
|
else:
|
||||||
pass # end of other inset
|
pass # end of other inset
|
||||||
except:
|
except:
|
||||||
pass # inset list was empty (for some reason)
|
pass # inset list was empty (for some reason)
|
||||||
current_line=''; # clear to have as container for modified line
|
|
||||||
for j in range(len(document.body[i])):
|
# Try to write the line
|
||||||
if spec_chars.has_key(document.body[i][j]):
|
try:
|
||||||
flags = spec_chars[document.body[i][j]][1] + spec_chars[document.body[i][j]][2]
|
# If all goes well the line is written here
|
||||||
if flags.find('combining') > -1:
|
temp_file.write(line.encode(document.encoding) + '\n')
|
||||||
command = ''
|
mod_body = mod_body + line + '\n'
|
||||||
else:
|
last_char = line[-1]
|
||||||
command = spec_chars[document.body[i][j]][0]; # the command to replace unicode
|
except:
|
||||||
if command[0:2] == '\\\\':
|
# Error, some character(s) in the line need to be replaced
|
||||||
if command[2:12]=='ensuremath':
|
for character in line:
|
||||||
if in_ert == 1:
|
try:
|
||||||
# math in ERT
|
# Try to write the character
|
||||||
command = command.replace('\\\\ensuremath{\\\\', '$\n\\backslash\n')
|
temp_file.write(character.encode(document.encoding))
|
||||||
command = command.replace('}', '$\n')
|
mod_body = mod_body + character
|
||||||
elif in_math == 0:
|
last_char = character
|
||||||
# add a math inset with the replacement character
|
except:
|
||||||
command = command.replace('\\\\ensuremath{\\', math_intro)
|
# Try to replace with ERT/math inset
|
||||||
command = command.replace('}', math_outro)
|
if spec_chars.has_key(character):
|
||||||
|
command = spec_chars[character][0]; # the command to replace unicode
|
||||||
|
flag1 = spec_chars[character][1]
|
||||||
|
flag2 = spec_chars[character][2]
|
||||||
|
if flag1.find('combining') > -1 or flag2.find('combining') > -1:
|
||||||
|
# We have a character that should be combined with the previous
|
||||||
|
command = command + '{' +last_char + '}'
|
||||||
|
# Remove the last character. Ignore if it is whitespace
|
||||||
|
if len(last_char.rstrip()) > 0:
|
||||||
|
# last_char was found and is not whitespace
|
||||||
|
[mod_body, apa] = mod_body.rsplit(last_char,1)
|
||||||
else:
|
else:
|
||||||
# we are already in a math inset
|
# The last character was replaced by a command. For now it is
|
||||||
command = command.replace('\\\\ensuremath{\\', '')
|
# ignored. This could be handled better.
|
||||||
command = command.replace('}', '')
|
pass
|
||||||
else:
|
if command[0:2] == '\\\\':
|
||||||
if in_math == 1:
|
if command[2:12]=='ensuremath':
|
||||||
# avoid putting an ERT in a math; instead put command as text
|
if in_ert == True:
|
||||||
command = command.replace('\\\\', '\mathrm{')
|
# math in ERT
|
||||||
command = command + '}'
|
command = command.replace('\\\\ensuremath{\\\\', '$\n\\backslash\n')
|
||||||
elif in_ert == 0:
|
command = command.replace('}', '$\n')
|
||||||
# add an ERT inset with the replacement character
|
elif in_math == False:
|
||||||
command = command.replace('\\\\', ert_intro)
|
# add a math inset with the replacement character
|
||||||
command = command + ert_outro
|
command = command.replace('\\\\ensuremath{\\', math_intro)
|
||||||
|
command = command.replace('}', math_outro)
|
||||||
|
else:
|
||||||
|
# we are already in a math inset
|
||||||
|
command = command.replace('\\\\ensuremath{\\', '')
|
||||||
|
command = command.replace('}', '')
|
||||||
else:
|
else:
|
||||||
command = command.replace('\\\\', '\n\\backslash\n')
|
if in_math == True:
|
||||||
current_line = current_line + command
|
# avoid putting an ERT in a math; instead put command as text
|
||||||
else:
|
command = command.replace('\\\\', '\mathrm{')
|
||||||
current_line = current_line + document.body[i][j]
|
command = command + '}'
|
||||||
document.body[i] = current_line
|
elif in_ert == False:
|
||||||
|
# add an ERT inset with the replacement character
|
||||||
|
command = command.replace('\\\\', ert_intro)
|
||||||
|
command = command + ert_outro
|
||||||
|
else:
|
||||||
|
command = command.replace('\\\\', '\n\\backslash\n')
|
||||||
|
last_char = '' # indicate that the character should not be removed
|
||||||
|
mod_body = mod_body + command
|
||||||
|
else:
|
||||||
|
# Replace with replacement string
|
||||||
|
mod_body = mod_body + replacement_character
|
||||||
|
[mod_body, apa] = mod_body.rsplit('\n',1)
|
||||||
|
document.body = mod_body.split('\n')
|
||||||
|
temp_file.close()
|
||||||
|
|
||||||
|
|
||||||
##
|
##
|
||||||
|
Loading…
Reference in New Issue
Block a user