Try to fix bug 5006. The idea here is to wrap all LaTeX commands that are not in math into ERT.

git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/trunk@25508 a592a061-630c-0410-9148-cb99ea01b6c8
This commit is contained in:
Richard Heck 2008-07-08 20:25:53 +00:00
parent 53c764ed9b
commit dc6da80146

View File

@ -43,10 +43,18 @@ def find_end_of_inset(lines, i):
# where the last statement resets the counter to accord with the added
# lines.
def wrap_into_ert(string, src, dst):
    r'''Within string, replace occurrences of src with dst, wrapped into ERT.

    Every occurrence of src is replaced by a newline, the lines that open
    a collapsed ERT inset with a Standard layout, dst itself, and the
    lines that close the layout and the inset.

    E.g.: wrap_into_ert(r'sch\"on', '\\', '\\backslash') is:
    sch<ERT>\backslash</ERT>"on
    where <ERT> and </ERT> stand for those opening and closing lines.

    string -- the text to process
    src    -- the substring to look for
    dst    -- the text placed inside each ERT inset
    Returns the new string; string is returned unchanged if src does not
    occur in it.'''
    return string.replace(src, '\n\\begin_inset ERT\nstatus collapsed\n\\begin_layout Standard\n'
        + dst + '\n\\end_layout\n\\end_inset\n')
def put_cmd_in_ert(string):
    '''Return string wrapped as a single collapsed ERT inset.

    Each backslash in string is first replaced by the token "\\backslash"
    followed by a newline. The result is then placed between the lines
    that open an ERT inset with a Standard layout and the lines that
    close them. Unlike wrap_into_ert, the whole of string ends up inside
    one inset, and no newline is added before or after the inset.'''
    escaped = string.replace('\\', "\\backslash\n")
    return "\\begin_inset ERT\nstatus collapsed\n\\begin_layout Standard\n" \
        + escaped + "\n\\end_layout\n\\end_inset"
def add_to_preamble(document, text): def add_to_preamble(document, text):
""" Add text to the preamble if it is not already there. """ Add text to the preamble if it is not already there.
Only the first line is checked!""" Only the first line is checked!"""
@ -125,12 +133,14 @@ def read_unicodesymbols():
# as: \"u. # as: \"u.
r = re.compile(r'\\\\(\W)\{(\w)\}') r = re.compile(r'\\\\(\W)\{(\w)\}')
for line in fp.readlines(): for line in fp.readlines():
if line[0] != '#' and line.strip() != "" and line.find("\\") != -1: if line[0] != '#' and line.strip() != "":
line=line.replace(' "',' ') # remove all quotation marks with spaces before line=line.replace(' "',' ') # remove all quotation marks with spaces before
line=line.replace('" ',' ') # remove all quotation marks with spaces after line=line.replace('" ',' ') # remove all quotation marks with spaces after
line=line.replace(r'\"','"') # replace \" by " (for characters with diaeresis) line=line.replace(r'\"','"') # replace \" by " (for characters with diaeresis)
try: try:
[ucs4,command,dead] = line.split(None,2) [ucs4,command,dead] = line.split(None,2)
if command[0:1] != "\\":
continue
spec_chars.append([command, unichr(eval(ucs4))]) spec_chars.append([command, unichr(eval(ucs4))])
except: except:
continue continue
@ -147,18 +157,76 @@ def read_unicodesymbols():
return spec_chars return spec_chars
def extract_argument(line):
    '''Extracts a LaTeX argument from the start of line. Returns (arg, rest).

    An argument is a balanced [...] or {...} group, optionally preceded
    by whitespace. A character that follows a backslash is never counted
    as an opening or closing bracket.

    Returns:
      (None, "")   if line is empty;
      (None, line) if the first non-blank character is neither "[" nor "{";
      (arg, rest)  otherwise, where arg runs from the start of line
                   (leading whitespace included) through the matching
                   closing bracket, and rest is whatever follows it;
      (line, "")   if the matching closing bracket is never found, so
                   that the caller wraps everything as ERT.'''
    if not line:
        return (None, "")

    stuff = line.lstrip()
    brace = stuff[:1]
    # brace is "" when line holds nothing but whitespace
    if brace != "[" and brace != "{":
        return (None, line)

    if brace == "[":
        term = "]"
    else:
        term = "}"

    # Scan line itself, starting at the opening bracket, so that pos is
    # directly usable as an index into line when we split it.
    start = len(line) - len(stuff)
    num = 0
    skip = False
    for pos in range(start, len(line)):
        c = line[pos]
        if skip:
            # this character was escaped by a preceding backslash
            skip = False
        elif c == "\\":
            skip = True
        elif c == brace:
            num += 1
        elif c == term:
            num -= 1
            if num == 0:
                # found the bracket matching the opening one
                return (line[:pos + 1], line[pos + 1:])

    # We never found the matching brace
    # So, to be on the safe side, let's just return everything
    # which will then get wrapped as ERT
    return (line, "")
def latex2ert(line):
    '''Converts LaTeX commands into ERT. line may well be a multi-line
    string when it is returned.

    The text is scanned left to right for commands. Each command,
    together with any bracketed arguments that directly follow it (as
    recognised by extract_argument), is passed to put_cmd_in_ert and the
    resulting inset is emitted on lines of its own. Text between
    commands is copied through unchanged. An empty or None line is
    returned as is.'''
    if not line:
        return line

    ## FIXME Escaped \ ??
    # Group 1 is the (shortest possible) text before the command, group 2
    # is a backslash followed by either a run of ASCII letters or a single
    # character, and group 3 is the remainder. Note that the pattern wants
    # one more backslash in front of group 2 and drops it from the output.
    # NOTE(review): presumably the text handled here stores backslashes
    # doubled -- confirm against the callers.
    labelre = re.compile(r'(.*?)\\(\\(?:[a-zA-Z]+|.))(.*)')

    pieces = []
    m = labelre.match(line)
    while m is not None:
        pieces.append(m.group(1))
        cmd = m.group(2)
        line = m.group(3)
        # swallow every argument that immediately follows the command
        while True:
            (arg, rest) = extract_argument(line)
            if arg is None:
                break
            cmd += arg
            line = rest
        pieces.append("\n" + put_cmd_in_ert(cmd) + "\n")
        # continue looking for commands in what is left
        m = labelre.match(line)
    # whatever remains contains no further command
    pieces.append(line)
    return "".join(pieces)
def latex2lyx(data): def latex2lyx(data):
@ -166,21 +234,37 @@ def latex2lyx(data):
converting LaTeX constructs into LyX constructs. Returns a list of converting LaTeX constructs into LyX constructs. Returns a list of
lines, suitable for insertion into document.body.''' lines, suitable for insertion into document.body.'''
mathre = re.compile('^(.*?)(\$.*?\$)(.*)')
retval = [] retval = []
# Convert LaTeX to Unicode # Convert LaTeX to Unicode
reps = read_unicodesymbols() reps = read_unicodesymbols()
# Commands of this sort need to be checked to make sure they are
# followed by a non-alpha character, lest we replace too much.
hardone = re.compile(r'^\\\\[a-zA-Z]+$')
for rep in reps: for rep in reps:
try: if hardone.match(rep[0]):
pos = 0
while True:
pos = data.find(rep[0], pos)
if pos == -1:
break
nextpos = pos + len(rep[0])
nextchar = data[nextpos - 1 : nextpos]
if nextchar.isalpha():
# not the end of that command
pos = nextpos
continue
data = data[:pos] + rep[1] + data[nextpos:]
pos = nextpos
else:
data = data.replace(rep[0], rep[1]) data = data.replace(rep[0], rep[1])
except:
# There seems to be a character in the unicodesymbols file
# that causes problems, namely, 0x2109.
pass
# Generic, \" -> ": # Generic, \" -> ":
data = wrap_into_ert(data, r'\"', '"') data = wrap_into_ert(data, r'\"', '"')
# Math: # Math:
mathre = re.compile('^(.*?)(\$.*?\$)(.*)')
lines = data.split('\n') lines = data.split('\n')
for line in lines: for line in lines:
#document.warning("LINE: " + line) #document.warning("LINE: " + line)
@ -194,14 +278,14 @@ def latex2lyx(data):
g = m.group(3) g = m.group(3)
if s: if s:
# this is non-math! # this is non-math!
s = line2lyx(s) s = latex2ert(s)
subst = s.split('\n') subst = s.split('\n')
retval += subst retval += subst
retval.append("\\begin_inset Formula " + f) retval.append("\\begin_inset Formula " + f)
retval.append("\\end_inset") retval.append("\\end_inset")
m = mathre.match(g) m = mathre.match(g)
# Handle whatever is left, which is just text # Handle whatever is left, which is just text
g = line2lyx(g) g = latex2ert(g)
subst = g.split('\n') subst = g.split('\n')
retval += subst retval += subst
return retval return retval