lyx_mirror/lib/lyx2lyx/lyx_2_1.py
Jürgen Spitzmüller f626cfb7a3 Implement a native IPA inset (bug #2591) with instant preview.
Currently only basic functionality (our math-tipa functionality plus multipar input). Further enhancements (such as proper unicode input and a specific toolbar) are planned. DocBook and XHTML output also need to be audited.

The lyx2lyx reversion routine surely can be improved (help appreciated), and I don't know tex2lyx well enough to handle this file format change.

git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/trunk@40867 a592a061-630c-0410-9148-cb99ea01b6c8
2012-03-06 07:54:22 +00:00

618 lines
23 KiB
Python

# -*- coding: utf-8 -*-
# This file is part of lyx2lyx
# -*- coding: utf-8 -*-
# Copyright (C) 2011 The LyX team
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
""" Convert files to the file format generated by lyx 2.1"""
import re, string
import unicodedata
import sys, os
# Uncomment only what you need to import, please.
from parser_tools import del_token, find_token, find_end_of, find_end_of_inset, \
find_end_of_layout, find_re, get_option_value, get_value, get_quoted_value, \
set_option_value
#from parser_tools import find_token, find_end_of, find_tokens, \
#find_token_exact, find_end_of_inset, find_end_of_layout, \
#find_token_backwards, is_in_inset, get_value, get_quoted_value, \
#del_token, check_token
from lyx2lyx_tools import add_to_preamble, put_cmd_in_ert
#from lyx2lyx_tools import insert_to_preamble, \
# put_cmd_in_ert, lyx2latex, latex_length, revert_flex_inset, \
# revert_font_attrs, hex2ratio, str2bool
####################################################################
# Private helper functions
#def remove_option(lines, m, option):
#''' removes option from line m. returns whether we did anything '''
#l = lines[m].find(option)
#if l == -1:
#return False
#val = lines[m][l:].split('"')[1]
#lines[m] = lines[m][:l - 1] + lines[m][l+len(option + '="' + val + '"'):]
#return True
###############################################################################
###
### Conversion and reversion routines
###
###############################################################################
def revert_visible_space(document):
    """Revert InsetSpace visible into its ERT counterpart.

    Each inset in document.body whose first line starts with
    ``\\begin_inset space \\textvisiblespace{}`` is replaced, up to and
    including the line reported by find_end_of_inset, with the lines
    produced by put_cmd_in_ert for the same command.
    """
    token = "\\begin_inset space \\textvisiblespace{}"
    pos = find_token(document.body, token, 0)
    while pos != -1:
        inset_end = find_end_of_inset(document.body, pos)
        document.body[pos:inset_end + 1] = put_cmd_in_ert("\\textvisiblespace{}")
        # The replaced inset no longer matches, so searching again from the
        # same position finds the next one.
        pos = find_token(document.body, token, pos)
def convert_undertilde(document):
    """Load undertilde automatically.

    A ``\\use_undertilde`` header line is inserted right after the first of
    ``\\use_mathdots``, ``\\use_mhchem`` or ``\\use_esint`` that is present.
    Its value is 2 when the preamble already loads the package (that
    preamble line is then removed) and 0 otherwise.
    """
    anchor = -1
    for token in ("\\use_mathdots", "\\use_mhchem", "\\use_esint"):
        anchor = find_token(document.header, token, 0)
        if anchor != -1:
            break
    if anchor == -1:
        document.warning("Malformed LyX document: Can't find \\use_mathdots.")
        return

    loaded_at = find_token(document.preamble, "\\usepackage{undertilde}", 0)
    if loaded_at != -1:
        document.header.insert(anchor + 1, "\\use_undertilde 2")
        del document.preamble[loaded_at]
    else:
        document.header.insert(anchor + 1, "\\use_undertilde 0")
def revert_undertilde(document):
    """Load undertilde if used in the document.

    The ``\\use_undertilde`` header line is removed and its value decides
    what happens next:

    * 0 -- nothing is added to the preamble;
    * 2 -- ``\\usepackage{undertilde}`` is added unconditionally;
    * anything else, a missing line or a non-integer value -- "auto": the
      package is loaded only if some Formula inset contains ``\\utilde``.
    """
    usetilde = 1  # "auto" unless the header says otherwise
    pos = find_token(document.header, "\\use_undertilde", 0)
    if pos == -1:
        document.warning("No \\use_undertilde line. Assuming auto.")
    else:
        val = get_value(document.header, "\\use_undertilde", pos)
        del document.header[pos]
        try:
            usetilde = int(val)
        except ValueError:
            # Only a failed int() conversion is expected here; a bare
            # "except:" would also hide KeyboardInterrupt and real bugs.
            document.warning("Invalid \\use_undertilde value: " + val + ". Assuming auto.")
            usetilde = 1

    if usetilde == 0:
        # do not load case
        return
    if usetilde == 2:
        # force load case
        add_to_preamble(document, ["\\usepackage{undertilde}"])
        return

    # so we are in the auto case. we want to load undertilde if \utilde is used.
    i = 0
    while True:
        i = find_token(document.body, '\\begin_inset Formula', i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: Can't find end of Formula inset at line " + str(i))
            i += 1
            continue
        code = "\n".join(document.body[i:j])
        if "\\utilde" in code:
            # \@ifundefined takes a "then" AND an "else" argument; without
            # the trailing {} it would grab the next preamble token as its
            # else-branch.
            add_to_preamble(document, ["\\@ifundefined{utilde}{\\usepackage{undertilde}}{}"])
            return
        i = j
def revert_negative_space(document):
    """Revert InsetSpace negmedspace and negthickspace into TeX code.

    Every ``\\negmedspace{}`` and every ``\\negthickspace{}`` space inset in
    the body is replaced by the ERT lines from put_cmd_in_ert.  If at least
    one inset was reverted and the header has no ``\\use_amsmath 2`` line, a
    conditional ``\\usepackage{amsmath}`` is added to the preamble.

    The two inset kinds are handled independently of each other: the
    previous interleaved loop returned early whenever only one of the two
    kinds was present, leaving insets unreverted or amsmath unloaded.
    """
    reverted = False
    for command in ("\\negmedspace{}", "\\negthickspace{}"):
        token = "\\begin_inset space " + command
        i = 0
        while True:
            i = find_token(document.body, token, i)
            if i == -1:
                break
            end = find_end_of_inset(document.body, i)
            if end == -1:
                # Without an end marker the slice below would only insert
                # and the same inset would be found again forever.
                document.warning("Malformed LyX document: Can't find end of space inset at line " + str(i))
                i += 1
                continue
            subst = put_cmd_in_ert(command)
            document.body[i:end + 1] = subst
            i += len(subst)
            reverted = True

    if not reverted:
        return
    # load amsmath in the preamble if not already loaded
    if find_token(document.header, "\\use_amsmath 2", 0) == -1:
        # \@ifundefined needs both a "then" and an "else" argument; the
        # trailing {} keeps it from consuming the next preamble token.
        add_to_preamble(document, ["\\@ifundefined{negthickspace}{\\usepackage{amsmath}}{}"])
def revert_math_spaces(document):
    """Revert formulas with protected custom space and protected hfills to TeX-code.

    A Formula inset is rewritten only when its opening line contains
    ``\\hspace*``; the text of that line from column 21 onwards is handed
    to put_cmd_in_ert and the result replaces the whole inset.
    """
    formula_token = "\\begin_inset Formula"
    pos = find_token(document.body, formula_token, 0)
    while pos != -1:
        if "\\hspace*" in document.body[pos]:
            inset_end = find_end_of_inset(document.body, pos)
            # 21 == len("\\begin_inset Formula "): keep only the formula text.
            ert = put_cmd_in_ert(document.body[pos][21:])
            document.body[pos:inset_end + 1] = ert
        pos = find_token(document.body, formula_token, pos + 1)
def convert_japanese_encodings(document):
    """Rename the japanese encodings to names understood by platex.

    Only the first ``\\inputencoding`` header line is looked at; it is
    rewritten when its value is one of the three pLaTeX encoding names.
    """
    pos = find_token(document.header, "\\inputencoding", 0)
    if pos == -1:
        return

    platex_names = {
        "EUC-JP-pLaTeX": "euc",
        "JIS-pLaTeX": "jis",
        "SJIS-pLaTeX": "sjis",
    }
    current = get_value(document.header, "\\inputencoding", pos)
    if current in platex_names:
        document.header[pos] = "\\inputencoding %s" % platex_names[current]
def revert_japanese_encodings(document):
    """Revert the japanese encodings name changes.

    Only the first ``\\inputencoding`` header line is looked at; the short
    names ``euc``, ``jis`` and ``sjis`` are mapped back to their
    ``*-pLaTeX`` spellings.
    """
    pos = find_token(document.header, "\\inputencoding", 0)
    if pos == -1:
        return

    old_names = {
        "euc": "EUC-JP-pLaTeX",
        "jis": "JIS-pLaTeX",
        "sjis": "SJIS-pLaTeX",
    }
    current = get_value(document.header, "\\inputencoding", pos)
    if current in old_names:
        document.header[pos] = "\\inputencoding %s" % old_names[current]
def revert_justification(document):
    """Revert the \\justification buffer param.

    The header line is removed through del_token; a warning is issued when
    del_token reports that nothing was removed.
    """
    removed = del_token(document.header, '\\justification', 0)
    if removed:
        return
    document.warning("Malformed LyX document: Missing \\justification.")
def revert_australian(document):
    """Set English language variants Australian and Newzealand to English.

    The document language (attribute and ``\\language`` header line) is
    reset when it is one of the two variants, and every body line that
    find_token reports for ``\\lang australian`` or ``\\lang newzealand``
    has that text replaced by ``\\lang english``.
    """
    if document.language in ("australian", "newzealand"):
        document.language = "english"
        header_pos = find_token(document.header, "\\language", 0)
        if header_pos != -1:
            document.header[header_pos] = "\\language english"

    for lang_token in ("\\lang australian", "\\lang newzealand"):
        pos = find_token(document.body, lang_token, 0)
        while pos != -1:
            document.body[pos] = document.body[pos].replace(lang_token, "\\lang english")
            pos = find_token(document.body, lang_token, pos + 1)
def convert_biblio_style(document):
    """Add a sensible default for \\biblio_style based on the citation engine.

    The engine name is the part of the ``\\cite_engine`` value before the
    first underscore.  Nothing happens when the header has no
    ``\\cite_engine`` line; an engine outside the table below raises
    KeyError.
    """
    pos = find_token(document.header, "\\cite_engine", 0)
    if pos == -1:
        return

    default_styles = {"basic": "plain", "natbib": "plainnat", "jurabib": "jurabib"}
    engine_value = get_value(document.header, "\\cite_engine", pos)
    engine = engine_value.partition("_")[0]
    document.header.insert(pos + 1, "\\biblio_style " + default_styles[engine])
def revert_biblio_style(document):
    """BibTeX insets with default option use the style defined by \\biblio_style.

    The ``\\biblio_style`` header line is removed and, in every bibtex
    CommandInset of the body, an option that is exactly ``default`` is
    replaced by the style that header line named.
    """
    i = find_token(document.header, "\\biblio_style", 0)
    if i == -1:
        document.warning("No \\biblio_style line. Nothing to do.")
        return

    default_style = get_value(document.header, "\\biblio_style", i)
    del document.header[i]

    # We are looking for bibtex insets having the default option
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CommandInset bibtex", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: Can't find end of bibtex inset at line " + str(i))
            # Skip only the broken inset; later insets must still be
            # processed, as the other routines in this file do.
            i += 1
            continue
        k = find_token(document.body, "options", i, j)
        if k != -1:
            options = get_quoted_value(document.body, "options", k).split(",")
            if "default" in options:
                # Replace whole entries only, so that an option merely
                # containing the substring "default" is left untouched.
                options = [default_style if opt == "default" else opt
                           for opt in options]
                document.body[k] = 'options "%s"' % ",".join(options)
        i = j
def _update_longtable_caption_row(document, begin_row, forward):
    """Adjust the head/foot flags of one ``<row`` line if it is a caption row.

    forward=True:  a caption row with none of the four head/foot flags set
                   to "true" gets ``endfirsthead="true"`` appended to its
                   caption attribute.
    forward=False: every head/foot flag that is "true" on a caption row is
                   set to "false".
    """
    flags = ('endfirsthead', 'endhead', 'endfoot', 'endlastfoot')
    if get_option_value(document.body[begin_row], 'caption') != 'true':
        return
    if forward:
        for flag in flags:
            if get_option_value(document.body[begin_row], flag) == 'true':
                return
        # The value deliberately closes the caption attribute and opens a
        # second one; the literal is kept exactly as before.
        document.body[begin_row] = set_option_value(document.body[begin_row], 'caption', 'true", endfirsthead="true')
    else:
        for flag in flags:
            # Re-read the line each time: set_option_value returns a new line.
            if get_option_value(document.body[begin_row], flag) == 'true':
                document.body[begin_row] = set_option_value(document.body[begin_row], flag, 'false')


def handle_longtable_captions(document, forward):
    """Convert (forward=True) or revert (forward=False) longtable caption rows.

    Walks every ``<lyxtabular`` in the body, skips tables whose
    ``<features`` line does not mention ``islongtable``, and applies
    _update_longtable_caption_row to each of the ``rows`` rows announced on
    the table's opening line.
    """
    begin_table = 0
    while True:
        begin_table = find_token(document.body, '<lyxtabular version=', begin_table)
        if begin_table == -1:
            break
        end_table = find_end_of(document.body, begin_table, '<lyxtabular', '</lyxtabular>')
        if end_table == -1:
            document.warning("Malformed LyX document: Could not find end of table.")
            begin_table += 1
            continue
        fline = find_token(document.body, "<features", begin_table, end_table)
        if fline == -1:
            document.warning("Can't find features for inset at line " + str(begin_table))
            begin_table += 1
            continue
        if "islongtable" not in document.body[fline]:
            # no longtable
            begin_table += 1
            continue
        numrows = get_option_value(document.body[begin_table], "rows")
        try:
            numrows = int(numrows)
        except (TypeError, ValueError):
            # Only a failed conversion is expected; do not swallow
            # unrelated exceptions with a bare "except:".
            document.warning(document.body[begin_table])
            document.warning("Unable to determine rows!")
            begin_table = end_table
            continue
        begin_row = begin_table
        for row in range(numrows):
            begin_row = find_token(document.body, '<row', begin_row, end_table)
            if begin_row == -1:
                document.warning("Can't find row " + str(row + 1))
                break
            end_row = find_end_of(document.body, begin_row, '<row', '</row>')
            if end_row == -1:
                document.warning("Can't find end of row " + str(row + 1))
                break
            _update_longtable_caption_row(document, begin_row, forward)
            begin_row = end_row
        # since there could be a tabular inside this one, we
        # cannot jump to end.
        begin_table += 1
def convert_longtable_captions(document):
    """Add a firsthead flag to caption rows.

    Thin wrapper: runs handle_longtable_captions in the forward direction.
    """
    handle_longtable_captions(document, forward=True)
def revert_longtable_captions(document):
    """Remove head/foot flag from caption rows.

    Thin wrapper: runs handle_longtable_captions in the backward direction.
    """
    handle_longtable_captions(document, forward=False)
def convert_use_packages(document):
    """use_xxx yyy => use_package xxx yyy

    Applies to the amsmath, esint, mathdots, mhchem and undertilde header
    lines; a package without a ``\\use_xxx`` line is skipped.
    """
    for package in ("amsmath", "esint", "mathdots", "mhchem", "undertilde"):
        old_token = "\\use_%s" % package
        pos = find_token(document.header, old_token, 0)
        if pos == -1:
            continue
        setting = get_value(document.header, old_token, pos)
        document.header[pos] = "\\use_package %s %s" % (package, setting)
def revert_use_packages(document):
    """use_package xxx yyy => use_xxx yyy

    All ``\\use_package`` lines for amsmath, esint, mathdots, mhchem and
    undertilde are removed from the header and replaced by one complete
    block of ``\\use_xxx`` lines in that fixed order.  A package that had no
    ``\\use_package`` line gets the value "1".  The block is inserted where
    the topmost removed line was.
    """
    # the order is arbitrary for the use_package version, and not all packages need to be given.
    # Ensure a complete list and correct order (important for older LyX versions and especially lyx2lyx)
    # A list (not a dict) is used so that the output order is deterministic.
    packages = ["amsmath", "esint", "mathdots", "mhchem", "undertilde"]
    values = dict.fromkeys(packages, "1")

    insert_at = -1
    for p in packages:
        regexp = re.compile(r'(\\use_package\s+%s)' % p)
        i = find_re(document.header, regexp, 0)
        if i == -1:
            continue
        fields = get_value(document.header, "\\use_package", i).split()
        if len(fields) > 1:
            # Remember this package's own setting instead of letting the
            # last value found leak into every emitted line.
            values[p] = fields[1]
        del document.header[i]
        # Deleting at i leaves smaller indices valid, so tracking the
        # minimum gives the position of the topmost removed line.
        if insert_at == -1 or i < insert_at:
            insert_at = i

    if insert_at == -1:
        # No anchor to insert at; previously this path raised NameError.
        document.warning("Malformed LyX document: Can't find \\use_package.")
        return

    for p in packages:
        document.header.insert(insert_at, "\\use_%s %s" % (p, values[p]))
        insert_at += 1
def convert_use_mathtools(document):
    """Insert use_package mathtools.

    The new line goes right after the first ``\\use_package`` header line.
    Its value is 2 when the preamble loads mathtools itself (that preamble
    line is then dropped) and 0 otherwise.
    """
    anchor = find_token(document.header, "\\use_package", 0)
    if anchor == -1:
        document.warning("Malformed LyX document: Can't find \\use_package.")
        return

    loaded_at = find_token(document.preamble, "\\usepackage{mathtools}", 0)
    if loaded_at != -1:
        document.header.insert(anchor + 1, "\\use_package mathtools 2")
        del document.preamble[loaded_at]
    else:
        document.header.insert(anchor + 1, "\\use_package mathtools 0")
def revert_use_mathtools(document):
    """Remove use_package mathtools.

    The header line is deleted.  With value "2" the package is added to the
    preamble unconditionally; with value "1" (also assumed when the line is
    missing) it is added only if a Formula inset uses one of the listed
    mathtools commands; any other value adds nothing.
    """
    pattern = re.compile(r'(\\use_package\s+mathtools)')
    pos = find_re(document.header, pattern, 0)
    if pos == -1:
        mode = "1"  # default is auto
    else:
        mode = get_value(document.header, "\\use_package", pos).split()[1]
        del document.header[pos]

    if mode == "2":  # on
        add_to_preamble(document, ["\\usepackage{mathtools}"])
        return
    if mode != "1":  # neither on nor auto
        return

    # auto: look for a mathtools command in any formula
    commands = ["mathclap", "mathllap", "mathrlap",
                "lgathered", "rgathered", "vcentcolon", "dblcolon",
                "coloneqq", "Coloneqq", "coloneq", "Coloneq", "eqqcolon",
                "Eqqcolon", "eqcolon", "Eqcolon", "colonapprox",
                "Colonapprox", "colonsim", "Colonsim"]
    start = 0
    while True:
        start = find_token(document.body, '\\begin_inset Formula', start)
        if start == -1:
            return
        end = find_end_of_inset(document.body, start)
        if end == -1:
            document.warning("Malformed LyX document: Can't find end of Formula inset at line " + str(start))
            start += 1
            continue
        code = "\n".join(document.body[start:end])
        if any(("\\%s" % name) in code for name in commands):
            add_to_preamble(document, ["\\usepackage{mathtools}"])
            return
        start = end
def convert_cite_engine_type(document):
    """Determine the \\cite_engine_type from the citation engine.

    A value of the form ``engine_type`` is split at the underscore; a value
    without underscore must be ``basic`` or ``jurabib`` and gets its type
    from a fixed table.  The ``\\cite_engine`` line is rewritten without the
    suffix and a ``\\cite_engine_type`` line is inserted after it.
    """
    pos = find_token(document.header, "\\cite_engine", 0)
    if pos == -1:
        return

    engine = get_value(document.header, "\\cite_engine", pos)
    if "_" not in engine:
        engine_type = {"basic": "numerical", "jurabib": "authoryear"}[engine]
    else:
        engine, engine_type = engine.split("_")
    document.header[pos] = "\\cite_engine " + engine
    document.header.insert(pos + 1, "\\cite_engine_type " + engine_type)
def revert_cite_engine_type(document):
    """Natbib had the type appended with an underscore.

    The ``\\cite_engine_type`` header line is removed ("numerical" is
    assumed when it is missing) and a ``\\cite_engine natbib`` line becomes
    ``\\cite_engine natbib_<type>``.  Other engines are left alone.
    """
    engine_type = "numerical"
    i = find_token(document.header, "\\cite_engine_type", 0)
    if i == -1:
        document.warning("No \\cite_engine_type line. Assuming numerical.")
    else:
        engine_type = get_value(document.header, "\\cite_engine_type", i)
        del document.header[i]

    # We are looking for the natbib citation engine.
    # Search from the top of the header: convert_cite_engine_type puts the
    # type line *after* \cite_engine, so starting at the old index (or at -1
    # when the type line was missing) would miss the engine line.
    i = find_token(document.header, "\\cite_engine natbib", 0)
    if i == -1:
        return
    document.header[i] = "\\cite_engine natbib_" + engine_type
def revert_cancel(document):
    """Add cancel to the preamble if necessary.

    The package is added once, as soon as any Formula inset contains one of
    the cancel commands; the scan stops at that point.
    """
    commands = ["cancelto", "cancel", "bcancel", "xcancel"]
    start = 0
    while True:
        start = find_token(document.body, '\\begin_inset Formula', start)
        if start == -1:
            return
        end = find_end_of_inset(document.body, start)
        if end == -1:
            document.warning("Malformed LyX document: Can't find end of Formula inset at line " + str(start))
            start += 1
            continue
        code = "\n".join(document.body[start:end])
        if any(("\\%s" % name) in code for name in commands):
            add_to_preamble(document, ["\\usepackage{cancel}"])
            return
        start = end
def revert_verbatim(document):
    """Revert verbatim environments completely to TeX-code.

    Each ``Verbatim`` layout becomes a Standard paragraph holding an ERT
    inset that starts with ``\\begin{verbatim}`` and ends with
    ``\\end{verbatim}``.  Newline insets inside the layout become paragraph
    breaks of the ERT, and directly consecutive Verbatim layouts are merged
    into a single ERT inset.
    """
    # Lines that end one Plain Layout paragraph and open the next one.
    new_par = ['\\end_layout', '', '\\begin_layout Plain Layout']
    subst_end = ['\\end_layout', '', '\\begin_layout Plain Layout',
                 '\\end_layout', '',
                 '\\begin_layout Plain Layout', '', '',
                 '\\backslash', '',
                 'end{verbatim}',
                 '\\end_layout', '', '\\end_inset',
                 '', '', '\\end_layout']
    subst_begin = ['\\begin_layout Standard', '\\noindent',
                   '\\begin_inset ERT', 'status collapsed', '',
                   '\\begin_layout Plain Layout', '', '', '\\backslash',
                   'begin{verbatim}',
                   '\\end_layout', '', '\\begin_layout Plain Layout', '']

    i = 0
    consecutive = False
    while True:
        i = find_token(document.body, "\\begin_layout Verbatim", i)
        if i == -1:
            return
        j = find_end_of_layout(document.body, i)
        if j == -1:
            document.warning("Malformed lyx document: Can't find end of Verbatim layout")
            i += 1
            continue

        # delete all line breaks insets (there are no other insets)
        # The search is limited to this layout (end index j); an unbounded
        # search would rewrite newline insets in the rest of the document
        # and move j away from this layout's \end_layout line.
        l = i
        while True:
            n = find_token(document.body, "\\begin_inset Newline newline", l, j)
            if n == -1:
                n = find_token(document.body, "\\begin_inset Newline linebreak", l, j)
                if n == -1:
                    break
            m = find_end_of_inset(document.body, n)
            if m == -1:
                document.warning("Malformed lyx document: Can't find end of Newline inset")
                l = n + 1
                continue
            del document.body[m:m + 1]
            document.body[n:n + 1] = new_par
            l += 1
            # one line removed, two added: the layout end moved down by one
            j += 1

        # consecutive verbatim environments need to be connected
        k = find_token(document.body, "\\begin_layout Verbatim", j)
        followed = (k == j + 2)

        # Close this paragraph: either continue into the next Verbatim
        # layout or terminate the ERT inset.
        if followed:
            document.body[j:j + 1] = new_par
        else:
            document.body[j:j + 1] = subst_end
            # the next paragraph must not be indented
            document.body[j + 19:j + 19] = ['\\noindent']

        # Open this paragraph: inside a run of Verbatim layouts the
        # \begin_layout line is simply dropped, otherwise the ERT starts here.
        if consecutive:
            del document.body[i:i + 1]
        else:
            document.body[i:i + 1] = subst_begin

        consecutive = followed
def revert_tipa(document):
    """Revert native TIPA insets to mathed or ERT.

    An IPA inset whose first embedded layout holds at most one line, and
    which has no further layout, becomes a Formula inset wrapping the text
    in ``\\text{\\textipa{...}}``.  Any other IPA inset is unwrapped into
    paragraphs enclosed by ERT ``\\begin{IPA}`` / ``\\end{IPA}``, and
    ``tipa,tipx`` is added to the preamble.
    """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset IPA", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Malformed lyx document: Can't find end of IPA inset")
            i += 1
            continue
        n = find_token(document.body, "\\begin_layout", i, j)
        if n == -1:
            document.warning("Malformed lyx document: IPA inset has no embedded layout")
            i += 1
            continue
        m = find_end_of_layout(document.body, n)
        if m == -1:
            document.warning("Malformed lyx document: Can't find end of embedded layout")
            i += 1
            continue
        content = document.body[n + 1:m]
        p = find_token(document.body, "\\begin_layout", m, j)
        multipar = p != -1 or len(content) > 1
        if multipar:
            # IPA insets with multiple pars need to be wrapped by \begin{IPA}...\end{IPA}
            content = document.body[i + 1:j]
            subst = (['\\end_layout', '', '\\begin_layout Standard']
                     + put_cmd_in_ert("\\begin{IPA}")
                     + ['\\end_layout']
                     + content
                     + ['\\begin_layout Standard']
                     + put_cmd_in_ert("\\end{IPA}"))
            document.body[i:j + 1] = subst
            add_to_preamble(document, ["\\usepackage{tipa,tipx}"])
            # The replacement is longer than the inset, so resuming at the
            # old end index cannot skip anything that followed it.
            i = j
        else:
            # single-par IPA insets can be reverted to mathed
            # An empty paragraph yields no content line; use an empty string
            # rather than failing on content[0].
            if content:
                text = content[0]
            else:
                text = ""
            subst = ["\\begin_inset Formula $\\text{\\textipa{" + text + "}}$", "\\end_inset"]
            document.body[i:j + 1] = subst
            # The body just got shorter: resuming at the old end index j
            # would skip lines (possibly further IPA insets) that moved up.
            i += len(subst)
##
# Conversion hub
#

supported_versions = ["2.1.0", "2.1"]

# Each entry pairs a number (presumably the file format reached by that
# step -- confirm against the lyx2lyx driver) with the list of routines
# to run for it.  An empty list means no routine is run for that step.
convert = [
    [414, []],
    [415, [convert_undertilde]],
    [416, []],
    [417, [convert_japanese_encodings]],
    [418, []],
    [419, []],
    [420, [convert_biblio_style]],
    [421, [convert_longtable_captions]],
    [422, [convert_use_packages]],
    [423, [convert_use_mathtools]],
    [424, [convert_cite_engine_type]],
    [425, []],
    [426, []],
    [427, []],
]

# Same layout as "convert", listed in descending order.
revert = [
    [426, [revert_tipa]],
    [425, [revert_verbatim]],
    [424, [revert_cancel]],
    [423, [revert_cite_engine_type]],
    [422, [revert_use_mathtools]],
    [421, [revert_use_packages]],
    [420, [revert_longtable_captions]],
    [419, [revert_biblio_style]],
    [418, [revert_australian]],
    [417, [revert_justification]],
    [416, [revert_japanese_encodings]],
    [415, [revert_negative_space, revert_math_spaces]],
    [414, [revert_undertilde]],
    [413, [revert_visible_space]],
]


if __name__ == "__main__":
    # Nothing is done when the module is executed directly.
    pass