mirror of
https://git.lyx.org/repos/lyx.git
synced 2025-01-25 09:35:39 +00:00
d18412ce9a
The function convert_accent will be used when we get rid of InsetLatexAccent. git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/trunk@17017 a592a061-630c-0410-9148-cb99ea01b6c8
1127 lines
45 KiB
Python
1127 lines
45 KiB
Python
# This file is part of lyx2lyx
# -*- coding: utf-8 -*-
# Copyright (C) 2006 José Matos <jamatos@lyx.org>
# Copyright (C) 2004-2006 Georg Baum <Georg.Baum@post.rwth-aachen.de>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
|
|
|
""" Convert files to the file format generated by lyx 1.5"""
|
|
|
|
import re
|
|
import unicodedata
|
|
|
|
from parser_tools import find_re, find_token, find_token_backwards, find_token_exact, find_tokens, find_end_of, get_value
|
|
from LyX import get_encoding
|
|
|
|
|
|
####################################################################
|
|
# Private helper functions
|
|
|
|
def find_end_of_inset(lines, i):
    r"""Find end of inset, where lines[i] is included.

    Delegates to find_end_of() with the ``\begin_inset`` / ``\end_inset``
    token pair and returns its result unchanged.  Callers in this file
    treat a result of -1 as "no matching end found".
    """
    return find_end_of(lines, i, "\\begin_inset", "\\end_inset")
|
|
|
|
def find_end_of_layout(lines, i):
    r"""Find end of layout, where lines[i] is included.

    Delegates to find_end_of() with the ``\begin_layout`` / ``\end_layout``
    token pair and returns its result unchanged.  Callers in this file
    treat a result of -1 as "no matching end found".
    """
    return find_end_of(lines, i, "\\begin_layout", "\\end_layout")
|
|
|
|
# End of helper functions
|
|
####################################################################
|
|
|
|
|
|
##
|
|
# Notes: Framed/Shaded
|
|
#
|
|
|
|
def revert_framed(document):
    r"""Revert framed notes.

    Every body line located by searching for ``\begin_inset Note Framed``
    or ``\begin_inset Note Shaded`` is replaced by a plain
    ``\begin_inset Note`` line.  The body is modified in place.
    """
    tokens = ["\\begin_inset Note Framed", "\\begin_inset Note Shaded"]
    i = 0
    while True:
        i = find_tokens(document.body, tokens, i)
        if i == -1:
            return
        document.body[i] = "\\begin_inset Note"
        i = i + 1
|
|
|
|
|
|
##
|
|
# Fonts
|
|
#
|
|
|
|
# The three tables below are keyed by the value of the old \fontscheme
# header setting.  For each scheme they give the value written to
# \font_roman, \font_sans and \font_typewriter respectively.  All three
# tables share the same set of keys.
roman_fonts = {
    'default': 'default',
    'ae': 'ae',
    'times': 'times',
    'palatino': 'palatino',
    'helvet': 'default',
    'avant': 'default',
    'newcent': 'newcent',
    'bookman': 'bookman',
    'pslatex': 'times',
}

sans_fonts = {
    'default': 'default',
    'ae': 'default',
    'times': 'default',
    'palatino': 'default',
    'helvet': 'helvet',
    'avant': 'avant',
    'newcent': 'default',
    'bookman': 'default',
    'pslatex': 'helvet',
}

typewriter_fonts = {
    'default': 'default',
    'ae': 'default',
    'times': 'default',
    'palatino': 'default',
    'helvet': 'default',
    'avant': 'default',
    'newcent': 'default',
    'bookman': 'default',
    'pslatex': 'courier',
}
|
|
|
|
def convert_font_settings(document):
    r"""Convert font settings.

    Replaces the single ``\fontscheme`` header line by the eight separate
    font settings (``\font_roman`` ... ``\font_tt_scale``).  The family
    values come from the roman_fonts / sans_fonts / typewriter_fonts
    tables; the remaining settings are written with fixed defaults.

    A missing ``\fontscheme`` line is reported and the header is left
    alone.  An empty or unknown scheme is reported and treated as
    'default'.
    """
    i = find_token_exact(document.header, "\\fontscheme", 0)
    if i == -1:
        document.warning("Malformed LyX document: Missing `\\fontscheme'.")
        return
    font_scheme = get_value(document.header, "\\fontscheme", i, i + 1)
    if font_scheme == '':
        document.warning("Malformed LyX document: Empty `\\fontscheme'.")
        font_scheme = 'default'
    if font_scheme not in roman_fonts:
        document.warning("Malformed LyX document: Unknown `\\fontscheme' `%s'." % font_scheme)
        font_scheme = 'default'
    document.header[i:i+1] = [
        '\\font_roman %s' % roman_fonts[font_scheme],
        '\\font_sans %s' % sans_fonts[font_scheme],
        '\\font_typewriter %s' % typewriter_fonts[font_scheme],
        '\\font_default_family default',
        '\\font_sc false',
        '\\font_osf false',
        '\\font_sf_scale 100',
        '\\font_tt_scale 100',
    ]
|
|
|
|
|
|
def _pop_font_setting(document, token, start, default):
    """Remove the header setting *token* and return ``(value, index)``.

    The header is searched from line *start*.  When the setting is found
    its line is deleted and *index* is the position it occupied, so the
    search for the next setting can continue from there.  When it is
    missing a warning is issued and ``(default, -1)`` is returned.
    """
    i = find_token_exact(document.header, token, start)
    if i == -1:
        document.warning("Malformed LyX document: Missing `%s'." % token)
        return default, -1
    value = get_value(document.header, token, i, i + 1)
    del document.header[i]
    return value, i


def revert_font_settings(document):
    r"""Revert font settings.

    Removes the separate ``\font_*`` header settings and inserts a single
    ``\fontscheme`` line at the position of the first family setting that
    was found.  If the three families correspond to one of the known
    schemes, that scheme is used.  Otherwise the scheme is 'default' and
    the fonts are selected by commands appended to the preamble.

    Settings with no old-format equivalent (small caps, scaling, and old
    style figures in most cases) only produce a warning.
    """
    i = 0
    insert_line = -1
    fonts = {'roman': 'default', 'sans': 'default', 'typewriter': 'default'}
    for family in 'roman', 'sans', 'typewriter':
        fonts[family], found = _pop_font_setting(
            document, '\\font_%s' % family, i, 'default')
        if found == -1:
            # Start again from the top for the next setting.
            i = 0
        else:
            i = found
            if insert_line < 0:
                insert_line = i
    # NOTE: if no family setting was found insert_line is still -1, and
    # list.insert(-1, ...) places the \fontscheme line before the last
    # header line.

    # max(found, 0): after a missing setting restart from the top (as the
    # family loop above does) instead of searching from index -1.
    font_default_family, found = _pop_font_setting(
        document, '\\font_default_family', i, 'default')
    i = max(found, 0)

    font_sc, found = _pop_font_setting(document, '\\font_sc', i, 'false')
    i = max(found, 0)
    if font_sc != 'false':
        document.warning("Conversion of '\\font_sc' not yet implemented.")

    font_osf, found = _pop_font_setting(document, '\\font_osf', i, 'false')
    i = max(found, 0)

    font_sf_scale, found = _pop_font_setting(
        document, '\\font_sf_scale', i, '100')
    i = max(found, 0)
    if font_sf_scale != '100':
        document.warning("Conversion of '\\font_sf_scale' not yet implemented.")

    font_tt_scale, found = _pop_font_setting(
        document, '\\font_tt_scale', i, '100')
    if font_tt_scale != '100':
        document.warning("Conversion of '\\font_tt_scale' not yet implemented.")

    # First try to express the three families as one of the old schemes.
    for font_scheme in roman_fonts:
        if (roman_fonts[font_scheme] == fonts['roman'] and
                sans_fonts[font_scheme] == fonts['sans'] and
                typewriter_fonts[font_scheme] == fonts['typewriter']):
            document.header.insert(insert_line, '\\fontscheme %s' % font_scheme)
            if font_default_family != 'default':
                document.preamble.append('\\renewcommand{\\familydefault}{\\%s}' % font_default_family)
            if font_osf == 'true':
                document.warning("Ignoring `\\font_osf = true'")
            return

    # No scheme matches: use the default scheme and load the fonts from
    # the preamble instead.
    font_scheme = 'default'
    document.header.insert(insert_line, '\\fontscheme %s' % font_scheme)
    if fonts['roman'] == 'cmr':
        document.preamble.append('\\renewcommand{\\rmdefault}{cmr}')
        if font_osf == 'true':
            document.preamble.append('\\usepackage{eco}')
            font_osf = 'false'
    if fonts['roman'] in ('lmodern', 'charter', 'utopia', 'beraserif',
                          'ccfonts', 'chancery'):
        document.preamble.append('\\usepackage{%s}' % fonts['roman'])
    if fonts['sans'] in ('cmss', 'lmss', 'cmbr'):
        document.preamble.append('\\renewcommand{\\sfdefault}{%s}' % fonts['sans'])
    # The original looped over the characters of the string 'berasans',
    # so this package was never emitted.
    if fonts['sans'] == 'berasans':
        document.preamble.append('\\usepackage{%s}' % fonts['sans'])
    if fonts['typewriter'] in ('cmtt', 'lmtt', 'cmtl'):
        document.preamble.append('\\renewcommand{\\ttdefault}{%s}' % fonts['typewriter'])
    if fonts['typewriter'] in ('courier', 'beramono', 'luximono'):
        document.preamble.append('\\usepackage{%s}' % fonts['typewriter'])
    if font_default_family != 'default':
        document.preamble.append('\\renewcommand{\\familydefault}{\\%s}' % font_default_family)
    if font_osf == 'true':
        document.warning("Ignoring `\\font_osf = true'")
|
|
|
|
|
|
def revert_booktabs(document):
    """We remove the booktabs flag or everything else will become a mess.

    For every tabular inset in the body the ``booktabs="true"`` attribute
    is removed from the ``<features ...>`` line, and the ``topspace``,
    ``bottomspace`` and ``interlinespace`` attributes are removed from
    ``<row ...>`` lines.  A warning is issued for each change.
    """
    re_features = re.compile(r'^<features.* booktabs="true".*>$')
    re_row = re.compile(r'^<row.*space="[^"]+".*>$')
    re_tspace = re.compile(r'\s+topspace="[^"]+"')
    re_bspace = re.compile(r'\s+bottomspace="[^"]+"')
    re_ispace = re.compile(r'\s+interlinespace="[^"]+"')
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Tabular", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            document.warning("Malformed LyX document: Could not find end of tabular.")
            # Step past this inset; otherwise the same line is found
            # again and the loop never terminates.
            i = i + 1
            continue
        for k in range(i, j):
            line = document.body[k]
            if re_features.search(line):
                document.warning("Converting 'booktabs' table to normal table.")
                line = line.replace(' booktabs="true"', '')
            if re_row.search(line):
                document.warning("Removing extra row space.")
                line = re_tspace.sub('', line)
                line = re_bspace.sub('', line)
                line = re_ispace.sub('', line)
            document.body[k] = line
        i = i + 1
|
|
|
|
|
|
def convert_multiencoding(document, forward):
    """ Fix files with multiple encodings.
    Files with an inputencoding of "auto" or "default" and multiple languages
    where at least two languages have different default encodings are encoded
    in multiple encodings for file formats < 249. These files are incorrectly
    read and written (as if the whole file was in the encoding of the main
    language).

    This function
    - converts from fake unicode values to true unicode if forward is true, and
    - converts from true unicode values to fake unicode if forward is false.
    document.encoding must be set to the old value (format 248) in both cases.

    We do this here and not in LyX.py because it is far easier to do the
    necessary parsing in modern formats than in ancient ones.
    """
    if document.inputencoding not in ("auto", "default"):
        return
    # One entry per open layout; the last entry is the encoding in effect
    # for the current line.
    encoding_stack = [document.encoding]
    lang_re = re.compile(r"^\\lang\s(\S+)")
    for i in range(len(document.body)):
        result = lang_re.match(document.body[i])
        if result:
            language = result.group(1)
            if language == "default":
                document.warning("Resetting encoding from %s to %s." % (encoding_stack[-1], document.encoding))
                encoding_stack[-1] = document.encoding
            else:
                # Imported lazily: only needed when a \lang line occurs.
                from lyx2lyx_lang import lang
                document.warning("Setting encoding from %s to %s." % (encoding_stack[-1], lang[language][3]))
                encoding_stack[-1] = lang[language][3]
        elif find_token(document.body, "\\begin_layout", i, i + 1) == i:
            document.warning("Adding nested encoding %s." % encoding_stack[-1])
            encoding_stack.append(encoding_stack[-1])
        elif find_token(document.body, "\\end_layout", i, i + 1) == i:
            if len(encoding_stack) > 1:
                document.warning("Removing nested encoding %s." % encoding_stack[-1])
                del encoding_stack[-1]
            else:
                # Never pop the base entry: an unbalanced \end_layout
                # would otherwise leave the stack empty and the lookup
                # below would raise IndexError.
                document.warning("Malformed LyX document: Unbalanced `\\end_layout'.")
        if encoding_stack[-1] != document.encoding:
            if forward:
                # This line has been incorrectly interpreted as if it was
                # encoded in 'encoding'.
                # Convert back to the 8bit string that was in the file.
                orig = document.body[i].encode(document.encoding)
                # Convert the 8bit string that was in the file to unicode
                # with the correct encoding.
                document.body[i] = orig.decode(encoding_stack[-1])
            else:
                # Convert unicode to the 8bit string that will be written
                # to the file with the correct encoding.
                orig = document.body[i].encode(encoding_stack[-1])
                # Convert the 8bit string that will be written to the
                # file to fake unicode with the encoding that will later
                # be used when writing to the file.
                document.body[i] = orig.decode(document.encoding)
|
|
|
|
|
|
def convert_utf8(document):
    """Set document encoding to UTF-8.

    The body is first passed through convert_multiencoding() in the
    forward direction, which needs document.encoding to still hold the
    old value; only then is document.encoding set to "utf8".
    """
    convert_multiencoding(document, True)
    document.encoding = "utf8"
|
|
|
|
|
|
def revert_utf8(document):
    r"""Set document encoding to the value corresponding to inputencoding.

    A missing ``\inputencoding`` header line is added as "auto", and an
    existing value of "utf8" is replaced by "auto".  document.inputencoding
    and document.encoding are then updated (the latter via get_encoding()
    for format 248) before the body is passed through
    convert_multiencoding() in the backward direction.
    """
    i = find_token(document.header, "\\inputencoding", 0)
    if i == -1:
        document.header.append("\\inputencoding auto")
    elif get_value(document.header, "\\inputencoding", i) == "utf8":
        document.header[i] = "\\inputencoding auto"
    document.inputencoding = get_value(document.header, "\\inputencoding", 0)
    document.encoding = get_encoding(document.language, document.inputencoding, 248)
    convert_multiencoding(document, False)
|
|
|
|
|
|
def revert_cs_label(document):
    r"""Remove status flag of charstyle label.

    For every ``\begin_inset CharStyle`` line the following lines are
    scanned for one starting with ``show_label``, which is deleted.  If a
    ``\begin_layout`` line (or the end of the body) is reached first, a
    warning is issued instead.
    """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CharStyle", i)
        if i == -1:
            return
        # Search for a line starting 'show_label'
        # If it is not there, break with a warning message
        i = i + 1
        while i < len(document.body):
            line = document.body[i]
            if line.startswith("show_label"):
                del document.body[i]
                break
            if line.startswith("\\begin_layout"):
                document.warning("Malformed LyX document: Missing 'show_label'.")
                break
            i = i + 1
        else:
            # Ran off the end of the body without finding either line;
            # the original indexed past the end here and raised IndexError.
            document.warning("Malformed LyX document: Missing 'show_label'.")
            return

        i = i + 1
|
|
|
|
|
|
def convert_bibitem(document):
    r""" Convert
\bibitem [option]{argument}

to

\begin_inset LatexCommand bibitem
label "option"
key "argument"

\end_inset

This must be called after convert_commandparams.
"""
    regex = re.compile(r'\S+\s*(\[[^\[\{]*\])?(\{[^}]*\})')
    i = 0
    while True:
        i = find_token(document.body, "\\bibitem", i)
        if i == -1:
            break
        match = regex.match(document.body[i])
        if match is None:
            # Not of the form "\bibitem [option]{argument}": leave the
            # line alone instead of failing on match.group().
            document.warning("Malformed LyX document: Cannot parse `%s'." % document.body[i])
            i = i + 1
            continue
        option = match.group(1)
        argument = match.group(2)
        lines = ['\\begin_inset LatexCommand bibitem']
        if option is not None:
            # [1:-1] drops the surrounding brackets / braces.
            lines.append('label "%s"' % option[1:-1].replace('"', '\\"'))
        lines.append('key "%s"' % argument[1:-1].replace('"', '\\"'))
        lines.append('')
        lines.append('\\end_inset')
        document.body[i:i+1] = lines
        i = i + 1
|
|
|
|
|
|
# Parameter names of the LatexCommand insets, in the order in which the
# old one-line syntax \cmdname[option1][option2]{argument} gave them.
# An empty name means that the command has no parameter in that position.
commandparams_info = {
    # command : [option1, option2, argument]
    "bibitem": ["label", "", "key"],
    "bibtex": ["options", "btprint", "bibfiles"],
    "cite": ["after", "before", "key"],
    "citet": ["after", "before", "key"],
    "citep": ["after", "before", "key"],
    "citealt": ["after", "before", "key"],
    "citealp": ["after", "before", "key"],
    "citeauthor": ["after", "before", "key"],
    "citeyear": ["after", "before", "key"],
    "citeyearpar": ["after", "before", "key"],
    "citet*": ["after", "before", "key"],
    "citep*": ["after", "before", "key"],
    "citealt*": ["after", "before", "key"],
    "citealp*": ["after", "before", "key"],
    "citeauthor*": ["after", "before", "key"],
    "Citet": ["after", "before", "key"],
    "Citep": ["after", "before", "key"],
    "Citealt": ["after", "before", "key"],
    "Citealp": ["after", "before", "key"],
    "Citeauthor": ["after", "before", "key"],
    "Citet*": ["after", "before", "key"],
    "Citep*": ["after", "before", "key"],
    "Citealt*": ["after", "before", "key"],
    "Citealp*": ["after", "before", "key"],
    "Citeauthor*": ["after", "before", "key"],
    "citefield": ["after", "before", "key"],
    "citetitle": ["after", "before", "key"],
    "cite*": ["after", "before", "key"],
    "hfill": ["", "", ""],
    "index": ["", "", "name"],
    "printindex": ["", "", "name"],
    "label": ["", "", "name"],
    "eqref": ["name", "", "reference"],
    "pageref": ["name", "", "reference"],
    "prettyref": ["name", "", "reference"],
    "ref": ["name", "", "reference"],
    "vpageref": ["name", "", "reference"],
    "vref": ["name", "", "reference"],
    "tableofcontents": ["", "", "type"],
    "htmlurl": ["name", "", "target"],
    "url": ["name", "", "target"],
}
|
|
|
|
|
|
def _scan_latex_command(command):
    r"""Split ``\cmdname[opt1][opt2]{arg}`` into its four parts.

    Returns the tuple ``(name, option1, option2, argument)``; parts that
    do not occur are empty strings.  The name is returned without the
    leading backslash, and the outermost brackets / braces are not part
    of the returned values.

    The parser is taken from the original InsetCommandParams::scanCommand.
    """
    name = ""
    option1 = ""
    option2 = ""
    argument = ""
    state = "WS"
    # Used to handle things like \command[foo[bar]]{foo{bar}}
    nestdepth = 0
    # Previous character; decides whether '[' opens the first or the
    # second option.
    b = 0
    for c in command:
        if state == "CMDNAME" and c in (' ', '[', '{'):
            state = "WS"
        if ((state == "OPTION" and c == ']') or
                (state == "SECOPTION" and c == ']') or
                (state == "CONTENT" and c == '}')):
            if nestdepth == 0:
                state = "WS"
            else:
                nestdepth = nestdepth - 1
        if ((state == "OPTION" and c == '[') or
                (state == "SECOPTION" and c == '[') or
                (state == "CONTENT" and c == '{')):
            nestdepth = nestdepth + 1
        if state == "CMDNAME":
            name += c
        elif state == "OPTION":
            option1 += c
        elif state == "SECOPTION":
            option2 += c
        elif state == "CONTENT":
            argument += c
        elif state == "WS":
            if c == '\\':
                state = "CMDNAME"
            elif c == '[' and b != ']':
                state = "OPTION"
                nestdepth = 0  # Just to be sure
            elif c == '[' and b == ']':
                state = "SECOPTION"
                nestdepth = 0  # Just to be sure
            elif c == '{':
                state = "CONTENT"
                nestdepth = 0  # Just to be sure
        b = c
    return name, option1, option2, argument


def convert_commandparams(document):
    r""" Convert

 \begin_inset LatexCommand \cmdname[opt1][opt2]{arg}
 \end_inset

 to

 \begin_inset LatexCommand cmdname
 name1 "opt1"
 name2 "opt2"
 name3 "arg"
 \end_inset

 name1, name2 and name3 can be different for each command.
"""
    # \begin_inset LatexCommand bibitem was not the official version (see
    # convert_bibitem()), but could be read in, so we convert it here, too.

    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset LatexCommand", i)
        if i == -1:
            break
        # 26 == len("\\begin_inset LatexCommand") + 1 for the separator.
        command = document.body[i][26:].strip()
        if command == "":
            document.warning("Malformed LyX document: Missing LatexCommand name.")
            i = i + 1
            continue

        name, option1, option2, argument = _scan_latex_command(command)

        # A command that is not in the table has no known parameter
        # names, so its values are reported as invalid below instead of
        # raising KeyError.
        param_names = commandparams_info.get(name, ("", "", ""))

        # Now we have parsed the command, output the parameters
        lines = ["\\begin_inset LatexCommand %s" % name]
        for kind, value, param in (("option", option1, param_names[0]),
                                   ("second option", option2, param_names[1]),
                                   ("argument", argument, param_names[2])):
            if value == "":
                continue
            if param == "":
                document.warning("Ignoring invalid %s `%s' of command `%s'." % (kind, value, name))
            else:
                lines.append('%s "%s"' % (param, value.replace('"', '\\"')))
        document.body[i:i+1] = lines
        i = i + 1
|
|
|
|
|
|
def revert_commandparams(document):
    r"""Convert LatexCommand insets back to the old one-line syntax.

    An inset of the form

        \begin_inset LatexCommand cmdname
        name1 "opt1"
        name2 "opt2"
        name3 "arg"
        \end_inset

    becomes ``\begin_inset LatexCommand \cmdname[opt1][opt2]{arg}``
    followed by the preview line (if any), an empty line and
    ``\end_inset``.  A bibitem inset becomes a single
    ``\bibitem [opt1]{arg}`` line.  The parameter names of each command
    are looked up in commandparams_info.
    """
    regex = re.compile(r'(\S+)\s+(.+)')
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset LatexCommand", i)
        if i == -1:
            break
        fields = document.body[i].split()
        if len(fields) < 3:
            document.warning("Malformed LyX document: Missing LatexCommand name.")
            i = i + 1
            continue
        name = fields[2]
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            # Without this check the slice assignment below would insert
            # lines without removing any and the search would restart at
            # line 0, looping forever.
            document.warning("Malformed LyX document: Missing `\\end_inset'.")
            i = i + 1
            continue
        # An unknown command has no known parameter names (the original
        # raised KeyError for it as soon as a parameter line was seen).
        param_names = commandparams_info.get(name, ("", "", ""))
        preview_line = ""
        option1 = ""
        option2 = ""
        argument = ""
        for k in range(i + 1, j):
            match = regex.match(document.body[k])
            if match:
                pname = match.group(1)
                pvalue = match.group(2)
                if pname == "preview":
                    preview_line = document.body[k]
                elif param_names[0] != "" and pname == param_names[0]:
                    option1 = pvalue.strip('"').replace('\\"', '"')
                elif param_names[1] != "" and pname == param_names[1]:
                    option2 = pvalue.strip('"').replace('\\"', '"')
                elif param_names[2] != "" and pname == param_names[2]:
                    argument = pvalue.strip('"').replace('\\"', '"')
            elif document.body[k].strip() != "":
                document.warning("Ignoring unknown contents `%s' in command inset %s." % (document.body[k], name))
        if name == "bibitem":
            if option1 == "":
                lines = ["\\bibitem {%s}" % argument]
            else:
                lines = ["\\bibitem [%s]{%s}" % (option1, argument)]
        else:
            # An empty first option must still be written as "[]" when a
            # second option follows.
            if option1 == "" and option2 == "":
                options = ""
            elif option2 == "":
                options = "[%s]" % option1
            else:
                options = "[%s][%s]" % (option1, option2)
            lines = ["\\begin_inset LatexCommand \\%s%s{%s}" % (name, options, argument)]
            if preview_line != "":
                lines.append(preview_line)
            lines.append('')
            lines.append('\\end_inset')
        document.body[i:j+1] = lines
        # Continue right after the replacement.  The replacement is
        # usually shorter than the inset, so the old end index j + 1
        # would skip lines (and possibly the start of the next inset).
        i = i + len(lines)
|
|
|
|
|
|
def revert_nomenclature(document):
    r"""Convert nomenclature entry to ERT.

    Each ``\begin_inset LatexCommand nomenclature`` inset is replaced by
    an ERT inset containing ``\nomenclature[prefix]{symbol}{description}``
    (without the ``[prefix]`` part if the prefix is empty).  If at least
    one inset was found and the preamble does not load nomencl yet, the
    package and ``\makenomenclature`` are appended to the preamble.
    """
    regex = re.compile(r'(\S+)\s+(.+)')
    i = 0
    use_nomencl = 0
    while True:
        i = find_token(document.body, "\\begin_inset LatexCommand nomenclature", i)
        if i == -1:
            break
        use_nomencl = 1
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            # Without this check the slice assignment below would insert
            # the ERT in front of the inset without removing it, and the
            # same inset would be found again forever.
            document.warning("Malformed LyX document: Missing `\\end_inset'.")
            i = i + 1
            continue
        symbol = ""
        description = ""
        prefix = ""
        for k in range(i + 1, j):
            match = regex.match(document.body[k])
            if match:
                # Other "name value" lines (e.g. preview) are skipped.
                name = match.group(1)
                value = match.group(2)
                if name == "symbol":
                    symbol = value.strip('"').replace('\\"', '"')
                elif name == "description":
                    description = value.strip('"').replace('\\"', '"')
                elif name == "prefix":
                    prefix = value.strip('"').replace('\\"', '"')
            elif document.body[k].strip() != "":
                document.warning("Ignoring unknown contents `%s' in nomenclature inset." % document.body[k])
        if prefix == "":
            command = 'nomenclature{%s}{%s}' % (symbol, description)
        else:
            command = 'nomenclature[%s]{%s}{%s}' % (prefix, symbol, description)
        document.body[i:j+1] = ['\\begin_inset ERT',
                                'status collapsed',
                                '',
                                '\\begin_layout %s' % document.default_layout,
                                '',
                                '',
                                '\\backslash',
                                command,
                                '\\end_layout',
                                '',
                                '\\end_inset']
        # The replacement is 11 lines long.
        i = i + 11
    if use_nomencl and find_token(document.preamble, '\\usepackage{nomencl}[2005/09/22]', 0) == -1:
        document.preamble.append('\\usepackage{nomencl}[2005/09/22]')
        document.preamble.append('\\makenomenclature')
|
|
|
|
|
|
def revert_printnomenclature(document):
    r"""Convert printnomenclature to ERT.

    Each ``\begin_inset LatexCommand printnomenclature`` inset is replaced
    by an ERT inset containing ``\printnomenclature[labelwidth]`` (or
    ``\printnomenclature{}`` if no label width is given).  If at least one
    inset was found and the preamble does not load nomencl yet, the
    package and ``\makenomenclature`` are appended to the preamble.
    """
    regex = re.compile(r'(\S+)\s+(.+)')
    i = 0
    use_nomencl = 0
    while True:
        i = find_token(document.body, "\\begin_inset LatexCommand printnomenclature", i)
        if i == -1:
            break
        use_nomencl = 1
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            # Without this check the slice assignment below would insert
            # the ERT in front of the inset without removing it, and the
            # same inset would be found again forever.
            document.warning("Malformed LyX document: Missing `\\end_inset'.")
            i = i + 1
            continue
        labelwidth = ""
        for k in range(i + 1, j):
            match = regex.match(document.body[k])
            if match:
                # Other "name value" lines (e.g. preview) are skipped.
                name = match.group(1)
                value = match.group(2)
                if name == "labelwidth":
                    labelwidth = value.strip('"').replace('\\"', '"')
            elif document.body[k].strip() != "":
                document.warning("Ignoring unknown contents `%s' in printnomenclature inset." % document.body[k])
        # The original emitted "\nomenclature" here, which is the command
        # for a single entry, not the one that prints the list.
        if labelwidth == "":
            command = 'printnomenclature{}'
        else:
            command = 'printnomenclature[%s]' % labelwidth
        document.body[i:j+1] = ['\\begin_inset ERT',
                                'status collapsed',
                                '',
                                '\\begin_layout %s' % document.default_layout,
                                '',
                                '',
                                '\\backslash',
                                command,
                                '\\end_layout',
                                '',
                                '\\end_inset']
        # The replacement is 11 lines long.
        i = i + 11
    if use_nomencl and find_token(document.preamble, '\\usepackage{nomencl}[2005/09/22]', 0) == -1:
        document.preamble.append('\\usepackage{nomencl}[2005/09/22]')
        document.preamble.append('\\makenomenclature')
|
|
|
|
|
|
def convert_esint(document):
    r"""Add \use_esint setting to header.

    The line ``\use_esint 0`` is inserted directly before the
    ``\cite_engine`` header line.  If that line is missing a warning is
    issued and the header is left unchanged.
    """
    i = find_token(document.header, "\\cite_engine", 0)
    if i == -1:
        document.warning("Malformed LyX document: Missing `\\cite_engine'.")
        return
    # 0 is off, 1 is auto, 2 is on.
    document.header.insert(i, '\\use_esint 0')
|
|
|
|
|
|
def revert_esint(document):
    r"""Remove \use_esint setting from header.

    If the setting was "on" (2), ``\usepackage{esint}`` is appended to the
    preamble so that the package is still loaded.  A missing setting
    produces a warning and leaves the document unchanged.
    """
    i = find_token(document.header, "\\use_esint", 0)
    if i == -1:
        document.warning("Malformed LyX document: Missing `\\use_esint'.")
        return
    fields = document.header[i].split()
    use_esint = ''
    if len(fields) > 1:
        use_esint = fields[1]
    del document.header[i]
    # 0 is off, 1 is auto, 2 is on.
    # The value is a string; the original compared it with the integer 2,
    # which is never equal, so the package was never added.
    if use_esint == '2':
        document.preamble.append('\\usepackage{esint}')
|
|
|
|
|
|
def revert_clearpage(document):
    r"""clearpage -> ERT

    Every body line found by searching for ``\clearpage`` is replaced by
    an ERT inset that contains the command ``\clearpage``.
    """
    i = 0
    while True:
        i = find_token(document.body, "\\clearpage", i)
        if i == -1:
            break
        document.body[i:i+1] = ['\\begin_inset ERT',
                                'status collapsed',
                                '',
                                '\\begin_layout %s' % document.default_layout,
                                '',
                                '',
                                '\\backslash',
                                'clearpage',
                                '\\end_layout',
                                '',
                                '\\end_inset']
        i = i + 1
|
|
|
|
|
|
def revert_cleardoublepage(document):
    r"""cleardoublepage -> ERT

    Every body line found by searching for ``\cleardoublepage`` is
    replaced by an ERT inset that contains the command
    ``\cleardoublepage``.
    """
    i = 0
    while True:
        i = find_token(document.body, "\\cleardoublepage", i)
        if i == -1:
            break
        document.body[i:i+1] = ['\\begin_inset ERT',
                                'status collapsed',
                                '',
                                '\\begin_layout %s' % document.default_layout,
                                '',
                                '',
                                '\\backslash',
                                'cleardoublepage',
                                '\\end_layout',
                                '',
                                '\\end_inset']
        i = i + 1
|
|
|
|
|
|
def convert_lyxline(document):
    r"""Remove fontsize commands for \lyxline.

    A ``\size <fontsize>`` line is deleted from the body when the first
    ``\lyxline`` at or after it is exactly two lines further down.
    """
    # The problem is: the old \lyxline definition doesn't handle the fontsize
    # to change the line thickness. The new definition does this so that imported
    # \lyxlines would have a different line thickness. The eventual fontsize command
    # before \lyxline is therefore removed to get the same output.
    fontsizes = ["tiny", "scriptsize", "footnotesize", "small", "normalsize",
                 "large", "Large", "LARGE", "huge", "Huge"]
    for size in fontsizes:
        token = "\\size " + size
        i = 0
        while True:
            i = find_token(document.body, token, i)
            if i == -1:
                break
            k = find_token(document.body, "\\lyxline", i)
            # the corresponding fontsize command is always 2 lines before the \lyxline
            if k == i + 2:
                # The following lines move up, so continue at the same index.
                del document.body[i]
            else:
                # Not followed by a \lyxline: keep it and go on searching.
                # The original stopped at the first such line and so
                # missed every later \size/\lyxline pair.
                i = i + 1
|
|
|
|
|
|
def revert_encodings(document):
    r"""Set new encodings to auto.

    A missing ``\inputencoding`` header line is added as "auto"; a value
    from the list of new encodings below is replaced by "auto".
    document.inputencoding is then refreshed from the header.
    """
    encodings = ["8859-6", "8859-8", "cp437", "cp437de", "cp850", "cp852",
                 "cp855", "cp858", "cp862", "cp865", "cp866", "cp1250",
                 "cp1252", "cp1256", "cp1257", "latin10", "pt254", "tis620-0"]
    i = find_token(document.header, "\\inputencoding", 0)
    if i == -1:
        document.header.append("\\inputencoding auto")
    else:
        inputenc = get_value(document.header, "\\inputencoding", i)
        if inputenc in encodings:
            document.header[i] = "\\inputencoding auto"
    document.inputencoding = get_value(document.header, "\\inputencoding", 0)
|
|
|
|
|
|
def convert_caption(document):
    r"""Convert caption layouts to caption insets.

    Each ``\begin_layout Caption`` paragraph becomes a paragraph in the
    document's default layout that contains a Caption inset, which in
    turn holds the old paragraph contents in a default-layout paragraph.
    Processing stops with a warning if the end of a caption layout cannot
    be found.
    """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_layout Caption", i)
        if i == -1:
            return
        j = find_end_of_layout(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: Missing `\\end_layout'.")
            return

        # Close the inner paragraph and the inset in front of the old
        # \end_layout (done first, so that index i stays valid) ...
        document.body[j:j] = ["\\end_layout", "", "\\end_inset", "", ""]
        # ... then open the outer paragraph, the inset and the inner
        # paragraph in place of the old \begin_layout line.
        document.body[i:i+1] = ["\\begin_layout %s" % document.default_layout,
                                "\\begin_inset Caption", "",
                                "\\begin_layout %s" % document.default_layout]
        i = i + 1
|
|
|
|
|
|
def revert_caption(document):
    r"""Convert caption insets to caption layouts.

    This assumes that the text class has a caption style.

    Each ``\begin_inset Caption`` is turned into a ``\begin_layout Caption``
    paragraph.  The paragraph that contained the inset is removed if the
    inset was its only content; otherwise it is closed before the caption
    and/or reopened after it.  Processing stops with a warning as soon as
    an expected ``\begin_layout``, ``\end_layout`` or ``\end_inset`` line
    cannot be found.
    """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Caption", i)
        if i == -1:
            return

        # We either need to delete the previous \begin_layout line, or we
        # need to end the previous layout if this inset is not in the first
        # position of the paragraph.
        layout_before = find_token_backwards(document.body, "\\begin_layout", i)
        if layout_before == -1:
            document.warning("Malformed LyX document: Missing `\\begin_layout'.")
            return
        layout_line = document.body[layout_before]
        content_before = False
        for line in document.body[layout_before + 1:i]:
            if line != "":
                content_before = True
                break
        if content_before:
            document.body[i:i] = ["\\end_layout", ""]
            i = i + 2
        else:
            del document.body[layout_before:i]
            i = layout_before

        # Find start of layout in the inset and end of inset
        j = find_token(document.body, "\\begin_layout", i)
        if j == -1:
            document.warning("Malformed LyX document: Missing `\\begin_layout'.")
            return
        # NOTE(review): most other callers in this file pass i + 1 to
        # find_end_of_inset(); confirm against find_end_of() that starting
        # on the \begin_inset line itself is intended here.
        k = find_end_of_inset(document.body, i)
        if k == -1:
            document.warning("Malformed LyX document: Missing `\\end_inset'.")
            return

        # We either need to delete the following \end_layout line, or we need
        # to restart the old layout if this inset is not at the paragraph end.
        layout_after = find_token(document.body, "\\end_layout", k)
        if layout_after == -1:
            document.warning("Malformed LyX document: Missing `\\end_layout'.")
            return
        content_after = False
        for line in document.body[k + 1:layout_after]:
            if line != "":
                content_after = True
                break
        if content_after:
            document.body[k+1:k+1] = [layout_line, ""]
        else:
            del document.body[k+1:layout_after+1]

        # delete \begin_layout and \end_inset and replace \begin_inset with
        # "\begin_layout Caption". This works because we can only have one
        # paragraph in the caption inset: The old \end_layout will be recycled.
        # Deletions run from the back (k > j > i) so that the smaller
        # indices stay valid; the length checks guard the case where the
        # deleted line was the last one of the body.
        del document.body[k]
        if k < len(document.body) and document.body[k] == "":
            del document.body[k]
        del document.body[j]
        if j < len(document.body) and document.body[j] == "":
            del document.body[j]
        document.body[i] = "\\begin_layout Caption"
        if i + 1 < len(document.body) and document.body[i+1] == "":
            del document.body[i+1]
        i = i + 1
|
|
|
|
|
|
# Accents of InsetLaTeXAccent
|
|
# Maps the accent command (without the leading backslash) to the
# corresponding unicode combining character.
accent_map = {
    "`": u'\u0300',   # grave
    "'": u'\u0301',   # acute
    "^": u'\u0302',   # circumflex
    "~": u'\u0303',   # tilde
    "=": u'\u0304',   # macron
    "u": u'\u0306',   # breve
    ".": u'\u0307',   # dot above
    "\"": u'\u0308',  # diaeresis
    "r": u'\u030a',   # ring above
    "H": u'\u030b',   # double acute
    "v": u'\u030c',   # caron
    "b": u'\u0320',   # minus sign below
    "d": u'\u0323',   # dot below
    "c": u'\u0327',   # cedilla
    "k": u'\u0328',   # ogonek
    "t": u'\u0361',   # tie. This is special: It spans two characters, but
                      # only one is given as argument, so we don't need to
                      # treat it differently.
}
|
|
|
|
|
|
# special accents of InsetLaTeXAccent without argument
|
|
# Maps the command (without the leading backslash) to the unicode
# character that replaces it.
special_accent_map = {
    'i': u'\u0131',  # dotless i
    'j': u'\u0237',  # dotless j
    'l': u'\u0142',  # l with stroke
    'L': u'\u0141',  # L with stroke
}
|
|
|
|
|
|
# special accent arguments of InsetLaTeXAccent
|
|
# Maps an accent argument that is itself a command to the unicode
# character that replaces it.
accented_map = {
    '\\i': u'\u0131',  # dotless i
    '\\j': u'\u0237',  # dotless j
}
|
|
|
|
|
|
def _convert_accent(accent, accented_char):
    """Return the unicode string for *accent* applied to *accented_char*.

    *accent* is the accent command without the leading backslash and
    *accented_char* is its argument ('' if there is none).  The result is
    the NFKC normalization of the character followed by the combining
    accent from accent_map.  For the argument-less commands in
    special_accent_map the mapped character is returned directly.

    Returns '' if the accent cannot be converted, i.e. the accent is
    unknown or the argument is longer than one character and not listed
    in accented_map.
    """
    kind = accent
    char = accented_char
    if char == '':
        if kind in special_accent_map:
            return special_accent_map[kind]
        # a missing char is treated as space by LyX
        char = ' '
    elif kind == 'q' and char in ['t', 'd', 'l', 'L']:
        # Special caron, only used with t, d, l and L.
        # It is not in the map because we convert it to the same unicode
        # character as the normal caron: \q{} is only defined if babel with
        # the czech or slovak language is used, and the normal caron
        # produces the correct output if the T1 font encoding is used.
        # For the same reason we never convert to \q{} in the other direction.
        kind = 'v'
    elif char in accented_map:
        char = accented_map[char]
    elif len(char) > 1:
        # We can only convert accents on a single char
        return ''
    combining = accent_map.get(kind)
    if combining:
        return unicodedata.normalize("NFKC", "%s%s" % (char, combining))
    return ''
|
|
|
|
|
|
def convert_ertbackslash(body, i, ert, default_layout):
    r"""Append ert to body[i] as valid ERT code and return the last line index.

    body is modified in place.  Every backslash in ert is written as a
    '\backslash ' token, after which a new empty line is started.  Every
    '\n' closes the current layout and opens a new default_layout one.  All
    other characters are appended to the current line.  The returned index
    is that of the line which received the last piece of text; it equals i
    when no new line had to be started.
    """
    for ch in ert:
        if ch == '\\':
            body[i] += '\\backslash '
            # Continue on a fresh line after the token
            i += 1
            body.insert(i, '')
        elif ch == '\n':
            paragraph_break = ['\\end_layout',
                               '',
                               '\\begin_layout %s' % default_layout,
                               '']
            body[i + 1:i + 1] = paragraph_break
            # Go on writing on the empty line that ends the inserted block
            i += 4
        else:
            body[i] += ch
    return i
|
|
|
|
|
|
def convert_accent(document):
    """Replace InsetLaTeXAccent insets in document.body by unicode text.

    Every line containing an accent inset ('\\i ' followed by the accent
    command) is rewritten in place: the inset is replaced by the character
    returned by _convert_accent().  Insets that cannot be converted (unknown
    accent, more than one accented character, unparsable argument) are
    turned into an ERT inset containing the LaTeX code, and a warning is
    issued through document.warning().
    """
    # The following forms are supported by LyX:
    # '\i \"{a}' (standard form, as written by LyX)
    # '\i \"{}' (standard form, as written by LyX if the accented char is a space)
    # '\i \"{ }' (also accepted if the accented char is a space)
    # '\i \" a' (also accepted)
    # '\i \"' (also accepted)
    re_wholeinset = re.compile(r'^(.*)(\\i\s+)(.*)$')
    re_contents = re.compile(r'^([^\s{]+)(.*)$')
    re_accentedcontents = re.compile(r'^\s*{?([^{}]*)}?\s*$')
    i = 0
    while True:
        i = find_re(document.body, re_wholeinset, i)
        if i == -1:
            return
        match = re_wholeinset.match(document.body[i])
        prefix = match.group(1)
        contents = match.group(3).strip()
        match = re_contents.match(contents)
        if match:
            # Strip first char (always \)
            accent = match.group(1)[1:]
            accented_contents = match.group(2).strip()
            match = re_accentedcontents.match(accented_contents)
            if match:
                accented_char = match.group(1)
                converted = _convert_accent(accent, accented_char)
                if converted != '':
                    document.body[i] = '%s%s' % (prefix, converted)
                    i += 1
                    continue
                # Normalize contents
                contents = '%s{%s}' % (accent, accented_char)
            else:
                # The argument is not of a supported form (e.g. nested
                # braces or text after the closing brace).  Keep it
                # verbatim for the ERT inset; only the backslash is
                # dropped because it is added again below.
                contents = '%s%s' % (accent, accented_contents)
        document.warning("Converting unknown InsetLaTeXAccent `\\i %s' to ERT." % contents)
        document.body[i] = prefix
        document.body[i+1:i+1] = ['\\begin_inset ERT',
                                  'status collapsed',
                                  '',
                                  '\\begin_layout %s' % document.default_layout,
                                  '',
                                  '',
                                  '']
        # The ERT text starts on the last of the seven lines just inserted.
        i = convert_ertbackslash(document.body, i + 7,
                                 '\\%s' % contents,
                                 document.default_layout)
        document.body[i+1:i+1] = ['\\end_layout',
                                  '',
                                  '\\end_inset']
        # Continue the search behind the ERT inset
        i += 3
|
|
|
|
|
|
def _insert_accent_inset(body, i, start, end, inset):
    """Replace body[i][start:end] by inset, moving text after end to a new line i+1."""
    line = body[i]
    if end < len(line):
        # The rest of the line has to become exactly ONE new line, so it is
        # inserted as a single list element (a slice assignment of a string
        # would add one line per character).
        body.insert(i + 1, line[end:])
    body[i] = line[:start] + inset


def revert_accent(document):
    """Replace accented characters in document.body by InsetLaTeXAccent.

    Only characters that cannot be represented in the encoding in effect
    for their line are replaced; everything else is left alone.  The
    encoding is taken from get_encoding() and, if document.inputencoding is
    "auto" or "default", tracked through '\\lang' changes and layout
    nesting.  document.body is modified in place; text following a
    replaced character is moved to a new line directly after the inset.
    """
    inverse_accent_map = dict([(v, k) for k, v in accent_map.items()])
    inverse_special_accent_map = dict([(v, k) for k, v in special_accent_map.items()])
    inverse_accented_map = dict([(v, k) for k, v in accented_map.items()])

    body = document.body

    # Since LyX may insert a line break within a word we must combine all
    # words before unicode normalization.
    # We do this only if the next line starts with an accent, otherwise we
    # would create things like '\begin_inset ERTstatus'.
    for i in range(len(body) - 1):
        if body[i] == '' or body[i+1] == '' or body[i][-1] == ' ':
            continue
        if body[i+1][0] in inverse_accent_map:
            # The last character of this line and the first one of the next
            # line probably form one accented character: pull the rest of
            # the word over to this line.
            while len(body[i+1]) > 0 and body[i+1][0] != ' ':
                body[i] += body[i+1][0]
                body[i+1] = body[i+1][1:]

    # Normalize to "Normal form D" (NFD, also known as canonical decomposition).
    # This is needed to catch all accented characters.
    # The canonical form is used on purpose: the compatibility forms
    # (NFKD/NFKC) would also rewrite characters that have nothing to do with
    # accents (ligatures, superscripts, no-break spaces, ...).
    for i in range(len(body)):
        # Unfortunately we have a mixture of unicode strings and plain strings,
        # because we never use u'xxx' for string literals, but 'xxx'.
        # Therefore we may have to try two times to normalize the data.
        try:
            body[i] = unicodedata.normalize("NFD", body[i])
        except TypeError:
            body[i] = unicodedata.normalize("NFD", body[i].decode('utf-8'))

    # Replace accented characters with InsetLaTeXAccent
    # Do not convert characters that can be represented in the chosen
    # encoding.
    encoding_stack = [get_encoding(document.language, document.inputencoding, 248)]
    lang_re = re.compile(r"^\\lang\s(\S+)")
    track_encoding = document.inputencoding in ("auto", "default")

    # The body grows while we work on it (the text after a replaced
    # character is moved to a new line), so the end has to be re-evaluated
    # on every iteration in order to process the new lines as well.
    i = 0
    while i < len(body):
        if track_encoding:
            # Track the encoding of the current line
            result = lang_re.match(body[i])
            if result:
                language = result.group(1)
                if language == "default":
                    encoding_stack[-1] = document.encoding
                else:
                    from lyx2lyx_lang import lang
                    encoding_stack[-1] = lang[language][3]
                i += 1
                continue
            if find_token(body, "\\begin_layout", i, i + 1) == i:
                encoding_stack.append(encoding_stack[-1])
                i += 1
                continue
            if find_token(body, "\\end_layout", i, i + 1) == i:
                del encoding_stack[-1]
                i += 1
                continue

        line = body[i]
        last = len(line) - 1
        for j in range(len(line)):
            char = line[j]
            # dotless i and dotless j are both in special_accent_map and can
            # occur as an accented character, so we need to test that the
            # following character is no accent
            if (char in inverse_special_accent_map and
                    (j == last or line[j+1] not in inverse_accent_map)):
                try:
                    char.encode(encoding_stack[-1])
                except UnicodeEncodeError:
                    # Replace the character at j, keeping everything before it
                    _insert_accent_inset(body, i, j, j + 1,
                                         "\\i \\%s{}" % inverse_special_accent_map[char])
                    # The rest of the line is handled as line i+1
                    break
            elif j > 0 and char in inverse_accent_map:
                base = line[j-1]
                try:
                    # Test the characters that are really in the document,
                    # not their LaTeX replacement: '\i' followed by an
                    # accent would compose with the plain 'i'.
                    unicodedata.normalize("NFC", base + char).encode(encoding_stack[-1])
                except UnicodeEncodeError:
                    if base == ' ':
                        # Conform to LyX output
                        accented_char = ''
                    else:
                        accented_char = inverse_accented_map.get(base, base)
                    # Replace base character (j-1) and accent (j), keeping
                    # everything before them
                    _insert_accent_inset(body, i, j - 1, j + 1,
                                         "\\i \\%s{%s}" % (inverse_accent_map[char], accented_char))
                    # The rest of the line is handled as line i+1
                    break
        i += 1

    # Normalize to "Normal form C" (NFC, pre-composed characters) again.
    # This has to include the lines that were added above.
    for i in range(len(body)):
        body[i] = unicodedata.normalize("NFC", body[i])
|
|
|
|
|
|
##
# Conversion hub
#
# Both tables consist of [number, [functions]] pairs.  The number is
# presumably a LyX file format and the functions the steps that are run to
# reach it -- confirm against the lyx2lyx driver that reads these tables.
# NOTE(review): revert_accent is registered below, but convert_accent is
# not part of `convert`; confirm that this is intended.

supported_versions = ["1.5.0", "1.5"]

convert = [
    [246, []],
    [247, [convert_font_settings]],
    [248, []],
    [249, [convert_utf8]],
    [250, []],
    [251, []],
    [252, [convert_commandparams, convert_bibitem]],
    [253, []],
    [254, [convert_esint]],
    [255, []],
    [256, []],
    [257, [convert_caption]],
    [258, [convert_lyxline]],
]

revert = [
    [257, []],
    [256, [revert_caption]],
    [255, [revert_encodings]],
    [254, [revert_clearpage, revert_cleardoublepage]],
    [253, [revert_esint]],
    [252, [revert_nomenclature, revert_printnomenclature]],
    [251, [revert_commandparams]],
    [250, [revert_cs_label]],
    [249, []],
    [248, [revert_accent, revert_utf8]],
    [247, [revert_booktabs]],
    [246, [revert_font_settings]],
    [245, [revert_framed]],
]


if __name__ == "__main__":
    # Nothing to do when run as a script
    pass
|
|
|
|
|