mirror of
https://git.lyx.org/repos/lyx.git
synced 2025-01-22 07:42:02 +00:00
44e0940d75
This is similar to what we have in C++ code where we order the standard includes to be easier to read. This is a readability change only.
2300 lines
80 KiB
Python
2300 lines
80 KiB
Python
# This file is part of lyx2lyx
|
|
# Copyright (C) 2006 José Matos <jamatos@lyx.org>
|
|
# Copyright (C) 2004-2006 Georg Baum <Georg.Baum@post.rwth-aachen.de>
|
|
#
|
|
# This program is free software; you can redistribute it and/or
|
|
# modify it under the terms of the GNU General Public License
|
|
# as published by the Free Software Foundation; either version 2
|
|
# of the License, or (at your option) any later version.
|
|
#
|
|
# This program is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU General Public License
|
|
# along with this program; if not, write to the Free Software
|
|
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
|
|
"""Convert files to the file format generated by lyx 1.5"""
|
|
|
|
import os
|
|
import re
|
|
import sys
|
|
import unicodedata
|
|
|
|
from LyX import get_encoding
|
|
from lyx2lyx_tools import insert_document_option
|
|
from parser_tools import (
|
|
find_beginning_of,
|
|
find_end_of,
|
|
find_nonempty_line,
|
|
find_re,
|
|
find_token,
|
|
find_token_backwards,
|
|
find_token_exact,
|
|
find_tokens,
|
|
get_value,
|
|
)
|
|
|
|
####################################################################
|
|
# Private helper functions
|
|
|
|
|
|
def find_end_of_inset(lines, i):
    """Locate the end of the inset that lines[i] belongs to.

    Delegates to find_end_of() with the inset begin/end markers and returns
    its result unchanged.
    """
    opener = "\\begin_inset"
    closer = "\\end_inset"
    return find_end_of(lines, i, opener, closer)
|
|
|
|
|
|
def find_end_of_layout(lines, i):
    """Locate the end of the layout that lines[i] belongs to.

    Delegates to find_end_of() with the layout begin/end markers and returns
    its result unchanged.
    """
    opener = "\\begin_layout"
    closer = "\\end_layout"
    return find_end_of(lines, i, opener, closer)
|
|
|
|
|
|
def find_beginning_of_layout(lines, i):
    """Locate the beginning of the layout that lines[i] belongs to.

    Delegates to find_beginning_of() with the layout begin/end markers and
    returns its result unchanged.
    """
    opener = "\\begin_layout"
    closer = "\\end_layout"
    return find_beginning_of(lines, i, opener, closer)
|
|
|
|
|
|
# End of helper functions
|
|
####################################################################
|
|
|
|
|
|
##
|
|
# Notes: Framed/Shaded
|
|
#
|
|
|
|
|
|
def revert_framed(document):
    """Turn every Framed or Shaded note inset of the body into a plain Note inset."""
    body = document.body
    note_variants = ["\\begin_inset Note Framed", "\\begin_inset Note Shaded"]
    pos = find_tokens(body, note_variants, 0)
    while pos != -1:
        body[pos] = "\\begin_inset Note"
        # Resume the search on the line after the one just rewritten.
        pos = find_tokens(body, note_variants, pos + 1)
|
|
|
|
|
|
##
|
|
# Fonts
|
|
#
|
|
|
|
# One row per legacy \fontscheme value:
#   (scheme, roman family, sans family, typewriter family)
# The row order is the key order of the three dictionaries below;
# revert_font_settings() picks the first scheme whose three families match.
_FONT_SCHEME_TABLE = (
    ("default", "default", "default", "default"),
    ("ae", "ae", "default", "default"),
    ("times", "times", "default", "default"),
    ("palatino", "palatino", "default", "default"),
    ("helvet", "default", "helvet", "default"),
    ("avant", "default", "avant", "default"),
    ("newcent", "newcent", "default", "default"),
    ("bookman", "bookman", "default", "default"),
    ("pslatex", "times", "helvet", "courier"),
)
roman_fonts = {scheme: roman for scheme, roman, _sans, _tt in _FONT_SCHEME_TABLE}
sans_fonts = {scheme: sans for scheme, _roman, sans, _tt in _FONT_SCHEME_TABLE}
typewriter_fonts = {scheme: tt for scheme, _roman, _sans, tt in _FONT_SCHEME_TABLE}
|
|
|
|
|
|
def convert_font_settings(document):
    r"""Expand the \fontscheme header line into the individual \font_* settings.

    The scheme name is looked up in roman_fonts, sans_fonts and
    typewriter_fonts. An empty or unknown scheme is reported with a warning
    and treated as "default". If the header has no \fontscheme line a warning
    is issued and the header is left untouched.
    """
    header = document.header
    pos = find_token_exact(header, "\\fontscheme", 0)
    if pos == -1:
        document.warning("Malformed LyX document: Missing `\\fontscheme'.")
        return

    scheme = get_value(header, "\\fontscheme", pos, pos + 1)
    if scheme == "":
        document.warning("Malformed LyX document: Empty `\\fontscheme'.")
        scheme = "default"
    if scheme not in roman_fonts:
        document.warning("Malformed LyX document: Unknown `\\fontscheme' `%s'." % scheme)
        scheme = "default"

    # The single \fontscheme line is replaced in place by eight settings.
    replacement = [
        "\\font_roman %s" % roman_fonts[scheme],
        "\\font_sans %s" % sans_fonts[scheme],
        "\\font_typewriter %s" % typewriter_fonts[scheme],
        "\\font_default_family default",
        "\\font_sc false",
        "\\font_osf false",
        "\\font_sf_scale 100",
        "\\font_tt_scale 100",
    ]
    header[pos : pos + 1] = replacement
|
|
|
|
|
|
def _pop_font_header(document, name, start, default):
    """Remove header setting *name* and return ``(value, index)``.

    The header is searched from *start*. When the setting is missing a
    warning is issued and ``(default, -1)`` is returned; otherwise the line is
    deleted and its value and former index are returned.
    """
    pos = find_token_exact(document.header, name, start)
    if pos == -1:
        document.warning("Malformed LyX document: Missing `%s'." % name)
        return default, -1
    value = get_value(document.header, name, pos, pos + 1)
    del document.header[pos]
    return value, pos


def revert_font_settings(document):
    r"""Collapse the \font_* header settings back into one \fontscheme line.

    All eight \font_* lines are removed from the header. If the roman, sans
    and typewriter families match one of the legacy schemes (roman_fonts,
    sans_fonts, typewriter_fonts) that scheme is written; otherwise
    "\fontscheme default" is written and the fonts are selected through
    preamble commands. Settings that cannot be expressed in the old format
    only produce a warning.
    """
    search_from = 0
    insert_line = -1  # index of the first removed \font_* family line
    fonts = {}
    for family in ("roman", "sans", "typewriter"):
        fonts[family], found_at = _pop_font_header(
            document, "\\font_%s" % family, search_from, "default"
        )
        if found_at == -1:
            search_from = 0
        else:
            search_from = found_at
            if insert_line < 0:
                insert_line = found_at

    # (header name, default value, can a non-default value be reverted?)
    scalar_settings = (
        ("\\font_default_family", "default", True),
        ("\\font_sc", "false", False),
        ("\\font_osf", "false", True),
        ("\\font_sf_scale", "100", False),
        ("\\font_tt_scale", "100", False),
    )
    values = {}
    for name, default, convertible in scalar_settings:
        values[name], found_at = _pop_font_header(document, name, search_from, default)
        # After a miss restart from the top instead of carrying the -1
        # "not found" marker into the next search as a start index.
        search_from = max(found_at, 0)
        if not convertible and values[name] != default:
            document.warning("Conversion of '%s' not yet implemented." % name)
    font_default_family = values["\\font_default_family"]
    font_osf = values["\\font_osf"]

    # NOTE(review): if none of the three family lines was found, insert_line
    # is still -1 and list.insert() places the line before the last header
    # entry (unchanged from the previous behaviour).
    for font_scheme in roman_fonts:
        if (
            roman_fonts[font_scheme] == fonts["roman"]
            and sans_fonts[font_scheme] == fonts["sans"]
            and typewriter_fonts[font_scheme] == fonts["typewriter"]
        ):
            document.header.insert(insert_line, "\\fontscheme %s" % font_scheme)
            if font_default_family != "default":
                document.preamble.append(
                    "\\renewcommand{\\familydefault}{\\%s}" % font_default_family
                )
            if font_osf == "true":
                document.warning("Ignoring `\\font_osf = true'")
            return

    # No legacy scheme matches: fall back to the default scheme and select
    # the fonts via the preamble.
    document.header.insert(insert_line, "\\fontscheme default")
    preamble = document.preamble

    roman = fonts["roman"]
    if roman == "cmr":
        preamble.append("\\renewcommand{\\rmdefault}{cmr}")
        if font_osf == "true":
            preamble.append("\\usepackage{eco}")
            font_osf = "false"
    if roman in ("lmodern", "charter", "utopia", "beraserif", "ccfonts", "chancery"):
        preamble.append("\\usepackage{%s}" % roman)

    sans = fonts["sans"]
    if sans in ("cmss", "lmss", "cmbr"):
        preamble.append("\\renewcommand{\\sfdefault}{%s}" % sans)
    # The previous code looped over the *characters* of "berasans", so this
    # package was never loaded.
    if sans == "berasans":
        preamble.append("\\usepackage{%s}" % sans)

    typewriter = fonts["typewriter"]
    if typewriter in ("cmtt", "lmtt", "cmtl"):
        preamble.append("\\renewcommand{\\ttdefault}{%s}" % typewriter)
    if typewriter in ("courier", "beramono", "luximono"):
        preamble.append("\\usepackage{%s}" % typewriter)

    if font_default_family != "default":
        preamble.append("\\renewcommand{\\familydefault}{\\%s}" % font_default_family)
    if font_osf == "true":
        document.warning("Ignoring `\\font_osf = true'")
|
|
|
|
|
|
def revert_booktabs(document):
    """Remove the booktabs flag and the extra row spacing from all tabulars.

    We remove the booktabs flag or everything else will become a mess.
    A tabular whose end cannot be found is reported and skipped.
    """
    re_features = re.compile('^<features.* booktabs="true".*>$')
    re_row = re.compile(r'^<row.*space="[^"]+".*>$')
    re_tspace = re.compile(r'\s+topspace="[^"]+"')
    re_bspace = re.compile(r'\s+bottomspace="[^"]+"')
    re_ispace = re.compile(r'\s+interlinespace="[^"]+"')
    body = document.body
    i = 0
    while True:
        i = find_token(body, "\\begin_inset Tabular", i)
        if i == -1:
            return
        j = find_end_of_inset(body, i + 1)
        if j == -1:
            document.warning("Malformed LyX document: Could not find end of tabular.")
            # Step past this inset; without this the same line is found
            # again and the loop never terminates.
            i += 1
            continue
        for k in range(i, j):
            line = body[k]
            if re_features.search(line):
                document.warning("Converting 'booktabs' table to normal table.")
                line = line.replace(' booktabs="true"', "")
            if re_row.search(line):
                document.warning("Removing extra row space.")
                line = re_tspace.sub("", line)
                line = re_bspace.sub("", line)
                line = re_ispace.sub("", line)
            body[k] = line
        i += 1
|
|
|
|
|
|
def convert_multiencoding(document, forward):
    """Fix files with multiple encodings.

    Files with an inputencoding of "auto" or "default" and multiple languages
    where at least two languages have different default encodings are encoded
    in multiple encodings for file formats < 249. These files are incorrectly
    read and written (as if the whole file was in the encoding of the main
    language).
    This is not true for files written by CJK-LyX, they are always in the locale
    encoding.

    This function
    - converts from fake unicode values to true unicode if forward is true, and
    - converts from true unicode values to fake unicode if forward is false.
    document.encoding must be set to the old value (format 248) in both cases.

    We do this here and not in LyX.py because it is far easier to do the
    necessary parsing in modern formats than in ancient ones.

    When reverting, a line that cannot be re-encoded is handed to
    revert_unicode_line(), which may expand it into several body lines.
    """
    inset_types = ["Foot", "Note"]
    if document.cjk_encoding != "":
        return
    # Top of the stack is the encoding in effect for the current line.
    encoding_stack = [document.encoding]
    insets = []
    lang_re = re.compile(r"^\\lang\s(\S+)")
    inset_re = re.compile(r"^\\begin_inset\s(\S+)")
    if not forward:  # no need to read file unless we are reverting
        spec_chars = read_unicodesymbols()

    if document.inputencoding not in ("auto", "default"):
        return

    i = 0
    while i < len(document.body):
        result = lang_re.match(document.body[i])
        if result:
            language = result.group(1)
            if language == "default":
                document.warning(
                    f"Resetting encoding from {encoding_stack[-1]} to {document.encoding}.",
                    3,
                )
                encoding_stack[-1] = document.encoding
            else:
                from lyx2lyx_lang import lang

                document.warning(
                    f"Setting encoding from {encoding_stack[-1]} to {lang[language][3]}.",
                    3,
                )
                encoding_stack[-1] = lang[language][3]
        elif find_token(document.body, "\\begin_layout", i, i + 1) == i:
            document.warning("Adding nested encoding %s." % encoding_stack[-1], 3)
            if len(insets) > 0 and insets[-1] in inset_types:
                from lyx2lyx_lang import lang

                encoding_stack.append(lang[document.language][3])
            else:
                encoding_stack.append(encoding_stack[-1])
        elif find_token(document.body, "\\end_layout", i, i + 1) == i:
            document.warning("Removing nested encoding %s." % encoding_stack[-1], 3)
            if len(encoding_stack) == 1:
                # Don't remove the document encoding from the stack
                document.warning("Malformed LyX document: Unexpected `\\end_layout'.")
            else:
                del encoding_stack[-1]
        elif find_token(document.body, "\\begin_inset", i, i + 1) == i:
            inset_result = inset_re.match(document.body[i])
            if inset_result:
                insets.append(inset_result.group(1))
            else:
                insets.append("")
        elif find_token(document.body, "\\end_inset", i, i + 1) == i:
            if insets:
                del insets[-1]
            else:
                # A stray \end_inset used to raise IndexError here.
                document.warning("Malformed LyX document: Unexpected `\\end_inset'.")

        if encoding_stack[-1] != document.encoding:
            if forward:
                # This line has been incorrectly interpreted as if it was
                # encoded in 'encoding'.
                # Convert back to the 8bit string that was in the file.
                orig = document.body[i].encode(document.encoding)
                # Convert the 8bit string that was in the file to unicode
                # with the correct encoding.
                document.body[i] = orig.decode(encoding_stack[-1])
            else:
                try:
                    # Convert unicode to the 8bit string that will be written
                    # to the file with the correct encoding.
                    orig = document.body[i].encode(encoding_stack[-1])
                    # Convert the 8bit string that will be written to the
                    # file to fake unicode with the encoding that will later
                    # be used when writing to the file.
                    document.body[i] = orig.decode(document.encoding)
                except (UnicodeError, LookupError):
                    # The line cannot be represented (or the encoding name is
                    # unknown): replace the offending characters by commands.
                    mod_lines = revert_unicode_line(document, i, insets, spec_chars).split("\n")
                    document.body[i : i + 1] = mod_lines
                    # Skip the lines that were just inserted.
                    i += len(mod_lines) - 1
        i += 1
|
|
|
|
|
|
def convert_utf8(document):
    """Fix up multi-encoding content, then mark the document encoding as UTF-8."""
    # The body must be repaired while document.encoding still holds the old value.
    convert_multiencoding(document, forward=True)
    document.encoding = "utf8"
|
|
|
|
|
|
def revert_utf8(document):
    r"""Set the document encoding to the value corresponding to \inputencoding.

    A missing \inputencoding header is added as "auto" and a value of "utf8"
    is replaced by "auto". document.inputencoding and document.encoding are
    then refreshed (for format 248) and the body is converted back with
    convert_multiencoding().
    """
    header = document.header
    pos = find_token(header, "\\inputencoding", 0)
    if pos == -1:
        header.append("\\inputencoding auto")
    elif get_value(header, "\\inputencoding", pos) == "utf8":
        header[pos] = "\\inputencoding auto"

    inputencoding = get_value(header, "\\inputencoding", 0)
    document.inputencoding = inputencoding
    document.encoding = get_encoding(
        document.language, inputencoding, 248, document.cjk_encoding
    )
    convert_multiencoding(document, forward=False)
|
|
|
|
|
|
# FIXME: Use the version in unicode_symbols.py which has some bug fixes
|
|
def read_unicodesymbols():
    """Read the unicodesymbols list of unicode characters and corresponding commands.

    The file "unicodesymbols" is looked up relative to the directory of the
    running script (sys.argv[0]). Returns a dict mapping each character to a
    list ``[command, flag1, flag2]``; flag1 and flag2 are the preamble and
    other flags as found in the file. Comment lines and lines that do not
    parse are skipped.
    """
    pathname = os.path.abspath(os.path.dirname(sys.argv[0]))
    # NOTE(review): str.strip() removes any of the characters "l", "y", "x",
    # "2" from both ends, not the literal suffix "lyx2lyx". Kept as is because
    # it yields the parent directory when the script lives in ".../lyx2lyx".
    symbols_path = os.path.join(pathname.strip("lyx2lyx"), "unicodesymbols")
    spec_chars = {}
    with open(symbols_path, encoding="utf8") as fp:
        for line in fp:
            if line[0] == "#":
                continue
            line = line.replace(' "', " ")  # remove all quotation marks with spaces before
            line = line.replace('" ', " ")  # remove all quotation marks with spaces after
            line = line.replace(r"\"", '"')  # replace \" by " (for characters with diaeresis)
            try:
                # flag1 and flag2 are preamble and other flags
                ucs4, command, flag1, flag2 = line.split(None, 3)
                # base 0 accepts the same integer literals (e.g. 0x00e4) that
                # were previously passed to eval().
                spec_chars[chr(int(ucs4, 0))] = [command, flag1, flag2]
            except (ValueError, OverflowError):
                # Too few fields, or a code point that is not a valid number.
                continue
    return spec_chars
|
|
|
|
|
|
def revert_unicode_line(document, i, insets, spec_chars, replacement_character="???"):
    """Return body line *i* with unencodable characters replaced.

    Characters that can be encoded in document.encoding are copied. Other
    characters are replaced by the command found in *spec_chars* (wrapped in
    an ERT or Formula inset depending on the innermost entry of *insets*), or
    by *replacement_character* when no command is known. The result may
    contain newlines; callers split it into several body lines.

    For a combining character at the very start of the line the base
    character is removed from the end of body line i - 1.
    """
    # Define strings to start and end ERT and math insets
    ert_intro = (
        "\n\n\\begin_inset ERT\nstatus collapsed\n\\begin_layout %s" % document.default_layout
    )
    ert_outro = "\n\\end_layout\n\n\\end_inset\n"
    math_intro = "\n\\begin_inset Formula $"
    math_outro = "$\n\\end_inset"

    mod_line = ""
    if i and not is_inset_line(document, i - 1):
        last_char = document.body[i - 1][-1:]
    else:
        last_char = ""

    line = document.body[i]
    for character in line:
        try:
            # Try to write the character
            character.encode(document.encoding)
        except (UnicodeError, LookupError):
            # Not encodable (or unknown encoding name): replace it below.
            pass
        else:
            mod_line += character
            last_char = character
            continue

        if character not in spec_chars:
            # Replace with replacement string
            mod_line += replacement_character
            continue

        # Try to replace with ERT/math inset
        command = spec_chars[character][0]  # the command to replace unicode
        flag1 = spec_chars[character][1]
        flag2 = spec_chars[character][2]
        if flag1.find("combining") > -1 or flag2.find("combining") > -1:
            # We have a character that should be combined with the previous
            command += "{" + last_char + "}"
            # Remove the last character. Ignore if it is whitespace
            if len(last_char.rstrip()):
                # last_char was found and is not whitespace
                if mod_line:
                    mod_line = mod_line[:-1]
                else:  # last_char belongs to the last line
                    document.body[i - 1] = document.body[i - 1][:-1]
            # Otherwise the last character was replaced by a command. For now
            # it is ignored. This could be handled better.
        if command[0:2] == "\\\\":
            if command[2:12] == "ensuremath":
                if insets and insets[-1] == "ERT":
                    # math in ERT
                    command = command.replace("\\\\ensuremath{\\\\", "$\n\\backslash\n")
                    command = command.replace("}", "$\n")
                elif not insets or insets[-1] != "Formula":
                    # add a math inset with the replacement character
                    command = command.replace("\\\\ensuremath{\\", math_intro)
                    command = command.replace("}", math_outro)
                else:
                    # we are already in a math inset
                    command = command.replace("\\\\ensuremath{\\", "")
                    command = command.replace("}", "")
            else:
                if insets and insets[-1] == "Formula":
                    # avoid putting an ERT in a math; instead put command as text
                    command = command.replace("\\\\", r"\mathrm{")
                    command = command + "}"
                elif not insets or insets[-1] != "ERT":
                    # add an ERT inset with the replacement character
                    command = command.replace("\\\\", "\n\\backslash\n")
                    command = ert_intro + command + ert_outro
                else:
                    command = command.replace("\\\\", "\n\\backslash\n")
            last_char = ""  # indicate that the character should not be removed
        mod_line += command
    return mod_line
|
|
|
|
|
|
def revert_unicode(document):
    """Transform unicode characters that can not be written using the
    document encoding to commands according to the unicodesymbols
    file. Characters that can not be replaced by commands are replaced by
    a replacement string. Flags other than 'combined' are currently not
    implemented."""
    spec_chars = read_unicodesymbols()
    insets = []  # list of active insets

    # Go through the document to capture all combining characters
    i = 0
    while i < len(document.body):
        line = document.body[i]
        # Check for insets
        if line.find("\\begin_inset") > -1:
            words = line[13:].split()
            # A bare \begin_inset has no type; record it as "" instead of
            # raising IndexError.
            insets.append(words[0] if words else "")
        if line.find("\\end_inset") > -1 and insets:
            del insets[-1]

        # Try to write the line
        try:
            line.encode(document.encoding)
        except (UnicodeError, LookupError):
            # Error, some character(s) in the line need to be replaced
            mod_lines = revert_unicode_line(document, i, insets, spec_chars).split("\n")
            document.body[i : i + 1] = mod_lines
            i += len(mod_lines)
        else:
            # The whole line can be written as is
            i += 1
|
|
|
|
|
|
def revert_cs_label(document):
    """Remove the 'show_label' status line of every CharStyle inset.

    A warning is issued when an inset has no such line before its first
    layout, or before the end of the body.
    """
    body = document.body
    i = 0
    while True:
        i = find_token(body, "\\begin_inset CharStyle", i)
        if i == -1:
            return
        # Search for a line starting with 'show_label'.
        # If it is not there, stop with a warning message.
        i += 1
        while i < len(body):
            if body[i][:10] == "show_label":
                del body[i]
                break
            if body[i][:13] == "\\begin_layout":
                document.warning("Malformed LyX document: Missing 'show_label'.")
                break
            i += 1
        else:
            # Ran off the end of the body (this used to raise IndexError).
            document.warning("Malformed LyX document: Missing 'show_label'.")
            return

        i += 1
|
|
|
|
|
|
def convert_bibitem(document):
    r"""Convert
    \bibitem [option]{argument}

    to

    \begin_inset LatexCommand bibitem
    label "option"
    key "argument"

    \end_inset

    This must be called after convert_commandparams.
    """
    body = document.body
    i = 0
    while True:
        i = find_token(body, "\\bibitem", i)
        if i == -1:
            break
        entry = body[i]

        # Optional argument: from the first "[" to the last "]".
        bracket = entry.find("[")
        if bracket == -1:
            option = None
        else:
            option = entry[bracket + 1 : entry.rfind("]")]
        # Mandatory argument: between the last "{" and the last "}".
        argument = entry[entry.rfind("{") + 1 : entry.rfind("}")]

        inset = ["\\begin_inset LatexCommand bibitem"]
        if option is not None:
            inset.append('label "%s"' % option.replace('"', '\\"'))
        inset.append('key "%s"' % argument.replace('"', '\\"'))
        inset.append("")
        inset.append("\\end_inset")
        body[i : i + 1] = inset
        i += 1
|
|
|
|
|
|
# Commands whose parameters are named ("after", "before", "key").
_CITE_LIKE_COMMANDS = (
    "cite",
    "citet",
    "citep",
    "citealt",
    "citealp",
    "citeauthor",
    "citeyear",
    "citeyearpar",
    "citet*",
    "citep*",
    "citealt*",
    "citealp*",
    "citeauthor*",
    "Citet",
    "Citep",
    "Citealt",
    "Citealp",
    "Citeauthor",
    "Citet*",
    "Citep*",
    "Citealt*",
    "Citealp*",
    "Citeauthor*",
    "citefield",
    "citetitle",
    "cite*",
)
# Commands whose parameters are named ("name", "", "reference").
_REFERENCE_COMMANDS = ("eqref", "pageref", "prettyref", "ref", "vpageref", "vref")

commandparams_info = {
    # command : [option1, option2, argument]
    # An empty name means the command does not take that parameter.
    "bibitem": ["label", "", "key"],
    "bibtex": ["options", "btprint", "bibfiles"],
    **{command: ["after", "before", "key"] for command in _CITE_LIKE_COMMANDS},
    "hfill": ["", "", ""],
    "index": ["", "", "name"],
    "printindex": ["", "", "name"],
    "label": ["", "", "name"],
    **{command: ["name", "", "reference"] for command in _REFERENCE_COMMANDS},
    "tableofcontents": ["", "", "type"],
    "htmlurl": ["name", "", "target"],
    "url": ["name", "", "target"],
}
|
|
|
|
|
|
def convert_commandparams(document):
    r"""Convert

    \begin_inset LatexCommand \cmdname[opt1][opt2]{arg}
    \end_inset

    to

    \begin_inset LatexCommand cmdname
    name1 "opt1"
    name2 "opt2"
    name3 "arg"
    \end_inset

    name1, name2 and name3 can be different for each command; they are taken
    from commandparams_info.
    """
    # \begin_inset LatexCommand bibitem was not the official version (see
    # convert_bibitem()), but could be read in, so we convert it here, too.

    body = document.body
    # Delimiters that close / open a nesting level in each collecting state.
    closers = {"OPTION": "]", "SECOPTION": "]", "CONTENT": "}"}
    openers = {"OPTION": "[", "SECOPTION": "[", "CONTENT": "{"}

    i = 0
    while True:
        i = find_token(body, "\\begin_inset LatexCommand", i)
        if i == -1:
            break
        command = body[i][26:].strip()
        if command == "":
            document.warning("Malformed LyX document: Missing LatexCommand name.")
            i += 1
            continue

        j = find_token(body, "\\end_inset", i + 1)
        if j == -1:
            document.warning("Malformed document")
        else:
            # Fold the continuation lines of the inset into the command text.
            command += "".join(body[i + 1 : j])
            del body[i + 1 : j]

        # The following parser is taken from the original InsetCommandParams::scanCommand
        collected = {"CMDNAME": "", "OPTION": "", "SECOPTION": "", "CONTENT": ""}
        state = "WS"
        # Used to handle things like \command[foo[bar]]{foo{bar}}
        nestdepth = 0
        previous = 0
        for char in command:
            if state == "CMDNAME" and char in (" ", "[", "{"):
                state = "WS"
            if closers.get(state) == char:
                if nestdepth == 0:
                    state = "WS"
                else:
                    nestdepth -= 1
            if openers.get(state) == char:
                nestdepth += 1

            if state == "WS":
                if char == "\\":
                    state = "CMDNAME"
                elif char == "[":
                    # A "[" directly after "]" starts the second option.
                    state = "SECOPTION" if previous == "]" else "OPTION"
                    nestdepth = 0  # Just to be sure
                elif char == "{":
                    state = "CONTENT"
                    nestdepth = 0  # Just to be sure
            else:
                collected[state] += char
            previous = char

        name = collected["CMDNAME"]

        # Now we have parsed the command, output the parameters
        lines = ["\\begin_inset LatexCommand %s" % name]
        parsed_values = (
            (collected["OPTION"], "option"),
            (collected["SECOPTION"], "second option"),
            (collected["CONTENT"], "argument"),
        )
        for slot, (value, description) in enumerate(parsed_values):
            if value == "":
                continue
            param_name = commandparams_info[name][slot]
            if param_name == "":
                document.warning(f"Ignoring invalid {description} `{value}' of command `{name}'.")
            else:
                escaped = value.replace("\\", "\\\\").replace('"', '\\"')
                lines.append('{} "{}"'.format(param_name, escaped))
        body[i : i + 1] = lines
        i += 1
|
|
|
|
|
|
def revert_commandparams(document):
    r"""Convert LatexCommand insets back to the one-line form.

    An inset with named, quoted parameters (see convert_commandparams) becomes

    \begin_inset LatexCommand \cmdname[opt1][opt2]{arg}

    followed by an optional preview line, an empty line and \end_inset.
    A bibitem inset becomes a plain "\bibitem [label]{key}" line instead.
    Insets without a command name or without an end are reported and left
    untouched.
    """
    regex = re.compile(r"(\S+)\s+(.+)")

    def unquote(value):
        # Remove exactly one pair of enclosing quotes; strip('"') alone would
        # also eat an escaped quote at the end of the value (e.g. "a \"b\"").
        if len(value) >= 2 and value[0] == '"' and value[-1] == '"':
            value = value[1:-1]
        else:
            value = value.strip('"')
        return value.replace('\\"', '"').replace("\\\\", "\\")

    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset LatexCommand", i)
        if i == -1:
            break
        words = document.body[i].split()
        if len(words) < 3:
            document.warning("Malformed LyX document: Missing LatexCommand name.")
            i += 1
            continue
        name = words[2]
        j = find_end_of_inset(document.body, i)
        if j == -1:
            # Replacing body[i:j + 1] with j == -1 would insert a second
            # inset header instead of replacing the inset.
            document.warning("Malformed LyX document: Could not find end of command inset.")
            i += 1
            continue

        preview_line = ""
        option1 = ""
        option2 = ""
        argument = ""
        for k in range(i + 1, j):
            match = regex.match(document.body[k])
            if match:
                pname = match.group(1)
                pvalue = match.group(2)
                if pname == "preview":
                    preview_line = document.body[k]
                else:
                    # Raises KeyError for a command missing from the table,
                    # exactly as before.
                    params = commandparams_info[name]
                    if params[0] != "" and pname == params[0]:
                        option1 = unquote(pvalue)
                    elif params[1] != "" and pname == params[1]:
                        option2 = unquote(pvalue)
                    elif params[2] != "" and pname == params[2]:
                        argument = unquote(pvalue)
            elif document.body[k].strip() != "":
                document.warning(
                    f"Ignoring unknown contents `{document.body[k]}' in command inset {name}."
                )

        if name == "bibitem":
            if option1 == "":
                lines = ["\\bibitem {%s}" % argument]
            else:
                lines = [f"\\bibitem [{option1}]{{{argument}}}"]
        else:
            if option1 == "" and option2 == "":
                options = ""
            elif option1 == "":
                options = "[][%s]" % option2
            elif option2 == "":
                options = "[%s]" % option1
            else:
                options = f"[{option1}][{option2}]"
            lines = [f"\\begin_inset LatexCommand \\{name}{options}{{{argument}}}"]
            if preview_line != "":
                lines.append(preview_line)
            lines.append("")
            lines.append("\\end_inset")
        document.body[i : j + 1] = lines
        # Continue on the first line after the rewritten inset (the previous
        # "+ 1" skipped that line).
        i += len(lines)
|
|
|
|
|
|
def revert_nomenclature(document):
    r"""Convert nomenclature entry to ERT.

    Every "LatexCommand nomenclature" inset is replaced by an ERT inset
    containing \nomenclature[prefix]{symbol}{description} (the prefix is
    omitted when empty). If at least one inset was found and the preamble does
    not load nomencl yet, the package and \makenomenclature are appended to
    the preamble.
    """
    regex = re.compile(r"(\S+)\s+(.+)")

    def unquote(value):
        # Remove exactly one pair of enclosing quotes; strip('"') alone would
        # also eat an escaped quote at the end of the value.
        if len(value) >= 2 and value[0] == '"' and value[-1] == '"':
            value = value[1:-1]
        else:
            value = value.strip('"')
        return value.replace('\\"', '"')

    i = 0
    use_nomencl = False
    while True:
        i = find_token(document.body, "\\begin_inset LatexCommand nomenclature", i)
        if i == -1:
            break
        use_nomencl = True
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            # Without this guard the ERT is inserted in front of the inset,
            # which is then found again: an endless, body-growing loop.
            document.warning("Malformed LyX document: Could not find end of nomenclature inset.")
            i += 1
            continue
        symbol = ""
        description = ""
        prefix = ""
        for k in range(i + 1, j):
            match = regex.match(document.body[k])
            if match:
                name = match.group(1)
                value = match.group(2)
                if name == "preview":
                    # The preview setting has no counterpart in the ERT.
                    pass
                elif name == "symbol":
                    symbol = unquote(value)
                elif name == "description":
                    description = unquote(value)
                elif name == "prefix":
                    prefix = unquote(value)
            elif document.body[k].strip() != "":
                document.warning(
                    "Ignoring unknown contents `%s' in nomenclature inset." % document.body[k]
                )
        if prefix == "":
            command = f"nomenclature{{{symbol}}}{{{description}}}"
        else:
            command = f"nomenclature[{prefix}]{{{symbol}}}{{{description}}}"
        ert = [
            "\\begin_inset ERT",
            "status collapsed",
            "",
            "\\begin_layout %s" % document.default_layout,
            "",
            "",
            "\\backslash",
            command,
            "\\end_layout",
            "",
            "\\end_inset",
        ]
        document.body[i : j + 1] = ert
        i += len(ert)
    if (
        use_nomencl
        and find_token(document.preamble, "\\usepackage{nomencl}[2005/09/22]", 0) == -1
    ):
        document.preamble.append("\\usepackage{nomencl}[2005/09/22]")
        document.preamble.append("\\makenomenclature")
|
|
|
|
|
|
def revert_printnomenclature(document):
    r"""Convert printnomenclature to ERT.

    Every "LatexCommand printnomenclature" inset is replaced by an ERT inset
    containing \printnomenclature, with the label width as optional argument
    when one is set. If at least one inset was found and the preamble does not
    load nomencl yet, the package and \makenomenclature are appended to the
    preamble.
    """
    regex = re.compile(r"(\S+)\s+(.+)")

    def unquote(value):
        # Remove exactly one pair of enclosing quotes; strip('"') alone would
        # also eat an escaped quote at the end of the value.
        if len(value) >= 2 and value[0] == '"' and value[-1] == '"':
            value = value[1:-1]
        else:
            value = value.strip('"')
        return value.replace('\\"', '"')

    i = 0
    use_nomencl = False
    while True:
        i = find_token(document.body, "\\begin_inset LatexCommand printnomenclature", i)
        if i == -1:
            break
        use_nomencl = True
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            # Without this guard the ERT is inserted in front of the inset,
            # which is then found again: an endless, body-growing loop.
            document.warning(
                "Malformed LyX document: Could not find end of printnomenclature inset."
            )
            i += 1
            continue
        labelwidth = ""
        for k in range(i + 1, j):
            match = regex.match(document.body[k])
            if match:
                name = match.group(1)
                value = match.group(2)
                if name == "preview":
                    # The preview setting has no counterpart in the ERT.
                    pass
                elif name == "labelwidth":
                    labelwidth = unquote(value)
            elif document.body[k].strip() != "":
                document.warning(
                    "Ignoring unknown contents `%s' in printnomenclature inset."
                    % document.body[k]
                )
        # The ERT must print the list; the previous code emitted
        # \nomenclature here, which is the command for a single entry.
        if labelwidth == "":
            command = "printnomenclature{}"
        else:
            command = "printnomenclature[%s]" % labelwidth
        ert = [
            "\\begin_inset ERT",
            "status collapsed",
            "",
            "\\begin_layout %s" % document.default_layout,
            "",
            "",
            "\\backslash",
            command,
            "\\end_layout",
            "",
            "\\end_inset",
        ]
        document.body[i : j + 1] = ert
        i += len(ert)
    if (
        use_nomencl
        and find_token(document.preamble, "\\usepackage{nomencl}[2005/09/22]", 0) == -1
    ):
        document.preamble.append("\\usepackage{nomencl}[2005/09/22]")
        document.preamble.append("\\makenomenclature")
|
|
|
|
|
|
def convert_esint(document):
    r"""Add the \use_esint setting to the header.

    The new line is inserted directly before \cite_engine with the value 0.
    A header without \cite_engine is reported and left unchanged.
    """
    header = document.header
    anchor = find_token(header, "\\cite_engine", 0)
    if anchor != -1:
        # 0 is off, 1 is auto, 2 is on.
        header.insert(anchor, "\\use_esint 0")
        return
    document.warning("Malformed LyX document: Missing `\\cite_engine'.")
|
|
|
|
|
|
def revert_esint(document):
    r"""Remove the \use_esint setting from the header.

    When the setting was "on" (2) the esint package is loaded through the
    preamble instead. A header without \use_esint is reported and left
    unchanged.
    """
    i = find_token(document.header, "\\use_esint", 0)
    if i == -1:
        document.warning("Malformed LyX document: Missing `\\use_esint'.")
        return
    use_esint = document.header[i].split()[1]
    del document.header[i]
    # 0 is off, 1 is auto, 2 is on.
    # The value is read from the header as a string, so compare with "2";
    # the previous comparison with the integer 2 could never be true.
    if use_esint == "2":
        document.preamble.append("\\usepackage{esint}")
|
|
|
|
|
|
def revert_clearpage(document):
    """Replace every clearpage line of the body by an ERT inset holding the command."""
    body = document.body
    pos = find_token(body, "\\clearpage", 0)
    while pos != -1:
        ert = [
            "\\begin_inset ERT",
            "status collapsed",
            "",
            "\\begin_layout %s" % document.default_layout,
            "",
            "",
            "\\backslash",
            "clearpage",
            "\\end_layout",
            "",
            "\\end_inset",
        ]
        body[pos : pos + 1] = ert
        # Resume the search on the line after the start of the new inset.
        pos = find_token(body, "\\clearpage", pos + 1)
|
|
|
|
|
|
def revert_cleardoublepage(document):
    r"""Replace every \cleardoublepage line of the body by an ERT inset.

    The ERT inset contains the raw LaTeX command ``\cleardoublepage``
    inside a paragraph of the document's default layout.
    """
    body = document.body
    pos = find_token(body, "\\cleardoublepage", 0)
    while pos != -1:
        ert_inset = [
            "\\begin_inset ERT",
            "status collapsed",
            "",
            "\\begin_layout %s" % document.default_layout,
            "",
            "",
            "\\backslash",
            "cleardoublepage",
            "\\end_layout",
            "",
            "\\end_inset",
        ]
        body[pos : pos + 1] = ert_inset
        # Continue searching behind the first line of the new inset.
        pos = find_token(body, "\\cleardoublepage", pos + 1)
|
|
|
|
|
|
def convert_lyxline(document):
    r"""Remove font size commands that directly precede a \lyxline.

    The old \lyxline definition does not use the font size to change the
    line thickness, the new definition does.  Imported \lyxlines would
    therefore get a different thickness, so a font size command in front
    of a \lyxline is removed to keep the output unchanged.

    Every ``\size <name>`` line of the body is examined.  (Previously the
    scan for a given size stopped at the first ``\size`` line that was not
    followed by a \lyxline, so later occurrences were silently skipped.)
    """
    fontsizes = (
        "tiny",
        "scriptsize",
        "footnotesize",
        "small",
        "normalsize",
        "large",
        "Large",
        "LARGE",
        "huge",
        "Huge",
    )
    body = document.body
    for size in fontsizes:
        token = "\\size " + size
        i = find_token(body, token, 0)
        while i != -1:
            # the corresponding fontsize command is always 2 lines before
            # the \lyxline, and the line in between must not be a \lyxline
            if (
                i + 2 < len(body)
                and body[i + 2].startswith("\\lyxline")
                and not body[i + 1].startswith("\\lyxline")
            ):
                del body[i]
            # As before, the line following the examined one is skipped.
            i = find_token(body, token, i + 1)
|
|
|
|
|
|
def revert_encodings(document):
    r"""Set the input encodings introduced with this format to "auto".

    A missing ``\inputencoding`` header line is added with the value
    "auto".  Afterwards ``document.inputencoding`` is refreshed from the
    header.
    """
    new_encodings = (
        "8859-6",
        "8859-8",
        "cp437",
        "cp437de",
        "cp850",
        "cp852",
        "cp855",
        "cp858",
        "cp862",
        "cp865",
        "cp866",
        "cp1250",
        "cp1252",
        "cp1256",
        "cp1257",
        "latin10",
        "pt254",
        "tis620-0",
    )
    header = document.header
    pos = find_token(header, "\\inputencoding", 0)
    if pos == -1:
        header.append("\\inputencoding auto")
    elif get_value(header, "\\inputencoding", pos) in new_encodings:
        header[pos] = "\\inputencoding auto"
    document.inputencoding = get_value(header, "\\inputencoding", 0)
|
|
|
|
|
|
def convert_caption(document):
    """Convert caption layouts to caption insets.

    Every ``Caption`` paragraph becomes a paragraph of the default layout
    that contains a Caption inset holding the former paragraph contents.
    Conversion stops with a warning at the first caption paragraph whose
    end cannot be found.
    """
    body = document.body
    start = 0
    while True:
        start = find_token(body, "\\begin_layout Caption", start)
        if start == -1:
            return
        end = find_end_of_layout(body, start)
        if end == -1:
            document.warning("Malformed LyX document: Missing `\\end_layout'.")
            return

        # Close the new inset first: inserting at the end of the paragraph
        # does not move the line index of its beginning.
        closing_lines = ["\\end_layout", "", "\\end_inset", "", ""]
        body[end:end] = closing_lines
        opening_lines = [
            "\\begin_layout %s" % document.default_layout,
            "\\begin_inset Caption",
            "",
            "\\begin_layout %s" % document.default_layout,
        ]
        body[start : start + 1] = opening_lines
        start += 1
|
|
|
|
|
|
def revert_caption(document):
    "Convert caption insets to caption layouts."
    " This assumes that the text class has a caption style. "
    # Each Caption inset is turned into a "\begin_layout Caption" paragraph.
    # The function returns (after a warning) as soon as a required
    # \begin_layout, \end_inset or \end_layout line cannot be found.
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Caption", i)
        if i == -1:
            return

        # We either need to delete the previous \begin_layout line, or we
        # need to end the previous layout if this inset is not in the first
        # position of the paragraph.
        layout_before = find_token_backwards(document.body, "\\begin_layout", i)
        if layout_before == -1:
            document.warning("Malformed LyX document: Missing `\\begin_layout'.")
            return
        # Remembered so that the enclosing paragraph can be reopened after
        # the caption if text follows the inset.
        layout_line = document.body[layout_before]
        # The inset is the first thing in the paragraph if only empty lines
        # lie between the \begin_layout line and the inset.
        del_layout_before = True
        l = layout_before + 1
        while l < i:
            if document.body[l] != "":
                del_layout_before = False
                break
            l = l + 1
        if del_layout_before:
            # Drop the \begin_layout line and the empty lines after it; the
            # inset line moves up to index layout_before.
            del document.body[layout_before:i]
            i = layout_before
        else:
            # Close the paragraph in front of the inset; the inset line
            # moves down by the two inserted lines.
            document.body[i:i] = ["\\end_layout", ""]
            i = i + 2

        # Find start of layout in the inset and end of inset
        j = find_token(document.body, "\\begin_layout", i)
        if j == -1:
            document.warning("Malformed LyX document: Missing `\\begin_layout'.")
            return
        k = find_end_of_inset(document.body, i)
        if k == -1:
            document.warning("Malformed LyX document: Missing `\\end_inset'.")
            return

        # We either need to delete the following \end_layout line, or we need
        # to restart the old layout if this inset is not at the paragraph end.
        layout_after = find_token(document.body, "\\end_layout", k)
        if layout_after == -1:
            document.warning("Malformed LyX document: Missing `\\end_layout'.")
            return
        # The inset is the last thing in the paragraph if only empty lines
        # lie between the \end_inset line and the next \end_layout line.
        del_layout_after = True
        l = k + 1
        while l < layout_after:
            if document.body[l] != "":
                del_layout_after = False
                break
            l = l + 1
        if del_layout_after:
            del document.body[k + 1 : layout_after + 1]
        else:
            document.body[k + 1 : k + 1] = [layout_line, ""]

        # delete \begin_layout and \end_inset and replace \begin_inset with
        # "\begin_layout Caption". This works because we can only have one
        # paragraph in the caption inset: The old \end_layout will be recycled.
        # The deletions run from the back (k) to the front (j, i) so that
        # the smaller indices stay valid.
        del document.body[k]
        if document.body[k] == "":
            del document.body[k]
        del document.body[j]
        if document.body[j] == "":
            del document.body[j]
        document.body[i] = "\\begin_layout Caption"
        if document.body[i + 1] == "":
            del document.body[i + 1]
        i = i + 1
|
|
|
|
|
|
# Accents of InsetLaTeXAccent
# Maps the accent command name (the part after the backslash) to the
# unicode combining character that is appended to the accented character.
accent_map = {
    "`": "\u0300",  # grave
    "'": "\u0301",  # acute
    "^": "\u0302",  # circumflex
    "~": "\u0303",  # tilde
    "=": "\u0304",  # macron
    "u": "\u0306",  # breve
    ".": "\u0307",  # dot above
    '"': "\u0308",  # diaeresis
    "r": "\u030a",  # ring above
    "H": "\u030b",  # double acute
    "v": "\u030c",  # caron
    "b": "\u0320",  # minus sign below
    "d": "\u0323",  # dot below
    "c": "\u0327",  # cedilla
    "k": "\u0328",  # ogonek
    "t": "\u0361",  # tie. This is special: It spans two characters, but
    # only one is given as argument, so we don't need to
    # treat it differently.
}


# special accents of InsetLaTeXAccent without argument
# Maps the command name directly to the resulting unicode character.
special_accent_map = {
    "i": "\u0131",  # dotless i
    "j": "\u0237",  # dotless j
    "l": "\u0142",  # l with stroke
    "L": "\u0141",  # L with stroke
}


# special accent arguments of InsetLaTeXAccent
# Maps a LaTeX command used as the accented character to the unicode
# character it stands for.
accented_map = {
    "\\i": "\u0131",  # dotless i
    "\\j": "\u0237",  # dotless j
}
|
|
|
|
|
|
def _convert_accent(accent, accented_char):
    """Return the unicode string for an accent applied to a character.

    accent is the accent command name without backslash, accented_char
    the argument of the accent (possibly empty).  The result is NFC
    normalized.  An empty string is returned when the combination cannot
    be converted.
    """
    kind, base = accent, accented_char
    if base == "":
        if kind in special_accent_map:
            return special_accent_map[kind]
        # a missing char is treated as space by LyX
        base = " "
    elif kind == "q" and base in ("t", "d", "l", "L"):
        # Special caron, only used with t, d, l and L.
        # It is not in the map because we convert it to the same unicode
        # character as the normal caron: \q{} is only defined if babel with
        # the czech or slovak language is used, and the normal caron
        # produces the correct output if the T1 font encoding is used.
        # For the same reason we never convert to \q{} in the other direction.
        kind = "v"
    elif base in accented_map:
        base = accented_map[base]
    elif len(base) > 1:
        # We can only convert accents on a single char
        return ""
    combining = accent_map.get(kind)
    if not combining:
        return ""
    return unicodedata.normalize("NFC", f"{base}{combining}")
|
|
|
|
|
|
def convert_ertbackslash(body, i, ert, default_layout):
    r"""Append the text ert to body[i] as valid ERT code.

    A backslash is written as "\backslash " and the text continues on a
    new line inserted behind it.  A '\n' ends the current paragraph and
    starts a new one with the layout default_layout.  All other characters
    are appended unchanged.

    Returns the (maybe incremented) index of the line that received the
    last character.
    """
    pos = i
    for char in ert:
        if char == "\n":
            body[pos + 1 : pos + 1] = [
                "\\end_layout",
                "",
                "\\begin_layout %s" % default_layout,
                "",
            ]
            pos += 4
        elif char == "\\":
            body[pos] += "\\backslash "
            pos += 1
            body.insert(pos, "")
        else:
            body[pos] += char
    return pos
|
|
|
|
|
|
def convert_accent(document):
    r"""Convert InsetLaTeXAccent insets to unicode characters.

    An inset that cannot be converted to a unicode character is replaced
    by an ERT inset containing the LaTeX accent command, and a warning is
    issued.
    """
    # The following forms are supported by LyX:
    # '\i \"{a}' (standard form, as written by LyX)
    # '\i \"{}' (standard form, as written by LyX if the accented char is a space)
    # '\i \"{ }' (also accepted if the accented char is a space)
    # '\i \" a' (also accepted)
    # '\i \"' (also accepted)
    re_wholeinset = re.compile(r"^(.*)(\\i\s+)(.*)$")
    re_contents = re.compile(r"^([^\s{]+)(.*)$")
    re_accentedcontents = re.compile(r"^\s*{?([^{}]*)}?\s*$")
    i = 0
    while True:
        i = find_re(document.body, re_wholeinset, i)
        if i == -1:
            return
        match = re_wholeinset.match(document.body[i])
        prefix = match.group(1)
        contents = match.group(3).strip()
        match = re_contents.match(contents)
        if match:
            # Strip first char (always \)
            accent = match.group(1)[1:]
            accented_contents = match.group(2).strip()
            match = re_accentedcontents.match(accented_contents)
            if match:
                accented_char = match.group(1)
                converted = _convert_accent(accent, accented_char)
                if converted != "":
                    document.body[i] = f"{prefix}{converted}"
                    i += 1
                    continue
                # Normalize contents (a plain string; this used to be an
                # accidental one-element tuple)
                contents = f"{accent}{{{accented_char}}}"
            else:
                # The argument has nested or unbalanced braces.  Keep it
                # verbatim for the ERT inset instead of failing with an
                # AttributeError on the missing match object.
                contents = f"{accent}{accented_contents}"
        document.warning("Converting unknown InsetLaTeXAccent `\\i %s' to ERT." % contents)
        document.body[i] = prefix
        document.body[i + 1 : i + 1] = [
            "\\begin_inset ERT",
            "status collapsed",
            "",
            "\\begin_layout %s" % document.default_layout,
            "",
            "",
            "",
        ]
        # i + 7 is the last (empty) line inserted above; the ERT text is
        # appended there.
        i = convert_ertbackslash(
            document.body, i + 7, "\\%s" % contents, document.default_layout
        )
        document.body[i + 1 : i + 1] = ["\\end_layout", "", "\\end_inset"]
        i += 3
|
|
|
|
|
|
def is_inset_line(document, i):
    """Tell whether line i of the body has an inset.

    This is the case if the line starts with a backslash or if one of its
    last two whitespace separated words contains a backslash.
    """
    line = document.body[i]
    if line.startswith("\\"):
        return True
    tail_words = line.split()[-2:]
    return "\\" in "".join(tail_words)
|
|
|
|
|
|
# A wrapper around normalize that handles special cases (cf. bug 3313)
|
|
def normalize(form, text):
    """Normalize text to the unicode normal form `form`.

    The characters OHM SIGN (U+2126) and ANGSTROM SIGN (U+212B) are copied
    unchanged; the runs of text between them are passed separately to
    unicodedata.normalize().
    """
    # do not normalize OHM, ANGSTROM
    protected = (0x2126, 0x212B)
    pieces = []
    pending = []
    for char in text:
        if ord(char) not in protected:
            pending.append(char)
            continue
        if pending:
            pieces.append(unicodedata.normalize(form, "".join(pending)))
            pending = []
        pieces.append(char)
    if pending:
        pieces.append(unicodedata.normalize(form, "".join(pending)))
    return "".join(pieces)
|
|
|
|
|
|
def revert_accent(document):
    r"""Replace accented characters by InsetLaTeXAccent insets.

    Only characters that cannot be encoded in the encoding of the text
    they occur in are replaced.  The body is decomposed (NFD) for the
    search and composed again (NFC) at the end.
    """
    inverse_accent_map = {value: key for key, value in accent_map.items()}
    inverse_special_accent_map = {value: key for key, value in special_accent_map.items()}
    inverse_accented_map = {value: key for key, value in accented_map.items()}

    # Since LyX may insert a line break within a word we must combine all
    # words before unicode normalization.
    # We do this only if the next line starts with an accent, otherwise we
    # would create things like '\begin_inset ERTstatus'.
    for i in range(len(document.body) - 1):
        if document.body[i] == "" or document.body[i + 1] == "" or document.body[i][-1] == " ":
            continue
        if document.body[i + 1][0] in inverse_accent_map and not is_inset_line(document, i):
            # the last character of this line and the first of the next line
            # form probably a surrogate pair, inline insets are excluded (second part of the test)
            while len(document.body[i + 1]) > 0 and document.body[i + 1][0] != " ":
                document.body[i] += document.body[i + 1][0]
                document.body[i + 1] = document.body[i + 1][1:]

    # Normalize to "Normal form D" (NFD, also known as canonical decomposition).
    # This is needed to catch all accented characters.
    for i in range(len(document.body)):
        # Unfortunately we have a mixture of unicode strings and plain strings,
        # because we never use u'xxx' for string literals, but 'xxx'.
        # Therefore we may have to try two times to normalize the data.
        try:
            document.body[i] = normalize("NFD", document.body[i])
        except TypeError:
            document.body[i] = normalize("NFD", str(document.body[i], "utf-8"))

    # Replace accented characters with InsetLaTeXAccent
    # Do not convert characters that can be represented in the chosen
    # encoding.
    encoding_stack = [
        get_encoding(document.language, document.inputencoding, 248, document.cjk_encoding)
    ]
    lang_re = re.compile(r"^\\lang\s(\S+)")
    # Neither attribute is modified below, so the test is done only once.
    track_encoding = (
        document.inputencoding == "auto" or document.inputencoding == "default"
    ) and document.cjk_encoding != ""

    i = 0
    while i < len(document.body):
        if track_encoding:
            # Track the encoding of the current line.
            # Each branch must advance i itself before `continue`;
            # otherwise the while loop would never terminate.
            result = lang_re.match(document.body[i])
            if result:
                language = result.group(1)
                if language == "default":
                    encoding_stack[-1] = document.encoding
                else:
                    from lyx2lyx_lang import lang

                    encoding_stack[-1] = lang[language][3]
                i += 1
                continue
            elif find_token(document.body, "\\begin_layout", i, i + 1) == i:
                encoding_stack.append(encoding_stack[-1])
                i += 1
                continue
            elif find_token(document.body, "\\end_layout", i, i + 1) == i:
                del encoding_stack[-1]
                i += 1
                continue

        for j in range(len(document.body[i])):
            # dotless i and dotless j are both in special_accent_map and can
            # occur as an accented character, so we need to test that the
            # following character is no accent
            if document.body[i][j] in inverse_special_accent_map and (
                j == len(document.body[i]) - 1
                or document.body[i][j + 1] not in inverse_accent_map
            ):
                accent = document.body[i][j]
                try:
                    accent.encode(encoding_stack[-1])
                except UnicodeEncodeError:
                    # Insert the rest of the line as new line
                    if j < len(document.body[i]) - 1:
                        document.body.insert(i + 1, document.body[i][j + 1 :])
                    # Delete the accented character
                    document.body[i] = document.body[i][:j]
                    # Finally add the InsetLaTeXAccent
                    document.body[i] += "\\i \\%s{}" % inverse_special_accent_map[accent]
                    # The rest of the line is handled as the next line.
                    break
            elif j > 0 and document.body[i][j] in inverse_accent_map:
                accented_char = document.body[i][j - 1]
                if accented_char == " ":
                    # Conform to LyX output
                    accented_char = ""
                elif accented_char in inverse_accented_map:
                    accented_char = inverse_accented_map[accented_char]
                accent = document.body[i][j]
                try:
                    normalize("NFC", accented_char + accent).encode(encoding_stack[-1])
                except UnicodeEncodeError:
                    # Insert the rest of the line as new line
                    if j < len(document.body[i]) - 1:
                        document.body.insert(i + 1, document.body[i][j + 1 :])
                    # Delete the accented characters
                    document.body[i] = document.body[i][: j - 1]
                    # Finally add the InsetLaTeXAccent
                    document.body[i] += f"\\i \\{inverse_accent_map[accent]}{{{accented_char}}}"
                    # The rest of the line is handled as the next line.
                    break
        i = i + 1

    # Normalize to "Normal form C" (NFC, pre-composed characters) again
    for i in range(len(document.body)):
        document.body[i] = normalize("NFC", document.body[i])
|
|
|
|
|
|
def normalize_font_whitespace_259(document):
    """Move whitespace at the border of a font change out of the change.

    Before format 259 font changes were ignored if a whitespace was the
    first or last character in the sequence.  This handles the series,
    emph, color, shape, bar and family properties; the work is done by
    normalize_font_whitespace().
    """
    property_defaults = (
        ("\\series", "default"),
        ("\\emph", "default"),
        ("\\color", "none"),
        ("\\shape", "default"),
        ("\\bar", "default"),
        ("\\family", "default"),
    )
    return normalize_font_whitespace(document, dict(property_defaults))
|
|
|
|
|
|
def normalize_font_whitespace_274(document):
    """Move whitespace at the border of a language change out of the change.

    Before format 259 (sic) font changes were ignored if a whitespace was
    the first or last character in the sequence.  This was corrected for
    most font properties in format 259, but the language was forgotten
    then.  This function applies the same conversion (transferring the
    whitespace outside) to font language changes; the work is done by
    normalize_font_whitespace().
    """
    language_default = {"\\lang": "default"}
    return normalize_font_whitespace(document, language_default)
|
|
|
|
|
|
def get_paragraph_language(document, i):
    r"""Return the language of the paragraph that contains body line i.

    If the first nonempty line of the paragraph is a \lang command, its
    argument is the paragraph's language; otherwise the paragraph's
    language is the document's language.
    """
    body = document.body
    layout_start = find_beginning_of_layout(body, i)
    first_line = find_nonempty_line(body, layout_start + 1)
    tokens = body[first_line].split()
    if len(tokens) > 1 and tokens[0] == "\\lang":
        return tokens[1]
    return document.language
|
|
|
|
|
|
def normalize_font_whitespace(document, char_properties):
    """Before format 259 the font changes were ignored if a
    whitespace was the first or last character in the sequence, this function
    transfers the whitespace outside. Only a change in one of the properties
    in the provided char_properties is handled by this function.

    char_properties maps a font property token (e.g. "\\\\emph") to its
    default value.  If it contains "\\\\lang", that entry is overwritten
    with the language of each paragraph encountered, i.e. the passed
    dictionary is modified.  Nothing is done unless the document backend
    is "latex".  document.body is modified in place; nothing is returned."""

    if document.backend != "latex":
        return

    lines = document.body

    # Properties that currently differ from their default value, mapped to
    # the value they were set to.
    changes = {}

    i = 0
    while i < len(lines):
        words = lines[i].split()

        if len(words) > 0 and words[0] == "\\begin_layout":
            # a new paragraph resets all font changes
            changes.clear()
            # also reset the default language to be the paragraph's language
            if "\\lang" in list(char_properties.keys()):
                char_properties["\\lang"] = get_paragraph_language(document, i + 1)

        elif len(words) > 1 and words[0] in list(char_properties.keys()):
            # we have a font change
            if char_properties[words[0]] == words[1]:
                # property gets reset
                if words[0] in list(changes.keys()):
                    del changes[words[0]]
                defaultproperty = True
            else:
                # property gets set
                changes[words[0]] = words[1]
                defaultproperty = False

            # We need to explicitly reset all changed properties if we find
            # a space below, because LyX 1.4 would output the space after
            # closing the previous change and before starting the new one,
            # and closing a font change means to close all properties, not
            # just the changed one.

            if lines[i - 1] and lines[i - 1][-1] == " ":
                lines[i - 1] = lines[i - 1][:-1]
                # a space before the font change
                # added_lines is built around the space: resets of the
                # other changed properties go in front of it, their
                # re-application goes behind it.
                added_lines = [" "]
                for k in list(changes.keys()):
                    # exclude property k because that is already in lines[i]
                    if k != words[0]:
                        added_lines[1:1] = [f"{k} {changes[k]}"]
                for k in list(changes.keys()):
                    # exclude property k because that must be added below anyway
                    if k != words[0]:
                        added_lines[0:0] = [f"{k} {char_properties[k]}"]
                if defaultproperty:
                    # Property is reset in lines[i], so add the new stuff afterwards
                    lines[i + 1 : i + 1] = added_lines
                else:
                    # Reset property for the space
                    added_lines[0:0] = [f"{words[0]} {char_properties[words[0]]}"]
                    lines[i:i] = added_lines
                # Skip the lines that were just inserted.
                i = i + len(added_lines)

            elif (
                lines[i + 1]
                and lines[i + 1][0] == " "
                and (len(changes) > 0 or not defaultproperty)
            ):
                # a space after the font change
                if lines[i + 1] == " " and lines[i + 2]:
                    next_words = lines[i + 2].split()
                    if len(next_words) > 0 and next_words[0] == words[0]:
                        # a single blank with a property different from the
                        # previous and the next line must not be changed
                        i = i + 2
                        continue
                # Remove the leading space of the following line; it is
                # re-added in front of the font change via added_lines.
                lines[i + 1] = lines[i + 1][1:]
                added_lines = [" "]
                for k in list(changes.keys()):
                    # exclude property k because that is already in lines[i]
                    if k != words[0]:
                        added_lines[1:1] = [f"{k} {changes[k]}"]
                for k in list(changes.keys()):
                    # exclude property k because that must be added below anyway
                    if k != words[0]:
                        added_lines[0:0] = [f"{k} {char_properties[k]}"]
                # Reset property for the space
                added_lines[0:0] = [f"{words[0]} {char_properties[words[0]]}"]
                lines[i:i] = added_lines
                # Skip the lines that were just inserted.
                i = i + len(added_lines)

        i = i + 1
|
|
|
|
|
|
def revert_utf8x(document):
    r"""Set the input encoding utf8x to utf8.

    A missing ``\inputencoding`` header line is added with the value
    "auto".  Afterwards ``document.inputencoding`` is refreshed from the
    header.
    """
    header = document.header
    pos = find_token(header, "\\inputencoding", 0)
    if pos == -1:
        header.append("\\inputencoding auto")
    elif get_value(header, "\\inputencoding", pos) == "utf8x":
        header[pos] = "\\inputencoding utf8"
    document.inputencoding = get_value(header, "\\inputencoding", 0)
|
|
|
|
|
|
def revert_utf8plain(document):
    r"""Set the input encoding utf8-plain to utf8.

    A missing ``\inputencoding`` header line is added with the value
    "auto".  Afterwards ``document.inputencoding`` is refreshed from the
    header.
    """
    header = document.header
    pos = find_token(header, "\\inputencoding", 0)
    if pos == -1:
        header.append("\\inputencoding auto")
    elif get_value(header, "\\inputencoding", pos) == "utf8-plain":
        header[pos] = "\\inputencoding utf8"
    document.inputencoding = get_value(header, "\\inputencoding", 0)
|
|
|
|
|
|
def revert_beamer_alert(document):
    r"""Revert beamer's \alert inset back to ERT.

    The inset line is replaced by ``\begin_inset ERT`` and the line that
    follows the inset's first ``\begin_layout`` line is wrapped in
    ``\alert{...}``.  A warning is issued and the conversion stops if that
    line cannot be found.
    """
    body = document.body
    i = 0
    while True:
        i = find_token(body, "\\begin_inset CharStyle Alert", i)
        if i == -1:
            return
        body[i] = "\\begin_inset ERT"
        i = i + 1
        # Look for the first paragraph of the inset.  The search is bounded
        # so that a malformed document cannot cause an IndexError.
        while i < len(body) and body[i][:13] != "\\begin_layout":
            i = i + 1
        if i + 1 >= len(body):
            document.warning("Malformed LyX document: Missing `\\begin_layout'.")
            return
        # Insert the \alert command
        body[i + 1] = "\\alert{" + body[i + 1] + "}"

        i = i + 1
|
|
|
|
|
|
def revert_beamer_structure(document):
    r"""Revert beamer's \structure inset back to ERT.

    The inset line is replaced by ``\begin_inset ERT`` and the line that
    follows the inset's first ``\begin_layout`` line is wrapped in
    ``\structure{...}``.  A warning is issued and the conversion stops if
    that line cannot be found.
    """
    body = document.body
    i = 0
    while True:
        i = find_token(body, "\\begin_inset CharStyle Structure", i)
        if i == -1:
            return
        body[i] = "\\begin_inset ERT"
        i = i + 1
        # Look for the first paragraph of the inset.  The search is bounded
        # so that a malformed document cannot cause an IndexError.
        while i < len(body) and body[i][:13] != "\\begin_layout":
            i = i + 1
        if i + 1 >= len(body):
            document.warning("Malformed LyX document: Missing `\\begin_layout'.")
            return
        # Insert the \structure command
        body[i + 1] = "\\structure{" + body[i + 1] + "}"

        i = i + 1
|
|
|
|
|
|
def convert_changes(document):
    r"""Switch output_changes off if tracking_changes is off.

    A warning is issued and nothing is changed when either the
    ``\tracking_changes`` or the ``\output_changes`` header line is
    missing.
    """
    header = document.header
    tracking_pos = find_token(header, "\\tracking_changes", 0)
    if tracking_pos == -1:
        document.warning("Malformed lyx document: Missing '\\tracking_changes'.")
        return
    output_pos = find_token(header, "\\output_changes", 0)
    if output_pos == -1:
        document.warning("Malformed lyx document: Missing '\\output_changes'.")
        return
    tracking = get_value(header, "\\tracking_changes", tracking_pos)
    output = get_value(header, "\\output_changes", output_pos)
    if (tracking, output) == ("false", "true"):
        header[output_pos] = "\\output_changes false"
|
|
|
|
|
|
def revert_ascii(document):
    r"""Set the input encoding ascii to auto.

    A missing ``\inputencoding`` header line is added with the value
    "auto".  Afterwards ``document.inputencoding`` is refreshed from the
    header.
    """
    header = document.header
    pos = find_token(header, "\\inputencoding", 0)
    if pos == -1:
        header.append("\\inputencoding auto")
    elif get_value(header, "\\inputencoding", pos) == "ascii":
        header[pos] = "\\inputencoding auto"
    document.inputencoding = get_value(header, "\\inputencoding", 0)
|
|
|
|
|
|
def normalize_language_name(document):
    r"""Rename the document languages brazil and portuges.

    "brazil" becomes "brazilian" and "portuges" becomes "portuguese", both
    in ``document.language`` and in the ``\language`` header line.
    """
    lang = {"brazil": "brazilian", "portuges": "portuguese"}

    if document.language in lang:
        document.language = lang[document.language]
        i = find_token(document.header, "\\language", 0)
        # Without this test a missing \language line (i == -1) would
        # overwrite the last header line.
        if i != -1:
            document.header[i] = "\\language %s" % document.language
|
|
|
|
|
|
def revert_language_name(document):
    r"""Restore the old names of the languages brazilian and portuguese.

    "brazilian" becomes "brazil" and "portuguese" becomes "portuges", both
    in ``document.language`` and in the ``\language`` header line.
    """
    lang = {"brazilian": "brazil", "portuguese": "portuges"}

    if document.language in lang:
        document.language = lang[document.language]
        i = find_token(document.header, "\\language", 0)
        # Without this test a missing \language line (i == -1) would
        # overwrite the last header line.
        if i != -1:
            document.header[i] = "\\language %s" % document.language
|
|
|
|
|
|
#
|
|
# \textclass cv -> \textclass simplecv
|
|
def convert_cv_textclass(document):
    """Rename the text class "cv" to "simplecv"."""
    if document.textclass != "cv":
        return
    document.textclass = "simplecv"
|
|
|
|
|
|
def revert_cv_textclass(document):
    """Rename the text class "simplecv" back to "cv"."""
    if document.textclass != "simplecv":
        return
    document.textclass = "cv"
|
|
|
|
|
|
#
|
|
# add scaleBeforeRotation graphics param
|
|
def convert_graphics_rotation(document):
    """Add the scaleBeforeRotation graphics parameter.

    The parameter is added to every Graphics inset that has a rotateAngle
    parameter together with a width, height or scale parameter.  An inset
    whose end cannot be found is skipped with a warning.
    """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Graphics", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            # should not happen
            document.warning("Malformed LyX document: Could not find end of graphics inset.")
            # Do not go on with j == -1: the searches below and the
            # insert() would then work on the wrong lines.
            i = i + 1
            continue
        # Search for rotateAngle and width or height or scale
        # If these params are not there, nothing needs to be done.
        k = find_token(document.body, "\trotateAngle", i + 1, j)
        l = find_tokens(document.body, ["\twidth", "\theight", "\tscale"], i + 1, j)
        if k != -1 and l != -1:
            # Written with a leading tab like the other graphics
            # parameters; revert_graphics_rotation searches for
            # "\tscaleBeforeRotation".
            document.body.insert(j, "\tscaleBeforeRotation")
        i = i + 1
|
|
|
|
|
|
#
|
|
# remove scaleBeforeRotation graphics param
|
|
def revert_graphics_rotation(document):
    """Remove the scaleBeforeRotation graphics parameter.

    If a Graphics inset has no scaleBeforeRotation parameter but has a
    rotateAngle together with a width, height or scale parameter, the
    rotation angle is moved into the "special" parameter instead.  An
    inset whose end cannot be found is skipped with a warning.
    """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Graphics", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            # should not happen
            document.warning("Malformed LyX document: Could not find end of graphics inset.")
            # Do not go on with j == -1: the searches below would then
            # work on the wrong lines.
            i = i + 1
            continue
        # If there's a scaleBeforeRotation param, just remove that
        k = find_token(document.body, "\tscaleBeforeRotation", i + 1, j)
        if k != -1:
            del document.body[k]
        else:
            # if not, and if we have rotateAngle and width or height or scale,
            # we have to put the rotateAngle value to special
            rotateAngle = get_value(document.body, "rotateAngle", i + 1, j)
            special = get_value(document.body, "special", i + 1, j)
            if rotateAngle != "":
                k = find_tokens(document.body, ["\twidth", "\theight", "\tscale"], i + 1, j)
                # An inset without size parameter needs no change.  Only
                # this inset is skipped; a `break` here used to end the
                # processing of all following insets as well.
                if k != -1:
                    if special == "":
                        document.body.insert(j - 1, "\tspecial angle=%s" % rotateAngle)
                    else:
                        l = find_token(document.body, "\tspecial", i + 1, j)
                        document.body[l] = document.body[l].replace(
                            special, f"angle={rotateAngle},{special}"
                        )
                    k = find_token(document.body, "\trotateAngle", i + 1, j)
                    if k != -1:
                        del document.body[k]
        i = i + 1
|
|
|
|
|
|
def convert_tableborder(document):
    """Remove the "|" in front of ">{" in cells that have a left line.

    The problem is: LyX doubles the table cell border as it ignores the "|"
    character in the cell arguments.  A fix takes care of this and
    therefore the "|" has to be removed.
    """
    body = document.body
    for index, line in enumerate(body):
        # the two tokens have to be in one line
        if 'leftline="true"' in line and "|>{" in line:
            # delete the "|" of the first occurrence
            body[index] = line.replace("|>{", ">{", 1)
|
|
|
|
|
|
def revert_tableborder(document):
    """Add a "|" in front of the first ">{" in cells that have a left line."""
    body = document.body
    for index, line in enumerate(body):
        # the two tokens have to be in one line
        if 'leftline="true"' in line and ">{" in line:
            # add the "|"
            body[index] = line.replace(">{", "|>{", 1)
|
|
|
|
|
|
def revert_armenian(document):
    r"""Revert the armscii8 encoding and the armenian language.

    The input encoding armscii8 is set to auto in the header.  For
    armenian documents ``\usepackage{armtex}`` is added to the preamble
    and the document language is set to english.
    """
    # set inputencoding from armscii8 to auto
    if document.inputencoding == "armscii8":
        enc_pos = find_token(document.header, "\\inputencoding", 0)
        if enc_pos != -1:
            document.header[enc_pos] = "\\inputencoding auto"
    # check if preamble exists, i.e. if one of its lines contains a
    # backslash or a percent sign
    preamble_exists = any("\\" in line or "%" in line for line in document.preamble)
    if document.language != "armenian":
        return
    # add the entry \usepackage{armtex} to the document preamble
    if preamble_exists:
        # set the armtex entry as the first preamble line
        document.preamble.insert(0, "\\usepackage{armtex}")
    else:
        # create the preamble when it doesn't exist
        document.preamble.append("\\usepackage{armtex}")
    # Set document language from armenian to english
    document.language = "english"
    lang_pos = find_token(document.header, "\\language", 0)
    if lang_pos != -1:
        document.header[lang_pos] = "\\language english"
|
|
|
|
|
|
def revert_CJK(document):
    r"""Set CJK encodings to default and CJK languages to english.

    A missing ``\inputencoding`` header line is added with the value
    "auto"; ``document.inputencoding`` is refreshed from the header.  The
    languages chinese-simplified, chinese-traditional, japanese and korean
    are replaced by english.
    """
    cjk_encodings = (
        "Bg5",
        "Bg5+",
        "GB",
        "GBt",
        "GBK",
        "JIS",
        "KS",
        "SJIS",
        "UTF8",
        "EUC-TW",
        "EUC-JP",
    )
    cjk_languages = ("chinese-simplified", "chinese-traditional", "japanese", "korean")
    header = document.header
    enc_pos = find_token(header, "\\inputencoding", 0)
    if enc_pos == -1:
        header.append("\\inputencoding auto")
    elif get_value(header, "\\inputencoding", enc_pos) in cjk_encodings:
        header[enc_pos] = "\\inputencoding default"
    document.inputencoding = get_value(header, "\\inputencoding", 0)

    if document.language not in cjk_languages:
        return
    document.language = "english"
    lang_pos = find_token(header, "\\language", 0)
    if lang_pos != -1:
        header[lang_pos] = "\\language english"
|
|
|
|
|
|
def revert_preamble_listings_params(document):
    r"""Revert the header option \listings_params.

    The header line is removed.  Its value (without the surrounding double
    quotes) is added to the preamble as ``\lstset{...}``, together with
    ``\usepackage{listings}`` unless that line is already present.
    """
    i = find_token(document.header, "\\listings_params", 0)
    if i == -1:
        return
    # Split only once so that a parameter string containing whitespace is
    # kept completely; a line without a value yields empty parameters
    # instead of an IndexError.
    fields = document.header[i].split(None, 1)
    params = fields[1].strip().strip('"') if len(fields) > 1 else ""
    if "\\usepackage{listings}" not in document.preamble:
        document.preamble.append("\\usepackage{listings}")
    document.preamble.append("\\lstset{%s}" % params)
    document.header.pop(i)
|
|
|
|
|
|
def revert_listings_inset(document):
    r"""Revert listings inset to \lstinline or \begin, \end lstlisting, translate
    FROM

        \begin_inset
        lstparams "language=Delphi"
        inline true
        status open

        \begin_layout Standard
        var i = 10;
        \end_layout

        \end_inset

    TO

        \begin_inset ERT
        status open
        \begin_layout Standard


        \backslash
        lstinline[language=Delphi]{var i = 10;}
        \end_layout

        \end_inset

    There can be a caption inset in this inset

        \begin_layout Standard
        \begin_inset Caption

        \begin_layout Standard
        before label
        \begin_inset LatexCommand label
        name "lst:caption"

        \end_inset

        after label
        \end_layout

        \end_inset


        \end_layout

    The caption text and the label name are added to the listings
    parameters as caption={...} and label={...}.  \usepackage{listings}
    is added to the preamble if it is not there yet.
    """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset listings", i)
        if i == -1:
            break
        else:
            if "\\usepackage{listings}" not in document.preamble:
                document.preamble.append("\\usepackage{listings}")
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            # this should not happen
            break
        # Defaults used when the inset does not specify the value
        inline = "false"
        params = ""
        status = "open"
        # first three lines
        for line in range(i + 1, i + 4):
            if document.body[line].startswith("inline"):
                inline = document.body[line].split()[1]
            if document.body[line].startswith("lstparams"):
                params = document.body[line].split()[1].strip('"')
            if document.body[line].startswith("status"):
                status = document.body[line].split()[1].strip()
                # k is the first line of the inset contents
                # NOTE(review): k is only assigned here or in the caption
                # branch below; an inset with neither would hit an unbound
                # k in the non-inline case - confirm this cannot happen.
                k = line + 1
        # caption?
        caption = ""
        label = ""
        # NOTE(review): this search is not limited to the listings inset
        # (no end index j is passed) - confirm a caption further down in
        # the document cannot be picked up by mistake.
        cap = find_token(document.body, "\\begin_inset Caption", i)
        if cap != -1:
            cap_end = find_end_of_inset(document.body, cap + 1)
            if cap_end == -1:
                # this should not happen
                break
            # label?
            lbl = find_token(document.body, "\\begin_inset LatexCommand label", cap + 1)
            if lbl != -1:
                lbl_end = find_end_of_inset(document.body, lbl + 1)
                if lbl_end == -1:
                    # this should not happen
                    break
            else:
                # No label: use an empty range at the end of the caption
                lbl = cap_end
                lbl_end = cap_end
            for line in document.body[lbl : lbl_end + 1]:
                if line.startswith("name "):
                    label = line.split()[1].strip('"')
                    break
            # The caption text consists of all lines of the caption inset
            # outside the label inset that do not start with a backslash.
            for line in document.body[cap:lbl] + document.body[lbl_end + 1 : cap_end + 1]:
                if not line.startswith("\\"):
                    caption += line.strip()
            # The listing contents start behind the caption inset
            k = cap_end + 1
        inlinecode = ""
        # looking for the oneline code for lstinline
        # (the line in front of the \end_layout of the first paragraph of
        # the default layout found behind the inset start)
        inlinecode = document.body[
            find_end_of_layout(
                document.body,
                find_token(document.body, "\\begin_layout %s" % document.default_layout, i + 1)
                + 1,
            )
            - 1
        ]
        if len(caption) > 0:
            if len(params) == 0:
                params = "caption={%s}" % caption
            else:
                params += ",caption={%s}" % caption
        if len(label) > 0:
            if len(params) == 0:
                params = "label={%s}" % label
            else:
                params += ",label={%s}" % label
        if len(params) > 0:
            params = "[%s]" % params
            # Backslashes have to be written as \backslash in ERT
            params = params.replace("\\", "\\backslash\n")
        if inline == "true":
            document.body[i : (j + 1)] = [
                r"\begin_inset ERT",
                "status %s" % status,
                r"\begin_layout %s" % document.default_layout,
                "",
                "",
                r"\backslash",
                f"lstinline{params}{{{inlinecode}}}",
                r"\end_layout",
                "",
                r"\end_inset",
            ]
        else:
            # The original contents (lines k to j - 2) are kept between
            # the \begin{lstlisting} and \end{lstlisting} paragraphs.
            document.body[i : j + 1] = (
                [
                    r"\begin_inset ERT",
                    "status %s" % status,
                    "",
                    r"\begin_layout %s" % document.default_layout,
                    "",
                    "",
                    r"\backslash",
                    r"begin{lstlisting}%s" % params,
                    r"\end_layout",
                    "",
                    r"\begin_layout %s" % document.default_layout,
                ]
                + document.body[k : j - 1]
                + [
                    "",
                    r"\begin_layout %s" % document.default_layout,
                    "",
                    r"\backslash",
                    "end{lstlisting}",
                    r"\end_layout",
                    "",
                    r"\end_inset",
                ]
            )
|
|
|
|
|
|
def revert_include_listings(document):
    r"""Replace every lstinputlisting Include inset by raw LaTeX (ERT).

    An inset of the form

    \begin_inset Include \lstinputlisting{file}[opt]
    preview false

    \end_inset

    is rewritten as (the layout name is document.default_layout)

    \begin_inset ERT
    status open

    \begin_layout Standard


    \backslash
    lstinputlisting[opt]{file}
    \end_layout

    \end_inset

    Backslashes inside the options are written as "\backslash" followed by
    a line break.  "\usepackage{listings}" is appended to the preamble when
    it is not already one of its lines.
    """
    command_re = r"\\(lstinputlisting){([.\w]*)}(.*)"
    pos = 0
    while True:
        pos = find_token(document.body, r"\begin_inset Include \lstinputlisting", pos)
        if pos == -1:
            break
        if "\\usepackage{listings}" not in document.preamble:
            document.preamble.append("\\usepackage{listings}")
        end = find_end_of_inset(document.body, pos + 1)
        if end == -1:
            # this should not happen
            break
        # The third whitespace-separated field of the inset header holds the
        # command itself: \lstinputlisting{file}[options]
        parsed = re.match(command_re, document.body[pos].split()[2])
        if parsed:
            cmd, file, option = parsed.groups()
        else:
            # NOTE(review): a file name containing anything other than word
            # characters and dots does not match; the ERT then only holds a
            # backslash followed by "{}" -- confirm whether this is intended.
            cmd, file, option = "", "", ""
        option = option.replace("\\", "\\backslash\n")
        ert = [
            r"\begin_inset ERT",
            "status open",
            "",
            r"\begin_layout %s" % document.default_layout,
            "",
            "",
            r"\backslash",
            f"{cmd}{option}{{{file}}}",
            r"\end_layout",
            "",
            r"\end_inset",
        ]
        document.body[pos : end + 1] = ert
|
|
|
|
|
|
def revert_ext_font_sizes(document):
    r"""Express a 10/11/12 \paperfontsize of an "ext*" class as a class option.

    Nothing is done unless the backend is "latex", the text class name
    starts with "ext" and \paperfontsize is one of "10", "11" or "12".
    Otherwise the header line becomes "\paperfontsize default" and the size
    with a "pt" suffix is handed to insert_document_option().
    """
    if document.backend != "latex" or not document.textclass.startswith("ext"):
        return

    size = get_value(document.header, "\\paperfontsize", 0)
    if size not in ("10", "11", "12"):
        return

    line_no = find_token(document.header, "\\paperfontsize", 0)
    document.header[line_no] = "\\paperfontsize default"
    insert_document_option(document, size + "pt")
|
|
|
|
|
|
def convert_ext_font_sizes(document):
    r"""Move a 10pt/11pt/12pt class option of an "ext*" class to \paperfontsize.

    Nothing is done unless the backend is "latex", the text class name
    starts with "ext", \paperfontsize is "default" and the header has an
    \options line containing one of the options "10pt", "11pt" or "12pt".

    The first such option is removed from the comma separated \options
    list and its numeric part is stored in \paperfontsize.  The \options
    line is deleted when no other option remains.

    Options are compared with surrounding whitespace ignored, so a list
    written as "a4paper, 11pt" is handled like "a4paper,11pt".  The
    remaining options are written back exactly as they were found.
    """
    if document.backend != "latex":
        return
    if not document.textclass.startswith("ext"):
        return

    if get_value(document.header, "\\paperfontsize", 0) != "default":
        return

    i = find_token(document.header, "\\options", 0)
    if i == -1:
        return

    options = get_value(document.header, "\\options", i).split(",")

    fontsizes = ("10pt", "11pt", "12pt")
    for j, opt in enumerate(options):
        name = opt.strip()
        if name in fontsizes:
            # drop the trailing "pt": \paperfontsize only stores the number
            fontsize = name[:-2]
            del options[j]
            break
    else:
        # no font size option present: leave the header untouched
        return

    k = find_token(document.header, "\\paperfontsize", 0)
    document.header[k] = "\\paperfontsize %s" % fontsize

    if options:
        document.header[i] = "\\options %s" % ",".join(options)
    else:
        del document.header[i]
|
|
|
|
|
|
def revert_separator_layout(document):
    r"""Turn each --Separator-- layout into a default layout led by a LyX note.

    A paragraph such as

    \begin_layout --Separator--
    something
    \end_layout

    becomes (the layout name is document.default_layout)

    \begin_layout Standard
    \begin_inset Note Note
    status open

    \begin_layout Standard
    Separate Environment
    \end_layout

    \end_inset
    something

    \end_layout

    """
    start = 0
    while True:
        start = find_token(document.body, r"\begin_layout --Separator--", start)
        if start == -1:
            break
        stop = find_end_of_layout(document.body, start + 1)
        if stop == -1:
            # this should not happen
            break
        begin_default = r"\begin_layout %s" % document.default_layout
        note = [
            begin_default,
            r"\begin_inset Note Note",
            "status open",
            "",
            begin_default,
            "Separate Environment",
            r"\end_layout",
            "",
            r"\end_inset",
        ]
        # Everything between the old \begin_layout and \end_layout lines is
        # carried over unchanged, after the note.
        content = document.body[start + 1 : stop]
        document.body[start : stop + 1] = note + content + ["", r"\end_layout"]
|
|
|
|
|
|
def convert_arabic(document):
    r"""Rename the language "arabic" to "arabic_arabtex".

    When the document language is "arabic", both document.language and the
    \language header line (if present) are updated.  Independently of the
    document language, every body line that contains "\lang arabic" is
    replaced as a whole by "\lang arabic_arabtex".
    """
    if document.language == "arabic":
        document.language = "arabic_arabtex"
        header_pos = find_token(document.header, "\\language", 0)
        if header_pos != -1:
            document.header[header_pos] = "\\language arabic_arabtex"

    for idx, line in enumerate(document.body):
        if r"\lang arabic" in line:
            # change the language name
            document.body[idx] = r"\lang arabic_arabtex"
|
|
|
|
|
|
def revert_arabic(document):
    r"""Rename the language "arabic_arabtex" back to "arabic".

    When the document language is "arabic_arabtex", both document.language
    and the \language header line (if present) are updated.  Independently
    of the document language, every body line that contains
    "\lang arabic_arabtex" is replaced as a whole by "\lang arabic".
    """
    if document.language == "arabic_arabtex":
        document.language = "arabic"
        header_pos = find_token(document.header, "\\language", 0)
        if header_pos != -1:
            document.header[header_pos] = "\\language arabic"

    for idx, line in enumerate(document.body):
        if r"\lang arabic_arabtex" in line:
            # change the language name
            document.body[idx] = r"\lang arabic"
|
|
|
|
|
|
##
# Conversion hub
#
# NOTE(review): these tables are presumably read by the lyx2lyx driver,
# which is not part of this file -- confirm the exact contract there.

# Version strings associated with this module.
supported_versions = ["1.5.0", "1.5"]
# Each entry is [number, [functions]]: a number (presumably the file format
# reached by the step -- TODO confirm) paired with the functions registered
# for it.  Entries are listed in ascending order from 246 to 276; an empty
# list means that no function is registered for that number.
convert = [
    [246, []],
    [247, [convert_font_settings]],
    [248, []],
    [249, [convert_utf8]],
    [250, []],
    [251, []],
    [252, [convert_commandparams, convert_bibitem]],
    [253, []],
    [254, [convert_esint]],
    [255, []],
    [256, []],
    [257, [convert_caption]],
    [258, [convert_lyxline]],
    [259, [convert_accent, normalize_font_whitespace_259]],
    [260, []],
    [261, [convert_changes]],
    [262, []],
    [263, [normalize_language_name]],
    [264, [convert_cv_textclass]],
    [265, [convert_tableborder]],
    [266, []],
    [267, []],
    [268, []],
    [269, []],
    [270, []],
    [271, [convert_ext_font_sizes]],
    [272, []],
    [273, []],
    [274, [normalize_font_whitespace_274]],
    [275, [convert_graphics_rotation]],
    [276, [convert_arabic]],
]
|
|
|
|
# Each entry is [number, [functions]]: a number (presumably the file format
# reached by the step -- TODO confirm) paired with the functions registered
# for it.  Entries are listed in descending order from 275 to 245; an empty
# list means that no function is registered for that number.
# The same three listings functions are registered for both 271 and 268.
revert = [
    [275, [revert_arabic]],
    [274, [revert_graphics_rotation]],
    [273, []],
    [272, [revert_separator_layout]],
    [
        271,
        [
            revert_preamble_listings_params,
            revert_listings_inset,
            revert_include_listings,
        ],
    ],
    [270, [revert_ext_font_sizes]],
    [269, [revert_beamer_alert, revert_beamer_structure]],
    [
        268,
        [
            revert_preamble_listings_params,
            revert_listings_inset,
            revert_include_listings,
        ],
    ],
    [267, [revert_CJK]],
    [266, [revert_utf8plain]],
    [265, [revert_armenian]],
    [264, [revert_tableborder]],
    [263, [revert_cv_textclass]],
    [262, [revert_language_name]],
    [261, [revert_ascii]],
    [260, []],
    [259, [revert_utf8x]],
    [258, []],
    [257, []],
    [256, [revert_caption]],
    [255, [revert_encodings]],
    [254, [revert_clearpage, revert_cleardoublepage]],
    [253, [revert_esint]],
    [252, [revert_nomenclature, revert_printnomenclature]],
    [251, [revert_commandparams]],
    [250, [revert_cs_label]],
    [249, []],
    [248, [revert_accent, revert_utf8, revert_unicode]],
    [247, [revert_booktabs]],
    [246, [revert_font_settings]],
    [245, [revert_framed]],
]
|
|
|
|
|
|
# Running this module directly as a script does nothing.
if __name__ == "__main__":
    pass
|