mirror of
https://git.lyx.org/repos/lyx.git
synced 2024-12-23 21:40:19 +00:00
7feffb89e9
Discussed in https://www.lyx.org/trac/ticket/13058.
1097 lines
37 KiB
Python
1097 lines
37 KiB
Python
# This file is part of lyx2lyx
|
|
# Copyright (C) 2024 The LyX team
|
|
#
|
|
# This program is free software; you can redistribute it and/or
|
|
# modify it under the terms of the GNU General Public License
|
|
# as published by the Free Software Foundation; either version 2
|
|
# of the License, or (at your option) any later version.
|
|
#
|
|
# This program is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU General Public License
|
|
# along with this program; if not, write to the Free Software
|
|
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
|
|
|
"""Convert files to the file format generated by lyx 2.5"""
|
|
|
|
import re
|
|
|
|
# Uncomment only what you need to import, please (lyx2lyx_tools):
|
|
# convert_info_insets, get_ert, hex2ratio, insert_to_preamble,
|
|
# length_in_bp, lyx2verbatim,
|
|
# revert_flex_inset, revert_flex_inset, revert_font_attrs,
|
|
# str2bool
|
|
from lyx2lyx_tools import (
|
|
add_to_preamble,
|
|
latex_length,
|
|
lyx2latex,
|
|
put_cmd_in_ert,
|
|
revert_language
|
|
)
|
|
|
|
# Uncomment only what you need to import, please (parser_tools):
|
|
# check_token, count_pars_in_inset, del_complete_lines,
|
|
# del_value, find_complete_lines, find_end_of,
|
|
# find_re, find_token_backwards, find_token_exact,
|
|
# find_tokens,
|
|
# get_containing_layout, get_option_value,
|
|
# is_in_inset, set_bool_value
|
|
from parser_tools import (
|
|
del_token,
|
|
find_end_of_inset,
|
|
find_end_of_layout,
|
|
find_re,
|
|
find_substring,
|
|
find_token,
|
|
get_containing_inset,
|
|
get_bool_value,
|
|
get_quoted_value,
|
|
get_value
|
|
)
|
|
|
|
####################################################################
|
|
# Private helper functions
|
|
|
|
|
|
###############################################################################
|
|
###
|
|
### Conversion and reversion routines
|
|
###
|
|
###############################################################################
|
|
|
|
|
|
def convert_url_escapes(document):
    """Unescape # and % in URLs with hyperref.

    Nothing is done unless hyperref is enabled in the header or the document
    uses one of the beamer text classes.  Inside every ``Flex URL`` inset, a
    line consisting of the backslash token that directly precedes a line
    starting with ``#`` or ``%`` is removed.
    """

    hyperref = find_token(document.header, "\\use_hyperref true", 0) != -1
    beamer = document.textclass in [
        "beamer",
        "scrarticle-beamer",
        "beamerposter",
        "article-beamer",
    ]

    if not hyperref and not beamer:
        return

    rurl = re.compile(r"^[%#].*")
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Flex URL", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: Could not find end of URL inset.")
            i += 1
            continue
        while True:
            surl = find_re(document.body, rurl, i, j)
            if surl == -1:
                i = j
                break
            if document.body[surl - 1] == "\\backslash":
                del document.body[surl - 1]
                # The inset shrank by one line: the matched line now sits at
                # surl - 1, so surl is the next unvisited line.
                j -= 1
                i = surl
            else:
                # Not escaped: step past the match, otherwise the same line
                # would be found again forever.
                i = surl + 1
|
|
|
|
|
|
def revert_url_escapes(document):
    """Escape # and % in URLs with hyperref.

    Nothing is done unless hyperref is enabled in the header or the document
    uses one of the beamer text classes.  Inside every ``Flex URL`` inset,
    each ``#`` or ``%`` is moved to the start of its own line and preceded by
    a line holding the backslash token, unless it is already escaped that way.
    """

    hyperref = find_token(document.header, "\\use_hyperref true", 0) != -1
    beamer = document.textclass in [
        "beamer",
        "scrarticle-beamer",
        "beamerposter",
        "article-beamer",
    ]

    if not hyperref and not beamer:
        return

    # Greedy first group: the split happens at the LAST special character of
    # the line; the remainder (group 1) is re-examined on the next pass.
    rurl = re.compile(r"^(.*)([%#].*)")
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Flex URL", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: Could not find end of URL inset.")
            i += 1
            continue
        while True:
            surl = find_re(document.body, rurl, i, j)
            if surl == -1:
                i = j
                break
            m = rurl.match(document.body[surl])
            if m is None:
                # Defensive: never re-scan a line we cannot split.
                i = surl + 1
                continue
            if m.group(1) == "" and document.body[surl - 1] == "\\backslash":
                # Already escaped; keep scanning the rest of the inset
                # instead of abandoning it (and possibly looping forever).
                i = surl + 1
                continue
            document.body[surl : surl + 1] = [m.group(1), "\\backslash", m.group(2)]
            # One line became three, so the end of the inset moved by two.
            j += 2
            i = surl
|
|
|
|
|
|
def convert_url_escapes2(document):
    """Unescape backslashes in URLs with hyperref.

    Applies only when hyperref is switched on or a beamer class is in use.
    Within each ``Flex URL`` inset, a backslash token found two lines after
    another backslash token is deleted.
    """

    beamer_classes = (
        "beamer",
        "scrarticle-beamer",
        "beamerposter",
        "article-beamer",
    )
    no_hyperref = find_token(document.header, "\\use_hyperref true", 0) == -1
    if no_hyperref and document.textclass not in beamer_classes:
        return

    pos = 0
    while True:
        pos = find_token(document.body, "\\begin_inset Flex URL", pos + 1)
        if pos == -1:
            return
        inset_end = find_end_of_inset(document.body, pos)
        if inset_end == -1:
            document.warning("Malformed LyX document: Could not find end of URL inset.")
            pos += 1
            continue
        # Walk over every backslash token of this inset.
        hit = find_token(document.body, "\\backslash", pos, inset_end)
        while hit != -1:
            if document.body[hit + 2] == "\\backslash":
                del document.body[hit + 2]
            pos = hit + 1
            hit = find_token(document.body, "\\backslash", pos, inset_end)
|
|
|
|
|
|
def revert_url_escapes2(document):
    """Escape backslashes in URLs with hyperref.

    Applies only when hyperref is switched on or a beamer class is in use.
    Every backslash token line inside a ``Flex URL`` inset is replaced by a
    doubled backslash token.
    """

    beamer_classes = (
        "beamer",
        "scrarticle-beamer",
        "beamerposter",
        "article-beamer",
    )
    no_hyperref = find_token(document.header, "\\use_hyperref true", 0) == -1
    if no_hyperref and document.textclass not in beamer_classes:
        return

    pos = 0
    while True:
        pos = find_token(document.body, "\\begin_inset Flex URL", pos + 1)
        if pos == -1:
            return
        inset_end = find_end_of_inset(document.body, pos)
        if inset_end == -1:
            document.warning("Malformed LyX document: Could not find end of URL inset.")
            pos += 1
            continue
        # Double each backslash token, resuming right after the rewritten line.
        hit = find_token(document.body, "\\backslash", pos, inset_end)
        while hit != -1:
            document.body[hit] = "\\backslash\\backslash"
            pos = hit + 1
            hit = find_token(document.body, "\\backslash", pos, inset_end)
|
|
|
|
|
|
def revert_glue_parskip(document):
    """Revert parskip with glue length to user preamble.

    When paragraphs are separated by a skip whose length contains a ``+`` or
    a ``-`` (other than as first character), the parskip package is loaded
    from the preamble with that length and the header falls back to
    indentation with the default indent.
    """

    sep_line = find_token(document.header, "\\paragraph_separation skip", 0)
    if sep_line == -1:
        return

    skip_line = find_token(document.header, "\\defskip", 0)
    if skip_line == -1:
        document.warning("Malformed LyX document! Missing \\defskip.")
        return

    skip = get_value(document.header, "\\defskip", skip_line)

    # A leading "-" is only a sign, hence the search from index 1 on.
    has_stretch = "+" in skip
    has_shrink = "-" in skip[1:]
    if not (has_stretch or has_shrink):
        # not a glue length
        return

    package_line = "\\usepackage[skip={" + latex_length(skip)[1] + "}]{parskip}"
    add_to_preamble(document, [package_line])

    document.header[sep_line] = "\\paragraph_separation indent"
    document.header[skip_line] = "\\paragraph_indentation default"
|
|
|
|
|
|
def convert_he_letter(document):
    """Convert hebrew letter to letter document class.

    Documents using the ``heb-letter`` text class are switched to ``letter``;
    every other text class is left alone.
    """

    if document.textclass != "heb-letter":
        return
    document.textclass = "letter"
|
|
|
|
|
|
# Local layout lines marking a biblatex-chicago requirement inserted by lyx2lyx.
# NOTE: the list has exactly three entries; the original source concatenated an
# empty string literal onto the last one, which did not add a fourth element.
chicago_local_layout = [
    "### Inserted by lyx2lyx (biblatex-chicago) ###",
    "Requires biblatex-chicago",
    "### End of insertion by lyx2lyx (biblatex-chicago) ###",
]
|
|
|
|
def convert_biblatex_chicago(document):
    """Convert biblatex-chicago documents.

    Documents that required biblatex-chicago through the local layout (or a
    bare ``Requires biblatex-chicago`` header line) and use a biblatex cite
    engine are switched to the ``biblatex-chicago`` engine.  The cite engine
    type becomes ``authoryear`` when the biblio options contain
    ``authordate`` and ``notes`` otherwise.
    """

    chicago = document.del_local_layout(chicago_local_layout)
    if not chicago:
        chicago = document.del_from_header(["Requires biblatex-chicago"])
        if not chicago:
            return

    # 1. Get cite engine
    engine = "basic"
    i = find_token(document.header, "\\cite_engine", 0)
    if i == -1:
        document.warning("Malformed document! Missing \\cite_engine")
    else:
        engine = get_value(document.header, "\\cite_engine", i)

    # 2. If biblatex, set to chicago (engine stays "basic" when the header
    #    line is missing, so ``i`` is always valid past this point)
    if engine not in ["biblatex", "biblatex-natbib"]:
        return

    document.header[i] = "\\cite_engine biblatex-chicago"

    # 3. Determine cite type from the biblio options.  The options must be
    #    read when the header line EXISTS; the former code did the opposite
    #    and therefore always ended up with "notes".
    bibopts = ""
    i = find_token(document.header, "\\biblio_options", 0)
    if i != -1:
        bibopts = get_value(document.header, "\\biblio_options", i)

    cetype = "authoryear" if "authordate" in bibopts else "notes"

    # 4. Set cite type
    i = find_token(document.header, "\\cite_engine_type", 0)
    if i == -1:
        document.warning("Malformed document! Missing \\cite_engine_type")
    else:
        document.header[i] = "\\cite_engine_type %s" % cetype
|
|
|
|
|
|
def revert_biblatex_chicago(document):
    """Revert biblatex-chicago to ERT where necessary.

    The cite engine is reset to plain biblatex with type ``authoryear``, the
    ``authordate`` option is added to the biblio options when the document
    used the author-year flavour, the biblatex-chicago requirement is stored
    in the local layout, and citation commands specific to biblatex-chicago
    are replaced by ERT.
    """

    # 1. Get cite engine
    engine = "basic"
    i = find_token(document.header, "\\cite_engine", 0)
    if i == -1:
        document.warning("Malformed document! Missing \\cite_engine")
    else:
        engine = get_value(document.header, "\\cite_engine", i)

    # 2. Do we use biblatex-chicago?
    if engine != "biblatex-chicago":
        return

    # 3. Reset cite engine
    document.header[i] = "\\cite_engine biblatex"

    # 4. Set cite type
    cetype = "authoryear"
    i = find_token(document.header, "\\cite_engine_type", 0)
    if i == -1:
        document.warning("Malformed document! Missing \\cite_engine_type")
    else:
        cetype = get_value(document.header, "\\cite_engine_type", i)
        document.header[i] = "\\cite_engine_type authoryear"

    # 5. Add authordate option if needed
    if cetype == "authoryear":
        i = find_token(document.header, "\\biblio_options", 0)
        if i != -1:
            bibopts = get_value(document.header, "\\biblio_options", i)
            # Only append the option when it is NOT there yet (the former
            # test was inverted and duplicated an existing option instead).
            if "authordate" not in bibopts:
                if bibopts:
                    document.header[i] = "\\biblio_options %s, authordate" % bibopts
                else:
                    document.header[i] = "\\biblio_options authordate"
        else:
            i = find_token(document.header, "\\biblio_style", 0)
            if i == -1:
                document.warning("Malformed document! Missing \\biblio_style")
            else:
                document.header[i + 1 : i + 1] = ["\\biblio_options authordate"]

    # 6. Set local layout
    document.append_local_layout(chicago_local_layout)

    # 7. Handle special citation commands
    # Specific citation insets used in biblatex that need to be reverted to ERT
    if cetype == "notes":
        new_citations = {
            "citeyear": "citeyear*",
            "Citetitle": "Citetitle",
            "Citetitle*": "Citetitle*",
            "gentextcite": "gentextcite",
            "Gentextcite": "Gentextcite",
            "shortcite": "shortcite",
            "shortcite*": "shortcite*",
            "shortrefcite": "shortrefcite",
            "shorthandcite": "shorthandcite",
            "shorthandcite*": "shorthandcite*",
            "shorthandrefcite": "shorthandrefcite",
            "citejournal": "citejournal",
            "headlesscite": "headlesscite",
            "Headlesscite": "Headlesscite",
            "headlessfullcite": "headlessfullcite",
            "surnamecite": "surnamecite",
        }
    else:
        new_citations = {
            "atcite": "atcite",
            "atpcite": "atpcite",
            "gentextcite": "gentextcite",
            "Gentextcite": "Gentextcite",
        }

    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CommandInset citation", i)
        if i == -1:
            break
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Can't find end of citation inset at line %d!!" % (i))
            i += 1
            continue
        k = find_token(document.body, "LatexCommand", i, j)
        if k == -1:
            document.warning("Can't find LatexCommand for citation inset at line %d!" % (i))
            i = j + 1
            continue
        cmd = get_value(document.body, "LatexCommand", k)
        if cmd not in new_citations:
            i = j + 1
            continue

        pre = get_quoted_value(document.body, "before", i, j)
        post = get_quoted_value(document.body, "after", i, j)
        key = get_quoted_value(document.body, "key", i, j)
        if not key:
            document.warning("Citation inset at line %d does not have a key!" % (i))
            key = "???"
        # Replace known new commands with ERT
        res = "\\" + new_citations[cmd]
        if pre:
            res += "[" + pre + "]"
        if post:
            res += "[" + post + "]"
        elif pre:
            # a lone pre-note needs an empty post-note argument
            res += "[]"
        res += "{" + key + "}"
        ert = put_cmd_in_ert([res])
        document.body[i : j + 1] = ert
        # Continue right behind the inserted ERT; the old end index is
        # stale once the inset has been replaced.
        i += len(ert)
|
|
|
|
|
|
def revert_nptextcite(document):
    """Revert \\nptextcite and MLA's autocite variants to ERT.

    Only documents using a biblatex cite engine that also carry a
    ``\\biblatex_citestyle`` header line are touched.  Citation insets with
    one of the affected commands are replaced by the equivalent LaTeX
    command in ERT.
    """

    # 1. Get cite engine
    engine = "basic"
    i = find_token(document.header, "\\cite_engine", 0)
    if i == -1:
        document.warning("Malformed document! Missing \\cite_engine")
    else:
        engine = get_value(document.header, "\\cite_engine", i)

    # 2. Do we use biblatex?
    if engine != "biblatex" and engine != "biblatex-natbib":
        return

    # 3. and APA?
    # NOTE(review): only the presence of the header line is checked, not the
    # style it names - confirm that this is intended.
    i = find_token(document.header, "\\biblatex_citestyle", 0)
    if i == -1:
        return

    # 4. Convert \nptextcite to ERT
    new_citations = {
        "nptextcite": "nptextcite",
        "mlaautocite": "autocite",
        "Mlaautocite": "Autocite",
        "mlaautocite*": "autocite*",
        "Mlaautocite*": "Autocite*",
    }
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CommandInset citation", i)
        if i == -1:
            break
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Can't find end of citation inset at line %d!!" % (i))
            i += 1
            continue
        k = find_token(document.body, "LatexCommand", i, j)
        if k == -1:
            document.warning("Can't find LatexCommand for citation inset at line %d!" % (i))
            i = j + 1
            continue
        cmd = get_value(document.body, "LatexCommand", k)
        if cmd not in new_citations:
            i = j + 1
            continue

        pre = get_quoted_value(document.body, "before", i, j)
        post = get_quoted_value(document.body, "after", i, j)
        key = get_quoted_value(document.body, "key", i, j)
        if not key:
            document.warning("Citation inset at line %d does not have a key!" % (i))
            key = "???"
        # Replace known new commands with ERT
        res = "\\" + new_citations[cmd]
        if pre:
            res += "[" + pre + "]"
        if post:
            res += "[" + post + "]"
        elif pre:
            # a lone pre-note needs an empty post-note argument
            res += "[]"
        res += "{" + key + "}"
        ert = put_cmd_in_ert([res])
        document.body[i : j + 1] = ert
        # Continue right behind the inserted ERT; the old end index is
        # stale once the inset has been replaced.
        i += len(ert)
|
|
|
|
|
|
def revert_nomencl_textwidth(document):
    """Revert nomencl textwidth parameter to ERT.

    For each ``nomencl_print`` command inset whose ``set_width`` is
    ``textwidth``, the setting is changed to ``none``, the ``width`` entry is
    removed and a ``\\settowidth`` command is placed in ERT in front of the
    inset.
    """

    pos = 0
    while True:
        pos = find_token(document.body, "\\begin_inset CommandInset nomencl_print", pos)
        if pos == -1:
            return

        end = find_end_of_inset(document.body, pos)
        if end == -1:
            document.warning(
                "Malformed LyX document: Can't find end of command inset at line %d" % pos
            )
            pos += 1
            continue

        if get_quoted_value(document.body, "set_width", pos, end) != "textwidth":
            pos += 1
            continue

        # change set_width to "none"
        width_mode_line = find_token(document.body, "set_width", pos, end)
        if width_mode_line != -1:
            document.body[width_mode_line] = "set_width \"none\""
        # remember the width text before its line is dropped
        width_text = get_quoted_value(document.body, "width", pos, end)
        del_token(document.body, "width", pos, end)
        # Insert ERT in front of the inset
        ert = put_cmd_in_ert(["\\settowidth{\\nomlabelwidth}{" + width_text + "}"])
        document.body[pos:pos] = ert
        pos = end
|
|
|
|
|
|
def convert_nomencl(document):
    """Convert nomencl inset to collapsible.

    Every ``CommandInset nomenclature`` is replaced by a ``Nomenclature``
    inset: the prefix goes to ``Argument 1``, the symbol becomes the inset
    text and the description goes to ``Argument post:1``.  With
    ``literal "true"`` symbol and description are put in ERT.
    """

    pos = 0
    while True:
        pos = find_token(document.body, "\\begin_inset CommandInset nomenclature", pos)
        if pos == -1:
            return

        end = find_end_of_inset(document.body, pos)
        if end == -1:
            document.warning(
                "Malformed LyX document: Can't find end of command inset at line %d" % pos
            )
            pos += 1
            continue

        literal = get_quoted_value(document.body, "literal", pos, end)
        prefix = get_quoted_value(document.body, "prefix", pos, end)
        symbol = get_quoted_value(document.body, "symbol", pos, end)
        description = get_quoted_value(document.body, "description", pos, end)
        verbatim = literal == "true"

        inset = ["\\begin_inset Nomenclature", "status open", "", "\\begin_layout Plain Layout"]
        if prefix:
            inset.extend(
                [
                    "\\begin_inset Argument 1",
                    "status open",
                    "",
                    "\\begin_layout Plain Layout",
                    prefix,
                    "\\end_layout",
                    "",
                    "\\end_inset",
                    "",
                ]
            )

        # Literal content has doubled backslashes and escaped quotes undone
        # and goes to ERT; otherwise only the quotes are unescaped.
        if verbatim:
            inset.extend(put_cmd_in_ert(symbol.replace("\\\\", "\\").replace("\\\"", "\"")))
        else:
            inset.append(symbol.replace("\\\"", "\""))

        if description:
            inset.extend(
                [
                    "\\begin_inset Argument post:1",
                    "status open",
                    "",
                    "\\begin_layout Plain Layout",
                ]
            )
            if verbatim:
                inset.extend(
                    put_cmd_in_ert(description.replace("\\\\", "\\").replace("\\\"", "\""))
                )
            else:
                inset.append(description.replace("\\\"", "\""))
            inset.extend(["\\end_layout", "", "\\end_inset", ""])

        inset.extend(["\\end_layout", "", "\\end_inset"])
        document.body[pos : end + 1] = inset
|
|
|
|
|
|
def revert_nomencl(document):
    """Revert nomencl inset to InsetCommand.

    Each ``Nomenclature`` inset is turned back into a
    ``CommandInset nomenclature``: ``Argument 1`` becomes the prefix, the
    remaining inset text the symbol and ``Argument post:1`` the description.
    ``literal`` is set to "true" when symbol or description contain ERT.
    An inset with a malformed argument triggers a warning and is skipped.
    """

    def take_argument(start, name):
        """Cut ``Argument <name>`` out of the inset starting at ``start``.

        Returns the content lines of the argument ([] if the inset has no
        such argument) or None if the argument has no Plain Layout.
        """
        # Look up the end afresh: earlier removals shift it.
        end = find_end_of_inset(document.body, start)
        arg = find_token(document.body, "\\begin_inset Argument " + name, start, end)
        if arg == -1:
            return []
        endarg = find_end_of_inset(document.body, arg)
        argbeginPlain = find_token(
            document.body, "\\begin_layout Plain Layout", arg, endarg
        )
        if argbeginPlain == -1:
            return None
        argendPlain = find_end_of_inset(document.body, argbeginPlain)
        content = document.body[argbeginPlain + 1 : argendPlain - 2]

        # remove Arg insets and paragraph, if it only contains this inset
        if (
            document.body[arg - 1] == "\\begin_layout Plain Layout"
            and find_end_of_layout(document.body, arg - 1) == endarg + 3
        ):
            del document.body[arg - 1 : endarg + 4]
        else:
            del document.body[arg : endarg + 1]
        return content

    def quoted(lines, literal):
        """Return ``lines`` as LaTeX, escaped for a quoted inset parameter."""
        latex = lyx2latex(document, lines)
        if literal == "true":
            latex = latex.replace("\\", "\\\\")
        return latex.replace("\"", "\\\"")

    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Nomenclature", i)
        if i == -1:
            return

        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning(
                "Malformed LyX document: Can't find end of command inset at line %d" % i
            )
            i += 1
            continue

        prefix = take_argument(i, "1")
        if prefix is None:
            document.warning("Malformed LyX document: Can't find optarg plain Layout")
            # Step past this inset; without it the same inset would be
            # found again forever.
            i += 1
            continue

        description = take_argument(i, "post:1")
        if description is None:
            document.warning("Malformed LyX document: Can't find arg 1 plain Layout")
            i += 1
            continue

        beginPlain = find_token(document.body, "\\begin_layout Plain Layout", i)
        endPlain = find_end_of_layout(document.body, beginPlain)
        symbol = document.body[beginPlain + 1 : endPlain]
        literal = "false"
        if "\\begin_inset ERT" in symbol or "\\begin_inset ERT" in description:
            literal = "true"

        newins = ["\\begin_inset CommandInset nomenclature", "LatexCommand nomenclature"]
        if prefix:
            newins += ["prefix \"" + lyx2latex(document, prefix) + "\""]
        if symbol:
            newins += ["symbol \"" + quoted(symbol, literal) + "\""]
        if description:
            newins += ["description \"" + quoted(description, literal) + "\""]
        newins += ["literal \"" + literal + "\""]

        # The closing \end_inset line is kept, hence the exclusive end.
        j = find_end_of_inset(document.body, i)
        document.body[i:j] = newins

        i += 1
|
|
|
|
|
|
def convert_index_sc(document):
    """Convert index special characters to ERT.

    Within every Index inset, each ``!``, ``@`` and ``|`` that is not already
    inside ERT is cut out of its line and re-inserted as an ERT inset.
    """

    pos = 0
    while True:
        pos = find_token(document.body, "\\begin_inset Index", pos)
        if pos == -1:
            return

        inset_end = find_end_of_inset(document.body, pos)
        if inset_end == -1:
            document.warning(
                "Malformed LyX document: Can't find end of index inset at line %d" % pos
            )
            pos += 1
            continue

        for special in ("!", "@", "|"):
            line_no = pos
            while True:
                # the inset grows with every replacement, so look up its end anew
                inset_end = find_end_of_inset(document.body, pos)
                line_no = find_substring(document.body, special, line_no, inset_end)
                if line_no == -1:
                    break
                container = get_containing_inset(document.body, line_no)
                if container and container[0] == "ERT":
                    line_no += 1
                    continue

                # Interleave the text pieces with one ERT inset per occurrence.
                *head, tail = document.body[line_no].split(special)
                pieces = []
                for part in head:
                    pieces.append(part)
                    pieces.extend(put_cmd_in_ert(special))
                pieces.append(tail)
                document.body[line_no : line_no + 1] = pieces
        pos += 1
|
|
|
|
|
|
def revert_index_sc(document):
    """Escape index special characters.

    Within every Index inset, each ``!``, ``@`` and ``|`` that is not inside
    ERT is moved to a line of its own and preceded by an ERT inset holding a
    double quote.
    """

    pos = 0
    while True:
        pos = find_token(document.body, "\\begin_inset Index", pos)
        if pos == -1:
            return

        inset_end = find_end_of_inset(document.body, pos)
        if inset_end == -1:
            document.warning(
                "Malformed LyX document: Can't find end of index inset at line %d" % pos
            )
            pos += 1
            continue

        for special in ("!", "@", "|"):
            line_no = pos
            while True:
                # the inset grows with every replacement, so look up its end anew
                inset_end = find_end_of_inset(document.body, pos)
                line_no = find_substring(document.body, special, line_no, inset_end)
                if line_no == -1:
                    break
                container = get_containing_inset(document.body, line_no)
                if container and container[0] == "ERT":
                    line_no += 1
                    continue

                # text piece, quote in ERT, then the character itself
                *head, tail = document.body[line_no].split(special)
                pieces = []
                for part in head:
                    pieces.append(part)
                    pieces.extend(put_cmd_in_ert("\""))
                    pieces.append(special)
                pieces.append(tail)
                document.body[line_no : line_no + 1] = pieces
                # skip everything just written
                line_no += len(pieces)
        pos += 1
|
|
|
|
|
|
def revert_nomentbl(document):
    """Revert nomentbl inset to ERT.

    The ``\\nomencl_options`` header line is removed and its value stored as
    package options in the local layout.  If the options contain
    ``nomentbl``, every Nomenclature inset is replaced by a
    ``\\nomenclature`` command in ERT built from the optional prefix, the
    symbol and the three post arguments (description, unit, note).
    An inset with a malformed argument triggers a warning and is skipped.
    """

    def take_argument(start, name):
        """Cut ``Argument <name>`` out of the inset starting at ``start``.

        Returns the content lines of the argument ([] if the inset has no
        such argument) or None if the argument has no Plain Layout.
        """
        # Look up the end afresh: earlier removals shift it.
        end = find_end_of_inset(document.body, start)
        arg = find_token(document.body, "\\begin_inset Argument " + name, start, end)
        if arg == -1:
            return []
        endarg = find_end_of_inset(document.body, arg)
        argbeginPlain = find_token(
            document.body, "\\begin_layout Plain Layout", arg, endarg
        )
        if argbeginPlain == -1:
            return None
        argendPlain = find_end_of_inset(document.body, argbeginPlain)
        content = document.body[argbeginPlain + 1 : argendPlain - 2]

        # remove Arg insets and paragraph, if it only contains this inset
        if (
            document.body[arg - 1] == "\\begin_layout Plain Layout"
            and find_end_of_layout(document.body, arg - 1) == endarg + 3
        ):
            del document.body[arg - 1 : endarg + 4]
        else:
            del document.body[arg : endarg + 1]
        return content

    i = find_token(document.header, "\\nomencl_options", 0)
    if i == -1:
        # nothing to do
        return

    opts = get_value(document.header, "\\nomencl_options", i)
    # remove header
    del document.header[i]

    # store options
    document.append_local_layout(
        [
            r"### Inserted by lyx2lyx (nomencl) ###",
            r"PackageOptions nomencl %s" % opts,
        ]
    )

    if opts.find("nomentbl") == -1:
        return

    # (argument name, label used in the warning), in extraction order
    arg_specs = (
        ("1", "optarg"),
        ("post:1", "arg post:1"),
        ("post:2", "arg post:2"),
        ("post:3", "arg post:3"),
    )

    # revert insets to ERT
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Nomenclature", i)
        if i == -1:
            break

        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning(
                "Malformed LyX document: Can't find end of command inset at line %d" % i
            )
            i += 1
            continue

        contents = []
        for name, label in arg_specs:
            content = take_argument(i, name)
            if content is None:
                document.warning(
                    "Malformed LyX document: Can't find %s plain Layout" % label
                )
                break
            contents.append(content)
        if len(contents) != len(arg_specs):
            # Step past the malformed inset; without it the same inset
            # would be found again forever.
            i += 1
            continue
        prefix, description, unit, note = contents

        beginPlain = find_token(document.body, "\\begin_layout Plain Layout", i)
        endPlain = find_end_of_layout(document.body, beginPlain)
        symbol = document.body[beginPlain + 1 : endPlain]

        # Replace command with ERT
        res = put_cmd_in_ert(["\\nomenclature"])
        if prefix:
            res += put_cmd_in_ert(["["]) + prefix + put_cmd_in_ert(["]"])
        res += (
            put_cmd_in_ert(["{"])
            + symbol
            + put_cmd_in_ert(["}{"])
            + description
            + put_cmd_in_ert(["}{"])
            + unit
            + put_cmd_in_ert(["}{"])
            + note
            + put_cmd_in_ert(["}"])
        )

        j = find_end_of_inset(document.body, i)
        document.body[i : j + 1] = res

        i += 1
|
|
|
|
|
|
def revert_langopts(document):
    """Remove language options header.

    Deletes every header line found by searching for the token
    ``\\language_options_``.
    """

    # Always search from the top again: deleting a line shifts the rest.
    line = find_token(document.header, "\\language_options_", 0)
    while line != -1:
        # remove header
        del document.header[line]
        line = find_token(document.header, "\\language_options_", 0)
|
|
|
|
|
|
def revert_new_polyglossia_languages(document):
    """Emulate support for Simplified Chinese, Traditional Chinese, Japanese, Sorani Kurdish,
    Classic, Ecclesiastic and Medieval Latin, N'ko, Odia, Punjabi, and Uyghur with polyglossia.

    Only acts when the document uses non-TeX fonts and the language package
    is "default" or "auto"; each affected language used as document language
    or in a ``\\lang`` switch is handed to ``revert_language``.
    """

    # Does the document use polyglossia?
    if not get_bool_value(document.header, "\\use_non_tex_fonts"):
        return
    package_line = find_token(document.header, "\\language_package")
    if package_line == -1:
        document.warning("Malformed document! Missing \\language_package")
        return
    package = get_value(document.header, "\\language_package", package_line)
    if package not in ("default", "auto"):
        return

    # lyxname: (babelname, polyglossianame, polyglossiaopts)
    new_languages = {
        "chinese-simplified": ("", "chinese", "variant=simplified"),
        "chinese-traditional": ("", "chinese", "variant=traditional"),
        "japanese": ("", "japanese", ""),
        "sorani": ("", "kurdish", "variant=sorani"),
        "latin-classic": ("", "latin", "variant=classic"),
        "latin-ecclesiastic": ("", "latin", "variant=ecclesiastic"),
        "latin-medieval": ("", "latin", "variant=medieval"),
        "nko": ("", "nko", ""),
        "odia": ("", "odia", ""),
        "punjabi": ("", "punjabi", ""),
        "uyghur": ("", "uyghur", ""),
    }

    # Collect the affected languages: document language plus \lang switches.
    used_languages = set()
    if document.language in new_languages:
        used_languages.add(document.language)
    pos = find_token(document.body, "\\lang", 1)
    while pos != -1:
        switched_to = get_value(document.body, "\\lang", pos)
        if switched_to in new_languages:
            used_languages.add(switched_to)
        pos = find_token(document.body, "\\lang", pos + 1)

    for lang in used_languages:
        revert_language(document, lang, *new_languages[lang])
|
|
|
|
|
|
def revert_new_babel_languages(document):
    """Emulate support for Amharic, Armenian, Asturian, Bengali, Church Slavonic,
    Coptic, Divehi, Kannada, Kazakh, Khmer, Kurdish (Sorani), Lao, Latin (Classic),
    Latin (Ecclesiastic), Latin (Medieval), Malayalam, Marathi, N'ko, Occitan, Odia,
    Punjabi, Russian (Petrine orthography), Sanskrit, Syriac, Tamil, Telugu, Tibetan,
    Urdu, and Uyghur with babel.

    Does nothing when the document uses polyglossia (non-TeX fonts together
    with language package "default" or "auto").
    """

    # Does the document use polyglossia?
    if get_bool_value(document.header, "\\use_non_tex_fonts"):
        package_line = find_token(document.header, "\\language_package")
        if package_line == -1:
            document.warning("Malformed document! Missing \\language_package")
        elif get_value(document.header, "\\language_package", package_line) in (
            "default",
            "auto",
        ):
            return

    # lyxname: (babelname, polyglossianame, polyglossiaopts, babelprovide)
    # Languages whose babel name equals the LyX name, with babelprovide set:
    same_name_provided = (
        "amharic",
        "armenian",
        "asturian",
        "bengali",
        "coptic",
        "divehi",
        "hindi",
        "kannada",
        "kazakh",
        "khmer",
        "lao",
        "malayalam",
        "marathi",
        "nko",
        "odia",
        "punjabi",
        "sanskrit",
        "sorani",
        "syriac",
        "tamil",
        "telugu",
        "tibetan",
        "urdu",
        "uyghur",
    )
    new_languages = {name: (name, "", "", True) for name in same_name_provided}
    # The remaining ones differ in name or do not set babelprovide:
    new_languages.update(
        {
            "churchslavonic": ("churchslavic", "", "", True),
            "latin-classic": ("classiclatin", "", "", False),
            "latin-ecclesiastic": ("ecclesiasticlatin", "", "", False),
            "latin-medieval": ("medievallatin", "", "", False),
            "occitan": ("occitan", "", "", False),
        }
    )

    # Collect the affected languages: document language plus \lang switches.
    used_languages = set()
    if document.language in new_languages:
        used_languages.add(document.language)
    pos = find_token(document.body, "\\lang", 1)
    while pos != -1:
        switched_to = get_value(document.body, "\\lang", pos)
        if switched_to in new_languages:
            used_languages.add(switched_to)
        pos = find_token(document.body, "\\lang", pos + 1)

    for lang in used_languages:
        revert_language(document, lang, *new_languages[lang])

    # revert oldrussian to russian
    have_oldrussian = document.language == "oldrussian"
    if have_oldrussian:
        document.language = "russian"

    pos = find_token(document.body, "\\lang oldrussian", 1)
    while pos != -1:
        have_oldrussian = True
        document.body[pos] = "\\lang russian"
        pos = find_token(document.body, "\\lang oldrussian", pos + 1)

    if have_oldrussian:
        add_to_preamble(document, ["\\AddToHook{package/babel/after}{\\languageattribute{russian}{ancient}}"])

    # Some babel languages require special treatment with unicode engines
    if get_bool_value(document.header, "\\use_non_tex_fonts"):
        # NOTE(review): the "\lang oldrussian" lines were rewritten to
        # "\lang russian" just above, so the second test looks like it can
        # only match body line 0 - confirm what was intended here.
        if document.language == "hebrew" or find_token(document.body, "\\lang oldrussian", 0) != -1:
            add_to_preamble(document, ["\\PassOptionsToPackage{provide*=*}{babel}"])
|
|
|
|
|
|
def convert_mathml_version(document):
    """Add MathML version header for DocBook to use MathML 3 preferably.

    For cleanliness, add this header close to other DocBook headers if present.

    Leave XHTML alone, as the default value is still probably what the user wants (MathML Core)."""

    new_line = "\\docbook_mathml_version 0"
    anchor = find_token(document.header, "\\docbook", 0)
    if anchor != -1:
        # directly behind the first DocBook header line
        document.header.insert(anchor + 1, new_line)
        return
    # no DocBook header line: put it at the end of the header
    document.header += [new_line]
|
|
|
|
|
|
def revert_mathml_version(document):
    r"""Remove MathML version header.

    For XHTML, only remove the value 4 for \html_math_output (MathML 3) and replace it with 0
    (MathML Core with format 631+, MathML for 630-).

    For DocBook, totally remove the header (the default with 630- is MathML)."""

    # Reset the XHTML header if it uses the new value, leave it alone
    # otherwise.  This is a single lookup: the former loop never terminated
    # once the header line existed, and it tested list membership of "4" in
    # the whole header instead of the value of this line.
    i = find_token(document.header, "\\html_math_output", 0)
    if i != -1 and get_value(document.header, "\\html_math_output", i) == "4":
        document.header[i] = "\\html_math_output 0"

    while True:
        i = find_token(document.header, "\\docbook_mathml_version", 0)
        if i == -1:
            # nothing (more) to do
            return

        # remove header
        del document.header[i]
|
|
|
|
##
|
|
# Conversion hub
|
|
#
|
|
|
|
supported_versions = ["2.5.0", "2.5"]

# Conversion chain: one [number, [routines]] entry per step, ascending.
# (Presumably the number is the file format reached by running the routines;
# verify against the lyx2lyx driver.)
convert = [
    [621, [convert_url_escapes, convert_url_escapes2]],
    [622, []],
    [623, [convert_he_letter]],
    [624, [convert_biblatex_chicago]],
    [625, []],
    [626, []],
    [627, [convert_nomencl, convert_index_sc]],
    [628, []],
    [629, []],
    [630, []],
    [631, [convert_mathml_version]],
]


# Reversion chain: same layout, descending.
revert = [
    [630, [revert_mathml_version]],
    [629, [revert_new_polyglossia_languages, revert_new_babel_languages]],
    [628, [revert_langopts]],
    [627, [revert_nomentbl]],
    [626, [revert_nomencl, revert_index_sc]],
    [625, [revert_nomencl_textwidth]],
    [624, [revert_nptextcite]],
    [623, [revert_biblatex_chicago]],
    [622, []],
    [621, [revert_glue_parskip]],
    [620, [revert_url_escapes2, revert_url_escapes]],
]


if __name__ == "__main__":
    # This module does nothing when run as a script.
    pass
|