lyx_mirror/lib/lyx2lyx/lyx_2_5.py
Juergen Spitzmueller 6fa0532ed4 Escape special chars in indexes
We now have the functionality via sub-insets, and unexperienced users
are surprised when they don't get a literal !.

The unescaped chars can still be used via ERT.
2024-08-12 17:10:57 +02:00

741 lines
24 KiB
Python

# This file is part of lyx2lyx
# Copyright (C) 2024 The LyX team
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
"""Convert files to the file format generated by lyx 2.5"""
import re
# Uncomment only what you need to import, please (lyx2lyx_tools):
# convert_info_insets, get_ert, hex2ratio, insert_to_preamble,
# length_in_bp, lyx2verbatim,
# revert_flex_inset, revert_flex_inset, revert_font_attrs,
# revert_language, str2bool
from lyx2lyx_tools import (
add_to_preamble,
latex_length,
lyx2latex,
put_cmd_in_ert
)
# Uncomment only what you need to import, please (parser_tools):
# check_token, count_pars_in_inset, del_complete_lines,
# del_value, find_complete_lines, find_end_of,
# find_re, find_token_backwards, find_token_exact,
# find_tokens, get_bool_value,
# get_containing_layout, get_option_value,
# is_in_inset, set_bool_value
from parser_tools import (
del_token,
find_end_of_inset,
find_end_of_layout,
find_re,
find_substring,
find_token,
get_containing_inset,
get_quoted_value,
get_value
)
####################################################################
# Private helper functions
###############################################################################
###
### Conversion and reversion routines
###
###############################################################################
def convert_url_escapes(document):
"""Unescape # and % in URLs with hyperref."""
hyperref = find_token(document.header, "\\use_hyperref true", 0) != -1
beamer = document.textclass in [
"beamer",
"scrarticle-beamer",
"beamerposter",
"article-beamer",
]
if not hyperref and not beamer:
return
rurl = re.compile(r"^[%#].*")
i = 0
while True:
i = find_token(document.body, "\\begin_inset Flex URL", i)
if i == -1:
return
j = find_end_of_inset(document.body, i)
if j == -1:
document.warning("Malformed LyX document: Could not find end of URL inset.")
i += 1
continue
while True:
surl = find_re(document.body, rurl, i, j)
if surl == -1:
i = j
break
if document.body[surl - 1] == "\\backslash":
del document.body[surl - 1]
i = surl
def revert_url_escapes(document):
"""Unescape # and % in URLs with hyperref."""
hyperref = find_token(document.header, "\\use_hyperref true", 0) != -1
beamer = document.textclass in [
"beamer",
"scrarticle-beamer",
"beamerposter",
"article-beamer",
]
if not hyperref and not beamer:
return
rurl = re.compile(r"^(.*)([%#].*)")
i = 0
while True:
i = find_token(document.body, "\\begin_inset Flex URL", i)
if i == -1:
return
j = find_end_of_inset(document.body, i)
if j == -1:
document.warning("Malformed LyX document: Could not find end of URL inset.")
i += 1
continue
while True:
surl = find_re(document.body, rurl, i, j)
if surl == -1:
i = j
break
m = rurl.match(document.body[surl])
if m:
if m.group(1) == "" and document.body[surl - 1] == "\\backslash":
break
document.body[surl : surl + 1] = [m.group(1), "\\backslash", m.group(2)]
i = surl
def convert_url_escapes2(document):
"""Unescape backslashes in URLs with hyperref."""
i = find_token(document.header, "\\use_hyperref true", 0)
if i == -1 and document.textclass not in [
"beamer",
"scrarticle-beamer",
"beamerposter",
"article-beamer",
]:
return
i = 0
while True:
i = find_token(document.body, "\\begin_inset Flex URL", i + 1)
if i == -1:
return
j = find_end_of_inset(document.body, i)
if j == -1:
document.warning("Malformed LyX document: Could not find end of URL inset.")
i += 1
continue
while True:
bs = find_token(document.body, "\\backslash", i, j)
if bs == -1:
break
if document.body[bs + 2] == "\\backslash":
del document.body[bs + 2]
i = bs + 1
def revert_url_escapes2(document):
"""Escape backslashes in URLs with hyperref."""
i = find_token(document.header, "\\use_hyperref true", 0)
if i == -1 and document.textclass not in [
"beamer",
"scrarticle-beamer",
"beamerposter",
"article-beamer",
]:
return
i = 0
while True:
i = find_token(document.body, "\\begin_inset Flex URL", i + 1)
if i == -1:
return
j = find_end_of_inset(document.body, i)
if j == -1:
document.warning("Malformed LyX document: Could not find end of URL inset.")
i += 1
continue
while True:
bs = find_token(document.body, "\\backslash", i, j)
if bs == -1:
break
document.body[bs] = "\\backslash\\backslash"
i = bs + 1
def revert_glue_parskip(document):
"""Revert parskip with glue length to user preamble."""
i = find_token(document.header, "\\paragraph_separation skip", 0)
if i == -1:
return
j = find_token(document.header, "\\defskip", 0)
if j == -1:
document.warning("Malformed LyX document! Missing \\defskip.")
return
val = get_value(document.header, "\\defskip", j)
if val.find("+") == -1 and val.find("-", 1) == -1:
# not a glue length
return
add_to_preamble(document, ["\\usepackage[skip={" + latex_length(val)[1] + "}]{parskip}"])
document.header[i] = "\\paragraph_separation indent"
document.header[j] = "\\paragraph_indentation default"
def convert_he_letter(document):
"""Convert hebrew letter to letter document class"""
if document.textclass == "heb-letter":
document.textclass = "letter"
chicago_local_layout = [
"### Inserted by lyx2lyx (biblatex-chicago) ###",
"Requires biblatex-chicago",
"### End of insertion by lyx2lyx (biblatex-chicago) ###" "",
]
def convert_biblatex_chicago(document):
"""Convert biblatex-chicago documents"""
chicago = document.del_local_layout(chicago_local_layout)
if not chicago:
chicago = document.del_from_header(["Requires biblatex-chicago"])
if not chicago:
return
# 1. Get cite engine
engine = "basic"
i = find_token(document.header, "\\cite_engine", 0)
if i == -1:
document.warning("Malformed document! Missing \\cite_engine")
else:
engine = get_value(document.header, "\\cite_engine", i)
# 2. If biblatex set to chicago
biblatex = False
if engine not in ["biblatex", "biblatex-natbib"]:
return
document.header[i] = "\\cite_engine biblatex-chicago"
i = find_token(document.header, "\\biblio_options", 0)
bibopts = ""
if i == -1:
val = get_value(document.header, "\\biblio_options", i)
cetype = "authoryear"
if bibopts.find("authordate") == -1:
cetype = "notes"
# 2. Set cite type
i = find_token(document.header, "\\cite_engine_type", 0)
if i == -1:
document.warning("Malformed document! Missing \\cite_engine_type")
else:
document.header[i] = "\\cite_engine_type %s" % cetype
def revert_biblatex_chicago(document):
"""Revert biblatex-chicago to ERT where necessary"""
# 1. Get cite engine
engine = "basic"
i = find_token(document.header, "\\cite_engine", 0)
if i == -1:
document.warning("Malformed document! Missing \\cite_engine")
else:
engine = get_value(document.header, "\\cite_engine", i)
# 2. Do we use biblatex-chicago?
if engine != "biblatex-chicago":
return
# 3. Reset cite engine
document.header[i] = "\\cite_engine biblatex"
# 4. Set cite type
cetype = "authoryear"
i = find_token(document.header, "\\cite_engine_type", 0)
if i == -1:
document.warning("Malformed document! Missing \\cite_engine_type")
else:
cetype = get_value(document.header, "\\cite_engine_type", i)
document.header[i] = "\\cite_engine_type authoryear"
# 5. Add authordate option if needed
if cetype == "authoryear":
i = find_token(document.header, "\\biblio_options", 0)
if i != -1:
bibopts = get_value(document.header, "\\biblio_options", i)
if bibopts.find("authordate") != -1:
document.header[i] = "\\biblio_options %s" % bibopts + ", authordate"
else:
i = find_token(document.header, "\\biblio_style", 0)
if i == -1:
document.warning("Malformed document! Missing \\biblio_style")
else:
document.header[i+1:i+1] = ["\\biblio_options authordate"]
# 6. Set local layout
document.append_local_layout(chicago_local_layout)
# 7. Handle special citation commands
# Specific citation insets used in biblatex that need to be reverted to ERT
new_citations = {
"atcite": "atcite",
"atpcite": "atpcite",
"gentextcite": "gentextcite",
"Gentextcite": "Gentextcite",
}
if cetype == "notes":
new_citations = {
"citeyear": "citeyear*",
"Citetitle": "Citetitle",
"Citetitle*": "Citetitle*",
"gentextcite": "gentextcite",
"Gentextcite": "Gentextcite",
"shortcite": "shortcite",
"shortcite*": "shortcite*",
"shortrefcite": "shortrefcite",
"shorthandcite": "shorthandcite",
"shorthandcite*": "shorthandcite*",
"shorthandrefcite": "shorthandrefcite",
"citejournal": "citejournal",
"headlesscite": "headlesscite",
"Headlesscite": "Headlesscite",
"headlessfullcite": "headlessfullcite",
"surnamecite": "surnamecite",
}
i = 0
while True:
i = find_token(document.body, "\\begin_inset CommandInset citation", i)
if i == -1:
break
j = find_end_of_inset(document.body, i)
if j == -1:
document.warning("Can't find end of citation inset at line %d!!" % (i))
i += 1
continue
k = find_token(document.body, "LatexCommand", i, j)
if k == -1:
document.warning("Can't find LatexCommand for citation inset at line %d!" % (i))
i = j + 1
continue
cmd = get_value(document.body, "LatexCommand", k)
if cmd in list(new_citations.keys()):
pre = get_quoted_value(document.body, "before", i, j)
post = get_quoted_value(document.body, "after", i, j)
key = get_quoted_value(document.body, "key", i, j)
if not key:
document.warning("Citation inset at line %d does not have a key!" % (i))
key = "???"
# Replace known new commands with ERT
res = "\\" + new_citations[cmd]
if pre:
res += "[" + pre + "]"
if post:
res += "[" + post + "]"
elif pre:
res += "[]"
res += "{" + key + "}"
document.body[i : j + 1] = put_cmd_in_ert([res])
i = j + 1
def revert_nptextcite(document):
"""Revert \\nptextcite and MLA's autocite variants to ERT"""
# 1. Get cite engine
engine = "basic"
i = find_token(document.header, "\\cite_engine", 0)
if i == -1:
document.warning("Malformed document! Missing \\cite_engine")
else:
engine = get_value(document.header, "\\cite_engine", i)
# 2. Do we use biblatex?
if engine != "biblatex" and engine != "biblatex-natbib":
return
# 3. and APA?
cetype = "authoryear"
i = find_token(document.header, "\\biblatex_citestyle", 0)
if i == -1:
return
# 4. Convert \nptextcite to ERT
new_citations = {
"nptextcite": "nptextcite",
"mlaautocite": "autocite",
"Mlaautocite": "Autocite",
"mlaautocite*": "autocite*",
"Mlaautocite*": "Autocite*",
}
i = 0
while True:
i = find_token(document.body, "\\begin_inset CommandInset citation", i)
if i == -1:
break
j = find_end_of_inset(document.body, i)
if j == -1:
document.warning("Can't find end of citation inset at line %d!!" % (i))
i += 1
continue
k = find_token(document.body, "LatexCommand", i, j)
if k == -1:
document.warning("Can't find LatexCommand for citation inset at line %d!" % (i))
i = j + 1
continue
cmd = get_value(document.body, "LatexCommand", k)
if cmd in list(new_citations.keys()):
pre = get_quoted_value(document.body, "before", i, j)
post = get_quoted_value(document.body, "after", i, j)
key = get_quoted_value(document.body, "key", i, j)
if not key:
document.warning("Citation inset at line %d does not have a key!" % (i))
key = "???"
# Replace known new commands with ERT
res = "\\" + new_citations[cmd]
if pre:
res += "[" + pre + "]"
if post:
res += "[" + post + "]"
elif pre:
res += "[]"
res += "{" + key + "}"
document.body[i : j + 1] = put_cmd_in_ert([res])
i = j + 1
def revert_nomencl_textwidth(document):
"""Revert nomencl textwidth parameter to ERT."""
i = 0
while True:
i = find_token(document.body, "\\begin_inset CommandInset nomencl_print", i)
if i == -1:
return
j = find_end_of_inset(document.body, i)
if j == -1:
document.warning(
"Malformed LyX document: Can't find end of command inset at line %d" % i
)
i += 1
continue
sw = get_quoted_value(document.body, "set_width", i, j)
if sw != "textwidth":
i += 1
continue
# change set_width to "none"
k = find_token(document.body, "set_width", i, j)
if k != -1:
document.body[k] = "set_width \"none\""
tw = get_quoted_value(document.body, "width", i, j)
# delete width
del_token(document.body, "width", i, j)
# Insert ERT
res = "\\settowidth{\\nomlabelwidth}{" + tw + "}"
document.body[i : i] = put_cmd_in_ert([res])
i = j
def convert_nomencl(document):
"""Convert nomencl inset to collapsible."""
i = 0
while True:
i = find_token(document.body, "\\begin_inset CommandInset nomenclature", i)
if i == -1:
return
j = find_end_of_inset(document.body, i)
if j == -1:
document.warning(
"Malformed LyX document: Can't find end of command inset at line %d" % i
)
i += 1
continue
literal = get_quoted_value(document.body, "literal", i, j)
prefix = get_quoted_value(document.body, "prefix", i, j)
symbol = get_quoted_value(document.body, "symbol", i, j)
description = get_quoted_value(document.body, "description", i, j)
newins = ["\\begin_inset Nomenclature", "status open", "", "\\begin_layout Plain Layout"]
if prefix:
newins += ["\\begin_inset Argument 1",
"status open",
"",
"\\begin_layout Plain Layout",
prefix,
"\\end_layout",
"",
"\\end_inset",
""]
if literal == "true":
newins += put_cmd_in_ert(symbol.replace("\\\\", "\\").replace("\\\"", "\""))
else:
newins += [symbol.replace("\\\"", "\"")]
if description:
newins += ["\\begin_inset Argument post:1",
"status open",
"",
"\\begin_layout Plain Layout"]
if literal == "true":
newins += put_cmd_in_ert(description.replace("\\\\", "\\").replace("\\\"", "\""))
else:
newins += [description.replace("\\\"", "\"")]
newins += ["\\end_layout",
"",
"\\end_inset",
""]
newins += ["\\end_layout",
"",
"\\end_inset"]
document.body[i : j + 1] = newins
def revert_nomencl(document):
"""Revert nomencl inset to InsetCommand."""
i = 0
while True:
i = find_token(document.body, "\\begin_inset Nomenclature", i)
if i == -1:
return
j = find_end_of_inset(document.body, i)
if j == -1:
document.warning(
"Malformed LyX document: Can't find end of command inset at line %d" % i
)
i += 1
continue
arg = find_token(document.body, "\\begin_inset Argument 1", i, j)
prefix = []
if arg != -1:
endarg = find_end_of_inset(document.body, arg)
argbeginPlain = find_token(
document.body, "\\begin_layout Plain Layout", arg, endarg
)
if argbeginPlain == -1:
document.warning("Malformed LyX document: Can't find optarg plain Layout")
continue
argendPlain = find_end_of_inset(document.body, argbeginPlain)
prefix = document.body[argbeginPlain + 1 : argendPlain - 2]
# remove Arg insets and paragraph, if it only contains this inset
if (
document.body[arg - 1] == "\\begin_layout Plain Layout"
and find_end_of_layout(document.body, arg - 1) == endarg + 3
):
del document.body[arg - 1 : endarg + 4]
else:
del document.body[arg : endarg + 1]
arg = find_token(document.body, "\\begin_inset Argument post:1", i, j)
description = []
if arg != -1:
endarg = find_end_of_inset(document.body, arg)
argbeginPlain = find_token(
document.body, "\\begin_layout Plain Layout", arg, endarg
)
if argbeginPlain == -1:
document.warning("Malformed LyX document: Can't find arg 1 plain Layout")
continue
argendPlain = find_end_of_inset(document.body, argbeginPlain)
description = document.body[argbeginPlain + 1 : argendPlain - 2]
# remove Arg insets and paragraph, if it only contains this inset
if (
document.body[arg - 1] == "\\begin_layout Plain Layout"
and find_end_of_layout(document.body, arg - 1) == endarg + 3
):
del document.body[arg - 1 : endarg + 4]
else:
del document.body[arg : endarg + 1]
beginPlain = find_token(document.body, "\\begin_layout Plain Layout", i)
endPlain = find_end_of_layout(document.body, beginPlain)
symbol = document.body[beginPlain + 1 : endPlain]
literal = "false"
if "\\begin_inset ERT" in symbol or "\\begin_inset ERT" in description:
literal = "true"
newins = ["\\begin_inset CommandInset nomenclature", "LatexCommand nomenclature"]
if prefix:
newins += ["prefix \"" + lyx2latex(document, prefix) + "\""]
if symbol:
newins += ["symbol \"" + lyx2latex(document, symbol) + "\""]
if description:
newins += ["description \"" + lyx2latex(document, description) + "\""]
newins += ["literal \"" + literal + "\""]
j = find_end_of_inset(document.body, i)
document.body[i : j] = newins
i += 1
def convert_index_sc(document):
"""Convert index special characters to ERT."""
i = 0
while True:
i = find_token(document.body, "\\begin_inset Index", i)
if i == -1:
return
j = find_end_of_inset(document.body, i)
if j == -1:
document.warning(
"Malformed LyX document: Can't find end of index inset at line %d" % i
)
i += 1
continue
escchars = ["!", "@", "|"]
for ec in escchars:
k = i;
while True:
j = find_end_of_inset(document.body, i)
k = find_substring(document.body, ec, k, j)
if k == -1:
break
if get_containing_inset(document.body, k)[0] == "ERT":
k += 1
continue
line = document.body[k]
chunks = line.split(ec)
repl = []
if line[0] == ec:
repl = put_cmd_in_ert(ec)
chunks_len = len(chunks)-1
for ch in chunks[:-1]:
repl += [ch]
repl += put_cmd_in_ert(ec)
repl += chunks[-1:]
if line[-1:] == ec:
repl += put_cmd_in_ert(ec)
document.body[k:k+1] = repl
i += 1
def revert_index_sc(document):
"""Escape index special characters."""
i = 0
while True:
i = find_token(document.body, "\\begin_inset Index", i)
if i == -1:
return
j = find_end_of_inset(document.body, i)
if j == -1:
document.warning(
"Malformed LyX document: Can't find end of index inset at line %d" % i
)
i += 1
continue
escchars = ["!", "@", "|"]
for ec in escchars:
k = i;
while True:
j = find_end_of_inset(document.body, i)
k = find_substring(document.body, ec, k, j)
if k == -1:
break
if get_containing_inset(document.body, k)[0] == "ERT":
k += 1
continue
line = document.body[k]
chunks = line.split(ec)
repl = []
chunks_len = len(chunks)-1
for ch in chunks[:-1]:
repl += [ch]
repl += put_cmd_in_ert("\"")
repl += [ec]
repl += chunks[-1:]
document.body[k:k+1] = repl
k += len(repl)
i += 1
##
# Conversion hub
#
supported_versions = ["2.5.0", "2.5"]
convert = [
[621, [convert_url_escapes, convert_url_escapes2]],
[622, []],
[623, [convert_he_letter]],
[624, [convert_biblatex_chicago]],
[625, []],
[626, []],
[627, [convert_nomencl, convert_index_sc]]
]
revert = [
[626, [revert_nomencl, revert_index_sc]],
[625, [revert_nomencl_textwidth]],
[624, [revert_nptextcite]],
[623, [revert_biblatex_chicago]],
[622, []],
[621, [revert_glue_parskip]],
[620, [revert_url_escapes2, revert_url_escapes]],
]
if __name__ == "__main__":
pass