lyx_mirror/lib/lyx2lyx/lyx_2_5.py

# This file is part of lyx2lyx
# Copyright (C) 2024 The LyX team
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.

"""Convert files to the file format generated by lyx 2.5"""

import re

# Uncomment only what you need to import, please (lyx2lyx_tools):
#    convert_info_insets, get_ert, hex2ratio, insert_to_preamble,
#    length_in_bp, lyx2verbatim,
#    revert_flex_inset, revert_font_attrs,
#    str2bool
from lyx2lyx_tools import (
    add_to_preamble,
    latex_length,
    lyx2latex,
    put_cmd_in_ert,
    revert_language
)

# Uncomment only what you need to import, please (parser_tools):
#    check_token, count_pars_in_inset, del_complete_lines,
#    del_value, find_complete_lines, find_end_of,
#    find_re, find_token_backwards, find_token_exact,
#    find_tokens,
#    get_containing_layout, get_option_value,
#    is_in_inset, set_bool_value
from parser_tools import (
    del_token,
    find_end_of_inset,
    find_end_of_layout,
    find_re,
    find_substring,
    find_token,
    get_containing_inset,
    get_bool_value,
    get_quoted_value,
    get_value
)

####################################################################
# Private helper functions


###############################################################################
###
### Conversion and reversion routines
###
###############################################################################


def convert_url_escapes(document):
    """Unescape # and % in URL insets when hyperref is in use.

    Nothing is done unless the header contains ``\\use_hyperref true`` or
    the text class is one of the beamer classes listed below.  Inside every
    ``Flex URL`` inset, a ``\\backslash`` line that directly precedes a line
    starting with ``#`` or ``%`` is removed.
    """

    hyperref = find_token(document.header, "\\use_hyperref true", 0) != -1
    beamer = document.textclass in [
        "beamer",
        "scrarticle-beamer",
        "beamerposter",
        "article-beamer",
    ]

    if not hyperref and not beamer:
        return

    rurl = re.compile(r"^[%#].*")
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Flex URL", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: Could not find end of URL inset.")
            i += 1
            continue
        # k is the scan position inside the inset; i keeps the inset start
        k = i
        while True:
            surl = find_re(document.body, rurl, k, j)
            if surl == -1:
                break
            if document.body[surl - 1] == "\\backslash":
                del document.body[surl - 1]
                # The matched line moved up to surl - 1 and the inset
                # became one line shorter, so surl is now the next line.
                j -= 1
                k = surl
            else:
                # Not escaped: step past the line, otherwise the same line
                # would be found again forever.
                k = surl + 1
        # Resume the search for the next URL inset after this one
        i = j


def revert_url_escapes(document):
    """Escape # and % in URL insets when hyperref is in use.

    Nothing is done unless the header contains ``\\use_hyperref true`` or
    the text class is one of the beamer classes listed below.  Inside every
    ``Flex URL`` inset, each ``#`` or ``%`` is moved to the start of its own
    line and preceded by a ``\\backslash`` line, unless it already is.
    """

    hyperref = find_token(document.header, "\\use_hyperref true", 0) != -1
    beamer = document.textclass in [
        "beamer",
        "scrarticle-beamer",
        "beamerposter",
        "article-beamer",
    ]

    if not hyperref and not beamer:
        return

    # group(1) is greedy, so group(2) starts at the LAST # or % of the line
    rurl = re.compile(r"^(.*)([%#].*)")
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Flex URL", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: Could not find end of URL inset.")
            i += 1
            continue
        # k is the scan position inside the inset; i keeps the inset start
        k = i
        while True:
            surl = find_re(document.body, rurl, k, j)
            if surl == -1:
                break
            m = rurl.match(document.body[surl])
            if not m:
                # Defensive: never rescan a line we cannot handle
                k = surl + 1
                continue
            if m.group(1) == "" and document.body[surl - 1] == "\\backslash":
                # Already escaped: skip this line but keep scanning the
                # rest of the inset.
                k = surl + 1
                continue
            document.body[surl : surl + 1] = [m.group(1), "\\backslash", m.group(2)]
            # One line became three, so the inset end moved down by two
            j += 2
            # Rescan from surl: group(1) may hold further # or % characters
            k = surl
        # Resume the search for the next URL inset after this one
        i = j


def convert_url_escapes2(document):
    """Unescape backslashes in URL insets when hyperref is in use.

    Applies only if the header has ``\\use_hyperref true`` or the text class
    is one of the beamer classes below.  For every ``\\backslash`` line in a
    ``Flex URL`` inset, a ``\\backslash`` line found two lines further down
    is deleted.
    """

    beamer_classes = [
        "beamer",
        "scrarticle-beamer",
        "beamerposter",
        "article-beamer",
    ]
    uses_hyperref = find_token(document.header, "\\use_hyperref true", 0) != -1
    if not uses_hyperref and document.textclass not in beamer_classes:
        return

    pos = 0
    while True:
        pos = find_token(document.body, "\\begin_inset Flex URL", pos + 1)
        if pos == -1:
            return
        inset_end = find_end_of_inset(document.body, pos)
        if inset_end == -1:
            document.warning("Malformed LyX document: Could not find end of URL inset.")
            pos += 1
            continue
        hit = find_token(document.body, "\\backslash", pos, inset_end)
        while hit != -1:
            # Drop the second half of a doubled backslash
            if document.body[hit + 2] == "\\backslash":
                del document.body[hit + 2]
            pos = hit + 1
            hit = find_token(document.body, "\\backslash", pos, inset_end)


def revert_url_escapes2(document):
    """Escape backslashes in URL insets when hyperref is in use.

    Applies only if the header has ``\\use_hyperref true`` or the text class
    is one of the beamer classes below.  Every ``\\backslash`` line found in
    a ``Flex URL`` inset is rewritten as ``\\backslash\\backslash``.
    """

    beamer_classes = [
        "beamer",
        "scrarticle-beamer",
        "beamerposter",
        "article-beamer",
    ]
    uses_hyperref = find_token(document.header, "\\use_hyperref true", 0) != -1
    if not uses_hyperref and document.textclass not in beamer_classes:
        return

    pos = 0
    while True:
        pos = find_token(document.body, "\\begin_inset Flex URL", pos + 1)
        if pos == -1:
            return
        inset_end = find_end_of_inset(document.body, pos)
        if inset_end == -1:
            document.warning("Malformed LyX document: Could not find end of URL inset.")
            pos += 1
            continue
        hit = find_token(document.body, "\\backslash", pos, inset_end)
        while hit != -1:
            document.body[hit] = "\\backslash\\backslash"
            pos = hit + 1
            hit = find_token(document.body, "\\backslash", pos, inset_end)


def revert_glue_parskip(document):
    """Move a glue-length paragraph skip into the user preamble.

    Only acts when the header uses ``\\paragraph_separation skip`` and the
    ``\\defskip`` value contains a ``+``, or a ``-`` after its first
    character.  The skip is then loaded through the parskip package and the
    two header lines are reset to indentation defaults.
    """

    sep_line = find_token(document.header, "\\paragraph_separation skip", 0)
    if sep_line == -1:
        return

    skip_line = find_token(document.header, "\\defskip", 0)
    if skip_line == -1:
        document.warning("Malformed LyX document! Missing \\defskip.")
        return

    skip = get_value(document.header, "\\defskip", skip_line)

    # A leading "-" is just a sign; any other "+" or "-" marks a glue length
    is_glue = "+" in skip or "-" in skip[1:]
    if not is_glue:
        return

    package_line = "\\usepackage[skip={" + latex_length(skip)[1] + "}]{parskip}"
    add_to_preamble(document, [package_line])

    document.header[sep_line] = "\\paragraph_separation indent"
    document.header[skip_line] = "\\paragraph_indentation default"


def convert_he_letter(document):
    """Switch documents of text class heb-letter to the letter class."""

    if document.textclass != "heb-letter":
        # Any other class is left untouched
        return
    document.textclass = "letter"


# Local layout lines marking a document that requires biblatex-chicago;
# removed by convert_biblatex_chicago and appended by revert_biblatex_chicago.
chicago_local_layout = [
    "### Inserted by lyx2lyx (biblatex-chicago) ###",
    "Requires biblatex-chicago",
    "### End of insertion by lyx2lyx (biblatex-chicago) ###",
]

def convert_biblatex_chicago(document):
    """Convert biblatex-chicago documents to the biblatex-chicago cite engine.

    A document qualifies if it carries the lyx2lyx local layout block for
    biblatex-chicago or a ``Requires biblatex-chicago`` header line (either
    is removed here).  If its cite engine is ``biblatex`` or
    ``biblatex-natbib``, the engine is set to ``biblatex-chicago`` and the
    engine type to ``authoryear`` when the bibliography options mention
    ``authordate``, otherwise to ``notes``.
    """

    chicago = document.del_local_layout(chicago_local_layout)
    if not chicago:
        chicago = document.del_from_header(["Requires biblatex-chicago"])
    if not chicago:
        return

    # 1. Get cite engine
    engine = "basic"
    i = find_token(document.header, "\\cite_engine", 0)
    if i == -1:
        document.warning("Malformed document! Missing \\cite_engine")
    else:
        engine = get_value(document.header, "\\cite_engine", i)

    # 2. If biblatex set to chicago
    # (a missing \cite_engine leaves engine at "basic", so i is valid below)
    if engine not in ["biblatex", "biblatex-natbib"]:
        return

    document.header[i] = "\\cite_engine biblatex-chicago"

    # 3. Read the bibliography options, if there are any
    bibopts = ""
    i = find_token(document.header, "\\biblio_options", 0)
    if i != -1:
        bibopts = get_value(document.header, "\\biblio_options", i)

    # The authordate option selects author-year; everything else is notes
    cetype = "authoryear" if "authordate" in bibopts else "notes"

    # 4. Set cite type
    i = find_token(document.header, "\\cite_engine_type", 0)
    if i == -1:
        document.warning("Malformed document! Missing \\cite_engine_type")
    else:
        document.header[i] = "\\cite_engine_type %s" % cetype


def revert_biblatex_chicago(document):
    """Revert biblatex-chicago to ERT where necessary.

    Only documents whose cite engine is ``biblatex-chicago`` are touched.
    The engine is reset to ``biblatex`` and the engine type to
    ``authoryear``; if the type was ``authoryear`` the ``authordate``
    bibliography option is added unless already present.  The
    biblatex-chicago local layout is appended, and citation insets whose
    command is in the table below are replaced by ERT.
    """

    # 1. Get cite engine
    engine = "basic"
    i = find_token(document.header, "\\cite_engine", 0)
    if i == -1:
        document.warning("Malformed document! Missing \\cite_engine")
    else:
        engine = get_value(document.header, "\\cite_engine", i)

    # 2. Do we use biblatex-chicago?
    if engine != "biblatex-chicago":
        return

    # 3. Reset cite engine
    document.header[i] = "\\cite_engine biblatex"

    # 4. Set cite type
    cetype = "authoryear"
    i = find_token(document.header, "\\cite_engine_type", 0)
    if i == -1:
        document.warning("Malformed document! Missing \\cite_engine_type")
    else:
        cetype = get_value(document.header, "\\cite_engine_type", i)
        document.header[i] = "\\cite_engine_type authoryear"

    # 5. Add authordate option if needed
    if cetype == "authoryear":
        i = find_token(document.header, "\\biblio_options", 0)
        if i != -1:
            bibopts = get_value(document.header, "\\biblio_options", i)
            # Only append the option when it is not there yet
            if "authordate" not in bibopts:
                if bibopts:
                    document.header[i] = "\\biblio_options %s, authordate" % bibopts
                else:
                    document.header[i] = "\\biblio_options authordate"
        else:
            i = find_token(document.header, "\\biblio_style", 0)
            if i == -1:
                document.warning("Malformed document! Missing \\biblio_style")
            else:
                document.header[i + 1 : i + 1] = ["\\biblio_options authordate"]

    # 6. Set local layout
    document.append_local_layout(chicago_local_layout)

    # 7. Handle special citation commands
    # Specific citation insets used in biblatex that need to be reverted to ERT
    # (inset command name -> LaTeX command written to ERT)
    new_citations = {
        "atcite": "atcite",
        "atpcite": "atpcite",
        "gentextcite": "gentextcite",
        "Gentextcite": "Gentextcite",
    }
    if cetype == "notes":
        new_citations = {
            "citeyear": "citeyear*",
            "Citetitle": "Citetitle",
            "Citetitle*": "Citetitle*",
            "gentextcite": "gentextcite",
            "Gentextcite": "Gentextcite",
            "shortcite": "shortcite",
            "shortcite*": "shortcite*",
            "shortrefcite": "shortrefcite",
            "shorthandcite": "shorthandcite",
            "shorthandcite*": "shorthandcite*",
            "shorthandrefcite": "shorthandrefcite",
            "citejournal": "citejournal",
            "headlesscite": "headlesscite",
            "Headlesscite": "Headlesscite",
            "headlessfullcite": "headlessfullcite",
            "surnamecite": "surnamecite",
        }

    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CommandInset citation", i)
        if i == -1:
            break
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Can't find end of citation inset at line %d!!" % (i))
            i += 1
            continue
        k = find_token(document.body, "LatexCommand", i, j)
        if k == -1:
            document.warning("Can't find LatexCommand for citation inset at line %d!" % (i))
            i = j + 1
            continue
        cmd = get_value(document.body, "LatexCommand", k)
        if cmd not in new_citations:
            i = j + 1
            continue

        pre = get_quoted_value(document.body, "before", i, j)
        post = get_quoted_value(document.body, "after", i, j)
        key = get_quoted_value(document.body, "key", i, j)
        if not key:
            document.warning("Citation inset at line %d does not have a key!" % (i))
            key = "???"
        # Replace known new commands with ERT
        res = "\\" + new_citations[cmd]
        if pre:
            res += "[" + pre + "]"
        if post:
            res += "[" + post + "]"
        elif pre:
            # A lone prenote needs an empty postnote argument after it
            res += "[]"
        res += "{" + key + "}"
        ert = put_cmd_in_ert([res])
        document.body[i : j + 1] = ert
        # j is stale after the replacement; continue right behind the ERT
        i += len(ert)


def revert_nptextcite(document):
    """Revert \\nptextcite and MLA's autocite variants to ERT.

    Applies to documents whose cite engine is ``biblatex`` or
    ``biblatex-natbib`` and whose header has a ``\\biblatex_citestyle``
    line.  Citation insets using one of the commands in the table below are
    replaced by ERT with the mapped LaTeX command.
    """

    # 1. Get cite engine
    engine = "basic"
    i = find_token(document.header, "\\cite_engine", 0)
    if i == -1:
        document.warning("Malformed document! Missing \\cite_engine")
    else:
        engine = get_value(document.header, "\\cite_engine", i)

    # 2. Do we use biblatex?
    if engine != "biblatex" and engine != "biblatex-natbib":
        return

    # 3. and APA?
    # NOTE(review): only the presence of \biblatex_citestyle is checked,
    # not its value -- confirm this is intended.
    i = find_token(document.header, "\\biblatex_citestyle", 0)
    if i == -1:
        return

    # 4. Convert \nptextcite to ERT
    # (inset command name -> LaTeX command written to ERT)
    new_citations = {
        "nptextcite": "nptextcite",
        "mlaautocite": "autocite",
        "Mlaautocite": "Autocite",
        "mlaautocite*": "autocite*",
        "Mlaautocite*": "Autocite*",
    }
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CommandInset citation", i)
        if i == -1:
            break
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Can't find end of citation inset at line %d!!" % (i))
            i += 1
            continue
        k = find_token(document.body, "LatexCommand", i, j)
        if k == -1:
            document.warning("Can't find LatexCommand for citation inset at line %d!" % (i))
            i = j + 1
            continue
        cmd = get_value(document.body, "LatexCommand", k)
        if cmd not in new_citations:
            i = j + 1
            continue

        pre = get_quoted_value(document.body, "before", i, j)
        post = get_quoted_value(document.body, "after", i, j)
        key = get_quoted_value(document.body, "key", i, j)
        if not key:
            document.warning("Citation inset at line %d does not have a key!" % (i))
            key = "???"
        # Replace known new commands with ERT
        res = "\\" + new_citations[cmd]
        if pre:
            res += "[" + pre + "]"
        if post:
            res += "[" + post + "]"
        elif pre:
            # A lone prenote needs an empty postnote argument after it
            res += "[]"
        res += "{" + key + "}"
        ert = put_cmd_in_ert([res])
        document.body[i : j + 1] = ert
        # j is stale after the replacement; continue right behind the ERT
        i += len(ert)


def revert_nomencl_textwidth(document):
    """Revert the nomencl "textwidth" width setting to ERT.

    For every ``nomencl_print`` inset whose ``set_width`` is "textwidth",
    ``set_width`` becomes "none", the ``width`` line is removed and a
    ``\\settowidth{\\nomlabelwidth}{...}`` ERT is inserted before the inset.
    """

    body = document.body
    pos = 0
    while True:
        pos = find_token(body, "\\begin_inset CommandInset nomencl_print", pos)
        if pos == -1:
            return

        inset_end = find_end_of_inset(body, pos)
        if inset_end == -1:
            document.warning(
                "Malformed LyX document: Can't find end of command inset at line %d" % pos
            )
            pos += 1
            continue

        if get_quoted_value(body, "set_width", pos, inset_end) != "textwidth":
            pos += 1
            continue

        # change set_width to "none"
        width_mode_line = find_token(body, "set_width", pos, inset_end)
        if width_mode_line != -1:
            body[width_mode_line] = 'set_width "none"'
        # remember the width text, then delete its line
        width_text = get_quoted_value(body, "width", pos, inset_end)
        del_token(body, "width", pos, inset_end)
        # Insert ERT in front of the inset
        command = "\\settowidth{\\nomlabelwidth}{" + width_text + "}"
        body[pos:pos] = put_cmd_in_ert([command])
        pos = inset_end


def convert_nomencl(document):
    """Convert nomenclature command insets to collapsible Nomenclature insets.

    The prefix becomes ``Argument 1``, the symbol the main text and the
    description ``Argument post:1``.  With ``literal "true"`` symbol and
    description are put in ERT; escaped quotes are unescaped in either case.
    """

    def render(text, as_ert):
        """Return the body lines for one quoted inset parameter."""
        if as_ert:
            return put_cmd_in_ert(text.replace("\\\\", "\\").replace('\\"', '"'))
        return [text.replace('\\"', '"')]

    pos = 0
    while True:
        pos = find_token(document.body, "\\begin_inset CommandInset nomenclature", pos)
        if pos == -1:
            return

        inset_end = find_end_of_inset(document.body, pos)
        if inset_end == -1:
            document.warning(
                "Malformed LyX document: Can't find end of command inset at line %d" % pos
            )
            pos += 1
            continue

        literal = get_quoted_value(document.body, "literal", pos, inset_end)
        prefix = get_quoted_value(document.body, "prefix", pos, inset_end)
        symbol = get_quoted_value(document.body, "symbol", pos, inset_end)
        description = get_quoted_value(document.body, "description", pos, inset_end)
        as_ert = literal == "true"

        replacement = [
            "\\begin_inset Nomenclature",
            "status open",
            "",
            "\\begin_layout Plain Layout",
        ]
        if prefix:
            replacement.extend(
                [
                    "\\begin_inset Argument 1",
                    "status open",
                    "",
                    "\\begin_layout Plain Layout",
                    prefix,
                    "\\end_layout",
                    "",
                    "\\end_inset",
                    "",
                ]
            )
        replacement.extend(render(symbol, as_ert))
        if description:
            replacement.extend(
                [
                    "\\begin_inset Argument post:1",
                    "status open",
                    "",
                    "\\begin_layout Plain Layout",
                ]
            )
            replacement.extend(render(description, as_ert))
            replacement.extend(["\\end_layout", "", "\\end_inset", ""])
        replacement.extend(["\\end_layout", "", "\\end_inset"])
        # The new inset no longer matches the search token, so pos can stay
        document.body[pos : inset_end + 1] = replacement


def _cut_nomencl_argument(document, i, argname):
    """Cut Argument inset ``argname`` out of the inset starting at line i.

    Returns the body lines inside the argument's Plain Layout, ``[]`` if the
    inset has no such argument, or ``None`` if the argument has no Plain
    Layout (nothing is removed in that case).
    """
    body = document.body
    # Recomputed on every call: earlier cuts shorten the enclosing inset
    j = find_end_of_inset(body, i)
    arg = find_token(body, "\\begin_inset Argument " + argname, i, j)
    if arg == -1:
        return []
    endarg = find_end_of_inset(body, arg)
    argbeginPlain = find_token(body, "\\begin_layout Plain Layout", arg, endarg)
    if argbeginPlain == -1:
        return None
    argendPlain = find_end_of_inset(body, argbeginPlain)
    content = body[argbeginPlain + 1 : argendPlain - 2]

    # remove Arg insets and paragraph, if it only contains this inset
    if (
        body[arg - 1] == "\\begin_layout Plain Layout"
        and find_end_of_layout(body, arg - 1) == endarg + 3
    ):
        del body[arg - 1 : endarg + 4]
    else:
        del body[arg : endarg + 1]
    return content


def revert_nomencl(document):
    """Revert Nomenclature insets to nomenclature command insets.

    ``Argument 1`` becomes the ``prefix`` parameter, ``Argument post:1`` the
    ``description`` and the remaining text the ``symbol``.  ``literal`` is
    "true" if symbol or description contain an ERT inset.
    """

    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Nomenclature", i)
        if i == -1:
            return

        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning(
                "Malformed LyX document: Can't find end of command inset at line %d" % i
            )
            i += 1
            continue

        prefix = _cut_nomencl_argument(document, i, "1")
        if prefix is None:
            document.warning("Malformed LyX document: Can't find optarg plain Layout")
            # Step past this inset, otherwise it is found again forever
            i += 1
            continue

        description = _cut_nomencl_argument(document, i, "post:1")
        if description is None:
            document.warning("Malformed LyX document: Can't find arg post:1 plain Layout")
            i += 1
            continue

        # What is left in the first Plain Layout is the symbol
        beginPlain = find_token(document.body, "\\begin_layout Plain Layout", i)
        endPlain = find_end_of_layout(document.body, beginPlain)
        symbol = document.body[beginPlain + 1 : endPlain]
        literal = "false"
        if "\\begin_inset ERT" in symbol or "\\begin_inset ERT" in description:
            literal = "true"

        def quoted(lines):
            """Return lines as LaTeX, escaped for a quoted inset parameter."""
            latex = lyx2latex(document, lines)
            if literal == "true":
                latex = latex.replace("\\", "\\\\")
            return latex.replace('"', '\\"')

        newins = ["\\begin_inset CommandInset nomenclature", "LatexCommand nomenclature"]
        if prefix:
            newins += ['prefix "' + lyx2latex(document, prefix) + '"']
        if symbol:
            newins += ['symbol "' + quoted(symbol) + '"']
        if description:
            newins += ['description "' + quoted(description) + '"']
        newins += ['literal "' + literal + '"']

        # Keep the inset's own \end_inset line: replace everything before it
        j = find_end_of_inset(document.body, i)
        document.body[i:j] = newins

        i += 1


def convert_index_sc(document):
    """Convert index special characters to ERT.

    Inside every Index inset, each ``!``, ``@`` and ``|`` that is not
    already inside an ERT inset is replaced by an ERT inset holding that
    character.
    """

    escchars = ["!", "@", "|"]
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Index", i)
        if i == -1:
            return

        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning(
                "Malformed LyX document: Can't find end of index inset at line %d" % i
            )
            i += 1
            continue

        for ec in escchars:
            k = i
            while True:
                # The inset grows with every replacement: refresh its end
                j = find_end_of_inset(document.body, i)
                k = find_substring(document.body, ec, k, j)
                if k == -1:
                    break
                inInset = get_containing_inset(document.body, k)
                if inInset and inInset[0] == "ERT":
                    # Already in ERT, leave it alone
                    k += 1
                    continue

                # Split the line at the character and put an ERT with the
                # character between the pieces
                chunks = document.body[k].split(ec)
                repl = []
                for ch in chunks[:-1]:
                    repl += [ch]
                    repl += put_cmd_in_ert(ec)
                repl += chunks[-1:]
                document.body[k : k + 1] = repl
                # Everything just inserted is done; do not rescan it
                k += len(repl)
        i += 1


def revert_index_sc(document):
    """Escape index special characters.

    Inside every Index inset, each ``!``, ``@`` and ``|`` outside of ERT is
    moved to its own line and preceded by an ERT inset holding a double
    quote.
    """

    special = ("!", "@", "|")
    pos = 0
    while True:
        pos = find_token(document.body, "\\begin_inset Index", pos)
        if pos == -1:
            return

        if find_end_of_inset(document.body, pos) == -1:
            document.warning(
                "Malformed LyX document: Can't find end of index inset at line %d" % pos
            )
            pos += 1
            continue

        for char in special:
            line_no = pos
            while True:
                # The inset grows with every replacement: refresh its end
                inset_end = find_end_of_inset(document.body, pos)
                line_no = find_substring(document.body, char, line_no, inset_end)
                if line_no == -1:
                    break
                container = get_containing_inset(document.body, line_no)
                if container and container[0] == "ERT":
                    line_no += 1
                    continue

                pieces = document.body[line_no].split(char)
                replacement = []
                for piece in pieces[:-1]:
                    replacement.append(piece)
                    replacement.extend(put_cmd_in_ert("\""))
                    replacement.append(char)
                replacement.append(pieces[-1])
                document.body[line_no : line_no + 1] = replacement
                # Skip what was just written
                line_no += len(replacement)
        pos += 1


def _cut_nomentbl_argument(document, i, argname):
    """Cut Argument inset ``argname`` out of the inset starting at line i.

    Returns the body lines inside the argument's Plain Layout, ``[]`` if the
    inset has no such argument, or ``None`` if the argument has no Plain
    Layout (nothing is removed in that case).
    """
    body = document.body
    # Recomputed on every call: earlier cuts shorten the enclosing inset
    j = find_end_of_inset(body, i)
    arg = find_token(body, "\\begin_inset Argument " + argname, i, j)
    if arg == -1:
        return []
    endarg = find_end_of_inset(body, arg)
    argbeginPlain = find_token(body, "\\begin_layout Plain Layout", arg, endarg)
    if argbeginPlain == -1:
        return None
    argendPlain = find_end_of_inset(body, argbeginPlain)
    content = body[argbeginPlain + 1 : argendPlain - 2]

    # remove Arg insets and paragraph, if it only contains this inset
    if (
        body[arg - 1] == "\\begin_layout Plain Layout"
        and find_end_of_layout(body, arg - 1) == endarg + 3
    ):
        del body[arg - 1 : endarg + 4]
    else:
        del body[arg : endarg + 1]
    return content


def revert_nomentbl(document):
    """Revert nomentbl inset to ERT.

    The ``\\nomencl_options`` header line is removed and its value stored as
    ``PackageOptions nomencl`` in the local layout.  If the options contain
    ``nomentbl``, every Nomenclature inset is replaced by a
    ``\\nomenclature[prefix]{symbol}{description}{unit}{note}`` command whose
    syntax parts are ERT and whose arguments keep their LyX content.
    """

    i = find_token(document.header, "\\nomencl_options", 0)
    if i == -1:
        # nothing to do
        return

    opts = get_value(document.header, "\\nomencl_options", i)
    # remove header
    del document.header[i]

    # store options
    document.append_local_layout([r"### Inserted by lyx2lyx (nomencl) ###",
                                  r"PackageOptions nomencl %s" % opts])

    if opts.find("nomentbl") == -1:
        return

    # (Argument inset name, label used in the warning), in extraction order
    arg_specs = (
        ("1", "optarg"),
        ("post:1", "arg post:1"),
        ("post:2", "arg post:2"),
        ("post:3", "arg post:3"),
    )

    # revert insets to ERT
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Nomenclature", i)
        if i == -1:
            break

        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning(
                "Malformed LyX document: Can't find end of command inset at line %d" % i
            )
            i += 1
            continue

        args = []
        for argname, label in arg_specs:
            content = _cut_nomentbl_argument(document, i, argname)
            if content is None:
                document.warning(
                    "Malformed LyX document: Can't find %s plain Layout" % label
                )
                break
            args.append(content)
        if len(args) != len(arg_specs):
            # Step past this inset, otherwise it is found again forever
            i += 1
            continue
        prefix, description, unit, note = args

        # What is left in the first Plain Layout is the symbol
        beginPlain = find_token(document.body, "\\begin_layout Plain Layout", i)
        endPlain = find_end_of_layout(document.body, beginPlain)
        symbol = document.body[beginPlain + 1 : endPlain]

        # Replace command with ERT
        res = put_cmd_in_ert(["\\nomenclature"])
        if prefix:
            res += put_cmd_in_ert(["["]) + prefix + put_cmd_in_ert(["]"])
        res += (
            put_cmd_in_ert(["{"]) + symbol
            + put_cmd_in_ert(["}{"]) + description
            + put_cmd_in_ert(["}{"]) + unit
            + put_cmd_in_ert(["}{"]) + note
            + put_cmd_in_ert(["}"])
        )

        j = find_end_of_inset(document.body, i)
        document.body[i : j + 1] = res

        i += 1


def revert_langopts(document):
    """Delete every header line that starts with \\language_options_."""

    while True:
        line_no = find_token(document.header, "\\language_options_", 0)
        if line_no == -1:
            # no (more) such lines
            break
        del document.header[line_no]


def revert_new_polyglossia_languages(document):
    """Emulate polyglossia support for Simplified and Traditional Chinese,
    Japanese, Sorani Kurdish, Classic, Ecclesiastic and Medieval Latin, N'ko,
    Odia, Punjabi, and Uyghur.

    Only acts when the document uses polyglossia; every such language used
    as document language or in a ``\\lang`` switch is passed to
    revert_language().
    """

    # Polyglossia is assumed with non-TeX fonts and a "default" or "auto"
    # language package; anything else leaves the document alone.
    if not get_bool_value(document.header, "\\use_non_tex_fonts"):
        return
    pkg_line = find_token(document.header, "\\language_package")
    if pkg_line == -1:
        document.warning("Malformed document! Missing \\language_package")
        return
    if get_value(document.header, "\\language_package", pkg_line) not in ("default", "auto"):
        return

    #    lyxname:    (babelname, polyglossianame, polyglossiaopts)
    new_languages = {
        "chinese-simplified": ("", "chinese", "variant=simplified"),
        "chinese-traditional": ("", "chinese", "variant=traditional"),
        "japanese": ("", "japanese", ""),
        "sorani": ("", "kurdish", "variant=sorani"),
        "latin-classic": ("", "latin", "variant=classic"),
        "latin-ecclesiastic": ("", "latin", "variant=ecclesiastic"),
        "latin-medieval": ("", "latin", "variant=medieval"),
        "nko": ("", "nko", ""),
        "odia": ("", "odia", ""),
        "punjabi": ("", "punjabi", ""),
        "uyghur": ("", "uyghur", ""),
    }

    # Collect the affected languages: document language plus \lang switches
    used_languages = set()
    if document.language in new_languages:
        used_languages.add(document.language)
    pos = 0
    while True:
        pos = find_token(document.body, "\\lang", pos + 1)
        if pos == -1:
            break
        name = get_value(document.body, "\\lang", pos)
        if name in new_languages:
            used_languages.add(name)

    for lang in used_languages:
        babel_name, polyglossia_name, polyglossia_opts = new_languages[lang]
        revert_language(document, lang, babel_name, polyglossia_name, polyglossia_opts)


def revert_new_babel_languages(document):
    """Emulate support for Amharic, Armenian, Asturian, Bengali, Church Slavonic,
    Coptic, Divehi, Hindi, Kannada, Kazakh, Khmer, Kurdish (Sorani), Lao,
    Latin (Classic), Latin (Ecclesiastic), Latin (Medieval), Malayalam, Marathi,
    N'ko, Occitan, Odia, Punjabi, Russian (Petrine orthography), Sanskrit, Syriac,
    Tamil, Telugu, Tibetan, Urdu, and Uyghur with babel.

    Nothing is done when the document uses polyglossia (non-TeX fonts together
    with the "default" or "auto" language package). Otherwise:

    * every language of the table below that is the document language or the
      target of a ``\\lang`` switch in the body is passed to revert_language();
    * "oldrussian" is replaced by "russian" (document language, header line and
      body switches) and the babel "ancient" attribute is requested in the
      preamble;
    * with non-TeX fonts, documents that use Hebrew (as main language or in the
      body) get ``provide*=*`` passed to babel.
    """

    # Does the document use polyglossia?
    use_polyglossia = False
    if get_bool_value(document.header, "\\use_non_tex_fonts"):
        i = find_token(document.header, "\\language_package")
        if i == -1:
            document.warning("Malformed document! Missing \\language_package")
        else:
            pack = get_value(document.header, "\\language_package", i)
            if pack in ("default", "auto"):
                use_polyglossia = True

    if use_polyglossia:
        return

    #    lyxname:    (babelname, polyglossianame, polyglossiaopts, babelprovide)
    new_languages = {
        "amharic": ("amharic", "", "", True),
        "armenian": ("armenian", "", "", True),
        "asturian": ("asturian", "", "", True),
        "bengali": ("bengali", "", "", True),
        "churchslavonic": ("churchslavic", "", "", True),
        "coptic": ("coptic", "", "", True),
        "divehi": ("divehi", "", "", True),
        "hindi": ("hindi", "", "", True),
        "kannada": ("kannada", "", "", True),
        "kazakh": ("kazakh", "", "", True),
        "khmer": ("khmer", "", "", True),
        "lao": ("lao", "", "", True),
        "latin-classic": ("classiclatin", "", "", False),
        "latin-ecclesiastic": ("ecclesiasticlatin", "", "", False),
        "latin-medieval": ("medievallatin", "", "", False),
        "malayalam": ("malayalam", "", "", True),
        "marathi": ("marathi", "", "", True),
        "nko": ("nko", "", "", True),
        "occitan": ("occitan", "", "", False),
        "odia": ("odia", "", "", True),
        "punjabi": ("punjabi", "", "", True),
        "sanskrit": ("sanskrit", "", "", True),
        "sorani": ("sorani", "", "", True),
        "syriac": ("syriac", "", "", True),
        "tamil": ("tamil", "", "", True),
        "telugu": ("telugu", "", "", True),
        "tibetan": ("tibetan", "", "", True),
        "urdu": ("urdu", "", "", True),
        "uyghur": ("uyghur", "", "", True),
    }

    # Collect the new languages actually used: main language plus body switches.
    if document.language in new_languages:
        used_languages = {document.language}
    else:
        used_languages = set()
    i = 0
    while True:
        i = find_token(document.body, "\\lang", i + 1)
        if i == -1:
            break
        val = get_value(document.body, "\\lang", i)
        if val in new_languages:
            used_languages.add(val)

    for lang in used_languages:
        revert_language(document, lang, *new_languages[lang])

    # revert oldrussian to russian
    have_oldrussian = False
    if document.language == "oldrussian":
        document.language = "russian"
        # Keep the header line in step with the attribute so that the written
        # file does not still announce "oldrussian". This is a no-op when the
        # line is absent or has already been rewritten.
        k = find_token(document.header, "\\language oldrussian", 0)
        if k != -1:
            document.header[k] = "\\language russian"
        have_oldrussian = True

    i = 0
    while True:
        i = find_token(document.body, "\\lang oldrussian", i + 1)
        if i == -1:
            break
        have_oldrussian = True
        document.body[i] = "\\lang russian"

    if have_oldrussian:
        add_to_preamble(document, ["\\AddToHook{package/babel/after}{\\languageattribute{russian}{ancient}}"])

    # Some babel languages require special treatment with unicode engines
    if get_bool_value(document.header, "\\use_non_tex_fonts"):
        # Hebrew may be the main language or only occur as a switch in the body.
        # (Looking for "\\lang oldrussian" here could never succeed, since all
        # such lines have just been rewritten to "\\lang russian".)
        if document.language == "hebrew" or find_token(document.body, "\\lang hebrew", 0) != -1:
            add_to_preamble(document, ["\\PassOptionsToPackage{provide*=*}{babel}"])


def convert_mathml_version(document):
    """Add the MathML version header so that DocBook preferably uses MathML 3.

    To keep the header tidy, the new line is placed directly after the first
    existing ``\\docbook`` header line; when there is none, it is inserted
    just before the last header line.

    XHTML is left alone, as its default value is still probably what the user
    wants (MathML Core).
    """

    version_header = "\\docbook_mathml_version 0"
    anchor = find_token(document.header, "\\docbook", 0)
    # -1 as an insert position means "before the last element".
    position = -1 if anchor == -1 else anchor + 1
    document.header.insert(position, version_header)


def revert_mathml_version(document):
    """Drop the MathML version header.

    XHTML: only the value 4 of \\html_math_output (MathML 3) is touched; it is
    replaced with 0 (MathML Core with format 631+, MathML for 630-).

    DocBook: the header is removed altogether (the default with 630- is MathML).
    """

    # Reset the XHTML header if it uses the new value, leave it alone otherwise.
    html_line = find_token(document.header, "\\html_math_output", 0)
    if html_line != -1 and "4" in document.header[html_line]:
        document.header[html_line] = "\\html_math_output 0"

    # Remove every DocBook MathML version line from the header.
    docbook_line = find_token(document.header, "\\docbook_mathml_version", 0)
    while docbook_line != -1:
        del document.header[docbook_line]
        docbook_line = find_token(document.header, "\\docbook_mathml_version", 0)

##
# Conversion hub
#

# Version strings this module declares support for.
supported_versions = ["2.5.0", "2.5"]

# Conversion chain, in ascending format order. Each entry is
# [format number, [functions]]; an empty list means no function is listed
# for that step.
# NOTE(review): presumably the number is the format reached after running the
# listed functions in order -- confirm against the code that consumes the table.
convert = [
    [621, [convert_url_escapes, convert_url_escapes2]],
    [622, []],
    [623, [convert_he_letter]],
    [624, [convert_biblatex_chicago]],
    [625, []],
    [626, []],
    [627, [convert_nomencl, convert_index_sc]],
    [628, []],
    [629, []],
    [630, []],
    [631, [convert_mathml_version]]
]


# Reversion chain, in descending format order, with the same entry layout as
# the conversion table: [format number, [functions]].
# NOTE(review): presumably the number is the format reached after the listed
# functions have run -- confirm against the code that consumes the table.
revert = [
    [630, [revert_mathml_version]],
    [629, [revert_new_polyglossia_languages, revert_new_babel_languages]],
    [628, [revert_langopts]],
    [627, [revert_nomentbl]],
    [626, [revert_nomencl, revert_index_sc]],
    [625, [revert_nomencl_textwidth]],
    [624, [revert_nptextcite]],
    [623, [revert_biblatex_chicago]],
    [622, []],
    [621, [revert_glue_parskip]],
    [620, [revert_url_escapes2, revert_url_escapes]],
]


# Running this file directly does nothing.
if __name__ == "__main__":
    pass