lyx_mirror/lib/lyx2lyx/lyx_2_2.py

# -*- coding: utf-8 -*-
# This file is part of lyx2lyx
# -*- coding: utf-8 -*-
# Copyright (C) 2011 The LyX team
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.

""" Convert files to the file format generated by lyx 2.2"""

import re, string
import unicodedata
import sys, os

# Uncomment only what you need to import, please.

#from parser_tools import find_token, find_end_of, find_tokens, \
#  find_token_exact, find_end_of_inset, find_end_of_layout, \
#  find_token_backwards, is_in_inset, get_value, get_quoted_value, \
#  del_token, check_token, get_option_value

from lyx2lyx_tools import add_to_preamble, put_cmd_in_ert, lyx2latex#, \
#  insert_to_preamble, latex_length, revert_flex_inset, \
#  revert_font_attrs, hex2ratio, str2bool

from parser_tools import find_token, find_token_backwards, find_re, \
     find_end_of_inset, find_end_of_layout, find_nonempty_line, \
     get_containing_layout, get_value, check_token

###############################################################################
###
### Conversion and reversion routines
###
###############################################################################

def convert_separator(document):
    """
    Convert layout separators to separator insets and add (LaTeX) paragraph
    breaks in order to mimic previous LaTeX export.

    document.body is edited in place, in three passes:
      1. for every \\begin_deeper, a parbreak separator inset is inserted
         in front of the preceding \\end_layout;
      2. for every \\align line that is not inside a "Plain Layout", the
         same is done if the preceding paragraph is a "Standard" one that
         contains neither \\align nor a VSpace inset;
      3. every Separator/EndOfSlide layout is replaced by a Standard
         paragraph holding a parbreak separator inset.
    """

    # Lines of a bare parbreak inset (inserted into an existing paragraph)
    parins = ["\\begin_inset Separator parbreak", "\\end_inset", ""]
    # Lines of a complete Standard paragraph that only holds a parbreak inset
    parlay = ["\\begin_layout Standard", "\\begin_inset Separator parbreak",
              "\\end_inset", "", "\\end_layout", ""]
    # Font attributes and the value written to reset each of them
    sty_dict = {
        "family" : "default",
        "series" : "default",
        "shape"  : "default",
        "size"   : "default",
        "bar"    : "default",
        "color"  : "inherit"
        }

    # Pass 1: paragraphs directly followed by \begin_deeper
    i = 0
    while 1:
        i = find_token(document.body, "\\begin_deeper", i)
        if i == -1:
            break

        # j: the \end_layout that precedes the \begin_deeper line
        j = find_token_backwards(document.body, "\\end_layout", i-1)
        if j != -1:
            # reset any text style before inserting the inset
            # NOTE(review): lay is used as (name, start, end, content start)
            # throughout this function -- confirm against get_containing_layout
            lay = get_containing_layout(document.body, j-1)
            if lay != False:
                content = "\n".join(document.body[lay[1]:lay[2]])
                for val in list(sty_dict.keys()):
                    if content.find("\\%s" % val) != -1:
                        # one reset line goes in front of \end_layout; both
                        # indices move down by the inserted line
                        document.body[j:j] = ["\\%s %s" % (val, sty_dict[val])]
                        i = i + 1
                        j = j + 1
            document.body[j:j] = parins
            # skip the inserted inset lines and the \begin_deeper line itself
            i = i + len(parins) + 1
        else:
            i = i + 1

    # Pass 2: paragraphs directly followed by an explicitly aligned paragraph
    i = 0
    while 1:
        i = find_token(document.body, "\\align", i)
        if i == -1:
            break

        # \align lines inside a Plain Layout are left alone
        lay = get_containing_layout(document.body, i)
        if lay != False and lay[0] == "Plain Layout":
            i = i + 1
            continue

        # j: the \end_layout that precedes the \align line
        j = find_token_backwards(document.body, "\\end_layout", i-1)
        if j != -1:
            lay = get_containing_layout(document.body, j-1)
            # only a Standard paragraph without its own \align and without a
            # VSpace inset gets the parbreak
            if lay != False and lay[0] == "Standard" \
               and find_token(document.body, "\\align", lay[1], lay[2]) == -1 \
               and find_token(document.body, "\\begin_inset VSpace", lay[1], lay[2]) == -1:
                # reset any text style before inserting the inset
                content = "\n".join(document.body[lay[1]:lay[2]])
                for val in list(sty_dict.keys()):
                    if content.find("\\%s" % val) != -1:
                        document.body[j:j] = ["\\%s %s" % (val, sty_dict[val])]
                        i = i + 1
                        j = j + 1
                document.body[j:j] = parins
                # skip the inserted inset lines and the \align line itself
                i = i + len(parins) + 1
            else:
                i = i + 1
        else:
            i = i + 1

    # Matches "\begin_layout Separator" / "\begin_layout EndOfSlide",
    # case-insensitively, with optional dashes or whitespace around the name
    regexp = re.compile(r'^\\begin_layout (?:(-*)|(\s*))(Separator|EndOfSlide)(?:(-*)|(\s*))$', re.IGNORECASE)

    # Pass 3: replace separator layouts by a Standard paragraph with an inset
    i = 0
    while 1:
        i = find_re(document.body, regexp, i)
        if i == -1:
            return

        j = find_end_of_layout(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: Missing `\\end_layout'.")
            return

        # keep the lines between lay[3] and lay[2] of the separator layout
        lay = get_containing_layout(document.body, j-1)
        if lay != False:
            lines = document.body[lay[3]:lay[2]]
        else:
            lines = []

        document.body[i:j+1] = parlay
        if len(lines) > 0:
            # the kept lines go right after the new "\begin_layout Standard"
            document.body[i+1:i+1] = lines

        i = i + len(parlay) + len(lines) + 1


def revert_separator(document):
    """ Revert separator insets to layout separators

    Every "\\begin_inset Separator" in document.body is replaced in place.
    What it is replaced with depends on the inset kind ("plain" or not), on
    whether the containing paragraph has content before and/or after the
    inset, on the name of the containing layout and on what follows the
    paragraph.
    """

    # beamer classes use a different name for the separator layout
    beamer_classes = ["beamer", "article-beamer", "scrarticle-beamer"]
    if document.textclass in beamer_classes:
        beglaysep = "\\begin_layout Separator"
    else:
        beglaysep = "\\begin_layout --Separator--"

    # an empty separator paragraph
    parsep = [beglaysep, "", "\\end_layout", ""]
    # ERT inset whose content is "%"
    comert = ["\\begin_inset ERT", "status collapsed", "",
              "\\begin_layout Plain Layout", "%", "\\end_layout",
              "", "\\end_inset", ""]
    # ERT inset whose content is a single space
    empert = ["\\begin_inset ERT", "status collapsed", "",
              "\\begin_layout Plain Layout", " ", "\\end_layout",
              "", "\\end_inset", ""]

    i = 0
    while 1:
        i = find_token(document.body, "\\begin_inset Separator", i)
        if i == -1:
            return

        lay = get_containing_layout(document.body, i)
        if lay == False:
            document.warning("Malformed LyX document: Can't convert separator inset at line " + str(i))
            i = i + 1
            continue

        # NOTE(review): lay[1] and lay[2] are used as the first and last line
        # of the containing paragraph -- confirm against get_containing_layout
        layoutname = lay[0]
        beg = lay[1]
        end = lay[2]
        # second word of the value read from the inset line; "plain" is the
        # default handed to get_value
        kind = get_value(document.body, "\\begin_inset Separator", i, i+1, "plain").split()[1]
        # paragraph lines in front of / behind the inset; they only count as
        # "something" if at least one of them is non-empty
        before = document.body[beg+1:i]
        something_before = len(before) > 0 and len("".join(before)) > 0
        # NOTE(review): j is not checked for -1 (missing \end_inset)
        j = find_end_of_inset(document.body, i)
        after = document.body[j+1:end]
        something_after = len(after) > 0 and len("".join(after)) > 0
        if kind == "plain":
            # move beg to the inset line itself, so that only the inset
            # (beg..j) gets replaced below
            beg = beg + len(before) + 1
        elif something_before:
            # close the paragraph in front of the inset; everything behind
            # the insertion point moves down by two lines
            document.body[i:i] = ["\\end_layout", ""]
            i = i + 2
            j = j + 2
            beg = i
            end = end + 2

        if kind == "plain":
            # a plain separator becomes an ERT inset: a space if text
            # follows in the same paragraph, a "%" otherwise
            if something_after:
                document.body[beg:j+1] = empert
                i = i + len(empert)
            else:
                document.body[beg:j+1] = comert
                i = i + len(comert)
        else:
            if something_after:
                # text follows the inset in the same paragraph: it becomes a
                # Standard paragraph of its own
                if layoutname == "Standard":
                    if not something_before:
                        # replace from the paragraph start: separator
                        # paragraph first, then a fresh Standard paragraph
                        document.body[beg:j+1] = parsep
                        i = i + len(parsep)
                        document.body[i:i] = ["", "\\begin_layout Standard"]
                        i = i + 2
                    else:
                        document.body[beg:j+1] = ["\\begin_layout Standard"]
                        i = i + 1
                else:
                    # non-Standard layout: the new Standard paragraph is
                    # wrapped in \begin_deeper ... \end_deeper
                    document.body[beg:j+1] = ["\\begin_deeper"]
                    i = i + 1
                    # end follows the net change in length of the replacement
                    end = end + 1 - (j + 1 - beg)
                    if not something_before:
                        document.body[i:i] = parsep
                        i = i + len(parsep)
                        end = end + len(parsep)
                    document.body[i:i] = ["\\begin_layout Standard"]
                    document.body[end+2:end+2] = ["", "\\end_deeper", ""]
                    i = i + 4
            else:
                # nothing follows the inset inside its paragraph: look at the
                # first non-empty line behind the paragraph
                next_par_is_aligned = False
                k = find_nonempty_line(document.body, end+1)
                if k != -1 and check_token(document.body[k], "\\begin_layout"):
                    lay = get_containing_layout(document.body, k)
                    next_par_is_aligned = lay != False and \
                            find_token(document.body, "\\align", lay[1], lay[2]) != -1
                if k != -1 and not next_par_is_aligned \
                        and not check_token(document.body[k], "\\end_deeper") \
                        and not check_token(document.body[k], "\\begin_deeper"):
                    # a separator layout is needed here; lines beg..j are
                    # replaced by its \begin_layout line
                    if layoutname == "Standard":
                        document.body[beg:j+1] = [beglaysep]
                        i = i + 1
                    else:
                        document.body[beg:j+1] = ["\\begin_deeper", beglaysep]
                        end = end + 2 - (j + 1 - beg)
                        document.body[end+1:end+1] = ["", "\\end_deeper", ""]
                        i = i + 3
                else:
                    # no separator is written in this case; the inset lines
                    # are simply removed
                    if something_before:
                        del document.body[i:end+1]
                    else:
                        del document.body[i:end-1]

        i = i + 1


def revert_smash(document):
    """ Set amsmath to on if smash commands are used

    Nothing happens unless the header entry "\\use_package amsmath" has the
    value 1 (auto).  In that case the entry is switched to 2 (on) as soon as
    one Formula inset containing \\smash[t], \\smash[b] or \\notag is found.
    """

    amsmath_token = "\\use_package amsmath"
    header_pos = find_token(document.header, amsmath_token, 0)
    if header_pos == -1:
        document.warning("Malformed LyX document: Can't find \\use_package amsmath.")
        return

    setting = get_value(document.header, amsmath_token, header_pos).split()[1]
    if setting != "1":
        # nothing to do if package is not auto but on or off
        return

    needles = ["\\%s" % name for name in ("smash[t]", "smash[b]", "notag")]
    formula_token = '\\begin_inset Formula'

    start = find_token(document.body, formula_token, 0)
    while start != -1:
        stop = find_end_of_inset(document.body, start)
        if stop == -1:
            document.warning("Malformed LyX document: Can't find end of Formula inset at line " + str(start))
            start = find_token(document.body, formula_token, start + 1)
            continue

        formula = "\n".join(document.body[start:stop])
        if any(needle in formula for needle in needles):
            # set amsmath to on, since it is loaded by the newer format
            document.header[header_pos] = "\\use_package amsmath 2"
            return

        # carry on behind this formula
        start = find_token(document.body, formula_token, stop)


def revert_swissgerman(document):
    """ Set language german-ch-old to german

    Both the document language (attribute and header line) and every
    "\\lang german-ch-old" switch in the body are rewritten.
    """
    if document.language == "german-ch-old":
        document.language = "german"
        header_pos = find_token(document.header, "\\language", 0)
        if header_pos != -1:
            document.header[header_pos] = "\\language german"

    old_switch = "\\lang german-ch-old"
    pos = find_token(document.body, old_switch, 0)
    while pos != -1:
        document.body[pos] = document.body[pos].replace(old_switch, "\\lang german")
        pos = find_token(document.body, old_switch, pos + 1)


def revert_use_package(document, pkg, commands, oldauto):
    """ Remove the "\\use_package <pkg>" header entry and compensate for it

    pkg      -- name of the LaTeX package
    commands -- command names (without backslash) that require the package
    oldauto  -- how the version we are reverting to behaves: if it is true,
                the old version uses the package automatically; if it is
                false, the old version never uses the package.

    If the entry was 2 (on), the package is loaded in the preamble.  If it
    was 1 (auto, also assumed when the entry is missing) and oldauto is
    false, the package is loaded in the preamble only if some Formula inset
    uses one of the commands.
    """
    pattern = re.compile(r'(\\use_package\s+%s)' % pkg)
    load_pkg = ["\\usepackage{" + pkg + "}"]

    setting = "1" # default is auto
    header_pos = find_re(document.header, pattern, 0)
    if header_pos != -1:
        setting = get_value(document.header, "\\use_package" , header_pos).split()[1]
        del document.header[header_pos]

    if setting == "2": # on
        add_to_preamble(document, load_pkg)
        return
    if setting != "1" or oldauto:
        # off, or auto with an old version that loads the package by itself
        return

    # auto: load the package only if a formula needs it
    needles = ["\\%s" % name for name in commands]
    formula_token = '\\begin_inset Formula'
    start = find_token(document.body, formula_token, 0)
    while start != -1:
        stop = find_end_of_inset(document.body, start)
        if stop == -1:
            document.warning("Malformed LyX document: Can't find end of Formula inset at line " + str(start))
            start = find_token(document.body, formula_token, start + 1)
            continue

        formula = "\n".join(document.body[start:stop])
        if any(needle in formula for needle in needles):
            add_to_preamble(document, load_pkg)
            return

        start = find_token(document.body, formula_token, stop)


# Extensible arrow commands (names without the leading backslash) that
# revert_xarrow hands to revert_use_package for the mathtools package.
mathtools_commands = [
    "xhookrightarrow",
    "xhookleftarrow",
    "xRightarrow",
    "xrightharpoondown",
    "xrightharpoonup",
    "xrightleftharpoons",
    "xLeftarrow",
    "xleftharpoondown",
    "xleftharpoonup",
    "xleftrightarrow",
    "xLeftrightarrow",
    "xleftrightharpoons",
    "xmapsto",
]

def revert_xarrow(document):
    """ Remove use_package mathtools

    Delegates to revert_use_package with the mathtools arrow commands and
    oldauto=False.
    """
    revert_use_package(document, "mathtools", mathtools_commands, oldauto=False)


def revert_beamer_lemma(document):
    """ Reverts beamer lemma layout to ERT

    Only acts on the beamer text classes.  Every "Lemma" paragraph becomes a
    Standard paragraph: \\begin{lemma} / \\end{lemma} are written as ERT
    around a run of consecutive Lemma paragraphs, and the contents of the
    insets "Argument 1" and "Argument 2" are re-inserted at the start of the
    paragraph, wrapped in ERT "<" ">" and "[" "]" respectively.
    """

    beamer_classes = ["beamer", "article-beamer", "scrarticle-beamer"]
    if document.textclass not in beamer_classes:
        return

    # True while the previous Lemma paragraph is directly followed by the
    # current one, i.e. the lemma environment is still open
    consecutive = False
    i = 0
    while True:
        i = find_token(document.body, "\\begin_layout Lemma", i)
        if i == -1:
            return
        j = find_end_of_layout(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: Can't find end of Lemma layout")
            i += 1
            continue
        # Locate both optional argument insets inside the paragraph.
        # find_end_of_inset is also called when the inset is missing (-1);
        # endarg1/endarg2 are only used below when the inset was found.
        arg1 = find_token(document.body, "\\begin_inset Argument 1", i, j)
        endarg1 = find_end_of_inset(document.body, arg1)
        arg2 = find_token(document.body, "\\begin_inset Argument 2", i, j)
        endarg2 = find_end_of_inset(document.body, arg2)
        subst1 = []
        subst2 = []
        if arg1 != -1:
            beginPlain1 = find_token(document.body, "\\begin_layout Plain Layout", arg1, endarg1)
            if beginPlain1 == -1:
                document.warning("Malformed LyX document: Can't find arg1 plain Layout")
                i += 1
                continue
            # NOTE(review): find_end_of_inset is started on the Plain Layout
            # line; presumably it yields the \end_inset of the argument inset,
            # so that "- 2" drops the layout's closing lines -- confirm
            endPlain1 = find_end_of_inset(document.body, beginPlain1)
            content1 = document.body[beginPlain1 + 1 : endPlain1 - 2]
            subst1 = put_cmd_in_ert("<") + content1 + put_cmd_in_ert(">")
        if arg2 != -1:
            beginPlain2 = find_token(document.body, "\\begin_layout Plain Layout", arg2, endarg2)
            if beginPlain2 == -1:
                document.warning("Malformed LyX document: Can't find arg2 plain Layout")
                i += 1
                continue
            # same extraction as for argument 1
            endPlain2 = find_end_of_inset(document.body, beginPlain2)
            content2 = document.body[beginPlain2 + 1 : endPlain2 - 2]
            subst2 = put_cmd_in_ert("[") + content2 + put_cmd_in_ert("]")

        # remove Arg insets
        # The inset further down is deleted first, so that the indices of the
        # other one stay valid.  If neither inset exists (both -1), neither
        # branch runs.
        if arg1 < arg2:
            del document.body[arg2 : endarg2 + 1]
            if arg1 != -1:
                del document.body[arg1 : endarg1 + 1]
        if arg2 < arg1:
            del document.body[arg1 : endarg1 + 1]
            if arg2 != -1:
                del document.body[arg2 : endarg2 + 1]

        # index of end layout has probably changed
        j = find_end_of_layout(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: Can't find end of Lemma layout")
            i += 1
            continue

        begcmd = []

        # if this is not a consecutive env, add start command
        if not consecutive:
            begcmd = put_cmd_in_ert("\\begin{lemma}")

        # has this a consecutive lemma?
        # NOTE(review): assumes the body has at least two more lines after j
        consecutive = document.body[j + 2] == "\\begin_layout Lemma"

        # if this is not followed by a consecutive env, add end command
        if not consecutive:
            document.body[j : j + 1] = put_cmd_in_ert("\\end{lemma}") + ["\\end_layout"]

        # Done after the edit at j, so that j was still valid when it was used
        document.body[i : i + 1] = ["\\begin_layout Standard", ""] + begcmd + subst1 + subst2

        # continue searching from the (pre-insertion) end-of-layout index
        i = j


def revert_question_env(document):
    """
    Reverts question and question* environments of
    theorems-ams-extended-bytype module to ERT

    Does nothing unless the document uses that module.  Every "Question" /
    "Question*" paragraph becomes a Standard paragraph; \\begin{...} and
    \\end{...} are written as ERT around each run of consecutive paragraphs
    of the same kind, and the matching theorem definition is handed to
    add_to_preamble.
    """

    # Do we use theorems-ams-extended-bytype module?
    if "theorems-ams-extended-bytype" not in document.get_module_list():
        return

    # True while the previous paragraph is directly followed by the current
    # one with the same layout, i.e. the environment is still open
    consecutive = False
    i = 0
    while True:
        i = find_token(document.body, "\\begin_layout Question", i)
        if i == -1:
            return

        starred = document.body[i] == "\\begin_layout Question*"
        if starred:
            env = "question*"
            layout_line = "\\begin_layout Question*"
            theorem_def = "\\theoremstyle{plain}\n" \
                          "\\newtheorem*{question*}{\\protect\\questionname}"
        else:
            env = "question"
            layout_line = "\\begin_layout Question"
            theorem_def = "\\theoremstyle{plain}\n" \
                          "\\newtheorem{question}{\\protect\\questionname}"

        j = find_end_of_layout(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: Can't find end of Question layout")
            i += 1
            continue

        # if this is not a consecutive env, add start command
        begcmd = []
        if not consecutive:
            begcmd = put_cmd_in_ert("\\begin{%s}" % env)

        # has this a consecutive theorem of same type?
        # (the length check avoids an IndexError at the very end of the body)
        consecutive = j + 2 < len(document.body) \
                      and document.body[j + 2] == layout_line

        # if this is not followed by a consecutive env, add end command
        if not consecutive:
            document.body[j : j + 1] = put_cmd_in_ert("\\end{%s}" % env) + ["\\end_layout"]

        # done after the edit at j, so that j was still valid when it was used
        document.body[i : i + 1] = ["\\begin_layout Standard", ""] + begcmd

        # "\\q" is spelled out: "\q" is an invalid escape sequence, although
        # it yields the same two characters at run time.
        # NOTE(review): the other callers in this file pass a list of lines
        # to add_to_preamble, these two pass a plain string -- confirm that
        # the helper accepts both.
        add_to_preamble(document, "\\providecommand{\\questionname}{Question}")
        add_to_preamble(document, theorem_def)

        # continue searching from the (pre-insertion) end-of-layout index
        i = j


def convert_dashes(document):
    """convert -- and --- to \\twohyphens and \\threehyphens

    Only done for the "latex" backend.  A body line is cut right after the
    first dash run found in it; the remainder becomes a new line of its own,
    which is examined in turn.
    """

    if document.backend != "latex":
        return

    # must not replace anything in insets that store LaTeX contents in .lyx files
    # (math and command insets without overridden read() and write() methods)
    # filtering out IPA makes Text::readParToken() more simple
    # skip ERT as well since it is not needed there
    latex_insets = ["CommandInset", "ERT", "External", "Formula", "Graphics", "IPA", "listings"]

    body = document.body
    pos = 0
    while pos < len(body):
        tokens = body[pos].split()
        if len(tokens) > 1 and tokens[0] == "\\begin_inset" and tokens[1] in latex_insets:
            inset_end = find_end_of_inset(body, pos)
            if inset_end == -1:
                document.warning("Malformed LyX document: Can't find end of " + tokens[1] + " inset at line " + str(pos))
                pos += 1
            else:
                pos = inset_end
            continue

        head, dashes, tail = body[pos].partition("--")
        if dashes:
            # We can have an arbitrary number of consecutive hyphens.
            # These must be split into the corresponding number of two and three hyphens
            # We must match what LaTeX does: First try emdash, then endash, then single hyphen
            if tail.startswith("-"):
                tail = tail[1:]
                macro = "\\threehyphens"
            else:
                macro = "\\twohyphens"
            if tail:
                # the rest of the line is looked at in the next iteration
                body.insert(pos + 1, tail)
            body[pos] = head + macro
        pos += 1


def revert_dashes(document):
    """convert \\twohyphens and \\threehyphens to -- and ---

    After a replacement the following line is appended to the current one
    if it does not start with a backslash (or starts with one of the two
    macros) and the joined line is at most 80 characters long; the joined
    line is then examined again.
    """

    # see convert_dashes
    latex_insets = ["CommandInset", "ERT", "External", "Formula", "Graphics", "IPA", "listings"]

    body = document.body
    pos = 0
    while pos < len(body):
        tokens = body[pos].split()
        if len(tokens) > 1 and tokens[0] == "\\begin_inset" and tokens[1] in latex_insets:
            inset_end = find_end_of_inset(body, pos)
            if inset_end == -1:
                document.warning("Malformed LyX document: Can't find end of " + tokens[1] + " inset at line " + str(pos))
                pos += 1
            else:
                pos = inset_end
            continue

        line = body[pos]
        restored = line.replace("\\twohyphens", "--").replace("\\threehyphens", "---")
        # both macros are longer than their replacement, so the text differs
        # exactly when at least one macro was present
        changed = restored != line
        if changed:
            body[pos] = restored

        if changed and pos + 1 < len(body):
            follower = body[pos + 1]
            joinable = not follower.startswith("\\") \
                       or follower.startswith("\\twohyphens") \
                       or follower.startswith("\\threehyphens")
            if joinable and len(restored) + len(follower) <= 80:
                body[pos] = restored + follower
                del body[pos + 1]
                # stay on this line: the appended part may hold more macros
                continue
        pos += 1


# order is important for the last three!
# ("TeX" is contained in "LaTeX", which is contained in "LaTeX2e")
phrases = ["LyX", "LaTeX2e", "LaTeX", "TeX"]

def is_part_of_converted_phrase(line, j, phrase):
    """is phrase part of an already converted phrase?

    line   -- the text line
    j      -- index in line at which phrase starts
    phrase -- the phrase found at that index

    True if the text of line that ends where phrase ends is
    "\\SpecialCharNoPassThru \\<p>" for one of the known phrases <p>.
    """
    phrase_end = j + len(phrase)
    for known in phrases:
        marker = "\\SpecialCharNoPassThru \\" + known
        marker_start = phrase_end - len(marker)
        if marker_start >= 0 and line[marker_start:phrase_end] == marker:
            return True
    return False


def convert_phrases(document):
    """convert special phrases from plain text to \\SpecialCharNoPassThru

    Only done for the "latex" backend.  The body is scanned once per phrase.
    Lines starting with a backslash are left alone; on every other line the
    first occurrence of the phrase is converted, the line is cut right
    behind it and the remainder becomes a new line of its own.
    """

    if document.backend != "latex":
        return

    # must not replace anything in insets that store LaTeX contents in .lyx files
    # (math and command insets without overridden read() and write() methods)
    latex_insets = ["CommandInset", "External", "Formula", "Graphics", "listings"]

    body = document.body
    for phrase in phrases:
        pos = 0
        while pos < len(body):
            line = body[pos]
            tokens = line.split()
            if len(tokens) > 1 and tokens[0] == "\\begin_inset" and tokens[1] in latex_insets:
                inset_end = find_end_of_inset(body, pos)
                if inset_end == -1:
                    document.warning("Malformed LyX document: Can't find end of Formula inset at line " + str(pos))
                    pos += 1
                else:
                    pos = inset_end
                continue

            found = -1 if line.startswith("\\") else line.find(phrase)
            if found != -1 and not is_part_of_converted_phrase(line, found, phrase):
                remainder = line[found + len(phrase):]
                if remainder:
                    body.insert(pos + 1, remainder)
                # We cannot use SpecialChar since we do not know whether we are outside passThru
                body[pos] = line[:found] + "\\SpecialCharNoPassThru \\" + phrase
            pos += 1


def revert_phrases(document):
    """convert special phrases to plain text

    After a replacement the following line is appended to the current one
    if it does not start with a backslash (or starts with "\\SpecialChar")
    and the joined line is at most 80 characters long; the joined line is
    then examined again.
    """

    # see convert_phrases
    latex_insets = ["CommandInset", "External", "Formula", "Graphics", "listings"]

    body = document.body
    pos = 0
    while pos < len(body):
        tokens = body[pos].split()
        if len(tokens) > 1 and tokens[0] == "\\begin_inset" and tokens[1] in latex_insets:
            inset_end = find_end_of_inset(body, pos)
            if inset_end == -1:
                document.warning("Malformed LyX document: Can't find end of Formula inset at line " + str(pos))
                pos += 1
            else:
                pos = inset_end
            continue

        changed = False
        for phrase in phrases:
            # we can replace SpecialChar since LyX ensures that it cannot be inserted into passThru parts
            for prefix in ("\\SpecialChar \\", "\\SpecialCharNoPassThru \\"):
                marker = prefix + phrase
                if marker in body[pos]:
                    body[pos] = body[pos].replace(marker, phrase)
                    changed = True

        if changed and pos + 1 < len(body):
            follower = body[pos + 1]
            joinable = not follower.startswith("\\") \
                       or follower.startswith("\\SpecialChar")
            if joinable and len(body[pos]) + len(follower) <= 80:
                body[pos] = body[pos] + follower
                del body[pos + 1]
                # stay on this line: the appended part may hold more phrases
                continue
        pos += 1


def convert_specialchar_internal(document, forward):
    """Translate the arguments of \\SpecialChar and \\SpecialCharNoPassThru

    document -- the document whose body is edited in place
    forward  -- if true, replace the old LaTeX-like names (e.g. "\\ldots{}")
                by the new plain names (e.g. "ldots"); otherwise do the
                reverse

    Insets that store LaTeX contents in the .lyx file are skipped.
    """
    # (old syntax, new syntax) pairs.  A sequence rather than a dict, so that
    # the order is guaranteed on every Python version and the Python 2 only
    # dict.iteritems() is not needed.
    specialchars = (
        ("\\-", "softhyphen"),
        ("\\textcompwordmark{}", "ligaturebreak"),
        ("\\@.", "endofsentence"),
        ("\\ldots{}", "ldots"),
        ("\\menuseparator", "menuseparator"),
        ("\\slash{}", "breakableslash"),
        ("\\nobreakdash-", "nobreakdash"),
        ("\\LyX", "LyX"),
        ("\\TeX", "TeX"),
        ("\\LaTeX2e", "LaTeX2e"),
        ("\\LaTeX", "LaTeX"), # must be after LaTeX2e
    )
    prefixes = ("\\SpecialChar ", "\\SpecialCharNoPassThru ")

    i = 0
    while i < len(document.body):
        words = document.body[i].split()
        if len(words) > 1 and words[0] == "\\begin_inset" and \
           words[1] in ["CommandInset", "External", "Formula", "Graphics", "listings"]:
            # see convert_phrases
            j = find_end_of_inset(document.body, i)
            if j == -1:
                document.warning("Malformed LyX document: Can't find end of Formula inset at line " + str(i))
                i += 1
            else:
                i = j
            continue
        for old, new in specialchars:
            if forward:
                source, target = old, new
            else:
                source, target = new, old
            for prefix in prefixes:
                document.body[i] = document.body[i].replace(prefix + source, prefix + target)
        i += 1


def convert_specialchar(document):
    """convert special characters to new syntax

    Runs convert_specialchar_internal in forward direction.
    """
    convert_specialchar_internal(document, forward=True)


def revert_specialchar(document):
    """convert special characters to old syntax

    Runs convert_specialchar_internal in backward direction.
    """
    convert_specialchar_internal(document, forward=False)


def revert_georgian(document):
    """Set the document language to English but assure Georgian output

    Only acts if document.language is "georgian".  The header is changed so
    that the language is english, a "default" language package becomes
    babel, and "georgian" is added to the \\options line (which is created
    if it does not exist).
    """

    if document.language != "georgian":
        return

    document.language = "english"

    i = find_token(document.header, "\\language georgian", 0)
    if i != -1:
        document.header[i] = "\\language english"

    j = find_token(document.header, "\\language_package default", 0)
    if j != -1:
        document.header[j] = "\\language_package babel"

    k = find_token(document.header, "\\options", 0)
    if k != -1:
        # put georgian in front of the options that are already there
        document.header[k] = document.header[k].replace("\\options", "\\options georgian,")
    else:
        # no \options line yet: add one right behind \use_default_options
        # (at the top of the header if that line is missing as well)
        default_opts = find_token(document.header, "\\use_default_options", 0)
        document.header.insert(default_opts + 1, "\\options georgian")


def revert_sigplan_doi(document):
    """ Reverts sigplanconf DOI layout to ERT

    Only acts on the sigplanconf text class.  Every DOI paragraph is removed
    from the body; its content, converted with lyx2latex, is handed to
    add_to_preamble as a \\doi{...} command.
    """

    if document.textclass != "sigplanconf":
        return

    doi_token = "\\begin_layout DOI"
    start = find_token(document.body, doi_token, 0)
    while start != -1:
        stop = find_end_of_layout(document.body, start)
        if stop == -1:
            document.warning("Malformed LyX document: Can't find end of DOI layout")
            start = find_token(document.body, doi_token, start + 1)
            continue

        doi = lyx2latex(document, document.body[start:stop + 1])
        add_to_preamble(document, ["\\doi{" + doi + "}"])
        del document.body[start:stop + 1]
        # the paragraph is gone, so the search resumes at the same index
        start = find_token(document.body, doi_token, start)


def revert_ex_itemargs(document):
    """ Reverts \\item arguments of the example environments (Linguistics module) to TeX-code

    Does nothing unless the document uses the linguistics module.  In the
    example layouts every "Argument item:" inset is removed and its content
    is re-inserted at the beginning of the paragraph, between an ERT "["
    and an ERT "]".
    """

    # Do we use the linguistics module?
    if "linguistics" not in document.get_module_list():
        return

    example_layouts = ["Numbered Examples (consecutive)", "Subexample"]
    pos = 0
    while True:
        pos = find_token(document.body, "\\begin_inset Argument item:", pos)
        if pos == -1:
            return
        inset_end = find_end_of_inset(document.body, pos)

        # Find containing paragraph layout
        parent = get_containing_layout(document.body, pos)
        if parent == False:
            document.warning("Malformed LyX document: Can't find parent paragraph layout")
            pos += 1
            continue

        par_start = parent[3]
        if parent[0] in example_layouts:
            plain_beg = find_token(document.body, "\\begin_layout Plain Layout", pos)
            plain_end = find_end_of_layout(document.body, plain_beg)
            item_text = document.body[plain_beg + 1 : plain_end]
            del document.body[pos:inset_end + 1]
            document.body[par_start : par_start] = \
                put_cmd_in_ert("[") + item_text + put_cmd_in_ert("]")
        pos += 1


def revert_forest(document):
    """ Reverts the forest environment (Linguistics module) to TeX-code

    Does nothing unless the document uses the linguistics module.  Every
    "Flex Structure Tree" inset is replaced by an ERT inset holding
    \\begin{forest}, the inset content converted with lyx2latex, and
    \\end{forest}; \\usepackage{forest} is handed to add_to_preamble.
    """

    # Do we use the linguistics module?
    if "linguistics" not in document.get_module_list():
        return

    tree_token = "\\begin_inset Flex Structure Tree"
    start = find_token(document.body, tree_token, 0)
    while start != -1:
        stop = find_end_of_inset(document.body, start)
        if stop == -1:
            document.warning("Malformed LyX document: Can't find end of Structure Tree inset")
            start = find_token(document.body, tree_token, start + 1)
            continue

        plain_beg = find_token(document.body, "\\begin_layout Plain Layout", start)
        plain_end = find_end_of_layout(document.body, plain_beg)
        tree_code = lyx2latex(document, document.body[plain_beg : plain_end])

        add_to_preamble(document, ["\\usepackage{forest}"])

        ert_open = ["\\begin_inset ERT", "status collapsed", "",
                    "\\begin_layout Plain Layout", "", "\\backslash",
                    "begin{forest}", "\\end_layout", ""]
        ert_body = ["\\begin_layout Plain Layout", tree_code, "\\end_layout", ""]
        ert_close = ["\\begin_layout Plain Layout", "\\backslash",
                     "end{forest}", "", "\\end_layout", "", "\\end_inset"]
        document.body[start:stop + 1] = ert_open + ert_body + ert_close

        # the inset at this index is an ERT now, so the search resumes here
        start = find_token(document.body, tree_token, start)


def revert_glossgroup(document):
    """ Reverts the GroupGlossedWords inset (Linguistics module) to TeX-code

    Does nothing unless the document uses the linguistics module.  Every
    "Flex GroupGlossedWords" inset is replaced by its content (converted
    with lyx2latex) between a "{" line and a "}" line.
    """

    # Do we use the linguistics module?
    if "linguistics" not in document.get_module_list():
        return

    group_token = "\\begin_inset Flex GroupGlossedWords"
    start = find_token(document.body, group_token, 0)
    while start != -1:
        stop = find_end_of_inset(document.body, start)
        if stop == -1:
            document.warning("Malformed LyX document: Can't find end of GroupGlossedWords inset")
            start = find_token(document.body, group_token, start + 1)
            continue

        plain_beg = find_token(document.body, "\\begin_layout Plain Layout", start)
        plain_end = find_end_of_layout(document.body, plain_beg)
        grouped = lyx2latex(document, document.body[plain_beg : plain_end])
        # NOTE(review): this looks like leftover debugging output
        document.warning("content: %s" % grouped)

        document.body[start:stop + 1] = ["{", "", grouped, "", "}"]

        # the inset is gone, so the search resumes at the same index
        start = find_token(document.body, group_token, start)


def revert_newgloss(document):
    """
    Reverts the new Glosse insets (Linguistics module) to the old format

    For every "Flex Glosse" and "Flex Tri-Glosse" inset in document.body:

    * an "Argument 1" inset, if present, is converted with lyx2latex and
      re-added as a trailing Plain Layout paragraph that starts with the
      glt command; the Argument inset itself is removed;
    * the content of the first Plain Layout paragraph of the inset is
      replaced by its LaTeX form (lyx2latex).

    Nothing is done if the document does not use the linguistics module.
    Malformed insets are reported through document.warning and skipped.
    """

    # Do we use the linguistics module?
    if "linguistics" not in document.get_module_list():
        return

    glosses = ("\\begin_inset Flex Glosse", "\\begin_inset Flex Tri-Glosse")
    for glosse in glosses:
        i = 0
        while True:
            i = find_token(document.body, glosse, i)
            if i == -1:
                break
            j = find_end_of_inset(document.body, i)
            if j == -1:
                document.warning("Malformed LyX document: Can't find end of Glosse inset")
                i += 1
                continue

            arg = find_token(document.body, "\\begin_inset Argument 1", i, j)
            if arg != -1:
                # Only look for the end of the argument once we know that
                # there is one.
                endarg = find_end_of_inset(document.body, arg)
                if endarg == -1:
                    document.warning("Malformed LyX document: Can't find end of Glosse argument inset")
                    i += 1
                    continue
                argbeginPlain = find_token(document.body, "\\begin_layout Plain Layout", arg, endarg)
                if argbeginPlain == -1:
                    document.warning("Malformed LyX document: Can't find arg plain Layout")
                    i += 1
                    continue
                # Searching from the layout line yields the \end_inset that
                # closes the enclosing inset; the "- 2" presumably steps back
                # over the blank line to the \end_layout line -- verify
                # against the file format.
                argendPlain = find_end_of_inset(document.body, argbeginPlain)
                argcontent = lyx2latex(document, document.body[argbeginPlain : argendPlain - 2])

                # j lies behind the argument, so inserting here does not
                # move arg/endarg
                document.body[j:j] = ["", "\\begin_layout Plain Layout","\\backslash", "glt ",
                    argcontent, "\\end_layout"]

                # remove Arg insets and paragraph, if it only contains this inset
                if document.body[arg - 1] == "\\begin_layout Plain Layout" and find_end_of_layout(document.body, arg - 1) == endarg + 3:
                    del document.body[arg - 1 : endarg + 4]
                else:
                    del document.body[arg : endarg + 1]

                # the edits above moved the end of the Glosse inset
                j = find_end_of_inset(document.body, i)

            # Restrict the search to this inset and make sure that a
            # paragraph was really found: with beginPlain == -1 the slice
            # assignment below would overwrite the start of the document.
            beginPlain = -1
            endPlain = -1
            if j != -1:
                beginPlain = find_token(document.body, "\\begin_layout Plain Layout", i, j)
            if beginPlain != -1:
                endPlain = find_end_of_layout(document.body, beginPlain)
            if beginPlain == -1 or endPlain == -1:
                document.warning("Malformed LyX document: Can't find Plain Layout of Glosse inset")
                i += 1
                continue

            content = lyx2latex(document, document.body[beginPlain : endPlain])

            document.body[beginPlain + 1:endPlain] = [content]
            i = beginPlain + 1


def convert_newgloss(document):
    """
    Converts Glosse insets (Linguistics module) to the new format

    Every Plain Layout paragraph found inside a "Flex Glosse" or
    "Flex Tri-Glosse" inset of document.body is rewritten:

    * if its first backslash line is followed by a line starting with
      "glt", the paragraph content becomes an "Argument 1" inset holding
      the remaining lines as ERT;
    * otherwise the paragraph content is wrapped into an ERT inset.

    Nothing is done if the document does not use the linguistics module.
    Malformed insets are reported through document.warning.
    """

    # Do we use the linguistics module?
    if "linguistics" not in document.get_module_list():
        return

    glosses = ("\\begin_inset Flex Glosse", "\\begin_inset Flex Tri-Glosse")
    for glosse in glosses:
        i = 0
        while True:
            i = find_token(document.body, glosse, i)
            if i == -1:
                break
            j = find_end_of_inset(document.body, i)
            if j == -1:
                document.warning("Malformed LyX document: Can't find end of Glosse inset")
                i += 1
                continue

            # k is the line from which the next paragraph of this inset is
            # searched; it never drops below i, which guarantees progress of
            # the outer loop.
            k = i
            while True:
                beginPlain = find_token(document.body, "\\begin_layout Plain Layout", k, j)
                if beginPlain == -1:
                    break
                endPlain = find_end_of_layout(document.body, beginPlain)
                if endPlain == -1:
                    document.warning("Malformed LyX document: Can't find end of Glosse layout")
                    # Give up on this inset: searching again from k would
                    # only find the same broken layout.
                    break

                glt  = find_token(document.body, "\\backslash", beginPlain, endPlain)
                if glt != -1 and document.body[glt + 1].startswith("glt"):
                    # Remove exactly the "glt" prefix (str.lstrip would treat
                    # its argument as a set of characters).
                    document.body[glt + 1] = document.body[glt + 1][len("glt"):].lstrip()
                    argcontent = document.body[glt + 1 : endPlain]
                    document.body[beginPlain + 1 : endPlain] = ["\\begin_inset Argument 1", "status open", "",
                        "\\begin_layout Plain Layout", "\\begin_inset ERT", "status open", "",
                        "\\begin_layout Plain Layout", ""] + argcontent + ["\\end_layout", "", "\\end_inset", "",
                        "\\end_layout", "", "\\end_inset"]
                else:
                    content = document.body[beginPlain + 1 : endPlain]
                    document.body[beginPlain + 1 : endPlain] = ["\\begin_inset ERT", "status open", "",
                        "\\begin_layout Plain Layout"] + content + ["\\end_layout", "", "\\end_inset"]

                # The paragraph and the inset grew; look up their new ends
                # and continue behind the paragraph, which also skips the
                # Plain Layout lines that were just inserted.
                newEndPlain = find_end_of_layout(document.body, beginPlain)
                newEndInset = find_end_of_inset(document.body, i)
                if newEndPlain == -1 or newEndInset == -1:
                    break
                k = newEndPlain
                j = newEndInset

            i = k + 1


def convert_BoxFeatures(document):
    """
    adds new box features

    Inserts the parameters thickness, separation and shadowsize, with the
    values "0.4pt", "3pt" and "4pt", directly after every line of
    document.body that starts with "height_special".
    """

    i = 0
    while True:
        i = find_token(document.body, "height_special", i)
        if i == -1:
            return
        # One list element per line: a single element with embedded newlines
        # cannot be found by line-based searches (e.g. for "shadowsize")
        # that run later in the same conversion.
        document.body[i + 1:i + 1] = ['thickness "0.4pt"',
                                      'separation "3pt"',
                                      'shadowsize "4pt"']
        # continue behind the inserted lines
        i = i + 4


def revert_BoxFeatures(document):
    " outputs new box features as TeX code "

    def quoted(line):
        # text between the first and the last double quote of the line
        return line[line.find('"') + 1:line.rfind('"')]

    pos = 0
    while True:
        pos = find_token(document.body, "height_special", pos)
        if pos == -1:
            return
        # the three box settings follow the height_special line
        thickness = quoted(document.body[pos + 1])
        separation = quoted(document.body[pos + 2])
        shadowsize = quoted(document.body[pos + 3])
        # they are unknown to the older format, so drop them
        del document.body[pos + 1:pos + 4]

        # collect a length command for every setting that is not at its
        # default value ("0.4pt", "3pt" and "4pt", respectively)
        command = "{"
        if thickness != "0.4pt":
            command += "\\backslash fboxrule " + thickness
        if separation != "3pt":
            command += "\\backslash fboxsep " + separation
        if shadowsize != "4pt":
            command += "\\backslash shadowsize " + shadowsize

        if command != "{":
            # closing brace first, so that the positions in front of it
            # are still valid for the opening part
            document.body[pos + 10 : pos + 10] = put_cmd_in_ert("}")
            document.body[pos - 10 : pos - 10] = put_cmd_in_ert("{")
            # overwrite one line of the ERT inset inserted just above
            # (presumably its "{" content line) with the full command
            document.body[pos - 5 : pos - 4] = [command]
        pos = pos + 11


def convert_origin(document):
    " Insert the origin tag "

    pos = find_token(document.header, "\\textclass ", 0)
    if pos == -1:
        document.warning("Malformed LyX document: No \\textclass!!")
        return
    # an empty document directory is recorded as "stdin"; otherwise the
    # directory is written with forward slashes only
    origin = "stdin" if document.dir == "" else document.dir.replace('\\', '/')
    # the tag goes directly in front of the \textclass line
    document.header.insert(pos, "\\origin " + origin)


def revert_origin(document):
    " Remove the origin tag "

    pos = find_token(document.header, "\\origin ", 0)
    if pos != -1:
        # drop the first header line that starts with the tag
        del document.header[pos]
        return
    document.warning("Malformed LyX document: No \\origin!!")


# Colour names that revert_textcolor() turns into \textcolor TeX code.
color_names = [
    "brown", "darkgray", "gray",
    "lightgray", "lime", "olive", "orange",
    "pink", "purple", "teal", "violet",
]

def revert_textcolor(document):
    """
    revert new \\textcolor colors to TeX code

    Every line of document.body that consists of "\\color " followed by one
    of the names in color_names is replaced by an ERT inset that opens a
    \\textcolor{<name>}{ group. The matching closing brace is inserted, also
    as ERT, in front of the next "\\color" line or the next "\\end_layout"
    line, whichever comes first. The xcolor package is added to the preamble
    the first time such a colour is found.
    """

    prefix = "\\color "
    i = 0
    xcolor = False
    while True:
        i = find_token(document.body, prefix, i)
        if i == -1:
            return
        line = document.body[i]
        color = line[len(prefix):] if line.startswith(prefix) else None
        if color in color_names:
            # register that xcolor must be loaded in the preamble
            if not xcolor:
                xcolor = True
                # \@ifundefined takes a "true" and a "false" branch; the
                # trailing {} is the (empty) false branch.
                add_to_preamble(document, ["\\@ifundefined{rangeHsb}{\\usepackage{xcolor}}{}"])
            # find the next \\color and/or the next \\end_layout
            j = find_token(document.body, "\\color", i + 1)
            k = find_token(document.body, "\\end_layout", i + 1)
            ends = [pos for pos in (j, k) if pos != -1]
            if not ends:
                # Without any end marker there is no sane place for the
                # closing brace (index -1 would put it in front of the last
                # line of the document).
                document.warning("Malformed LyX document: Can't find end of \\color " + color)
                i = i + 1
                continue
            close = min(ends)
            # output TeX code
            # first output the closing brace; it lies behind line i, so i
            # stays valid
            document.body[close : close] = put_cmd_in_ert("}")
            # now output the \textcolor command
            document.body[i : i + 1] = put_cmd_in_ert("\\textcolor{" + color + "}{")
        i = i + 1


def convert_colorbox(document):
    """
    adds color settings for boxes

    Inserts the parameters framecolor "black" and backgroundcolor "none"
    behind the other box parameters of every line of document.body that
    starts with "height_special".
    """

    i = 0
    while True:
        i = find_token(document.body, "height_special", i)
        if i == -1:
            return
        # Two layouts have to be handled:
        # - "shadowsize" is on its own line, three lines below
        #   "height_special" (e.g. files in version 489 - 491): the colours
        #   go behind that line;
        # - it is not found there (e.g. because the three box settings were
        #   added as one multi-line list element): the colours go two lines
        #   below "height_special".
        # Only the following three lines are inspected, there is no need to
        # scan the rest of the document for every box.
        limit = min(i + 4, len(document.body))
        j = find_token(document.body, "shadowsize", i, limit)
        if j == i + 3:
            pos = i + 4
        else:
            pos = i + 2
        # one list element per line, so that line-based searches keep working
        document.body[pos:pos] = ['framecolor "black"', 'backgroundcolor "none"']
        i = i + 2


def revert_colorbox(document):
    """
    outputs color settings for boxes as TeX code

    For every line of document.body that starts with "framecolor", this line
    and the following one (the background colour) are removed. If one of the
    two values differs from its default ("black" and "none"), the box is
    wrapped into ERT insets that open a \\fcolorbox (custom frame colour) or
    a \\colorbox (custom background only) group and close it again, and the
    xcolor package is added to the preamble.
    """

    def quoted(line):
        # text between the first and the last double quote of the line
        return line[line.find('"') + 1:line.rfind('"')]

    i = 0
    defaultframecolor = "black"
    # "none" is the value convert_colorbox writes; using another default
    # here would turn every converted box into a \colorbox{none}.
    defaultbackcolor = "none"
    while True:
        i = find_token(document.body, "framecolor", i)
        if i == -1:
            return
        # read out the values
        framecolor = quoted(document.body[i])
        backcolor = quoted(document.body[i + 1])
        # delete the specification
        del document.body[i:i + 2]
        # output TeX code
        if framecolor != defaultframecolor or backcolor != defaultbackcolor:
            # \colorbox and \fcolorbox need a colour package
            add_to_preamble(document, ["\\@ifundefined{rangeHsb}{\\usepackage{xcolor}}{}"])
            # first output the closing brace, so that the positions in front
            # of it are still valid
            # NOTE(review): the fixed offsets used below assume a particular
            # layout of the box parameters and box content -- confirm.
            document.body[i + 9 : i + 9] = put_cmd_in_ert("}")
            # now output the box commands
            document.body[i - 14 : i - 14] = put_cmd_in_ert("{")
            if framecolor != defaultframecolor:
                # NOTE(review): with backcolor "none" this names a colour
                # LaTeX does not know; decide which colour should be used.
                command = "\\backslash fcolorbox{" + framecolor + "}{" + backcolor + "}{"
            else:
                command = "\\backslash colorbox{" + backcolor + "}{"
            # overwrite one line of the ERT inset inserted just above
            # (presumably its "{" content line) with the box command
            document.body[i - 9 : i - 8] = [command]
        i = i + 11


def revert_mathmulticol(document):
    " Convert formulas to ERT if they contain multicolumns "

    formula_tag = '\\begin_inset Formula'
    pos = 0
    while True:
        pos = find_token(document.body, formula_tag, pos)
        if pos == -1:
            return
        end = find_end_of_inset(document.body, pos)
        if end == -1:
            document.warning("Malformed LyX document: Can't find end of Formula inset at line " + str(pos))
            pos += 1
            continue
        # the formula code is everything between the inset tag and the
        # closing line of the inset
        formula = document.body[pos:end]
        formula[0] = formula[0].replace(formula_tag, '').lstrip()
        code = "\n".join(formula)
        if "\\multicolumn" not in code:
            # nothing to do, go on behind this formula
            pos = end
            continue
        # replace the whole inset by ERT and go on at the end of the ERT
        document.body[pos:end + 1] = put_cmd_in_ert(code)
        pos = find_end_of_inset(document.body, pos)


##
# Conversion hub
#

supported_versions = ["2.2.0", "2.2"]

# Each entry is a pair [file format number, list of routines].
# NOTE(review): presumably the routines of an entry are run to reach that
# format number; the code that consumes these tables is not part of this
# file.
convert = [
    [475, [convert_separator]],
    # nothing to do for 476: We consider it a bug that older versions
    # did not load amsmath automatically for these commands, and do not
    # want to hardcode amsmath off.
    [476, []],
    [477, []],
    [478, []],
    [479, []],
    [480, []],
    [481, [convert_dashes]],
    [482, [convert_phrases]],
    [483, [convert_specialchar]],
    [484, []],
    [485, []],
    [486, []],
    [487, []],
    [488, [convert_newgloss]],
    [489, [convert_BoxFeatures]],
    [490, [convert_origin]],
    [491, []],
    [492, [convert_colorbox]],
    [493, []],
]

# Same layout as above, listed in descending format order.
revert = [
    [492, [revert_mathmulticol]],
    [491, [revert_colorbox]],
    [490, [revert_textcolor]],
    [489, [revert_origin]],
    [488, [revert_BoxFeatures]],
    [487, [revert_newgloss, revert_glossgroup]],
    [486, [revert_forest]],
    [485, [revert_ex_itemargs]],
    [484, [revert_sigplan_doi]],
    [483, [revert_georgian]],
    [482, [revert_specialchar]],
    [481, [revert_phrases]],
    [480, [revert_dashes]],
    [479, [revert_question_env]],
    [478, [revert_beamer_lemma]],
    [477, [revert_xarrow]],
    [476, [revert_swissgerman]],
    [475, [revert_smash]],
    [474, [revert_separator]],
]


if __name__ == "__main__":
    # nothing happens when the module is run as a script
    pass