lyx_mirror/lib/lyx2lyx/lyx_2_4.py

# -*- coding: utf-8 -*-
# This file is part of lyx2lyx
# Copyright (C) 2018 The LyX team
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.

""" Convert files to the file format generated by lyx 2.4"""

import re, string
import unicodedata
import sys, os

from datetime import (datetime, date, time)

# Uncomment only what you need to import, please.

from parser_tools import (count_pars_in_inset, del_token, find_end_of_inset,
    find_end_of_layout, find_token, find_token_backwards, find_token_exact,
    find_re, get_bool_value,
    get_containing_layout, get_option_value, get_value, get_quoted_value)
#    del_value, del_complete_lines,
#    find_complete_lines, find_end_of,
#    find_re, find_substring,
#    get_containing_inset,
#    is_in_inset, set_bool_value
#    find_tokens, check_token

from lyx2lyx_tools import (put_cmd_in_ert, add_to_preamble, insert_to_preamble, lyx2latex,
                           revert_language, revert_flex_inset, str2bool)
#  revert_font_attrs, latex_length
#  get_ert, lyx2verbatim, length_in_bp, convert_info_insets
#  revert_flex_inset, hex2ratio

####################################################################
# Private helper functions

def add_preamble_fonts(document, fontmap):
    """Add collected font packages with their options to the user preamble.

    fontmap maps a package name to the list of options collected for it.
    One \\usepackage line is emitted per package; the option brackets are
    left out when the option list is empty.
    """
    for pkg, opts in fontmap.items():
        bracket = "[" + ",".join(opts) + "]" if opts else ""
        add_to_preamble(document, ["\\usepackage%s{%s}" % (bracket, pkg)])


def createkey(pkg, options):
    """Return the lookup key 'pkg:opt1-opt2-...' for a package and its options.

    Note that options is sorted in place, i.e. the caller's list is
    reordered as a side effect.
    """
    options.sort()
    joined = "-".join(options)
    return pkg + ':' + joined

class fontinfo:
    """Record describing one font known to the font mapping."""

    def __init__(self):
        # identification
        self.fontname = None    # key into font2pkgmap
        self.fonttype = None    # roman,sans,typewriter,math
        self.package = None
        self.options = []
        self.pkgkey = None      # key into pkg2fontmap
        # scaling
        self.scaletype = None   # None,sf,tt
        self.scaleopt = None    # None, 'scaled', 'scale'
        self.scaleval = 1
        # old style figures
        self.osfopt = None      # None, string

    def addkey(self):
        """Set pkgkey to the key createkey() builds from package and options."""
        self.pkgkey = createkey(self.package, self.options)

class fontmapping:
    """Lookup tables between font names and LaTeX package + options.

    font2pkgmap: font name -> fontinfo
    pkg2fontmap: package key (see createkey()) -> font name
    pkginmap:    package name -> 1 for every package that has a mapping
    """

    def __init__(self):
        self.font2pkgmap = dict()
        self.pkg2fontmap = dict()
        self.pkginmap = dict()  # defines, if a map for package exists

    @staticmethod
    def _report_inconsistency():
        " Report a broken package-key <-> font-name relation on stderr "
        # No document object is available in this class, so the message
        # cannot go through document.error(); report it on stderr instead.
        sys.stderr.write("Something is wrong in pkgname+options <-> fontname mapping\n")

    def expandFontMapping(self, font_list, font_type, scale_type, pkg, scaleopt = None, osfopt = None):
        " Expand fontinfo mapping"
        #
        # font_list:   list of fontnames, each element
        #              may contain a ','-separated list of needed options
        #              like e.g. 'IBMPlexSansCondensed,condensed'
        # font_type:   one of 'roman', 'sans', 'typewriter', 'math'
        # scale_type:  one of None, 'sf', 'tt'
        # pkg:         package defining the font. Defaults to fontname if None
        # scaleopt:    one of None, 'scale', 'scaled', or some other string
        #              to be used in scale option (e.g. scaled=0.7)
        # osfopt:      None or some other string to be used in osf option
        for fl in font_list:
            fe = fontinfo()
            fe.fonttype = font_type
            fe.scaletype = scale_type
            flt = fl.split(",")
            font_name = flt[0]
            fe.fontname = font_name
            fe.options = flt[1:]
            fe.scaleopt = scaleopt
            fe.osfopt = osfopt
            if pkg is None:
                fe.package = font_name
            else:
                fe.package = pkg
            fe.addkey()
            self.font2pkgmap[font_name] = fe
            # Repeated the same entry? Check content
            if self.pkg2fontmap.get(fe.pkgkey, font_name) != font_name:
                self._report_inconsistency()
            self.pkg2fontmap[fe.pkgkey] = font_name
            self.pkginmap[fe.package] = 1

    def getfontname(self, pkg, options):
        " Return the font name registered for pkg with options, or None "
        # createkey() sorts options in place before building the key
        pkgkey = createkey(pkg, options)
        if pkgkey not in self.pkg2fontmap:
            return None
        fontname = self.pkg2fontmap[pkgkey]
        if fontname not in self.font2pkgmap:
            self._report_inconsistency()
            return None
        # Only accept the name if the reverse relation agrees
        if pkgkey == self.font2pkgmap[fontname].pkgkey:
            return fontname
        return None

def createFontMapping(fontlist):
    """Create the font mapping for the font families named in fontlist.

    Used by the convert_*Fonts() and revert_*Fonts() routines.  Known
    family names are 'DejaVu', 'IBM', 'Adobe' and 'Noto'; any other name
    contributes nothing.

    * Would be more handy to parse latexFonts file,
      but the path to this file is unknown
    * Expand the table below, if desired
    """
    # Family name -> argument tuples for fontmapping.expandFontMapping():
    # (font_list, font_type, scale_type, pkg[, scaleopt[, osfopt]])
    families = {
        'DejaVu': (
            (['DejaVuSerif', 'DejaVuSerifCondensed'], "roman", None, None),
            (['DejaVuSans','DejaVuSansCondensed'], "sans", "sf", None, "scaled"),
            (['DejaVuSansMono'], "typewriter", "tt", None, "scaled"),
        ),
        'IBM': (
            (['IBMPlexSerif', 'IBMPlexSerifThin,thin',
              'IBMPlexSerifExtraLight,extralight', 'IBMPlexSerifLight,light',
              'IBMPlexSerifSemibold,semibold'],
             "roman", None, "plex-serif"),
            (['IBMPlexSans','IBMPlexSansCondensed,condensed',
              'IBMPlexSansThin,thin', 'IBMPlexSansExtraLight,extralight',
              'IBMPlexSansLight,light', 'IBMPlexSansSemibold,semibold'],
             "sans", "sf", "plex-sans", "scale"),
            (['IBMPlexMono', 'IBMPlexMonoThin,thin',
              'IBMPlexMonoExtraLight,extralight', 'IBMPlexMonoLight,light',
              'IBMPlexMonoSemibold,semibold'],
             "typewriter", "tt", "plex-mono", "scale"),
        ),
        'Adobe': (
            (['ADOBESourceSerifPro'], "roman", None, "sourceserifpro", None, "osf"),
            (['ADOBESourceSansPro'], "sans", "sf", "sourcesanspro", "scaled"),
            (['ADOBESourceCodePro'], "typewriter", "tt", "sourcecodepro", "scaled"),
        ),
        'Noto': (
            (['NotoSerifRegular,regular', 'NotoSerifMedium,medium',
              'NotoSerifThin,thin', 'NotoSerifLight,light',
              'NotoSerifExtralight,extralight'],
             "roman", None, "noto-serif", None, "osf"),
            (['NotoSansRegular,regular', 'NotoSansMedium,medium',
              'NotoSansThin,thin', 'NotoSansLight,light',
              'NotoSansExtralight,extralight'],
             "sans", "sf", "noto-sans", "scaled"),
            (['NotoMonoRegular,regular'], "typewriter", "tt", "noto-mono", "scaled"),
        ),
    }
    fm = fontmapping()
    for font in fontlist:
        for args in families.get(font, ()):
            fm.expandFontMapping(*args)
    return fm

def convert_fonts(document, fm):
    """Handle font definition (LaTeX preamble -> native).

    Every \\usepackage line of the user preamble whose package and
    remaining options are known to the font mapping fm is removed, and the
    matching \\font_roman/\\font_sans/\\font_typewriter header setting is
    set to the font name instead.  A scale=/scaled= option is transferred
    to the \\font_sf_scale/\\font_tt_scale header line and an osf option to
    \\font_osf.  Lines that cannot be expressed natively are left
    untouched in the preamble.
    """

    rpkg = re.compile(r'^\\usepackage(\[([^\]]*)\])?\{([^\}]+)\}')
    rscaleopt = re.compile(r'^scaled?=(.*)')
    osfoption = "osf"

    # Scan from the very first line, and look at position i again after a
    # deletion, so that no \usepackage line is skipped.
    i = 0
    while i < len(document.preamble):
        i = find_re(document.preamble, rpkg, i)
        if i == -1:
            return
        mo = rpkg.search(document.preamble[i])
        if mo is None:
            i += 1
            continue
        if mo.group(2) is None:
            options = []
        else:
            options = mo.group(2).replace(' ', '').split(",")
        pkg = mo.group(3)

        # Separate the osf and scale options from the ones that select the
        # font variant; if several scale options are given, the last wins.
        oscale = 1
        has_osf = False
        variant_options = []
        for opt in options:
            if opt == osfoption:
                has_osf = True
                continue
            ms = rscaleopt.search(opt)
            if ms is not None:
                oscale = ms.group(1)
                continue
            variant_options.append(opt)
        options = variant_options

        if pkg not in fm.pkginmap:
            i += 1
            continue
        # determine fontname
        fn = fm.getfontname(pkg, options)
        if fn is None:
            i += 1
            continue
        fi = fm.font2pkgmap[fn]
        if has_osf and fi.osfopt is None:
            # The mapping has no osf option for this font, so the line
            # cannot be converted.  Decide this before deleting anything,
            # otherwise the package would silently vanish.
            i += 1
            continue

        del document.preamble[i]
        if i > 0 and document.preamble[i-1] == "% Added by lyx2lyx":
            del document.preamble[i-1]
            i -= 1
        # from here on i is the index of the line that followed the deleted one

        if has_osf:
            osf = find_token(document.header, "\\font_osf false")
            if osf != -1:
                document.header[osf] = "\\font_osf true"

        if fi.scaletype is not None:
            fi.scaleval = oscale
            fontscale = "\\font_" + fi.scaletype + "_scale"
            j = find_token(document.header, fontscale, 0)
            if j != -1:
                vals = get_value(document.header, fontscale, j).split()
                # round instead of truncating: float("0.57") * 100 is 56.99...
                scale = "%03d" % int(round(float(oscale) * 100))
                document.header[j] = fontscale + " " + scale + " " + vals[1]

        ft = "\\font_" + fi.fonttype
        j = find_token(document.header, ft, 0)
        if j != -1:
            val = get_value(document.header, ft, j)
            words = val.split() # ! splits also values like '"DejaVu Sans"'
            words[0] = '"' + fn + '"'
            document.header[j] = ft + ' ' + ' '.join(words)

def revert_fonts(document, fm, fontmap, OnlyWithXOpts = False):
    """Revert native font definitions to LaTeX.

    Each \\font_roman/\\font_sans/\\font_typewriter/\\font_math header line
    whose font is known to the mapping fm is reset to "default".  The
    options needed to load the font are collected in fontmap
    (key = package, value = list of options), to be written to the user
    preamble later by add_preamble_fonts().

    With OnlyWithXOpts set, the options of the matching
    \\font_<type>_opts header line are collected too and that line is
    removed; False is returned as soon as a mapped font is a math font or
    has no such line.  In all other cases the return value is True.
    """
    font_line = re.compile(r'^\s*(\\font_(roman|sans|typewriter|math))\s+')
    scale_pair = re.compile(r'^\s*(\d+)\s+(\d+)')
    pos = 0
    while pos < len(document.header):
        pos = find_re(document.header, font_line, pos+1)
        if pos == -1:
            return True
        hit = font_line.search(document.header[pos])
        if hit is None:
            continue
        token = hit.group(1)    # '\font_roman', '\font_sans', ...
        fields = get_value(document.header, token, pos).split(' ')  # ! splits also values like '"DejaVu Sans"'
        name = fields[0].strip('"') # TeX font name has no whitespace
        if name not in fm.font2pkgmap:
            continue
        info = fm.font2pkgmap[name]
        pkg = info.package
        if pkg not in fontmap:
            fontmap[pkg] = []

        if OnlyWithXOpts:
            if token == "\\font_math":
                return False
            # token is one of roman/sans/typewriter here
            opts_line = re.compile(r'^\s*(\\font_' + hit.group(2) + r'_opts)\s+')
            x = find_re(document.header, opts_line, 0)
            if x == -1:
                return False
            # We need to use this regex since split() does not handle quote protection
            xopts = re.findall(r'[^"\s]\S*|".+?"', document.header[x])
            fontmap[pkg].extend(xopts[1].strip('"').split(","))
            del document.header[x]

        fields[0] = '"default"'
        document.header[pos] = token + ' ' + ' '.join(fields)

        if info.scaleopt is not None:
            scales = get_value(document.header, "\\font_" + info.scaletype + "_scale", 0)
            found = scale_pair.search(scales)
            if found is not None and found.group(1) != "100":
                # set correct scale option
                factor = format(float(found.group(1)) / 100, '.2f')
                fontmap[pkg].append(info.scaleopt + "=" + factor)

        if info.osfopt is not None and info.fonttype == "roman":
            osf = find_token(document.header, "\\font_osf true")
            if osf != -1:
                document.header[osf] = "\\font_osf false"
                fontmap[pkg].append(info.osfopt)

        if info.options:
            fontmap[pkg].extend(info.options)
    return True

###############################################################################
###
### Conversion and reversion routines
###
###############################################################################

def convert_inputencoding_namechange(document):
    """Rename inputencoding settings.

    On the \\inputencoding header line, "auto" becomes "auto-legacy" and
    "default" becomes "auto-legacy-plain".
    """
    pos = find_token(document.header, "\\inputencoding", 0)
    if pos != -1:
        renamed = document.header[pos].replace("auto", "auto-legacy")
        renamed = renamed.replace("default", "auto-legacy-plain")
        document.header[pos] = renamed

def revert_inputencoding_namechange(document):
    """Restore the old inputencoding names.

    On the \\inputencoding header line, "auto-legacy-plain" becomes
    "default" and "auto-legacy" becomes "auto".
    """
    pos = find_token(document.header, "\\inputencoding", 0)
    if pos != -1:
        # the longer name first, since "auto-legacy" is a prefix of it
        restored = document.header[pos].replace("auto-legacy-plain", "default")
        restored = restored.replace("auto-legacy", "auto")
        document.header[pos] = restored

def convert_notoFonts(document):
    """Convert Noto font definitions in the preamble to native settings.

    Only done if the header contains "\\use_non_tex_fonts false".
    """
    if find_token(document.header, "\\use_non_tex_fonts false", 0) == -1:
        return
    convert_fonts(document, createFontMapping(['Noto']))

def revert_notoFonts(document):
    """Revert native Noto font definitions to LaTeX preamble code.

    Only done if the header contains "\\use_non_tex_fonts false".
    """
    if find_token(document.header, "\\use_non_tex_fonts false", 0) == -1:
        return
    collected = dict()
    mapping = createFontMapping(['Noto'])
    if revert_fonts(document, mapping, collected):
        add_preamble_fonts(document, collected)

def convert_latexFonts(document):
    """Convert DejaVu and IBM Plex font definitions in the preamble to native settings.

    Only done if the header contains "\\use_non_tex_fonts false".
    """
    if find_token(document.header, "\\use_non_tex_fonts false", 0) == -1:
        return
    convert_fonts(document, createFontMapping(['DejaVu', 'IBM']))

def revert_latexFonts(document):
    """Revert native DejaVu and IBM Plex font definitions to LaTeX preamble code.

    Only done if the header contains "\\use_non_tex_fonts false".
    """
    if find_token(document.header, "\\use_non_tex_fonts false", 0) == -1:
        return
    collected = dict()
    mapping = createFontMapping(['DejaVu', 'IBM'])
    if revert_fonts(document, mapping, collected):
        add_preamble_fonts(document, collected)

def convert_AdobeFonts(document):
    """Convert Adobe Source font definitions in the preamble to native settings.

    Only done if the header contains "\\use_non_tex_fonts false".
    """
    if find_token(document.header, "\\use_non_tex_fonts false", 0) == -1:
        return
    convert_fonts(document, createFontMapping(['Adobe']))

def revert_AdobeFonts(document):
    """Revert native Adobe Source font definitions to LaTeX preamble code.

    Only done if the header contains "\\use_non_tex_fonts false".
    """
    if find_token(document.header, "\\use_non_tex_fonts false", 0) == -1:
        return
    collected = dict()
    mapping = createFontMapping(['Adobe'])
    if revert_fonts(document, mapping, collected):
        add_preamble_fonts(document, collected)

def removeFrontMatterStyles(document):
    """Remove styles Begin/EndFrontmatter.

    Every BeginFrontmatter/EndFrontmatter layout is deleted from the body
    together with the blank lines that directly follow it.
    """

    layouts = ['BeginFrontmatter', 'EndFrontmatter']
    tokenend = len('\\begin_layout ')
    i = 0
    while True:
        i = find_token_exact(document.body, '\\begin_layout ', i+1)
        if i == -1:
            return
        layout = document.body[i][tokenend:].strip()
        if layout not in layouts:
            continue
        j = find_end_of_layout(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: Can't find end of layout at line %d" % i)
            continue
        # swallow the blank lines after the layout, without running past
        # the end of the body
        while j + 1 < len(document.body) and document.body[j+1].strip() == '':
            j += 1
        document.body[i:j+1] = []
        # The line that followed the deleted range is now at index i; step
        # back so that the next search (starting at i+1) examines it too.
        i -= 1

def addFrontMatterStyles(document):
    """Use styles Begin/EndFrontmatter for elsarticle.

    Does nothing for other text classes.  A BeginFrontmatter layout is put
    in front of the first front matter layout (Title, Author, ...) and an
    EndFrontmatter layout behind the end of the last one; nothing is
    inserted if the body contains no such layout.
    """

    if document.textclass != "elsarticle":
        return

    def insertFrontmatter(prefix, line):
        " Replace the blank lines around line with a <prefix>Frontmatter layout "
        above = line
        while above > 0 and document.body[above-1].strip() == '':
            above -= 1
        below = line
        # bounded, so that a position at the end of the body cannot overrun
        while below < len(document.body) and document.body[below].strip() == '':
            below += 1
        document.body[above:below] = ['', '\\begin_layout ' + prefix + 'Frontmatter',
                                    '\\begin_inset Note Note',
                                    'status open', '',
                                    '\\begin_layout Plain Layout',
                                    'Keep this empty!',
                                    '\\end_layout', '',
                                    '\\end_inset', '', '',
                                    '\\end_layout', '']

    layouts = ['Title', 'Title footnote', 'Author', 'Author footnote',
                'Corresponding author', 'Address', 'Email', 'Abstract', 'Keywords']
    tokenend = len('\\begin_layout ')
    first = -1      # start of the first front matter layout
    last = -1       # end of the last well-formed front matter layout
    i = 0
    while True:
        i = find_token_exact(document.body, '\\begin_layout ', i+1)
        if i == -1:
            break
        layout = document.body[i][tokenend:].strip()
        if layout not in layouts:
            continue
        k = find_end_of_layout(document.body, i)
        if k == -1:
            document.warning("Malformed LyX document: Can't find end of layout at line %d" % i)
            continue
        if first == -1:
            first = i
        # remember the end separately: k itself is overwritten with -1 if
        # a later front matter layout turns out to be malformed
        last = k
        i = k
    if first == -1:
        return
    # insert the end first, so that the index of the start stays valid
    insertFrontmatter('End', last+1)
    insertFrontmatter('Begin', first)


def convert_lst_literalparam(document):
    """Add param literal to include inset.

    In every "CommandInset include" inset the line 'literal "true"' is
    inserted in front of the first blank line of the inset, or in front of
    the end of the inset if it has no blank line.
    """
    start = 0
    while True:
        start = find_token(document.body, '\\begin_inset CommandInset include', start+1)
        if start == -1:
            return
        end = find_end_of_inset(document.body, start)
        if end == -1:
            document.warning("Malformed LyX document: Can't find end of command inset at line %d" % start)
            continue
        # walk to the insertion point; the search continues from there
        while start < end and document.body[start].strip() != '':
            start += 1
        document.body.insert(start, 'literal "true"')


def revert_lst_literalparam(document):
    """Remove param literal from include inset.

    del_token() is applied with the token 'literal' to the line range of
    every "CommandInset include" inset.
    """
    start = 0
    while True:
        start = find_token(document.body, '\\begin_inset CommandInset include', start+1)
        if start == -1:
            return
        end = find_end_of_inset(document.body, start)
        if end == -1:
            document.warning("Malformed LyX document: Can't find end of include inset at line %d" % start)
            continue
        del_token(document.body, 'literal', start, end)


def revert_paratype(document):
    """Revert ParaType font definitions to LaTeX.

    Only done if the header contains "\\use_non_tex_fonts false".  The
    PTSerif-TLF, PTSans-TLF and PTMono-TLF font settings are replaced by
    \\usepackage lines in the user preamble; a sans or typewriter scale
    other than 100 is passed on as "scaled=" package option.
    """

    def scale_option(token):
        " Return the scaled= option for the scale stored under token, or '' "
        value = get_value(document.header, token, 0)
        # Use the first number of the value.  Cutting off a fixed number
        # of characters only works if the rest is exactly " 100".
        fields = value.split()
        if not fields:
            return ""
        try:
            scale = float(fields[0])
        except ValueError:
            document.warning("Invalid %s value: %s" % (token, value))
            return ""
        if scale == 100:
            return ""
        return "scaled=" + format(scale / 100, '.2f')

    if find_token(document.header, "\\use_non_tex_fonts false", 0) == -1:
        return

    i1 = find_token(document.header, "\\font_roman \"PTSerif-TLF\"", 0)
    i2 = find_token(document.header, "\\font_sans \"default\"", 0)
    i3 = find_token(document.header, "\\font_typewriter \"default\"", 0)
    j = find_token(document.header, "\\font_sans \"PTSans-TLF\"", 0)
    k = find_token(document.header, "\\font_typewriter \"PTMono-TLF\"", 0)
    sfoption = scale_option("\\font_sf_scale")
    ttoption = scale_option("\\font_tt_scale")

    if i1 != -1 and i2 != -1 and i3 != -1:
        # NOTE(review): this branch leaves the \font_roman header line
        # as it is - confirm that this is intended.
        add_to_preamble(document, ["\\usepackage{paratype}"])
        return

    if i1 != -1:
        add_to_preamble(document, ["\\usepackage{PTSerif}"])
        document.header[i1] = document.header[i1].replace("PTSerif-TLF", "default")
    if j != -1:
        if sfoption != "":
            add_to_preamble(document, ["\\usepackage[" + sfoption + "]{PTSans}"])
        else:
            add_to_preamble(document, ["\\usepackage{PTSans}"])
        document.header[j] = document.header[j].replace("PTSans-TLF", "default")
    if k != -1:
        if ttoption != "":
            add_to_preamble(document, ["\\usepackage[" + ttoption + "]{PTMono}"])
        else:
            add_to_preamble(document, ["\\usepackage{PTMono}"])
        document.header[k] = document.header[k].replace("PTMono-TLF", "default")


def revert_xcharter(document):
    """Revert XCharter font definitions to LaTeX.

    The roman font setting "xcharter" is reset to "default".  Unless
    \\use_non_tex_fonts is set, the XCharter package is loaded in the user
    preamble instead, taking over an enabled \\font_osf setting as [osf]
    package option.
    """
    pos = find_token(document.header, "\\font_roman \"xcharter\"", 0)
    if pos == -1:
        return

    # replace unsupported font setting
    document.header[pos] = document.header[pos].replace("xcharter", "default")
    # no need for preamble code with system fonts
    if get_bool_value(document.header, "\\use_non_tex_fonts"):
        return

    # transfer old style figures setting to package options
    options = ""
    osf = find_token(document.header, "\\font_osf true")
    if osf != -1:
        options = "[osf]"
        document.header[osf] = "\\font_osf false"
    add_to_preamble(document, ["\\usepackage%s{XCharter}"%options])


def revert_lscape(document):
    """Revert the landscape environment (Landscape module) to TeX code.

    Does nothing unless the module list contains "landscape".  The first
    four and the last three lines of each "Flex Landscape" inset are
    replaced by the lines put_cmd_in_ert() returns for the begin/end of a
    landscape environment; the floating variant is additionally wrapped in
    \\afterpage{}.  The required packages are added to the user preamble.
    """
    if "landscape" not in document.get_module_list():
        return

    start = 0
    while True:
        start = find_token(document.body, "\\begin_inset Flex Landscape", start+1)
        if start == -1:
            return
        end = find_end_of_inset(document.body, start)
        if end == -1:
            document.warning("Malformed LyX document: Can't find end of Landscape inset")
            continue

        floating = document.body[start] == "\\begin_inset Flex Landscape (Floating)"
        if floating:
            opening = "\\afterpage{\\begin{landscape}"
            closing = "\\end{landscape}}"
        else:
            opening = "\\begin{landscape}"
            closing = "\\end{landscape}"

        # replace the tail first, so that the index of the head stays valid
        document.body[end - 2 : end + 1] = put_cmd_in_ert(closing)
        document.body[start : start + 4] = put_cmd_in_ert(opening)
        if floating:
            add_to_preamble(document, ["\\usepackage{afterpage}"])
        add_to_preamble(document, ["\\usepackage{pdflscape}"])


def convert_fontenc(document):
    """Convert default fontenc setting: "\\fontencoding global" becomes "auto"."""
    pos = find_token(document.header, "\\fontencoding global", 0)
    if pos != -1:
        document.header[pos] = document.header[pos].replace("global", "auto")


def revert_fontenc(document):
    """Revert default fontenc setting: "\\fontencoding auto" becomes "global"."""
    pos = find_token(document.header, "\\fontencoding auto", 0)
    if pos != -1:
        document.header[pos] = document.header[pos].replace("auto", "global")


def revert_nospellcheck(document):
    """Remove nospellcheck font info param.

    Every body line found by find_token() for "\\nospellcheck" is deleted.
    """
    pos = find_token(document.body, '\\nospellcheck', 0)
    while pos != -1:
        del document.body[pos]
        # the following line has moved to pos, so continue right there
        pos = find_token(document.body, '\\nospellcheck', pos)


def revert_floatpclass(document):
    """Remove float placement params 'document' and 'class'.

    The header setting "\\float_placement class" is removed with
    del_token(), and a "placement class" or "placement document" line is
    deleted from every float inset.
    """

    del_token(document.header, "\\float_placement class")

    i = 0
    while True:
        i = find_token(document.body, '\\begin_inset Float', i+1)
        if i == -1:
            break
        # The param is looked for on the inset line and the line below it
        # only; at most one line is deleted per float.
        for param in ('placement class', 'placement document'):
            k = find_token(document.body, param, i, i + 2)
            if k != -1:
                del document.body[k]
                break


def revert_floatalignment(document):
    """Remove float alignment params.

    The header setting \\float_alignment is read and deleted.  In every
    float inset the alignment param is deleted; for the values left,
    center and right (after substituting the header setting for
    "document") the lines put_cmd_in_ert() returns for the matching TeX
    command are inserted after the first Plain Layout line of the float.
    """
    galignment = get_value(document.header, "\\float_alignment", delete=True)
    commands = {
        "left": "\\raggedright{}",
        "center": "\\centering{}",
        "right": "\\raggedleft{}",
    }

    start = 0
    while True:
        start = find_token(document.body, '\\begin_inset Float', start+1)
        if start == -1:
            return
        end = find_end_of_inset(document.body, start)
        if end == -1:
            document.warning("Malformed LyX document: Can't find end of inset at line " + str(start))
            continue
        param = find_token(document.body, 'alignment', start, start+4)
        if param == -1:
            start = end
            continue
        alignment = get_value(document.body, "alignment", param)
        if alignment == "document":
            alignment = galignment
        del document.body[param]
        layout = find_token(document.body, "\\begin_layout Plain Layout", start, end)
        if layout == -1:
            document.warning("Can't find float layout!")
            continue
        command = commands.get(alignment)
        if command is not None:
            document.body[layout+1:layout+1] = put_cmd_in_ert(command)
        start = end

def revert_tuftecite(document):
    r"""Revert \cite commands in tufte classes.

    Does nothing unless the text class is tufte-book or tufte-handout.
    Every citation inset with LatexCommand "cite" is replaced by the lines
    put_cmd_in_ert() returns for the command
    \cite[before][after]{key}; citation insets with another command are
    left alone.
    """

    tufte = ["tufte-book", "tufte-handout"]
    if document.textclass not in tufte:
        return

    i = 0
    while (True):
        i = find_token(document.body, "\\begin_inset CommandInset citation", i+1)
        if i == -1:
            break
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Can't find end of citation inset at line %d!!" %(i))
            continue
        k = find_token(document.body, "LatexCommand", i, j)
        if k == -1:
            document.warning("Can't find LatexCommand for citation inset at line %d!" %(i))
            i = j
            continue
        cmd = get_value(document.body, "LatexCommand", k)
        if cmd != "cite":
            i = j
            continue
        pre = get_quoted_value(document.body, "before", i, j)
        post = get_quoted_value(document.body, "after", i, j)
        key = get_quoted_value(document.body, "key", i, j)
        if not key:
            document.warning("Citation inset at line %d does not have a key!" %(i))
            key = "???"
        # Replace command with ERT
        res = "\\cite"
        if pre:
            res += "[" + pre + "]"
        if post:
            res += "[" + post + "]"
        elif pre:
            # keep the second optional argument, so that the text before
            # is not taken for the text after
            res += "[]"
        res += "{" + key + "}"
        ert = put_cmd_in_ert([res])
        document.body[i:j+1] = ert
        # Continue behind the inserted lines.  The old end index j is not
        # valid any more, since the replacement differs in length.
        i += len(ert) - 1


def revert_stretchcolumn(document):
    """Remove the varwidth flags of table columns.

    We remove the column varwidth flags or everything else will become a
    mess.  A warning is issued for every column line carrying a varwidth
    attribute.
    """
    flagged = re.compile('^<column.*varwidth="[^"]+".*>$')
    start = 0
    while True:
        start = find_token(document.body, "\\begin_inset Tabular", start+1)
        if start == -1:
            return
        end = find_end_of_inset(document.body, start+1)
        if end == -1:
            document.warning("Malformed LyX document: Could not find end of tabular.")
            continue
        for pos in range(start, end):
            line = document.body[pos]
            if flagged.search(line):
                document.warning("Converting 'tabularx'/'xltabular' table to normal table.")
                document.body[pos] = line.replace(' varwidth="true"', '')


def revert_vcolumns(document):
    """Revert standard columns with line breaks etc.

    A cell is a candidate if it contains a Newline inset, more than one
    paragraph or a layout other than Plain Layout.  If the column of such
    a cell has no width, varwidth or special setting, the column gets a
    special="...V{\\linewidth}" attribute (with an alignment prefix, if
    the column is aligned), and the Newline insets of the cell are
    replaced by the lines put_cmd_in_ert() returns for \\\\ resp.
    \\linebreak{}.  The packages varwidth and array are added to the user
    preamble as needed, also if the conversion is aborted by an exception.
    """
    i = 0
    needvarwidth = False
    needarray = False
    try:
        while True:
            i = find_token(document.body, "\\begin_inset Tabular", i+1)
            if i == -1:
                return
            j = find_end_of_inset(document.body, i)
            if j == -1:
                document.warning("Malformed LyX document: Could not find end of tabular.")
                continue

            # Collect necessary column information
            m = i + 1
            # the numbers of rows and columns are taken from the second
            # and third quoted value of the line below the inset start
            nrows = int(document.body[i+1].split('"')[3])
            ncols = int(document.body[i+1].split('"')[5])
            col_info = []
            for k in range(ncols):
                m = find_token(document.body, "<column", m)
                width = get_option_value(document.body[m], 'width')
                varwidth = get_option_value(document.body[m], 'varwidth')
                alignment = get_option_value(document.body[m], 'alignment')
                special = get_option_value(document.body[m], 'special')
                col_info.append([width, varwidth, alignment, special, m])
                # step past this column line, otherwise the next search
                # would find the very same line again
                m += 1

            # Now parse cells
            m = i + 1
            # columns whose <column> line has been given the special
            # attribute already (it must not be added a second time)
            patched_cols = set()
            for row in range(nrows):
                for col in range(ncols):
                    m = find_token(document.body, "<cell", m)
                    multicolumn = get_option_value(document.body[m], 'multicolumn')
                    multirow = get_option_value(document.body[m], 'multirow')
                    width = get_option_value(document.body[m], 'width')
                    rotate = get_option_value(document.body[m], 'rotate')
                    # Check for: linebreaks, multipars, non-standard environments
                    begcell = m
                    endcell = find_token(document.body, "</cell>", begcell)
                    vcand = False
                    if find_token(document.body, "\\begin_inset Newline", begcell, endcell) != -1:
                        vcand = True
                    elif count_pars_in_inset(document.body, begcell + 2) > 1:
                        vcand = True
                    elif get_value(document.body, "\\begin_layout", begcell) != "Plain Layout":
                        vcand = True
                    if vcand and rotate == "" and ((multicolumn == "" and multirow == "") or width == ""):
                        if col_info[col][0] == "" and col_info[col][1] == "" and col_info[col][3] == "":
                            needvarwidth = True
                            if col not in patched_cols:
                                patched_cols.add(col)
                                alignment = col_info[col][2]
                                col_line = col_info[col][4]
                                vval = ""
                                if alignment == "center":
                                    vval = ">{\\centering}"
                                elif  alignment == "left":
                                    vval = ">{\\raggedright}"
                                elif alignment == "right":
                                    vval = ">{\\raggedleft}"
                                if vval != "":
                                    needarray = True
                                vval += "V{\\linewidth}"

                                document.body[col_line] = document.body[col_line][:-1] + " special=\"" + vval + "\">"
                            # ERT newlines and linebreaks (since LyX < 2.4 automatically inserts parboxes
                            # with newlines, and we do not want that)
                            while True:
                                # the end of the cell moves with every replacement
                                endcell = find_token(document.body, "</cell>", begcell)
                                linebreak = False
                                nl = find_token(document.body, "\\begin_inset Newline newline", begcell, endcell)
                                if nl == -1:
                                    nl = find_token(document.body, "\\begin_inset Newline linebreak", begcell, endcell)
                                    if nl == -1:
                                         break
                                    linebreak = True
                                nle = find_end_of_inset(document.body, nl)
                                del(document.body[nle:nle+1])
                                if linebreak:
                                    document.body[nl:nl+1] = put_cmd_in_ert("\\linebreak{}")
                                else:
                                    document.body[nl:nl+1] = put_cmd_in_ert("\\\\")
                    m += 1

            i = j

    finally:
        if needarray == True:
            add_to_preamble(document, ["\\usepackage{array}"])
        if needvarwidth == True:
            add_to_preamble(document, ["\\usepackage{varwidth}"])


def revert_bibencoding(document):
    """ Revert bibliography encoding

    The "encoding" parameter is removed from every bibtex inset that has
    one.  Unless its value was "default", the encoding is preserved by
    other means:
      * with a biblatex cite engine, a "bibencoding" entry is added to
        the \\biblio_options header (unless one is already present);
      * otherwise the inset is wrapped in ERT that switches
        \\inputencoding inside a TeX group.
    Encodings without a known LaTeX name are dropped with a warning.
    """

    # Get cite engine
    engine = "basic"
    i = find_token(document.header, "\\cite_engine", 0)
    if i == -1:
        document.warning("Malformed document! Missing \\cite_engine")
    else:
        engine = get_value(document.header, "\\cite_engine", i)

    # Check if biblatex
    biblatex = engine in ["biblatex", "biblatex-natbib"]

    # Map lyx to latex encoding names
    encodings = {
        "utf8" : "utf8",
        "utf8x" : "utf8x",
        "armscii8" : "armscii8",
        "iso8859-1" : "latin1",
        "iso8859-2" : "latin2",
        "iso8859-3" : "latin3",
        "iso8859-4" : "latin4",
        "iso8859-5" : "iso88595",
        "iso8859-6" : "8859-6",
        "iso8859-7" : "iso-8859-7",
        "iso8859-8" : "8859-8",
        "iso8859-9" : "latin5",
        "iso8859-13" : "latin7",
        "iso8859-15" : "latin9",
        "iso8859-16" : "latin10",
        "applemac" : "applemac",
        "cp437" : "cp437",
        "cp437de" : "cp437de",
        "cp850" : "cp850",
        "cp852" : "cp852",
        "cp855" : "cp855",
        "cp858" : "cp858",
        "cp862" : "cp862",
        "cp865" : "cp865",
        "cp866" : "cp866",
        "cp1250" : "cp1250",
        "cp1251" : "cp1251",
        "cp1252" : "cp1252",
        "cp1255" : "cp1255",
        "cp1256" : "cp1256",
        "cp1257" : "cp1257",
        "koi8-r" : "koi8-r",
        "koi8-u" : "koi8-u",
        "pt154" : "pt154",
        "utf8-platex" : "utf8",
        "ascii" : "ascii"
    }

    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CommandInset bibtex", i+1)
        if i == -1:
            break
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Can't find end of bibtex inset at line %d!!" %(i))
            continue
        encoding = get_quoted_value(document.body, "encoding", i, j)
        if not encoding:
            continue
        # remove encoding line
        k = find_token(document.body, "encoding", i, j)
        if k != -1:
            del document.body[k]
        if encoding == "default":
            continue
        if encoding not in encodings:
            # The parameter had to go in any case (older formats do not
            # know it), but without a LaTeX name nothing can replace it.
            document.warning("Unknown bibliography encoding '%s' dropped." % encoding)
            continue
        latex_encoding = encodings[encoding]
        # Re-find inset end line
        j = find_end_of_inset(document.body, i)
        if biblatex:
            h = find_token(document.header, "\\biblio_options", 0)
            if h != -1:
                biblio_options = get_value(document.header, "\\biblio_options", h)
                if "bibencoding" not in biblio_options:
                    document.header[h] += ",bibencoding=%s" % latex_encoding
            else:
                bs = find_token(document.header, "\\biblatex_bibstyle", 0)
                if bs == -1:
                    # this should not happen
                    document.warning("Malformed LyX document! No \\biblatex_bibstyle header found!")
                else:
                    document.header[bs-1 : bs-1] = ["\\biblio_options bibencoding=" + latex_encoding]
        else:
            # Close the group first: inserting after the inset does not
            # shift the line index i used for the opening ERT.
            document.body[j+1:j+1] = put_cmd_in_ert("\\egroup")
            document.body[i:i] = put_cmd_in_ert("\\bgroup\\inputencoding{" + latex_encoding + "}")

        i = j


def convert_vcsinfo(document):
    """ Separate vcs Info inset from buffer Info inset.

    Info insets of type "buffer" whose argument is one of the "vcs-*"
    values are rewritten as insets of type "vcs" with the argument
    stripped of its "vcs-" prefix.
    """

    # buffer Info argument -> vcs Info argument
    vcs_args = {
        "vcs-revision" : "revision",
        "vcs-tree-revision" : "tree-revision",
        "vcs-author" : "author",
        "vcs-time" : "time",
        "vcs-date" : "date"
    }
    start = 0
    while True:
        start = find_token(document.body, "\\begin_inset Info", start + 1)
        if start == -1:
            break
        end = find_end_of_inset(document.body, start + 1)
        if end == -1:
            document.warning("Malformed LyX document: Could not find end of Info inset.")
            continue
        type_line = find_token(document.body, 'type', start, end)
        if get_quoted_value(document.body, "type", type_line) != "buffer":
            continue
        arg_line = find_token(document.body, 'arg', start, end)
        old_arg = get_quoted_value(document.body, "arg", arg_line)
        if old_arg in vcs_args:
            document.body[type_line] = 'type "vcs"'
            document.body[arg_line] = 'arg "' + vcs_args[old_arg] + '"'


def revert_vcsinfo(document):
    """ Merge vcs Info inset to buffer Info inset.

    Info insets of type "vcs" become insets of type "buffer" and get
    their argument prefixed with "vcs-".  Insets with an argument that
    is not a known vcs argument are reported and left alone.
    """

    known_args = ["revision", "tree-revision", "author", "time", "date" ]
    start = 0
    while True:
        start = find_token(document.body, "\\begin_inset Info", start + 1)
        if start == -1:
            break
        end = find_end_of_inset(document.body, start + 1)
        if end == -1:
            document.warning("Malformed LyX document: Could not find end of Info inset.")
            continue
        type_line = find_token(document.body, 'type', start, end)
        if get_quoted_value(document.body, "type", type_line) != "vcs":
            continue
        arg_line = find_token(document.body, 'arg', start, end)
        vcs_arg = get_quoted_value(document.body, "arg", arg_line)
        if vcs_arg in known_args:
            document.body[type_line] = 'type "buffer"'
            document.body[arg_line] = 'arg "vcs-' + vcs_arg + '"'
        else:
            document.warning("Malformed Info inset. Invalid vcs arg.")


def revert_dateinfo(document):
    """ Revert date info insets to static text.

    Every Info inset of type "date", "fixdate" or "moddate" is replaced
    by a single body line holding the formatted date.  "fixdate" insets
    use the ISO date following the "@" in their argument; all other
    insets use today's date.  The argument selects the format: "ISO",
    one of the named formats of the table below, or a custom Qt date
    format expression.
    """

    # FIXME This currently only considers the main language and uses the system locale
    # Ideally, it should honor context languages and switch the locale accordingly.

    # The date formats for each language using strftime syntax:
    # long, short, loclong, locmedium, locshort
    dateformats = {
        "afrikaans" : ["%A, %d %B %Y", "%Y-%m-%d", "%d %B %Y", "%d %b %Y", "%Y/%m/%d"],
        "albanian" : ["%A, %d %B %Y", "%d.%m.%y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
        "american" : ["%A, %B %d, %Y", "%m/%d/%y", "%B %d, %Y", "%b %d, %Y", "%m/%d/%Y"],
        "amharic" : ["%A ፣%d %B %Y", "%d/%m/%Y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
        "ancientgreek" : ["%A, %d %B %Y", "%d %b %Y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
        "arabic_arabi" : ["%A، %d %B، %Y", "%d‏/%m‏/%Y", "%d %B، %Y", "%d/%m/%Y", "%d/%m/%Y"],
        "arabic_arabtex" : ["%A، %d %B، %Y", "%d‏/%m‏/%Y", "%d %B، %Y", "%d/%m/%Y", "%d/%m/%Y"],
        "armenian" : ["%Y թ. %B %d, %A", "%d.%m.%y", "%d %B، %Y", "%d %b، %Y", "%d/%m/%Y"],
        "asturian" : ["%A, %d %B de %Y", "%d/%m/%y", "%d de %B de %Y", "%d %b %Y", "%d/%m/%Y"],
        "australian" : ["%A, %d %B %Y", "%d/%m/%y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
        "austrian" : ["%A, %d. %B %Y", "%d.%m.%y", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
        "bahasa" : ["%A, %d %B %Y", "%d/%m/%y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
        "bahasam" : ["%A, %d %B %Y", "%d/%m/%y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
        "basque" : ["%Y(e)ko %B %d, %A", "%y/%m/%d", "%Y %B %d", "%Y %b %d", "%Y/%m/%d"],
        "belarusian" : ["%A, %d %B %Y г.", "%d.%m.%y", "%d %B %Y", "%d %b %Y", "%d.%m.%Y"],
        "bosnian" : ["%A, %d. %B %Y.", "%d.%m.%y.", "%d. %B %Y", "%d. %b %Y", "%Y-%m-%d"],
        "brazilian" : ["%A, %d de %B de %Y", "%d/%m/%Y", "%d de %B de %Y", "%d de %b de %Y", "%d/%m/%Y"],
        "breton" : ["%Y %B %d, %A", "%Y-%m-%d", "%d %B %Y", "%d %b %Y", "%Y-%m-%d"],
        "british" : ["%A, %d %B %Y", "%d/%m/%Y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
        "bulgarian" : ["%A, %d %B %Y г.", "%d.%m.%y г.", "%d %B %Y", "%d %b %Y", "%Y-%m-%d"],
        "canadian" : ["%A, %B %d, %Y", "%Y-%m-%d", "%B %d, %Y", "%d %b %Y", "%Y-%m-%d"],
        "canadien" : ["%A %d %B %Y", "%y-%m-%d", "%d %B %Y", "%d %b %Y", "%Y-%m-%d"],
        "catalan" : ["%A, %d %B de %Y", "%d/%m/%y", "%d / %B / %Y", "%d / %b / %Y", "%d/%m/%Y"],
        "chinese-simplified" : ["%Y年%m月%d日%A", "%Y/%m/%d", "%Y年%m月%d日", "%Y-%m-%d", "%y-%m-%d"],
        "chinese-traditional" : ["%Y年%m月%d日 %A", "%Y/%m/%d", "%Y年%m月%d日", "%Y年%m月%d日", "%y年%m月%d日"],
        "coptic" : ["%A, %d %B %Y", "%d %b %Y", "%B %d, %Y", "%b %d, %Y", "%m/%d/%Y"],
        "croatian" : ["%A, %d. %B %Y.", "%d. %m. %Y.", "%d. %B %Y.", "%d. %b. %Y.", "%d.%m.%Y."],
        "czech" : ["%A %d. %B %Y", "%d.%m.%y", "%d. %B %Y", "%d. %b. %Y", "%d.%m.%Y"],
        "danish" : ["%A den %d. %B %Y", "%d/%m/%Y", "%d. %B %Y", "%d. %b %Y", "%d/%m/%Y"],
        "divehi" : ["%Y %B %d, %A", "%Y-%m-%d", "%Y %B %d", "%Y %b %d", "%d/%m/%Y"],
        "dutch" : ["%A %d %B %Y", "%d-%m-%y", "%d %B %Y", "%d %b %Y", "%d-%m-%Y"],
        "english" : ["%A, %B %d, %Y", "%m/%d/%y", "%B %d, %Y", "%b %d, %Y", "%m/%d/%Y"],
        "esperanto" : ["%A, %d %B %Y", "%d %b %Y", "la %d de %B %Y", "la %d de %b %Y", "%m/%d/%Y"],
        "estonian" : ["%A, %d. %B %Y", "%d.%m.%y", "%d %B %Y", "%d %b %Y", "%d.%m.%Y"],
        "farsi" : ["%A %d %B %Y", "%Y/%m/%d", "%d %B %Y", "%d %b %Y", "%Y/%m/%d"],
        "finnish" : ["%A %d. %B %Y", "%d.%m.%Y", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
        "french" : ["%A %d %B %Y", "%d/%m/%Y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
        "friulan" : ["%A %d di %B dal %Y", "%d/%m/%y", "%d di %B dal %Y", "%d di %b dal %Y", "%d/%m/%Y"],
        "galician" : ["%A, %d de %B de %Y", "%d/%m/%y", "%d de %B de %Y", "%d de %b de %Y", "%d/%m/%Y"],
        "georgian" : ["%A, %d %B, %Y", "%d.%m.%y", "%B %d, %Y", "%b %d, %Y", "%m/%d/%Y"],
        "german" : ["%A, %d. %B %Y", "%d.%m.%y", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
        "german-ch" : ["%A, %d. %B %Y", "%d.%m.%y", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
        "german-ch-old" : ["%A, %d. %B %Y", "%d.%m.%y", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
        "greek" : ["%A, %d %B %Y", "%d/%m/%y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
        "hebrew" : ["%A, %d ב%B %Y", "%d.%m.%Y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
        "hindi" : ["%A, %d %B %Y", "%d/%m/%y", "%d %B %Y", "%d %b %Y", "%d-%m-%Y"],
        "icelandic" : ["%A, %d. %B %Y", "%d.%m.%Y", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
        "interlingua" : ["%Y %B %d, %A", "%Y-%m-%d", "le %d de %B %Y", "le %d de %b %Y", "%Y-%m-%d"],
        "irish" : ["%A %d %B %Y", "%d/%m/%Y", "%d. %B %Y", "%d. %b %Y", "%d/%m/%Y"],
        "italian" : ["%A %d %B %Y", "%d/%m/%y", "%d %B %Y", "%d/%b/%Y", "%d/%m/%Y"],
        "japanese" : ["%Y年%m月%d日%A", "%Y/%m/%d", "%Y年%m月%d日", "%Y/%m/%d", "%y/%m/%d"],
        "japanese-cjk" : ["%Y年%m月%d日%A", "%Y/%m/%d", "%Y年%m月%d日", "%Y/%m/%d", "%y/%m/%d"],
        "kannada" : ["%A, %B %d, %Y", "%d/%m/%y", "%d %B %Y", "%d %B %Y", "%d-%m-%Y"],
        "kazakh" : ["%Y ж. %d %B, %A", "%d.%m.%y", "%d %B %Y", "%d %B %Y", "%Y-%d-%m"],
        "khmer" : ["%A %d %B %Y", "%d/%m/%y", "%d %B %Y", "%d %B %Y", "%d/%m/%Y"],
        "korean" : ["%Y년 %m월 %d일 %A", "%y. %m. %d.", "%Y년 %m월 %d일", "%Y. %m. %d.", "%y. %m. %d."],
        "kurmanji" : ["%A, %d %B %Y", "%d %b %Y", "%d. %B %Y", "%d. %m. %Y", "%Y-%m-%d"],
        "lao" : ["%A ທີ %d %B %Y", "%d/%m/%Y", "%d %B %Y", "%d %B %Y", "%d/%m/%Y"],
        "latin" : ["%A, %d %B %Y", "%d %b %Y", "%B %d, %Y", "%b %d, %Y", "%m/%d/%Y"],
        "latvian" : ["%A, %Y. gada %d. %B", "%d.%m.%y", "%Y. gada %d. %B", "%Y. gada %d. %b", "%d.%m.%Y"],
        "lithuanian" : ["%Y m. %B %d d., %A", "%Y-%m-%d", "%Y m. %B %d d.", "%Y m. %B %d d.", "%Y-%m-%d"],
        "lowersorbian" : ["%A, %d. %B %Y", "%d.%m.%y", "%d %B %Y", "%d %b %Y", "%d.%m.%Y"],
        "macedonian" : ["%A, %d %B %Y", "%d.%m.%y", "%d %B %Y", "%d %b %Y", "%d.%m.%Y"],
        "magyar" : ["%Y. %B %d., %A", "%Y. %m. %d.", "%Y. %B %d.", "%Y. %b %d.", "%Y.%m.%d."],
        "malayalam" : ["%A, %d %B, %Y", "%d/%m/%y", "%d %B %Y", "%d %b %Y", "%d-%m-%Y"],
        "marathi" : ["%A, %d %B, %Y", "%d/%m/%y", "%d %B %Y", "%d %b %Y", "%d-%m-%Y"],
        "mongolian" : ["%A, %Y оны %m сарын %d", "%Y-%m-%d", "%Y оны %m сарын %d", "%d-%m-%Y", "%d-%m-%Y"],
        "naustrian" : ["%A, %d. %B %Y", "%d.%m.%y", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
        "newzealand" : ["%A, %d %B %Y", "%d/%m/%y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
        "ngerman" : ["%A, %d. %B %Y", "%d.%m.%y", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
        "norsk" : ["%A %d. %B %Y", "%d.%m.%Y", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
        "nynorsk" : ["%A %d. %B %Y", "%d.%m.%Y", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
        "occitan" : ["%Y %B %d, %A", "%Y-%m-%d", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
        "piedmontese" : ["%A, %d %B %Y", "%d %b %Y", "%B %d, %Y", "%b %d, %Y", "%m/%d/%Y"],
        "polish" : ["%A, %d %B %Y", "%d.%m.%Y", "%d %B %Y", "%d %b %Y", "%Y-%m-%d"],
        "polutonikogreek" : ["%A, %d %B %Y", "%d/%m/%y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
        "portuguese" : ["%A, %d de %B de %Y", "%d/%m/%y", "%d de %B de %Y", "%d de %b de %Y", "%Y/%m/%d"],
        "romanian" : ["%A, %d %B %Y", "%d.%m.%Y", "%d %B %Y", "%d %b %Y", "%d.%m.%Y"],
        "romansh" : ["%A, ils %d da %B %Y", "%d-%m-%y", "%d %B %Y", "%d %b %Y", "%d.%m.%Y"],
        "russian" : ["%A, %d %B %Y г.", "%d.%m.%Y", "%d %B %Y г.", "%d %b %Y г.", "%d.%m.%Y"],
        "samin" : ["%Y %B %d, %A", "%Y-%m-%d", "%B %d. b. %Y", "%b %d. b. %Y", "%d.%m.%Y"],
        "sanskrit" : ["%Y %B %d, %A", "%Y-%m-%d", "%d %B %Y", "%d %b %Y", "%d-%m-%Y"],
        "scottish" : ["%A, %dmh %B %Y", "%d/%m/%Y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
        "serbian" : ["%A, %d. %B %Y.", "%d.%m.%y.", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
        "serbian-latin" : ["%A, %d. %B %Y.", "%d.%m.%y.", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
        "slovak" : ["%A, %d. %B %Y", "%d. %m. %Y", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
        "slovene" : ["%A, %d. %B %Y", "%d. %m. %y", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
        "spanish" : ["%A, %d de %B de %Y", "%d/%m/%y", "%d de %B de %Y", "%d %b %Y", "%d/%m/%Y"],
        "spanish-mexico" : ["%A, %d de %B de %Y", "%d/%m/%y", "%d de %B de %Y", "%d %b %Y", "%d/%m/%Y"],
        "swedish" : ["%A %d %B %Y", "%Y-%m-%d", "%d %B %Y", "%d %b %Y", "%Y-%m-%d"],
        "syriac" : ["%Y %B %d, %A", "%Y-%m-%d", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
        "tamil" : ["%A, %d %B, %Y", "%d/%m/%y", "%d %B %Y", "%d %b %Y", "%d-%m-%Y"],
        "telugu" : ["%d, %B %Y, %A", "%d-%m-%y", "%d %B %Y", "%d %b %Y", "%d-%m-%Y"],
        "thai" : ["%Aที่ %d %B %Y", "%d/%m/%y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
        "tibetan" : ["%Y %Bའི་ཚེས་%d, %A", "%Y-%m-%d", "%B %d, %Y", "%b %d, %Y", "%m/%d/%Y"],
        "turkish" : ["%d %B %Y %A", "%d.%m.%Y", "%d %B %Y", "%d.%b.%Y", "%d.%m.%Y"],
        "turkmen" : ["%d %B %Y %A", "%d.%m.%Y", "%Y ý. %B %d", "%d.%m.%Y ý.", "%d.%m.%y ý."],
        "ukrainian" : ["%A, %d %B %Y р.", "%d.%m.%y", "%d %B %Y", "%d %m %Y", "%d.%m.%Y"],
        "uppersorbian" : ["%A, %d. %B %Y", "%d.%m.%y", "%d %B %Y", "%d %b %Y", "%d.%m.%Y"],
        "urdu" : ["%A، %d %B، %Y", "%d/%m/%y", "%d %B, %Y", "%d %b %Y", "%d/%m/%Y"],
        "vietnamese" : ["%A, %d %B, %Y", "%d/%m/%Y", "%d tháng %B %Y", "%d-%m-%Y", "%d/%m/%Y"],
        "welsh" : ["%A, %d %B %Y", "%d/%m/%y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
    }

    # Position of each named format within a dateformats entry
    named_formats = {"long" : 0, "short" : 1, "loclong" : 2, "locmedium" : 3, "locshort" : 4}

    # Qt date format expressions and their closest strftime counterparts
    # (strftime has no unpadded day/month, so "d" and "M" are zero-padded).
    qtcodes = {
        "dddd" : "%A", "ddd" : "%a", "dd" : "%d", "d" : "%d",
        "MMMM" : "%B", "MMM" : "%b", "MM" : "%m", "M" : "%m",
        "yyyy" : "%Y", "yy" : "%y"
    }
    # One quoted literal or one Qt expression; longer expressions come
    # first so that e.g. "dd" is not read as two "d".
    qtexpr = re.compile(r"'([^']*)'|dddd|ddd|dd|d|MMMM|MMM|MM|M|yyyy|yy")

    def qt2strftime(match):
        " Translate one match of qtexpr to strftime syntax "
        literal = match.group(1)
        if literal is not None:
            # quoted text is output verbatim, so protect percent signs
            return literal.replace("%", "%%")
        return qtcodes[match.group(0)]

    types = ["date", "fixdate", "moddate" ]
    lang = get_value(document.header, "\\language")
    if lang == "":
        document.warning("Malformed LyX document! No \\language header found!")
        return

    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Info", i+1)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i+1)
        if j == -1:
            document.warning("Malformed LyX document: Could not find end of Info inset.")
            continue
        tp = find_token(document.body, 'type', i, j)
        tpv = get_quoted_value(document.body, "type", tp)
        if tpv not in types:
            continue
        arg = find_token(document.body, 'arg', i, j)
        argv = get_quoted_value(document.body, "arg", arg)
        dte = date.today()
        if tpv == "fixdate":
            # The argument has the form <format>@<ISO date>
            datecomps = argv.split('@')
            if len(datecomps) > 1:
                argv = datecomps[0]
                isodate = datecomps[1]
                m = re.search(r'(\d\d\d\d)-(\d\d)-(\d\d)', isodate)
                if m:
                    try:
                        dte = date(int(m.group(1)), int(m.group(2)), int(m.group(3)))
                    except ValueError:
                        document.warning("Invalid fixed date '%s', using today's date." % isodate)
        # FIXME if we had the path to the original document (not the one in the tmp dir),
        #        we could use the mtime.
        #        elif tpv == "moddate":
        #            dte = date.fromtimestamp(os.path.getmtime(document.dir))
        if argv == "ISO":
            result = dte.isoformat()
        elif argv in named_formats:
            if lang not in dateformats:
                document.warning("No date formats known for language '%s', using English ones." % lang)
            formats = dateformats.get(lang, dateformats["english"])
            result = dte.strftime(formats[named_formats[argv]])
        else:
            # Custom Qt format; translate it in one pass so that already
            # translated parts are never touched again, then drop any
            # unpaired quote that is left over.
            fmt = qtexpr.sub(qt2strftime, argv).replace("'", "")
            result = dte.strftime(fmt)
        if sys.version_info < (3,0):
            # In Python 2, datetime module works with binary strings,
            # our dateformat strings are utf8-encoded:
            result = result.decode('utf-8')
        document.body[i : j+1] = [result]


def revert_timeinfo(document):
    """ Revert time info insets to static text.

    Every Info inset of type "time", "fixtime" or "modtime" is replaced
    by a single body line holding the formatted time.  "fixtime" insets
    use the time following the "@" in their argument; all other insets
    use the current time.  The argument selects the format: "ISO",
    "long", "short" or a custom Qt time format expression.
    """

    # FIXME This currently only considers the main language and uses the system locale
    # Ideally, it should honor context languages and switch the locale accordingly.
    # Also, the time object is "naive", i.e., it does not know of timezones (%Z will
    # be empty).

    # The time formats for each language using strftime syntax:
    # long, short
    timeformats = {
        "afrikaans" : ["%H:%M:%S %Z", "%H:%M"],
        "albanian" : ["%I:%M:%S %p, %Z", "%I:%M %p"],
        "american" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "amharic" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "ancientgreek" : ["%H:%M:%S %Z", "%H:%M:%S"],
        "arabic_arabi" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "arabic_arabtex" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "armenian" : ["%H:%M:%S %Z", "%H:%M"],
        "asturian" : ["%H:%M:%S %Z", "%H:%M"],
        "australian" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "austrian" : ["%H:%M:%S %Z", "%H:%M"],
        "bahasa" : ["%H.%M.%S %Z", "%H.%M"],
        "bahasam" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "basque" : ["%H:%M:%S (%Z)", "%H:%M"],
        "belarusian" : ["%H:%M:%S, %Z", "%H:%M"],
        "bosnian" : ["%H:%M:%S %Z", "%H:%M"],
        "brazilian" : ["%H:%M:%S %Z", "%H:%M"],
        "breton" : ["%H:%M:%S %Z", "%H:%M"],
        "british" : ["%H:%M:%S %Z", "%H:%M"],
        "bulgarian" : ["%H:%M:%S %Z", "%H:%M"],
        "canadian" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "canadien" : ["%H:%M:%S %Z", "%H h %M"],
        "catalan" : ["%H:%M:%S %Z", "%H:%M"],
        "chinese-simplified" : ["%Z %p%I:%M:%S", "%p%I:%M"],
        "chinese-traditional" : ["%p%I:%M:%S [%Z]", "%p%I:%M"],
        "coptic" : ["%H:%M:%S %Z", "%H:%M:%S"],
        "croatian" : ["%H:%M:%S (%Z)", "%H:%M"],
        "czech" : ["%H:%M:%S %Z", "%H:%M"],
        "danish" : ["%H.%M.%S %Z", "%H.%M"],
        "divehi" : ["%H:%M:%S %Z", "%H:%M"],
        "dutch" : ["%H:%M:%S %Z", "%H:%M"],
        "english" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "esperanto" : ["%H:%M:%S %Z", "%H:%M:%S"],
        "estonian" : ["%H:%M:%S %Z", "%H:%M"],
        "farsi" : ["%H:%M:%S (%Z)", "%H:%M"],
        "finnish" : ["%H.%M.%S %Z", "%H.%M"],
        "french" : ["%H:%M:%S %Z", "%H:%M"],
        "friulan" : ["%H:%M:%S %Z", "%H:%M"],
        "galician" : ["%H:%M:%S %Z", "%H:%M"],
        "georgian" : ["%H:%M:%S %Z", "%H:%M"],
        "german" : ["%H:%M:%S %Z", "%H:%M"],
        "german-ch" : ["%H:%M:%S %Z", "%H:%M"],
        "german-ch-old" : ["%H:%M:%S %Z", "%H:%M"],
        "greek" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "hebrew" : ["%H:%M:%S %Z", "%H:%M"],
        "hindi" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "icelandic" : ["%H:%M:%S %Z", "%H:%M"],
        "interlingua" : ["%H:%M:%S %Z", "%H:%M"],
        "irish" : ["%H:%M:%S %Z", "%H:%M"],
        "italian" : ["%H:%M:%S %Z", "%H:%M"],
        "japanese" : ["%H時%M分%S秒 %Z", "%H:%M"],
        "japanese-cjk" : ["%H時%M分%S秒 %Z", "%H:%M"],
        "kannada" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "kazakh" : ["%H:%M:%S %Z", "%H:%M"],
        "khmer" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "korean" : ["%p %I시%M분 %S초 %Z", "%p %I:%M"],
        "kurmanji" : ["%H:%M:%S %Z", "%H:%M:%S"],
        "lao" : ["%H ໂມງ%M ນາທີ  %S ວິນາທີ %Z", "%H:%M"],
        "latin" : ["%H:%M:%S %Z", "%H:%M:%S"],
        "latvian" : ["%H:%M:%S %Z", "%H:%M"],
        "lithuanian" : ["%H:%M:%S %Z", "%H:%M"],
        "lowersorbian" : ["%H:%M:%S %Z", "%H:%M"],
        "macedonian" : ["%H:%M:%S %Z", "%H:%M"],
        "magyar" : ["%H:%M:%S %Z", "%H:%M"],
        "malayalam" : ["%p %I:%M:%S %Z", "%p %I:%M"],
        "marathi" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "mongolian" : ["%H:%M:%S %Z", "%H:%M"],
        "naustrian" : ["%H:%M:%S %Z", "%H:%M"],
        "newzealand" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "ngerman" : ["%H:%M:%S %Z", "%H:%M"],
        "norsk" : ["%H:%M:%S %Z", "%H:%M"],
        "nynorsk" : ["kl. %H:%M:%S %Z", "%H:%M"],
        "occitan" : ["%H:%M:%S %Z", "%H:%M"],
        "piedmontese" : ["%H:%M:%S %Z", "%H:%M:%S"],
        "polish" : ["%H:%M:%S %Z", "%H:%M"],
        "polutonikogreek" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "portuguese" : ["%H:%M:%S %Z", "%H:%M"],
        "romanian" : ["%H:%M:%S %Z", "%H:%M"],
        "romansh" : ["%H:%M:%S %Z", "%H:%M"],
        "russian" : ["%H:%M:%S %Z", "%H:%M"],
        "samin" : ["%H:%M:%S %Z", "%H:%M"],
        "sanskrit" : ["%H:%M:%S %Z", "%H:%M"],
        "scottish" : ["%H:%M:%S %Z", "%H:%M"],
        "serbian" : ["%H:%M:%S %Z", "%H:%M"],
        "serbian-latin" : ["%H:%M:%S %Z", "%H:%M"],
        "slovak" : ["%H:%M:%S %Z", "%H:%M"],
        "slovene" : ["%H:%M:%S %Z", "%H:%M"],
        "spanish" : ["%H:%M:%S (%Z)", "%H:%M"],
        "spanish-mexico" : ["%H:%M:%S %Z", "%H:%M"],
        "swedish" : ["kl. %H:%M:%S %Z", "%H:%M"],
        "syriac" : ["%H:%M:%S %Z", "%H:%M"],
        "tamil" : ["%p %I:%M:%S %Z", "%p %I:%M"],
        "telugu" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "thai" : ["%H นาฬิกา %M นาที  %S วินาที %Z", "%H:%M"],
        "tibetan" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "turkish" : ["%H:%M:%S %Z", "%H:%M"],
        "turkmen" : ["%H:%M:%S %Z", "%H:%M"],
        "ukrainian" : ["%H:%M:%S %Z", "%H:%M"],
        "uppersorbian" : ["%H:%M:%S %Z", "%H:%M hodź."],
        "urdu" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "vietnamese" : ["%H:%M:%S %Z", "%H:%M"],
        "welsh" : ["%H:%M:%S %Z", "%H:%M"]
    }

    # Position of each named format within a timeformats entry
    named_formats = {"long" : 0, "short" : 1}

    # Qt time format expressions and their closest strftime counterparts
    qtcodes = {
        "HH" : "%H", "H" : "%H", "hh" : "%I", "h" : "%I",
        "mm" : "%M", "m" : "%M", "ss" : "%S", "s" : "%S",
        "zzz" : "%f", "z" : "%f", "t" : "%Z",
        "AP" : "%p", "ap" : "%p", "A" : "%p", "a" : "%p"
    }
    # One quoted literal or one Qt expression; longer expressions come
    # first so that e.g. "HH" is not read as two "H".
    qtexpr = re.compile(r"'([^']*)'|HH|H|hh|h|mm|m|ss|s|zzz|z|t|AP|ap|A|a")

    def qt2strftime(match):
        " Translate one match of qtexpr to strftime syntax "
        literal = match.group(1)
        if literal is not None:
            # quoted text is output verbatim, so protect percent signs
            return literal.replace("%", "%%")
        return qtcodes[match.group(0)]

    types = ["time", "fixtime", "modtime" ]
    i = find_token(document.header, "\\language", 0)
    if i == -1:
        # this should not happen
        document.warning("Malformed LyX document! No \\language header found!")
        return
    lang = get_value(document.header, "\\language", i)

    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Info", i+1)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i+1)
        if j == -1:
            document.warning("Malformed LyX document: Could not find end of Info inset.")
            continue
        tp = find_token(document.body, 'type', i, j)
        tpv = get_quoted_value(document.body, "type", tp)
        if tpv not in types:
            continue
        arg = find_token(document.body, 'arg', i, j)
        argv = get_quoted_value(document.body, "arg", arg)
        tme = datetime.now().time()
        if tpv == "fixtime":
            # The argument has the form <format>@<time>, where the time
            # is given as hh:mm:ss or hh:mm
            timecomps = argv.split('@')
            if len(timecomps) > 1:
                argv = timecomps[0]
                isotime = timecomps[1]
                m = re.search(r'(\d\d):(\d\d)(?::(\d\d))?', isotime)
                if m:
                    try:
                        tme = time(int(m.group(1)), int(m.group(2)), int(m.group(3) or 0))
                    except ValueError:
                        document.warning("Invalid fixed time '%s', using the current time." % isotime)
        # FIXME if we had the path to the original document (not the one in the tmp dir),
        #        we could use the mtime for the "modtime" type.
        if argv == "ISO":
            result = tme.isoformat()
        elif argv in named_formats:
            if lang not in timeformats:
                document.warning("No time formats known for language '%s', using English ones." % lang)
            formats = timeformats.get(lang, timeformats["english"])
            result = tme.strftime(formats[named_formats[argv]])
        else:
            # Custom Qt format; translate it in one pass so that already
            # translated parts are never touched again, then drop any
            # unpaired quote that is left over.
            fmt = qtexpr.sub(qt2strftime, argv).replace("'", "")
            result = tme.strftime(fmt)
        if sys.version_info < (3,0):
            # In Python 2, datetime module works with binary strings,
            # our timeformat strings are utf8-encoded:
            result = result.decode('utf-8')
        # The slice must be replaced by a list; a bare string would be
        # spliced in as one body line per character.
        document.body[i : j+1] = [result]


def revert_namenoextinfo(document):
    """ Merge buffer Info inset type name-noext to name.

    The argument "name-noext" of Info insets of type "buffer" is
    replaced by "name"; all other Info insets are left untouched.
    """

    start = 0
    while True:
        start = find_token(document.body, "\\begin_inset Info", start + 1)
        if start == -1:
            break
        end = find_end_of_inset(document.body, start + 1)
        if end == -1:
            document.warning("Malformed LyX document: Could not find end of Info inset.")
            continue
        type_line = find_token(document.body, 'type', start, end)
        if get_quoted_value(document.body, "type", type_line) != "buffer":
            continue
        arg_line = find_token(document.body, 'arg', start, end)
        if get_quoted_value(document.body, "arg", arg_line) == "name-noext":
            document.body[arg_line] = 'arg "name"'


def revert_l7ninfo(document):
    """ Revert l7n Info inset to text.

    Every Info inset of type "l7n" is replaced by a single body line
    holding its argument, stripped of trailing colons and of the menu
    and Qt accelerator markers.
    """

    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Info", i+1)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i+1)
        if j == -1:
            document.warning("Malformed LyX document: Could not find end of Info inset.")
            continue
        tp = find_token(document.body, 'type', i, j)
        tpv = get_quoted_value(document.body, "type", tp)
        if tpv != "l7n":
            continue
        arg = find_token(document.body, 'arg', i, j)
        argv = get_quoted_value(document.body, "arg", arg)
        # remove trailing colons, menu accelerator (|...) and qt accelerator (&), while keeping literal " & "
        argv = argv.rstrip(':').split('|')[0].replace(" & ", "</amp;>").replace("&", "").replace("</amp;>", " & ")
        # The slice must be replaced by a list; a bare string would be
        # spliced in as one body line per character.
        document.body[i : j+1] = [argv]


def revert_listpargs(document):
    """ Reverts listpreamble arguments to TeX-code

    Each "Argument listpreamble:" inset is removed and the content of
    its Plain Layout is re-inserted, wrapped in braces inside an ERT
    inset, at the start of the containing paragraph.  Insets whose
    structure cannot be determined are reported and left untouched.
    """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Argument listpreamble:", i+1)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i)
        if j == -1:
            # Without the end line the inset cannot be removed; going on
            # would insert ERT while leaving the argument in place.
            document.warning("Malformed LyX document: Can't find end of listpreamble argument inset")
            continue
        # Find containing paragraph layout
        parent = get_containing_layout(document.body, i)
        if not parent:
            document.warning("Malformed LyX document: Can't find parent paragraph layout")
            continue
        parbeg = parent[3]
        # Restrict the search to the inset so that a layout belonging to
        # some later inset is never picked up by mistake.
        beginPlain = find_token(document.body, "\\begin_layout Plain Layout", i, j)
        if beginPlain == -1:
            document.warning("Malformed LyX document: Can't find Plain Layout of listpreamble argument inset")
            continue
        endPlain = find_end_of_layout(document.body, beginPlain)
        if endPlain == -1:
            document.warning("Malformed LyX document: Can't find end of Plain Layout in listpreamble argument inset")
            continue
        content = document.body[beginPlain + 1 : endPlain]
        del document.body[i:j+1]
        subst = ["\\begin_inset ERT", "status collapsed", "", "\\begin_layout Plain Layout",
                 "{"] + content + ["}", "\\end_layout", "", "\\end_inset", ""]
        document.body[parbeg : parbeg] = subst


def revert_lformatinfo(document):
    """ Revert layout format Info inset to text.

    Every Info inset of type "lyxinfo" with the argument "layoutformat"
    is replaced by a single body line holding the layout format number.
    """

    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Info", i+1)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i+1)
        if j == -1:
            document.warning("Malformed LyX document: Could not find end of Info inset.")
            continue
        tp = find_token(document.body, 'type', i, j)
        tpv = get_quoted_value(document.body, "type", tp)
        if tpv != "lyxinfo":
            continue
        arg = find_token(document.body, 'arg', i, j)
        argv = get_quoted_value(document.body, "arg", arg)
        if argv != "layoutformat":
            continue
        # hardcoded for now
        # The slice must be replaced by a list; the bare string "69"
        # would be spliced in as the two body lines "6" and "9".
        document.body[i : j+1] = ["69"]


def convert_hebrew_parentheses(document):
    """ Swap opening/closing parentheses in Hebrew text.

    Up to LyX 2.4, "(" was used as closing parenthesis and
    ")" as opening parenthesis for Hebrew in the LyX source.

    The current language is tracked with a stack: it starts with the
    document language, "\\lang" lines change the innermost entry, and
    every layout inherits the language in effect where it begins.  The
    parentheses are swapped in each line that does not start with a
    backslash while the current language is "hebrew".
    """
    lang_token = '\\lang '
    current_languages = [document.language]
    for i, line in enumerate(document.body):
        if line.startswith(lang_token):
            # Cut off the token itself; lstrip() would treat it as a set
            # of characters and also eat leading letters of the language
            # name (e.g. "ngerman" -> "erman").
            current_languages[-1] = line[len(lang_token):].strip()
        elif line.startswith('\\begin_layout'):
            current_languages.append(current_languages[-1])
        elif line.startswith('\\end_layout'):
            # Never drop the document language, even if the layouts of
            # a malformed document are not balanced.
            if len(current_languages) > 1:
                current_languages.pop()
        elif current_languages[-1] == 'hebrew' and not line.startswith('\\'):
            # Swap via a placeholder that cannot occur in the text
            document.body[i] = line.replace('(','\x00').replace(')','(').replace('\x00',')')


def revert_hebrew_parentheses(document):
    """ Store parentheses in Hebrew text reversed

    Swapping the parentheses is its own inverse, so the conversion
    routine is simply run again.  This function only exists to keep
    the convert/revert naming convention.
    """
    return convert_hebrew_parentheses(document)


def revert_malayalam(document):
    """ Set the document language to English but assure Malayalam output

    The work is delegated to revert_language() with the LyX language
    name "malayalam", an empty second name and "malayalam" as third.
    """

    lyx_name = "malayalam"
    revert_language(document, lyx_name, "", "malayalam")


def revert_soul(document):
    """ Revert soul module flex insets to ERT

    The soul package is loaded in the preamble if any of the flex
    insets is used, the color package if a Highlight inset is used.
    Afterwards each flex inset is reverted to its LaTeX command.
    """

    # Flex inset name and the LaTeX command it is reverted to
    commands = [
        ("Spaceletters", "\\so"),
        ("Strikethrough", "\\st"),
        ("Underline", "\\ul"),
        ("Highlight", "\\hl"),
        ("Capitalize", "\\caps"),
    ]

    def is_used(name):
        " Tell whether the body holds a flex inset of that name "
        return find_token(document.body, "\\begin_inset Flex %s" % name, 0) != -1

    if any(is_used(name) for name, _ in commands):
        add_to_preamble(document, ["\\usepackage{soul}"])
    if find_token(document.body, "\\begin_inset Flex Highlight", 0) != -1:
        add_to_preamble(document, ["\\usepackage{color}"])

    for name, command in commands:
        revert_flex_inset(document.body, name, command)


def revert_tablestyle(document):
    """ Remove tablestyle params

    The first \\tablestyle line found in the header is deleted; a
    header without one is left as it is.
    """

    pos = find_token(document.header, "\\tablestyle")
    if pos == -1:
        return
    del document.header[pos]


def revert_bibfileencodings(document):
    """Revert individual Biblatex bibliography encodings.

    For every bibtex inset that carries a "file_encodings" parameter the
    parameter line is removed.  If the cite engine is biblatex, each
    bibliography file is additionally registered in the user preamble via
    \\addbibresource (with a bibencoding option where an encoding was
    given), an ERT \\printbibliography is inserted and the original inset
    is wrapped into a Note.
    """

    # Get cite engine
    engine = "basic"
    i = find_token(document.header, "\\cite_engine", 0)
    if i == -1:
        document.warning("Malformed document! Missing \\cite_engine")
    else:
        engine = get_value(document.header, "\\cite_engine", i)

    # Check if biblatex
    biblatex = engine in ["biblatex", "biblatex-natbib"]

    # Map lyx to latex encoding names
    # (kept under its own name so the per-inset "file_encodings" value read
    # below does not overwrite the table before it is used)
    latex_encodings = {
        "utf8" : "utf8",
        "utf8x" : "utf8x",
        "armscii8" : "armscii8",
        "iso8859-1" : "latin1",
        "iso8859-2" : "latin2",
        "iso8859-3" : "latin3",
        "iso8859-4" : "latin4",
        "iso8859-5" : "iso88595",
        "iso8859-6" : "8859-6",
        "iso8859-7" : "iso-8859-7",
        "iso8859-8" : "8859-8",
        "iso8859-9" : "latin5",
        "iso8859-13" : "latin7",
        "iso8859-15" : "latin9",
        "iso8859-16" : "latin10",
        "applemac" : "applemac",
        "cp437" : "cp437",
        "cp437de" : "cp437de",
        "cp850" : "cp850",
        "cp852" : "cp852",
        "cp855" : "cp855",
        "cp858" : "cp858",
        "cp862" : "cp862",
        "cp865" : "cp865",
        "cp866" : "cp866",
        "cp1250" : "cp1250",
        "cp1251" : "cp1251",
        "cp1252" : "cp1252",
        "cp1255" : "cp1255",
        "cp1256" : "cp1256",
        "cp1257" : "cp1257",
        "koi8-r" : "koi8-r",
        "koi8-u" : "koi8-u",
        "pt154" : "pt154",
        "utf8-platex" : "utf8",
        "ascii" : "ascii"
    }

    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CommandInset bibtex", i+1)
        if i == -1:
            break
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Can't find end of bibtex inset at line %d!!" %(i))
            continue
        encodings = get_quoted_value(document.body, "file_encodings", i, j)
        if not encodings:
            i = j
            continue
        # "".split(",") yields [""], so drop empty names to make the
        # "no bibfile" check below meaningful.
        bibfiles = [bib for bib
                    in get_quoted_value(document.body, "bibfiles", i, j).split(",")
                    if bib]
        opts = get_quoted_value(document.body, "biblatexopts", i, j)
        if not bibfiles:
            document.warning("Bibtex inset at line %d does not have a bibfile!" %(i))
        # remove encoding line
        k = find_token(document.body, "file_encodings", i, j)
        if k != -1:
            del document.body[k]
        # Re-find inset end line
        j = find_end_of_inset(document.body, i)
        if biblatex:
            # The value is a tab-separated list of "<bibfile> <encoding>" pairs.
            encmap = dict()
            for pp in encodings.split("\t"):
                ppp = pp.split(" ", 1)
                if len(ppp) < 2:
                    # entry without an encoding part: nothing to record
                    continue
                # translate the LyX encoding name; unknown names pass through
                encmap[ppp[0]] = latex_encodings.get(ppp[1], ppp[1])
            for bib in bibfiles:
                pr = "\\addbibresource"
                if bib in encmap:
                    pr += "[bibencoding=" + encmap[bib] + "]"
                pr += "{" + bib + "}"
                add_to_preamble(document, [pr])
            # Insert ERT \\printbibliography and wrap bibtex inset to a Note
            pcmd = "printbibliography"
            if opts:
                pcmd += "[" + opts + "]"
            repl = ["\\begin_inset ERT", "status open", "", "\\begin_layout Plain Layout",
                    "", "", "\\backslash", pcmd, "\\end_layout", "", "\\end_inset", "", "",
                    "\\end_layout", "", "\\begin_layout Standard", "\\begin_inset Note Note",
                    "status open", "", "\\begin_layout Plain Layout" ]
            repl += document.body[i:j+1]
            repl += ["", "\\end_layout", "", "\\end_inset", "", ""]
            document.body[i:j+1] = repl
            # Continue right behind the inserted lines (this also steps over
            # the original inset, which now lives inside the Note).
            j = i + len(repl) - 1

        i = j


def revert_cmidruletrimming(document):
    " Remove \\cmidrule trimming "

    # FIXME: Revert to TeX code?
    trim_option = re.compile(r' (bottom|top)line[lr]trim="true"')
    pos = 0
    while True:
        pos = find_token(document.body, '<cell ', pos + 1)
        if pos == -1:
            return
        cell = document.body[pos]
        # only cells that mention a trim attribute need to be rewritten
        if 'trim="' in cell:
            document.body[pos] = trim_option.sub('', cell)


# Local layout definition of the Flex:Ruby inset, one list item per line.
# convert_ruby_module() passes this list to document.del_local_layout() and
# revert_ruby_module() passes it to document.append_local_layout(), so the
# strings must not be altered.
ruby_inset_def = [
    r'### Inserted by lyx2lyx (ruby inset) ###',
    r'InsetLayout Flex:Ruby',
    r'  LyxType       charstyle',
    r'  LatexType     command',
    r'  LatexName     ruby',
    r'  HTMLTag       ruby',
    r'  HTMLAttr      ""',
    r'  HTMLInnerTag  rb',
    r'  HTMLInnerAttr ""',
    r'  BgColor       none',
    r'  LabelString   "Ruby"',
    r'  Decoration    Conglomerate',
    r'  Preamble',
    r'    \ifdefined\kanjiskip',
    r'      \IfFileExists{okumacro.sty}{\usepackage{okumacro}}{}',
    r'    \else \ifdefined\luatexversion',
    r'      \usepackage{luatexja-ruby}',
    r'    \else \ifdefined\XeTeXversion',
    r'      \usepackage{ruby}%',
    r'    \fi\fi\fi',
    r'    \providecommand{\ruby}[2]{\shortstack{\tiny #2\\#1}}',
    r'  EndPreamble',
    r'  Argument  post:1',
    r'    LabelString  "ruby text"',
    r'    MenuString  "Ruby Text|R"',
    r'    Tooltip    "Reading aid (ruby, furigana) for Chinese characters."',
    r'    Decoration  Conglomerate',
    r'    Font',
    r'      Size    tiny',
    r'    EndFont',
    r'    LabelFont',
    r'      Size    tiny',
    r'    EndFont',
    r'    Mandatory  1',
    r'  EndArgument',
    r'End',
]

def convert_ruby_module(document):
    " Use ruby module instead of local module definition "
    # The module is only added when the local definition was actually removed.
    had_local_definition = document.del_local_layout(ruby_inset_def)
    if not had_local_definition:
        return
    document.add_module("ruby")

def revert_ruby_module(document):
    " Replace ruby module with local module definition "
    # The local definition is only appended when the module was actually removed.
    had_module = document.del_module("ruby")
    if not had_module:
        return
    document.append_local_layout(ruby_inset_def)


def convert_utf8_japanese(document):
    " Use generic utf8 with Japanese documents."
    # document language -> the language specific utf8 variant to be replaced
    legacy_encodings = {"japanese": "utf8-platex",
                        "japanese-cjk": "utf8-cjk"}
    language = get_value(document.header, "\\language")
    if language not in legacy_encodings:
        return
    current = get_value(document.header, "\\inputencoding")
    if current == legacy_encodings[language]:
        document.set_parameter("inputencoding", "utf8")

def revert_utf8_japanese(document):
    " Use Japanese utf8 variants with Japanese documents."
    # document language -> the language specific utf8 variant to restore
    japanese_encodings = {"japanese": "utf8-platex",
                          "japanese-cjk": "utf8-cjk"}
    if get_value(document.header, "\\inputencoding") != "utf8":
        return
    language = get_value(document.header, "\\language")
    if language in japanese_encodings:
        document.set_parameter("inputencoding", japanese_encodings[language])


def revert_lineno(document):
    " Replace lineno setting with user-preamble code."

    # Both header settings are read with delete=True, the options first,
    # whether or not line numbering turns out to be enabled.
    pkg_options = get_quoted_value(document.header, "\\lineno_options",
                                   delete=True)
    enabled = get_bool_value(document.header, "\\use_lineno", delete=True)
    if enabled:
        if pkg_options:
            pkg_options = "[" + pkg_options + "]"
        add_to_preamble(document, ["\\usepackage%s{lineno}" % pkg_options,
                                   "\\linenumbers"])

def convert_lineno(document):
    """Replace user-preamble code with native lineno support.

    If the user preamble contains a \\linenumbers line directly preceded by
    a \\usepackage[...]{lineno} line, both lines are removed and the package
    options are remembered.  In any case a "\\use_lineno" setting (plus
    "\\lineno_options" when options were found) is inserted into the header
    in front of the first "\\index " line.
    """
    use_lineno = 0
    options = ""
    # start at 1: the package line is expected on the line before
    i = find_token(document.preamble, "\\linenumbers", 1)
    if i > -1:
        usepkg = re.match(r"\\usepackage(.*){lineno}", document.preamble[i-1])
        if usepkg:
            use_lineno = 1
            options = usepkg.group(1).strip("[]")
            del(document.preamble[i-1:i+1])
            # Also drop the lyx2lyx marker if it sat right above the package
            # line.  With i == 1 there is no such line and the window would
            # be (-1, 0), which is not the intended single-line range.
            if i > 1:
                del_token(document.preamble, "% Added by lyx2lyx", i-2, i-1)

    settings = ["\\use_lineno %d" % use_lineno]
    if options != "":
        settings.append("\\lineno_options %s" % options)
    k = find_token(document.header, "\\index ")
    document.header[k:k] = settings


def revert_new_languages(document):
    """Emulate support for Azerbaijani, Bengali, Church Slavonic, Korean,
    and Russian (Petrine orthography).

    Collects the new languages used as document language or in "\\lang"
    switches in the body and calls revert_language() for each of them.
    """

    #                lyxname:          (babelname, polyglossianame)
    new_languages = {"azerbaijani":    ("azerbaijani", ""),
                     "bengali":        ("", "bengali"),
                     "churchslavonic": ("", "churchslavonic"),
                     "oldrussian":     ("", "russian"),
                     "korean":         ("", "korean"),
                    }
    used_languages = set()
    if document.language in new_languages:
        used_languages.add(document.language)
    i = 0
    while True:
        i = find_token(document.body, "\\lang", i+1)
        if i == -1:
            break
        # record the language named on this line, not the document language
        lang = document.body[i][6:].strip()
        if lang in new_languages:
            used_languages.add(lang)

    # Korean is already supported via CJK, so leave as-is for Babel
    if ("korean" in used_languages
        and get_bool_value(document.header, "\\use_non_tex_fonts")
        and get_value(document.header, "\\language_package") in ("default", "auto")):
        revert_language(document, "korean", "", "korean")
    used_languages.discard("korean")

    # sorted() only makes the processing order reproducible
    for lang in sorted(used_languages):
        revert_language(document, lang, *new_languages[lang])


# Local layout definition of the old Flex:Glosse inset, one list item per
# line.  convert_linggloss() appends it to the local layout of documents that
# use the inset; revert_linggloss() passes it to document.del_local_layout(),
# so the strings must not be altered.
gloss_inset_def = [
    r'### Inserted by lyx2lyx (deprecated ling glosses) ###',
    r'InsetLayout Flex:Glosse',
    r'  LyXType               custom',
    r'  LabelString           "Gloss (old version)"',
    r'  MenuString            "Gloss (old version)"',
    r'  LatexType             environment',
    r'  LatexName             linggloss',
    r'  Decoration            minimalistic',
    r'  LabelFont',
    r'    Size                Small',
    r'  EndFont',
    r'  MultiPar              true',
    r'  CustomPars            false',
    r'  ForcePlain            true',
    r'  ParbreakIsNewline     true',
    r'  FreeSpacing           true',
    r'  Requires              covington',
    r'  Preamble',
    r'          \def\glosstr{}',
    r'          \@ifundefined{linggloss}{%',
    r'          \newenvironment{linggloss}[2][]{',
    r'             \def\glosstr{\glt #1}%',
    r'             \gll #2}',
    r'          {\glosstr\glend}}{}',
    r'  EndPreamble',
    r'  InToc                 true',
    r'  ResetsFont            true',
    r'  Argument 1',
    r'          Decoration    conglomerate',
    r'          LabelString   "Translation"',
    r'          MenuString    "Glosse Translation|s"',
    r'          Tooltip       "Add a translation for the glosse"',
    r'  EndArgument',
    r'End'
]

# Local layout definition of the old Flex:Tri-Glosse inset, one list item per
# line.  convert_linggloss() appends it to the local layout of documents that
# use the inset; revert_linggloss() passes it to document.del_local_layout(),
# so the strings must not be altered.
glosss_inset_def = [
    r'### Inserted by lyx2lyx (deprecated ling glosses) ###',
    r'InsetLayout Flex:Tri-Glosse',
    r'  LyXType               custom',
    r'  LabelString           "Tri-Gloss (old version)"',
    r'  MenuString            "Tri-Gloss (old version)"',
    r'  LatexType             environment',
    r'  LatexName             lingglosss',
    r'  Decoration            minimalistic',
    r'  LabelFont',
    r'    Size                Small',
    r'  EndFont',
    r'  MultiPar              true',
    r'  CustomPars            false',
    r'  ForcePlain            true',
    r'  ParbreakIsNewline     true',
    r'  FreeSpacing           true',
    r'  InToc                 true',
    r'  Requires              covington',
    r'  Preamble',
    r'          \def\glosstr{}',
    r'          \@ifundefined{lingglosss}{%',
    r'          \newenvironment{lingglosss}[2][]{',
    r'              \def\glosstr{\glt #1}%',
    r'              \glll #2}',
    r'          {\glosstr\glend}}{}',
    r'  EndPreamble',
    r'  ResetsFont            true',
    r'  Argument 1',
    r'          Decoration    conglomerate',
    r'          LabelString   "Translation"',
    r'          MenuString    "Glosse Translation|s"',
    r'          Tooltip       "Add a translation for the glosse"',
    r'  EndArgument',
    r'End'
]

def convert_linggloss(document):
    " Move old ling glosses to local layout "
    # (first line of the old inset, local layout that defines it)
    legacy_insets = (('\\begin_inset Flex Glosse', gloss_inset_def),
                     ('\\begin_inset Flex Tri-Glosse', glosss_inset_def))
    for begin_line, layout_def in legacy_insets:
        in_use = find_token(document.body, begin_line, 0) != -1
        if in_use:
            document.append_local_layout(layout_def)

def _cut_gloss_argument(document, argname, label, i, j):
    """Remove the inset "Argument <argname>" found between lines i and j.

    Returns the lines of the argument's Plain Layout paragraph, an empty
    list if there is no such argument inset, or None (after issuing a
    warning that mentions `label`) if the inset has no Plain Layout.
    """
    arg = find_token(document.body, "\\begin_inset Argument %s" % argname, i, j)
    if arg == -1:
        return []
    endarg = find_end_of_inset(document.body, arg)
    argbeginPlain = find_token(document.body, "\\begin_layout Plain Layout", arg, endarg)
    if argbeginPlain == -1:
        document.warning("Malformed LyX document: Can't find %s plain Layout" % label)
        return None
    argendPlain = find_end_of_inset(document.body, argbeginPlain)
    # skip the two lines in front of the inset end
    # (presumably "\end_layout" and an empty line)
    content = document.body[argbeginPlain + 1 : argendPlain - 2]

    # remove Arg insets and paragraph, if it only contains this inset
    if document.body[arg - 1] == "\\begin_layout Plain Layout" and find_end_of_layout(document.body, arg - 1) == endarg + 3:
        del document.body[arg - 1 : endarg + 4]
    else:
        del document.body[arg : endarg + 1]
    return content


def revert_linggloss(document):
    """Revert to old ling gloss definitions.

    Only acts on documents using the "linguistics" module.  The local
    layouts of the old gloss insets are removed and every Interlinear Gloss
    flex inset is dissolved into ERT calling \\digloss resp. \\trigloss,
    with the inset arguments moved into the command's arguments.
    """
    if not "linguistics" in document.get_module_list():
        return
    document.del_local_layout(gloss_inset_def)
    document.del_local_layout(glosss_inset_def)

    cov_req = False
    glosses = ["\\begin_inset Flex Interlinear Gloss (2 Lines)", "\\begin_inset Flex Interlinear Gloss (3 Lines)"]
    # (name of the Argument inset, label used in warnings)
    arguments = [("1", "optarg"), ("post:1", "arg 1"),
                 ("post:2", "arg 2"), ("post:3", "arg 3")]
    for glosse in glosses:
        i = 0
        while True:
            i = find_token(document.body, glosse, i+1)
            if i == -1:
                break

            contents = []
            for argname, label in arguments:
                # (Re-)find the inset end: removing an argument shifts it,
                # and the search must not run into whatever follows.
                j = find_end_of_inset(document.body, i)
                if j == -1:
                    document.warning("Malformed LyX document: Can't find end of Gloss inset")
                    break
                content = _cut_gloss_argument(document, argname, label, i, j)
                if content is None:
                    break
                contents.append(content)
            if len(contents) != len(arguments):
                # malformed inset, a warning has been issued
                continue
            optargcontent, marg1content, marg2content, marg3content = contents

            cmd = "\\digloss"
            if glosse == "\\begin_inset Flex Interlinear Gloss (3 Lines)":
                cmd = "\\trigloss"

            beginPlain = find_token(document.body, "\\begin_layout Plain Layout", i)
            endInset = find_end_of_inset(document.body, i)
            endPlain = find_token_backwards(document.body, "\\end_layout", endInset)
            precontent = put_cmd_in_ert(cmd)
            if len(optargcontent) > 0:
                precontent += put_cmd_in_ert("[") + optargcontent + put_cmd_in_ert("]")
            precontent += put_cmd_in_ert("{")

            postcontent = put_cmd_in_ert("}{") + marg1content + put_cmd_in_ert("}{") + marg2content
            if cmd == "\\trigloss":
                postcontent += put_cmd_in_ert("}{") + marg3content
            postcontent += put_cmd_in_ert("}")

            # Replace the tail first so that the indices of the head stay valid.
            document.body[endPlain:endInset + 1] = postcontent
            # empty slice: inserts right behind the Plain Layout line
            document.body[beginPlain + 1:beginPlain] = precontent
            del document.body[i : beginPlain + 1]
            if not cov_req:
                document.append_local_layout("Requires covington")
                cov_req = True
            i = beginPlain


def revert_subexarg(document):
    """Revert linguistic subexamples with argument to ERT.

    Only acts on documents using the "linguistics" module.  A run of
    consecutive Subexample layouts whose "Argument 1" inset is found is
    turned into Standard layouts, framed by ERT \\begin{subexamples}[...]
    and \\end{subexamples}, with an ERT \\item in front of each former
    Subexample paragraph.
    """

    if not "linguistics" in document.get_module_list():
        return

    cov_req = False
    i = 0
    while True:
        i = find_token(document.body, "\\begin_layout Subexample", i+1)
        if i == -1:
            break
        j = find_end_of_layout(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: Can't find end of Subexample layout")
            continue
        while True:
            # check for consecutive layouts
            k = find_token(document.body, "\\begin_layout", j)
            if k == -1 or document.body[k] != "\\begin_layout Subexample":
                break
            j = find_end_of_layout(document.body, k)
            if j == -1:
                document.warning("Malformed LyX document: Can't find end of Subexample layout")
                # stop scanning: carrying on with j == -1 would search from
                # a bogus position
                break
        if j == -1:
            continue

        arg = find_token(document.body, "\\begin_inset Argument 1", i, j)
        if arg == -1:
            continue

        endarg = find_end_of_inset(document.body, arg)
        argbeginPlain = find_token(document.body, "\\begin_layout Plain Layout", arg, endarg)
        if argbeginPlain == -1:
            document.warning("Malformed LyX document: Can't find optarg plain Layout")
            continue
        argendPlain = find_end_of_inset(document.body, argbeginPlain)
        optargcontent = lyx2latex(document, document.body[argbeginPlain + 1 : argendPlain - 2])

        # remove Arg insets and paragraph, if it only contains this inset
        if document.body[arg - 1] == "\\begin_layout Plain Layout" and find_end_of_layout(document.body, arg - 1) == endarg + 3:
            del document.body[arg - 1 : endarg + 4]
        else:
            del document.body[arg : endarg + 1]

        cmd = put_cmd_in_ert("\\begin{subexamples}[" + optargcontent + "]")

        # re-find end of layout
        j = find_end_of_layout(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: Can't find end of Subexample layout")
            continue
        while True:
            # check for consecutive layouts
            k = find_token(document.body, "\\begin_layout", j)
            if k == -1 or document.body[k] != "\\begin_layout Subexample":
                break
            document.body[k : k + 1] = ["\\begin_layout Standard"] + put_cmd_in_ert("\\item ")
            j = find_end_of_layout(document.body, k)
            if j == -1:
                document.warning("Malformed LyX document: Can't find end of Subexample layout")
                break
        if j == -1:
            # no valid position for the closing \end{subexamples}
            continue

        endev = put_cmd_in_ert("\\end{subexamples}")

        # Insert at the end first so that index i stays valid.
        document.body[j : j] = ["\\end_layout", "", "\\begin_layout Standard"] + endev
        document.body[i : i + 1] = ["\\begin_layout Standard"] + cmd \
                + ["\\end_layout", "", "\\begin_layout Standard"] + put_cmd_in_ert("\\item ")
        if not cov_req:
            document.append_local_layout("Requires covington")
            cov_req = True


def _cut_drs_argument(document, argname, i, j):
    """Remove the inset "Argument <argname>" found between lines i and j.

    Returns the lines of the argument's Plain Layout paragraph, an empty
    list if there is no such argument inset, or None (after issuing a
    warning) if the inset has no Plain Layout.
    """
    arg = find_token(document.body, "\\begin_inset Argument %s" % argname, i, j)
    if arg == -1:
        return []
    endarg = find_end_of_inset(document.body, arg)
    argbeginPlain = find_token(document.body, "\\begin_layout Plain Layout", arg, endarg)
    if argbeginPlain == -1:
        document.warning("Malformed LyX document: Can't find Argument %s plain Layout" % argname)
        return None
    argendPlain = find_end_of_inset(document.body, argbeginPlain)
    # skip the two lines in front of the inset end
    # (presumably "\end_layout" and an empty line)
    content = document.body[argbeginPlain + 1 : argendPlain - 2]

    # remove Arg insets and paragraph, if it only contains this inset
    if document.body[arg - 1] == "\\begin_layout Plain Layout" and find_end_of_layout(document.body, arg - 1) == endarg + 3:
        del document.body[arg - 1 : endarg + 4]
    else:
        del document.body[arg : endarg + 1]
    return content


def revert_drs(document):
    """Revert DRS insets (linguistics) to ERT.

    Only acts on documents using the "linguistics" module.  Every DRS flex
    inset is dissolved into ERT calling the respective LaTeX command, with
    the inset arguments moved into the command's arguments.
    """

    if not "linguistics" in document.get_module_list():
        return

    cov_req = False
    # (first line of the inset, the respective LaTeX command)
    drses = [("\\begin_inset Flex DRS", "\\drs"),
             ("\\begin_inset Flex DRS*", "\\drs*"),
             ("\\begin_inset Flex IfThen-DRS", "\\ifdrs"),
             ("\\begin_inset Flex Cond-DRS", "\\condrs"),
             ("\\begin_inset Flex QDRS", "\\qdrs"),
             ("\\begin_inset Flex NegDRS", "\\negdrs"),
             ("\\begin_inset Flex SDRS", "\\sdrs")]
    argnames = ["1", "2", "post:1", "post:2", "post:3", "post:4"]
    for drs, cmd in drses:
        i = 0
        while True:
            # Whole-word match: a plain prefix search for "Flex DRS" would
            # also pick up "Flex DRS*" insets and revert them as \drs.
            i = find_token_exact(document.body, drs, i+1)
            if i == -1:
                break

            # Check for arguments
            contents = {}
            for argname in argnames:
                # (re-)find inset end, it moves with every removed argument
                j = find_end_of_inset(document.body, i)
                if j == -1:
                    document.warning("Malformed LyX document: Can't find end of DRS inset")
                    break
                content = _cut_drs_argument(document, argname, i, j)
                if content is None:
                    break
                contents[argname] = content
            if len(contents) != len(argnames):
                # malformed inset, a warning has been issued
                continue

            beginPlain = find_token(document.body, "\\begin_layout Plain Layout", i)
            endInset = find_end_of_inset(document.body, i)
            endPlain = find_token_backwards(document.body, "\\end_layout", endInset)
            precontent = put_cmd_in_ert(cmd)
            precontent += put_cmd_in_ert("{") + contents["1"] + put_cmd_in_ert("}")
            if drs == "\\begin_inset Flex SDRS":
                precontent += put_cmd_in_ert("{") + contents["2"] + put_cmd_in_ert("}")
            precontent += put_cmd_in_ert("{")

            postcontent = []
            if cmd == "\\qdrs" or cmd == "\\condrs" or cmd == "\\ifdrs":
                postcontent = put_cmd_in_ert("}{") + contents["post:1"] + put_cmd_in_ert("}{") + contents["post:2"] + put_cmd_in_ert("}")
                if cmd == "\\condrs" or cmd == "\\qdrs":
                    postcontent += put_cmd_in_ert("{") + contents["post:3"] + put_cmd_in_ert("}")
                if cmd == "\\qdrs":
                    postcontent += put_cmd_in_ert("{") + contents["post:4"] + put_cmd_in_ert("}")
            else:
                postcontent = put_cmd_in_ert("}")

            # Replace the tail first so that the indices of the head stay valid.
            document.body[endPlain:endInset + 1] = postcontent
            # empty slice: inserts right behind the Plain Layout line
            document.body[beginPlain + 1:beginPlain] = precontent
            del document.body[i : beginPlain + 1]
            if not cov_req:
                document.append_local_layout("Provides covington 1")
                add_to_preamble(document, ["\\usepackage{drs,covington}"])
                cov_req = True
            i = beginPlain


def revert_babelfont(document):
    """ Reverts the use of \\babelfont to user preamble

    Nothing is done unless the header enables non-TeX fonts and selects
    "babel" as language package.  Otherwise the non-TeX roman, sans and
    typewriter font names and the non-TeX sans/typewriter scales are read
    from the header, reset there to "default" and "100", and re-created in
    the user preamble as \\babelfont commands wrapped in \\AtBeginDocument.
    Missing or malformed header lines only trigger a warning; the default
    value is used for them.
    """

    i = find_token(document.header, '\\use_non_tex_fonts', 0)
    if i == -1:
        document.warning("Malformed LyX document: Missing \\use_non_tex_fonts.")
        return
    if not str2bool(get_value(document.header, "\\use_non_tex_fonts", i)):
        return
    i = find_token(document.header, '\\language_package', 0)
    if i == -1:
        document.warning("Malformed LyX document: Missing \\language_package.")
        return
    if get_value(document.header, "\\language_package", i) != "babel":
        return

    def take_nontex_font(token):
        " Return the non-TeX font of the `token` header line and reset it to default "
        j = find_token(document.header, token, 0)
        if j == -1:
            document.warning("Malformed LyX document: Missing " + token + ".")
            return "default"
        # We need to use this regex since split() does not handle quote protection
        fields = re.findall(r'[^"\s]\S*|".+?"', document.header[j])
        if len(fields) < 3:
            # fields are: token, TeX font, non-TeX font
            document.warning("Malformed LyX document: Invalid " + token + " line.")
            return "default"
        name = fields[2].strip('"')
        fields[2] = '"default"'
        document.header[j] = " ".join(fields)
        return name

    def take_nontex_scale(token):
        " Return the non-TeX scale (percent) of the `token` header line and reset it to 100 "
        j = find_token(document.header, token, 0)
        if j == -1:
            document.warning("Malformed LyX document: Missing " + token + ".")
            return 100.0
        fields = document.header[j].split()
        if len(fields) < 3:
            # fields are: token, TeX scale, non-TeX scale
            document.warning("Malformed LyX document: Invalid " + token + " line.")
            return 100.0
        val = fields[2]
        fields[2] = "100"
        document.header[j] = " ".join(fields)
        try:
            return float(val)
        except ValueError:
            # token[1:] drops the leading backslash, e.g. "font_sf_scale"
            document.warning("Invalid " + token[1:] + " value: " + val)
            return 100.0

    # check font settings
    roman = take_nontex_font("\\font_roman")
    sans = take_nontex_font("\\font_sans")
    typew = take_nontex_font("\\font_typewriter")

    osf = False
    i = find_token(document.header, "\\font_osf", 0)
    if i == -1:
        document.warning("Malformed LyX document: Missing \\font_osf.")
    else:
        osf = str2bool(get_value(document.header, "\\font_osf", i))

    sf_scale = take_nontex_scale("\\font_sf_scale")
    tt_scale = take_nontex_scale("\\font_tt_scale")

    # set preamble stuff
    pretext = ['%% This document must be processed with xelatex or lualatex!']
    pretext.append('\\AtBeginDocument{%')
    if roman != "default":
        pretext.append('\\babelfont{rm}[Mapping=tex-text]{' + roman + '}')
    if sans != "default":
        sf = '\\babelfont{sf}['
        if sf_scale != 100.0:
            # header stores percent, fontspec expects a factor
            sf += 'Scale=' + str(sf_scale / 100.0) + ','
        sf += 'Mapping=tex-text]{' + sans + '}'
        pretext.append(sf)
    if typew != "default":
        tw = '\\babelfont{tt}'
        if tt_scale != 100.0:
            tw += '[Scale=' + str(tt_scale / 100.0) + ']'
        tw += '{' + typew + '}'
        pretext.append(tw)
    if osf:
        pretext.append('\\defaultfontfeatures{Numbers=OldStyle}')
    pretext.append('}')
    insert_to_preamble(document, pretext)


def revert_minionpro(document):
    " Revert native MinionPro font definition (with extra options) to LaTeX "

    header = document.header

    nontex_line = find_token(header, '\\use_non_tex_fonts', 0)
    if nontex_line == -1:
        document.warning("Malformed LyX document: Missing \\use_non_tex_fonts.")
        return
    # Only documents using TeX fonts are touched.
    if str2bool(get_value(header, "\\use_non_tex_fonts", nontex_line)):
        return

    # Without extra roman options there is nothing to revert.
    opts_line = find_re(header, re.compile(r'(\\font_roman_opts)'), 0)
    if opts_line == -1:
        return

    # split() does not handle quote protection, hence this regex
    quoted_fields = r'[^"\s]\S*|".+?"'
    opts = re.findall(quoted_fields, header[opts_line])[1].strip('"')

    roman_line = find_token(header, "\\font_roman", 0)
    if roman_line == -1:
        document.warning("Malformed LyX document: Missing \\font_roman.")
        return

    roman_fields = re.findall(quoted_fields, header[roman_line])
    if roman_fields[1].strip('"') != "minionpro":
        return

    # Switch the TeX roman font back to default; the package is loaded
    # from the user preamble instead.
    roman_fields[1] = '"default"'
    header[roman_line] = " ".join(roman_fields)

    osf_line = find_token(header, "\\font_osf true", 0)
    if osf_line != -1:
        header[osf_line] = "\\font_osf false"
        figures = ""
    else:
        # no old style figures requested: pass "lf" to the package
        figures = "lf,"

    add_to_preamble(document, ["\\usepackage[" + figures + opts + "]{MinionPro}"])
    del header[opts_line]


def revert_font_opts(document):
    """ revert font options by outputting \\setxxxfont or \\babelfont to the preamble

    For each of the roman, sans and typewriter families the
    \\font_<family>_opts header line is removed.  With non-TeX fonts enabled,
    the non-TeX font of that family is additionally reset to "default" in the
    header and, unless it already was "default", re-created in the user
    preamble with \\babelfont (language package "babel") or
    \\setmainfont / \\setsansfont / \\setmonofont.  The preamble command
    carries the extra options, the non-TeX scale (sans and typewriter only,
    and only if it differs from 100) and Mapping=tex-text.
    """

    i = find_token(document.header, '\\use_non_tex_fonts', 0)
    if i == -1:
        document.warning("Malformed LyX document: Missing \\use_non_tex_fonts.")
        return
    NonTeXFonts = str2bool(get_value(document.header, "\\use_non_tex_fonts", i))
    i = find_token(document.header, '\\language_package', 0)
    if i == -1:
        document.warning("Malformed LyX document: Missing \\language_package.")
        return
    Babel = (get_value(document.header, "\\language_package", i) == "babel")

    # We need to use this regex since split() does not handle quote protection
    fields_re = re.compile(r'[^"\s]\S*|".+?"')

    def nontex_scale(scale_token):
        " Return the non-TeX scale (in percent) stored in the `scale_token` line "
        k = find_token(document.header, scale_token, 0)
        if k == -1:
            return 100.0
        # the value reads "<TeX scale> <non-TeX scale>"
        values = get_value(document.header, scale_token, k).split()
        if len(values) < 2:
            return 100.0
        try:
            return float(values[1])
        except ValueError:
            document.warning("Invalid " + scale_token[1:] + " value: " + values[1])
            return 100.0

    def revert_family(token, scale_token, family, fontspec_cmd):
        " Revert the options of one font family (header line `token`) "
        k = find_token(document.header, token + "_opts", 0)
        if k == -1:
            return
        fields = fields_re.findall(document.header[k])
        opts = ""
        if len(fields) > 1:
            opts = fields[1].strip('"')
        del document.header[k]
        if not NonTeXFonts:
            return

        scale = 100.0
        if scale_token is not None:
            scale = nontex_scale(scale_token)

        # The _opts line is gone by now, so this finds the font line itself.
        k = find_token(document.header, token, 0)
        if k == -1:
            return
        fields = fields_re.findall(document.header[k])
        if len(fields) < 3:
            # fields are: token, TeX font, non-TeX font
            document.warning("Malformed LyX document: Invalid " + token + " line.")
            return
        font = fields[2].strip('"')
        fields[2] = '"default"'
        document.header[k] = " ".join(fields)
        if font == "default":
            return

        options = []
        if opts:
            options.append(opts)
        if scale != 100.0:
            # header stores percent, fontspec expects a factor
            options.append("Scale=" + str(scale / 100.0))
        # NOTE(review): Mapping=tex-text is applied to the typewriter family
        # as well, unlike revert_babelfont -- confirm this is intended.
        options.append("Mapping=tex-text")

        if Babel:
            cmd = "\\babelfont{" + family + "}"
        else:
            cmd = fontspec_cmd
        add_to_preamble(document, [cmd + "[" + ",".join(options) + "]{" + font + "}"])

    # 1. Roman
    revert_family("\\font_roman", None, "rm", "\\setmainfont")
    # 2. Sans
    revert_family("\\font_sans", "\\font_sf_scale", "sf", "\\setsansfont")
    # 3. Typewriter
    revert_family("\\font_typewriter", "\\font_tt_scale", "tt", "\\setmonofont")


def revert_plainNotoFonts_xopts(document):
    " Revert native (straight) Noto font definition (with extra options) to LaTeX "

    header = document.header

    nontex_line = find_token(header, '\\use_non_tex_fonts', 0)
    if nontex_line == -1:
        document.warning("Malformed LyX document: Missing \\use_non_tex_fonts.")
        return
    # Only documents using TeX fonts are touched.
    if str2bool(get_value(header, "\\use_non_tex_fonts", nontex_line)):
        return

    osf_line = find_token(header, "\\font_osf true", 0)
    osf = osf_line != -1

    opts_line = find_re(header, re.compile(r'(\\font_roman_opts)'), 0)
    has_opts = opts_line != -1
    # Neither extra options nor old style figures: nothing to revert.
    if not (has_opts or osf):
        return

    # split() does not handle quote protection, hence this regex
    quoted_fields = r'[^"\s]\S*|".+?"'

    opts = ""
    if has_opts:
        opts = re.findall(quoted_fields, header[opts_line])[1].strip('"')
    if osf:
        opts = opts + ", osf" if opts != "" else "osf"

    roman_line = find_token(header, "\\font_roman", 0)
    if roman_line == -1:
        return
    roman_fields = re.findall(quoted_fields, header[roman_line])
    if roman_fields[1].strip('"') != "NotoSerif-TLF":
        return

    # Sans and typewriter must both be present and left at their default.
    for family_token in ("\\font_sans", "\\font_typewriter"):
        family_line = find_token(header, family_token, 0)
        if family_line == -1:
            return
        if re.findall(quoted_fields, header[family_line])[1].strip('"') != "default":
            return

    # So we have noto as "complete font"
    roman_fields[1] = '"default"'
    header[roman_line] = " ".join(roman_fields)

    add_to_preamble(document, ["\\usepackage[" + opts + "]{noto}"])
    if osf:
        header[osf_line] = "\\font_osf false"
    if has_opts:
        del header[opts_line]


def revert_notoFonts_xopts(document):
    " Revert native (extended) Noto font definition (with extra options) to LaTeX "

    pos = find_token(document.header, '\\use_non_tex_fonts', 0)
    if pos == -1:
        document.warning("Malformed LyX document: Missing \\use_non_tex_fonts.")
        return

    uses_nontex = str2bool(get_value(document.header, "\\use_non_tex_fonts", pos))
    if not uses_nontex:
        # Collect the font packages for the Noto mapping and, if the
        # reversion reports success, add them to the user preamble.
        packages = {}
        noto_mapping = createFontMapping(['Noto'])
        if revert_fonts(document, noto_mapping, packages, True):
            add_preamble_fonts(document, packages)


def revert_IBMFonts_xopts(document):
    """ Revert native IBM font definition (with extra options) to LaTeX

    Nothing is done when \\use_non_tex_fonts is missing from the header (a
    warning is issued) or when non-TeX fonts are enabled.
    """

    i = find_token(document.header, '\\use_non_tex_fonts', 0)
    if i == -1:
        document.warning("Malformed LyX document: Missing \\use_non_tex_fonts.")
        return
    if str2bool(get_value(document.header, "\\use_non_tex_fonts", i)):
        return

    # fontmap is handed to revert_fonts and, if that call returns a true
    # value, its collected font packages are added to the user preamble.
    fontmap = dict()
    fm = createFontMapping(['IBM'])
    if revert_fonts(document, fm, fontmap, True):
        add_preamble_fonts(document, fontmap)


def revert_AdobeFonts_xopts(document):
    """ Revert native Adobe font definition (with extra options) to LaTeX

    Nothing is done when \\use_non_tex_fonts is missing from the header (a
    warning is issued) or when non-TeX fonts are enabled.
    """

    i = find_token(document.header, '\\use_non_tex_fonts', 0)
    if i == -1:
        document.warning("Malformed LyX document: Missing \\use_non_tex_fonts.")
        return
    if str2bool(get_value(document.header, "\\use_non_tex_fonts", i)):
        return

    # fontmap is handed to revert_fonts and, if that call returns a true
    # value, its collected font packages are added to the user preamble.
    fontmap = dict()
    fm = createFontMapping(['Adobe'])
    if revert_fonts(document, fm, fontmap, True):
        add_preamble_fonts(document, fontmap)


##
# Conversion hub
#

# Version strings associated with this module.
# NOTE(review): presumably the LyX releases whose file formats are handled
# here -- confirm against the lyx2lyx driver.
supported_versions = ["2.4.0", "2.4"]

# Conversion table: one [format number, [functions]] entry per format, in
# ascending order (545..580).  An empty function list means that no
# conversion routine is registered for that step.
# NOTE(review): presumably the number is the file format reached after the
# listed functions have run -- confirm against the lyx2lyx driver.
convert = [
           [545, [convert_lst_literalparam]],
           [546, []],
           [547, []],
           [548, []],
           [549, []],
           [550, [convert_fontenc]],
           [551, []],
           [552, []],
           [553, []],
           [554, []],
           [555, []],
           [556, []],
           [557, [convert_vcsinfo]],
           [558, [removeFrontMatterStyles]],
           [559, []],
           [560, []],
           [561, [convert_latexFonts]], # Handle dejavu, ibmplex fonts in GUI
           [562, []],
           [563, []],
           [564, []],
           [565, [convert_AdobeFonts]], # Handle adobe fonts in GUI
           [566, [convert_hebrew_parentheses]],
           [567, []],
           [568, []],
           [569, []],
           [570, []],
           [571, []],
           [572, [convert_notoFonts]],  # Added options thin, light, extralight for Noto
           [573, [convert_inputencoding_namechange]],
           [574, [convert_ruby_module, convert_utf8_japanese]],
           [575, [convert_lineno]],
           [576, []],
           [577, [convert_linggloss]],
           [578, []],
           [579, []],
           [580, []]
          ]

# Reversion table: same [format number, [functions]] layout as `convert`,
# but in descending order (579..544).  The order of the functions inside an
# entry matters where noted (see the first entry).
revert =  [[579, [revert_minionpro, revert_plainNotoFonts_xopts, revert_notoFonts_xopts, revert_IBMFonts_xopts, revert_AdobeFonts_xopts, revert_font_opts]], # keep revert_font_opts last!
           [578, [revert_babelfont]],
           [577, [revert_drs]],
           [576, [revert_linggloss, revert_subexarg]],
           [575, [revert_new_languages]],
           [574, [revert_lineno]],
           [573, [revert_ruby_module, revert_utf8_japanese]],
           [572, [revert_inputencoding_namechange]],
           [571, [revert_notoFonts]],
           [570, [revert_cmidruletrimming]],
           [569, [revert_bibfileencodings]],
           [568, [revert_tablestyle]],
           [567, [revert_soul]],
           [566, [revert_malayalam]],
           [565, [revert_hebrew_parentheses]],
           [564, [revert_AdobeFonts]],
           [563, [revert_lformatinfo]],
           [562, [revert_listpargs]],
           [561, [revert_l7ninfo]],
           [560, [revert_latexFonts]], # Handle dejavu, ibmplex fonts in user preamble
           [559, [revert_timeinfo, revert_namenoextinfo]],
           [558, [revert_dateinfo]],
           [557, [addFrontMatterStyles]],
           [556, [revert_vcsinfo]],
           [555, [revert_bibencoding]],
           [554, [revert_vcolumns]],
           [553, [revert_stretchcolumn]],
           [552, [revert_tuftecite]],
           [551, [revert_floatpclass, revert_floatalignment]],
           [550, [revert_nospellcheck]],
           [549, [revert_fontenc]],
           [548, []],# dummy format change
           [547, [revert_lscape]],
           [546, [revert_xcharter]],
           [545, [revert_paratype]],
           [544, [revert_lst_literalparam]]
          ]


# Running this file as a script does nothing: the guarded body is a no-op.
if __name__ == "__main__":
    pass