lyx_mirror/lib/lyx2lyx/lyx_2_4.py

# -*- coding: utf-8 -*-
# This file is part of lyx2lyx
# Copyright (C) 2018 The LyX team
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.

""" Convert files to the file format generated by lyx 2.4"""

import re, string
import unicodedata
import sys, os

from datetime import (datetime, date, time)

# Uncomment only what you need to import, please.

from parser_tools import (count_pars_in_inset, find_end_of_inset, find_end_of_layout,
                          find_token, find_re, get_bool_value, get_containing_layout,
                          get_option_value, get_value, get_quoted_value)
#    del_token, del_value, del_complete_lines,
#    find_complete_lines, find_end_of,
#    find_re, find_substring, find_token_backwards,
#    get_containing_inset,
#    is_in_inset, set_bool_value
#    find_tokens, find_token_exact, check_token

from lyx2lyx_tools import (put_cmd_in_ert, add_to_preamble, revert_language, revert_flex_inset)
#  revert_font_attrs, insert_to_preamble, latex_length
#  get_ert, lyx2latex, lyx2verbatim, length_in_bp, convert_info_insets
#  revert_flex_inset, hex2ratio, str2bool

####################################################################
# Private helper functions

def add_preamble_fonts(document, fontmap):
    """ Add the collected font packages, with their options, to the user preamble

    fontmap maps a package name to the list of options it is loaded with;
    one \\usepackage line is emitted per package.
    """

    for package, options in fontmap.items():
        if options:
            optarg = "[%s]" % ",".join(options)
        else:
            optarg = ""
        add_to_preamble(document, ["\\usepackage" + optarg + "{" + package + "}"])


def createkey(pkg, options):
    " Build the lookup key 'pkg:opt1-opt2-...' (sorts the options list in place) "
    options.sort()
    joined_options = "-".join(options)
    return pkg + ':' + joined_options

class fontinfo:
    " Description of one known font and of the LaTeX package providing it "

    def __init__(self):
        # What the font is
        self.fontname = None    # key into font2pkgmap
        self.fonttype = None    # roman,sans,typewriter,math
        # How the font is loaded
        self.package = None
        self.options = []
        self.pkgkey = None      # key into pkg2fontmap
        # How the font is scaled
        self.scaletype = None   # None,sf,tt
        self.scaleopt = None    # None, 'scaled', 'scale'
        self.scaleval = 1

    def addkey(self):
        " Compute pkgkey from the current package and options "
        self.pkgkey = createkey(self.package, self.options)

class fontmapping:
    """ Two-way mapping between font names and (package, options) pairs

    font2pkgmap: font name           -> fontinfo
    pkg2fontmap: package+options key -> font name
    pkginmap:    package name        -> 1, if any font of that package is known
    """

    def __init__(self):
        self.font2pkgmap = dict()
        self.pkg2fontmap = dict()
        self.pkginmap = dict()  # defines, if a map for package exists

    @staticmethod
    def _report_mapping_error():
        " Report an inconsistency between the two maps "
        # No document object is available in this class, so the message
        # goes straight to stderr instead of through document.error().
        sys.stderr.write("Something is wrong in pkgname+options <-> fontname mapping\n")

    def expandFontMapping(self, font_list, font_type, scale_type, pkg, scaleopt = None):
        " Expand fontinfo mapping"
        #
        # fontlist:    list of fontnames, each element
        #              may contain a ','-separated list of needed options
        #              like e.g. 'IBMPlexSansCondensed,condensed'
        # font_type:   one of 'roman', 'sans', 'typewriter', 'math'
        # scale_type:  one of None, 'sf', 'tt'
        # pkg:         package defining the font. Defaults to fontname if None
        # scaleopt:    one of None, 'scale', 'scaled', or some other string
        #              to be used in scale option (e.g. scaled=0.7)
        for fl in font_list:
            fe = fontinfo()
            fe.fonttype = font_type
            fe.scaletype = scale_type
            flt = fl.split(",")
            font_name = flt[0]
            fe.fontname = font_name
            fe.options = flt[1:]
            fe.scaleopt = scaleopt
            if pkg is None:
                fe.package = font_name
            else:
                fe.package = pkg
            fe.addkey()
            self.font2pkgmap[font_name] = fe
            if fe.pkgkey in self.pkg2fontmap:
                # Repeated the same entry? Check content
                if self.pkg2fontmap[fe.pkgkey] != font_name:
                    self._report_mapping_error()
            # The latest definition wins, also after a reported conflict
            self.pkg2fontmap[fe.pkgkey] = font_name
            self.pkginmap[fe.package] = 1

    def getfontname(self, pkg, options):
        " Return the font name loaded by pkg with options, or None if unknown "
        options.sort()
        pkgkey = createkey(pkg, options)
        if pkgkey not in self.pkg2fontmap:
            return None
        fontname = self.pkg2fontmap[pkgkey]
        if fontname not in self.font2pkgmap:
            self._report_mapping_error()
            return None
        # Only accept the name if both maps agree on the key
        if pkgkey == self.font2pkgmap[fontname].pkgkey:
            return fontname
        return None

def createFontMapping(fontlist):
    " Create the fontmapping for the font families named in fontlist "
    # Create info for known fonts for the use in
    #   convert_latexFonts() and
    #   revert_latexFonts()
    #
    # * Would be more handy to parse latexFonts file,
    #   but the path to this file is unknown
    # * Only the families listed below are known; other names are ignored.
    # * Expand, if desired
    #
    # Each entry holds the positional arguments of one
    # fontmapping.expandFontMapping() call, in call order.
    known_families = {
        'DejaVu': [
            (['DejaVuSerif', 'DejaVuSerifCondensed'], "roman", None, None),
            (['DejaVuSans','DejaVuSansCondensed'], "sans", "sf", None, "scaled"),
            (['DejaVuSansMono'], "typewriter", "tt", None, "scaled"),
        ],
        'IBM': [
            (['IBMPlexSerif', 'IBMPlexSerifThin,thin',
              'IBMPlexSerifExtraLight,extralight', 'IBMPlexSerifLight,light',
              'IBMPlexSerifSemibold,semibold'],
             "roman", None, "plex-serif"),
            (['IBMPlexSans','IBMPlexSansCondensed,condensed',
              'IBMPlexSansThin,thin', 'IBMPlexSansExtraLight,extralight',
              'IBMPlexSansLight,light', 'IBMPlexSansSemibold,semibold'],
             "sans", "sf", "plex-sans", "scale"),
            (['IBMPlexMono', 'IBMPlexMonoThin,thin',
              'IBMPlexMonoExtraLight,extralight', 'IBMPlexMonoLight,light',
              'IBMPlexMonoSemibold,semibold'],
             "typewriter", "tt", "plex-mono", "scale"),
        ],
        'Adobe': [
            (['ADOBESourceSerifPro'], "roman", None, "sourceserifpro"),
            (['ADOBESourceSansPro'], "sans", "sf", "sourcesanspro", "scaled"),
            (['ADOBESourceCodePro'], "typewriter", "tt", "sourcecodepro", "scaled"),
        ],
        'Noto': [
            (['NotoSerifRegular,regular', 'NotoSerifMedium,medium',
              'NotoSerifThin,thin', 'NotoSerifLight,light',
              'NotoSerifExtralight,extralight'],
             "roman", None, "noto-serif"),
            (['NotoSansRegular,regular', 'NotoSansMedium,medium',
              'NotoSansThin,thin', 'NotoSansLight,light',
              'NotoSansExtralight,extralight'],
             "sans", "sf", "noto-sans", "scaled"),
            (['NotoMonoRegular'], "typewriter", "tt", "noto-mono", "scaled"),
        ],
    }

    fm = fontmapping()
    for family in fontlist:
        for call_args in known_families.get(family, ()):
            fm.expandFontMapping(*call_args)
    return fm

def convert_fonts(document, fm):
    """ Handle font definition to LaTeX

    Scan the user preamble for \\usepackage lines that load a font known
    to the fontmapping fm. Each such line is removed from the preamble
    (together with a directly preceding "% Added by lyx2lyx" marker) and
    replaced by the corresponding \\font_<type> (and, for scalable fonts,
    \\font_<sf|tt>_scale) settings in the document header.
    """

    rpkg = re.compile(r'^\\usepackage(\[([^\]]*)\])?\{([^\}]+)\}')
    rscaleopt = re.compile(r'^scaled?=(.*)')

    i = 0
    while i < len(document.preamble):
        i = find_re(document.preamble, rpkg, i)
        if i == -1:
            return
        mo = rpkg.search(document.preamble[i])
        if mo is None:
            # Should not happen after find_re(), but do not crash on it
            i += 1
            continue
        if mo.group(2) is None:
            options = []
        else:
            options = mo.group(2).replace(' ', '').split(",")
        pkg = mo.group(3)

        # Split off the (first) scale option; it is not part of the font key
        oscale = 1
        for o, option in enumerate(options):
            smo = rscaleopt.search(option)
            if smo is not None:
                oscale = smo.group(1)
                del options[o]
                break

        if pkg not in fm.pkginmap:
            i += 1
            continue
        # determine fontname
        fn = fm.getfontname(pkg, options)
        if fn is None:
            i += 1
            continue
        info = fm.font2pkgmap[fn]

        # Compute the header scale before touching the preamble, so that an
        # unparsable value leaves the document unchanged.
        scale = None
        if info.scaletype is not None:
            try:
                # round() instead of plain int(): 0.57 * 100 is 56.999...
                scale = "%03d" % int(round(float(oscale) * 100))
            except ValueError:
                document.warning("Can't parse scale value '%s' of package %s" % (oscale, pkg))
                i += 1
                continue
            info.scaleval = oscale

        del document.preamble[i]
        if i > 0 and document.preamble[i-1] == "% Added by lyx2lyx":
            del document.preamble[i-1]
            # Everything moved up by one more line; step back so that the
            # line following the removed package is not skipped.
            i -= 1

        if scale is not None:
            fontscale = "\\font_" + info.scaletype + "_scale"
            j = find_token(document.header, fontscale, 0)
            if j != -1:
                vals = get_value(document.header, fontscale, j).split()
                # The second field of the header value is kept as it is
                if len(vals) > 1:
                    other = vals[1]
                else:
                    other = "100"
                document.header[j] = fontscale + " " + scale + " " + other

        ft = "\\font_" + info.fonttype
        j = find_token(document.header, ft, 0)
        if j != -1:
            val = get_value(document.header, ft, j)
            words = val.split() # ! splits also values like '"DejaVu Sans"'
            if words:
                words[0] = '"' + fn + '"'
            else:
                words = ['"' + fn + '"']
            document.header[j] = ft + ' ' + ' '.join(words)
        # No increment of i here: the deletion already moved the next line to i

def revert_fonts(document, fm, fontmap):
    """ Revert native font definition to LaTeX

    Every \\font_<type> header setting naming a font known to fm is reset
    to "default". fontmap (key = package, value = list of found options)
    is filled here and used later in add_preamble_fonts() to add the
    packages to the user preamble.
    """

    header_font = re.compile(r'^\s*(\\font_(roman|sans|typewriter|math))\s+')
    scale_pair = re.compile(r'^\s*(\d+)\s+(\d+)')
    pos = 0
    while pos < len(document.header):
        pos = find_re(document.header, header_font, pos)
        if pos == -1:
            break
        match = header_font.search(document.header[pos])
        if match is None:
            pos += 1
            continue
        token = match.group(1)    # the complete header token, e.g. '\font_roman'
        # ! splits also values like '"DejaVu Sans"'
        fields = get_value(document.header, token, pos).split(' ')
        name = fields[0].strip('"') # TeX font name has no whitespace
        if name not in fm.font2pkgmap:
            pos += 1
            continue
        info = fm.font2pkgmap[name]
        pkg_options = fontmap.setdefault(info.package, [])
        fields[0] = '"default"'
        document.header[pos] = token + ' ' + ' '.join(fields)
        if info.scaleopt is not None:
            scales = get_value(document.header, "\\font_" + info.scaletype + "_scale", 0)
            found = scale_pair.search(scales)
            if found is not None and found.group(1) != "100":
                # set correct scale option
                pkg_options.append(info.scaleopt + "=" + format(float(found.group(1)) / 100, '.2f'))
        if len(info.options) > 0:
            pkg_options.extend(info.options)
        pos += 1

###############################################################################
###
### Conversion and reversion routines
###
###############################################################################

def convert_notoFonts(document):
    " Convert Noto font packages in the preamble to native font settings "

    # Only documents using TeX fonts are affected
    if find_token(document.header, "\\use_non_tex_fonts false", 0) == -1:
        return
    convert_fonts(document, createFontMapping(['Noto']))

def revert_notoFonts(document):
    " Revert native Noto font settings to LaTeX preamble code "

    # Only documents using TeX fonts are affected
    if find_token(document.header, "\\use_non_tex_fonts false", 0) == -1:
        return
    packages = {}
    revert_fonts(document, createFontMapping(['Noto']), packages)
    add_preamble_fonts(document, packages)

def convert_latexFonts(document):
    " Convert DejaVu and IBMPlex font packages in the preamble to native font settings "

    # Only documents using TeX fonts are affected
    if find_token(document.header, "\\use_non_tex_fonts false", 0) == -1:
        return
    convert_fonts(document, createFontMapping(['DejaVu', 'IBM']))

def revert_latexFonts(document):
    " Revert native DejaVu and IBMPlex font settings to LaTeX preamble code "

    # Only documents using TeX fonts are affected
    if find_token(document.header, "\\use_non_tex_fonts false", 0) == -1:
        return
    packages = {}
    revert_fonts(document, createFontMapping(['DejaVu', 'IBM']), packages)
    add_preamble_fonts(document, packages)

def convert_AdobeFonts(document):
    " Convert Adobe Source font packages in the preamble to native font settings "

    # Only documents using TeX fonts are affected
    if find_token(document.header, "\\use_non_tex_fonts false", 0) == -1:
        return
    convert_fonts(document, createFontMapping(['Adobe']))

def revert_AdobeFonts(document):
    " Revert native Adobe Source font settings to LaTeX preamble code "

    # Only documents using TeX fonts are affected
    if find_token(document.header, "\\use_non_tex_fonts false", 0) == -1:
        return
    packages = {}
    revert_fonts(document, createFontMapping(['Adobe']), packages)
    add_preamble_fonts(document, packages)

def removeFrontMatterStyles(document):
    """ Remove styles Begin/EndFrontmatter

    Each BeginFrontmatter/EndFrontmatter paragraph is removed together with
    the blank lines around it; a single empty line is left in its place.
    """

    layouts = ['BeginFrontmatter', 'EndFrontmatter']
    for layout in layouts:
        i = 0
        while True:
            i = find_token(document.body, '\\begin_layout ' + layout, i)
            if i == -1:
                break
            j = find_end_of_layout(document.body, i)
            if j == -1:
                document.warning("Malformed LyX document: Can't find end of layout at line %d" % i)
                i += 1
                continue
            # Extend the range over the surrounding blank lines
            while i > 0 and document.body[i-1].strip() == '':
                i -= 1
            # Bounds check: the layout may be the very last thing in the body
            while j + 1 < len(document.body) and document.body[j+1].strip() == '':
                j = j + 1
            document.body[i:j+1] = ['']

def addFrontMatterStyles(document):
    """ Use styles Begin/EndFrontmatter for elsarticle

    For elsarticle documents, the span from the first to the last
    front matter paragraph (title, authors, abstract, ...) is enclosed in a
    BeginFrontmatter and an EndFrontmatter paragraph.
    """

    def insertFrontmatter(prefix, line):
        " Insert a <prefix>Frontmatter paragraph at line, replacing blank lines there "
        above = line
        while above > 0 and document.body[above-1].strip() == '':
            above -= 1
        below = line
        # Bounds check: line may be at (or blank lines may reach) the end of the body
        while below < len(document.body) and document.body[below].strip() == '':
            below += 1
        document.body[above:below] = ['', '\\begin_layout ' + prefix + 'Frontmatter',
                                    '\\begin_inset Note Note',
                                    'status open', '',
                                    '\\begin_layout Plain Layout',
                                    'Keep this empty!',
                                    '\\end_layout', '',
                                    '\\end_inset', '', '',
                                    '\\end_layout', '']

    if document.textclass == "elsarticle":
        layouts = ['Title', 'Title footnote', 'Author', 'Author footnote',
                   'Corresponding author', 'Address', 'Email', 'Abstract', 'Keywords']
        first = -1      # first line of the earliest front matter paragraph
        last = -1       # line after the end of the latest front matter paragraph
        for layout in layouts:
            i = 0
            while True:
                i = find_token(document.body, '\\begin_layout ' + layout, i)
                if i == -1:
                    break
                k = find_end_of_layout(document.body, i)
                if k == -1:
                    document.warning("Malformed LyX document: Can't find end of layout at line %d" % i)
                    i += 1
                    continue
                if first == -1 or i < first:
                    first = i
                if last == -1 or last <= k:
                    last = k+1
                i = k+1
        if first == -1:
            return
        # Insert the end marker first, so that 'first' stays a valid index
        insertFrontmatter('End', last)
        insertFrontmatter('Begin', first)

def convert_lst_literalparam(document):
    " Add the param 'literal \"true\"' to every include inset "

    pos = 0
    while True:
        pos = find_token(document.body, '\\begin_inset CommandInset include', pos)
        if pos == -1:
            break
        inset_end = find_end_of_inset(document.body, pos)
        if inset_end == -1:
            document.warning("Malformed LyX document: Can't find end of command inset at line %d" % pos)
            pos += 1
            continue
        # The new param goes in front of the first empty line of the inset
        # (or in front of the inset end, if there is none)
        insert_at = pos
        while insert_at < inset_end and document.body[insert_at].strip():
            insert_at += 1
        document.body.insert(insert_at, "literal \"true\"")
        pos = insert_at


def revert_lst_literalparam(document):
    " Remove the param 'literal' from every include inset "

    pos = 0
    while True:
        pos = find_token(document.body, '\\begin_inset CommandInset include', pos)
        if pos == -1:
            break
        inset_end = find_end_of_inset(document.body, pos)
        if inset_end == -1:
            document.warning("Malformed LyX document: Can't find end of include inset at line %d" % pos)
            pos += 1
            continue
        literal = find_token(document.body, 'literal', pos, inset_end)
        if literal != -1:
            # Stay on this inset; it is looked at again in the next round
            del document.body[literal]
        else:
            pos += 1


def revert_paratype(document):
    """ Revert ParaType font definitions to LaTeX

    Only documents using TeX fonts are touched. PTSerif alone (sans and
    typewriter at their defaults) loads the package paratype; otherwise each
    ParaType font found loads its own package (PTSerif, PTSans, PTMono, the
    latter two with a scaled option if needed) and its header setting is
    reset to "default".
    """

    if find_token(document.header, "\\use_non_tex_fonts false", 0) == -1:
        return

    def scale_option(token):
        " Return 'scaled=<factor>' for the header scale token, or '' for 100% "
        # The value holds two numbers; the first one is used here. Take it
        # by splitting rather than by cutting a fixed number of characters,
        # so the width of the second number does not matter.
        fields = get_value(document.header, token, 0).split()
        if not fields or fields[0] == "100":
            return ""
        try:
            return "scaled=" + format(float(fields[0]) / 100, '.2f')
        except ValueError:
            document.warning("Can't parse value of %s: '%s'" % (token, fields[0]))
            return ""

    i1 = find_token(document.header, "\\font_roman \"PTSerif-TLF\"", 0)
    i2 = find_token(document.header, "\\font_sans \"default\"", 0)
    i3 = find_token(document.header, "\\font_typewriter \"default\"", 0)
    j = find_token(document.header, "\\font_sans \"PTSans-TLF\"", 0)
    sfoption = scale_option("\\font_sf_scale")
    k = find_token(document.header, "\\font_typewriter \"PTMono-TLF\"", 0)
    ttoption = scale_option("\\font_tt_scale")

    if i1 != -1 and i2 != -1 and i3 != -1:
        # NOTE(review): this branch leaves "PTSerif-TLF" in the header,
        # unlike the branches below -- confirm whether that is intended.
        add_to_preamble(document, ["\\usepackage{paratype}"])
    else:
        if i1 != -1:
            add_to_preamble(document, ["\\usepackage{PTSerif}"])
            document.header[i1] = document.header[i1].replace("PTSerif-TLF", "default")
        if j != -1:
            if sfoption != "":
                add_to_preamble(document, ["\\usepackage[" + sfoption + "]{PTSans}"])
            else:
                add_to_preamble(document, ["\\usepackage{PTSans}"])
            document.header[j] = document.header[j].replace("PTSans-TLF", "default")
        if k != -1:
            if ttoption != "":
                add_to_preamble(document, ["\\usepackage[" + ttoption + "]{PTMono}"])
            else:
                add_to_preamble(document, ["\\usepackage{PTMono}"])
            document.header[k] = document.header[k].replace("PTMono-TLF", "default")


def revert_xcharter(document):
    " Revert the native XCharter roman font setting to LaTeX preamble code "

    roman = find_token(document.header, "\\font_roman \"xcharter\"", 0)
    if roman == -1:
        return

    # replace unsupported font setting
    document.header[roman] = document.header[roman].replace("xcharter", "default")
    # no need for preamble code with system fonts
    if get_bool_value(document.header, "\\use_non_tex_fonts"):
        return

    # transfer old style figures setting to package options
    options = ""
    osf = find_token(document.header, "\\font_osf true")
    if osf != -1:
        options = "[osf]"
        document.header[osf] = "\\font_osf false"
    add_to_preamble(document, ["\\usepackage%s{XCharter}"%options])


def revert_lscape(document):
    " Reverts the landscape environment (Landscape module) to TeX-code "

    if "landscape" not in document.get_module_list():
        return

    pos = 0
    while True:
        pos = find_token(document.body, "\\begin_inset Flex Landscape", pos)
        if pos == -1:
            return
        inset_end = find_end_of_inset(document.body, pos)
        if inset_end == -1:
            document.warning("Malformed LyX document: Can't find end of Landscape inset")
            pos += 1
            continue

        # The floating variant is wrapped in \afterpage{...}
        floating = document.body[pos] == "\\begin_inset Flex Landscape (Floating)"
        if floating:
            closing = "\\end{landscape}}"
            opening = "\\afterpage{\\begin{landscape}"
        else:
            closing = "\\end{landscape}"
            opening = "\\begin{landscape}"

        # Replace the tail first, so that pos stays valid for the head
        document.body[inset_end - 2 : inset_end + 1] = put_cmd_in_ert(closing)
        document.body[pos : pos + 4] = put_cmd_in_ert(opening)
        if floating:
            add_to_preamble(document, ["\\usepackage{afterpage}"])

        add_to_preamble(document, ["\\usepackage{pdflscape}"])
        # no need to reset pos


def convert_fontenc(document):
    " Convert the default fontenc setting: 'global' becomes 'auto' "

    pos = find_token(document.header, "\\fontencoding global", 0)
    if pos != -1:
        old_setting = document.header[pos]
        document.header[pos] = old_setting.replace("global", "auto")


def revert_fontenc(document):
    " Revert the default fontenc setting: 'auto' becomes 'global' "

    pos = find_token(document.header, "\\fontencoding auto", 0)
    if pos != -1:
        new_setting = document.header[pos]
        document.header[pos] = new_setting.replace("auto", "global")


def revert_nospellcheck(document):
    " Remove every nospellcheck font info param from the body "

    pos = find_token(document.body, '\\nospellcheck', 0)
    while pos != -1:
        del document.body[pos]
        # The following line moved up to pos, so search from there again
        pos = find_token(document.body, '\\nospellcheck', pos)


def revert_floatpclass(document):
    """ Remove float placement params 'document' and 'class'

    Drops the header setting "\\float_placement class" and, for each float
    inset, a 'placement class' or 'placement document' line directly after
    the inset start.
    """

    i = find_token(document.header, "\\float_placement class", 0)
    if i != -1:
        del document.header[i]

    i = 0
    while True:
        i = find_token(document.body, '\\begin_inset Float', i)
        if i == -1:
            break
        # The placement param is only looked for in the line after the inset start
        k = find_token(document.body, 'placement class', i, i + 2)
        if k != -1:
            del document.body[k]
            # Look at the same inset once more
            continue
        k = find_token(document.body, 'placement document', i, i + 2)
        if k != -1:
            del document.body[k]
        i += 1


def revert_floatalignment(document):
    """ Remove float alignment params

    The header setting \\float_alignment and the 'alignment' param of each
    float inset are removed. A float aligned left, center or right (directly
    or via the document-wide setting) gets the matching LaTeX command as ERT
    at the start of its first Plain Layout instead.
    """

    i = find_token(document.header, "\\float_alignment", 0)
    galignment = ""
    if i != -1:
        galignment = get_value(document.header, "\\float_alignment", i)
        del document.header[i]

    i = 0
    while True:
        i = find_token(document.body, '\\begin_inset Float', i)
        if i == -1:
            break
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: Can't find end of inset at line " + str(i))
            i += 1
            # Skip this inset: without its end, j can't be used below
            continue
        k = find_token(document.body, 'alignment', i, i + 4)
        if k == -1:
            i = j
            continue
        alignment = get_value(document.body, "alignment", k)
        if alignment == "document":
            alignment = galignment
        del document.body[k]
        l = find_token(document.body, "\\begin_layout Plain Layout", i, j)
        if l == -1:
            document.warning("Can't find float layout!")
            i += 1
            continue
        alcmd = []
        if alignment == "left":
            alcmd = put_cmd_in_ert("\\raggedright{}")
        elif alignment == "center":
            alcmd = put_cmd_in_ert("\\centering{}")
        elif alignment == "right":
            alcmd = put_cmd_in_ert("\\raggedleft{}")
        if len(alcmd) > 0:
            document.body[l+1:l+1] = alcmd
        i += 1


def revert_tuftecite(document):
    r""" Revert \cite commands in tufte classes

    In tufte-book and tufte-handout documents, every citation inset using
    the LatexCommand "cite" is replaced by ERT holding
    \cite[before][after]{key}.
    """

    tufte = ["tufte-book", "tufte-handout"]
    if document.textclass not in tufte:
        return

    i = 0
    while (True):
        i = find_token(document.body, "\\begin_inset CommandInset citation", i)
        if i == -1:
            break
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Can't find end of citation inset at line %d!!" %(i))
            i += 1
            continue
        k = find_token(document.body, "LatexCommand", i, j)
        if k == -1:
            document.warning("Can't find LatexCommand for citation inset at line %d!" %(i))
            i = j + 1
            continue
        cmd = get_value(document.body, "LatexCommand", k)
        if cmd != "cite":
            i = j + 1
            continue
        pre = get_quoted_value(document.body, "before", i, j)
        post = get_quoted_value(document.body, "after", i, j)
        key = get_quoted_value(document.body, "key", i, j)
        if not key:
            document.warning("Citation inset at line %d does not have a key!" %(i))
            key = "???"
        # Replace command with ERT
        res = "\\cite"
        if pre:
            res += "[" + pre + "]"
        if post:
            res += "[" + post + "]"
        elif pre:
            # An empty second argument keeps 'pre' in the first position
            res += "[]"
        res += "{" + key + "}"
        ert = put_cmd_in_ert([res])
        document.body[i:j+1] = ert
        # Continue right after the inserted ERT; j is no longer valid, since
        # the ERT need not have as many lines as the inset it replaces.
        i += len(ert)


def revert_stretchcolumn(document):
    """ We remove the column varwidth flags or everything else will become a mess.

    A warning is issued for every column line that carries a varwidth
    attribute; the attribute varwidth="true" is removed from it.
    """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Tabular", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            document.warning("Malformed LyX document: Could not find end of tabular.")
            # Move on, or the same inset would be found again forever
            i += 1
            continue
        for k in range(i, j):
            if re.search('^<column.*varwidth="[^"]+".*>$', document.body[k]):
                document.warning("Converting 'tabularx'/'xltabular' table to normal table.")
                document.body[k] = document.body[k].replace(' varwidth="true"', '')
        i = i + 1


def revert_vcolumns(document):
    """ Revert standard columns with line breaks etc.

    A cell is a candidate if it contains a Newline inset, more than one
    paragraph or a layout other than Plain Layout. If such a cell is not
    rotated and sits in a column without width, varwidth and special
    setting, the column gets a V{\\linewidth} column specification (package
    varwidth, plus package array if an alignment prefix is needed) and the
    Newline insets of the cell are replaced by ERT.
    """
    i = 0
    needvarwidth = False
    needarray = False
    try:
        while True:
            i = find_token(document.body, "\\begin_inset Tabular", i)
            if i == -1:
                return
            j = find_end_of_inset(document.body, i)
            if j == -1:
                document.warning("Malformed LyX document: Could not find end of tabular.")
                i += 1
                continue

            # Collect necessary column information
            # (the numbers of rows and columns are read from the 2nd and 3rd
            # quoted value of the line following the inset start)
            features = document.body[i+1].split('"')
            nrows = int(features[3])
            ncols = int(features[5])
            col_info = []
            m = i + 1
            for k in range(ncols):
                m = find_token(document.body, "<column", m, j)
                if m == -1:
                    break
                width = get_option_value(document.body[m], 'width')
                varwidth = get_option_value(document.body[m], 'varwidth')
                alignment = get_option_value(document.body[m], 'alignment')
                special = get_option_value(document.body[m], 'special')
                # The last entry tells whether this function has already
                # turned the column into a V column
                col_info.append([width, varwidth, alignment, special, m, False])
                # Continue behind this line, or the same column is found again
                m += 1
            if len(col_info) != ncols:
                document.warning("Malformed LyX document: Could not find all columns of tabular.")
                i = j + 1
                continue

            # Now parse cells (row by row)
            m = i + 1
            for cellno in range(nrows * ncols):
                col = cellno % ncols
                m = find_token(document.body, "<cell", m)
                if m == -1:
                    document.warning("Malformed LyX document: Could not find all cells of tabular.")
                    break
                multicolumn = get_option_value(document.body[m], 'multicolumn')
                multirow = get_option_value(document.body[m], 'multirow')
                width = get_option_value(document.body[m], 'width')
                rotate = get_option_value(document.body[m], 'rotate')
                # Check for: linebreaks, multipars, non-standard environments
                begcell = m
                endcell = find_token(document.body, "</cell>", begcell)
                vcand = False
                if find_token(document.body, "\\begin_inset Newline", begcell, endcell) != -1:
                    vcand = True
                elif count_pars_in_inset(document.body, begcell + 2) > 1:
                    vcand = True
                elif get_value(document.body, "\\begin_layout", begcell) != "Plain Layout":
                    vcand = True
                if vcand and rotate == "" and ((multicolumn == "" and multirow == "") or width == ""):
                    info = col_info[col]
                    if not info[5] and info[0] == "" and info[1] == "" and info[3] == "":
                        # First such cell of the column: set the column
                        # specification (once, so that the special attribute
                        # is not added again for every further cell)
                        needvarwidth = True
                        alignment = info[2]
                        col_line = info[4]
                        vval = ""
                        if alignment == "center":
                            vval = ">{\\centering}"
                        elif alignment == "left":
                            vval = ">{\\raggedright}"
                        elif alignment == "right":
                            vval = ">{\\raggedleft}"
                        if vval != "":
                            needarray = True
                        vval += "V{\\linewidth}"

                        # [:-1] drops the last character of the column line
                        # (its closing ">"), which is appended again
                        document.body[col_line] = document.body[col_line][:-1] + " special=\"" + vval + "\">"
                        info[5] = True
                    if info[5]:
                        # ERT newlines and linebreaks (since LyX < 2.4 automatically inserts parboxes
                        # with newlines, and we do not want that)
                        while True:
                            endcell = find_token(document.body, "</cell>", begcell)
                            linebreak = False
                            nl = find_token(document.body, "\\begin_inset Newline newline", begcell, endcell)
                            if nl == -1:
                                nl = find_token(document.body, "\\begin_inset Newline linebreak", begcell, endcell)
                                if nl == -1:
                                    break
                                linebreak = True
                            nle = find_end_of_inset(document.body, nl)
                            del(document.body[nle:nle+1])
                            if linebreak:
                                document.body[nl:nl+1] = put_cmd_in_ert("\\linebreak{}")
                            else:
                                document.body[nl:nl+1] = put_cmd_in_ert("\\\\")
                m += 1

            i = j + 1

    finally:
        # Runs on every way out (including the return above), so the packages
        # are also added if a later tabular makes the function stop early
        if needarray == True:
            add_to_preamble(document, ["\\usepackage{array}"])
        if needvarwidth == True:
            add_to_preamble(document, ["\\usepackage{varwidth}"])


def revert_bibencoding(document):
    """Revert the bibliography encoding parameter of bibtex insets.

    The "encoding" line is removed from every bibtex CommandInset that has
    one.  Unless the value was "default", the encoding is preserved in a
    different way:

    * with a biblatex cite engine, a "bibencoding=..." option is appended to
      (or created as) the \\biblio_options header line;
    * otherwise the inset is wrapped in ERT that switches \\inputencoding
      inside a TeX group.

    Encodings without a known LaTeX name are reported with a warning and
    dropped instead of aborting the conversion with a KeyError.
    """

    # Get cite engine
    engine = "basic"
    i = find_token(document.header, "\\cite_engine", 0)
    if i == -1:
        document.warning("Malformed document! Missing \\cite_engine")
    else:
        engine = get_value(document.header, "\\cite_engine", i)

    # Check if biblatex
    biblatex = engine in ["biblatex", "biblatex-natbib"]

    # Map lyx to latex encoding names
    encodings = {
        "utf8" : "utf8",
        "utf8x" : "utf8x",
        "armscii8" : "armscii8",
        "iso8859-1" : "latin1",
        "iso8859-2" : "latin2",
        "iso8859-3" : "latin3",
        "iso8859-4" : "latin4",
        "iso8859-5" : "iso88595",
        "iso8859-6" : "8859-6",
        "iso8859-7" : "iso-8859-7",
        "iso8859-8" : "8859-8",
        "iso8859-9" : "latin5",
        "iso8859-13" : "latin7",
        "iso8859-15" : "latin9",
        "iso8859-16" : "latin10",
        "applemac" : "applemac",
        "cp437" : "cp437",
        "cp437de" : "cp437de",
        "cp850" : "cp850",
        "cp852" : "cp852",
        "cp855" : "cp855",
        "cp858" : "cp858",
        "cp862" : "cp862",
        "cp865" : "cp865",
        "cp866" : "cp866",
        "cp1250" : "cp1250",
        "cp1251" : "cp1251",
        "cp1252" : "cp1252",
        "cp1255" : "cp1255",
        "cp1256" : "cp1256",
        "cp1257" : "cp1257",
        "koi8-r" : "koi8-r",
        "koi8-u" : "koi8-u",
        "pt154" : "pt154",
        "utf8-platex" : "utf8",
        "ascii" : "ascii"
    }

    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CommandInset bibtex", i)
        if i == -1:
            break
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Can't find end of bibtex inset at line %d!!" %(i))
            i += 1
            continue
        encoding = get_quoted_value(document.body, "encoding", i, j)
        if not encoding:
            i += 1
            continue
        # remove encoding line
        k = find_token(document.body, "encoding", i, j)
        if k != -1:
            del document.body[k]
        if encoding == "default":
            i += 1
            continue
        latex_encoding = encodings.get(encoding)
        if latex_encoding is None:
            # The parameter itself is already gone (older formats do not
            # know it); we only lose the encoding information.
            document.warning("Unknown bibliography encoding '%s' cannot be reverted." % encoding)
            i += 1
            continue
        # Re-find inset end line (it moved when the encoding line was removed)
        j = find_end_of_inset(document.body, i)
        if biblatex:
            h = find_token(document.header, "\\biblio_options", 0)
            if h != -1:
                biblio_options = get_value(document.header, "\\biblio_options", h)
                # Do not override an encoding the user has set explicitly
                if "bibencoding" not in biblio_options:
                    document.header[h] += ",bibencoding=%s" % latex_encoding
            else:
                bs = find_token(document.header, "\\biblatex_bibstyle", 0)
                if bs == -1:
                    # this should not happen
                    document.warning("Malformed LyX document! No \\biblatex_bibstyle header found!")
                else:
                    document.header[bs-1 : bs-1] = ["\\biblio_options bibencoding=" + latex_encoding]
        else:
            # Insert the closing ERT first so that index i stays valid
            document.body[j+1:j+1] = put_cmd_in_ert("\\egroup")
            document.body[i:i] = put_cmd_in_ert("\\bgroup\\inputencoding{" + latex_encoding + "}")

        i = j + 1


def convert_vcsinfo(document):
    """Separate vcs Info inset from buffer Info inset.

    Info insets of type "buffer" whose argument is one of the "vcs-*" values
    get the type "vcs" and the argument without the "vcs-" prefix.
    """

    # old buffer argument -> new vcs argument
    vcs_args = {
        "vcs-revision" : "revision",
        "vcs-tree-revision" : "tree-revision",
        "vcs-author" : "author",
        "vcs-time" : "time",
        "vcs-date" : "date"
    }
    body = document.body
    pos = 0
    while True:
        start = find_token(body, "\\begin_inset Info", pos)
        if start == -1:
            return
        # Whatever happens to this inset, resume the search on the next line
        pos = start + 1
        end = find_end_of_inset(body, start + 1)
        if end == -1:
            document.warning("Malformed LyX document: Could not find end of Info inset.")
            continue
        type_line = find_token(body, 'type', start, end)
        if get_quoted_value(body, "type", type_line) != "buffer":
            continue
        arg_line = find_token(body, 'arg', start, end)
        old_arg = get_quoted_value(body, "arg", arg_line)
        if old_arg in vcs_args:
            body[type_line] = 'type "vcs"'
            body[arg_line] = 'arg "%s"' % vcs_args[old_arg]


def revert_vcsinfo(document):
    """Merge vcs Info inset to buffer Info inset.

    Info insets of type "vcs" become type "buffer" and their argument gets
    the "vcs-" prefix.  Insets with an unknown argument are left untouched
    and reported with a warning.
    """

    known_args = ("revision", "tree-revision", "author", "time", "date")
    body = document.body
    pos = 0
    while True:
        start = find_token(body, "\\begin_inset Info", pos)
        if start == -1:
            return
        # Whatever happens to this inset, resume the search on the next line
        pos = start + 1
        end = find_end_of_inset(body, start + 1)
        if end == -1:
            document.warning("Malformed LyX document: Could not find end of Info inset.")
            continue
        type_line = find_token(body, 'type', start, end)
        if get_quoted_value(body, "type", type_line) != "vcs":
            continue
        arg_line = find_token(body, 'arg', start, end)
        vcs_arg = get_quoted_value(body, "arg", arg_line)
        if vcs_arg in known_args:
            body[type_line] = 'type "buffer"'
            body[arg_line] = 'arg "vcs-%s"' % vcs_arg
        else:
            document.warning("Malformed Info inset. Invalid vcs arg.")


def _qt_date_format_to_strftime(qtfmt):
    """Translate a Qt date format string into strftime syntax.

    Handles the Qt expressions d, dd, ddd, dddd, M, MM, MMM, MMMM, yy and
    yyyy.  Text inside single quotes is copied literally, two consecutive
    single quotes stand for one literal quote, and literal percent signs
    are escaped.  Qt's unpadded "d" and "M" are mapped to the zero-padded
    %d and %m since strftime has no portable unpadded variant.
    """
    # Longest expressions first so that e.g. "MMMM" is not read as "MM"+"MM"
    tokens = (("dddd", "%A"), ("ddd", "%a"), ("dd", "%d"), ("d", "%d"),
              ("MMMM", "%B"), ("MMM", "%b"), ("MM", "%m"), ("M", "%m"),
              ("yyyy", "%Y"), ("yy", "%y"))
    pieces = []
    quoted = False
    pos = 0
    while pos < len(qtfmt):
        if qtfmt.startswith("''", pos):
            pieces.append("'")
            pos += 2
            continue
        char = qtfmt[pos]
        if char == "'":
            quoted = not quoted
            pos += 1
            continue
        matched = None
        if not quoted:
            for token in tokens:
                if qtfmt.startswith(token[0], pos):
                    matched = token
                    break
        if matched is None:
            pieces.append("%%" if char == "%" else char)
            pos += 1
        else:
            pieces.append(matched[1])
            pos += len(matched[0])
    return "".join(pieces)


def revert_dateinfo(document):
    """Revert date info insets to static text.

    Every Info inset of type "date", "fixdate" or "moddate" is replaced by
    one line of text holding the formatted date.  "fixdate" insets carry
    their date after an "@" in the argument; all other insets use today's
    date.  The argument selects the ISO format, one of the per-language
    formats below, or is taken as a custom Qt date format string.
    """

    # FIXME This currently only considers the main language and uses the system locale
    # Ideally, it should honor context languages and switch the locale accordingly.

    # The date formats for each language using strftime syntax:
    # long, short, loclong, locmedium, locshort
    dateformats = {
        "afrikaans" : ["%A, %d %B %Y", "%Y-%m-%d", "%d %B %Y", "%d %b %Y", "%Y/%m/%d"],
        "albanian" : ["%A, %d %B %Y", "%d.%m.%y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
        "american" : ["%A, %B %d, %Y", "%m/%d/%y", "%B %d, %Y", "%b %d, %Y", "%m/%d/%Y"],
        "amharic" : ["%A ፣%d %B %Y", "%d/%m/%Y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
        "ancientgreek" : ["%A, %d %B %Y", "%d %b %Y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
        "arabic_arabi" : ["%A، %d %B، %Y", "%d‏/%m‏/%Y", "%d %B، %Y", "%d/%m/%Y", "%d/%m/%Y"],
        "arabic_arabtex" : ["%A، %d %B، %Y", "%d‏/%m‏/%Y", "%d %B، %Y", "%d/%m/%Y", "%d/%m/%Y"],
        "armenian" : ["%Y թ. %B %d, %A", "%d.%m.%y", "%d %B، %Y", "%d %b، %Y", "%d/%m/%Y"],
        "asturian" : ["%A, %d %B de %Y", "%d/%m/%y", "%d de %B de %Y", "%d %b %Y", "%d/%m/%Y"],
        "australian" : ["%A, %d %B %Y", "%d/%m/%y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
        "austrian" : ["%A, %d. %B %Y", "%d.%m.%y", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
        "bahasa" : ["%A, %d %B %Y", "%d/%m/%y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
        "bahasam" : ["%A, %d %B %Y", "%d/%m/%y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
        "basque" : ["%Y(e)ko %B %d, %A", "%y/%m/%d", "%Y %B %d", "%Y %b %d", "%Y/%m/%d"],
        "belarusian" : ["%A, %d %B %Y г.", "%d.%m.%y", "%d %B %Y", "%d %b %Y", "%d.%m.%Y"],
        "bosnian" : ["%A, %d. %B %Y.", "%d.%m.%y.", "%d. %B %Y", "%d. %b %Y", "%Y-%m-%d"],
        "brazilian" : ["%A, %d de %B de %Y", "%d/%m/%Y", "%d de %B de %Y", "%d de %b de %Y", "%d/%m/%Y"],
        "breton" : ["%Y %B %d, %A", "%Y-%m-%d", "%d %B %Y", "%d %b %Y", "%Y-%m-%d"],
        "british" : ["%A, %d %B %Y", "%d/%m/%Y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
        "bulgarian" : ["%A, %d %B %Y г.", "%d.%m.%y г.", "%d %B %Y", "%d %b %Y", "%Y-%m-%d"],
        "canadian" : ["%A, %B %d, %Y", "%Y-%m-%d", "%B %d, %Y", "%d %b %Y", "%Y-%m-%d"],
        "canadien" : ["%A %d %B %Y", "%y-%m-%d", "%d %B %Y", "%d %b %Y", "%Y-%m-%d"],
        "catalan" : ["%A, %d %B de %Y", "%d/%m/%y", "%d / %B / %Y", "%d / %b / %Y", "%d/%m/%Y"],
        "chinese-simplified" : ["%Y年%m月%d日%A", "%Y/%m/%d", "%Y年%m月%d日", "%Y-%m-%d", "%y-%m-%d"],
        "chinese-traditional" : ["%Y年%m月%d日 %A", "%Y/%m/%d", "%Y年%m月%d日", "%Y年%m月%d日", "%y年%m月%d日"],
        "coptic" : ["%A, %d %B %Y", "%d %b %Y", "%B %d, %Y", "%b %d, %Y", "%m/%d/%Y"],
        "croatian" : ["%A, %d. %B %Y.", "%d. %m. %Y.", "%d. %B %Y.", "%d. %b. %Y.", "%d.%m.%Y."],
        "czech" : ["%A %d. %B %Y", "%d.%m.%y", "%d. %B %Y", "%d. %b. %Y", "%d.%m.%Y"],
        "danish" : ["%A den %d. %B %Y", "%d/%m/%Y", "%d. %B %Y", "%d. %b %Y", "%d/%m/%Y"],
        "divehi" : ["%Y %B %d, %A", "%Y-%m-%d", "%Y %B %d", "%Y %b %d", "%d/%m/%Y"],
        "dutch" : ["%A %d %B %Y", "%d-%m-%y", "%d %B %Y", "%d %b %Y", "%d-%m-%Y"],
        "english" : ["%A, %B %d, %Y", "%m/%d/%y", "%B %d, %Y", "%b %d, %Y", "%m/%d/%Y"],
        "esperanto" : ["%A, %d %B %Y", "%d %b %Y", "la %d de %B %Y", "la %d de %b %Y", "%m/%d/%Y"],
        "estonian" : ["%A, %d. %B %Y", "%d.%m.%y", "%d %B %Y", "%d %b %Y", "%d.%m.%Y"],
        "farsi" : ["%A %d %B %Y", "%Y/%m/%d", "%d %B %Y", "%d %b %Y", "%Y/%m/%d"],
        "finnish" : ["%A %d. %B %Y", "%d.%m.%Y", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
        "french" : ["%A %d %B %Y", "%d/%m/%Y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
        "friulan" : ["%A %d di %B dal %Y", "%d/%m/%y", "%d di %B dal %Y", "%d di %b dal %Y", "%d/%m/%Y"],
        "galician" : ["%A, %d de %B de %Y", "%d/%m/%y", "%d de %B de %Y", "%d de %b de %Y", "%d/%m/%Y"],
        "georgian" : ["%A, %d %B, %Y", "%d.%m.%y", "%B %d, %Y", "%b %d, %Y", "%m/%d/%Y"],
        "german" : ["%A, %d. %B %Y", "%d.%m.%y", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
        "german-ch" : ["%A, %d. %B %Y", "%d.%m.%y", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
        "german-ch-old" : ["%A, %d. %B %Y", "%d.%m.%y", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
        "greek" : ["%A, %d %B %Y", "%d/%m/%y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
        "hebrew" : ["%A, %d ב%B %Y", "%d.%m.%Y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
        "hindi" : ["%A, %d %B %Y", "%d/%m/%y", "%d %B %Y", "%d %b %Y", "%d-%m-%Y"],
        "icelandic" : ["%A, %d. %B %Y", "%d.%m.%Y", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
        "interlingua" : ["%Y %B %d, %A", "%Y-%m-%d", "le %d de %B %Y", "le %d de %b %Y", "%Y-%m-%d"],
        "irish" : ["%A %d %B %Y", "%d/%m/%Y", "%d. %B %Y", "%d. %b %Y", "%d/%m/%Y"],
        "italian" : ["%A %d %B %Y", "%d/%m/%y", "%d %B %Y", "%d/%b/%Y", "%d/%m/%Y"],
        "japanese" : ["%Y年%m月%d日%A", "%Y/%m/%d", "%Y年%m月%d日", "%Y/%m/%d", "%y/%m/%d"],
        "japanese-cjk" : ["%Y年%m月%d日%A", "%Y/%m/%d", "%Y年%m月%d日", "%Y/%m/%d", "%y/%m/%d"],
        "kannada" : ["%A, %B %d, %Y", "%d/%m/%y", "%d %B %Y", "%d %B %Y", "%d-%m-%Y"],
        "kazakh" : ["%Y ж. %d %B, %A", "%d.%m.%y", "%d %B %Y", "%d %B %Y", "%Y-%d-%m"],
        "khmer" : ["%A %d %B %Y", "%d/%m/%y", "%d %B %Y", "%d %B %Y", "%d/%m/%Y"],
        "korean" : ["%Y년 %m월 %d일 %A", "%y. %m. %d.", "%Y년 %m월 %d일", "%Y. %m. %d.", "%y. %m. %d."],
        "kurmanji" : ["%A, %d %B %Y", "%d %b %Y", "%d. %B %Y", "%d. %m. %Y", "%Y-%m-%d"],
        "lao" : ["%A ທີ %d %B %Y", "%d/%m/%Y", "%d %B %Y", "%d %B %Y", "%d/%m/%Y"],
        "latin" : ["%A, %d %B %Y", "%d %b %Y", "%B %d, %Y", "%b %d, %Y", "%m/%d/%Y"],
        "latvian" : ["%A, %Y. gada %d. %B", "%d.%m.%y", "%Y. gada %d. %B", "%Y. gada %d. %b", "%d.%m.%Y"],
        "lithuanian" : ["%Y m. %B %d d., %A", "%Y-%m-%d", "%Y m. %B %d d.", "%Y m. %B %d d.", "%Y-%m-%d"],
        "lowersorbian" : ["%A, %d. %B %Y", "%d.%m.%y", "%d %B %Y", "%d %b %Y", "%d.%m.%Y"],
        "macedonian" : ["%A, %d %B %Y", "%d.%m.%y", "%d %B %Y", "%d %b %Y", "%d.%m.%Y"],
        "magyar" : ["%Y. %B %d., %A", "%Y. %m. %d.", "%Y. %B %d.", "%Y. %b %d.", "%Y.%m.%d."],
        "malayalam" : ["%A, %d %B, %Y", "%d/%m/%y", "%d %B %Y", "%d %b %Y", "%d-%m-%Y"],
        "marathi" : ["%A, %d %B, %Y", "%d/%m/%y", "%d %B %Y", "%d %b %Y", "%d-%m-%Y"],
        "mongolian" : ["%A, %Y оны %m сарын %d", "%Y-%m-%d", "%Y оны %m сарын %d", "%d-%m-%Y", "%d-%m-%Y"],
        "naustrian" : ["%A, %d. %B %Y", "%d.%m.%y", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
        "newzealand" : ["%A, %d %B %Y", "%d/%m/%y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
        "ngerman" : ["%A, %d. %B %Y", "%d.%m.%y", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
        "norsk" : ["%A %d. %B %Y", "%d.%m.%Y", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
        "nynorsk" : ["%A %d. %B %Y", "%d.%m.%Y", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
        "occitan" : ["%Y %B %d, %A", "%Y-%m-%d", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
        "piedmontese" : ["%A, %d %B %Y", "%d %b %Y", "%B %d, %Y", "%b %d, %Y", "%m/%d/%Y"],
        "polish" : ["%A, %d %B %Y", "%d.%m.%Y", "%d %B %Y", "%d %b %Y", "%Y-%m-%d"],
        "polutonikogreek" : ["%A, %d %B %Y", "%d/%m/%y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
        "portuguese" : ["%A, %d de %B de %Y", "%d/%m/%y", "%d de %B de %Y", "%d de %b de %Y", "%Y/%m/%d"],
        "romanian" : ["%A, %d %B %Y", "%d.%m.%Y", "%d %B %Y", "%d %b %Y", "%d.%m.%Y"],
        "romansh" : ["%A, ils %d da %B %Y", "%d-%m-%y", "%d %B %Y", "%d %b %Y", "%d.%m.%Y"],
        "russian" : ["%A, %d %B %Y г.", "%d.%m.%Y", "%d %B %Y г.", "%d %b %Y г.", "%d.%m.%Y"],
        "samin" : ["%Y %B %d, %A", "%Y-%m-%d", "%B %d. b. %Y", "%b %d. b. %Y", "%d.%m.%Y"],
        "sanskrit" : ["%Y %B %d, %A", "%Y-%m-%d", "%d %B %Y", "%d %b %Y", "%d-%m-%Y"],
        "scottish" : ["%A, %dmh %B %Y", "%d/%m/%Y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
        "serbian" : ["%A, %d. %B %Y.", "%d.%m.%y.", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
        "serbian-latin" : ["%A, %d. %B %Y.", "%d.%m.%y.", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
        "slovak" : ["%A, %d. %B %Y", "%d. %m. %Y", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
        "slovene" : ["%A, %d. %B %Y", "%d. %m. %y", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
        "spanish" : ["%A, %d de %B de %Y", "%d/%m/%y", "%d de %B de %Y", "%d %b %Y", "%d/%m/%Y"],
        "spanish-mexico" : ["%A, %d de %B de %Y", "%d/%m/%y", "%d de %B de %Y", "%d %b %Y", "%d/%m/%Y"],
        "swedish" : ["%A %d %B %Y", "%Y-%m-%d", "%d %B %Y", "%d %b %Y", "%Y-%m-%d"],
        "syriac" : ["%Y %B %d, %A", "%Y-%m-%d", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
        "tamil" : ["%A, %d %B, %Y", "%d/%m/%y", "%d %B %Y", "%d %b %Y", "%d-%m-%Y"],
        "telugu" : ["%d, %B %Y, %A", "%d-%m-%y", "%d %B %Y", "%d %b %Y", "%d-%m-%Y"],
        "thai" : ["%Aที่ %d %B %Y", "%d/%m/%y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
        "tibetan" : ["%Y %Bའི་ཚེས་%d, %A", "%Y-%m-%d", "%B %d, %Y", "%b %d, %Y", "%m/%d/%Y"],
        "turkish" : ["%d %B %Y %A", "%d.%m.%Y", "%d %B %Y", "%d.%b.%Y", "%d.%m.%Y"],
        "turkmen" : ["%d %B %Y %A", "%d.%m.%Y", "%Y ý. %B %d", "%d.%m.%Y ý.", "%d.%m.%y ý."],
        "ukrainian" : ["%A, %d %B %Y р.", "%d.%m.%y", "%d %B %Y", "%d %m %Y", "%d.%m.%Y"],
        "uppersorbian" : ["%A, %d. %B %Y", "%d.%m.%y", "%d %B %Y", "%d %b %Y", "%d.%m.%Y"],
        "urdu" : ["%A، %d %B، %Y", "%d/%m/%y", "%d %B, %Y", "%d %b %Y", "%d/%m/%Y"],
        "vietnamese" : ["%A, %d %B, %Y", "%d/%m/%Y", "%d tháng %B %Y", "%d-%m-%Y", "%d/%m/%Y"],
        "welsh" : ["%A, %d %B %Y", "%d/%m/%y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
    }

    # Position of each named format in the dateformats lists
    named_formats = {"long" : 0, "short" : 1, "loclong" : 2, "locmedium" : 3, "locshort" : 4}

    types = ["date", "fixdate", "moddate" ]
    i = find_token(document.header, "\\language", 0)
    if i == -1:
        # this should not happen
        document.warning("Malformed LyX document! No \\language header found!")
        return
    lang = get_value(document.header, "\\language", i)

    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Info", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            document.warning("Malformed LyX document: Could not find end of Info inset.")
            i = i + 1
            continue
        tp = find_token(document.body, 'type', i, j)
        tpv = get_quoted_value(document.body, "type", tp)
        if tpv not in types:
            i = i + 1
            continue
        arg = find_token(document.body, 'arg', i, j)
        argv = get_quoted_value(document.body, "arg", arg)
        dte = date.today()
        if tpv == "fixdate":
            # The argument has the form "<format>@<ISO date>"
            datecomps = argv.split('@')
            if len(datecomps) > 1:
                argv = datecomps[0]
                isodate = datecomps[1]
                m = re.search(r'(\d\d\d\d)-(\d\d)-(\d\d)', isodate)
                if m:
                    try:
                        dte = date(int(m.group(1)), int(m.group(2)), int(m.group(3)))
                    except ValueError:
                        # e.g. month 13: keep today's date as for any
                        # other unparsable fixed date
                        document.warning("Malformed fixdate Info inset: invalid date '%s'." % isodate)
        # FIXME if we had the path to the original document (not the one in
        #       the tmp dir), we could use its mtime for "moddate" insets.
        if argv == "ISO":
            result = dte.isoformat()
        elif argv in named_formats:
            if lang in dateformats:
                formats = dateformats[lang]
            else:
                document.warning("No date formats known for language '%s', using the English ones." % lang)
                formats = dateformats["english"]
            result = dte.strftime(formats[named_formats[argv]])
        else:
            # Anything else is a custom Qt date format
            result = dte.strftime(_qt_date_format_to_strftime(argv))
        if sys.version_info < (3,0) and isinstance(result, bytes):
            # In Python 2, datetime module works with binary strings,
            # our dateformat strings are utf8-encoded:
            result = result.decode('utf-8')
        # The replacement must be a one-element list: assigning a bare string
        # to a slice would insert every character as a line of its own.
        document.body[i : j+1] = [result]
        i = i + 1


def _qt_time_format_to_strftime(qtfmt):
    """Translate a Qt time format string into strftime syntax.

    Handles the Qt expressions h, hh, H, HH, m, mm, s, ss, z, zzz, t and
    the AM/PM markers AP, A, ap, a.  Text inside single quotes is copied
    literally, two consecutive single quotes stand for one literal quote,
    and literal percent signs are escaped.  As in Qt, "h"/"hh" use the
    12-hour clock only if the format also has an AM/PM marker.  Unpadded
    expressions are mapped to their zero-padded strftime counterparts, and
    z/zzz (milliseconds) are approximated by %f (microseconds).
    """
    # Longest expressions first; None marks "h"/"hh", which can only be
    # resolved after the whole string has been scanned for AM/PM markers.
    tokens = (("HH", "%H"), ("H", "%H"), ("hh", None), ("h", None),
              ("mm", "%M"), ("m", "%M"), ("ss", "%S"), ("s", "%S"),
              ("zzz", "%f"), ("z", "%f"), ("t", "%Z"),
              ("AP", "%p"), ("ap", "%p"), ("A", "%p"), ("a", "%p"))
    pieces = []
    has_ampm = False
    quoted = False
    pos = 0
    while pos < len(qtfmt):
        if qtfmt.startswith("''", pos):
            pieces.append("'")
            pos += 2
            continue
        char = qtfmt[pos]
        if char == "'":
            quoted = not quoted
            pos += 1
            continue
        matched = None
        if not quoted:
            for token in tokens:
                if qtfmt.startswith(token[0], pos):
                    matched = token
                    break
        if matched is None:
            pieces.append("%%" if char == "%" else char)
            pos += 1
        else:
            if matched[1] == "%p":
                has_ampm = True
            pieces.append(matched[1])
            pos += len(matched[0])
    hour = "%I" if has_ampm else "%H"
    return "".join([hour if piece is None else piece for piece in pieces])


def revert_timeinfo(document):
    """Revert time info insets to static text.

    Every Info inset of type "time", "fixtime" or "modtime" is replaced by
    one line of text holding the formatted time.  "fixtime" insets carry
    their time after an "@" in the argument; all other insets use the
    current time.  The argument selects the ISO format, one of the
    per-language formats below, or is taken as a custom Qt time format
    string.
    """

    # FIXME This currently only considers the main language and uses the system locale
    # Ideally, it should honor context languages and switch the locale accordingly.
    # Also, the time object is "naive", i.e., it does not know of timezones (%Z will
    # be empty).

    # The time formats for each language using strftime syntax:
    # long, short
    timeformats = {
        "afrikaans" : ["%H:%M:%S %Z", "%H:%M"],
        "albanian" : ["%I:%M:%S %p, %Z", "%I:%M %p"],
        "american" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "amharic" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "ancientgreek" : ["%H:%M:%S %Z", "%H:%M:%S"],
        "arabic_arabi" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "arabic_arabtex" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "armenian" : ["%H:%M:%S %Z", "%H:%M"],
        "asturian" : ["%H:%M:%S %Z", "%H:%M"],
        "australian" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "austrian" : ["%H:%M:%S %Z", "%H:%M"],
        "bahasa" : ["%H.%M.%S %Z", "%H.%M"],
        "bahasam" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "basque" : ["%H:%M:%S (%Z)", "%H:%M"],
        "belarusian" : ["%H:%M:%S, %Z", "%H:%M"],
        "bosnian" : ["%H:%M:%S %Z", "%H:%M"],
        "brazilian" : ["%H:%M:%S %Z", "%H:%M"],
        "breton" : ["%H:%M:%S %Z", "%H:%M"],
        "british" : ["%H:%M:%S %Z", "%H:%M"],
        "bulgarian" : ["%H:%M:%S %Z", "%H:%M"],
        "canadian" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "canadien" : ["%H:%M:%S %Z", "%H h %M"],
        "catalan" : ["%H:%M:%S %Z", "%H:%M"],
        "chinese-simplified" : ["%Z %p%I:%M:%S", "%p%I:%M"],
        "chinese-traditional" : ["%p%I:%M:%S [%Z]", "%p%I:%M"],
        "coptic" : ["%H:%M:%S %Z", "%H:%M:%S"],
        "croatian" : ["%H:%M:%S (%Z)", "%H:%M"],
        "czech" : ["%H:%M:%S %Z", "%H:%M"],
        "danish" : ["%H.%M.%S %Z", "%H.%M"],
        "divehi" : ["%H:%M:%S %Z", "%H:%M"],
        "dutch" : ["%H:%M:%S %Z", "%H:%M"],
        "english" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "esperanto" : ["%H:%M:%S %Z", "%H:%M:%S"],
        "estonian" : ["%H:%M:%S %Z", "%H:%M"],
        "farsi" : ["%H:%M:%S (%Z)", "%H:%M"],
        "finnish" : ["%H.%M.%S %Z", "%H.%M"],
        "french" : ["%H:%M:%S %Z", "%H:%M"],
        "friulan" : ["%H:%M:%S %Z", "%H:%M"],
        "galician" : ["%H:%M:%S %Z", "%H:%M"],
        "georgian" : ["%H:%M:%S %Z", "%H:%M"],
        "german" : ["%H:%M:%S %Z", "%H:%M"],
        "german-ch" : ["%H:%M:%S %Z", "%H:%M"],
        "german-ch-old" : ["%H:%M:%S %Z", "%H:%M"],
        "greek" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "hebrew" : ["%H:%M:%S %Z", "%H:%M"],
        "hindi" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "icelandic" : ["%H:%M:%S %Z", "%H:%M"],
        "interlingua" : ["%H:%M:%S %Z", "%H:%M"],
        "irish" : ["%H:%M:%S %Z", "%H:%M"],
        "italian" : ["%H:%M:%S %Z", "%H:%M"],
        "japanese" : ["%H時%M分%S秒 %Z", "%H:%M"],
        "japanese-cjk" : ["%H時%M分%S秒 %Z", "%H:%M"],
        "kannada" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "kazakh" : ["%H:%M:%S %Z", "%H:%M"],
        "khmer" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "korean" : ["%p %I시%M분 %S초 %Z", "%p %I:%M"],
        "kurmanji" : ["%H:%M:%S %Z", "%H:%M:%S"],
        "lao" : ["%H ໂມງ%M ນາທີ  %S ວິນາທີ %Z", "%H:%M"],
        "latin" : ["%H:%M:%S %Z", "%H:%M:%S"],
        "latvian" : ["%H:%M:%S %Z", "%H:%M"],
        "lithuanian" : ["%H:%M:%S %Z", "%H:%M"],
        "lowersorbian" : ["%H:%M:%S %Z", "%H:%M"],
        "macedonian" : ["%H:%M:%S %Z", "%H:%M"],
        "magyar" : ["%H:%M:%S %Z", "%H:%M"],
        "malayalam" : ["%p %I:%M:%S %Z", "%p %I:%M"],
        "marathi" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "mongolian" : ["%H:%M:%S %Z", "%H:%M"],
        "naustrian" : ["%H:%M:%S %Z", "%H:%M"],
        "newzealand" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "ngerman" : ["%H:%M:%S %Z", "%H:%M"],
        "norsk" : ["%H:%M:%S %Z", "%H:%M"],
        "nynorsk" : ["kl. %H:%M:%S %Z", "%H:%M"],
        "occitan" : ["%H:%M:%S %Z", "%H:%M"],
        "piedmontese" : ["%H:%M:%S %Z", "%H:%M:%S"],
        "polish" : ["%H:%M:%S %Z", "%H:%M"],
        "polutonikogreek" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "portuguese" : ["%H:%M:%S %Z", "%H:%M"],
        "romanian" : ["%H:%M:%S %Z", "%H:%M"],
        "romansh" : ["%H:%M:%S %Z", "%H:%M"],
        "russian" : ["%H:%M:%S %Z", "%H:%M"],
        "samin" : ["%H:%M:%S %Z", "%H:%M"],
        "sanskrit" : ["%H:%M:%S %Z", "%H:%M"],
        "scottish" : ["%H:%M:%S %Z", "%H:%M"],
        "serbian" : ["%H:%M:%S %Z", "%H:%M"],
        "serbian-latin" : ["%H:%M:%S %Z", "%H:%M"],
        "slovak" : ["%H:%M:%S %Z", "%H:%M"],
        "slovene" : ["%H:%M:%S %Z", "%H:%M"],
        "spanish" : ["%H:%M:%S (%Z)", "%H:%M"],
        "spanish-mexico" : ["%H:%M:%S %Z", "%H:%M"],
        "swedish" : ["kl. %H:%M:%S %Z", "%H:%M"],
        "syriac" : ["%H:%M:%S %Z", "%H:%M"],
        "tamil" : ["%p %I:%M:%S %Z", "%p %I:%M"],
        "telugu" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "thai" : ["%H นาฬิกา %M นาที  %S วินาที %Z", "%H:%M"],
        "tibetan" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "turkish" : ["%H:%M:%S %Z", "%H:%M"],
        "turkmen" : ["%H:%M:%S %Z", "%H:%M"],
        "ukrainian" : ["%H:%M:%S %Z", "%H:%M"],
        "uppersorbian" : ["%H:%M:%S %Z", "%H:%M hodź."],
        "urdu" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "vietnamese" : ["%H:%M:%S %Z", "%H:%M"],
        "welsh" : ["%H:%M:%S %Z", "%H:%M"]
    }

    # Position of each named format in the timeformats lists
    named_formats = {"long" : 0, "short" : 1}

    types = ["time", "fixtime", "modtime" ]
    i = find_token(document.header, "\\language", 0)
    if i == -1:
        # this should not happen
        document.warning("Malformed LyX document! No \\language header found!")
        return
    lang = get_value(document.header, "\\language", i)

    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Info", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            document.warning("Malformed LyX document: Could not find end of Info inset.")
            i = i + 1
            continue
        tp = find_token(document.body, 'type', i, j)
        tpv = get_quoted_value(document.body, "type", tp)
        if tpv not in types:
            i = i + 1
            continue
        arg = find_token(document.body, 'arg', i, j)
        argv = get_quoted_value(document.body, "arg", arg)
        tme = datetime.now().time()
        if tpv == "fixtime":
            # The argument has the form "<format>@<ISO time>"
            timecomps = argv.split('@')
            if len(timecomps) > 1:
                argv = timecomps[0]
                isotime = timecomps[1]
                # Prefer hh:mm:ss, fall back to hh:mm
                m = re.search(r'(\d\d):(\d\d):(\d\d)', isotime)
                if not m:
                    m = re.search(r'(\d\d):(\d\d)', isotime)
                if m:
                    try:
                        tme = time(*[int(part) for part in m.groups()])
                    except ValueError:
                        # e.g. hour 25: keep the current time as for any
                        # other unparsable fixed time
                        document.warning("Malformed fixtime Info inset: invalid time '%s'." % isotime)
        # FIXME if we had the path to the original document (not the one in
        #       the tmp dir), we could use its mtime for "modtime" insets.
        if argv == "ISO":
            result = tme.isoformat()
        elif argv in named_formats:
            if lang in timeformats:
                formats = timeformats[lang]
            else:
                document.warning("No time formats known for language '%s', using the English ones." % lang)
                formats = timeformats["english"]
            result = tme.strftime(formats[named_formats[argv]])
        else:
            # Anything else is a custom Qt time format
            result = tme.strftime(_qt_time_format_to_strftime(argv))
        if sys.version_info < (3,0) and isinstance(result, bytes):
            # In Python 2, datetime module works with binary strings,
            # our timeformat strings are utf8-encoded:
            result = result.decode('utf-8')
        # The replacement must be a one-element list: assigning a bare string
        # to a slice would insert every character as a line of its own.
        document.body[i : j+1] = [result]
        i = i + 1


def revert_namenoextinfo(document):
    """Merge buffer Info inset type name-noext to name.

    In every Info inset of type "buffer", the argument "name-noext" is
    replaced by "name".
    """

    body = document.body
    pos = 0
    while True:
        start = find_token(body, "\\begin_inset Info", pos)
        if start == -1:
            return
        # Whatever happens to this inset, resume the search on the next line
        pos = start + 1
        end = find_end_of_inset(body, start + 1)
        if end == -1:
            document.warning("Malformed LyX document: Could not find end of Info inset.")
            continue
        type_line = find_token(body, 'type', start, end)
        if get_quoted_value(body, "type", type_line) != "buffer":
            continue
        arg_line = find_token(body, 'arg', start, end)
        if get_quoted_value(body, "arg", arg_line) == "name-noext":
            body[arg_line] = 'arg "name"'


def revert_l7ninfo(document):
    """Revert l7n Info insets to text.

    Every Info inset of type "l7n" is replaced by one line of text holding
    its argument, stripped of trailing colons and accelerator markers.
    """

    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Info", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            document.warning("Malformed LyX document: Could not find end of Info inset.")
            i = i + 1
            continue
        tp = find_token(document.body, 'type', i, j)
        tpv = get_quoted_value(document.body, "type", tp)
        if tpv != "l7n":
            i = i + 1
            continue
        arg = find_token(document.body, 'arg', i, j)
        argv = get_quoted_value(document.body, "arg", arg)
        # remove trailing colons, menu accelerator (|...) and qt accelerator (&), while keeping literal " & "
        argv = argv.rstrip(':').split('|')[0].replace(" & ", "</amp;>").replace("&", "").replace("</amp;>", " & ")
        # The replacement must be a one-element list: assigning a bare string
        # to a slice would insert every character as a line of its own.
        document.body[i : j+1] = [argv]
        i = i + 1


def revert_listpargs(document):
    """Reverts listpreamble arguments to TeX-code.

    Each "Argument listpreamble:" inset is removed and the content of its
    Plain Layout is re-inserted, wrapped in braces inside an ERT inset, at
    the start of the content of the containing paragraph.  Insets that are
    not well-formed are reported with a warning and left alone.
    """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Argument listpreamble:", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: Can't find end of listpreamble argument inset")
            i += 1
            continue
        # Find containing paragraph layout
        parent = get_containing_layout(document.body, i)
        if parent == False:
            document.warning("Malformed LyX document: Can't find parent paragraph layout")
            i += 1
            continue
        parbeg = parent[3]
        # Only look inside this inset, never in whatever follows it
        beginPlain = find_token(document.body, "\\begin_layout Plain Layout", i, j)
        endPlain = -1
        if beginPlain != -1:
            endPlain = find_end_of_layout(document.body, beginPlain)
        if endPlain == -1:
            document.warning("Malformed LyX document: Can't find Plain Layout of listpreamble argument inset")
            i += 1
            continue
        content = document.body[beginPlain + 1 : endPlain]
        del document.body[i:j+1]
        subst = ["\\begin_inset ERT", "status collapsed", "", "\\begin_layout Plain Layout",
                 "{"] + content + ["}", "\\end_layout", "", "\\end_inset", ""]
        document.body[parbeg : parbeg] = subst
        i += 1


def revert_lformatinfo(document):
    """Revert layout format Info insets to text.

    Every Info inset of type "lyxinfo" with the argument "layoutformat" is
    replaced by one line of text holding a hardcoded format number.
    """

    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Info", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            document.warning("Malformed LyX document: Could not find end of Info inset.")
            i = i + 1
            continue
        tp = find_token(document.body, 'type', i, j)
        tpv = get_quoted_value(document.body, "type", tp)
        if tpv != "lyxinfo":
            i = i + 1
            continue
        arg = find_token(document.body, 'arg', i, j)
        argv = get_quoted_value(document.body, "arg", arg)
        if argv != "layoutformat":
            i = i + 1
            continue
        # hardcoded for now
        # The replacement must be a one-element list: assigning the bare
        # string would insert "6" and "9" as two separate lines.
        document.body[i : j+1] = ["69"]
        i = i + 1


def convert_hebrew_parentheses(document):
    """Don't reverse parentheses in Hebrew text.

    Swaps "(" and ")" on every body line that does not start with a
    backslash while the current language is Hebrew.  The current language
    starts out as the document language, is changed by "\\lang" lines and
    falls back to the document language at each "\\end_layout".
    """
    lang_prefix = '\\lang '
    active = document.language
    for lineno, text in enumerate(document.body):
        if text.startswith(lang_prefix):
            active = text[len(lang_prefix):]
            continue
        if text.startswith('\\end_layout'):
            active = document.language
            continue
        if active != 'hebrew' or text.startswith('\\'):
            continue
        # Three-step swap with NUL as the temporary placeholder
        swapped = text.replace('(', '\x00')
        swapped = swapped.replace(')', '(')
        document.body[lineno] = swapped.replace('\x00', ')')


def revert_hebrew_parentheses(document):
    """Store parentheses in Hebrew text reversed.

    Swapping "(" and ")" is its own inverse, so reverting is the very same
    operation as converting.
    """
    # This only exists to keep the convert/revert naming convention
    convert_hebrew_parentheses(document)


def revert_malayalam(document):
    """Set the document language to English but assure Malayalam output.

    All of the work is delegated to revert_language().
    """

    # NOTE(review): the three strings are presumably the LyX language name
    # followed by its babel and polyglossia names (babel left empty) --
    # confirm against revert_language() in lyx2lyx_tools.
    revert_language(document, "malayalam", "", "malayalam")


def revert_soul(document):
    """Revert soul module flex insets to ERT.

    The soul package is added to the preamble if any of the flex insets is
    used, and the color package if the Highlight inset is used.  Then each
    flex inset is reverted to its LaTeX command.
    """

    # Flex inset name -> LaTeX command, in the order they are reverted
    soul_commands = [
        ("Spaceletters", "\\so"),
        ("Strikethrough", "\\st"),
        ("Underline", "\\ul"),
        ("Highlight", "\\hl"),
        ("Capitalize", "\\caps"),
    ]

    uses_soul = any(find_token(document.body, "\\begin_inset Flex %s" % name, 0) != -1
                    for name, _ in soul_commands)
    if uses_soul:
        add_to_preamble(document, ["\\usepackage{soul}"])
    if find_token(document.body, "\\begin_inset Flex Highlight", 0) != -1:
        add_to_preamble(document, ["\\usepackage{color}"])

    for name, command in soul_commands:
        revert_flex_inset(document.body, name, command)


def revert_tablestyle(document):
    """Remove tablestyle params.

    Deletes the first \\tablestyle line from the document header, if any.
    """

    pos = find_token(document.header, "\\tablestyle", 0)
    if pos == -1:
        return
    del document.header[pos]


def revert_bibfileencodings(document):
    """ Revert individual Biblatex bibliography encodings.

    For every bibtex inset that carries a "file_encodings" parameter, that
    parameter line is removed from the inset.  If the cite engine is
    biblatex (or biblatex-natbib), additionally:
      * each bibliography file of the inset is registered in the preamble
        with \\addbibresource, with a bibencoding option when an encoding
        was recorded for that file (LyX encoding names are translated to
        their LaTeX names where the table below knows them, and passed
        through unchanged otherwise),
      * a \\printbibliography command is inserted as ERT, and
      * the original bibtex inset is wrapped in a note.

    Insets without "file_encodings" are left untouched.
    """

    # Get cite engine
    engine = "basic"
    i = find_token(document.header, "\\cite_engine", 0)
    if i == -1:
        document.warning("Malformed document! Missing \\cite_engine")
    else:
        engine = get_value(document.header, "\\cite_engine", i)

    # Check if biblatex
    biblatex = engine in ["biblatex", "biblatex-natbib"]

    # Map lyx to latex encoding names
    latex_encodings = {
        "utf8" : "utf8",
        "utf8x" : "utf8x",
        "armscii8" : "armscii8",
        "iso8859-1" : "latin1",
        "iso8859-2" : "latin2",
        "iso8859-3" : "latin3",
        "iso8859-4" : "latin4",
        "iso8859-5" : "iso88595",
        "iso8859-6" : "8859-6",
        "iso8859-7" : "iso-8859-7",
        "iso8859-8" : "8859-8",
        "iso8859-9" : "latin5",
        "iso8859-13" : "latin7",
        "iso8859-15" : "latin9",
        "iso8859-16" : "latin10",
        "applemac" : "applemac",
        "cp437" : "cp437",
        "cp437de" : "cp437de",
        "cp850" : "cp850",
        "cp852" : "cp852",
        "cp855" : "cp855",
        "cp858" : "cp858",
        "cp862" : "cp862",
        "cp865" : "cp865",
        "cp866" : "cp866",
        "cp1250" : "cp1250",
        "cp1251" : "cp1251",
        "cp1252" : "cp1252",
        "cp1255" : "cp1255",
        "cp1256" : "cp1256",
        "cp1257" : "cp1257",
        "koi8-r" : "koi8-r",
        "koi8-u" : "koi8-u",
        "pt154" : "pt154",
        "utf8-platex" : "utf8",
        "ascii" : "ascii"
    }

    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CommandInset bibtex", i)
        if i == -1:
            break
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Can't find end of bibtex inset at line %d!!" %(i))
            i += 1
            continue
        # Kept under its own name so that the encoding table above stays
        # available for the name translation below.
        file_encodings = get_quoted_value(document.body, "file_encodings", i, j)
        if not file_encodings:
            i += 1
            continue
        # Splitting an empty value yields [""], so filter out empty names
        # to make the "no bibfile" check meaningful.
        bibfiles = [bib for bib in
                    get_quoted_value(document.body, "bibfiles", i, j).split(",")
                    if bib]
        opts = get_quoted_value(document.body, "biblatexopts", i, j)
        if not bibfiles:
            document.warning("Bibtex inset at line %d does not have a bibfile!" %(i))
        # remove encoding line
        k = find_token(document.body, "file_encodings", i, j)
        if k != -1:
            del document.body[k]
        # Re-find inset end line
        j = find_end_of_inset(document.body, i)
        if biblatex:
            # Entries are tab separated, each one being "<bibfile> <encoding>"
            encmap = dict()
            for entry in file_encodings.split("\t"):
                parts = entry.split(" ", 1)
                if len(parts) != 2:
                    # Malformed entry without an encoding: ignore it
                    continue
                encmap[parts[0]] = parts[1]
            for bib in bibfiles:
                pr = "\\addbibresource"
                if bib in encmap:
                    enc = encmap[bib]
                    # Translate the LyX name; unknown names pass unchanged
                    pr += "[bibencoding=" + latex_encodings.get(enc, enc) + "]"
                pr += "{" + bib + "}"
                add_to_preamble(document, [pr])
            # Insert ERT \\printbibliography and wrap bibtex inset to a Note
            pcmd = "printbibliography"
            if opts:
                pcmd += "[" + opts + "]"
            repl = ["\\begin_inset ERT", "status open", "", "\\begin_layout Plain Layout",\
                    "", "", "\\backslash", pcmd, "\\end_layout", "", "\\end_inset", "", "",\
                    "\\end_layout", "", "\\begin_layout Standard", "\\begin_inset Note Note",\
                    "status open", "", "\\begin_layout Plain Layout" ]
            repl += document.body[i:j+1]
            repl += ["", "\\end_layout", "", "\\end_inset", "", ""]
            document.body[i:j+1] = repl
            # Last line of the inserted material; derived from its actual
            # length rather than a hand-counted constant.
            j = i + len(repl) - 1

        # Continue right after the processed inset
        i = j + 1


def revert_cmidruletrimming(document):
    " Strip the \\cmidrule trimming attributes from table cells "

    # FIXME: Revert to TeX code?
    trim_attr = re.compile(r' (bottom|top)line[lr]trim="true"')
    pos = find_token(document.body, '<cell ', 0)
    while pos != -1:
        cell = document.body[pos]
        # Cells without any trim attribute are left as they are
        if 'trim="' in cell:
            document.body[pos] = trim_attr.sub('', cell)
        pos = find_token(document.body, '<cell ', pos + 1)


##
# Conversion hub
#

# Version strings associated with this module.
supported_versions = ["2.4.0", "2.4"]

# Forward conversion table, in ascending order.  Each entry pairs a number
# with the list of functions attached to it; an empty list means there is
# nothing to run for that entry.
# NOTE(review): the number is presumably the file format reached once the
# listed functions have run -- confirm against the lyx2lyx driver.
convert = [
           [545, [convert_lst_literalparam]],
           [546, []],
           [547, []],
           [548, []],
           [549, []],
           [550, [convert_fontenc]],
           [551, []],
           [552, []],
           [553, []],
           [554, []],
           [555, []],
           [556, []],
           [557, [convert_vcsinfo]],
           [558, [removeFrontMatterStyles]],
           [559, []],
           [560, []],
           [561, [convert_latexFonts]], # Handle dejavu, ibmplex fonts in GUI
           [562, []],
           [563, []],
           [564, []],
           [565, [convert_AdobeFonts]], # Handle adobe fonts in GUI
           [566, [convert_hebrew_parentheses]],
           [567, []],
           [568, []],
           [569, []],
           [570, []],
           [571, []],
           [572, [convert_notoFonts]]  # Added options thin, light, extralight for Noto
          ]

# Backward conversion table, in descending order, with the same entry layout
# as the convert table.  The numbers are one lower than those of the matching
# convert entries (e.g. convert_notoFonts at 572, revert_notoFonts at 571).
# NOTE(review): presumably the number is the format reached after reverting
# -- confirm against the lyx2lyx driver.
revert =  [
           [571, [revert_notoFonts]],
           [570, [revert_cmidruletrimming]],
           [569, [revert_bibfileencodings]],
           [568, [revert_tablestyle]],
           [567, [revert_soul]],
           [566, [revert_malayalam]],
           [565, [revert_hebrew_parentheses]],
           [564, [revert_AdobeFonts]],
           [563, [revert_lformatinfo]],
           [562, [revert_listpargs]],
           [561, [revert_l7ninfo]],
           [560, [revert_latexFonts]], # Handle dejavu, ibmplex fonts in user preamble
           [559, [revert_timeinfo, revert_namenoextinfo]],
           [558, [revert_dateinfo]],
           [557, [addFrontMatterStyles]],
           [556, [revert_vcsinfo]],
           [555, [revert_bibencoding]],
           [554, [revert_vcolumns]],
           [553, [revert_stretchcolumn]],
           [552, [revert_tuftecite]],
           [551, [revert_floatpclass, revert_floatalignment]],
           [550, [revert_nospellcheck]],
           [549, [revert_fontenc]],
           [548, []],# dummy format change
           [547, [revert_lscape]],
           [546, [revert_xcharter]],
           [545, [revert_paratype]],
           [544, [revert_lst_literalparam]]
          ]


# Running this module directly does nothing.
if __name__ == "__main__":
    pass