lyx_mirror/lib/lyx2lyx/lyx_2_4.py

# -*- coding: utf-8 -*-
# This file is part of lyx2lyx
# Copyright (C) 2018 The LyX team
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.

""" Convert files to the file format generated by lyx 2.4"""

import re, string
import unicodedata
import sys, os

# Uncomment only what you need to import, please.

from parser_tools import (count_pars_in_inset, find_end_of_inset, find_end_of_layout,
find_token, get_bool_value, get_option_value, get_value, get_quoted_value) 
#    del_token, del_value, del_complete_lines,
#    find_complete_lines, find_end_of, 
#    find_re, find_substring, find_token_backwards,
#    get_containing_inset, get_containing_layout,
#    is_in_inset, set_bool_value
#    find_tokens, find_token_exact, check_token

from lyx2lyx_tools import (put_cmd_in_ert, add_to_preamble)
#  revert_font_attrs, insert_to_preamble, latex_length
#  get_ert, lyx2latex, lyx2verbatim, length_in_bp, convert_info_insets
#  revert_flex_inset, hex2ratio, str2bool

####################################################################
# Private helper functions


###############################################################################
###
### Conversion and reversion routines
###
###############################################################################

def removeFrontMatterStyles(document):
    " Remove styles Begin/EndFromatter"

    layouts = ['BeginFrontmatter', 'EndFrontmatter']
    for layout in layouts:
        i = 0
        while True:
            i = find_token(document.body, '\\begin_layout ' + layout, i)
            if i == -1:
                break
            j = find_end_of_layout(document.body, i)
            if j == -1:
                document.warning("Malformed LyX document: Can't find end of layout at line %d" % i)
                i += 1
                continue
            if document.body[j] == '':
                j = j + 1
            del document.body[i:j+1]

def addFrontMatterStyles(document):
    " Use styles Begin/EndFrontmatter for elsarticle"

    def insertFrontmatter(prefix, line):
        document.body[line:line] = ['\\begin_layout ' + prefix + 'Frontmatter',
                                    '\\begin_inset Note Note',
                                    'status open', '',
                                    '\\begin_layout Plain Layout',
                                    'Keep this empty!',
                                    '\\end_layout', '',
                                    '\\end_inset', '', '',
                                    '\\end_layout']

    if document.textclass == "elsarticle":
        layouts = ['Title', 'Title footnote', 'Author', 'Author footnote',
                   'Corresponding author', 'Address', 'Email', 'Abstract', 'Keywords']
        first = -1
        last = -1
        for layout in layouts:
            i = 0
            while True:
                i = find_token(document.body, '\\begin_layout ' + layout, i)
                if i == -1:
                    break
                k = find_end_of_layout(document.body, i)
                if k == -1:
                    document.warning("Malformed LyX document: Can't find end of layout at line %d" % i)
                    i += 1;
                    continue
                if first == -1 or i < first:
                    first = i
                if last == -1 or last <= k:
                    last = k+1
                i = k
        if first == -1:
            return
        if first > 0 and document.body[first-1] == '':
            first -= 1
        if document.body[last] == '':
            last = last + 1
        insertFrontmatter('End', last)
        insertFrontmatter('Begin', first)

def convert_lst_literalparam(document):
    " Add param literal to include inset "

    i = 0
    while True:
        i = find_token(document.body, '\\begin_inset CommandInset include', i)
        if i == -1:
            break
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: Can't find end of command inset at line %d" % i)
            i += 1
            continue
        while i < j and document.body[i].strip() != '':
            i += 1
        document.body.insert(i, "literal \"true\"")


def revert_lst_literalparam(document):
    " Remove param literal from include inset "

    i = 0
    while True:
        i = find_token(document.body, '\\begin_inset CommandInset include', i)
        if i == -1:
            break
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: Can't find end of include inset at line %d" % i)
            i += 1
            continue
        k = find_token(document.body, 'literal', i, j)
        if k == -1:
            i += 1
            continue
        del document.body[k]


def revert_paratype(document):
    " Revert ParaType font definitions to LaTeX "

    if find_token(document.header, "\\use_non_tex_fonts false", 0) != -1:
        preamble = ""
        i1 = find_token(document.header, "\\font_roman \"PTSerif-TLF\"", 0)
        i2 = find_token(document.header, "\\font_sans \"default\"", 0)
        i3 = find_token(document.header, "\\font_typewriter \"default\"", 0)
        j = find_token(document.header, "\\font_sans \"PTSans-TLF\"", 0)
        sfval = get_value(document.header, "\\font_sf_scale", 0)
        # cutoff " 100"
        sfval = sfval[:-4]
        sfoption = ""
        if sfval != "100":
            sfoption = "scaled=" + format(float(sfval) / 100, '.2f')
        k = find_token(document.header, "\\font_typewriter \"PTMono-TLF\"", 0)
        ttval = get_value(document.header, "\\font_tt_scale", 0)
        # cutoff " 100"
        ttval = ttval[:-4]
        ttoption = ""
        if ttval != "100":
            ttoption = "scaled=" + format(float(ttval) / 100, '.2f')
        if i1 != -1 and i2 != -1 and i3!= -1:
            add_to_preamble(document, ["\\usepackage{paratype}"])
        else:
            if i1!= -1: 
                add_to_preamble(document, ["\\usepackage{PTSerif}"])
                document.header[i1] = document.header[i1].replace("PTSerif-TLF", "default")
            if j!= -1: 
                if sfoption != "":
                    add_to_preamble(document, ["\\usepackage[" + sfoption + "]{PTSans}"])
                else:
                    add_to_preamble(document, ["\\usepackage{PTSans}"])
                document.header[j] = document.header[j].replace("PTSans-TLF", "default")
            if k!= -1: 
                if ttoption != "":
                    add_to_preamble(document, ["\\usepackage[" + ttoption + "]{PTMono}"])
                else:
                    add_to_preamble(document, ["\\usepackage{PTMono}"])    
                document.header[k] = document.header[k].replace("PTMono-TLF", "default")


def revert_xcharter(document):
    " Revert XCharter font definitions to LaTeX "

    i = find_token(document.header, "\\font_roman \"xcharter\"", 0)
    if i == -1:
        return

    # replace unsupported font setting
    document.header[i] = document.header[i].replace("xcharter", "default")
    # no need for preamble code with system fonts
    if get_bool_value(document.header, "\\use_non_tex_fonts"):
        return

    # transfer old style figures setting to package options
    j = find_token(document.header, "\\font_osf true")
    if j != -1:
        options = "[osf]"
        document.header[j] = "\\font_osf false"
    else:
        options = ""
    if i != -1:
        add_to_preamble(document, ["\\usepackage%s{XCharter}"%options])


def revert_lscape(document):
    " Reverts the landscape environment (Landscape module) to TeX-code "

    if not "landscape" in document.get_module_list():
        return

    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Flex Landscape", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: Can't find end of Landscape inset")
            i += 1
            continue

        if document.body[i] == "\\begin_inset Flex Landscape (Floating)":
            document.body[j - 2 : j + 1] = put_cmd_in_ert("\\end{landscape}}")
            document.body[i : i + 4] = put_cmd_in_ert("\\afterpage{\\begin{landscape}")
            add_to_preamble(document, ["\\usepackage{afterpage}"])
        else:
            document.body[j - 2 : j + 1] = put_cmd_in_ert("\\end{landscape}")
            document.body[i : i + 4] = put_cmd_in_ert("\\begin{landscape}")

        add_to_preamble(document, ["\\usepackage{pdflscape}"])
        # no need to reset i


def convert_fontenc(document):
    " Convert default fontenc setting "

    i = find_token(document.header, "\\fontencoding global", 0)
    if i == -1:
        return

    document.header[i] = document.header[i].replace("global", "auto")


def revert_fontenc(document):
    " Revert default fontenc setting "

    i = find_token(document.header, "\\fontencoding auto", 0)
    if i == -1:
        return

    document.header[i] = document.header[i].replace("auto", "global")


def revert_nospellcheck(document):
    " Remove nospellcheck font info param "

    i = 0
    while True:
        i = find_token(document.body, '\\nospellcheck', i)
        if i == -1:
            return
        del document.body[i]


def revert_floatpclass(document):
    " Remove float placement params 'document' and 'class' "

    i = 0
    i = find_token(document.header, "\\float_placement class", 0)
    if i != -1:
        del document.header[i]

    i = 0
    while True:
        i = find_token(document.body, '\\begin_inset Float', i)
        if i == -1:
            break
        j = find_end_of_inset(document.body, i)
        k = find_token(document.body, 'placement class', i, i + 2)
        if k == -1:
            k = find_token(document.body, 'placement document', i, i + 2)
            if k != -1:
                del document.body[k]
            i = j
            continue
        del document.body[k]


def revert_floatalignment(document):
    " Remove float alignment params "

    i = 0
    i = find_token(document.header, "\\float_alignment", 0)
    galignment = ""
    if i != -1:
        galignment = get_value(document.header, "\\float_alignment", i)
        del document.header[i]

    i = 0
    while True:
        i = find_token(document.body, '\\begin_inset Float', i)
        if i == -1:
            break
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: Can't find end of inset at line " + str(i))
            i += 1
        k = find_token(document.body, 'alignment', i, i + 4)
        if k == -1:
            i = j
            continue
        alignment = get_value(document.body, "alignment", k)
        if alignment == "document":
            alignment = galignment
        del document.body[k]
        l = find_token(document.body, "\\begin_layout Plain Layout", i, j)
        if l == -1:
            document.warning("Can't find float layout!")
            i = j
            continue
        alcmd = []
        if alignment == "left":
            alcmd = put_cmd_in_ert("\\raggedright{}")
        elif alignment == "center":
            alcmd = put_cmd_in_ert("\\centering{}")
        elif alignment == "right":
            alcmd = put_cmd_in_ert("\\raggedleft{}")
        if len(alcmd) > 0:
            document.body[l+1:l+1] = alcmd
        i = j 


def revert_tuftecite(document):
    " Revert \cite commands in tufte classes "

    tufte = ["tufte-book", "tufte-handout"]
    if document.textclass not in tufte:
        return

    i = 0
    while (True):
        i = find_token(document.body, "\\begin_inset CommandInset citation", i)
        if i == -1:
            break
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Can't find end of citation inset at line %d!!" %(i))
            i += 1
            continue
        k = find_token(document.body, "LatexCommand", i, j)
        if k == -1:
            document.warning("Can't find LatexCommand for citation inset at line %d!" %(i))
            i = j + 1
            continue
        cmd = get_value(document.body, "LatexCommand", k)
        if cmd != "cite":
            i = j + 1
            continue
        pre = get_quoted_value(document.body, "before", i, j)
        post = get_quoted_value(document.body, "after", i, j)
        key = get_quoted_value(document.body, "key", i, j)
        if not key:
            document.warning("Citation inset at line %d does not have a key!" %(i))
            key = "???"
        # Replace command with ERT
        res = "\\cite"
        if pre:
            res += "[" + pre + "]"
        if post:
            res += "[" + post + "]"
        elif pre:
            res += "[]"
        res += "{" + key + "}"
        document.body[i:j+1] = put_cmd_in_ert([res])
        i = j + 1


def revert_stretchcolumn(document):
    " We remove the column varwidth flags or everything else will become a mess. "
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Tabular", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            document.warning("Malformed LyX document: Could not find end of tabular.")
            continue
        for k in range(i, j):
            if re.search('^<column.*varwidth="[^"]+".*>$', document.body[k]):
                document.warning("Converting 'tabularx'/'xltabular' table to normal table.")
                document.body[k] = document.body[k].replace(' varwidth="true"', '')
        i = i + 1


def revert_vcolumns(document):
    " Revert standard columns with line breaks etc. "
    i = 0
    needvarwidth = False
    needarray = False
    try:
        while True:
            i = find_token(document.body, "\\begin_inset Tabular", i)
            if i == -1:
                return
            j = find_end_of_inset(document.body, i)
            if j == -1:
                document.warning("Malformed LyX document: Could not find end of tabular.")
                i += 1
                continue

            # Collect necessary column information
            m = i + 1
            nrows = int(document.body[i+1].split('"')[3])
            ncols = int(document.body[i+1].split('"')[5])
            col_info = []
            for k in range(ncols):
                m = find_token(document.body, "<column", m)
                width = get_option_value(document.body[m], 'width')
                varwidth = get_option_value(document.body[m], 'varwidth')
                alignment = get_option_value(document.body[m], 'alignment')
                special = get_option_value(document.body[m], 'special')
                col_info.append([width, varwidth, alignment, special, m])

            # Now parse cells
            m = i + 1
            lines = []
            for row in range(nrows):
                for col in range(ncols):
                    m = find_token(document.body, "<cell", m)
                    multicolumn = get_option_value(document.body[m], 'multicolumn')
                    multirow = get_option_value(document.body[m], 'multirow')
                    width = get_option_value(document.body[m], 'width')
                    rotate = get_option_value(document.body[m], 'rotate')
                    # Check for: linebreaks, multipars, non-standard environments
                    begcell = m
                    endcell = find_token(document.body, "</cell>", begcell)
                    vcand = False
                    if find_token(document.body, "\\begin_inset Newline", begcell, endcell) != -1:
                        vcand = True
                    elif count_pars_in_inset(document.body, begcell + 2) > 1:
                        vcand = True
                    elif get_value(document.body, "\\begin_layout", begcell) != "Plain Layout":
                        vcand = True
                    if vcand and rotate == "" and ((multicolumn == "" and multirow == "") or width == ""):
                        if col_info[col][0] == "" and col_info[col][1] == "" and col_info[col][3] == "":
                            needvarwidth = True
                            alignment = col_info[col][2]
                            col_line = col_info[col][4]
                            vval = ""
                            if alignment == "center":
                                vval = ">{\\centering}"
                            elif  alignment == "left":
                                vval = ">{\\raggedright}"
                            elif alignment == "right":
                                vval = ">{\\raggedleft}"
                            if vval != "":
                                needarray = True
                            vval += "V{\\linewidth}"
                
                            document.body[col_line] = document.body[col_line][:-1] + " special=\"" + vval + "\">"
                            # ERT newlines and linebreaks (since LyX < 2.4 automatically inserts parboxes
                            # with newlines, and we do not want that)
                            while True:
                                endcell = find_token(document.body, "</cell>", begcell)
                                linebreak = False
                                nl = find_token(document.body, "\\begin_inset Newline newline", begcell, endcell)
                                if nl == -1:
                                    nl = find_token(document.body, "\\begin_inset Newline linebreak", begcell, endcell)
                                    if nl == -1:
                                         break
                                    linebreak = True
                                nle = find_end_of_inset(document.body, nl)
                                del(document.body[nle:nle+1])
                                if linebreak:
                                    document.body[nl:nl+1] = put_cmd_in_ert("\\linebreak{}")
                                else:
                                    document.body[nl:nl+1] = put_cmd_in_ert("\\\\")
                    m += 1

            i = j + 1

    finally:
        if needarray == True:
            add_to_preamble(document, ["\\usepackage{array}"])
        if needvarwidth == True:
            add_to_preamble(document, ["\\usepackage{varwidth}"])


def revert_bibencoding(document):
    " Revert bibliography encoding "

    # Get cite engine
    engine = "basic"
    i = find_token(document.header, "\\cite_engine", 0)
    if i == -1:
        document.warning("Malformed document! Missing \\cite_engine")
    else:
        engine = get_value(document.header, "\\cite_engine", i)

    # Check if biblatex
    biblatex = False
    if engine in ["biblatex", "biblatex-natbib"]:
        biblatex = True

    # Map lyx to latex encoding names 
    encodings = {
        "utf8" : "utf8",
        "utf8x" : "utf8x",
        "armscii8" : "armscii8",
        "iso8859-1" : "latin1",
        "iso8859-2" : "latin2",
        "iso8859-3" : "latin3",
        "iso8859-4" : "latin4",
        "iso8859-5" : "iso88595",
        "iso8859-6" : "8859-6",
        "iso8859-7" : "iso-8859-7",
        "iso8859-8" : "8859-8",
        "iso8859-9" : "latin5",
        "iso8859-13" : "latin7",
        "iso8859-15" : "latin9",
        "iso8859-16" : "latin10",
        "applemac" : "applemac",
        "cp437" : "cp437",
        "cp437de" : "cp437de",
        "cp850" : "cp850",
        "cp852" : "cp852",
        "cp855" : "cp855",
        "cp858" : "cp858",
        "cp862" : "cp862",
        "cp865" : "cp865",
        "cp866" : "cp866",
        "cp1250" : "cp1250",
        "cp1251" : "cp1251",
        "cp1252" : "cp1252",
        "cp1255" : "cp1255",
        "cp1256" : "cp1256",
        "cp1257" : "cp1257",
        "koi8-r" : "koi8-r",
        "koi8-u" : "koi8-u",
        "pt154" : "pt154",
        "utf8-platex" : "utf8",
        "ascii" : "ascii"
    }

    i = 0
    bibresources = []
    while (True):
        i = find_token(document.body, "\\begin_inset CommandInset bibtex", i)
        if i == -1:
            break
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Can't find end of bibtex inset at line %d!!" %(i))
            i += 1
            continue
        encoding = get_quoted_value(document.body, "encoding", i, j)
        if not encoding:
            i += 1
            continue
        # remove encoding line
        k = find_token(document.body, "encoding", i, j)
        if k != -1:
            del document.body[k]
        # Re-find inset end line
        j = find_end_of_inset(document.body, i)
        if biblatex:
            biblio_options = ""
            h = find_token(document.header, "\\biblio_options", 0)
            if h != -1:
                biblio_options = get_value(document.header, "\\biblio_options", h)
                if not "bibencoding" in biblio_options:
                     document.header[h] += ",bibencoding=%s" % encodings[encoding]
            else:
                bs = find_token(document.header, "\\biblatex_bibstyle", 0)
                if bs == -1:
                    # this should not happen
                    document.warning("Malformed LyX document! No \\biblatex_bibstyle header found!")
                else:
                    document.header[bs-1 : bs-1] = ["\\biblio_options bibencoding=" + encodings[encoding]]
        else:
            document.body[j+1:j+1] = put_cmd_in_ert("\\egroup")
            document.body[i:i] = put_cmd_in_ert("\\bgroup\\inputencoding{" + encodings[encoding] + "}")

        i = j + 1


def convert_vcsinfo(document):
    " Separate vcs Info inset from buffer Info inset. "

    types = {
        "vcs-revision" : "revision",
        "vcs-tree-revision" : "tree-revision",
        "vcs-author" : "author",
        "vcs-time" : "time",
        "vcs-date" : "date"
    }
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Info", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            document.warning("Malformed LyX document: Could not find end of Info inset.")
            i = i + 1
            continue
        tp = find_token(document.body, 'type', i, j)
        tpv = get_quoted_value(document.body, "type", tp)
        if tpv != "buffer":
            i = i + 1
            continue
        arg = find_token(document.body, 'arg', i, j)
        argv = get_quoted_value(document.body, "arg", arg)
        if argv not in list(types.keys()):
            i = i + 1
            continue
        document.body[tp] = "type \"vcs\""
        document.body[arg] = "arg \"" + types[argv] + "\""
        i = i + 1


def revert_vcsinfo(document):
    " Merge vcs Info inset to buffer Info inset. "

    args = ["revision", "tree-revision", "author", "time", "date" ]
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Info", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            document.warning("Malformed LyX document: Could not find end of Info inset.")
            i = i + 1
            continue
        tp = find_token(document.body, 'type', i, j)
        tpv = get_quoted_value(document.body, "type", tp)
        if tpv != "vcs":
            i = i + 1
            continue
        arg = find_token(document.body, 'arg', i, j)
        argv = get_quoted_value(document.body, "arg", arg)
        if argv not in args:
            document.warning("Malformed Info inset. Invalid vcs arg.")
            i = i + 1
            continue
        document.body[tp] = "type \"buffer\""
        document.body[arg] = "arg \"vcs-" + argv + "\""
        i = i + 1


##
# Conversion hub
#

supported_versions = ["2.4.0", "2.4"]
convert = [
           [545, [convert_lst_literalparam]],
           [546, []],
           [547, []],
           [548, []],
           [549, []],
           [550, [convert_fontenc]],
           [551, []],
           [552, []],
           [553, []],
           [554, []],
           [555, []],
           [556, []],
           [557, [convert_vcsinfo]],
           [558, [removeFrontMatterStyles]]
          ]

revert =  [
	   [557, [addFrontMatterStyles]],
           [556, [revert_vcsinfo]],
           [555, [revert_bibencoding]],
           [554, [revert_vcolumns]],
           [553, [revert_stretchcolumn]],
           [552, [revert_tuftecite]],
           [551, [revert_floatpclass, revert_floatalignment]],
           [550, [revert_nospellcheck]],
           [549, [revert_fontenc]],
           [548, []],# dummy format change
           [547, [revert_lscape]],
           [546, [revert_xcharter]],
           [545, [revert_paratype]],
           [544, [revert_lst_literalparam]]
          ]


if __name__ == "__main__":
    pass