lyx_mirror/lib/lyx2lyx/lyx_2_1.py

# -*- coding: utf-8 -*-
# This file is part of lyx2lyx
# -*- coding: utf-8 -*-
# Copyright (C) 2011 The LyX team
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA

""" Convert files to the file format generated by lyx 2.1"""

import re, string
import unicodedata
import sys, os

# Uncomment only what you need to import, please.

from parser_tools import del_token, find_token, find_end_of, find_end_of_inset, \
    find_end_of_layout, find_re, get_option_value, get_value, get_quoted_value, \
    set_option_value

#from parser_tools import find_token, find_end_of, find_tokens, \
  #find_token_exact, find_end_of_inset, find_end_of_layout, \
  #find_token_backwards, is_in_inset, get_value, get_quoted_value, \
  #del_token, check_token

from lyx2lyx_tools import add_to_preamble, put_cmd_in_ert

#from lyx2lyx_tools import insert_to_preamble, \
#  put_cmd_in_ert, lyx2latex, latex_length, revert_flex_inset, \
#  revert_font_attrs, hex2ratio, str2bool

####################################################################
# Private helper functions

#def remove_option(lines, m, option):
    #''' removes option from line m. returns whether we did anything '''
    #l = lines[m].find(option)
    #if l == -1:
        #return False
    #val = lines[m][l:].split('"')[1]
    #lines[m] = lines[m][:l - 1] + lines[m][l+len(option + '="' + val + '"'):]
    #return True


###############################################################################
###
### Conversion and reversion routines
###
###############################################################################

def revert_visible_space(document):
    "Revert InsetSpace visible into its ERT counterpart"
    i = 0
    while True:
      i = find_token(document.body, "\\begin_inset space \\textvisiblespace{}", i)
      if i == -1:
        return
      end = find_end_of_inset(document.body, i)
      subst = put_cmd_in_ert("\\textvisiblespace{}")
      document.body[i:end + 1] = subst


def convert_undertilde(document):
    " Load undertilde automatically "
    i = find_token(document.header, "\\use_mathdots" , 0)
    if i == -1:
        i = find_token(document.header, "\\use_mhchem" , 0)
    if i == -1:
        i = find_token(document.header, "\\use_esint" , 0)
    if i == -1:
        document.warning("Malformed LyX document: Can't find \\use_mathdots.")
        return;
    j = find_token(document.preamble, "\\usepackage{undertilde}", 0)
    if j == -1:
        document.header.insert(i + 1, "\\use_undertilde 0")
    else:
        document.header.insert(i + 1, "\\use_undertilde 2")
        del document.preamble[j]


def revert_undertilde(document):
    " Load undertilde if used in the document "
    undertilde = find_token(document.header, "\\use_undertilde" , 0)
    if undertilde == -1:
      document.warning("No \\use_undertilde line. Assuming auto.")
    else:
      val = get_value(document.header, "\\use_undertilde", undertilde)
      del document.header[undertilde]
      try:
        usetilde = int(val)
      except:
        document.warning("Invalid \\use_undertilde value: " + val + ". Assuming auto.")
        # probably usedots has not been changed, but be safe.
        usetilde = 1

      if usetilde == 0:
        # do not load case
        return
      if usetilde == 2:
        # force load case
        add_to_preamble(document, ["\\usepackage{undertilde}"])
        return

    # so we are in the auto case. we want to load undertilde if \utilde is used.
    i = 0
    while True:
      i = find_token(document.body, '\\begin_inset Formula', i)
      if i == -1:
        return
      j = find_end_of_inset(document.body, i)
      if j == -1:
        document.warning("Malformed LyX document: Can't find end of Formula inset at line " + str(i))
        i += 1
        continue
      code = "\n".join(document.body[i:j])
      if code.find("\\utilde") != -1:
        add_to_preamble(document, ["\\@ifundefined{utilde}{\\usepackage{undertilde}}"])
        return
      i = j


def revert_negative_space(document):
    "Revert InsetSpace negmedspace and negthickspace into its TeX-code counterpart"
    i = 0
    j = 0
    reverted = False
    while True:
      i = find_token(document.body, "\\begin_inset space \\negmedspace{}", i)
      if i == -1:
        j = find_token(document.body, "\\begin_inset space \\negthickspace{}", j)
        if j == -1:
          # load amsmath in the preamble if not already loaded if we are at the end of checking
          if reverted == True:
            i = find_token(document.header, "\\use_amsmath 2", 0)
            if i == -1:
              add_to_preamble(document, ["\\@ifundefined{negthickspace}{\\usepackage{amsmath}}"])
          return
      if i == -1:
        return
      end = find_end_of_inset(document.body, i)
      subst = put_cmd_in_ert("\\negmedspace{}")
      document.body[i:end + 1] = subst
      j = find_token(document.body, "\\begin_inset space \\negthickspace{}", j)
      if j == -1:
        return
      end = find_end_of_inset(document.body, j)
      subst = put_cmd_in_ert("\\negthickspace{}")
      document.body[j:end + 1] = subst
      reverted = True


def revert_math_spaces(document):
    "Revert formulas with protected custom space and protected hfills to TeX-code"
    i = 0
    while True:
      i = find_token(document.body, "\\begin_inset Formula", i)
      if i == -1:
        return
      j = document.body[i].find("\\hspace*")
      if j != -1:
        end = find_end_of_inset(document.body, i)
        subst = put_cmd_in_ert(document.body[i][21:])
        document.body[i:end + 1] = subst
      i = i + 1


def convert_japanese_encodings(document):
    " Rename the japanese encodings to names understood by platex "
    jap_enc_dict = {
        "EUC-JP-pLaTeX": "euc",
        "JIS-pLaTeX":    "jis",
        "SJIS-pLaTeX":   "sjis"
    }
    i = find_token(document.header, "\\inputencoding" , 0)
    if i == -1:
        return
    val = get_value(document.header, "\\inputencoding", i)
    if val in jap_enc_dict.keys():
        document.header[i] = "\\inputencoding %s" % jap_enc_dict[val]


def revert_japanese_encodings(document):
    " Revert the japanese encodings name changes "
    jap_enc_dict = {
        "euc":  "EUC-JP-pLaTeX",
        "jis":  "JIS-pLaTeX",
        "sjis": "SJIS-pLaTeX"
    }
    i = find_token(document.header, "\\inputencoding" , 0)
    if i == -1:
        return
    val = get_value(document.header, "\\inputencoding", i)
    if val in jap_enc_dict.keys():
        document.header[i] = "\\inputencoding %s" % jap_enc_dict[val]


def revert_justification(document):
    " Revert the \\justification buffer param"
    if not del_token(document.header, '\\justification', 0):
        document.warning("Malformed LyX document: Missing \\justification.")


def revert_australian(document):
    "Set English language variants Australian and Newzealand to English"

    if document.language == "australian" or document.language == "newzealand":
        document.language = "english"
        i = find_token(document.header, "\\language", 0)
        if i != -1:
            document.header[i] = "\\language english"

    j = 0
    while True:
        j = find_token(document.body, "\\lang australian", j)
        if j == -1:
            j = find_token(document.body, "\\lang newzealand", 0)
            if j == -1:
                return
            else:
                document.body[j] = document.body[j].replace("\\lang newzealand", "\\lang english")
        else:
            document.body[j] = document.body[j].replace("\\lang australian", "\\lang english")
        j += 1


def convert_biblio_style(document):
    "Add a sensible default for \\biblio_style based on the citation engine."
    i = find_token(document.header, "\\cite_engine", 0)
    if i != -1:
        engine = get_value(document.header, "\\cite_engine", i).split("_")[0]
        style = {"basic": "plain", "natbib": "plainnat", "jurabib": "jurabib"}
        document.header.insert(i + 1, "\\biblio_style " + style[engine])


def revert_biblio_style(document):
    "BibTeX insets with default option use the style defined by \\biblio_style."
    i = find_token(document.header, "\\biblio_style" , 0)
    if i == -1:
        document.warning("No \\biblio_style line. Nothing to do.")
        return

    default_style = get_value(document.header, "\\biblio_style", i)
    del document.header[i]

    # We are looking for bibtex insets having the default option
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CommandInset bibtex", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: Can't find end of bibtex inset at line " + str(i))
            i += 1
            return
        k = find_token(document.body, "options", i, j)
        if k != -1:
            options = get_quoted_value(document.body, "options", k)
            if "default" in options.split(","):
                document.body[k] = 'options "%s"' \
                    % options.replace("default", default_style)
        i = j


def handle_longtable_captions(document, forward):
    begin_table = 0
    while True:
        begin_table = find_token(document.body, '<lyxtabular version=', begin_table)
        if begin_table == -1:
            break
        end_table = find_end_of(document.body, begin_table, '<lyxtabular', '</lyxtabular>')
        if end_table == -1:
            document.warning("Malformed LyX document: Could not find end of table.")
            begin_table += 1
            continue
        fline = find_token(document.body, "<features", begin_table, end_table)
        if fline == -1:
            document.warning("Can't find features for inset at line " + str(begin_table))
            begin_table += 1
            continue
        p = document.body[fline].find("islongtable")
        if p == -1:
            # no longtable
            begin_table += 1
            continue
        numrows = get_option_value(document.body[begin_table], "rows")
        try:
            numrows = int(numrows)
        except:
            document.warning(document.body[begin_table])
            document.warning("Unable to determine rows!")
            begin_table = end_table
            continue
        begin_row = begin_table
        for row in range(numrows):
            begin_row = find_token(document.body, '<row', begin_row, end_table)
            if begin_row == -1:
                document.warning("Can't find row " + str(row + 1))
                break
            end_row = find_end_of(document.body, begin_row, '<row', '</row>')
            if end_row == -1:
                document.warning("Can't find end of row " + str(row + 1))
                break
            if forward:
                if (get_option_value(document.body[begin_row], 'caption') == 'true' and
                    get_option_value(document.body[begin_row], 'endfirsthead') != 'true' and
                    get_option_value(document.body[begin_row], 'endhead') != 'true' and
                    get_option_value(document.body[begin_row], 'endfoot') != 'true' and
                    get_option_value(document.body[begin_row], 'endlastfoot') != 'true'):
                    document.body[begin_row] = set_option_value(document.body[begin_row], 'caption', 'true", endfirsthead="true')
            elif get_option_value(document.body[begin_row], 'caption') == 'true':
                if get_option_value(document.body[begin_row], 'endfirsthead') == 'true':
                    document.body[begin_row] = set_option_value(document.body[begin_row], 'endfirsthead', 'false')
                if get_option_value(document.body[begin_row], 'endhead') == 'true':
                    document.body[begin_row] = set_option_value(document.body[begin_row], 'endhead', 'false')
                if get_option_value(document.body[begin_row], 'endfoot') == 'true':
                    document.body[begin_row] = set_option_value(document.body[begin_row], 'endfoot', 'false')
                if get_option_value(document.body[begin_row], 'endlastfoot') == 'true':
                    document.body[begin_row] = set_option_value(document.body[begin_row], 'endlastfoot', 'false')
            begin_row = end_row
        # since there could be a tabular inside this one, we
        # cannot jump to end.
        begin_table += 1


def convert_longtable_captions(document):
    "Add a firsthead flag to caption rows"
    handle_longtable_captions(document, True)


def revert_longtable_captions(document):
    "remove head/foot flag from caption rows"
    handle_longtable_captions(document, False)


def convert_use_packages(document):
    "use_xxx yyy => use_package xxx yyy"
    packages = ["amsmath", "esint", "mathdots", "mhchem", "undertilde"]
    for p in packages:
        i = find_token(document.header, "\\use_%s" % p , 0)
        if i != -1:
            value = get_value(document.header, "\\use_%s" % p , i)
            document.header[i] = "\\use_package %s %s" % (p, value)


def revert_use_packages(document):
    "use_package xxx yyy => use_xxx yyy"
    packages = {"amsmath":"1", "esint":"1", "mathdots":"1", "mhchem":"1", "undertilde":"1"}
    # the order is arbitrary for the use_package version, and not all packages need to be given.
    # Ensure a complete list and correct order (important for older LyX versions and especially lyx2lyx)
    j = -1
    for p in packages.keys():
        regexp = re.compile(r'(\\use_package\s+%s)' % p)
        i = find_re(document.header, regexp, 0)
        if i != -1:
            value = get_value(document.header, "\\use_package" , i).split()[1]
            del document.header[i]
            j = i
    for (p, v) in packages.items():
        document.header.insert(j, "\\use_%s %s"  % (p, value))
        j = j + 1


def convert_use_mathtools(document):
    "insert use_package mathtools"
    i = find_token(document.header, "\\use_package", 0)
    if i == -1:
        document.warning("Malformed LyX document: Can't find \\use_package.")
        return;
    j = find_token(document.preamble, "\\usepackage{mathtools}", 0)
    if j == -1:
        document.header.insert(i + 1, "\\use_package mathtools 0")
    else:
        document.header.insert(i + 1, "\\use_package mathtools 2")
        del document.preamble[j]


def revert_use_mathtools(document):
    "remove use_package mathtools"
    regexp = re.compile(r'(\\use_package\s+mathtools)')
    i = find_re(document.header, regexp, 0)
    value = "1" # default is auto
    if i != -1:
        value = get_value(document.header, "\\use_package" , i).split()[1]
        del document.header[i]
    if value == "2": # on
        add_to_preamble(document, ["\\usepackage{mathtools}"])
    elif value == "1": # auto
        commands = ["mathclap", "mathllap", "mathrlap", \
                    "lgathered", "rgathered", "vcentcolon", "dblcolon", \
                    "coloneqq", "Coloneqq", "coloneq", "Coloneq", "eqqcolon", \
                    "Eqqcolon", "eqcolon", "Eqcolon", "colonapprox", \
                    "Colonapprox", "colonsim", "Colonsim"]
        i = 0
        while True:
            i = find_token(document.body, '\\begin_inset Formula', i)
            if i == -1:
                return
            j = find_end_of_inset(document.body, i)
            if j == -1:
                document.warning("Malformed LyX document: Can't find end of Formula inset at line " + str(i))
                i += 1
                continue
            code = "\n".join(document.body[i:j])
            for c in commands:
                if code.find("\\%s" % c) != -1:
                    add_to_preamble(document, ["\\usepackage{mathtools}"])
                    return
            i = j


def convert_cite_engine_type(document):
    "Determine the \\cite_engine_type from the citation engine."
    i = find_token(document.header, "\\cite_engine", 0)
    if i == -1:
        return
    engine = get_value(document.header, "\\cite_engine", i)
    if "_" in engine:
        engine, type = engine.split("_")
    else:
        type = {"basic": "numerical", "jurabib": "authoryear"}[engine]
    document.header[i] = "\\cite_engine " + engine
    document.header.insert(i + 1, "\\cite_engine_type " + type)


def revert_cite_engine_type(document):
    "Natbib had the type appended with an underscore."
    engine_type = "numerical"
    i = find_token(document.header, "\\cite_engine_type" , 0)
    if i == -1:
        document.warning("No \\cite_engine_type line. Assuming numerical.")
    else:
        engine_type = get_value(document.header, "\\cite_engine_type", i)
        del document.header[i]

    # We are looking for the natbib citation engine
    i = find_token(document.header, "\\cite_engine natbib", i)
    if i == -1:
        return
    document.header[i] = "\\cite_engine natbib_" + engine_type


def revert_cancel(document):
    "add cancel to the preamble if necessary"
    commands = ["cancelto", "cancel", "bcancel", "xcancel"]
    i = 0
    while True:
        i = find_token(document.body, '\\begin_inset Formula', i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: Can't find end of Formula inset at line " + str(i))
            i += 1
            continue
        code = "\n".join(document.body[i:j])
        for c in commands:
            if code.find("\\%s" % c) != -1:
                add_to_preamble(document, ["\\usepackage{cancel}"])
                return
        i = j


def revert_verbatim(document):
    " Revert verbatim einvironments completely to TeX-code. "
    i = 0
    consecutive = False
    subst_end = ['\end_layout', '', '\\begin_layout Plain Layout',
    	         '\end_layout', '',
                 '\\begin_layout Plain Layout', '', '',
                 '\\backslash', '',
                 'end{verbatim}',
                 '\\end_layout', '', '\\end_inset',
                 '', '', '\\end_layout']
    subst_begin = ['\\begin_layout Standard', '\\noindent',
                   '\\begin_inset ERT', 'status collapsed', '',
                   '\\begin_layout Plain Layout', '', '', '\\backslash',
                   'begin{verbatim}',
                   '\\end_layout', '', '\\begin_layout Plain Layout', '']
    while 1:
        i = find_token(document.body, "\\begin_layout Verbatim", i)
        if i == -1:
            return
        j = find_end_of_layout(document.body, i)
        if j == -1:
            document.warning("Malformed lyx document: Can't find end of Verbatim layout")
            i += 1
            continue
        # delete all line breaks insets (there are no other insets)
        l = i
        while 1:
            n = find_token(document.body, "\\begin_inset Newline newline", l)
            if n == -1:
                n = find_token(document.body, "\\begin_inset Newline linebreak", l)
                if n == -1:
                    break
            m = find_end_of_inset(document.body, n)
            del(document.body[m:m+1])
            document.body[n:n+1] = ['\end_layout', '', '\\begin_layout Plain Layout']
            l += 1
            j += 1
        # consecutive verbatim environments need to be connected
        k = find_token(document.body, "\\begin_layout Verbatim", j)
        if k == j + 2 and consecutive == False:
            consecutive = True
            document.body[j:j+1] = ['\end_layout', '', '\\begin_layout Plain Layout']
            document.body[i:i+1] = subst_begin
            continue
        if k == j + 2 and consecutive == True:
            document.body[j:j+1] = ['\end_layout', '', '\\begin_layout Plain Layout']
            del(document.body[i:i+1])
            continue
        if k != j + 2 and consecutive == True:
            document.body[j:j+1] = subst_end
            # the next paragraph must not be indented
            document.body[j+19:j+19] = ['\\noindent']
            del(document.body[i:i+1])
            consecutive = False
            continue
        else:
            document.body[j:j+1] = subst_end
            # the next paragraph must not be indented
            document.body[j+19:j+19] = ['\\noindent']
            document.body[i:i+1] = subst_begin


def revert_tipa(document):
    " Revert native TIPA insets to mathed or ERT. "
    i = 0
    while 1:
        i = find_token(document.body, "\\begin_inset IPA", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Malformed lyx document: Can't find end of IPA inset")
            i += 1
            continue
        Multipar = False
        n = find_token(document.body, "\\begin_layout", i, j)
        if n == -1:
            document.warning("Malformed lyx document: IPA inset has no embedded layout")
            i += 1
            continue
        m = find_end_of_layout(document.body, n)
        if m == -1:
            document.warning("Malformed lyx document: Can't find end of embedded layout")
            i += 1
            continue
        content = document.body[n+1:m]
        p = find_token(document.body, "\\begin_layout", m, j)
        if p != -1 or len(content) > 1:
            Multipar = True
            content = document.body[i+1:j]
        if Multipar:
            # IPA insets with multiple pars need to be wrapped by \begin{IPA}...\end{IPA}
            document.body[i:j+1] = ['\\end_layout', '', '\\begin_layout Standard'] + put_cmd_in_ert("\\begin{IPA}") + ['\\end_layout'] + content + ['\\begin_layout Standard'] + put_cmd_in_ert("\\end{IPA}")
            add_to_preamble(document, ["\\usepackage{tipa,tipx}"])
        else:
            # single-par IPA insets can be reverted to mathed
            document.body[i:j+1] = ["\\begin_inset Formula $\\text{\\textipa{" + content[0] + "}}$", "\\end_inset"]
        i = j


##
# Conversion hub
#

supported_versions = ["2.1.0","2.1"]
convert = [
           [414, []],
           [415, [convert_undertilde]],
           [416, []],
           [417, [convert_japanese_encodings]],
           [418, []],
           [419, []],
           [420, [convert_biblio_style]],
           [421, [convert_longtable_captions]],
           [422, [convert_use_packages]],
           [423, [convert_use_mathtools]],
           [424, [convert_cite_engine_type]],
           [425, []],
           [426, []],
           [427, []]
          ]

revert =  [
           [426, [revert_tipa]],
           [425, [revert_verbatim]],
           [424, [revert_cancel]],
           [423, [revert_cite_engine_type]],
           [422, [revert_use_mathtools]],
           [421, [revert_use_packages]],
           [420, [revert_longtable_captions]],
           [419, [revert_biblio_style]],
           [418, [revert_australian]],
           [417, [revert_justification]],
           [416, [revert_japanese_encodings]],
           [415, [revert_negative_space, revert_math_spaces]],
           [414, [revert_undertilde]],
           [413, [revert_visible_space]]
          ]


if __name__ == "__main__":
    pass