lyx_mirror/lib/lyx2lyx/lyx_2_0.py

# -*- coding: utf-8 -*-
# This file is part of lyx2lyx
# -*- coding: utf-8 -*-
# Copyright (C) 2008 José Matos  <jamatos@lyx.org>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.

""" Convert files to the file format generated by lyx 2.0"""

import re, string
import unicodedata
import sys, os

from parser_tools import find_token, find_end_of, find_tokens, get_value, get_value_string

####################################################################
# Private helper functions

def find_end_of_inset(lines, i):
    """Find the end of an inset; the search includes lines[i].

    Thin wrapper around find_end_of() that pairs "\\begin_inset" with
    "\\end_inset" and hands back its result unchanged.
    """
    opening = "\\begin_inset"
    closing = "\\end_inset"
    return find_end_of(lines, i, opening, closing)


def add_to_preamble(document, text):
    """Append the lines in text to the preamble unless already there.

    text is a list of lines.  Only its first line is looked up in
    document.preamble; the remaining lines are not checked.
    """
    first_line = text[0]
    if find_token(document.preamble, first_line, 0) == -1:
        document.preamble.extend(text)


def insert_to_preamble(index, document, text):
    """Insert text as a single entry of document.preamble at position index."""
    preamble = document.preamble
    preamble.insert(index, text)


def read_unicodesymbols():
    """Read the unicodesymbols list of unicode characters and corresponding commands.

    The file "unicodesymbols" is opened relative to the directory of the
    running script (taken from sys.argv[0]).  Lines starting with '#', blank
    lines, lines that cannot be parsed and lines whose command does not
    start with a backslash are skipped.

    Returns a list of [command, character] pairs.  For commands matching the
    accent pattern compiled below, two more spellings are appended for the
    same character: one without the braces and one with a blank between the
    accent and the letter.
    """
    pathname = os.path.abspath(os.path.dirname(sys.argv[0]))
    # NOTE(review): str.strip() treats its argument as a set of characters
    # ('l', 'y', 'x', '2') removed from both ends, not as a suffix.  It is
    # kept as is; for a script directory named "lyx2lyx" it yields the
    # parent directory.
    fp = open(os.path.join(pathname.strip('lyx2lyx'), 'unicodesymbols'))
    spec_chars = []
    # Two backslashes, followed by some non-word character, and then a character
    # in brackets. The idea is to check for constructs like: \"{u}, which is how
    # they are written in the unicodesymbols file; but they can also be written
    # as: \"u or even \" u.
    r = re.compile(r'\\\\(\W)\{(\w)\}')
    try:
        for line in fp:
            if line[0] == '#' or line.strip() == "":
                continue
            line = line.replace(' "', ' ')  # remove all quotation marks with spaces before
            line = line.replace('" ', ' ')  # remove all quotation marks with spaces after
            line = line.replace(r'\"', '"')  # replace \" by " (for characters with diaeresis)
            try:
                [ucs4, command, dead] = line.split(None, 2)
                if command[0:1] != "\\":
                    continue
                # NOTE(review): eval() executes text read from the file; this
                # is only acceptable while unicodesymbols is a trusted file.
                char = unichr(eval(ucs4))
            except Exception:
                # Best effort: a line that does not parse is skipped.
                continue
            spec_chars.append([command, char])
            m = r.match(command)
            if m is None:
                continue
            command = "\\\\"
            # If the character is a double-quote, then we need to escape it, too,
            # since it is done that way in the LyX file.
            if m.group(1) == "\"":
                command += "\\"
            commandbl = command
            command += m.group(1) + m.group(2)
            commandbl += m.group(1) + ' ' + m.group(2)
            spec_chars.append([command, char])
            spec_chars.append([commandbl, char])
    finally:
        # Close the file even when reading or parsing raises.
        fp.close()
    return spec_chars


# List of [command, character] pairs, read once when this module is loaded.
unicode_reps = read_unicodesymbols()


# DO NOT USE THIS ROUTINE ANY MORE. Better yet, replace the uses that
# have been made of it with uses of put_cmd_in_ert.
def old_put_cmd_in_ert(string):
    """Wrap string in a collapsed ERT inset and return it as ONE string.

    Characters listed in unicode_reps are replaced by their commands, then
    every backslash becomes "\\backslash" followed by a newline.  The result
    contains embedded newlines.
    """
    text = string
    for command, char in unicode_reps:
        text = text.replace(char, command.replace('\\\\', '\\'))
    text = text.replace('\\', "\\backslash\n")
    header = "\\begin_inset ERT\nstatus collapsed\n\\begin_layout Plain Layout\n"
    footer = "\n\\end_layout\n\\end_inset"
    return header + text + footer


# This routine wraps some content in an ERT inset. It returns a
# LIST of strings. This is how lyx2lyx works: with a list of strings,
# each representing a line of a LyX file. Embedded newlines confuse
# lyx2lyx very much.
# For this same reason, we expect as input a LIST of strings, not
# something with embedded newlines. That said, if any of your strings
# do have embedded newlines, the string will eventually get split on
# them and you'll get a list back.
#
# A call to this routine will often go something like this:
#   i = find_token('\\begin_inset FunkyInset', ...)
#   ...
#   j = find_end_of_inset(document.body, i)
#   content = ...extract content from insets
#   ert = put_cmd_in_ert(content)
#   document.body[i:j] = ert
# Now, before we continue, we need to reset i appropriately. Normally,
# this would be:
#   i += len(ert)
# That puts us right after the ERT we just inserted.
def put_cmd_in_ert(strlist):
    """Wrap the content given in strlist in a collapsed ERT inset.

    strlist is a list of strings.  Characters listed in unicode_reps are
    replaced by their commands and every backslash becomes "\\backslash"
    followed by a line break.

    Returns a list of strings with no embedded newlines, so that
    len(result) is the number of lines inserted into the document.
    """
    # The blank line after the layout header is its own list element; it
    # used to be an embedded "\n" inside the header element.
    ret = ["\\begin_inset ERT", "status collapsed",
           "\\begin_layout Plain Layout", ""]
    # Despite the warnings just given, it will be faster for us to work
    # with a single string internally. That way, we only go through the
    # unicode_reps loop once.
    s = "\n".join(strlist)
    for rep in unicode_reps:
        s = s.replace(rep[1], rep[0].replace('\\\\', '\\'))
    s = s.replace('\\', "\\backslash\n")
    ret += s.splitlines()
    ret += ["\\end_layout", "\\end_inset"]
    return ret


def lyx2latex(document, lines):
    """Convert some LyX stuff into corresponding LaTeX stuff, as best we can.

    lines is a list of LyX lines.  Structural lines (layout and inset
    markers, language switches, inset status lines) are dropped, quote
    insets become plain LaTeX quotes, and the remaining lines are escaped
    and concatenated.  A textcomp snippet is added to the document preamble
    through add_to_preamble() for every line that is kept.

    Returns the converted content as one string.
    """
    # characters that need escaping in ordinary text
    special_chars = (
        ('&', '\\&{}'),
        ('#', '\\#{}'),
        ('^', '\\^{}'),
        ('%', '\\%{}'),
        ('_', '\\_{}'),
        ('$', '\\${}'),
    )
    # LyX font and markup switches and their LaTeX counterparts,
    # applied in exactly this order
    markup = (
        (r'\backslash', r'\textbackslash{}'),
        (r'\series bold', r'\bfseries{}'),
        (r'\series default', r'\mdseries{}'),
        (r'\shape italic', r'\itshape{}'),
        (r'\shape smallcaps', r'\scshape{}'),
        (r'\shape slanted', r'\slshape{}'),
        (r'\shape default', r'\upshape{}'),
        (r'\emph on', r'\em{}'),
        (r'\emph default', r'\em{}'),
        (r'\noun on', r'\scshape{}'),
        (r'\noun default', r'\upshape{}'),
        (r'\bar under', r'\underbar{'),
        (r'\bar default', r'}'),
        (r'\family sans', r'\sffamily{}'),
        (r'\family default', r'\normalfont{}'),
        (r'\family typewriter', r'\ttfamily{}'),
        (r'\family roman', r'\rmfamily{}'),
        (r'\InsetSpace ', r''),
        (r'\SpecialChar ', r''),
    )

    pieces = []
    ert_end = 0

    for curline, line in enumerate(lines):
        if line.startswith("\\begin_inset ERT"):
            # We don't want to replace things inside ERT, so remember
            # where the end of the inset is.
            ert_end = find_end_of_inset(lines, curline + 1)
            continue
        if line.startswith("\\begin_inset Formula"):
            line = line[20:]
        elif line.startswith("\\begin_inset Quotes"):
            # Very basic reversion: the first character of the quote type
            # (the language) is ignored, only side and single/double are used.
            qtype = line[20:].strip()
            mark = "'"
            if qtype[1] == "l":
                mark = "`"
            line = mark
            if qtype[2] == "d":
                line = mark + mark
        elif (line.isspace()
              or line.startswith("\\begin_layout")
              or line.startswith("\\end_layout")
              or line.startswith("\\begin_inset")
              or line.startswith("\\end_inset")
              or line.startswith("\\lang")
              or line.strip() in ("status collapsed", "status open")):
            # nothing of this ends up in the output
            continue

        # this needs to be added to the preamble because of cases like
        # \textmu, \textbackslash, etc.
        add_to_preamble(document, ['% added by lyx2lyx for converted index entries',
                                   '\\@ifundefined{textmu}',
                                   ' {\\usepackage{textcomp}}{}'])
        # a lossless reversion is not possible
        # try at least to handle some common insets and settings
        if ert_end >= curline:
            line = line.replace(r'\backslash', r'\\')
        else:
            for char, escaped in special_chars:
                line = line.replace(char, escaped)
            # Do the LyX text --> LaTeX conversion
            for rep in unicode_reps:
                line = line.replace(rep[1], rep[0] + "{}")
            for lyx_code, latex_code in markup:
                line = line.replace(lyx_code, latex_code)
        pieces.append(line)
    return "".join(pieces)


def latex_length(string):
    """Convert lengths to their LaTeX representation.

    string has the form ValueUnit+ValueUnit-ValueUnit or ValueUnit+-ValueUnit;
    the + and - parts (glue lengths) are optional and the + always precedes
    the -.

    Returns "True," + length when a relative (percent) unit was converted
    and "False," + length otherwise.  An empty string yields "False,".
    Raises ValueError when the value in front of a percent unit is not a
    number.
    """
    if not string:
        # nothing to convert; string[0] below would fail
        return "False,"

    percent = False
    # Convert relative lengths to LaTeX units
    units = {"text%":"\\textwidth", "col%":"\\columnwidth",
             "page%":"\\pagewidth", "line%":"\\linewidth",
             "theight%":"\\textheight", "pheight%":"\\pageheight"}
    for unit, latex_unit in units.items():
        # A unit may show up in the base length and in both glue parts,
        # so convert every occurrence.  The replacement text contains no
        # '%', hence each pass removes one occurrence and the loop ends.
        i = string.find(unit)
        while i != -1:
            percent = True
            # a '-' at position 0 is the sign of the value, not glue
            minus = string.rfind("-", 1, i)
            plus = string.rfind("+", 0, i)
            if plus > minus:
                start = plus + 1
            elif minus > plus:
                start = minus + 1
            else:
                # neither glue marker precedes the unit
                start = 0
            value = str(float(string[start:i]) / 100)
            string = string[:start] + value + latex_unit + string[i + len(unit):]
            i = string.find(unit)

    # replace + and -, but only if the - is not the first character
    string = string[0] + string[1:].replace("+", " plus ").replace("-", " minus ")
    # handle the case where "+-1mm" was used, because LaTeX only understands
    # "plus 1mm minus 1mm"
    if "plus  minus" in string:
        # lastvalue starts with the blank that precedes the final value
        lastvalue = string[string.rfind(" "):]
        string = string.replace("plus  minus", "plus" + lastvalue + " minus")
    if percent:
        return "True," + string
    return "False," + string


####################################################################


def revert_swiss(document):
    """Set language german-ch to ngerman.

    Updates document.language and the \\language header line when the
    document language is german-ch, and rewrites every "\\lang german-ch"
    switch found in the body.
    """
    old_switch = "\\lang german-ch"
    new_switch = "\\lang ngerman"
    if document.language == "german-ch":
        document.language = "ngerman"
        pos = find_token(document.header, "\\language", 0)
        if pos != -1:
            document.header[pos] = "\\language ngerman"
    pos = find_token(document.body, old_switch, 0)
    while pos != -1:
        document.body[pos] = document.body[pos].replace(old_switch, new_switch)
        pos = find_token(document.body, old_switch, pos + 1)


def _strip_tabularvalignment(features):
    """Return features cut off before its tabularvalignment attribute.

    Everything from the character in front of "tabularvalignment"
    (presumably the separating blank) onwards is dropped and the tag is
    closed again with '>'.  Without the attribute the line is returned
    unchanged.
    """
    q = features.find("tabularvalignment")
    if q > -1:
        return features[:q-1] + '>'
    return features


def revert_tabularvalign(document):
    """Revert the tabular valign option.

    The tabularvalignment attribute is removed from the line two below each
    "\\begin_inset Tabular".  Tables that are not longtables and are aligned
    top or bottom are additionally wrapped in a frameless box carrying that
    position; the box is closed in front of the first "</cell>" found after
    the inset start.
    """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Tabular", i)
        if i == -1:
            return
        j = find_token(document.body, "</cell>", i)
        if j == -1:
            document.warning("Malformed LyX document: Could not find end of tabular cell.")
            # Step past this inset.  Continuing from j (-1) would restart
            # the search and find the same inset again, forever.
            i += 1
            continue
        # the alignment is 2 lines below \\begin_inset Tabular
        features = document.body[i+2]

        # don't set a box for longtables, only delete tabularvalignment
        if features.find("islongtable") > -1:
            document.body[i+2] = _strip_tabularvalignment(features)
            i = i + 1
            continue

        # when no longtable: which valignment is specified?
        tabularvalignment = 'c'
        if features.find('tabularvalignment="top"') > -1:
            tabularvalignment = 't'
        if features.find('tabularvalignment="bottom"') > -1:
            tabularvalignment = 'b'
        # delete tabularvalignment
        document.body[i+2] = _strip_tabularvalignment(features)

        # don't add a box when centered
        if tabularvalignment == 'c':
            i = j
            continue
        # close the box first, so that i is still valid for the opening part
        document.body[j:j] = ['\\end_layout', '\\end_inset']
        subst = ['\\begin_inset Box Frameless',
            'position "' + tabularvalignment +'"',
            'hor_pos "c"',
            'has_inner_box 1',
            'inner_pos "c"',
            'use_parbox 0',
            # we don't know the width, assume 50%
            'width "50col%"',
            'special "none"',
            'height "1in"',
            'height_special "totalheight"',
            'status open',
            '',
            '\\begin_layout Plain Layout']
        document.body[i:i] = subst # this just inserts the array at i
        i += len(subst) + 2 # adjust i to save a few cycles


def revert_phantom(document):
    """Reverts phantom to ERT.

    The four lines starting at each "\\begin_inset Phantom Phantom" are
    replaced by an ERT inset opening the phantom command; the four lines
    starting at the next "\\end_layout" are replaced by "\\size default"
    and an ERT inset holding the closing brace.
    """
    opener = '\\begin_inset Phantom Phantom'
    ert_open = ('\\begin_inset ERT\nstatus collapsed\n\n'
                '\\begin_layout Plain Layout\n\n\n\\backslash\n'
                'phantom{\n\\end_layout\n\n\\end_inset\n')
    ert_close = ('\\size default\n\n\\begin_inset ERT\nstatus collapsed\n\n'
                 '\\begin_layout Plain Layout\n\n'
                 '}\n\\end_layout\n\n\\end_inset\n')
    body = document.body
    pos = find_token(body, opener, 0)
    while pos != -1:
        head = body[pos].replace(opener, ert_open).split('\n')
        body[pos:pos + 4] = head
        pos += len(head)
        end = find_token(body, "\\end_layout", pos)
        if end == -1:
            document.warning("Malformed LyX document: Could not find end of Phantom inset.")
            return
        tail = body[end].replace('\\end_layout', ert_close).split('\n')
        body[end:end + 4] = tail
        pos += len(tail)
        pos = find_token(body, opener, pos)


def revert_hphantom(document):
    """Reverts hphantom to ERT.

    The four lines starting at each "\\begin_inset Phantom HPhantom" are
    replaced by an ERT inset opening the hphantom command; the four lines
    starting at the next "\\end_layout" are replaced by "\\size default"
    and an ERT inset holding the closing brace.
    """
    opener = '\\begin_inset Phantom HPhantom'
    ert_open = ('\\begin_inset ERT\nstatus collapsed\n\n'
                '\\begin_layout Plain Layout\n\n\n\\backslash\n'
                'hphantom{\n\\end_layout\n\n\\end_inset\n')
    ert_close = ('\\size default\n\n\\begin_inset ERT\nstatus collapsed\n\n'
                 '\\begin_layout Plain Layout\n\n'
                 '}\n\\end_layout\n\n\\end_inset\n')
    body = document.body
    pos = find_token(body, opener, 0)
    while pos != -1:
        head = body[pos].replace(opener, ert_open).split('\n')
        body[pos:pos + 4] = head
        pos += len(head)
        end = find_token(body, "\\end_layout", pos)
        if end == -1:
            document.warning("Malformed LyX document: Could not find end of HPhantom inset.")
            return
        tail = body[end].replace('\\end_layout', ert_close).split('\n')
        body[end:end + 4] = tail
        pos += len(tail)
        pos = find_token(body, opener, pos)


def revert_vphantom(document):
    """Reverts vphantom to ERT.

    The four lines starting at each "\\begin_inset Phantom VPhantom" are
    replaced by an ERT inset opening the vphantom command; the four lines
    starting at the next "\\end_layout" are replaced by "\\size default"
    and an ERT inset holding the closing brace.
    """
    opener = '\\begin_inset Phantom VPhantom'
    ert_open = ('\\begin_inset ERT\nstatus collapsed\n\n'
                '\\begin_layout Plain Layout\n\n\n\\backslash\n'
                'vphantom{\n\\end_layout\n\n\\end_inset\n')
    ert_close = ('\\size default\n\n\\begin_inset ERT\nstatus collapsed\n\n'
                 '\\begin_layout Plain Layout\n\n'
                 '}\n\\end_layout\n\n\\end_inset\n')
    body = document.body
    pos = find_token(body, opener, 0)
    while pos != -1:
        head = body[pos].replace(opener, ert_open).split('\n')
        body[pos:pos + 4] = head
        pos += len(head)
        end = find_token(body, "\\end_layout", pos)
        if end == -1:
            document.warning("Malformed LyX document: Could not find end of VPhantom inset.")
            return
        tail = body[end].replace('\\end_layout', ert_close).split('\n')
        body[end:end + 4] = tail
        pos += len(tail)
        pos = find_token(body, opener, pos)


def _xetex_font_setting(document, token):
    """Return the value following token in the header, or "default".

    A warning is issued when no header line for token is found or when the
    line carries no value.
    """
    pos = find_token(document.header, token, 0)
    if pos != -1:
        m = re.match(re.escape(token) + r' (.*)$', document.header[pos])
        if m is not None:
            return m.group(1)
    document.warning("Malformed LyX document: Missing " + token + ".")
    return "default"


def _xetex_font_scale(document, token):
    """Return the header value for token as a float, 100.0 if unusable."""
    try:
        return float(get_value(document.header, token, 0))
    except (TypeError, ValueError):
        return 100.0


def _xetex_reset_header(document, token, value):
    """Set the header line for token to value; warn when it is missing."""
    pos = find_token(document.header, token, 0)
    if pos == -1:
        document.warning("Malformed LyX document: Missing " + token + ".")
    else:
        document.header[pos] = token + " " + value


def revert_xetex(document):
    """Reverts documents that use XeTeX.

    The \\use_xetex header line is always removed.  When XeTeX was in use,
    the input encoding is set to utf8-plain, the font settings are moved
    into fontspec commands inserted at the top of the preamble, and the
    font header lines are reset to their defaults.

    A header line that is missing only produces a warning; its setting is
    then treated as "default" (scale 100) and no other line is touched.
    """
    i = find_token(document.header, '\\use_xetex', 0)
    if i == -1:
        document.warning("Malformed LyX document: Missing \\use_xetex.")
        return
    uses_xetex = get_value(document.header, "\\use_xetex", i) != 'false'
    del document.header[i]
    if not uses_xetex:
        return

    # 1.) set doc encoding to utf8-plain
    i = find_token(document.header, "\\inputencoding", 0)
    if i == -1:
        document.warning("Malformed LyX document: Missing \\inputencoding.")
    else:
        document.header[i] = "\\inputencoding utf8-plain"

    # 2.) check font settings
    roman = _xetex_font_setting(document, "\\font_roman")
    sans = _xetex_font_setting(document, "\\font_sans")
    typewriter = _xetex_font_setting(document, "\\font_typewriter")
    osf = get_value(document.header, '\\font_osf', 0) == "true"
    sf_scale = _xetex_font_scale(document, '\\font_sf_scale')
    tt_scale = _xetex_font_scale(document, '\\font_tt_scale')

    # 3.) set preamble stuff
    pretext = '%% This document must be processed with xelatex!\n'
    pretext += '\\usepackage{fontspec}\n'
    if roman != "default":
        pretext += '\\setmainfont[Mapping=tex-text]{' + roman + '}\n'
    if sans != "default":
        pretext += '\\setsansfont['
        if sf_scale != 100:
            pretext += 'Scale=' + str(sf_scale / 100) + ','
        pretext += 'Mapping=tex-text]{' + sans + '}\n'
    if typewriter != "default":
        pretext += '\\setmonofont'
        if tt_scale != 100:
            pretext += '[Scale=' + str(tt_scale / 100) + ']'
        pretext += '{' + typewriter + '}\n'
    if osf:
        pretext += '\\defaultfontfeatures{Numbers=OldStyle}\n'
    # the backslash is written escaped: a bare "\u" is an invalid escape
    # in Python 3 string literals
    pretext += '\\usepackage{xunicode}\n'
    pretext += '\\usepackage{xltxtra}\n'
    insert_to_preamble(0, document, pretext)

    # 4.) reset font settings
    for token, value in (("\\font_roman", "default"),
                         ("\\font_sans", "default"),
                         ("\\font_typewriter", "default"),
                         ("\\font_osf", "false"),
                         ("\\font_sc", "false"),
                         ("\\font_sf_scale", "100"),
                         ("\\font_tt_scale", "100")):
        _xetex_reset_header(document, token, value)


def revert_outputformat(document):
    """Remove default output format param.

    Deletes the \\default_output_format header line; a warning is issued
    when it is not present.
    """
    pos = find_token(document.header, '\\default_output_format', 0)
    if pos != -1:
        del document.header[pos]
        return
    document.warning("Malformed LyX document: Missing \\default_output_format.")


def _backgroundcolor_ratio(hexpair):
    """Convert a two-digit hex colour component into a float ratio.

    Raises ValueError when hexpair is not a hexadecimal number.
    """
    value = int(hexpair, 16)
    # we want the output "0.5" for the value "127" therefore add here
    if value != 0:
        value = value + 1
    return float(value) / 256


def revert_backgroundcolor(document):
    """Reverts background color to preamble code.

    Every \\backgroundcolor header line is removed.  Unless the colour is
    "#ffffff", a colour definition and a \\pagecolor command are inserted
    at the top of the preamble.  A value that is not of the form #rrggbb
    is dropped with a warning.
    """
    i = 0
    while True:
        i = find_token(document.header, "\\backgroundcolor", i)
        if i == -1:
            return
        colorcode = get_value(document.header, '\\backgroundcolor', i)
        del document.header[i]
        # don't clutter the preamble if backgroundcolor is not set
        if colorcode == "#ffffff":
            continue
        # the color code is in the form #rrggbb where every character
        # denotes a hex number
        try:
            redout, greenout, blueout = [_backgroundcolor_ratio(colorcode[k:k+2])
                                         for k in (1, 3, 5)]
        except ValueError:
            document.warning("Malformed LyX document: Invalid \\backgroundcolor value.")
            continue
        # write the preamble
        insert_to_preamble(0, document,
                             '% Commands inserted by lyx2lyx to set the background color\n'
                             + '\\@ifundefined{definecolor}{\\usepackage{color}}{}\n'
                             + '\\definecolor{page_backgroundcolor}{rgb}{'
                             + str(redout) + ', ' + str(greenout)
                             + ', ' + str(blueout) + '}\n'
                             + '\\pagecolor{page_backgroundcolor}\n')


def revert_splitindex(document):
    """Reverts splitindex-aware documents.

    Removes \\use_indices and the index definitions from the header; when
    indices were in use, matching splitidx commands are put into the
    preamble.  In the body, Index insets lose their type or, for
    non-default indices, become ERT with an \\sindex command; index_print
    insets lose their type parameter, are dropped, or become ERT.
    """
    i = find_token(document.header, '\\use_indices', 0)
    if i == -1:
        document.warning("Malformed LyX document: Missing \\use_indices.")
        return
    indices = get_value(document.header, "\\use_indices", i)
    preamble = ""
    if indices == "true":
        preamble += "\\usepackage{splitidx}\n"
    del document.header[i]

    # index definitions in the header
    index_re = re.compile(r'\\index (.*)$')
    i = 0
    while True:
        i = find_token(document.header, "\\index", i)
        if i == -1:
            break
        m = index_re.match(document.header[i])
        if m is None:
            # not an index definition (no "\index <name>"), leave it alone
            i += 1
            continue
        k = find_token(document.header, "\\end_index", i)
        if k == -1:
            document.warning("Malformed LyX document: Missing \\end_index.")
            return
        iname = m.group(1)
        ishortcut = get_value(document.header, '\\shortcut', i, k)
        if ishortcut != "" and indices == "true":
            preamble += "\\newindex[" + iname + "]{" + ishortcut + "}\n"
        del document.header[i:k+1]
        i = 0
    if preamble != "":
        insert_to_preamble(0, document, preamble)

    # Index insets
    inset_re = re.compile(r'\\begin_inset Index (.*)$')
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Index", i)
        if i == -1:
            break
        m = inset_re.match(document.body[i])
        if m is None:
            # no index type given, nothing to revert
            i += 1
            continue
        itype = m.group(1)
        if itype == "idx" or indices == "false":
            document.body[i] = "\\begin_inset Index"
        else:
            k = find_end_of_inset(document.body, i)
            if k == -1:
                document.warning("Malformed LyX document: Could not find end of Index inset.")
                return
            content = lyx2latex(document, document.body[i:k])
            # escape quotes
            content = content.replace('"', r'\"')
            subst = [old_put_cmd_in_ert("\\sindex[" + itype + "]{" + content + "}")]
            document.body[i:k+1] = subst
        i = i + 1

    # index_print insets
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CommandInset index_print", i)
        if i == -1:
            return
        k = find_end_of_inset(document.body, i)
        if k == -1:
            # With k == -1 the slice below would insert without removing
            # anything, and the same inset would be found again forever.
            document.warning("Malformed LyX document: Could not find end of index_print inset.")
            i += 1
            continue
        ptype = get_value(document.body, 'type', i, k).strip('"')
        if ptype == "idx":
            j = find_token(document.body, "type", i, k)
            del document.body[j]
        elif indices == "false":
            del document.body[i:k+1]
        else:
            subst = [old_put_cmd_in_ert("\\printindex[" + ptype + "]{}")]
            document.body[i:k+1] = subst
        i = i + 1


def convert_splitindex(document):
    """Converts index and printindex insets to splitindex-aware format.

    Every Index inset gets the type "idx"; every index_print inset gets a
    type "idx" parameter after its LatexCommand line.  Conversion stops
    with a warning at an index_print inset whose next line is not the
    printindex command.
    """
    body = document.body

    index_head = "\\begin_inset Index"
    pos = find_token(body, index_head, 0)
    while pos != -1:
        body[pos] = body[pos].replace(index_head, "\\begin_inset Index idx")
        pos = find_token(body, index_head, pos + 1)

    print_head = "\\begin_inset CommandInset index_print"
    pos = find_token(body, print_head, 0)
    while pos != -1:
        if body[pos + 1].find('LatexCommand printindex') == -1:
            document.warning("Malformed LyX document: Incomplete printindex inset.")
            return
        body[pos + 1:pos + 2] = ["LatexCommand printindex", "type \"idx\""]
        pos = find_token(body, print_head, pos + 1)


def revert_subindex(document):
    """Reverts \\printsubindex CommandInset types.

    index_print insets using the printsubindex command are removed when
    \\use_indices is "false" and replaced by ERT otherwise.  Other
    index_print insets are left alone.
    """
    i = find_token(document.header, '\\use_indices', 0)
    if i == -1:
        document.warning("Malformed LyX document: Missing \\use_indices.")
        return
    indices = get_value(document.header, "\\use_indices", i)
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CommandInset index_print", i)
        if i == -1:
            return
        k = find_end_of_inset(document.body, i)
        if k == -1:
            # With k == -1 the slice below would insert without removing
            # anything, and the same inset would be found again forever.
            document.warning("Malformed LyX document: Could not find end of index_print inset.")
            i += 1
            continue
        ctype = get_value(document.body, 'LatexCommand', i, k)
        if ctype != "printsubindex":
            i = i + 1
            continue
        ptype = get_value(document.body, 'type', i, k).strip('"')
        if indices == "false":
            del document.body[i:k+1]
        else:
            subst = [old_put_cmd_in_ert("\\printsubindex[" + ptype + "]{}")]
            document.body[i:k+1] = subst
        i = i + 1


def revert_printindexall(document):
    """Reverts \\print[sub]index* CommandInset types.

    index_print insets using printindex* or printsubindex* are removed
    when \\use_indices is "false" and replaced by ERT otherwise.  Other
    index_print insets are left alone.
    """
    i = find_token(document.header, '\\use_indices', 0)
    if i == -1:
        document.warning("Malformed LyX document: Missing \\use_indices.")
        return
    indices = get_value(document.header, "\\use_indices", i)
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CommandInset index_print", i)
        if i == -1:
            return
        k = find_end_of_inset(document.body, i)
        if k == -1:
            # With k == -1 the slice below would insert without removing
            # anything, and the same inset would be found again forever.
            document.warning("Malformed LyX document: Could not find end of index_print inset.")
            i += 1
            continue
        ctype = get_value(document.body, 'LatexCommand', i, k)
        if ctype != "printindex*" and ctype != "printsubindex*":
            i = i + 1
            continue
        if indices == "false":
            del document.body[i:k+1]
        else:
            subst = [old_put_cmd_in_ert("\\" + ctype + "{}")]
            document.body[i:k+1] = subst
        i = i + 1


def revert_strikeout(document):
    """Reverts \\strikeout character style.

    Deletes every body line that find_token() reports for "\\strikeout".
    """
    pos = find_token(document.body, '\\strikeout', 0)
    while pos != -1:
        del document.body[pos]
        pos = find_token(document.body, '\\strikeout', 0)


def revert_uulinewave(document):
    """Reverts \\uuline, and \\uwave character styles.

    Deletes every body line that find_token() reports for "\\uuline" and
    then for "\\uwave".
    """
    for token in ('\\uuline', '\\uwave'):
        pos = find_token(document.body, token, 0)
        while pos != -1:
            del document.body[pos]
            pos = find_token(document.body, token, 0)


def revert_ulinelatex(document):
    """Reverts \\uline character style.

    When the body contains "\\bar under", a block of ulem-based commands
    redefining \\underbar is inserted at the top of the preamble.
    """
    if find_token(document.body, '\\bar under', 0) == -1:
        return
    preamble_parts = (
        '% Commands inserted by lyx2lyx for proper underlining\n',
        '\\PassOptionsToPackage{normalem}{ulem}\n',
        '\\usepackage{ulem}\n',
        '\\let\\cite@rig\\cite\n',
        '\\newcommand{\\b@xcite}[2][\\%]{\\def\\def@pt{\\%}\\def\\pas@pt{#1}\n',
        '  \\mbox{\\ifx\\def@pt\\pas@pt\\cite@rig{#2}\\else\\cite@rig[#1]{#2}\\fi}}\n',
        '\\renewcommand{\\underbar}[1]{{\\let\\cite\\b@xcite\\uline{#1}}}\n',
    )
    insert_to_preamble(0, document, "".join(preamble_parts))


def revert_custom_processors(document):
    """Remove bibtex_command and index_command params.

    Both header lines are handled independently: a missing one produces a
    warning but does not keep the other from being removed.
    """
    for token in ('\\bibtex_command', '\\index_command'):
        i = find_token(document.header, token, 0)
        if i == -1:
            document.warning("Malformed LyX document: Missing " + token + ".")
        else:
            del document.header[i]


def convert_nomencl_width(document):
    """Add set_width param to nomencl_print.

    The parameter line is inserted two lines below the start of every
    nomencl_print inset.
    """
    marker = "\\begin_inset CommandInset nomencl_print"
    pos = find_token(document.body, marker, 0)
    while pos != -1:
        document.body.insert(pos + 2, "set_width \"none\"")
        pos = find_token(document.body, marker, pos + 1)


def revert_nomencl_width(document):
    """Remove set_width param from nomencl_print.

    Insets without a set_width line, or without a detectable end, are
    skipped with a warning.
    """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CommandInset nomencl_print", i)
        if i == -1:
            break
        j = find_end_of_inset(document.body, i)
        if j == -1:
            # Continuing from j (-1) would restart the search from the
            # wrong place; step past this inset instead.
            document.warning("Malformed LyX document: Could not find end of nomencl_print inset.")
            i += 1
            continue
        l = find_token(document.body, "set_width", i, j)
        if l == -1:
            document.warning("Can't find set_width option for nomencl_print!")
            i = j
            continue
        del document.body[l]
        i = i + 1


def revert_nomencl_cwidth(document):
    """Remove width param from nomencl_print.

    The removed width is written to the preamble as the length of
    \\nomlabelwidth.  Insets without a width line are skipped silently;
    insets without a detectable end are skipped with a warning.
    """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CommandInset nomencl_print", i)
        if i == -1:
            break
        j = find_end_of_inset(document.body, i)
        if j == -1:
            # Continuing from j (-1) would restart the search from the
            # wrong place; step past this inset instead.
            document.warning("Malformed LyX document: Could not find end of nomencl_print inset.")
            i += 1
            continue
        l = find_token(document.body, "width", i, j)
        if l == -1:
            # Can't find width option for nomencl_print
            i = j
            continue
        width = get_value(document.body, "width", i, j).strip('"')
        del document.body[l]
        add_to_preamble(document, ["% this command was inserted by lyx2lyx"])
        add_to_preamble(document, ["\\setlength{\\nomlabelwidth}{" + width + "}"])
        i = i + 1


def revert_applemac(document):
    """Revert applemac encoding to auto.

    Only acts when document.encoding is "applemac"; the \\encoding header
    line is rewritten when present.
    """
    if document.encoding != "applemac":
        return
    document.encoding = "auto"
    pos = find_token(document.header, "\\encoding", 0)
    if pos != -1:
        document.header[pos] = "\\encoding auto"


def revert_longtable_align(document):
    """Remove longtable alignment setting.

    For every Tabular inset the line two below its start is cut off in
    front of "longtabularalignment" and closed again with '>'.  Tables
    without that attribute are skipped.
    """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Tabular", i)
        if i == -1:
            break
        # the alignment is 2 lines below \\begin_inset Tabular
        features = document.body[i+2]
        j = features.find("longtabularalignment")
        if j != -1:
            document.body[i+2] = features[:j-1] + '>'
        # A table without the attribute must not end the search: later
        # tables may still carry it.
        i = i + 1


def revert_branch_filename(document):
    """ Remove \\filename_suffix parameter from branches

    Deletes every header line found by searching for the
    \\filename_suffix token.
    """
    pos = find_token(document.header, "\\filename_suffix", 0)
    while pos != -1:
        del document.header[pos]
        # the following line has moved into this slot, so search from here
        pos = find_token(document.header, "\\filename_suffix", pos)


def revert_paragraph_indentation(document):
    """ Revert custom paragraph indentation to preamble code

    The \\paragraph_indentation header line is always removed.  Unless its
    value is "default", the value is converted with latex_length and
    written to the preamble as a \\setlength{\\parindent}{...} command.
    """
    pos = 0
    while True:
        pos = find_token(document.header, "\\paragraph_indentation", pos)
        if pos == -1:
            break
        value = get_value(document.header, "\\paragraph_indentation", pos)
        is_default = (value == "default")
        if not is_default:
            # latex_length returns "bool,length"; only the length part
            # (with percent lengths already handled) is needed here
            tex_length = latex_length(value).split(",")[1]
            add_to_preamble(document, ["% this command was inserted by lyx2lyx"])
            add_to_preamble(document, ["\\setlength{\\parindent}{" + tex_length + "}"])
        del document.header[pos]
        if is_default:
            # nothing more to do once the default setting has been dropped
            break
        pos = pos + 1


def revert_percent_skip_lengths(document):
    """ Revert relative lengths for paragraph skip separation to preamble code

    A \\defskip header value that is not one of the named skips and that
    latex_length reports as a percent length is moved to the preamble as
    \\setlength{\\parskip}{...}; the header entry is then reset to medskip.
    """
    named_skips = ('smallskip', 'medskip', 'bigskip')
    pos = 0
    while True:
        pos = find_token(document.header, "\\defskip", pos)
        if pos == -1:
            break
        skip = get_value(document.header, "\\defskip", pos)
        if skip not in named_skips:
            # latex_length returns "bool,length"
            fields = latex_length(skip).split(",")
            is_percent = fields[0]
            tex_length = fields[1]
            if is_percent == "True":
                add_to_preamble(document, ["% this command was inserted by lyx2lyx"])
                add_to_preamble(document, ["\\setlength{\\parskip}{" + tex_length + "}"])
                # set defskip to medskip as default
                document.header[pos] = "\\defskip medskip"
        pos += 1


def revert_percent_vspace_lengths(document):
    """ Revert relative VSpace lengths to ERT

    VSpace insets whose length is not one of the named skips and that
    latex_length reports as a percent length are replaced by an ERT
    containing \\vspace{...} (or \\vspace*{...} for the starred form).
    Inset lines that carry no length are reported and skipped.
    """
    # compiled once; the pattern does not change between iterations
    vspace = re.compile(r'\\begin_inset VSpace (.*)$')
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset VSpace", i)
        if i == -1:
            break
        m = vspace.match(document.body[i])
        if m is None:
            # the token matched but no length follows it
            document.warning("Malformed LyX document: Could not parse VSpace inset.")
            i = i + 1
            continue
        # only revert if a custom length was set and if
        # it used a percent length
        length = m.group(1)
        if length not in ('defskip', 'smallskip', 'medskip', 'bigskip', 'vfill'):
            # check if the space has a star (protected space)
            protected = (document.body[i].rfind("*") != -1)
            if protected:
                length = length.rstrip('*')
            # handle percent lengths
            length = latex_length(length)
            # latex_length returns "bool,length"
            percent = length.split(",")[0]
            length = length.split(",")[1]
            # revert the VSpace inset to ERT
            if percent == "True":
                if protected:
                    subst = [old_put_cmd_in_ert("\\vspace*{" + length + "}")]
                else:
                    subst = [old_put_cmd_in_ert("\\vspace{" + length + "}")]
                # replaces the inset's first line and the line after it
                document.body[i:i+2] = subst
        i = i + 1


def revert_percent_hspace_lengths(document):
    """ Revert relative HSpace lengths to ERT

    \\hspace insets whose \\length value is reported by latex_length as a
    percent length are replaced by an ERT containing \\hspace{...} (or
    \\hspace*{...} for the protected form).  Processing stops with a
    warning when an inset has no \\length value.
    """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset space \\hspace", i)
        if i == -1:
            break
        protected = (document.body[i].find("\\hspace*{}") != -1)
        # only revert if a custom length was set and if
        # it used a percent length
        length = get_value(document.body, '\\length', i+1)
        if length == '':
            document.warning("Malformed lyx document: Missing '\\length' in Space inset.")
            return
        # handle percent lengths
        length = latex_length(length)
        # latex_length returns "bool,length"
        percent = length.split(",")[0]
        length = length.split(",")[1]
        # revert the HSpace inset to ERT
        if percent == "True":
            if protected:
                subst = [old_put_cmd_in_ert("\\hspace*{" + length + "}")]
            else:
                subst = [old_put_cmd_in_ert("\\hspace{" + length + "}")]
            # three body lines are collapsed into a single entry here
            document.body[i:i+3] = subst
        # Step by one only: after the replacement above the line that
        # followed the inset sits at i+1 and must not be skipped.
        i = i + 1


def revert_hspace_glue_lengths(document):
    """ Revert HSpace glue lengths to ERT

    \\hspace insets whose \\length value contains a plus or minus sign
    after the first character are replaced by an ERT containing
    \\hspace{...} (or \\hspace*{...} for the protected form).  Processing
    stops with a warning when an inset has no \\length value.
    """
    # a plus or minus at pos != 0 marks a glue length; compiled once
    glue = re.compile(r'.+[\+-]')
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset space \\hspace", i)
        if i == -1:
            break
        protected = (document.body[i].find("\\hspace*{}") != -1)
        length = get_value(document.body, '\\length', i+1)
        if length == '':
            document.warning("Malformed lyx document: Missing '\\length' in Space inset.")
            return
        if glue.search(length):
            # handle percent lengths
            # latex_length returns "bool,length"
            length = latex_length(length).split(",")[1]
            # revert the HSpace inset to ERT
            if protected:
                subst = [old_put_cmd_in_ert("\\hspace*{" + length + "}")]
            else:
                subst = [old_put_cmd_in_ert("\\hspace{" + length + "}")]
            # three body lines are collapsed into a single entry here
            document.body[i:i+3] = subst
        # Step by one only: after the replacement above the line that
        # followed the inset sits at i+1 and must not be skipped.
        i = i + 1

def convert_author_id(document):
    """ Add the author_id to the \\author definition and make sure 0 is not used

    Header \\author lines are numbered starting at 1 in order of
    appearance.  Every three-field \\change_ line in the body has its
    numeric author field increased by one.
    """
    author_re = re.compile(r'(\\author) (\".*\")\s?(.*)$')
    pos = 0
    next_id = 1
    while True:
        pos = find_token(document.header, "\\author", pos)
        if pos == -1:
            break
        match = author_re.match(document.header[pos])
        if match is not None:
            name = match.group(2)
            if match.lastindex == 3:
                email = match.group(3)
            else:
                email = ''
            document.header[pos] = "\\author %i %s %s" % (next_id, name, email)
        # the counter advances for every \author line, matched or not
        next_id += 1
        pos += 1

    pos = 0
    while True:
        pos = find_token(document.body, "\\change_", pos)
        if pos == -1:
            break
        fields = document.body[pos].split(' ')
        if len(fields) == 3:
            kind, old_id, stamp = fields
            document.body[pos] = "%s %i %s" % (kind, int(old_id) + 1, stamp)
        pos += 1

def revert_author_id(document):
    """ Remove the author_id from the \\author definition

    Header \\author lines lose their numeric id; the id is remembered
    together with the zero-based position of the line among the \\author
    lines.  Every three-field \\change_ line in the body then has its
    author field replaced by that position.
    """
    author_re = re.compile(r'(\\author) (\d+) (\".*\")\s?(.*)$')
    position_of = {}
    pos = 0
    ordinal = 0
    while True:
        pos = find_token(document.header, "\\author", pos)
        if pos == -1:
            break
        match = author_re.match(document.header[pos])
        if match is not None:
            position_of[int(match.group(2))] = ordinal
            name = match.group(3)
            if match.lastindex == 4:
                email = match.group(4)
            else:
                email = ''
            document.header[pos] = "\\author %s %s" % (name, email)
        pos += 1
        # the ordinal advances for every \author line, matched or not
        ordinal += 1

    pos = 0
    while True:
        pos = find_token(document.body, "\\change_", pos)
        if pos == -1:
            break
        fields = document.body[pos].split(' ')
        if len(fields) == 3:
            kind, old_id, stamp = fields
            document.body[pos] = "%s %i %s" % (kind, position_of[int(old_id)], stamp)
        pos += 1


def revert_suppress_date(document):
    """ Revert suppressing of default document date to preamble code

    The \\suppress_date header line is removed; when its value was "true"
    an empty \\date{} command is added to the preamble instead.
    """
    pos = find_token(document.header, "\\suppress_date", 0)
    while pos != -1:
        suppressed = get_value(document.header, "\\suppress_date", pos)
        if suppressed == "true":
            add_to_preamble(document, ["% this command was inserted by lyx2lyx"])
            add_to_preamble(document, ["\\date{}"])
        del document.header[pos]
        pos = find_token(document.header, "\\suppress_date", pos + 1)


def revert_mhchem(document):
    """ Revert mhchem loading to preamble code

    The header value "\\use_mhchem 1" is treated as "auto" and
    "\\use_mhchem 2" as "on"; any other value leaves the package off.
    In "auto" mode the package is switched on only if a body line is found
    for the "\\cf{" or "\\ce{" token.  When on, the mhchem loading commands
    are added to the preamble.  Finally the \\use_mhchem header line is
    removed; a warning is issued if there is none.
    """
    # Start from a defined state so that headers carrying neither value 1
    # nor value 2 (or no setting at all) do not hit an unbound variable.
    mhchem = "off"
    if find_token(document.header, "\\use_mhchem 1", 0) != -1:
        mhchem = "auto"
    elif find_token(document.header, "\\use_mhchem 2", 0) != -1:
        mhchem = "on"
    if mhchem == "auto":
        if find_token(document.body, "\\cf{", 0) != -1:
            mhchem = "on"
        elif find_token(document.body, "\\ce{", 0) != -1:
            mhchem = "on"
    if mhchem == "on":
        add_to_preamble(document, ["% this command was inserted by lyx2lyx"])
        add_to_preamble(document, ["\\PassOptionsToPackage{version=3}{mhchem}"])
        add_to_preamble(document, ["\\usepackage{mhchem}"])
    k = find_token(document.header, "\\use_mhchem", 0)
    if k == -1:
        document.warning("Malformed LyX document: Could not find mhchem setting.")
        return
    del document.header[k]


def revert_fontenc(document):
    """ Remove fontencoding param

    Deletes the \\fontencoding header line, or warns if there is none.
    """
    pos = find_token(document.header, '\\fontencoding', 0)
    if pos != -1:
        del document.header[pos]
    else:
        document.warning("Malformed LyX document: Missing \\fontencoding.")


def merge_gbrief(document):
    """ Merge g-brief-en and g-brief-de to one class

    A g-brief-en document only has its text class renamed to g-brief.
    A g-brief-de document additionally has its German layout names in the
    body replaced by the names listed in the table below.  Documents of
    any other class are left untouched.
    """
    if document.textclass == "g-brief-en":
        document.textclass = "g-brief"
        document.set_textclass()
        return
    if document.textclass != "g-brief-de":
        return

    obsoletedby = {
        "Brieftext":       "Letter",
        "Unterschrift":    "Signature",
        "Strasse":         "Street",
        "Zusatz":          "Addition",
        "Ort":             "Town",
        "Land":            "State",
        "RetourAdresse":   "ReturnAddress",
        "MeinZeichen":     "MyRef",
        "IhrZeichen":      "YourRef",
        "IhrSchreiben":    "YourMail",
        "Telefon":         "Phone",
        "BLZ":             "BankCode",
        "Konto":           "BankAccount",
        "Postvermerk":     "PostalComment",
        "Adresse":         "Address",
        "Datum":           "Date",
        "Betreff":         "Reference",
        "Anrede":          "Opening",
        "Anlagen":         "Encl.",
        "Verteiler":       "cc",
        "Gruss":           "Closing",
    }
    # the layout name is whatever follows "\begin_layout " on the line
    name_start = len("\\begin_layout ")
    pos = find_token(document.body, "\\begin_layout", 0)
    while pos != -1:
        replacement = obsoletedby.get(document.body[pos][name_start:])
        if replacement is not None:
            document.body[pos] = "\\begin_layout " + replacement
        pos = find_token(document.body, "\\begin_layout", pos + 1)

    document.textclass = "g-brief"
    document.set_textclass()


def revert_gbrief(document):
    """ Revert g-brief to g-brief-en

    Documents of any other text class are left untouched.
    """
    if document.textclass != "g-brief":
        return
    document.textclass = "g-brief-en"
    document.set_textclass()


def revert_html_options(document):
    """ Remove html options

    Deletes the first \\html_use_mathml and the first \\html_be_strict
    line from the header, if present.
    """
    for token in ('\\html_use_mathml', '\\html_be_strict'):
        pos = find_token(document.header, token, 0)
        if pos != -1:
            del document.header[pos]


def revert_includeonly(document):
    """ Remove the includeonly blocks from the header

    Every span from a \\begin_includeonly line up to and including its
    matching \\end_includeonly line is deleted.  Processing stops at the
    first block whose end cannot be found.
    """
    start = find_token(document.header, "\\begin_includeonly", 0)
    while start != -1:
        stop = find_end_of(document.header, start, "\\begin_includeonly", "\\end_includeonly")
        if stop == -1:
            # this should not happen
            return
        del document.header[start:stop + 1]
        start = find_token(document.header, "\\begin_includeonly", start)


def revert_includeall(document):
    """ Remove maintain_unincluded_children param

    Deletes the first matching header line, if there is one.
    """
    pos = find_token(document.header, '\\maintain_unincluded_children', 0)
    if pos == -1:
        return
    del document.header[pos]


def revert_multirow(document):
    """ Revert multirow cells in tables

    Each pass of the loop handles one multirow begin cell (type 3) and the
    next multirow part cell (type 4) found after it: the multirow
    attributes are stripped from the cell tags and the content of the begin
    cell is wrapped in a \\multirow command written as ERT.  If at least
    one begin cell was found, \\usepackage{multirow} is added to the
    preamble.
    """
    i = 0
    # becomes True as soon as one multirow begin cell has been seen
    multirow = False
    while True:
      # cell type 3 is multirow begin cell
      i = find_token(document.body, '<cell multirow="3"', i)
      if i == -1:
          break
      # a multirow cell was found
      multirow = True
      # remove the multirow tag, set the valignment to top
      # and remove the bottom line
      document.body[i] = document.body[i].replace(' multirow="3" ', ' ')
      document.body[i] = document.body[i].replace('valignment="middle"', 'valignment="top"')
      document.body[i] = document.body[i].replace(' bottomline="true" ', ' ')
      # write ERT to create the multirow cell
      # use 2 rows and 2cm as default width because the multirow span
      # and the column width are hard to access from here
      subst = [old_put_cmd_in_ert("\\multirow{2}{2cm}{")]
      # the opening ERT is inserted four lines below the cell tag
      # NOTE(review): presumably that is where the cell's text starts - confirm
      document.body[i + 4:i + 4] = subst
      i = find_token(document.body, "</cell>", i)
      if i == -1:
           document.warning("Malformed LyX document: Could not find end of tabular cell.")
           break
      # the closing brace is inserted three lines above the </cell> tag
      subst = [old_put_cmd_in_ert("}")]
      document.body[i - 3:i - 3] = subst
      # cell type 4 is multirow part cell
      i = find_token(document.body, '<cell multirow="4"', i)
      if i == -1:
          break
      # remove the multirow tag, set the valignment to top
      # and remove the top line
      document.body[i] = document.body[i].replace(' multirow="4" ', ' ')
      document.body[i] = document.body[i].replace('valignment="middle"', 'valignment="top"')
      document.body[i] = document.body[i].replace(' topline="true" ', ' ')
      i = i + 1
    if multirow == True:
        add_to_preamble(document, ["% this command was inserted by lyx2lyx"])
        add_to_preamble(document, ["\\usepackage{multirow}"])


def convert_math_output(document):
    """ Convert \\html_use_mathml to \\html_math_output

    The header line "\\html_use_mathml true" becomes
    "\\html_math_output 0" (MathML); any other value becomes
    "\\html_math_output 2" (Images).  If the line carries no parsable
    value a warning is issued and the MathML default is written.
    """
    i = find_token(document.header, "\\html_use_mathml", 0)
    if i == -1:
        return
    rgx = re.compile(r'\\html_use_mathml\s+(\w+)')
    m = rgx.match(document.header[i])
    newval = "0" # MathML
    # test the match result, not the compiled pattern (which is always true)
    if m is not None:
        if m.group(1) != "true":
            newval = "2" # Images
    else:
        document.warning("Unable to match " + document.header[i])
    document.header[i] = "\\html_math_output " + newval


def revert_math_output(document):
    """ Revert \\html_math_output to \\html_use_mathml

    The values 1 and 2 of \\html_math_output become
    "\\html_use_mathml false"; every other value becomes
    "\\html_use_mathml true".  If the line carries no parsable value a
    warning is issued and "true" is written.
    """
    i = find_token(document.header, "\\html_math_output", 0)
    if i == -1:
        return
    rgx = re.compile(r'\\html_math_output\s+(\d)')
    m = rgx.match(document.header[i])
    newval = "true"
    # test the match result, not the compiled pattern (which is always true)
    if m is not None:
        if m.group(1) in ("1", "2"):
            newval = "false"
    else:
        document.warning("Unable to match " + document.header[i])
    document.header[i] = "\\html_use_mathml " + newval


def revert_inset_preview(document):
    """ Dissolves the preview inset

    The lines that open and close each Preview inset are deleted so that
    the inset's content remains in the surrounding text.  Processing stops
    with a warning when the end of an inset cannot be found.
    """
    i = 0
    j = 0
    k = 0
    while True:
      i = find_token(document.body, "\\begin_inset Preview", i)
      if i == -1:
          return
      j = find_end_of_inset(document.body, i)
      if j == -1:
          document.warning("Malformed LyX document: Could not find end of Preview inset.")
          return
      #If the layout is Standard we need to remove it, otherwise there
      #will be paragraph breaks that shouldn't be there.
      # (this search is not limited to the inset; only a hit exactly two
      # lines below the inset start is acted upon)
      k = find_token(document.body, "\\begin_layout Standard", i)
      if k == i+2:
          # drop the inset start and the two lines after it ...
          del document.body[i : i+3]
          # ... and, with indices shifted by those three deletions, the
          # three lines ending at the original inset end
          del document.body[j-5 : j-2]
          # NOTE(review): stepping back can make i negative when the inset
          # starts within the first few body lines - confirm find_token
          # copes with a negative start
          i -= 6
      else:
          # drop only the inset start line and (shifted by one) its end line
          del document.body[i]
          del document.body[j-1]
          i -= 2


def revert_equalspacing_xymatrix(document):
    """ Revert a Formula with xymatrix@! to an ERT inset

    Formula insets containing "\\xymatrix@!" are replaced by the result of
    put_cmd_in_ert applied to their content.  If any inset was converted
    and no other Formula inset containing "\\xymatrix" was seen, the xy
    package is added to the preamble.
    """
    pos = 0
    plain_xymatrix_seen = False
    converted_any = False
    while True:
        pos = find_token(document.body, "\\begin_inset Formula", pos)
        if pos == -1:
            break
        end = find_end_of_inset(document.body, pos)
        if end == -1:
            document.warning("Malformed LyX document: Could not find end of Formula inset.")
            break

        # the inset's closing line itself is not inspected
        inset_lines = document.body[pos:end]
        if any("\\xymatrix@!" in line for line in inset_lines):
            converted_any = True
            # the first line contributes what follows the inset header
            content = [document.body[pos][21:]] + document.body[pos+1:end]
            subst = put_cmd_in_ert(content)
            document.body[pos:end+1] = subst
            pos += len(subst)
        else:
            if any("\\xymatrix" in line for line in inset_lines):
                plain_xymatrix_seen = True
            pos = end + 1
    if converted_any and not plain_xymatrix_seen:
        add_to_preamble(document, ['\\usepackage[all]{xy}'])

##
# Conversion hub
#
# The tables below pair a format number with a list of the functions
# defined in this module.
# NOTE(review): presumably the lyx2lyx driver walks these tables and runs
# the listed functions to reach the given format number - confirm against
# the code that consumes "convert" and "revert".

# version strings associated with this module
supported_versions = ["2.0.0","2.0"]
# forward steps, in ascending format-number order; an empty list means no
# function is run for that number
convert = [[346, []],
           [347, []],
           [348, []],
           [349, []],
           [350, []],
           [351, []],
           [352, [convert_splitindex]],
           [353, []],
           [354, []],
           [355, []],
           [356, []],
           [357, []],
           [358, []],
           [359, [convert_nomencl_width]],
           [360, []],
           [361, []],
           [362, []],
           [363, []],
           [364, []],
           [365, []],
           [366, []],
           [367, []],
           [368, []],
           [369, [convert_author_id]],
           [370, []],
           [371, []],
           [372, []],
           [373, [merge_gbrief]],
           [374, []],
           [375, []],
           [376, []],
           [377, []],
           [378, []],
           [379, [convert_math_output]],
           [380, []],
           [381, []]
          ]

# backward steps, in descending format-number order
revert =  [[380, [revert_equalspacing_xymatrix]],
           [379, [revert_inset_preview]],
           [378, [revert_math_output]],
           [377, []],
           [376, [revert_multirow]],
           [375, [revert_includeall]],
           [374, [revert_includeonly]],
           [373, [revert_html_options]],
           [372, [revert_gbrief]],
           [371, [revert_fontenc]],
           [370, [revert_mhchem]],
           [369, [revert_suppress_date]],
           [368, [revert_author_id]],
           [367, [revert_hspace_glue_lengths]],
           [366, [revert_percent_vspace_lengths, revert_percent_hspace_lengths]],
           [365, [revert_percent_skip_lengths]],
           [364, [revert_paragraph_indentation]],
           [363, [revert_branch_filename]],
           [362, [revert_longtable_align]],
           [361, [revert_applemac]],
           [360, []],
           [359, [revert_nomencl_cwidth]],
           [358, [revert_nomencl_width]],
           [357, [revert_custom_processors]],
           [356, [revert_ulinelatex]],
           [355, [revert_uulinewave]],
           [354, [revert_strikeout]],
           [353, [revert_printindexall]],
           [352, [revert_subindex]],
           [351, [revert_splitindex]],
           [350, [revert_backgroundcolor]],
           [349, [revert_outputformat]],
           [348, [revert_xetex]],
           [347, [revert_phantom, revert_hphantom, revert_vphantom]],
           [346, [revert_tabularvalign]],
           [345, [revert_swiss]]
          ]


# this module does nothing when run as a script
if __name__ == "__main__":
    pass