lyx_mirror/lib/lyx2lyx/lyx_1_6.py

# This file is part of lyx2lyx
# -*- coding: utf-8 -*-
# Copyright (C) 2007 José Matos <jamatos@lyx.org>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.

""" Convert files to the file format generated by lyx 1.6"""

import re
import unicodedata
import sys, os

from parser_tools import find_token, find_end_of, find_tokens, get_value

####################################################################
# Private helper functions

def find_end_of_inset(lines, i):
    " Find end of inset, where lines[i] is included."
    return find_end_of(lines, i, "\\begin_inset", "\\end_inset")


####################################################################

def fix_wrong_tables(document):
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Tabular", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            document.warning("Malformed LyX document: Could not find end of tabular.")
            continue

        m = i + 1
        nrows = int(document.body[i+1].split('"')[3])
        ncols = int(document.body[i+1].split('"')[5])

        for l in range(nrows):
            prev_multicolumn = 0
            for k in range(ncols):
                m = find_token(document.body, '<cell', m)

                if document.body[m].find('multicolumn') != -1:
                    multicol_cont = int(document.body[m].split('"')[1])

                    if multicol_cont == 2 and (k == 0 or prev_multicolumn == 0):
                        document.body[m] = document.body[m][:5] + document.body[m][21:]
                        prev_multicolumn = 0
                    else:
                        prev_multicolumn = multicol_cont
                else:
                    prev_multicolumn = 0

        i = j + 1


def close_begin_deeper(document):
    i = 0
    depth = 0
    while True:
        i = find_tokens(document.body, ["\\begin_deeper", "\\end_deeper"], i)

        if i == -1:
            break

        if document.body[i][:13] == "\\begin_deeper":
            depth += 1
        else:
            depth -= 1

        i += 1

    document.body[-2:-2] = ['\\end_deeper' for i in range(depth)]


def long_charstyle_names(document):
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CharStyle", i)
        if i == -1:
            return
        document.body[i] = document.body[i].replace("CharStyle ", "CharStyle CharStyle:")
        i += 1

def revert_long_charstyle_names(document):
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CharStyle", i)
        if i == -1:
            return
        document.body[i] = document.body[i].replace("CharStyle CharStyle:", "CharStyle")
        i += 1


def axe_show_label(document):
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CharStyle", i)
        if i == -1:
            return
        if document.body[i + 1].find("show_label") != -1:
            if document.body[i + 1].find("true") != -1:
                document.body[i + 1] = "status open"
                del document.body[ i + 2]
            else:
                if document.body[i + 1].find("false") != -1:
                    document.body[i + 1] = "status collapsed"
                    del document.body[ i + 2]
                else:
                    document.warning("Malformed LyX document: show_label neither false nor true.")
        else:
            document.warning("Malformed LyX document: show_label missing in CharStyle.")

        i += 1


def revert_show_label(document):
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CharStyle", i)
        if i == -1:
            return
        if document.body[i + 1].find("status open") != -1:
            document.body.insert(i + 1, "show_label true")
        else:
            if document.body[i + 1].find("status collapsed") != -1:
                document.body.insert(i + 1, "show_label false")
            else:
                document.warning("Malformed LyX document: no legal status line in CharStyle.")
        i += 1

def revert_begin_modules(document):
    i = 0
    while True:
        i = find_token(document.header, "\\begin_modules", i)
        if i == -1:
            return
        j = find_end_of(document.header, i, "\\begin_modules", "\\end_modules")
        if j == -1:
            # this should not happen
            break
        document.header[i : j + 1] = []

def convert_flex(document):
    "Convert CharStyle to Flex"
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CharStyle", i)
        if i == -1:
            return
        document.body[i] = document.body[i].replace('\\begin_inset CharStyle', '\\begin_inset Flex')

def revert_flex(document):
    "Convert Flex to CharStyle"
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Flex", i)
        if i == -1:
            return
        document.body[i] = document.body[i].replace('\\begin_inset Flex', '\\begin_inset CharStyle')


#  Discard PDF options for hyperref
def revert_pdf_options(document):
        "Revert PDF options for hyperref."
        i = 0
        i = find_token(document.header, "\\use_hyperref", i)
        if i != -1:
            del document.header[i]
        i = find_token(document.header, "\\pdf_store_options", i)
        if i != -1:
            del document.header[i]
        i = find_token(document.header, "\\pdf_title", 0)
        if i != -1:
            del document.header[i]
        i = find_token(document.header, "\\pdf_author", 0)
        if i != -1:
            del document.header[i]
        i = find_token(document.header, "\\pdf_subject", 0)
        if i != -1:
            del document.header[i]
        i = find_token(document.header, "\\pdf_keywords", 0)
        if i != -1:
            del document.header[i]
        i = find_token(document.header, "\\pdf_bookmarks", 0)
        if i != -1:
            del document.header[i]
        i = find_token(document.header, "\\pdf_bookmarksnumbered", i)
        if i != -1:
            del document.header[i]
        i = find_token(document.header, "\\pdf_bookmarksopen", i)
        if i != -1:
            del document.header[i]
        i = find_token(document.header, "\\pdf_bookmarksopenlevel", i)
        if i != -1:
            del document.header[i]
        i = find_token(document.header, "\\pdf_breaklinks", i)
        if i != -1:
            del document.header[i]
        i = find_token(document.header, "\\pdf_pdfborder", i)
        if i != -1:
            del document.header[i]
        i = find_token(document.header, "\\pdf_colorlinks", i)
        if i != -1:
            del document.header[i]
        i = find_token(document.header, "\\pdf_backref", i)
        if i != -1:
            del document.header[i]
        i = find_token(document.header, "\\pdf_pagebackref", i)
        if i != -1:
            del document.header[i]
        i = find_token(document.header, "\\pdf_pagemode", 0)
        if i != -1:
            del document.header[i]
        i = find_token(document.header, "\\pdf_quoted_options", 0)
        if i != -1:
            del document.header[i]


def remove_inzip_options(document):
    "Remove inzipName and embed options from the Graphics inset"
    i = 0
    while 1:
        i = find_token(document.body, "\\begin_inset Graphics", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            # should not happen
            document.warning("Malformed LyX document: Could not find end of graphics inset.")
        # If there's a inzip param, just remove that
        k = find_token(document.body, "\tinzipName", i + 1, j)
        if k != -1:
            del document.body[k]
            # embed option must follow the inzipName option
            del document.body[k+1]
        i = i + 1


def convert_inset_command(document):
    """
        Convert:
            \begin_inset LatexCommand cmd
        to
            \begin_inset CommandInset InsetType
            LatexCommand cmd
    """
    i = 0
    while 1:
        i = find_token(document.body, "\\begin_inset LatexCommand", i)
        if i == -1:
            return
        line = document.body[i]
        r = re.compile(r'\\begin_inset LatexCommand (.*)$')
        m = r.match(line)
        cmdName = m.group(1)
        insetName = ""
        #this is adapted from factory.cpp
        if cmdName[0:4].lower() == "cite":
            insetName = "citation"
        elif cmdName == "url" or cmdName == "htmlurl":
            insetName = "url"
        elif cmdName[-3:] == "ref":
            insetName = "ref"
        elif cmdName == "tableofcontents":
            insetName = "toc"
        elif cmdName == "printnomenclature":
            insetName = "nomencl_print"
        elif cmdName == "printindex":
            insetName = "index_print"
        else:
            insetName = cmdName
        insertion = ["\\begin_inset CommandInset " + insetName, "LatexCommand " + cmdName]
        document.body[i : i+1] = insertion


def revert_inset_command(document):
    """
        Convert:
            \begin_inset CommandInset InsetType
            LatexCommand cmd
        to
            \begin_inset LatexCommand cmd
        Some insets may end up being converted to insets earlier versions of LyX
        will not be able to recognize. Not sure what to do about that.
    """
    i = 0
    while 1:
        i = find_token(document.body, "\\begin_inset CommandInset", i)
        if i == -1:
            return
        nextline = document.body[i+1]
        r = re.compile(r'LatexCommand\s+(.*)$')
        m = r.match(nextline)
        if not m:
            document.warning("Malformed LyX document: Missing LatexCommand in " + document.body[i] + ".")
            continue
        cmdName = m.group(1)
        insertion = ["\\begin_inset LatexCommand " + cmdName]
        document.body[i : i+2] = insertion


def convert_wrapfig_options(document):
    "Convert optional options for wrap floats (wrapfig)."
    # adds the tokens "lines", "placement", and "overhang"
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Wrap figure", i)
        if i == -1:
            return
        document.body.insert(i + 1, "lines 0")
        j = find_token(document.body, "placement", i)
        # placement can be already set or not; if not, set it
        if j == i+2:
            document.body.insert(i + 3, "overhang 0col%")
        else:
           document.body.insert(i + 2, "placement o")
           document.body.insert(i + 3, "overhang 0col%")
        i = i + 1


def revert_wrapfig_options(document):
    "Revert optional options for wrap floats (wrapfig)."
    i = 0
    while True:
        i = find_token(document.body, "lines", i)
        if i == -1:
            return
        j = find_token(document.body, "overhang", i+1)
        if j != i + 2 and j != -1:
            document.warning("Malformed LyX document: Couldn't find overhang parameter of wrap float.")
        if j == -1:
            return
        del document.body[i]
        del document.body[j-1]
        i = i + 1


def convert_latexcommand_index(document):
    "Convert from LatexCommand form to collapsable form."
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CommandInset index", i)
        if i == -1:
            return
        if document.body[i + 1] != "LatexCommand index": # Might also be index_print
            return
        fullcommand = document.body[i + 2]
        document.body[i] = "\\begin_inset Index"
        document.body[i + 1] = "status collapsed"
        document.body[i + 2] = "\\begin_layout standard"
        document.body.insert(i + 3, fullcommand[6:].strip('"'))
        document.body.insert(i + 4, "\\end_layout")
        i = i + 5


def revert_latexcommand_index(document):
    "Revert from collapsable form toLatexCommand form."
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Index", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i)
        del document.body[j - 1]
        del document.body[j - 2] # \end_layout
        document.body[i] =  "\\begin_inset CommandInset index"
        document.body[i + 1] =  "LatexCommand index"
        document.body[i + 3] = "name " + '"' + document.body[i + 3] + '"'
        document.body.insert(i + 4, "")
        del document.body[i + 2] # \begin_layout standard
        i = i + 5


def revert_wraptable(document):
    "Revert wrap table to wrap figure."
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Wrap table", i)
        if i == -1:
            return
        document.body[i] = document.body[i].replace('\\begin_inset Wrap table', '\\begin_inset Wrap figure')
        i = i + 1


def revert_vietnamese(document):
    "Set language Vietnamese to English"
    # Set document language from Vietnamese to English
    i = 0
    if document.language == "vietnamese":
        document.language = "english"
        i = find_token(document.header, "\\language", 0)
        if i != -1:
            document.header[i] = "\\language english"
    j = 0
    while True:
        j = find_token(document.body, "\\lang vietnamese", j)
        if j == -1:
            return
        document.body[j] = document.body[j].replace("\\lang vietnamese", "\\lang english")
        j = j + 1


def revert_japanese(document):
    "Set language japanese-plain to japanese"
    # Set document language from japanese-plain to japanese
    i = 0
    if document.language == "japanese-plain":
        document.language = "japanese"
        i = find_token(document.header, "\\language", 0)
        if i != -1:
            document.header[i] = "\\language japanese"
    j = 0
    while True:
        j = find_token(document.body, "\\lang japanese-plain", j)
        if j == -1:
            return
        document.body[j] = document.body[j].replace("\\lang japanese-plain", "\\lang japanese")
        j = j + 1


def revert_japanese_encoding(document):
    "Set input encoding form EUC-JP-plain to EUC-JP etc."
    # Set input encoding form EUC-JP-plain to EUC-JP etc.
    i = 0
    i = find_token(document.header, "\\inputencoding EUC-JP-plain", 0)
    if i != -1:
        document.header[i] = "\\inputencoding EUC-JP"
    j = 0
    j = find_token(document.header, "\\inputencoding JIS-plain", 0)
    if j != -1:
        document.header[j] = "\\inputencoding JIS"
    k = 0
    k = find_token(document.header, "\\inputencoding SJIS-plain", 0)
    if k != -1: # convert to UTF8 since there is currently no SJIS encoding
        document.header[k] = "\\inputencoding UTF8"


def revert_inset_info(document):
    'Replace info inset with its content'
    i = 0
    while 1:
        i = find_token(document.body, '\\begin_inset Info', i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            # should not happen
            document.warning("Malformed LyX document: Could not find end of Info inset.")
        type = 'unknown'
        arg = ''
        for k in range(i, j+1):
            if document.body[k].startswith("arg"):
                arg = document.body[k][3:].strip().strip('"')
            if document.body[k].startswith("type"):
                type = document.body[k][4:].strip().strip('"')
        # I think there is a newline after \\end_inset, which should be removed.
        if document.body[j + 1].strip() == "":
            document.body[i : (j + 2)] = [type + ':' + arg]
        else:
            document.body[i : (j + 1)] = [type + ':' + arg]


def convert_pdf_options(document):
    # Set the pdfusetitle tag, delete the pdf_store_options,
    # set quotes for bookmarksopenlevel"
    has_hr = get_value(document.header, "\\use_hyperref", 0, default = "0")
    if has_hr == "1":
        k = find_token(document.header, "\\use_hyperref", 0)
        document.header.insert(k + 1, "\\pdf_pdfusetitle true")
    k = find_token(document.header, "\\pdf_store_options", 0)
    if k != -1:
        del document.header[k]
    i = find_token(document.header, "\\pdf_bookmarksopenlevel", k)
    if i == -1: return
    document.header[i] = document.header[i].replace('"', '')


def revert_pdf_options_2(document):
    # reset the pdfusetitle tag, set quotes for bookmarksopenlevel"
    k = find_token(document.header, "\\use_hyperref", 0)
    i = find_token(document.header, "\\pdf_pdfusetitle", k)
    if i != -1:
        del document.header[i]
    i = find_token(document.header, "\\pdf_bookmarksopenlevel", k)
    if i == -1: return
    values = document.header[i].split()
    values[1] = ' "' + values[1] + '"'
    document.header[i] = ''.join(values)


def convert_htmlurl(document):
    'Convert "htmlurl" to "href" insets for docbook'
    if document.backend != "docbook":
      return
    i = 0
    while True:
      i = find_token(document.body, "\\begin_inset CommandInset url", i)
      if i == -1:
        return
      document.body[i] = "\\begin_inset CommandInset href"
      document.body[i + 1] = "LatexCommand href"
      i = i + 1


def convert_url(document):
    'Convert url insets to url charstyles'
    if document.backend == "docbook":
      return
    didone = False
    i = 0
    while True:
      i = find_token(document.body, "\\begin_inset CommandInset url", i)
      if i == -1:
        break
      n = find_token(document.body, "name", i)
      if n == i + 2:
        # place the URL name in typewriter before the new URL insert
        # grab the name 'bla' from the e.g. the line 'name "bla"',
        # therefore start with the 6th character
        name = document.body[n][6:-1]
        newname = [name + " "]
        document.body[i:i] = newname
        i = i + 1
      j = find_token(document.body, "target", i)
      if j == -1:
        document.warning("Malformed LyX document: Can't find target for url inset")
        i = j
        continue
      target = document.body[j][8:-1]
      k = find_token(document.body, "\\end_inset", j)
      if k == -1:
        document.warning("Malformed LyX document: Can't find end of url inset")
        i = k
        continue
      newstuff = ["\\begin_inset Flex URL",
        "status collapsed", "",
        "\\begin_layout Standard",
        "",
        target,
        "\\end_layout",
        ""]
      document.body[i:k] = newstuff
      didone = True
      i = k

    #If we did one, we need to add URL to the modules
    if didone:
      i = find_token(document.header, "\\begin_modules", 0)
      if i == -1:
        #No modules yet included
        i = find_token(document.header, "\\textclass", 0)
        if i == -1:
          document.warning("Malformed LyX document: No \\textclass!!")
          return
        modinfo = ["\\begin_modules", "URL", "\\end_modules"]
        document.header[i + 1: i + 1] = modinfo
        return
      j = find_token(document.header, "\\end_modules", i)
      if j == -1:
        document.warning("Malformed LyX document: No \\end_modules.")
        return
      k = find_token(document.header, "URL", i)
      if k != -1 and k < j:
        return
      document.header.insert(i + 1, "URL")


def revert_href(document):
    'Reverts hyperlink insets (href) to url insets (url)'
    i = 0
    while True:
      i = find_token(document.body, "\\begin_inset CommandInset href", i)
      if i == -1:
          return
      document.body[i : i + 2] = \
        ["\\begin_inset CommandInset url", "LatexCommand url"]
      i = i + 2


def convert_include(document):
  'Converts include insets to new format.'
  i = 0
  r = re.compile(r'\\begin_inset Include\s+\\([^{]+){([^}]*)}(?:\[(.*)\])?')
  while True:
    i = find_token(document.body, "\\begin_inset Include", i)
    if i == -1:
      return
    line = document.body[i]
    previewline = document.body[i + 1]
    m = r.match(line)
    if m == None:
      document.warning("Unable to match line " + str(i) + " of body!")
      i += 1
      continue
    cmd = m.group(1)
    fn  = m.group(2)
    opt = m.group(3)
    insertion = ["\\begin_inset CommandInset include",
       "LatexCommand " + cmd, previewline,
       "filename \"" + fn + "\""]
    newlines = 2
    if opt:
      insertion.append("lstparams " + '"' + opt + '"')
      newlines += 1
    document.body[i : i + 2] = insertion
    i += newlines


def revert_include(document):
  'Reverts include insets to old format.'
  i = 0
  r1 = re.compile('LatexCommand (.+)')
  r2 = re.compile('filename (.+)')
  r3 = re.compile('options (.*)')
  while True:
    i = find_token(document.body, "\\begin_inset CommandInset include", i)
    if i == -1:
      return
    previewline = document.body[i + 1]
    m = r1.match(document.body[i + 2])
    if m == None:
      document.warning("Malformed LyX document: No LatexCommand line for `" +
        document.body[i] + "' on line " + str(i) + ".")
      i += 1
      continue
    cmd = m.group(1)
    m = r2.match(document.body[i + 3])
    if m == None:
      document.warning("Malformed LyX document: No filename line for `" + \
        document.body[i] + "' on line " + str(i) + ".")
      i += 2
      continue
    fn = m.group(1)
    options = ""
    numlines = 4
    if (cmd == "lstinputlisting"):
      m = r3.match(document.body[i + 4])
      if m != None:
        options = m.group(1)
        numlines = 5
    newline = "\\begin_inset Include \\" + cmd + "{" + fn + "}"
    if options:
      newline += ("[" + options + "]")
    insertion = [newline, previewline]
    document.body[i : i + numlines] = insertion
    i += 2


def revert_albanian(document):
    "Set language Albanian to English"
    # Set document language from Albanian to English
    i = 0
    if document.language == "albanian":
        document.language = "english"
        i = find_token(document.header, "\\language", 0)
        if i != -1:
            document.header[i] = "\\language english"
    j = 0
    while True:
        j = find_token(document.body, "\\lang albanian", j)
        if j == -1:
            return
        document.body[j] = document.body[j].replace("\\lang albanian", "\\lang english")
        j = j + 1


def revert_lowersorbian(document):
    "Set language lower Sorbian to English"
    # Set document language from lower Sorbian to English
    i = 0
    if document.language == "lowersorbian":
        document.language = "english"
        i = find_token(document.header, "\\language", 0)
        if i != -1:
            document.header[i] = "\\language english"
    j = 0
    while True:
        j = find_token(document.body, "\\lang lowersorbian", j)
        if j == -1:
            return
        document.body[j] = document.body[j].replace("\\lang lowersorbian", "\\lang english")
        j = j + 1


def revert_uppersorbian(document):
    "Set language uppersorbian to usorbian as this was used in LyX 1.5"
    # Set document language from uppersorbian to usorbian
    i = 0
    if document.language == "uppersorbian":
        document.language = "usorbian"
        i = find_token(document.header, "\\language", 0)
        if i != -1:
            document.header[i] = "\\language usorbian"
    j = 0
    while True:
        j = find_token(document.body, "\\lang uppersorbian", j)
        if j == -1:
            return
        document.body[j] = document.body[j].replace("\\lang uppersorbian", "\\lang usorbian")
        j = j + 1


def convert_usorbian(document):
    "Set language uppersorbian to usorbian as this was used in LyX 1.5"
    # Set document language from uppersorbian to usorbian
    i = 0
    if document.language == "usorbian":
        document.language = "uppersorbian"
        i = find_token(document.header, "\\language", 0)
        if i != -1:
            document.header[i] = "\\language uppersorbian"
    j = 0
    while True:
        j = find_token(document.body, "\\lang usorbian", j)
        if j == -1:
            return
        document.body[j] = document.body[j].replace("\\lang usorbian", "\\lang uppersorbian")
        j = j + 1


def revert_macro_optional_params(document):
    "Convert macro definitions with optional parameters into ERTs"
    # Stub to convert macro definitions with one or more optional parameters
    # into uninterpreted ERT insets


def revert_hyperlinktype(document):
    'Reverts hyperlink type'
    i = 0
    j = 0
    while True:
      i = find_token(document.body, "target", i)
      if i == -1:
          return
      j = find_token(document.body, "type", i)
      if j == -1:
          return
      if j == i + 1:
          del document.body[j]
      i = i + 1


##
# Conversion hub
#

supported_versions = ["1.6.0","1.6"]
convert = [[277, [fix_wrong_tables]],
           [278, [close_begin_deeper]],
           [279, [long_charstyle_names]],
           [280, [axe_show_label]],
           [281, []],
           [282, []],
           [283, [convert_flex]],
           [284, []],
           [285, []],
           [286, []],
           [287, [convert_wrapfig_options]],
           [288, [convert_inset_command]],
           [289, [convert_latexcommand_index]],
           [290, []],
           [291, []],
           [292, []],
           [293, []],
           [294, [convert_pdf_options]],
           [295, [convert_htmlurl, convert_url]],
           [296, [convert_include]],
           [297, [convert_usorbian]],
           [298, []],
           [299, []]
          ]

revert =  [[298, [revert_hyperlinktype]],
           [297, [revert_macro_optional_params]],
           [296, [revert_albanian, revert_lowersorbian, revert_uppersorbian]],
           [295, [revert_include]],
           [294, [revert_href]],
           [293, [revert_pdf_options_2]],
           [292, [revert_inset_info]],
           [291, [revert_japanese, revert_japanese_encoding]],
           [290, [revert_vietnamese]],
           [289, [revert_wraptable]],
           [288, [revert_latexcommand_index]],
           [287, [revert_inset_command]],
           [286, [revert_wrapfig_options]],
           [285, [revert_pdf_options]],
           [284, [remove_inzip_options]],
           [283, []],
           [282, [revert_flex]],
           [281, []],
           [280, [revert_begin_modules]],
           [279, [revert_show_label]],
           [278, [revert_long_charstyle_names]],
           [277, []],
           [276, []]
          ]


if __name__ == "__main__":
    pass