lyx_mirror/lib/lyx2lyx/lyx_1_2.py

# This file is part of lyx2lyx
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2002 Dekel Tsur <dekel@lyx.org>
# Copyright (C) 2004 José Matos <jamatos@lyx.org>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.

import string
import re

from parser_tools import find_token, find_token_backwards, get_next_paragraph,\
                         find_tokens, find_end_of_inset, find_re, \
                         is_nonempty_line, get_paragraph, find_nonempty_line, \
                         get_value, get_tabular_lines, check_token

# Header lines of the inset that replaces each old "\begin_float <type>"
# (keyed by the <type> word found on the \begin_float line).
floats = {
    "footnote": [
        "\\begin_inset Foot",
        "collapsed true",
    ],
    "margin": [
        "\\begin_inset Marginal",
        "collapsed true",
    ],
    "fig": [
        "\\begin_inset Float figure",
        "wide false",
        "collapsed false",
    ],
    "tab": [
        "\\begin_inset Float table",
        "wide false",
        "collapsed false",
    ],
    "alg": [
        "\\begin_inset Float algorithm",
        "wide false",
        "collapsed false",
    ],
    "wide-fig": [
        "\\begin_inset Float figure",
        "wide true",
        "collapsed false",
    ],
    "wide-tab": [
        "\\begin_inset Float table",
        "wide true",
        "collapsed false",
    ],
}

# Font tokens that are looked for in the paragraph text preceding a float.
font_tokens = [
    "\\family",
    "\\series",
    "\\shape",
    "\\size",
    "\\emph",
    "\\bar",
    "\\noun",
    "\\color",
    "\\lang",
    "\\latex",
]

# Any line carrying "\pextra_type 3".
pextra_type3_rexp = re.compile(r".*\\pextra_type\s+3")

# The \pextra_* run of a paragraph. Groups: 1 = type, 3 = alignment,
# 5 = hfill, 7 = start_minipage, 9 = width token name, 10 = width value.
pextra_rexp = re.compile(
    r"\\pextra_type\s+(\S+)"
    r"(\s+\\pextra_alignment\s+(\S+))?"
    r"(\s+\\pextra_hfill\s+(\S+))?"
    r"(\s+\\pextra_start_minipage\s+(\S+))?"
    r"(\s+(\\pextra_widthp?)\s+(\S*))?"
)


def get_width(mo):
    """Return the width described by a match object.

    mo must expose at least ten groups: group 10 is the width value and
    group 9 the name of the token that introduced it. A value introduced
    by "\\pextra_widthp" gets the "col%" unit appended, any other value is
    returned untouched, and "100col%" is returned when group 10 is empty
    or did not take part in the match.

    The body is indented with spaces only; the previous mix of a 4-space
    level and tab levels is rejected by Python 3 (TabError).
    """
    value = mo.group(10)
    if not value:
        return "100col%"
    if mo.group(9) == "\\pextra_widthp":
        return value + "col%"
    return value


#
# Change \begin_float .. \end_float into \begin_inset Float .. \end_inset
#
def remove_oldfloat(file):
    """Replace each begin_float .. end_float span of file.body by an inset.

    The float type is the second word of the begin_float line and selects
    the inset header from `floats`; an unknown or missing type is reported
    through file.warning and handled as "fig". A float containing a
    "pextra_type 3" line becomes a "Wrap figure" inset instead. file.body
    is modified in place; file.format and file.language are read.
    """
    lines = file.body
    i = 0
    while 1:
        i = find_token(lines, "\\begin_float", i)
        if i == -1:
            break
        # There are no nested floats, so finding the end of the float is simple
        j = find_token(lines, "\\end_float", i+1)
        if j == -1:
            # Without an end the final splice would insert the new inset
            # without removing the old float, and the same \begin_float
            # would then be found again forever. Nothing after this point
            # can be closed either, so stop here.
            file.warning("Error! \\begin_float without a matching \\end_float")
            break

        words = lines[i].split()
        if len(words) > 1:
            floattype = words[1]
        else:
            floattype = ""
        if floattype not in floats:
            file.warning("Error! Unknown float type " + floattype)
            floattype = "fig"

        # skip \end_deeper tokens
        i2 = i+1
        while check_token(lines[i2], "\\end_deeper"):
            i2 = i2+1
        if i2 > i+1:
            # Re-emit the skipped \end_deeper lines after the float; the
            # insertion point is searched from j+1 on, so i..j keep their
            # positions.
            j2 = get_next_paragraph(lines, j + 1, file.format + 1)
            lines[j2:j2] = ["\\end_deeper "]*(i2-(i+1))

        new = floats[floattype]+[""]

        # Check if the float is floatingfigure
        k = find_re(lines, pextra_type3_rexp, i, j)
        if k != -1:
            mo = pextra_rexp.search(lines[k])
            width = get_width(mo)
            lines[k] = re.sub(pextra_rexp, "", lines[k])
            new = ["\\begin_inset Wrap figure",
                   'width "%s"' % width,
                   "collapsed false",
                   ""]

        new = new+lines[i2:j]+["\\end_inset ", ""]

        # After a float, all font attributes are reset.
        # We need to output '\foo default' for every attribute foo
        # whose value is not default before the float.
        # The check here is not accurate, but it doesn't matter
        # as extra '\foo default' commands are ignored.
        # In fact, it might be safer to output '\foo default' for all
        # font attributes.
        k = get_paragraph(lines, i, file.format + 1)
        flag = 0
        for token in font_tokens:
            if find_token(lines, token, k, i) != -1:
                if not flag:
                    # This is not necessary, but we want the output to be
                    # as similar as possible to the lyx format
                    flag = 1
                    new.append("")
                if token == "\\lang":
                    new.append(token+" "+ file.language)
                else:
                    new.append(token+" default ")

        lines[i:j+1] = new
        i = i+1


pextra_type2_rexp = re.compile(r".*\\pextra_type\s+[12]")
pextra_type2_rexp2 = re.compile(r".*(\\layout|\\pextra_type\s+2)")
pextra_widthp = re.compile(r"\\pextra_widthp")

def remove_pextra(file):
    """Convert the pextra_type 1 and 2 paragraph options of file.body.

    Type 1 (indented paragraph) becomes a "\\leftindent <width> " token in
    place. Type 2 (minipage) paragraphs are wrapped in a
    "\\begin_inset Minipage" .. "\\end_inset " pair, and following
    paragraphs that also carry pextra options are collected into the same
    inset until one of them starts a new minipage. file.body is modified
    in place.
    """
    lines = file.body
    i = 0
    # Set when the previous minipage ended because the next paragraph
    # starts a new one.
    flag = 0
    while 1:
        i = find_re(lines, pextra_type2_rexp, i)
        if i == -1:
            break

        # Sometimes the \pextra_widthp argument comes in its own
        # line. If that happens insert it back in this line.
        # (The bounds check covers a match on the very last line.)
        if i+1 < len(lines) and pextra_widthp.search(lines[i+1]):
            lines[i] = lines[i] + ' ' + lines[i+1]
            del lines[i+1]

        mo = pextra_rexp.search(lines[i])
        width = get_width(mo)

        if mo.group(1) == "1":
            # handle \pextra_type 1 (indented paragraph)
            # A callable replacement keeps the text literal: as a template
            # string, "\l" is a bad escape for re.sub on Python >= 3.7.
            indent = "\\leftindent "+width+" "
            lines[i] = pextra_rexp.sub(lambda match: indent, lines[i])
            i = i+1
            continue

        # handle \pextra_type 2 (minipage)
        # NOTE(review): group 3 is None when \pextra_alignment is absent,
        # which makes the concatenation below raise TypeError - confirm
        # whether such input can occur and what the default should be.
        position = mo.group(3)
        hfill = mo.group(5)
        lines[i] = re.sub(pextra_rexp, "", lines[i])

        start = ["\\begin_inset Minipage",
                 "position " + position,
                 "inner_position 0",
                 'height "0pt"',
                 'width "%s"' % width,
                 "collapsed false"
                 ]
        if flag:
            flag = 0
            if hfill:
                start = ["","\\hfill",""]+start
        else:
            start = ["\\layout Standard"] + start

        j0 = find_token_backwards(lines,"\\layout", i-1)
        j = get_next_paragraph(lines, i, file.format + 1)

        while 1:
            # collect more paragraphs to the minipage
            if j == -1 or not check_token(lines[j], "\\layout"):
                break
            i = find_re(lines, pextra_type2_rexp2, j+1)
            if i == -1:
                break
            mo = pextra_rexp.search(lines[i])
            if not mo:
                break
            if mo.group(7) == "1":
                # \pextra_start_minipage 1: this paragraph opens the next
                # minipage, handled by the next pass of the outer loop.
                flag = 1
                break
            lines[i] = re.sub(pextra_rexp, "", lines[i])
            j = find_tokens(lines, ["\\layout", "\\end_float"], i+1)

        mid = lines[j0:j]
        end = ["\\end_inset "]

        lines[j0:j] = start+mid+end
        i = i+1


def is_empty(lines):
    """Return true when no element of lines satisfies is_nonempty_line.

    A list is built explicitly: on Python 3 filter() returns an iterator,
    which never compares equal to [].
    """
    return [line for line in lines if is_nonempty_line(line)] == []


# Tokens that are moved out of an ERT inset instead of being kept in it.
move_rexp = re.compile(
    r"\\(" + "|".join(["family", "series", "shape", "size", "emph",
                       "numeric", "bar", "noun", "end_deeper"]) + ")"
)
# Lines at which old ERT text has to be interrupted.
ert_rexp = re.compile(r"\\begin_inset|\\hfill|.*\\SpecialChar")
# Splits a line into the text before a \SpecialChar and the rest.
spchar_rexp = re.compile(r"(.*)(\\SpecialChar.*)")
# Lines that open a new ERT inset.
ert_begin = [
    "\\begin_inset ERT",
    "status Collapsed",
    "",
    "\\layout Standard",
]


def remove_oldert(file):
    """Convert old-style ERT text of file.body into ERT insets.

    Old ERT is introduced by a "\\latex latex" token or a "\\layout LaTeX"
    paragraph. Its text is wrapped in the `ert_begin` .. "\\end_inset "
    lines, while insets, \\hfill, \\SpecialChar and the tokens matched by
    `move_rexp` are kept outside of the new insets. Any "\\latex " line
    still present afterwards is deleted. file.body is modified in place.
    """
    lines = file.body
    i = 0
    while 1:
        i = find_tokens(lines, ["\\latex latex", "\\layout LaTeX"], i)
        if i == -1:
            break
        j = i+1
        while 1:
            # \end_inset is for ert inside a tabular cell. The other tokens
            # are obvious.
            j = find_tokens(lines, ["\\latex default", "\\layout", "\\begin_inset", "\\end_inset", "\\end_float", "\\the_end"],
                            j)
            if check_token(lines[j], "\\begin_inset"):
                # Insets inside the ERT text are stepped over as a whole.
                j = find_end_of_inset(lines, j)+1
            else:
                break

        if check_token(lines[j], "\\layout"):
            # Leave the \begin_deeper lines preceding the next paragraph
            # out of the ERT text.
            while j-1 >= 0 and check_token(lines[j-1], "\\begin_deeper"):
                j = j-1

        # We need to remove insets, special chars & font commands from ERT text
        new = []
        new2 = []
        if check_token(lines[i], "\\layout LaTeX"):
            new = ["\\layout Standard", "", ""]
            # We have a problem with classes in which Standard is not the default layout!

        k = i+1
        while 1:
            # Process the text from k up to the next inset, \hfill or
            # \SpecialChar, or up to j when there is none.
            k2 = find_re(lines, ert_rexp, k, j)
            inset = hfill = specialchar = 0
            if k2 == -1:
                k2 = j
            elif check_token(lines[k2], "\\begin_inset"):
                inset = 1
            elif check_token(lines[k2], "\\hfill"):
                hfill = 1
                del lines[k2]
                j = j-1
            else:
                specialchar = 1
                mo = spchar_rexp.match(lines[k2])
                lines[k2] = mo.group(1)
                specialchar_str = mo.group(2)
                k2 = k2+1

            tmp = []
            for line in lines[k:k2]:
                # Move some lines outside the ERT inset:
                if move_rexp.match(line):
                    if new2 == []:
                        # This is not necessary, but we want the output to be
                        # as similar as possible to the lyx format
                        new2 = [""]
                    new2.append(line)
                elif not check_token(line, "\\latex"):
                    tmp.append(line)

            if is_empty(tmp):
                # A list is built explicitly: on Python 3 filter() returns
                # an iterator, which never compares equal to [].
                if [x for x in tmp if x != ""] != []:
                    if new == []:
                        # This is not necessary, but we want the output to be
                        # as similar as possible to the lyx format
                        lines[i-1] = lines[i-1]+" "
                    else:
                        new = new+[" "]
            else:
                new = new+ert_begin+tmp+["\\end_inset ", ""]

            if inset:
                k3 = find_end_of_inset(lines, k2)
                new = new+[""]+lines[k2:k3+1]+[""] # Put an empty line after \end_inset
                k = k3+1
                # Skip the empty line after \end_inset
                if not is_nonempty_line(lines[k]):
                    k = k+1
                    new.append("")
            elif hfill:
                new = new + ["\\hfill", ""]
                k = k2
            elif specialchar:
                if new == []:
                    # This is not necessary, but we want the output to be
                    # as similar as possible to the lyx format
                    lines[i-1] = lines[i-1]+specialchar_str
                    new = [""]
                else:
                    new = new+[specialchar_str, ""]
                k = k2
            else:
                break

        new = new+new2
        if not check_token(lines[j], "\\latex "):
            new = new+[""]+[lines[j]]
        lines[i:j+1] = new
        i = i+1

    # Delete remaining "\latex xxx" tokens
    i = 0
    while 1:
        i = find_token(lines, "\\latex ", i)
        if i == -1:
            break
        del lines[i]


# ERT insets are a hidden feature of LyX 1.1.6. This might be removed in the future.
def remove_oldertinset(file):
    """Replace each "\\begin_inset ERT" inset of file.body by its content.

    The inset is replaced by the lines between its first "\\layout" line
    and its end; that "\\layout" line is left out as well when it equals
    the line of the paragraph containing the inset.
    """
    body = file.body
    pos = 0
    while 1:
        pos = find_token(body, "\\begin_inset ERT", pos)
        if pos == -1:
            break
        inset_end = find_end_of_inset(body, pos)
        content_start = find_token(body, "\\layout", pos+1)
        paragraph = get_paragraph(body, pos, file.format + 1)
        if body[content_start] == body[paragraph]:
            # same layout
            content_start = content_start+1
        body[pos:inset_end+1] = body[content_start:inset_end]
        pos = pos+1


def is_ert_paragraph(lines, i):
    """Tell whether the paragraph starting at lines[i] is just an ERT inset.

    Returns 0 unless lines[i] is a "\\layout Standard" line whose first
    non-empty successor opens an ERT inset; otherwise returns the result
    of checking that the first non-empty line after that inset is a
    "\\layout" line.
    """
    if check_token(lines[i], "\\layout Standard"):
        first = find_nonempty_line(lines, i+1)
        if check_token(lines[first], "\\begin_inset ERT"):
            inset_end = find_end_of_inset(lines, first)
            following = find_nonempty_line(lines, inset_end+1)
            return check_token(lines[following], "\\layout")
    return 0


def combine_ert(file):
    """Merge runs of consecutive ERT-only paragraphs of file.body.

    Starting at the paragraph of each ERT inset, the inner lines of every
    following ERT paragraph are gathered; when at least two paragraphs
    were gathered, the gathered lines replace the span from the first
    inset's "\\layout" line up to the last "\\end_inset" that was visited.
    """
    body = file.body
    pos = 0
    while 1:
        pos = find_token(body, "\\begin_inset ERT", pos)
        if pos == -1:
            break
        paragraph = get_paragraph(body, pos, file.format + 1)
        gathered = []
        n_paragraphs = 0
        while is_ert_paragraph(body, paragraph):
            n_paragraphs += 1
            inner_layout = find_token(body, "\\layout", paragraph+1)
            inset_end = find_token(body, "\\end_inset", inner_layout+1)
            gathered.extend(body[inner_layout:inset_end])
            paragraph = find_token(body, "\\layout", inset_end+1)
            if paragraph == -1:
                break

        if n_paragraphs >= 2:
            target = find_token(body, "\\layout", pos+1)
            body[target:inset_end] = gathered

        pos = pos+1


# Unit suffixes, indexed by the numeric unit code of an old length line.
oldunits = ["pt", "cm", "in", "text%", "col%"]

def get_length(lines, name, start, end):
    """Return the length stored on the `name` line, or "" when absent.

    The line is looked up with find_token between start and end. Its
    second word is the index of the unit in `oldunits` and its third word
    is the value; the result is the value followed by the unit.
    """
    pos = find_token(lines, name, start, end)
    if pos != -1:
        fields = lines[pos].split()
        return fields[2]+oldunits[int(fields[1])]
    return ""


def write_attribute(x, token, value):
    """Append "<TAB>token value" to the list x unless value is "".

    The body is indented with spaces only; the previous mix of a 4-space
    level and a tab level is rejected by Python 3 (TabError).
    """
    if value == "":
        return
    x.append("\t" + token + " " + value)


def remove_figinset(file):
    """Replace each "\\begin_inset Figure" inset of file.body by a Graphics inset.

    The file name, lengths, rotation angle, display flags and subcaption
    data are read from the lines of the old inset and written as
    attributes of a "\\begin_inset Graphics FormatVersion 1" inset.
    file.body is modified in place.
    """
    lines = file.body
    i = 0
    while 1:
        i = find_token(lines, "\\begin_inset Figure", i)
        if i == -1:
            break
        j = find_end_of_inset(lines, i)

        # The 4th and 5th words of the header are the on-screen size. Both
        # must be present: the former "more than 2 words" test raised
        # IndexError on headers of 3 or 4 words.
        header = lines[i].split()
        if len(header) > 4:
            lyxwidth = header[3]+"pt"
            lyxheight = header[4]+"pt"
        else:
            lyxwidth = ""
            lyxheight = ""

        filename = get_value(lines, "file", i+1, j)

        width = get_length(lines, "width", i+1, j)
        # what does width=5 mean ?
        height = get_length(lines, "height", i+1, j)
        rotateAngle = get_value(lines, "angle", i+1, j)
        if width == "" and height == "":
            size_type = "0"
        else:
            size_type = "1"

        flags = get_value(lines, "flags", i+1, j)
        try:
            x = int(flags)%4
        except (TypeError, ValueError):
            # Missing or non-numeric flags: take the catch-all branch
            # below instead of aborting the whole conversion.
            # NOTE(review): confirm "color" is the wanted default here.
            x = 0
        if x == 1:
            display = "monochrome"
        elif x == 2:
            display = "gray"
        else:
            display = "color"

        subcaptionText = ""
        subcaptionLine = find_token(lines, "subcaption", i+1, j)
        if subcaptionLine != -1:
            # Drop the leading "subcaption " (11 characters).
            subcaptionText = lines[subcaptionLine][11:]
            if subcaptionText != "":
                subcaptionText = '"'+subcaptionText+'"'

        k = find_token(lines, "subfigure", i+1,j)
        if k == -1:
            subcaption = 0
        else:
            subcaption = 1

        new = ["\\begin_inset Graphics FormatVersion 1"]
        write_attribute(new, "filename", filename)
        write_attribute(new, "display", display)
        if subcaption:
            new.append("\tsubcaption")
        write_attribute(new, "subcaptionText", subcaptionText)
        write_attribute(new, "size_type", size_type)
        write_attribute(new, "width", width)
        write_attribute(new, "height", height)
        if rotateAngle != "":
            new.append("\trotate")
            write_attribute(new, "rotateAngle", rotateAngle)
        write_attribute(new, "rotateOrigin", "leftBaseline")
        write_attribute(new, "lyxsize_type", "1")
        write_attribute(new, "lyxwidth", lyxwidth)
        write_attribute(new, "lyxheight", lyxheight)
        new = new + ["\\end_inset"]
        lines[i:j+1] = new
        # lines[i] is now the Graphics header; resume after it.
        i = i+1


##
# Convert tabular format 2 to 3
#
# An attribute whose value is "false", "0" or empty.
attr_re = re.compile(r' \w*="(false|0|)"')
# Lines from which such attributes are dropped.
line_re = re.compile(r'<(features|column|row|cell)')

def update_tabular(file):
    """Rewrite the lines of every Tabular inset of file.body.

    The <lyxtabular> line gets version="3" instead of version="2", an
    empty column width becomes "0pt", and on features/column/row/cell
    lines every attribute matched by `attr_re` is removed.
    """
    tabular_start = re.compile(r'^\\begin_inset\s+Tabular')
    body = file.body
    pos = 0
    while 1:
        pos = find_re(body, tabular_start, pos)
        if pos == -1:
            break

        for idx in get_tabular_lines(body, pos):
            text = body[idx]
            if check_token(text, "<lyxtabular"):
                text = text.replace('version="2"', 'version="3"')
            elif check_token(text, "<column"):
                text = text.replace('width=""', 'width="0pt"')

            if line_re.match(text):
                text = attr_re.sub("", text)
            body[idx] = text

        pos = pos+1


##
# Convert tabular format 2 to 3
#
# Compatibility read for old longtable options. Now we can make any
# row part of the header/footer type we want; before, it was strictly
# sequential from the first row down (as LaTeX does it!). So now, when
# we find a header/footer line, we have to go up the rows and set it
# on all preceding rows until the first row, or until one that already
# has a h/f option set. If we find a firstheader on the same line as a
# header, or a lastfooter on the same line as a footer, then this should
# be set empty.
# (Jug 20011220)

# just for compatibility with old python versions
# python >= 2.3 has real booleans (False and True)
false = 0
true = 1


class row:
    """Longtable flags of one table row; every flag starts out false."""

    def __init__(self):
        # header row / first header row
        self.endhead = self.endfirsthead = false
        # footer row / last footer row
        self.endfoot = self.endlastfoot = false


def haveLTFoot(row_info):
    """Return true if any entry of row_info has its endfoot flag set."""
    found = false
    for entry in row_info:
        if entry.endfoot:
            found = true
            break
    return found


def setHeaderFooterRows(hr, fhr, fr, lfr, rows_, row_info):
    """Spread old-style longtable header/footer limits over per-row flags.

    hr, fhr, fr and lfr are compared with integers and used as indices
    into row_info, so they have to be integers. rows_ is the bound the
    row numbers are checked against, and row_info is an indexable
    collection of objects with endhead, endfirsthead, endfoot and
    endlastfoot attributes, which are updated in place.

    Returns the pair (endfirsthead_empty, endlastfoot_empty).
    """
    endfirsthead_empty = false
    endlastfoot_empty = false
    # set header info
    # (rows 0 .. hr-1 are flagged as header rows)
    while (hr > 0):
        hr = hr - 1
        row_info[hr].endhead = true

    # set firstheader info
    if fhr and fhr < rows_:
        if row_info[fhr].endhead:
            # Row fhr is a header row: rows 0 .. fhr-1 become first-header
            # rows and lose their header flag.
            while fhr > 0:
                fhr = fhr - 1
                row_info[fhr].endfirsthead = true
                row_info[fhr].endhead = false
        elif row_info[fhr - 1].endhead:
            endfirsthead_empty = true
        else:
            # Walk upwards until the first row or a header row is reached.
            while fhr > 0 and not row_info[fhr - 1].endhead:
                fhr = fhr - 1
                row_info[fhr].endfirsthead = true

    # set footer info
    if fr and fr < rows_:
        # NOTE(review): in the first two branches the branch test already
        # requires the flag of row fr-1 to be set, so the loop condition
        # is false on entry and the loop body never runs - confirm that
        # this is intended.
        if row_info[fr].endhead and row_info[fr - 1].endhead:
            while fr > 0 and not row_info[fr - 1].endhead:
                fr = fr - 1
                row_info[fr].endfoot = true
                row_info[fr].endhead = false
        elif row_info[fr].endfirsthead and row_info[fr - 1].endfirsthead:
            while fr > 0 and not row_info[fr - 1].endfirsthead:
                fr = fr - 1
                row_info[fr].endfoot = true
                row_info[fr].endfirsthead = false
        elif not row_info[fr - 1].endhead and not row_info[fr - 1].endfirsthead:
            # Walk upwards until the first row or a (first) header row.
            while fr > 0 and not row_info[fr - 1].endhead and not row_info[fr - 1].endfirsthead:
                fr = fr - 1
                row_info[fr].endfoot = true

    # set lastfooter info
    if lfr and lfr < rows_:
        # NOTE(review): as in the footer section, the loops of the first
        # three branches cannot run because their condition contradicts
        # the branch test - confirm that this is intended.
        if row_info[lfr].endhead and row_info[lfr - 1].endhead:
            while lfr > 0 and not row_info[lfr - 1].endhead:
                lfr = lfr - 1
                row_info[lfr].endlastfoot = true
                row_info[lfr].endhead = false
        elif row_info[lfr].endfirsthead and row_info[lfr - 1].endfirsthead:
            while lfr > 0 and not row_info[lfr - 1].endfirsthead:
                lfr = lfr - 1
                row_info[lfr].endlastfoot = true
                row_info[lfr].endfirsthead = false
        elif row_info[lfr].endfoot and row_info[lfr - 1].endfoot:
            while lfr > 0 and not row_info[lfr - 1].endfoot:
                lfr = lfr - 1
                row_info[lfr].endlastfoot = true
                row_info[lfr].endfoot = false
        # NOTE(review): the test below reads row fr-1 (the value left by
        # the footer section) while the loop it guards uses lfr - confirm
        # whether lfr was meant.
        elif not row_info[fr - 1].endhead and not row_info[fr - 1].endfirsthead and not row_info[fr - 1].endfoot:
            while lfr > 0 and not row_info[lfr - 1].endhead and not row_info[lfr - 1].endfirsthead and not row_info[lfr - 1].endfoot:
                lfr = lfr - 1
                row_info[lfr].endlastfoot = true
        elif haveLTFoot(row_info):
            endlastfoot_empty = true

    return endfirsthead_empty, endlastfoot_empty


def insert_attribute(lines, i, attribute):
    """Insert attribute, preceded by a space, before the first '>' of lines[i].

    lines[i] is replaced in place. When the line contains no '>', find()
    yields -1 and the text ends up before the last character of the line,
    as it always did.

    The str.find method is used because the string.find function no
    longer exists in Python 3.
    """
    line = lines[i]
    last = line.find('>')
    lines[i] = line[:last] + ' ' + attribute + line[last:]


rows_re = re.compile(r'rows="(\d*)"')
# "\w+" rather than "\w": a single word character can never match the
# whole value "true", which made every longtable look like a plain table.
longtable_re = re.compile(r'islongtable="(\w+)"')
ltvalues_re = re.compile(r'endhead="(-?\d*)" endfirsthead="(-?\d*)" endfoot="(-?\d*)" endlastfoot="(-?\d*)"')
lt_features_re = re.compile(r'(endhead="-?\d*" endfirsthead="-?\d*" endfoot="-?\d*" endlastfoot="-?\d*")')
# One old longtable attribute at a time. update_tabular, which comes first
# in the convert list, strips attributes whose value is 0 from <features>
# lines, so the four attributes cannot be expected to be all present.
lt_attribute_re = re.compile(r'\s(endhead|endfirsthead|endfoot|endlastfoot)="(-?\d+)"')


def get_longtable_values(line, rows):
    """Return (endhead, endfirsthead, endfoot, endlastfoot) found on line.

    Each value is a non-negative integer; an attribute that is missing
    counts as 0. The magnitude of negative numbers is used, because a
    negative value would address row_info from its end.
    NOTE(review): confirm the handling of negative values against the way
    LyX itself reads format 2 tables.
    endhead is limited to rows since it is used as a row count without
    any further check.
    """
    values = {"endhead": 0, "endfirsthead": 0, "endfoot": 0, "endlastfoot": 0}
    for name, number in lt_attribute_re.findall(line):
        values[name] = abs(int(number))
    return (min(values["endhead"], rows), values["endfirsthead"],
            values["endfoot"], values["endlastfoot"])


def update_longtables(file):
    """Convert the longtable settings of every Tabular inset of file.body.

    For tables that are not longtables, islongtable="false" is added to
    the <features> line when the attribute is missing, and a complete run
    of the four old endhead/endfirsthead/endfoot/endlastfoot attributes is
    removed. For longtables, those values are turned into per-row flags by
    setHeaderFooterRows and written as attributes of the <row> lines;
    firstHeadEmpty/lastFootEmpty are added to the <features> line when
    reported. file.body is modified in place; malformed tables are
    reported through file.warning.
    """
    regexp = re.compile(r'^\\begin_inset\s+Tabular')
    body = file.body
    i = 0
    while 1:
        i = find_re(body, regexp, i)
        if i == -1:
            break
        i = find_token(body, "<lyxtabular", i + 1)
        if i == -1:
            break

        # get number of rows in the table
        rows_match = rows_re.search(body[i])
        if not rows_match or not rows_match.group(1):
            file.warning("Malformed LyX file: tabular without a valid rows attribute.")
            i = i + 1
            continue
        rows = int(rows_match.group(1))

        i = find_token(body, '<features', i + 1)
        if i == -1:
            break

        # is this a longtable?
        longtable = longtable_re.search(body[i])

        if not longtable:
            # islongtable is missing add it
            # The text goes right after the tag name so that a bare
            # "<features>" line stays well formed as well.
            tag_end = len('<features')
            body[i] = body[i][:tag_end] + ' islongtable="false"' + body[i][tag_end:]

        if not longtable or longtable.group(1) != "true":
            # remove longtable elements from features
            features = lt_features_re.search(body[i])
            if features:
                body[i] = body[i].replace(features.group(1), "")
            continue

        # One independent flag record per row.
        row_info = [row() for unused in range(rows)]
        hr, fhr, fr, lfr = get_longtable_values(body[i], rows)

        endfirsthead_empty, endlastfoot_empty = setHeaderFooterRows(hr, fhr, fr, lfr, rows, row_info)

        if endfirsthead_empty:
            insert_attribute(body, i, 'firstHeadEmpty="true"')

        if endlastfoot_empty:
            insert_attribute(body, i, 'lastFootEmpty="true"')

        i = i + 1
        for j in range(rows):
            i = find_token(body, '<row', i)
            if i == -1:
                # Fewer <row> lines than announced: nothing sensible can
                # be done with the rest of the document.
                file.warning("Malformed LyX file: tabular with missing rows.")
                return

            if row_info[j].endhead:
                insert_attribute(body, i, 'endhead="true"')

            if row_info[j].endfirsthead:
                insert_attribute(body, i, 'endfirsthead="true"')

            if row_info[j].endfoot:
                insert_attribute(body, i, 'endfoot="true"')

            if row_info[j].endlastfoot:
                insert_attribute(body, i, 'endlastfoot="true"')

            i = i + 1

# Figure insets are a hidden feature of LyX 1.1.6. This might be removed in the future.
def fix_oldfloatinset(file):
    """Add a "wide false" line to the "\\begin_inset Float " insets of file.body.

    The line is inserted in front of the first "collapsed" line found at
    or after the start of the inset; nothing is inserted when there is no
    such line.
    """
    body = file.body
    pos = 0
    while 1:
        pos = find_token(body, "\\begin_inset Float ", pos)
        if pos == -1:
            break
        collapsed = find_token(body, "collapsed", pos)
        if collapsed != -1:
            body.insert(collapsed, "wide false")
        pos = pos+1


def change_listof(file):
    """Turn "\\begin_inset LatexCommand \\listof..." lines into FloatList insets.

    The word following "listof", without its last character, becomes the
    argument of "\\begin_inset FloatList".
    """
    body = file.body
    pos = 0
    while 1:
        pos = find_token(body, "\\begin_inset LatexCommand \\listof", pos)
        if pos == -1:
            break
        plural = re.search(r"listof(\w*)", body[pos]).group(1)
        body[pos] = "\\begin_inset FloatList "+plural[:-1]
        pos = pos+1


def change_infoinset(file):
    """Convert each "\\begin_inset Info" inset of file.body into a Note inset.

    Every line of the old inset, including text following the header on
    the same line, becomes a "\\layout Standard" paragraph of the note,
    with each backslash replaced by a "\\backslash " line. The
    "\\end_inset" line of the old inset is kept. Processing stops at an
    inset whose end cannot be found.
    """
    lines = file.body
    i = 0
    while 1:
        i = find_token(lines, "\\begin_inset Info", i)
        if i == -1:
            break
        # Text following "\begin_inset Info" on the header line.
        txt = lines[i][18:].lstrip()
        new = ["\\begin_inset Note", "collapsed true", ""]
        j = find_token(lines, "\\end_inset", i)
        if j == -1:
            break

        note_lines = lines[i+1:j]
        if len(txt) > 0:
            note_lines = [txt]+note_lines

        for line in note_lines:
            # "\\layout": the single-backslash spelling is an invalid
            # escape sequence that newer Python versions warn about.
            new.extend(["\\layout Standard", ""])
            tmp = line.split('\\')
            new.append(tmp[0])
            for x in tmp[1:]:
                new.extend(["\\backslash ", x])
        lines[i:j] = new
        i = i+5


def change_header(file):
    """Insert the natbib settings after the "\\use_amsmath" header line.

    file.header is modified in place; nothing happens when it has no
    "\\use_amsmath" line.
    """
    lines = file.header
    i = find_token(lines, "\\use_amsmath", 0)
    if i == -1:
        return
    # The backslash is doubled in both strings: a single-backslash "\u"
    # starts a unicode escape on Python 3 and is a syntax error there.
    lines[i+1:i+1] = ["\\use_natbib 0",
                      "\\use_numerical_citations 0"]


# Conversion steps, as [target format, [functions in execution order]].
convert = [
    [220, [
        change_header,
        change_listof,
        fix_oldfloatinset,
        update_tabular,
        update_longtables,
        remove_pextra,
        remove_oldfloat,
        remove_figinset,
        remove_oldertinset,
        remove_oldert,
        combine_ert,
        change_infoinset,
    ]],
]
# No reversion steps are defined here.
revert = []


# Nothing is done when this module is executed directly.
if __name__ == "__main__":
    pass