# This file is part of lyx2lyx
# -*- coding: utf-8 -*-
# Copyright (C) 2002 Dekel Tsur
# Copyright (C) 2004 José Matos
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

""" Convert files to the file format generated by lyx 1.2"""

import re

from parser_tools import find_token, find_token_backwards, \
                         find_tokens, find_tokens_backwards, \
                         find_beginning_of, find_end_of, find_re, \
                         is_nonempty_line, find_nonempty_line, \
                         get_value, check_token

####################################################################
# Private helper functions

def get_layout(line, default_layout):
    """Return the layout name carried by a layout line.

    The name is the second whitespace-separated word of `line`; when the
    line has fewer than two words, `default_layout` is returned instead.
    """
    tokens = line.split()
    if len(tokens) > 1:
        return tokens[1]
    return default_layout


def get_paragraph(lines, i, format):
    """Find the "\\layout" line of the paragraph that contains line i.

    Searches backwards from `i`.  Whenever an "\\end_inset" is met first,
    the search jumps to the beginning of that inset and continues from
    there, so layout lines inside nested insets are skipped.

    `format` is accepted for interface compatibility and is not used.
    Returns the index of the layout line, or -1 if none is found.
    """
    begin_layout = "\\layout"
    while i != -1:
        i = find_tokens_backwards(lines, ["\\end_inset", begin_layout], i)
        if i == -1:
            return -1
        if check_token(lines[i], begin_layout):
            return i
        i = find_beginning_of_inset(lines, i)
    return -1


def get_next_paragraph(lines, i, format):
    """Find the paragraph after the paragraph that contains line i.

    Searches forward from `i` for the next "\\layout", "\\end_float" or
    "\\the_end" line, skipping over complete insets.

    `format` is accepted for interface compatibility and is not used.
    Returns the index found, or -1 if the search fails.
    """
    tokens = ["\\begin_inset", "\\layout", "\\end_float", "\\the_end"]
    while i != -1:
        i = find_tokens(lines, tokens, i)
        if i == -1:
            # Nothing found.  Without this guard the code below would
            # inspect lines[-1], i.e. the last line of the document.
            return -1
        if not check_token(lines[i], "\\begin_inset"):
            return i
        i = find_end_of_inset(lines, i)
    return -1


def find_beginning_of_inset(lines, i):
    """Find the beginning of the inset, where lines[i] is included."""
    return find_beginning_of(lines, i, "\\begin_inset", "\\end_inset")


def find_end_of_inset(lines, i):
    r"""Find the \end_inset that matches the inset starting at lines[i]."""
    return find_end_of(lines, i, "\\begin_inset", "\\end_inset")


# NOTE(review): DAMAGED SOURCE.  In the text this file was recovered from,
# everything between a '<' character and the next '>' character has been
# stripped.  The span that starts inside find_end_of_tabular() is therefore
# lost, and with it every definition up to the middle of the function whose
# tail is shown below.  The names that later code in this file still uses
# but that are no longer defined are: find_end_of_tabular (head survives
# only in part), get_tabular_lines, floats, font_tokens, pextra_type3_rexp,
# pextra_rexp and get_width.  Restore this region from a pristine copy of
# the file; nothing has been guessed here.  The surviving text is kept
# verbatim (only re-indented) for reference:
#
# def find_end_of_tabular(lines, i):
#     " Finds the matching end of tabular."
#     return find_end_of(lines, i, "
#         [... text lost ...]
#          i+1:
#             j2 = get_next_paragraph(lines, j + 1, document.format + 1)
#             lines[j2:j2] = ["\\end_deeper "]*(i2-(i+1))
#
#         new = floats[floattype]+[""]
#
#         # Check if the float is floatingfigure
#         k = find_re(lines, pextra_type3_rexp, i, j)
#         if k != -1:
#             mo = pextra_rexp.search(lines[k])
#             width = get_width(mo)
#             lines[k] = re.sub(pextra_rexp, "", lines[k])
#             new = ["\\begin_inset Wrap figure",
#                    'width "%s"' % width,
#                    "collapsed false",
#                    ""]
#
#         new = new+lines[i2:j]+["\\end_inset ", ""]
#
#         # After a float, all font attributes are reseted.
#         # We need to output '\foo default' for every attribute foo
#         # whose value is not default before the float.
#         # The check here is not accurate, but it doesn't matter
#         # as extra '\foo default' commands are ignored.
#         # In fact, it might be safer to output '\foo default' for all
#         # font attributes.
#         k = get_paragraph(lines, i, document.format + 1)
#         flag = 0
#         for token in font_tokens:
#             if find_token(lines, token, k, i) != -1:
#                 if not flag:
#                     # This is not necessary, but we want the output to be
#                     # as similar as posible to the lyx format
#                     flag = 1
#                     new.append("")
#                 if token == "\\lang":
#                     new.append(token+" "+ document.language)
#                 else:
#                     new.append(token+" default ")
#
#         lines[i:j+1] = new
#         i = i+1


pextra_type2_rexp = re.compile(r".*\\pextra_type\s+[12]")
pextra_type2_rexp2 = re.compile(r".*(\\layout|\\pextra_type\s+2)")
pextra_widthp = re.compile(r"\\pextra_widthp")


def remove_pextra(document):
    """Remove the \\pextra_* tokens from document.body (edited in place).

    A "\\pextra_type 1" entry is rewritten as a "\\leftindent" token; a
    "\\pextra_type 2" entry is turned into a Minipage inset that wraps the
    paragraph and the following paragraphs collected by the inner loop.

    Relies on the module-level `pextra_rexp` and `get_width`.
    """
    lines = document.body
    i = 0
    flag = 0
    while True:
        i = find_re(lines, pextra_type2_rexp, i)
        if i == -1:
            break

        # Sometimes the \pextra_widthp argument comes on its own
        # line. If that happens insert it back in this line.
        # (The bound check avoids an IndexError on the last line.)
        if i + 1 < len(lines) and pextra_widthp.search(lines[i+1]):
            lines[i] = lines[i] + ' ' + lines[i+1]
            del lines[i+1]

        mo = pextra_rexp.search(lines[i])
        width = get_width(mo)

        if mo.group(1) == "1":
            # handle \pextra_type 1 (indented paragraph)
            # The replacement is supplied through a callable so that the
            # backslash of "\leftindent" is inserted literally: as a plain
            # replacement string, "\l" is an unknown escape, which the re
            # module rejects with an error on Python >= 3.7.
            indent = "\\leftindent " + width + " "
            lines[i] = re.sub(pextra_rexp, lambda _mo, _s=indent: _s, lines[i])
            i = i+1
            continue

        # handle \pextra_type 2 (minipage)
        position = mo.group(3)
        hfill = mo.group(5)
        lines[i] = re.sub(pextra_rexp, "", lines[i])

        start = ["\\begin_inset Minipage",
                 "position " + position,
                 "inner_position 0",
                 'height "0pt"',
                 'width "%s"' % width,
                 "collapsed false"
                 ]
        if flag:
            # The previous pass stopped at this paragraph (see the
            # group(7) test below), so no new "\layout" line is emitted.
            flag = 0
            if hfill:
                start = ["", "\\hfill", ""] + start
        else:
            start = ['\\layout %s' % document.default_layout, ''] + start

        j0 = find_token_backwards(lines, "\\layout", i-1)
        j = get_next_paragraph(lines, i, document.format + 1)

        while True:
            # collect more paragraphs to the minipage
            if j == -1 or not check_token(lines[j], "\\layout"):
                break
            i = find_re(lines, pextra_type2_rexp2, j+1)
            if i == -1:
                break
            mo = pextra_rexp.search(lines[i])
            if not mo:
                break
            if mo.group(7) == "1":
                flag = 1
                break
            lines[i] = re.sub(pextra_rexp, "", lines[i])
            j = find_tokens(lines, ["\\layout", "\\end_float"], i+1)

        mid = lines[j0:j]
        end = ["\\end_inset "]
        lines[j0:j] = start+mid+end
        # If the inner search failed, i is -1 here and the scan restarts
        # from the top of the body.
        i = i+1


def is_empty(lines):
    """Return True when no element of `lines` is a non-empty line.

    "Non-empty" is whatever parser_tools.is_nonempty_line() reports.
    A list is built explicitly because filter() returns a lazy iterator
    on Python 3, which never compares equal to [].
    """
    return not [line for line in lines if is_nonempty_line(line)]


move_rexp = re.compile(r"\\(family|series|shape|size|emph|numeric|bar|noun|end_deeper)")
ert_rexp = re.compile(r"\\begin_inset|\\hfill|.*\\SpecialChar")
spchar_rexp = re.compile(r"(.*)(\\SpecialChar.*)")


def remove_oldert(document):
    """Remove old-style ERT from document.body (edited in place).

    Text marked with "\\latex latex" or placed in a "\\layout LaTeX"
    paragraph is wrapped into "\\begin_inset ERT" insets.  Insets,
    "\\hfill" and "\\SpecialChar" tokens found inside that text are moved
    outside the ERT inset, as are the font/depth tokens matched by
    `move_rexp`.  Finally every remaining "\\latex " line is deleted.
    """
    ert_begin = ["\\begin_inset ERT",
                 "status Collapsed",
                 "",
                 '\\layout %s' % document.default_layout,
                 ""]
    lines = document.body
    i = 0
    while True:
        i = find_tokens(lines, ["\\latex latex", "\\layout LaTeX"], i)
        if i == -1:
            break
        j = i+1
        while True:
            # \end_inset is for ert inside a tabular cell. The other tokens
            # are obvious.
            j = find_tokens(lines, ["\\latex default", "\\layout",
                                    "\\begin_inset", "\\end_inset",
                                    "\\end_float", "\\the_end"],
                            j)
            if check_token(lines[j], "\\begin_inset"):
                j = find_end_of_inset(lines, j)+1
            else:
                break

        if check_token(lines[j], "\\layout"):
            # Leave any "\begin_deeper" lines that precede the next
            # paragraph outside the ERT text.
            while j-1 >= 0 and check_token(lines[j-1], "\\begin_deeper"):
                j = j-1

        # We need to remove insets, special chars & font commands from ERT text
        new = []
        new2 = []
        if check_token(lines[i], "\\layout LaTeX"):
            new = ['\\layout %s' % document.default_layout, "", ""]

        k = i+1
        while True:
            k2 = find_re(lines, ert_rexp, k, j)
            inset = hfill = specialchar = 0
            if k2 == -1:
                k2 = j
            elif check_token(lines[k2], "\\begin_inset"):
                inset = 1
            elif check_token(lines[k2], "\\hfill"):
                hfill = 1
                del lines[k2]
                j = j-1
            else:
                specialchar = 1
                mo = spchar_rexp.match(lines[k2])
                lines[k2] = mo.group(1)
                specialchar_str = mo.group(2)
                k2 = k2+1

            tmp = []
            for line in lines[k:k2]:
                # Move some lines outside the ERT inset:
                if move_rexp.match(line):
                    if new2 == []:
                        # This is not necessary, but we want the output to be
                        # as similar as possible to the lyx format
                        new2 = [""]
                    new2.append(line)
                elif not check_token(line, "\\latex"):
                    tmp.append(line)

            if is_empty(tmp):
                # A real list is tested here: on Python 3 a filter()
                # object never equals [], so the original comparison was
                # always true.
                if [x for x in tmp if x != ""]:
                    if new == []:
                        # This is not necessary, but we want the output to be
                        # as similar as possible to the lyx format
                        lines[i-1] = lines[i-1]+" "
                    else:
                        new = new+[" "]
            else:
                new = new+ert_begin+tmp+["\\end_inset ", ""]

            if inset:
                k3 = find_end_of_inset(lines, k2)
                # Put an empty line after \end_inset
                new = new+[""]+lines[k2:k3+1]+[""]
                k = k3+1
                # Skip the empty line after \end_inset
                if not is_nonempty_line(lines[k]):
                    k = k+1
                    new.append("")
            elif hfill:
                new = new + ["\\hfill", ""]
                k = k2
            elif specialchar:
                if new == []:
                    # This is not necessary, but we want the output to be
                    # as similar as possible to the lyx format
                    lines[i-1] = lines[i-1]+specialchar_str
                    new = [""]
                else:
                    new = new+[specialchar_str, ""]
                k = k2
            else:
                break

        new = new+new2
        if not check_token(lines[j], "\\latex "):
            new = new+[""]+[lines[j]]
        lines[i:j+1] = new
        i = i+1

    # Delete remaining "\latex xxx" tokens
    i = 0
    while True:
        i = find_token(lines, "\\latex ", i)
        if i == -1:
            break
        del lines[i]


def remove_oldertinset(document):
    """Replace every "\\begin_inset ERT" inset by its contents.

    ERT insets are a hidden feature of lyx 1.1.6. This might be removed
    in the future.

    The inset's own "\\layout" line is dropped as well when it is
    identical to the layout line of the enclosing paragraph.
    """
    lines = document.body
    i = 0
    while True:
        i = find_token(lines, "\\begin_inset ERT", i)
        if i == -1:
            break
        j = find_end_of_inset(lines, i)
        k = find_token(lines, "\\layout", i+1)
        par = get_paragraph(lines, i, document.format + 1)
        if lines[k] == lines[par]:  # same layout
            k = k+1
        new = lines[k:j]
        lines[i:j+1] = new
        i = i+1


def is_ert_paragraph(document, i):
    """Tell whether the paragraph starting at line i holds only an ERT inset.

    True requires all of: lines[i] is a "\\layout" line with the default
    layout, the next non-empty line starts an ERT inset, and the first
    non-empty line after that inset is again a "\\layout" line.
    Returns 0 for the early rejections, otherwise the result of the last
    check_token() call.
    """
    lines = document.body
    if not check_token(lines[i], "\\layout"):
        return 0
    if not document.is_default_layout(get_layout(lines[i], document.default_layout)):
        return 0

    i = find_nonempty_line(lines, i+1)
    if not check_token(lines[i], "\\begin_inset ERT"):
        return 0

    j = find_end_of_inset(lines, i)
    k = find_nonempty_line(lines, j+1)
    return check_token(lines[k], "\\layout")


def combine_ert(document):
    """Combine runs of consecutive ERT-only paragraphs into one ERT inset.

    For each ERT inset, the ERT-only paragraphs starting at its enclosing
    paragraph are walked and the text between each inset's inner
    "\\layout" line and its "\\end_inset" is collected.  When at least two
    such paragraphs were found, the collected text replaces the span that
    runs from the first inset's inner "\\layout" line up to the last
    "\\end_inset".
    """
    lines = document.body
    i = 0
    while True:
        i = find_token(lines, "\\begin_inset ERT", i)
        if i == -1:
            break
        j = get_paragraph(lines, i, document.format + 1)
        count = 0
        text = []
        while is_ert_paragraph(document, j):
            count = count+1
            i2 = find_token(lines, "\\layout", j+1)
            k = find_token(lines, "\\end_inset", i2+1)
            text = text+lines[i2:k]
            j = find_token(lines, "\\layout", k+1)
            if j == -1:
                break

        if count >= 2:
            # k still points at the "\end_inset" of the last paragraph
            # visited by the loop above.
            j = find_token(lines, "\\layout", i+1)
            lines[j:k] = text

        i = i+1


oldunits = ["pt", "cm", "in", "text%", "col%"]


def get_length(lines, name, start, end):
    """Return the length stored on the `name` line between start and end.

    The line is split on whitespace; word 1 is an index into `oldunits`
    and word 2 is the numeric value.  The result is value + unit, e.g.
    "5cm", or "" when no such line exists.
    """
    i = find_token(lines, name, start, end)
    if i == -1:
        return ""
    x = lines[i].split()
    return x[2]+oldunits[int(x[1])]


def write_attribute(x, token, value):
    """Append "\\t<token> <value>" to the list x unless value is empty."""
    if value != "":
        x.append("\t"+token+" "+value)


def remove_figinset(document):
    """Convert every "\\begin_inset Figure" inset into a Graphics inset.

    The attributes of the old inset are read between its first and last
    line and rewritten as "\\begin_inset Graphics FormatVersion 1"
    attributes; document.body is edited in place.
    """
    lines = document.body
    i = 0
    while True:
        i = find_token(lines, "\\begin_inset Figure", i)
        if i == -1:
            break
        j = find_end_of_inset(lines, i)

        # Words 3 and 4 of the opening line are used as the on-screen
        # size.  Both must be present: the original "> 2" test let
        # three- and four-word lines through and then failed with an
        # IndexError.
        header = lines[i].split()
        if len(header) > 4:
            lyxwidth = header[3]+"pt"
            lyxheight = header[4]+"pt"
        else:
            lyxwidth = ""
            lyxheight = ""

        filename = get_value(lines, "file", i+1, j)

        width = get_length(lines, "width", i+1, j)
        # what does width=5 mean ?
        height = get_length(lines, "height", i+1, j)
        rotateAngle = get_value(lines, "angle", i+1, j)
        if width == "" and height == "":
            size_type = "0"
        else:
            size_type = "1"

        # NOTE(review): int() raises if get_value() does not return a
        # number here -- confirm what it yields for a missing "flags".
        flags = get_value(lines, "flags", i+1, j)
        x = int(flags) % 4
        if x == 1:
            display = "monochrome"
        elif x == 2:
            display = "gray"
        else:
            display = "color"

        subcaptionText = ""
        subcaptionLine = find_token(lines, "subcaption", i+1, j)
        if subcaptionLine != -1:
            # Drop the first 11 characters (the length of "subcaption ").
            subcaptionText = lines[subcaptionLine][11:]
            if subcaptionText != "":
                subcaptionText = '"'+subcaptionText+'"'

        k = find_token(lines, "subfigure", i+1, j)
        if k == -1:
            subcaption = 0
        else:
            subcaption = 1

        new = ["\\begin_inset Graphics FormatVersion 1"]
        write_attribute(new, "filename", filename)
        write_attribute(new, "display", display)
        if subcaption:
            new.append("\tsubcaption")
        write_attribute(new, "subcaptionText", subcaptionText)
        write_attribute(new, "size_type", size_type)
        write_attribute(new, "width", width)
        write_attribute(new, "height", height)
        if rotateAngle != "":
            new.append("\trotate")
            write_attribute(new, "rotateAngle", rotateAngle)
        write_attribute(new, "rotateOrigin", "leftBaseline")
        write_attribute(new, "lyxsize_type", "1")
        write_attribute(new, "lyxwidth", lyxwidth)
        write_attribute(new, "lyxheight", lyxheight)
        new = new + ["\\end_inset"]
        # No increment of i is needed: the replaced text no longer
        # matches "\begin_inset Figure".
        lines[i:j+1] = new


attr_re = re.compile(r' \w*="(false|0|)"')
line_re = re.compile(r'<(features|column|row|cell)')


# NOTE(review): DAMAGED SOURCE.  A second '<' ... '>' span was stripped
# here.  update_tabular() is cut off inside its inner loop and any
# definitions that followed it, up to the comment fragment below, are
# lost.  Restore from a pristine copy of the file.  Surviving text, kept
# verbatim (only re-indented):
#
# def update_tabular(document):
#     " Convert tabular format 2 to 3."
#     regexp = re.compile(r'^\\begin_inset\s+Tabular')
#     lines = document.body
#     i = 0
#     while 1:
#         i = find_re(lines, regexp, i)
#         if i == -1:
#             break
#         for k in get_tabular_lines(lines, i):
#             if check_token(lines[k], "
#         [... text lost ...]


# NOTE(review): the start of the next comment was lost with the damaged
# span above; its surviving text reads:
#     "= 2.3 has real booleans (False and True)"
false = 0
true = 1


class row:
    " Simple data structure to deal with long table info."
    def __init__(self):
        self.endhead = false       # header row
        self.endfirsthead = false  # first header row
        self.endfoot = false       # footer row
        self.endlastfoot = false   # last footer row


def haveLTFoot(row_info):
    """Return true if any row in row_info has its endfoot flag set."""
    for row_ in row_info:
        if row_.endfoot:
            return true
    return false


def setHeaderFooterRows(hr, fhr, fr, lfr, rows_, row_info):
    """Set the header/footer flags of the rows in row_info.

    hr, fhr, fr and lfr are row counters for the header, first header,
    footer and last footer; rows_ is the number of rows.  The flags
    endhead / endfirsthead / endfoot / endlastfoot of the `row` objects
    in row_info are updated in place.

    Returns the pair (endfirsthead_empty, endlastfoot_empty).
    """
    endfirsthead_empty = false
    endlastfoot_empty = false

    # set header info
    while hr > 0:
        hr = hr - 1
        row_info[hr].endhead = true

    # set firstheader info
    if fhr and fhr < rows_:
        if row_info[fhr].endhead:
            while fhr > 0:
                fhr = fhr - 1
                row_info[fhr].endfirsthead = true
                row_info[fhr].endhead = false
        elif row_info[fhr - 1].endhead:
            endfirsthead_empty = true
        else:
            while fhr > 0 and not row_info[fhr - 1].endhead:
                fhr = fhr - 1
                row_info[fhr].endfirsthead = true

    # set footer info
    if fr and fr < rows_:
        if row_info[fr].endhead and row_info[fr - 1].endhead:
            while fr > 0 and not row_info[fr - 1].endhead:
                fr = fr - 1
                row_info[fr].endfoot = true
                row_info[fr].endhead = false
        elif row_info[fr].endfirsthead and row_info[fr - 1].endfirsthead:
            while fr > 0 and not row_info[fr - 1].endfirsthead:
                fr = fr - 1
                row_info[fr].endfoot = true
                row_info[fr].endfirsthead = false
        elif not row_info[fr - 1].endhead and not row_info[fr - 1].endfirsthead:
            while fr > 0 and not row_info[fr - 1].endhead and not row_info[fr - 1].endfirsthead:
                fr = fr - 1
                row_info[fr].endfoot = true

    # set lastfooter info
    if lfr and lfr < rows_:
        if row_info[lfr].endhead and row_info[lfr - 1].endhead:
            while lfr > 0 and not row_info[lfr - 1].endhead:
                lfr = lfr - 1
                row_info[lfr].endlastfoot = true
                row_info[lfr].endhead = false
        elif row_info[lfr].endfirsthead and row_info[lfr - 1].endfirsthead:
            while lfr > 0 and not row_info[lfr - 1].endfirsthead:
                lfr = lfr - 1
                row_info[lfr].endlastfoot = true
                row_info[lfr].endfirsthead = false
        elif row_info[lfr].endfoot and row_info[lfr - 1].endfoot:
            while lfr > 0 and not row_info[lfr - 1].endfoot:
                lfr = lfr - 1
                row_info[lfr].endlastfoot = true
                row_info[lfr].endfoot = false
        # NOTE(review): the guard below indexes with `fr`, while the loop
        # it protects uses `lfr`.  `fr` may already have been decremented
        # by the footer section above, and fr == 0 makes fr - 1 address
        # the last row.  This looks like a copy/paste slip for `lfr`, but
        # it is left untouched because the converter's output depends on
        # it -- confirm the intended behaviour before changing it.
        elif not row_info[fr - 1].endhead and not row_info[fr - 1].endfirsthead and not row_info[fr - 1].endfoot:
            while lfr > 0 and not row_info[lfr - 1].endhead and not row_info[lfr - 1].endfirsthead and not row_info[lfr - 1].endfoot:
                lfr = lfr - 1
                row_info[lfr].endlastfoot = true
        elif haveLTFoot(row_info):
            endlastfoot_empty = true

    return endfirsthead_empty, endlastfoot_empty


def insert_attribute(lines, i, attribute):
    """Insert `attribute` into lines[i] just before its first '>'.

    A single space is placed in front of the attribute.
    NOTE(review): if the line contains no '>', str.find() returns -1 and
    the attribute lands before the last character -- confirm callers
    only pass tag lines.
    """
    last = lines[i].find('>')
    lines[i] = lines[i][:last] + ' ' + attribute + lines[i][last:]


rows_re = re.compile(r'rows="(\d*)"')
longtable_re = re.compile(r'islongtable="(\w)"')
ltvalues_re = re.compile(r'endhead="(-?\d*)" endfirsthead="(-?\d*)" endfoot="(-?\d*)" endlastfoot="(-?\d*)"')
lt_features_re = re.compile(r'(endhead="-?\d*" endfirsthead="-?\d*" endfoot="-?\d*" endlastfoot="-?\d*")')


# NOTE(review): DAMAGED SOURCE.  The text breaks off in the middle of a
# string literal inside update_longtables(); the rest of the function (and
# of the file, if any) is not available.  The function is deliberately not
# defined from this fragment, so that a caller fails loudly instead of
# running a converter that silently does nothing.  Restore from a pristine
# copy of the file.  Surviving text, kept verbatim (only re-indented):
#
# def update_longtables(document):
#     " Update longtables to new format."
#     regexp = re.compile(r'^\\begin_inset\s+Tabular')
#     body = document.body
#     i = 0
#     while 1:
#         i = find_re(body, regexp, i)
#         if i == -1:
#             break
#         i = i + 1
#         i = find_token(body, "