# This file is part of lyx2lyx # -*- coding: utf-8 -*- # Copyright (C) 2002 Dekel Tsur <dekel@lyx.org> # Copyright (C) 2004 José Matos <jamatos@lyx.org> # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """ Convert files to the file format generated by lyx 1.2""" import re from parser_tools import find_token, find_token_backwards, \ find_tokens, find_tokens_backwards, \ find_beginning_of, find_end_of, find_re, \ is_nonempty_line, find_nonempty_line, \ get_value, check_token #################################################################### # Private helper functions def get_layout(line, default_layout): " Get layout, if empty return the default layout." tokens = line.split() if len(tokens) > 1: return tokens[1] return default_layout def get_paragraph(lines, i, format): " Finds the paragraph that contains line i." begin_layout = "\\layout" while i != -1: i = find_tokens_backwards(lines, ["\\end_inset", begin_layout], i) if i == -1: return -1 if check_token(lines[i], begin_layout): return i i = find_beginning_of_inset(lines, i) return -1 def get_next_paragraph(lines, i, format): " Finds the paragraph after the paragraph that contains line i." tokens = ["\\begin_inset", "\\layout", "\\end_float", "\\the_end"] while i != -1: i = find_tokens(lines, tokens, i) if not check_token(lines[i], "\\begin_inset"): return i i = find_end_of_inset(lines, i) return -1 def find_beginning_of_inset(lines, i): " Find beginning of inset, where lines[i] is included." return find_beginning_of(lines, i, "\\begin_inset", "\\end_inset") def find_end_of_inset(lines, i): " Finds the matching \end_inset" return find_end_of(lines, i, "\\begin_inset", "\\end_inset") def find_end_of_tabular(lines, i): " Finds the matching end of tabular." return find_end_of(lines, i, "<lyxtabular", "</lyxtabular") def get_tabular_lines(lines, i): " Returns a lists of tabular lines." result = [] i = i+1 j = find_end_of_tabular(lines, i) if j == -1: return [] while i <= j: if check_token(lines[i], "\\begin_inset"): i = find_end_of_inset(lines, i)+1 else: result.append(i) i = i+1 return result # End of helper functions #################################################################### floats = { "footnote": ["\\begin_inset Foot", "collapsed true"], "margin": ["\\begin_inset Marginal", "collapsed true"], "fig": ["\\begin_inset Float figure", "wide false", "collapsed false"], "tab": ["\\begin_inset Float table", "wide false", "collapsed false"], "alg": ["\\begin_inset Float algorithm", "wide false", "collapsed false"], "wide-fig": ["\\begin_inset Float figure", "wide true", "collapsed false"], "wide-tab": ["\\begin_inset Float table", "wide true", "collapsed false"] } font_tokens = ["\\family", "\\series", "\\shape", "\\size", "\\emph", "\\bar", "\\noun", "\\color", "\\lang", "\\latex"] pextra_type3_rexp = re.compile(r".*\\pextra_type\s+3") pextra_rexp = re.compile(r"\\pextra_type\s+(\S+)"+\ r"(\s+\\pextra_alignment\s+(\S+))?"+\ r"(\s+\\pextra_hfill\s+(\S+))?"+\ r"(\s+\\pextra_start_minipage\s+(\S+))?"+\ r"(\s+(\\pextra_widthp?)\s+(\S*))?") def get_width(mo): " Get width from a regular expression. " if mo.group(10): if mo.group(9) == "\\pextra_widthp": return mo.group(10)+"col%" else: return mo.group(10) else: return "100col%" def remove_oldfloat(document): " Change \begin_float .. \end_float into \begin_inset Float .. \end_inset" lines = document.body i = 0 while 1: i = find_token(lines, "\\begin_float", i) if i == -1: break # There are no nested floats, so finding the end of the float is simple j = find_token(lines, "\\end_float", i+1) floattype = lines[i].split()[1] if not floats.has_key(floattype): document.warning("Error! Unknown float type " + floattype) floattype = "fig" # skip \end_deeper tokens i2 = i+1 while check_token(lines[i2], "\\end_deeper"): i2 = i2+1 if i2 > i+1: j2 = get_next_paragraph(lines, j + 1, document.format + 1) lines[j2:j2] = ["\\end_deeper "]*(i2-(i+1)) new = floats[floattype]+[""] # Check if the float is floatingfigure k = find_re(lines, pextra_type3_rexp, i, j) if k != -1: mo = pextra_rexp.search(lines[k]) width = get_width(mo) lines[k] = re.sub(pextra_rexp, "", lines[k]) new = ["\\begin_inset Wrap figure", 'width "%s"' % width, "collapsed false", ""] new = new+lines[i2:j]+["\\end_inset ", ""] # After a float, all font attributes are reseted. # We need to output '\foo default' for every attribute foo # whose value is not default before the float. # The check here is not accurate, but it doesn't matter # as extra '\foo default' commands are ignored. # In fact, it might be safer to output '\foo default' for all # font attributes. k = get_paragraph(lines, i, document.format + 1) flag = 0 for token in font_tokens: if find_token(lines, token, k, i) != -1: if not flag: # This is not necessary, but we want the output to be # as similar as posible to the lyx format flag = 1 new.append("") if token == "\\lang": new.append(token+" "+ document.language) else: new.append(token+" default ") lines[i:j+1] = new i = i+1 pextra_type2_rexp = re.compile(r".*\\pextra_type\s+[12]") pextra_type2_rexp2 = re.compile(r".*(\\layout|\\pextra_type\s+2)") pextra_widthp = re.compile(r"\\pextra_widthp") def remove_pextra(document): " Remove pextra token." lines = document.body i = 0 flag = 0 while 1: i = find_re(lines, pextra_type2_rexp, i) if i == -1: break # Sometimes the \pextra_widthp argument comes in it own # line. If that happens insert it back in this line. if pextra_widthp.search(lines[i+1]): lines[i] = lines[i] + ' ' + lines[i+1] del lines[i+1] mo = pextra_rexp.search(lines[i]) width = get_width(mo) if mo.group(1) == "1": # handle \pextra_type 1 (indented paragraph) lines[i] = re.sub(pextra_rexp, "\\leftindent "+width+" ", lines[i]) i = i+1 continue # handle \pextra_type 2 (minipage) position = mo.group(3) hfill = mo.group(5) lines[i] = re.sub(pextra_rexp, "", lines[i]) start = ["\\begin_inset Minipage", "position " + position, "inner_position 0", 'height "0pt"', 'width "%s"' % width, "collapsed false" ] if flag: flag = 0 if hfill: start = ["","\hfill",""]+start else: start = ['\\layout %s' % document.default_layout,''] + start j0 = find_token_backwards(lines,"\\layout", i-1) j = get_next_paragraph(lines, i, document.format + 1) count = 0 while 1: # collect more paragraphs to the minipage count = count+1 if j == -1 or not check_token(lines[j], "\\layout"): break i = find_re(lines, pextra_type2_rexp2, j+1) if i == -1: break mo = pextra_rexp.search(lines[i]) if not mo: break if mo.group(7) == "1": flag = 1 break lines[i] = re.sub(pextra_rexp, "", lines[i]) j = find_tokens(lines, ["\\layout", "\\end_float"], i+1) mid = lines[j0:j] end = ["\\end_inset "] lines[j0:j] = start+mid+end i = i+1 def is_empty(lines): " Are all the lines empty?" return filter(is_nonempty_line, lines) == [] move_rexp = re.compile(r"\\(family|series|shape|size|emph|numeric|bar|noun|end_deeper)") ert_rexp = re.compile(r"\\begin_inset|\\hfill|.*\\SpecialChar") spchar_rexp = re.compile(r"(.*)(\\SpecialChar.*)") def remove_oldert(document): " Remove old ERT inset." ert_begin = ["\\begin_inset ERT", "status Collapsed", "", '\\layout %s' % document.default_layout, ""] lines = document.body i = 0 while 1: i = find_tokens(lines, ["\\latex latex", "\\layout LaTeX"], i) if i == -1: break j = i+1 while 1: # \end_inset is for ert inside a tabular cell. The other tokens # are obvious. j = find_tokens(lines, ["\\latex default", "\\layout", "\\begin_inset", "\\end_inset", "\\end_float", "\\the_end"], j) if check_token(lines[j], "\\begin_inset"): j = find_end_of_inset(lines, j)+1 else: break if check_token(lines[j], "\\layout"): while j-1 >= 0 and check_token(lines[j-1], "\\begin_deeper"): j = j-1 # We need to remove insets, special chars & font commands from ERT text new = [] new2 = [] if check_token(lines[i], "\\layout LaTeX"): new = ['\layout %s' % document.default_layout, "", ""] k = i+1 while 1: k2 = find_re(lines, ert_rexp, k, j) inset = hfill = specialchar = 0 if k2 == -1: k2 = j elif check_token(lines[k2], "\\begin_inset"): inset = 1 elif check_token(lines[k2], "\\hfill"): hfill = 1 del lines[k2] j = j-1 else: specialchar = 1 mo = spchar_rexp.match(lines[k2]) lines[k2] = mo.group(1) specialchar_str = mo.group(2) k2 = k2+1 tmp = [] for line in lines[k:k2]: # Move some lines outside the ERT inset: if move_rexp.match(line): if new2 == []: # This is not necessary, but we want the output to be # as similar as posible to the lyx format new2 = [""] new2.append(line) elif not check_token(line, "\\latex"): tmp.append(line) if is_empty(tmp): if filter(lambda x:x != "", tmp) != []: if new == []: # This is not necessary, but we want the output to be # as similar as posible to the lyx format lines[i-1] = lines[i-1]+" " else: new = new+[" "] else: new = new+ert_begin+tmp+["\\end_inset ", ""] if inset: k3 = find_end_of_inset(lines, k2) new = new+[""]+lines[k2:k3+1]+[""] # Put an empty line after \end_inset k = k3+1 # Skip the empty line after \end_inset if not is_nonempty_line(lines[k]): k = k+1 new.append("") elif hfill: new = new + ["\\hfill", ""] k = k2 elif specialchar: if new == []: # This is not necessary, but we want the output to be # as similar as posible to the lyx format lines[i-1] = lines[i-1]+specialchar_str new = [""] else: new = new+[specialchar_str, ""] k = k2 else: break new = new+new2 if not check_token(lines[j], "\\latex "): new = new+[""]+[lines[j]] lines[i:j+1] = new i = i+1 # Delete remaining "\latex xxx" tokens i = 0 while 1: i = find_token(lines, "\\latex ", i) if i == -1: break del lines[i] def remove_oldertinset(document): " ERT insert are hidden feature of lyx 1.1.6. This might be removed in the future." lines = document.body i = 0 while 1: i = find_token(lines, "\\begin_inset ERT", i) if i == -1: break j = find_end_of_inset(lines, i) k = find_token(lines, "\\layout", i+1) l = get_paragraph(lines, i, document.format + 1) if lines[k] == lines[l]: # same layout k = k+1 new = lines[k:j] lines[i:j+1] = new i = i+1 def is_ert_paragraph(document, i): " Is this a ert paragraph? " lines = document.body if not check_token(lines[i], "\\layout"): return 0 if not document.is_default_layout(get_layout(lines[i], document.default_layout)): return 0 i = find_nonempty_line(lines, i+1) if not check_token(lines[i], "\\begin_inset ERT"): return 0 j = find_end_of_inset(lines, i) k = find_nonempty_line(lines, j+1) return check_token(lines[k], "\\layout") def combine_ert(document): " Combine ERT paragraphs." lines = document.body i = 0 while 1: i = find_token(lines, "\\begin_inset ERT", i) if i == -1: break j = get_paragraph(lines, i, document.format + 1) count = 0 text = [] while is_ert_paragraph(document, j): count = count+1 i2 = find_token(lines, "\\layout", j+1) k = find_token(lines, "\\end_inset", i2+1) text = text+lines[i2:k] j = find_token(lines, "\\layout", k+1) if j == -1: break if count >= 2: j = find_token(lines, "\\layout", i+1) lines[j:k] = text i = i+1 oldunits = ["pt", "cm", "in", "text%", "col%"] def get_length(lines, name, start, end): " Get lenght." i = find_token(lines, name, start, end) if i == -1: return "" x = lines[i].split() return x[2]+oldunits[int(x[1])] def write_attribute(x, token, value): " Write attribute." if value != "": x.append("\t"+token+" "+value) def remove_figinset(document): " Remove figinset." lines = document.body i = 0 while 1: i = find_token(lines, "\\begin_inset Figure", i) if i == -1: break j = find_end_of_inset(lines, i) if ( len(lines[i].split()) > 2 ): lyxwidth = lines[i].split()[3]+"pt" lyxheight = lines[i].split()[4]+"pt" else: lyxwidth = "" lyxheight = "" filename = get_value(lines, "file", i+1, j) width = get_length(lines, "width", i+1, j) # what does width=5 mean ? height = get_length(lines, "height", i+1, j) rotateAngle = get_value(lines, "angle", i+1, j) if width == "" and height == "": size_type = "0" else: size_type = "1" flags = get_value(lines, "flags", i+1, j) x = int(flags)%4 if x == 1: display = "monochrome" elif x == 2: display = "gray" else: display = "color" subcaptionText = "" subcaptionLine = find_token(lines, "subcaption", i+1, j) if subcaptionLine != -1: subcaptionText = lines[subcaptionLine][11:] if subcaptionText != "": subcaptionText = '"'+subcaptionText+'"' k = find_token(lines, "subfigure", i+1,j) if k == -1: subcaption = 0 else: subcaption = 1 new = ["\\begin_inset Graphics FormatVersion 1"] write_attribute(new, "filename", filename) write_attribute(new, "display", display) if subcaption: new.append("\tsubcaption") write_attribute(new, "subcaptionText", subcaptionText) write_attribute(new, "size_type", size_type) write_attribute(new, "width", width) write_attribute(new, "height", height) if rotateAngle != "": new.append("\trotate") write_attribute(new, "rotateAngle", rotateAngle) write_attribute(new, "rotateOrigin", "leftBaseline") write_attribute(new, "lyxsize_type", "1") write_attribute(new, "lyxwidth", lyxwidth) write_attribute(new, "lyxheight", lyxheight) new = new + ["\\end_inset"] lines[i:j+1] = new attr_re = re.compile(r' \w*="(false|0|)"') line_re = re.compile(r'<(features|column|row|cell)') def update_tabular(document): " Convert tabular format 2 to 3." regexp = re.compile(r'^\\begin_inset\s+Tabular') lines = document.body i = 0 while 1: i = find_re(lines, regexp, i) if i == -1: break for k in get_tabular_lines(lines, i): if check_token(lines[k], "<lyxtabular"): lines[k] = lines[k].replace('version="2"', 'version="3"') elif check_token(lines[k], "<column"): lines[k] = lines[k].replace('width=""', 'width="0pt"') if line_re.match(lines[k]): lines[k] = re.sub(attr_re, "", lines[k]) i = i+1 ## # Convert tabular format 2 to 3 # # compatibility read for old longtable options. Now we can make any # row part of the header/footer type we want before it was strict # sequential from the first row down (as LaTeX does it!). So now when # we find a header/footer line we have to go up the rows and set it # on all preceding rows till the first or one with already a h/f option # set. If we find a firstheader on the same line as a header or a # lastfooter on the same line as a footer then this should be set empty. # (Jug 20011220) # just for compatibility with old python versions # python >= 2.3 has real booleans (False and True) false = 0 true = 1 class row: " Simple data structure to deal with long table info." def __init__(self): self.endhead = false # header row self.endfirsthead = false # first header row self.endfoot = false # footer row self.endlastfoot = false # last footer row def haveLTFoot(row_info): " Does row has LTFoot?" for row_ in row_info: if row_.endfoot: return true return false def setHeaderFooterRows(hr, fhr, fr, lfr, rows_, row_info): " Set Header/Footer rows." endfirsthead_empty = false endlastfoot_empty = false # set header info while (hr > 0): hr = hr - 1 row_info[hr].endhead = true # set firstheader info if fhr and fhr < rows_: if row_info[fhr].endhead: while fhr > 0: fhr = fhr - 1 row_info[fhr].endfirsthead = true row_info[fhr].endhead = false elif row_info[fhr - 1].endhead: endfirsthead_empty = true else: while fhr > 0 and not row_info[fhr - 1].endhead: fhr = fhr - 1 row_info[fhr].endfirsthead = true # set footer info if fr and fr < rows_: if row_info[fr].endhead and row_info[fr - 1].endhead: while fr > 0 and not row_info[fr - 1].endhead: fr = fr - 1 row_info[fr].endfoot = true row_info[fr].endhead = false elif row_info[fr].endfirsthead and row_info[fr - 1].endfirsthead: while fr > 0 and not row_info[fr - 1].endfirsthead: fr = fr - 1 row_info[fr].endfoot = true row_info[fr].endfirsthead = false elif not row_info[fr - 1].endhead and not row_info[fr - 1].endfirsthead: while fr > 0 and not row_info[fr - 1].endhead and not row_info[fr - 1].endfirsthead: fr = fr - 1 row_info[fr].endfoot = true # set lastfooter info if lfr and lfr < rows_: if row_info[lfr].endhead and row_info[lfr - 1].endhead: while lfr > 0 and not row_info[lfr - 1].endhead: lfr = lfr - 1 row_info[lfr].endlastfoot = true row_info[lfr].endhead = false elif row_info[lfr].endfirsthead and row_info[lfr - 1].endfirsthead: while lfr > 0 and not row_info[lfr - 1].endfirsthead: lfr = lfr - 1 row_info[lfr].endlastfoot = true row_info[lfr].endfirsthead = false elif row_info[lfr].endfoot and row_info[lfr - 1].endfoot: while lfr > 0 and not row_info[lfr - 1].endfoot: lfr = lfr - 1 row_info[lfr].endlastfoot = true row_info[lfr].endfoot = false elif not row_info[fr - 1].endhead and not row_info[fr - 1].endfirsthead and not row_info[fr - 1].endfoot: while lfr > 0 and not row_info[lfr - 1].endhead and not row_info[lfr - 1].endfirsthead and not row_info[lfr - 1].endfoot: lfr = lfr - 1 row_info[lfr].endlastfoot = true elif haveLTFoot(row_info): endlastfoot_empty = true return endfirsthead_empty, endlastfoot_empty def insert_attribute(lines, i, attribute): " Insert attribute in lines[i]." last = lines[i].find('>') lines[i] = lines[i][:last] + ' ' + attribute + lines[i][last:] rows_re = re.compile(r'rows="(\d*)"') longtable_re = re.compile(r'islongtable="(\w)"') ltvalues_re = re.compile(r'endhead="(-?\d*)" endfirsthead="(-?\d*)" endfoot="(-?\d*)" endlastfoot="(-?\d*)"') lt_features_re = re.compile(r'(endhead="-?\d*" endfirsthead="-?\d*" endfoot="-?\d*" endlastfoot="-?\d*")') def update_longtables(document): " Update longtables to new format." regexp = re.compile(r'^\\begin_inset\s+Tabular') body = document.body i = 0 while 1: i = find_re(body, regexp, i) if i == -1: break i = i + 1 i = find_token(body, "<lyxtabular", i) if i == -1: break # get number of rows in the table rows = int(rows_re.search(body[i]).group(1)) i = i + 1 i = find_token(body, '<features', i) if i == -1: break # is this a longtable? longtable = longtable_re.search(body[i]) if not longtable: # islongtable is missing add it body[i] = body[i][:10] + 'islongtable="false" ' + body[i][10:] if not longtable or longtable.group(1) != "true": # remove longtable elements from features features = lt_features_re.search(body[i]) if features: body[i] = body[i].replace(features.group(1), "") continue row_info = row() * rows res = ltvalues_re.search(body[i]) if not res: continue endfirsthead_empty, endlastfoot_empty = setHeaderFooterRows(res.group(1), res.group(2), res.group(3), res.group(4), rows, row_info) if endfirsthead_empty: insert_attribute(body, i, 'firstHeadEmpty="true"') if endfirsthead_empty: insert_attribute(body, i, 'lastFootEmpty="true"') i = i + 1 for j in range(rows): i = find_token(body, '<row', i) self.endfoot = false # footer row self.endlastfoot = false # last footer row if row_info[j].endhead: insert_attribute(body, i, 'endhead="true"') if row_info[j].endfirsthead: insert_attribute(body, i, 'endfirsthead="true"') if row_info[j].endfoot: insert_attribute(body, i, 'endfoot="true"') if row_info[j].endlastfoot: insert_attribute(body, i, 'endlastfoot="true"') i = i + 1 def fix_oldfloatinset(document): " Figure insert are hidden feature of lyx 1.1.6. This might be removed in the future." lines = document.body i = 0 while 1: i = find_token(lines, "\\begin_inset Float ", i) if i == -1: break j = find_token(lines, "collapsed", i) if j != -1: lines[j:j] = ["wide false"] i = i+1 def change_listof(document): " Change listof insets." lines = document.body i = 0 while 1: i = find_token(lines, "\\begin_inset LatexCommand \\listof", i) if i == -1: break type = re.search(r"listof(\w*)", lines[i]).group(1)[:-1] lines[i] = "\\begin_inset FloatList "+type i = i+1 def change_infoinset(document): " Change info inset." lines = document.body i = 0 while 1: i = find_token(lines, "\\begin_inset Info", i) if i == -1: break txt = lines[i][18:].lstrip() new = ["\\begin_inset Note", "collapsed true", ""] j = find_token(lines, "\\end_inset", i) if j == -1: break note_lines = lines[i+1:j] if len(txt) > 0: note_lines = [txt]+note_lines for line in note_lines: new = new + ['\layout %s' % document.default_layout, ""] tmp = line.split('\\') new = new + [tmp[0]] for x in tmp[1:]: new = new + ["\\backslash ", x] lines[i:j] = new i = i+5 def change_header(document): " Update header." lines = document.header i = find_token(lines, "\\use_amsmath", 0) if i == -1: return lines[i+1:i+1] = ["\\use_natbib 0", "\use_numerical_citations 0"] supported_versions = ["1.2.%d" % i for i in range(5)] + ["1.2"] convert = [[220, [change_header, change_listof, fix_oldfloatinset, update_tabular, update_longtables, remove_pextra, remove_oldfloat, remove_figinset, remove_oldertinset, remove_oldert, combine_ert, change_infoinset]]] revert = [] if __name__ == "__main__": pass