# This file is part of lyx2lyx
# Copyright (C) 2002 Dekel Tsur
# Copyright (C) 2004 José Matos
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Convert files to the file format generated by lyx 1.2"""

import re

from parser_tools import (
    check_token,
    find_beginning_of,
    find_end_of,
    find_nonempty_line,
    find_re,
    find_token,
    find_token_backwards,
    find_tokens,
    find_tokens_backwards,
    get_value,
    is_nonempty_line,
)

####################################################################
# Private helper functions


def get_layout(line, default_layout):
    """Get layout, if empty return the default layout.

    The layout name is the second whitespace-separated token of ``line``;
    ``default_layout`` is returned when ``line`` has fewer than two tokens.
    """
    tokens = line.split()
    if len(tokens) > 1:
        return tokens[1]
    return default_layout


def get_paragraph(lines, i, format):
    """Finds the paragraph that contains line i.

    Searches backwards from ``i`` for a ``\\layout`` or ``\\end_inset`` line.
    A ``\\layout`` hit is returned; an ``\\end_inset`` hit means a complete
    inset lies in between, so the search resumes from the start of that
    inset.  Returns -1 when no enclosing ``\\layout`` is found.  ``format``
    is accepted for interface compatibility and is not used here.
    """
    begin_layout = "\\layout"
    while i != -1:
        i = find_tokens_backwards(lines, ["\\end_inset", begin_layout], i)
        if i == -1:
            return -1
        if check_token(lines[i], begin_layout):
            return i
        # lines[i] closes an inset: jump to its opening line and keep looking.
        i = find_beginning_of_inset(lines, i)
    return -1


def get_next_paragraph(lines, i, format):
    """Finds the paragraph after the paragraph that contains line i.

    Scans forward from ``i`` for the next ``\\layout``, ``\\end_float`` or
    ``\\the_end`` line, skipping over whole insets, and returns its index
    (-1 when nothing is found).  ``format`` is accepted for interface
    compatibility and is not used here.
    """
    tokens = ["\\begin_inset", "\\layout", "\\end_float", "\\the_end"]
    while i != -1:
        i = find_tokens(lines, tokens, i)
        if i == -1:
            # Nothing found: do not let -1 index the last line below.
            return -1
        if not check_token(lines[i], "\\begin_inset"):
            return i
        # Skip the whole inset; a -1 result ends the loop.
        i = find_end_of_inset(lines, i)
    return -1


def find_beginning_of_inset(lines, i):
    "Find beginning of inset, where lines[i] is included."
    return find_beginning_of(lines, i, "\\begin_inset", "\\end_inset")


def find_end_of_inset(lines, i):
    r"Finds the matching \end_inset"
    return find_end_of(lines, i, "\\begin_inset", "\\end_inset")


# NOTE(review): the text of this file is damaged from here on.  It looks as
# if everything between a "<" and the following ">" was stripped when the
# file was extracted, so the end of find_end_of_tabular(), whatever followed
# it, and the beginning of the routine that the remaining statements belong
# to are missing.  The surviving text is kept below verbatim (only re-wrapped
# and commented out) so that nothing is lost; restore the missing part from a
# pristine copy of the file before re-enabling it.  The fragment continues on
# the following lines of the file.
#
# def find_end_of_tabular(lines, i):
#     "Finds the matching end of tabular."
#     return find_end_of(lines, i, "
#     [... text missing ...]
#  i + 1:
#     j2 = get_next_paragraph(lines, j + 1, document.format + 1)
#     lines[j2:j2] = ["\\end_deeper "] * (i2 - (i + 1))
#
# new = floats[floattype] + [""]
#
# # Check if the float is floatingfigure
# k = find_re(lines, pextra_type3_rexp, i, j)
# if k != -1:
#     mo = pextra_rexp.search(lines[k])
#     width = get_width(mo)
#     lines[k] = re.sub(pextra_rexp, "", lines[k])
#     new = [
#         "\\begin_inset Wrap figure",
#         'width "%s"' % width,
#         "collapsed false",
#         "",
#     ]
#
# new = new + lines[i2:j] + ["\\end_inset ", ""]
#
# # After a float, all font attributes are reseted.
# # We need to output '\foo default' for every attribute foo
# # whose value is not default before the float.
# # The check here is not accurate, but it doesn't matter
# # as extra '\foo default' commands are ignored.
# # In fact, it might be safer to output '\foo default' for all
# # font attributes.
# NOTE(review): the statements in this comment block are the tail of a
# routine whose beginning is not present in this file (the text in front of
# it is damaged).  They are kept verbatim, only re-wrapped and commented out,
# so that nothing is lost; restore the routine from a pristine copy of the
# file before re-enabling them.
#
# k = get_paragraph(lines, i, document.format + 1)
# flag = 0
# for token in font_tokens:
#     if find_token(lines, token, k, i) != -1:
#         if not flag:
#             # This is not necessary, but we want the output to be
#             # as similar as posible to the lyx format
#             flag = 1
#             new.append("")
#         if token == "\\lang":
#             new.append(token + " " + document.language)
#         else:
#             new.append(token + " default ")
#
# lines[i : j + 1] = new
# i = i + 1


pextra_type2_rexp = re.compile(r".*\\pextra_type\s+[12]")
pextra_type2_rexp2 = re.compile(r".*(\\layout|\\pextra_type\s+2)")
pextra_widthp = re.compile(r"\\pextra_widthp")


def remove_pextra(document):
    """Remove pextra token.

    ``\\pextra_type 1`` is rewritten to ``\\leftindent <width>``;
    ``\\pextra_type 2`` paragraphs are wrapped in a ``Minipage`` inset.
    ``document.body`` is modified in place.  Uses the module-level
    ``pextra_rexp`` and ``get_width``, which are defined elsewhere.
    """
    lines = document.body
    i = 0
    flag = 0
    while True:
        i = find_re(lines, pextra_type2_rexp, i)
        if i == -1:
            break

        # Sometimes the \pextra_widthp argument comes in its own
        # line. If that happens insert it back in this line.
        if pextra_widthp.search(lines[i + 1]):
            lines[i] = lines[i] + " " + lines[i + 1]
            del lines[i + 1]

        mo = pextra_rexp.search(lines[i])
        width = get_width(mo)

        if mo.group(1) == "1":
            # handle \pextra_type 1 (indented paragraph)
            # A callable replacement is used so the text is inserted
            # literally: as a template string, "\leftindent" is rejected by
            # re.sub() on Python >= 3.7 ("bad escape \l").
            indent = "\\leftindent " + width + " "
            lines[i] = re.sub(pextra_rexp, lambda _mo: indent, lines[i])
            i = i + 1
            continue

        # handle \pextra_type 2 (minipage)
        position = mo.group(3)
        hfill = mo.group(5)
        lines[i] = re.sub(pextra_rexp, "", lines[i])

        start = [
            "\\begin_inset Minipage",
            "position " + position,
            "inner_position 0",
            'height "0pt"',
            'width "%s"' % width,
            "collapsed false",
        ]
        # NOTE(review): the "else" is taken to pair with "if flag" (the
        # indentation was lost) - confirm against a pristine copy.
        if flag:
            flag = 0
            if hfill:
                start = ["", r"\hfill", ""] + start
        else:
            start = ["\\layout %s" % document.default_layout, ""] + start

        j0 = find_token_backwards(lines, "\\layout", i - 1)
        j = get_next_paragraph(lines, i, document.format + 1)

        while True:
            # collect more paragraphs to the minipage
            if j == -1 or not check_token(lines[j], "\\layout"):
                break
            i = find_re(lines, pextra_type2_rexp2, j + 1)
            if i == -1:
                break
            mo = pextra_rexp.search(lines[i])
            if not mo:
                break
            if mo.group(7) == "1":
                # Remember this for the next pass of the outer loop.
                flag = 1
                break
            lines[i] = re.sub(pextra_rexp, "", lines[i])
            j = find_tokens(lines, ["\\layout", "\\end_float"], i + 1)

        mid = lines[j0:j]
        end = ["\\end_inset "]

        lines[j0:j] = start + mid + end
        i = i + 1


def is_empty(lines):
    "Are all the lines empty?"
    return list(filter(is_nonempty_line, lines)) == []


move_rexp = re.compile(r"\\(family|series|shape|size|emph|numeric|bar|noun|end_deeper)")
ert_rexp = re.compile(r"\\begin_inset|\\hfill|.*\\SpecialChar")
spchar_rexp = re.compile(r"(.*)(\\SpecialChar.*)")


def remove_oldert(document):
    """Remove old ERT inset.

    Text marked with ``\\latex latex`` or ``\\layout LaTeX`` is wrapped in
    ``ERT`` insets; insets, ``\\hfill``, special characters and font
    commands found inside are moved out of the ERT text.  Afterwards all
    remaining ``\\latex`` lines are deleted.  ``document.body`` is modified
    in place.
    """
    ert_begin = [
        "\\begin_inset ERT",
        "status Collapsed",
        "",
        "\\layout %s" % document.default_layout,
        "",
    ]
    lines = document.body
    i = 0
    while True:
        i = find_tokens(lines, ["\\latex latex", "\\layout LaTeX"], i)
        if i == -1:
            break
        j = i + 1
        while True:
            # \end_inset is for ert inside a tabular cell. The other tokens
            # are obvious.
            j = find_tokens(
                lines,
                [
                    "\\latex default",
                    "\\layout",
                    "\\begin_inset",
                    "\\end_inset",
                    "\\end_float",
                    "\\the_end",
                ],
                j,
            )
            if check_token(lines[j], "\\begin_inset"):
                j = find_end_of_inset(lines, j) + 1
            else:
                break

        if check_token(lines[j], "\\layout"):
            while j - 1 >= 0 and check_token(lines[j - 1], "\\begin_deeper"):
                j = j - 1

        # We need to remove insets, special chars & font commands from ERT text
        new = []
        new2 = []
        if check_token(lines[i], "\\layout LaTeX"):
            new = [r"\layout %s" % document.default_layout, "", ""]

        k = i + 1
        while True:
            k2 = find_re(lines, ert_rexp, k, j)
            inset = hfill = specialchar = 0
            if k2 == -1:
                k2 = j
            elif check_token(lines[k2], "\\begin_inset"):
                inset = 1
            elif check_token(lines[k2], "\\hfill"):
                hfill = 1
                del lines[k2]
                j = j - 1
            else:
                specialchar = 1
                mo = spchar_rexp.match(lines[k2])
                lines[k2] = mo.group(1)
                specialchar_str = mo.group(2)
                k2 = k2 + 1

            tmp = []
            for line in lines[k:k2]:
                # Move some lines outside the ERT inset:
                if move_rexp.match(line):
                    if new2 == []:
                        # This is not necessary, but we want the output to be
                        # as similar as possible to the lyx format
                        new2 = [""]
                    new2.append(line)
                elif not check_token(line, "\\latex"):
                    tmp.append(line)

            if is_empty(tmp):
                if [x for x in tmp if x != ""] != []:
                    if new == []:
                        # This is not necessary, but we want the output to be
                        # as similar as possible to the lyx format
                        lines[i - 1] = lines[i - 1] + " "
                    else:
                        new = new + [" "]
            else:
                new = new + ert_begin + tmp + ["\\end_inset ", ""]

            if inset:
                k3 = find_end_of_inset(lines, k2)
                # Put an empty line after \end_inset
                new = new + [""] + lines[k2 : k3 + 1] + [""]
                k = k3 + 1
                # Skip the empty line after \end_inset
                if not is_nonempty_line(lines[k]):
                    k = k + 1
                    new.append("")
            elif hfill:
                new = new + ["\\hfill", ""]
                k = k2
            elif specialchar:
                if new == []:
                    # This is not necessary, but we want the output to be
                    # as similar as possible to the lyx format
                    lines[i - 1] = lines[i - 1] + specialchar_str
                    new = [""]
                else:
                    new = new + [specialchar_str, ""]
                k = k2
            else:
                break

        new = new + new2
        if not check_token(lines[j], "\\latex "):
            new = new + [""] + [lines[j]]
        lines[i : j + 1] = new
        i = i + 1

    # Delete remaining "\latex xxx" tokens
    i = 0
    while True:
        i = find_token(lines, "\\latex ", i)
        if i == -1:
            break
        del lines[i]


def remove_oldertinset(document):
    "ERT insert are hidden feature of lyx 1.1.6. This might be removed in the future."
    lines = document.body
    i = 0
    while True:
        i = find_token(lines, "\\begin_inset ERT", i)
        if i == -1:
            break
        j = find_end_of_inset(lines, i)
        k = find_token(lines, "\\layout", i + 1)
        l = get_paragraph(lines, i, document.format + 1)
        if lines[k] == lines[l]:  # same layout
            k = k + 1
        # Replace the whole inset by its content.
        new = lines[k:j]
        lines[i : j + 1] = new
        i = i + 1


def is_ert_paragraph(document, i):
    """Is this a ert paragraph?

    True when line ``i`` is a ``\\layout`` line using the default layout,
    the next non-empty line opens an ``ERT`` inset, and the first non-empty
    line after that inset is again a ``\\layout`` line.
    """
    lines = document.body
    if not check_token(lines[i], "\\layout"):
        return 0
    if not document.is_default_layout(get_layout(lines[i], document.default_layout)):
        return 0

    i = find_nonempty_line(lines, i + 1)
    if not check_token(lines[i], "\\begin_inset ERT"):
        return 0

    j = find_end_of_inset(lines, i)
    k = find_nonempty_line(lines, j + 1)
    return check_token(lines[k], "\\layout")


def combine_ert(document):
    """Combine ERT paragraphs.

    When two or more consecutive paragraphs are ERT paragraphs (see
    is_ert_paragraph), their texts are merged into the first ERT inset.
    ``document.body`` is modified in place.
    """
    lines = document.body
    i = 0
    while True:
        i = find_token(lines, "\\begin_inset ERT", i)
        if i == -1:
            break
        j = get_paragraph(lines, i, document.format + 1)
        count = 0
        text = []
        while is_ert_paragraph(document, j):
            count = count + 1
            i2 = find_token(lines, "\\layout", j + 1)
            k = find_token(lines, "\\end_inset", i2 + 1)
            text = text + lines[i2:k]
            j = find_token(lines, "\\layout", k + 1)
            if j == -1:
                break

        if count >= 2:
            # k still points at the \end_inset of the last merged paragraph.
            j = find_token(lines, "\\layout", i + 1)
            lines[j:k] = text

        i = i + 1


oldunits = ["pt", "cm", "in", "text%", "col%"]


def get_length(lines, name, start, end):
    """Get length.

    Looks for a ``name`` line between ``start`` and ``end`` and returns its
    third token followed by the unit selected from ``oldunits`` by its
    second token.  Returns "" when there is no such line.
    """
    i = find_token(lines, name, start, end)
    if i == -1:
        return ""
    x = lines[i].split()
    return x[2] + oldunits[int(x[1])]


def write_attribute(x, token, value):
    "Write attribute: append a tab-indented 'token value' line to x unless value is empty."
    if value != "":
        x.append("\t" + token + " " + value)


def remove_figinset(document):
    """Remove figinset.

    Every ``\\begin_inset Figure`` inset is replaced by a
    ``\\begin_inset Graphics FormatVersion 1`` inset carrying the same
    information.  ``document.body`` is modified in place.
    """
    lines = document.body
    i = 0
    while True:
        i = find_token(lines, "\\begin_inset Figure", i)
        if i == -1:
            break
        j = find_end_of_inset(lines, i)

        # The size is read from the 4th and 5th token of the opening line,
        # so at least five tokens are required (a line with only three or
        # four tokens used to raise IndexError).
        header = lines[i].split()
        if len(header) > 4:
            lyxwidth = header[3] + "pt"
            lyxheight = header[4] + "pt"
        else:
            lyxwidth = ""
            lyxheight = ""

        filename = get_value(lines, "file", i + 1, j)

        width = get_length(lines, "width", i + 1, j)
        # what does width=5 mean ?
        height = get_length(lines, "height", i + 1, j)
        rotateAngle = get_value(lines, "angle", i + 1, j)
        if width == "" and height == "":
            size_type = "0"
        else:
            size_type = "1"

        flags = get_value(lines, "flags", i + 1, j)
        # A missing "flags" entry is treated as 0 instead of failing in
        # int().  NOTE(review): assumes get_value() yields an empty value
        # when the token is absent, as the test on rotateAngle below
        # suggests - confirm.
        if flags:
            x = int(flags) % 4
        else:
            x = 0
        if x == 1:
            display = "monochrome"
        elif x == 2:
            display = "gray"
        else:
            display = "color"

        subcaptionText = ""
        subcaptionLine = find_token(lines, "subcaption", i + 1, j)
        if subcaptionLine != -1:
            # Drop the leading "subcaption " (11 characters).
            subcaptionText = lines[subcaptionLine][11:]
            if subcaptionText != "":
                subcaptionText = '"' + subcaptionText + '"'

        k = find_token(lines, "subfigure", i + 1, j)
        if k == -1:
            subcaption = 0
        else:
            subcaption = 1

        new = ["\\begin_inset Graphics FormatVersion 1"]
        write_attribute(new, "filename", filename)
        write_attribute(new, "display", display)
        if subcaption:
            new.append("\tsubcaption")
        write_attribute(new, "subcaptionText", subcaptionText)
        write_attribute(new, "size_type", size_type)
        write_attribute(new, "width", width)
        write_attribute(new, "height", height)
        # NOTE(review): rotateOrigin is taken to be written unconditionally
        # (the indentation was lost) - confirm against a pristine copy.
        if rotateAngle != "":
            new.append("\trotate")
            write_attribute(new, "rotateAngle", rotateAngle)
        write_attribute(new, "rotateOrigin", "leftBaseline")
        write_attribute(new, "lyxsize_type", "1")
        write_attribute(new, "lyxwidth", lyxwidth)
        write_attribute(new, "lyxheight", lyxheight)
        new = new + ["\\end_inset"]
        # The replacement no longer matches "\begin_inset Figure", so the
        # next search from i moves on without incrementing i.
        lines[i : j + 1] = new


attr_re = re.compile(r' \w*="(false|0|)"')
line_re = re.compile(r"<(features|column|row|cell)")


# NOTE(review): update_tabular() is damaged - the text after the last quote
# below up to "= 2.3 has real booleans" is missing from this file (it looks
# as if everything between a "<" and the following ">" was stripped).  The
# surviving text is kept verbatim, only re-wrapped and commented out;
# restore the routine from a pristine copy of the file.
#
# def update_tabular(document):
#     "Convert tabular format 2 to 3."
#     regexp = re.compile(r"^\\begin_inset\s+Tabular")
#     lines = document.body
#     i = 0
#     while True:
#         i = find_re(lines, regexp, i)
#         if i == -1:
#             break
#
#         for k in get_tabular_lines(lines, i):
#             if check_token(lines[k], "
#     [... text missing ...]
# = 2.3 has real booleans (False and True)
false = 0
true = 1


class row:
    "Simple data structure to deal with long table info."

    def __init__(self):
        self.endhead = false  # header row
        self.endfirsthead = false  # first header row
        self.endfoot = false  # footer row
        self.endlastfoot = false  # last footer row


def haveLTFoot(row_info):
    "Does row has LTFoot?"
    for row_ in row_info:
        if row_.endfoot:
            return true
    return false


def setHeaderFooterRows(hr, fhr, fr, lfr, rows_, row_info):
    """Set Header/Footer rows.

    ``hr``, ``fhr``, ``fr`` and ``lfr`` are the header, first-header,
    footer and last-footer row counts; ``rows_`` is the number of rows and
    ``row_info`` a list of ``row`` objects whose flags are updated in
    place.  Returns the tuple ``(endfirsthead_empty, endlastfoot_empty)``.
    """
    endfirsthead_empty = false
    endlastfoot_empty = false

    # set header info
    while hr > 0:
        hr = hr - 1
        row_info[hr].endhead = true

    # set firstheader info
    if fhr and fhr < rows_:
        if row_info[fhr].endhead:
            while fhr > 0:
                fhr = fhr - 1
                row_info[fhr].endfirsthead = true
                row_info[fhr].endhead = false
        elif row_info[fhr - 1].endhead:
            endfirsthead_empty = true
        else:
            while fhr > 0 and not row_info[fhr - 1].endhead:
                fhr = fhr - 1
                row_info[fhr].endfirsthead = true

    # set footer info
    if fr and fr < rows_:
        if row_info[fr].endhead and row_info[fr - 1].endhead:
            while fr > 0 and not row_info[fr - 1].endhead:
                fr = fr - 1
                row_info[fr].endfoot = true
                row_info[fr].endhead = false
        elif row_info[fr].endfirsthead and row_info[fr - 1].endfirsthead:
            while fr > 0 and not row_info[fr - 1].endfirsthead:
                fr = fr - 1
                row_info[fr].endfoot = true
                row_info[fr].endfirsthead = false
        elif not row_info[fr - 1].endhead and not row_info[fr - 1].endfirsthead:
            while fr > 0 and not row_info[fr - 1].endhead and not row_info[fr - 1].endfirsthead:
                fr = fr - 1
                row_info[fr].endfoot = true

    # set lastfooter info
    if lfr and lfr < rows_:
        if row_info[lfr].endhead and row_info[lfr - 1].endhead:
            while lfr > 0 and not row_info[lfr - 1].endhead:
                lfr = lfr - 1
                row_info[lfr].endlastfoot = true
                row_info[lfr].endhead = false
        elif row_info[lfr].endfirsthead and row_info[lfr - 1].endfirsthead:
            while lfr > 0 and not row_info[lfr - 1].endfirsthead:
                lfr = lfr - 1
                row_info[lfr].endlastfoot = true
                row_info[lfr].endfirsthead = false
        elif row_info[lfr].endfoot and row_info[lfr - 1].endfoot:
            while lfr > 0 and not row_info[lfr - 1].endfoot:
                lfr = lfr - 1
                row_info[lfr].endlastfoot = true
                row_info[lfr].endfoot = false
        # NOTE(review): this condition indexes with fr although the branch
        # handles lfr; it looks like a copy/paste slip but is left unchanged
        # pending confirmation.
        elif (
            not row_info[fr - 1].endhead
            and not row_info[fr - 1].endfirsthead
            and not row_info[fr - 1].endfoot
        ):
            while (
                lfr > 0
                and not row_info[lfr - 1].endhead
                and not row_info[lfr - 1].endfirsthead
                and not row_info[lfr - 1].endfoot
            ):
                lfr = lfr - 1
                row_info[lfr].endlastfoot = true
        elif haveLTFoot(row_info):
            endlastfoot_empty = true

    return endfirsthead_empty, endlastfoot_empty


def insert_attribute(lines, i, attribute):
    "Insert attribute in lines[i], just before its first '>'."
    last = lines[i].find(">")
    lines[i] = lines[i][:last] + " " + attribute + lines[i][last:]


rows_re = re.compile(r'rows="(\d*)"')
longtable_re = re.compile(r'islongtable="(\w)"')
ltvalues_re = re.compile(
    r'endhead="(-?\d*)" endfirsthead="(-?\d*)" endfoot="(-?\d*)" endlastfoot="(-?\d*)"'
)
lt_features_re = re.compile(
    r'(endhead="-?\d*" endfirsthead="-?\d*" endfoot="-?\d*" endlastfoot="-?\d*")'
)


# NOTE(review): update_longtables() is cut off after the last quote below
# (the rest of the routine is not present in this part of the file).  The
# surviving text is kept verbatim, only re-wrapped and commented out;
# restore the routine from a pristine copy of the file.
#
# def update_longtables(document):
#     "Update longtables to new format."
#     regexp = re.compile(r"^\\begin_inset\s+Tabular")
#     body = document.body
#     i = 0
#     while True:
#         i = find_re(body, regexp, i)
#         if i == -1:
#             break
#         i = i + 1
#         i = find_token(body, "