# This file is part of lyx2lyx # -*- coding: utf-8 -*- # Copyright (C) 2007 José Matos # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. """ Convert files to the file format generated by lyx 1.6""" import re import unicodedata import sys, os from parser_tools import find_token, find_end_of, find_tokens, get_value #################################################################### # Private helper functions def find_end_of_inset(lines, i): " Find end of inset, where lines[i] is included." 
return find_end_of(lines, i, "\\begin_inset", "\\end_inset") def wrap_into_ert(string, src, dst): " Wrap a something into an ERT" return string.replace(src, '\n\\begin_inset ERT\nstatus collapsed\n\\begin_layout Standard\n' + dst + '\n\\end_layout\n\\end_inset\n') #################################################################### def fix_wrong_tables(document): i = 0 while True: i = find_token(document.body, "\\begin_inset Tabular", i) if i == -1: return j = find_end_of_inset(document.body, i + 1) if j == -1: document.warning("Malformed LyX document: Could not find end of tabular.") continue m = i + 1 nrows = int(document.body[i+1].split('"')[3]) ncols = int(document.body[i+1].split('"')[5]) for l in range(nrows): prev_multicolumn = 0 for k in range(ncols): m = find_token(document.body, ' ": fullcontent = wrap_into_ert(fullcontent, r'\"', '"') #fullcontent = fullcontent.replace(r'\"', '\n\\begin_inset ERT\nstatus collapsed\n\\begin_layout standard\n"\n\\end_layout\n\\end_inset\n') # Math: r = re.compile('^(.*?)(\$.*?\$)(.*)') g = fullcontent while r.match(g): m = r.match(g) s = m.group(1) f = m.group(2).replace('\\\\', '\\') g = m.group(3) if s: # this is non-math! s = wrap_into_ert(s, r'\\', '\\backslash') s = wrap_into_ert(s, '{', '{') s = wrap_into_ert(s, '}', '}') document.body.insert(i + 3, s) i += 1 document.body.insert(i + 3, "\\begin_inset Formula " + f) document.body.insert(i + 4, "\\end_inset") i += 2 # Generic, \\ -> \backslash: g = wrap_into_ert(g, r'\\', '\\backslash{}') g = wrap_into_ert(g, '{', '{') g = wrap_into_ert(g, '}', '}') document.body.insert(i + 3, g) document.body[i + 4] = "\\end_layout" i = i + 5 def revert_latexcommand_index(document): "Revert from collapsable form to LatexCommand form." 
def revert_latexcommand_index(document):
    """Revert from collapsable form to LatexCommand form.

    Every "\\begin_inset Index" inset is rewritten in place as a
    "CommandInset index" whose ``name`` line holds the concatenated text
    of the inset, with inset/layout markup prefixes stripped and the
    umlauts a, o, u written as LaTeX escapes.
    """
    # Markup prefixes removed from the start of each content line, tested
    # one after the other in exactly this order.
    markup_prefixes = (
        "\\begin_inset ERT",
        "\\begin_inset Formula",
        "\\begin_layout Standard",
        "\\end_layout",
        "\\end_inset",
        "status collapsed",
    )
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Index", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            return
        del document.body[j - 1]
        del document.body[j - 2] # \end_layout
        document.body[i] = "\\begin_inset CommandInset index"
        document.body[i + 1] = "LatexCommand index"
        # clean up multiline stuff
        pieces = []
        for line in document.body[i + 3:j - 2]:
            for prefix in markup_prefixes:
                if line.startswith(prefix):
                    line = line[len(prefix):]
            line = line.replace(u'ä', r'\\\"a').replace(u'ö', r'\\\"o').replace(u'ü', r'\\\"u')
            pieces.append(line)
        document.body[i + 3] = "name " + '"' + "".join(pieces) + '"'
        # Drop the remaining content lines, now folded into the name line.
        del document.body[i + 4:j - 2]
        document.body.insert(i + 4, "")
        del document.body[i + 2] # \begin_layout standard
        i = i + 5


def revert_wraptable(document):
    "Revert wrap table to wrap figure."
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Wrap table", i)
        if i == -1:
            return
        document.body[i] = document.body[i].replace('\\begin_inset Wrap table',
                                                    '\\begin_inset Wrap figure')
        i = i + 1


def _replace_language(document, old, new):
    """Rename language `old` to `new` in the header and in the body.

    When document.language equals `old`, it is set to `new` and the
    "\\language" header line (if one is found) is rewritten.  Independently
    of that, every body line reported by find_token for "\\lang <old>" has
    that tag replaced by "\\lang <new>".
    """
    if document.language == old:
        document.language = new
        i = find_token(document.header, "\\language", 0)
        if i != -1:
            document.header[i] = "\\language " + new
    old_tag = "\\lang " + old
    new_tag = "\\lang " + new
    j = 0
    while True:
        j = find_token(document.body, old_tag, j)
        if j == -1:
            return
        document.body[j] = document.body[j].replace(old_tag, new_tag)
        j = j + 1


def revert_vietnamese(document):
    "Set language Vietnamese to English"
    _replace_language(document, "vietnamese", "english")


def revert_japanese(document):
    "Set language japanese-plain to japanese"
    _replace_language(document, "japanese-plain", "japanese")


def revert_japanese_encoding(document):
    "Set input encoding form EUC-JP-plain to EUC-JP etc."
    replacements = (
        ("\\inputencoding EUC-JP-plain", "\\inputencoding EUC-JP"),
        ("\\inputencoding JIS-plain", "\\inputencoding JIS"),
        # convert to UTF8 since there is currently no SJIS encoding
        ("\\inputencoding SJIS-plain", "\\inputencoding UTF8"),
    )
    for token, replacement in replacements:
        i = find_token(document.header, token, 0)
        if i != -1:
            document.header[i] = replacement


def revert_inset_info(document):
    """Replace info inset with its content.

    Each Info inset is collapsed into a single body line "<type>:<arg>",
    taken from the inset's ``type`` and ``arg`` lines ("unknown" and ""
    when a line is absent).
    """
    i = 0
    while True:
        i = find_token(document.body, '\\begin_inset Info', i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            # should not happen
            document.warning("Malformed LyX document: Could not find end of Info inset.")
            # Step over the broken inset.  Carrying on with j == -1 would
            # insert a line in front of the untouched inset, which the next
            # search would find again, looping forever.
            i += 1
            continue
        info_type = 'unknown'
        arg = ''
        for line in document.body[i:j + 1]:
            if line.startswith("arg"):
                arg = line[3:].strip().strip('"')
            if line.startswith("type"):
                info_type = line[4:].strip().strip('"')
        # I think there is a newline after \\end_inset, which should be removed.
        # The bounds check covers an inset that ends the body.
        end = j + 1
        if end < len(document.body) and document.body[end].strip() == "":
            end += 1
        document.body[i:end] = [info_type + ':' + arg]


def convert_pdf_options(document):
    """Adapt the PDF header options.

    Inserts "\\pdf_pdfusetitle true" after "\\use_hyperref" when hyperref
    is enabled, deletes the "\\pdf_store_options" line and strips the
    double quotes from the "\\pdf_bookmarksopenlevel" line.
    """
    has_hr = get_value(document.header, "\\use_hyperref", 0, default = "0")
    if has_hr == "1":
        k = find_token(document.header, "\\use_hyperref", 0)
        document.header.insert(k + 1, "\\pdf_pdfusetitle true")
    k = find_token(document.header, "\\pdf_store_options", 0)
    if k != -1:
        del document.header[k]
    # Search from the top when no store-options line was found instead of
    # handing -1 to find_token as a start index.
    i = find_token(document.header, "\\pdf_bookmarksopenlevel", max(k, 0))
    if i == -1:
        return
    document.header[i] = document.header[i].replace('"', '')


def revert_pdf_options_2(document):
    """Undo convert_pdf_options.

    Deletes the "\\pdf_pdfusetitle" line and puts double quotes around the
    value of "\\pdf_bookmarksopenlevel".
    """
    # Never pass -1 as a start index when \use_hyperref is missing.
    k = max(find_token(document.header, "\\use_hyperref", 0), 0)
    i = find_token(document.header, "\\pdf_pdfusetitle", k)
    if i != -1:
        del document.header[i]
    i = find_token(document.header, "\\pdf_bookmarksopenlevel", k)
    if i == -1:
        return
    values = document.header[i].split()
    if len(values) < 2:
        # Nothing to quote; leave the line alone rather than fail on values[1].
        document.warning("Malformed LyX document: '\\pdf_bookmarksopenlevel' has no value.")
        return
    values[1] = ' "' + values[1] + '"'
    document.header[i] = ''.join(values)


def convert_htmlurl(document):
    'Convert "htmlurl" to "href" insets for docbook'
    if document.backend != "docbook":
        return
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CommandInset url", i)
        if i == -1:
            return
        document.body[i] = "\\begin_inset CommandInset href"
        document.body[i + 1] = "LatexCommand href"
        i = i + 1


def convert_url(document):
    """Convert url insets to url charstyles.

    Does nothing for the docbook backend.  For other backends each
    "CommandInset url" is replaced by a "Flex URL" inset holding the
    target; a ``name`` found two lines below the inset start is emitted as
    plain text in front of the new inset.
    """
    if document.backend == "docbook":
        return
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CommandInset url", i)
        if i == -1:
            break
        n = find_token(document.body, "name", i)
        if n == i + 2:
            # place the URL name in typewriter before the new URL insert
            # grab the name 'bla' from the e.g. the line 'name "bla"',
            # therefore start with the 6th character
            name = document.body[n][6:-1]
            document.body[i:i] = [name + " "]
            i = i + 1
        j = find_token(document.body, "target", i)
        if j == -1:
            document.warning("Malformed LyX document: Can't find target for url inset")
            # Step past this inset; resetting i to -1 restarted the search
            # and found the same inset again without end.
            i += 1
            continue
        target = document.body[j][8:-1]
        k = find_token(document.body, "\\end_inset", j)
        if k == -1:
            document.warning("Malformed LyX document: Can't find end of url inset")
            i += 1
            continue
        newstuff = ["\\begin_inset Flex URL",
                    "status collapsed", "",
                    "\\begin_layout Standard",
                    "",
                    target,
                    "\\end_layout",
                    ""]
        document.body[i:k] = newstuff
        # Resume right behind the inserted lines (on the kept \end_inset);
        # the old index k is stale once the list length has changed.
        i += len(newstuff)


def revert_href(document):
    'Reverts hyperlink insets (href) to url insets (url)'
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CommandInset href", i)
        if i == -1:
            return
        document.body[i:i + 2] = ["\\begin_inset CommandInset url",
                                  "LatexCommand url"]
        i = i + 2


def convert_include(document):
    """Converts include insets to new format.

    A line "\\begin_inset Include \\cmd{file}[opts]" and the line after it
    (kept verbatim as the preview line) become::

        \\begin_inset CommandInset include
        LatexCommand cmd
        <preview line>
        filename "file"
        lstparams "opts"      (only when options are present)
    """
    r = re.compile(r'\\begin_inset Include\s+\\([^{]+){([^}]*)}(?:\[(.*)\])?')
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Include", i)
        if i == -1:
            return
        line = document.body[i]
        previewline = document.body[i + 1]
        m = r.match(line)
        if m is None:
            document.warning("Unable to match line " + str(i) + " of body!")
            i += 1
            continue
        cmd = m.group(1)
        fn = m.group(2)
        opt = m.group(3)
        insertion = ["\\begin_inset CommandInset include",
                     "LatexCommand " + cmd, previewline,
                     "filename \"" + fn + "\""]
        newlines = 2
        if opt:
            insertion.append("lstparams " + '"' + opt + '"')
            newlines += 1
        document.body[i:i + 2] = insertion
        i += newlines


def _unquote(value):
    "Remove one pair of enclosing double quotes from value, if present."
    if len(value) >= 2 and value[0] == '"' and value[-1] == '"':
        return value[1:-1]
    return value


def revert_include(document):
    """Reverts include insets to old format.

    Reads the layout written by convert_include (LatexCommand line, preview
    line, quoted ``filename``, optional quoted ``lstparams``) and rebuilds
    "\\begin_inset Include \\cmd{file}[opts]" followed by the preview line.
    The layout this function previously expected (preview line before the
    LatexCommand line, ``options`` key, unquoted values) is still accepted.
    """
    r1 = re.compile('LatexCommand (.+)')
    r2 = re.compile('filename (.+)')
    r3 = re.compile('(?:lstparams|options) (.*)')
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CommandInset include", i)
        if i == -1:
            return
        pos = i + 1
        previewline = None
        m = r1.match(document.body[pos])
        if m is None:
            # Previously expected layout: preview line first, command second.
            previewline = document.body[pos]
            pos += 1
            m = r1.match(document.body[pos])
        if m is None:
            document.warning("Malformed LyX document: No LatexCommand line for `" +
                             document.body[i] + "' on line " + str(i) + ".")
            i += 1
            continue
        cmd = m.group(1)
        pos += 1
        m = r2.match(document.body[pos])
        if m is None and previewline is None:
            # Layout written by convert_include: the preview line sits
            # between the LatexCommand and the filename lines.
            previewline = document.body[pos]
            pos += 1
            m = r2.match(document.body[pos])
        if m is None:
            document.warning("Malformed LyX document: No filename line for `" +
                             document.body[i] + "' on line " + str(i) + ".")
            i += 2
            continue
        fn = _unquote(m.group(1))
        pos += 1
        options = ""
        if cmd == "lstinputlisting":
            m = r3.match(document.body[pos])
            if m is not None:
                options = _unquote(m.group(1))
                pos += 1
        newline = "\\begin_inset Include \\" + cmd + "{" + fn + "}"
        if options:
            newline += "[" + options + "]"
        insertion = [newline]
        if previewline is not None:
            insertion.append(previewline)
        document.body[i:pos] = insertion
        i += len(insertion)


def revert_albanian(document):
    "Set language Albanian to English"
    _replace_language(document, "albanian", "english")


def revert_lowersorbian(document):
    "Set language lower Sorbian to English"
    _replace_language(document, "lowersorbian", "english")


def revert_uppersorbian(document):
    "Set language uppersorbian to usorbian as this was used in LyX 1.5"
    _replace_language(document, "uppersorbian", "usorbian")


def convert_usorbian(document):
    "Set language usorbian to uppersorbian"
    _replace_language(document, "usorbian", "uppersorbian")


def revert_macro_optional_params(document):
    "Convert macro definitions with optional parameters into ERTs"
    # Stub to convert macro definitions with one or more optional parameters
    # into uninterpreted ERT insets


def revert_hyperlinktype(document):
    """Reverts hyperlink type.

    Deletes a ``type`` line whenever it is found directly below a
    ``target`` line.
    """
    i = 0
    while True:
        i = find_token(document.body, "target", i)
        if i == -1:
            return
        j = find_token(document.body, "type", i)
        if j == -1:
            return
        if j == i + 1:
            del document.body[j]
        i = i + 1


def _ert_command(command):
    "Return the collapsed ERT inset text holding a backslash plus command."
    return ('\\begin_inset ERT\nstatus collapsed\n\n'
            '\\begin_layout Standard\n\n\n\\backslash\n'
            + command + '\n\\end_layout\n\n\\end_inset\n\n')


def _revert_line_to_ert(document, token, command):
    "Overwrite each body line find_token reports for token with an ERT inset."
    ert = _ert_command(command)
    i = 0
    while True:
        i = find_token(document.body, token, i)
        if i == -1:
            return
        document.body[i] = ert
        i = i + 1


def revert_pagebreak(document):
    'Reverts pagebreak to ERT'
    _revert_line_to_ert(document, "\\pagebreak", 'pagebreak{}')


def revert_linebreak(document):
    'Reverts linebreak to ERT'
    _revert_line_to_ert(document, "\\linebreak", 'linebreak{}')


def revert_latin(document):
    "Set language Latin to English"
    _replace_language(document, "latin", "english")


def revert_samin(document):
    "Set language North Sami to English"
    _replace_language(document, "samin", "english")


def convert_serbocroatian(document):
    "Set language Serbocroatian to Croatian as this was really Croatian in LyX 1.5"
    _replace_language(document, "serbocroatian", "croatian")


def convert_framed_notes(document):
    """Convert framed notes to boxes.

    "Note Framed" / "Note Shaded" insets become "Box Framed" / "Box Shaded"
    and a single body entry holding the default box parameters is inserted
    after the inset's first line.
    """
    box_parameters = ('position "t"\nhor_pos "c"\nhas_inner_box 0\ninner_pos "t"\n'
                      'use_parbox 0\nwidth "100col%"\nspecial "none"\nheight "1in"\n'
                      'height_special "totalheight"')
    i = 0
    while True:
        i = find_tokens(document.body, ["\\begin_inset Note Framed", "\\begin_inset Note Shaded"], i)
        if i == -1:
            return
        document.body[i] = document.body[i].replace("\\begin_inset Note", "\\begin_inset Box")
        document.body.insert(i + 1, box_parameters)
        i = i + 1


def revert_framed_notes(document):
    """Revert framed boxes to notes.

    A box with neither "has_inner_box 1" nor "use_parbox 1" becomes a Note
    and loses its parameter lines.  Any other box keeps its parameters
    (a Shaded box is renamed to Frameless) and gets a "Note Shaded" inset
    wrapped around the content of its first Standard layout.
    """
    i = 0
    while True:
        i = find_tokens(document.body, ["\\begin_inset Box Framed", "\\begin_inset Box Shaded"], i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            # should not happen
            document.warning("Malformed LyX document: Could not find end of Box inset.")
            # NOTE(review): j == -1 is still handed to find_token as the end
            # bound below; presumably the lookup then fails and the function
            # returns after a second warning - confirm against parser_tools.
        k = find_token(document.body, "status", i + 1, j)
        if k == -1:
            document.warning("Malformed LyX document: Missing `status' tag in Box inset.")
            return
        status = document.body[k]
        layout_begin = find_token(document.body, "\\begin_layout Standard", i + 1, j)
        if layout_begin == -1:
            document.warning("Malformed LyX document: Missing `\\begin_layout Standard' in Box inset.")
            return
        layout_end = find_token(document.body, "\\end_layout", i + 1, j)
        if layout_end == -1:
            document.warning("Malformed LyX document: Missing `\\end_layout' in Box inset.")
            return
        ibox = find_token(document.body, "has_inner_box 1", i + 1, k)
        pbox = find_token(document.body, "use_parbox 1", i + 1, k)
        if ibox == -1 and pbox == -1:
            document.body[i] = document.body[i].replace("\\begin_inset Box", "\\begin_inset Note")
            del document.body[i + 1:k]
        else:
            document.body[i] = document.body[i].replace("\\begin_inset Box Shaded", "\\begin_inset Box Frameless")
            document.body.insert(layout_begin + 1,
                                 "\\begin_inset Note Shaded\n" + status + "\n\\begin_layout Standard\n")
            # layout_end was computed before the insertion above, so this
            # index is deliberately not adjusted: when the \end_layout
            # follows the layout start, the closing lines end up in front
            # of it.
            document.body.insert(layout_end + 1, "\\end_layout\n\\end_inset")
        i = i + 1


def revert_slash(document):
    'Revert \\SpecialChar \\slash{} to ERT'
    ert = _ert_command('slash{}')
    for i, line in enumerate(document.body):
        document.body[i] = line.replace('\\SpecialChar \\slash{}', ert)


def revert_nobreakdash(document):
    """Revert \\SpecialChar \\nobreakdash- to ERT.

    When at least one occurrence was replaced, the "\\use_amsmath" header
    line is set to "\\use_amsmath 2".
    """
    token = '\\SpecialChar \\nobreakdash-'
    ert = _ert_command('nobreakdash-')
    found = False
    for i, line in enumerate(document.body):
        # Detect the token wherever replace() acts on it, not only at the
        # start of the line, so that no replacement goes unnoticed.
        if token in line:
            found = True
            document.body[i] = line.replace(token, ert)
    if not found:
        return
    j = find_token(document.header, "\\use_amsmath", 0)
    if j == -1:
        document.warning("Malformed LyX document: Missing '\\use_amsmath'.")
        return
    document.header[j] = "\\use_amsmath 2"


def revert_bahasam(document):
    "Set language Bahasa Malaysia to Bahasa Indonesia"
    _replace_language(document, "bahasam", "bahasa")


def revert_interlingua(document):
    "Set language Interlingua to English"
    _replace_language(document, "interlingua", "english")


##
# Conversion hub
#

supported_versions = ["1.6.0","1.6"]
convert = [[277, [fix_wrong_tables]],
           [278, [close_begin_deeper]],
           [279, [long_charstyle_names]],
           [280, [axe_show_label]],
           [281, []],
           [282, []],
           [283, [convert_flex]],
           [284, []],
           [285, []],
           [286, []],
           [287, [convert_wrapfig_options]],
           [288, [convert_inset_command]],
           [289, [convert_latexcommand_index]],
           [290, []],
           [291, []],
           [292, []],
           [293, []],
           [294, [convert_pdf_options]],
           [295, [convert_htmlurl, convert_url]],
           [296, [convert_include]],
           [297, [convert_usorbian]],
           [298, []],
           [299, []],
           [300, []],
           [301, []],
           [302, []],
           [303, [convert_serbocroatian]],
           [304, [convert_framed_notes]],
           [305, []],
           [306, []],
           [307, []]
          ]

revert =  [[306, [revert_slash, revert_nobreakdash]],
           [305, [revert_interlingua]],
           [304, [revert_bahasam]],
           [303, [revert_framed_notes]],
           [302, []],
           [301, [revert_latin, revert_samin]],
           [300, [revert_linebreak]],
           [299, [revert_pagebreak]],
           [298, [revert_hyperlinktype]],
           [297, [revert_macro_optional_params]],
           [296, [revert_albanian, revert_lowersorbian, revert_uppersorbian]],
           [295, [revert_include]],
           [294, [revert_href]],
           [293, [revert_pdf_options_2]],
           [292, [revert_inset_info]],
           [291, [revert_japanese, revert_japanese_encoding]],
           [290, [revert_vietnamese]],
           [289, [revert_wraptable]],
           [288, [revert_latexcommand_index]],
           [287, [revert_inset_command]],
           [286, [revert_wrapfig_options]],
           [285, [revert_pdf_options]],
           [284, [remove_inzip_options]],
           [283, []],
           [282, [revert_flex]],
           [281, []],
           [280, [revert_begin_modules]],
           [279, [revert_show_label]],
           [278, [revert_long_charstyle_names]],
           [277, []],
           [276, []]
          ]


if __name__ == "__main__":
    pass