# -*- coding: utf-8 -*-
# This file is part of lyx2lyx
# -*- coding: utf-8 -*-
# Copyright (C) 2011 The LyX team
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

""" Convert files to the file format generated by lyx 2.1"""

import re, string
import unicodedata
import sys, os

# Uncomment only what you need to import, please.

from parser_tools import del_token, find_token, find_end_of, find_end_of_inset, \
    find_end_of_layout, find_re, get_option_value, get_value, get_quoted_value, \
    set_option_value

#from parser_tools import find_token, find_end_of, find_tokens, \
  #find_token_exact, find_end_of_inset, find_end_of_layout, \
  #find_token_backwards, is_in_inset, del_token, check_token

from lyx2lyx_tools import add_to_preamble, put_cmd_in_ert, get_ert

#from lyx2lyx_tools import insert_to_preamble, \
#  lyx2latex, latex_length, revert_flex_inset, \
#  revert_font_attrs, hex2ratio, str2bool

####################################################################
# Private helper functions

#def remove_option(lines, m, option):
    #''' removes option from line m. returns whether we did anything '''
    #l = lines[m].find(option)
    #if l == -1:
        #return False
    #val = lines[m][l:].split('"')[1]
    #lines[m] = lines[m][:l - 1] + lines[m][l+len(option + '="' + val + '"'):]
    #return True


def _formula_uses_command(document, commands):
    """Return True if any Formula inset in the body contains one of *commands*.

    *commands* are LaTeX command names without the leading backslash.  An
    inset whose end cannot be found is reported through document.warning()
    and skipped.
    """
    needles = ["\\%s" % c for c in commands]
    i = 0
    while True:
        i = find_token(document.body, '\\begin_inset Formula', i)
        if i == -1:
            return False
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: Can't find end of Formula inset at line " + str(i))
            i += 1
            continue
        code = "\n".join(document.body[i:j])
        for needle in needles:
            if needle in code:
                return True
        i = j


def _rotation_value(line):
    """Return the value of the rotate="..." attribute in *line*, or None."""
    attr = 'rotate="'
    j = line.find(attr)
    if j == -1:
        return None
    k = line.find('"', j + len(attr))
    return line[j + len(attr):k]


###############################################################################
###
### Conversion and reversion routines
###
###############################################################################

def revert_visible_space(document):
    """Revert InsetSpace visible into its ERT counterpart."""
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset space \\textvisiblespace{}", i)
        if i == -1:
            return
        end = find_end_of_inset(document.body, i)
        if end == -1:
            # Without this guard the slice below would insert instead of
            # replace and the same token would be found again forever.
            document.warning("Malformed LyX document: Can't find end of space inset at line " + str(i))
            i += 1
            continue
        document.body[i:end + 1] = put_cmd_in_ert("\\textvisiblespace{}")


def convert_undertilde(document):
    """Add a \\use_undertilde header line.

    The line is inserted after \\use_mathdots (falling back to \\use_mhchem,
    then \\use_esint).  If the preamble loads undertilde explicitly, that
    preamble line is removed and the value is 2, otherwise the value is 0.
    """
    i = -1
    for token in ("\\use_mathdots", "\\use_mhchem", "\\use_esint"):
        i = find_token(document.header, token, 0)
        if i != -1:
            break
    if i == -1:
        document.warning("Malformed LyX document: Can't find \\use_mathdots.")
        return
    j = find_token(document.preamble, "\\usepackage{undertilde}", 0)
    if j == -1:
        document.header.insert(i + 1, "\\use_undertilde 0")
    else:
        document.header.insert(i + 1, "\\use_undertilde 2")
        del document.preamble[j]


def revert_undertilde(document):
    """Remove \\use_undertilde and load undertilde if the document needs it.

    Value 0 loads nothing, 2 always loads the package, anything else
    (including a missing or unparsable line) is treated as "auto": the
    package is loaded only if a formula uses \\utilde.
    """
    usetilde = 1
    undertilde = find_token(document.header, "\\use_undertilde", 0)
    if undertilde == -1:
        document.warning("No \\use_undertilde line. Assuming auto.")
    else:
        val = get_value(document.header, "\\use_undertilde", undertilde)
        del document.header[undertilde]
        try:
            usetilde = int(val)
        except (TypeError, ValueError):
            document.warning("Invalid \\use_undertilde value: " + str(val) + ". Assuming auto.")
            # probably usedots has not been changed, but be safe.
            usetilde = 1

    if usetilde == 0:
        # do not load case
        return
    if usetilde == 2:
        # force load case
        add_to_preamble(document, ["\\usepackage{undertilde}"])
        return

    # so we are in the auto case. we want to load undertilde if \utilde is used.
    if _formula_uses_command(document, ["utilde"]):
        add_to_preamble(document, ["\\@ifundefined{utilde}{\\usepackage{undertilde}}"])


def revert_negative_space(document):
    """Revert InsetSpace negmedspace and negthickspace into TeX code.

    Every inset of either kind is replaced by ERT.  If anything was reverted
    and the header does not already force amsmath ("\\use_amsmath 2"), a
    conditional amsmath load is added to the preamble.
    """
    reverted = False
    # Each kind is handled to completion on its own, so a document that
    # contains only one of the two kinds is still reverted.
    for name in ("negmedspace", "negthickspace"):
        token = "\\begin_inset space \\%s{}" % name
        i = 0
        while True:
            i = find_token(document.body, token, i)
            if i == -1:
                break
            end = find_end_of_inset(document.body, i)
            if end == -1:
                document.warning("Malformed LyX document: Can't find end of space inset at line " + str(i))
                i += 1
                continue
            document.body[i:end + 1] = put_cmd_in_ert("\\%s{}" % name)
            reverted = True

    # load amsmath in the preamble if not already loaded
    if reverted and find_token(document.header, "\\use_amsmath 2", 0) == -1:
        add_to_preamble(document, ["\\@ifundefined{negthickspace}{\\usepackage{amsmath}}"])


def revert_math_spaces(document):
    """Revert formulas with protected custom space and protected hfills to TeX-code."""
    prefix_len = len("\\begin_inset Formula ")  # 21, text after it is the formula
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Formula", i)
        if i == -1:
            return
        if document.body[i].find("\\hspace*") != -1:
            end = find_end_of_inset(document.body, i)
            if end == -1:
                document.warning("Malformed LyX document: Can't find end of Formula inset at line " + str(i))
            else:
                document.body[i:end + 1] = put_cmd_in_ert(document.body[i][prefix_len:])
        i = i + 1


def convert_japanese_encodings(document):
    """Rename the japanese encodings to names understood by platex."""
    jap_enc_dict = {
        "EUC-JP-pLaTeX": "euc",
        "JIS-pLaTeX": "jis",
        "SJIS-pLaTeX": "sjis",
    }
    i = find_token(document.header, "\\inputencoding", 0)
    if i == -1:
        return
    val = get_value(document.header, "\\inputencoding", i)
    if val in jap_enc_dict:
        document.header[i] = "\\inputencoding %s" % jap_enc_dict[val]


def revert_japanese_encodings(document):
    """Revert the japanese encodings name changes."""
    jap_enc_dict = {
        "euc": "EUC-JP-pLaTeX",
        "jis": "JIS-pLaTeX",
        "sjis": "SJIS-pLaTeX",
    }
    i = find_token(document.header, "\\inputencoding", 0)
    if i == -1:
        return
    val = get_value(document.header, "\\inputencoding", i)
    if val in jap_enc_dict:
        document.header[i] = "\\inputencoding %s" % jap_enc_dict[val]


def revert_justification(document):
    """Revert the \\justification buffer param."""
    if not del_token(document.header, '\\justification', 0):
        document.warning("Malformed LyX document: Missing \\justification.")


def revert_australian(document):
    """Set English language variants Australian and Newzealand to English."""
    if document.language in ("australian", "newzealand"):
        document.language = "english"
        i = find_token(document.header, "\\language", 0)
        if i != -1:
            document.header[i] = "\\language english"

    for variant in ("australian", "newzealand"):
        token = "\\lang " + variant
        j = 0
        while True:
            j = find_token(document.body, token, j)
            if j == -1:
                break
            document.body[j] = document.body[j].replace(token, "\\lang english")
            j += 1


def convert_biblio_style(document):
    """Add a sensible default for \\biblio_style based on the citation engine."""
    i = find_token(document.header, "\\cite_engine", 0)
    if i == -1:
        return
    engine = get_value(document.header, "\\cite_engine", i).split("_")[0]
    style = {"basic": "plain", "natbib": "plainnat", "jurabib": "jurabib"}
    if engine not in style:
        # Do not abort the whole conversion on an unexpected engine name.
        document.warning("Unknown \\cite_engine value: " + engine + ". Assuming plain.")
    document.header.insert(i + 1, "\\biblio_style " + style.get(engine, "plain"))


def revert_biblio_style(document):
    """BibTeX insets with default option use the style defined by \\biblio_style.

    The \\biblio_style header line is removed and every "default" entry in
    the options of a bibtex inset is replaced by that style.
    """
    i = find_token(document.header, "\\biblio_style", 0)
    if i == -1:
        document.warning("No \\biblio_style line. Nothing to do.")
        return

    default_style = get_value(document.header, "\\biblio_style", i)
    del document.header[i]

    # We are looking for bibtex insets having the default option
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CommandInset bibtex", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: Can't find end of bibtex inset at line " + str(i))
            # Skip only this inset; later insets must still be processed.
            i += 1
            continue
        k = find_token(document.body, "options", i, j)
        if k != -1:
            options = get_quoted_value(document.body, "options", k).split(",")
            if "default" in options:
                # Replace the exact option only, not substrings of others.
                options = [default_style if o == "default" else o for o in options]
                document.body[k] = 'options "%s"' % ",".join(options)
        i = j


def handle_longtable_captions(document, forward):
    """Set (forward) or clear (backward) head/foot flags on longtable caption rows.

    forward=True: a caption row that carries none of the head/foot flags gets
    endfirsthead="true".  forward=False: every head/foot flag that is "true"
    on a caption row is set to "false".

    NOTE(review): in the reviewed text the tag literals and the table/row
    scanning code of this function were missing (everything between '<' and
    '>' had been stripped).  The scanning part below is a reconstruction
    built around the surviving statements -- confirm against version control.
    """
    row_flags = ('endfirsthead', 'endhead', 'endfoot', 'endlastfoot')
    begin_table = 0
    while True:
        begin_table = find_token(document.body, '<lyxtabular version=', begin_table)
        if begin_table == -1:
            break
        end_table = find_end_of(document.body, begin_table, '<lyxtabular', '</lyxtabular>')
        if end_table == -1:
            document.warning("Malformed LyX document: Could not find end of table.")
            begin_table += 1
            continue
        fline = find_token(document.body, "<features", begin_table, end_table)
        if fline == -1:
            document.warning("Can't find features for inset at line " + str(begin_table))
            begin_table += 1
            continue
        if "islongtable" not in document.body[fline]:
            # not a longtable
            begin_table += 1
            continue
        try:
            numrows = int(get_option_value(document.body[begin_table], "rows"))
        except (TypeError, ValueError):
            document.warning(document.body[begin_table])
            document.warning("Unable to determine rows!")
            begin_table += 1
            continue

        begin_row = begin_table
        for row in range(numrows):
            begin_row = find_token(document.body, '<row', begin_row, end_table)
            if begin_row == -1:
                document.warning("Can't find row " + str(row + 1))
                break
            end_row = find_end_of(document.body, begin_row, '<row', '</row>')
            if end_row == -1:
                document.warning("Can't find end of row " + str(row + 1))
                break
            line = document.body[begin_row]
            is_caption = get_option_value(line, 'caption') == 'true'
            if forward:
                if is_caption and not [f for f in row_flags
                                       if get_option_value(line, f) == 'true']:
                    # The value deliberately smuggles in a second attribute.
                    document.body[begin_row] = set_option_value(
                        line, 'caption', 'true", endfirsthead="true')
            elif is_caption:
                for flag in row_flags:
                    if get_option_value(document.body[begin_row], flag) == 'true':
                        document.body[begin_row] = set_option_value(
                            document.body[begin_row], flag, 'false')
            begin_row = end_row
        # since there could be a tabular inside this one, we
        # cannot jump to end.
        begin_table += 1


def convert_longtable_captions(document):
    """Add a firsthead flag to caption rows."""
    handle_longtable_captions(document, True)


def revert_longtable_captions(document):
    """Remove head/foot flag from caption rows."""
    handle_longtable_captions(document, False)


def convert_use_packages(document):
    """use_xxx yyy => use_package xxx yyy"""
    packages = ["amsmath", "esint", "mathdots", "mhchem", "undertilde"]
    for p in packages:
        i = find_token(document.header, "\\use_%s" % p, 0)
        if i != -1:
            value = get_value(document.header, "\\use_%s" % p, i)
            document.header[i] = "\\use_package %s %s" % (p, value)


def revert_use_packages(document):
    """use_package xxx yyy => use_xxx yyy

    Packages without a \\use_package line get the default value "1".
    """
    # the order is arbitrary for the use_package version, and not all packages need to be given.
    # Ensure a complete list and correct order (important for older LyX versions and especially lyx2lyx)
    packages = ("amsmath", "esint", "mathdots", "mhchem", "undertilde")
    values = {}
    found = []
    for p in packages:
        regexp = re.compile(r'(\\use_package\s+%s)' % p)
        i = find_re(document.header, regexp, 0)
        if i == -1:
            continue
        fields = get_value(document.header, "\\use_package", i).split()
        if len(fields) > 1:
            values[p] = fields[1]
        found.append(i)

    if found:
        # The new lines go where the first old line was.  Deleting from the
        # back keeps the remaining indices valid.
        j = min(found)
        for i in sorted(found, reverse=True):
            del document.header[i]
    else:
        document.warning("Malformed LyX document: Can't find \\use_package.")
        j = len(document.header)

    for p in packages:
        document.header.insert(j, "\\use_%s %s" % (p, values.get(p, "1")))
        j = j + 1


def convert_use_mathtools(document):
    """Insert use_package mathtools.

    The value is 2 if the preamble loaded mathtools explicitly (that preamble
    line is then removed), otherwise 0.
    """
    i = find_token(document.header, "\\use_package", 0)
    if i == -1:
        document.warning("Malformed LyX document: Can't find \\use_package.")
        return
    j = find_token(document.preamble, "\\usepackage{mathtools}", 0)
    if j == -1:
        document.header.insert(i + 1, "\\use_package mathtools 0")
    else:
        document.header.insert(i + 1, "\\use_package mathtools 2")
        del document.preamble[j]


def revert_use_mathtools(document):
    """Remove use_package mathtools and load the package if needed."""
    regexp = re.compile(r'(\\use_package\s+mathtools)')
    i = find_re(document.header, regexp, 0)
    value = "1"  # default is auto
    if i != -1:
        fields = get_value(document.header, "\\use_package", i).split()
        if len(fields) > 1:
            value = fields[1]
        del document.header[i]
    if value == "2":  # on
        add_to_preamble(document, ["\\usepackage{mathtools}"])
    elif value == "1":  # auto
        commands = ["mathclap", "mathllap", "mathrlap",
                    "lgathered", "rgathered", "vcentcolon", "dblcolon",
                    "coloneqq", "Coloneqq", "coloneq", "Coloneq", "eqqcolon",
                    "Eqqcolon", "eqcolon", "Eqcolon", "colonapprox",
                    "Colonapprox", "colonsim", "Colonsim"]
        if _formula_uses_command(document, commands):
            add_to_preamble(document, ["\\usepackage{mathtools}"])


def convert_cite_engine_type(document):
    """Determine the \\cite_engine_type from the citation engine."""
    i = find_token(document.header, "\\cite_engine", 0)
    if i == -1:
        return
    engine = get_value(document.header, "\\cite_engine", i)
    if "_" in engine:
        # Split once only, so an unexpected second underscore cannot raise.
        engine, engine_type = engine.split("_", 1)
    else:
        known = {"basic": "numerical", "jurabib": "authoryear"}
        if engine not in known:
            document.warning("Unknown \\cite_engine value: " + engine + ". Assuming numerical.")
        engine_type = known.get(engine, "numerical")
    document.header[i] = "\\cite_engine " + engine
    document.header.insert(i + 1, "\\cite_engine_type " + engine_type)


def revert_cite_engine_type(document):
    """Natbib had the type appended with an underscore."""
    engine_type = "numerical"
    i = find_token(document.header, "\\cite_engine_type", 0)
    if i == -1:
        document.warning("No \\cite_engine_type line. Assuming numerical.")
    else:
        engine_type = get_value(document.header, "\\cite_engine_type", i)
        del document.header[i]

    # We are looking for the natbib citation engine
    i = find_token(document.header, "\\cite_engine natbib", 0)
    if i == -1:
        return
    document.header[i] = "\\cite_engine natbib_" + engine_type


def revert_cancel(document):
    """Add cancel to the preamble if necessary."""
    commands = ["cancelto", "cancel", "bcancel", "xcancel"]
    if _formula_uses_command(document, commands):
        add_to_preamble(document, ["\\usepackage{cancel}"])


def revert_verbatim(document):
    """Revert verbatim environments completely to TeX-code."""
    i = 0
    consecutive = False
    connector = ['\\end_layout', '', '\\begin_layout Plain Layout']
    subst_end = ['\\end_layout', '', '\\begin_layout Plain Layout',
                 '\\end_layout', '',
                 '\\begin_layout Plain Layout', '', '',
                 '\\backslash', '',
                 'end{verbatim}',
                 '\\end_layout', '', '\\end_inset',
                 '', '', '\\end_layout']
    subst_begin = ['\\begin_layout Standard', '\\noindent',
                   '\\begin_inset ERT', 'status collapsed', '',
                   '\\begin_layout Plain Layout', '', '', '\\backslash',
                   'begin{verbatim}',
                   '\\end_layout', '', '\\begin_layout Plain Layout', '']
    while True:
        i = find_token(document.body, "\\begin_layout Verbatim", i)
        if i == -1:
            return
        j = find_end_of_layout(document.body, i)
        if j == -1:
            document.warning("Malformed lyx document: Can't find end of Verbatim layout")
            i += 1
            continue
        # delete all line breaks insets (there are no other insets)
        # The search is limited to this layout; an unbounded search would
        # rewrite insets elsewhere and make j drift.
        l = i
        while True:
            n = find_token(document.body, "\\begin_inset Newline newline", l, j)
            if n == -1:
                n = find_token(document.body, "\\begin_inset Newline linebreak", l, j)
                if n == -1:
                    break
            m = find_end_of_inset(document.body, n)
            del document.body[m:m + 1]
            document.body[n:n + 1] = list(connector)
            l += 1
            # one line removed, one replaced by three: the layout grew by one
            j += 1
        # consecutive verbatim environments need to be connected
        k = find_token(document.body, "\\begin_layout Verbatim", j)
        followed = (k == j + 2)
        if followed:
            document.body[j:j + 1] = list(connector)
        else:
            document.body[j:j + 1] = subst_end
            # the next paragraph must not be indented
            document.body[j + 19:j + 19] = ['\\noindent']
        if consecutive:
            # continuing an already opened ERT: just drop the layout line
            del document.body[i:i + 1]
        else:
            document.body[i:i + 1] = subst_begin
        consecutive = followed


def revert_tipa(document):
    """Revert native TIPA insets to mathed or ERT."""
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset IPA", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Malformed lyx document: Can't find end of IPA inset")
            i += 1
            continue
        n = find_token(document.body, "\\begin_layout", i, j)
        if n == -1:
            document.warning("Malformed lyx document: IPA inset has no embedded layout")
            i += 1
            continue
        m = find_end_of_layout(document.body, n)
        if m == -1:
            document.warning("Malformed lyx document: Can't find end of embedded layout")
            i += 1
            continue
        content = document.body[n + 1:m]
        p = find_token(document.body, "\\begin_layout", m, j)
        if p != -1 or len(content) > 1:
            # IPA insets with multiple pars need to be wrapped by \begin{IPA}...\end{IPA}
            content = document.body[i + 1:j]
            document.body[i:j + 1] = (['\\end_layout', '', '\\begin_layout Standard']
                                      + put_cmd_in_ert("\\begin{IPA}")
                                      + ['\\end_layout']
                                      + content
                                      + ['\\begin_layout Standard']
                                      + put_cmd_in_ert("\\end{IPA}"))
            add_to_preamble(document, ["\\usepackage{tipa,tipx}"])
        else:
            # single-par IPA insets can be reverted to mathed
            # content has at most one line here; join also covers "empty"
            document.body[i:j + 1] = ["\\begin_inset Formula $\\text{\\textipa{" + "".join(content) + "}}$",
                                      "\\end_inset"]
        i = j


# NOTE(review): in the reviewed text everything between the first find_token
# call of revert_cell_rotation and the tail of a table-level routine was
# missing (stripped '<...>' spans).  revert_cell_rotation below is rebuilt
# from its surviving head plus the surviving tail's pattern; the ERT insert
# offsets (i + 5 / i + 4) are an assumption about the cell layout -- confirm.
# Any definitions that lived in the lost span (presumably a
# convert_cell_rotation counterpart) must be restored from version control.

def revert_cell_rotation(document):
    """Revert cell rotations to TeX-code."""
    load_rotating = False
    i = 0
    try:
        while True:
            # first, let's find out if we need to do anything
            i = find_token(document.body, '<cell ', i)
            if i == -1:
                return
            value = _rotation_value(document.body[i])
            if value == "90":
                document.body[i] = re.sub(r'rotate="[^"]+?"', 'rotate="true"', document.body[i])
            elif value is not None:
                # remove rotate option
                document.body[i] = re.sub(r' rotate="[^"]+?"', '', document.body[i])
                if value != "0":
                    load_rotating = True
                    # write ERT (end first, so the begin index stays valid)
                    document.body[i + 5:i + 5] = put_cmd_in_ert("\\end{turn}")
                    document.body[i + 4:i + 4] = put_cmd_in_ert("\\begin{turn}{" + value + "}")
            i += 1
    finally:
        if load_rotating:
            add_to_preamble(document, ["\\@ifundefined{turnbox}{\\usepackage{rotating}}{}"])


def revert_table_rotation(document):
    """Revert table rotations to TeX-code.

    NOTE(review): only the second half of this routine survived in the
    reviewed text; its name and the '<features ' / '</lyxtabular>' literals
    are reconstructed -- confirm against version control.
    """
    load_rotating = False
    i = 0
    try:
        while True:
            # first, let's find out if we need to do anything
            i = find_token(document.body, '<features ', i)
            if i == -1:
                return
            value = _rotation_value(document.body[i])
            if value == "90":
                document.body[i] = re.sub(r'rotate="[^"]+?"', 'rotate="true"', document.body[i])
            elif value is not None:
                # remove rotate option
                document.body[i] = re.sub(r' rotate="[^"]+?"', '', document.body[i])
                if value != "0":
                    # Search from the features line; the surviving code
                    # passed the attribute's column offset as start line.
                    end_table = find_token(document.body, '</lyxtabular>', i)
                    if end_table == -1:
                        document.warning("Malformed LyX document: Could not find end of table.")
                    else:
                        load_rotating = True
                        # write ERT (end first, so the begin index stays valid)
                        document.body[end_table + 3:end_table + 3] = \
                            put_cmd_in_ert("\\end{turn}")
                        document.body[i - 2:i - 2] = \
                            put_cmd_in_ert("\\begin{turn}{" + value + "}")
            i += 1
    finally:
        if load_rotating:
            add_to_preamble(document, ["\\@ifundefined{turnbox}{\\usepackage{rotating}}{}"])


def convert_table_rotation(document):
    """Convert table rotation statements from "true" to "90".

    NOTE(review): the reviewed text ends inside this function's first
    find_token call; the body is reconstructed from the docstring and the
    matching revert code -- confirm against version control.
    """
    i = 0
    while True:
        # first, let's find out if we need to do anything
        i = find_token(document.body, '<features ', i)
        if i == -1:
            return
        if 'rotate="true"' in document.body[i]:
            # convert "true" to "90"
            document.body[i] = re.sub(r'rotate="[^"]+?"', 'rotate="90"', document.body[i])
        i += 1