# -*- coding: utf-8 -*- # This file is part of lyx2lyx # Copyright (C) 2011 The LyX team # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """ Convert files to the file format generated by lyx 2.0""" import re, string import unicodedata import sys, os from parser_tools import del_complete_lines, \ find_token, find_end_of, find_tokens, \ find_token_exact, find_end_of_inset, find_end_of_layout, \ find_token_backwards, is_in_inset, get_value, get_quoted_value, \ del_token, check_token, get_option_value from lyx2lyx_tools import add_to_preamble, insert_to_preamble, \ put_cmd_in_ert, lyx2latex, latex_length, revert_flex_inset, \ revert_font_attrs, hex2ratio, str2bool, revert_language #################################################################### # Private helper functions def remove_option(lines, m, option): ''' removes option from line m. 
returns whether we did anything ''' l = lines[m].find(option) if l == -1: return False val = lines[m][l:].split('"')[1] lines[m] = lines[m][:l - 1] + lines[m][l+len(option + '="' + val + '"'):] return True ############################################################################### ### ### Conversion and reversion routines ### ############################################################################### def revert_swiss(document): " Set language german-ch to ngerman " i = 0 if document.language == "german-ch": document.language = "ngerman" i = find_token(document.header, "\\language", 0) if i != -1: document.header[i] = "\\language ngerman" j = 0 while True: j = find_token(document.body, "\\lang german-ch", j) if j == -1: return document.body[j] = document.body[j].replace("\\lang german-ch", "\\lang ngerman") j = j + 1 def revert_tabularvalign(document): " Revert the tabular valign option " i = 0 while True: i = find_token(document.body, "\\begin_inset Tabular", i) if i == -1: return end = find_end_of_inset(document.body, i) if end == -1: document.warning("Can't find end of inset at line " + str(i)) i += 1 continue fline = find_token(document.body, "<features", i, end) if fline == -1: document.warning("Can't find features for inset at line " + str(i)) i += 1 continue p = document.body[fline].find("islongtable") if p != -1: q = document.body[fline].find("tabularvalignment") if q != -1: document.body[fline] = re.sub(r' tabularvalignment=\"[a-z]+\"', "", document.body[fline]) i += 1 continue # no longtable tabularvalignment = 'c' # which valignment is specified? 
m = document.body[fline].find('tabularvalignment="top"') if m != -1: tabularvalignment = 't' m = document.body[fline].find('tabularvalignment="bottom"') if m != -1: tabularvalignment = 'b' # delete tabularvalignment q = document.body[fline].find("tabularvalignment") if q != -1: document.body[fline] = re.sub(r' tabularvalignment=\"[a-z]+\"', "", document.body[fline]) # don't add a box when centered if tabularvalignment == 'c': i = end continue subst = ['\\end_inset', '\\end_layout'] document.body[end:end] = subst # just inserts those lines subst = ['\\begin_inset Box Frameless', 'position "' + tabularvalignment +'"', 'hor_pos "c"', 'has_inner_box 1', 'inner_pos "c"', 'use_parbox 0', # we don't know the width, assume 50% 'width "50col%"', 'special "none"', 'height "1in"', 'height_special "totalheight"', 'status open', '', '\\begin_layout Plain Layout'] document.body[i:i] = subst # this just inserts the array at i # since there could be a tabular inside a tabular, we cannot # jump to end i += len(subst) def revert_phantom_types(document, ptype, cmd): " Reverts phantom to ERT " i = 0 while True: i = find_token(document.body, "\\begin_inset Phantom " + ptype, i) if i == -1: return end = find_end_of_inset(document.body, i) if end == -1: document.warning("Can't find end of inset at line " + str(i)) i += 1 continue blay = find_token(document.body, "\\begin_layout Plain Layout", i, end) if blay == -1: document.warning("Can't find layout for inset at line " + str(i)) i = end continue bend = find_end_of_layout(document.body, blay) if bend == -1: document.warning("Malformed LyX document: Could not find end of Phantom inset's layout.") i = end continue substi = ["\\begin_inset ERT", "status collapsed", "", "\\begin_layout Plain Layout", "", "", "\\backslash", cmd + "{", "\\end_layout", "", "\\end_inset"] substj = ["\\size default", "", "\\begin_inset ERT", "status collapsed", "", "\\begin_layout Plain Layout", "", "}", "\\end_layout", "", "\\end_inset"] # do the later one first 
so as not to mess up the numbering document.body[bend:end + 1] = substj document.body[i:blay + 1] = substi i = end + len(substi) + len(substj) - (end - bend) - (blay - i) - 2 def revert_phantom(document): revert_phantom_types(document, "Phantom", "phantom") def revert_hphantom(document): revert_phantom_types(document, "HPhantom", "hphantom") def revert_vphantom(document): revert_phantom_types(document, "VPhantom", "vphantom") def revert_xetex(document): " Reverts documents that use XeTeX " i = find_token(document.header, '\\use_xetex', 0) if i == -1: document.warning("Malformed LyX document: Missing \\use_xetex.") return if not str2bool(get_value(document.header, "\\use_xetex", i)): del document.header[i] return del document.header[i] # 1.) set doc encoding to utf8-plain i = find_token(document.header, "\\inputencoding", 0) if i == -1: document.warning("Malformed LyX document: Missing \\inputencoding.") else: document.header[i] = "\\inputencoding utf8-plain" # 2.) check font settings # defaults roman = sans = typew = "default" osf = False sf_scale = tt_scale = 100.0 i = find_token(document.header, "\\font_roman", 0) if i == -1: document.warning("Malformed LyX document: Missing \\font_roman.") else: roman = get_value(document.header, "\\font_roman", i) document.header[i] = "\\font_roman default" i = find_token(document.header, "\\font_sans", 0) if i == -1: document.warning("Malformed LyX document: Missing \\font_sans.") else: sans = get_value(document.header, "\\font_sans", i) document.header[i] = "\\font_sans default" i = find_token(document.header, "\\font_typewriter", 0) if i == -1: document.warning("Malformed LyX document: Missing \\font_typewriter.") else: typew = get_value(document.header, "\\font_typewriter", i) document.header[i] = "\\font_typewriter default" i = find_token(document.header, "\\font_osf", 0) if i == -1: document.warning("Malformed LyX document: Missing \\font_osf.") else: osf = str2bool(get_value(document.header, "\\font_osf", i)) 
document.header[i] = "\\font_osf false" i = find_token(document.header, "\\font_sc", 0) if i == -1: document.warning("Malformed LyX document: Missing \\font_sc.") else: # we do not need this value. document.header[i] = "\\font_sc false" i = find_token(document.header, "\\font_sf_scale", 0) if i == -1: document.warning("Malformed LyX document: Missing \\font_sf_scale.") else: val = get_value(document.header, '\\font_sf_scale', i) try: # float() can throw sf_scale = float(val) except: document.warning("Invalid font_sf_scale value: " + val) document.header[i] = "\\font_sf_scale 100" i = find_token(document.header, "\\font_tt_scale", 0) if i == -1: document.warning("Malformed LyX document: Missing \\font_tt_scale.") else: val = get_value(document.header, '\\font_tt_scale', i) try: # float() can throw tt_scale = float(val) except: document.warning("Invalid font_tt_scale value: " + val) document.header[i] = "\\font_tt_scale 100" # 3.) set preamble stuff pretext = ['%% This document must be processed with xelatex!'] pretext.append('\\usepackage{fontspec}') if roman != "default": pretext.append('\\setmainfont[Mapping=tex-text]{' + roman + '}') if sans != "default": sf = '\\setsansfont[' if sf_scale != 100.0: sf += 'Scale=' + str(sf_scale / 100.0) + ',' sf += 'Mapping=tex-text]{' + sans + '}' pretext.append(sf) if typew != "default": tw = '\\setmonofont' if tt_scale != 100.0: tw += '[Scale=' + str(tt_scale / 100.0) + ']' tw += '{' + typew + '}' pretext.append(tw) if osf: pretext.append('\\defaultfontfeatures{Numbers=OldStyle}') pretext.append('\\usepackage{xunicode}') pretext.append('\\usepackage{xltxtra}') insert_to_preamble(document, pretext) def revert_outputformat(document): " Remove default output format param " if not del_token(document.header, '\\default_output_format', 0): document.warning("Malformed LyX document: Missing \\default_output_format.") def revert_backgroundcolor(document): " Reverts background color to preamble code " i = find_token(document.header, 
"\\backgroundcolor", 0) if i == -1: return colorcode = get_value(document.header, '\\backgroundcolor', i) del document.header[i] # don't clutter the preamble if backgroundcolor is not set if colorcode == "#ffffff": return red = hex2ratio(colorcode[1:3]) green = hex2ratio(colorcode[3:5]) blue = hex2ratio(colorcode[5:7]) insert_to_preamble(document, \ ['% To set the background color', '\\@ifundefined{definecolor}{\\usepackage{color}}{}', '\\definecolor{page_backgroundcolor}{rgb}{' + red + ',' + green + ',' + blue + '}', '\\pagecolor{page_backgroundcolor}']) def add_use_indices(document): " Add \\use_indices if it is missing " i = find_token(document.header, '\\use_indices', 0) if i != -1: return i i = find_token(document.header, '\\use_bibtopic', 0) if i == -1: i = find_token(document.header, '\\cite_engine', 0) if i == -1: i = find_token(document.header, '\\use_mathdots', 0) if i == -1: i = find_token(document.header, '\\use_mhchem', 0) if i == -1: i = find_token(document.header, '\\use_esint', 0) if i == -1: i = find_token(document.header, '\\use_amsmath', 0) if i == -1: document.warning("Malformed LyX document: Missing \\use_indices.") return -1 document.header.insert(i + 1, '\\use_indices 0') return i + 1 def revert_splitindex(document): " Reverts splitindex-aware documents " i = add_use_indices(document) if i == -1: return useindices = str2bool(get_value(document.header, "\\use_indices", i)) del document.header[i] preamble = [] if useindices: preamble.append("\\usepackage{splitidx})") # deal with index declarations in the preamble i = 0 while True: i = find_token(document.header, "\\index", i) if i == -1: break k = find_token(document.header, "\\end_index", i) if k == -1: document.warning("Malformed LyX document: Missing \\end_index.") return if useindices: line = document.header[i] l = re.compile(r'\\index (.*)$') m = l.match(line) iname = m.group(1) ishortcut = get_value(document.header, '\\shortcut', i, k) if ishortcut != "": preamble.append("\\newindex[" + 
iname + "]{" + ishortcut + "}") del document.header[i:k + 1] if preamble: insert_to_preamble(document, preamble) # deal with index insets # these need to have the argument removed i = 0 while True: i = find_token(document.body, "\\begin_inset Index", i) if i == -1: break line = document.body[i] l = re.compile(r'\\begin_inset Index (.*)$') m = l.match(line) itype = m.group(1) if itype == "idx" or indices == "false": document.body[i] = "\\begin_inset Index" else: k = find_end_of_inset(document.body, i) if k == -1: document.warning("Can't find end of index inset!") i += 1 continue content = lyx2latex(document, document.body[i:k]) # escape quotes content = content.replace('"', r'\"') subst = put_cmd_in_ert("\\sindex[" + itype + "]{" + content + "}") document.body[i:k + 1] = subst i = i + 1 # deal with index_print insets i = 0 while True: i = find_token(document.body, "\\begin_inset CommandInset index_print", i) if i == -1: return k = find_end_of_inset(document.body, i) ptype = get_quoted_value(document.body, 'type', i, k) if ptype == "idx": j = find_token(document.body, "type", i, k) del document.body[j] elif not useindices: del document.body[i:k + 1] else: subst = put_cmd_in_ert("\\printindex[" + ptype + "]{}") document.body[i:k + 1] = subst i = i + 1 def convert_splitindex(document): " Converts index and printindex insets to splitindex-aware format " add_use_indices(document) i = 0 while True: i = find_token(document.body, "\\begin_inset Index", i) if i == -1: break document.body[i] = document.body[i].replace("\\begin_inset Index", "\\begin_inset Index idx") i = i + 1 i = 0 while True: i = find_token(document.body, "\\begin_inset CommandInset index_print", i) if i == -1: return if document.body[i + 1].find('LatexCommand printindex') == -1: document.warning("Malformed LyX document: Incomplete printindex inset.") return subst = ["LatexCommand printindex", "type \"idx\""] document.body[i + 1:i + 2] = subst i = i + 1 def revert_subindex(document): " Reverts 
\\printsubindex CommandInset types " i = add_use_indices(document) if i == -1: return useindices = str2bool(get_value(document.header, "\\use_indices", i)) i = 0 while True: i = find_token(document.body, "\\begin_inset CommandInset index_print", i) if i == -1: return k = find_end_of_inset(document.body, i) ctype = get_value(document.body, 'LatexCommand', i, k) if ctype != "printsubindex": i = k + 1 continue ptype = get_quoted_value(document.body, 'type', i, k) if not useindices: del document.body[i:k + 1] else: subst = put_cmd_in_ert("\\printsubindex[" + ptype + "]{}") document.body[i:k + 1] = subst i = i + 1 def revert_printindexall(document): " Reverts \\print[sub]index* CommandInset types " i = add_use_indices(document) if i == -1: return useindices = str2bool(get_value(document.header, "\\use_indices", i)) i = 0 while True: i = find_token(document.body, "\\begin_inset CommandInset index_print", i) if i == -1: return k = find_end_of_inset(document.body, i) ctype = get_value(document.body, 'LatexCommand', i, k) if ctype != "printindex*" and ctype != "printsubindex*": i = k continue if not useindices: del document.body[i:k + 1] else: subst = put_cmd_in_ert("\\" + ctype + "{}") document.body[i:k + 1] = subst i = i + 1 strikeout_preamble = ['% for proper underlining', r'\PassOptionsToPackage{normalem}{ulem}', r'\usepackage{ulem}'] def convert_strikeout(document): " Remove preamble code loading 'ulem' package. 
" del_complete_lines(document.preamble, ['% Added by lyx2lyx']+strikeout_preamble) def revert_strikeout(document): " Reverts \\strikeout font attribute " changed = revert_font_attrs(document.body, "\\uuline", "\\uuline") changed = revert_font_attrs(document.body, "\\uwave", "\\uwave") or changed changed = revert_font_attrs(document.body, "\\strikeout", "\\sout") or changed if changed == True: insert_to_preamble(document, strikeout_preamble) ulinelatex_preamble = ['% fix underbar in citations', r'\let\cite@rig\cite', r'\newcommand{\b@xcite}[2][\%]{\def\def@pt{\%}\def\pas@pt{#1}', r' \mbox{\ifx\def@pt\pas@pt\cite@rig{#2}\else\cite@rig[#1]{#2}\fi}}', r'\renewcommand{\underbar}[1]{{\let\cite\b@xcite\uline{#1}}}'] def convert_ulinelatex(document): " Remove preamble code for \\uline font attribute. " del_complete_lines(document.preamble, ['% Added by lyx2lyx']+ulinelatex_preamble) def revert_ulinelatex(document): " Add preamble code for \\uline font attribute in citations. " i = find_token(document.body, '\\bar under', 0) if i == -1: return try: document.preamble.index(r'\usepackage{ulem}') except ValueError: insert_to_preamble(document, strikeout_preamble) insert_to_preamble(document, ulinelatex_preamble) def revert_custom_processors(document): " Remove bibtex_command and index_command params " if not del_token(document.header, '\\bibtex_command', 0): document.warning("Malformed LyX document: Missing \\bibtex_command.") if not del_token(document.header, '\\index_command', 0): document.warning("Malformed LyX document: Missing \\index_command.") def convert_nomencl_width(document): " Add set_width param to nomencl_print " i = 0 while True: i = find_token(document.body, "\\begin_inset CommandInset nomencl_print", i) if i == -1: break document.body.insert(i + 2, "set_width \"none\"") i = i + 1 def revert_nomencl_width(document): " Remove set_width param from nomencl_print " i = 0 while True: i = find_token(document.body, "\\begin_inset CommandInset nomencl_print", i) if i 
== -1: break j = find_end_of_inset(document.body, i) if not del_token(document.body, "set_width", i, j): document.warning("Can't find set_width option for nomencl_print!") i = j def revert_nomencl_cwidth(document): " Remove width param from nomencl_print " i = 0 while True: i = find_token(document.body, "\\begin_inset CommandInset nomencl_print", i) if i == -1: break j = find_end_of_inset(document.body, i) l = find_token(document.body, "width", i, j) if l == -1: i = j continue width = get_quoted_value(document.body, "width", i, j) del document.body[l] insert_to_preamble(document, ["\\setlength{\\nomlabelwidth}{" + width + "}"]) i = j - 1 def revert_applemac(document): " Revert applemac encoding to auto " if document.encoding != "applemac": return document.encoding = "auto" i = find_token(document.header, "\\encoding", 0) if i != -1: document.header[i] = "\\encoding auto" def revert_longtable_align(document): " Remove longtable alignment setting " i = 0 while True: i = find_token(document.body, "\\begin_inset Tabular", i) if i == -1: break end = find_end_of_inset(document.body, i) if end == -1: document.warning("Can't find end of inset at line " + str(i)) i += 1 continue fline = find_token(document.body, "<features", i, end) if fline == -1: document.warning("Can't find features for inset at line " + str(i)) i += 1 continue j = document.body[fline].find("longtabularalignment") if j == -1: i += 1 continue # FIXME Is this correct? It wipes out everything after the # one we found. document.body[fline] = document.body[fline][:j - 1] + '>' # since there could be a tabular inside this one, we # cannot jump to end. 
i += 1 def revert_branch_filename(document): " Remove \\filename_suffix parameter from branches " i = 0 while True: i = find_token(document.header, "\\filename_suffix", i) if i == -1: return del document.header[i] def revert_paragraph_indentation(document): " Revert custom paragraph indentation to preamble code " i = find_token(document.header, "\\paragraph_indentation", 0) if i == -1: return length = get_value(document.header, "\\paragraph_indentation", i) # we need only remove the line if indentation is default if length != "default": # handle percent lengths length = latex_length(length)[1] insert_to_preamble(document, ["\\setlength{\\parindent}{" + length + "}"]) del document.header[i] def revert_percent_skip_lengths(document): " Revert relative lengths for paragraph skip separation to preamble code " i = find_token(document.header, "\\defskip", 0) if i == -1: return length = get_value(document.header, "\\defskip", i) # only revert when a custom length was set and when # it used a percent length if length in ('smallskip', 'medskip', 'bigskip'): return # handle percent lengths percent, length = latex_length(length) if percent: insert_to_preamble(document, ["\\setlength{\\parskip}{" + length + "}"]) # set defskip to medskip as default document.header[i] = "\\defskip medskip" def revert_percent_vspace_lengths(document): " Revert relative VSpace lengths to ERT " i = 0 while True: i = find_token(document.body, "\\begin_inset VSpace", i) if i == -1: break # only revert if a custom length was set and if # it used a percent length r = re.compile(r'\\begin_inset VSpace (.*)$') m = r.match(document.body[i]) length = m.group(1) if length in ('defskip', 'smallskip', 'medskip', 'bigskip', 'vfill'): i += 1 continue # check if the space has a star (protected space) protected = (document.body[i].rfind("*") != -1) if protected: length = length.rstrip('*') # handle percent lengths percent, length = latex_length(length) # revert the VSpace inset to ERT if percent: if protected: 
subst = put_cmd_in_ert("\\vspace*{" + length + "}") else: subst = put_cmd_in_ert("\\vspace{" + length + "}") document.body[i:i + 2] = subst i += 1 def revert_percent_hspace_lengths(document): " Revert relative HSpace lengths to ERT " i = 0 while True: i = find_token_exact(document.body, "\\begin_inset space \\hspace", i) if i == -1: break j = find_end_of_inset(document.body, i) if j == -1: document.warning("Can't find end of inset at line " + str(i)) i += 1 continue # only revert if a custom length was set... length = get_value(document.body, '\\length', i + 1, j) if length == '': document.warning("Malformed lyx document: Missing '\\length' in Space inset.") i = j continue protected = "" if document.body[i].find("\\hspace*{}") != -1: protected = "*" # ...and if it used a percent length percent, length = latex_length(length) # revert the HSpace inset to ERT if percent: subst = put_cmd_in_ert("\\hspace" + protected + "{" + length + "}") document.body[i:j + 1] = subst # if we did a substitution, this will still be ok i = j def revert_hspace_glue_lengths(document): " Revert HSpace glue lengths to ERT " i = 0 while True: i = find_token_exact(document.body, "\\begin_inset space \\hspace", i) if i == -1: break j = find_end_of_inset(document.body, i) if j == -1: document.warning("Can't find end of inset at line " + str(i)) i += 1 continue length = get_value(document.body, '\\length', i + 1, j) if length == '': document.warning("Malformed lyx document: Missing '\\length' in Space inset.") i = j continue protected = "" if document.body[i].find("\\hspace*{}") != -1: protected = "*" # only revert if the length contains a plus or minus at pos != 0 if length.find('-',1) != -1 or length.find('+',1) != -1: # handle percent lengths length = latex_length(length)[1] # revert the HSpace inset to ERT subst = put_cmd_in_ert("\\hspace" + protected + "{" + length + "}") document.body[i:j+1] = subst i = j def convert_author_id(document): " Add the author_id to the \\author definition and 
make sure 0 is not used" i = 0 anum = 1 re_author = re.compile(r'(\\author) (\".*\")\s*(.*)$') while True: i = find_token(document.header, "\\author", i) if i == -1: break m = re_author.match(document.header[i]) if m: name = m.group(2) email = m.group(3) document.header[i] = "\\author %i %s %s" % (anum, name, email) anum += 1 i += 1 i = 0 while True: i = find_token(document.body, "\\change_", i) if i == -1: break change = document.body[i].split(' '); if len(change) == 3: type = change[0] author_id = int(change[1]) time = change[2] document.body[i] = "%s %i %s" % (type, author_id + 1, time) i += 1 def revert_author_id(document): " Remove the author_id from the \\author definition " i = 0 anum = 0 rx = re.compile(r'(\\author)\s+(-?\d+)\s+(\".*\")\s*(.*)$') idmap = dict() while True: i = find_token(document.header, "\\author", i) if i == -1: break m = rx.match(document.header[i]) if m: author_id = int(m.group(2)) idmap[author_id] = anum name = m.group(3) email = m.group(4) document.header[i] = "\\author %s %s" % (name, email) i += 1 # FIXME Should this be incremented if we didn't match? 
anum += 1 i = 0 while True: i = find_token(document.body, "\\change_", i) if i == -1: break change = document.body[i].split(' '); if len(change) == 3: type = change[0] author_id = int(change[1]) time = change[2] document.body[i] = "%s %i %s" % (type, idmap[author_id], time) i += 1 def revert_suppress_date(document): " Revert suppressing of default document date to preamble code " i = find_token(document.header, "\\suppress_date", 0) if i == -1: return # remove the preamble line and write to the preamble # when suppress_date was true date = str2bool(get_value(document.header, "\\suppress_date", i)) if date: add_to_preamble(document, ["\\date{}"]) del document.header[i] mhchem_preamble = [r"\PassOptionsToPackage{version=3}{mhchem}", r"\usepackage{mhchem}"] def convert_mhchem(document): "Set mhchem to off for versions older than 1.6.x" if document.initial_format < 277: # LyX 1.5.x and older did never load mhchem. # Therefore we must switch it off: Documents that use mhchem have # a manual \usepackage anyway, and documents not using mhchem but # custom macros with the same names as mhchem commands might get # corrupted if mhchem is automatically loaded. mhchem = 0 # off else: # LyX 1.6.x did always load mhchem automatically. mhchem = 1 # auto i = find_token(document.header, "\\use_esint", 0) if i == -1: # pre-1.5.x document i = find_token(document.header, "\\use_amsmath", 0) if i == -1: document.warning("Malformed LyX document: " "Could not find amsmath or esint setting.") return document.header.insert(i + 1, "\\use_mhchem %d" % mhchem) # remove LyX-inserted preamble if mhchem != 0: del_complete_lines(document.preamble, ['% Added by lyx2lyx']+mhchem_preamble) def revert_mhchem(document): "Revert mhchem loading to preamble code." 
mhchem = get_value(document.header, "\\use_mhchem", delete=True) try: mhchem = int(mhchem) except ValueError: document.warning("Malformed LyX document: " "Could not find mhchem setting.") mhchem = 1 # "auto" # mhchem in {0: "off", 1: "auto", 2: "on"} if mhchem == 1: # "auto" i = 0 while i != 1 and mhchem == 1: i = find_token(document.body, "\\begin_inset Formula", i) j = find_end_of_inset(document.body, i) if j == -1: break if (True for line in document.body[i:j] if r"\ce{" in line or r"\cf{" in line): mhchem = 2 break i += 1 if (mhchem == 2 # on and find_token(document.preamble, r"\usepackage{mhchem}") == -1): insert_to_preamble(document, mhchem_preamble) def revert_fontenc(document): " Remove fontencoding param " if not del_token(document.header, '\\fontencoding', 0): document.warning("Malformed LyX document: Missing \\fontencoding.") def merge_gbrief(document): " Merge g-brief-en and g-brief-de to one class " if document.textclass != "g-brief-de": if document.textclass == "g-brief-en": document.textclass = "g-brief" document.set_textclass() return obsoletedby = { "Brieftext": "Letter", "Unterschrift": "Signature", "Strasse": "Street", "Zusatz": "Addition", "Ort": "Town", "Land": "State", "RetourAdresse": "ReturnAddress", "MeinZeichen": "MyRef", "IhrZeichen": "YourRef", "IhrSchreiben": "YourMail", "Telefon": "Phone", "BLZ": "BankCode", "Konto": "BankAccount", "Postvermerk": "PostalComment", "Adresse": "Address", "Datum": "Date", "Betreff": "Reference", "Anrede": "Opening", "Anlagen": "Encl.", "Verteiler": "cc", "Gruss": "Closing"} i = 0 while True: i = find_token(document.body, "\\begin_layout", i) if i == -1: break layout = document.body[i][14:] if layout in obsoletedby: document.body[i] = "\\begin_layout " + obsoletedby[layout] i += 1 document.textclass = "g-brief" document.set_textclass() def revert_gbrief(document): " Revert g-brief to g-brief-en " if document.textclass == "g-brief": document.textclass = "g-brief-en" document.set_textclass() def 
revert_html_options(document): " Remove html options " del_token(document.header, '\\html_use_mathml', 0) del_token(document.header, '\\html_be_strict', 0) def revert_includeonly(document): i = 0 while True: i = find_token(document.header, "\\begin_includeonly", i) if i == -1: return j = find_end_of(document.header, i, "\\begin_includeonly", "\\end_includeonly") if j == -1: document.warning("Unable to find end of includeonly section!!") break document.header[i : j + 1] = [] def convert_includeall(document): " Add maintain_unincluded_children param " i = 0 i = find_token(document.header, "\\maintain_unincluded_children", 0) if i == -1: i = find_token(document.header, "\\textclass", 0) if i == -1: document.warning("Malformed LyX document! Missing \\textclass header.") return document.header.insert(i, "\\maintain_unincluded_children false") return def revert_includeall(document): " Remove maintain_unincluded_children param " del_token(document.header, '\\maintain_unincluded_children', 0) def revert_multirow(document): " Revert multirow cells in tables to TeX-code" # first, let's find out if we need to do anything # cell type 3 is multirow begin cell i = find_token(document.body, '<cell multirow="3"', 0) if i == -1: return add_to_preamble(document, ["\\usepackage{multirow}"]) begin_table = 0 while True: # find begin/end of table begin_table = find_token(document.body, '<lyxtabular version=', begin_table) if begin_table == -1: break end_table = find_end_of(document.body, begin_table, '<lyxtabular', '</lyxtabular>') if end_table == -1: document.warning("Malformed LyX document: Could not find end of table.") begin_table += 1 continue # does this table have multirow? 
i = find_token(document.body, '<cell multirow="3"', begin_table, end_table) if i == -1: begin_table = end_table continue # store the number of rows and columns numrows = get_option_value(document.body[begin_table], "rows") numcols = get_option_value(document.body[begin_table], "columns") try: numrows = int(numrows) numcols = int(numcols) except: document.warning("Unable to determine rows and columns!") begin_table = end_table continue mrstarts = [] multirows = [] # collect info on rows and columns of this table. begin_row = begin_table for row in range(numrows): begin_row = find_token(document.body, '<row>', begin_row, end_table) if begin_row == -1: document.warning("Can't find row " + str(row + 1)) break end_row = find_end_of(document.body, begin_row, '<row>', '</row>') if end_row == -1: document.warning("Can't find end of row " + str(row + 1)) break begin_cell = begin_row multirows.append([]) for column in range(numcols): begin_cell = find_token(document.body, '<cell ', begin_cell, end_row) if begin_cell == -1: document.warning("Can't find column " + str(column + 1) + \ "in row " + str(row + 1)) break # NOTE # this will fail if someone puts "</cell>" in a cell, but # that seems fairly unlikely. 
end_cell = find_end_of(document.body, begin_cell, '<cell', '</cell>') if end_cell == -1: document.warning("Can't find end of column " + str(column + 1) + \ "in row " + str(row + 1)) break multirows[row].append([begin_cell, end_cell, 0]) if document.body[begin_cell].find('multirow="3"') != -1: multirows[row][column][2] = 3 # begin multirow mrstarts.append([row, column]) elif document.body[begin_cell].find('multirow="4"') != -1: multirows[row][column][2] = 4 # in multirow begin_cell = end_cell begin_row = end_row # end of table info collection # work from the back to avoid messing up numbering mrstarts.reverse() for m in mrstarts: row = m[0] col = m[1] # get column width col_width = get_option_value(document.body[begin_table + 2 + col], "width") # "0pt" means that no width is specified if not col_width or col_width == "0pt": col_width = "*" # determine the number of cells that are part of the multirow nummrs = 1 for r in range(row + 1, numrows): if multirows[r][col][2] != 4: break nummrs += 1 # take the opportunity to revert this line lineno = multirows[r][col][0] document.body[lineno] = document.body[lineno].\ replace(' multirow="4" ', ' ').\ replace('valignment="middle"', 'valignment="top"').\ replace(' topline="true" ', ' ') # remove bottom line of previous multirow-part cell lineno = multirows[r-1][col][0] document.body[lineno] = document.body[lineno].replace(' bottomline="true" ', ' ') # revert beginning cell bcell = multirows[row][col][0] ecell = multirows[row][col][1] document.body[bcell] = document.body[bcell].\ replace(' multirow="3" ', ' ').\ replace('valignment="middle"', 'valignment="top"') blay = find_token(document.body, "\\begin_layout", bcell, ecell) if blay == -1: document.warning("Can't find layout for cell!") continue bend = find_end_of_layout(document.body, blay) if bend == -1: document.warning("Can't find end of layout for cell!") continue # do the later one first, so as not to mess up the numbering # we are wrapping the whole cell in this ert # 
so before the end of the layout... document.body[bend:bend] = put_cmd_in_ert("}") # ...and after the beginning document.body[blay + 1:blay + 1] = \ put_cmd_in_ert("\\multirow{" + str(nummrs) + "}{" + col_width + "}{") begin_table = end_table def convert_math_output(document): " Convert \html_use_mathml to \html_math_output " i = find_token(document.header, "\\html_use_mathml", 0) if i == -1: return rgx = re.compile(r'\\html_use_mathml\s+(\w+)') m = rgx.match(document.header[i]) newval = "0" # MathML if m: val = str2bool(m.group(1)) if not val: newval = "2" # Images else: document.warning("Can't match " + document.header[i]) document.header[i] = "\\html_math_output " + newval def revert_math_output(document): " Revert \html_math_output to \html_use_mathml " i = find_token(document.header, "\\html_math_output", 0) if i == -1: return rgx = re.compile(r'\\html_math_output\s+(\d)') m = rgx.match(document.header[i]) newval = "true" if m: val = m.group(1) if val == "1" or val == "2": newval = "false" else: document.warning("Unable to match " + document.header[i]) document.header[i] = "\\html_use_mathml " + newval def revert_inset_preview(document): " Dissolves the preview inset " i = 0 while True: i = find_token(document.body, "\\begin_inset Preview", i) if i == -1: return iend = find_end_of_inset(document.body, i) if iend == -1: document.warning("Malformed LyX document: Could not find end of Preview inset.") i += 1 continue # This has several issues. # We need to do something about the layouts inside InsetPreview. # If we just leave the first one, then we have something like: # \begin_layout Standard # ... # \begin_layout Standard # and we get a "no \end_layout" error. So something has to be done. # Ideally, we would check if it is the same as the layout we are in. # If so, we just remove it; if not, we end the active one. But it is # not easy to know what layout we are in, due to depth changes, etc, # and it is not clear to me how much work it is worth doing. 
In most # cases, the layout will probably be the same. # # For the same reason, we have to remove the \end_layout tag at the # end of the last layout in the inset. Again, that will sometimes be # wrong, but it will usually be right. To know what to do, we would # again have to know what layout the inset is in. blay = find_token(document.body, "\\begin_layout", i, iend) if blay == -1: document.warning("Can't find layout for preview inset!") # always do the later one first... del document.body[iend] del document.body[i] # deletions mean we do not need to reset i continue # This is where we would check what layout we are in. # The check for Standard is definitely wrong. # # lay = document.body[blay].split(None, 1)[1] # if lay != oldlayout: # # record a boolean to tell us what to do later.... # # better to do it later, since (a) it won't mess up # # the numbering and (b) we only modify at the end. # we want to delete the last \\end_layout in this inset, too. # note that this may not be the \\end_layout that goes with blay!! bend = find_end_of_layout(document.body, blay) while True: tmp = find_token(document.body, "\\end_layout", bend + 1, iend) if tmp == -1: break bend = tmp if bend == blay: document.warning("Unable to find last layout in preview inset!") del document.body[iend] del document.body[i] # deletions mean we do not need to reset i continue # always do the later one first... del document.body[iend] del document.body[bend] del document.body[i:blay + 1] # we do not need to reset i def revert_equalspacing_xymatrix(document): " Revert a Formula with xymatrix@! 
def revert_notefontcolor(document):
    r"""Revert the greyed-out note font color to preamble code.

    The \notefontcolor header line is always removed.  Preamble code that
    defines the color and redefines the lyxgreyedout environment is only
    added when the body contains at least one greyed-out note.
    """
    i = find_token(document.header, "\\notefontcolor", 0)
    if i == -1:
        return

    colorcode = get_value(document.header, '\\notefontcolor', i)
    del document.header[i]

    # Without any greyed-out note the lyxgreyedout environment will not
    # exist, so redefining it would only raise a LaTeX error.
    if find_token(document.body, "\\begin_inset Note Greyedout", 0) == -1:
        return

    # The color code has the form #rrggbb: two hex digits per channel.
    red = hex2ratio(colorcode[1:3])
    green = hex2ratio(colorcode[3:5])
    blue = hex2ratio(colorcode[5:7])

    # Each list entry is one preamble line.  The comma after the
    # \@ifundefined entry matters: without it Python joins that string
    # with the \definecolor one into a single line.
    insert_to_preamble(document, [
        '% for greyed-out notes',
        '\\@ifundefined{definecolor}{\\usepackage{color}}{}',
        '\\definecolor{note_fontcolor}{rgb}{%s,%s,%s}' % (red, green, blue),
        '\\renewenvironment{lyxgreyedout}',
        ' {\\textcolor{note_fontcolor}\\bgroup}{\\egroup}'])
def revert_math_scale(document):
    """Remove the math image scale and the LaTeX start/end header options."""
    for token in ('\\html_math_img_scale',
                  '\\html_latex_start',
                  '\\html_latex_end'):
        del_token(document.header, token, 0)


def revert_pagesizes(document):
    r"""Drop the \papersize header line if it names one of the sizes below.

    Any other paper size is left untouched.
    """
    reverted_sizes = (
        "a0paper", "a1paper", "a2paper", "a6paper",
        "b0paper", "b1paper", "b2paper", "b6paper",
        "b0j", "b1j", "b2j", "b3j", "b4j", "b5j", "b6j",
    )
    pos = find_token(document.header, '\\papersize', 0)
    if pos == -1:
        return
    # The value starts after the 11 characters of "\papersize ".
    if document.header[pos][11:] in reverted_sizes:
        del document.header[pos]
def revert_align_decimal(document):
    """Revert decimal-aligned table columns to centered columns.

    For every Tabular inset, each <column> line (they are searched between
    the inset start and its first <cell>) that has alignment="decimal"
    loses its decimal_point option and is switched to alignment="center".
    """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Tabular", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Unable to find end of Tabular inset at line " + str(i))
            i += 1
            continue
        cell = find_token(document.body, "<cell", i, j)
        if cell == -1:
            document.warning("Can't find any cells in Tabular inset at line " + str(i))
            i = j
            continue
        k = i + 1
        while True:
            k = find_token(document.body, "<column", k, cell)
            if k == -1:
                # No more columns in *this* table.  Leave only the column
                # loop so that the remaining tables are processed too.
                break
            if document.body[k].find('alignment="decimal"') != -1:
                remove_option(document.body, k, 'decimal_point')
                document.body[k] = document.body[k].replace(
                    'alignment="decimal"', 'alignment="center"')
            k += 1
        # Lines are only edited in place, so indices stay valid.  Resume at
        # the first cell so tables nested inside cells are visited as well.
        i = cell


def convert_optarg(document):
    r"""Rename every \begin_inset OptArg line to \begin_inset Argument."""
    i = 0
    while True:
        i = find_token(document.body, '\\begin_inset OptArg', i)
        if i == -1:
            return
        document.body[i] = "\\begin_inset Argument"
        i += 1


def revert_argument(document):
    r"""Rename every \begin_inset Argument line to \begin_inset OptArg."""
    i = 0
    while True:
        i = find_token(document.body, '\\begin_inset Argument', i)
        if i == -1:
            return
        document.body[i] = "\\begin_inset OptArg"
        i += 1


def revert_makebox(document):
    r"""Convert frameless boxes that use \makebox to TeX code.

    For every Box inset the use_makebox line is expected six lines after
    the inset start.  Boxes that are not frameless, or whose use_makebox
    value is not 1, merely lose that line.  Frameless boxes with
    use_makebox 1 are replaced by ERT that wraps the content of the first
    layout in \makebox[<width>][<hor_pos>]{...}.
    """
    i = 0
    while True:
        i = find_token(document.body, '\\begin_inset Box', i)
        if i == -1:
            break
        z = find_end_of_inset(document.body, i)
        if z == -1:
            document.warning("Malformed LyX document: Can't find end of box inset.")
            i += 1
            continue
        blay = find_token(document.body, "\\begin_layout", i, z)
        if blay == -1:
            document.warning("Malformed LyX document: Can't find layout in box.")
            i = z
            continue
        j = find_token(document.body, 'use_makebox', i)
        if j != i + 6:  # also covers "not found" (-1)
            document.warning("Malformed LyX document: Can't find use_makebox statement in box.")
            i = z
            continue
        # get_value() results are handled as text elsewhere in this file,
        # so compare against the string "1".  The former comparison with
        # the integer 1 was never equal, which made the conversion below
        # unreachable.
        if not check_token(document.body[i], "\\begin_inset Box Frameless") \
           or get_value(document.body, 'use_makebox', j) != "1":
            # nothing to convert: just delete use_makebox
            del document.body[j]
            i += 1
            continue
        bend = find_end_of_layout(document.body, blay)
        if bend == -1 or bend > z:
            document.warning("Malformed LyX document: Can't find end of layout in box.")
            i = z
            continue
        # determine the alignment
        align = get_quoted_value(document.body, 'hor_pos', i, blay, "c")
        # determine the width
        length = get_quoted_value(document.body, 'width', i, blay, "50col%")
        length = latex_length(length)[1]
        # Do the later replacement first so the earlier indices stay valid:
        # replace the \end_layout ... \end_inset tail by the closing brace
        document.body[bend:z + 1] = put_cmd_in_ert("}")
        # ... and the inset header (including use_makebox) by the opener.
        subst = "\\makebox[" + length + "][" + align + "]{"
        document.body[i:blay + 1] = put_cmd_in_ert(subst)
        i += 1
def revert_nameref(document):
    r"""Convert Nameref/nameref reference insets to ERT.

    Every "LatexCommand Nameref" or "LatexCommand nameref" line that lies
    inside a CommandInset ref is replaced, together with its inset, by an
    ERT holding \Nameref{<label>} or \nameref{<label>}.  If at least one
    inset was converted, \usepackage{nameref} is added to the preamble.
    """
    cmds = ["Nameref", "nameref"]
    foundone = False
    rx = re.compile(r'reference "(.*)"')
    for cmd in cmds:
        i = 0
        oldcmd = "LatexCommand " + cmd
        while True:
            # It seems better to look for this, as most of the reference
            # insets won't be ones we care about.
            i = find_token(document.body, oldcmd, i)
            if i == -1:
                break
            cmdloc = i
            i += 1
            # Make sure it is actually in an inset!
            # A normal line could begin with "LatexCommand nameref"!
            stins, endins = is_in_inset(document.body, cmdloc,
                                        "\\begin_inset CommandInset ref")
            if endins == -1:
                continue

            # ok, so it is in an InsetRef
            refline = find_token(document.body, "reference", stins, endins)
            if refline == -1:
                # stins is a line number: convert it before concatenating
                document.warning("Can't find reference for inset at line "
                                 + str(stins) + "!!")
                continue
            m = rx.match(document.body[refline])
            if not m:
                document.warning("Can't match reference line: "
                                 + document.body[refline])
                continue

            foundone = True
            ref = m.group(1)
            newcontent = put_cmd_in_ert('\\' + cmd + '{' + ref + '}')
            document.body[stins:endins + 1] = newcontent
            # The body length changed; resume right after the new ERT.
            i = stins + len(newcontent)

    if foundone:
        add_to_preamble(document, ["\\usepackage{nameref}"])
i = 0 rx = re.compile(r'^\\begin_inset Flex (?:Custom|CharStyle|Element):(.+)$') while True: i = find_token(document.body, "\\begin_inset Flex", i) if i == -1: return m = rx.match(document.body[i]) if m: document.body[i] = "\\begin_inset Flex " + m.group(1) i += 1 flex_insets = { "Alert" : "CharStyle:Alert", "Code" : "CharStyle:Code", "Concepts" : "CharStyle:Concepts", "E-Mail" : "CharStyle:E-Mail", "Emph" : "CharStyle:Emph", "Expression" : "CharStyle:Expression", "Initial" : "CharStyle:Initial", "Institute" : "CharStyle:Institute", "Meaning" : "CharStyle:Meaning", "Noun" : "CharStyle:Noun", "Strong" : "CharStyle:Strong", "Structure" : "CharStyle:Structure", "ArticleMode" : "Custom:ArticleMode", "Endnote" : "Custom:Endnote", "Glosse" : "Custom:Glosse", "PresentationMode" : "Custom:PresentationMode", "Tri-Glosse" : "Custom:Tri-Glosse" } flex_elements = { "Abbrev" : "Element:Abbrev", "CCC-Code" : "Element:CCC-Code", "Citation-number" : "Element:Citation-number", "City" : "Element:City", "Code" : "Element:Code", "CODEN" : "Element:CODEN", "Country" : "Element:Country", "Day" : "Element:Day", "Directory" : "Element:Directory", "Dscr" : "Element:Dscr", "Email" : "Element:Email", "Emph" : "Element:Emph", "Filename" : "Element:Filename", "Firstname" : "Element:Firstname", "Fname" : "Element:Fname", "GuiButton" : "Element:GuiButton", "GuiMenu" : "Element:GuiMenu", "GuiMenuItem" : "Element:GuiMenuItem", "ISSN" : "Element:ISSN", "Issue-day" : "Element:Issue-day", "Issue-months" : "Element:Issue-months", "Issue-number" : "Element:Issue-number", "KeyCap" : "Element:KeyCap", "KeyCombo" : "Element:KeyCombo", "Keyword" : "Element:Keyword", "Literal" : "Element:Literal", "MenuChoice" : "Element:MenuChoice", "Month" : "Element:Month", "Orgdiv" : "Element:Orgdiv", "Orgname" : "Element:Orgname", "Postcode" : "Element:Postcode", "SS-Code" : "Element:SS-Code", "SS-Title" : "Element:SS-Title", "State" : "Element:State", "Street" : "Element:Street", "Surname" : "Element:Surname", 
"Volume" : "Element:Volume", "Year" : "Element:Year" } def revert_flexnames(document): if document.backend == "latex": flexlist = flex_insets else: flexlist = flex_elements rx = re.compile(r'^\\begin_inset Flex\s+(.+)$') i = 0 while True: i = find_token(document.body, "\\begin_inset Flex", i) if i == -1: return m = rx.match(document.body[i]) if not m: document.warning("Illegal flex inset: " + document.body[i]) i += 1 continue style = m.group(1) if style in flexlist: document.body[i] = "\\begin_inset Flex " + flexlist[style] i += 1 def convert_mathdots(document): " Load mathdots automatically " i = find_token(document.header, "\\use_mhchem" , 0) if i == -1: i = find_token(document.header, "\\use_esint" , 0) if i == -1: document.warning("Malformed LyX document: Can't find \\use_mhchem.") return; j = find_token(document.preamble, "\\usepackage{mathdots}", 0) if j == -1: document.header.insert(i + 1, "\\use_mathdots 0") else: document.header.insert(i + 1, "\\use_mathdots 2") del document.preamble[j] def revert_mathdots(document): " Load mathdots if used in the document " mathdots = find_token(document.header, "\\use_mathdots" , 0) if mathdots == -1: document.warning("No \\use_mathdots line. Assuming auto.") else: val = get_value(document.header, "\\use_mathdots", mathdots) del document.header[mathdots] try: usedots = int(val) except: document.warning("Invalid \\use_mathdots value: " + val + ". Assuming auto.") # probably usedots has not been changed, but be safe. usedots = 1 if usedots == 0: # do not load case return if usedots == 2: # force load case add_to_preamble(document, ["\\usepackage{mathdots}"]) return # so we are in the auto case. we want to load mathdots if \iddots is used. 
def convert_rule(document):
    r"""Convert \lyxline to a CommandInset line.

    A \lyxline that is the first content of its paragraph is replaced by
    the inset; if the document uses indented paragraphs, \noindent is
    added to that paragraph so the line stays full-width.  A \lyxline in
    the middle of a paragraph gets a Newline inset in front of it.
    """
    inset = ['\\begin_inset CommandInset line',
             'LatexCommand rule',
             'offset "0.5ex"',
             'width "100line%"',
             'height "1pt"', '',
             '\\end_inset',
             '', '']
    newline = ["\\begin_inset Newline newline", "\\end_inset", ""]

    # if paragraphs are indented, we may have to unindent to get the
    # line to be full-width.
    indent = get_value(document.header, "\\paragraph_separation", 0)
    have_indent = (indent == "indent")

    i = 0
    while True:
        i = find_token(document.body, "\\lyxline", i)
        if i == -1:
            return

        # we need to find out if this line follows other content
        # in its paragraph. find its layout....
        lastlay = find_token_backwards(document.body, "\\begin_layout", i)
        if lastlay == -1:
            document.warning("Can't find layout for line at " + str(i))
            # do the best we can.
            document.body[i:i + 1] = inset
            i += len(inset)
            continue

        # ...and look for other content before it: empty lines and
        # paragraph options (lines starting with a backslash) do not count.
        lineisfirst = all(not line or line[0] == '\\'
                          for line in document.body[lastlay + 1:i])

        if lineisfirst:
            document.body[i:i + 1] = inset
            i += len(inset)
            # Test the evaluated flag, not the raw header value: any
            # non-empty value (e.g. "skip") would be truthy.
            if have_indent:
                # we need to unindent, lest the line be too long
                document.body.insert(lastlay + 1, "\\noindent")
                i += 1
        else:
            # so our line is in the middle of a paragraph
            # we need to add a new line, lest this line follow the
            # other content on that line and run off the side of the page
            document.body[i:i + 1] = newline + inset
            i += len(newline) + len(inset)


def revert_rule(document):
    r"""Revert line insets to TeX code.

    Each CommandInset line is replaced by an ERT containing
    \rule[<offset>]{<width>}{<height>}; the bracketed offset is left out
    when the inset has no offset.
    """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CommandInset line", i)
        if i == -1:
            return
        # find end of inset
        j = find_token(document.body, "\\end_inset", i)
        if j == -1:
            document.warning("Malformed LyX document: Can't find end of line inset.")
            return
        # determine the optional offset; it carries its own brackets so an
        # absent offset yields no optional argument at all
        offset = get_quoted_value(document.body, 'offset', i, j)
        if offset:
            offset = '[' + offset + ']'
        # determine the width
        width = get_quoted_value(document.body, 'width', i, j, "100col%")
        width = latex_length(width)[1]
        # determine the height
        height = get_quoted_value(document.body, 'height', i, j, "1pt")
        height = latex_length(height)[1]
        # output the \rule command
        subst = "\\rule" + offset + "{" + width + "}{" + height + "}"
        ert = put_cmd_in_ert(subst)
        document.body[i:j + 1] = ert
        # continue after the lines that were just inserted
        i += len(ert)
return chapters = ("amsbook", "book", "docbook-book", "elsart", "extbook", "extreport", "jbook", "jreport", "jsbook", "literate-book", "literate-report", "memoir", "mwbk", "mwrep", "recipebook", "report", "scrbook", "scrreprt", "svmono", "svmult", "tbook", "treport", "tufte-book") def convert_bibtex_clearpage(document): " insert a clear(double)page bibliographystyle if bibtotoc option is used " if document.textclass not in chapters: return i = find_token(document.header, '\\papersides', 0) sides = 0 if i == -1: document.warning("Malformed LyX document: Can't find papersides definition.") document.warning("Assuming single sided.") sides = 1 else: val = get_value(document.header, "\\papersides", i) try: sides = int(val) except: pass if sides != 1 and sides != 2: document.warning("Invalid papersides value: " + val) document.warning("Assuming single sided.") sides = 1 j = 0 while True: j = find_token(document.body, "\\begin_inset CommandInset bibtex", j) if j == -1: return k = find_end_of_inset(document.body, j) if k == -1: document.warning("Can't find end of Bibliography inset at line " + str(j)) j += 1 continue # only act if there is the option "bibtotoc" val = get_value(document.body, 'options', j, k) if not val: document.warning("Can't find options for bibliography inset at line " + str(j)) j = k continue if val.find("bibtotoc") == -1: j = k continue # so we want to insert a new page right before the paragraph that # this bibliography thing is in. 
def check_passthru(document):
    """Tell whether the document is a literate one.

    Returns True if the text class is literate-article, literate-book or
    literate-report, or if the sweave or noweb module is loaded; False
    otherwise.
    """
    if document.textclass in ("literate-article", "literate-book",
                              "literate-report"):
        return True
    return any(mod in ("sweave", "noweb")
               for mod in document.get_module_list())


def convert_passthru(document):
    """Split Chunk/Scrap paragraphs at newline insets into repeated layouts.

    See http://www.mail-archive.com/lyx-devel@lists.lyx.org/msg161298.html

    Only applies to documents accepted by check_passthru().  Paragraphs
    whose first line is a paragraph option are left alone.  If the layout
    following a processed paragraph has the same name, an empty layout of
    that name is inserted between the two.
    """
    # Call the predicate: testing the bare function object is always true.
    if not check_passthru(document):
        return

    rx = re.compile(r"\\begin_layout \s*(\w+)")
    for lay in ["Chunk", "Scrap"]:
        beg = 0  # every layout name gets a full scan of the body
        while True:
            beg = find_token(document.body, "\\begin_layout " + lay, beg)
            if beg == -1:
                break
            end = find_end_of_layout(document.body, beg)
            if end == -1:
                document.warning("Can't find end of layout at line " + str(beg))
                beg += 1
                continue

            # we are now going to replace newline insets within this layout
            # by new instances of this layout. so we have repeated layouts
            # instead of newlines.

            # if the paragraph has any customization, however, we do not
            # want to do the replacement.
            if document.body[beg + 1].startswith("\\"):
                beg = end + 1
                continue

            ns = beg
            while True:
                ns = find_token(document.body,
                                "\\begin_inset Newline newline", ns, end)
                if ns == -1:
                    break
                ne = find_end_of_inset(document.body, ns)
                if ne == -1 or ne > end:
                    document.warning("Can't find end of inset at line " + str(ns))
                    ns += 1
                    continue
                if document.body[ne + 1] == "":
                    ne += 1
                subst = ["\\end_layout", "", "\\begin_layout " + lay]
                removed = ne + 1 - ns
                document.body[ns:ne + 1] = subst
                # keep `end` on the final \end_layout: shift it by the net
                # number of lines added, and continue after the new lines
                end += len(subst) - removed
                ns += len(subst)

            # ok, we now want to find out if the next layout is the
            # same as this one. if so, we will insert an extra copy of it
            didit = False
            nxt = find_token(document.body, "\\begin_layout", end)
            if nxt != -1:
                m = rx.match(document.body[nxt])
                if m and m.group(1) == lay:
                    subst = ["\\begin_layout " + lay, "", "\\end_layout", ""]
                    document.body[nxt:nxt] = subst
                    didit = True
            beg = end + 1
            if didit:
                beg += 4  # for the extra layout


def revert_passthru(document):
    """Merge adjacent Chunk/Scrap layouts back into one paragraph.

    See http://www.mail-archive.com/lyx-devel@lists.lyx.org/msg161298.html

    Only applies to documents accepted by check_passthru().  A following
    layout of the same name is joined to the current one with a newline
    inset; an empty one is removed and ends the merging for the current
    paragraph.
    """
    # Call the predicate: testing the bare function object is always true.
    if not check_passthru(document):
        return

    rx = re.compile(r"\\begin_layout \s*(\w+)")
    for lay in ["Chunk", "Scrap"]:
        beg = 0  # every layout name gets a full scan of the body
        while True:
            beg = find_token(document.body, "\\begin_layout " + lay, beg)
            if beg == -1:
                break
            end = find_end_of_layout(document.body, beg)
            if end == -1:
                document.warning("Can't find end of layout at line " + str(beg))
                beg += 1
                continue

            # we now want to find out if the next layout is the
            # same as this one. but we will need to do this over and
            # over again.
            while True:
                nxt = find_token(document.body, "\\begin_layout", end)
                if nxt == -1:
                    break
                m = rx.match(document.body[nxt])
                if not m or m.group(1) != lay:
                    break
                # so it is the same layout again. we now want to know if it
                # is empty. but first let's check and make sure there is no
                # content between the two layouts (warn once, then go on).
                if any(document.body[l] != "" for l in range(end + 1, nxt)):
                    document.warning("Found content between adjacent " + lay + " layouts!")
                nextend = find_end_of_layout(document.body, nxt)
                if nextend == -1:
                    document.warning("Can't find end of layout at line " + str(nxt))
                    break
                empty = all(document.body[l] == ""
                            for l in range(nxt + 1, nextend))
                if empty:
                    # empty layouts just get removed
                    # should we check if it's before yet another such layout?
                    del document.body[nxt:nextend + 1]
                    # and we do not want to check again. we know the next
                    # layout should be another Chunk and should be left as is.
                    break
                # if it's not empty, then we want to insert a newline in
                # place of the layout switch
                subst = ["\\begin_inset Newline newline", "\\end_inset", ""]
                document.body[end:nxt + 1] = subst
                # and now we have to find the end of the new, larger layout
                newend = find_end_of_layout(document.body, beg)
                if newend == -1:
                    document.warning("Can't find end of new layout at line " + str(beg))
                    break
                end = newend
            beg = end + 1
i = find_token(document.body, '<cell multirow="3"', begin_table, end_table) if i == -1: begin_table = end_table continue # store the number of rows and columns numrows = get_option_value(document.body[begin_table], "rows") numcols = get_option_value(document.body[begin_table], "columns") try: numrows = int(numrows) numcols = int(numcols) except: document.warning("Unable to determine rows and columns!") begin_table = end_table continue mrstarts = [] multirows = [] # collect info on rows and columns of this table. begin_row = begin_table for row in range(numrows): begin_row = find_token(document.body, '<row>', begin_row, end_table) if begin_row == -1: document.warning("Can't find row " + str(row + 1)) break end_row = find_end_of(document.body, begin_row, '<row>', '</row>') if end_row == -1: document.warning("Can't find end of row " + str(row + 1)) break begin_cell = begin_row multirows.append([]) for column in range(numcols): begin_cell = find_token(document.body, '<cell ', begin_cell, end_row) if begin_cell == -1: document.warning("Can't find column " + str(column + 1) + \ "in row " + str(row + 1)) break # NOTE # this will fail if someone puts "</cell>" in a cell, but # that seems fairly unlikely. 
end_cell = find_end_of(document.body, begin_cell, '<cell', '</cell>') if end_cell == -1: document.warning("Can't find end of column " + str(column + 1) + \ "in row " + str(row + 1)) break multirows[row].append([begin_cell, end_cell, 0]) if document.body[begin_cell].find('multirow="3" mroffset=') != -1: multirows[row][column][2] = 3 # begin multirow mrstarts.append([row, column]) elif document.body[begin_cell].find('multirow="4"') != -1: multirows[row][column][2] = 4 # in multirow begin_cell = end_cell begin_row = end_row # end of table info collection # work from the back to avoid messing up numbering mrstarts.reverse() for m in mrstarts: row = m[0] col = m[1] # get column width col_width = get_option_value(document.body[begin_table + 2 + col], "width") # "0pt" means that no width is specified if not col_width or col_width == "0pt": col_width = "*" # determine the number of cells that are part of the multirow nummrs = 1 for r in range(row + 1, numrows): if multirows[r][col][2] != 4: break nummrs += 1 # take the opportunity to revert this line lineno = multirows[r][col][0] document.body[lineno] = document.body[lineno].\ replace(' multirow="4" ', ' ').\ replace('valignment="middle"', 'valignment="top"').\ replace(' topline="true" ', ' ') # remove bottom line of previous multirow-part cell lineno = multirows[r-1][col][0] document.body[lineno] = document.body[lineno].replace(' bottomline="true" ', ' ') # revert beginning cell bcell = multirows[row][col][0] ecell = multirows[row][col][1] offset = get_option_value(document.body[bcell], "mroffset") document.body[bcell] = document.body[bcell].\ replace(' multirow="3" ', ' ').\ replace('valignment="middle"', 'valignment="top"') # remove mroffset option document.body[bcell] = rgx.sub('', document.body[bcell]) blay = find_token(document.body, "\\begin_layout", bcell, ecell) if blay == -1: document.warning("Can't find layout for cell!") continue bend = find_end_of_layout(document.body, blay) if bend == -1: 
def revert_script(document):
    " Turn script insets (subscript/superscript) into ERT holding LaTeX commands "
    # opening line of a script inset -> LaTeX command that replaces it
    script_commands = (
        ("\\begin_inset script subscript", '\\textsubscript{'),
        ("\\begin_inset script superscript", '\\textsuperscript{'),
    )
    lines = document.body
    used_subscript = False
    pos = 0
    while True:
        pos = find_token(lines, '\\begin_inset script', pos)
        if pos == -1:
            break

        inset_end = find_end_of_inset(lines, pos)
        if inset_end == -1:
            document.warning("Malformed LyX document: Can't find end of script inset.")
            pos += 1
            continue

        layout_start = find_token(lines, "\\begin_layout", pos, inset_end)
        if layout_start == -1:
            document.warning("Malformed LyX document: Can't find layout in script inset.")
            pos = inset_end
            continue

        # Work out which kind of script inset this is (subscript is tried first).
        latex_open = None
        for opening, command in script_commands:
            if check_token(lines[pos], opening):
                latex_open = command
                break
        if latex_open is None:
            document.warning("Malformed LyX document: Unknown type of script inset.")
            pos = inset_end
            continue
        if latex_open == '\\textsubscript{':
            # Recorded as soon as the kind is known, even if the inset is
            # skipped below because its layout end cannot be found.
            used_subscript = True

        layout_end = find_end_of_layout(lines, layout_start)
        if layout_end == -1 or layout_end > inset_end:
            document.warning("Malformed LyX document: Can't find end of layout in script inset.")
            pos = inset_end
            continue

        # Replace the \end_layout ... \end_inset tail first, so the indices of
        # the opening lines are still valid for the second replacement.
        lines[layout_end:inset_end + 1] = put_cmd_in_ert("}")
        lines[pos:layout_start + 1] = put_cmd_in_ert(latex_open)
        pos += 1

    if not used_subscript:
        return
    # these classes provide a \textsubscript command:
    # FIXME: Would be nice if we could use the information of the .layout file here
    classes = ["memoir", "scrartcl", "scrbook", "scrlttr2", "scrreprt"]
    if find_token_exact(classes, document.textclass, 0) == -1:
        add_to_preamble(document, ['\\usepackage{subscript}'])
def convert_use_xetex(document):
    " convert \\use_xetex to \\use_non_tex_fonts "
    i = find_token(document.header, "\\use_xetex", 0)
    if i == -1:
        # The old parameter is absent: add the new one, switched off.
        # list.insert(-1, ...) places it just before the last header line.
        document.header.insert(-1, "\\use_non_tex_fonts 0")
    else:
        # Carry the old setting over under the new parameter name.
        val = get_value(document.header, "\\use_xetex", 0)
        document.header[i] = "\\use_non_tex_fonts " + val


def revert_use_xetex(document):
    " revert \\use_non_tex_fonts to \\use_xetex "
    i = find_token(document.header, "\\use_non_tex_fonts", 0)
    if i == -1:
        document.warning("Malformed document. No \\use_non_tex_fonts param!")
        return

    # Keep the value, restore the old parameter name in place.
    val = get_value(document.header, "\\use_non_tex_fonts", 0)
    document.header[i] = "\\use_xetex " + val


def revert_labeling(document):
    " Replace Labeling layouts by List layouts unless the text class is listed in koma "
    koma = ("scrartcl", "scrarticle-beamer", "scrbook", "scrlettr",
            "scrlttr2", "scrreprt")
    if document.textclass in koma:
        return
    i = 0
    while True:
        i = find_token_exact(document.body, "\\begin_layout Labeling", i)
        if i == -1:
            return
        document.body[i] = "\\begin_layout List"
        # Step past the rewritten line instead of relying on the new text
        # no longer matching the search token.
        i += 1


def revert_langpack(document):
    " revert \\language_package parameter "
    i = find_token(document.header, "\\language_package", 0)
    if i == -1:
        document.warning("Malformed document. No \\language_package param!")
        return

    # The parameter is simply dropped from the header.
    del document.header[i]
def convert_langpack(document):
    " Add \\language_package parameter "
    # The token is written with an escaped backslash. The former "\language"
    # relied on "\l" being an unrecognised escape: same runtime value, but
    # Python warns about such literals.
    i = find_token(document.header, "\\language", 0)
    if i == -1:
        document.warning("Malformed document. No \\language defined!")
        return

    # The new parameter goes directly after the \language line.
    document.header.insert(i + 1, "\\language_package default")


def revert_tabularwidth(document):
    """ Strip the tabularwidth option from the <features line of Tabular insets.

    remove_option() is only called for insets whose features line contains
    the text alignment="tabularwidth".
    """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Tabular", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Unable to find end of Tabular inset at line " + str(i))
            i += 1
            continue
        # Search strictly inside the inset; this also guarantees that the
        # next pass of the loop starts after the current inset opening.
        i += 1
        features = find_token(document.body, "<features", i, j)
        if features == -1:
            document.warning("Can't find any features in Tabular inset at line " + str(i))
            i = j
            continue
        # NOTE(review): the guard looks for alignment="tabularwidth" while the
        # option being removed is named tabularwidth -- confirm that this is
        # the text actually present in the files to be reverted.
        if 'alignment="tabularwidth"' in document.body[features]:
            remove_option(document.body, features, 'tabularwidth')


def revert_html_css_as_file(document):
    " Drop \\html_css_as_file from the header via del_token, warning if that fails "
    if not del_token(document.header, '\\html_css_as_file', 0):
        document.warning("Malformed LyX document: Missing \\html_css_as_file.")
##
# Conversion hub
#
# Two module-level tables pair a file format number with the list of
# functions registered for it; an empty list means no function is registered
# for that number.
# NOTE(review): the code that consumes these tables is not part of this file;
# presumably the lyx2lyx driver walks them step by step -- confirm there
# whether each number denotes the format reached after the step.

supported_versions = ["2.0.0","2.0"]

# Entries are listed in ascending order, 346 .. 413.
convert = [[346, []],
           [347, []],
           [348, []],
           [349, []],
           [350, []],
           [351, []],
           [352, [convert_splitindex]],
           [353, []],
           [354, []],
           [355, [convert_strikeout]],
           [356, []],
           [357, [convert_ulinelatex]],
           [358, []],
           [359, [convert_nomencl_width]],
           [360, []],
           [361, []],
           [362, []],
           [363, []],
           [364, []],
           [365, []],
           [366, []],
           [367, []],
           [368, []],
           [369, [convert_author_id]],
           [370, []],
           [371, [convert_mhchem]],
           [372, []],
           [373, [merge_gbrief]],
           [374, []],
           [375, []],
           [376, [convert_includeall]],
           [377, []],
           [378, []],
           [379, [convert_math_output]],
           [380, []],
           [381, []],
           [382, []],
           [383, []],
           [384, []],
           [385, []],
           [386, []],
           [387, []],
           [388, []],
           [389, [convert_html_quotes]],
           [390, []],
           [391, []],
           [392, []],
           [393, [convert_optarg]],
           [394, [convert_use_makebox]],
           [395, []],
           [396, []],
           [397, [remove_Nameref]],
           [398, []],
           [399, [convert_mathdots]],
           [400, [convert_rule]],
           [401, []],
           [402, [convert_bibtex_clearpage]],
           [403, [convert_flexnames]],
           [404, [convert_prettyref]],
           [405, []],
           [406, [convert_passthru]],
           [407, []],
           [408, []],
           [409, [convert_use_xetex]],
           [410, []],
           [411, [convert_langpack]],
           [412, []],
           [413, []]
          ]

# Entries are listed in descending order, 412 .. 345.
revert =  [[412, [revert_html_css_as_file]],
           [411, [revert_tabularwidth]],
           [410, [revert_langpack]],
           [409, [revert_labeling]],
           [408, [revert_use_xetex]],
           [407, [revert_script]],
           [406, [revert_multirowOffset]],
           [405, [revert_passthru]],
           [404, []],
           [403, [revert_refstyle]],
           [402, [revert_flexnames]],
           [401, []],
           [400, [revert_diagram]],
           [399, [revert_rule]],
           [398, [revert_mathdots]],
           [397, [revert_mathrsfs]],
           [396, []],
           [395, [revert_nameref]],
           [394, [revert_DIN_C_pagesizes]],
           [393, [revert_makebox]],
           [392, [revert_argument]],
           [391, []],
           [390, [revert_align_decimal, revert_IEEEtran]],
           [389, [revert_output_sync]],
           [388, [revert_html_quotes]],
           [387, [revert_pagesizes]],
           [386, [revert_math_scale]],
           [385, [revert_lyx_version]],
           [384, [revert_shadedboxcolor]],
           [383, [revert_fontcolor]],
           [382, [revert_turkmen]],
           [381, [revert_notefontcolor]],
           [380, [revert_equalspacing_xymatrix]],
           [379, [revert_inset_preview]],
           [378, [revert_math_output]],
           [377, []],
           [376, [revert_multirow]],
           [375, [revert_includeall]],
           [374, [revert_includeonly]],
           [373, [revert_html_options]],
           [372, [revert_gbrief]],
           [371, [revert_fontenc]],
           [370, [revert_mhchem]],
           [369, [revert_suppress_date]],
           [368, [revert_author_id]],
           [367, [revert_hspace_glue_lengths]],
           [366, [revert_percent_vspace_lengths, revert_percent_hspace_lengths]],
           [365, [revert_percent_skip_lengths]],
           [364, [revert_paragraph_indentation]],
           [363, [revert_branch_filename]],
           [362, [revert_longtable_align]],
           [361, [revert_applemac]],
           [360, []],
           [359, [revert_nomencl_cwidth]],
           [358, [revert_nomencl_width]],
           [357, [revert_custom_processors]],
           [356, [revert_ulinelatex]],
           [355, []],
           [354, [revert_strikeout]],
           [353, [revert_printindexall]],
           [352, [revert_subindex]],
           [351, [revert_splitindex]],
           [350, [revert_backgroundcolor]],
           [349, [revert_outputformat]],
           [348, [revert_xetex]],
           [347, [revert_phantom, revert_hphantom, revert_vphantom]],
           [346, [revert_tabularvalign]],
           [345, [revert_swiss]]
          ]


# Running this file directly does nothing.
if __name__ == "__main__":
    pass