# -*- coding: utf-8 -*- # This file is part of lyx2lyx # -*- coding: utf-8 -*- # Copyright (C) 2011 The LyX team # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. """ Convert files to the file format generated by lyx 2.2""" import re, string import unicodedata import sys, os # Uncomment only what you need to import, please. #from parser_tools import find_token, find_end_of, find_tokens, \ # find_token_exact, find_end_of_inset, find_end_of_layout, \ # find_token_backwards, is_in_inset, get_value, get_quoted_value, \ # del_token, check_token, get_option_value from lyx2lyx_tools import add_to_preamble, put_cmd_in_ert, lyx2latex, \ length_in_bp#, \ # insert_to_preamble, latex_length, revert_flex_inset, \ # revert_font_attrs, hex2ratio, str2bool from parser_tools import find_token, find_token_backwards, find_re, \ find_end_of_inset, find_end_of_layout, find_nonempty_line, \ get_containing_layout, get_value, check_token ############################################################################### ### ### Conversion and reversion routines ### ############################################################################### def convert_separator(document): """ Convert layout separators to separator insets and add (LaTeX) paragraph breaks in order to mimic previous LaTeX export. 
""" parins = ["\\begin_inset Separator parbreak", "\\end_inset", ""] parlay = ["\\begin_layout Standard", "\\begin_inset Separator parbreak", "\\end_inset", "", "\\end_layout", ""] sty_dict = { "family" : "default", "series" : "default", "shape" : "default", "size" : "default", "bar" : "default", "color" : "inherit" } i = 0 while 1: i = find_token(document.body, "\\begin_deeper", i) if i == -1: break j = find_token_backwards(document.body, "\\end_layout", i-1) if j != -1: # reset any text style before inserting the inset lay = get_containing_layout(document.body, j-1) if lay != False: content = "\n".join(document.body[lay[1]:lay[2]]) for val in list(sty_dict.keys()): if content.find("\\%s" % val) != -1: document.body[j:j] = ["\\%s %s" % (val, sty_dict[val])] i = i + 1 j = j + 1 document.body[j:j] = parins i = i + len(parins) + 1 else: i = i + 1 i = 0 while 1: i = find_token(document.body, "\\align", i) if i == -1: break lay = get_containing_layout(document.body, i) if lay != False and lay[0] == "Plain Layout": i = i + 1 continue j = find_token_backwards(document.body, "\\end_layout", i-1) if j != -1: lay = get_containing_layout(document.body, j-1) if lay != False and lay[0] == "Standard" \ and find_token(document.body, "\\align", lay[1], lay[2]) == -1 \ and find_token(document.body, "\\begin_inset VSpace", lay[1], lay[2]) == -1: # reset any text style before inserting the inset content = "\n".join(document.body[lay[1]:lay[2]]) for val in list(sty_dict.keys()): if content.find("\\%s" % val) != -1: document.body[j:j] = ["\\%s %s" % (val, sty_dict[val])] i = i + 1 j = j + 1 document.body[j:j] = parins i = i + len(parins) + 1 else: i = i + 1 else: i = i + 1 regexp = re.compile(r'^\\begin_layout (?:(-*)|(\s*))(Separator|EndOfSlide)(?:(-*)|(\s*))$', re.IGNORECASE) i = 0 while 1: i = find_re(document.body, regexp, i) if i == -1: return j = find_end_of_layout(document.body, i) if j == -1: document.warning("Malformed LyX document: Missing `\\end_layout'.") return lay = 
get_containing_layout(document.body, j-1) if lay != False: lines = document.body[lay[3]:lay[2]] else: lines = [] document.body[i:j+1] = parlay if len(lines) > 0: document.body[i+1:i+1] = lines i = i + len(parlay) + len(lines) + 1 def revert_separator(document): " Revert separator insets to layout separators " beamer_classes = ["beamer", "article-beamer", "scrarticle-beamer"] if document.textclass in beamer_classes: beglaysep = "\\begin_layout Separator" else: beglaysep = "\\begin_layout --Separator--" parsep = [beglaysep, "", "\\end_layout", ""] comert = ["\\begin_inset ERT", "status collapsed", "", "\\begin_layout Plain Layout", "%", "\\end_layout", "", "\\end_inset", ""] empert = ["\\begin_inset ERT", "status collapsed", "", "\\begin_layout Plain Layout", " ", "\\end_layout", "", "\\end_inset", ""] i = 0 while 1: i = find_token(document.body, "\\begin_inset Separator", i) if i == -1: return lay = get_containing_layout(document.body, i) if lay == False: document.warning("Malformed LyX document: Can't convert separator inset at line " + str(i)) i = i + 1 continue layoutname = lay[0] beg = lay[1] end = lay[2] kind = get_value(document.body, "\\begin_inset Separator", i, i+1, "plain").split()[1] before = document.body[beg+1:i] something_before = len(before) > 0 and len("".join(before)) > 0 j = find_end_of_inset(document.body, i) after = document.body[j+1:end] something_after = len(after) > 0 and len("".join(after)) > 0 if kind == "plain": beg = beg + len(before) + 1 elif something_before: document.body[i:i] = ["\\end_layout", ""] i = i + 2 j = j + 2 beg = i end = end + 2 if kind == "plain": if something_after: document.body[beg:j+1] = empert i = i + len(empert) else: document.body[beg:j+1] = comert i = i + len(comert) else: if something_after: if layoutname == "Standard": if not something_before: document.body[beg:j+1] = parsep i = i + len(parsep) document.body[i:i] = ["", "\\begin_layout Standard"] i = i + 2 else: document.body[beg:j+1] = ["\\begin_layout Standard"] 
def revert_smash(document):
    """Set amsmath to on if smash commands are used.

    Only acts when the header says amsmath is loaded automatically
    (value "1"); an explicit on/off setting is left alone.  The first
    Formula inset containing one of the commands switches the header
    entry to "2" (on) and ends the search.
    """
    commands = ["smash[t]", "smash[b]", "notag"]
    i = find_token(document.header, "\\use_package amsmath", 0)
    if i == -1:
        document.warning("Malformed LyX document: Can't find \\use_package amsmath.")
        return
    value = get_value(document.header, "\\use_package amsmath", i).split()[1]
    if value != "1":
        # nothing to do if package is not auto but on or off
        return
    j = 0
    while True:
        j = find_token(document.body, '\\begin_inset Formula', j)
        if j == -1:
            return
        k = find_end_of_inset(document.body, j)
        if k == -1:
            document.warning("Malformed LyX document: Can't find end of Formula inset at line " + str(j))
            j += 1
            continue
        code = "\n".join(document.body[j:k])
        if any(("\\%s" % c) in code for c in commands):
            # set amsmath to on, since it is loaded by the newer format
            document.header[i] = "\\use_package amsmath 2"
            return
        j = k


def revert_swissgerman(document):
    """Set language german-ch-old to german.

    Rewrites the document language (attribute and header line) and every
    body line found for the old language switch.
    """
    if document.language == "german-ch-old":
        document.language = "german"
        i = find_token(document.header, "\\language", 0)
        if i != -1:
            document.header[i] = "\\language german"
    j = 0
    while True:
        j = find_token(document.body, "\\lang german-ch-old", j)
        if j == -1:
            return
        document.body[j] = document.body[j].replace("\\lang german-ch-old", "\\lang german")
        j = j + 1


def revert_use_package(document, pkg, commands, oldauto, supported):
    """Revert the use_package header setting for pkg.

    oldauto defines how the version we are reverting to behaves:
    if it is true, the old version uses the package automatically,
    if it is false, the old version never uses the package.
    If supported is true, the target version also supports this
    package natively, so the header line is kept and only its value
    is adjusted; otherwise the line is removed and the package is
    requested through the preamble instead.
    """
    # Escape the package name so it is matched literally.
    regexp = re.compile(r'(\\use_package\s+%s)' % re.escape(pkg))
    p = find_re(document.header, regexp, 0)
    value = "1"  # default is auto
    if p != -1:
        value = get_value(document.header, "\\use_package", p).split()[1]
        if not supported:
            del document.header[p]
    if value == "2" and not supported:  # on
        add_to_preamble(document, ["\\usepackage{" + pkg + "}"])
    elif value == "1" and not oldauto:  # auto
        i = 0
        while True:
            i = find_token(document.body, '\\begin_inset Formula', i)
            if i == -1:
                return
            j = find_end_of_inset(document.body, i)
            if j == -1:
                document.warning("Malformed LyX document: Can't find end of Formula inset at line " + str(i))
                i += 1
                continue
            code = "\n".join(document.body[i:j])
            if any(("\\%s" % c) in code for c in commands):
                if not supported:
                    add_to_preamble(document, ["\\usepackage{" + pkg + "}"])
                elif p != -1:
                    document.header[p] = "\\use_package " + pkg + " 2"
                else:
                    # No header line existed for this package.  Indexing
                    # with p == -1 would overwrite the LAST header line,
                    # so add a new line instead.
                    # NOTE(review): appended at the end of the header;
                    # confirm the position is acceptable to LyX.
                    document.header.append("\\use_package " + pkg + " 2")
                return
            i = j


# Math commands whose presence in a formula requires the mathtools package.
mathtools_commands = ["xhookrightarrow", "xhookleftarrow", "xRightarrow",
                      "xrightharpoondown", "xrightharpoonup", "xrightleftharpoons",
                      "xLeftarrow", "xleftharpoondown", "xleftharpoonup",
                      "xleftrightarrow", "xLeftrightarrow", "xleftrightharpoons",
                      "xmapsto"]
"mathtools", mathtools_commands, False, True) def revert_beamer_lemma(document): " Reverts beamer lemma layout to ERT " beamer_classes = ["beamer", "article-beamer", "scrarticle-beamer"] if document.textclass not in beamer_classes: return consecutive = False i = 0 while True: i = find_token(document.body, "\\begin_layout Lemma", i) if i == -1: return j = find_end_of_layout(document.body, i) if j == -1: document.warning("Malformed LyX document: Can't find end of Lemma layout") i += 1 continue arg1 = find_token(document.body, "\\begin_inset Argument 1", i, j) endarg1 = find_end_of_inset(document.body, arg1) arg2 = find_token(document.body, "\\begin_inset Argument 2", i, j) endarg2 = find_end_of_inset(document.body, arg2) subst1 = [] subst2 = [] if arg1 != -1: beginPlain1 = find_token(document.body, "\\begin_layout Plain Layout", arg1, endarg1) if beginPlain1 == -1: document.warning("Malformed LyX document: Can't find arg1 plain Layout") i += 1 continue endPlain1 = find_end_of_inset(document.body, beginPlain1) content1 = document.body[beginPlain1 + 1 : endPlain1 - 2] subst1 = put_cmd_in_ert("<") + content1 + put_cmd_in_ert(">") if arg2 != -1: beginPlain2 = find_token(document.body, "\\begin_layout Plain Layout", arg2, endarg2) if beginPlain2 == -1: document.warning("Malformed LyX document: Can't find arg2 plain Layout") i += 1 continue endPlain2 = find_end_of_inset(document.body, beginPlain2) content2 = document.body[beginPlain2 + 1 : endPlain2 - 2] subst2 = put_cmd_in_ert("[") + content2 + put_cmd_in_ert("]") # remove Arg insets if arg1 < arg2: del document.body[arg2 : endarg2 + 1] if arg1 != -1: del document.body[arg1 : endarg1 + 1] if arg2 < arg1: del document.body[arg1 : endarg1 + 1] if arg2 != -1: del document.body[arg2 : endarg2 + 1] # index of end layout has probably changed j = find_end_of_layout(document.body, i) if j == -1: document.warning("Malformed LyX document: Can't find end of Lemma layout") i += 1 continue begcmd = [] # if this is not a consecutive 
def revert_question_env(document):
    """
    Reverts question and question* environments of
    theorems-ams-extended-bytype module to ERT

    Does nothing unless the document uses that module.  Each Question
    layout becomes a Standard layout wrapped in ERT begin/end commands
    (consecutive layouts of the same kind share one environment), and
    the matching theorem definitions are added to the preamble.
    """
    # Do we use theorems-ams-extended-bytype module?
    if "theorems-ams-extended-bytype" not in document.get_module_list():
        return

    consecutive = False
    i = 0
    while True:
        i = find_token(document.body, "\\begin_layout Question", i)
        if i == -1:
            return

        starred = document.body[i] == "\\begin_layout Question*"
        if starred:
            env = "question*"
        else:
            env = "question"

        j = find_end_of_layout(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: Can't find end of Question layout")
            i += 1
            continue

        # if this is not a consecutive env, add start command
        begcmd = []
        if not consecutive:
            begcmd = put_cmd_in_ert("\\begin{" + env + "}")

        # has this a consecutive theorem of same type?
        # (guarded: the layout may be the last thing in the body)
        next_begin = "\\begin_layout Question*" if starred else "\\begin_layout Question"
        consecutive = (j + 2 < len(document.body)
                       and document.body[j + 2] == next_begin)

        # if this is not followed by a consecutive env, add end command
        if not consecutive:
            document.body[j : j + 1] = put_cmd_in_ert("\\end{" + env + "}") + ["\\end_layout"]

        document.body[i : i + 1] = ["\\begin_layout Standard", ""] + begcmd

        # Preamble text is passed as a list of lines, like every other
        # add_to_preamble call in this file.
        add_to_preamble(document, ["\\providecommand{\\questionname}{Question}"])
        if starred:
            add_to_preamble(document, ["\\theoremstyle{plain}",
                                       "\\newtheorem*{question*}{\\protect\\questionname}"])
        else:
            add_to_preamble(document, ["\\theoremstyle{plain}",
                                       "\\newtheorem{question}{\\protect\\questionname}"])

        i = j
# These must be split into the corresponding number of two and three hyphens # We must match what LaTeX does: First try emdash, then endash, then single hyphen if back.find("-") == 0: back = back[1:] if len(back) > 0: document.body.insert(i+1, back) document.body[i] = front + "\\threehyphens" else: if len(back) > 0: document.body.insert(i+1, back) document.body[i] = front + "\\twohyphens" i += 1 def revert_dashes(document): "convert \\twohyphens and \\threehyphens to -- and ---" i = 0 while i < len(document.body): words = document.body[i].split() if len(words) > 1 and words[0] == "\\begin_inset" and \ words[1] in ["CommandInset", "ERT", "External", "Formula", "Graphics", "IPA", "listings"]: # see convert_dashes j = find_end_of_inset(document.body, i) if j == -1: document.warning("Malformed LyX document: Can't find end of " + words[1] + " inset at line " + str(i)) i += 1 else: i = j continue replaced = False if document.body[i].find("\\twohyphens") >= 0: document.body[i] = document.body[i].replace("\\twohyphens", "--") replaced = True if document.body[i].find("\\threehyphens") >= 0: document.body[i] = document.body[i].replace("\\threehyphens", "---") replaced = True if replaced and i+1 < len(document.body) and \ (document.body[i+1].find("\\") != 0 or \ document.body[i+1].find("\\twohyphens") == 0 or document.body[i+1].find("\\threehyphens") == 0) and \ len(document.body[i]) + len(document.body[i+1]) <= 80: document.body[i] = document.body[i] + document.body[i+1] document.body[i+1:i+2] = [] else: i += 1 # order is important for the last three! phrases = ["LyX", "LaTeX2e", "LaTeX", "TeX"] def is_part_of_converted_phrase(line, j, phrase): "is phrase part of an already converted phrase?" 
for p in phrases: converted = "\\SpecialCharNoPassThru \\" + p pos = j + len(phrase) - len(converted) if pos >= 0: if line[pos:pos+len(converted)] == converted: return True return False def convert_phrases(document): "convert special phrases from plain text to \\SpecialCharNoPassThru" if document.backend != "latex": return for phrase in phrases: i = 0 while i < len(document.body): words = document.body[i].split() if len(words) > 1 and words[0] == "\\begin_inset" and \ words[1] in ["CommandInset", "External", "Formula", "Graphics", "listings"]: # must not replace anything in insets that store LaTeX contents in .lyx files # (math and command insets withut overridden read() and write() methods j = find_end_of_inset(document.body, i) if j == -1: document.warning("Malformed LyX document: Can't find end of Formula inset at line " + str(i)) i += 1 else: i = j continue if document.body[i].find("\\") == 0: i += 1 continue j = document.body[i].find(phrase) if j == -1: i += 1 continue if not is_part_of_converted_phrase(document.body[i], j, phrase): front = document.body[i][:j] back = document.body[i][j+len(phrase):] if len(back) > 0: document.body.insert(i+1, back) # We cannot use SpecialChar since we do not know whether we are outside passThru document.body[i] = front + "\\SpecialCharNoPassThru \\" + phrase i += 1 def revert_phrases(document): "convert special phrases to plain text" i = 0 while i < len(document.body): words = document.body[i].split() if len(words) > 1 and words[0] == "\\begin_inset" and \ words[1] in ["CommandInset", "External", "Formula", "Graphics", "listings"]: # see convert_phrases j = find_end_of_inset(document.body, i) if j == -1: document.warning("Malformed LyX document: Can't find end of Formula inset at line " + str(i)) i += 1 else: i = j continue replaced = False for phrase in phrases: # we can replace SpecialChar since LyX ensures that it cannot be inserted into passThru parts if document.body[i].find("\\SpecialChar \\" + phrase) >= 0: 
def convert_specialchar_internal(document, forward):
    """Translate the argument of SpecialChar / SpecialCharNoPassThru.

    With forward true the old LaTeX-like spelling is replaced by the new
    symbolic name; with forward false the replacement goes the other
    way.  Insets that store LaTeX content are skipped (see
    convert_phrases).
    """
    # (old syntax, new syntax).  A sequence rather than a dict, so the
    # pairs are applied in exactly this order on every Python version.
    specialchars = (
        ("\\-", "softhyphen"),
        ("\\textcompwordmark{}", "ligaturebreak"),
        ("\\@.", "endofsentence"),
        ("\\ldots{}", "ldots"),
        ("\\menuseparator", "menuseparator"),
        ("\\slash{}", "breakableslash"),
        ("\\nobreakdash-", "nobreakdash"),
        ("\\LyX", "LyX"),
        ("\\TeX", "TeX"),
        ("\\LaTeX2e", "LaTeX2e"),
        ("\\LaTeX", "LaTeX"),  # must be after LaTeX2e
    )
    latex_insets = ("CommandInset", "External", "Formula", "Graphics", "listings")

    body = document.body
    i = 0
    while i < len(body):
        words = body[i].split()
        if len(words) > 1 and words[0] == "\\begin_inset" and words[1] in latex_insets:
            # see convert_phrases
            j = find_end_of_inset(body, i)
            if j == -1:
                document.warning("Malformed LyX document: Can't find end of " + words[1] + " inset at line " + str(i))
                i += 1
            else:
                i = j
            continue

        line = body[i]
        for old, new in specialchars:
            if forward:
                src, dst = old, new
            else:
                src, dst = new, old
            line = line.replace("\\SpecialChar " + src, "\\SpecialChar " + dst)
            line = line.replace("\\SpecialCharNoPassThru " + src, "\\SpecialCharNoPassThru " + dst)
        body[i] = line
        i += 1


def convert_specialchar(document):
    "convert special characters to new syntax"
    convert_specialchar_internal(document, True)


def revert_specialchar(document):
    "convert special characters to old syntax"
    convert_specialchar_internal(document, False)


def revert_georgian(document):
    """Set the document language to English but assure Georgian output.

    Only acts when the document language is georgian: the header is
    switched to english with the babel language package, and georgian
    is added to the class options.
    """
    if document.language != "georgian":
        return
    document.language = "english"
    i = find_token(document.header, "\\language georgian", 0)
    if i != -1:
        document.header[i] = "\\language english"
    j = find_token(document.header, "\\language_package default", 0)
    if j != -1:
        document.header[j] = "\\language_package babel"
    k = find_token(document.header, "\\options", 0)
    if k != -1:
        document.header[k] = document.header[k].replace("\\options", "\\options georgian,")
    else:
        # No options line yet: add one after use_default_options.
        # NOTE(review): if that line is missing too, find_token yields -1
        # and the new line lands at the top of the header.
        anchor = find_token(document.header, "\\use_default_options", 0)
        document.header.insert(anchor + 1, "\\options georgian")


def revert_sigplan_doi(document):
    """Reverts sigplanconf DOI layout to ERT.

    Only for the sigplanconf text class.  Each DOI layout is removed
    from the body and its LaTeX rendering is added to the preamble as a
    doi command.
    """
    if document.textclass != "sigplanconf":
        return

    i = 0
    while True:
        i = find_token(document.body, "\\begin_layout DOI", i)
        if i == -1:
            return
        j = find_end_of_layout(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: Can't find end of DOI layout")
            i += 1
            continue

        content = lyx2latex(document, document.body[i:j + 1])
        add_to_preamble(document, ["\\doi{" + content + "}"])
        del document.body[i:j + 1]
        # no need to reset i
have_mod = False mods = document.get_module_list() for mod in mods: if mod == "linguistics": have_mod = True continue if not have_mod: return i = 0 example_layouts = ["Numbered Examples (consecutive)", "Subexample"] while True: i = find_token(document.body, "\\begin_inset Argument item:", i) if i == -1: return j = find_end_of_inset(document.body, i) # Find containing paragraph layout parent = get_containing_layout(document.body, i) if parent == False: document.warning("Malformed LyX document: Can't find parent paragraph layout") i += 1 continue parbeg = parent[3] layoutname = parent[0] if layoutname in example_layouts: beginPlain = find_token(document.body, "\\begin_layout Plain Layout", i) endPlain = find_end_of_layout(document.body, beginPlain) content = document.body[beginPlain + 1 : endPlain] del document.body[i:j+1] subst = put_cmd_in_ert("[") + content + put_cmd_in_ert("]") document.body[parbeg : parbeg] = subst i += 1 def revert_forest(document): " Reverts the forest environment (Linguistics module) to TeX-code " # Do we use the linguistics module? 
def revert_glossgroup(document):
    """Reverts the GroupGlossedWords inset (Linguistics module) to TeX-code.

    Does nothing unless the document uses the linguistics module.  Each
    inset is replaced by its LaTeX content wrapped in a brace group.
    """
    # Do we use the linguistics module?
    if "linguistics" not in document.get_module_list():
        return

    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Flex GroupGlossedWords", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: Can't find end of GroupGlossedWords inset")
            i += 1
            continue

        # Look for the content layout inside this inset only, so that a
        # layout further down the document is never picked up by mistake.
        beginPlain = find_token(document.body, "\\begin_layout Plain Layout", i, j)
        endPlain = -1
        if beginPlain != -1:
            endPlain = find_end_of_layout(document.body, beginPlain)
        if beginPlain == -1 or endPlain == -1:
            # a -1 bound would silently slice the wrong lines
            document.warning("Malformed LyX document: Can't find content of GroupGlossedWords inset")
            i += 1
            continue

        content = lyx2latex(document, document.body[beginPlain : endPlain])
        document.body[i:j + 1] = ["{", "", content, "", "}"]
        # no need to reset i
have_mod = False mods = document.get_module_list() for mod in mods: if mod == "linguistics": have_mod = True continue if not have_mod: return glosses = ("\\begin_inset Flex Glosse", "\\begin_inset Flex Tri-Glosse") for glosse in glosses: i = 0 while True: i = find_token(document.body, glosse, i) if i == -1: break j = find_end_of_inset(document.body, i) if j == -1: document.warning("Malformed LyX document: Can't find end of Glosse inset") i += 1 continue arg = find_token(document.body, "\\begin_inset Argument 1", i, j) endarg = find_end_of_inset(document.body, arg) argcontent = "" if arg != -1: argbeginPlain = find_token(document.body, "\\begin_layout Plain Layout", arg, endarg) if argbeginPlain == -1: document.warning("Malformed LyX document: Can't find arg plain Layout") i += 1 continue argendPlain = find_end_of_inset(document.body, argbeginPlain) argcontent = lyx2latex(document, document.body[argbeginPlain : argendPlain - 2]) document.body[j:j] = ["", "\\begin_layout Plain Layout","\\backslash", "glt ", argcontent, "\\end_layout"] # remove Arg insets and paragraph, if it only contains this inset if document.body[arg - 1] == "\\begin_layout Plain Layout" and find_end_of_layout(document.body, arg - 1) == endarg + 3: del document.body[arg - 1 : endarg + 4] else: del document.body[arg : endarg + 1] beginPlain = find_token(document.body, "\\begin_layout Plain Layout", i) endPlain = find_end_of_layout(document.body, beginPlain) content = lyx2latex(document, document.body[beginPlain : endPlain]) document.body[beginPlain + 1:endPlain] = [content] i = beginPlain + 1 def convert_newgloss(document): " Converts Glosse insets (Linguistics module) to the new format " # Do we use the linguistics module? 
def convert_BoxFeatures(document):
    """Adds new box features.

    After every line that find_token locates for height_special, the
    three new settings are inserted with their default values.
    """
    new_features = ['thickness "0.4pt"', 'separation "3pt"', 'shadowsize "4pt"']
    pos = find_token(document.body, "height_special", 0)
    while pos != -1:
        document.body[pos + 1:pos + 1] = new_features
        # resume after the lines just inserted
        pos = find_token(document.body, "height_special", pos + 4)
defaultThick = "0.4pt" defaultShadow = "4pt" while True: i = find_token(document.body, "height_special", i) if i == -1: return # read out the values beg = document.body[i+1].find('"'); end = document.body[i+1].rfind('"'); thickness = document.body[i+1][beg+1:end]; beg = document.body[i+2].find('"'); end = document.body[i+2].rfind('"'); separation = document.body[i+2][beg+1:end]; beg = document.body[i+3].find('"'); end = document.body[i+3].rfind('"'); shadowsize = document.body[i+3][beg+1:end]; # delete the specification del document.body[i+1:i+4] # output ERT # first output the closing brace if shadowsize != defaultShadow or separation != defaultSep or thickness != defaultThick: document.body[i + 10 : i + 10] = put_cmd_in_ert("}") # now output the lengths if shadowsize != defaultShadow or separation != defaultSep or thickness != defaultThick: document.body[i - 10 : i - 10] = put_cmd_in_ert("{") if thickness != defaultThick: document.body[i - 5 : i - 4] = ["{\\backslash fboxrule " + thickness] if separation != defaultSep and thickness == defaultThick: document.body[i - 5 : i - 4] = ["{\\backslash fboxsep " + separation] if separation != defaultSep and thickness != defaultThick: document.body[i - 5 : i - 4] = ["{\\backslash fboxrule " + thickness + "\\backslash fboxsep " + separation] if shadowsize != defaultShadow and separation == defaultSep and thickness == defaultThick: document.body[i - 5 : i - 4] = ["{\\backslash shadowsize " + shadowsize] if shadowsize != defaultShadow and separation != defaultSep and thickness == defaultThick: document.body[i - 5 : i - 4] = ["{\\backslash fboxsep " + separation + "\\backslash shadowsize " + shadowsize] if shadowsize != defaultShadow and separation == defaultSep and thickness != defaultThick: document.body[i - 5 : i - 4] = ["{\\backslash fboxrule " + thickness + "\\backslash shadowsize " + shadowsize] if shadowsize != defaultShadow and separation != defaultSep and thickness != defaultThick: document.body[i - 5 : i - 4] = 
def convert_origin(document):
    """Insert the origin tag.

    The tag is placed in front of the textclass header line.  Its value
    is "stdin" when the document has no directory, otherwise the
    directory with forward slashes and a trailing slash.
    """
    i = find_token(document.header, "\\textclass ", 0)
    if i == -1:
        document.warning("Malformed LyX document: No \\textclass!!")
        return
    if document.dir == "":
        origin = "stdin"
    else:
        origin = document.dir.replace('\\', '/') + '/'
        # Decode byte strings with the file system encoding (not on
        # Windows).  Text that is already unicode needs no decoding;
        # this also avoids the Python 2-only unicode() builtin.
        if os.name != 'nt' and isinstance(origin, bytes):
            origin = origin.decode(sys.getfilesystemencoding())
    document.header[i:i] = ["\\origin " + origin]


def revert_origin(document):
    "Remove the origin tag"
    i = find_token(document.header, "\\origin ", 0)
    if i == -1:
        document.warning("Malformed LyX document: No \\origin!!")
        return
    del document.header[i]


# Colour names that revert_textcolor turns into TeX code.
color_names = ["brown", "darkgray", "gray",
               "lightgray", "lime", "olive", "orange",
               "pink", "purple", "teal", "violet"]


def revert_textcolor(document):
    """Revert new textcolor colors to TeX code.

    Every color line naming one of color_names is replaced by an ERT
    that opens a textcolor group; a closing-brace ERT is put in front of
    the next color switch or end of layout, whichever comes first.  The
    xcolor package is requested in the preamble once.
    """
    i = 0
    xcolor = False
    while True:
        i = find_token(document.body, "\\color ", i)
        if i == -1:
            return
        line = document.body[i]
        for color in color_names:
            if line != "\\color " + color:
                continue
            # find the next \color and/or the next \end_layout
            j = find_token(document.body, "\\color", i + 1)
            k = find_token(document.body, "\\end_layout", i + 1)
            ends = [pos for pos in (j, k) if pos != -1]
            if not ends:
                # Nothing to close the group against; inserting at -1
                # would put the brace in front of the last body line.
                document.warning("Malformed LyX document: Can't find end of \\color " + color + " at line " + str(i))
                break
            # register that xcolor must be loaded in the preamble
            if not xcolor:
                xcolor = True
                add_to_preamble(document, ["\\@ifundefined{rangeHsb}{\\usepackage{xcolor}}{}"])
            # output TeX code
            # first output the closing brace (it comes later in the body,
            # so index i stays valid)
            close_at = min(ends)
            document.body[close_at:close_at] = put_cmd_in_ert("}")
            # now output the \textcolor command
            document.body[i : i + 1] = put_cmd_in_ert("\\textcolor{" + color + "}{")
            break
        i = i + 1


def convert_colorbox(document):
    """Adds color settings for boxes.

    After every line that find_token locates for shadowsize, the frame
    and background colour settings are inserted with default values.
    """
    i = 0
    while True:
        i = find_token(document.body, "shadowsize", i)
        if i == -1:
            return
        document.body[i+1:i+1] = ['framecolor "black"', 'backgroundcolor "none"']
        i = i + 3
def _quoted_value(line):
    " Return the text between the first and the last double quote of line "
    return line[line.find('"') + 1:line.rfind('"')]


def revert_colorbox(document):
    """Output the color settings of boxes as TeX code.

    For every ``\\begin_inset Box`` that has a ``framecolor`` option before
    its first layout, that line and the following one (expected to hold the
    background color) are removed.  If either color differs from the
    defaults ("black" frame, "none" background) the inset is wrapped in two
    ERT insets: an opening (f)colorbox command before it and a closing
    brace after it.  A "Boxed" box with a non-default frame color is turned
    into a "Frameless" one.
    """
    default_frame = "black"
    default_back = "none"
    start = 0
    while True:
        start = find_token(document.body, "\\begin_inset Box", start)
        if start == -1:
            return

        stop = find_end_of_inset(document.body, start)
        if stop == -1:
            document.warning("Malformed LyX document: Can't find end of box inset!")
            start += 1
            continue

        first_layout = find_token(document.body, "\\begin_layout", start, stop)
        if first_layout == -1:
            document.warning("Malformed LyX document: Can't find start of layout!")
            start = stop
            continue

        # Searching only up to the first layout makes sure that nothing but
        # a framecolor option of the inset itself can be found.
        option = find_token(document.body, "framecolor", start, first_layout)
        if option == -1:
            start = stop
            continue

        framecolor = _quoted_value(document.body[option])
        # the background color should be on the next line
        backcolor = _quoted_value(document.body[option + 1])

        # drop both option lines; the end of the inset moves up accordingly
        del document.body[option:option + 2]
        stop -= 2

        custom_frame = framecolor != default_frame
        if custom_frame and document.body[start] == "\\begin_inset Box Boxed":
            document.body[start] = "\\begin_inset Box Frameless"

        if custom_frame or backcolor != default_back:
            # closing brace first, so that 'start' remains valid
            document.body[stop + 1:stop + 1] = put_cmd_in_ert("}")
            # NOTE(review): other reverters in this file hand plain LaTeX
            # such as "\\textcolor{" to put_cmd_in_ert, whereas "\\backslash "
            # is spelled out here.  If put_cmd_in_ert escapes backslashes
            # itself this yields a doubled escape -- confirm.
            if custom_frame:
                opening = "\\backslash fcolorbox{" + framecolor + "}{" + backcolor + "}{"
            else:
                opening = "\\backslash colorbox{" + backcolor + "}{"
            document.body[start:start] = put_cmd_in_ert(opening)

        start = stop


def revert_mathmulticol(document):
    """Convert formulas to ERT if they contain multicolumns.

    A Formula inset is replaced by an ERT inset holding its code as soon as
    a ``\\multicolumn`` is found in it that is not a ``\\multicolumn{1}``.
    """
    i = 0
    while True:
        i = find_token(document.body, '\\begin_inset Formula', i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: Can't find end of Formula inset at line " + str(i))
            i += 1
            continue

        formula = document.body[i:j]
        formula[0] = formula[0].replace('\\begin_inset Formula', '').lstrip()
        code = "\n".join(formula)

        # Degenerated multicolumn cells (spanning a single column) need no
        # conversion, they work in old LyX versions as "math ERT".
        needs_ert = False
        offset = 0
        while True:
            hit = code.find("\\multicolumn", offset)
            if hit == -1:
                break
            if code.find("\\multicolumn{1}", offset) != hit:
                needs_ert = True
                break
            # continue behind this "\multicolumn" (12 characters)
            offset = hit + 12

        if needs_ert:
            document.body[i:j+1] = put_cmd_in_ert(code)
            i = find_end_of_inset(document.body, i)
        else:
            i = j
def revert_jss(document):
    """Revert JSS In_Preamble commands to ERT in the preamble.

    Does nothing unless ``document.textclass`` is "jss".  Otherwise three
    steps are run inside one endless loop that is only left via ``return``:

    1. The Flex insets Pkg, Proglang, Code, E-mail and URL are replaced by
       ERT insets produced with put_cmd_in_ert (opening command plus a
       closing brace).
    2. The layouts Title, Author, Plain Author, Plain Title, Short Title,
       Abstract, Keywords, Plain Keywords and Address are converted with
       lyx2latex, added to the preamble with add_to_preamble and deleted
       from the body.
    3. Flex "Code Chunk" insets and the layouts Code Input, Code Output and
       Code get ERT insets with the matching begin/end environment
       commands.

    NOTE(review): step 2 returns as soon as one of the layouts is not
    found, which also skips the remaining layouts and all of step 3.
    """

    if document.textclass != "jss":
        return

    # one search cursor per inset type; -1 means "no further occurrence"
    h = 0
    m = 0
    j = 0
    k = 0
    n = 0
    while True:
        # at first revert the inset layouts because they can be part of the In_Preamble layouts
        while m != -1 or j != -1 or h != -1 or k != -1 or n != -1:
            # \pkg
            if h != -1:
                h = find_token(document.body, "\\begin_inset Flex Pkg", h)
            if h != -1:
                endh = find_end_of_inset(document.body, h)
                # the end of the inset is rewritten first so that h stays valid
                # (assumes the last 3 and first 4 lines of the inset are
                # pure markup -- TODO confirm)
                document.body[endh - 2 : endh + 1] = put_cmd_in_ert("}")
                document.body[h : h + 4] = put_cmd_in_ert("\\pkg{")
                h = h + 5
            # \proglang
            if m != -1:
                m = find_token(document.body, "\\begin_inset Flex Proglang", m)
            if m != -1:
                endm = find_end_of_inset(document.body, m)
                document.body[endm - 2 : endm + 1] = put_cmd_in_ert("}")
                document.body[m : m + 4] = put_cmd_in_ert("\\proglang{")
                m = m + 5
            # \code
            if j != -1:
                j = find_token(document.body, "\\begin_inset Flex Code", j)
            if j != -1:
                # assure that we are not in a Code Chunk inset
                # (the line must end in "e", as "... Flex Code" does)
                if document.body[j][-1] == "e":
                    endj = find_end_of_inset(document.body, j)
                    document.body[endj - 2 : endj + 1] = put_cmd_in_ert("}")
                    document.body[j : j + 4] = put_cmd_in_ert("\\code{")
                    j = j + 5
                else:
                    j = j + 1
            # \email
            if k != -1:
                k = find_token(document.body, "\\begin_inset Flex E-mail", k)
            if k != -1:
                endk = find_end_of_inset(document.body, k)
                document.body[endk - 2 : endk + 1] = put_cmd_in_ert("}")
                document.body[k : k + 4] = put_cmd_in_ert("\\email{")
                k = k + 5
            # \url
            if n != -1:
                n = find_token(document.body, "\\begin_inset Flex URL", n)
            if n != -1:
                endn = find_end_of_inset(document.body, n)
                document.body[endn - 2 : endn + 1] = put_cmd_in_ert("}")
                document.body[n : n + 4] = put_cmd_in_ert("\\url{")
                n = n + 5
        # now revert the In_Preamble layouts
        # From here on j is reused for the end of the current layout.
        # NOTE(review): in each "j == -1" branch below, "i += 1" is
        # overwritten by the next search from 0 after "continue"; if the
        # body is unchanged the same layout is found again, so the loop
        # presumably never ends for such a malformed document -- confirm.
        # \title
        i = find_token(document.body, "\\begin_layout Title", 0)
        if i == -1:
            return
        j = find_end_of_layout(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: Can't find end of Title layout")
            i += 1
            continue
        content = lyx2latex(document, document.body[i:j + 1])
        add_to_preamble(document, ["\\title{" + content + "}"])
        del document.body[i:j + 1]
        # \author
        i = find_token(document.body, "\\begin_layout Author", 0)
        if i == -1:
            return
        j = find_end_of_layout(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: Can't find end of Author layout")
            i += 1
            continue
        content = lyx2latex(document, document.body[i:j + 1])
        add_to_preamble(document, ["\\author{" + content + "}"])
        del document.body[i:j + 1]
        # \Plainauthor
        i = find_token(document.body, "\\begin_layout Plain Author", 0)
        if i == -1:
            return
        j = find_end_of_layout(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: Can't find end of Plain Author layout")
            i += 1
            continue
        content = lyx2latex(document, document.body[i:j + 1])
        add_to_preamble(document, ["\\Plainauthor{" + content + "}"])
        del document.body[i:j + 1]
        # \Plaintitle
        i = find_token(document.body, "\\begin_layout Plain Title", 0)
        if i == -1:
            return
        j = find_end_of_layout(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: Can't find end of Plain Title layout")
            i += 1
            continue
        content = lyx2latex(document, document.body[i:j + 1])
        add_to_preamble(document, ["\\Plaintitle{" + content + "}"])
        del document.body[i:j + 1]
        # \Shorttitle
        i = find_token(document.body, "\\begin_layout Short Title", 0)
        if i == -1:
            return
        j = find_end_of_layout(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: Can't find end of Short Title layout")
            i += 1
            continue
        content = lyx2latex(document, document.body[i:j + 1])
        add_to_preamble(document, ["\\Shorttitle{" + content + "}"])
        del document.body[i:j + 1]
        # \Abstract
        i = find_token(document.body, "\\begin_layout Abstract", 0)
        if i == -1:
            return
        j = find_end_of_layout(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: Can't find end of Abstract layout")
            i += 1
            continue
        content = lyx2latex(document, document.body[i:j + 1])
        add_to_preamble(document, ["\\Abstract{" + content + "}"])
        del document.body[i:j + 1]
        # \Keywords
        i = find_token(document.body, "\\begin_layout Keywords", 0)
        if i == -1:
            return
        j = find_end_of_layout(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: Can't find end of Keywords layout")
            i += 1
            continue
        content = lyx2latex(document, document.body[i:j + 1])
        add_to_preamble(document, ["\\Keywords{" + content + "}"])
        del document.body[i:j + 1]
        # \Plainkeywords
        i = find_token(document.body, "\\begin_layout Plain Keywords", 0)
        if i == -1:
            return
        j = find_end_of_layout(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: Can't find end of Plain Keywords layout")
            i += 1
            continue
        content = lyx2latex(document, document.body[i:j + 1])
        add_to_preamble(document, ["\\Plainkeywords{" + content + "}"])
        del document.body[i:j + 1]
        # \Address
        i = find_token(document.body, "\\begin_layout Address", 0)
        if i == -1:
            return
        j = find_end_of_layout(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: Can't find end of Address layout")
            i += 1
            continue
        content = lyx2latex(document, document.body[i:j + 1])
        add_to_preamble(document, ["\\Address{" + content + "}"])
        del document.body[i:j + 1]
        # finally handle the code layouts
        # The cursors are reset; each one ends at -1, so the inset loop at
        # the top is skipped when the outer loop comes around again.
        h = 0
        m = 0
        j = 0
        k = 0
        while m != -1 or j != -1 or h != -1 or k != -1:
            # \CodeChunk
            if h != -1:
                h = find_token(document.body, "\\begin_inset Flex Code Chunk", h)
            if h != -1:
                endh = find_end_of_inset(document.body, h)
                # a slice whose start exceeds its stop inserts at the start index
                document.body[endh + 1 : endh] = ["\\end_layout"]
                document.body[endh : endh + 1] = put_cmd_in_ert("\\end{CodeChunk}")
                document.body[h : h + 3] = put_cmd_in_ert("\\begin{CodeChunk}")
                document.body[h - 1 : h] = ["\\begin_layout Standard"]
                h = h + 1
            # \CodeInput
            if j != -1:
                j = find_token(document.body, "\\begin_layout Code Input", j)
            if j != -1:
                endj = find_end_of_layout(document.body, j)
                # the offsets 3 and 13 presumably depend on the number of
                # lines put_cmd_in_ert returns -- TODO confirm
                document.body[endj : endj + 1] = ["\\end_layout", "", "\\begin_layout Standard"]
                document.body[endj + 3 : endj + 4] = put_cmd_in_ert("\\end{CodeInput}")
                document.body[endj + 13 : endj + 13] = ["\\end_layout", "", "\\begin_layout Standard"]
                document.body[j + 1 : j] = ["\\end_layout", "", "\\begin_layout Standard"]
                document.body[j : j + 1] = put_cmd_in_ert("\\begin{CodeInput}")
                j = j + 1
            # \CodeOutput
            if k != -1:
                k = find_token(document.body, "\\begin_layout Code Output", k)
            if k != -1:
                endk = find_end_of_layout(document.body, k)
                document.body[endk : endk + 1] = ["\\end_layout", "", "\\begin_layout Standard"]
                document.body[endk + 3 : endk + 4] = put_cmd_in_ert("\\end{CodeOutput}")
                document.body[endk + 13 : endk + 13] = ["\\end_layout", "", "\\begin_layout Standard"]
                document.body[k + 1 : k] = ["\\end_layout", "", "\\begin_layout Standard"]
                document.body[k : k + 1] = put_cmd_in_ert("\\begin{CodeOutput}")
                k = k + 1
            # \Code
            # NOTE(review): "\\begin_layout Code" is also the beginning of
            # the "Code Input" and "Code Output" layout lines; whether
            # find_token matches those here depends on its matching rules
            # -- confirm.
            if m != -1:
                m = find_token(document.body, "\\begin_layout Code", m)
            if m != -1:
                endm = find_end_of_layout(document.body, m)
                document.body[endm : endm + 1] = ["\\end_layout", "", "\\begin_layout Standard"]
                document.body[endm + 3 : endm + 4] = put_cmd_in_ert("\\end{Code}")
                document.body[endm + 13 : endm + 13] = ["\\end_layout", "", "\\begin_layout Standard"]
                document.body[m + 1 : m] = ["\\end_layout", "", "\\begin_layout Standard"]
                document.body[m : m + 1] = put_cmd_in_ert("\\begin{Code}")
                m = m + 1
def _swap_label_prefix(document, inset, what, key, old, new):
    """Rename a label prefix inside all CommandInset insets of one kind.

    inset -- inset kind searched for ("label" or "ref")
    what  -- inset name used in the warning message ("Label" or "Ref")
    key   -- parameter holding the label ("name" or "reference")
    old   -- prefix to be replaced, without the trailing colon
    new   -- replacement prefix, without the trailing colon

    In each inset only the first line of the form ``<key> "<old>:...`` is
    rewritten.  Insets without an end are reported and skipped.
    """
    rx = re.compile("^" + re.escape(key + ' "' + old + ":") + "(.+)$")
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CommandInset " + inset, i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: Can't find end of " +
                             what + " inset at line " + str(i))
            i += 1
            continue

        for p in range(i, j):
            m = rx.match(document.body[p])
            if m:
                document.body[p] = key + ' "' + new + ":" + m.group(1)
                break
        i += 1


def convert_subref(document):
    """Convert the "sub:" prefix of labels and references to "subsec:".

    Both the ``name`` of label insets and the ``reference`` of ref insets
    are updated.
    """
    # 1) label insets
    _swap_label_prefix(document, "label", "Label", "name", "sub", "subsec")
    # 2) xref insets
    _swap_label_prefix(document, "ref", "Ref", "reference", "sub", "subsec")


def revert_subref(document):
    """Revert the "subsec:" prefix of labels and references to "sub:".

    Both the ``name`` of label insets and the ``reference`` of ref insets
    are updated.
    """
    # 1) label insets
    _swap_label_prefix(document, "label", "Label", "name", "subsec", "sub")
    # 2) xref insets
    _swap_label_prefix(document, "ref", "Ref", "reference", "subsec", "sub")
def convert_nounzip(document):
    """Remove the noUnzip parameter of graphics insets.

    At most one line consisting of ``noUnzip`` (with optional surrounding
    whitespace at its end) is deleted per Graphics inset.
    """
    pattern = re.compile(r'\s*noUnzip\s*$')
    pos = 0
    while True:
        pos = find_token(document.body, "\\begin_inset Graphics", pos)
        if pos == -1:
            return
        inset_end = find_end_of_inset(document.body, pos)
        if inset_end == -1:
            document.warning("Malformed LyX document: Can't find end of graphics inset at line " + str(pos))
            pos += 1
            continue

        hit = find_re(document.body, pattern, pos, inset_end)
        if hit == -1:
            pos = inset_end + 1
        else:
            del document.body[hit]
            # the inset got one line shorter, so its old end index is
            # already the first line behind it
            pos = inset_end


def convert_revert_external_bbox(document, forward):
    """Add units to (or strip them from) the bounding box of external insets.

    With a true ``forward`` the four values of the ``boundingBox`` line get
    "bp" appended; otherwise each value is passed through length_in_bp.
    The line is rewritten as a tab, ``boundingBox`` and the four values
    separated by single spaces.
    """
    pattern = re.compile(r'^\s*boundingBox\s+\S+\s+\S+\s+\S+\s+\S+\s*$')
    pos = 0
    while True:
        pos = find_token(document.body, "\\begin_inset External", pos)
        if pos == -1:
            return
        inset_end = find_end_of_inset(document.body, pos)
        if inset_end == -1:
            document.warning("Malformed LyX document: Can't find end of external inset at line " + str(pos))
            pos += 1
            continue

        hit = find_re(document.body, pattern, pos, inset_end)
        if hit != -1:
            # the pattern guarantees the keyword followed by four values
            values = document.body[hit].split()[1:5]
            if forward:
                values = [value + "bp" for value in values]
            else:
                values = [length_in_bp(value) for value in values]
            document.body[hit] = "\tboundingBox " + " ".join(values)
        pos = inset_end + 1


def convert_external_bbox(document):
    " Add the bp unit to the bounding box of external insets "
    convert_revert_external_bbox(document, True)


def revert_external_bbox(document):
    " Convert the bounding box of external insets with length_in_bp "
    convert_revert_external_bbox(document, False)
##
# Conversion hub
#

supported_versions = ["2.2.0", "2.2"]

# Each entry pairs a number with the list of routines registered for it.
# Presumably the number is the file format reached by running the routines
# (convert counts upwards, revert downwards) -- confirm against the caller
# that consumes these tables.
convert = [
           [475, [convert_separator]],
           # nothing to do for 476: We consider it a bug that older versions
           # did not load amsmath automatically for these commands, and do not
           # want to hardcode amsmath off.
           [476, []],
           [477, []],
           [478, []],
           [479, []],
           [480, []],
           [481, [convert_dashes]],
           [482, [convert_phrases]],
           [483, [convert_specialchar]],
           [484, []],
           [485, []],
           [486, []],
           [487, []],
           [488, [convert_newgloss]],
           [489, [convert_BoxFeatures]],
           [490, [convert_origin]],
           [491, []],
           [492, [convert_colorbox]],
           [493, []],
           [494, []],
           [495, [convert_subref]],
           [496, [convert_nounzip]],
           [497, [convert_external_bbox]]
          ]

# Same layout as convert, listed in descending order.
revert =  [
           [496, [revert_external_bbox]],
           [495, []], # nothing to do since the noUnzip parameter was optional
           [494, [revert_subref]],
           [493, [revert_jss]],
           [492, [revert_mathmulticol]],
           [491, [revert_colorbox]],
           [490, [revert_textcolor]],
           [489, [revert_origin]],
           [488, [revert_BoxFeatures]],
           [487, [revert_newgloss, revert_glossgroup]],
           [486, [revert_forest]],
           [485, [revert_ex_itemargs]],
           [484, [revert_sigplan_doi]],
           [483, [revert_georgian]],
           [482, [revert_specialchar]],
           [481, [revert_phrases]],
           [480, [revert_dashes]],
           [479, [revert_question_env]],
           [478, [revert_beamer_lemma]],
           [477, [revert_xarrow]],
           [476, [revert_swissgerman]],
           [475, [revert_smash]],
           [474, [revert_separator]]
          ]


# Nothing happens when the module is run as a script.
if __name__ == "__main__":
    pass