# -*- coding: utf-8 -*-
# This file is part of lyx2lyx
# -*- coding: utf-8 -*-
# Copyright (C) 2011 The LyX team
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.

""" Convert files to the file format generated by lyx 2.2"""

import re, string
import unicodedata
import sys, os

# Uncomment only what you need to import, please.

#from parser_tools import find_token, find_end_of, find_tokens, \
#  find_token_exact, find_end_of_inset, find_end_of_layout, \
#  find_token_backwards, is_in_inset, get_value, get_quoted_value, \
#  del_token, check_token, get_option_value

from lyx2lyx_tools import add_to_preamble, put_cmd_in_ert, lyx2latex#, \
#  insert_to_preamble, latex_length, revert_flex_inset, \
#  revert_font_attrs, hex2ratio, str2bool

from parser_tools import find_token, find_token_backwards, find_re, \
     find_end_of_inset, find_end_of_layout, find_nonempty_line, \
     get_containing_layout, get_value, check_token


###############################################################################
###
### Private helpers
###
###############################################################################

# Text style attributes and the value that resets each of them.
# Kept as an ordered sequence so that the reset lines are always emitted
# in the same order, independently of the Python version.
_STYLE_DEFAULTS = (
    ("family", "default"),
    ("series", "default"),
    ("shape", "default"),
    ("size", "default"),
    ("bar", "default"),
    ("color", "inherit"),
)

# Insets whose contents must not be touched by the dash conversion.
_DASH_SKIPPED_INSETS = ["CommandInset", "ERT", "External", "Formula",
                        "Graphics", "IPA", "listings"]

# Insets whose contents must not be touched by the phrase and special
# character conversions.
_PHRASE_SKIPPED_INSETS = ["CommandInset", "External", "Formula",
                          "Graphics", "listings"]


def _reset_text_styles(body, lay, j):
    """
    Insert style reset lines at index j of body.

    lay is the layout tuple returned by get_containing_layout(); the lines
    body[lay[1]:lay[2]] are searched for each attribute of _STYLE_DEFAULTS
    and one reset line is inserted for every attribute that occurs.
    Returns the number of inserted lines so that the caller can shift its
    own indices.
    """
    content = "\n".join(body[lay[1]:lay[2]])
    resets = ["\\%s %s" % (name, default)
              for name, default in _STYLE_DEFAULTS
              if ("\\%s" % name) in content]
    body[j:j] = resets
    return len(resets)


def _skip_inset(document, i, inset_types):
    """
    Tell whether body line i opens an inset that has to be skipped.

    Returns None if line i is not a "\\begin_inset" line for one of
    inset_types. Otherwise returns the index at which scanning continues:
    the index of the end of the inset, or i + 1 (after emitting a warning)
    if the end of the inset cannot be found.
    """
    words = document.body[i].split()
    if len(words) < 2 or words[0] != "\\begin_inset" \
       or words[1] not in inset_types:
        return None
    j = find_end_of_inset(document.body, i)
    if j == -1:
        document.warning("Malformed LyX document: Can't find end of "
                         + words[1] + " inset at line " + str(i))
        return i + 1
    return j


def _formula_uses_command(document, commands):
    """
    Return True if any Formula inset of the body contains one of commands.

    Each entry of commands is searched for with a leading backslash.
    Formula insets whose end cannot be found are reported and skipped.
    """
    i = 0
    while True:
        i = find_token(document.body, '\\begin_inset Formula', i)
        if i == -1:
            return False
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: Can't find end of Formula inset at line " + str(i))
            i += 1
            continue
        code = "\n".join(document.body[i:j])
        for c in commands:
            if ("\\%s" % c) in code:
                return True
        i = j


###############################################################################
###
### Conversion and reversion routines
###
###############################################################################

def convert_separator(document):
    """
    Convert layout separators to separator insets and add (LaTeX) paragraph
    breaks in order to mimic previous LaTeX export.

    Three passes are made over the body:
    1. a parbreak separator inset is inserted at the last "\\end_layout"
       found before each "\\begin_deeper";
    2. the same is done before each "\\align" that is not inside a
       Plain Layout, provided the preceding layout is Standard and contains
       neither "\\align" nor a VSpace inset;
    3. every Separator / EndOfSlide layout is replaced by a Standard layout
       holding a parbreak separator inset.
    In passes 1 and 2 any text style used in the preceding layout is reset
    before the inset is inserted.
    """

    parins = ["\\begin_inset Separator parbreak", "\\end_inset", ""]
    parlay = ["\\begin_layout Standard", "\\begin_inset Separator parbreak",
              "\\end_inset", "", "\\end_layout", ""]

    # Pass 1: paragraph break before nested (deeper) paragraphs
    i = 0
    while True:
        i = find_token(document.body, "\\begin_deeper", i)
        if i == -1:
            break

        j = find_token_backwards(document.body, "\\end_layout", i-1)
        if j != -1:
            # reset any text style before inserting the inset
            lay = get_containing_layout(document.body, j-1)
            if lay != False:
                added = _reset_text_styles(document.body, lay, j)
                i = i + added
                j = j + added
            document.body[j:j] = parins
            i = i + len(parins) + 1
        else:
            i = i + 1

    # Pass 2: paragraph break before paragraphs with an explicit alignment
    i = 0
    while True:
        i = find_token(document.body, "\\align", i)
        if i == -1:
            break

        lay = get_containing_layout(document.body, i)
        if lay != False and lay[0] == "Plain Layout":
            i = i + 1
            continue

        j = find_token_backwards(document.body, "\\end_layout", i-1)
        if j != -1:
            lay = get_containing_layout(document.body, j-1)
            if lay != False and lay[0] == "Standard" \
               and find_token(document.body, "\\align", lay[1], lay[2]) == -1 \
               and find_token(document.body, "\\begin_inset VSpace", lay[1], lay[2]) == -1:
                # reset any text style before inserting the inset
                added = _reset_text_styles(document.body, lay, j)
                i = i + added
                j = j + added
                document.body[j:j] = parins
                i = i + len(parins) + 1
            else:
                i = i + 1
        else:
            i = i + 1

    # Pass 3: replace the separator layouts themselves
    regexp = re.compile(r'^\\begin_layout (?:(-*)|(\s*))(Separator|EndOfSlide)(?:(-*)|(\s*))$', re.IGNORECASE)

    i = 0
    while True:
        i = find_re(document.body, regexp, i)
        if i == -1:
            return

        j = find_end_of_layout(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: Missing `\\end_layout'.")
            return

        lay = get_containing_layout(document.body, j-1)
        if lay != False:
            lines = document.body[lay[3]:lay[2]]
        else:
            lines = []

        document.body[i:j+1] = parlay
        if len(lines) > 0:
            # keep the lines of the old layout inside the new Standard layout
            document.body[i+1:i+1] = lines

        i = i + len(parlay) + len(lines) + 1


def revert_separator(document):
    """
    Revert separator insets to layout separators.

    A "plain" separator inset is replaced by an ERT inset (containing a
    space if something follows it in the layout, "%" otherwise). Any other
    separator inset is turned into a separator layout, a new Standard
    layout, a nested "\\begin_deeper" block, or is simply removed,
    depending on the containing layout and on what surrounds the inset.
    The separator layout is named "Separator" for the beamer classes and
    "--Separator--" for all other classes.
    """

    beamer_classes = ["beamer", "article-beamer", "scrarticle-beamer"]
    if document.textclass in beamer_classes:
        beglaysep = "\\begin_layout Separator"
    else:
        beglaysep = "\\begin_layout --Separator--"

    parsep = [beglaysep, "", "\\end_layout", ""]
    comert = ["\\begin_inset ERT", "status collapsed", "",
              "\\begin_layout Plain Layout", "%", "\\end_layout",
              "", "\\end_inset", ""]
    empert = ["\\begin_inset ERT", "status collapsed", "",
              "\\begin_layout Plain Layout", " ", "\\end_layout",
              "", "\\end_inset", ""]

    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Separator", i)
        if i == -1:
            return

        lay = get_containing_layout(document.body, i)
        if lay == False:
            document.warning("Malformed LyX document: Can't convert separator inset at line " + str(i))
            i = i + 1
            continue

        layoutname = lay[0]
        beg = lay[1]
        end = lay[2]
        # flavour of the separator; only "plain" is treated specially below
        kind = get_value(document.body, "\\begin_inset Separator", i, i+1, "plain").split()[1]
        before = document.body[beg+1:i]
        something_before = len(before) > 0 and len("".join(before)) > 0
        j = find_end_of_inset(document.body, i)
        after = document.body[j+1:end]
        something_after = len(after) > 0 and len("".join(after)) > 0
        if kind == "plain":
            # only the inset itself is replaced
            beg = beg + len(before) + 1
        elif something_before:
            # close the layout in front of the inset; all indices behind
            # the insertion point move by the two added lines
            document.body[i:i] = ["\\end_layout", ""]
            i = i + 2
            j = j + 2
            beg = i
            end = end + 2

        if kind == "plain":
            if something_after:
                document.body[beg:j+1] = empert
                i = i + len(empert)
            else:
                document.body[beg:j+1] = comert
                i = i + len(comert)
        else:
            if something_after:
                if layoutname == "Standard":
                    if not something_before:
                        document.body[beg:j+1] = parsep
                        i = i + len(parsep)
                        document.body[i:i] = ["", "\\begin_layout Standard"]
                        i = i + 2
                    else:
                        document.body[beg:j+1] = ["\\begin_layout Standard"]
                        i = i + 1
                else:
                    document.body[beg:j+1] = ["\\begin_deeper"]
                    i = i + 1
                    end = end + 1 - (j + 1 - beg)
                    if not something_before:
                        document.body[i:i] = parsep
                        i = i + len(parsep)
                        end = end + len(parsep)
                    document.body[i:i] = ["\\begin_layout Standard"]
                    document.body[end+2:end+2] = ["", "\\end_deeper", ""]
                    i = i + 4
            else:
                next_par_is_aligned = False
                k = find_nonempty_line(document.body, end+1)
                if k != -1 and check_token(document.body[k], "\\begin_layout"):
                    lay = get_containing_layout(document.body, k)
                    next_par_is_aligned = lay != False and \
                            find_token(document.body, "\\align", lay[1], lay[2]) != -1
                if k != -1 and not next_par_is_aligned \
                        and not check_token(document.body[k], "\\end_deeper") \
                        and not check_token(document.body[k], "\\begin_deeper"):
                    if layoutname == "Standard":
                        document.body[beg:j+1] = [beglaysep]
                        i = i + 1
                    else:
                        document.body[beg:j+1] = ["\\begin_deeper", beglaysep]
                        end = end + 2 - (j + 1 - beg)
                        document.body[end+1:end+1] = ["", "\\end_deeper", ""]
                        i = i + 3
                else:
                    # no separator layout is written in this case,
                    # the inset is just dropped
                    if something_before:
                        del document.body[i:end+1]
                    else:
                        del document.body[i:end-1]

        i = i + 1


def revert_smash(document):
    """
    Set amsmath to on if smash commands are used.

    Nothing is changed unless the header line "\\use_package amsmath" has
    the value "1" (auto) and a Formula inset contains one of the commands
    smash[t], smash[b] or notag.
    """

    commands = ["smash[t]", "smash[b]", "notag"]
    i = find_token(document.header, "\\use_package amsmath", 0)
    if i == -1:
        document.warning("Malformed LyX document: Can't find \\use_package amsmath.")
        return
    value = get_value(document.header, "\\use_package amsmath", i).split()[1]
    if value != "1":
        # nothing to do if package is not auto but on or off
        return
    if _formula_uses_command(document, commands):
        # set amsmath to on, since it is loaded by the newer format
        document.header[i] = "\\use_package amsmath 2"


def revert_swissgerman(document):
    """
    Set language german-ch-old to german.

    Both the document language (attribute and "\\language" header line) and
    every "\\lang german-ch-old" switch in the body are changed.
    """
    if document.language == "german-ch-old":
        document.language = "german"
        i = find_token(document.header, "\\language", 0)
        if i != -1:
            document.header[i] = "\\language german"
    j = 0
    while True:
        j = find_token(document.body, "\\lang german-ch-old", j)
        if j == -1:
            return
        document.body[j] = document.body[j].replace("\\lang german-ch-old", "\\lang german")
        j = j + 1


def revert_use_package(document, pkg, commands, oldauto):
    """
    Remove the "\\use_package pkg" header line and load pkg in the preamble
    if it is needed.

    pkg      -- name of the LaTeX package
    commands -- command names (without backslash) that require the package
    oldauto  -- defines how the version we are reverting to behaves:
                if it is true, the old version uses the package
                automatically; if it is false, the old version never uses
                the package.

    The package is added to the preamble if the setting was "2" (on), or if
    it was "1" (auto, also assumed when the header line is missing),
    oldauto is false and a Formula inset uses one of commands.
    """
    regexp = re.compile(r'(\\use_package\s+%s)' % pkg)
    i = find_re(document.header, regexp, 0)
    value = "1" # default is auto
    if i != -1:
        value = get_value(document.header, "\\use_package" , i).split()[1]
        del document.header[i]
    if value == "2": # on
        add_to_preamble(document, ["\\usepackage{" + pkg + "}"])
    elif value == "1" and not oldauto: # auto
        if _formula_uses_command(document, commands):
            add_to_preamble(document, ["\\usepackage{" + pkg + "}"])


mathtools_commands = ["xhookrightarrow", "xhookleftarrow", "xRightarrow", \
                      "xrightharpoondown", "xrightharpoonup", "xrightleftharpoons", \
                      "xLeftarrow", "xleftharpoondown", "xleftharpoonup", \
                      "xleftrightarrow", "xLeftrightarrow", "xleftrightharpoons", \
                      "xmapsto"]


def revert_xarrow(document):
    "remove use_package mathtools"
    revert_use_package(document, "mathtools", mathtools_commands, False)


def revert_beamer_lemma(document):
    """
    Reverts beamer lemma layout to ERT.

    Only acts on the beamer text classes. Each Lemma layout becomes a
    Standard layout; the contents of its Argument 1 and Argument 2 insets
    are wrapped in ERT "<" ">" and "[" "]" respectively. The ERT commands
    \\begin{lemma} and \\end{lemma} are added around each run of
    consecutive Lemma layouts.
    """

    beamer_classes = ["beamer", "article-beamer", "scrarticle-beamer"]
    if document.textclass not in beamer_classes:
        return

    consecutive = False
    i = 0
    while True:
        i = find_token(document.body, "\\begin_layout Lemma", i)
        if i == -1:
            return
        j = find_end_of_layout(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: Can't find end of Lemma layout")
            i += 1
            continue
        arg1 = find_token(document.body, "\\begin_inset Argument 1", i, j)
        endarg1 = find_end_of_inset(document.body, arg1)
        arg2 = find_token(document.body, "\\begin_inset Argument 2", i, j)
        endarg2 = find_end_of_inset(document.body, arg2)
        subst1 = []
        subst2 = []
        if arg1 != -1:
            beginPlain1 = find_token(document.body, "\\begin_layout Plain Layout", arg1, endarg1)
            if beginPlain1 == -1:
                document.warning("Malformed LyX document: Can't find arg1 plain Layout")
                i += 1
                continue
            endPlain1 = find_end_of_inset(document.body, beginPlain1)
            content1 = document.body[beginPlain1 + 1 : endPlain1 - 2]
            subst1 = put_cmd_in_ert("<") + content1 + put_cmd_in_ert(">")
        if arg2 != -1:
            beginPlain2 = find_token(document.body, "\\begin_layout Plain Layout", arg2, endarg2)
            if beginPlain2 == -1:
                document.warning("Malformed LyX document: Can't find arg2 plain Layout")
                i += 1
                continue
            endPlain2 = find_end_of_inset(document.body, beginPlain2)
            content2 = document.body[beginPlain2 + 1 : endPlain2 - 2]
            subst2 = put_cmd_in_ert("[") + content2 + put_cmd_in_ert("]")

        # remove Arg insets (the later one first, so that the indices of
        # the earlier one stay valid)
        if arg1 < arg2:
            del document.body[arg2 : endarg2 + 1]
            if arg1 != -1:
                del document.body[arg1 : endarg1 + 1]
        if arg2 < arg1:
            del document.body[arg1 : endarg1 + 1]
            if arg2 != -1:
                del document.body[arg2 : endarg2 + 1]

        # index of end layout has probably changed
        j = find_end_of_layout(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: Can't find end of Lemma layout")
            i += 1
            continue

        begcmd = []

        # if this is not a consecutive env, add start command
        if not consecutive:
            begcmd = put_cmd_in_ert("\\begin{lemma}")

        # has this a consecutive lemma?
        # (guard the index: the layout may be the last thing in the body)
        consecutive = j + 2 < len(document.body) and \
                      document.body[j + 2] == "\\begin_layout Lemma"

        # if this is not followed by a consecutive env, add end command
        if not consecutive:
            document.body[j : j + 1] = put_cmd_in_ert("\\end{lemma}") + ["\\end_layout"]

        document.body[i : i + 1] = ["\\begin_layout Standard", ""] + begcmd + subst1 + subst2

        i = j


def revert_question_env(document):
    """
    Reverts question and question* environments of
    theorems-ams-extended-bytype module to ERT

    Does nothing unless the module is used by the document. Each Question
    or Question* layout becomes a Standard layout; ERT begin/end commands
    are added around each run of consecutive layouts of the same type and
    the matching theorem definitions are added to the preamble.
    """

    # Do we use theorems-ams-extended-bytype module?
    if "theorems-ams-extended-bytype" not in document.get_module_list():
        return

    consecutive = False
    i = 0
    while True:
        # this also finds "Question*", which starts with the same token
        i = find_token(document.body, "\\begin_layout Question", i)
        if i == -1:
            return

        starred = document.body[i] == "\\begin_layout Question*"
        if starred:
            env = "question*"
            layout = "\\begin_layout Question*"
        else:
            env = "question"
            layout = "\\begin_layout Question"

        j = find_end_of_layout(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: Can't find end of Question layout")
            i += 1
            continue

        # if this is not a consecutive env, add start command
        begcmd = []
        if not consecutive:
            begcmd = put_cmd_in_ert("\\begin{" + env + "}")

        # has this a consecutive theorem of same type?
        # (guard the index: the layout may be the last thing in the body)
        consecutive = j + 2 < len(document.body) and \
                      document.body[j + 2] == layout

        # if this is not followed by a consecutive env, add end command
        if not consecutive:
            document.body[j : j + 1] = put_cmd_in_ert("\\end{" + env + "}") + ["\\end_layout"]

        document.body[i : i + 1] = ["\\begin_layout Standard", ""] + begcmd

        add_to_preamble(document, "\\providecommand{\\questionname}{Question}")

        if starred:
            add_to_preamble(document, "\\theoremstyle{plain}\n" \
                                      "\\newtheorem*{question*}{\\protect\\questionname}")
        else:
            add_to_preamble(document, "\\theoremstyle{plain}\n" \
                                      "\\newtheorem{question}{\\protect\\questionname}")

        i = j


def convert_dashes(document):
    """
    convert -- and --- to \\twohyphens and \\threehyphens

    Only done for the latex backend. A body line is cut after each
    converted token; the rest of the line is inserted as a new line and is
    processed in turn.
    """

    if document.backend != "latex":
        return

    i = 0
    while i < len(document.body):
        # must not replace anything in insets that store LaTeX contents in .lyx files
        # (math and command insets without overridden read() and write() methods)
        # filtering out IPA makes Text::readParToken() more simple
        # skip ERT as well since it is not needed there
        skip = _skip_inset(document, i, _DASH_SKIPPED_INSETS)
        if skip is not None:
            i = skip
            continue
        while True:
            j = document.body[i].find("--")
            if j == -1:
                break
            front = document.body[i][:j]
            back = document.body[i][j+2:]
            # We can have an arbitrary number of consecutive hyphens.
            # These must be split into the corresponding number of two and three hyphens
            # We must match what LaTeX does: First try emdash, then endash, then single hyphen
            if back.startswith("-"):
                back = back[1:]
                token = "\\threehyphens"
            else:
                token = "\\twohyphens"
            if len(back) > 0:
                document.body.insert(i+1, back)
            # the line now ends with the token, so the inner loop stops here
            document.body[i] = front + token
        i += 1


def revert_dashes(document):
    """
    convert \\twohyphens and \\threehyphens to -- and ---

    After a replacement the following line is joined to the current one if
    it is plain text or starts with one of the two tokens, and the joined
    line is not longer than 80 characters.
    """

    i = 0
    while i < len(document.body):
        # see convert_dashes
        skip = _skip_inset(document, i, _DASH_SKIPPED_INSETS)
        if skip is not None:
            i = skip
            continue
        replaced = False
        if "\\twohyphens" in document.body[i]:
            document.body[i] = document.body[i].replace("\\twohyphens", "--")
            replaced = True
        if "\\threehyphens" in document.body[i]:
            document.body[i] = document.body[i].replace("\\threehyphens", "---")
            replaced = True
        if replaced and i+1 < len(document.body) and \
           (not document.body[i+1].startswith("\\") or \
            document.body[i+1].startswith(("\\twohyphens", "\\threehyphens"))) and \
           len(document.body[i]) + len(document.body[i+1]) <= 80:
            # join with the next line and look at the joined line again
            document.body[i] = document.body[i] + document.body[i+1]
            del document.body[i+1]
        else:
            i += 1


# order is important for the last three!
phrases = ["LyX", "LaTeX2e", "LaTeX", "TeX"]


def is_part_of_converted_phrase(line, j, phrase):
    """
    is phrase part of an already converted phrase?

    Returns True if the occurrence of phrase at index j of line is the tail
    of a "\\SpecialCharNoPassThru" token for any entry of phrases.
    """
    for p in phrases:
        converted = "\\SpecialCharNoPassThru \\" + p
        pos = j + len(phrase) - len(converted)
        if pos >= 0 and line[pos:pos+len(converted)] == converted:
            return True
    return False


def convert_phrases(document):
    """
    convert special phrases from plain text to \\SpecialCharNoPassThru

    Only done for the latex backend. Lines starting with a backslash are
    left alone. A line is cut after the converted phrase; the rest of the
    line is inserted as a new line and is processed in turn.
    """

    if document.backend != "latex":
        return

    for phrase in phrases:
        i = 0
        while i < len(document.body):
            # must not replace anything in insets that store LaTeX contents in .lyx files
            # (math and command insets without overridden read() and write() methods)
            skip = _skip_inset(document, i, _PHRASE_SKIPPED_INSETS)
            if skip is not None:
                i = skip
                continue
            if document.body[i].startswith("\\"):
                i += 1
                continue
            j = document.body[i].find(phrase)
            if j == -1:
                i += 1
                continue
            if not is_part_of_converted_phrase(document.body[i], j, phrase):
                front = document.body[i][:j]
                back = document.body[i][j+len(phrase):]
                if len(back) > 0:
                    document.body.insert(i+1, back)
                # We cannot use SpecialChar since we do not know whether we are outside passThru
                document.body[i] = front + "\\SpecialCharNoPassThru \\" + phrase
            i += 1


def revert_phrases(document):
    """
    convert special phrases to plain text

    After a replacement the following line is joined to the current one if
    it is plain text or starts with "\\SpecialChar", and the joined line is
    not longer than 80 characters.
    """

    i = 0
    while i < len(document.body):
        # see convert_phrases
        skip = _skip_inset(document, i, _PHRASE_SKIPPED_INSETS)
        if skip is not None:
            i = skip
            continue
        replaced = False
        for phrase in phrases:
            # we can replace SpecialChar since LyX ensures that it cannot be inserted into passThru parts
            if ("\\SpecialChar \\" + phrase) in document.body[i]:
                document.body[i] = document.body[i].replace("\\SpecialChar \\" + phrase, phrase)
                replaced = True
            if ("\\SpecialCharNoPassThru \\" + phrase) in document.body[i]:
                document.body[i] = document.body[i].replace("\\SpecialCharNoPassThru \\" + phrase, phrase)
                replaced = True
        if replaced and i+1 < len(document.body) and \
           (not document.body[i+1].startswith("\\") or \
            document.body[i+1].startswith("\\SpecialChar")) and \
           len(document.body[i]) + len(document.body[i+1]) <= 80:
            # join with the next line and look at the joined line again
            document.body[i] = document.body[i] + document.body[i+1]
            del document.body[i+1]
        else:
            i += 1


def convert_specialchar_internal(document, forward):
    """
    Rename the arguments of \\SpecialChar and \\SpecialCharNoPassThru.

    If forward is true the old LaTeX-like names are replaced by the new
    symbolic names, otherwise the symbolic names are replaced by the old
    ones.
    """
    # (old name, new name); an ordered sequence, because the order of the
    # replacements matters
    specialchars = (
        ("\\-", "softhyphen"),
        ("\\textcompwordmark{}", "ligaturebreak"),
        ("\\@.", "endofsentence"),
        ("\\ldots{}", "ldots"),
        ("\\menuseparator", "menuseparator"),
        ("\\slash{}", "breakableslash"),
        ("\\nobreakdash-", "nobreakdash"),
        ("\\LyX", "LyX"),
        ("\\TeX", "TeX"),
        ("\\LaTeX2e", "LaTeX2e"),
        ("\\LaTeX", "LaTeX"),  # must be after LaTeX2e
    )
    prefixes = ("\\SpecialChar ", "\\SpecialCharNoPassThru ")

    i = 0
    while i < len(document.body):
        # see convert_phrases
        skip = _skip_inset(document, i, _PHRASE_SKIPPED_INSETS)
        if skip is not None:
            i = skip
            continue
        line = document.body[i]
        for key, value in specialchars:
            if forward:
                old, new = key, value
            else:
                old, new = value, key
            for prefix in prefixes:
                line = line.replace(prefix + old, prefix + new)
        document.body[i] = line
        i += 1


def convert_specialchar(document):
    "convert special characters to new syntax"
    convert_specialchar_internal(document, True)


def revert_specialchar(document):
    "convert special characters to old syntax"
    convert_specialchar_internal(document, False)


def revert_georgian(document):
    """
    Set the document language for new supported languages to English

    For a georgian document the language is set to english, the language
    package "default" is replaced by "babel" and "georgian" is added to the
    document class options.
    """

    if document.language == "georgian":
        document.language = "english"
        i = find_token(document.header, "\\language georgian", 0)
        if i != -1:
            document.header[i] = "\\language english"
        j = find_token(document.header, "\\language_package default", 0)
        if j != -1:
            document.header[j] = "\\language_package babel"
        k = find_token(document.header, "\\options", 0)
        if k != -1:
            document.header[k] = document.header[k].replace("\\options", "\\options georgian,")
        else:
            # no options line yet: add one after \use_default_options
            pos = find_token(document.header, "\\use_default_options", 0)
            document.header.insert(pos + 1, "\\options georgian")


def revert_sigplan_doi(document):
    """
    Reverts sigplanconf DOI layout to ERT

    Only acts on the sigplanconf text class. Each DOI layout is removed
    from the body and its contents, converted with lyx2latex(), are added
    to the preamble as the argument of \\doi.
    """

    if document.textclass != "sigplanconf":
        return

    i = 0
    while True:
        i = find_token(document.body, "\\begin_layout DOI", i)
        if i == -1:
            return
        j = find_end_of_layout(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: Can't find end of DOI layout")
            i += 1
            continue

        content = lyx2latex(document, document.body[i:j + 1])
        add_to_preamble(document, ["\\doi{" + content + "}"])
        del document.body[i:j + 1]
        # no need to reset i


##
# Conversion hub
#

supported_versions = ["2.2.0", "2.2"]
convert = [
           [475, [convert_separator]],
           # nothing to do for 476: We consider it a bug that older versions
           # did not load amsmath automatically for these commands, and do not
           # want to hardcode amsmath off.
           [476, []],
           [477, []],
           [478, []],
           [479, []],
           [480, []],
           [481, [convert_dashes]],
           [482, [convert_phrases]],
           [483, [convert_specialchar]],
           [484, []],
           [485, []]
          ]

revert =  [
           [484, [revert_sigplan_doi]],
           [483, [revert_georgian]],
           [482, [revert_specialchar]],
           [481, [revert_phrases]],
           [480, [revert_dashes]],
           [479, [revert_question_env]],
           [478, [revert_beamer_lemma]],
           [477, [revert_xarrow]],
           [476, [revert_swissgerman]],
           [475, [revert_smash]],
           [474, [revert_separator]]
          ]


if __name__ == "__main__":
    pass