# This file is part of lyx2lyx
# -*- coding: utf-8 -*-
# Copyright (C) 2008 José Matos
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

""" Convert files to the file format generated by lyx 2.0"""

import re, string
import unicodedata
import sys, os

from parser_tools import find_token, find_end_of, find_tokens, get_value, get_value_string

####################################################################
# Private helper functions

def find_end_of_inset(lines, i):
    """ Find end of inset, where lines[i] is included.

    Returns whatever find_end_of() returns for the
    "\\begin_inset" / "\\end_inset" pair; callers in this file treat -1
    as "not found"."""
    return find_end_of(lines, i, "\\begin_inset", "\\end_inset")


def add_to_preamble(document, text):
    """ Add text to the preamble if it is not already there.

    text is a list of lines. Only the first line is checked!"""
    if find_token(document.preamble, text[0], 0) != -1:
        return
    document.preamble.extend(text)


def insert_to_preamble(index, document, text):
    """ Insert text to the preamble at a given line.

    text is inserted as one single list element, even when it contains
    embedded newlines."""
    document.preamble.insert(index, text)


def read_unicodesymbols():
    """ Read the unicodesymbols list of unicode characters and corresponding commands.

    Returns a list of [command, character] pairs. For accent-like commands
    two extra spellings (without braces, and with a blank instead of braces)
    are appended as well."""
    try:
        to_char = unichr
    except NameError:
        # unichr does not exist on Python 3, where chr covers the full range.
        to_char = chr

    pathname = os.path.abspath(os.path.dirname(sys.argv[0]))
    # NOTE(review): str.strip('lyx2lyx') removes any of the characters
    # 'l', 'y', 'x', '2' from both ends of the path; it does not remove a
    # 'lyx2lyx' suffix as such. Kept unchanged -- confirm the intent.
    fp = open(os.path.join(pathname.strip('lyx2lyx'), 'unicodesymbols'))
    spec_chars = []
    # Two backslashes, followed by some non-word character, and then a
    # character in brackets. The idea is to check for constructs like:
    # \"{u}, which is how they are written in the unicodesymbols file; but
    # they can also be written as: \"u or even \" u.
    r = re.compile(r'\\\\(\W)\{(\w)\}')
    try:
        for line in fp:
            if line[0] == '#' or line.strip() == "":
                continue
            line = line.replace(' "', ' ')    # remove all quotation marks with spaces before
            line = line.replace('" ', ' ')    # remove all quotation marks with spaces after
            line = line.replace(r'\"', '"')   # replace \" by " (for characters with diaeresis)
            fields = line.split(None, 2)
            if len(fields) != 3:
                continue
            ucs4, command = fields[0], fields[1]
            if command[0:1] != "\\":
                continue
            try:
                # Base 0 parses the code point like a Python integer literal
                # (e.g. "0x00a0") without evaluating file content as code.
                char = to_char(int(ucs4, 0))
            except (ValueError, OverflowError):
                continue
            spec_chars.append([command, char])
            m = r.match(command)
            if m is None:
                continue
            command = "\\\\"
            # If the character is a double-quote, then we need to escape it,
            # too, since it is done that way in the LyX file.
            if m.group(1) == "\"":
                command += "\\"
            commandbl = command
            command += m.group(1) + m.group(2)
            commandbl += m.group(1) + ' ' + m.group(2)
            spec_chars.append([command, char])
            spec_chars.append([commandbl, char])
    finally:
        fp.close()
    return spec_chars


# Read once at import time; used by put_cmd_in_ert() and lyx2latex().
unicode_reps = read_unicodesymbols()


def put_cmd_in_ert(string):
    """ Wrap a LaTeX command string into the text of a collapsed ERT inset.

    Characters listed in unicode_reps are replaced by their commands and
    every backslash becomes a "\\backslash" line. The result is returned
    as one string with embedded newlines."""
    for rep in unicode_reps:
        string = string.replace(rep[1], rep[0].replace('\\\\', '\\'))
    string = string.replace('\\', "\\backslash\n")
    return "\\begin_inset ERT\nstatus collapsed\n\\begin_layout Standard\n" \
        + string + "\n\\end_layout\n\\end_inset"


def lyx2latex(document, lines):
    """ Convert some LyX stuff into corresponding LaTeX stuff, as best we can.

    lines is a list of LyX body lines; the converted lines are concatenated
    and returned as one string. As a side effect a textcomp snippet is added
    to the document preamble as soon as one line is converted."""
    # clean up multiline stuff
    pieces = []
    ert_end = 0

    for curline, line in enumerate(lines):
        if line.startswith("\\begin_inset ERT"):
            # We don't want to replace things inside ERT, so figure out
            # where the end of the inset is.
            ert_end = find_end_of_inset(lines, curline + 1)
            continue
        elif line.startswith("\\begin_inset Formula"):
            line = line[20:]
        elif line.startswith("\\begin_inset Quotes"):
            # For now, we do a very basic reversion. Someone who understands
            # quotes is welcome to fix it up.
            qtype = line[20:].strip()
            # lang = qtype[0]
            side = qtype[1]
            dbls = qtype[2]
            if side == "l":
                if dbls == "d":
                    line = "``"
                else:
                    line = "`"
            else:
                if dbls == "d":
                    line = "''"
                else:
                    line = "'"
        elif line.isspace() or \
                line.startswith("\\begin_layout") or \
                line.startswith("\\end_layout") or \
                line.startswith("\\begin_inset") or \
                line.startswith("\\end_inset") or \
                line.startswith("\\lang") or \
                line.strip() == "status collapsed" or \
                line.strip() == "status open":
            # skip all that stuff
            continue

        # this needs to be added to the preamble because of cases like
        # \textmu, \textbackslash, etc.
        add_to_preamble(document, ['% added by lyx2lyx for converted index entries',
                                   '\\@ifundefined{textmu}',
                                   ' {\\usepackage{textcomp}}{}'])
        # a lossless reversion is not possible
        # try at least to handle some common insets and settings
        if ert_end >= curline:
            line = line.replace(r'\backslash', r'\\')
        else:
            line = line.replace('&', '\\&{}')
            line = line.replace('#', '\\#{}')
            line = line.replace('^', '\\^{}')
            line = line.replace('%', '\\%{}')
            line = line.replace('_', '\\_{}')
            line = line.replace('$', '\\${}')

            # Do the LyX text --> LaTeX conversion
            for rep in unicode_reps:
                line = line.replace(rep[1], rep[0] + "{}")
            line = line.replace(r'\backslash', r'\textbackslash{}')
            line = line.replace(r'\series bold', r'\bfseries{}').replace(r'\series default', r'\mdseries{}')
            line = line.replace(r'\shape italic', r'\itshape{}').replace(r'\shape smallcaps', r'\scshape{}')
            line = line.replace(r'\shape slanted', r'\slshape{}').replace(r'\shape default', r'\upshape{}')
            line = line.replace(r'\emph on', r'\em{}').replace(r'\emph default', r'\em{}')
            line = line.replace(r'\noun on', r'\scshape{}').replace(r'\noun default', r'\upshape{}')
            line = line.replace(r'\bar under', r'\underbar{').replace(r'\bar default', r'}')
            line = line.replace(r'\family sans', r'\sffamily{}').replace(r'\family default', r'\normalfont{}')
            line = line.replace(r'\family typewriter', r'\ttfamily{}').replace(r'\family roman', r'\rmfamily{}')
            line = line.replace(r'\InsetSpace ', r'').replace(r'\SpecialChar ', r'')
        pieces.append(line)
    return "".join(pieces)


def _drop_trailing_attribute(line, attribute):
    """ Cut line one character before attribute and close it again with '>'.

    Everything from the attribute to the end of the line is discarded.
    The line is returned unchanged when the attribute does not occur."""
    pos = line.find(attribute)
    if pos == -1:
        return line
    return line[:pos - 1] + '>'


def _remove_all_tokens(lines, token):
    """ Delete every line of lines that find_token() reports for token. """
    i = 0
    while True:
        # After a deletion the next candidate cannot be before i, so the
        # search resumes there instead of rescanning from the top.
        i = find_token(lines, token, i)
        if i == -1:
            return
        del lines[i]


####################################################################


def revert_swiss(document):
    """ Set language german-ch to ngerman """
    if document.language == "german-ch":
        document.language = "ngerman"
        i = find_token(document.header, "\\language", 0)
        if i != -1:
            document.header[i] = "\\language ngerman"
    j = 0
    while True:
        j = find_token(document.body, "\\lang german-ch", j)
        if j == -1:
            return
        document.body[j] = document.body[j].replace("\\lang german-ch", "\\lang ngerman")
        j = j + 1


def revert_tabularvalign(document):
    """ Revert the tabular valign option.

    The tabularvalignment attribute is removed from the features line of
    every tabular. Tabulars that are neither longtables nor centered are
    additionally wrapped in a frameless box with the matching position."""
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Tabular", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: Could not find end of tabular.")
            # Step past this inset; restarting the search at -1 would find
            # the very same line again and never terminate.
            i += 1
            continue
        # the alignment is 2 lines below \\begin_inset Tabular
        features = document.body[i+2]

        # don't set a box for longtables, only delete tabularvalignment
        if features.find("islongtable") > -1:
            document.body[i+2] = _drop_trailing_attribute(features, "tabularvalignment")
            i = i + 1
            continue

        # when no longtable: which valignment is specified?
        tabularvalignment = 'c'
        if features.find('tabularvalignment="top"') > -1:
            tabularvalignment = 't'
        if features.find('tabularvalignment="bottom"') > -1:
            tabularvalignment = 'b'
        # delete tabularvalignment
        document.body[i+2] = _drop_trailing_attribute(features, "tabularvalignment")

        # don't add a box when centered
        if tabularvalignment == 'c':
            i = j
            continue
        document.body[j+1:j+1] = ['\\end_layout', '\\end_inset']  # just inserts those lines
        subst = ['\\begin_inset Box Frameless',
                 'position "' + tabularvalignment + '"',
                 'hor_pos "c"',
                 'has_inner_box 1',
                 'inner_pos "c"',
                 'use_parbox 0',
                 # we don't know the width, assume 50%
                 'width "50col%"',
                 'special "none"',
                 'height "1in"',
                 'height_special "totalheight"',
                 'status open',
                 '',
                 '\\begin_layout Plain Layout']
        document.body[i:i] = subst  # this just inserts the array at i
        i += len(subst) + 2  # adjust i to save a few cycles


def _revert_phantom_kind(document, kind):
    """ Replace every "Phantom <kind>" inset by ERT for the LaTeX command.

    kind is "Phantom", "HPhantom" or "VPhantom"; the LaTeX command written
    to the ERT is the lower-cased kind."""
    begin_token = '\\begin_inset Phantom ' + kind
    command = kind.lower()
    i = 0
    while True:
        i = find_token(document.body, begin_token, i)
        if i == -1:
            return
        # The inset start and the three lines after it are replaced by an
        # ERT inset that opens the command.
        substi = document.body[i].replace(begin_token,
            '\\begin_inset ERT\nstatus collapsed\n\n'
            '\\begin_layout Plain Layout\n\n\n\\backslash\n'
            + command + '{\n\\end_layout\n\n\\end_inset\n')
        substi = substi.split('\n')
        document.body[i : i+4] = substi
        i += len(substi)
        j = find_token(document.body, "\\end_layout", i)
        if j == -1:
            document.warning("Malformed LyX document: Could not find end of " + kind + " inset.")
            return
        # The next \end_layout and the three lines after it are replaced by
        # an ERT inset holding the closing brace.
        substj = document.body[j].replace('\\end_layout',
            '\\size default\n\n\\begin_inset ERT\nstatus collapsed\n\n'
            '\\begin_layout Plain Layout\n\n'
            '}\n\\end_layout\n\n\\end_inset\n')
        substj = substj.split('\n')
        document.body[j : j+4] = substj
        i += len(substj)


def revert_phantom(document):
    """ Reverts phantom to ERT """
    _revert_phantom_kind(document, "Phantom")


def revert_hphantom(document):
    """ Reverts hphantom to ERT """
    _revert_phantom_kind(document, "HPhantom")


def revert_vphantom(document):
    """ Reverts vphantom to ERT """
    _revert_phantom_kind(document, "VPhantom")


def _set_header_param(document, token, value):
    """ Rewrite the header line starting with token to "<token> <value>".

    A missing line only triggers a warning; nothing is written in that case
    (indexing the header with -1 would clobber its last line)."""
    i = find_token(document.header, token, 0)
    if i == -1:
        document.warning("Malformed LyX document: Missing " + token + ".")
        return
    document.header[i] = token + " " + value


def _read_font_name(document, token):
    """ Return the rest of the header line after token, or "default".

    "default" is returned (after a warning for a missing line) when the
    setting is absent or carries no value."""
    i = find_token(document.header, token, 0)
    if i == -1:
        document.warning("Malformed LyX document: Missing " + token + ".")
        return "default"
    m = re.match(re.escape(token) + r' (.*)$', document.header[i])
    if m is None:
        return "default"
    return m.group(1)


def _read_font_scale(document, token):
    """ Return the header value of token as a float, 100.0 if unusable. """
    try:
        return float(get_value(document.header, token, 0))
    except (TypeError, ValueError):
        document.warning("Malformed LyX document: Missing or invalid " + token + ".")
        return 100.0


def revert_xetex(document):
    """ Reverts documents that use XeTeX.

    The \\use_xetex header line is always removed. When XeTeX was in use,
    the input encoding is set to utf8-plain, the font settings are turned
    into fontspec preamble code and then reset to their defaults."""
    i = find_token(document.header, '\\use_xetex', 0)
    if i == -1:
        document.warning("Malformed LyX document: Missing \\use_xetex.")
        return
    use_xetex = get_value(document.header, "\\use_xetex", i)
    del document.header[i]
    if use_xetex == 'false':
        return

    # 1.) set doc encoding to utf8-plain
    _set_header_param(document, "\\inputencoding", "utf8-plain")

    # 2.) check font settings
    roman = _read_font_name(document, "\\font_roman")
    sans = _read_font_name(document, "\\font_sans")
    typewriter = _read_font_name(document, "\\font_typewriter")
    osf = get_value(document.header, '\\font_osf', 0) == "true"
    sf_scale = _read_font_scale(document, '\\font_sf_scale')
    tt_scale = _read_font_scale(document, '\\font_tt_scale')

    # 3.) set preamble stuff
    pretext = '%% This document must be processed with xelatex!\n'
    pretext += '\\usepackage{fontspec}\n'
    if roman != "default":
        pretext += '\\setmainfont[Mapping=tex-text]{' + roman + '}\n'
    if sans != "default":
        pretext += '\\setsansfont['
        if sf_scale != 100:
            pretext += 'Scale=' + str(sf_scale / 100) + ','
        pretext += 'Mapping=tex-text]{' + sans + '}\n'
    if typewriter != "default":
        pretext += '\\setmonofont'
        if tt_scale != 100:
            pretext += '[Scale=' + str(tt_scale / 100) + ']'
        pretext += '{' + typewriter + '}\n'
    if osf:
        pretext += '\\defaultfontfeatures{Numbers=OldStyle}\n'
    # The backslash is doubled on purpose: "\u" is not a valid escape in a
    # plain string literal and is a syntax error on Python 3.
    pretext += '\\usepackage{xunicode}\n'
    pretext += '\\usepackage{xltxtra}\n'
    insert_to_preamble(0, document, pretext)

    # 4.) reset font settings
    _set_header_param(document, "\\font_roman", "default")
    _set_header_param(document, "\\font_sans", "default")
    _set_header_param(document, "\\font_typewriter", "default")
    _set_header_param(document, "\\font_osf", "false")
    _set_header_param(document, "\\font_sc", "false")
    _set_header_param(document, "\\font_sf_scale", "100")
    _set_header_param(document, "\\font_tt_scale", "100")


def revert_outputformat(document):
    """ Remove default output format param """
    i = find_token(document.header, '\\default_output_format', 0)
    if i == -1:
        document.warning("Malformed LyX document: Missing \\default_output_format.")
        return
    del document.header[i]


def _hex_channel_to_fraction(hexpair):
    """ Convert a two-digit hex color channel to a float fraction.

    We want the output "0.5" for the value "127", therefore every non-zero
    value is incremented by one before it is divided by 256."""
    value = int(hexpair, 16)
    if value != 0:
        value = value + 1
    return float(value) / 256


def revert_backgroundcolor(document):
    """ Reverts background color to preamble code.

    Every \\backgroundcolor header line is removed; unless the color is
    "#ffffff", color-package code setting the page color is inserted at
    the top of the preamble."""
    i = 0
    while True:
        i = find_token(document.header, "\\backgroundcolor", i)
        if i == -1:
            return
        colorcode = get_value(document.header, '\\backgroundcolor', i)
        del document.header[i]
        # don't clutter the preamble if backgroundcolor is not set
        if colorcode == "#ffffff":
            continue
        # the color code is in the form #rrggbb where every character
        # denotes a hex number
        redout = _hex_channel_to_fraction(colorcode[1:3])
        greenout = _hex_channel_to_fraction(colorcode[3:5])
        blueout = _hex_channel_to_fraction(colorcode[5:7])
        # write the preamble
        insert_to_preamble(0, document,
            '% Commands inserted by lyx2lyx to set the background color\n'
            + '\\@ifundefined{definecolor}{\\usepackage{color}}{}\n'
            + '\\definecolor{page_backgroundcolor}{rgb}{'
            + str(redout) + ', ' + str(greenout)
            + ', ' + str(blueout) + '}\n'
            + '\\pagecolor{page_backgroundcolor}\n')


def revert_splitindex(document):
    """ Reverts splitindex-aware documents.

    Index definitions are removed from the header (and turned into
    splitidx preamble code when indices are in use), typed index insets
    become plain index insets or \\sindex ERT, and index_print insets lose
    their type, are deleted, or become \\printindex ERT."""
    i = find_token(document.header, '\\use_indices', 0)
    if i == -1:
        document.warning("Malformed LyX document: Missing \\use_indices.")
        return
    indices = get_value(document.header, "\\use_indices", i)
    preamble = ""
    if indices == "true":
        preamble += "\\usepackage{splitidx}\n"
    del document.header[i]

    # Header: drop every \index ... \end_index definition.
    index_re = re.compile(r'\\index (.*)$')
    i = 0
    while True:
        i = find_token(document.header, "\\index", i)
        if i == -1:
            break
        k = find_token(document.header, "\\end_index", i)
        if k == -1:
            document.warning("Malformed LyX document: Missing \\end_index.")
            return
        m = index_re.match(document.header[i])
        iname = m.group(1)
        ishortcut = get_value(document.header, '\\shortcut', i, k)
        if ishortcut != "" and indices == "true":
            preamble += "\\newindex[" + iname + "]{" + ishortcut + "}\n"
        del document.header[i:k+1]
        i = 0
    if preamble != "":
        insert_to_preamble(0, document, preamble)

    # Body: index insets.
    inset_re = re.compile(r'\\begin_inset Index (.*)$')
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Index", i)
        if i == -1:
            break
        m = inset_re.match(document.body[i])
        itype = m.group(1)
        if itype == "idx" or indices == "false":
            document.body[i] = "\\begin_inset Index"
        else:
            k = find_end_of_inset(document.body, i)
            if k == -1:
                return
            content = lyx2latex(document, document.body[i:k])
            # escape quotes
            content = content.replace('"', r'\"')
            subst = [put_cmd_in_ert("\\sindex[" + itype + "]{" + content + "}")]
            document.body[i:k+1] = subst
        i = i + 1

    # Body: index_print insets.
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CommandInset index_print", i)
        if i == -1:
            return
        k = find_end_of_inset(document.body, i)
        if k == -1:
            # Without an end the slices below would edit the wrong lines.
            document.warning("Malformed LyX document: Could not find end of index_print inset.")
            i = i + 1
            continue
        ptype = get_value(document.body, 'type', i, k).strip('"')
        if ptype == "idx":
            j = find_token(document.body, "type", i, k)
            if j != -1:
                del document.body[j]
        elif indices == "false":
            del document.body[i:k+1]
        else:
            subst = [put_cmd_in_ert("\\printindex[" + ptype + "]{}")]
            document.body[i:k+1] = subst
        i = i + 1


def convert_splitindex(document):
    """ Converts index and printindex insets to splitindex-aware format """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Index", i)
        if i == -1:
            break
        document.body[i] = document.body[i].replace("\\begin_inset Index",
            "\\begin_inset Index idx")
        i = i + 1
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CommandInset index_print", i)
        if i == -1:
            return
        if document.body[i + 1].find('LatexCommand printindex') == -1:
            document.warning("Malformed LyX document: Incomplete printindex inset.")
            return
        # The command line is replaced by itself plus a type line.
        subst = ["LatexCommand printindex",
                 "type \"idx\""]
        document.body[i + 1:i + 2] = subst
        i = i + 1


def revert_subindex(document):
    """ Reverts \\printsubindex CommandInset types.

    Such insets are deleted when indices are disabled, otherwise replaced
    by \\printsubindex ERT."""
    i = find_token(document.header, '\\use_indices', 0)
    if i == -1:
        document.warning("Malformed LyX document: Missing \\use_indices.")
        return
    indices = get_value(document.header, "\\use_indices", i)
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CommandInset index_print", i)
        if i == -1:
            return
        k = find_end_of_inset(document.body, i)
        if k == -1:
            # Without an end the slices below would edit the wrong lines.
            document.warning("Malformed LyX document: Could not find end of index_print inset.")
            i = i + 1
            continue
        ctype = get_value(document.body, 'LatexCommand', i, k)
        if ctype != "printsubindex":
            i = i + 1
            continue
        ptype = get_value(document.body, 'type', i, k).strip('"')
        if indices == "false":
            del document.body[i:k+1]
        else:
            subst = [put_cmd_in_ert("\\printsubindex[" + ptype + "]{}")]
            document.body[i:k+1] = subst
        i = i + 1


def revert_printindexall(document):
    """ Reverts \\print[sub]index* CommandInset types.

    Such insets are deleted when indices are disabled, otherwise replaced
    by ERT containing the starred command."""
    i = find_token(document.header, '\\use_indices', 0)
    if i == -1:
        document.warning("Malformed LyX document: Missing \\use_indices.")
        return
    indices = get_value(document.header, "\\use_indices", i)
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CommandInset index_print", i)
        if i == -1:
            return
        k = find_end_of_inset(document.body, i)
        if k == -1:
            # Without an end the slices below would edit the wrong lines.
            document.warning("Malformed LyX document: Could not find end of index_print inset.")
            i = i + 1
            continue
        ctype = get_value(document.body, 'LatexCommand', i, k)
        if ctype != "printindex*" and ctype != "printsubindex*":
            i = i + 1
            continue
        if indices == "false":
            del document.body[i:k+1]
        else:
            subst = [put_cmd_in_ert("\\" + ctype + "{}")]
            document.body[i:k+1] = subst
        i = i + 1


def revert_strikeout(document):
    """ Reverts \\strikeout character style """
    _remove_all_tokens(document.body, '\\strikeout')


def revert_uulinewave(document):
    """ Reverts \\uuline, and \\uwave character styles """
    _remove_all_tokens(document.body, '\\uuline')
    _remove_all_tokens(document.body, '\\uwave')


def revert_ulinelatex(document):
    """ Reverts \\uline character style.

    When the body contains "\\bar under", ulem-based preamble code that
    redefines \\underbar is inserted at the top of the preamble."""
    i = find_token(document.body, '\\bar under', 0)
    if i == -1:
        return
    insert_to_preamble(0, document,
        '% Commands inserted by lyx2lyx for proper underlining\n'
        + '\\PassOptionsToPackage{normalem}{ulem}\n'
        + '\\usepackage{ulem}\n'
        + '\\let\\cite@rig\\cite\n'
        + '\\newcommand{\\b@xcite}[2][\\%]{\\def\\def@pt{\\%}\\def\\pas@pt{#1}\n'
        + ' \\mbox{\\ifx\\def@pt\\pas@pt\\cite@rig{#2}\\else\\cite@rig[#1]{#2}\\fi}}\n'
        + '\\renewcommand{\\underbar}[1]{{\\let\\cite\\b@xcite\\uline{#1}}}\n')


def revert_custom_processors(document):
    """ Remove bibtex_command and index_command params.

    The two parameters are handled independently, so a missing
    \\bibtex_command no longer leaves \\index_command behind."""
    i = find_token(document.header, '\\bibtex_command', 0)
    if i == -1:
        document.warning("Malformed LyX document: Missing \\bibtex_command.")
    else:
        del document.header[i]
    i = find_token(document.header, '\\index_command', 0)
    if i == -1:
        document.warning("Malformed LyX document: Missing \\index_command.")
    else:
        del document.header[i]


def convert_nomencl_width(document):
    """ Add set_width param to nomencl_print """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CommandInset nomencl_print", i)
        if i == -1:
            break
        document.body.insert(i + 2, "set_width \"none\"")
        i = i + 1


def revert_nomencl_width(document):
    """ Remove set_width param from nomencl_print """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CommandInset nomencl_print", i)
        if i == -1:
            break
        j = find_end_of_inset(document.body, i)
        if j == -1:
            # Continuing from j == -1 would restart the search and loop forever.
            document.warning("Malformed LyX document: Could not find end of nomencl_print inset.")
            i = i + 1
            continue
        l = find_token(document.body, "set_width", i, j)
        if l == -1:
            document.warning("Can't find set_width option for nomencl_print!")
            i = j
            continue
        del document.body[l]
        i = i + 1


def revert_nomencl_cwidth(document):
    """ Remove width param from nomencl_print.

    The removed width is written to the preamble as \\nomlabelwidth."""
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CommandInset nomencl_print", i)
        if i == -1:
            break
        j = find_end_of_inset(document.body, i)
        if j == -1:
            # Continuing from j == -1 would restart the search and loop forever.
            document.warning("Malformed LyX document: Could not find end of nomencl_print inset.")
            i = i + 1
            continue
        l = find_token(document.body, "width", i, j)
        if l == -1:
            document.warning("Can't find width option for nomencl_print!")
            i = j
            continue
        width = get_value(document.body, "width", i, j).strip('"')
        del document.body[l]
        add_to_preamble(document, ["\\setlength{\\nomlabelwidth}{" + width + "}"])
        i = i + 1


def revert_applemac(document):
    """ Revert applemac encoding to auto """
    if document.encoding == "applemac":
        document.encoding = "auto"
        # NOTE(review): revert_xetex() looks up "\\inputencoding" in the
        # header while this looks up "\\encoding" -- confirm which token
        # is intended here.
        i = find_token(document.header, "\\encoding", 0)
        if i != -1:
            document.header[i] = "\\encoding auto"


def revert_longtable_align(document):
    """ Remove longtable alignment setting """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Tabular", i)
        if i == -1:
            break
        # the alignment is 2 lines below \\begin_inset Tabular
        # A table without the attribute is left as it is; later tables
        # are still processed.
        document.body[i+2] = _drop_trailing_attribute(document.body[i+2],
                                                      "longtabularalignment")
        i = i + 1


##
# Conversion hub
#
# Each entry of the two tables below pairs a number with the list of
# functions from this file to run for it (presumably the LyX file format
# number of the step -- confirm against the lyx2lyx driver).

supported_versions = ["2.0.0","2.0"]

# Ascending steps; most need no change to the document.
convert = [
    [346, []],
    [347, []],
    [348, []],
    [349, []],
    [350, []],
    [351, []],
    [352, [convert_splitindex]],
    [353, []],
    [354, []],
    [355, []],
    [356, []],
    [357, []],
    [358, []],
    [359, [convert_nomencl_width]],
    [360, []],
    [361, []],
    [362, []],
    [363, []]
]

# Descending steps, listed from the newest number down to the oldest.
revert = [
    [362, [revert_longtable_align]],
    [361, [revert_applemac]],
    [360, []],
    [359, [revert_nomencl_cwidth]],
    [358, [revert_nomencl_width]],
    [357, [revert_custom_processors]],
    [356, [revert_ulinelatex]],
    [355, [revert_uulinewave]],
    [354, [revert_strikeout]],
    [353, [revert_printindexall]],
    [352, [revert_subindex]],
    [351, [revert_splitindex]],
    [350, [revert_backgroundcolor]],
    [349, [revert_outputformat]],
    [348, [revert_xetex]],
    [347, [revert_phantom, revert_hphantom, revert_vphantom]],
    [346, [revert_tabularvalign]],
    [345, [revert_swiss]]
]


if __name__ == "__main__":
    pass