# This file is part of lyx2lyx
# -*- coding: utf-8 -*-
# Copyright (C) 2006 José Matos
# Copyright (C) 2004-2006 Georg Baum
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.

""" Convert files to the file format generated by lyx 1.5"""

import re
from parser_tools import find_token, find_token_exact, find_tokens, find_end_of, get_value
from LyX import get_encoding

####################################################################
# Private helper functions

def find_end_of_inset(lines, i):
    """Find the end of an inset, where lines[i] is included.

    Thin wrapper around find_end_of() using the "\\begin_inset" /
    "\\end_inset" token pair.  Callers in this file treat a result of -1
    as "no matching end found".
    """
    return find_end_of(lines, i, "\\begin_inset", "\\end_inset")


def _pop_header_setting(document, name, start, default):
    """Remove the header setting `name` and return (value, position).

    The header is searched from index `start`.  When the setting is found
    its line is deleted and `position` is the index it occupied.  When it
    is missing a warning is issued and (default, -1) is returned.
    """
    pos = find_token_exact(document.header, name, start)
    if pos == -1:
        document.warning("Malformed LyX document: Missing `%s'." % name)
        return default, -1
    value = get_value(document.header, name, pos, pos + 1)
    del document.header[pos]
    return value, pos


def _ert_lines(document, command):
    """Return the body lines of a collapsed ERT inset holding \\<command>.

    The backslash is emitted as a separate "\\backslash" line followed by
    the bare command text.
    """
    return ['\\begin_inset ERT',
            'status collapsed',
            '',
            '\\begin_layout %s' % document.default_layout,
            '',
            '',
            '\\backslash',
            command,
            '\\end_layout',
            '',
            '\\end_inset']

# End of helper functions
####################################################################


##
#  Notes: Framed/Shaded
#

def revert_framed(document):
    """Revert framed notes.

    Every "Note Framed" or "Note Shaded" inset header in the body is
    rewritten to a plain "Note" inset header.
    """
    i = 0
    while True:
        i = find_tokens(document.body,
                        ["\\begin_inset Note Framed", "\\begin_inset Note Shaded"],
                        i)
        if i == -1:
            return
        document.body[i] = "\\begin_inset Note"
        i = i + 1


##
#  Fonts
#

# Each table maps an old \fontscheme name to the value written for the
# corresponding new per-family header setting.
roman_fonts      = {'default' : 'default', 'ae'       : 'ae',
                    'times'   : 'times',   'palatino' : 'palatino',
                    'helvet'  : 'default', 'avant'    : 'default',
                    'newcent' : 'newcent', 'bookman'  : 'bookman',
                    'pslatex' : 'times'}
sans_fonts       = {'default' : 'default', 'ae'       : 'default',
                    'times'   : 'default', 'palatino' : 'default',
                    'helvet'  : 'helvet',  'avant'    : 'avant',
                    'newcent' : 'default', 'bookman'  : 'default',
                    'pslatex' : 'helvet'}
typewriter_fonts = {'default' : 'default', 'ae'       : 'default',
                    'times'   : 'default', 'palatino' : 'default',
                    'helvet'  : 'default', 'avant'    : 'default',
                    'newcent' : 'default', 'bookman'  : 'default',
                    'pslatex' : 'courier'}


def convert_font_settings(document):
    """Convert font settings.

    Replaces the single "\\fontscheme" header line by the eight new
    "\\font_*" header lines.  An empty or unknown scheme is reported and
    treated as 'default'.  If "\\fontscheme" is missing, only a warning
    is issued and the header is left untouched.
    """
    i = find_token_exact(document.header, "\\fontscheme", 0)
    if i == -1:
        document.warning("Malformed LyX document: Missing `\\fontscheme'.")
        return
    font_scheme = get_value(document.header, "\\fontscheme", i, i + 1)
    if font_scheme == '':
        document.warning("Malformed LyX document: Empty `\\fontscheme'.")
        font_scheme = 'default'
    if font_scheme not in roman_fonts:
        document.warning("Malformed LyX document: Unknown `\\fontscheme' `%s'." % font_scheme)
        font_scheme = 'default'
    document.header[i:i+1] = ['\\font_roman %s' % roman_fonts[font_scheme],
                              '\\font_sans %s' % sans_fonts[font_scheme],
                              '\\font_typewriter %s' % typewriter_fonts[font_scheme],
                              '\\font_default_family default',
                              '\\font_sc false',
                              '\\font_osf false',
                              '\\font_sf_scale 100',
                              '\\font_tt_scale 100']


def revert_font_settings(document):
    """Revert font settings.

    Removes the "\\font_*" header lines and inserts a single
    "\\fontscheme" line at the position of the first font family line
    that was found.  If the three family values correspond to a known
    scheme, that scheme is used; otherwise the scheme is 'default' and
    the fonts are selected through preamble code instead.  Settings that
    cannot be expressed in the old format are reported with a warning.
    """
    i = 0
    # Stays -1 when no family line is found; list.insert(-1, ...) then
    # places the new line just before the last header line.
    insert_line = -1
    fonts = {}
    for family in 'roman', 'sans', 'typewriter':
        fonts[family], pos = _pop_header_setting(document, '\\font_%s' % family,
                                                 i, 'default')
        if pos != -1 and insert_line < 0:
            insert_line = pos
        # After a miss restart from the top instead of searching from -1.
        i = max(pos, 0)

    font_default_family, pos = _pop_header_setting(document, '\\font_default_family',
                                                   i, 'default')
    i = max(pos, 0)

    font_sc, pos = _pop_header_setting(document, '\\font_sc', i, 'false')
    i = max(pos, 0)
    if font_sc != 'false':
        document.warning("Conversion of '\\font_sc' not yet implemented.")

    font_osf, pos = _pop_header_setting(document, '\\font_osf', i, 'false')
    i = max(pos, 0)

    font_sf_scale, pos = _pop_header_setting(document, '\\font_sf_scale', i, '100')
    i = max(pos, 0)
    if font_sf_scale != '100':
        document.warning("Conversion of '\\font_sf_scale' not yet implemented.")

    font_tt_scale, pos = _pop_header_setting(document, '\\font_tt_scale', i, '100')
    if font_tt_scale != '100':
        document.warning("Conversion of '\\font_tt_scale' not yet implemented.")

    # First choice: an old font scheme that yields exactly these families.
    for font_scheme in roman_fonts.keys():
        if (roman_fonts[font_scheme] == fonts['roman'] and
            sans_fonts[font_scheme] == fonts['sans'] and
            typewriter_fonts[font_scheme] == fonts['typewriter']):
            document.header.insert(insert_line, '\\fontscheme %s' % font_scheme)
            if font_default_family != 'default':
                document.preamble.append('\\renewcommand{\\familydefault}{\\%s}' % font_default_family)
            if font_osf == 'true':
                document.warning("Ignoring `\\font_osf = true'")
            return

    # No scheme matches: use the default scheme and load fonts by hand.
    font_scheme = 'default'
    document.header.insert(insert_line, '\\fontscheme %s' % font_scheme)
    if fonts['roman'] == 'cmr':
        document.preamble.append('\\renewcommand{\\rmdefault}{cmr}')
        if font_osf == 'true':
            document.preamble.append('\\usepackage{eco}')
            font_osf = 'false'
    if fonts['roman'] in ('lmodern', 'charter', 'utopia', 'beraserif',
                          'ccfonts', 'chancery'):
        document.preamble.append('\\usepackage{%s}' % fonts['roman'])
    if fonts['sans'] in ('cmss', 'lmss', 'cmbr'):
        document.preamble.append('\\renewcommand{\\sfdefault}{%s}' % fonts['sans'])
    # The original looped over the *characters* of 'berasans', so this
    # package was never loaded.
    if fonts['sans'] == 'berasans':
        document.preamble.append('\\usepackage{%s}' % fonts['sans'])
    if fonts['typewriter'] in ('cmtt', 'lmtt', 'cmtl'):
        document.preamble.append('\\renewcommand{\\ttdefault}{%s}' % fonts['typewriter'])
    if fonts['typewriter'] in ('courier', 'beramono', 'luximono'):
        document.preamble.append('\\usepackage{%s}' % fonts['typewriter'])
    if font_default_family != 'default':
        document.preamble.append('\\renewcommand{\\familydefault}{\\%s}' % font_default_family)
    if font_osf == 'true':
        document.warning("Ignoring `\\font_osf = true'")


def revert_booktabs(document):
    """We remove the booktabs flag or everything else will become a mess.

    Inside every Tabular inset the ' booktabs="true"' attribute is
    dropped and the topspace/bottomspace/interlinespace attributes are
    stripped from row lines.
    """
    # NOTE(review): in the reviewed text both tag patterns were reduced to
    # '^$', which only matches empty lines (so nothing was ever reverted);
    # this looks like markup stripping.  The patterns below are
    # reconstructed from the attributes this function edits -- confirm
    # against the upstream lyx2lyx sources.
    re_features = re.compile(r'^<features.* booktabs="true".*>$')
    re_row = re.compile(r'^<row.*space="[^"]+".*>$')
    re_tspace = re.compile(r'\s+topspace="[^"]+"')
    re_bspace = re.compile(r'\s+bottomspace="[^"]+"')
    re_ispace = re.compile(r'\s+interlinespace="[^"]+"')
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Tabular", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            document.warning("Malformed LyX document: Could not find end of tabular.")
            # Step past this inset; a bare `continue` would find it again
            # and loop forever.
            i = i + 1
            continue
        for k in range(i, j):
            line = document.body[k]
            if re_features.search(line):
                document.warning("Converting 'booktabs' table to normal table.")
                line = line.replace(' booktabs="true"', '')
            if re_row.search(line):
                document.warning("Removing extra row space.")
                line = re_tspace.sub('', line)
                line = re_bspace.sub('', line)
                line = re_ispace.sub('', line)
            document.body[k] = line
        i = i + 1


def convert_utf8(document):
    """Mark the document as utf8 encoded."""
    document.encoding = "utf8"


def revert_utf8(document):
    """Replace a utf8 (or missing) \\inputencoding by 'auto'.

    Afterwards document.inputencoding is refreshed from the header and
    document.encoding is recomputed via get_encoding().
    """
    i = find_token(document.header, "\\inputencoding", 0)
    if i == -1:
        document.header.append("\\inputencoding auto")
    elif get_value(document.header, "\\inputencoding", i) == "utf8":
        document.header[i] = "\\inputencoding auto"
    document.inputencoding = get_value(document.header, "\\inputencoding", 0)
    # 248 is presumably the target file format number -- TODO confirm
    # against LyX.get_encoding.
    document.encoding = get_encoding(document.language, document.inputencoding, 248)


def revert_cs_label(document):
    """Remove status flag of charstyle label.

    For each CharStyle inset the first following line starting with
    'show_label' is deleted.  If a "\\begin_layout" line (or the end of
    the body) is reached first, a warning is issued instead.
    """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CharStyle", i)
        if i == -1:
            return
        # Search for a line starting 'show_label'
        # If it is not there, break with a warning message
        i = i + 1
        while i < len(document.body):
            line = document.body[i]
            if line[:10] == "show_label":
                del document.body[i]
                break
            elif line[:13] == "\\begin_layout":
                document.warning("Malformed LyX document: Missing 'show_label'.")
                break
            i = i + 1
        else:
            # Ran off the end of the body (the original raised IndexError).
            document.warning("Malformed LyX document: Missing 'show_label'.")
            return
        i = i + 1


def convert_bibitem(document):
    r""" Convert
\bibitem [option]{argument}

to

\begin_inset LatexCommand bibitem
label "option"
key "argument"

\end_inset

This must be called after convert_commandparams.
"""
    regex = re.compile(r'\S+\s*(\[[^\[\{]*\])?(\{[^}]*\})')
    i = 0
    while True:
        i = find_token(document.body, "\\bibitem", i)
        if i == -1:
            break
        match = regex.match(document.body[i])
        if match is None:
            # e.g. a \bibitem line without a {key}; leave it untouched
            # instead of failing on match.group().
            document.warning("Malformed LyX document: Could not parse `\\bibitem' line.")
            i = i + 1
            continue
        option = match.group(1)
        argument = match.group(2)
        lines = ['\\begin_inset LatexCommand bibitem']
        if option is not None:
            # [1:-1] strips the surrounding brackets / braces.
            lines.append('label "%s"' % option[1:-1].replace('"', '\\"'))
        lines.append('key "%s"' % argument[1:-1].replace('"', '\\"'))
        lines.append('')
        lines.append('\\end_inset')
        document.body[i:i+1] = lines
        i = i + 1


commandparams_info = {
    # command : [option1, option2, argument]
    "bibitem" : ["label", "", "key"],
    "bibtex" : ["options", "btprint", "bibfiles"],
    "cite"        : ["after", "before", "key"],
    "citet"       : ["after", "before", "key"],
    "citep"       : ["after", "before", "key"],
    "citealt"     : ["after", "before", "key"],
    "citealp"     : ["after", "before", "key"],
    "citeauthor"  : ["after", "before", "key"],
    "citeyear"    : ["after", "before", "key"],
    "citeyearpar" : ["after", "before", "key"],
    "citet*"      : ["after", "before", "key"],
    "citep*"      : ["after", "before", "key"],
    "citealt*"    : ["after", "before", "key"],
    "citealp*"    : ["after", "before", "key"],
    "citeauthor*" : ["after", "before", "key"],
    "Citet"       : ["after", "before", "key"],
    "Citep"       : ["after", "before", "key"],
    "Citealt"     : ["after", "before", "key"],
    "Citealp"     : ["after", "before", "key"],
    "Citeauthor"  : ["after", "before", "key"],
    "Citet*"      : ["after", "before", "key"],
    "Citep*"      : ["after", "before", "key"],
    "Citealt*"    : ["after", "before", "key"],
    "Citealp*"    : ["after", "before", "key"],
    "Citeauthor*" : ["after", "before", "key"],
    "citefield"   : ["after", "before", "key"],
    "citetitle"   : ["after", "before", "key"],
    "cite*"       : ["after", "before", "key"],
    "hfill" : ["", "", ""],
    "index"      : ["", "", "name"],
    "printindex" : ["", "", "name"],
    "label" : ["", "", "name"],
    "eqref"     : ["name", "", "reference"],
    "pageref"   : ["name", "", "reference"],
    "prettyref" : ["name", "", "reference"],
    "ref"       : ["name", "", "reference"],
    "vpageref"  : ["name", "", "reference"],
    "vref"      : ["name", "", "reference"],
    "tableofcontents" : ["", "", "type"],
    "htmlurl" : ["name", "", "target"],
    "url"     : ["name", "", "target"]}


def convert_commandparams(document):
    r""" Convert

 \begin_inset LatexCommand \cmdname[opt1][opt2]{arg}
 \end_inset

 to

 \begin_inset LatexCommand cmdname
 name1 "opt1"
 name2 "opt2"
 name3 "arg"
 \end_inset

 name1, name2 and name3 can be different for each command.
"""
    # \begin_inset LatexCommand bibitem was not the official version (see
    # convert_bibitem()), but could be read in, so we convert it here, too.

    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset LatexCommand", i)
        if i == -1:
            break
        # Everything after "\begin_inset LatexCommand " (25 chars + space).
        command = document.body[i][26:].strip()
        if command == "":
            document.warning("Malformed LyX document: Missing LatexCommand name.")
            i = i + 1
            continue

        # The following parser is taken from the original InsetCommandParams::scanCommand
        name = ""
        option1 = ""
        option2 = ""
        argument = ""
        state = "WS"
        # Used to handle things like \command[foo[bar]]{foo{bar}}
        nestdepth = 0
        b = 0   # previous character; tells "[a][b]" (second option) from "[a]"
        for c in command:
            # A space, '[' or '{' terminates the command name.
            if ((state == "CMDNAME" and c == ' ') or
                (state == "CMDNAME" and c == '[') or
                (state == "CMDNAME" and c == '{')):
                state = "WS"
            # A closing delimiter ends the current part unless it is nested.
            if ((state == "OPTION" and c == ']') or
                (state == "SECOPTION" and c == ']') or
                (state == "CONTENT" and c == '}')):
                if nestdepth == 0:
                    state = "WS"
                else:
                    nestdepth = nestdepth - 1
            # An opening delimiter inside a part increases the nesting.
            if ((state == "OPTION" and c == '[') or
                (state == "SECOPTION" and c == '[') or
                (state == "CONTENT" and c == '{')):
                nestdepth = nestdepth + 1
            if state == "CMDNAME":
                name += c
            elif state == "OPTION":
                option1 += c
            elif state == "SECOPTION":
                option2 += c
            elif state == "CONTENT":
                argument += c
            elif state == "WS":
                if c == '\\':
                    state = "CMDNAME"
                elif c == '[' and b != ']':
                    state = "OPTION"
                    nestdepth = 0 # Just to be sure
                elif c == '[' and b == ']':
                    state = "SECOPTION"
                    nestdepth = 0 # Just to be sure
                elif c == '{':
                    state = "CONTENT"
                    nestdepth = 0 # Just to be sure
            b = c

        # Now we have parsed the command, output the parameters
        lines = ["\\begin_inset LatexCommand %s" % name]
        for index, value, what in ((0, option1, "option"),
                                   (1, option2, "second option"),
                                   (2, argument, "argument")):
            if value == "":
                continue
            # The table is only consulted for parts that are present.
            param = commandparams_info[name][index]
            if param == "":
                document.warning("Ignoring invalid %s `%s' of command `%s'." % (what, value, name))
            else:
                lines.append('%s "%s"' % (param, value.replace('"', '\\"')))
        document.body[i:i+1] = lines
        i = i + 1


def revert_commandparams(document):
    """Revert LatexCommand insets to the old one-line syntax.

    The named parameter lines of each inset are folded back into
    "\\cmdname[opt1][opt2]{arg}" (see convert_commandparams()); bibitem
    insets become plain "\\bibitem" lines.  A 'preview' line is kept,
    other unknown non-empty lines are dropped with a warning.
    """
    regex = re.compile(r'(\S+)\s+(.+)')
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset LatexCommand", i)
        if i == -1:
            break
        fields = document.body[i].split()
        if len(fields) < 3:
            document.warning("Malformed LyX document: Missing LatexCommand name.")
            i = i + 1
            continue
        name = fields[2]
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            # Without this guard body[i:0] would be assigned and the scan
            # would restart at 0, looping forever.
            document.warning("Malformed LyX document: Could not find end of command inset.")
            i = i + 1
            continue
        preview_line = ""
        option1 = ""
        option2 = ""
        argument = ""
        for k in range(i + 1, j):
            match = regex.match(document.body[k])
            if match:
                pname = match.group(1)
                pvalue = match.group(2)
                if pname == "preview":
                    preview_line = document.body[k]
                elif (commandparams_info[name][0] != "" and
                      pname == commandparams_info[name][0]):
                    option1 = pvalue.strip('"').replace('\\"', '"')
                elif (commandparams_info[name][1] != "" and
                      pname == commandparams_info[name][1]):
                    option2 = pvalue.strip('"').replace('\\"', '"')
                elif (commandparams_info[name][2] != "" and
                      pname == commandparams_info[name][2]):
                    argument = pvalue.strip('"').replace('\\"', '"')
            elif document.body[k].strip() != "":
                document.warning("Ignoring unknown contents `%s' in command inset %s." % (document.body[k], name))
        if name == "bibitem":
            if option1 == "":
                lines = ["\\bibitem {%s}" % argument]
            else:
                lines = ["\\bibitem [%s]{%s}" % (option1, argument)]
        else:
            if option1 == "":
                if option2 == "":
                    lines = ["\\begin_inset LatexCommand \\%s{%s}" % (name, argument)]
                else:
                    lines = ["\\begin_inset LatexCommand \\%s[][%s]{%s}" % (name, option2, argument)]
            else:
                if option2 == "":
                    lines = ["\\begin_inset LatexCommand \\%s[%s]{%s}" % (name, option1, argument)]
                else:
                    lines = ["\\begin_inset LatexCommand \\%s[%s][%s]{%s}" % (name, option1, option2, argument)]
        if name != "bibitem":
            if preview_line != "":
                lines.append(preview_line)
            lines.append('')
            lines.append('\\end_inset')
        document.body[i:j+1] = lines
        # Continue right after the replacement.  The original `i = j + 1`
        # used the pre-replacement index and could skip a following inset,
        # because the replacement is shorter than what it replaces.
        i = i + len(lines)


def revert_nomenclature(document):
    """Convert nomenclature entry to ERT.

    Each nomenclature inset is replaced by an ERT inset containing
    "\\nomenclature[prefix]{symbol}{description}" (the prefix part is
    omitted when empty).  If at least one inset was converted and the
    preamble does not load nomencl yet, the package is added together
    with "\\makenomenclature".
    """
    regex = re.compile(r'(\S+)\s+(.+)')
    i = 0
    use_nomencl = 0
    while True:
        i = find_token(document.body, "\\begin_inset LatexCommand nomenclature", i)
        if i == -1:
            break
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            # Leave the broken inset alone; replacing body[i:0] would keep
            # its header line and the loop would find it again forever.
            document.warning("Malformed LyX document: Could not find end of nomenclature inset.")
            i = i + 1
            continue
        use_nomencl = 1
        symbol = ""
        description = ""
        prefix = ""
        for k in range(i + 1, j):
            match = regex.match(document.body[k])
            if match:
                name = match.group(1)
                value = match.group(2)
                if name == "preview":
                    # Recognised, but has no counterpart in the ERT inset.
                    pass
                elif name == "symbol":
                    symbol = value.strip('"').replace('\\"', '"')
                elif name == "description":
                    description = value.strip('"').replace('\\"', '"')
                elif name == "prefix":
                    prefix = value.strip('"').replace('\\"', '"')
            elif document.body[k].strip() != "":
                document.warning("Ignoring unknown contents `%s' in nomenclature inset." % document.body[k])
        if prefix == "":
            command = 'nomenclature{%s}{%s}' % (symbol, description)
        else:
            command = 'nomenclature[%s]{%s}{%s}' % (prefix, symbol, description)
        document.body[i:j+1] = _ert_lines(document, command)
        # Skip the 11 lines of the ERT inset just written.
        i = i + 11
    if use_nomencl and find_token(document.preamble, '\\usepackage{nomencl}[2005/09/22]', 0) == -1:
        document.preamble.append('\\usepackage{nomencl}[2005/09/22]')
        document.preamble.append('\\makenomenclature')


def revert_printnomenclature(document):
    """Convert printnomenclature to ERT.

    Each printnomenclature inset is replaced by an ERT inset containing
    "\\printnomenclature{}" or "\\printnomenclature[labelwidth]".  If at
    least one inset was converted and the preamble does not load nomencl
    yet, the package is added together with "\\makenomenclature".
    """
    regex = re.compile(r'(\S+)\s+(.+)')
    i = 0
    use_nomencl = 0
    while True:
        i = find_token(document.body, "\\begin_inset LatexCommand printnomenclature", i)
        if i == -1:
            break
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            # See revert_nomenclature(): avoid re-finding the same inset.
            document.warning("Malformed LyX document: Could not find end of printnomenclature inset.")
            i = i + 1
            continue
        use_nomencl = 1
        labelwidth = ""
        for k in range(i + 1, j):
            match = regex.match(document.body[k])
            if match:
                name = match.group(1)
                value = match.group(2)
                if name == "preview":
                    # Recognised, but has no counterpart in the ERT inset.
                    pass
                elif name == "labelwidth":
                    labelwidth = value.strip('"').replace('\\"', '"')
            elif document.body[k].strip() != "":
                document.warning("Ignoring unknown contents `%s' in printnomenclature inset." % document.body[k])
        # The original emitted "\nomenclature..." here, i.e. the command
        # for a single entry, not the one that prints the list.
        if labelwidth == "":
            command = 'printnomenclature{}'
        else:
            command = 'printnomenclature[%s]' % labelwidth
        document.body[i:j+1] = _ert_lines(document, command)
        # Skip the 11 lines of the ERT inset just written.
        i = i + 11
    if use_nomencl and find_token(document.preamble, '\\usepackage{nomencl}[2005/09/22]', 0) == -1:
        document.preamble.append('\\usepackage{nomencl}[2005/09/22]')
        document.preamble.append('\\makenomenclature')


def convert_esint(document):
    """Add \\use_esint setting to header.

    The line '\\use_esint 0' is inserted directly before the
    "\\cite_engine" header line; if that line is missing only a warning
    is issued.
    """
    i = find_token(document.header, "\\cite_engine", 0)
    if i == -1:
        document.warning("Malformed LyX document: Missing `\\cite_engine'.")
        return
    # 0 is off, 1 is auto, 2 is on.
    document.header.insert(i, '\\use_esint 0')


def revert_esint(document):
    """Remove \\use_esint setting from header.

    If the setting was 2 ("on"), "\\usepackage{esint}" is appended to
    the preamble so that the package is still loaded.
    """
    i = find_token(document.header, "\\use_esint", 0)
    if i == -1:
        document.warning("Malformed LyX document: Missing `\\use_esint'.")
        return
    use_esint = document.header[i].split()[1]
    del document.header[i]
    # 0 is off, 1 is auto, 2 is on.
    # The value comes from a header line and is a string; the original
    # compared it with the integer 2, which is never equal.
    if use_esint == '2':
        document.preamble.append('\\usepackage{esint}')


def revert_clearpage(document):
    """clearpage -> ERT"""
    i = 0
    while True:
        i = find_token(document.body, "\\clearpage", i)
        if i == -1:
            break
        document.body[i:i+1] = _ert_lines(document, 'clearpage')
        i = i + 1


def revert_cleardoublepage(document):
    """cleardoublepage -> ERT"""
    i = 0
    while True:
        i = find_token(document.body, "\\cleardoublepage", i)
        if i == -1:
            break
        document.body[i:i+1] = _ert_lines(document, 'cleardoublepage')
        i = i + 1


def revert_encodings(document):
    """Set new encodings to auto.

    If "\\inputencoding" is missing, or names one of the encodings
    listed below, it is set to 'auto'.  document.inputencoding is then
    refreshed from the header.
    """
    encodings = ["8859-6", "8859-8", "cp437", "cp437de", "cp850", "cp852",
                 "cp855", "cp858", "cp862", "cp865", "cp866", "cp1250",
                 "cp1252", "cp1256", "cp1257", "latin10", "pt254", "tis620-0"]
    i = find_token(document.header, "\\inputencoding", 0)
    if i == -1:
        document.header.append("\\inputencoding auto")
    else:
        inputenc = get_value(document.header, "\\inputencoding", i)
        if inputenc in encodings:
            document.header[i] = "\\inputencoding auto"
    document.inputencoding = get_value(document.header, "\\inputencoding", 0)


##
# Conversion hub
#

supported_versions = ["1.5.0","1.5"]
# Each entry pairs a file format number with the list of functions to run
# for that step.
convert = [[246, []],
           [247, [convert_font_settings]],
           [248, []],
           [249, [convert_utf8]],
           [250, []],
           [251, []],
           [252, [convert_commandparams, convert_bibitem]],
           [253, []],
           [254, [convert_esint]],
           [255, []],
           [256, []]]

revert =  [[255, [revert_encodings]],
           [254, [revert_clearpage, revert_cleardoublepage]],
           [253, [revert_esint]],
           [252, [revert_nomenclature, revert_printnomenclature]],
           [251, [revert_commandparams]],
           [250, [revert_cs_label]],
           [249, []],
           [248, [revert_utf8]],
           [247, [revert_booktabs]],
           [246, [revert_font_settings]],
           [245, [revert_framed]]]


if __name__ == "__main__":
    pass