# This file is part of lyx2lyx
# -*- coding: utf-8 -*-
# Copyright (C) 2006 José Matos
# Copyright (C) 2004-2006 Georg Baum
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

""" Convert files to the file format generated by lyx 1.5"""

import re
from parser_tools import find_token, find_token_exact, find_tokens, find_end_of, get_value
from LyX import get_encoding

####################################################################
# Private helper functions

def find_end_of_inset(lines, i):
    " Find end of inset, where lines[i] is included."
    return find_end_of(lines, i, "\\begin_inset", "\\end_inset")


def _unquote(value):
    " Strip the surrounding double quotes of a parameter value and unescape inner ones."
    return value.strip('"').replace('\\"', '"')


def _ert_lines(document, command):
    """ Return the lines of a collapsed ERT inset containing a backslash
    followed by `command`, using the document's default layout."""
    return ['\\begin_inset ERT',
            'status collapsed',
            '',
            '\\begin_layout %s' % document.default_layout,
            '',
            '',
            '\\backslash',
            command,
            '\\end_layout',
            '',
            '\\end_inset']


def _pop_header_setting(document, token, start, default):
    """ Remove the header line starting with `token` and return its value.

    The search starts at header index `start`.  Returns a tuple
    (value, index): `index` is the position the line was deleted from.
    If the token is missing, a warning is issued and (default, -1) is
    returned; the header is left untouched.
    """
    i = find_token_exact(document.header, token, start)
    if i == -1:
        document.warning("Malformed LyX document: Missing `%s'." % token)
        return default, -1
    value = get_value(document.header, token, i, i + 1)
    del document.header[i]
    return value, i


def _ensure_nomencl(document):
    " Load and initialise the nomencl package in the preamble unless already there."
    if find_token(document.preamble, '\\usepackage{nomencl}[2005/09/22]', 0) == -1:
        document.preamble.append('\\usepackage{nomencl}[2005/09/22]')
        document.preamble.append('\\makenomenclature')

# End of helper functions
####################################################################


##
#  Notes: Framed/Shaded
#

def revert_framed(document):
    " Revert framed and shaded notes to plain notes. "
    i = 0
    while True:
        i = find_tokens(document.body,
                        ["\\begin_inset Note Framed", "\\begin_inset Note Shaded"], i)
        if i == -1:
            return
        document.body[i] = "\\begin_inset Note"
        i = i + 1


##
#  Fonts
#

# Each table maps an old `\fontscheme' name to the font of one family.
roman_fonts      = {'default' : 'default', 'ae'       : 'ae',
                    'times'   : 'times',   'palatino' : 'palatino',
                    'helvet'  : 'default', 'avant'    : 'default',
                    'newcent' : 'newcent', 'bookman'  : 'bookman',
                    'pslatex' : 'times'}
sans_fonts       = {'default' : 'default', 'ae'       : 'default',
                    'times'   : 'default', 'palatino' : 'default',
                    'helvet'  : 'helvet',  'avant'    : 'avant',
                    'newcent' : 'default', 'bookman'  : 'default',
                    'pslatex' : 'helvet'}
typewriter_fonts = {'default' : 'default', 'ae'       : 'default',
                    'times'   : 'default', 'palatino' : 'default',
                    'helvet'  : 'default', 'avant'    : 'default',
                    'newcent' : 'default', 'bookman'  : 'default',
                    'pslatex' : 'courier'}

def convert_font_settings(document):
    """ Convert font settings.

    Replaces the single `\\fontscheme' header line by the eight
    `\\font_*' lines, looking the family fonts up in roman_fonts,
    sans_fonts and typewriter_fonts.  An empty or unknown scheme is
    treated as 'default' after a warning.
    """
    i = find_token_exact(document.header, "\\fontscheme", 0)
    if i == -1:
        document.warning("Malformed LyX document: Missing `\\fontscheme'.")
        return
    font_scheme = get_value(document.header, "\\fontscheme", i, i + 1)
    if font_scheme == '':
        document.warning("Malformed LyX document: Empty `\\fontscheme'.")
        font_scheme = 'default'
    if font_scheme not in roman_fonts:
        document.warning("Malformed LyX document: Unknown `\\fontscheme' `%s'." % font_scheme)
        font_scheme = 'default'
    document.header[i:i+1] = ['\\font_roman %s' % roman_fonts[font_scheme],
                              '\\font_sans %s' % sans_fonts[font_scheme],
                              '\\font_typewriter %s' % typewriter_fonts[font_scheme],
                              '\\font_default_family default',
                              '\\font_sc false',
                              '\\font_osf false',
                              '\\font_sf_scale 100',
                              '\\font_tt_scale 100']


def revert_font_settings(document):
    """ Revert font settings.

    Removes the `\\font_*' header lines and inserts a `\\fontscheme'
    line at the position of the first font line found.  If the three
    family fonts correspond to a known scheme that scheme is used;
    otherwise the scheme is 'default' and the fonts are selected via
    preamble code.  Settings that cannot be expressed only trigger a
    warning.
    """
    insert_line = -1
    start = 0
    fonts = {'roman' : 'default', 'sans' : 'default', 'typewriter' : 'default'}
    for family in 'roman', 'sans', 'typewriter':
        value, found = _pop_header_setting(document, '\\font_%s' % family,
                                           start, fonts[family])
        if found == -1:
            start = 0
        else:
            if insert_line < 0:
                insert_line = found
            fonts[family] = value
            start = found

    # After a missing setting the next search restarts at the top of
    # the header instead of at the invalid index -1.
    font_default_family, found = _pop_header_setting(
        document, '\\font_default_family', start, 'default')
    start = max(found, 0)

    font_sc, found = _pop_header_setting(document, '\\font_sc', start, 'false')
    start = max(found, 0)
    if font_sc != 'false':
        document.warning("Conversion of '\\font_sc' not yet implemented.")

    font_osf, found = _pop_header_setting(document, '\\font_osf', start, 'false')
    start = max(found, 0)

    font_sf_scale, found = _pop_header_setting(document, '\\font_sf_scale', start, '100')
    start = max(found, 0)
    if font_sf_scale != '100':
        document.warning("Conversion of '\\font_sf_scale' not yet implemented.")

    font_tt_scale, found = _pop_header_setting(document, '\\font_tt_scale', start, '100')
    if font_tt_scale != '100':
        document.warning("Conversion of '\\font_tt_scale' not yet implemented.")

    # First choice: the fonts correspond exactly to an old font scheme.
    for font_scheme in roman_fonts:
        if (roman_fonts[font_scheme] == fonts['roman'] and
            sans_fonts[font_scheme] == fonts['sans'] and
            typewriter_fonts[font_scheme] == fonts['typewriter']):
            document.header.insert(insert_line, '\\fontscheme %s' % font_scheme)
            if font_default_family != 'default':
                document.preamble.append('\\renewcommand{\\familydefault}{\\%s}' % font_default_family)
            if font_osf == 'true':
                document.warning("Ignoring `\\font_osf = true'")
            return

    # No matching scheme: use the default one and load the fonts in the preamble.
    document.header.insert(insert_line, '\\fontscheme %s' % 'default')
    if fonts['roman'] == 'cmr':
        document.preamble.append('\\renewcommand{\\rmdefault}{cmr}')
        if font_osf == 'true':
            document.preamble.append('\\usepackage{eco}')
            font_osf = 'false'
    if fonts['roman'] in ('lmodern', 'charter', 'utopia', 'beraserif', 'ccfonts', 'chancery'):
        document.preamble.append('\\usepackage{%s}' % fonts['roman'])
    if fonts['sans'] in ('cmss', 'lmss', 'cmbr'):
        document.preamble.append('\\renewcommand{\\sfdefault}{%s}' % fonts['sans'])
    # This used to loop over the characters of the string 'berasans',
    # so the package was never loaded.
    if fonts['sans'] == 'berasans':
        document.preamble.append('\\usepackage{%s}' % fonts['sans'])
    if fonts['typewriter'] in ('cmtt', 'lmtt', 'cmtl'):
        document.preamble.append('\\renewcommand{\\ttdefault}{%s}' % fonts['typewriter'])
    if fonts['typewriter'] in ('courier', 'beramono', 'luximono'):
        document.preamble.append('\\usepackage{%s}' % fonts['typewriter'])
    if font_default_family != 'default':
        document.preamble.append('\\renewcommand{\\familydefault}{\\%s}' % font_default_family)
    if font_osf == 'true':
        document.warning("Ignoring `\\font_osf = true'")


def revert_booktabs(document):
    """ We remove the booktabs flag or everything else will become a mess.

    Also strips the topspace, bottomspace and interlinespace attributes
    from the rows of every tabular inset.
    """
    # The tag patterns are anchored on the `<features ...>' and
    # `<row ...>' lines of a tabular; the bare pattern '^$' could only
    # ever match empty lines.
    # NOTE(review): tag names reconstructed - confirm against the LyX
    # tabular format.
    re_features = re.compile(r'^<features.* booktabs="true".*>$')
    re_row = re.compile(r'^<row.*space="[^"]+".*>$')
    re_tspace = re.compile(r'\s+topspace="[^"]+"')
    re_bspace = re.compile(r'\s+bottomspace="[^"]+"')
    re_ispace = re.compile(r'\s+interlinespace="[^"]+"')
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Tabular", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            document.warning("Malformed LyX document: Could not find end of tabular.")
            # Advance, otherwise the same line is found again forever.
            i = i + 1
            continue
        for k in range(i, j):
            if re_features.search(document.body[k]):
                document.warning("Converting 'booktabs' table to normal table.")
                document.body[k] = document.body[k].replace(' booktabs="true"', '')
            if re_row.search(document.body[k]):
                document.warning("Removing extra row space.")
                document.body[k] = re_tspace.sub('', document.body[k])
                document.body[k] = re_bspace.sub('', document.body[k])
                document.body[k] = re_ispace.sub('', document.body[k])
        i = i + 1


def convert_multiencoding(document, forward):
    """ Fix files with multiple encodings.
Files with an inputencoding of "auto" or "default" and multiple languages
where at least two languages have different default encodings are encoded
in multiple encodings for file formats < 249. These files are incorrectly
read and written (as if the whole file was in the encoding of the main
language).

This function
- converts from fake unicode values to true unicode if forward is true, and
- converts from true unicode values to fake unicode if forward is false.
document.encoding must be set to the old value (format 248) in both cases.

We do this here and not in LyX.py because it is far easier to do the
necessary parsing in modern formats than in ancient ones.
"""
    # The top of the stack is the encoding in effect for the current line.
    encoding_stack = [document.encoding]
    lang_re = re.compile(r"^\\lang\s(\S+)")
    if document.inputencoding == "auto" or document.inputencoding == "default":
        for i in range(len(document.body)):
            result = lang_re.match(document.body[i])
            if result:
                language = result.group(1)
                if language == "default":
                    document.warning("Resetting encoding from %s to %s."
                                     % (encoding_stack[-1], document.encoding))
                    encoding_stack[-1] = document.encoding
                else:
                    from lyx2lyx_lang import lang
                    document.warning("Setting encoding from %s to %s."
                                     % (encoding_stack[-1], lang[language][3]))
                    encoding_stack[-1] = lang[language][3]
            elif find_token(document.body, "\\begin_layout", i, i + 1) == i:
                document.warning("Adding nested encoding %s." % encoding_stack[-1])
                encoding_stack.append(encoding_stack[-1])
            elif find_token(document.body, "\\end_layout", i, i + 1) == i:
                document.warning("Removing nested encoding %s." % encoding_stack[-1])
                if len(encoding_stack) == 1:
                    # Never pop the document encoding itself: an
                    # unbalanced \end_layout would leave the stack empty.
                    document.warning("Malformed LyX document: Unexpected `\\end_layout'.")
                else:
                    del encoding_stack[-1]
            if encoding_stack[-1] != document.encoding:
                if forward:
                    # This line has been incorrectly interpreted as if it was
                    # encoded in 'encoding'.
                    # Convert back to the 8bit string that was in the file.
                    orig = document.body[i].encode(document.encoding)
                    # Convert the 8bit string that was in the file to unicode
                    # with the correct encoding.
                    document.body[i] = orig.decode(encoding_stack[-1])
                else:
                    # Convert unicode to the 8bit string that will be written
                    # to the file with the correct encoding.
                    orig = document.body[i].encode(encoding_stack[-1])
                    # Convert the 8bit string that will be written to the
                    # file to fake unicode with the encoding that will later
                    # be used when writing to the file.
                    document.body[i] = orig.decode(document.encoding)


def convert_utf8(document):
    " Set document encoding to UTF-8. "
    convert_multiencoding(document, True)
    document.encoding = "utf8"


def revert_utf8(document):
    " Set document encoding to the value corresponding to inputencoding. "
    i = find_token(document.header, "\\inputencoding", 0)
    if i == -1:
        document.header.append("\\inputencoding auto")
    elif get_value(document.header, "\\inputencoding", i) == "utf8":
        document.header[i] = "\\inputencoding auto"
    document.inputencoding = get_value(document.header, "\\inputencoding", 0)
    document.encoding = get_encoding(document.language, document.inputencoding, 248)
    convert_multiencoding(document, False)


def revert_cs_label(document):
    " Remove status flag of charstyle label. "
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CharStyle", i)
        if i == -1:
            return
        # Search for a line starting 'show_label'.
        # If it is not there, break with a warning message.
        i = i + 1
        while i < len(document.body):
            line = document.body[i]
            if line.startswith("show_label"):
                del document.body[i]
                break
            if line.startswith("\\begin_layout"):
                document.warning("Malformed LyX document: Missing 'show_label'.")
                break
            i = i + 1
        else:
            # Reached the end of the body without finding either line.
            document.warning("Malformed LyX document: Missing 'show_label'.")
            return
        i = i + 1


def convert_bibitem(document):
    r""" Convert

\bibitem [option]{argument}

to

\begin_inset LatexCommand bibitem
label "option"
key "argument"

\end_inset

This must be called after convert_commandparams.
"""
    regex = re.compile(r'\S+\s*(\[[^\[\{]*\])?(\{[^}]*\})')
    i = 0
    while True:
        i = find_token(document.body, "\\bibitem", i)
        if i == -1:
            break
        match = regex.match(document.body[i])
        if match is None:
            # No {argument} on this line: leave it alone instead of crashing.
            document.warning("Malformed LyX document: Could not parse `%s'." % document.body[i])
            i = i + 1
            continue
        option = match.group(1)
        argument = match.group(2)
        lines = ['\\begin_inset LatexCommand bibitem']
        if option is not None:
            # [1:-1] drops the enclosing brackets / braces.
            lines.append('label "%s"' % option[1:-1].replace('"', '\\"'))
        lines.append('key "%s"' % argument[1:-1].replace('"', '\\"'))
        lines.append('')
        lines.append('\\end_inset')
        document.body[i:i+1] = lines
        i = i + 1


commandparams_info = {
    # command : [option1, option2, argument]
    "bibitem" : ["label", "", "key"],
    "bibtex" : ["options", "btprint", "bibfiles"],
    "cite"        : ["after", "before", "key"],
    "citet"       : ["after", "before", "key"],
    "citep"       : ["after", "before", "key"],
    "citealt"     : ["after", "before", "key"],
    "citealp"     : ["after", "before", "key"],
    "citeauthor"  : ["after", "before", "key"],
    "citeyear"    : ["after", "before", "key"],
    "citeyearpar" : ["after", "before", "key"],
    "citet*"      : ["after", "before", "key"],
    "citep*"      : ["after", "before", "key"],
    "citealt*"    : ["after", "before", "key"],
    "citealp*"    : ["after", "before", "key"],
    "citeauthor*" : ["after", "before", "key"],
    "Citet"       : ["after", "before", "key"],
    "Citep"       : ["after", "before", "key"],
    "Citealt"     : ["after", "before", "key"],
    "Citealp"     : ["after", "before", "key"],
    "Citeauthor"  : ["after", "before", "key"],
    "Citet*"      : ["after", "before", "key"],
    "Citep*"      : ["after", "before", "key"],
    "Citealt*"    : ["after", "before", "key"],
    "Citealp*"    : ["after", "before", "key"],
    "Citeauthor*" : ["after", "before", "key"],
    "citefield"   : ["after", "before", "key"],
    "citetitle"   : ["after", "before", "key"],
    "cite*"       : ["after", "before", "key"],
    "hfill" : ["", "", ""],
    "index"      : ["", "", "name"],
    "printindex" : ["", "", "name"],
    "label" : ["", "", "name"],
    "eqref"     : ["name", "", "reference"],
    "pageref"   : ["name", "", "reference"],
    "prettyref" : ["name", "", "reference"],
    "ref"       : ["name", "", "reference"],
    "vpageref"  : ["name", "", "reference"],
    "vref"      : ["name", "", "reference"],
    "tableofcontents" : ["", "", "type"],
    "htmlurl" : ["name", "", "target"],
    "url"     : ["name", "", "target"]}


def _scan_command(command):
    r""" Split `\cmdname[opt1][opt2]{arg}' into its parts.

    Returns the tuple (name, option1, option2, argument); parts that do
    not occur are empty strings.  Nested brackets and braces such as
    \command[foo[bar]]{foo{bar}} are kept inside the respective part.
    """
    # The following parser is taken from the original InsetCommandParams::scanCommand
    name = ""
    option1 = ""
    option2 = ""
    argument = ""
    state = "WS"
    # Used to handle things like \command[foo[bar]]{foo{bar}}
    nestdepth = 0
    previous = ''
    for c in command:
        if state == "CMDNAME" and c in ' [{':
            state = "WS"
        if ((state in ("OPTION", "SECOPTION") and c == ']') or
            (state == "CONTENT" and c == '}')):
            if nestdepth == 0:
                state = "WS"
            else:
                nestdepth = nestdepth - 1
        if ((state in ("OPTION", "SECOPTION") and c == '[') or
            (state == "CONTENT" and c == '{')):
            nestdepth = nestdepth + 1
        if state == "CMDNAME":
            name += c
        elif state == "OPTION":
            option1 += c
        elif state == "SECOPTION":
            option2 += c
        elif state == "CONTENT":
            argument += c
        elif state == "WS":
            if c == '\\':
                state = "CMDNAME"
            elif c == '[' and previous != ']':
                state = "OPTION"
                nestdepth = 0 # Just to be sure
            elif c == '[' and previous == ']':
                # A '[' directly after ']' starts the second option.
                state = "SECOPTION"
                nestdepth = 0 # Just to be sure
            elif c == '{':
                state = "CONTENT"
                nestdepth = 0 # Just to be sure
        previous = c
    return name, option1, option2, argument


def convert_commandparams(document):
    r""" Convert

 \begin_inset LatexCommand \cmdname[opt1][opt2]{arg}
 \end_inset

 to

 \begin_inset LatexCommand cmdname
 name1 "opt1"
 name2 "opt2"
 name3 "arg"
 \end_inset

 name1, name2 and name3 can be different for each command.
"""
    # \begin_inset LatexCommand bibitem was not the official version (see
    # convert_bibitem()), but could be read in, so we convert it here, too.

    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset LatexCommand", i)
        if i == -1:
            break
        # 26 == len("\\begin_inset LatexCommand ")
        command = document.body[i][26:].strip()
        if command == "":
            document.warning("Malformed LyX document: Missing LatexCommand name.")
            i = i + 1
            continue

        name, option1, option2, argument = _scan_command(command)
        if name not in commandparams_info:
            # Unknown commands are left untouched instead of raising KeyError.
            document.warning("Malformed LyX document: Unknown LatexCommand `%s'." % name)
            i = i + 1
            continue
        opt1_name, opt2_name, arg_name = commandparams_info[name]

        # Now we have parsed the command, output the parameters
        lines = ["\\begin_inset LatexCommand %s" % name]
        if option1 != "":
            if opt1_name == "":
                document.warning("Ignoring invalid option `%s' of command `%s'." % (option1, name))
            else:
                lines.append('%s "%s"' % (opt1_name, option1.replace('"', '\\"')))
        if option2 != "":
            if opt2_name == "":
                document.warning("Ignoring invalid second option `%s' of command `%s'." % (option2, name))
            else:
                lines.append('%s "%s"' % (opt2_name, option2.replace('"', '\\"')))
        if argument != "":
            if arg_name == "":
                document.warning("Ignoring invalid argument `%s' of command `%s'." % (argument, name))
            else:
                lines.append('%s "%s"' % (arg_name, argument.replace('"', '\\"')))
        document.body[i:i+1] = lines
        i = i + 1


def revert_commandparams(document):
    r""" Convert command insets with named parameters back to the
    \cmdname[opt1][opt2]{arg} form (the reverse of
    convert_commandparams); bibitem insets become plain \bibitem lines.
    """
    regex = re.compile(r'(\S+)\s+(.+)')
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset LatexCommand", i)
        if i == -1:
            break
        words = document.body[i].split()
        if len(words) < 3:
            document.warning("Malformed LyX document: Missing LatexCommand name.")
            i = i + 1
            continue
        name = words[2]
        if name not in commandparams_info:
            document.warning("Malformed LyX document: Unknown LatexCommand `%s'." % name)
            i = i + 1
            continue
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            document.warning("Malformed LyX document: Could not find end of command inset.")
            i = i + 1
            continue
        opt1_name, opt2_name, arg_name = commandparams_info[name]
        preview_line = ""
        option1 = ""
        option2 = ""
        argument = ""
        for k in range(i + 1, j):
            match = regex.match(document.body[k])
            if match:
                pname = match.group(1)
                pvalue = match.group(2)
                if pname == "preview":
                    preview_line = document.body[k]
                elif opt1_name != "" and pname == opt1_name:
                    option1 = _unquote(pvalue)
                elif opt2_name != "" and pname == opt2_name:
                    option2 = _unquote(pvalue)
                elif arg_name != "" and pname == arg_name:
                    argument = _unquote(pvalue)
            elif document.body[k].strip() != "":
                document.warning("Ignoring unknown contents `%s' in command inset %s."
                                 % (document.body[k], name))
        if name == "bibitem":
            if option1 == "":
                lines = ["\\bibitem {%s}" % argument]
            else:
                lines = ["\\bibitem [%s]{%s}" % (option1, argument)]
        else:
            if option1 == "":
                if option2 == "":
                    lines = ["\\begin_inset LatexCommand \\%s{%s}" % (name, argument)]
                else:
                    lines = ["\\begin_inset LatexCommand \\%s[][%s]{%s}" % (name, option2, argument)]
            else:
                if option2 == "":
                    lines = ["\\begin_inset LatexCommand \\%s[%s]{%s}" % (name, option1, argument)]
                else:
                    lines = ["\\begin_inset LatexCommand \\%s[%s][%s]{%s}" % (name, option1, option2, argument)]
            if preview_line != "":
                lines.append(preview_line)
            lines.append('')
            lines.append('\\end_inset')
        document.body[i:j+1] = lines
        # Continue right behind the replacement.  The old index j + 1 refers
        # to the body before it changed length and could skip later insets.
        i = i + len(lines)


def revert_nomenclature(document):
    " Convert nomenclature entry to ERT. "
    regex = re.compile(r'(\S+)\s+(.+)')
    i = 0
    use_nomencl = 0
    while True:
        i = find_token(document.body, "\\begin_inset LatexCommand nomenclature", i)
        if i == -1:
            break
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            document.warning("Malformed LyX document: Could not find end of nomenclature inset.")
            i = i + 1
            continue
        use_nomencl = 1
        symbol = ""
        description = ""
        prefix = ""
        for k in range(i + 1, j):
            match = regex.match(document.body[k])
            if match:
                name = match.group(1)
                value = match.group(2)
                # Other parameters (e.g. a preview line) are dropped silently.
                if name == "symbol":
                    symbol = _unquote(value)
                elif name == "description":
                    description = _unquote(value)
                elif name == "prefix":
                    prefix = _unquote(value)
            elif document.body[k].strip() != "":
                document.warning("Ignoring unknown contents `%s' in nomenclature inset."
                                 % document.body[k])
        if prefix == "":
            command = 'nomenclature{%s}{%s}' % (symbol, description)
        else:
            command = 'nomenclature[%s]{%s}{%s}' % (prefix, symbol, description)
        ert = _ert_lines(document, command)
        document.body[i:j+1] = ert
        i = i + len(ert)
    if use_nomencl:
        _ensure_nomencl(document)


def revert_printnomenclature(document):
    " Convert printnomenclature to ERT. "
    regex = re.compile(r'(\S+)\s+(.+)')
    i = 0
    use_nomencl = 0
    while True:
        i = find_token(document.body, "\\begin_inset LatexCommand printnomenclature", i)
        if i == -1:
            break
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            document.warning("Malformed LyX document: Could not find end of printnomenclature inset.")
            i = i + 1
            continue
        use_nomencl = 1
        labelwidth = ""
        for k in range(i + 1, j):
            match = regex.match(document.body[k])
            if match:
                name = match.group(1)
                value = match.group(2)
                # Other parameters (e.g. a preview line) are dropped silently.
                if name == "labelwidth":
                    labelwidth = _unquote(value)
            elif document.body[k].strip() != "":
                document.warning("Ignoring unknown contents `%s' in printnomenclature inset."
                                 % document.body[k])
        # The command that prints the list is \printnomenclature;
        # \nomenclature would define an (empty) entry instead.
        if labelwidth == "":
            command = 'printnomenclature{}'
        else:
            command = 'printnomenclature[%s]' % labelwidth
        ert = _ert_lines(document, command)
        document.body[i:j+1] = ert
        i = i + len(ert)
    if use_nomencl:
        _ensure_nomencl(document)


def convert_esint(document):
    " Add \\use_esint setting to header. "
    i = find_token(document.header, "\\cite_engine", 0)
    if i == -1:
        document.warning("Malformed LyX document: Missing `\\cite_engine'.")
        return
    # 0 is off, 1 is auto, 2 is on.
    document.header.insert(i, '\\use_esint 0')


def revert_esint(document):
    " Remove \\use_esint setting from header. "
    i = find_token(document.header, "\\use_esint", 0)
    if i == -1:
        document.warning("Malformed LyX document: Missing `\\use_esint'.")
        return
    use_esint = document.header[i].split()[1]
    del document.header[i]
    # 0 is off, 1 is auto, 2 is on.
    # The value is a string taken from the header line, so it has to be
    # compared with '2'; comparing with the integer 2 is never true.
    if use_esint == '2':
        document.preamble.append('\\usepackage{esint}')


def revert_clearpage(document):
    " clearpage -> ERT"
    i = 0
    while True:
        i = find_token(document.body, "\\clearpage", i)
        if i == -1:
            break
        document.body[i:i+1] = _ert_lines(document, 'clearpage')
        i = i + 1


def revert_cleardoublepage(document):
    " cleardoublepage -> ERT"
    i = 0
    while True:
        i = find_token(document.body, "\\cleardoublepage", i)
        if i == -1:
            break
        document.body[i:i+1] = _ert_lines(document, 'cleardoublepage')
        i = i + 1


def revert_encodings(document):
    " Set new encodings to auto. "
    encodings = ["8859-6", "8859-8", "cp437", "cp437de", "cp850", "cp852",
                 "cp855", "cp858", "cp862", "cp865", "cp866", "cp1250",
                 "cp1252", "cp1256", "cp1257", "latin10", "pt254", "tis620-0"]
    i = find_token(document.header, "\\inputencoding", 0)
    if i == -1:
        document.header.append("\\inputencoding auto")
    else:
        inputenc = get_value(document.header, "\\inputencoding", i)
        if inputenc in encodings:
            document.header[i] = "\\inputencoding auto"
    document.inputencoding = get_value(document.header, "\\inputencoding", 0)


##
# Conversion hub
#

supported_versions = ["1.5.0","1.5"]
convert = [[246, []],
           [247, [convert_font_settings]],
           [248, []],
           [249, [convert_utf8]],
           [250, []],
           [251, []],
           [252, [convert_commandparams, convert_bibitem]],
           [253, []],
           [254, [convert_esint]],
           [255, []],
           [256, []]]

revert =  [[255, [revert_encodings]],
           [254, [revert_clearpage, revert_cleardoublepage]],
           [253, [revert_esint]],
           [252, [revert_nomenclature, revert_printnomenclature]],
           [251, [revert_commandparams]],
           [250, [revert_cs_label]],
           [249, []],
           [248, [revert_utf8]],
           [247, [revert_booktabs]],
           [246, [revert_font_settings]],
           [245, [revert_framed]]]


if __name__ == "__main__":
    pass