# -*- coding: utf-8 -*- # This file is part of lyx2lyx # Copyright (C) 2018 The LyX team # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. """ Convert files to the file format generated by lyx 2.4""" import re, string import unicodedata import sys, os # Uncomment only what you need to import, please. from parser_tools import (count_pars_in_inset, find_end_of_inset, find_end_of_layout, find_token, get_bool_value, get_option_value, get_value, get_quoted_value) # del_token, del_value, del_complete_lines, # find_complete_lines, find_end_of, # find_re, find_substring, find_token_backwards, # get_containing_inset, get_containing_layout, # is_in_inset, set_bool_value # find_tokens, find_token_exact, check_token from lyx2lyx_tools import (put_cmd_in_ert, add_to_preamble) # revert_font_attrs, insert_to_preamble, latex_length # get_ert, lyx2latex, lyx2verbatim, length_in_bp, convert_info_insets # revert_flex_inset, hex2ratio, str2bool #################################################################### # Private helper functions ############################################################################### ### ### Conversion and reversion routines ### ############################################################################### def convert_lst_literalparam(document): " Add param literal to include inset " i = 0 while True: i = find_token(document.body, '\\begin_inset CommandInset include', i) if i == -1: break j = find_end_of_inset(document.body, i) if j == -1: document.warning("Malformed LyX document: Can't find end of command inset at line %d" % i) i += 1 continue while i < j and document.body[i].strip() != '': i += 1 document.body.insert(i, "literal \"true\"") def revert_lst_literalparam(document): " Remove param literal from include inset " i = 0 while True: i = find_token(document.body, '\\begin_inset CommandInset include', i) if i == -1: break j = find_end_of_inset(document.body, i) if j == -1: document.warning("Malformed LyX document: Can't find end of include inset at line %d" % i) i += 1 continue k = find_token(document.body, 'literal', i, j) if k == -1: i += 1 continue del document.body[k] def revert_paratype(document): " Revert ParaType font definitions to LaTeX " if find_token(document.header, "\\use_non_tex_fonts false", 0) != -1: preamble = "" i1 = find_token(document.header, "\\font_roman \"PTSerif-TLF\"", 0) i2 = find_token(document.header, "\\font_sans \"default\"", 0) i3 = find_token(document.header, "\\font_typewriter \"default\"", 0) j = find_token(document.header, "\\font_sans \"PTSans-TLF\"", 0) sfval = get_value(document.header, "\\font_sf_scale", 0) # cutoff " 100" sfval = sfval[:-4] sfoption = "" if sfval != "100": sfoption = "scaled=" + format(float(sfval) / 100, '.2f') k = find_token(document.header, "\\font_typewriter \"PTMono-TLF\"", 0) ttval = get_value(document.header, "\\font_tt_scale", 0) # cutoff " 100" ttval = ttval[:-4] ttoption = "" if ttval != "100": ttoption = "scaled=" + format(float(ttval) / 100, '.2f') if i1 != -1 and i2 != -1 and i3!= -1: add_to_preamble(document, ["\\usepackage{paratype}"]) else: if i1!= -1: add_to_preamble(document, ["\\usepackage{PTSerif}"]) document.header[i1] = document.header[i1].replace("PTSerif-TLF", "default") if j!= -1: if sfoption != "": add_to_preamble(document, ["\\usepackage[" + sfoption + "]{PTSans}"]) else: add_to_preamble(document, ["\\usepackage{PTSans}"]) document.header[j] = document.header[j].replace("PTSans-TLF", "default") if k!= -1: if ttoption != "": add_to_preamble(document, ["\\usepackage[" + ttoption + "]{PTMono}"]) else: add_to_preamble(document, ["\\usepackage{PTMono}"]) document.header[k] = document.header[k].replace("PTMono-TLF", "default") def revert_xcharter(document): " Revert XCharter font definitions to LaTeX " i = find_token(document.header, "\\font_roman \"xcharter\"", 0) if i == -1: return # replace unsupported font setting document.header[i] = document.header[i].replace("xcharter", "default") # no need for preamble code with system fonts if get_bool_value(document.header, "\\use_non_tex_fonts"): return # transfer old style figures setting to package options j = find_token(document.header, "\\font_osf true") if j != -1: options = "[osf]" document.header[j] = "\\font_osf false" else: options = "" if i != -1: add_to_preamble(document, ["\\usepackage%s{XCharter}"%options]) def revert_lscape(document): " Reverts the landscape environment (Landscape module) to TeX-code " if not "landscape" in document.get_module_list(): return i = 0 while True: i = find_token(document.body, "\\begin_inset Flex Landscape", i) if i == -1: return j = find_end_of_inset(document.body, i) if j == -1: document.warning("Malformed LyX document: Can't find end of Landscape inset") i += 1 continue if document.body[i] == "\\begin_inset Flex Landscape (Floating)": document.body[j - 2 : j + 1] = put_cmd_in_ert("\\end{landscape}}") document.body[i : i + 4] = put_cmd_in_ert("\\afterpage{\\begin{landscape}") add_to_preamble(document, ["\\usepackage{afterpage}"]) else: document.body[j - 2 : j + 1] = put_cmd_in_ert("\\end{landscape}") document.body[i : i + 4] = put_cmd_in_ert("\\begin{landscape}") add_to_preamble(document, ["\\usepackage{pdflscape}"]) # no need to reset i def convert_fontenc(document): " Convert default fontenc setting " i = find_token(document.header, "\\fontencoding global", 0) if i == -1: return document.header[i] = document.header[i].replace("global", "auto") def revert_fontenc(document): " Revert default fontenc setting " i = find_token(document.header, "\\fontencoding auto", 0) if i == -1: return document.header[i] = document.header[i].replace("auto", "global") def revert_nospellcheck(document): " Remove nospellcheck font info param " i = 0 while True: i = find_token(document.body, '\\nospellcheck', i) if i == -1: return del document.body[i] def revert_floatpclass(document): " Remove float placement params 'document' and 'class' " i = 0 i = find_token(document.header, "\\float_placement class", 0) if i != -1: del document.header[i] i = 0 while True: i = find_token(document.body, '\\begin_inset Float', i) if i == -1: break j = find_end_of_inset(document.body, i) k = find_token(document.body, 'placement class', i, i + 2) if k == -1: k = find_token(document.body, 'placement document', i, i + 2) if k != -1: del document.body[k] i = j continue del document.body[k] def revert_floatalignment(document): " Remove float alignment params " i = 0 i = find_token(document.header, "\\float_alignment", 0) galignment = "" if i != -1: galignment = get_value(document.header, "\\float_alignment", i) del document.header[i] i = 0 while True: i = find_token(document.body, '\\begin_inset Float', i) if i == -1: break j = find_end_of_inset(document.body, i) if j == -1: document.warning("Malformed LyX document: Can't find end of inset at line " + str(i)) i += 1 k = find_token(document.body, 'alignment', i, i + 4) if k == -1: i = j continue alignment = get_value(document.body, "alignment", k) if alignment == "document": alignment = galignment del document.body[k] l = find_token(document.body, "\\begin_layout Plain Layout", i, j) if l == -1: document.warning("Can't find float layout!") i = j continue alcmd = [] if alignment == "left": alcmd = put_cmd_in_ert("\\raggedright{}") elif alignment == "center": alcmd = put_cmd_in_ert("\\centering{}") elif alignment == "right": alcmd = put_cmd_in_ert("\\raggedleft{}") if len(alcmd) > 0: document.body[l+1:l+1] = alcmd i = j def revert_tuftecite(document): " Revert \cite commands in tufte classes " tufte = ["tufte-book", "tufte-handout"] if document.textclass not in tufte: return i = 0 while (True): i = find_token(document.body, "\\begin_inset CommandInset citation", i) if i == -1: break j = find_end_of_inset(document.body, i) if j == -1: document.warning("Can't find end of citation inset at line %d!!" %(i)) i += 1 continue k = find_token(document.body, "LatexCommand", i, j) if k == -1: document.warning("Can't find LatexCommand for citation inset at line %d!" %(i)) i = j + 1 continue cmd = get_value(document.body, "LatexCommand", k) if cmd != "cite": i = j + 1 continue pre = get_quoted_value(document.body, "before", i, j) post = get_quoted_value(document.body, "after", i, j) key = get_quoted_value(document.body, "key", i, j) if not key: document.warning("Citation inset at line %d does not have a key!" %(i)) key = "???" # Replace command with ERT res = "\\cite" if pre: res += "[" + pre + "]" if post: res += "[" + post + "]" elif pre: res += "[]" res += "{" + key + "}" document.body[i:j+1] = put_cmd_in_ert([res]) i = j + 1 def revert_stretchcolumn(document): " We remove the column varwidth flags or everything else will become a mess. " i = 0 while True: i = find_token(document.body, "\\begin_inset Tabular", i) if i == -1: return j = find_end_of_inset(document.body, i + 1) if j == -1: document.warning("Malformed LyX document: Could not find end of tabular.") continue for k in range(i, j): if re.search('^$', document.body[k]): document.warning("Converting 'tabularx'/'xltabular' table to normal table.") document.body[k] = document.body[k].replace(' varwidth="true"', '') i = i + 1 def revert_vcolumns(document): " Revert standard columns with line breaks etc. " i = 0 needvarwidth = False needarray = False try: while True: i = find_token(document.body, "\\begin_inset Tabular", i) if i == -1: return j = find_end_of_inset(document.body, i) if j == -1: document.warning("Malformed LyX document: Could not find end of tabular.") i += 1 continue # Collect necessary column information m = i + 1 nrows = int(document.body[i+1].split('"')[3]) ncols = int(document.body[i+1].split('"')[5]) col_info = [] for k in range(ncols): m = find_token(document.body, "", begcell) vcand = False if find_token(document.body, "\\begin_inset Newline", begcell, endcell) != -1: vcand = True elif count_pars_in_inset(document.body, begcell + 2) > 1: vcand = True elif get_value(document.body, "\\begin_layout", begcell) != "Plain Layout": vcand = True if vcand and rotate == "" and ((multicolumn == "" and multirow == "") or width == ""): if col_info[col][0] == "" and col_info[col][1] == "" and col_info[col][3] == "": needvarwidth = True alignment = col_info[col][2] col_line = col_info[col][4] vval = "" if alignment == "center": vval = ">{\\centering}" elif alignment == "left": vval = ">{\\raggedright}" elif alignment == "right": vval = ">{\\raggedleft}" if vval != "": needarray = True vval += "V{\\linewidth}" document.body[col_line] = document.body[col_line][:-1] + " special=\"" + vval + "\">" # ERT newlines and linebreaks (since LyX < 2.4 automatically inserts parboxes # with newlines, and we do not want that) while True: endcell = find_token(document.body, "", begcell) linebreak = False nl = find_token(document.body, "\\begin_inset Newline newline", begcell, endcell) if nl == -1: nl = find_token(document.body, "\\begin_inset Newline linebreak", begcell, endcell) if nl == -1: break linebreak = True nle = find_end_of_inset(document.body, nl) del(document.body[nle:nle+1]) if linebreak: document.body[nl:nl+1] = put_cmd_in_ert("\\linebreak{}") else: document.body[nl:nl+1] = put_cmd_in_ert("\\\\") m += 1 i = j + 1 finally: if needarray == True: add_to_preamble(document, ["\\usepackage{array}"]) if needvarwidth == True: add_to_preamble(document, ["\\usepackage{varwidth}"]) def revert_bibencoding(document): " Revert bibliography encoding " # Get cite engine engine = "basic" i = find_token(document.header, "\\cite_engine", 0) if i == -1: document.warning("Malformed document! Missing \\cite_engine") else: engine = get_value(document.header, "\\cite_engine", i) # Check if biblatex biblatex = False if engine in ["biblatex", "biblatex-natbib"]: biblatex = True # Map lyx to latex encoding names encodings = { "utf8" : "utf8", "utf8x" : "utf8x", "armscii8" : "armscii8", "iso8859-1" : "latin1", "iso8859-2" : "latin2", "iso8859-3" : "latin3", "iso8859-4" : "latin4", "iso8859-5" : "iso88595", "iso8859-6" : "8859-6", "iso8859-7" : "iso-8859-7", "iso8859-8" : "8859-8", "iso8859-9" : "latin5", "iso8859-13" : "latin7", "iso8859-15" : "latin9", "iso8859-16" : "latin10", "applemac" : "applemac", "cp437" : "cp437", "cp437de" : "cp437de", "cp850" : "cp850", "cp852" : "cp852", "cp855" : "cp855", "cp858" : "cp858", "cp862" : "cp862", "cp865" : "cp865", "cp866" : "cp866", "cp1250" : "cp1250", "cp1251" : "cp1251", "cp1252" : "cp1252", "cp1255" : "cp1255", "cp1256" : "cp1256", "cp1257" : "cp1257", "koi8-r" : "koi8-r", "koi8-u" : "koi8-u", "pt154" : "pt154", "utf8-platex" : "utf8", "ascii" : "ascii" } i = 0 bibresources = [] while (True): i = find_token(document.body, "\\begin_inset CommandInset bibtex", i) if i == -1: break j = find_end_of_inset(document.body, i) if j == -1: document.warning("Can't find end of bibtex inset at line %d!!" %(i)) i += 1 continue encoding = get_quoted_value(document.body, "encoding", i, j) if not encoding: i += 1 continue # remove encoding line k = find_token(document.body, "encoding", i, j) if k != -1: del document.body[k] # Re-find inset end line j = find_end_of_inset(document.body, i) if biblatex: biblio_options = "" h = find_token(document.header, "\\biblio_options", 0) if h != -1: biblio_options = get_value(document.header, "\\biblio_options", h) if not "bibencoding" in biblio_options: document.header[h] += ",bibencoding=%s" % encodings[encoding] else: bs = find_token(document.header, "\\biblatex_bibstyle", 0) if bs == -1: # this should not happen document.warning("Malformed LyX document! No \\biblatex_bibstyle header found!") else: document.header[bs-1 : bs-1] = ["\\biblio_options bibencoding=" + encodings[encoding]] else: document.body[j+1:j+1] = put_cmd_in_ert("\\egroup") document.body[i:i] = put_cmd_in_ert("\\bgroup\\inputencoding{" + encodings[encoding] + "}") i = j + 1 def convert_vcsinfo(document): " Separate vcs Info inset from buffer Info inset. " types = { "vcs-revision" : "revision", "vcs-tree-revision" : "tree-revision", "vcs-author" : "author", "vcs-time" : "time", "vcs-date" : "date" } i = 0 while True: i = find_token(document.body, "\\begin_inset Info", i) if i == -1: return j = find_end_of_inset(document.body, i + 1) if j == -1: document.warning("Malformed LyX document: Could not find end of Info inset.") i = i + 1 continue tp = find_token(document.body, 'type', i, j) tpv = get_quoted_value(document.body, "type", tp) if tpv != "buffer": i = i + 1 continue arg = find_token(document.body, 'arg', i, j) argv = get_quoted_value(document.body, "arg", arg) if argv not in list(types.keys()): i = i + 1 continue document.body[tp] = "type \"vcs\"" document.body[arg] = "arg \"" + types[argv] + "\"" i = i + 1 def revert_vcsinfo(document): " Merge vcs Info inset to buffer Info inset. " args = ["revision", "tree-revision", "author", "time", "date" ] i = 0 while True: i = find_token(document.body, "\\begin_inset Info", i) if i == -1: return j = find_end_of_inset(document.body, i + 1) if j == -1: document.warning("Malformed LyX document: Could not find end of Info inset.") i = i + 1 continue tp = find_token(document.body, 'type', i, j) tpv = get_quoted_value(document.body, "type", tp) if tpv != "vcs": i = i + 1 continue arg = find_token(document.body, 'arg', i, j) argv = get_quoted_value(document.body, "arg", arg) if argv not in args: document.warning("Malformed Info inset. Invalid vcs arg.") i = i + 1 continue document.body[tp] = "type \"buffer\"" document.body[arg] = "arg \"vcs-" + argv + "\"" i = i + 1 ## # Conversion hub # supported_versions = ["2.4.0", "2.4"] convert = [ [545, [convert_lst_literalparam]], [546, []], [547, []], [548, []], [549, []], [550, [convert_fontenc]], [551, []], [552, []], [553, []], [554, []], [555, []], [556, []], [557, [convert_vcsinfo]] ] revert = [ [556, [revert_vcsinfo]], [555, [revert_bibencoding]], [554, [revert_vcolumns]], [553, [revert_stretchcolumn]], [552, [revert_tuftecite]], [551, [revert_floatpclass, revert_floatalignment]], [550, [revert_nospellcheck]], [549, [revert_fontenc]], [548, []],# dummy format change [547, [revert_lscape]], [546, [revert_xcharter]], [545, [revert_paratype]], [544, [revert_lst_literalparam]] ] if __name__ == "__main__": pass