
709 lines
25 KiB
Raw Normal View History

# -*- coding: utf-8 -*-
# This file is part of lyx2lyx
# Copyright (C) 2018 The LyX team
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
""" Convert files to the file format generated by lyx 2.4"""
import re, string
import unicodedata
import sys, os
# Uncomment only what you need to import, please.
from parser_tools import (count_pars_in_inset, find_end_of_inset, find_end_of_layout,
find_token, get_bool_value, get_option_value, get_value, get_quoted_value)
# del_token, del_value, del_complete_lines,
# find_complete_lines, find_end_of,
# find_re, find_substring, find_token_backwards,
# get_containing_inset, get_containing_layout,
# is_in_inset, set_bool_value
# find_tokens, find_token_exact, check_token
from lyx2lyx_tools import (put_cmd_in_ert, add_to_preamble)
# revert_font_attrs, insert_to_preamble, latex_length
# get_ert, lyx2latex, lyx2verbatim, length_in_bp, convert_info_insets
# revert_flex_inset, hex2ratio, str2bool
# Private helper functions
### Conversion and reversion routines
def removeFrontMatterStyles(document):
    """Remove the Begin/EndFrontmatter layouts from the document body.

    For each of the layouts ``BeginFrontmatter`` and ``EndFrontmatter``,
    every paragraph opened by ``\\begin_layout <layout>`` is deleted from
    ``document.body`` in place, from its first line through the line
    reported by find_end_of_layout().  A layout whose end cannot be found
    is reported through ``document.warning`` and skipped.
    """
    for layout in ('BeginFrontmatter', 'EndFrontmatter'):
        i = 0
        while True:
            i = find_token(document.body, '\\begin_layout ' + layout, i)
            if i == -1:
                break
            j = find_end_of_layout(document.body, i)
            if j == -1:
                document.warning("Malformed LyX document: Can't find end of layout at line %d" % i)
                # Step past the unterminated layout so the scan cannot stall.
                i += 1
                continue
            # NOTE(review): this tests the line at j itself.  If the intent
            # is to also drop a blank line following the layout, the test
            # should probably look at j + 1 -- confirm before changing.
            if document.body[j] == '':
                j = j + 1
            # i is deliberately not advanced: after the deletion the next
            # candidate line has moved up to index i.
            del document.body[i:j+1]
def addFrontMatterStyles(document):
    """Wrap the elsarticle front matter in Begin/EndFrontmatter layouts.

    Only acts when ``document.textclass`` is ``"elsarticle"``.  The body is
    scanned for the front-matter layouts (Title, Author, Abstract, ...);
    a ``BeginFrontmatter`` paragraph is inserted before the earliest one
    found and an ``EndFrontmatter`` paragraph after the latest one.  If no
    such layout exists the document is left untouched.
    """

    def insertFrontmatter(prefix, line):
        # Insert a <prefix>Frontmatter paragraph, holding only a note
        # inset, in front of body line number `line`.
        document.body[line:line] = ['\\begin_layout ' + prefix + 'Frontmatter',
                                    '\\begin_inset Note Note',
                                    'status open', '',
                                    '\\begin_layout Plain Layout',
                                    'Keep this empty!',
                                    '\\end_layout', '',
                                    '\\end_inset', '', '',
                                    '\\end_layout', '']

    if document.textclass != "elsarticle":
        return

    layouts = ['Title', 'Title footnote', 'Author', 'Author footnote',
               'Corresponding author', 'Address', 'Email', 'Abstract', 'Keywords']
    first = -1   # first line of the earliest front-matter layout
    last = -1    # line just after the latest front-matter layout
    for layout in layouts:
        i = 0
        while True:
            i = find_token(document.body, '\\begin_layout ' + layout, i)
            if i == -1:
                break
            k = find_end_of_layout(document.body, i)
            if k == -1:
                document.warning("Malformed LyX document: Can't find end of layout at line %d" % i)
                i += 1
                continue
            if first == -1 or i < first:
                first = i
            if last == -1 or last <= k:
                last = k+1
            i = k

    if first == -1:
        # No front-matter layout present: nothing to wrap.
        return

    # Absorb one blank line on either side so the inserted paragraphs
    # sit next to the surrounding content.
    if first > 0 and document.body[first-1] == '':
        first -= 1
    # `last` may equal len(body) when the front matter ends the body.
    if last < len(document.body) and document.body[last] == '':
        last = last + 1

    # Insert at the higher index first so `first` stays valid.
    insertFrontmatter('End', last)
    insertFrontmatter('Begin', first)
def convert_lst_literalparam(document):
    """Add the ``literal "true"`` parameter to every include inset.

    For each ``\\begin_inset CommandInset include`` in ``document.body``
    the parameter line is inserted in front of the first blank line inside
    the inset (or at the inset's end line if there is none).  Insets whose
    end cannot be found are reported and skipped.
    """
    i = 0
    while True:
        i = find_token(document.body, '\\begin_inset CommandInset include', i)
        if i == -1:
            break
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: Can't find end of command inset at line %d" % i)
            i += 1
            continue
        # Walk to the first blank line inside the inset (or to its end).
        while i < j and document.body[i].strip() != '':
            i += 1
        # i now lies past the inset's opening line, so the next search
        # cannot match this inset again.
        document.body.insert(i, "literal \"true\"")
def revert_lst_literalparam(document):
    """Remove the ``literal`` parameter from every include inset.

    For each ``\\begin_inset CommandInset include`` in ``document.body``
    every line found by searching for ``literal`` between the inset's
    start and end is deleted.  Insets whose end cannot be found are
    reported and skipped.
    """
    i = 0
    while True:
        i = find_token(document.body, '\\begin_inset CommandInset include', i)
        if i == -1:
            break
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: Can't find end of include inset at line %d" % i)
            i += 1
            continue
        k = find_token(document.body, 'literal', i, j)
        if k == -1:
            # Nothing (more) to remove in this inset: move on.
            i += 1
            continue
        # i is left unchanged so the same inset is rescanned; the loop
        # advances once no 'literal' line remains in it.
        del document.body[k]
def revert_paratype(document):
    """Revert ParaType font definitions to LaTeX preamble code.

    Only acts when the header contains ``\\use_non_tex_fonts false``.

    * If the roman font is PTSerif-TLF and both the sans and typewriter
      fonts are "default", ``\\usepackage{paratype}`` is added.
    * Otherwise each ParaType family that is selected individually gets
      its own package (PTSerif, PTSans, PTMono), the sans/typewriter ones
      with a ``scaled=`` option when the scale differs from 100, and the
      corresponding header entry is reset to "default".
    """
    if find_token(document.header, "\\use_non_tex_fonts false", 0) == -1:
        return

    i1 = find_token(document.header, "\\font_roman \"PTSerif-TLF\"", 0)
    i2 = find_token(document.header, "\\font_sans \"default\"", 0)
    i3 = find_token(document.header, "\\font_typewriter \"default\"", 0)

    j = find_token(document.header, "\\font_sans \"PTSans-TLF\"", 0)
    sfval = get_value(document.header, "\\font_sf_scale", 0)
    # cutoff " 100" (the trailing four characters of the value)
    sfval = sfval[:-4]
    sfoption = ""
    if sfval != "100":
        sfoption = "scaled=" + format(float(sfval) / 100, '.2f')

    k = find_token(document.header, "\\font_typewriter \"PTMono-TLF\"", 0)
    ttval = get_value(document.header, "\\font_tt_scale", 0)
    # cutoff " 100" (the trailing four characters of the value)
    ttval = ttval[:-4]
    ttoption = ""
    if ttval != "100":
        ttoption = "scaled=" + format(float(ttval) / 100, '.2f')

    if i1 != -1 and i2 != -1 and i3 != -1:
        # Serif selected, sans and typewriter left at default: the
        # umbrella package is used.
        add_to_preamble(document, ["\\usepackage{paratype}"])
    else:
        if i1 != -1:
            add_to_preamble(document, ["\\usepackage{PTSerif}"])
            document.header[i1] = document.header[i1].replace("PTSerif-TLF", "default")
        if j != -1:
            if sfoption != "":
                add_to_preamble(document, ["\\usepackage[" + sfoption + "]{PTSans}"])
            else:
                add_to_preamble(document, ["\\usepackage{PTSans}"])
            document.header[j] = document.header[j].replace("PTSans-TLF", "default")
        if k != -1:
            if ttoption != "":
                add_to_preamble(document, ["\\usepackage[" + ttoption + "]{PTMono}"])
            else:
                add_to_preamble(document, ["\\usepackage{PTMono}"])
            document.header[k] = document.header[k].replace("PTMono-TLF", "default")
def revert_xcharter(document):
    """Revert the XCharter roman font setting to LaTeX preamble code.

    If the header selects ``\\font_roman "xcharter"`` the setting is reset
    to "default".  Unless ``\\use_non_tex_fonts`` is true, the font is then
    loaded through ``\\usepackage{XCharter}``; an active ``\\font_osf true``
    is switched off in the header and passed as the ``[osf]`` package
    option instead.
    """
    i = find_token(document.header, "\\font_roman \"xcharter\"", 0)
    if i == -1:
        return

    # replace unsupported font setting
    document.header[i] = document.header[i].replace("xcharter", "default")
    # no need for preamble code with system fonts
    if get_bool_value(document.header, "\\use_non_tex_fonts"):
        return

    # transfer old style figures setting to package options
    j = find_token(document.header, "\\font_osf true")
    if j != -1:
        options = "[osf]"
        document.header[j] = "\\font_osf false"
    else:
        options = ""
    add_to_preamble(document, ["\\usepackage%s{XCharter}" % options])
def revert_lscape(document):
    """Revert Landscape flex insets (Landscape module) to TeX code.

    Does nothing unless "landscape" is in ``document.get_module_list()``.
    Each ``\\begin_inset Flex Landscape`` inset has its first four lines
    and its last three lines replaced by ERT that opens and closes a
    ``landscape`` environment.  The "(Floating)" variant is additionally
    wrapped in ``\\afterpage{...}``.  The required packages are added to
    the preamble.
    """
    if "landscape" not in document.get_module_list():
        return

    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Flex Landscape", i)
        if i == -1:
            break
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: Can't find end of Landscape inset")
            i += 1
            continue

        # The tail is replaced before the head so that index i is still
        # valid when the opening lines are rewritten.
        if document.body[i] == "\\begin_inset Flex Landscape (Floating)":
            document.body[j - 2 : j + 1] = put_cmd_in_ert("\\end{landscape}}")
            document.body[i : i + 4] = put_cmd_in_ert("\\afterpage{\\begin{landscape}")
            add_to_preamble(document, ["\\usepackage{afterpage}"])
        else:
            document.body[j - 2 : j + 1] = put_cmd_in_ert("\\end{landscape}")
            document.body[i : i + 4] = put_cmd_in_ert("\\begin{landscape}")

        add_to_preamble(document, ["\\usepackage{pdflscape}"])
        # no need to reset i: the inset's opening line is gone, so the
        # next search cannot match it again
def convert_fontenc(document):
    """Convert the default fontenc setting from "global" to "auto".

    Rewrites the ``\\fontencoding global`` header line, if present, in
    place; otherwise the header is left untouched.
    """
    i = find_token(document.header, "\\fontencoding global", 0)
    if i == -1:
        # Setting not present: indexing with -1 would hit the last line.
        return
    document.header[i] = document.header[i].replace("global", "auto")
def revert_fontenc(document):
    """Revert the default fontenc setting from "auto" to "global".

    Rewrites the ``\\fontencoding auto`` header line, if present, in
    place; otherwise the header is left untouched.
    """
    i = find_token(document.header, "\\fontencoding auto", 0)
    if i == -1:
        # Setting not present: indexing with -1 would hit the last line.
        return
    document.header[i] = document.header[i].replace("auto", "global")
def revert_nospellcheck(document):
    """Remove the ``\\nospellcheck`` font info parameter from the body.

    Every body line found by searching for ``\\nospellcheck`` is deleted
    in place.
    """
    i = 0
    while True:
        i = find_token(document.body, '\\nospellcheck', i)
        if i == -1:
            return
        # i is not advanced: the following line has moved up to index i.
        del document.body[i]
def revert_floatpclass(document):
    """Remove the float placement params 'document' and 'class'.

    The ``\\float_placement class`` header line is deleted if present.
    In the body, each ``\\begin_inset Float`` inset has a
    ``placement class`` or ``placement document`` line removed when one is
    found within the two lines starting at the inset's opening line.
    """
    i = find_token(document.header, "\\float_placement class", 0)
    if i != -1:
        del document.header[i]

    i = 0
    while True:
        i = find_token(document.body, '\\begin_inset Float', i)
        if i == -1:
            break
        j = find_end_of_inset(document.body, i)
        if j == -1:
            # Without this guard `i = j` below would restart the scan
            # from index -1.
            document.warning("Malformed LyX document: Can't find end of inset at line " + str(i))
            i += 1
            continue
        k = find_token(document.body, 'placement class', i, i + 2)
        if k == -1:
            k = find_token(document.body, 'placement document', i, i + 2)
            if k != -1:
                del document.body[k]
            # Done with this float: continue after it.
            i = j
            continue
        # i is left unchanged, so the same float is looked at once more;
        # that pass finds no 'placement class' and moves on via the
        # branch above.
        del document.body[k]
def revert_floatalignment(document):
    """Remove float alignment params and emulate them with ERT.

    The ``\\float_alignment`` header line is removed and its value kept as
    the document-wide default.  For each ``\\begin_inset Float`` inset that
    carries an ``alignment`` line within its first four lines, that line is
    deleted and, for the values left/center/right, a matching
    ``\\raggedright{}`` / ``\\centering{}`` / ``\\raggedleft{}`` ERT is
    inserted right after the float's first ``Plain Layout`` line.  The
    value ``document`` is resolved to the former header setting.
    """
    i = find_token(document.header, "\\float_alignment", 0)
    galignment = ""
    if i != -1:
        galignment = get_value(document.header, "\\float_alignment", i)
        del document.header[i]

    # Alignment value -> TeX command emulating it.
    commands = {
        "left": "\\raggedright{}",
        "center": "\\centering{}",
        "right": "\\raggedleft{}",
    }

    i = 0
    while True:
        i = find_token(document.body, '\\begin_inset Float', i)
        if i == -1:
            break
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: Can't find end of inset at line " + str(i))
            i += 1
            # Skip the malformed inset; continuing with j == -1 would
            # search an invalid range below.
            continue
        k = find_token(document.body, 'alignment', i, i + 4)
        if k == -1:
            i = j
            continue
        alignment = get_value(document.body, "alignment", k)
        if alignment == "document":
            alignment = galignment
        del document.body[k]
        l = find_token(document.body, "\\begin_layout Plain Layout", i, j)
        if l == -1:
            document.warning("Can't find float layout!")
            i = j
            continue
        if alignment in commands:
            document.body[l+1:l+1] = put_cmd_in_ert(commands[alignment])
        i = j
def revert_tuftecite(document):
    """Revert \\cite commands in the tufte classes to ERT.

    Only acts when ``document.textclass`` is "tufte-book" or
    "tufte-handout".  Every citation inset whose ``LatexCommand`` is
    ``cite`` is replaced by ERT of the form
    ``\\cite[before][after]{key}``:

    * only "before" set  -> ``\\cite[before][]{key}``
    * only "after" set   -> ``\\cite[after]{key}``
    * both set           -> ``\\cite[before][after]{key}``

    A missing key is reported and replaced by ``???``.
    """
    tufte = ["tufte-book", "tufte-handout"]
    if document.textclass not in tufte:
        return

    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CommandInset citation", i)
        if i == -1:
            break
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Can't find end of citation inset at line %d!!" %(i))
            i += 1
            continue
        k = find_token(document.body, "LatexCommand", i, j)
        if k == -1:
            document.warning("Can't find LatexCommand for citation inset at line %d!" %(i))
            i = j + 1
            continue
        cmd = get_value(document.body, "LatexCommand", k)
        if cmd != "cite":
            # Other citation commands are left as insets.
            i = j + 1
            continue
        pre = get_quoted_value(document.body, "before", i, j)
        post = get_quoted_value(document.body, "after", i, j)
        key = get_quoted_value(document.body, "key", i, j)
        if not key:
            document.warning("Citation inset at line %d does not have a key!" %(i))
            key = "???"
        # Replace command with ERT
        res = "\\cite"
        if pre:
            res += "[" + pre + "]"
        if post:
            res += "[" + post + "]"
        elif pre:
            # A lone "before" text needs an empty second optional argument.
            res += "[]"
        res += "{" + key + "}"
        document.body[i:j+1] = put_cmd_in_ert([res])
        i = j + 1
def revert_stretchcolumn(document):
    """Remove the column ``varwidth`` flag from all tabulars.

    For every ``\\begin_inset Tabular`` inset, each ``<column ...>`` line
    carrying a ``varwidth="..."`` attribute triggers a warning and has the
    text `` varwidth="true"`` removed.  (Original rationale: we remove the
    column varwidth flags or everything else will become a mess.)
    """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Tabular", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            document.warning("Malformed LyX document: Could not find end of tabular.")
            # Advance before retrying; otherwise the same line would be
            # found again forever.
            i += 1
            continue
        for k in range(i, j):
            if re.search(r'^<column.*varwidth="[^"]+".*>$', document.body[k]):
                document.warning("Converting 'tabularx'/'xltabular' table to normal table.")
                document.body[k] = document.body[k].replace(' varwidth="true"', '')
        i = i + 1
def revert_vcolumns(document):
    """Revert standard columns with line breaks etc. to varwidth columns.

    For every tabular, the row/column counts are read from the line after
    the inset's opening line, and the ``<column>`` attributes are
    collected.  A cell is a candidate when it contains a Newline inset,
    more than one paragraph, or a first layout other than "Plain Layout".
    For a non-rotated candidate cell (not multicolumn/multirow, or without
    its own width) whose column has no width, varwidth or special setting:

    * the column line gets a ``special="...V{\\linewidth}"`` attribute,
      prefixed by an alignment declaration where the column alignment is
      center/left/right;
    * the cell's Newline insets are replaced by ERT (``\\\\`` for
      newlines, ``\\linebreak{}`` for line breaks).

    The ``array`` and ``varwidth`` packages are added to the preamble when
    needed.
    """
    i = 0
    needvarwidth = False
    needarray = False
    while True:
        i = find_token(document.body, "\\begin_inset Tabular", i)
        if i == -1:
            break
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: Could not find end of tabular.")
            i += 1
            continue

        # Collect necessary column information
        m = i + 1
        # NOTE(review): assumes the line after the inset start carries the
        # row count as its 2nd and the column count as its 3rd quoted
        # value -- confirm against the LyX tabular format.
        nrows = int(document.body[i+1].split('"')[3])
        ncols = int(document.body[i+1].split('"')[5])
        col_info = []
        for k in range(ncols):
            m = find_token(document.body, "<column", m)
            width = get_option_value(document.body[m], 'width')
            varwidth = get_option_value(document.body[m], 'varwidth')
            alignment = get_option_value(document.body[m], 'alignment')
            special = get_option_value(document.body[m], 'special')
            col_info.append([width, varwidth, alignment, special, m])
            # NOTE(review): m is not advanced here, so for ncols > 1 the
            # next search starts on the same "<column" line -- verify
            # whether an `m += 1` is intended.

        # Now parse cells
        m = i + 1
        for row in range(nrows):
            for col in range(ncols):
                m = find_token(document.body, "<cell", m)
                multicolumn = get_option_value(document.body[m], 'multicolumn')
                multirow = get_option_value(document.body[m], 'multirow')
                width = get_option_value(document.body[m], 'width')
                rotate = get_option_value(document.body[m], 'rotate')
                # Check for: linebreaks, multipars, non-standard environments
                begcell = m
                endcell = find_token(document.body, "</cell>", begcell)
                vcand = False
                if find_token(document.body, "\\begin_inset Newline", begcell, endcell) != -1:
                    vcand = True
                elif count_pars_in_inset(document.body, begcell + 2) > 1:
                    vcand = True
                elif get_value(document.body, "\\begin_layout", begcell) != "Plain Layout":
                    vcand = True
                if vcand and rotate == "" and ((multicolumn == "" and multirow == "") or width == ""):
                    if col_info[col][0] == "" and col_info[col][1] == "" and col_info[col][3] == "":
                        needvarwidth = True
                        alignment = col_info[col][2]
                        col_line = col_info[col][4]
                        vval = ""
                        if alignment == "center":
                            vval = ">{\\centering}"
                        elif alignment == "left":
                            vval = ">{\\raggedright}"
                        elif alignment == "right":
                            vval = ">{\\raggedleft}"
                        if vval != "":
                            # The ">{...}" column prefix needs array.
                            needarray = True
                        vval += "V{\\linewidth}"

                        # Splice the special attribute in before the
                        # closing ">" of the column line.
                        document.body[col_line] = document.body[col_line][:-1] + " special=\"" + vval + "\">"
                        # ERT newlines and linebreaks (since LyX < 2.4 automatically inserts parboxes
                        # with newlines, and we do not want that)
                        while True:
                            # The cell end moves with every replacement.
                            endcell = find_token(document.body, "</cell>", begcell)
                            linebreak = False
                            nl = find_token(document.body, "\\begin_inset Newline newline", begcell, endcell)
                            if nl == -1:
                                nl = find_token(document.body, "\\begin_inset Newline linebreak", begcell, endcell)
                                if nl == -1:
                                    break
                                linebreak = True
                            nle = find_end_of_inset(document.body, nl)
                            if nle != -1:
                                # Drop the Newline inset's own end line;
                                # otherwise it is left dangling behind
                                # the replacement ERT.
                                del document.body[nle:nle+1]
                            if linebreak:
                                document.body[nl:nl+1] = put_cmd_in_ert("\\linebreak{}")
                            else:
                                document.body[nl:nl+1] = put_cmd_in_ert("\\\\")
                # Move past this "<cell" line before looking for the next.
                m += 1

        i = j + 1

    if needarray:
        add_to_preamble(document, ["\\usepackage{array}"])
    if needvarwidth:
        add_to_preamble(document, ["\\usepackage{varwidth}"])
def revert_bibencoding(document):
    """Revert the bibliography encoding parameter of bibtex insets.

    For every ``\\begin_inset CommandInset bibtex`` inset that has a
    non-empty ``encoding`` value, the encoding line is removed and the
    encoding is preserved as follows:

    * with a biblatex cite engine, ``bibencoding=<enc>`` is appended to
      the ``\\biblio_options`` header line (unless it already mentions
      bibencoding), or a new ``\\biblio_options`` line is inserted near the
      ``\\biblatex_bibstyle`` header line;
    * otherwise the inset is wrapped in ERT:
      ``\\bgroup\\inputencoding{<enc>}`` before and ``\\egroup`` after.

    ``<enc>`` is the LaTeX name looked up in the table below; an encoding
    missing from the table raises ``KeyError``.
    """
    # Get cite engine
    engine = "basic"
    i = find_token(document.header, "\\cite_engine", 0)
    if i == -1:
        document.warning("Malformed document! Missing \\cite_engine")
    else:
        engine = get_value(document.header, "\\cite_engine", i)

    # Check if biblatex
    biblatex = engine in ["biblatex", "biblatex-natbib"]

    # Map lyx to latex encoding names
    encodings = {
        "utf8" : "utf8",
        "utf8x" : "utf8x",
        "armscii8" : "armscii8",
        "iso8859-1" : "latin1",
        "iso8859-2" : "latin2",
        "iso8859-3" : "latin3",
        "iso8859-4" : "latin4",
        "iso8859-5" : "iso88595",
        "iso8859-6" : "8859-6",
        "iso8859-7" : "iso-8859-7",
        "iso8859-8" : "8859-8",
        "iso8859-9" : "latin5",
        "iso8859-13" : "latin7",
        "iso8859-15" : "latin9",
        "iso8859-16" : "latin10",
        "applemac" : "applemac",
        "cp437" : "cp437",
        "cp437de" : "cp437de",
        "cp850" : "cp850",
        "cp852" : "cp852",
        "cp855" : "cp855",
        "cp858" : "cp858",
        "cp862" : "cp862",
        "cp865" : "cp865",
        "cp866" : "cp866",
        "cp1250" : "cp1250",
        "cp1251" : "cp1251",
        "cp1252" : "cp1252",
        "cp1255" : "cp1255",
        "cp1256" : "cp1256",
        "cp1257" : "cp1257",
        "koi8-r" : "koi8-r",
        "koi8-u" : "koi8-u",
        "pt154" : "pt154",
        "utf8-platex" : "utf8",
        "ascii" : "ascii"
    }

    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CommandInset bibtex", i)
        if i == -1:
            break
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Can't find end of bibtex inset at line %d!!" %(i))
            i += 1
            continue
        encoding = get_quoted_value(document.body, "encoding", i, j)
        if not encoding:
            # Nothing to revert for this inset.
            i += 1
            continue
        # remove encoding line
        k = find_token(document.body, "encoding", i, j)
        if k != -1:
            del document.body[k]
        # Re-find inset end line
        j = find_end_of_inset(document.body, i)
        if biblatex:
            h = find_token(document.header, "\\biblio_options", 0)
            if h != -1:
                biblio_options = get_value(document.header, "\\biblio_options", h)
                if "bibencoding" not in biblio_options:
                    document.header[h] += ",bibencoding=%s" % encodings[encoding]
            else:
                bs = find_token(document.header, "\\biblatex_bibstyle", 0)
                if bs == -1:
                    # this should not happen
                    document.warning("Malformed LyX document! No \\biblatex_bibstyle header found!")
                else:
                    document.header[bs-1 : bs-1] = ["\\biblio_options bibencoding=" + encodings[encoding]]
        else:
            # Close the group first (higher index) so that i stays valid
            # for the opening ERT.
            document.body[j+1:j+1] = put_cmd_in_ert("\\egroup")
            document.body[i:i] = put_cmd_in_ert("\\bgroup\\inputencoding{" + encodings[encoding] + "}")

        i = j + 1
def convert_vcsinfo(document):
    """Separate vcs Info insets from buffer Info insets.

    Every ``\\begin_inset Info`` inset of type "buffer" whose ``arg`` is
    one of the ``vcs-*`` names below is rewritten in place to type "vcs"
    with the ``vcs-`` prefix dropped from the argument.  All other Info
    insets are left untouched.
    """
    types = {
        "vcs-revision" : "revision",
        "vcs-tree-revision" : "tree-revision",
        "vcs-author" : "author",
        "vcs-time" : "time",
        "vcs-date" : "date"
    }
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Info", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            document.warning("Malformed LyX document: Could not find end of Info inset.")
            i = i + 1
            continue
        tp = find_token(document.body, 'type', i, j)
        arg = find_token(document.body, 'arg', i, j)
        if tp == -1 or arg == -1:
            # A -1 index would silently address the last body line.
            i = i + 1
            continue
        tpv = get_quoted_value(document.body, "type", tp)
        if tpv != "buffer":
            i = i + 1
            continue
        argv = get_quoted_value(document.body, "arg", arg)
        if argv not in types:
            i = i + 1
            continue
        document.body[tp] = "type \"vcs\""
        document.body[arg] = "arg \"" + types[argv] + "\""
        i = i + 1
def revert_vcsinfo(document):
    """Merge vcs Info insets back into buffer Info insets.

    Every ``\\begin_inset Info`` inset of type "vcs" with a known argument
    is rewritten in place to type "buffer" with ``vcs-`` prefixed to the
    argument.  An unknown vcs argument is reported through
    ``document.warning`` and the inset is left unchanged.
    """
    args = ["revision", "tree-revision", "author", "time", "date" ]
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Info", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            document.warning("Malformed LyX document: Could not find end of Info inset.")
            i = i + 1
            continue
        tp = find_token(document.body, 'type', i, j)
        arg = find_token(document.body, 'arg', i, j)
        if tp == -1 or arg == -1:
            # A -1 index would silently address the last body line.
            i = i + 1
            continue
        tpv = get_quoted_value(document.body, "type", tp)
        if tpv != "vcs":
            i = i + 1
            continue
        argv = get_quoted_value(document.body, "arg", arg)
        if argv not in args:
            document.warning("Malformed Info inset. Invalid vcs arg.")
            i = i + 1
            continue
        document.body[tp] = "type \"buffer\""
        document.body[arg] = "arg \"vcs-" + argv + "\""
        i = i + 1
# Conversion hub

supported_versions = ["2.4.0", "2.4"]

# Each entry pairs a file-format number with the list of functions tied to
# that step; an empty list marks a step that needs no document changes.
# NOTE(review): presumably the number is the format reached after running
# the listed functions -- confirm against the lyx2lyx driver.
convert = [
           [545, [convert_lst_literalparam]],
           [546, []],
           [547, []],
           [548, []],
           [549, []],
           [550, [convert_fontenc]],
           [551, []],
           [552, []],
           [553, []],
           [554, []],
           [555, []],
           [556, []],
           [557, [convert_vcsinfo]],
           [558, [removeFrontMatterStyles]]
          ]

# Same layout as `convert`, listed in descending format order.
revert =  [
           [557, [addFrontMatterStyles]],
           [556, [revert_vcsinfo]],
           [555, [revert_bibencoding]],
           [554, [revert_vcolumns]],
           [553, [revert_stretchcolumn]],
           [552, [revert_tuftecite]],
           [551, [revert_floatpclass, revert_floatalignment]],
           [550, [revert_nospellcheck]],
           [549, [revert_fontenc]],
           [548, []],# dummy format change
           [547, [revert_lscape]],
           [546, [revert_xcharter]],
           [545, [revert_paratype]],
           [544, [revert_lst_literalparam]]
          ]

if __name__ == "__main__":