mirror of
https://git.lyx.org/repos/lyx.git
synced 2024-11-13 22:49:20 +00:00
e6e3777363
File format change. Fixes: #6223
578 lines
20 KiB
Python
578 lines
20 KiB
Python
# -*- coding: utf-8 -*-
|
|
# This file is part of lyx2lyx
|
|
# Copyright (C) 2018 The LyX team
|
|
#
|
|
# This program is free software; you can redistribute it and/or
|
|
# modify it under the terms of the GNU General Public License
|
|
# as published by the Free Software Foundation; either version 2
|
|
# of the License, or (at your option) any later version.
|
|
#
|
|
# This program is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU General Public License
|
|
# along with this program; if not, write to the Free Software
|
|
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
|
|
|
""" Convert files to the file format generated by lyx 2.4"""
|
|
|
|
import re, string
|
|
import unicodedata
|
|
import sys, os
|
|
|
|
# Uncomment only what you need to import, please.
|
|
|
|
from parser_tools import (count_pars_in_inset, find_end_of_inset, find_end_of_layout,
|
|
find_token, get_bool_value, get_option_value, get_value, get_quoted_value)
|
|
# del_token, del_value, del_complete_lines,
|
|
# find_complete_lines, find_end_of,
|
|
# find_re, find_substring, find_token_backwards,
|
|
# get_containing_inset, get_containing_layout,
|
|
# is_in_inset, set_bool_value
|
|
# find_tokens, find_token_exact, check_token
|
|
|
|
from lyx2lyx_tools import (put_cmd_in_ert, add_to_preamble)
|
|
# revert_font_attrs, insert_to_preamble, latex_length
|
|
# get_ert, lyx2latex, lyx2verbatim, length_in_bp, convert_info_insets
|
|
# revert_flex_inset, hex2ratio, str2bool
|
|
|
|
####################################################################
|
|
# Private helper functions
|
|
|
|
|
|
|
|
###############################################################################
|
|
###
|
|
### Conversion and reversion routines
|
|
###
|
|
###############################################################################
|
|
|
|
|
|
def convert_lst_literalparam(document):
    """Add the ``literal "true"`` parameter to every include command inset.

    The parameter line is inserted at the first blank line found after the
    inset's opening line, or at the inset's closing line when no blank line
    precedes it.  Insets whose end cannot be located are reported through
    ``document.warning`` and skipped.
    """
    body = document.body
    pos = 0
    while True:
        pos = find_token(body, '\\begin_inset CommandInset include', pos)
        if pos == -1:
            return
        inset_end = find_end_of_inset(body, pos)
        if inset_end == -1:
            document.warning("Malformed LyX document: Can't find end of command inset at line %d" % pos)
            pos += 1
            continue
        # Skip the non-empty parameter lines; the opening line itself is
        # non-empty, so the scan position always moves past this inset's start.
        while pos < inset_end and body[pos].strip() != '':
            pos += 1
        body.insert(pos, "literal \"true\"")
|
|
|
|
|
|
def revert_lst_literalparam(document):
    """Remove the ``literal`` parameter line from every include command inset.

    Insets whose end cannot be located are reported through
    ``document.warning`` and skipped.
    """
    body = document.body
    pos = 0
    while True:
        pos = find_token(body, '\\begin_inset CommandInset include', pos)
        if pos == -1:
            break
        inset_end = find_end_of_inset(body, pos)
        if inset_end == -1:
            document.warning("Malformed LyX document: Can't find end of include inset at line %d" % pos)
            pos += 1
            continue
        literal_line = find_token(body, 'literal', pos, inset_end)
        if literal_line != -1:
            # Stay on the same inset: the next pass finds no further
            # ``literal`` line there and then steps forward.
            del body[literal_line]
        else:
            pos += 1
|
|
|
|
|
|
def revert_paratype(document):
    """Revert ParaType font definitions to LaTeX preamble code.

    Only acts when the header contains ``\\use_non_tex_fonts false``.  The
    PTSerif/PTSans/PTMono header selections are replaced by ``default`` and
    the matching ``\\usepackage`` lines are added to the preamble; a sans or
    typewriter scale different from 100 is passed on as a ``scaled=`` package
    option.
    """

    def scale_option(token):
        """Return the ``scaled=...`` option for header `token`, or ""."""
        fields = get_value(document.header, token, 0).split()
        # Only the first field is the scale for TeX fonts.  Taking it by
        # splitting (instead of chopping the last four characters) also copes
        # with a trailing field that is not exactly " 100", and with a
        # missing header line (empty value).
        if not fields or fields[0] == "100":
            return ""
        try:
            return "scaled=" + format(float(fields[0]) / 100, '.2f')
        except ValueError:
            document.warning("Invalid %s value: %s" % (token, fields[0]))
            return ""

    header = document.header
    if find_token(header, "\\use_non_tex_fonts false", 0) == -1:
        return

    i1 = find_token(header, "\\font_roman \"PTSerif-TLF\"", 0)
    i2 = find_token(header, "\\font_sans \"default\"", 0)
    i3 = find_token(header, "\\font_typewriter \"default\"", 0)
    j = find_token(header, "\\font_sans \"PTSans-TLF\"", 0)
    k = find_token(header, "\\font_typewriter \"PTMono-TLF\"", 0)

    if i1 != -1 and i2 != -1 and i3 != -1:
        # NOTE(review): as in the original, the roman header entry is left
        # untouched in this branch -- confirm this is intended.
        add_to_preamble(document, ["\\usepackage{paratype}"])
    else:
        if i1 != -1:
            add_to_preamble(document, ["\\usepackage{PTSerif}"])
            header[i1] = header[i1].replace("PTSerif-TLF", "default")
        if j != -1:
            sfoption = scale_option("\\font_sf_scale")
            if sfoption != "":
                add_to_preamble(document, ["\\usepackage[" + sfoption + "]{PTSans}"])
            else:
                add_to_preamble(document, ["\\usepackage{PTSans}"])
            header[j] = header[j].replace("PTSans-TLF", "default")
        if k != -1:
            ttoption = scale_option("\\font_tt_scale")
            if ttoption != "":
                add_to_preamble(document, ["\\usepackage[" + ttoption + "]{PTMono}"])
            else:
                add_to_preamble(document, ["\\usepackage{PTMono}"])
            header[k] = header[k].replace("PTMono-TLF", "default")
|
|
|
|
|
|
def revert_xcharter(document):
    """Revert the XCharter roman font selection to LaTeX preamble code.

    The header entry is reset to ``default``.  Unless non-TeX fonts are in
    use, ``\\usepackage{XCharter}`` is added to the preamble; an enabled
    ``\\font_osf`` setting is switched off in the header and handed to the
    package as the ``osf`` option instead.
    """
    header = document.header
    roman_line = find_token(header, "\\font_roman \"xcharter\"", 0)
    if roman_line == -1:
        return

    # The font name is replaced in the header in every case.
    header[roman_line] = header[roman_line].replace("xcharter", "default")

    # With system fonts no preamble code is written.
    if get_bool_value(header, "\\use_non_tex_fonts"):
        return

    # Old style figures become a package option.
    options = ""
    osf_line = find_token(header, "\\font_osf true")
    if osf_line != -1:
        options = "[osf]"
        header[osf_line] = "\\font_osf false"

    add_to_preamble(document, ["\\usepackage%s{XCharter}"%options])
|
|
|
|
|
|
def revert_lscape(document):
    """Revert Landscape flex insets (Landscape module) to TeX code.

    Does nothing unless the document's module list contains ``landscape``.
    The opening and closing lines of each inset are replaced by ERT that
    begins/ends a ``landscape`` environment (wrapped in ``\\afterpage{...}``
    for the floating variant), and the required packages are added to the
    preamble.
    """
    if "landscape" not in document.get_module_list():
        return

    body = document.body
    pos = 0
    while True:
        pos = find_token(body, "\\begin_inset Flex Landscape", pos)
        if pos == -1:
            return
        end = find_end_of_inset(body, pos)
        if end == -1:
            document.warning("Malformed LyX document: Can't find end of Landscape inset")
            pos += 1
            continue

        floating = body[pos] == "\\begin_inset Flex Landscape (Floating)"
        if floating:
            closing = "\\end{landscape}}"
            opening = "\\afterpage{\\begin{landscape}"
        else:
            closing = "\\end{landscape}"
            opening = "\\begin{landscape}"

        # Replace the tail first so that `pos` still addresses the head.
        body[end - 2 : end + 1] = put_cmd_in_ert(closing)
        body[pos : pos + 4] = put_cmd_in_ert(opening)
        if floating:
            add_to_preamble(document, ["\\usepackage{afterpage}"])

        add_to_preamble(document, ["\\usepackage{pdflscape}"])
        # `pos` needs no update: the opening line it pointed at is now ERT
        # and no longer matches the search token.
|
|
|
|
|
|
def convert_fontenc(document):
    """Rename the ``\\fontencoding global`` header setting to ``auto``.

    Only the first matching header line is touched; documents without it
    are left unchanged.
    """
    header = document.header
    line = find_token(header, "\\fontencoding global", 0)
    if line != -1:
        header[line] = header[line].replace("global", "auto")
|
|
|
|
|
|
def revert_fontenc(document):
    """Rename the ``\\fontencoding auto`` header setting back to ``global``.

    Only the first matching header line is touched; documents without it
    are left unchanged.
    """
    header = document.header
    line = find_token(header, "\\fontencoding auto", 0)
    if line != -1:
        header[line] = header[line].replace("auto", "global")
|
|
|
|
|
|
def revert_nospellcheck(document):
    """Delete every ``\\nospellcheck`` font-info line from the body."""
    body = document.body
    pos = find_token(body, '\\nospellcheck', 0)
    while pos != -1:
        del body[pos]
        # Resume at the same index: the following line has moved up into it.
        pos = find_token(body, '\\nospellcheck', pos)
|
|
|
|
|
|
def revert_floatpclass(document):
    """Remove the float placement values ``document`` and ``class``.

    The ``\\float_placement class`` header line is deleted, and in every
    float inset a ``placement class`` or ``placement document`` line found
    directly after the inset's opening line is deleted as well.  Floats whose
    end cannot be located are reported through ``document.warning`` and
    skipped.
    """
    header_line = find_token(document.header, "\\float_placement class", 0)
    if header_line != -1:
        del document.header[header_line]

    body = document.body
    i = 0
    while True:
        i = find_token(body, '\\begin_inset Float', i)
        if i == -1:
            break
        j = find_end_of_inset(body, i)
        if j == -1:
            # Without this guard the scan position would be set to -1 below,
            # which is the "not found" marker and not a valid position.
            document.warning("Malformed LyX document: Can't find end of float inset at line %d" % i)
            i += 1
            continue
        k = find_token(body, 'placement class', i, i + 2)
        if k == -1:
            k = find_token(body, 'placement document', i, i + 2)
        if k != -1:
            del body[k]
            # The inset got one line shorter.
            j -= 1
        i = j
|
|
|
|
|
|
def revert_floatalignment(document):
    """Remove float alignment parameters, emulating them with ERT.

    The ``\\float_alignment`` header line is removed and its value kept as
    the document-wide alignment.  In every float inset the ``alignment``
    line is deleted; for the values ``left``, ``center`` and ``right`` (with
    ``document`` resolved to the document-wide value) an ERT inset holding
    the corresponding LaTeX command is inserted right after the float's
    first ``Plain Layout`` line.
    """
    galignment = ""
    i = find_token(document.header, "\\float_alignment", 0)
    if i != -1:
        galignment = get_value(document.header, "\\float_alignment", i)
        del document.header[i]

    alignment_commands = {
        "left": "\\raggedright{}",
        "center": "\\centering{}",
        "right": "\\raggedleft{}",
    }

    body = document.body
    i = 0
    while True:
        i = find_token(body, '\\begin_inset Float', i)
        if i == -1:
            break
        j = find_end_of_inset(body, i)
        if j == -1:
            document.warning("Malformed LyX document: Can't find end of inset at line " + str(i))
            i += 1
            # Skip the malformed inset; falling through would work with
            # j == -1 and finally set the scan position to -1.
            continue
        k = find_token(body, 'alignment', i, i + 4)
        if k == -1:
            i = j
            continue
        alignment = get_value(body, "alignment", k)
        if alignment == "document":
            alignment = galignment
        del body[k]
        l = find_token(body, "\\begin_layout Plain Layout", i, j)
        if l == -1:
            document.warning("Can't find float layout!")
            i = j
            continue
        if alignment in alignment_commands:
            body[l+1:l+1] = put_cmd_in_ert(alignment_commands[alignment])
        i = j
|
|
|
|
|
|
def revert_tuftecite(document):
    r"""Revert \cite commands in tufte classes to ERT.

    Only acts when the document class is ``tufte-book`` or
    ``tufte-handout``.  Every citation inset whose ``LatexCommand`` is
    ``cite`` is replaced by an ERT inset holding
    ``\cite[before][after]{key}``; an empty second bracket is written when
    only ``before`` is set.  A missing key is reported and replaced by
    ``???``.
    """
    # The docstring is a raw string: "\c" is not a valid escape sequence.
    if document.textclass not in ("tufte-book", "tufte-handout"):
        return

    body = document.body
    i = 0
    while True:
        i = find_token(body, "\\begin_inset CommandInset citation", i)
        if i == -1:
            break
        j = find_end_of_inset(body, i)
        if j == -1:
            document.warning("Can't find end of citation inset at line %d!!" %(i))
            i += 1
            continue
        k = find_token(body, "LatexCommand", i, j)
        if k == -1:
            document.warning("Can't find LatexCommand for citation inset at line %d!" %(i))
            i = j + 1
            continue
        cmd = get_value(body, "LatexCommand", k)
        if cmd != "cite":
            i = j + 1
            continue
        pre = get_quoted_value(body, "before", i, j)
        post = get_quoted_value(body, "after", i, j)
        key = get_quoted_value(body, "key", i, j)
        if not key:
            document.warning("Citation inset at line %d does not have a key!" %(i))
            key = "???"
        # Replace command with ERT
        res = "\\cite"
        if pre:
            res += "[" + pre + "]"
        if post:
            res += "[" + post + "]"
        elif pre:
            # Keep the second optional argument so that `pre` is not read
            # as the post-note.
            res += "[]"
        res += "{" + key + "}"
        body[i:j+1] = put_cmd_in_ert([res])
        i = j + 1
|
|
|
|
|
|
def revert_stretchcolumn(document):
    """Remove the ``varwidth`` flag from tabular column definitions.

    Every ``<column ...>`` line inside a tabular inset that carries a
    ``varwidth`` attribute triggers a warning, and the text
    `` varwidth="true"`` is stripped from it.  Tabulars whose end cannot be
    located are reported through ``document.warning`` and skipped.
    """
    varwidth_column = re.compile('^<column.*varwidth="[^"]+".*>$')
    body = document.body
    i = 0
    while True:
        i = find_token(body, "\\begin_inset Tabular", i)
        if i == -1:
            return
        j = find_end_of_inset(body, i + 1)
        if j == -1:
            document.warning("Malformed LyX document: Could not find end of tabular.")
            # Step past the malformed inset; without this the same line
            # would be found again on every pass.
            i += 1
            continue
        for k in range(i, j):
            if varwidth_column.search(body[k]):
                document.warning("Converting 'tabularx'/'xltabular' table to normal table.")
                body[k] = body[k].replace(' varwidth="true"', '')
        i = i + 1
|
|
|
|
|
|
def revert_vcolumns(document):
    """Revert standard columns with line breaks etc.

    For every tabular inset the column definitions are collected and each
    cell is inspected.  A cell qualifies when it contains a Newline inset,
    more than one paragraph, or a layout other than ``Plain Layout``, is not
    rotated, and is either no multicolumn/multirow cell or has no width.  If
    its column has no width, varwidth or special setting, the column gets a
    ``special`` attribute built from ``V{\\linewidth}`` (prefixed by an
    alignment declaration), and the cell's newline/linebreak insets are
    replaced by ERT.  The packages needed for this are added to the preamble
    at the end, also when the function is left early or by an exception.
    """
    alignment_prefix = {
        "center": ">{\\centering}",
        "left": ">{\\raggedright}",
        "right": ">{\\raggedleft}",
    }
    i = 0
    needvarwidth = False
    needarray = False
    try:
        while True:
            i = find_token(document.body, "\\begin_inset Tabular", i)
            if i == -1:
                return
            j = find_end_of_inset(document.body, i)
            if j == -1:
                document.warning("Malformed LyX document: Could not find end of tabular.")
                i += 1
                continue

            # Collect necessary column information
            # presumably the line after the inset start holds the row and
            # column counts as its 2nd and 3rd quoted values -- confirm
            features = document.body[i+1].split('"')
            nrows = int(features[3])
            ncols = int(features[5])
            col_info = []
            m = i + 1
            for k in range(ncols):
                m = find_token(document.body, "<column", m)
                width = get_option_value(document.body[m], 'width')
                varwidth = get_option_value(document.body[m], 'varwidth')
                alignment = get_option_value(document.body[m], 'alignment')
                special = get_option_value(document.body[m], 'special')
                col_info.append([width, varwidth, alignment, special, m])
                # Continue behind this definition; otherwise every entry
                # would describe the first column again.
                m += 1

            # Columns that already received the special attribute in this
            # table, so that it is written only once per column.
            converted = set()

            # Now parse cells
            m = i + 1
            for row in range(nrows):
                for col in range(ncols):
                    m = find_token(document.body, "<cell", m)
                    multicolumn = get_option_value(document.body[m], 'multicolumn')
                    multirow = get_option_value(document.body[m], 'multirow')
                    width = get_option_value(document.body[m], 'width')
                    rotate = get_option_value(document.body[m], 'rotate')
                    # Check for: linebreaks, multipars, non-standard environments
                    begcell = m
                    endcell = find_token(document.body, "</cell>", begcell)
                    vcand = False
                    if find_token(document.body, "\\begin_inset Newline", begcell, endcell) != -1:
                        vcand = True
                    elif count_pars_in_inset(document.body, begcell + 2) > 1:
                        vcand = True
                    elif get_value(document.body, "\\begin_layout", begcell) != "Plain Layout":
                        vcand = True
                    if vcand and rotate == "" and ((multicolumn == "" and multirow == "") or width == ""):
                        info = col_info[col]
                        plain_column = info[0] == "" and info[1] == "" and info[3] == ""
                        if col in converted or plain_column:
                            needvarwidth = True
                            if col not in converted:
                                vval = alignment_prefix.get(info[2], "")
                                if vval != "":
                                    needarray = True
                                vval += "V{\\linewidth}"
                                col_line = info[4]
                                document.body[col_line] = document.body[col_line][:-1] + " special=\"" + vval + "\">"
                                converted.add(col)
                            # ERT newlines and linebreaks (since LyX < 2.4 automatically inserts parboxes
                            # with newlines, and we do not want that)
                            while True:
                                # The cell grows with every replacement, so
                                # its end is looked up anew each time.
                                endcell = find_token(document.body, "</cell>", begcell)
                                linebreak = False
                                nl = find_token(document.body, "\\begin_inset Newline newline", begcell, endcell)
                                if nl == -1:
                                    nl = find_token(document.body, "\\begin_inset Newline linebreak", begcell, endcell)
                                    if nl == -1:
                                        break
                                    linebreak = True
                                nle = find_end_of_inset(document.body, nl)
                                del(document.body[nle:nle+1])
                                if linebreak:
                                    document.body[nl:nl+1] = put_cmd_in_ert("\\linebreak{}")
                                else:
                                    document.body[nl:nl+1] = put_cmd_in_ert("\\\\")
                    m += 1

            i = j + 1

    finally:
        if needarray:
            add_to_preamble(document, ["\\usepackage{array}"])
        if needvarwidth:
            add_to_preamble(document, ["\\usepackage{varwidth}"])
|
|
|
|
|
|
def revert_bibencoding(document):
    """Revert the bibliography encoding parameter of bibtex insets.

    The ``encoding`` line is removed from every bibtex inset that has one.
    With a biblatex cite engine the encoding is handed over as a
    ``bibencoding`` entry of ``\\biblio_options`` in the header (appended to
    an existing line unless it already mentions ``bibencoding``, otherwise
    added as a new line near ``\\biblatex_bibstyle``).  With other engines
    the inset is wrapped in ERT that switches ``\\inputencoding`` inside a
    group.  Encodings without a known LaTeX name are reported through
    ``document.warning`` and dropped.
    """

    # Get cite engine
    engine = "basic"
    i = find_token(document.header, "\\cite_engine", 0)
    if i == -1:
        document.warning("Malformed document! Missing \\cite_engine")
    else:
        engine = get_value(document.header, "\\cite_engine", i)

    # Check if biblatex
    biblatex = engine in ["biblatex", "biblatex-natbib"]

    # Map lyx to latex encoding names
    encodings = {
        "utf8" : "utf8",
        "utf8x" : "utf8x",
        "armscii8" : "armscii8",
        "iso8859-1" : "latin1",
        "iso8859-2" : "latin2",
        "iso8859-3" : "latin3",
        "iso8859-4" : "latin4",
        "iso8859-5" : "iso88595",
        "iso8859-6" : "8859-6",
        "iso8859-7" : "iso-8859-7",
        "iso8859-8" : "8859-8",
        "iso8859-9" : "latin5",
        "iso8859-13" : "latin7",
        "iso8859-15" : "latin9",
        "iso8859-16" : "latin10",
        "applemac" : "applemac",
        "cp437" : "cp437",
        "cp437de" : "cp437de",
        "cp850" : "cp850",
        "cp852" : "cp852",
        "cp855" : "cp855",
        "cp858" : "cp858",
        "cp862" : "cp862",
        "cp865" : "cp865",
        "cp866" : "cp866",
        "cp1250" : "cp1250",
        "cp1251" : "cp1251",
        "cp1252" : "cp1252",
        "cp1255" : "cp1255",
        "cp1256" : "cp1256",
        "cp1257" : "cp1257",
        "koi8-r" : "koi8-r",
        "koi8-u" : "koi8-u",
        "pt154" : "pt154",
        "utf8-platex" : "utf8",
        "ascii" : "ascii"
    }

    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CommandInset bibtex", i)
        if i == -1:
            break
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Can't find end of bibtex inset at line %d!!" %(i))
            i += 1
            continue
        encoding = get_quoted_value(document.body, "encoding", i, j)
        if not encoding:
            i += 1
            continue
        # remove encoding line
        k = find_token(document.body, "encoding", i, j)
        if k != -1:
            del document.body[k]
        if encoding not in encodings:
            # No LaTeX name is known for this value, so nothing can be
            # emitted for it; the parameter line is already gone.
            document.warning("Unknown bibliography encoding '%s' in bibtex inset at line %d, ignoring it."
                             % (encoding, i))
            i += 1
            continue
        latex_encoding = encodings[encoding]
        # Re-find inset end line
        j = find_end_of_inset(document.body, i)
        if biblatex:
            h = find_token(document.header, "\\biblio_options", 0)
            if h != -1:
                biblio_options = get_value(document.header, "\\biblio_options", h)
                if "bibencoding" not in biblio_options:
                    document.header[h] += ",bibencoding=%s" % latex_encoding
            else:
                bs = find_token(document.header, "\\biblatex_bibstyle", 0)
                if bs == -1:
                    # this should not happen
                    document.warning("Malformed LyX document! No \\biblatex_bibstyle header found!")
                else:
                    document.header[bs-1 : bs-1] = ["\\biblio_options bibencoding=" + latex_encoding]
        else:
            # Insert behind the inset first so that `i` stays valid.
            document.body[j+1:j+1] = put_cmd_in_ert("\\egroup")
            document.body[i:i] = put_cmd_in_ert("\\bgroup\\inputencoding{" + latex_encoding + "}")

        i = j + 1
|
|
|
|
|
|
##
# Conversion hub
#

# Version strings this module declares support for.
supported_versions = ["2.4.0", "2.4"]

# Forward conversion table: each entry is [file format number, list of
# routines], in ascending format order.  An empty list means no routine is
# registered for that format number.
# presumably the number is the format reached once the routines have run --
# confirm against the lyx2lyx driver
convert = [
           [545, [convert_lst_literalparam]],
           [546, []],
           [547, []],
           [548, []],
           [549, []],
           [550, [convert_fontenc]],
           [551, []],
           [552, []],
           [553, []],
           [554, []],
           [555, []],
           [556, []]
          ]

# Backward conversion table: same entry layout, in descending format order.
revert =  [
           [555, [revert_bibencoding]],
           [554, [revert_vcolumns]],
           [553, [revert_stretchcolumn]],
           [552, [revert_tuftecite]],
           [551, [revert_floatpclass, revert_floatalignment]],
           [550, [revert_nospellcheck]],
           [549, [revert_fontenc]],
           [548, []],# dummy format change
           [547, [revert_lscape]],
           [546, [revert_xcharter]],
           [545, [revert_paratype]],
           [544, [revert_lst_literalparam]]
          ]


# Running this module as a script does nothing.
if __name__ == "__main__":
    pass
|