mirror of
https://git.lyx.org/repos/lyx.git
synced 2024-11-07 02:28:35 +00:00
8aa37c43a1
Previously, consecutive dashes in .lyx files were combined to endash and emdash in some cases, and in other cases they were output as is. This made the code complicated, and resulted in inconsistencies (bug #3647). Now, a dash in a .lyx file is always a dash in the output, for all flavours. The special handling is moved to the input side, so that you still get an endash if you type two hyphens. If needed, this can be changed or made customizable without the need to update the file format again. Many thanks for the fruitful mailing list discussion, which contributed significantly to the final version.
580 lines
21 KiB
Python
580 lines
21 KiB
Python
# -*- coding: utf-8 -*-
|
|
# This file is part of lyx2lyx
|
|
# -*- coding: utf-8 -*-
|
|
# Copyright (C) 2011 The LyX team
|
|
#
|
|
# This program is free software; you can redistribute it and/or
|
|
# modify it under the terms of the GNU General Public License
|
|
# as published by the Free Software Foundation; either version 2
|
|
# of the License, or (at your option) any later version.
|
|
#
|
|
# This program is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU General Public License
|
|
# along with this program; if not, write to the Free Software
|
|
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
|
|
|
""" Convert files to the file format generated by lyx 2.2"""
|
|
|
|
import re, string
|
|
import unicodedata
|
|
import sys, os
|
|
|
|
# Uncomment only what you need to import, please.
|
|
|
|
#from parser_tools import find_token, find_end_of, find_tokens, \
|
|
# find_token_exact, find_end_of_inset, find_end_of_layout, \
|
|
# find_token_backwards, is_in_inset, get_value, get_quoted_value, \
|
|
# del_token, check_token, get_option_value
|
|
|
|
from lyx2lyx_tools import add_to_preamble, put_cmd_in_ert#, \
|
|
# insert_to_preamble, lyx2latex, latex_length, revert_flex_inset, \
|
|
# revert_font_attrs, hex2ratio, str2bool
|
|
|
|
from parser_tools import find_token, find_token_backwards, find_re, \
|
|
find_end_of_inset, find_end_of_layout, find_nonempty_line, \
|
|
get_containing_layout, get_value, check_token
|
|
|
|
###############################################################################
|
|
###
|
|
### Conversion and reversion routines
|
|
###
|
|
###############################################################################
|
|
|
|
def convert_separator(document):
    """
    Convert layout separators to separator insets and add (LaTeX) paragraph
    breaks in order to mimic previous LaTeX export.

    Three passes are made over document.body:
      1. a parbreak separator inset is inserted before the "\\end_layout"
         that precedes each "\\begin_deeper";
      2. the same is done before each "\\align" line (outside Plain Layout)
         when the preceding paragraph is an unaligned Standard layout
         without a VSpace inset;
      3. Separator/EndOfSlide layouts are replaced by a Standard layout
         holding a parbreak separator inset.
    """

    parbreak_inset = ["\\begin_inset Separator parbreak", "\\end_inset", ""]
    parbreak_layout = ["\\begin_layout Standard",
                       "\\begin_inset Separator parbreak",
                       "\\end_inset", "", "\\end_layout", ""]
    # Font attribute -> value written to switch the attribute off again.
    style_reset = {
        "family" : "default",
        "series" : "default",
        "shape"  : "default",
        "size"   : "default",
        "bar"    : "default",
        "color"  : "inherit"
        }
    body = document.body

    def reset_styles(lay, at):
        """Insert a reset line at index `at` for every font attribute that
        occurs in the layout `lay`; return the number of lines inserted."""
        content = "\n".join(body[lay[1]:lay[2]])
        added = 0
        for attr, default in style_reset.items():
            if ("\\%s" % attr) in content:
                body.insert(at + added, "\\%s %s" % (attr, default))
                added += 1
        return added

    # Pass 1: paragraph break in front of nested environments.
    pos = 0
    while True:
        pos = find_token(body, "\\begin_deeper", pos)
        if pos == -1:
            break

        prev_end = find_token_backwards(body, "\\end_layout", pos - 1)
        if prev_end == -1:
            pos += 1
            continue

        # reset any text style before inserting the inset
        lay = get_containing_layout(body, prev_end - 1)
        if lay is not False:
            added = reset_styles(lay, prev_end)
            pos += added
            prev_end += added
        body[prev_end:prev_end] = parbreak_inset
        pos += len(parbreak_inset) + 1

    # Pass 2: paragraph break in front of aligned paragraphs.
    pos = 0
    while True:
        pos = find_token(body, "\\align", pos)
        if pos == -1:
            break

        lay = get_containing_layout(body, pos)
        if lay is not False and lay[0] == "Plain Layout":
            pos += 1
            continue

        prev_end = find_token_backwards(body, "\\end_layout", pos - 1)
        if prev_end == -1:
            pos += 1
            continue

        lay = get_containing_layout(body, prev_end - 1)
        needs_break = (
            lay is not False and lay[0] == "Standard"
            and find_token(body, "\\align", lay[1], lay[2]) == -1
            and find_token(body, "\\begin_inset VSpace", lay[1], lay[2]) == -1)
        if not needs_break:
            pos += 1
            continue

        # reset any text style before inserting the inset
        added = reset_styles(lay, prev_end)
        pos += added
        prev_end += added
        body[prev_end:prev_end] = parbreak_inset
        pos += len(parbreak_inset) + 1

    # Pass 3: replace the separator layouts themselves.
    regexp = re.compile(r'^\\begin_layout (?:(-*)|(\s*))(Separator|EndOfSlide)(?:(-*)|(\s*))$', re.IGNORECASE)

    pos = 0
    while True:
        pos = find_re(body, regexp, pos)
        if pos == -1:
            return

        layout_end = find_end_of_layout(body, pos)
        if layout_end == -1:
            document.warning("Malformed LyX document: Missing `\\end_layout'.")
            return

        # Lines of the old separator layout that are carried over into the
        # new Standard layout (lay[3] presumably marks the first content
        # line of the layout -- confirm against parser_tools).
        lay = get_containing_layout(body, layout_end - 1)
        if lay is not False:
            kept = body[lay[3]:lay[2]]
        else:
            kept = []

        body[pos:layout_end + 1] = parbreak_layout
        if kept:
            body[pos + 1:pos + 1] = kept

        pos += len(parbreak_layout) + len(kept) + 1
|
|
|
|
|
|
def revert_separator(document):
    """Revert separator insets to layout separators.

    "plain" separator insets are replaced by an ERT inset (holding a space
    when text follows inside the same paragraph, a "%" otherwise).  Other
    separator insets are turned back into a separator layout, which is
    "Separator" for the beamer classes and "--Separator--" for all other
    classes, or are simply removed when no separator layout is needed.
    """

    beamer_classes = ["beamer", "article-beamer", "scrarticle-beamer"]
    if document.textclass in beamer_classes:
        sep_layout = "\\begin_layout Separator"
    else:
        sep_layout = "\\begin_layout --Separator--"

    sep_paragraph = [sep_layout, "", "\\end_layout", ""]
    comment_ert = ["\\begin_inset ERT", "status collapsed", "",
                   "\\begin_layout Plain Layout", "%", "\\end_layout",
                   "", "\\end_inset", ""]
    space_ert = ["\\begin_inset ERT", "status collapsed", "",
                 "\\begin_layout Plain Layout", " ", "\\end_layout",
                 "", "\\end_inset", ""]

    body = document.body
    pos = 0
    while True:
        pos = find_token(body, "\\begin_inset Separator", pos)
        if pos == -1:
            return

        lay = get_containing_layout(body, pos)
        if lay is False:
            document.warning("Malformed LyX document: Can't convert separator inset at line " + str(pos))
            pos += 1
            continue

        layoutname = lay[0]
        beg = lay[1]
        end = lay[2]
        kind = get_value(body, "\\begin_inset Separator", pos, pos + 1, "plain").split()[1]
        before = body[beg + 1:pos]
        # Is there any non-empty line in the paragraph before/after the inset?
        something_before = any(before)
        inset_end = find_end_of_inset(body, pos)
        after = body[inset_end + 1:end]
        something_after = any(after)

        if kind == "plain":
            beg = beg + len(before) + 1
            if something_after:
                replacement = space_ert
            else:
                replacement = comment_ert
            body[beg:inset_end + 1] = replacement
            pos += len(replacement)
        else:
            if something_before:
                # Close the paragraph in front of the inset; all indices
                # behind the insertion point move by the two new lines.
                body[pos:pos] = ["\\end_layout", ""]
                pos += 2
                inset_end += 2
                beg = pos
                end += 2

            if something_after:
                if layoutname == "Standard":
                    if something_before:
                        body[beg:inset_end + 1] = ["\\begin_layout Standard"]
                        pos += 1
                    else:
                        body[beg:inset_end + 1] = sep_paragraph
                        pos += len(sep_paragraph)
                        body[pos:pos] = ["", "\\begin_layout Standard"]
                        pos += 2
                else:
                    body[beg:inset_end + 1] = ["\\begin_deeper"]
                    pos += 1
                    # account for the lines replaced by "\begin_deeper"
                    end = end + 1 - (inset_end + 1 - beg)
                    if not something_before:
                        body[pos:pos] = sep_paragraph
                        pos += len(sep_paragraph)
                        end += len(sep_paragraph)
                    body[pos:pos] = ["\\begin_layout Standard"]
                    body[end + 2:end + 2] = ["", "\\end_deeper", ""]
                    pos += 4
            else:
                next_par_is_aligned = False
                nxt = find_nonempty_line(body, end + 1)
                if nxt != -1 and check_token(body[nxt], "\\begin_layout"):
                    lay = get_containing_layout(body, nxt)
                    next_par_is_aligned = lay is not False and \
                        find_token(body, "\\align", lay[1], lay[2]) != -1
                if nxt != -1 and not next_par_is_aligned \
                   and not (check_token(body[nxt], "\\end_deeper")
                            or check_token(body[nxt], "\\begin_deeper")):
                    if layoutname == "Standard":
                        body[beg:inset_end + 1] = [sep_layout]
                        pos += 1
                    else:
                        body[beg:inset_end + 1] = ["\\begin_deeper", sep_layout]
                        end = end + 2 - (inset_end + 1 - beg)
                        body[end + 1:end + 1] = ["", "\\end_deeper", ""]
                        pos += 3
                else:
                    # No separator layout is written: just drop the inset.
                    if something_before:
                        del body[pos:end + 1]
                    else:
                        del body[pos:end - 1]

        pos += 1
|
|
|
|
|
|
def revert_smash(document):
    """Set amsmath to on if smash commands are used.

    Only acts when the header has "\\use_package amsmath" set to 1; the
    setting is switched to 2 as soon as one formula inset contains
    \\smash[t], \\smash[b] or \\notag.
    """

    commands = ["smash[t]", "smash[b]", "notag"]
    header_pos = find_token(document.header, "\\use_package amsmath", 0)
    if header_pos == -1:
        document.warning("Malformed LyX document: Can't find \\use_package amsmath.")
        return
    value = get_value(document.header, "\\use_package amsmath", header_pos).split()[1]
    if value != "1":
        # nothing to do if package is not auto but on or off
        return

    body = document.body
    start = 0
    while True:
        start = find_token(body, '\\begin_inset Formula', start)
        if start == -1:
            return
        stop = find_end_of_inset(body, start)
        if stop == -1:
            document.warning("Malformed LyX document: Can't find end of Formula inset at line " + str(start))
            start += 1
            continue
        code = "\n".join(body[start:stop])
        if any(("\\%s" % cmd) in code for cmd in commands):
            # set amsmath to on, since it is loaded by the newer format
            document.header[header_pos] = "\\use_package amsmath 2"
            return
        start = stop
|
|
|
|
|
|
def revert_swissgerman(document):
    """Set language german-ch-old to german.

    Rewrites the document language (attribute and "\\language" header line)
    and every "\\lang german-ch-old" switch in the body.
    """
    if document.language == "german-ch-old":
        document.language = "german"
        header_pos = find_token(document.header, "\\language", 0)
        if header_pos != -1:
            document.header[header_pos] = "\\language german"

    body = document.body
    pos = 0
    while True:
        pos = find_token(body, "\\lang german-ch-old", pos)
        if pos == -1:
            return
        body[pos] = body[pos].replace("\\lang german-ch-old", "\\lang german")
        pos += 1
|
|
|
|
|
|
def revert_use_package(document, pkg, commands, oldauto):
    """Remove the "\\use_package <pkg>" header line and emulate its effect.

    document -- the document being reverted (header, body and preamble are
                modified in place)
    pkg      -- name of the LaTeX package
    commands -- command names (without the leading backslash) that need
                the package
    oldauto  -- defines how the version we are reverting to behaves:
                if it is true, the old version uses the package
                automatically; if it is false, the old version never uses
                the package.

    If the package was switched on (value 2), it is loaded in the preamble.
    If it was on auto (value 1, also assumed when the header line is
    missing) and the old version does not load it by itself, it is loaded
    in the preamble as soon as one formula inset uses one of `commands`.
    """
    # Escape the package name so that it is matched literally, and require
    # whitespace or end of line behind it so that a package whose name
    # merely starts with `pkg` is not removed by mistake.
    regexp = re.compile(r'(\\use_package\s+%s)(?:\s|$)' % re.escape(pkg))
    i = find_re(document.header, regexp, 0)
    value = "1" # default is auto
    if i != -1:
        fields = get_value(document.header, "\\use_package", i).split()
        if len(fields) > 1:
            value = fields[1]
        else:
            # header line without a value: keep the default instead of
            # failing with an IndexError
            document.warning("Malformed LyX document: Missing value for \\use_package " + pkg)
        del document.header[i]
    if value == "2": # on
        add_to_preamble(document, ["\\usepackage{" + pkg + "}"])
    elif value == "1" and not oldauto: # auto
        i = 0
        while True:
            i = find_token(document.body, '\\begin_inset Formula', i)
            if i == -1:
                return
            j = find_end_of_inset(document.body, i)
            if j == -1:
                document.warning("Malformed LyX document: Can't find end of Formula inset at line " + str(i))
                i += 1
                continue
            code = "\n".join(document.body[i:j])
            for c in commands:
                if code.find("\\%s" % c) != -1:
                    # one use is enough: load the package and stop
                    add_to_preamble(document, ["\\usepackage{" + pkg + "}"])
                    return
            i = j
|
|
|
|
|
|
# Command names (without the leading backslash) that revert_xarrow passes
# to revert_use_package together with the package name "mathtools".
mathtools_commands = [
    "xhookrightarrow", "xhookleftarrow", "xRightarrow",
    "xrightharpoondown", "xrightharpoonup", "xrightleftharpoons",
    "xLeftarrow", "xleftharpoondown", "xleftharpoonup",
    "xleftrightarrow", "xLeftrightarrow", "xleftrightharpoons",
    "xmapsto",
]
|
|
|
|
def revert_xarrow(document):
    """Remove use_package mathtools.

    Delegates to revert_use_package with the mathtools command list and
    oldauto set to False.
    """
    revert_use_package(document, "mathtools", mathtools_commands,
                       oldauto=False)
|
|
|
|
|
|
def revert_beamer_lemma(document):
    """Revert beamer Lemma layouts to ERT.

    Only acts on the beamer text classes.  Every "Lemma" layout becomes a
    "Standard" layout; "\\begin{lemma}" is put in ERT in front of the first
    lemma of a run of consecutive lemmas and "\\end{lemma}" behind the last
    one.  The contents of "Argument 1" and "Argument 2" insets are kept,
    wrapped in ERT "<" ... ">" and "[" ... "]" respectively.
    """

    beamer_classes = ["beamer", "article-beamer", "scrarticle-beamer"]
    if document.textclass not in beamer_classes:
        return

    consecutive = False
    i = 0
    while True:
        i = find_token(document.body, "\\begin_layout Lemma", i)
        if i == -1:
            return
        j = find_end_of_layout(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: Can't find end of Lemma layout")
            i += 1
            continue

        # Only look for the end of an argument inset that actually exists;
        # a start index of -1 is not a valid inset position.
        arg1 = find_token(document.body, "\\begin_inset Argument 1", i, j)
        endarg1 = -1
        if arg1 != -1:
            endarg1 = find_end_of_inset(document.body, arg1)
        arg2 = find_token(document.body, "\\begin_inset Argument 2", i, j)
        endarg2 = -1
        if arg2 != -1:
            endarg2 = find_end_of_inset(document.body, arg2)

        subst1 = []
        subst2 = []
        if arg1 != -1:
            beginPlain1 = find_token(document.body, "\\begin_layout Plain Layout", arg1, endarg1)
            if beginPlain1 == -1:
                document.warning("Malformed LyX document: Can't find arg1 plain Layout")
                i += 1
                continue
            endPlain1 = find_end_of_inset(document.body, beginPlain1)
            # presumably drops the trailing "\end_layout" and empty line
            # in front of the inset end -- confirm against the file format
            content1 = document.body[beginPlain1 + 1 : endPlain1 - 2]
            subst1 = put_cmd_in_ert("<") + content1 + put_cmd_in_ert(">")
        if arg2 != -1:
            beginPlain2 = find_token(document.body, "\\begin_layout Plain Layout", arg2, endarg2)
            if beginPlain2 == -1:
                document.warning("Malformed LyX document: Can't find arg2 plain Layout")
                i += 1
                continue
            endPlain2 = find_end_of_inset(document.body, beginPlain2)
            content2 = document.body[beginPlain2 + 1 : endPlain2 - 2]
            subst2 = put_cmd_in_ert("[") + content2 + put_cmd_in_ert("]")

        # remove Arg insets; the one further down is deleted first so that
        # the indices of the other one stay valid
        if arg1 < arg2:
            del document.body[arg2 : endarg2 + 1]
            if arg1 != -1:
                del document.body[arg1 : endarg1 + 1]
        if arg2 < arg1:
            del document.body[arg1 : endarg1 + 1]
            if arg2 != -1:
                del document.body[arg2 : endarg2 + 1]

        # index of end layout has probably changed
        j = find_end_of_layout(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: Can't find end of Lemma layout")
            i += 1
            continue

        begcmd = []

        # if this is not a consecutive env, add start command
        if not consecutive:
            begcmd = put_cmd_in_ert("\\begin{lemma}")

        # has this a consecutive lemma?
        # (a lemma at the very end of the body has no line j + 2)
        consecutive = j + 2 < len(document.body) and \
            document.body[j + 2] == "\\begin_layout Lemma"

        # if this is not followed by a consecutive env, add end command
        if not consecutive:
            document.body[j : j + 1] = put_cmd_in_ert("\\end{lemma}") + ["\\end_layout"]

        document.body[i : i + 1] = ["\\begin_layout Standard", ""] + begcmd + subst1 + subst2

        i = j
|
|
|
|
|
|
|
|
def revert_question_env(document):
    """
    Reverts question and question* environments of
    theorems-ams-extended-bytype module to ERT

    Does nothing unless the document uses that module.  Every "Question" /
    "Question*" layout becomes a "Standard" layout; the matching
    "\\begin{...}" is put in ERT in front of the first paragraph of a run
    of consecutive environments of the same type and "\\end{...}" behind
    the last one.  The theorem definitions are added to the preamble.
    """

    # Do we use theorems-ams-extended-bytype module?
    if "theorems-ams-extended-bytype" not in document.get_module_list():
        return

    consecutive = False
    i = 0
    while True:
        i = find_token(document.body, "\\begin_layout Question", i)
        if i == -1:
            return

        starred = document.body[i] == "\\begin_layout Question*"
        if starred:
            envname = "question*"
            layout_line = "\\begin_layout Question*"
        else:
            envname = "question"
            layout_line = "\\begin_layout Question"

        j = find_end_of_layout(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: Can't find end of Question layout")
            i += 1
            continue

        # if this is not a consecutive env, add start command
        begcmd = []
        if not consecutive:
            begcmd = put_cmd_in_ert("\\begin{%s}" % envname)

        # has this a consecutive theorem of same type?
        # (an environment at the very end of the body has no line j + 2)
        consecutive = j + 2 < len(document.body) and \
            document.body[j + 2] == layout_line

        # if this is not followed by a consecutive env, add end command
        if not consecutive:
            document.body[j : j + 1] = put_cmd_in_ert("\\end{%s}" % envname) + ["\\end_layout"]

        document.body[i : i + 1] = ["\\begin_layout Standard", ""] + begcmd

        # "\\questionname": the backslash must be doubled, "\q" is not a
        # valid escape sequence in a Python string literal.
        add_to_preamble(document, "\\providecommand{\\questionname}{Question}")

        if starred:
            add_to_preamble(document, "\\theoremstyle{plain}\n" \
                                      "\\newtheorem*{question*}{\\protect\\questionname}")
        else:
            add_to_preamble(document, "\\theoremstyle{plain}\n" \
                                      "\\newtheorem{question}{\\protect\\questionname}")

        i = j
|
|
|
|
|
|
def convert_dashes(document):
    """convert -- and --- to \\twohyphens and \\threehyphens

    Only acts when document.backend is "latex".  A body line is cut behind
    every replaced hyphen group: the macro ends the current line and the
    remaining text is moved to a new line, which is processed next.
    ERT, Formula and IPA insets are skipped entirely.
    """

    if document.backend != "latex":
        return

    body = document.body
    skipped_insets = ["ERT", "Formula", "IPA"]
    pos = 0
    while pos < len(body):
        words = body[pos].split()
        if len(words) > 1 and words[0] == "\\begin_inset" \
           and words[1] in skipped_insets:
            # must not replace anything in math
            # filtering out IPA makes Text::readParToken() more simple
            # skip ERT as well since it is not needed there
            inset_end = find_end_of_inset(body, pos)
            if inset_end == -1:
                document.warning("Malformed LyX document: Can't find end of " + words[1] + " inset at line " + str(pos))
                pos += 1
            else:
                pos = inset_end
            continue

        while True:
            front, hyphens, back = body[pos].partition("--")
            if not hyphens:
                break
            # We can have an arbitrary number of consecutive hyphens.
            # These must be split into the corresponding number of two and three hyphens
            # We must match what LaTeX does: First try emdash, then endash, then single hyphen
            if back.startswith("-"):
                back = back[1:]
                macro = "\\threehyphens"
            else:
                macro = "\\twohyphens"
            if back:
                body.insert(pos + 1, back)
            body[pos] = front + macro
        pos += 1
|
|
|
|
|
|
def revert_dashes(document):
    """convert \\twohyphens and \\threehyphens to -- and ---

    After a replacement, the following body line is appended to the
    current one if it does not start with a backslash (or starts with one
    of the two hyphen macros) and the joined line is at most 80 characters
    long.  A joined line is examined again before moving on.
    """

    body = document.body
    macros = (("\\twohyphens", "--"), ("\\threehyphens", "---"))
    pos = 0
    while pos < len(body):
        replaced = False
        for macro, hyphens in macros:
            if macro in body[pos]:
                body[pos] = body[pos].replace(macro, hyphens)
                replaced = True

        joinable = False
        if replaced and pos + 1 < len(body):
            following = body[pos + 1]
            is_text = not following.startswith("\\")
            is_macro = following.startswith("\\twohyphens") \
                or following.startswith("\\threehyphens")
            joinable = (is_text or is_macro) \
                and len(body[pos]) + len(following) <= 80

        if joinable:
            body[pos] = body[pos] + body[pos + 1]
            del body[pos + 1]
        else:
            pos += 1
|
|
|
|
|
|
##
|
|
# Conversion hub
|
|
#
|
|
|
|
supported_versions = ["2.2.0", "2.2"]

# Each entry pairs a file format number with the list of routines that
# are run for that step.
convert = [
    [475, [convert_separator]],
    # nothing to do for 476: We consider it a bug that older versions
    # did not load amsmath automatically for these commands, and do not
    # want to hardcode amsmath off.
    [476, []],
    [477, []],
    [478, []],
    [479, []],
    [480, []],
    [481, [convert_dashes]]
]

revert = [
    [480, [revert_dashes]],
    [479, [revert_question_env]],
    [478, [revert_beamer_lemma]],
    [477, [revert_xarrow]],
    [476, [revert_swissgerman]],
    [475, [revert_smash]],
    [474, [revert_separator]]
]


if __name__ == "__main__":
    # Nothing happens when the module is executed directly.
    pass
|