Add lyx2lyx tools from 2.4.x

Richard Kimberly Heck 2024-05-02 11:02:01 -04:00
parent ee25620931
commit a5c328b304
21 changed files with 8069 additions and 1765 deletions

View File

@ -1,6 +1,6 @@
# This file is part of lyx2lyx
# -*- coding: utf-8 -*-
# Copyright (C) 2002-2015 The LyX Team
# Copyright (C) 2002-2018 The LyX Team
# Copyright (C) 2002-2004 Dekel Tsur <dekel@lyx.org>
# Copyright (C) 2002-2006 José Matos <jamatos@lyx.org>
#
@ -20,8 +20,8 @@
" The LyX module has all the rules related with different lyx file formats."
from parser_tools import get_value, check_token, find_token, \
find_tokens, find_end_of
from parser_tools import (get_value, check_token, find_token, find_tokens,
find_end_of, find_complete_lines)
import os.path
import gzip
import locale
@ -34,8 +34,10 @@ import codecs
try:
import lyx2lyx_version
version__ = lyx2lyx_version.version
stable_version = True
except: # we are running from build directory so assume the last version
version__ = '2.3'
version__ = '2.4'
stable_version = False
default_debug__ = 2
@ -69,8 +71,8 @@ def minor_versions(major, last_minor_version):
# Regular expressions used
format_re = re.compile(r"(\d)[\.,]?(\d\d)")
fileformat = re.compile(r"\\lyxformat\s*(\S*)")
original_version = re.compile(r".*?LyX ([\d.]*)")
original_tex2lyx_version = re.compile(r".*?tex2lyx ([\d.]*)")
original_version = re.compile(b".*?LyX ([\\d.]*)")
original_tex2lyx_version = re.compile(b".*?tex2lyx ([\\d.]*)")
##
# file format information:
@ -92,8 +94,9 @@ format_relation = [("0_06", [200], minor_versions("0.6" , 4)),
("1_6", list(range(277,346)), minor_versions("1.6" , 10)),
("2_0", list(range(346,414)), minor_versions("2.0" , 8)),
("2_1", list(range(414,475)), minor_versions("2.1" , 5)),
("2_2", list(range(475,509)), minor_versions("2.2" , 0)),
("2_3", (), minor_versions("2.3" , 0))
("2_2", list(range(475,509)), minor_versions("2.2" , 4)),
("2_3", list(range(509,545)), minor_versions("2.3" , 0)),
("2_4", (), minor_versions("2.4" , 0))
]
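
Editor's note (not part of the diff): each format_relation entry pairs an internal version name with the file format numbers its conversion module handles, plus the releases from minor_versions(). A hypothetical helper, written here only to illustrate how the table drives a conversion chain:

# Hypothetical illustration (not from the commit): collect the modules
# whose format numbers lie on the path from format `start` up to `end`.
def chain(format_relation, start, end):
    steps = []
    for name, formats, _releases in format_relation:
        hit = [f for f in formats if start <= f < end]
        if hit:
            steps.append((name, hit))
    return steps

# chain(format_relation, 509, 545) -> [("2_3", [509, ..., 544])],
# i.e. the per-format steps a 2.3 file passes through on its way up.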
####################################################################
@ -119,19 +122,29 @@ def formats_list():
def format_info():
" Returns a list with supported file formats."
out = """Major version:
minor versions
formats
" Returns a list with the supported file formats."
template = """
%s\tstable format: %s
\tstable versions: %s
\tdevelopment formats: %s
"""
out = "version: formats and versions"
for version in format_relation:
major = str(version[2][0])
versions = str(version[2][1:])
if len(version[1]) == 1:
formats = str(version[1][0])
stable_format = str(version[1][0])
elif not stable_version and major == version__:
stable_format = "-- not yet --"
versions = "-- not yet --"
formats = "%s - %s" % (version[1][0], version[1][-1])
else:
formats = "%s - %s" % (version[1][-1], version[1][0])
out += "%s\n\t%s\n\t%s\n\n" % (major, versions, formats)
formats = "%s - %s" % (version[1][0], version[1][-2])
stable_format = str(version[1][-1])
out += template % (major, stable_format, versions, formats)
return out + '\n'
@ -281,7 +294,7 @@ class LyX_base:
""" Emits warning to self.error, if the debug_level is less
than the self.debug."""
if debug_level <= self.debug:
self.err.write("Warning: " + message + "\n")
self.err.write("lyx2lyx warning: " + message + "\n")
def error(self, message):
@ -434,8 +447,8 @@ class LyX_base:
else:
header = self.header
for line in header + [''] + self.body:
self.output.write(line+u"\n")
for line in header + [u''] + self.body:
self.output.write(line+u'\n')
def choose_output(self, output):
@ -506,10 +519,10 @@ class LyX_base:
file, returns the most likely value, or None otherwise."""
for line in self.header:
if line[0] != "#":
if line[0:1] != b"#":
return None
line = line.replace("fix",".")
line = line.replace(b"fix",b".")
# need to test original_tex2lyx_version first because tex2lyx
# writes "#LyX file created by tex2lyx 2.2"
result = original_tex2lyx_version.match(line)
@ -517,14 +530,14 @@ class LyX_base:
result = original_version.match(line)
if result:
# Special know cases: reLyX and KLyX
if line.find("reLyX") != -1 or line.find("KLyX") != -1:
if line.find(b"reLyX") != -1 or line.find(b"KLyX") != -1:
return "0.12"
if result:
res = result.group(1)
if not res:
self.warning(line)
#self.warning("Version %s" % result.group(1))
return res
return res.decode('ascii') if not PY2 else res
self.warning(str(self.header[:2]))
return None
@ -533,7 +546,7 @@ class LyX_base:
" Set the header with the version used."
initial_comment = " ".join(["#LyX %s created this file." % version__,
"For more info see http://www.lyx.org/"])
"For more info see https://www.lyx.org/"])
# Simple heuristic to determine the comment that always starts
# a lyx file
@ -582,6 +595,7 @@ class LyX_base:
#Note that the module will be added at the END of the extant ones
def add_module(self, module):
" Append module to the modules list."
i = find_token(self.header, "\\begin_modules", 0)
if i == -1:
#No modules yet included
@ -602,7 +616,16 @@ class LyX_base:
self.header.insert(j, module)
def del_module(self, module):
" Delete `module` from module list, return success."
modlist = self.get_module_list()
if module not in modlist:
return False
self.set_module_list([line for line in modlist if line != module])
return True
def get_module_list(self):
" Return list of modules."
i = find_token(self.header, "\\begin_modules", 0)
if (i == -1):
return []
@ -611,23 +634,23 @@ class LyX_base:
def set_module_list(self, mlist):
modbegin = find_token(self.header, "\\begin_modules", 0)
newmodlist = ['\\begin_modules'] + mlist + ['\\end_modules']
if (modbegin == -1):
i = find_token(self.header, "\\begin_modules", 0)
if (i == -1):
#No modules yet included
tclass = find_token(self.header, "\\textclass", 0)
if tclass == -1:
self.warning("Malformed LyX document: No \\textclass!!")
return
modbegin = tclass + 1
self.header[modbegin:modbegin] = newmodlist
return
modend = find_token(self.header, "\\end_modules", modbegin)
if modend == -1:
i = j = tclass + 1
else:
j = find_token(self.header, "\\end_modules", i)
if j == -1:
self.warning("(set_module_list) Malformed LyX document: No \\end_modules.")
return
newmodlist = ['\\begin_modules'] + mlist + ['\\end_modules']
self.header[modbegin:modend + 1] = newmodlist
j += 1
if mlist:
mlist = ['\\begin_modules'] + mlist + ['\\end_modules']
self.header[i:j] = mlist
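
A compact restatement of the new set_module_list() behaviour (not part of the diff; error handling elided, names invented for the sketch). The notable change: an empty list now removes the whole module block.

# Sketch only: header is a plain list of strings.
def set_modules(header, mlist):
    try:
        i = header.index("\\begin_modules")
        j = header.index("\\end_modules", i) + 1
    except ValueError:                      # no module block yet
        i = j = header.index("\\textclass") + 1
    if mlist:
        mlist = ["\\begin_modules"] + mlist + ["\\end_modules"]
    header[i:j] = mlist

header = ["\\textclass article"]
set_modules(header, ["logicalmkup"])
assert header == ["\\textclass article", "\\begin_modules",
                  "logicalmkup", "\\end_modules"]
set_modules(header, [])                     # removes the block again
assert header == ["\\textclass article"]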
def set_parameter(self, param, value):
@ -678,7 +701,7 @@ class LyX_base:
try:
conv(self)
except:
self.warning("An error ocurred in %s, %s" %
self.warning("An error occurred in %s, %s" %
(version, str(conv)),
default_debug__)
if not self.try_hard:
@ -760,6 +783,53 @@ class LyX_base:
return mode, steps
def append_local_layout(self, new_layout):
" Append `new_layout` to the local layouts."
# new_layout may be a string or a list of strings (lines)
try:
new_layout = new_layout.splitlines()
except AttributeError:
pass
i = find_token(self.header, "\\begin_local_layout", 0)
if i == -1:
k = find_token(self.header, "\\language", 0)
if k == -1:
# this should not happen
self.warning("Malformed LyX document! No \\language header found!")
return
self.header[k : k] = ["\\begin_local_layout", "\\end_local_layout"]
i = k
j = find_end_of(self.header, i, "\\begin_local_layout", "\\end_local_layout")
if j == -1:
# this should not happen
self.warning("Malformed LyX document: Can't find end of local layout!")
return
self.header[i+1 : i+1] = new_layout
def del_local_layout(self, layout_def):
" Delete `layout_def` from local layouts, return success."
i = find_complete_lines(self.header, layout_def)
if i == -1:
return False
j = i+len(layout_def)
if (self.header[i-1] == "\\begin_local_layout" and
self.header[j] == "\\end_local_layout"):
i -=1
j +=1
self.header[i:j] = []
return True
def del_from_header(self, lines):
" Delete `lines` from the document header, return success."
i = find_complete_lines(self.header, lines)
if i == -1:
return False
j = i + len(lines)
self.header[i:j] = []
return True
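
Both deletion helpers above rely on find_complete_lines() from parser_tools. Its assumed contract, sketched for reference: return the index at which a sub-list occurs as a contiguous block of lines, or -1.

# Sketch of the assumed parser_tools.find_complete_lines() contract:
def find_complete_lines(lines, sublines, start=0):
    n = len(sublines)
    for i in range(start, len(lines) - n + 1):
        if lines[i:i+n] == sublines:
            return i
    return -1

assert find_complete_lines(["a", "b", "c"], ["b", "c"]) == 1
assert find_complete_lines(["a", "b", "c"], ["x"]) == -1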
# Part of an unfinished attempt to make lyx2lyx gave a more
# structured view of the document.
# def get_toc(self, depth = 4):

View File

@ -34,6 +34,7 @@ dist_lyx2lyx_PYTHON = \
lyx_2_1.py \
lyx_2_2.py \
lyx_2_3.py \
lyx_2_4.py \
profiling.py \
test_parser_tools.py

View File

@ -1,4 +1,4 @@
#! /usr/bin/env python
#! /usr/bin/python3
# -*- coding: utf-8 -*-
# Copyright (C) 2002-2011 The LyX Team
# Copyright (C) 2002-2007 José Matos <jamatos@lyx.org>
@ -56,24 +56,25 @@ def main():
parser.add_argument("--noisy",
action="store_const", const=10, dest="debug")
parser.add_argument("-c", "--encoding", type=cmd_arg, dest="cjk_encoding",
help="files in format 413 and lower are read and"
help="Files in format 413 and lower are read and"
" written in the format of CJK-LyX."
" If encoding is not given or 'auto' the encoding"
" is determined from the locale.")
parser.add_argument("-e", "--err", type=cmd_arg, dest="error",
help= "file name of the error file else goes to stderr")
help= "File name of the error file else goes to stderr.")
parser.add_argument("-o", "--output", type=cmd_arg, dest="output",
help= "name of the output file else goes to stdout")
help= "Name of the output file else goes to stdout.")
parser.add_argument("-t", "--to", type=cmd_arg, dest= "end_format",
help= "destination file format, default (latest)")
help= "Destination file format, default <latest>.")
parser.add_argument("-V", "--final_version", type=cmd_arg, dest= "final_version",
help= "destination version, default (latest)")
help= "Destination version, default <latest>.")
parser.add_argument("-l", "--list", action="store_true",
help = "list all available formats and supported versions")
help = "List all available formats and supported versions.")
parser.add_argument("-n", "--try-hard", action="store_true",
help = "try hard (ignore any convertion errors)")
help = "Try hard (ignore any conversion errors).")
parser.add_argument("-s", "--systemlyxdir", type=cmd_arg, dest= "systemlyxdir",
help= "LyX system directory for conversion from version 489 or older")
help= "LyX system directory for conversion from"
" version 489 or older.")
parser.add_argument('--version', action='version', version="""lyx2lyx, version %s
Copyright (C) 2011 The LyX Team, José Matos and Dekel Tsur""" % LyX.version__)
parser.add_argument("input", nargs='?', type=cmd_arg, default=None)

View File

@ -37,8 +37,8 @@ insert_to_preamble(document, text[, index]):
default index is 0, so the material is inserted at the beginning.
Prepends a comment "% Added by lyx2lyx" to text.
put_cmd_in_ert(arg):
Here arg should be a list of strings (lines), which we want to
put_cmd_in_ert(cmd):
Here cmd should be a list of strings (lines), which we want to
wrap in ERT. Returns a list of strings so wrapped.
A call to this routine will often go something like this:
i = find_token('\\begin_inset FunkyInset', ...)
@ -74,14 +74,29 @@ convert_info_insets(document, type, func):
type : the type to match. This can be a regular expression.
func : function from string to string to apply to the "arg" field of
the info insets.
is_document_option(document, option):
Find if _option_ is a document option (\\options in the header).
insert_document_option(document, option):
Insert _option_ as a document option.
remove_document_option(document, option):
Remove _option_ as a document option.
revert_language(document, lyxname, babelname="", polyglossianame=""):
Reverts native language support to ERT
If babelname or polyglossianame is empty, it is assumed
this language package is not supported for the given language.
'''
from __future__ import print_function
import re
import string
from parser_tools import find_token, find_end_of_inset
import sys
from parser_tools import (find_token, find_end_of_inset, get_containing_layout,
get_containing_inset, get_value, get_bool_value)
from unicode_symbols import unicode_reps
# This will accept either a list of lines or a single line.
# It is bad practice to pass something with embedded newlines,
# though we will handle that.
@ -129,25 +144,42 @@ def insert_to_preamble(document, text, index = 0):
document.preamble[index:index] = text
def put_cmd_in_ert(arg):
'''
arg should be a list of lines we want to wrap in ERT.
Returns a list of strings, with the lines so wrapped.
'''
# A dictionary of Unicode->LICR mappings for use in a Unicode string's translate() method
# Created from the reversed list to keep the first of alternative definitions.
licr_table = {ord(ch): cmd for cmd, ch in unicode_reps[::-1]}
ret = ["\\begin_inset ERT", "status collapsed", "", "\\begin_layout Plain Layout", ""]
# It will be faster for us to work with a single string internally.
# That way, we only go through the unicode_reps loop once.
if type(arg) is list:
s = "\n".join(arg)
else:
s = arg
for rep in unicode_reps:
s = s.replace(rep[1], rep[0])
s = s.replace('\\', "\\backslash\n")
ret += s.splitlines()
ret += ["\\end_layout", "", "\\end_inset"]
return ret
def put_cmd_in_ert(cmd, is_open=False, as_paragraph=False):
"""
Return ERT inset wrapping `cmd` as a list of strings.
`cmd` can be a string or list of lines. Non-ASCII characters are converted
to the respective LICR macros if defined in unicodesymbols,
`is_open` is a boolean setting the inset status to "open",
`as_paragraph` wraps the ERT inset in a Standard paragraph.
"""
status = {False:"collapsed", True:"open"}
ert_inset = ["\\begin_inset ERT", "status %s"%status[is_open], "",
"\\begin_layout Plain Layout", "",
# content here ([5:5])
"\\end_layout", "", "\\end_inset"]
paragraph = ["\\begin_layout Standard",
# content here ([1:1])
"", "", "\\end_layout", ""]
# ensure cmd is an unicode instance and make it "LyX safe".
if isinstance(cmd, list):
cmd = u"\n".join(cmd)
elif sys.version_info[0] == 2 and isinstance(cmd, str):
cmd = cmd.decode('utf8')
cmd = cmd.translate(licr_table)
cmd = cmd.replace("\\", "\n\\backslash\n")
ert_inset[5:5] = cmd.splitlines()
if not as_paragraph:
return ert_inset
paragraph[1:1] = ert_inset
return paragraph
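
A worked call of the rewritten put_cmd_in_ert(), traced from the code above with the default collapsed status:

lines = put_cmd_in_ert("\\noindent")
# lines == ['\\begin_inset ERT', 'status collapsed', '',
#           '\\begin_layout Plain Layout', '', '',
#           '\\backslash', 'noindent',
#           '\\end_layout', '', '\\end_inset']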
def get_ert(lines, i, verbatim = False):
@ -314,9 +346,14 @@ def latex_length(slen):
# the + always precedes the -
# Convert relative lengths to LaTeX units
units = {"text%":"\\textwidth", "col%":"\\columnwidth",
"page%":"\\paperwidth", "line%":"\\linewidth",
"theight%":"\\textheight", "pheight%":"\\paperheight"}
units = {"col%": "\\columnwidth",
"text%": "\\textwidth",
"page%": "\\paperwidth",
"line%": "\\linewidth",
"theight%": "\\textheight",
"pheight%": "\\paperheight",
"baselineskip%": "\\baselineskip"
}
for unit in list(units.keys()):
i = slen.find(unit)
if i == -1:
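
The loop above (cut off by the hunk) substitutes each key for its LaTeX macro. A stand-alone mini-version of just the percent rewrite, assuming the length is a bare "<number><unit>" string (the real latex_length also handles sums and plain units):

# Hypothetical mini-version for illustration only.
def percent_to_latex(slen, units):
    for unit, macro in units.items():
        i = slen.find(unit)
        if i != -1:
            return "%s%s" % (float(slen[:i]) / 100, macro)
    return slen

assert percent_to_latex("30col%", {"col%": "\\columnwidth"}) \
       == "0.3\\columnwidth"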
@ -536,3 +573,220 @@ def convert_info_insets(document, type, func):
new_arg = func(arg.group(1))
document.body[i + 2] = 'arg "%s"' % new_arg
i += 3
def insert_document_option(document, option):
"Insert _option_ as a document option."
# Find \options in the header
i = find_token(document.header, "\\options", 0)
# if the options does not exists add it after the textclass
if i == -1:
i = find_token(document.header, "\\textclass", 0) + 1
document.header.insert(i, r"\options %s" % option)
return
# otherwise append to options
if not is_document_option(document, option):
document.header[i] += ",%s" % option
def remove_document_option(document, option):
""" Remove _option_ as a document option."""
i = find_token(document.header, "\\options")
options = get_value(document.header, "\\options", i)
options = [op.strip() for op in options.split(',')]
# Remove `option` from \options
options = [op for op in options if op != option]
if options:
document.header[i] = "\\options " + ','.join(options)
else:
del document.header[i]
def is_document_option(document, option):
"Find if _option_ is a document option"
options = get_value(document.header, "\\options")
options = [op.strip() for op in options.split(',')]
return option in options
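
Taken together, the three option helpers behave as sketched below (a toy stand-in for the document object; the header content is invented):

class Doc:                     # minimal stand-in, illustration only
    def __init__(self):
        self.header = ["\\textclass article", "\\options a4paper"]

doc = Doc()
insert_document_option(doc, "landscape")
assert doc.header[1] == "\\options a4paper,landscape"
remove_document_option(doc, "a4paper")
assert doc.header[1] == "\\options landscape"
assert is_document_option(doc, "landscape")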
singlepar_insets = [s.strip() for s in
u"Argument, Caption Above, Caption Below, Caption Bicaption,"
u"Caption Centered, Caption FigCaption, Caption Standard, Caption Table,"
u"Flex Chemistry, Flex Fixme_Note, Flex Latin, Flex ListOfSlides,"
u"Flex Missing_Figure, Flex PDF-Annotation, Flex PDF-Comment-Setup,"
u"Flex Reflectbox, Flex S/R expression, Flex Sweave Input File,"
u"Flex Sweave Options, Flex Thanks_Reference, Flex URL, Foot InTitle,"
u"IPADeco, Index, Info, Phantom, Script".split(',')]
# print(singlepar_insets)
def revert_language(document, lyxname, babelname="", polyglossianame=""):
" Revert native language support "
# Does the document use polyglossia?
use_polyglossia = False
if get_bool_value(document.header, "\\use_non_tex_fonts"):
i = find_token(document.header, "\\language_package")
if i == -1:
document.warning("Malformed document! Missing \\language_package")
else:
pack = get_value(document.header, "\\language_package", i)
if pack in ("default", "auto"):
use_polyglossia = True
# Do we use this language with polyglossia?
with_polyglossia = use_polyglossia and polyglossianame != ""
# Do we use this language with babel?
with_babel = with_polyglossia == False and babelname != ""
# Are we dealing with a primary or secondary language?
primary = document.language == lyxname
secondary = False
# Main language first
orig_doc_language = document.language
if primary:
# Change LyX document language to English (we will tell LaTeX
# to use the original language at the end of this function):
document.language = "english"
i = find_token(document.header, "\\language %s" % lyxname, 0)
if i != -1:
document.header[i] = "\\language english"
# Now look for occurences in the body
i = 0
while True:
i = find_token(document.body, "\\lang", i+1)
if i == -1:
break
if document.body[i].startswith("\\lang %s" % lyxname):
secondary = True
texname = use_polyglossia and polyglossianame or babelname
elif primary and document.body[i].startswith("\\lang english"):
# Since we switched the main language manually, English parts need to be marked
texname = "english"
else:
continue
parent = get_containing_layout(document.body, i)
i_e = parent[2] # end line no,
# print(i, texname, parent, document.body[i+1], file=sys.stderr)
# Move leading space to the previous line:
if document.body[i+1].startswith(" "):
document.body[i+1] = document.body[i+1][1:]
document.body.insert(i, " ")
continue
# TODO: handle nesting issues with font attributes, e.g.
# \begin_layout Standard
#
# \emph on
# \lang macedonian
# Македонски јазик
# \emph default
# — јужнословенски јазик, дел од групата на словенски јазици од јазичното
# семејство на индоевропски јазици.
# Македонскиот е службен и национален јазик во Македонија.
# \end_layout
# Ensure correct handling of list labels
if (parent[0] in ["Labeling", "Description"]
and not " " in "\n".join(document.body[parent[3]:i])):
# line `i+1` is first line of a list item,
# part before a space character is the label
# TODO: insets or language change before first space character
labelline = document.body[i+1].split(' ', 1)
if len(labelline) > 1:
# Insert a space in the (original) document language
# between label and remainder.
# print(" Label:", labelline, file=sys.stderr)
lines = [labelline[0],
"\\lang %s" % orig_doc_language,
" ",
"\\lang %s" % (primary and "english" or lyxname),
labelline[1]]
document.body[i+1:i+2] = lines
i_e += 4
# Find out where to end the language change.
langswitch = i
while True:
langswitch = find_token(document.body, "\\lang", langswitch+1, i_e)
if langswitch == -1:
break
# print(" ", langswitch, document.body[langswitch], file=sys.stderr)
# skip insets
i_a = parent[3] # paragraph start line
container = get_containing_inset(document.body[i_a:i_e], langswitch-i_a)
if container and container[1] < langswitch-i_a and container[2] > langswitch-i_a:
# print(" inset", container, file=sys.stderr)
continue
i_e = langswitch
break
# use function or environment?
singlepar = i_e - i < 3
if not singlepar and parent[0] == "Plain Layout":
# environment not allowed in some insets
container = get_containing_inset(document.body, i)
singlepar = container[0] in singlepar_insets
# Delete empty language switches:
if not "".join(document.body[i+1:i_e]):
del document.body[i:i_e]
i -= 1
continue
if singlepar:
if with_polyglossia:
begin_cmd = "\\text%s{"%texname
elif with_babel:
begin_cmd = "\\foreignlanguage{%s}{" % texname
end_cmd = "}"
else:
if with_polyglossia:
begin_cmd = "\\begin{%s}"%texname
end_cmd = "\\end{%s}"%texname
elif with_babel:
begin_cmd = "\\begin{otherlanguage}{%s}" % texname
end_cmd = "\\end{otherlanguage}"
if (not primary or texname == "english"):
try:
document.body[i_e:i_e] = put_cmd_in_ert(end_cmd)
document.body[i+1:i+1] = put_cmd_in_ert(begin_cmd)
except UnboundLocalError:
pass
del document.body[i]
if not (primary or secondary):
return
# Make the language known to Babel/Polyglossia and ensure the correct
# document language:
doc_lang_switch = ""
if with_babel:
# add as global option
insert_document_option(document, babelname)
# Since user options are appended to the document options,
# Babel will treat `babelname` as primary language.
if not primary:
doc_lang_switch = "\\selectlanguage{%s}" % orig_doc_language
if with_polyglossia:
# Define language in the user preamble
# (don't use \AtBeginDocument, this fails with some languages).
add_to_preamble(document, ["\\usepackage{polyglossia}",
"\\setotherlanguage{%s}" % polyglossianame])
if primary:
# Changing the main language must be done in the document body.
doc_lang_switch = "\\resetdefaultlanguage{%s}" % polyglossianame
# Reset LaTeX main language if required and not already done
if doc_lang_switch and doc_lang_switch[1:] not in document.body[8:20]:
document.body[2:2] = put_cmd_in_ert(doc_lang_switch,
is_open=True, as_paragraph=True)
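
The version modules later in this commit call the new helper with per-language flags; an empty babel or polyglossia name marks the language as unsupported by that package:

revert_language(document, "turkmen", "turkmen", "turkmen")  # babel and polyglossia
revert_language(document, "kurmanji", "kurmanji", "")       # babel only
revert_language(document, "tibetan", "", "tibetan")         # polyglossia only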

View File

@ -32,18 +32,17 @@ def regularise_header(document):
def find_next_space(line, j):
""" Return position of next space or backslash, which one comes
first, starting from position k, if not existing return last
position in line."""
l = line.find(' ', j)
if l == -1:
l = len(line)
k = line.find('\\', j)
if k == -1:
k = len(line)
first, starting from position j, if none exists returns last
position in line (+1)."""
space_pos = line.find(' ', j)
if space_pos == -1:
space_pos = len(line)
if k < l:
return k
return l
bksl_pos = line.find('\\', j)
if bksl_pos == -1:
bksl_pos = len(line)
return min(space_pos, bksl_pos)
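
Three quick checks of the renamed helper (inputs invented):

assert find_next_space("ab cd", 0) == 2    # space found first
assert find_next_space("ab\\cd", 0) == 2   # backslash found first
assert find_next_space("abcd", 1) == 4     # neither: len(line)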
def regularise_body(document):
@ -65,36 +64,38 @@ def regularise_body(document):
while i < len(document.body):
line = document.body[i]
j = 0
tmp = []
new_block = []
while j < len(line):
k = line.find('\\', j)
if k == -1:
tmp += [line[j:]]
new_block += [line[j:]]
break
if k != j:
tmp += [line[j: k]]
#document.warning("j=%d\tk=%d\t#%s#%s#" % (j,k,line,line[j: k]))
new_block += [line[j: k]]
j = k
k = find_next_space(line, j+1)
# These tokens take the rest of the line
token = line[j+1:k]
# These tokens take the rest of the line
if token in getline_tokens:
tmp += [line[j:]]
#document.warning("getline_token:%s\tj=%d\t\t#%s#%s#" % (token,j,line,line[j:]))
new_block += [line[j:]]
break
# These tokens take no arguments
if token in noargs_tokens:
tmp += [line[j:k]]
new_block += [line[j:k]]
j = k
continue
# These tokens take one argument
if token in onearg_tokens:
k = find_next_space(line, k + 1)
tmp += [line[j:k]]
new_block += [line[j:k]]
j = k
continue
@ -104,29 +105,30 @@ def regularise_body(document):
inset = line[k+1: l]
if inset == "Latex":
tmp += [line[j:l]]
new_block += [line[j:l]]
j = l
continue
if inset in ["LatexCommand", "LatexDel"]:
tmp += [line[j:]]
if inset in ["LatexCommand", "LatexDel", "Label", "Figure",
"Formula"]:
new_block += [line[j:]]
break
if inset == "Quotes":
l = find_next_space(line, l + 1)
tmp += [line[j:l]]
new_block += [line[j:l]]
j = l
continue
document.warning("unkown inset %s" % line)
document.warning("unkown inset %s" % inset)
assert(False)
# We are inside a latex inset, pass the text verbatim
tmp += [line[j:]]
new_block += [line[j:]]
break
document.body[i: i+1] = tmp
i += len(tmp)
document.body[i: i+1] = new_block
i += len(new_block)
supported_versions = ["0.10.%d" % i for i in range(8)] + ["0.10"]

View File

@ -112,7 +112,7 @@ def update_inset_label(document):
i = find_token(lines, '\\begin_inset Label', i)
if i == -1:
return
lines[i] = '\\begin_inset LatexCommand \label{' + lines[i][19:] + '}'
lines[i] = '\\begin_inset LatexCommand \\label{' + lines[i][19:] + '}'
i = i + 1

View File

@ -69,7 +69,7 @@ def find_beginning_of_inset(lines, i):
def find_end_of_inset(lines, i):
" Finds the matching \end_inset"
r" Finds the matching \end_inset"
return find_end_of(lines, i, "\\begin_inset", "\\end_inset")
@ -143,7 +143,7 @@ def get_width(mo):
def remove_oldfloat(document):
" Change \begin_float .. \end_float into \begin_inset Float .. \end_inset"
r" Change \begin_float .. \end_float into \begin_inset Float .. \end_inset"
lines = document.body
i = 0
while True:
@ -250,7 +250,7 @@ def remove_pextra(document):
if flag:
flag = 0
if hfill:
start = ["","\hfill",""]+start
start = ["",r"\hfill",""]+start
else:
start = ['\\layout %s' % document.default_layout,''] + start
@ -324,7 +324,7 @@ def remove_oldert(document):
new = []
new2 = []
if check_token(lines[i], "\\layout LaTeX"):
new = ['\layout %s' % document.default_layout, "", ""]
new = [r'\layout %s' % document.default_layout, "", ""]
k = i+1
while True:
@ -745,8 +745,8 @@ def update_longtables(document):
for j in range(rows):
i = find_token(body, '<row', i)
self.endfoot = false # footer row
self.endlastfoot = false # last footer row
row_info[i].endfoot = false # footer row
row_info[i].endlastfoot = false # last footer row
if row_info[j].endhead:
insert_attribute(body, i, 'endhead="true"')
@ -808,7 +808,7 @@ def change_infoinset(document):
note_lines = [txt]+note_lines
for line in note_lines:
new = new + ['\layout %s' % document.default_layout, ""]
new = new + [r'\layout %s' % document.default_layout, ""]
tmp = line.split('\\')
new = new + [tmp[0]]
for x in tmp[1:]:

View File

@ -27,7 +27,7 @@ from parser_tools import find_token, find_end_of, get_value,\
# Private helper functions
def find_end_of_inset(lines, i):
"Finds the matching \end_inset"
r"Finds the matching \end_inset"
return find_end_of(lines, i, "\\begin_inset", "\\end_inset")

View File

@ -81,7 +81,7 @@ def get_next_paragraph(lines, i, format):
def find_end_of_inset(lines, i):
"Finds the matching \end_inset"
r"Finds the matching \end_inset"
return find_end_of(lines, i, "\\begin_inset", "\\end_inset")
def del_token(lines, token, start, end):
@ -103,7 +103,7 @@ def del_token(lines, token, start, end):
####################################################################
def remove_color_default(document):
" Remove \color default"
r" Remove \color default"
i = 0
while True:
i = find_token(document.body, "\\color default", i)
@ -114,12 +114,12 @@ def remove_color_default(document):
def add_end_header(document):
" Add \end_header"
r" Add \end_header"
document.header.append("\\end_header");
def rm_end_header(document):
" Remove \end_header"
r" Remove \end_header"
i = find_token(document.header, "\\end_header", 0)
if i == -1:
return
@ -169,14 +169,14 @@ def revert_amsmath(document):
def convert_spaces(document):
" \SpecialChar ~ -> \InsetSpace ~"
r" \SpecialChar ~ -> \InsetSpace ~"
for i in range(len(document.body)):
document.body[i] = document.body[i].replace("\\SpecialChar ~",
"\\InsetSpace ~")
def revert_spaces(document):
" \InsetSpace ~ -> \SpecialChar ~"
r" \InsetSpace ~ -> \SpecialChar ~"
regexp = re.compile(r'(.*)(\\InsetSpace\s+)(\S+)')
i = 0
while True:
@ -197,18 +197,18 @@ def revert_spaces(document):
def rename_spaces(document):
""" \InsetSpace \, -> \InsetSpace \thinspace{}
\InsetSpace \space -> \InsetSpace \space{}"""
""" \\InsetSpace \\, -> \\InsetSpace \thinspace{}
\\InsetSpace \\space -> \\InsetSpace \\space{}"""
for i in range(len(document.body)):
document.body[i] = document.body[i].replace("\\InsetSpace \\space",
"\\InsetSpace \\space{}")
document.body[i] = document.body[i].replace("\\InsetSpace \,",
document.body[i] = document.body[i].replace("\\InsetSpace \\,",
"\\InsetSpace \\thinspace{}")
def revert_space_names(document):
""" \InsetSpace \thinspace{} -> \InsetSpace \,
\InsetSpace \space{} -> \InsetSpace \space"""
""" \\InsetSpace \thinspace{} -> \\InsetSpace \\,
\\InsetSpace \\space{} -> \\InsetSpace \\space"""
for i in range(len(document.body)):
document.body[i] = document.body[i].replace("\\InsetSpace \\space{}",
"\\InsetSpace \\space")
@ -262,7 +262,7 @@ def revert_bibtex(document):
def remove_insetparent(document):
" Remove \lyxparent"
r" Remove \lyxparent"
i = 0
while True:
i = find_token(document.body, "\\begin_inset LatexCommand \\lyxparent", i)
@ -428,7 +428,7 @@ def revert_comment(document):
def add_end_layout(document):
" Add \end_layout"
r" Add \end_layout"
i = find_token(document.body, '\\layout', 0)
if i == -1:
@ -502,7 +502,7 @@ def add_end_layout(document):
def rm_end_layout(document):
" Remove \end_layout"
r" Remove \end_layout"
i = 0
while True:
i = find_token(document.body, '\\end_layout', i)
@ -544,7 +544,7 @@ def rm_body_changes(document):
def layout2begin_layout(document):
" \layout -> \begin_layout "
r" \layout -> \begin_layout "
i = 0
while True:
i = find_token(document.body, '\\layout', i)
@ -556,7 +556,7 @@ def layout2begin_layout(document):
def begin_layout2layout(document):
" \begin_layout -> \layout "
r" \begin_layout -> \layout "
i = 0
while True:
i = find_token(document.body, '\\begin_layout', i)
@ -1972,9 +1972,9 @@ def convert_names(document):
'\\begin_layout %s' % document.default_layout,
"",
"%s" % firstname,
"\end_layout",
r"\end_layout",
"",
"\end_inset",
r"\end_inset",
"",
"",
"\\begin_inset CharStyle Surname",

View File

@ -24,6 +24,7 @@ import unicodedata
import sys, os
from parser_tools import find_re, find_token, find_token_backwards, find_token_exact, find_tokens, find_end_of, get_value, find_beginning_of, find_nonempty_line
from lyx2lyx_tools import insert_document_option
from LyX import get_encoding
# Provide support for both python 2 and 3
@ -420,7 +421,7 @@ def revert_unicode_line(document, i, insets, spec_chars, replacement_character =
else:
if insets and insets[-1] == "Formula":
# avoid putting an ERT in a math; instead put command as text
command = command.replace('\\\\', '\mathrm{')
command = command.replace('\\\\', r'\mathrm{')
command = command + '}'
elif not insets or insets[-1] != "ERT":
# add an ERT inset with the replacement character
@ -490,7 +491,7 @@ def revert_cs_label(document):
def convert_bibitem(document):
""" Convert
r""" Convert
\bibitem [option]{argument}
to
@ -575,16 +576,16 @@ commandparams_info = {
def convert_commandparams(document):
""" Convert
\begin_inset LatexCommand \cmdname[opt1][opt2]{arg}
\end_inset
\\begin_inset LatexCommand \\cmdname[opt1][opt2]{arg}
\\end_inset
to
\begin_inset LatexCommand cmdname
\\begin_inset LatexCommand cmdname
name1 "opt1"
name2 "opt2"
name3 "arg"
\end_inset
\\end_inset
name1, name2 and name3 can be different for each command.
"""
@ -895,7 +896,7 @@ def revert_cleardoublepage(document):
def convert_lyxline(document):
" remove fontsize commands for \lyxline "
r" remove fontsize commands for \lyxline "
# The problematic is: The old \lyxline definition doesn't handle the fontsize
# to change the line thickness. The new definiton does this so that imported
# \lyxlines would have a different line thickness. The eventual fontsize command
@ -1686,7 +1687,7 @@ def revert_CJK(document):
def revert_preamble_listings_params(document):
" Revert preamble option \listings_params "
r" Revert preamble option \listings_params "
i = find_token(document.header, "\\listings_params", 0)
if i != -1:
document.preamble.append('\\usepackage{listings}')
@ -1907,13 +1908,7 @@ def revert_ext_font_sizes(document):
i = find_token(document.header, '\\paperfontsize', 0)
document.header[i] = '\\paperfontsize default'
i = find_token(document.header, '\\options', 0)
if i == -1:
i = find_token(document.header, '\\textclass', 0) + 1
document.header[i:i] = ['\\options %s' % fontsize]
else:
document.header[i] += ',%s' % fontsize
insert_document_option(document, fontsize)
def convert_ext_font_sizes(document):
@ -2010,10 +2005,10 @@ def convert_arabic (document):
document.header[i] = "\\language arabic_arabtex"
i = 0
while i < len(document.body):
h = document.body[i].find("\lang arabic", 0, len(document.body[i]))
h = document.body[i].find(r"\lang arabic", 0, len(document.body[i]))
if (h != -1):
# change the language name
document.body[i] = '\lang arabic_arabtex'
document.body[i] = r'\lang arabic_arabtex'
i = i + 1
@ -2025,10 +2020,10 @@ def revert_arabic (document):
document.header[i] = "\\language arabic"
i = 0
while i < len(document.body):
h = document.body[i].find("\lang arabic_arabtex", 0, len(document.body[i]))
h = document.body[i].find(r"\lang arabic_arabtex", 0, len(document.body[i]))
if (h != -1):
# change the language name
document.body[i] = '\lang arabic'
document.body[i] = r'\lang arabic'
i = i + 1

View File

@ -23,7 +23,7 @@ import unicodedata
import sys, os
from parser_tools import find_token, find_end_of, find_tokens, get_value
from unicode_symbols import read_unicodesymbols
from unicode_symbols import unicode_reps
####################################################################
# Private helper functions
@ -146,61 +146,13 @@ def set_option(document, m, option, value):
return l
# FIXME: Remove this function if the version imported from unicode_symbols works.
# This function was the predecessor from that function, that in the meanwhile got
# new fixes.
def read_unicodesymbols2():
" Read the unicodesymbols list of unicode characters and corresponding commands."
# Provide support for both python 2 and 3
PY2 = sys.version_info[0] == 2
if not PY2:
unichr = chr
# End of code to support for both python 2 and 3
pathname = os.path.abspath(os.path.dirname(sys.argv[0]))
fp = open(os.path.join(pathname.strip('lyx2lyx'), 'unicodesymbols'))
spec_chars = []
# Two backslashes, followed by some non-word character, and then a character
# in brackets. The idea is to check for constructs like: \"{u}, which is how
# they are written in the unicodesymbols file; but they can also be written
# as: \"u or even \" u.
r = re.compile(r'\\\\(\W)\{(\w)\}')
for line in fp.readlines():
if line[0] != '#' and line.strip() != "":
line=line.replace(' "',' ') # remove all quotation marks with spaces before
line=line.replace('" ',' ') # remove all quotation marks with spaces after
line=line.replace(r'\"','"') # replace \" by " (for characters with diaeresis)
try:
[ucs4,command,dead] = line.split(None,2)
if command[0:1] != "\\":
continue
spec_chars.append([command, unichr(eval(ucs4))])
except:
continue
m = r.match(command)
if m != None:
command = "\\\\"
# If the character is a double-quote, then we need to escape it, too,
# since it is done that way in the LyX file.
if m.group(1) == "\"":
command += "\\"
commandbl = command
command += m.group(1) + m.group(2)
commandbl += m.group(1) + ' ' + m.group(2)
spec_chars.append([command, unichr(eval(ucs4))])
spec_chars.append([commandbl, unichr(eval(ucs4))])
fp.close()
return spec_chars
def extract_argument(line):
'Extracts a LaTeX argument from the start of line. Returns (arg, rest).'
if not line:
return (None, "")
bracere = re.compile("(\s*)(.*)")
bracere = re.compile(r"(\s*)(.*)")
n = bracere.match(line)
whitespace = n.group(1)
stuff = n.group(2)
@ -280,8 +232,6 @@ def latex2ert(line, isindex):
return retval
unicode_reps = read_unicodesymbols()
#Bug 5022....
#Might should do latex2ert first, then deal with stuff that DOESN'T
#end up inside ERT. That routine could be modified so that it returned
@ -327,7 +277,7 @@ def latex2lyx(data, isindex):
data = data.replace('\\\\', '\\')
# Math:
mathre = re.compile('^(.*?)(\$.*?\$)(.*)')
mathre = re.compile(r'^(.*?)(\$.*?\$)(.*)')
lines = data.split('\n')
for line in lines:
#document.warning("LINE: " + line)
@ -996,7 +946,7 @@ def remove_inzip_options(document):
def convert_inset_command(document):
"""
r"""
Convert:
\begin_inset LatexCommand cmd
to
@ -1033,7 +983,7 @@ def convert_inset_command(document):
def revert_inset_command(document):
"""
r"""
Convert:
\begin_inset CommandInset InsetType
LatexCommand cmd
@ -1608,7 +1558,7 @@ def convert_usorbian(document):
def convert_macro_global(document):
"Remove TeX code command \global when it is in front of a macro"
r"Remove TeX code command \global when it is in front of a macro"
# math macros are nowadays already defined \global, so that an additional
# \global would make the document uncompilable, see
# http://www.lyx.org/trac/ticket/5371
@ -2389,7 +2339,7 @@ def revert_wrapplacement(document):
def remove_extra_embedded_files(document):
" Remove \extra_embedded_files from buffer params "
r" Remove \extra_embedded_files from buffer params "
i = find_token(document.header, '\\extra_embedded_files', 0)
if i == -1:
return

View File

@ -22,14 +22,15 @@ import re, string
import unicodedata
import sys, os
from parser_tools import find_token, find_end_of, find_tokens, \
from parser_tools import del_complete_lines, \
find_token, find_end_of, find_tokens, \
find_token_exact, find_end_of_inset, find_end_of_layout, \
find_token_backwards, is_in_inset, get_value, get_quoted_value, \
del_token, check_token, get_option_value
from lyx2lyx_tools import add_to_preamble, insert_to_preamble, \
put_cmd_in_ert, lyx2latex, latex_length, revert_flex_inset, \
revert_font_attrs, hex2ratio, str2bool
revert_font_attrs, hex2ratio, str2bool, revert_language
####################################################################
# Private helper functions
@ -377,7 +378,7 @@ def revert_splitindex(document):
l = re.compile(r'\\begin_inset Index (.*)$')
m = l.match(line)
itype = m.group(1)
if itype == "idx" or indices == "false":
if itype == "idx" or useindices == "false":
document.body[i] = "\\begin_inset Index"
else:
k = find_end_of_inset(document.body, i)
@ -484,6 +485,15 @@ def revert_printindexall(document):
document.body[i:k + 1] = subst
i = i + 1
strikeout_preamble = ['% for proper underlining',
r'\PassOptionsToPackage{normalem}{ulem}',
r'\usepackage{ulem}']
def convert_strikeout(document):
" Remove preamble code loading 'ulem' package. "
del_complete_lines(document.preamble,
['% Added by lyx2lyx']+strikeout_preamble)
def revert_strikeout(document):
" Reverts \\strikeout font attribute "
@ -491,25 +501,30 @@ def revert_strikeout(document):
changed = revert_font_attrs(document.body, "\\uwave", "\\uwave") or changed
changed = revert_font_attrs(document.body, "\\strikeout", "\\sout") or changed
if changed == True:
insert_to_preamble(document, \
['% for proper underlining',
'\\PassOptionsToPackage{normalem}{ulem}',
'\\usepackage{ulem}'])
insert_to_preamble(document, strikeout_preamble)
ulinelatex_preamble = ['% fix underbar in citations',
r'\let\cite@rig\cite',
r'\newcommand{\b@xcite}[2][\%]{\def\def@pt{\%}\def\pas@pt{#1}',
r' \mbox{\ifx\def@pt\pas@pt\cite@rig{#2}\else\cite@rig[#1]{#2}\fi}}',
r'\renewcommand{\underbar}[1]{{\let\cite\b@xcite\uline{#1}}}']
def convert_ulinelatex(document):
" Remove preamble code for \\uline font attribute. "
del_complete_lines(document.preamble,
['% Added by lyx2lyx']+ulinelatex_preamble)
def revert_ulinelatex(document):
" Reverts \\uline font attribute "
" Add preamble code for \\uline font attribute in citations. "
i = find_token(document.body, '\\bar under', 0)
if i == -1:
return
insert_to_preamble(document,\
['% for proper underlining',
'\\PassOptionsToPackage{normalem}{ulem}',
'\\usepackage{ulem}',
'\\let\\cite@rig\\cite',
'\\newcommand{\\b@xcite}[2][\\%]{\\def\\def@pt{\\%}\\def\\pas@pt{#1}',
' \\mbox{\\ifx\\def@pt\\pas@pt\\cite@rig{#2}\\else\\cite@rig[#1]{#2}\\fi}}',
'\\renewcommand{\\underbar}[1]{{\\let\\cite\\b@xcite\\uline{#1}}}'])
try:
document.preamble.index(r'\usepackage{ulem}')
except ValueError:
insert_to_preamble(document, strikeout_preamble)
insert_to_preamble(document, ulinelatex_preamble)
def revert_custom_processors(document):
@ -818,6 +833,9 @@ def revert_suppress_date(document):
del document.header[i]
mhchem_preamble = [r"\PassOptionsToPackage{version=3}{mhchem}",
r"\usepackage{mhchem}"]
def convert_mhchem(document):
"Set mhchem to off for versions older than 1.6.x"
if document.initial_format < 277:
@ -835,47 +853,44 @@ def convert_mhchem(document):
# pre-1.5.x document
i = find_token(document.header, "\\use_amsmath", 0)
if i == -1:
document.warning("Malformed LyX document: Could not find amsmath os esint setting.")
document.warning("Malformed LyX document: "
"Could not find amsmath or esint setting.")
return
document.header.insert(i + 1, "\\use_mhchem %d" % mhchem)
# remove LyX-inserted preamble
if mhchem != 0:
del_complete_lines(document.preamble,
['% Added by lyx2lyx']+mhchem_preamble)
def revert_mhchem(document):
"Revert mhchem loading to preamble code"
"Revert mhchem loading to preamble code."
mhchem = "off"
i = find_token(document.header, "\\use_mhchem", 0)
if i == -1:
document.warning("Malformed LyX document: Could not find mhchem setting.")
mhchem = "auto"
else:
val = get_value(document.header, "\\use_mhchem", i)
if val == "1":
mhchem = "auto"
elif val == "2":
mhchem = "on"
del document.header[i]
mhchem = get_value(document.header, "\\use_mhchem", delete=True)
try:
mhchem = int(mhchem)
except ValueError:
document.warning("Malformed LyX document: "
"Could not find mhchem setting.")
mhchem = 1 # "auto"
# mhchem in {0: "off", 1: "auto", 2: "on"}
if mhchem == "off":
# don't load case
return
if mhchem == "auto":
if mhchem == 1: # "auto"
i = 0
while True:
while i != -1 and mhchem == 1:
i = find_token(document.body, "\\begin_inset Formula", i)
if i == -1:
j = find_end_of_inset(document.body, i)
if j == -1:
break
line = document.body[i]
if line.find("\\ce{") != -1 or line.find("\\cf{") != -1:
mhchem = "on"
if (True for line in document.body[i:j]
if r"\ce{" in line or r"\cf{" in line):
mhchem = 2
break
i += 1
if mhchem == "on":
pre = ["\\PassOptionsToPackage{version=3}{mhchem}",
"\\usepackage{mhchem}"]
insert_to_preamble(document, pre)
if (mhchem == 2 # on
and find_token(document.preamble, r"\usepackage{mhchem}") == -1):
insert_to_preamble(document, mhchem_preamble)
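
The rewrite above is the first user of get_value()'s new delete flag (documented in the parser_tools hunk near the end of this commit). Assumed behaviour, with an invented header:

header = ["\\use_mhchem 2", "\\language english"]
assert get_value(header, "\\use_mhchem", delete=True) == "2"
assert header == ["\\language english"]   # the matched line was removed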
def revert_fontenc(document):
@ -956,6 +971,20 @@ def revert_includeonly(document):
document.header[i : j + 1] = []
def convert_includeall(document):
" Add maintain_unincluded_children param "
i = 0
i = find_token(document.header, "\\maintain_unincluded_children", 0)
if i == -1:
i = find_token(document.header, "\\textclass", 0)
if i == -1:
document.warning("Malformed LyX document! Missing \\textclass header.")
return
document.header.insert(i, "\\maintain_unincluded_children false")
return
def revert_includeall(document):
" Remove maintain_unincluded_children param "
del_token(document.header, '\\maintain_unincluded_children', 0)
@ -1090,7 +1119,7 @@ def revert_multirow(document):
def convert_math_output(document):
" Convert \html_use_mathml to \html_math_output "
r" Convert \html_use_mathml to \html_math_output "
i = find_token(document.header, "\\html_use_mathml", 0)
if i == -1:
return
@ -1107,7 +1136,7 @@ def convert_math_output(document):
def revert_math_output(document):
" Revert \html_math_output to \html_use_mathml "
r" Revert \html_math_output to \html_use_mathml "
i = find_token(document.header, "\\html_math_output", 0)
if i == -1:
return
@ -1266,19 +1295,7 @@ def revert_notefontcolor(document):
def revert_turkmen(document):
"Set language Turkmen to English"
if document.language == "turkmen":
document.language = "english"
i = find_token(document.header, "\\language", 0)
if i != -1:
document.header[i] = "\\language english"
j = 0
while True:
j = find_token(document.body, "\\lang turkmen", j)
if j == -1:
return
document.body[j] = document.body[j].replace("\\lang turkmen", "\\lang english")
j += 1
revert_language(document, "turkmen", "turkmen", "turkmen")
def revert_fontcolor(document):
@ -1602,8 +1619,8 @@ def revert_IEEEtran(document):
def convert_prettyref(document):
" Converts prettyref references to neutral formatted refs "
re_ref = re.compile("^\s*reference\s+\"(\w+):(\S+)\"")
nm_ref = re.compile("^\s*name\s+\"(\w+):(\S+)\"")
re_ref = re.compile("^\\s*reference\\s+\"(\\w+):(\\S+)\"")
nm_ref = re.compile("^\\s*name\\s+\"(\\w+):(\\S+)\"")
i = 0
while True:
@ -1624,8 +1641,8 @@ def convert_prettyref(document):
def revert_refstyle(document):
" Reverts neutral formatted refs to prettyref "
re_ref = re.compile("^reference\s+\"(\w+):(\S+)\"")
nm_ref = re.compile("^\s*name\s+\"(\w+):(\S+)\"")
re_ref = re.compile("^reference\\s+\"(\\w+):(\\S+)\"")
nm_ref = re.compile("^\\s*name\\s+\"(\\w+):(\\S+)\"")
i = 0
while True:
@ -1664,12 +1681,10 @@ def revert_nameref(document):
i += 1
# Make sure it is actually in an inset!
# A normal line could begin with "LatexCommand nameref"!
val = is_in_inset(document.body, cmdloc, \
stins, endins = is_in_inset(document.body, cmdloc,
"\\begin_inset CommandInset ref")
if not val:
if endins == -1:
continue
stins, endins = val
# ok, so it is in an InsetRef
refline = find_token(document.body, "reference", stins, endins)
if refline == -1:
@ -1699,17 +1714,16 @@ def remove_Nameref(document):
break
cmdloc = i
i += 1
# Make sure it is actually in an inset!
val = is_in_inset(document.body, cmdloc, \
"\\begin_inset CommandInset ref")
val = is_in_inset(document.body, cmdloc,
"\\begin_inset CommandInset ref", default=False)
if not val:
continue
document.body[cmdloc] = "LatexCommand nameref"
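
The two call sites above show both idioms the updated is_in_inset() supports: unpacking against the new (-1, -1) default, or passing default=False to keep the old truth test. A toy check under those assumed semantics:

body = ["\\begin_inset CommandInset ref",
        "LatexCommand nameref",
        "\\end_inset"]
assert is_in_inset(body, 1, "\\begin_inset CommandInset ref") == (0, 2)
assert is_in_inset(body, 1, "\\begin_inset Tabular", default=False) is False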
def revert_mathrsfs(document):
" Load mathrsfs if \mathrsfs us use in the document "
r" Load mathrsfs if \mathrsfs us use in the document "
i = 0
for line in document.body:
if line.find("\\mathscr{") != -1:
@ -2145,7 +2159,7 @@ def convert_passthru(document):
if not check_passthru:
return
rx = re.compile("\\\\begin_layout \s*(\w+)")
rx = re.compile("\\\\begin_layout \\s*(\\w+)")
beg = 0
for lay in ["Chunk", "Scrap"]:
while True:
@ -2175,7 +2189,7 @@ def convert_passthru(document):
break
ne = find_end_of_inset(document.body, ns)
if ne == -1 or ne > end:
document.warning("Can't find end of inset at line " + str(nb))
document.warning("Can't find end of inset at line " + str(ne))
ns += 1
continue
if document.body[ne + 1] == "":
@ -2209,7 +2223,7 @@ def revert_passthru(document):
" http://www.mail-archive.com/lyx-devel@lists.lyx.org/msg161298.html "
if not check_passthru:
return
rx = re.compile("\\\\begin_layout \s*(\w+)")
rx = re.compile("\\\\begin_layout \\s*(\\w+)")
beg = 0
for lay in ["Chunk", "Scrap"]:
while True:
@ -2501,7 +2515,7 @@ def revert_langpack(document):
def convert_langpack(document):
" Add \\language_package parameter "
i = find_token(document.header, "\language" , 0)
i = find_token(document.header, r"\language" , 0)
if i == -1:
document.warning("Malformed document. No \\language defined!")
return
@ -2548,9 +2562,9 @@ convert = [[346, []],
[352, [convert_splitindex]],
[353, []],
[354, []],
[355, []],
[355, [convert_strikeout]],
[356, []],
[357, []],
[357, [convert_ulinelatex]],
[358, []],
[359, [convert_nomencl_width]],
[360, []],
@ -2569,7 +2583,7 @@ convert = [[346, []],
[373, [merge_gbrief]],
[374, []],
[375, []],
[376, []],
[376, [convert_includeall]],
[377, []],
[378, []],
[379, [convert_math_output]],

View File

@ -24,7 +24,8 @@ import sys, os
# Uncomment only what you need to import, please.
from parser_tools import count_pars_in_inset, del_token, find_token, find_token_exact, \
from parser_tools import count_pars_in_inset, del_complete_lines, del_token, \
find_token, find_token_exact, \
find_token_backwards, find_end_of, find_end_of_inset, find_end_of_layout, \
find_end_of_sequence, find_re, get_option_value, get_containing_layout, \
get_containing_inset, get_value, get_quoted_value, set_option_value
@ -33,7 +34,7 @@ from parser_tools import count_pars_in_inset, del_token, find_token, find_token_
#find_end_of_inset, find_end_of_layout, \
#is_in_inset, del_token, check_token
from lyx2lyx_tools import add_to_preamble, put_cmd_in_ert, get_ert
from lyx2lyx_tools import add_to_preamble, put_cmd_in_ert, get_ert, revert_language
#from lyx2lyx_tools import insert_to_preamble, \
# lyx2latex, latex_length, revert_flex_inset, \
@ -58,7 +59,7 @@ def revert_Argument_to_TeX_brace(document, line, endline, n, nmax, environment,
usage:
revert_Argument_to_TeX_brace(document, LineOfBegin, LineOfEnd, StartArgument, EndArgument, isEnvironment, isOpt)
LineOfBegin is the line of the \\begin_layout or \\begin_inset statement
LineOfEnd is the line of the \end_layout or \end_inset statement, if "0" is given, the end of the file is used instead
LineOfEnd is the line of the \\end_layout or \\end_inset statement, if "0" is given, the end of the file is used instead
StartArgument is the number of the first argument that needs to be converted
EndArgument is the number of the last argument that needs to be converted or the last defined one
isEnvironment must be true, if the layout is for a LaTeX environment
@ -352,7 +353,7 @@ def revert_undertilde(document):
def revert_negative_space(document):
"Revert InsetSpace negmedspace and negthickspace into its TeX-code counterpart"
"Revert InsetSpace negmedspace and negthickspace into their TeX-code counterparts"
i = 0
j = 0
reverted = False
@ -365,7 +366,7 @@ def revert_negative_space(document):
if reverted == True:
i = find_token(document.header, "\\use_amsmath 2", 0)
if i == -1:
add_to_preamble(document, ["\\@ifundefined{negthickspace}{\\usepackage{amsmath}}"])
add_to_preamble(document, ["\\@ifundefined{negthickspace}{\\usepackage{amsmath}}{}"])
return
if i == -1:
return
@ -555,8 +556,6 @@ def handle_longtable_captions(document, forward):
get_option_value(document.body[begin_row], 'endlastfoot') != 'true'):
document.body[begin_row] = set_option_value(document.body[begin_row], 'caption', 'true", endfirsthead="true')
elif get_option_value(document.body[begin_row], 'caption') == 'true':
if get_option_value(document.body[begin_row], 'endfirsthead') == 'true':
document.body[begin_row] = set_option_value(document.body[begin_row], 'endfirsthead', 'false')
if get_option_value(document.body[begin_row], 'endhead') == 'true':
document.body[begin_row] = set_option_value(document.body[begin_row], 'endhead', 'false')
if get_option_value(document.body[begin_row], 'endfoot') == 'true':
@ -618,15 +617,16 @@ def convert_use_package(document, pkg, commands, oldauto):
# oldauto defines how the version we are converting from behaves:
# if it is true, the old version uses the package automatically.
# if it is false, the old version never uses the package.
i = find_token(document.header, "\\use_package", 0)
i = find_token(document.header, "\\use_package")
if i == -1:
document.warning("Malformed LyX document: Can't find \\use_package.")
return;
j = find_token(document.preamble, "\\usepackage{" + pkg + "}", 0)
if j != -1:
# package was loaded in the preamble, convert this to header setting for round trip
packageline = "\\usepackage{%s}" % pkg
if (del_complete_lines(document.preamble,
['% Added by lyx2lyx', packageline]) or
del_complete_lines(document.preamble, [packageline])):
# package was loaded in the preamble, convert this to header setting
document.header.insert(i + 1, "\\use_package " + pkg + " 2") # on
del document.preamble[j]
# If oldauto is true we have two options:
# We can either set the package to auto - this is correct for files in
# format 425 to 463, and may create a conflict for older files which use
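
del_complete_lines(), used above to probe for the package line with and without the lyx2lyx marker, deletes a contiguous block and reports success. A sketch of the assumed contract:

preamble = ["% Added by lyx2lyx", "\\usepackage{url}", "\\usepackage{bm}"]
assert del_complete_lines(preamble,
                          ["% Added by lyx2lyx", "\\usepackage{url}"])
assert preamble == ["\\usepackage{bm}"]
assert not del_complete_lines(preamble, ["\\usepackage{url}"])  # gone now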
@ -1060,7 +1060,7 @@ def convert_table_rotation(document):
def convert_listoflistings(document):
'Convert ERT \lstlistoflistings to TOC lstlistoflistings inset'
r'Convert ERT \lstlistoflistings to TOC lstlistoflistings inset'
# We can support roundtrip because the command is so simple
i = 0
while True:
@ -1169,24 +1169,16 @@ def revert_ancientgreek(document):
def revert_languages(document):
"Set the document language for new supported languages to English"
languages = [
"coptic", "divehi", "hindi", "kurmanji", "lao", "marathi", "occitan", "sanskrit",
"syriac", "tamil", "telugu", "urdu"
]
for n in range(len(languages)):
if document.language == languages[n]:
document.language = "english"
i = find_token(document.header, "\\language", 0)
if i != -1:
document.header[i] = "\\language english"
j = 0
while j < len(document.body):
j = find_token(document.body, "\\lang " + languages[n], j)
if j != -1:
document.body[j] = document.body[j].replace("\\lang " + languages[n], "\\lang english")
j += 1
else:
j = len(document.body)
# polyglossia-only
polyglossia_languages = ["coptic", "divehi", "hindi", "lao", "marathi",
"occitan", "sanskrit", "syriac", "tamil",
"telugu", "urdu"]
# babel-only
babel_languages = ["kurmanji"]
for lang in polyglossia_languages:
revert_language(document, lang, "", lang)
for lang in babel_languages:
revert_language(document, lang, lang, "")
def convert_armenian(document):
@ -1555,10 +1547,11 @@ def convert_latexargs(document):
"theorems-chap-bytype", "theorems-chap", "theorems-named", "theorems-sec-bytype",
"theorems-sec", "theorems-starred", "theorems-std", "todonotes"]
# Modules we need to take care of
caveat_modules = ["initials"]
caveat_modules = ["initials"] # TODO: , "graphicboxes", "bicaption"]
# information about the relevant styles in caveat_modules (number of opt and req args)
# use this if we get more caveat_modules. For now, use hard coding (see below).
# initials = [{'Layout' : 'Initial', 'opt' : 1, 'req' : 1}]
# graphicboxes = { ... }
# Is this a known safe layout?
safe_layout = document.textclass in safe_layouts
@ -4560,19 +4553,7 @@ def revert_aa2(document):
def revert_tibetan(document):
"Set the document language for Tibetan to English"
if document.language == "tibetan":
document.language = "english"
i = find_token(document.header, "\\language", 0)
if i != -1:
document.header[i] = "\\language english"
j = 0
while j < len(document.body):
j = find_token(document.body, "\\lang tibetan", j)
if j != -1:
document.body[j] = document.body[j].replace("\\lang tibetan", "\\lang english")
j += 1
else:
j = len(document.body)
revert_language(document, "tibetan", "", "tibetan")
#############

File diff suppressed because it is too large

File diff suppressed because it is too large

lib/lyx2lyx/lyx_2_4.py (new file, 5848 lines added)

File diff suppressed because it is too large

View File

@ -1,7 +1,7 @@
# This file is part of lyx2lyx
# -*- coding: utf-8 -*-
# Copyright (C) 2002-2011 Dekel Tsur <dekel@lyx.org>,
# José Matos <jamatos@lyx.org>, Richard Heck <rgheck@comcast.net>
# José Matos <jamatos@lyx.org>, Richard Kimberly Heck <rikiheck@lyx.org>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
@ -18,12 +18,12 @@
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
'''
"""
This module offers several free functions to help parse lines.
More documentaton is below, but here is a quick guide to what
they do. Optional arguments are marked by brackets.
find_token(lines, token, start[, end[, ignorews]]):
find_token(lines, token[, start[, end[, ignorews]]]):
Returns the first line i, start <= i < end, on which
token is found at the beginning. Returns -1 if not
found.
@ -31,10 +31,10 @@ find_token(lines, token, start[, end[, ignorews]]):
in whitespace do not count, except that there must be no
extra whitespace following token itself.
find_token_exact(lines, token, start[, end]):
find_token_exact(lines, token[, start[, end]]]):
As find_token, but with ignorews set to True.
find_tokens(lines, tokens, start[, end[, ignorews]]):
find_tokens(lines, tokens[, start[, end[, ignorews]]]):
Returns the first line i, start <= i < end, on which
one of the tokens in tokens is found at the beginning.
Returns -1 if not found.
@ -42,18 +42,21 @@ find_tokens(lines, tokens, start[, end[, ignorews]]):
in whitespace do not count, except that there must be no
extra whitespace following token itself.
find_tokens_exact(lines, token, start[, end]):
find_tokens_exact(lines, token[, start[, end]]):
As find_tokens, but with ignorews True.
find_token_backwards(lines, token, start):
find_tokens_backwards(lines, tokens, start):
As before, but look backwards.
find_substring(lines, sub[, start[, end]]) -> int
As find_token, but sub may be anywhere in the line.
find_re(lines, rexp, start[, end]):
As find_token, but rexp is a regular expression object,
so it has to be passed as e.g.: re.compile(r'...').
get_value(lines, token, start[, end[, default]):
get_value(lines, token[, start[, end[, default[, delete]]]]):
Similar to find_token, but it returns what follows the
token on the found line. Example:
get_value(document.header, "\\use_xetex", 0)
@ -63,8 +66,9 @@ get_value(lines, token, start[, end[, default]):
is stripped.) The final argument, default, defaults to "",
and is what is returned if we do not find anything. So you
can use that to set a default.
If delete is True, then delete the line if found.
get_quoted_value(lines, token, start[, end[, default]]):
get_quoted_value(lines, token[, start[, end[, default[, delete]]]]):
Similar to get_value, but it will strip quotes off the
value, if they are present. So use this one for cases
where the value is normally quoted.
@ -74,13 +78,20 @@ get_option_value(line, option):
option="value"
and returns value. Returns "" if not found.
get_bool_value(lines, token, start[, end[, default]]):
get_bool_value(lines, token[, start[, end[, default[, delete]]]]):
Like get_value, but returns a boolean.
del_token(lines, token, start[, end]):
set_bool_value(lines, token, value[, start[, end]]):
Find `token` in `lines[start:end]` and set to boolean value bool(`value`).
Return old value. Raise ValueError if token is not in lines.
del_token(lines, token[, start[, end]]):
Like find_token, but deletes the line if it finds one.
Returns True if a line got deleted, otherwise False.
Use get_* with the optional argument "delete=True" if you want to
get and delete a token.
find_beginning_of(lines, i, start_token, end_token):
Here, start_token and end_token are meant to be a matching
pair, like "\\begin_layout" and "\\end_layout". We look for
@ -110,25 +121,25 @@ find_end_of_layout(lines, i):
find_end_of_sequence(lines, i):
Find the end of the sequence of layouts of the same kind.
Considers nesting. If the last paragraph in sequence is nested,
the position of the last \end_deeper is returned, else
the position of the last \end_layout.
the position of the last \\end_deeper is returned, else
the position of the last \\end_layout.
is_in_inset(lines, i, inset):
Checks if line i is in an inset of the given type.
is_in_inset(lines, i, inset, default=(-1,-1)):
Check if line i is in an inset of the given type.
If so, returns starting and ending lines. Otherwise,
returns False.
return default.
Example:
is_in_inset(document.body, i, "\\begin_inset Tabular")
returns False unless i is within a table. If it is, then
returns (-1,-1) unless i is within a table. If it is, then
it returns the line on which the table begins and the one
on which it ends. Note that this pair will evaluate to
boolean True, so
if is_in_inset(...):
if is_in_inset(..., default=False):
will do what you expect.
get_containing_inset(lines, i):
Finds out what kind of inset line i is within. Returns a
list containing what follows \begin_inset on the line
list containing what follows \\begin_inset on the line
on which the inset begins, plus the starting and ending line.
Returns False on any kind of error or if it isn't in an inset.
So get_containing_inset(document.body, i) might return:
@ -152,7 +163,7 @@ is_nonempty_line(line):
count_pars_in_inset(lines, i):
Counts the paragraphs inside an inset.
'''
"""
import re
@ -161,9 +172,11 @@ def check_token(line, token):
""" check_token(line, token) -> bool
Return True if token is present in line and is the first element
else returns False."""
else returns False.
return line[:len(token)] == token
Deprecated. Use line.startswith(token).
"""
return line.startswith(token)
def is_nonempty_line(line):
@ -171,50 +184,53 @@ def is_nonempty_line(line):
Return False if line is either empty or it has only whitespaces,
else return True."""
return line != " "*len(line)
return bool(line.strip())
# Utilities for a list of lines
def find_token(lines, token, start, end = 0, ignorews = False):
def find_token(lines, token, start=0, end=0, ignorews=False):
""" find_token(lines, token, start[[, end], ignorews]) -> int
Return the lowest line where token is found, and is the first
element, in lines[start, end].
If ignorews is True (default is False), then differences in
whitespace are ignored, except that there must be no extra
whitespace following token itself.
whitespace are ignored, but there must be whitespace following
token itself.
Use find_substring(lines, sub) to find a substring anywhere in `lines`.
Return -1 on failure."""
if end == 0 or end > len(lines):
end = len(lines)
m = len(token)
if ignorews:
y = token.split()
for i in range(start, end):
if ignorews:
x = lines[i].split()
y = token.split()
if len(x) < len(y):
continue
if x[:len(y)] == y:
return i
else:
if lines[i][:m] == token:
if lines[i].startswith(token):
return i
return -1
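
A quick sketch of the new default arguments (assuming lib/lyx2lyx is on the module path; behaviour as documented above):

from parser_tools import find_token

body = ["\\begin_layout Standard", "Hello", "\\end_layout"]
assert find_token(body, "\\end_layout") == 2         # start defaults to 0
assert find_token(body, "\\end_layout", 0, 2) == -1  # end is exclusive
assert find_token(body, "Hello!") == -1              # not found -> -1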
def find_token_exact(lines, token, start, end = 0):
def find_token_exact(lines, token, start=0, end=0):
return find_token(lines, token, start, end, True)
def find_tokens(lines, tokens, start, end = 0, ignorews = False):
def find_tokens(lines, tokens, start=0, end=0, ignorews=False):
""" find_tokens(lines, tokens, start[[, end], ignorews]) -> int
Return the lowest line where one token in tokens is found, and is
the first element, in lines[start, end].
Return -1 on failure."""
if end == 0 or end > len(lines):
end = len(lines)
@ -228,23 +244,41 @@ def find_tokens(lines, tokens, start, end = 0, ignorews = False):
if x[:len(y)] == y:
return i
else:
if lines[i][:len(token)] == token:
if lines[i].startswith(token):
return i
return -1
def find_tokens_exact(lines, tokens, start, end = 0):
def find_tokens_exact(lines, tokens, start=0, end=0):
return find_tokens(lines, tokens, start, end, True)
def find_re(lines, rexp, start, end = 0):
""" find_token_re(lines, rexp, start[, end]) -> int
def find_substring(lines, sub, start=0, end=0):
""" find_substring(lines, sub[, start[, end]]) -> int
Return the lowest line where rexp, a regular expression, is found
in lines[start, end].
Return the lowest line number `i` in [start, end] where
`sub` is a substring of line[i].
Return -1 on failure."""
if end == 0 or end > len(lines):
end = len(lines)
for i in range(start, end):
if sub in lines[i]:
return i
return -1
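
find_substring complements find_token: it matches anywhere in a line rather than only at its start. A minimal sketch, assuming parser_tools is importable:

from parser_tools import find_substring, find_token

body = ["\\begin_inset Quotes eld", "\\end_inset"]
assert find_token(body, "Quotes") == -1     # "Quotes" starts no line
assert find_substring(body, "Quotes") == 0  # but is a substring of line 0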
def find_re(lines, rexp, start=0, end=0):
""" find_re(lines, rexp[, start[, end]]) -> int
Return the lowest line number `i` in [start, end] where the regular
expression object `rexp` matches at the beginning of line[i].
Return -1 on failure.
Start your pattern with the wildcard ".*" to find a match anywhere in a
line. Use find_substring() to find a substring anywhere in the lines.
"""
if end == 0 or end > len(lines):
end = len(lines)
for i in range(start, end):
@ -260,10 +294,8 @@ def find_token_backwards(lines, token, start):
element, in lines[start, end].
Return -1 on failure."""
m = len(token)
for i in range(start, -1, -1):
line = lines[i]
if line[:m] == token:
if lines[i].startswith(token):
return i
return -1
@ -278,30 +310,111 @@ def find_tokens_backwards(lines, tokens, start):
for i in range(start, -1, -1):
line = lines[i]
for token in tokens:
if line[:len(token)] == token:
if line.startswith(token):
return i
return -1
def get_value(lines, token, start, end = 0, default = ""):
""" get_value(lines, token, start[[, end], default]) -> string
def find_complete_lines(lines, sublines, start=0, end=0):
"""Find first occurence of sequence `sublines` in list `lines`.
Return index of first line or -1 on failure.
Efficient search for a sub-list in a large list. Works for any values.
>>> find_complete_lines([1, 2, 3, 1, 1, 2], [1, 2])
0
The `start` and `end` arguments work similar to list.index()
>>> find_complete_lines([1, 2, 3, 1, 1, 2], [1, 2], start=1)
4
>>> find_complete_lines([1, 2, 3, 1, 1, 2], [1, 2], start=1, end=4)
-1
The return value can be used to substitute the sub-list.
Take care to check before use:
>>> l = [1, 1, 2]
>>> s = find_complete_lines(l, [1, 2])
>>> if s != -1:
... l[s:s+2] = [3]; l
[1, 3]
See also del_complete_lines().
"""
if not sublines:
return start
end = end or len(lines)
N = len(sublines)
try:
while True:
for j, value in enumerate(sublines):
i = lines.index(value, start, end)
if j and i != start:
start = i-j
break
start = i + 1
else:
return i + 1 - N
except ValueError: # `sublines` not found
return -1
def find_across_lines(lines, sub, start=0, end=0):
sublines = sub.splitlines()
if len(sublines) > 2:
# at least 3 lines: the middle one(s) are complete -> use index search
i = find_complete_lines(lines, sublines[1:-1], start+1, end-1)
if i < start+1:
return -1
try:
if (lines[i-1].endswith(sublines[0]) and
lines[i+len(sublines)-2].startswith(sublines[-1])):
return i-1
except IndexError:
pass
elif len(sublines) > 1:
# last subline must start a line
i = find_token(lines, sublines[-1], start, end)
if i < start + 1:
return -1
if lines[i-1].endswith(sublines[0]):
return i-1
else: # no line-break, may be in the middle of a line
if end == 0 or end > len(lines):
end = len(lines)
for i in range(start, end):
if sub in lines[i]:
return i
return -1
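
find_across_lines locates text that spans a line break; the index of the first matching line is returned. A minimal sketch, assuming parser_tools is importable:

from parser_tools import find_across_lines

body = ["\\begin_inset Quotes eld", "\\end_inset", ""]
# "eld" ends line 0 and "\end_inset" starts line 1 -> match starts at 0:
assert find_across_lines(body, "eld\n\\end_inset") == 0
assert find_across_lines(body, "eld\n\\end_layout") == -1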
def get_value(lines, token, start=0, end=0, default="", delete=False):
"""Find `token` in `lines` and return part of line that follows it.
Find the next line that looks like:
token followed by other stuff
Returns "followed by other stuff" with leading and trailing
If `delete` is True, delete the line (if found).
Return "followed by other stuff" with leading and trailing
whitespace removed.
"""
i = find_token_exact(lines, token, start, end)
if i == -1:
return default
# TODO: establish desired behaviour, eventually change to
# return lines.pop(i)[len(token):].strip() # or default
# see test_parser_tools.py
l = lines[i].split(None, 1)
if delete:
del(lines[i])
if len(l) > 1:
return l[1].strip()
return default
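
With delete=True, get_value reads and removes a header line in one step. A minimal sketch, assuming parser_tools is importable:

from parser_tools import get_value

header = ["\\origin unavailable", "\\paperpagestyle default"]
assert get_value(header, "\\paperpagestyle") == "default"
assert get_value(header, "\\paperpagestyle", delete=True) == "default"
assert header == ["\\origin unavailable"]  # the line is gone
assert get_value(header, "\\missing", default="fallback") == "fallback"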
def get_quoted_value(lines, token, start, end = 0, default = ""):
def get_quoted_value(lines, token, start=0, end=0, default="", delete=False):
""" get_quoted_value(lines, token, start[[, end], default]) -> string
Find the next line that looks like:
@ -312,33 +425,52 @@ def get_quoted_value(lines, token, start, end = 0, default = ""):
if they are there.
Note that we will NOT strip quotes from default!
"""
val = get_value(lines, token, start, end, "")
val = get_value(lines, token, start, end, "", delete)
if not val:
return default
return val.strip('"')
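
get_quoted_value strips one level of quotes from the result, which get_value leaves in place. A minimal sketch, assuming parser_tools is importable:

from parser_tools import get_quoted_value, get_value

header = ['\\author "Jane Doe"']
assert get_value(header, "\\author") == '"Jane Doe"'
assert get_quoted_value(header, "\\author") == 'Jane Doe'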
def get_bool_value(lines, token, start, end = 0, default = None):
""" get_value(lines, token, start[[, end], default]) -> string
bool_values = {"true": True, "1": True,
"false": False, "0": False}
def get_bool_value(lines, token, start=0, end=0, default=None, delete=False):
""" get_bool_value(lines, token, start[[, end], default]) -> string
Find the next line that looks like:
token bool_value
`token` <bool_value>
Returns True if bool_value is 1 or true and
False if bool_value is 0 or false
Return True if <bool_value> is 1 or "true", False if <bool_value>
is 0 or "false", else `default`.
"""
val = get_quoted_value(lines, token, start, end, default, delete)
return bool_values.get(val, default)
val = get_quoted_value(lines, token, start, end, "")
if val == "1" or val == "true":
return True
if val == "0" or val == "false":
return False
return default
def set_bool_value(lines, token, value, start=0, end=0):
"""Find `token` in `lines` and set to bool(`value`).
Return previous value. Raise `ValueError` if `token` is not in lines.
Cf. find_token(), get_bool_value().
"""
i = find_token(lines, token, start, end)
if i == -1:
raise ValueError
oldvalue = get_bool_value(lines, token, i, i+1)
if oldvalue is value:
return oldvalue
# set to new value
if get_quoted_value(lines, token, i, i+1) in ('0', '1'):
lines[i] = "%s %d" % (token, value)
else:
lines[i] = "%s %s" % (token, str(value).lower())
return oldvalue
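
set_bool_value preserves the stored representation ("0"/"1" vs. "true"/"false") and returns the previous setting. A minimal sketch, assuming parser_tools is importable:

from parser_tools import set_bool_value

header = ["\\output_changes false", "\\html_css_as_file 0"]
assert set_bool_value(header, "\\output_changes", True) is False
assert header[0] == "\\output_changes true"   # word style kept
assert set_bool_value(header, "\\html_css_as_file", True) is False
assert header[1] == "\\html_css_as_file 1"    # numeric style kept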
def get_option_value(line, option):
rx = option + '\s*=\s*"([^"]+)"'
rx = option + r'\s*=\s*"([^"]+)"'
rx = re.compile(rx)
m = rx.search(line)
if not m:
@ -347,15 +479,15 @@ def get_option_value(line, option):
def set_option_value(line, option, value):
rx = '(' + option + '\s*=\s*")[^"]+"'
rx = '(' + option + r'\s*=\s*")[^"]+"'
rx = re.compile(rx)
m = rx.search(line)
if not m:
return line
return re.sub(rx, '\g<1>' + value + '"', line)
return re.sub(rx, r'\g<1>' + value + '"', line)
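
get_option_value and set_option_value operate on a single line with XML-style attributes, as used e.g. in tabular insets. A minimal sketch, assuming parser_tools is importable:

from parser_tools import get_option_value, set_option_value

line = '<cell alignment="center" usebox="none">'
assert get_option_value(line, "alignment") == "center"
assert set_option_value(line, "alignment", "left") == \
       '<cell alignment="left" usebox="none">'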
def del_token(lines, token, start, end = 0):
def del_token(lines, token, start=0, end=0):
""" del_token(lines, token, start, end) -> int
Find the first line in lines where token is the first element
@ -368,6 +500,41 @@ def del_token(lines, token, start, end = 0):
del lines[k]
return True
def del_complete_lines(lines, sublines, start=0, end=0):
"""Delete first occurence of `sublines` in list `lines`.
Efficient deletion of a sub-list in a list. Works for any values.
The `start` and `end` arguments work similar to list.index()
Returns True if a deletion was done and False if not.
>>> l = [1, 0, 1, 1, 1, 2]
>>> del_complete_lines(l, [0, 1, 1])
True
>>> l
[1, 1, 2]
"""
i = find_complete_lines(lines, sublines, start, end)
if i == -1:
return False
del(lines[i:i+len(sublines)])
return True
def del_value(lines, token, start=0, end=0, default=None):
"""
Find the next line that looks like:
token followed by other stuff
Delete that line and return "followed by other stuff"
with leading and trailing whitespace removed.
If token is not found, return `default`.
"""
i = find_token_exact(lines, token, start, end)
if i == -1:
return default
return lines.pop(i)[len(token):].strip()
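
del_value combines lookup and deletion and, unlike get_value, returns "" (not the default) when the token is found without a value. A minimal sketch, assuming parser_tools is importable:

from parser_tools import del_value

body = ["\\begin_inset Quotes eld", "\\end_inset"]
assert del_value(body, "\\begin_inset") == "Quotes eld"
assert body == ["\\end_inset"]
assert del_value(body, "\\end_inset", default=None) == ""
assert body == []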
def find_beginning_of(lines, i, start_token, end_token):
count = 1
@ -375,7 +542,7 @@ def find_beginning_of(lines, i, start_token, end_token):
i = find_tokens_backwards(lines, [start_token, end_token], i-1)
if i == -1:
return -1
if check_token(lines[i], end_token):
if lines[i].startswith(end_token):
count = count+1
else:
count = count-1
@ -391,7 +558,7 @@ def find_end_of(lines, i, start_token, end_token):
i = find_tokens(lines, [end_token, start_token], i+1)
if i == -1:
return -1
if check_token(lines[i], start_token):
if lines[i].startswith(start_token):
count = count+1
else:
count = count-1
@ -400,11 +567,11 @@ def find_end_of(lines, i, start_token, end_token):
return -1
def find_nonempty_line(lines, start, end = 0):
def find_nonempty_line(lines, start=0, end=0):
if end == 0:
end = len(lines)
for i in range(start, end):
if is_nonempty_line(lines[i]):
if lines[i].strip():
return i
return -1
@ -419,35 +586,33 @@ def find_end_of_layout(lines, i):
return find_end_of(lines, i, "\\begin_layout", "\\end_layout")
def is_in_inset(lines, i, inset):
'''
Checks if line i is in an inset of the given type.
If so, returns starting and ending lines.
Otherwise, returns False.
def is_in_inset(lines, i, inset, default=(-1,-1)):
"""
Check if line i is in an inset of the given type.
If so, return starting and ending lines, otherwise `default`.
Example:
is_in_inset(document.body, i, "\\begin_inset Tabular")
returns False unless i is within a table. If it is, then
it returns the line on which the table begins and the one
on which it ends. Note that this pair will evaulate to
boolean True, so
if is_in_inset(...):
returns (-1,-1) if `i` is not within a "Tabular" inset (i.e. a table).
If it is, then it returns the line on which the table begins and the one
on which it ends.
Note that this pair will evaluate to boolean True, so (with the optional
default value set to False)
if is_in_inset(..., default=False):
will do what you expect.
'''
defval = (-1, -1)
stins = find_token_backwards(lines, inset, i)
if stins == -1:
return defval
endins = find_end_of_inset(lines, stins)
# note that this includes the notfound case.
if endins < i:
return defval
return (stins, endins)
"""
start = find_token_backwards(lines, inset, i)
if start == -1:
return default
end = find_end_of_inset(lines, start)
if end < i: # this includes the notfound case.
return default
return (start, end)
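
A minimal sketch of is_in_inset with the recommended boolean-friendly default (assuming parser_tools is importable):

from parser_tools import is_in_inset

body = ["\\begin_inset Note Note", "status open", "\\end_inset"]
assert is_in_inset(body, 1, "\\begin_inset Note") == (0, 2)
assert is_in_inset(body, 1, "\\begin_inset Tabular", default=False) is False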
def get_containing_inset(lines, i):
'''
Finds out what kind of inset line i is within. Returns a
list containing (i) what follows \begin_inset on the line
list containing (i) what follows \\begin_inset on the line
on which the inset begins, plus the starting and ending line.
Returns False on any kind of error or if it isn't in an inset.
'''
@ -473,12 +638,15 @@ def get_containing_inset(lines, i):
def get_containing_layout(lines, i):
'''
Finds out what kind of layout line i is within. Returns a
list containing what follows \begin_layout on the line
on which the layout begins, plus the starting and ending line
and the start of the paragraph (after all params). I.e, returns:
Find out what kind of layout line `i` is within.
Return a tuple
(layoutname, layoutstart, layoutend, startofcontent)
Returns False on any kind of error.
containing
* layout style/name,
* start line number,
* end line number, and
* number of first paragraph line (after all params).
Return `False` on any kind of error.
'''
j = i
while True:
@ -493,10 +661,13 @@ def get_containing_layout(lines, i):
if endlay < i:
return False
lay = get_value(lines, "\\begin_layout", stlay)
if lay == "":
# shouldn't happen
return False
layoutname = get_value(lines, "\\begin_layout", stlay)
if layoutname == "": # layout style missing
# TODO: What shall we do in this case?
pass
# layoutname == "Standard" # use same fallback as the LyX parser:
# raise ValueError("Missing layout name on line %d"%stlay) # diagnosis
# return False # generic error response
par_params = ["\\noindent", "\\indent", "\\indent-toggle", "\\leftindent",
"\\start_of_appendix", "\\paragraph_spacing", "\\align",
"\\labelwidthstring"]
@ -505,7 +676,7 @@ def get_containing_layout(lines, i):
stpar += 1
if lines[stpar].split(' ', 1)[0] not in par_params:
break
return (lay, stlay, endlay, stpar)
return (layoutname, stlay, endlay, stpar)
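
A minimal sketch of the tuple returned by get_containing_layout (assuming parser_tools is importable; paragraph parameters such as \noindent are skipped when locating the first content line):

from parser_tools import get_containing_layout

body = ["\\begin_layout Standard", "\\noindent",
        "some text", "\\end_layout"]
assert get_containing_layout(body, 2) == ("Standard", 0, 3, 2)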
def count_pars_in_inset(lines, i):
@ -518,7 +689,8 @@ def count_pars_in_inset(lines, i):
pars = 0
for j in range(ins[1], ins[2]):
m = re.match(r'\\begin_layout (.*)', lines[j])
if m and get_containing_inset(lines, j)[0] == ins[0]:
found_inset = get_containing_inset(lines, j)
if m and found_inset and found_inset[1] == ins[1]:
pars += 1
return pars
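
A minimal sketch of count_pars_in_inset (assuming parser_tools is importable); only paragraphs directly inside the inset containing line i are counted:

from parser_tools import count_pars_in_inset

body = ["\\begin_inset Note Note", "status open",
        "\\begin_layout Plain Layout", "one", "\\end_layout",
        "\\begin_layout Plain Layout", "two", "\\end_layout",
        "\\end_inset"]
assert count_pars_in_inset(body, 2) == 2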
@ -553,4 +725,3 @@ def find_end_of_sequence(lines, i):
i = i + 1
return endlay

View File

@ -1,4 +1,4 @@
#! /usr/bin/env python
#! /usr/bin/python3
# -*- coding: utf-8 -*-
# Copyright (C) 2004 José Matos <jamatos@lyx.org>
#

View File

@ -0,0 +1,79 @@
# This file is part of lyx2lyx
# -*- coding: utf-8 -*-
# Copyright (C) 2018 The LyX team
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
" This modules tests the auxiliary functions for lyx2lyx."
from lyx2lyx_tools import *
import unittest
class TestParserTools(unittest.TestCase):
def test_put_cmd_in_ert(self):
ert = [u'\\begin_inset ERT',
u'status collapsed',
u'',
u'\\begin_layout Plain Layout',
u'',
u'',
u'\\backslash',
u'texttt{Gr',
u'\\backslash',
u'"{u}',
u'\\backslash',
u'ss{}e}',
u'\\end_layout',
u'',
u'\\end_inset']
ert_open = ert[:]
ert_open[1] = u'status open'
ert_paragraph = ["\\begin_layout Standard",
u'\\begin_inset ERT',
u'status collapsed',
u'',
u'\\begin_layout Plain Layout',
u'',
u'',
u'\\backslash',
u'texttt{Gr',
u'\\backslash',
u'"{u}',
u'\\backslash',
u'ss{}e}',
u'\\end_layout',
u'',
u'\\end_inset',
u'',
u'',
u'\\end_layout',
u'']
self.assertEqual(put_cmd_in_ert("\\texttt{Grüße}"), ert)
self.assertEqual(put_cmd_in_ert([u"\\texttt{Grüße}"]), ert)
self.assertEqual(put_cmd_in_ert(u"\\texttt{Grüße}", is_open=True), ert_open)
self.assertEqual(put_cmd_in_ert(u"\\texttt{Grüße}", as_paragraph=True), ert_paragraph)
def test_latex_length(self):
self.assertEqual(latex_length("-30.5col%"), (True, "-0.305\\columnwidth"))
self.assertEqual(latex_length("35baselineskip%"), (True, "0.35\\baselineskip"))
self.assertEqual(latex_length("11em"), (False, "11em"))
self.assertEqual(latex_length("-0.4pt"), (False, "-0.4pt"))
if __name__ == '__main__':
unittest.main()

View File

@ -22,7 +22,7 @@ from parser_tools import *
import unittest
ug = r"""
lines = r"""
\begin_layout Standard
The
\begin_inset Quotes eld
@ -56,9 +56,26 @@ Introduction
describes that, too.
\end_layout
"""
""".splitlines()
header = r"""\begin_header
\origin unavailable
\paperpagestyle default
\output_changes false
\html_math_output 0
\html_css_as_file 0
\html_be_strict fallse
\end_header""".splitlines()
newheader = r"""\begin_header
\origin unavailable
\paperpagestyle default
\output_changes true
\html_math_output 0
\html_css_as_file 1
\html_be_strict false
\end_header""".splitlines()
lines = ug.splitlines()
class TestParserTools(unittest.TestCase):
@ -77,10 +94,21 @@ class TestParserTools(unittest.TestCase):
def test_find_token(self):
self.assertEqual(find_token(lines, '\\emph', 0), 7)
self.assertEqual(find_token(lines, '\\emph', 0, 5), -1)
self.assertEqual(find_token(lines, '\\emp', 0, 0, True), -1)
self.assertEqual(find_token(lines, '\\emp', 0, 0, False), 7)
# no line starts with "emph" (without backspace):
self.assertEqual(find_token(lines, 'emph', 0), -1)
# token on line[start] is found:
self.assertEqual(find_token(lines, '\\emph', 7), 7)
self.assertEqual(find_token(lines, '\\emph', 8), 9)
# token on line[end] is not found:
self.assertEqual(find_token(lines, '\\emph', 0, 7), -1)
# `ignorews` looks for whitespace-separated tokens:
self.assertEqual(find_token(lines, '\\emp', 0, ignorews=True), -1)
self.assertEqual(find_token(lines, '\\emph', 0, ignorews=True), 7)
self.assertEqual(find_token(lines, '\\emph', 7, ignorews=True), 7)
self.assertEqual(find_token(lines, '\\emph', 0, 7, True), -1)
# only first token is found:
self.assertEqual(find_token(lines, 'Quotes', 0), -1)
self.assertEqual(find_token(lines, 'Quotes', 0, ignorews=True), -1)
def test_find_tokens(self):
@ -89,5 +117,128 @@ class TestParserTools(unittest.TestCase):
self.assertEqual(find_tokens(lines, tokens, 0, 4), -1)
def test_find_substring(self):
# Quotes is not a "token" (substring at the start of any line):
self.assertEqual(find_token(lines, "Quotes", 0), -1)
self.assertEqual(find_substring(lines, "Quotes", 0), 3)
# return -1 on failure:
self.assertEqual(find_substring(lines, "Qualen", 0), -1)
def test_find_re(self):
regexp_object = re.compile(r'\\begin.*Quote')
# matching starts with line[start] (default: start=0)
self.assertEqual(find_re(lines, regexp_object), 3)
self.assertEqual(find_re(lines, regexp_object, start=3), 3)
# matching ends one line *before* line[end]:
self.assertEqual(find_re(lines, regexp_object, start=4), 11)
self.assertEqual(find_re(lines, regexp_object, start=4, end=11), -1)
def test_find_complete_lines(self):
sublines = ["\\begin_inset Quotes eld",
"\\end_inset"]
# return index of first line of sublines:
self.assertEqual(find_complete_lines(lines, sublines), 3)
self.assertEqual(find_complete_lines(lines, ["\\end_inset"]), 4)
# return -1 if sublines is not found:
self.assertEqual(find_complete_lines(lines, ['x']), -1)
# search includes line `start`:
self.assertEqual(find_complete_lines(lines, sublines, 3), 3)
self.assertEqual(find_complete_lines(lines, sublines, 4), 20)
self.assertEqual(find_complete_lines(lines, sublines, 21), -1)
# search excludes line `end`
self.assertEqual(find_complete_lines(lines, sublines, 4, 20), -1)
# an empty list is always found
self.assertEqual(find_complete_lines(lines, []), 0)
def test_find_across_lines(self):
# sub with at least 2 line-breaks (uses find_complete_lines):
sub = "Quotes eld\n\\end_inset\n\n\n"
self.assertEqual(find_across_lines(lines, sub), 3)
# Return -1 if not found
self.assertEqual(find_across_lines(lines, sub, 4), -1)
self.assertEqual(find_across_lines(lines, sub, 0, 6), -1)
sub = "Quotes eld\n\\end_inset\nx\n"
self.assertEqual(find_across_lines(lines, sub), -1)
sub = "Quotes X\n\\end_inset\n\n"
self.assertEqual(find_across_lines(lines, sub), -1)
sub = "Quotes eld\n\\end_insert\n\n"
self.assertEqual(find_across_lines(lines, sub), -1)
# sub with up to 1 line-break:
sub = "Quotes eld\n\\end_inset"
self.assertEqual(find_across_lines(lines, sub), 3)
self.assertEqual(find_across_lines(lines, sub, 4), -1)
self.assertEqual(find_across_lines(lines, sub, 0, 4), -1)
self.assertEqual(find_across_lines(lines, sub, 4, 3), -1)
sub = "Quotes X eld\n\\end_inset\n"
self.assertEqual(find_across_lines(lines, sub), -1)
sub = "Quotes eld\n\\end_insert\n"
self.assertEqual(find_across_lines(lines, sub), -1)
# sub without line-break
sub = "end_"
self.assertEqual(find_across_lines(lines, sub), 4)
self.assertEqual(find_across_lines(lines, sub, 5), 12)
self.assertEqual(find_across_lines(lines, sub, 0, 4), -1)
self.assertEqual(find_across_lines(lines, sub, 2, 1), -1)
self.assertEqual(find_across_lines(lines, "XXX"), -1)
def test_get_value(self):
self.assertEqual(get_value(lines, "\\begin_inset"), "Quotes eld")
# TODO: do we want this:
self.assertEqual(get_value(lines, "\\begin_inset Quotes"), "Quotes eld")
# or only the part after "token":
# self.assertEqual(get_value(lines, "\\begin_inset Quotes"), "eld")
# return default if not found
self.assertEqual(get_value(lines, "\\begin_insert", default=42), 42)
# TODO: do we want this:
self.assertEqual(get_value(lines, "\\end_inset", default=None), None)
# or empty string if token is found but has no value:
# self.assertEqual(get_value(lines, "\\end_inset", default=None), "")
def test_get_bool_value(self):
self.assertEqual(get_bool_value(header, "\\output_changes"), False)
self.assertEqual(get_bool_value(newheader, "\\output_changes"), True)
self.assertEqual(get_bool_value(header, "\\html_css_as_file"), False)
self.assertEqual(get_bool_value(newheader, "\\html_css_as_file"), True)
self.assertEqual(get_bool_value(header, "\\something"), None)
self.assertEqual(get_bool_value(header, "\\output_changes", 4), None)
def test_set_bool_value(self):
# set to new value, return old value
self.assertEqual(set_bool_value(header, "\\output_changes", True), False)
self.assertEqual(set_bool_value(header, "\\html_css_as_file", True), False)
# return default if misspelled:
self.assertEqual(set_bool_value(header, "\\html_be_strict", False), None)
# raise ValueError if token is not found:
self.assertRaises(ValueError, set_bool_value, header, "\\something", 0)
self.assertEqual(header, newheader)
def test_del_complete_lines(self):
l = lines[:]
sublines = ["\\begin_inset Quotes eld",
"\\end_inset"]
# normal operation: remove the first occurrence of sublines:
self.assertEqual(del_complete_lines(l, sublines), True)
self.assertEqual(l[3], "")
self.assertEqual(len(l), len(lines)-len(sublines))
# special cases:
l = lines[:]
self.assertEqual(del_complete_lines(l, sublines, 21), False)
self.assertEqual(l, lines)
# deleting empty sublist returns success but does not change the list:
self.assertEqual(del_complete_lines(l, [], 21), True)
self.assertEqual(l, lines)
def test_del_value(self):
l = lines[:]
self.assertEqual(del_value(l, "\\begin_inset"), "Quotes eld")
self.assertEqual(del_value(l, "\\begin_inset Quotes"), "erd")
# return default if not found
self.assertEqual(del_value(l, "\\begin_insert", default=42), 42)
self.assertEqual(del_value(l, "\\end_inset", default=None), "")
if __name__ == '__main__':
unittest.main()

View File

@ -18,7 +18,7 @@
" Import unicode_reps from this module for access to the unicode<->LaTeX mapping. "
import sys, os, re
import sys, os, re, codecs
# Provide support for both python 2 and 3
PY2 = sys.version_info[0] == 2
@ -28,14 +28,13 @@ if not PY2:
def read_unicodesymbols():
" Read the unicodesymbols list of unicode characters and corresponding commands."
pathname = os.path.abspath(os.path.dirname(sys.argv[0]))
pathname = os.path.abspath(os.path.dirname(__file__))
filename = os.path.join(pathname.strip('lyx2lyx'), 'unicodesymbols')
# For python 3+ we have to specify the encoding for those systems
# where the default is not UTF-8
fp = open(filename, encoding="utf8") if (not PY2) else open(filename)
# Read as Unicode strings in both, Python 2 and 3
# Specify the encoding for those systems where the default is not UTF-8
fp = codecs.open(filename, encoding="utf8")
spec_chars = []
# A backslash, followed by some non-word character, and then a character
# in brackets. The idea is to check for constructs like: \"{u}, which is how
# they are written in the unicodesymbols file; but they can also be written
@ -43,8 +42,12 @@ def read_unicodesymbols():
# The two backslashes in the string literal are needed to specify a literal
# backslash in the regex. Without r prefix, these would be four backslashes.
r = re.compile(r'\\(\W)\{(\w)\}')
spec_chars = []
for line in fp.readlines():
if line[0] != '#' and line.strip() != "":
if not line.strip() or line.startswith('#'):
# skip empty lines and comments
continue
# Note: backslashes in the string literals with r prefix are not escaped,
# so one backslash in the source file equals one backslash in memory.
# Without the r prefix backslashes are escaped, so two backslashes in the
@ -57,10 +60,11 @@ def read_unicodesymbols():
[ucs4,command,dead] = line.split(None,2)
if command[0:1] != "\\":
continue
literal_char = unichr(int(ucs4, 16))
if (line.find("notermination=text") < 0 and
line.find("notermination=both") < 0 and command[-1] != "}"):
command = command + "{}"
spec_chars.append([command, unichr(eval(ucs4))])
spec_chars.append([command, literal_char])
except:
continue
m = r.match(command)
@ -69,10 +73,11 @@ def read_unicodesymbols():
commandbl = command
command += m.group(1) + m.group(2)
commandbl += m.group(1) + ' ' + m.group(2)
spec_chars.append([command, unichr(eval(ucs4))])
spec_chars.append([commandbl, unichr(eval(ucs4))])
spec_chars.append([command, literal_char])
spec_chars.append([commandbl, literal_char])
fp.close()
return spec_chars
unicode_reps = read_unicodesymbols()
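
unicode_reps is a list of [latex_command, unicode_character] pairs. A hedged sketch of one way a revert routine might use it (not necessarily lyx2lyx's exact replacement logic):

def replace_unicode_by_latex(text):
    "Replace known Unicode characters by their LaTeX commands."
    for command, character in unicode_reps:
        text = text.replace(character, command)
    return text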