mirror of https://git.lyx.org/repos/lyx.git (synced 2024-11-13 22:49:20 +00:00)
Add lyx2lyx tools from 2.4.x
This commit is contained in:
parent ee25620931
commit a5c328b304
@ -1,6 +1,6 @@
# This file is part of lyx2lyx
# -*- coding: utf-8 -*-
# Copyright (C) 2002-2015 The LyX Team
# Copyright (C) 2002-2018 The LyX Team
# Copyright (C) 2002-2004 Dekel Tsur <dekel@lyx.org>
# Copyright (C) 2002-2006 José Matos <jamatos@lyx.org>
#
@ -20,8 +20,8 @@

" The LyX module has all the rules related with different lyx file formats."

from parser_tools import get_value, check_token, find_token, \
find_tokens, find_end_of
from parser_tools import (get_value, check_token, find_token, find_tokens,
find_end_of, find_complete_lines)
import os.path
import gzip
import locale
@ -34,8 +34,10 @@ import codecs
try:
import lyx2lyx_version
version__ = lyx2lyx_version.version
stable_version = True
except: # we are running from build directory so assume the last version
version__ = '2.3'
version__ = '2.4'
stable_version = False

default_debug__ = 2

@ -69,8 +71,8 @@ def minor_versions(major, last_minor_version):
# Regular expressions used
format_re = re.compile(r"(\d)[\.,]?(\d\d)")
fileformat = re.compile(r"\\lyxformat\s*(\S*)")
original_version = re.compile(r".*?LyX ([\d.]*)")
original_tex2lyx_version = re.compile(r".*?tex2lyx ([\d.]*)")
original_version = re.compile(b".*?LyX ([\\d.]*)")
original_tex2lyx_version = re.compile(b".*?tex2lyx ([\\d.]*)")

##
# file format information:
@ -92,8 +94,9 @@ format_relation = [("0_06", [200], minor_versions("0.6" , 4)),
("1_6", list(range(277,346)), minor_versions("1.6" , 10)),
("2_0", list(range(346,414)), minor_versions("2.0" , 8)),
("2_1", list(range(414,475)), minor_versions("2.1" , 5)),
("2_2", list(range(475,509)), minor_versions("2.2" , 0)),
("2_3", (), minor_versions("2.3" , 0))
("2_2", list(range(475,509)), minor_versions("2.2" , 4)),
("2_3", list(range(509,545)), minor_versions("2.3" , 0)),
("2_4", (), minor_versions("2.4" , 0))
]

####################################################################
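
For illustration only (not part of the commit): a minimal sketch of how the format_relation table above is typically consulted; lookup_major is a hypothetical helper name.

# Each entry is (major_version, file_formats, supported_versions).
def lookup_major(fmt, relation):
    for major, formats, versions in relation:
        if fmt in formats:
            return major
    return None

# With the 2.4.x table above, lookup_major(544, format_relation)
# returns "2_3", since 544 is in list(range(509, 545)).
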
@ -119,19 +122,29 @@ def formats_list():


def format_info():
" Returns a list with supported file formats."
out = """Major version:
minor versions
formats
" Returns a list with the supported file formats."
template = """
%s\tstable format: %s
\tstable versions: %s
\tdevelopment formats: %s
"""

out = "version: formats and versions"
for version in format_relation:
major = str(version[2][0])
versions = str(version[2][1:])
if len(version[1]) == 1:
formats = str(version[1][0])
stable_format = str(version[1][0])
elif not stable_version and major == version__:
stable_format = "-- not yet --"
versions = "-- not yet --"
formats = "%s - %s" % (version[1][0], version[1][-1])
else:
formats = "%s - %s" % (version[1][-1], version[1][0])
out += "%s\n\t%s\n\t%s\n\n" % (major, versions, formats)
formats = "%s - %s" % (version[1][0], version[1][-2])
stable_format = str(version[1][-1])

out += template % (major, stable_format, versions, formats)
return out + '\n'


@ -281,7 +294,7 @@ class LyX_base:
""" Emits warning to self.error, if the debug_level is less
than the self.debug."""
if debug_level <= self.debug:
self.err.write("Warning: " + message + "\n")
self.err.write("lyx2lyx warning: " + message + "\n")


def error(self, message):
@ -434,8 +447,8 @@ class LyX_base:
else:
header = self.header

for line in header + [''] + self.body:
self.output.write(line+u"\n")
for line in header + [u''] + self.body:
self.output.write(line+u'\n')


def choose_output(self, output):
@ -506,10 +519,10 @@ class LyX_base:
file, returns the most likely value, or None otherwise."""

for line in self.header:
if line[0] != "#":
if line[0:1] != b"#":
return None

line = line.replace("fix",".")
line = line.replace(b"fix",b".")
# need to test original_tex2lyx_version first because tex2lyx
# writes "#LyX file created by tex2lyx 2.2"
result = original_tex2lyx_version.match(line)
@ -517,14 +530,14 @@ class LyX_base:
result = original_version.match(line)
if result:
# Special know cases: reLyX and KLyX
if line.find("reLyX") != -1 or line.find("KLyX") != -1:
if line.find(b"reLyX") != -1 or line.find(b"KLyX") != -1:
return "0.12"
if result:
res = result.group(1)
if not res:
self.warning(line)
#self.warning("Version %s" % result.group(1))
return res
return res.decode('ascii') if not PY2 else res
self.warning(str(self.header[:2]))
return None

@ -533,7 +546,7 @@ class LyX_base:
" Set the header with the version used."

initial_comment = " ".join(["#LyX %s created this file." % version__,
"For more info see http://www.lyx.org/"])
"For more info see https://www.lyx.org/"])

# Simple heuristic to determine the comment that always starts
# a lyx file
@ -582,6 +595,7 @@ class LyX_base:

#Note that the module will be added at the END of the extant ones
def add_module(self, module):
" Append module to the modules list."
i = find_token(self.header, "\\begin_modules", 0)
if i == -1:
#No modules yet included
@ -602,7 +616,16 @@ class LyX_base:
self.header.insert(j, module)


def del_module(self, module):
" Delete `module` from module list, return success."
modlist = self.get_module_list()
if module not in modlist:
return False
self.set_module_list([line for line in modlist if line != module])
return True

def get_module_list(self):
" Return list of modules."
i = find_token(self.header, "\\begin_modules", 0)
if (i == -1):
return []
@ -611,23 +634,23 @@ class LyX_base:


def set_module_list(self, mlist):
modbegin = find_token(self.header, "\\begin_modules", 0)
newmodlist = ['\\begin_modules'] + mlist + ['\\end_modules']
if (modbegin == -1):
i = find_token(self.header, "\\begin_modules", 0)
if (i == -1):
#No modules yet included
tclass = find_token(self.header, "\\textclass", 0)
if tclass == -1:
self.warning("Malformed LyX document: No \\textclass!!")
return
modbegin = tclass + 1
self.header[modbegin:modbegin] = newmodlist
return
modend = find_token(self.header, "\\end_modules", modbegin)
if modend == -1:
self.warning("(set_module_list)Malformed LyX document: No \\end_modules.")
return
newmodlist = ['\\begin_modules'] + mlist + ['\\end_modules']
self.header[modbegin:modend + 1] = newmodlist
i = j = tclass + 1
else:
j = find_token(self.header, "\\end_modules", i)
if j == -1:
self.warning("(set_module_list) Malformed LyX document: No \\end_modules.")
return
j += 1
if mlist:
mlist = ['\\begin_modules'] + mlist + ['\\end_modules']
self.header[i:j] = mlist


def set_parameter(self, param, value):
@ -678,7 +701,7 @@ class LyX_base:
try:
conv(self)
except:
self.warning("An error ocurred in %s, %s" %
self.warning("An error occurred in %s, %s" %
(version, str(conv)),
default_debug__)
if not self.try_hard:
@ -760,6 +783,53 @@ class LyX_base:
return mode, steps


def append_local_layout(self, new_layout):
" Append `new_layout` to the local layouts."
# new_layout may be a string or a list of strings (lines)
try:
new_layout = new_layout.splitlines()
except AttributeError:
pass
i = find_token(self.header, "\\begin_local_layout", 0)
if i == -1:
k = find_token(self.header, "\\language", 0)
if k == -1:
# this should not happen
self.warning("Malformed LyX document! No \\language header found!")
return
self.header[k : k] = ["\\begin_local_layout", "\\end_local_layout"]
i = k

j = find_end_of(self.header, i, "\\begin_local_layout", "\\end_local_layout")
if j == -1:
# this should not happen
self.warning("Malformed LyX document: Can't find end of local layout!")
return

self.header[i+1 : i+1] = new_layout

def del_local_layout(self, layout_def):
" Delete `layout_def` from local layouts, return success."
i = find_complete_lines(self.header, layout_def)
if i == -1:
return False
j = i+len(layout_def)
if (self.header[i-1] == "\\begin_local_layout" and
self.header[j] == "\\end_local_layout"):
i -=1
j +=1
self.header[i:j] = []
return True

def del_from_header(self, lines):
" Delete `lines` from the document header, return success."
i = find_complete_lines(self.header, lines)
if i == -1:
return False
j = i + len(lines)
self.header[i:j] = []
return True

# Part of an unfinished attempt to make lyx2lyx gave a more
# structured view of the document.
# def get_toc(self, depth = 4):
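
A usage sketch (illustrative only, not from this commit) of the new del_module helper added above; doc stands for any LyX_base document:

# Remove a module if present; del_module returns True on success.
if doc.del_module("logicalmkup"):
    doc.add_module("logicalmkup")  # re-append it at the end of the list
print(doc.get_module_list())
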
@ -34,6 +34,7 @@ dist_lyx2lyx_PYTHON = \
lyx_2_1.py \
lyx_2_2.py \
lyx_2_3.py \
lyx_2_4.py \
profiling.py \
test_parser_tools.py
@ -1,4 +1,4 @@
#! /usr/bin/env python
#! /usr/bin/python3
# -*- coding: utf-8 -*-
# Copyright (C) 2002-2011 The LyX Team
# Copyright (C) 2002-2007 José Matos <jamatos@lyx.org>
@ -56,26 +56,27 @@ def main():
parser.add_argument("--noisy",
action="store_const", const=10, dest="debug")
parser.add_argument("-c", "--encoding", type=cmd_arg, dest="cjk_encoding",
help="files in format 413 and lower are read and"
help="Files in format 413 and lower are read and"
" written in the format of CJK-LyX."
"If encoding is not given or 'auto' the encoding"
"is determined from the locale.")
" If encoding is not given or 'auto' the encoding"
" is determined from the locale.")
parser.add_argument("-e", "--err", type=cmd_arg, dest="error",
help= "file name of the error file else goes to stderr")
help= "File name of the error file else goes to stderr.")
parser.add_argument("-o", "--output", type=cmd_arg, dest="output",
help= "name of the output file else goes to stdout")
help= "Name of the output file else goes to stdout.")
parser.add_argument("-t", "--to", type=cmd_arg, dest= "end_format",
help= "destination file format, default (latest)")
help= "Destination file format, default <latest>.")
parser.add_argument("-V", "--final_version", type=cmd_arg, dest= "final_version",
help= "destination version, default (latest)")
help= "Destination version, default <latest>.")
parser.add_argument("-l", "--list", action="store_true",
help = "list all available formats and supported versions")
help = "List all available formats and supported versions.")
parser.add_argument("-n", "--try-hard", action="store_true",
help = "try hard (ignore any convertion errors)")
help = "Try hard (ignore any conversion errors).")
parser.add_argument("-s", "--systemlyxdir", type=cmd_arg, dest= "systemlyxdir",
help= "LyX system directory for conversion from version 489 or older")
help= "LyX system directory for conversion from"
" version 489 or older.")
parser.add_argument('--version', action='version', version="""lyx2lyx, version %s
Copyright (C) 2011 The LyX Team, José Matos and Dekel Tsur""" % LyX.version__)
Copyright (C) 2011 The LyX Team, José Matos and Dekel Tsur""" % LyX.version__)
parser.add_argument("input", nargs='?', type=cmd_arg, default=None)

options = parser.parse_args()
@ -37,8 +37,8 @@ insert_to_preamble(document, text[, index]):
default index is 0, so the material is inserted at the beginning.
Prepends a comment "% Added by lyx2lyx" to text.

put_cmd_in_ert(arg):
Here arg should be a list of strings (lines), which we want to
put_cmd_in_ert(cmd):
Here cmd should be a list of strings (lines), which we want to
wrap in ERT. Returns a list of strings so wrapped.
A call to this routine will often go something like this:
i = find_token('\\begin_inset FunkyInset', ...)
@ -65,23 +65,38 @@ lyx2verbatim(document, lines):
can and return a string containing the translated material.

latex_length(slen):
Convert lengths (in LyX form) to their LaTeX representation. Returns
(bool, length), where the bool tells us if it was a percentage, and
the length is the LaTeX representation.
Convert lengths (in LyX form) to their LaTeX representation. Returns
(bool, length), where the bool tells us if it was a percentage, and
the length is the LaTeX representation.

convert_info_insets(document, type, func):
Applies func to the argument of all info insets matching certain types
type : the type to match. This can be a regular expression.
func : function from string to string to apply to the "arg" field of
the info insets.
Applies func to the argument of all info insets matching certain types
type : the type to match. This can be a regular expression.
func : function from string to string to apply to the "arg" field of
the info insets.

is_document_option(document, option):
Find if _option_ is a document option (\\options in the header).

insert_document_option(document, option):
Insert _option_ as a document option.

remove_document_option(document, option):
Remove _option_ as a document option.

revert_language(document, lyxname, babelname="", polyglossianame=""):
Reverts native language support to ERT
If babelname or polyglossianame is empty, it is assumed
this language package is not supported for the given language.
'''

from __future__ import print_function
import re
import string
from parser_tools import find_token, find_end_of_inset
import sys
from parser_tools import (find_token, find_end_of_inset, get_containing_layout,
get_containing_inset, get_value, get_bool_value)
from unicode_symbols import unicode_reps


# This will accept either a list of lines or a single line.
# It is bad practice to pass something with embedded newlines,
# though we will handle that.
@ -129,25 +144,42 @@ def insert_to_preamble(document, text, index = 0):
document.preamble[index:index] = text


def put_cmd_in_ert(arg):
'''
arg should be a list of lines we want to wrap in ERT.
Returns a list of strings, with the lines so wrapped.
'''
# A dictionary of Unicode->LICR mappings for use in a Unicode string's translate() method
# Created from the reversed list to keep the first of alternative definitions.
licr_table = {ord(ch): cmd for cmd, ch in unicode_reps[::-1]}

ret = ["\\begin_inset ERT", "status collapsed", "", "\\begin_layout Plain Layout", ""]
# It will be faster for us to work with a single string internally.
# That way, we only go through the unicode_reps loop once.
if type(arg) is list:
s = "\n".join(arg)
else:
s = arg
for rep in unicode_reps:
s = s.replace(rep[1], rep[0])
s = s.replace('\\', "\\backslash\n")
ret += s.splitlines()
ret += ["\\end_layout", "", "\\end_inset"]
return ret
def put_cmd_in_ert(cmd, is_open=False, as_paragraph=False):
"""
Return ERT inset wrapping `cmd` as a list of strings.

`cmd` can be a string or list of lines. Non-ASCII characters are converted
to the respective LICR macros if defined in unicodesymbols,
`is_open` is a boolean setting the inset status to "open",
`as_paragraph` wraps the ERT inset in a Standard paragraph.
"""

status = {False:"collapsed", True:"open"}
ert_inset = ["\\begin_inset ERT", "status %s"%status[is_open], "",
"\\begin_layout Plain Layout", "",
# content here ([5:5])
"\\end_layout", "", "\\end_inset"]

paragraph = ["\\begin_layout Standard",
# content here ([1:1])
"", "", "\\end_layout", ""]
# ensure cmd is an unicode instance and make it "LyX safe".
if isinstance(cmd, list):
cmd = u"\n".join(cmd)
elif sys.version_info[0] == 2 and isinstance(cmd, str):
cmd = cmd.decode('utf8')
cmd = cmd.translate(licr_table)
cmd = cmd.replace("\\", "\n\\backslash\n")

ert_inset[5:5] = cmd.splitlines()
if not as_paragraph:
return ert_inset
paragraph[1:1] = ert_inset
return paragraph


def get_ert(lines, i, verbatim = False):
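
An illustrative call of the rewritten put_cmd_in_ert (a hedged sketch, mirroring its use elsewhere in this commit):

# Wrap a LaTeX command in an open ERT inset (a list of LyX file lines):
ert = put_cmd_in_ert("\\selectlanguage{english}", is_open=True)
# With as_paragraph=True the inset is additionally wrapped in a
# Standard paragraph, ready to be spliced into document.body:
# document.body[2:2] = put_cmd_in_ert(cmd, is_open=True, as_paragraph=True)
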
@ -314,9 +346,14 @@ def latex_length(slen):
# the + always precedes the -

# Convert relative lengths to LaTeX units
units = {"text%":"\\textwidth", "col%":"\\columnwidth",
"page%":"\\paperwidth", "line%":"\\linewidth",
"theight%":"\\textheight", "pheight%":"\\paperheight"}
units = {"col%": "\\columnwidth",
"text%": "\\textwidth",
"page%": "\\paperwidth",
"line%": "\\linewidth",
"theight%": "\\textheight",
"pheight%": "\\paperheight",
"baselineskip%": "\\baselineskip"
}
for unit in list(units.keys()):
i = slen.find(unit)
if i == -1:
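
Expected behaviour per the latex_length docstring above (illustrative values, not shown in this diff):

# latex_length("35col%") -> roughly (True, "0.35\\columnwidth")
# latex_length("2.5cm")  -> (False, "2.5cm")
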
@ -536,3 +573,220 @@ def convert_info_insets(document, type, func):
new_arg = func(arg.group(1))
document.body[i + 2] = 'arg "%s"' % new_arg
i += 3


def insert_document_option(document, option):
"Insert _option_ as a document option."

# Find \options in the header
i = find_token(document.header, "\\options", 0)
# if the options does not exists add it after the textclass
if i == -1:
i = find_token(document.header, "\\textclass", 0) + 1
document.header.insert(i, r"\options %s" % option)
return
# otherwise append to options
if not is_document_option(document, option):
document.header[i] += ",%s" % option


def remove_document_option(document, option):
""" Remove _option_ as a document option."""

i = find_token(document.header, "\\options")
options = get_value(document.header, "\\options", i)
options = [op.strip() for op in options.split(',')]

# Remove `option` from \options
options = [op for op in options if op != option]

if options:
document.header[i] = "\\options " + ','.join(options)
else:
del document.header[i]


def is_document_option(document, option):
"Find if _option_ is a document option"

options = get_value(document.header, "\\options")
options = [op.strip() for op in options.split(',')]
return option in options
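
A small usage sketch (illustrative, not from the commit) of the document-option helpers defined above; doc is any parsed document:

insert_document_option(doc, "draft")   # adds "\options draft" if absent
assert is_document_option(doc, "draft")
remove_document_option(doc, "draft")   # drops it again, deleting the
                                       # "\options" line when it empties
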
singlepar_insets = [s.strip() for s in
u"Argument, Caption Above, Caption Below, Caption Bicaption,"
u"Caption Centered, Caption FigCaption, Caption Standard, Caption Table,"
u"Flex Chemistry, Flex Fixme_Note, Flex Latin, Flex ListOfSlides,"
u"Flex Missing_Figure, Flex PDF-Annotation, Flex PDF-Comment-Setup,"
u"Flex Reflectbox, Flex S/R expression, Flex Sweave Input File,"
u"Flex Sweave Options, Flex Thanks_Reference, Flex URL, Foot InTitle,"
u"IPADeco, Index, Info, Phantom, Script".split(',')]
# print(singlepar_insets)

def revert_language(document, lyxname, babelname="", polyglossianame=""):
" Revert native language support "

# Does the document use polyglossia?
use_polyglossia = False
if get_bool_value(document.header, "\\use_non_tex_fonts"):
i = find_token(document.header, "\\language_package")
if i == -1:
document.warning("Malformed document! Missing \\language_package")
else:
pack = get_value(document.header, "\\language_package", i)
if pack in ("default", "auto"):
use_polyglossia = True

# Do we use this language with polyglossia?
with_polyglossia = use_polyglossia and polyglossianame != ""
# Do we use this language with babel?
with_babel = with_polyglossia == False and babelname != ""

# Are we dealing with a primary or secondary language?
primary = document.language == lyxname
secondary = False

# Main language first
orig_doc_language = document.language
if primary:
# Change LyX document language to English (we will tell LaTeX
# to use the original language at the end of this function):
document.language = "english"
i = find_token(document.header, "\\language %s" % lyxname, 0)
if i != -1:
document.header[i] = "\\language english"

# Now look for occurences in the body
i = 0
while True:
i = find_token(document.body, "\\lang", i+1)
if i == -1:
break
if document.body[i].startswith("\\lang %s" % lyxname):
secondary = True
texname = use_polyglossia and polyglossianame or babelname
elif primary and document.body[i].startswith("\\lang english"):
# Since we switched the main language manually, English parts need to be marked
texname = "english"
else:
continue

parent = get_containing_layout(document.body, i)
i_e = parent[2] # end line no,
# print(i, texname, parent, document.body[i+1], file=sys.stderr)

# Move leading space to the previous line:
if document.body[i+1].startswith(" "):
document.body[i+1] = document.body[i+1][1:]
document.body.insert(i, " ")
continue

# TODO: handle nesting issues with font attributes, e.g.
# \begin_layout Standard
#
# \emph on
# \lang macedonian
# Македонски јазик
# \emph default
# — јужнословенски јазик, дел од групата на словенски јазици од јазичното
# семејство на индоевропски јазици.
# Македонскиот е службен и национален јазик во Македонија.
# \end_layout

# Ensure correct handling of list labels
if (parent[0] in ["Labeling", "Description"]
and not " " in "\n".join(document.body[parent[3]:i])):
# line `i+1` is first line of a list item,
# part before a space character is the label
# TODO: insets or language change before first space character
labelline = document.body[i+1].split(' ', 1)
if len(labelline) > 1:
# Insert a space in the (original) document language
# between label and remainder.
# print(" Label:", labelline, file=sys.stderr)
lines = [labelline[0],
"\\lang %s" % orig_doc_language,
" ",
"\\lang %s" % (primary and "english" or lyxname),
labelline[1]]
document.body[i+1:i+2] = lines
i_e += 4

# Find out where to end the language change.
langswitch = i
while True:
langswitch = find_token(document.body, "\\lang", langswitch+1, i_e)
if langswitch == -1:
break
# print(" ", langswitch, document.body[langswitch], file=sys.stderr)
# skip insets
i_a = parent[3] # paragraph start line
container = get_containing_inset(document.body[i_a:i_e], langswitch-i_a)
if container and container[1] < langswitch-i_a and container[2] > langswitch-i_a:
# print(" inset", container, file=sys.stderr)
continue
i_e = langswitch
break

# use function or environment?
singlepar = i_e - i < 3
if not singlepar and parent[0] == "Plain Layout":
# environment not allowed in some insets
container = get_containing_inset(document.body, i)
singlepar = container[0] in singlepar_insets

# Delete empty language switches:
if not "".join(document.body[i+1:i_e]):
del document.body[i:i_e]
i -= 1
continue

if singlepar:
if with_polyglossia:
begin_cmd = "\\text%s{"%texname
elif with_babel:
begin_cmd = "\\foreignlanguage{%s}{" % texname
end_cmd = "}"
else:
if with_polyglossia:
begin_cmd = "\\begin{%s}"%texname
end_cmd = "\\end{%s}"%texname
elif with_babel:
begin_cmd = "\\begin{otherlanguage}{%s}" % texname
end_cmd = "\\end{otherlanguage}"

if (not primary or texname == "english"):
try:
document.body[i_e:i_e] = put_cmd_in_ert(end_cmd)
document.body[i+1:i+1] = put_cmd_in_ert(begin_cmd)
except UnboundLocalError:
pass
del document.body[i]

if not (primary or secondary):
return

# Make the language known to Babel/Polyglossia and ensure the correct
# document language:
doc_lang_switch = ""
if with_babel:
# add as global option
insert_document_option(document, babelname)
# Since user options are appended to the document options,
# Babel will treat `babelname` as primary language.
if not primary:
doc_lang_switch = "\\selectlanguage{%s}" % orig_doc_language
if with_polyglossia:
# Define language in the user preamble
# (don't use \AtBeginDocument, this fails with some languages).
add_to_preamble(document, ["\\usepackage{polyglossia}",
"\\setotherlanguage{%s}" % polyglossianame])
if primary:
# Changing the main language must be done in the document body.
doc_lang_switch = "\\resetdefaultlanguage{%s}" % polyglossianame

# Reset LaTeX main language if required and not already done
if doc_lang_switch and doc_lang_switch[1:] not in document.body[8:20]:
document.body[2:2] = put_cmd_in_ert(doc_lang_switch,
is_open=True, as_paragraph=True)
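
An illustrative call, matching the revert_turkmen usage later in this commit:

# Revert native Turkmen support to ERT; argument order follows the
# signature above: lyxname, babelname, polyglossianame.
revert_language(document, "turkmen", "turkmen", "turkmen")
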
@ -32,18 +32,17 @@ def regularise_header(document):

def find_next_space(line, j):
""" Return position of next space or backslash, which one comes
first, starting from position k, if not existing return last
position in line."""
l = line.find(' ', j)
if l == -1:
l = len(line)
k = line.find('\\', j)
if k == -1:
k = len(line)
first, starting from position j, if none exists returns last
position in line (+1)."""
space_pos = line.find(' ', j)
if space_pos == -1:
space_pos = len(line)

if k < l:
return k
return l
bksl_pos = line.find('\\', j)
if bksl_pos == -1:
bksl_pos = len(line)

return min(space_pos, bksl_pos)


def regularise_body(document):
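
A quick illustrative check (not from the commit) of the rewritten helper:

assert find_next_space("ab cd\\ef", 0) == 2  # the space comes first
assert find_next_space("abcd", 0) == 4       # neither found: len(line)
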
@ -65,36 +64,38 @@ def regularise_body(document):
while i < len(document.body):
line = document.body[i]
j = 0
tmp = []
new_block = []
while j < len(line):
k = line.find('\\', j)

if k == -1:
tmp += [line[j:]]
new_block += [line[j:]]
break

if k != j:
tmp += [line[j: k]]
#document.warning("j=%d\tk=%d\t#%s#%s#" % (j,k,line,line[j: k]))
new_block += [line[j: k]]
j = k

k = find_next_space(line, j+1)

# These tokens take the rest of the line
token = line[j+1:k]
# These tokens take the rest of the line
if token in getline_tokens:
tmp += [line[j:]]
#document.warning("getline_token:%s\tj=%d\t\t#%s#%s#" % (token,j,line,line[j:]))
new_block += [line[j:]]
break

# These tokens take no arguments
if token in noargs_tokens:
tmp += [line[j:k]]
new_block += [line[j:k]]
j = k
continue

# These tokens take one argument
if token in onearg_tokens:
k = find_next_space(line, k + 1)
tmp += [line[j:k]]
new_block += [line[j:k]]
j = k
continue

@ -104,29 +105,30 @@ def regularise_body(document):
inset = line[k+1: l]

if inset == "Latex":
tmp += [line[j:l]]
new_block += [line[j:l]]
j = l
continue

if inset in ["LatexCommand", "LatexDel"]:
tmp += [line[j:]]
if inset in ["LatexCommand", "LatexDel", "Label", "Figure",
"Formula"]:
new_block += [line[j:]]
break

if inset == "Quotes":
l = find_next_space(line, l + 1)
tmp += [line[j:l]]
new_block += [line[j:l]]
j = l
continue

document.warning("unkown inset %s" % line)
document.warning("unkown inset %s" % inset)
assert(False)

# We are inside a latex inset, pass the text verbatim
tmp += [line[j:]]
new_block += [line[j:]]
break

document.body[i: i+1] = tmp
i += len(tmp)
document.body[i: i+1] = new_block
i += len(new_block)


supported_versions = ["0.10.%d" % i for i in range(8)] + ["0.10"]
@ -112,7 +112,7 @@ def update_inset_label(document):
i = find_token(lines, '\\begin_inset Label', i)
if i == -1:
return
lines[i] = '\\begin_inset LatexCommand \label{' + lines[i][19:] + '}'
lines[i] = '\\begin_inset LatexCommand \\label{' + lines[i][19:] + '}'
i = i + 1
@ -69,7 +69,7 @@ def find_beginning_of_inset(lines, i):


def find_end_of_inset(lines, i):
" Finds the matching \end_inset"
r" Finds the matching \end_inset"
return find_end_of(lines, i, "\\begin_inset", "\\end_inset")


@ -143,7 +143,7 @@ def get_width(mo):


def remove_oldfloat(document):
" Change \begin_float .. \end_float into \begin_inset Float .. \end_inset"
r" Change \begin_float .. \end_float into \begin_inset Float .. \end_inset"
lines = document.body
i = 0
while True:
@ -250,7 +250,7 @@ def remove_pextra(document):
if flag:
flag = 0
if hfill:
start = ["","\hfill",""]+start
start = ["",r"\hfill",""]+start
else:
start = ['\\layout %s' % document.default_layout,''] + start

@ -324,7 +324,7 @@ def remove_oldert(document):
new = []
new2 = []
if check_token(lines[i], "\\layout LaTeX"):
new = ['\layout %s' % document.default_layout, "", ""]
new = [r'\layout %s' % document.default_layout, "", ""]

k = i+1
while True:
@ -745,8 +745,8 @@ def update_longtables(document):
for j in range(rows):
i = find_token(body, '<row', i)

self.endfoot = false # footer row
self.endlastfoot = false # last footer row
row_info[i].endfoot = false # footer row
row_info[i].endlastfoot = false # last footer row
if row_info[j].endhead:
insert_attribute(body, i, 'endhead="true"')

@ -808,7 +808,7 @@ def change_infoinset(document):
note_lines = [txt]+note_lines

for line in note_lines:
new = new + ['\layout %s' % document.default_layout, ""]
new = new + [r'\layout %s' % document.default_layout, ""]
tmp = line.split('\\')
new = new + [tmp[0]]
for x in tmp[1:]:
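
Many hunks in these files merely add an r prefix to strings containing backslashes. A short illustration (editorial sketch, not part of the commit) of why:

# "\e" is not a valid escape sequence; Python 3.6+ emits a
# DeprecationWarning for it. A raw string keeps the backslash literal
# and is warning-free while denoting the same text:
assert r" Remove \end_header" == " Remove \\end_header"
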
@ -27,7 +27,7 @@ from parser_tools import find_token, find_end_of, get_value,\
# Private helper functions

def find_end_of_inset(lines, i):
"Finds the matching \end_inset"
r"Finds the matching \end_inset"
return find_end_of(lines, i, "\\begin_inset", "\\end_inset")


@ -81,7 +81,7 @@ def get_next_paragraph(lines, i, format):


def find_end_of_inset(lines, i):
"Finds the matching \end_inset"
r"Finds the matching \end_inset"
return find_end_of(lines, i, "\\begin_inset", "\\end_inset")

def del_token(lines, token, start, end):
@ -103,7 +103,7 @@ def del_token(lines, token, start, end):
####################################################################

def remove_color_default(document):
" Remove \color default"
r" Remove \color default"
i = 0
while True:
i = find_token(document.body, "\\color default", i)
@ -114,12 +114,12 @@ def remove_color_default(document):


def add_end_header(document):
" Add \end_header"
r" Add \end_header"
document.header.append("\\end_header");


def rm_end_header(document):
" Remove \end_header"
r" Remove \end_header"
i = find_token(document.header, "\\end_header", 0)
if i == -1:
return
@ -169,14 +169,14 @@ def revert_amsmath(document):


def convert_spaces(document):
" \SpecialChar ~ -> \InsetSpace ~"
r" \SpecialChar ~ -> \InsetSpace ~"
for i in range(len(document.body)):
document.body[i] = document.body[i].replace("\\SpecialChar ~",
"\\InsetSpace ~")


def revert_spaces(document):
" \InsetSpace ~ -> \SpecialChar ~"
r" \InsetSpace ~ -> \SpecialChar ~"
regexp = re.compile(r'(.*)(\\InsetSpace\s+)(\S+)')
i = 0
while True:
@ -197,18 +197,18 @@ def revert_spaces(document):


def rename_spaces(document):
""" \InsetSpace \, -> \InsetSpace \thinspace{}
\InsetSpace \space -> \InsetSpace \space{}"""
""" \\InsetSpace \\, -> \\InsetSpace \thinspace{}
\\InsetSpace \\space -> \\InsetSpace \\space{}"""
for i in range(len(document.body)):
document.body[i] = document.body[i].replace("\\InsetSpace \\space",
"\\InsetSpace \\space{}")
document.body[i] = document.body[i].replace("\\InsetSpace \,",
document.body[i] = document.body[i].replace("\\InsetSpace \\,",
"\\InsetSpace \\thinspace{}")


def revert_space_names(document):
""" \InsetSpace \thinspace{} -> \InsetSpace \,
\InsetSpace \space{} -> \InsetSpace \space"""
""" \\InsetSpace \thinspace{} -> \\InsetSpace \\,
\\InsetSpace \\space{} -> \\InsetSpace \\space"""
for i in range(len(document.body)):
document.body[i] = document.body[i].replace("\\InsetSpace \\space{}",
"\\InsetSpace \\space")
@ -262,7 +262,7 @@ def revert_bibtex(document):


def remove_insetparent(document):
" Remove \lyxparent"
r" Remove \lyxparent"
i = 0
while True:
i = find_token(document.body, "\\begin_inset LatexCommand \\lyxparent", i)
@ -428,7 +428,7 @@ def revert_comment(document):


def add_end_layout(document):
" Add \end_layout"
r" Add \end_layout"
i = find_token(document.body, '\\layout', 0)

if i == -1:
@ -502,7 +502,7 @@ def add_end_layout(document):


def rm_end_layout(document):
" Remove \end_layout"
r" Remove \end_layout"
i = 0
while True:
i = find_token(document.body, '\\end_layout', i)
@ -544,7 +544,7 @@ def rm_body_changes(document):


def layout2begin_layout(document):
" \layout -> \begin_layout "
r" \layout -> \begin_layout "
i = 0
while True:
i = find_token(document.body, '\\layout', i)
@ -556,7 +556,7 @@ def layout2begin_layout(document):


def begin_layout2layout(document):
" \begin_layout -> \layout "
r" \begin_layout -> \layout "
i = 0
while True:
i = find_token(document.body, '\\begin_layout', i)
@ -1972,9 +1972,9 @@ def convert_names(document):
'\\begin_layout %s' % document.default_layout,
"",
"%s" % firstname,
"\end_layout",
r"\end_layout",
"",
"\end_inset",
r"\end_inset",
"",
"",
"\\begin_inset CharStyle Surname",
@ -24,6 +24,7 @@ import unicodedata
import sys, os

from parser_tools import find_re, find_token, find_token_backwards, find_token_exact, find_tokens, find_end_of, get_value, find_beginning_of, find_nonempty_line
from lyx2lyx_tools import insert_document_option
from LyX import get_encoding

# Provide support for both python 2 and 3
@ -420,7 +421,7 @@ def revert_unicode_line(document, i, insets, spec_chars, replacement_character =
else:
if insets and insets[-1] == "Formula":
# avoid putting an ERT in a math; instead put command as text
command = command.replace('\\\\', '\mathrm{')
command = command.replace('\\\\', r'\mathrm{')
command = command + '}'
elif not insets or insets[-1] != "ERT":
# add an ERT inset with the replacement character
@ -490,7 +491,7 @@ def revert_cs_label(document):


def convert_bibitem(document):
""" Convert
r""" Convert
\bibitem [option]{argument}

to
@ -575,16 +576,16 @@ commandparams_info = {
def convert_commandparams(document):
""" Convert

\begin_inset LatexCommand \cmdname[opt1][opt2]{arg}
\end_inset
\\begin_inset LatexCommand \\cmdname[opt1][opt2]{arg}
\\end_inset

to

\begin_inset LatexCommand cmdname
\\begin_inset LatexCommand cmdname
name1 "opt1"
name2 "opt2"
name3 "arg"
\end_inset
\\end_inset

name1, name2 and name3 can be different for each command.
"""
@ -895,7 +896,7 @@ def revert_cleardoublepage(document):


def convert_lyxline(document):
" remove fontsize commands for \lyxline "
r" remove fontsize commands for \lyxline "
# The problematic is: The old \lyxline definition doesn't handle the fontsize
# to change the line thickness. The new definiton does this so that imported
# \lyxlines would have a different line thickness. The eventual fontsize command
@ -1686,7 +1687,7 @@ def revert_CJK(document):


def revert_preamble_listings_params(document):
" Revert preamble option \listings_params "
r" Revert preamble option \listings_params "
i = find_token(document.header, "\\listings_params", 0)
if i != -1:
document.preamble.append('\\usepackage{listings}')
@ -1907,13 +1908,7 @@ def revert_ext_font_sizes(document):

i = find_token(document.header, '\\paperfontsize', 0)
document.header[i] = '\\paperfontsize default'

i = find_token(document.header, '\\options', 0)
if i == -1:
i = find_token(document.header, '\\textclass', 0) + 1
document.header[i:i] = ['\\options %s' % fontsize]
else:
document.header[i] += ',%s' % fontsize
insert_document_option(document, fontsize)


def convert_ext_font_sizes(document):
@ -2010,10 +2005,10 @@ def convert_arabic (document):
document.header[i] = "\\language arabic_arabtex"
i = 0
while i < len(document.body):
h = document.body[i].find("\lang arabic", 0, len(document.body[i]))
h = document.body[i].find(r"\lang arabic", 0, len(document.body[i]))
if (h != -1):
# change the language name
document.body[i] = '\lang arabic_arabtex'
document.body[i] = r'\lang arabic_arabtex'
i = i + 1


@ -2025,10 +2020,10 @@ def revert_arabic (document):
document.header[i] = "\\language arabic"
i = 0
while i < len(document.body):
h = document.body[i].find("\lang arabic_arabtex", 0, len(document.body[i]))
h = document.body[i].find(r"\lang arabic_arabtex", 0, len(document.body[i]))
if (h != -1):
# change the language name
document.body[i] = '\lang arabic'
document.body[i] = r'\lang arabic'
i = i + 1
@ -23,7 +23,7 @@ import unicodedata
import sys, os

from parser_tools import find_token, find_end_of, find_tokens, get_value
from unicode_symbols import read_unicodesymbols
from unicode_symbols import unicode_reps

####################################################################
# Private helper functions
@ -146,61 +146,13 @@ def set_option(document, m, option, value):
return l


# FIXME: Remove this function if the version imported from unicode_symbols works.
# This function was the predecessor from that function, that in the meanwhile got
# new fixes.
def read_unicodesymbols2():
" Read the unicodesymbols list of unicode characters and corresponding commands."

# Provide support for both python 2 and 3
PY2 = sys.version_info[0] == 2
if not PY2:
unichr = chr
# End of code to support for both python 2 and 3

pathname = os.path.abspath(os.path.dirname(sys.argv[0]))
fp = open(os.path.join(pathname.strip('lyx2lyx'), 'unicodesymbols'))
spec_chars = []
# Two backslashes, followed by some non-word character, and then a character
# in brackets. The idea is to check for constructs like: \"{u}, which is how
# they are written in the unicodesymbols file; but they can also be written
# as: \"u or even \" u.
r = re.compile(r'\\\\(\W)\{(\w)\}')
for line in fp.readlines():
if line[0] != '#' and line.strip() != "":
line=line.replace(' "',' ') # remove all quotation marks with spaces before
line=line.replace('" ',' ') # remove all quotation marks with spaces after
line=line.replace(r'\"','"') # replace \" by " (for characters with diaeresis)
try:
[ucs4,command,dead] = line.split(None,2)
if command[0:1] != "\\":
continue
spec_chars.append([command, unichr(eval(ucs4))])
except:
continue
m = r.match(command)
if m != None:
command = "\\\\"
# If the character is a double-quote, then we need to escape it, too,
# since it is done that way in the LyX file.
if m.group(1) == "\"":
command += "\\"
commandbl = command
command += m.group(1) + m.group(2)
commandbl += m.group(1) + ' ' + m.group(2)
spec_chars.append([command, unichr(eval(ucs4))])
spec_chars.append([commandbl, unichr(eval(ucs4))])
fp.close()
return spec_chars


def extract_argument(line):
'Extracts a LaTeX argument from the start of line. Returns (arg, rest).'

if not line:
return (None, "")

bracere = re.compile("(\s*)(.*)")
bracere = re.compile(r"(\s*)(.*)")
n = bracere.match(line)
whitespace = n.group(1)
stuff = n.group(2)
@ -280,8 +232,6 @@ def latex2ert(line, isindex):
return retval


unicode_reps = read_unicodesymbols()

#Bug 5022....
#Might should do latex2ert first, then deal with stuff that DOESN'T
#end up inside ERT. That routine could be modified so that it returned
@ -327,7 +277,7 @@ def latex2lyx(data, isindex):
data = data.replace('\\\\', '\\')

# Math:
mathre = re.compile('^(.*?)(\$.*?\$)(.*)')
mathre = re.compile(r'^(.*?)(\$.*?\$)(.*)')
lines = data.split('\n')
for line in lines:
#document.warning("LINE: " + line)
@ -996,7 +946,7 @@ def remove_inzip_options(document):


def convert_inset_command(document):
"""
r"""
Convert:
\begin_inset LatexCommand cmd
to
@ -1033,7 +983,7 @@ def convert_inset_command(document):


def revert_inset_command(document):
"""
r"""
Convert:
\begin_inset CommandInset InsetType
LatexCommand cmd
@ -1608,7 +1558,7 @@ def convert_usorbian(document):


def convert_macro_global(document):
"Remove TeX code command \global when it is in front of a macro"
r"Remove TeX code command \global when it is in front of a macro"
# math macros are nowadays already defined \global, so that an additional
# \global would make the document uncompilable, see
# http://www.lyx.org/trac/ticket/5371
@ -2389,7 +2339,7 @@ def revert_wrapplacement(document):


def remove_extra_embedded_files(document):
" Remove \extra_embedded_files from buffer params "
r" Remove \extra_embedded_files from buffer params "
i = find_token(document.header, '\\extra_embedded_files', 0)
if i == -1:
return
@ -22,14 +22,15 @@ import re, string
import unicodedata
import sys, os

from parser_tools import find_token, find_end_of, find_tokens, \
from parser_tools import del_complete_lines, \
find_token, find_end_of, find_tokens, \
find_token_exact, find_end_of_inset, find_end_of_layout, \
find_token_backwards, is_in_inset, get_value, get_quoted_value, \
del_token, check_token, get_option_value

from lyx2lyx_tools import add_to_preamble, insert_to_preamble, \
put_cmd_in_ert, lyx2latex, latex_length, revert_flex_inset, \
revert_font_attrs, hex2ratio, str2bool
revert_font_attrs, hex2ratio, str2bool, revert_language

####################################################################
# Private helper functions
@ -377,7 +378,7 @@ def revert_splitindex(document):
l = re.compile(r'\\begin_inset Index (.*)$')
m = l.match(line)
itype = m.group(1)
if itype == "idx" or indices == "false":
if itype == "idx" or useindices == "false":
document.body[i] = "\\begin_inset Index"
else:
k = find_end_of_inset(document.body, i)
@ -484,6 +485,15 @@ def revert_printindexall(document):
document.body[i:k + 1] = subst
i = i + 1

strikeout_preamble = ['% for proper underlining',
r'\PassOptionsToPackage{normalem}{ulem}',
r'\usepackage{ulem}']

def convert_strikeout(document):
" Remove preamble code loading 'ulem' package. "
del_complete_lines(document.preamble,
['% Added by lyx2lyx']+strikeout_preamble)


def revert_strikeout(document):
" Reverts \\strikeout font attribute "
@ -491,25 +501,30 @@ def revert_strikeout(document):
changed = revert_font_attrs(document.body, "\\uwave", "\\uwave") or changed
changed = revert_font_attrs(document.body, "\\strikeout", "\\sout") or changed
if changed == True:
insert_to_preamble(document, \
['% for proper underlining',
'\\PassOptionsToPackage{normalem}{ulem}',
'\\usepackage{ulem}'])
insert_to_preamble(document, strikeout_preamble)


ulinelatex_preamble = ['% fix underbar in citations',
r'\let\cite@rig\cite',
r'\newcommand{\b@xcite}[2][\%]{\def\def@pt{\%}\def\pas@pt{#1}',
r' \mbox{\ifx\def@pt\pas@pt\cite@rig{#2}\else\cite@rig[#1]{#2}\fi}}',
r'\renewcommand{\underbar}[1]{{\let\cite\b@xcite\uline{#1}}}']

def convert_ulinelatex(document):
" Remove preamble code for \\uline font attribute. "
del_complete_lines(document.preamble,
['% Added by lyx2lyx']+ulinelatex_preamble)

def revert_ulinelatex(document):
" Reverts \\uline font attribute "
" Add preamble code for \\uline font attribute in citations. "
i = find_token(document.body, '\\bar under', 0)
if i == -1:
return
insert_to_preamble(document,\
['% for proper underlining',
'\\PassOptionsToPackage{normalem}{ulem}',
'\\usepackage{ulem}',
'\\let\\cite@rig\\cite',
'\\newcommand{\\b@xcite}[2][\\%]{\\def\\def@pt{\\%}\\def\\pas@pt{#1}',
' \\mbox{\\ifx\\def@pt\\pas@pt\\cite@rig{#2}\\else\\cite@rig[#1]{#2}\\fi}}',
'\\renewcommand{\\underbar}[1]{{\\let\\cite\\b@xcite\\uline{#1}}}'])
try:
document.preamble.index(r'\usepackage{ulem}')
except ValueError:
insert_to_preamble(document, strikeout_preamble)
insert_to_preamble(document, ulinelatex_preamble)
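
The pattern introduced above, summarized as a hedged sketch: preamble snippets become module-level constants, so a convert_* routine can delete exactly what the paired revert_* inserted.

# revert_* adds the snippet (insert_to_preamble prepends
# "% Added by lyx2lyx"); the paired convert_* removes it again:
insert_to_preamble(document, strikeout_preamble)
del_complete_lines(document.preamble,
                   ['% Added by lyx2lyx'] + strikeout_preamble)
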
def revert_custom_processors(document):
@ -818,6 +833,9 @@ def revert_suppress_date(document):
del document.header[i]


mhchem_preamble = [r"\PassOptionsToPackage{version=3}{mhchem}",
r"\usepackage{mhchem}"]

def convert_mhchem(document):
"Set mhchem to off for versions older than 1.6.x"
if document.initial_format < 277:
@ -835,47 +853,44 @@ def convert_mhchem(document):
# pre-1.5.x document
i = find_token(document.header, "\\use_amsmath", 0)
if i == -1:
document.warning("Malformed LyX document: Could not find amsmath os esint setting.")
document.warning("Malformed LyX document: "
"Could not find amsmath or esint setting.")
return
document.header.insert(i + 1, "\\use_mhchem %d" % mhchem)
# remove LyX-inserted preamble
if mhchem != 0:
del_complete_lines(document.preamble,
['% Added by lyx2lyx']+mhchem_preamble)


def revert_mhchem(document):
"Revert mhchem loading to preamble code"
"Revert mhchem loading to preamble code."

mhchem = "off"
i = find_token(document.header, "\\use_mhchem", 0)
if i == -1:
document.warning("Malformed LyX document: Could not find mhchem setting.")
mhchem = "auto"
else:
val = get_value(document.header, "\\use_mhchem", i)
if val == "1":
mhchem = "auto"
elif val == "2":
mhchem = "on"
del document.header[i]
mhchem = get_value(document.header, "\\use_mhchem", delete=True)
try:
mhchem = int(mhchem)
except ValueError:
document.warning("Malformed LyX document: "
"Could not find mhchem setting.")
mhchem = 1 # "auto"
# mhchem in {0: "off", 1: "auto", 2: "on"}

if mhchem == "off":
# don't load case
return

if mhchem == "auto":
if mhchem == 1: # "auto"
i = 0
while True:
while i != 1 and mhchem == 1:
i = find_token(document.body, "\\begin_inset Formula", i)
if i == -1:
break
line = document.body[i]
if line.find("\\ce{") != -1 or line.find("\\cf{") != -1:
mhchem = "on"
break
j = find_end_of_inset(document.body, i)
if j == -1:
break
if (True for line in document.body[i:j]
if r"\ce{" in line or r"\cf{" in line):
mhchem = 2
break
i += 1

if mhchem == "on":
pre = ["\\PassOptionsToPackage{version=3}{mhchem}",
"\\usepackage{mhchem}"]
insert_to_preamble(document, pre)
if (mhchem == 2 # on
and find_token(document.preamble, r"\usepackage{mhchem}") == -1):
insert_to_preamble(document, mhchem_preamble)
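
The rewritten revert_mhchem relies on get_value's delete keyword (assumed here from its use above; the parser_tools change itself is not shown in this diff): it returns the token's value and removes the header line in one step.

# Returns e.g. "2" and deletes the "\use_mhchem 2" header line; when
# the token is missing, int() on the result raises ValueError, which
# the code above turns into the "auto" (1) fallback.
val = get_value(document.header, "\\use_mhchem", delete=True)
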
def revert_fontenc(document):
|
||||
@ -956,6 +971,20 @@ def revert_includeonly(document):
|
||||
document.header[i : j + 1] = []
|
||||
|
||||
|
||||
def convert_includeall(document):
|
||||
" Add maintain_unincluded_children param "
|
||||
|
||||
i = 0
|
||||
i = find_token(document.header, "\\maintain_unincluded_children", 0)
|
||||
if i == -1:
|
||||
i = find_token(document.header, "\\textclass", 0)
|
||||
if i == -1:
|
||||
document.warning("Malformed LyX document! Missing \\textclass header.")
|
||||
return
|
||||
document.header.insert(i, "\\maintain_unincluded_children false")
|
||||
return
|
||||
|
||||
|
||||
def revert_includeall(document):
|
||||
" Remove maintain_unincluded_children param "
|
||||
del_token(document.header, '\\maintain_unincluded_children', 0)
|
||||
@ -1090,7 +1119,7 @@ def revert_multirow(document):
|
||||
|
||||
|
||||
def convert_math_output(document):
|
||||
" Convert \html_use_mathml to \html_math_output "
|
||||
r" Convert \html_use_mathml to \html_math_output "
|
||||
i = find_token(document.header, "\\html_use_mathml", 0)
|
||||
if i == -1:
|
||||
return
|
||||
@ -1107,7 +1136,7 @@ def convert_math_output(document):
|
||||
|
||||
|
||||
def revert_math_output(document):
|
||||
" Revert \html_math_output to \html_use_mathml "
|
||||
r" Revert \html_math_output to \html_use_mathml "
|
||||
i = find_token(document.header, "\\html_math_output", 0)
|
||||
if i == -1:
|
||||
return
|
||||
@ -1266,19 +1295,7 @@ def revert_notefontcolor(document):
|
||||
def revert_turkmen(document):
|
||||
"Set language Turkmen to English"
|
||||
|
||||
if document.language == "turkmen":
|
||||
document.language = "english"
|
||||
i = find_token(document.header, "\\language", 0)
|
||||
if i != -1:
|
||||
document.header[i] = "\\language english"
|
||||
|
||||
j = 0
|
||||
while True:
|
||||
j = find_token(document.body, "\\lang turkmen", j)
|
||||
if j == -1:
|
||||
return
|
||||
document.body[j] = document.body[j].replace("\\lang turkmen", "\\lang english")
|
||||
j += 1
|
||||
revert_language(document, "turkmen", "turkmen", "turkmen")
|
||||
|
||||
|
||||
def revert_fontcolor(document):
|
||||
@ -1602,8 +1619,8 @@ def revert_IEEEtran(document):
|
||||
|
||||
def convert_prettyref(document):
|
||||
" Converts prettyref references to neutral formatted refs "
|
||||
re_ref = re.compile("^\s*reference\s+\"(\w+):(\S+)\"")
|
||||
nm_ref = re.compile("^\s*name\s+\"(\w+):(\S+)\"")
|
||||
re_ref = re.compile("^\\s*reference\\s+\"(\\w+):(\\S+)\"")
|
||||
nm_ref = re.compile("^\\s*name\\s+\"(\\w+):(\\S+)\"")
|
||||
|
||||
i = 0
|
||||
while True:
|
||||
@ -1624,8 +1641,8 @@ def convert_prettyref(document):
|
||||
|
||||
def revert_refstyle(document):
|
||||
" Reverts neutral formatted refs to prettyref "
|
||||
re_ref = re.compile("^reference\s+\"(\w+):(\S+)\"")
|
||||
nm_ref = re.compile("^\s*name\s+\"(\w+):(\S+)\"")
|
||||
re_ref = re.compile("^reference\\s+\"(\\w+):(\\S+)\"")
|
||||
nm_ref = re.compile("^\\s*name\\s+\"(\\w+):(\\S+)\"")
|
||||
|
||||
i = 0
|
||||
while True:
|
||||
@ -1664,12 +1681,10 @@ def revert_nameref(document):
|
||||
i += 1
|
||||
# Make sure it is actually in an inset!
|
||||
# A normal line could begin with "LatexCommand nameref"!
|
||||
val = is_in_inset(document.body, cmdloc, \
|
||||
"\\begin_inset CommandInset ref")
|
||||
if not val:
|
||||
stins, endins = is_in_inset(document.body, cmdloc,
|
||||
"\\begin_inset CommandInset ref")
|
||||
if endins == -1:
|
||||
continue
|
||||
stins, endins = val
|
||||
|
||||
# ok, so it is in an InsetRef
|
||||
refline = find_token(document.body, "reference", stins, endins)
|
||||
if refline == -1:
|
||||
@ -1699,17 +1714,16 @@ def remove_Nameref(document):
|
||||
break
|
||||
cmdloc = i
|
||||
i += 1
|
||||
|
||||
# Make sure it is actually in an inset!
|
||||
val = is_in_inset(document.body, cmdloc, \
|
||||
"\\begin_inset CommandInset ref")
|
||||
val = is_in_inset(document.body, cmdloc,
|
||||
"\\begin_inset CommandInset ref", default=False)
|
||||
if not val:
|
||||
continue
|
||||
document.body[cmdloc] = "LatexCommand nameref"
|
||||
|
||||
|
||||
def revert_mathrsfs(document):
    " Load mathrsfs if \mathrsfs is used in the document "
    r" Load mathrsfs if \mathrsfs is used in the document "
    i = 0
    for line in document.body:
        if line.find("\\mathscr{") != -1:

@@ -2145,7 +2159,7 @@ def convert_passthru(document):
    if not check_passthru:
        return

    rx = re.compile("\\\\begin_layout \s*(\w+)")
    rx = re.compile("\\\\begin_layout \\s*(\\w+)")
    beg = 0
    for lay in ["Chunk", "Scrap"]:
        while True:

@@ -2175,7 +2189,7 @@ def convert_passthru(document):
                break
            ne = find_end_of_inset(document.body, ns)
            if ne == -1 or ne > end:
                document.warning("Can't find end of inset at line " + str(nb))
                document.warning("Can't find end of inset at line " + str(ne))
                ns += 1
                continue
            if document.body[ne + 1] == "":

@@ -2209,7 +2223,7 @@ def revert_passthru(document):
    " http://www.mail-archive.com/lyx-devel@lists.lyx.org/msg161298.html "
    if not check_passthru:
        return
    rx = re.compile("\\\\begin_layout \s*(\w+)")
    rx = re.compile("\\\\begin_layout \\s*(\\w+)")
    beg = 0
    for lay in ["Chunk", "Scrap"]:
        while True:

@@ -2501,7 +2515,7 @@ def revert_langpack(document):

def convert_langpack(document):
    " Add \\language_package parameter "
    i = find_token(document.header, "\language" , 0)
    i = find_token(document.header, r"\language" , 0)
    if i == -1:
        document.warning("Malformed document. No \\language defined!")
        return
@@ -2548,9 +2562,9 @@ convert = [[346, []],
           [352, [convert_splitindex]],
           [353, []],
           [354, []],
           [355, []],
           [355, [convert_strikeout]],
           [356, []],
           [357, []],
           [357, [convert_ulinelatex]],
           [358, []],
           [359, [convert_nomencl_width]],
           [360, []],

@@ -2569,7 +2583,7 @@ convert = [[346, []],
           [373, [merge_gbrief]],
           [374, []],
           [375, []],
           [376, []],
           [376, [convert_includeall]],
           [377, []],
           [378, []],
           [379, [convert_math_output]],
@@ -24,7 +24,8 @@ import sys, os

# Uncomment only what you need to import, please.

from parser_tools import count_pars_in_inset, del_token, find_token, find_token_exact, \
from parser_tools import count_pars_in_inset, del_complete_lines, del_token, \
    find_token, find_token_exact, \
    find_token_backwards, find_end_of, find_end_of_inset, find_end_of_layout, \
    find_end_of_sequence, find_re, get_option_value, get_containing_layout, \
    get_containing_inset, get_value, get_quoted_value, set_option_value

@@ -33,7 +34,7 @@ from parser_tools import count_pars_in_inset, del_token, find_token, find_token_
#find_end_of_inset, find_end_of_layout, \
#is_in_inset, del_token, check_token

from lyx2lyx_tools import add_to_preamble, put_cmd_in_ert, get_ert
from lyx2lyx_tools import add_to_preamble, put_cmd_in_ert, get_ert, revert_language

#from lyx2lyx_tools import insert_to_preamble, \
# lyx2latex, latex_length, revert_flex_inset, \

@@ -58,7 +59,7 @@ def revert_Argument_to_TeX_brace(document, line, endline, n, nmax, environment,
    usage:
    revert_Argument_to_TeX_brace(document, LineOfBegin, LineOfEnd, StartArgument, EndArgument, isEnvironment, isOpt)
    LineOfBegin is the line of the \\begin_layout or \\begin_inset statement
    LineOfEnd is the line of the \end_layout or \end_inset statement, if "0" is given, the end of the file is used instead
    LineOfEnd is the line of the \\end_layout or \\end_inset statement, if "0" is given, the end of the file is used instead
    StartArgument is the number of the first argument that needs to be converted
    EndArgument is the number of the last argument that needs to be converted or the last defined one
    isEnvironment must be true, if the layout is for a LaTeX environment

@@ -352,7 +353,7 @@ def revert_undertilde(document):

def revert_negative_space(document):
    "Revert InsetSpace negmedspace and negthickspace into its TeX-code counterpart"
    "Revert InsetSpace negmedspace and negthickspace into their TeX-code counterparts"
    i = 0
    j = 0
    reverted = False

@@ -365,7 +366,7 @@ def revert_negative_space(document):
    if reverted == True:
        i = find_token(document.header, "\\use_amsmath 2", 0)
        if i == -1:
            add_to_preamble(document, ["\\@ifundefined{negthickspace}{\\usepackage{amsmath}}"])
            add_to_preamble(document, ["\\@ifundefined{negthickspace}{\\usepackage{amsmath}}{}"])
            return
    if i == -1:
        return
@@ -555,8 +556,6 @@ def handle_longtable_captions(document, forward):
                get_option_value(document.body[begin_row], 'endlastfoot') != 'true'):
            document.body[begin_row] = set_option_value(document.body[begin_row], 'caption', 'true", endfirsthead="true')
        elif get_option_value(document.body[begin_row], 'caption') == 'true':
            if get_option_value(document.body[begin_row], 'endfirsthead') == 'true':
                document.body[begin_row] = set_option_value(document.body[begin_row], 'endfirsthead', 'false')
            if get_option_value(document.body[begin_row], 'endhead') == 'true':
                document.body[begin_row] = set_option_value(document.body[begin_row], 'endhead', 'false')
            if get_option_value(document.body[begin_row], 'endfoot') == 'true':

@@ -618,15 +617,16 @@ def convert_use_package(document, pkg, commands, oldauto):
    # oldauto defines how the version we are converting from behaves:
    # if it is true, the old version uses the package automatically.
    # if it is false, the old version never uses the package.
    i = find_token(document.header, "\\use_package", 0)
    i = find_token(document.header, "\\use_package")
    if i == -1:
        document.warning("Malformed LyX document: Can't find \\use_package.")
        return;
    j = find_token(document.preamble, "\\usepackage{" + pkg + "}", 0)
    if j != -1:
        # package was loaded in the preamble, convert this to header setting for round trip
    packageline = "\\usepackage{%s}" % pkg
    if (del_complete_lines(document.preamble,
                           ['% Added by lyx2lyx', packageline]) or
        del_complete_lines(document.preamble, [packageline])):
        # package was loaded in the preamble, convert this to header setting
        document.header.insert(i + 1, "\\use_package " + pkg + " 2") # on
        del document.preamble[j]
    # If oldauto is true we have two options:
    # We can either set the package to auto - this is correct for files in
    # format 425 to 463, and may create a conflict for older files which use
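convert_use_package() now lets del_complete_lines() remove the preamble line, optionally together with the '% Added by lyx2lyx' marker, in one step. A minimal sketch of the pattern, with a hypothetical package name and toy header/preamble lists:

    from parser_tools import del_complete_lines, find_token

    preamble = ['% Added by lyx2lyx', '\\usepackage{mhchem}']
    header = ['\\use_package amsmath 1']

    packageline = "\\usepackage{mhchem}"
    if (del_complete_lines(preamble, ['% Added by lyx2lyx', packageline]) or
            del_complete_lines(preamble, [packageline])):
        # the preamble loaded the package explicitly: record that in the header
        i = find_token(header, "\\use_package")
        header.insert(i + 1, "\\use_package mhchem 2")  # 2 = "on"
    print(preamble)  # -> []
    print(header)    # -> ['\\use_package amsmath 1', '\\use_package mhchem 2']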
@@ -1060,7 +1060,7 @@ def convert_table_rotation(document):

def convert_listoflistings(document):
    'Convert ERT \lstlistoflistings to TOC lstlistoflistings inset'
    r'Convert ERT \lstlistoflistings to TOC lstlistoflistings inset'
    # We can support roundtrip because the command is so simple
    i = 0
    while True:

@@ -1169,24 +1169,16 @@ def revert_ancientgreek(document):

def revert_languages(document):
    "Set the document language for new supported languages to English"

    languages = [
        "coptic", "divehi", "hindi", "kurmanji", "lao", "marathi", "occitan", "sanskrit",
        "syriac", "tamil", "telugu", "urdu"
    ]
    for n in range(len(languages)):
        if document.language == languages[n]:
            document.language = "english"
            i = find_token(document.header, "\\language", 0)
            if i != -1:
                document.header[i] = "\\language english"
        j = 0
        while j < len(document.body):
            j = find_token(document.body, "\\lang " + languages[n], j)
            if j != -1:
                document.body[j] = document.body[j].replace("\\lang " + languages[n], "\\lang english")
                j += 1
            else:
                j = len(document.body)
    # polyglossia-only
    polyglossia_languages = ["coptic", "divehi", "hindi", "lao", "marathi",
                             "occitan", "sanskrit", "syriac", "tamil",
                             "telugu", "urdu"]
    # babel-only
    babel_languages = ["kurmanji"]
    for lang in polyglossia_languages:
        revert_language(document, lang, "", lang)
    for lang in babel_languages:
        revert_language(document, lang, lang, "")
def convert_armenian(document):

@@ -1555,10 +1547,11 @@ def convert_latexargs(document):
        "theorems-chap-bytype", "theorems-chap", "theorems-named", "theorems-sec-bytype",
        "theorems-sec", "theorems-starred", "theorems-std", "todonotes"]
    # Modules we need to take care of
    caveat_modules = ["initials"]
    caveat_modules = ["initials"] # TODO: , "graphicboxes", "bicaption"]
    # information about the relevant styles in caveat_modules (number of opt and req args)
    # use this if we get more caveat_modules. For now, use hard coding (see below).
    # initials = [{'Layout' : 'Initial', 'opt' : 1, 'req' : 1}]
    # graphicboxes = { ... }

    # Is this a known safe layout?
    safe_layout = document.textclass in safe_layouts

@@ -4560,19 +4553,7 @@ def revert_aa2(document):

def revert_tibetan(document):
    "Set the document language for Tibetan to English"

    if document.language == "tibetan":
        document.language = "english"
        i = find_token(document.header, "\\language", 0)
        if i != -1:
            document.header[i] = "\\language english"
    j = 0
    while j < len(document.body):
        j = find_token(document.body, "\\lang tibetan", j)
        if j != -1:
            document.body[j] = document.body[j].replace("\\lang tibetan", "\\lang english")
            j += 1
        else:
            j = len(document.body)
    revert_language(document, "tibetan", "", "tibetan")


#############
File diff suppressed because it is too large. Load Diff

5848 lib/lyx2lyx/lyx_2_4.py (new file)
File diff suppressed because it is too large. Load Diff
@@ -1,7 +1,7 @@
# This file is part of lyx2lyx
# -*- coding: utf-8 -*-
# Copyright (C) 2002-2011 Dekel Tsur <dekel@lyx.org>,
# José Matos <jamatos@lyx.org>, Richard Heck <rgheck@comcast.net>
# José Matos <jamatos@lyx.org>, Richard Kimberly Heck <rikiheck@lyx.org>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License

@@ -18,12 +18,12 @@
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

'''
"""
This module offers several free functions to help parse lines.
More documentation is below, but here is a quick guide to what
they do. Optional arguments are marked by brackets.

find_token(lines, token, start[, end[, ignorews]]):
find_token(lines, token[, start[, end[, ignorews]]]):
    Returns the first line i, start <= i < end, on which
    token is found at the beginning. Returns -1 if not
    found.

@@ -31,10 +31,10 @@ find_token(lines, token, start[, end[, ignorews]]):
    in whitespace do not count, except that there must be no
    extra whitespace following token itself.

find_token_exact(lines, token, start[, end]):
find_token_exact(lines, token[, start[, end]]]):
    As find_token, but with ignorews set to True.

find_tokens(lines, tokens, start[, end[, ignorews]]):
find_tokens(lines, tokens[, start[, end[, ignorews]]]):
    Returns the first line i, start <= i < end, on which
    one of the tokens in tokens is found at the beginning.
    Returns -1 if not found.

@@ -42,18 +42,21 @@ find_tokens(lines, tokens, start[, end[, ignorews]]):
    in whitespace do not count, except that there must be no
    extra whitespace following token itself.

find_tokens_exact(lines, token, start[, end]):
find_tokens_exact(lines, token[, start[, end]]):
    As find_tokens, but with ignorews True.

find_token_backwards(lines, token, start):
find_tokens_backwards(lines, tokens, start):
    As before, but look backwards.

find_substring(lines, sub[, start[, end]]) -> int
    As find_token, but sub may be anywhere in the line.

find_re(lines, rexp, start[, end]):
    As find_token, but rexp is a regular expression object,
    so it has to be passed as e.g.: re.compile(r'...').

get_value(lines, token, start[, end[, default]):
get_value(lines, token[, start[, end[, default[, delete]]]]):
    Similar to find_token, but it returns what follows the
    token on the found line. Example:
    get_value(document.header, "\\use_xetex", 0)

@@ -63,8 +66,9 @@ get_value(lines, token, start[, end[, default]):
    is stripped.) The final argument, default, defaults to "",
    and is what is returned if we do not find anything. So you
    can use that to set a default.
    If delete is True, then delete the line if found.

get_quoted_value(lines, token, start[, end[, default]]):
get_quoted_value(lines, token[, start[, end[, default[, delete]]]]):
    Similar to get_value, but it will strip quotes off the
    value, if they are present. So use this one for cases
    where the value is normally quoted.

@@ -74,13 +78,20 @@ get_option_value(line, option):
    option="value"
    and returns value. Returns "" if not found.

get_bool_value(lines, token, start[, end[, default]]):
get_bool_value(lines, token[, start[, end[, default, delete]]]]):
    Like get_value, but returns a boolean.

del_token(lines, token, start[, end]):
set_bool_value(lines, token, value[, start[, end]]):
    Find `token` in `lines[start:end]` and set to boolean value bool(`value`).
    Return old value. Raise ValueError if token is not in lines.

del_token(lines, token[, start[, end]]):
    Like find_token, but deletes the line if it finds one.
    Returns True if a line got deleted, otherwise False.

Use get_* with the optional argument "delete=True", if you want to
get and delete a token.
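A short illustration of the new delete flag on the get_* helpers (hypothetical header lines, not from the real file format):

    from parser_tools import get_value

    header = ["\\use_xetex true", "\\language english"]

    # read and simultaneously remove a setting:
    val = get_value(header, "\\use_xetex", delete=True)
    print(val)     # -> "true"
    print(header)  # -> ["\\language english"]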
find_beginning_of(lines, i, start_token, end_token):
    Here, start_token and end_token are meant to be a matching
    pair, like "\\begin_layout" and "\\end_layout". We look for

@@ -110,25 +121,25 @@ find_end_of_layout(lines, i):

find_end_of_sequence(lines, i):
    Find the end of the sequence of layouts of the same kind.
    Considers nesting. If the last paragraph in sequence is nested,
    the position of the last \end_deeper is returned, else
    the position of the last \end_layout.
    the position of the last \\end_deeper is returned, else
    the position of the last \\end_layout.

is_in_inset(lines, i, inset):
    Checks if line i is in an inset of the given type.
is_in_inset(lines, i, inset, default=(-1,-1)):
    Check if line i is in an inset of the given type.
    If so, returns starting and ending lines. Otherwise,
    returns False.
    return default.
    Example:
    is_in_inset(document.body, i, "\\begin_inset Tabular")
    returns False unless i is within a table. If it is, then
    returns (-1,-1) unless i is within a table. If it is, then
    it returns the line on which the table begins and the one
    on which it ends. Note that this pair will evaluate to
    boolean True, so
    if is_in_inset(...):
    if is_in_inset(..., default=False):
    will do what you expect.
get_containing_inset(lines, i):
    Finds out what kind of inset line i is within. Returns a
    list containing what follows \begin_inset on the line
    list containing what follows \\begin_inset on the line
    on which the inset begins, plus the starting and ending line.
    Returns False on any kind of error or if it isn't in an inset.
    So get_containing_inset(document.body, i) might return:

@@ -152,7 +163,7 @@ is_nonempty_line(line):

count_pars_in_inset(lines, i):
    Counts the paragraphs inside an inset.

'''
"""

import re

@@ -161,9 +172,11 @@ def check_token(line, token):
    """ check_token(line, token) -> bool

    Return True if token is present in line and is the first element
    else returns False."""
    else returns False.

    return line[:len(token)] == token
    Deprecated. Use line.startswith(token).
    """
    return line.startswith(token)


def is_nonempty_line(line):

@@ -171,50 +184,53 @@ def is_nonempty_line(line):

    Return False if line is either empty or it has only whitespaces,
    else return True."""
    return line != " "*len(line)
    return bool(line.strip())
# Utilities for a list of lines
def find_token(lines, token, start, end = 0, ignorews = False):
def find_token(lines, token, start=0, end=0, ignorews=False):
    """ find_token(lines, token, start[[, end], ignorews]) -> int

    Return the lowest line where token is found, and is the first
    element, in lines[start, end].

    If ignorews is True (default is False), then differences in
    whitespace are ignored, except that there must be no extra
    whitespace following token itself.
    whitespace are ignored, but there must be whitespace following
    token itself.

    Use find_substring(lines, sub) to find a substring anywhere in `lines`.

    Return -1 on failure."""

    if end == 0 or end > len(lines):
        end = len(lines)
    m = len(token)
    if ignorews:
        y = token.split()
    for i in range(start, end):
        if ignorews:
            x = lines[i].split()
            y = token.split()
            if len(x) < len(y):
                continue
            if x[:len(y)] == y:
                return i
        else:
            if lines[i][:m] == token:
            if lines[i].startswith(token):
                return i
    return -1
def find_token_exact(lines, token, start, end = 0):
def find_token_exact(lines, token, start=0, end=0):
    return find_token(lines, token, start, end, True)


def find_tokens(lines, tokens, start, end = 0, ignorews = False):
def find_tokens(lines, tokens, start=0, end=0, ignorews=False):
    """ find_tokens(lines, tokens, start[[, end], ignorews]) -> int

    Return the lowest line where one token in tokens is found, and is
    the first element, in lines[start, end].

    Return -1 on failure."""

    if end == 0 or end > len(lines):
        end = len(lines)

@@ -228,23 +244,41 @@ def find_tokens(lines, tokens, start, end = 0, ignorews = False):
            if x[:len(y)] == y:
                return i
        else:
            if lines[i][:len(token)] == token:
            if lines[i].startswith(token):
                return i
    return -1


def find_tokens_exact(lines, tokens, start, end = 0):
def find_tokens_exact(lines, tokens, start=0, end=0):
    return find_tokens(lines, tokens, start, end, True)


def find_re(lines, rexp, start, end = 0):
    """ find_token_re(lines, rexp, start[, end]) -> int
def find_substring(lines, sub, start=0, end=0):
    """ find_substring(lines, sub[, start[, end]]) -> int

    Return the lowest line where rexp, a regular expression, is found
    in lines[start, end].
    Return the lowest line number `i` in [start, end] where
    `sub` is a substring of line[i].

    Return -1 on failure."""

    if end == 0 or end > len(lines):
        end = len(lines)
    for i in range(start, end):
        if sub in lines[i]:
            return i
    return -1


def find_re(lines, rexp, start=0, end=0):
    """ find_re(lines, rexp[, start[, end]]) -> int

    Return the lowest line number `i` in [start, end] where the regular
    expression object `rexp` matches at the beginning of line[i].
    Return -1 on failure.

    Start your pattern with the wildcard ".*" to find a match anywhere in a
    line. Use find_substring() to find a substring anywhere in the lines.
    """
    if end == 0 or end > len(lines):
        end = len(lines)
    for i in range(start, end):

@@ -260,10 +294,8 @@ def find_token_backwards(lines, token, start):
    element, in lines[start, end].

    Return -1 on failure."""
    m = len(token)
    for i in range(start, -1, -1):
        line = lines[i]
        if line[:m] == token:
        if lines[i].startswith(token):
            return i
    return -1
@@ -278,30 +310,111 @@ def find_tokens_backwards(lines, tokens, start):
    for i in range(start, -1, -1):
        line = lines[i]
        for token in tokens:
            if line[:len(token)] == token:
            if line.startswith(token):
                return i
    return -1


def get_value(lines, token, start, end = 0, default = ""):
    """ get_value(lines, token, start[[, end], default]) -> string
def find_complete_lines(lines, sublines, start=0, end=0):
    """Find first occurrence of sequence `sublines` in list `lines`.
    Return index of first line or -1 on failure.

    Efficient search for a sub-list in a large list. Works for any values.

    >>> find_complete_lines([1, 2, 3, 1, 1, 2], [1, 2])
    0

    The `start` and `end` arguments work similar to list.index()

    >>> find_complete_lines([1, 2, 3, 1, 1 ,2], [1, 2], start=1)
    4
    >>> find_complete_lines([1, 2, 3, 1, 1 ,2], [1, 2], start=1, end=4)
    -1

    The return value can be used to substitute the sub-list.
    Take care to check before use:

    >>> l = [1, 1, 2]
    >>> s = find_complete_lines(l, [1, 2])
    >>> if s != -1:
    ...     l[s:s+2] = [3]; l
    [1, 3]

    See also del_complete_lines().
    """
    if not sublines:
        return start
    end = end or len(lines)
    N = len(sublines)
    try:
        while True:
            for j, value in enumerate(sublines):
                i = lines.index(value, start, end)
                if j and i != start:
                    start = i-j
                    break
                start = i + 1
            else:
                return i + 1 - N
    except ValueError: # `sublines` not found
        return -1
def find_across_lines(lines, sub, start=0, end=0):
    sublines = sub.splitlines()
    if len(sublines) > 2:
        # at least 3 lines: the middle one(s) are complete -> use index search
        i = find_complete_lines(lines, sublines[1:-1], start+1, end-1)
        if i < start+1:
            return -1
        try:
            if (lines[i-1].endswith(sublines[0]) and
                lines[i+len(sublines)].startswith(sublines[-1])):
                return i-1
        except IndexError:
            pass
    elif len(sublines) > 1:
        # last subline must start a line
        i = find_token(lines, sublines[-1], start, end)
        if i < start + 1:
            return -1
        if lines[i-1].endswith(sublines[0]):
            return i-1
    else: # no line-break, may be in the middle of a line
        if end == 0 or end > len(lines):
            end = len(lines)
        for i in range(start, end):
            if sub in lines[i]:
                return i
    return -1
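find_across_lines() ships without a docstring; it locates a multi-line string whose first and last fragments may sit in the middle of a line. A hedged usage sketch with made-up lines:

    from parser_tools import find_across_lines

    body = ["junk \\begin_inset Note",
            "\\end_inset tail"]

    i = find_across_lines(body, "Note\n\\end_inset")
    print(i)  # -> 0: "Note" ends line 0 and "\\end_inset" starts line 1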
def get_value(lines, token, start=0, end=0, default="", delete=False):
    """Find `token` in `lines` and return part of line that follows it.

    Find the next line that looks like:
      token followed by other stuff
    Returns "followed by other stuff" with leading and trailing

    If `delete` is True, delete the line (if found).

    Return "followed by other stuff" with leading and trailing
    whitespace removed.
    """

    i = find_token_exact(lines, token, start, end)
    if i == -1:
        return default
    # TODO: establish desired behaviour, eventually change to
    # return lines.pop(i)[len(token):].strip() # or default
    # see test_parser_tools.py
    l = lines[i].split(None, 1)
    if delete:
        del(lines[i])
    if len(l) > 1:
        return l[1].strip()
    return default
def get_quoted_value(lines, token, start, end = 0, default = ""):
def get_quoted_value(lines, token, start=0, end=0, default="", delete=False):
    """ get_quoted_value(lines, token, start[[, end], default]) -> string

    Find the next line that looks like:

@@ -312,33 +425,52 @@ def get_quoted_value(lines, token, start, end = 0, default = ""):
    if they are there.
    Note that we will NOT strip quotes from default!
    """
    val = get_value(lines, token, start, end, "")
    val = get_value(lines, token, start, end, "", delete)
    if not val:
        return default
    return val.strip('"')


def get_bool_value(lines, token, start, end = 0, default = None):
    """ get_value(lines, token, start[[, end], default]) -> string
bool_values = {"true": True, "1": True,
               "false": False, "0": False}

def get_bool_value(lines, token, start=0, end=0, default=None, delete=False):
    """ get_bool_value(lines, token, start[[, end], default]) -> string

    Find the next line that looks like:
      token bool_value
      `token` <bool_value>

    Returns True if bool_value is 1 or true and
    False if bool_value is 0 or false
    Return True if <bool_value> is 1 or "true", False if <bool_value>
    is 0 or "false", else `default`.
    """
    val = get_quoted_value(lines, token, start, end, default, delete)
    return bool_values.get(val, default)

    val = get_quoted_value(lines, token, start, end, "")

    if val == "1" or val == "true":
        return True
    if val == "0" or val == "false":
        return False
    return default

def set_bool_value(lines, token, value, start=0, end=0):
    """Find `token` in `lines` and set to bool(`value`).

    Return previous value. Raise `ValueError` if `token` is not in lines.

    Cf. find_token(), get_bool_value().
    """
    i = find_token(lines, token, start, end)
    if i == -1:
        raise ValueError
    oldvalue = get_bool_value(lines, token, i, i+1)
    if oldvalue is value:
        return oldvalue
    # set to new value
    if get_quoted_value(lines, token, i, i+1) in ('0', '1'):
        lines[i] = "%s %d" % (token, value)
    else:
        lines[i] = "%s %s" % (token, str(value).lower())

    return oldvalue
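A brief sketch of the new set_bool_value() round trip, with hypothetical header lines; it mirrors the unit tests added further down in this commit:

    from parser_tools import set_bool_value

    header = ["\\output_changes false", "\\html_css_as_file 0"]

    old = set_bool_value(header, "\\output_changes", True)
    print(old)     # -> False (the previous value)
    print(header)  # -> ["\\output_changes true", "\\html_css_as_file 0"]

    # numeric spellings are preserved:
    set_bool_value(header, "\\html_css_as_file", True)
    print(header[1])  # -> "\\html_css_as_file 1"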
def get_option_value(line, option):
    rx = option + '\s*=\s*"([^"]+)"'
    rx = option + r'\s*=\s*"([^"]+)"'
    rx = re.compile(rx)
    m = rx.search(line)
    if not m:

@@ -347,15 +479,15 @@ def get_option_value(line, option):


def set_option_value(line, option, value):
    rx = '(' + option + '\s*=\s*")[^"]+"'
    rx = '(' + option + r'\s*=\s*")[^"]+"'
    rx = re.compile(rx)
    m = rx.search(line)
    if not m:
        return line
    return re.sub(rx, '\g<1>' + value + '"', line)
    return re.sub(rx, r'\g<1>' + value + '"', line)


def del_token(lines, token, start, end = 0):
def del_token(lines, token, start=0, end=0):
    """ del_token(lines, token, start, end) -> int

    Find the first line in lines where token is the first element

@@ -368,6 +500,41 @@ def del_token(lines, token, start, end = 0):
    del lines[k]
    return True
def del_complete_lines(lines, sublines, start=0, end=0):
    """Delete first occurrence of `sublines` in list `lines`.

    Efficient deletion of a sub-list in a list. Works for any values.
    The `start` and `end` arguments work similar to list.index()

    Returns True if a deletion was done and False if not.

    >>> l = [1, 0, 1, 1, 1, 2]
    >>> del_complete_lines(l, [0, 1, 1])
    True
    >>> l
    [1, 1, 2]
    """
    i = find_complete_lines(lines, sublines, start, end)
    if i == -1:
        return False
    del(lines[i:i+len(sublines)])
    return True


def del_value(lines, token, start=0, end=0, default=None):
    """
    Find the next line that looks like:
      token followed by other stuff
    Delete that line and return "followed by other stuff"
    with leading and trailing whitespace removed.

    If token is not found, return `default`.
    """
    i = find_token_exact(lines, token, start, end)
    if i == -1:
        return default
    return lines.pop(i)[len(token):].strip()
def find_beginning_of(lines, i, start_token, end_token):
    count = 1

@@ -375,7 +542,7 @@ def find_beginning_of(lines, i, start_token, end_token):
        i = find_tokens_backwards(lines, [start_token, end_token], i-1)
        if i == -1:
            return -1
        if check_token(lines[i], end_token):
        if lines[i].startswith(end_token):
            count = count+1
        else:
            count = count-1

@@ -391,7 +558,7 @@ def find_end_of(lines, i, start_token, end_token):
        i = find_tokens(lines, [end_token, start_token], i+1)
        if i == -1:
            return -1
        if check_token(lines[i], start_token):
        if lines[i].startswith(start_token):
            count = count+1
        else:
            count = count-1

@@ -400,11 +567,11 @@ def find_end_of(lines, i, start_token, end_token):
    return -1


def find_nonempty_line(lines, start, end = 0):
def find_nonempty_line(lines, start=0, end=0):
    if end == 0:
        end = len(lines)
    for i in range(start, end):
        if is_nonempty_line(lines[i]):
        if lines[i].strip():
            return i
    return -1

@@ -419,35 +586,33 @@ def find_end_of_layout(lines, i):
    return find_end_of(lines, i, "\\begin_layout", "\\end_layout")
def is_in_inset(lines, i, inset):
    '''
    Checks if line i is in an inset of the given type.
    If so, returns starting and ending lines.
    Otherwise, returns False.
def is_in_inset(lines, i, inset, default=(-1,-1)):
    """
    Check if line i is in an inset of the given type.
    If so, return starting and ending lines, otherwise `default`.
    Example:
        is_in_inset(document.body, i, "\\begin_inset Tabular")
    returns False unless i is within a table. If it is, then
    it returns the line on which the table begins and the one
    on which it ends. Note that this pair will evaluate to
    boolean True, so
        if is_in_inset(...):
    returns (-1,-1) if `i` is not within a "Tabular" inset (i.e. a table).
    If it is, then it returns the line on which the table begins and the one
    on which it ends.
    Note that this pair will evaluate to boolean True, so (with the optional
    default value set to False)
        if is_in_inset(..., default=False):
    will do what you expect.
    '''
    defval = (-1, -1)
    stins = find_token_backwards(lines, inset, i)
    if stins == -1:
        return defval
    endins = find_end_of_inset(lines, stins)
    # note that this includes the notfound case.
    if endins < i:
        return defval
    return (stins, endins)
    """
    start = find_token_backwards(lines, inset, i)
    if start == -1:
        return default
    end = find_end_of_inset(lines, start)
    if end < i: # this includes the notfound case.
        return default
    return (start, end)
def get_containing_inset(lines, i):
    '''
    Finds out what kind of inset line i is within. Returns a
    list containing (i) what follows \begin_inset on the line
    list containing (i) what follows \\begin_inset on the line
    on which the inset begins, plus the starting and ending line.
    Returns False on any kind of error or if it isn't in an inset.
    '''

@@ -473,12 +638,15 @@ def get_containing_inset(lines, i):

def get_containing_layout(lines, i):
    '''
    Finds out what kind of layout line i is within. Returns a
    list containing what follows \begin_layout on the line
    on which the layout begins, plus the starting and ending line
    and the start of the paragraph (after all params). I.e, returns:
    Find out what kind of layout line `i` is within.
    Return a tuple
        (layoutname, layoutstart, layoutend, startofcontent)
    Returns False on any kind of error.
    containing
    * layout style/name,
    * start line number,
    * end line number, and
    * number of first paragraph line (after all params).
    Return `False` on any kind of error.
    '''
    j = i
    while True:

@@ -493,10 +661,13 @@ def get_containing_layout(lines, i):
    if endlay < i:
        return False

    lay = get_value(lines, "\\begin_layout", stlay)
    if lay == "":
        # shouldn't happen
        return False
    layoutname = get_value(lines, "\\begin_layout", stlay)
    if layoutname == "": # layout style missing
        # TODO: What shall we do in this case?
        pass
        # layoutname == "Standard" # use same fallback as the LyX parser:
        # raise ValueError("Missing layout name on line %d"%stlay) # diagnosis
        # return False # generic error response
    par_params = ["\\noindent", "\\indent", "\\indent-toggle", "\\leftindent",
                  "\\start_of_appendix", "\\paragraph_spacing", "\\align",
                  "\\labelwidthstring"]

@@ -505,7 +676,7 @@ def get_containing_layout(lines, i):
        stpar += 1
        if lines[stpar].split(' ', 1)[0] not in par_params:
            break
    return (lay, stlay, endlay, stpar)
    return (layoutname, stlay, endlay, stpar)
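Since get_containing_layout() keeps its tuple shape, callers unpack it as before; a hedged sketch with a minimal made-up body list:

    from parser_tools import get_containing_layout

    body = ["\\begin_layout Standard",
            "\\noindent",
            "Some paragraph text.",
            "\\end_layout"]

    ret = get_containing_layout(body, 2)
    if ret:
        layoutname, start, end, first_par_line = ret
        print(layoutname)      # -> "Standard"
        print(first_par_line)  # -> 2, the first line after the parameter lines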
def count_pars_in_inset(lines, i):

@@ -518,7 +689,8 @@ def count_pars_in_inset(lines, i):
    pars = 0
    for j in range(ins[1], ins[2]):
        m = re.match(r'\\begin_layout (.*)', lines[j])
        if m and get_containing_inset(lines, j)[0] == ins[0]:
        found_inset = get_containing_inset(lines, j)
        if m and found_inset and found_inset[1] == ins[1]:
            pars += 1

    return pars

@@ -553,4 +725,3 @@ def find_end_of_sequence(lines, i):
        i = i + 1

    return endlay
@@ -1,4 +1,4 @@
#! /usr/bin/env python
#! /usr/bin/python3
# -*- coding: utf-8 -*-
# Copyright (C) 2004 José Matos <jamatos@lyx.org>
#
79 lib/lyx2lyx/test_lyx2lyx_tools.py (new file)
@@ -0,0 +1,79 @@
# This file is part of lyx2lyx
# -*- coding: utf-8 -*-
# Copyright (C) 2018 The LyX team
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

" This module tests the auxiliary functions for lyx2lyx."

from lyx2lyx_tools import *

import unittest

class TestParserTools(unittest.TestCase):

    def test_put_cmd_in_ert(self):
        ert = [u'\\begin_inset ERT',
               u'status collapsed',
               u'',
               u'\\begin_layout Plain Layout',
               u'',
               u'',
               u'\\backslash',
               u'texttt{Gr',
               u'\\backslash',
               u'"{u}',
               u'\\backslash',
               u'ss{}e}',
               u'\\end_layout',
               u'',
               u'\\end_inset']
        ert_open = ert[:]
        ert_open[1] = u'status open'
        ert_paragraph = ["\\begin_layout Standard",
                         u'\\begin_inset ERT',
                         u'status collapsed',
                         u'',
                         u'\\begin_layout Plain Layout',
                         u'',
                         u'',
                         u'\\backslash',
                         u'texttt{Gr',
                         u'\\backslash',
                         u'"{u}',
                         u'\\backslash',
                         u'ss{}e}',
                         u'\\end_layout',
                         u'',
                         u'\\end_inset',
                         u'',
                         u'',
                         u'\\end_layout',
                         u'']
        self.assertEqual(put_cmd_in_ert("\\texttt{Grüße}"), ert)
        self.assertEqual(put_cmd_in_ert([u"\\texttt{Grüße}"]), ert)
        self.assertEqual(put_cmd_in_ert(u"\\texttt{Grüße}", is_open=True), ert_open)
        self.assertEqual(put_cmd_in_ert(u"\\texttt{Grüße}", as_paragraph=True), ert_paragraph)

    def test_latex_length(self):
        self.assertEqual(latex_length("-30.5col%"), (True, "-0.305\\columnwidth"))
        self.assertEqual(latex_length("35baselineskip%"), (True, "0.35\\baselineskip"))
        self.assertEqual(latex_length("11em"), (False, "11em"))
        self.assertEqual(latex_length("-0.4pt"), (False, "-0.4pt"))


if __name__ == '__main__':
    unittest.main()
@@ -22,7 +22,7 @@ from parser_tools import *

import unittest

ug = r"""
lines = r"""
\begin_layout Standard
The
\begin_inset Quotes eld

@@ -56,9 +56,26 @@ Introduction
describes that, too.
\end_layout

"""
""".splitlines()

header = r"""\begin_header
\origin unavailable
\paperpagestyle default
\output_changes false
\html_math_output 0
\html_css_as_file 0
\html_be_strict fallse
\end_header""".splitlines()

newheader = r"""\begin_header
\origin unavailable
\paperpagestyle default
\output_changes true
\html_math_output 0
\html_css_as_file 1
\html_be_strict false
\end_header""".splitlines()

lines = ug.splitlines()

class TestParserTools(unittest.TestCase):
@ -77,10 +94,21 @@ class TestParserTools(unittest.TestCase):
|
||||
|
||||
def test_find_token(self):
|
||||
self.assertEqual(find_token(lines, '\\emph', 0), 7)
|
||||
self.assertEqual(find_token(lines, '\\emph', 0, 5), -1)
|
||||
self.assertEqual(find_token(lines, '\\emp', 0, 0, True), -1)
|
||||
self.assertEqual(find_token(lines, '\\emp', 0, 0, False), 7)
|
||||
# no line starts with "emph" (without backspace):
|
||||
self.assertEqual(find_token(lines, 'emph', 0), -1)
|
||||
# token on line[start] is found:
|
||||
self.assertEqual(find_token(lines, '\\emph', 7), 7)
|
||||
self.assertEqual(find_token(lines, '\\emph', 8), 9)
|
||||
# token on line[end] is not found:
|
||||
self.assertEqual(find_token(lines, '\\emph', 0, 7), -1)
|
||||
# `ignorews` looks for whitespace-separated tokens:
|
||||
self.assertEqual(find_token(lines, '\\emp', 0, ignorews=True), -1)
|
||||
self.assertEqual(find_token(lines, '\\emph',0, ignorews=True), 7)
|
||||
self.assertEqual(find_token(lines, '\\emph', 7, ignorews=True), 7)
|
||||
self.assertEqual(find_token(lines, '\\emph', 0, 7, True), -1)
|
||||
# only first token is found:
|
||||
self.assertEqual(find_token(lines, 'Quotes', 0), -1)
|
||||
self.assertEqual(find_token(lines, 'Quotes', 0, ignorews=True), -1)
|
||||
|
||||
|
||||
    def test_find_tokens(self):

@@ -89,5 +117,128 @@ class TestParserTools(unittest.TestCase):
        self.assertEqual(find_tokens(lines, tokens, 0, 4), -1)


    def test_find_substring(self):
        # Quotes is not a "token" (substring at the start of any line):
        self.assertEqual(find_token(lines, "Quotes", 0), -1)
        self.assertEqual(find_substring(lines, "Quotes", 0), 3)
        # return -1 on failure:
        self.assertEqual(find_substring(lines, "Qualen", 0), -1)


    def test_find_re(self):
        regexp_object = re.compile(r'\\begin.*Quote')
        # matching starts with line[start] (default: start=0)
        self.assertEqual(find_re(lines, regexp_object), 3)
        self.assertEqual(find_re(lines, regexp_object, start=3), 3)
        # matching ends one line *before* line[end]:
        self.assertEqual(find_re(lines, regexp_object, start=4), 11)
        self.assertEqual(find_re(lines, regexp_object, start=4, end=11), -1)

    def test_find_complete_lines(self):
        sublines = ["\\begin_inset Quotes eld",
                    "\\end_inset"]
        # return index of first line of sublines:
        self.assertEqual(find_complete_lines(lines, sublines), 3)
        self.assertEqual(find_complete_lines(lines, ["\\end_inset"]), 4)
        # return -1 if sublines is not found:
        self.assertEqual(find_complete_lines(lines, ['x']), -1)
        # search includes line `start`:
        self.assertEqual(find_complete_lines(lines, sublines, 3), 3)
        self.assertEqual(find_complete_lines(lines, sublines, 4), 20)
        self.assertEqual(find_complete_lines(lines, sublines, 21), -1)
        # search excludes line `end`:
        self.assertEqual(find_complete_lines(lines, sublines, 4, 20), -1)
        # an empty list is always found:
        self.assertEqual(find_complete_lines(lines, []), 0)


    def test_find_across_lines(self):
        # sub with at least 2 line-breaks (uses find_complete_lines):
        sub = "Quotes eld\n\\end_inset\n\n\n"
        self.assertEqual(find_across_lines(lines, sub), 3)
        # return -1 if not found:
        self.assertEqual(find_across_lines(lines, sub, 4), -1)
        self.assertEqual(find_across_lines(lines, sub, 0, 6), -1)
        sub = "Quotes eld\n\\end_inset\nx\n"
        self.assertEqual(find_across_lines(lines, sub), -1)
        sub = "Quotes X\n\\end_inset\n\n"
        self.assertEqual(find_across_lines(lines, sub), -1)
        sub = "Quotes eld\n\\end_insert\n\n"
        self.assertEqual(find_across_lines(lines, sub), -1)
        # sub with up to 1 line-break:
        sub = "Quotes eld\n\\end_inset"
        self.assertEqual(find_across_lines(lines, sub), 3)
        self.assertEqual(find_across_lines(lines, sub, 4), -1)
        self.assertEqual(find_across_lines(lines, sub, 0, 4), -1)
        self.assertEqual(find_across_lines(lines, sub, 4, 3), -1)
        sub = "Quotes X eld\n\\end_inset\n"
        self.assertEqual(find_across_lines(lines, sub), -1)
        sub = "Quotes eld\n\\end_insert\n"
        self.assertEqual(find_across_lines(lines, sub), -1)
        # sub without line-break:
        sub = "end_"
        self.assertEqual(find_across_lines(lines, sub), 4)
        self.assertEqual(find_across_lines(lines, sub, 5), 12)
        self.assertEqual(find_across_lines(lines, sub, 0, 4), -1)
        self.assertEqual(find_across_lines(lines, sub, 2, 1), -1)
        self.assertEqual(find_across_lines(lines, "XXX"), -1)


    def test_get_value(self):
        self.assertEqual(get_value(lines, "\\begin_inset"), "Quotes eld")
        # TODO: do we want this:
        self.assertEqual(get_value(lines, "\\begin_inset Quotes"), "Quotes eld")
        # or only the part after "token":
        # self.assertEqual(get_value(lines, "\\begin_inset Quotes"), "eld")
        # return default if not found:
        self.assertEqual(get_value(lines, "\\begin_insert", default=42), 42)
        # TODO: do we want this:
        self.assertEqual(get_value(lines, "\\end_inset", default=None), None)
        # or empty string if token is found but has no value:
        # self.assertEqual(get_value(lines, "\\end_inset", default=None), "")

    def test_get_bool_value(self):
        self.assertEqual(get_bool_value(header, "\\output_changes"), False)
        self.assertEqual(get_bool_value(newheader, "\\output_changes"), True)
        self.assertEqual(get_bool_value(header, "\\html_css_as_file"), False)
        self.assertEqual(get_bool_value(newheader, "\\html_css_as_file"), True)
        self.assertEqual(get_bool_value(header, "\\something"), None)
        self.assertEqual(get_bool_value(header, "\\output_changes", 4), None)

    def test_set_bool_value(self):
        # set to new value, return old value:
        self.assertEqual(set_bool_value(header, "\\output_changes", True), False)
        self.assertEqual(set_bool_value(header, "\\html_css_as_file", True), False)
        # return default if misspelled:
        self.assertEqual(set_bool_value(header, "\\html_be_strict", False), None)
        # catch error and insert new setting:
        self.assertRaises(ValueError, set_bool_value, header, "\\something", 0)
        self.assertEqual(header, newheader)

    def test_del_complete_lines(self):
        l = lines[:]
        sublines = ["\\begin_inset Quotes eld",
                    "\\end_inset"]
        # normal operation: remove the first occurrence of sublines:
        self.assertEqual(del_complete_lines(l, sublines), True)
        self.assertEqual(l[3], "")
        self.assertEqual(len(l), len(lines)-len(sublines))
        # special cases:
        l = lines[:]
        self.assertEqual(del_complete_lines(l, sublines, 21), False)
        self.assertEqual(l, lines)
        # deleting empty sublist returns success but does not change the list:
        self.assertEqual(del_complete_lines(l, [], 21), True)
        self.assertEqual(l, lines)

    def test_del_value(self):
        l = lines[:]
        self.assertEqual(del_value(l, "\\begin_inset"), "Quotes eld")
        self.assertEqual(del_value(l, "\\begin_inset Quotes"), "erd")
        # return default if not found:
        self.assertEqual(del_value(l, "\\begin_insert", default=42), 42)
        self.assertEqual(del_value(l, "\\end_inset", default=None), "")


if __name__ == '__main__':
    unittest.main()
@@ -18,7 +18,7 @@

" Import unicode_reps from this module for access to the unicode<->LaTeX mapping. "

import sys, os, re
import sys, os, re, codecs

# Provide support for both python 2 and 3
PY2 = sys.version_info[0] == 2

@@ -28,14 +28,13 @@ if not PY2:

def read_unicodesymbols():
    " Read the unicodesymbols list of unicode characters and corresponding commands."
    pathname = os.path.abspath(os.path.dirname(sys.argv[0]))
    pathname = os.path.abspath(os.path.dirname(__file__))
    filename = os.path.join(pathname.strip('lyx2lyx'), 'unicodesymbols')

    # For python 3+ we have to specify the encoding for those systems
    # where the default is not UTF-8
    fp = open(filename, encoding="utf8") if (not PY2) else open(filename)
    # Read as Unicode strings in both, Python 2 and 3
    # Specify the encoding for those systems where the default is not UTF-8
    fp = codecs.open(filename, encoding="utf8")

    spec_chars = []
    # A backslash, followed by some non-word character, and then a character
    # in brackets. The idea is to check for constructs like: \"{u}, which is how
    # they are written in the unicodesymbols file; but they can also be written

@@ -43,36 +42,42 @@ def read_unicodesymbols():
    # The two backslashes in the string literal are needed to specify a literal
    # backslash in the regex. Without r prefix, these would be four backslashes.
    r = re.compile(r'\\(\W)\{(\w)\}')

    spec_chars = []
    for line in fp.readlines():
        if line[0] != '#' and line.strip() != "":
            # Note: backslashes in the string literals with r prefix are not escaped,
            # so one backslash in the source file equals one backslash in memory.
            # Without r prefix backslashes are escaped, so two backslashes in the
            # source file equal one backslash in memory.
            line=line.replace(' "',' ') # remove all quotation marks with spaces before
            line=line.replace('" ',' ') # remove all quotation marks with spaces after
            line=line.replace(r'\"','"') # unescape "
            line=line.replace(r'\\','\\') # unescape \
            try:
                [ucs4,command,dead] = line.split(None,2)
                if command[0:1] != "\\":
                    continue
                if (line.find("notermination=text") < 0 and
                    line.find("notermination=both") < 0 and command[-1] != "}"):
                    command = command + "{}"
                spec_chars.append([command, unichr(eval(ucs4))])
            except:
        if not line.strip() or line.startswith('#'):
            # skip empty lines and comments
            continue
        # Note: backslashes in the string literals with r prefix are not escaped,
        # so one backslash in the source file equals one backslash in memory.
        # Without r prefix backslashes are escaped, so two backslashes in the
        # source file equal one backslash in memory.
        line=line.replace(' "',' ') # remove all quotation marks with spaces before
        line=line.replace('" ',' ') # remove all quotation marks with spaces after
        line=line.replace(r'\"','"') # unescape "
        line=line.replace(r'\\','\\') # unescape \
        try:
            [ucs4,command,dead] = line.split(None,2)
            if command[0:1] != "\\":
                continue
            m = r.match(command)
            if m != None:
                command = "\\"
                commandbl = command
                command += m.group(1) + m.group(2)
                commandbl += m.group(1) + ' ' + m.group(2)
                spec_chars.append([command, unichr(eval(ucs4))])
                spec_chars.append([commandbl, unichr(eval(ucs4))])
            literal_char = unichr(int(ucs4, 16))
            if (line.find("notermination=text") < 0 and
                line.find("notermination=both") < 0 and command[-1] != "}"):
                command = command + "{}"
            spec_chars.append([command, literal_char])
        except:
            continue
        m = r.match(command)
        if m != None:
            command = "\\"
            commandbl = command
            command += m.group(1) + m.group(2)
            commandbl += m.group(1) + ' ' + m.group(2)
            spec_chars.append([command, literal_char])
            spec_chars.append([commandbl, literal_char])
    fp.close()
    return spec_chars


unicode_reps = read_unicodesymbols()