Reformat lyx2lyx code using ruff

This commit is contained in:
José Matos 2024-06-15 10:06:06 +01:00
parent aaef6d2693
commit b4db3ea137
29 changed files with 13802 additions and 9780 deletions

View File

@ -19,8 +19,14 @@
"The LyX module has all the rules related with different lyx file formats."
from parser_tools import (get_value, check_token, find_token, find_tokens,
find_end_of, find_complete_lines)
from parser_tools import (
get_value,
check_token,
find_token,
find_tokens,
find_end_of,
find_complete_lines,
)
import os.path
import gzip
import locale
@ -32,10 +38,11 @@ import codecs
# Determine the lyx2lyx version. The generated lyx2lyx_version module is
# only available in an installed tree; when it cannot be imported (or does
# not provide `version`) fall back to the development version.
try:
    import lyx2lyx_version

    version__ = lyx2lyx_version.version
    stable_version = True
except (ImportError, AttributeError):
    # We are running from the build directory, so assume the last version.
    # Only import/attribute failures are handled here so that
    # KeyboardInterrupt and SystemExit are no longer swallowed.
    version__ = "2.5"
    stable_version = False
default_debug__ = 2
@ -44,10 +51,12 @@ default_debug__ = 2
####################################################################
# Private helper functions
def find_end_of_inset(lines, i):
    "Find end of inset, where lines[i] is included."
    # Delegates to find_end_of with the \begin_inset / \end_inset token pair;
    # the return value is whatever find_end_of yields for that pair
    # (presumably a line index, or -1 when unmatched -- confirm in parser_tools).
    return find_end_of(lines, i, "\\begin_inset", "\\end_inset")
def minor_versions(major, last_minor_version):
"""Generate minor versions, using major as prefix and minor
versions from 0 until last_minor_version, plus the generic version.
@ -73,7 +82,8 @@ original_tex2lyx_version = re.compile(b".*?tex2lyx ([\\d.]*)")
##
# file format information:
# file, supported formats, stable release versions
format_relation = [("0_06", [200], minor_versions("0.6" , 4)),
format_relation = [
("0_06", [200], minor_versions("0.6", 4)),
("0_08", [210], minor_versions("0.8", 6) + ["0.7"]),
("0_10", [210], minor_versions("0.10", 7) + ["0.9"]),
("0_12", [215], minor_versions("0.12", 1) + ["0.11"]),
@ -93,7 +103,7 @@ format_relation = [("0_06", [200], minor_versions("0.6" , 4)),
("2_2", list(range(475, 509)), minor_versions("2.2", 4)),
("2_3", list(range(509, 545)), minor_versions("2.3", 7)),
("2_4", list(range(545, 621)), minor_versions("2.4", 0)),
("2_5", (), minor_versions("2.5" , 0))
("2_5", (), minor_versions("2.5", 0)),
]
####################################################################
@ -102,12 +112,11 @@ format_relation = [("0_06", [200], minor_versions("0.6" , 4)),
# Development versions only: when the newest entry of format_relation has
# no explicit list of formats, derive it from the `convert` table of the
# matching lyx_<step> module (first element of every conversion entry).
if not format_relation[-1][1]:
    step, mode = format_relation[-1][0], "convert"
    convert = getattr(__import__("lyx_" + step), mode)
    format_relation[-1] = (
        step,
        [entry[0] for entry in convert],
        format_relation[-1][2],
    )
# #
####################################################################
def formats_list():
"Returns a list with supported file formats."
formats = []
@ -142,7 +151,7 @@ def format_info():
stable_format = str(version[1][-1])
out += template % (major, stable_format, versions, formats)
return out + '\n'
return out + "\n"
def get_end_format():
@ -165,10 +174,10 @@ def get_backend(textclass):
def trim_eol(line):
    """Remove end of line char(s).

    A single trailing line terminator is stripped: a lone LF, a lone CR,
    or a CR followed by one more terminator character (e.g. CR LF).
    Lines without a terminator -- including the empty string, which
    previously raised IndexError -- are returned unchanged.
    """
    if not line or line[-1] not in ("\n", "\r"):
        # May happen for the last line of a document
        return line
    # The slice (rather than an index) stays safe for one-character lines.
    if line[-2:-1] == "\r":
        return line[:-2]
    return line[:-1]
@ -193,11 +202,12 @@ def get_encoding(language, inputencoding, format, cjk_encoding):
# This means that files created by CJK-LyX can only be converted using
# the correct locale settings unless the encoding is given as commandline
# argument.
if cjk_encoding == 'auto':
if cjk_encoding == "auto":
return locale.getpreferredencoding()
elif cjk_encoding:
return cjk_encoding
from lyx2lyx_lang import lang
if inputencoding == "auto" or inputencoding == "default":
return lang[language][3]
if inputencoding == "":
@ -209,17 +219,27 @@ def get_encoding(language, inputencoding, format, cjk_encoding):
return "iso-8859-15"
return inputencoding
##
# Class
#
class LyX_base:
"""This class carries all the information of the LyX file."""
def __init__(self, end_format = 0, input = '', output = '', error = '',
debug = default_debug__, try_hard = 0, cjk_encoding = '',
final_version = '', systemlyxdir = '', language = 'english',
encoding = 'auto'):
def __init__(
self,
end_format=0,
input="",
output="",
error="",
debug=default_debug__,
try_hard=0,
cjk_encoding="",
final_version="",
systemlyxdir="",
language="english",
encoding="auto",
):
"""Arguments:
end_format: final format that the file should be converted. (integer)
input: the name of the input source, if empty resort to standard input.
@ -247,7 +267,9 @@ class LyX_base:
# and ignore the version.
if final_version:
message = "Incompatible version %s for specified format %d" % (
final_version, self.end_format)
final_version,
self.end_format,
)
for version in format_relation:
if self.end_format in version[1]:
if final_version not in version[2]:
@ -277,7 +299,7 @@ class LyX_base:
# This is a hack: We use '' since we don't know the default
# layout of the text class. LyX will parse it as default layout.
# FIXME: Read the layout file and use the real default layout
self.default_layout = ''
self.default_layout = ""
self.header = []
self.preamble = []
self.body = []
@ -286,14 +308,12 @@ class LyX_base:
self.language = language
self.systemlyxdir = systemlyxdir
def warning(self, message, debug_level=default_debug__):
    """Write a "lyx2lyx warning: ..." line to self.err.

    The message is emitted only when debug_level does not exceed
    self.debug; otherwise nothing is written."""
    if debug_level > self.debug:
        return
    self.err.write("lyx2lyx warning: " + message + "\n")
def error(self, message):
"Emits a warning and exits if not in try_hard mode."
self.warning(message)
@ -303,7 +323,6 @@ class LyX_base:
self.status = 2
def read(self):
"""Reads a file into the self.header and
self.body parts, from self.input."""
@ -330,8 +349,8 @@ class LyX_base:
first_line = False
line = trim_eol_binary(line)
decoded = line.decode('latin1')
if check_token(decoded, '\\begin_preamble'):
decoded = line.decode("latin1")
if check_token(decoded, "\\begin_preamble"):
while True:
line = self.input.readline()
if not line:
@ -339,51 +358,60 @@ class LyX_base:
self.error("Invalid LyX file: Missing body.")
line = trim_eol_binary(line)
decoded = line.decode('latin1')
if check_token(decoded, '\\end_preamble'):
decoded = line.decode("latin1")
if check_token(decoded, "\\end_preamble"):
break
if decoded.split()[:0] in ("\\layout",
"\\begin_layout", "\\begin_body"):
self.warning("Malformed LyX file:"
if decoded.split()[:0] in (
"\\layout",
"\\begin_layout",
"\\begin_body",
):
self.warning(
"Malformed LyX file:"
"Missing '\\end_preamble'."
"\nAdding it now and hoping"
"for the best.")
"for the best."
)
self.preamble.append(line)
if check_token(decoded, '\\end_preamble'):
if check_token(decoded, "\\end_preamble"):
continue
line = line.rstrip()
if not line:
continue
if decoded.split()[0] in ("\\layout", "\\begin_layout",
"\\begin_body", "\\begin_deeper"):
if decoded.split()[0] in (
"\\layout",
"\\begin_layout",
"\\begin_body",
"\\begin_deeper",
):
self.body.append(line)
break
self.header.append(line)
i = find_token(self.header, b'\\textclass', 0)
i = find_token(self.header, b"\\textclass", 0)
if i == -1:
self.warning("Malformed LyX file: Missing '\\textclass'.")
i = find_token(self.header, b'\\lyxformat', 0) + 1
self.header[i:i] = [b'\\textclass article']
i = find_token(self.header, b"\\lyxformat", 0) + 1
self.header[i:i] = [b"\\textclass article"]
self.textclass = get_value(self.header, b"\\textclass", 0,
default = b"")
self.language = get_value(self.header, b"\\language", 0,
default = b"english").decode('ascii')
self.inputencoding = get_value(self.header, b"\\inputencoding", 0,
default = b"auto").decode('ascii')
self.textclass = get_value(self.header, b"\\textclass", 0, default=b"")
self.language = get_value(self.header, b"\\language", 0, default=b"english").decode(
"ascii"
)
self.inputencoding = get_value(
self.header, b"\\inputencoding", 0, default=b"auto"
).decode("ascii")
self.format = self.read_format()
self.initial_format = self.format
self.encoding = get_encoding(self.language,
self.inputencoding, self.format,
self.cjk_encoding)
self.encoding = get_encoding(
self.language, self.inputencoding, self.format, self.cjk_encoding
)
self.initial_version = self.read_version()
# Second pass over header and preamble, now we know the file encoding
@ -404,7 +432,6 @@ class LyX_base:
break
self.body.append(trim_eol(line))
def write(self):
"Writes the LyX file to self.output."
self.choose_output(self.output)
@ -412,18 +439,18 @@ class LyX_base:
self.set_format()
self.set_textclass()
if self.encoding == "auto":
self.encoding = get_encoding(self.language, self.encoding,
self.format, self.cjk_encoding)
self.encoding = get_encoding(
self.language, self.encoding, self.format, self.cjk_encoding
)
if self.preamble:
i = find_token(self.header, '\\textclass', 0) + 1
preamble = ['\\begin_preamble'] + self.preamble + ['\\end_preamble']
i = find_token(self.header, "\\textclass", 0) + 1
preamble = ["\\begin_preamble"] + self.preamble + ["\\end_preamble"]
header = self.header[:i] + preamble + self.header[i:]
else:
header = self.header
for line in header + [''] + self.body:
self.output.write(line+'\n')
for line in header + [""] + self.body:
self.output.write(line + "\n")
def choose_output(self, output):
"""Choose output streams dealing transparently with
@ -435,20 +462,19 @@ class LyX_base:
# interfaces.
if self.compressed:
if output:
outputfileobj = open(output, 'wb')
outputfileobj = open(output, "wb")
else:
# We cannot not use stdout directly since it needs text, not bytes in python 3
outputfileobj = os.fdopen(sys.stdout.fileno(), 'wb')
outputfileobj = os.fdopen(sys.stdout.fileno(), "wb")
# We cannot not use gzip.open() since it is not supported by python 2
zipbuffer = gzip.GzipFile(mode='wb', fileobj=outputfileobj)
zipbuffer = gzip.GzipFile(mode="wb", fileobj=outputfileobj)
# We do not want to use different newlines on different OSes inside zipped files
self.output = io.TextIOWrapper(zipbuffer, encoding=self.encoding, newline='\n')
self.output = io.TextIOWrapper(zipbuffer, encoding=self.encoding, newline="\n")
else:
if output:
self.output = open(output, 'w', encoding=self.encoding)
self.output = open(output, "w", encoding=self.encoding)
else:
self.output = open(sys.stdout.fileno(), 'w', encoding=self.encoding)
self.output = open(sys.stdout.fileno(), "w", encoding=self.encoding)
def choose_input(self, input):
"""Choose input stream, dealing transparently with
@ -456,27 +482,26 @@ class LyX_base:
# Since we do not know the encoding yet we need to read the input as
# bytes in binary mode, and convert later to unicode.
if input and input != '-':
if input and input != "-":
self.dir = os.path.dirname(os.path.abspath(input))
try:
gzip.open(input).readline()
self.input = gzip.open(input)
self.compressed = True
except:
self.input = open(input, 'rb')
self.input = open(input, "rb")
self.compressed = False
else:
self.dir = ''
self.input = os.fdopen(sys.stdin.fileno(), 'rb')
self.dir = ""
self.input = os.fdopen(sys.stdin.fileno(), "rb")
self.compressed = False
def lyxformat(self, format):
"Returns the file format representation, an integer."
result = format_re.match(format)
if result:
format = int(result.group(1) + result.group(2))
elif format == '2':
elif format == "2":
format = 200
else:
self.error(str(format) + ": " + "Invalid LyX file.")
@ -487,7 +512,6 @@ class LyX_base:
self.error(str(format) + ": " + "Format not supported.")
return None
def read_version(self):
"""Searchs for clues of the LyX version used to write the
file, returns the most likely value, or None otherwise."""
@ -511,16 +535,19 @@ class LyX_base:
if not res:
self.warning(line)
# self.warning("Version %s" % result.group(1))
return res.decode('ascii')
return res.decode("ascii")
self.warning(str(self.header[:2]))
return None
def set_version(self):
"Set the header with the version used."
initial_comment = " ".join(["#LyX %s created this file." % version__,
"For more info see https://www.lyx.org/"])
initial_comment = " ".join(
[
"#LyX %s created this file." % version__,
"For more info see https://www.lyx.org/",
]
)
# Simple heuristic to determine the comment that always starts
# a lyx file
@ -534,21 +561,19 @@ class LyX_base:
# 2) the second line had the lyx version used
# later we decided that 1) was a privacy risk for no gain
# here we remove the second line effectively erasing 1)
if self.header[1][0] == '#':
if self.header[1][0] == "#":
del self.header[1]
def read_format(self):
    """Return the file format announced in the header.

    The first header line matching `fileformat` decides the result, which
    is passed through self.lyxformat(). If no line matches, self.error()
    is called and None is returned."""
    for header_line in self.header:
        match = fileformat.match(header_line.decode("ascii"))
        if not match:
            continue
        return self.lyxformat(match.group(1))

    # Reached only when no header line carried a format declaration.
    self.error("Invalid LyX File: Missing format.")
    return None
def set_format(self):
"Set the file format of the file, in the header."
if self.format <= 217:
@ -558,12 +583,10 @@ class LyX_base:
i = find_token(self.header, "\\lyxformat", 0)
self.header[i] = "\\lyxformat %s" % format
def set_textclass(self):
    "Rewrite the \\textclass line of the header from self.textclass."
    textclass_pos = find_token(self.header, "\\textclass", 0)
    new_line = "\\textclass %s" % self.textclass
    self.header[textclass_pos] = new_line
# Note that the module will be added at the END of the extant ones
def add_module(self, module):
"Append module to the modules list."
@ -586,7 +609,6 @@ class LyX_base:
return
self.header.insert(j, module)
def del_module(self, module):
"Delete `module` from module list, return success."
modlist = self.get_module_list()
@ -598,15 +620,14 @@ class LyX_base:
def get_module_list(self):
    """Return the header lines between \\begin_modules and \\end_modules.

    An empty list is returned when the header has no \\begin_modules."""
    begin = find_token(self.header, "\\begin_modules", 0)
    if begin != -1:
        end = find_token(self.header, "\\end_modules", begin)
        return self.header[begin + 1 : end]
    return []
def set_module_list(self, mlist):
i = find_token(self.header, "\\begin_modules", 0)
if (i == -1):
if i == -1:
# No modules yet included
tclass = find_token(self.header, "\\textclass", 0)
if tclass == -1:
@ -620,32 +641,32 @@ class LyX_base:
return
j += 1
if mlist:
mlist = ['\\begin_modules'] + mlist + ['\\end_modules']
mlist = ["\\begin_modules"] + mlist + ["\\end_modules"]
self.header[i:j] = mlist
def set_parameter(self, param, value):
    """Replace the header line of parameter `param` with the new `value`.

    When the header has no such parameter, a level-3 warning is emitted
    and the header is left untouched."""
    pos = find_token(self.header, "\\" + param, 0)
    if pos != -1:
        self.header[pos] = f"\\{param} {str(value)}"
        return
    self.warning("Parameter not found in the header: %s" % param, 3)
def is_default_layout(self, layout):
    """Tell whether `layout` is the default layout of this class.

    Returns 1 for "Standard" or for self.default_layout, 0 otherwise."""
    # FIXME: Check against the real text class default layout
    is_default = layout == "Standard" or layout == self.default_layout
    return 1 if is_default else 0
def convert(self):
"Convert from current (self.format) to self.end_format."
if self.format == self.end_format:
self.warning("No conversion needed: Target format %s "
"same as current format!" % self.format, default_debug__)
self.warning(
"No conversion needed: Target format %s "
"same as current format!" % self.format,
default_debug__,
)
return
mode, conversion_chain = self.chain()
@ -654,17 +675,20 @@ class LyX_base:
for step in conversion_chain:
steps = getattr(__import__("lyx_" + step), mode)
self.warning(f"Convertion step: {step} - {mode}",
default_debug__ + 1)
self.warning(f"Convertion step: {step} - {mode}", default_debug__ + 1)
if not steps:
self.error("The conversion to an older "
"format (%s) is not implemented." % self.format)
self.error(
"The conversion to an older "
"format (%s) is not implemented." % self.format
)
multi_conv = len(steps) != 1
for version, table in steps:
if multi_conv and \
(self.format >= version and mode == "convert") or\
(self.format <= version and mode == "revert"):
if (
multi_conv
and (self.format >= version and mode == "convert")
or (self.format <= version and mode == "revert")
):
continue
for conv in table:
@ -672,22 +696,22 @@ class LyX_base:
try:
conv(self)
except:
self.warning("An error occurred in %s, %s" %
(version, str(conv)),
default_debug__)
self.warning(
"An error occurred in %s, %s" % (version, str(conv)),
default_debug__,
)
if not self.try_hard:
raise
self.status = 2
else:
self.warning("%lf: Elapsed time on %s" %
(time.time() - init_t,
str(conv)), default_debug__ +
1)
self.warning(
"%lf: Elapsed time on %s" % (time.time() - init_t, str(conv)),
default_debug__ + 1,
)
self.format = version
if self.end_format == self.format:
return
def chain(self):
"""This is where all the decisions related with the
conversion are taken. It returns a list of modules needed to
@ -705,9 +729,11 @@ class LyX_base:
if not correct_version:
if format <= 215:
self.warning("Version does not match file format, "
"discarding it. (Version %s, format %d)" %
(self.initial_version, self.format))
self.warning(
"Version does not match file format, "
"discarding it. (Version %s, format %d)"
% (self.initial_version, self.format)
)
for rel in format_relation:
if format in rel[1]:
initial_step = rel[0]
@ -753,7 +779,6 @@ class LyX_base:
self.warning("Convertion mode: %s\tsteps%s" % (mode, steps), 10)
return mode, steps
def append_local_layout(self, new_layout):
"Append `new_layout` to the local layouts."
# new_layout may be a string or a list of strings (lines)
@ -785,8 +810,10 @@ class LyX_base:
if i == -1:
return False
j = i + len(layout_def)
if (self.header[i-1] == "\\begin_local_layout" and
self.header[j] == "\\end_local_layout"):
if (
self.header[i - 1] == "\\begin_local_layout"
and self.header[j] == "\\end_local_layout"
):
i -= 1
j += 1
self.header[i:j] = []
@ -801,6 +828,7 @@ class LyX_base:
self.header[i:j] = []
return True
# Part of an unfinished attempt to make lyx2lyx give a more
# structured view of the document.
# def get_toc(self, depth = 4):
@ -867,12 +895,30 @@ class LyX_base:
class File(LyX_base):
    "This class reads existing LyX files."

    def __init__(
        self,
        end_format=0,
        input="",
        output="",
        error="",
        debug=default_debug__,
        try_hard=0,
        cjk_encoding="",
        final_version="",
        systemlyxdir="",
    ):
        """Initialise the base class with the given options, then read
        the input right away via self.read()."""
        LyX_base.__init__(
            self,
            end_format=end_format,
            input=input,
            output=output,
            error=error,
            debug=debug,
            try_hard=try_hard,
            cjk_encoding=cjk_encoding,
            final_version=final_version,
            systemlyxdir=systemlyxdir,
        )
        self.read()

View File

@ -20,6 +20,7 @@ dictionary, ready to use by other python modules"""
import pprint
def parse_line(line):
"Parse line from languages and return it as a list."
j = 0
@ -45,15 +46,14 @@ def parse_line(line):
return tmp
if __name__ == '__main__':
if __name__ == "__main__":
lines = open("../languages", "rb")
lang = {}
for line in lines:
if line[:1] != '#':
if line[:1] != "#":
tmp = parse_line(line[:-1])
lang[tmp[0]] = tmp[1:]
print("# This file is generated by generate_incoding_info.py from lib/languages file.")
print("# Do not change this file directly.")
print()

View File

@ -1,103 +1,88 @@
# This file is generated by generate_incoding_info.py from lib/languages file.
# Do not change this file directly.
lang = {'afrikaans': ['afrikaans', 'Afrikaans', 'false', 'iso8859-1', 'af_ZA', ''],
'american': ['american', 'American', 'false', 'iso8859-1', 'en_US', ''],
'arabic': ['arabic', 'Arabic', 'true', 'iso8859-6', 'ar_SA', ''],
'austrian': ['austrian', 'Austrian', 'false', 'iso8859-1', 'de_AT', ''],
'bahasa': ['bahasa', 'Bahasa', 'false', 'iso8859-1', 'in_ID', ''],
'basque': ['basque', 'Basque', 'false', 'iso8859-1', 'eu_ES', ''],
'belarusian': ['belarusian', 'Belarusian', 'false', 'cp1251', 'be_BY', ''],
'brazil': ['brazil',
'Portuguese (Brazil)',
'false',
'iso8859-1',
'pt_BR',
''],
'breton': ['breton', 'Breton', 'false', 'iso8859-1', 'br_FR', ''],
'british': ['british', 'British', 'false', 'iso8859-1', 'en_GB', ''],
'bulgarian': ['bulgarian', 'Bulgarian', 'false', 'cp1251', 'bg_BG', ''],
'canadian': ['canadian', 'Canadian', 'false', 'iso8859-1', 'en_CA', ''],
'canadien': ['canadien',
'French Canadian',
'false',
'iso8859-1',
'fr_CA',
''],
'catalan': ['catalan', 'Catalan', 'false', 'iso8859-1', 'ca_ES', ''],
'croatian': ['croatian', 'Croatian', 'false', 'iso8859-2', 'hr_HR', ''],
'czech': ['czech', 'Czech', 'false', 'iso8859-2', 'cs_CZ', ''],
'danish': ['danish', 'Danish', 'false', 'iso8859-1', 'da_DK', ''],
'default': ['default', 'default', 'false', 'iso8859-1', 'C', ''],
'dutch': ['dutch', 'Dutch', 'false', 'iso8859-1', 'nl_NL', ''],
'english': ['english', 'English', 'false', 'iso8859-1', 'en_US', ''],
'esperanto': ['esperanto', 'Esperanto', 'false', 'iso8859-3', 'eo', ''],
'estonian': ['estonian', 'Estonian', 'false', 'iso8859-1', 'et_EE', ''],
'finnish': ['finnish', 'Finnish', 'false', 'iso8859-1', 'fi_FI', ''],
'french': ['french',
'French',
'false',
'iso8859-1',
'fr_FR',
'\\addto\\extrasfrench{\\providecommand{\\og}{\\leavevmode\\flqq~}\\providecommand{\\fg}{\\ifdim\\lastskip>\\z@\\unskip\\fi~\\frqq}}'],
'frenchb': ['french', 'French', 'false', 'iso8859-1', 'fr_FR', ''], # for compatibility reasons
'galician': ['galician', 'Galician', 'false', 'iso8859-1', 'gl_ES', ''],
'german': ['german', 'German', 'false', 'iso8859-1', 'de_DE', ''],
'greek': ['greek', 'Greek', 'false', 'iso8859-7', 'el_GR', ''],
'hebrew': ['hebrew', 'Hebrew', 'true', 'cp1255', 'he_IL', ''],
'icelandic': ['icelandic', 'Icelandic', 'false', 'iso8859-1', 'is_IS', ''],
'irish': ['irish', 'Irish', 'false', 'iso8859-1', 'ga_IE', ''],
'italian': ['italian', 'Italian', 'false', 'iso8859-1', 'it_IT', ''],
'kazakh': ['kazakh', 'Kazakh', 'false', 'pt154', 'kk_KZ', ''],
'latvian': ['latvian', 'Latvian', 'false', 'iso8859-13', 'lv_LV', ''],
'lithuanian': ['lithuanian',
'Lithuanian',
'false',
'iso8859-13',
'lt_LT',
''],
'magyar': ['magyar', 'Magyar', 'false', 'iso8859-2', 'hu_HU', ''],
'naustrian': ['naustrian',
'Austrian (new spelling)',
'false',
'iso8859-1',
'de_AT',
''],
'ngerman': ['ngerman',
'German (new spelling)',
'false',
'iso8859-1',
'de_DE',
''],
'norsk': ['norsk', 'Norsk', 'false', 'iso8859-1', 'no_NO', ''],
'nynorsk': ['nynorsk', 'Nynorsk', 'false', 'iso8859-1', 'nn_NO', ''],
'polish': ['polish', 'Polish', 'false', 'iso8859-2', 'pl_PL', ''],
'portuges': ['portuges', 'Portugese', 'false', 'iso8859-1', 'pt_PT', ''],
'romanian': ['romanian', 'Romanian', 'false', 'iso8859-2', 'ro_RO', ''],
'russian': ['russian', 'Russian', 'false', 'koi8-r', 'ru_RU', ''],
'scottish': ['scottish', 'Scottish', 'false', 'iso8859-1', 'gd_GB', ''],
'serbian': ['croatian', 'Serbian', 'false', 'iso8859-5', 'sr_HR', ''],
'serbocroatian': ['croatian',
'Serbo-Croatian',
'false',
'iso8859-2',
'sh_HR',
''],
'slovak': ['slovak', 'Slovak', 'false', 'iso8859-2', 'sk_SK', ''],
'slovene': ['slovene', 'Slovene', 'false', 'iso8859-2', 'sl_SI', ''],
'spanish': ['spanish',
'Spanish',
'false',
'iso8859-1',
'es_ES',
'\\deactivatetilden'],
'swedish': ['swedish', 'Swedish', 'false', 'iso8859-1', 'sv_SE', ''],
'thai': ['thai',
'Thai',
'false',
'tis620-0',
'th_TH',
'\\usepackage{thswitch}'],
'turkish': ['turkish', 'Turkish', 'false', 'iso8859-9', 'tr_TR', ''],
'ukrainian': ['ukrainian', 'Ukrainian', 'false', 'koi8-u', 'uk_UA', ''],
'welsh': ['welsh', 'Welsh', 'false', 'iso8859-1', 'cy_GB', '']}
lang = {
"afrikaans": ["afrikaans", "Afrikaans", "false", "iso8859-1", "af_ZA", ""],
"american": ["american", "American", "false", "iso8859-1", "en_US", ""],
"arabic": ["arabic", "Arabic", "true", "iso8859-6", "ar_SA", ""],
"austrian": ["austrian", "Austrian", "false", "iso8859-1", "de_AT", ""],
"bahasa": ["bahasa", "Bahasa", "false", "iso8859-1", "in_ID", ""],
"basque": ["basque", "Basque", "false", "iso8859-1", "eu_ES", ""],
"belarusian": ["belarusian", "Belarusian", "false", "cp1251", "be_BY", ""],
"brazil": ["brazil", "Portuguese (Brazil)", "false", "iso8859-1", "pt_BR", ""],
"breton": ["breton", "Breton", "false", "iso8859-1", "br_FR", ""],
"british": ["british", "British", "false", "iso8859-1", "en_GB", ""],
"bulgarian": ["bulgarian", "Bulgarian", "false", "cp1251", "bg_BG", ""],
"canadian": ["canadian", "Canadian", "false", "iso8859-1", "en_CA", ""],
"canadien": ["canadien", "French Canadian", "false", "iso8859-1", "fr_CA", ""],
"catalan": ["catalan", "Catalan", "false", "iso8859-1", "ca_ES", ""],
"croatian": ["croatian", "Croatian", "false", "iso8859-2", "hr_HR", ""],
"czech": ["czech", "Czech", "false", "iso8859-2", "cs_CZ", ""],
"danish": ["danish", "Danish", "false", "iso8859-1", "da_DK", ""],
"default": ["default", "default", "false", "iso8859-1", "C", ""],
"dutch": ["dutch", "Dutch", "false", "iso8859-1", "nl_NL", ""],
"english": ["english", "English", "false", "iso8859-1", "en_US", ""],
"esperanto": ["esperanto", "Esperanto", "false", "iso8859-3", "eo", ""],
"estonian": ["estonian", "Estonian", "false", "iso8859-1", "et_EE", ""],
"finnish": ["finnish", "Finnish", "false", "iso8859-1", "fi_FI", ""],
"french": [
"french",
"French",
"false",
"iso8859-1",
"fr_FR",
"\\addto\\extrasfrench{\\providecommand{\\og}{\\leavevmode\\flqq~}\\providecommand{\\fg}{\\ifdim\\lastskip>\\z@\\unskip\\fi~\\frqq}}",
],
"frenchb": [
"french",
"French",
"false",
"iso8859-1",
"fr_FR",
"",
], # for compatibility reasons
"galician": ["galician", "Galician", "false", "iso8859-1", "gl_ES", ""],
"german": ["german", "German", "false", "iso8859-1", "de_DE", ""],
"greek": ["greek", "Greek", "false", "iso8859-7", "el_GR", ""],
"hebrew": ["hebrew", "Hebrew", "true", "cp1255", "he_IL", ""],
"icelandic": ["icelandic", "Icelandic", "false", "iso8859-1", "is_IS", ""],
"irish": ["irish", "Irish", "false", "iso8859-1", "ga_IE", ""],
"italian": ["italian", "Italian", "false", "iso8859-1", "it_IT", ""],
"kazakh": ["kazakh", "Kazakh", "false", "pt154", "kk_KZ", ""],
"latvian": ["latvian", "Latvian", "false", "iso8859-13", "lv_LV", ""],
"lithuanian": ["lithuanian", "Lithuanian", "false", "iso8859-13", "lt_LT", ""],
"magyar": ["magyar", "Magyar", "false", "iso8859-2", "hu_HU", ""],
"naustrian": [
"naustrian",
"Austrian (new spelling)",
"false",
"iso8859-1",
"de_AT",
"",
],
"ngerman": ["ngerman", "German (new spelling)", "false", "iso8859-1", "de_DE", ""],
"norsk": ["norsk", "Norsk", "false", "iso8859-1", "no_NO", ""],
"nynorsk": ["nynorsk", "Nynorsk", "false", "iso8859-1", "nn_NO", ""],
"polish": ["polish", "Polish", "false", "iso8859-2", "pl_PL", ""],
"portuges": ["portuges", "Portugese", "false", "iso8859-1", "pt_PT", ""],
"romanian": ["romanian", "Romanian", "false", "iso8859-2", "ro_RO", ""],
"russian": ["russian", "Russian", "false", "koi8-r", "ru_RU", ""],
"scottish": ["scottish", "Scottish", "false", "iso8859-1", "gd_GB", ""],
"serbian": ["croatian", "Serbian", "false", "iso8859-5", "sr_HR", ""],
"serbocroatian": ["croatian", "Serbo-Croatian", "false", "iso8859-2", "sh_HR", ""],
"slovak": ["slovak", "Slovak", "false", "iso8859-2", "sk_SK", ""],
"slovene": ["slovene", "Slovene", "false", "iso8859-2", "sl_SI", ""],
"spanish": [
"spanish",
"Spanish",
"false",
"iso8859-1",
"es_ES",
"\\deactivatetilden",
],
"swedish": ["swedish", "Swedish", "false", "iso8859-1", "sv_SE", ""],
"thai": ["thai", "Thai", "false", "tis620-0", "th_TH", "\\usepackage{thswitch}"],
"turkish": ["turkish", "Turkish", "false", "iso8859-9", "tr_TR", ""],
"ukrainian": ["ukrainian", "Ukrainian", "false", "koi8-u", "uk_UA", ""],
"welsh": ["welsh", "Welsh", "false", "iso8859-1", "cy_GB", ""],
}

View File

@ -15,7 +15,7 @@
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
'''
"""
This module offers several free functions to help with lyx2lyx'ing.
More documentation is below, but here is a quick guide to what
they do. Optional arguments are marked by brackets.
@ -87,14 +87,21 @@ revert_language(document, lyxname, babelname="", polyglossianame=""):
Reverts native language support to ERT
If babelname or polyglossianame is empty, it is assumed
this language package is not supported for the given language.
'''
"""
import re
import sys
from parser_tools import (find_token, find_end_of_inset, get_containing_layout,
get_containing_inset, get_value, get_bool_value)
from parser_tools import (
find_token,
find_end_of_inset,
get_containing_layout,
get_containing_inset,
get_value,
get_bool_value,
)
from unicode_symbols import unicode_reps
# This will accept either a list of lines or a single line.
# It is bad practice to pass something with embedded newlines,
# though we will handle that.
@ -105,7 +112,7 @@ def add_to_preamble(document, text):
# split on \n just in case
# it'll give us the one element list we want
# if there's no \n, too
text = text.split('\n')
text = text.split("\n")
i = 0
prelen = len(document.preamble)
@ -136,7 +143,7 @@ def insert_to_preamble(document, text, index = 0):
# split on \n just in case
# it'll give us the one element list we want
# if there's no \n, too
text = text.split('\n')
text = text.split("\n")
text.insert(0, "% Added by lyx2lyx")
document.preamble[index:index] = text
@ -146,6 +153,7 @@ def insert_to_preamble(document, text, index = 0):
# Created from the reversed list to keep the first of alternative definitions.
licr_table = {ord(ch): cmd for cmd, ch in unicode_reps[::-1]}
def put_cmd_in_ert(cmd, is_open=False, as_paragraph=False):
"""
Return ERT inset wrapping `cmd` as a list of strings.
@ -157,14 +165,26 @@ def put_cmd_in_ert(cmd, is_open=False, as_paragraph=False):
"""
status = {False: "collapsed", True: "open"}
ert_inset = ["\\begin_inset ERT", "status %s"%status[is_open], "",
"\\begin_layout Plain Layout", "",
ert_inset = [
"\\begin_inset ERT",
"status %s" % status[is_open],
"",
"\\begin_layout Plain Layout",
"",
# content here ([5:5])
"\\end_layout", "", "\\end_inset"]
"\\end_layout",
"",
"\\end_inset",
]
paragraph = ["\\begin_layout Standard",
paragraph = [
"\\begin_layout Standard",
# content here ([1:1])
"", "", "\\end_layout", ""]
"",
"",
"\\end_layout",
"",
]
# ensure cmd is an unicode instance and make it "LyX safe".
if isinstance(cmd, list):
cmd = "\n".join(cmd)
@ -179,7 +199,7 @@ def put_cmd_in_ert(cmd, is_open=False, as_paragraph=False):
def get_ert(lines, i, verbatim=False):
'Convert an ERT inset into LaTeX.'
"Convert an ERT inset into LaTeX."
if not lines[i].startswith("\\begin_inset ERT"):
return ""
j = find_end_of_inset(lines, i)
@ -213,7 +233,7 @@ def get_ert(lines, i, verbatim = False):
def lyx2latex(document, lines):
'Convert some LyX stuff into corresponding LaTeX stuff, as best we can.'
"Convert some LyX stuff into corresponding LaTeX stuff, as best we can."
content = ""
ert_end = 0
@ -272,67 +292,86 @@ def lyx2latex(document, lines):
length = latex_length(line[8:])[1]
line = hspace + "{" + length + "}"
hspace = ""
elif line.isspace() or \
line.startswith("\\begin_layout") or \
line.startswith("\\end_layout") or \
line.startswith("\\begin_inset") or \
line.startswith("\\end_inset") or \
line.startswith("\\lang") or \
line.strip() == "status collapsed" or \
line.strip() == "status open":
elif (
line.isspace()
or line.startswith("\\begin_layout")
or line.startswith("\\end_layout")
or line.startswith("\\begin_inset")
or line.startswith("\\end_inset")
or line.startswith("\\lang")
or line.strip() == "status collapsed"
or line.strip() == "status open"
):
# skip all that stuff
continue
# this needs to be added to the preamble because of cases like
# \textmu, \textbackslash, etc.
add_to_preamble(document, ['% added by lyx2lyx for converted index entries',
'\\@ifundefined{textmu}',
' {\\usepackage{textcomp}}{}'])
add_to_preamble(
document,
[
"% added by lyx2lyx for converted index entries",
"\\@ifundefined{textmu}",
" {\\usepackage{textcomp}}{}",
],
)
# a lossless reversion is not possible
# try at least to handle some common insets and settings
if ert_end >= curline:
line = line.replace(r'\backslash', '\\')
line = line.replace(r"\backslash", "\\")
else:
# No need to add "{}" after single-nonletter macros
line = line.replace('&', '\\&')
line = line.replace('#', '\\#')
line = line.replace('^', '\\textasciicircum{}')
line = line.replace('%', '\\%')
line = line.replace('_', '\\_')
line = line.replace('$', '\\$')
line = line.replace("&", "\\&")
line = line.replace("#", "\\#")
line = line.replace("^", "\\textasciicircum{}")
line = line.replace("%", "\\%")
line = line.replace("_", "\\_")
line = line.replace("$", "\\$")
# Do the LyX text --> LaTeX conversion
for rep in unicode_reps:
line = line.replace(rep[1], rep[0])
line = line.replace(r'\backslash', r'\textbackslash{}')
line = line.replace(r'\series bold', r'\bfseries{}').replace(r'\series default', r'\mdseries{}')
line = line.replace(r'\shape italic', r'\itshape{}').replace(r'\shape smallcaps', r'\scshape{}')
line = line.replace(r'\shape slanted', r'\slshape{}').replace(r'\shape default', r'\upshape{}')
line = line.replace(r'\emph on', r'\em{}').replace(r'\emph default', r'\em{}')
line = line.replace(r'\noun on', r'\scshape{}').replace(r'\noun default', r'\upshape{}')
line = line.replace(r'\bar under', r'\underbar{').replace(r'\bar default', r'}')
line = line.replace(r'\family sans', r'\sffamily{}').replace(r'\family default', r'\normalfont{}')
line = line.replace(r'\family typewriter', r'\ttfamily{}').replace(r'\family roman', r'\rmfamily{}')
line = line.replace(r'\InsetSpace ', r'').replace(r'\SpecialChar ', r'')
line = line.replace(r"\backslash", r"\textbackslash{}")
line = line.replace(r"\series bold", r"\bfseries{}").replace(
r"\series default", r"\mdseries{}"
)
line = line.replace(r"\shape italic", r"\itshape{}").replace(
r"\shape smallcaps", r"\scshape{}"
)
line = line.replace(r"\shape slanted", r"\slshape{}").replace(
r"\shape default", r"\upshape{}"
)
line = line.replace(r"\emph on", r"\em{}").replace(r"\emph default", r"\em{}")
line = line.replace(r"\noun on", r"\scshape{}").replace(
r"\noun default", r"\upshape{}"
)
line = line.replace(r"\bar under", r"\underbar{").replace(r"\bar default", r"}")
line = line.replace(r"\family sans", r"\sffamily{}").replace(
r"\family default", r"\normalfont{}"
)
line = line.replace(r"\family typewriter", r"\ttfamily{}").replace(
r"\family roman", r"\rmfamily{}"
)
line = line.replace(r"\InsetSpace ", r"").replace(r"\SpecialChar ", r"")
content += line
return content
def lyx2verbatim(document, lines):
'Convert some LyX stuff into corresponding verbatim stuff, as best we can.'
"Convert some LyX stuff into corresponding verbatim stuff, as best we can."
content = lyx2latex(document, lines)
content = re.sub(r'\\(?!backslash)', r'\n\\backslash\n', content)
content = re.sub(r"\\(?!backslash)", r"\n\\backslash\n", content)
return content
def latex_length(slen):
'''
"""
Convert lengths to their LaTeX representation. Returns (bool, length),
where the bool tells us if it was a percentage, and the length is the
LaTeX representation.
'''
"""
i = 0
percent = False
# the slen has the form
@ -342,13 +381,14 @@ def latex_length(slen):
# the + always precedes the -
# Convert relative lengths to LaTeX units
units = {"col%": "\\columnwidth",
units = {
"col%": "\\columnwidth",
"text%": "\\textwidth",
"page%": "\\paperwidth",
"line%": "\\linewidth",
"theight%": "\\textheight",
"pheight%": "\\paperheight",
"baselineskip%": "\\baselineskip"
"baselineskip%": "\\baselineskip",
}
for unit in list(units.keys()):
i = slen.find(unit)
@ -392,7 +432,8 @@ def length_in_bp(length):
em_width = 10.0 / 72.27 # assume 10pt font size
text_width = 8.27 / 1.7 # assume A4 with default margins
# scale factors are taken from Length::inInch()
scales = {"bp" : 1.0,
scales = {
"bp": 1.0,
"cc": (72.0 / (72.27 / (12.0 * 0.376 * 2.845))),
"cm": (72.0 / 2.54),
"dd": (72.0 / (72.27 / (0.376 * 2.845))),
@ -409,9 +450,10 @@ def length_in_bp(length):
"page%": (72.0 * text_width * 1.7 / 100.0),
"line%": (72.0 * text_width / 100.0),
"theight%": (72.0 * text_width * 1.787 / 100.0),
"pheight%" : (72.0 * text_width * 2.2 / 100.0)}
"pheight%": (72.0 * text_width * 2.2 / 100.0),
}
rx = re.compile(r'^\s*([^a-zA-Z%]+)([a-zA-Z%]+)\s*$')
rx = re.compile(r"^\s*([^a-zA-Z%]+)([a-zA-Z%]+)\s*$")
m = rx.match(length)
if not m:
document.warning("Invalid length value: " + length + ".")
@ -428,7 +470,7 @@ def revert_flex_inset(lines, name, LaTeXname):
"Convert flex insets to TeX code"
i = 0
while True:
i = find_token(lines, '\\begin_inset Flex ' + name, i)
i = find_token(lines, "\\begin_inset Flex " + name, i)
if i == -1:
return
z = find_end_of_inset(lines, i)
@ -439,17 +481,17 @@ def revert_flex_inset(lines, name, LaTeXname):
# remove the \end_inset
lines[z - 2 : z + 1] = put_cmd_in_ert("}")
# we need to reset character layouts if necessary
j = find_token(lines, '\\emph on', i, z)
k = find_token(lines, '\\noun on', i, z)
l = find_token(lines, '\\series', i, z)
m = find_token(lines, '\\family', i, z)
n = find_token(lines, '\\shape', i, z)
o = find_token(lines, '\\color', i, z)
p = find_token(lines, '\\size', i, z)
q = find_token(lines, '\\bar under', i, z)
r = find_token(lines, '\\uuline on', i, z)
s = find_token(lines, '\\uwave on', i, z)
t = find_token(lines, '\\strikeout on', i, z)
j = find_token(lines, "\\emph on", i, z)
k = find_token(lines, "\\noun on", i, z)
l = find_token(lines, "\\series", i, z)
m = find_token(lines, "\\family", i, z)
n = find_token(lines, "\\shape", i, z)
o = find_token(lines, "\\color", i, z)
p = find_token(lines, "\\size", i, z)
q = find_token(lines, "\\bar under", i, z)
r = find_token(lines, "\\uuline on", i, z)
s = find_token(lines, "\\uwave on", i, z)
t = find_token(lines, "\\strikeout on", i, z)
if j != -1:
lines.insert(z - 2, "\\emph default")
if k != -1:
@ -481,17 +523,17 @@ def revert_font_attrs(lines, name, LaTeXname):
i = 0
changed = False
while True:
i = find_token(lines, name + ' on', i)
i = find_token(lines, name + " on", i)
if i == -1:
break
j = find_token(lines, name + ' default', i)
k = find_token(lines, name + ' on', i + 1)
j = find_token(lines, name + " default", i)
k = find_token(lines, name + " on", i + 1)
# if there is no default set, the style ends with the layout
# assure hereby that we found the correct layout end
if j != -1 and (j < k or k == -1):
lines[j : j + 1] = put_cmd_in_ert("}")
else:
j = find_token(lines, '\\end_layout', i)
j = find_token(lines, "\\end_layout", i)
lines[j:j] = put_cmd_in_ert("}")
lines[i : i + 1] = put_cmd_in_ert(LaTeXname + "{")
changed = True
@ -512,26 +554,26 @@ def revert_layout_command(lines, name, LaTeXname):
"Reverts a command from a layout to TeX code"
i = 0
while True:
i = find_token(lines, '\\begin_layout ' + name, i)
i = find_token(lines, "\\begin_layout " + name, i)
if i == -1:
return
k = -1
# find the next layout
j = i + 1
while k == -1:
j = find_token(lines, '\\begin_layout', j)
j = find_token(lines, "\\begin_layout", j)
l = len(lines)
# if nothing was found it was the last layout of the document
if j == -1:
lines[l - 4 : l - 4] = put_cmd_in_ert("}")
k = 0
# exclude plain layout because this can be TeX code or another inset
elif lines[j] != '\\begin_layout Plain Layout':
elif lines[j] != "\\begin_layout Plain Layout":
lines[j - 2 : j - 2] = put_cmd_in_ert("}")
k = 0
else:
j += 1
lines[i] = '\\begin_layout Standard'
lines[i] = "\\begin_layout Standard"
lines[i + 1 : i + 1] = put_cmd_in_ert(LaTeXname + "{")
i += 1
@ -591,13 +633,13 @@ def remove_document_option(document, option):
i = find_token(document.header, "\\options")
options = get_value(document.header, "\\options", i)
options = [op.strip() for op in options.split(',')]
options = [op.strip() for op in options.split(",")]
# Remove `option` from \options
options = [op for op in options if op != option]
if options:
document.header[i] = "\\options " + ','.join(options)
document.header[i] = "\\options " + ",".join(options)
else:
del document.header[i]
@ -606,20 +648,23 @@ def is_document_option(document, option):
"Find if _option_ is a document option"
options = get_value(document.header, "\\options")
options = [op.strip() for op in options.split(',')]
options = [op.strip() for op in options.split(",")]
return option in options
singlepar_insets = [s.strip() for s in
"Argument, Caption Above, Caption Below, Caption Bicaption,"
singlepar_insets = [
s.strip()
for s in "Argument, Caption Above, Caption Below, Caption Bicaption,"
"Caption Centered, Caption FigCaption, Caption Standard, Caption Table,"
"Flex Chemistry, Flex Fixme_Note, Flex Latin, Flex ListOfSlides,"
"Flex Missing_Figure, Flex PDF-Annotation, Flex PDF-Comment-Setup,"
"Flex Reflectbox, Flex S/R expression, Flex Sweave Input File,"
"Flex Sweave Options, Flex Thanks_Reference, Flex URL, Foot InTitle,"
"IPADeco, Index, Info, Phantom, Script".split(',')]
"IPADeco, Index, Info, Phantom, Script".split(",")
]
# print(singlepar_insets)
def revert_language(document, lyxname, babelname="", polyglossianame=""):
"Revert native language support"
@ -691,21 +736,24 @@ def revert_language(document, lyxname, babelname="", polyglossianame=""):
# \end_layout
# Ensure correct handling of list labels
if (parent[0] in ["Labeling", "Description"]
and not " " in "\n".join(document.body[parent[3]:i])):
if parent[0] in ["Labeling", "Description"] and not " " in "\n".join(
document.body[parent[3] : i]
):
# line `i+1` is first line of a list item,
# part before a space character is the label
# TODO: insets or language change before first space character
labelline = document.body[i+1].split(' ', 1)
labelline = document.body[i + 1].split(" ", 1)
if len(labelline) > 1:
# Insert a space in the (original) document language
# between label and remainder.
# print(" Label:", labelline, file=sys.stderr)
lines = [labelline[0],
lines = [
labelline[0],
"\\lang %s" % orig_doc_language,
" ",
"\\lang %s" % (primary and "english" or lyxname),
labelline[1]]
labelline[1],
]
document.body[i + 1 : i + 2] = lines
i_e += 4
@ -719,7 +767,11 @@ def revert_language(document, lyxname, babelname="", polyglossianame=""):
# skip insets
i_a = parent[3] # paragraph start line
container = get_containing_inset(document.body[i_a:i_e], langswitch - i_a)
if container and container[1] < langswitch-i_a and container[2] > langswitch-i_a:
if (
container
and container[1] < langswitch - i_a
and container[2] > langswitch - i_a
):
# print(" inset", container, file=sys.stderr)
continue
i_e = langswitch
@ -752,7 +804,7 @@ def revert_language(document, lyxname, babelname="", polyglossianame=""):
begin_cmd = "\\begin{otherlanguage}{%s}" % texname
end_cmd = "\\end{otherlanguage}"
if (not primary or texname == "english"):
if not primary or texname == "english":
try:
document.body[i_e:i_e] = put_cmd_in_ert(end_cmd)
document.body[i + 1 : i + 1] = put_cmd_in_ert(begin_cmd)
@ -776,13 +828,14 @@ def revert_language(document, lyxname, babelname="", polyglossianame=""):
if with_polyglossia:
# Define language in the user preamble
# (don't use \AtBeginDocument, this fails with some languages).
add_to_preamble(document, ["\\usepackage{polyglossia}",
"\\setotherlanguage{%s}" % polyglossianame])
add_to_preamble(
document,
["\\usepackage{polyglossia}", "\\setotherlanguage{%s}" % polyglossianame],
)
if primary:
# Changing the main language must be done in the document body.
doc_lang_switch = "\\resetdefaultlanguage{%s}" % polyglossianame
# Reset LaTeX main language if required and not already done
if doc_lang_switch and doc_lang_switch[1:] not in document.body[8:20]:
document.body[2:2] = put_cmd_in_ert(doc_lang_switch,
is_open=True, as_paragraph=True)
document.body[2:2] = put_cmd_in_ert(doc_lang_switch, is_open=True, as_paragraph=True)

View File

@ -24,4 +24,3 @@ revert = []
if __name__ == "__main__":
pass

View File

@ -17,9 +17,10 @@
"""Convert files to the file format generated by lyx 0.8"""
def add_inputencoding(document):
"Add the input encoding, latin1"
document.header.append('\\inputencoding latin1')
document.header.append("\\inputencoding latin1")
document.inputencoding = "latin1"
@ -30,4 +31,3 @@ revert = []
if __name__ == "__main__":
pass

View File

@ -17,13 +17,14 @@
"""Convert files to the file format generated by lyx 0.10"""
def regularise_header(document):
"Put each entry in header into a separate line."
i = 0
while i < len(document.header):
line = document.header[i]
if len(line.split('\\')) > 1:
tmp = [ '\\'+ token.strip() for token in line.split('\\')][1:]
if len(line.split("\\")) > 1:
tmp = ["\\" + token.strip() for token in line.split("\\")][1:]
document.header[i : i + 1] = tmp
i += len(tmp)
i += 1
@ -33,11 +34,11 @@ def find_next_space(line, j):
"""Return position of next space or backslash, which one comes
first, starting from position j, if none exists returns last
position in line (+1)."""
space_pos = line.find(' ', j)
space_pos = line.find(" ", j)
if space_pos == -1:
space_pos = len(line)
bksl_pos = line.find('\\', j)
bksl_pos = line.find("\\", j)
if bksl_pos == -1:
bksl_pos = len(line)
@ -47,17 +48,40 @@ def find_next_space(line, j):
def regularise_body(document):
"""Place tokens starting with a backslash into a separate line."""
getline_tokens = ["added_space_bottom", "added_space_top",
"align", "layout", "fill_bottom", "fill_top",
"labelwidthstring", "pagebreak_top",
"pagebreak_bottom", "noindent"]
getline_tokens = [
"added_space_bottom",
"added_space_top",
"align",
"layout",
"fill_bottom",
"fill_top",
"labelwidthstring",
"pagebreak_top",
"pagebreak_bottom",
"noindent",
]
noargs_tokens = ["backslash", "begin_deeper", "end_deeper",
"end_float", "end_inset", "hfill", "newline",
"protected_separator"]
noargs_tokens = [
"backslash",
"begin_deeper",
"end_deeper",
"end_float",
"end_inset",
"hfill",
"newline",
"protected_separator",
]
onearg_tokens = ["bar", "begin_float", "family", "latex", "shape",
"size", "series", "cursor"]
onearg_tokens = [
"bar",
"begin_float",
"family",
"latex",
"shape",
"size",
"series",
"cursor",
]
i = 0
while i < len(document.body):
@ -65,7 +89,7 @@ def regularise_body(document):
j = 0
new_block = []
while j < len(line):
k = line.find('\\', j)
k = line.find("\\", j)
if k == -1:
new_block += [line[j:]]
@ -108,8 +132,7 @@ def regularise_body(document):
j = l
continue
if inset in ["LatexCommand", "LatexDel", "Label", "Figure",
"Formula"]:
if inset in ["LatexCommand", "LatexDel", "Label", "Figure", "Formula"]:
new_block += [line[j:]]
break
@ -120,7 +143,7 @@ def regularise_body(document):
continue
document.warning("unkown inset %s" % inset)
assert(False)
assert False
# We are inside a latex inset, pass the text verbatim
new_block += [line[j:]]

View File

@ -26,12 +26,12 @@ def space_before_layout(document):
lines = document.body
i = 2 # skip first layout
while True:
i = find_token(lines, '\\layout', i)
i = find_token(lines, "\\layout", i)
if i == -1:
break
prot_space = lines[i-2].find('\\protected_separator')
if lines[i - 1] == '' and prot_space == -1:
prot_space = lines[i - 2].find("\\protected_separator")
if lines[i - 1] == "" and prot_space == -1:
del lines[i - 1]
i = i + 1
@ -45,7 +45,7 @@ def formula_inset_space_eat(document):
if i == -1:
break
if len(lines[i]) > 22 and lines[i][21] == ' ':
if len(lines[i]) > 22 and lines[i][21] == " ":
lines[i] = lines[i][:20] + lines[i][21:]
i = i + 1
@ -62,26 +62,26 @@ def update_tabular(document):
i = i + 1
format = lines[i][8:]
lines[i] = 'multicol4'
lines[i] = "multicol4"
i = i + 1
rows = int(lines[i].split()[0])
columns = int(lines[i].split()[1])
lines[i] = lines[i] + ' 0 0 -1 -1 -1 -1'
lines[i] = lines[i] + " 0 0 -1 -1 -1 -1"
i = i + 1
for j in range(rows):
lines[i] = lines[i] + ' 0 0'
lines[i] = lines[i] + " 0 0"
i = i + 1
for j in range(columns):
lines[i] = lines[i] + ' '
lines[i] = lines[i] + " "
i = i + 1
while lines[i].strip():
if not format:
lines[i] = lines[i] + ' 1 1'
lines[i] = lines[i] + ' 0 0 0'
lines[i] = lines[i] + " 1 1"
lines[i] = lines[i] + " 0 0 0"
i = i + 1
lines[i] = lines[i].strip()
@ -92,11 +92,13 @@ def final_dot(document):
lines = document.body
i = 0
while i < len(lines):
if lines[i][-1:] == '.' and lines[i+1][:1] != '\\' and \
lines[i+1][:1] != ' ' and len(lines[i]) + len(lines[i+1])<= 72 \
and lines[i+1] != '':
if (
lines[i][-1:] == "."
and lines[i + 1][:1] != "\\"
and lines[i + 1][:1] != " "
and len(lines[i]) + len(lines[i + 1]) <= 72
and lines[i + 1] != ""
):
lines[i] = lines[i] + lines[i + 1]
del lines[i + 1]
else:
@ -108,10 +110,10 @@ def update_inset_label(document):
lines = document.body
i = 0
while True:
i = find_token(lines, '\\begin_inset Label', i)
i = find_token(lines, "\\begin_inset Label", i)
if i == -1:
return
lines[i] = '\\begin_inset LatexCommand \\label{' + lines[i][19:] + '}'
lines[i] = "\\begin_inset LatexCommand \\label{" + lines[i][19:] + "}"
i = i + 1
@ -120,11 +122,10 @@ def update_latexdel(document):
lines = document.body
i = 0
while True:
i = find_token(lines, '\\begin_inset LatexDel', i)
i = find_token(lines, "\\begin_inset LatexDel", i)
if i == -1:
return
lines[i] = lines[i].replace('\\begin_inset LatexDel',
'\\begin_inset LatexCommand')
lines[i] = lines[i].replace("\\begin_inset LatexDel", "\\begin_inset LatexCommand")
i = i + 1
@ -132,28 +133,26 @@ def update_vfill(document):
"Update fill_top and fill_bottom."
lines = document.body
for i in range(len(lines)):
lines[i] = lines[i].replace('\\fill_top',
'\\added_space_top vfill')
lines[i] = lines[i].replace('\\fill_bottom',
'\\added_space_bottom vfill')
lines[i] = lines[i].replace("\\fill_top", "\\added_space_top vfill")
lines[i] = lines[i].replace("\\fill_bottom", "\\added_space_bottom vfill")
def update_space_units(document):
"Update space units."
lines = document.body
added_space_bottom = re.compile(r'\\added_space_bottom ([^ ]*)')
added_space_top = re.compile(r'\\added_space_top ([^ ]*)')
added_space_bottom = re.compile(r"\\added_space_bottom ([^ ]*)")
added_space_top = re.compile(r"\\added_space_top ([^ ]*)")
for i in range(len(lines)):
result = added_space_bottom.search(lines[i])
if result:
old = '\\added_space_bottom ' + result.group(1)
new = '\\added_space_bottom ' + str(float(result.group(1))) + 'cm'
old = "\\added_space_bottom " + result.group(1)
new = "\\added_space_bottom " + str(float(result.group(1))) + "cm"
lines[i] = lines[i].replace(old, new)
result = added_space_top.search(lines[i])
if result:
old = '\\added_space_top ' + result.group(1)
new = '\\added_space_top ' + str(float(result.group(1))) + 'cm'
old = "\\added_space_top " + result.group(1)
new = "\\added_space_top " + str(float(result.group(1))) + "cm"
lines[i] = lines[i].replace(old, new)
@ -161,13 +160,13 @@ def remove_cursor(document):
"Remove cursor, it is not saved on the file anymore."
lines = document.body
i = 0
cursor_re = re.compile(r'.*(\\cursor \d*)')
cursor_re = re.compile(r".*(\\cursor \d*)")
while True:
i = find_re(lines, cursor_re, i)
if i == -1:
break
cursor = cursor_re.search(lines[i]).group(1)
lines[i] = lines[i].replace(cursor, '')
lines[i] = lines[i].replace(cursor, "")
i = i + 1
@ -176,10 +175,10 @@ def remove_empty_insets(document):
lines = document.body
i = 0
while True:
i = find_token(lines, '\\begin_inset ', i)
i = find_token(lines, "\\begin_inset ", i)
if i == -1:
break
if lines[i] == '\\begin_inset ' and lines[i+1] == '\\end_inset ':
if lines[i] == "\\begin_inset " and lines[i + 1] == "\\end_inset ":
del lines[i]
del lines[i]
i = i + 1
@ -190,12 +189,12 @@ def remove_formula_latex(document):
lines = document.body
i = 0
while True:
i = find_token(lines, '\\latex formula_latex ', i)
i = find_token(lines, "\\latex formula_latex ", i)
if i == -1:
break
del lines[i]
i = find_token(lines, '\\latex default', i)
i = find_token(lines, "\\latex default", i)
if i == -1:
break
del lines[i]
@ -204,9 +203,9 @@ def remove_formula_latex(document):
def add_end_document(document):
"Add \\the_end to the end of the document."
lines = document.body
i = find_token(lines, '\\the_end', 0)
i = find_token(lines, "\\the_end", 0)
if i == -1:
lines.append('\\the_end')
lines.append("\\the_end")
def header_update(document):
@ -215,48 +214,47 @@ def header_update(document):
i = 0
l = len(lines)
while i < l:
if lines[i][-1:] == ' ':
if lines[i][-1:] == " ":
lines[i] = lines[i][:-1]
if check_token(lines[i], '\\epsfig'):
lines[i] = lines[i].replace('\\epsfig', '\\graphics')
if check_token(lines[i], "\\epsfig"):
lines[i] = lines[i].replace("\\epsfig", "\\graphics")
i = i + 1
continue
if check_token(lines[i], '\\papersize'):
if check_token(lines[i], "\\papersize"):
size = lines[i].split()[1]
new_size = size
paperpackage = ""
if size == 'usletter':
new_size = 'letterpaper'
if size == 'a4wide':
new_size = 'Default'
if size == "usletter":
new_size = "letterpaper"
if size == "a4wide":
new_size = "Default"
paperpackage = "widemarginsa4"
lines[i] = '\\papersize ' + new_size
lines[i] = "\\papersize " + new_size
i = i + 1
if paperpackage:
lines.insert(i, '\\paperpackage ' + paperpackage)
lines.insert(i, "\\paperpackage " + paperpackage)
i = i + 1
lines.insert(i,'\\use_geometry 0')
lines.insert(i + 1,'\\use_amsmath 0')
lines.insert(i, "\\use_geometry 0")
lines.insert(i + 1, "\\use_amsmath 0")
i = i + 2
continue
if check_token(lines[i], '\\baselinestretch'):
if check_token(lines[i], "\\baselinestretch"):
size = lines[i].split()[1]
if size == '1.00':
name = 'single'
elif size == '1.50':
name = 'onehalf'
elif size == '2.00':
name = 'double'
if size == "1.00":
name = "single"
elif size == "1.50":
name = "onehalf"
elif size == "2.00":
name = "double"
else:
name = 'other ' + size
lines[i] = '\\spacing %s ' % name
name = "other " + size
lines[i] = "\\spacing %s " % name
i = i + 1
continue
@ -268,27 +266,27 @@ def update_latexaccents(document):
body = document.body
i = 1
while True:
i = find_token(body, '\\i ', i)
i = find_token(body, "\\i ", i)
if i == -1:
return
contents = body[i][2:].strip()
if contents.find('{') != -1 and contents.find('}') != -1:
if contents.find("{") != -1 and contents.find("}") != -1:
i = i + 1
continue
if len(contents) == 2:
contents = contents + '{}'
contents = contents + "{}"
elif len(contents) == 3:
contents = contents[:2] + '{' + contents[2] + '}'
contents = contents[:2] + "{" + contents[2] + "}"
elif len(contents) == 4:
if contents[2] == ' ':
contents = contents[:2] + '{' + contents[3] + '}'
elif contents[2:4] == '\\i' or contents[2:4] == '\\j':
contents = contents[:2] + '{' + contents[2:] + '}'
if contents[2] == " ":
contents = contents[:2] + "{" + contents[3] + "}"
elif contents[2:4] == "\\i" or contents[2:4] == "\\j":
contents = contents[:2] + "{" + contents[2:] + "}"
body[i] = '\\i ' + contents
body[i] = "\\i " + contents
i = i + 1
@ -297,12 +295,12 @@ def obsolete_latex_title(document):
body = document.body
i = 0
while True:
i = find_token(body, '\\layout', i)
i = find_token(body, "\\layout", i)
if i == -1:
return
if body[i].lower().find('latex_title') != -1:
body[i] = '\\layout Title'
if body[i].lower().find("latex_title") != -1:
body[i] = "\\layout Title"
i = i + 1
@ -313,12 +311,12 @@ def remove_inset_latex(document):
i = 0
while True:
i = find_token(body, '\\begin_inset Latex', i)
i = find_token(body, "\\begin_inset Latex", i)
if i == -1:
return
body[i] = body[i].replace('\\begin_inset Latex', '\\layout LaTeX')
i = find_token(body, '\\end_inset', i)
body[i] = body[i].replace("\\begin_inset Latex", "\\layout LaTeX")
i = find_token(body, "\\end_inset", i)
if i == -1:
# this should not happen
return
@ -326,13 +324,29 @@ def remove_inset_latex(document):
supported_versions = ["0.12.0", "0.12.1", "0.12"]
convert = [[215, [header_update, add_end_document, remove_cursor,
final_dot, update_inset_label, update_latexdel,
update_space_units, space_before_layout,
formula_inset_space_eat, update_tabular,
update_vfill, remove_empty_insets,
remove_formula_latex, update_latexaccents,
obsolete_latex_title, remove_inset_latex]]]
convert = [
[
215,
[
header_update,
add_end_document,
remove_cursor,
final_dot,
update_inset_label,
update_latexdel,
update_space_units,
space_before_layout,
formula_inset_space_eat,
update_tabular,
update_vfill,
remove_empty_insets,
remove_formula_latex,
update_latexaccents,
obsolete_latex_title,
remove_inset_latex,
],
]
]
revert = []

View File

@ -20,18 +20,19 @@
import re
from parser_tools import find_token, find_re
def obsolete_latex_title(document):
"Replace LatexTitle layout with Title."
body = document.body
i = 0
while True:
i = find_token(body, '\\layout', i)
i = find_token(body, "\\layout", i)
if i == -1:
return
if body[i].lower().find('latex title') != -1:
body[i] = '\\layout Title'
if body[i].lower().find("latex title") != -1:
body[i] = "\\layout Title"
i = i + 1
@ -49,27 +50,27 @@ def update_tabular(document):
i = i + 1
format = lines[i][8:]
if format != '3':
if format != "3":
continue
lines[i] = 'multicol4'
lines[i] = "multicol4"
i = i + 1
rows = int(lines[i].split()[0])
columns = int(lines[i].split()[1])
lines[i] = lines[i] + ' 0 0 -1 -1 -1 -1'
lines[i] = lines[i] + " 0 0 -1 -1 -1 -1"
i = i + 1
for j in range(rows):
lines[i] = lines[i] + ' 0 0'
lines[i] = lines[i] + " 0 0"
i = i + 1
for j in range(columns):
lines[i] = lines[i] + ' '
lines[i] = lines[i] + " "
i = i + 1
while lines[i].strip():
lines[i] = lines[i] + ' 0 0 0'
lines[i] = lines[i] + " 0 0 0"
i = i + 1
lines[i] = lines[i].strip()
@ -82,4 +83,3 @@ revert = []
if __name__ == "__main__":
pass

View File

@ -24,4 +24,3 @@ revert = []
if __name__ == "__main__":
pass

View File

@ -23,6 +23,7 @@ from parser_tools import find_token, find_token_backwards, find_re
####################################################################
# Private helper functions
def get_layout(line, default_layout):
"Get the line layout, beware of the empty layout."
tokens = line.split()
@ -35,6 +36,7 @@ def get_layout(line, default_layout):
math_env = ["\\[", "\\begin{eqnarray*}", "\\begin{eqnarray}", "\\begin{equation}"]
def replace_protected_separator(document):
"Replace protected separator."
lines = document.body
@ -66,7 +68,8 @@ def merge_formula_inset(document):
i = 0
while True:
i = find_token(lines, "\\begin_inset Formula", i)
if i == -1: break
if i == -1:
break
if lines[i + 1] in math_env:
lines[i] = lines[i] + lines[i + 1]
del lines[i + 1]
@ -84,10 +87,10 @@ def update_tabular(document):
break
i = i + 1
format = lines[i][8]
if format != '4':
if format != "4":
continue
lines[i]='multicol5'
lines[i] = "multicol5"
i = i + 1
rows = int(lines[i].split()[0])
columns = int(lines[i].split()[1])
@ -111,18 +114,17 @@ def update_toc(document):
lines = document.body
i = 0
while True:
i = find_token(lines,
'\\begin_inset LatexCommand \\tableofcontents', i)
i = find_token(lines, "\\begin_inset LatexCommand \\tableofcontents", i)
if i == -1:
break
lines[i] = lines[i] + '{}'
lines[i] = lines[i] + "{}"
i = i + 1
def remove_cursor(document):
"Remove cursor."
lines = document.body
i = find_token(lines, '\\cursor', 0)
i = find_token(lines, "\\cursor", 0)
if i != -1:
del lines[i]
@ -130,10 +132,10 @@ def remove_cursor(document):
def remove_vcid(document):
"Remove \\lyxvcid and \\lyxrcsid."
lines = document.header
i = find_token(lines, '\\lyxvcid', 0)
i = find_token(lines, "\\lyxvcid", 0)
if i != -1:
del lines[i]
i = find_token(lines, '\\lyxrcsid', 0)
i = find_token(lines, "\\lyxrcsid", 0)
if i != -1:
del lines[i]
@ -141,19 +143,18 @@ def remove_vcid(document):
def first_layout(document):
"Fix first layout, if empty use the default layout."
lines = document.body
while (lines[0] == ""):
while lines[0] == "":
del lines[0]
if lines[0][:7] != "\\layout":
lines[:0] = ['\\layout %s' % document.default_layout, '']
lines[:0] = ["\\layout %s" % document.default_layout, ""]
def remove_space_in_units(document):
"Remove space in units."
lines = document.header
margins = ["\\topmargin","\\rightmargin",
"\\leftmargin","\\bottommargin"]
margins = ["\\topmargin", "\\rightmargin", "\\leftmargin", "\\bottommargin"]
unit_rexp = re.compile(r'[^ ]* (.*) (.*)')
unit_rexp = re.compile(r"[^ ]* (.*) (.*)")
for margin in margins:
i = 0
@ -178,13 +179,13 @@ def latexdel_getargs(document, i):
break
del lines[i]
j = find_token(lines, '\\end_inset', i)
j = find_token(lines, "\\end_inset", i)
if i == j:
del lines[i]
else:
document.warning("Unexpected end of inset.")
j = find_token(lines, '\\begin_inset LatexDel }{', i)
j = find_token(lines, "\\begin_inset LatexDel }{", i)
ref = " ".join(lines[i:j])
del lines[i : j + 1]
@ -195,12 +196,12 @@ def latexdel_getargs(document, i):
break
del lines[i]
j = find_token(lines, '\\end_inset', i - 1)
j = find_token(lines, "\\end_inset", i - 1)
if i == j:
del lines[i]
else:
document.warning("Unexpected end of inset.")
j = find_token(lines, '\\begin_inset LatexDel }', i)
j = find_token(lines, "\\begin_inset LatexDel }", i)
label = " ".join(lines[i:j])
del lines[i : j + 1]
@ -212,7 +213,7 @@ def update_ref(document):
lines = document.body
i = 0
while True:
i = find_token(lines, '\\begin_inset LatexCommand', i)
i = find_token(lines, "\\begin_inset LatexCommand", i)
if i == -1:
return
@ -233,10 +234,9 @@ def update_latexdel(document):
i = find_re(lines, latexdel_re, i)
if i == -1:
return
lines[i] = lines[i].replace('\\begin_inset LatexDel',
'\\begin_inset LatexCommand')
lines[i] = lines[i].replace("\\begin_inset LatexDel", "\\begin_inset LatexCommand")
j = lines[i].find('\\begin_inset')
j = lines[i].find("\\begin_inset")
lines.insert(i + 1, lines[i][j:])
lines[i] = lines[i][:j].strip()
i = i + 1
@ -251,11 +251,23 @@ def update_latexdel(document):
supported_versions = ["1.1.5", "1.1.5fix1", "1.1.5fix2", "1.1"]
convert = [[216, [first_layout, remove_vcid, remove_cursor,
update_toc, replace_protected_separator,
merge_formula_inset, update_tabular,
remove_space_in_units, update_ref,
update_latexdel]]]
convert = [
[
216,
[
first_layout,
remove_vcid,
remove_cursor,
update_toc,
replace_protected_separator,
merge_formula_inset,
update_tabular,
remove_space_in_units,
update_ref,
update_latexdel,
],
]
]
revert = []

View File

@ -21,6 +21,8 @@ import re
from parser_tools import find_re, find_tokens, find_token, check_token
lyxtable_re = re.compile(r".*\\LyXTable$")
def update_tabular(document):
"Update tabular to version 1 (xml like syntax)."
lines = document.body
@ -29,15 +31,22 @@ def update_tabular(document):
i = find_re(lines, lyxtable_re, i)
if i == -1:
break
prop_dict = {"family" : "default", "series" : "default",
"shape" : "default", "size" : "default",
"emph" : "default", "bar" : "default",
"noun" : "default", "latex" : "default", "color" : "default"}
prop_dict = {
"family": "default",
"series": "default",
"shape": "default",
"size": "default",
"emph": "default",
"bar": "default",
"noun": "default",
"latex": "default",
"color": "default",
}
# remove \LyXTable
lines[i] = lines[i][:-9]
i = i + 1
lines.insert(i,'')
lines.insert(i, "")
i = i + 1
lines[i] = "\\begin_inset Tabular"
i = i + 1
@ -47,7 +56,10 @@ def update_tabular(document):
tabular_line = i
i = i + 1
lines.insert(i, f'<Features rotate="{head[2]}" islongtable="{head[3]}" endhead="{head[4]}" endfirsthead="{head[5]}" endfoot="{head[6]}" endlastfoot="{head[7]}">')
lines.insert(
i,
f'<Features rotate="{head[2]}" islongtable="{head[3]}" endhead="{head[4]}" endfirsthead="{head[5]}" endfoot="{head[6]}" endlastfoot="{head[7]}">',
)
i = i + 1
@ -55,7 +67,7 @@ def update_tabular(document):
cont_row = []
for j in range(rows):
row_info.append(lines[i].split())
if lines[i].split()[2] == '1':
if lines[i].split()[2] == "1":
cont_row.append(j)
del lines[i]
@ -78,7 +90,9 @@ def update_tabular(document):
ncells = ncells + 1
del lines[i]
lines[tabular_line] = f'<LyXTabular version="1" rows="{rows-len(cont_row)}" columns="{columns}">'
lines[tabular_line] = (
f'<LyXTabular version="1" rows="{rows-len(cont_row)}" columns="{columns}">'
)
del lines[i]
if not lines[i]:
del lines[i]
@ -96,14 +110,18 @@ def update_tabular(document):
for j in range(rows):
for k in range(columns):
m = j * columns + k
if cell_info[m][0] == '2':
if cell_info[m][0] == "2":
continue
if l == ncells - 1:
# the end variable refers to cell end, not to document end.
end = find_tokens(lines, ['\\layout','\\the_end','\\end_deeper','\\end_float'], i)
end = find_tokens(
lines,
["\\layout", "\\the_end", "\\end_deeper", "\\end_float"],
i,
)
else:
end = find_token(lines, '\\newline', i)
end = find_token(lines, "\\newline", i)
if end == -1:
document.error("Malformed LyX file.")
@ -114,7 +132,7 @@ def update_tabular(document):
del lines[i]
end = end - 1
if lines[i].find('\\newline') != -1:
if lines[i].find("\\newline") != -1:
del lines[i]
l = l + 1
@ -124,70 +142,107 @@ def update_tabular(document):
for j in range(rows):
if j in cont_row:
continue
tmp.append(f'<Row topline="{row_info[j][0]}" bottomline="{row_info[j][1]}" newpage="{row_info[j][3]}">')
tmp.append(
f'<Row topline="{row_info[j][0]}" bottomline="{row_info[j][1]}" newpage="{row_info[j][3]}">'
)
for k in range(columns):
if j:
tmp.append('<Column>')
tmp.append("<Column>")
else:
tmp.append(f'<Column alignment="{column_info[k][0]}" valignment="0" leftline="{column_info[k][1]}" rightline="{column_info[k][2]}" width={column_info[k][3]} special={column_info[k][4]}>')
tmp.append(
f'<Column alignment="{column_info[k][0]}" valignment="0" leftline="{column_info[k][1]}" rightline="{column_info[k][2]}" width={column_info[k][3]} special={column_info[k][4]}>'
)
m = j * columns + k
leftline = int(column_info[k][1])
if cell_info[m][0] == '1':
if cell_info[m][0] == "1":
n = m + 1
while n < rows * columns - 1 and cell_info[n][0] == '2':
while n < rows * columns - 1 and cell_info[n][0] == "2":
n = n + 1
rightline = int(column_info[cell_col[n - 1]][2])
else:
# not a multicolumn main cell
rightline = int(column_info[k][2])
tmp.append('<Cell multicolumn="%s" alignment="%s" valignment="0" topline="%s" bottomline="%s" leftline="%d" rightline="%d" rotate="%s" usebox="%s" width=%s special=%s>' % (cell_info[m][0],cell_info[m][1],cell_info[m][2],cell_info[m][3],leftline,rightline,cell_info[m][5],cell_info[m][6],cell_info[m][7],cell_info[m][8]))
tmp.append('\\begin_inset Text')
tmp.append('')
tmp.append('\\layout %s' % document.default_layout)
tmp.append('')
tmp.append(
'<Cell multicolumn="%s" alignment="%s" valignment="0" topline="%s" bottomline="%s" leftline="%d" rightline="%d" rotate="%s" usebox="%s" width=%s special=%s>'
% (
cell_info[m][0],
cell_info[m][1],
cell_info[m][2],
cell_info[m][3],
leftline,
rightline,
cell_info[m][5],
cell_info[m][6],
cell_info[m][7],
cell_info[m][8],
)
)
tmp.append("\\begin_inset Text")
tmp.append("")
tmp.append("\\layout %s" % document.default_layout)
tmp.append("")
if cell_info[m][0] != '2':
if cell_info[m][0] != "2":
paragraph = []
if cell_info[m][4] == '1':
if cell_info[m][4] == "1":
l = j
paragraph = paragraph + cell_content[j][k]
while cell_info[m][4] == '1':
while cell_info[m][4] == "1":
m = m + columns
l = l + 1
if l >= rows: break
if l >= rows:
break
paragraph = paragraph + cell_content[l][k]
else:
paragraph = cell_content[j][k]
tmp = tmp + set_paragraph_properties(paragraph, prop_dict)
tmp.append('\\end_inset ')
tmp.append('</Cell>')
tmp.append('</Column>')
tmp.append('</Row>')
tmp.append("\\end_inset ")
tmp.append("</Cell>")
tmp.append("</Column>")
tmp.append("</Row>")
tmp.append('</LyXTabular>')
tmp.append('')
tmp.append('\\end_inset ')
tmp.append('')
tmp.append('')
tmp.append("</LyXTabular>")
tmp.append("")
tmp.append("\\end_inset ")
tmp.append("")
tmp.append("")
lines[i:i] = tmp
i = i + len(tmp)
prop_exp = re.compile(r"\\(\S*)\s*(\S*)")
def set_paragraph_properties(lines, prop_dict):
"Set paragraph properties."
# we need to preserve the order of options
properties = ["family","series","shape","size",
"emph","bar","noun","latex","color"]
prop_value = {"family" : "default", "series" : "medium",
"shape" : "up", "size" : "normal",
"emph" : "off", "bar" : "no",
"noun" : "off", "latex" : "no_latex", "color" : "none"}
properties = [
"family",
"series",
"shape",
"size",
"emph",
"bar",
"noun",
"latex",
"color",
]
prop_value = {
"family": "default",
"series": "medium",
"shape": "up",
"size": "normal",
"emph": "off",
"bar": "no",
"noun": "off",
"latex": "no_latex",
"color": "none",
}
start = 0
end = 0
@ -214,7 +269,7 @@ def set_paragraph_properties(lines, prop_dict):
aux = []
insert = 0
for prop in properties:
if prop_dict[prop] != 'default':
if prop_dict[prop] != "default":
insert = 1
if prop == "color":
aux.append(f"\\{prop} {prop_dict[prop]}")
@ -231,7 +286,7 @@ def set_paragraph_properties(lines, prop_dict):
del lines[n]
continue
if lines[n][:1] == '\\':
if lines[n][:1] == "\\":
result = prop_exp.match(lines[n])
prop = result.group(1)
if prop in properties:
@ -240,14 +295,14 @@ def set_paragraph_properties(lines, prop_dict):
del lines[n]
continue
if check_token(lines[n],'\\end_inset'):
if check_token(lines[n], "\\end_inset"):
# ensure proper newlines after inset end
lines.append('')
lines.append('')
lines.append("")
lines.append("")
break
for line in lines[end:]:
if line[:1] == '\\':
if line[:1] == "\\":
result = prop_exp.match(line)
prop = result.group(1)
if prop in properties and prop not in changed_prop:
@ -257,8 +312,8 @@ def set_paragraph_properties(lines, prop_dict):
return []
result = lines[:start] + aux[:] + lines[end:]
if insert and result[0] != '':
return [''] + result[:]
if insert and result[0] != "":
return [""] + result[:]
return result[:]
@ -270,11 +325,11 @@ def update_language(document):
i = find_token(header, "\\language", 0)
if i == -1:
# no language, should emit a warning
header.append('\\language english')
header.append("\\language english")
return
# This is the lyx behaviour: defaults to english
if header[i].split()[1] == 'default':
header[i] = '\\language english'
if header[i].split()[1] == "default":
header[i] = "\\language english"
return

View File

@ -20,6 +20,7 @@
import re
from parser_tools import find_token, find_re
def bool_table(item):
"Convert 0, 1 to false, true."
if item == "0":
@ -33,9 +34,10 @@ align_table = {"0": "top", "2": "left", "4": "right", "8": "center"}
use_table = {"0": "none", "1": "parbox"}
table_meta_re = re.compile(r'<LyXTabular version="?1"? rows="?(\d*)"? columns="?(\d*)"?>')
def update_tabular(document):
"Update tabular format to version 2 (xml like syntax)."
regexp = re.compile(r'^\\begin_inset\s+Tabular')
regexp = re.compile(r"^\\begin_inset\s+Tabular")
lines = document.body
i = 0
while True:
@ -51,7 +53,7 @@ def update_tabular(document):
val = res.groups()
lines[i] = '<lyxtabular version="2" rows="%s" columns="%s">' % val
j = find_token(lines, '</LyXTabular>', i) + 1
j = find_token(lines, "</LyXTabular>", i) + 1
if j == 0:
document.warning("Error: Bad lyx format i=%d j=%d" % (i, j))
break
@ -61,50 +63,63 @@ def update_tabular(document):
i = i + len(new_table)
col_re = re.compile(r'<column alignment="?(\d)"? valignment="?(\d)"? leftline="?(\d)"? rightline="?(\d)"? width="(.*)" special="(.*)">')
cell_re = re.compile(r'<cell multicolumn="?(\d)"? alignment="?(\d)"? valignment="?(\d)"? topline="?(\d)"? bottomline="?(\d)"? leftline="?(\d)"? rightline="?(\d)"? rotate="?(\d)"? usebox="?(\d)"? width="(.*)" special="(.*)">')
features_re = re.compile(r'<features rotate="?(\d)"? islongtable="?(\d)"? endhead="?(-?\d)"? endfirsthead="?(-?\d)"? endfoot="?(-?\d)"? endlastfoot="?(-?\d)"?>')
col_re = re.compile(
r'<column alignment="?(\d)"? valignment="?(\d)"? leftline="?(\d)"? rightline="?(\d)"? width="(.*)" special="(.*)">'
)
cell_re = re.compile(
r'<cell multicolumn="?(\d)"? alignment="?(\d)"? valignment="?(\d)"? topline="?(\d)"? bottomline="?(\d)"? leftline="?(\d)"? rightline="?(\d)"? rotate="?(\d)"? usebox="?(\d)"? width="(.*)" special="(.*)">'
)
features_re = re.compile(
r'<features rotate="?(\d)"? islongtable="?(\d)"? endhead="?(-?\d)"? endfirsthead="?(-?\d)"? endfoot="?(-?\d)"? endlastfoot="?(-?\d)"?>'
)
row_re = re.compile(r'<row topline="?(\d)"? bottomline="?(\d)"? newpage="?(\d)"?>')
def table_update(lines):
"Update table's internal content to format 2."
lines[1] = lines[1].replace('<Features', '<features')
lines[1] = lines[1].replace("<Features", "<features")
res = features_re.match(lines[1])
if res:
val = res.groups()
lines[1] = f'<features rotate="{bool_table(val[0])}" islongtable="{bool_table(val[1])}" endhead="{val[2]}" endfirsthead="{val[3]}" endfoot="{val[4]}" endlastfoot="{val[5]}">'
lines[1] = (
f'<features rotate="{bool_table(val[0])}" islongtable="{bool_table(val[1])}" endhead="{val[2]}" endfirsthead="{val[3]}" endfoot="{val[4]}" endlastfoot="{val[5]}">'
)
if lines[2] == "":
del lines[2]
i = 2
col_info = []
while i < len(lines):
lines[i] = lines[i].replace('<Cell', '<cell')
lines[i] = lines[i].replace('</Cell', '</cell')
lines[i] = lines[i].replace('<Row', '<row')
lines[i] = lines[i].replace('</Row', '</row')
lines[i] = lines[i].replace('<Column', '<column')
lines[i] = lines[i].replace('</Column', '</column')
lines[i] = lines[i].replace('</LyXTabular', '</lyxtabular')
k = lines[i].find ('<column ')
lines[i] = lines[i].replace("<Cell", "<cell")
lines[i] = lines[i].replace("</Cell", "</cell")
lines[i] = lines[i].replace("<Row", "<row")
lines[i] = lines[i].replace("</Row", "</row")
lines[i] = lines[i].replace("<Column", "<column")
lines[i] = lines[i].replace("</Column", "</column")
lines[i] = lines[i].replace("</LyXTabular", "</lyxtabular")
k = lines[i].find("<column ")
if k != -1:
col_info.append(lines[i])
del lines[i]
continue
if lines[i] == '</column>' or lines[i] == '<column>':
if lines[i] == "</column>" or lines[i] == "<column>":
del lines[i]
continue
res = cell_re.match(lines[i])
if res:
val = res.groups()
lines[i] = f'<cell multicolumn="{val[0]}" alignment="{align_table[val[1]]}" valignment="{align_vertical[val[2]]}" topline="{bool_table(val[3])}" bottomline="{bool_table(val[4])}" leftline="{bool_table(val[5])}" rightline="{bool_table(val[6])}" rotate="{bool_table(val[7])}" usebox="{use_table[val[8]]}" width="{val[9]}" special="{val[10]}">'
lines[i] = (
f'<cell multicolumn="{val[0]}" alignment="{align_table[val[1]]}" valignment="{align_vertical[val[2]]}" topline="{bool_table(val[3])}" bottomline="{bool_table(val[4])}" leftline="{bool_table(val[5])}" rightline="{bool_table(val[6])}" rotate="{bool_table(val[7])}" usebox="{use_table[val[8]]}" width="{val[9]}" special="{val[10]}">'
)
res = row_re.match(lines[i])
if res:
val = res.groups()
lines[i] = f'<row topline="{bool_table(val[0])}" bottomline="{bool_table(val[1])}" newpage="{bool_table(val[2])}">'
lines[i] = (
f'<row topline="{bool_table(val[0])}" bottomline="{bool_table(val[1])}" newpage="{bool_table(val[2])}">'
)
i = i + 1
@ -113,8 +128,17 @@ def table_update(lines):
res = col_re.match(col_info[i])
if res:
val = res.groups()
col_info[i] = '<column alignment="%s" valignment="%s" leftline="%s" rightline="%s" width="%s" special="%s">' \
% ( align_table[val[0]], align_vertical[val[1]], bool_table(val[2]), bool_table(val[3]), val[4],val[5])
col_info[i] = (
'<column alignment="%s" valignment="%s" leftline="%s" rightline="%s" width="%s" special="%s">'
% (
align_table[val[0]],
align_vertical[val[1]],
bool_table(val[2]),
bool_table(val[3]),
val[4],
val[5],
)
)
return lines[:2] + col_info + lines[2:]

View File

@ -20,15 +20,24 @@
import re
from parser_tools import find_token, find_token_backwards, \
find_tokens, find_tokens_backwards, \
find_beginning_of, find_end_of, find_re, \
is_nonempty_line, find_nonempty_line, \
get_value, check_token
from parser_tools import (
find_token,
find_token_backwards,
find_tokens,
find_tokens_backwards,
find_beginning_of,
find_end_of,
find_re,
is_nonempty_line,
find_nonempty_line,
get_value,
check_token,
)
####################################################################
# Private helper functions
def get_layout(line, default_layout):
"Get layout, if empty return the default layout."
tokens = line.split()
@ -43,7 +52,8 @@ def get_paragraph(lines, i, format):
while i != -1:
i = find_tokens_backwards(lines, ["\\end_inset", begin_layout], i)
if i == -1: return -1
if i == -1:
return -1
if check_token(lines[i], begin_layout):
return i
i = find_beginning_of_inset(lines, i)
@ -93,41 +103,42 @@ def get_tabular_lines(lines, i):
i = i + 1
return result
# End of helper functions
####################################################################
floats = {
"footnote": ["\\begin_inset Foot",
"collapsed true"],
"margin": ["\\begin_inset Marginal",
"collapsed true"],
"fig": ["\\begin_inset Float figure",
"wide false",
"collapsed false"],
"tab": ["\\begin_inset Float table",
"wide false",
"collapsed false"],
"alg": ["\\begin_inset Float algorithm",
"wide false",
"collapsed false"],
"wide-fig": ["\\begin_inset Float figure",
"wide true",
"collapsed false"],
"wide-tab": ["\\begin_inset Float table",
"wide true",
"collapsed false"]
"footnote": ["\\begin_inset Foot", "collapsed true"],
"margin": ["\\begin_inset Marginal", "collapsed true"],
"fig": ["\\begin_inset Float figure", "wide false", "collapsed false"],
"tab": ["\\begin_inset Float table", "wide false", "collapsed false"],
"alg": ["\\begin_inset Float algorithm", "wide false", "collapsed false"],
"wide-fig": ["\\begin_inset Float figure", "wide true", "collapsed false"],
"wide-tab": ["\\begin_inset Float table", "wide true", "collapsed false"],
}
font_tokens = ["\\family", "\\series", "\\shape", "\\size", "\\emph",
"\\bar", "\\noun", "\\color", "\\lang", "\\latex"]
font_tokens = [
"\\family",
"\\series",
"\\shape",
"\\size",
"\\emph",
"\\bar",
"\\noun",
"\\color",
"\\lang",
"\\latex",
]
pextra_type3_rexp = re.compile(r".*\\pextra_type\s+3")
pextra_rexp = re.compile(r"\\pextra_type\s+(\S+)"+\
r"(\s+\\pextra_alignment\s+(\S+))?"+\
r"(\s+\\pextra_hfill\s+(\S+))?"+\
r"(\s+\\pextra_start_minipage\s+(\S+))?"+\
r"(\s+(\\pextra_widthp?)\s+(\S*))?")
pextra_rexp = re.compile(
r"\\pextra_type\s+(\S+)"
+ r"(\s+\\pextra_alignment\s+(\S+))?"
+ r"(\s+\\pextra_hfill\s+(\S+))?"
+ r"(\s+\\pextra_start_minipage\s+(\S+))?"
+ r"(\s+(\\pextra_widthp?)\s+(\S*))?"
)
def get_width(mo):
@ -173,10 +184,12 @@ def remove_oldfloat(document):
mo = pextra_rexp.search(lines[k])
width = get_width(mo)
lines[k] = re.sub(pextra_rexp, "", lines[k])
new = ["\\begin_inset Wrap figure",
new = [
"\\begin_inset Wrap figure",
'width "%s"' % width,
"collapsed false",
""]
"",
]
new = new + lines[i2:j] + ["\\end_inset ", ""]
@ -209,6 +222,7 @@ pextra_type2_rexp = re.compile(r".*\\pextra_type\s+[12]")
pextra_type2_rexp2 = re.compile(r".*(\\layout|\\pextra_type\s+2)")
pextra_widthp = re.compile(r"\\pextra_widthp")
def remove_pextra(document):
"Remove pextra token."
lines = document.body
@ -222,7 +236,7 @@ def remove_pextra(document):
# Sometimes the \pextra_widthp argument comes in its own
# line. If that happens insert it back in this line.
if pextra_widthp.search(lines[i + 1]):
lines[i] = lines[i] + ' ' + lines[i+1]
lines[i] = lines[i] + " " + lines[i + 1]
del lines[i + 1]
mo = pextra_rexp.search(lines[i])
@ -239,19 +253,20 @@ def remove_pextra(document):
hfill = mo.group(5)
lines[i] = re.sub(pextra_rexp, "", lines[i])
start = ["\\begin_inset Minipage",
start = [
"\\begin_inset Minipage",
"position " + position,
"inner_position 0",
'height "0pt"',
'width "%s"' % width,
"collapsed false"
"collapsed false",
]
if flag:
flag = 0
if hfill:
start = ["", r"\hfill", ""] + start
else:
start = ['\\layout %s' % document.default_layout,''] + start
start = ["\\layout %s" % document.default_layout, ""] + start
j0 = find_token_backwards(lines, "\\layout", i - 1)
j = get_next_paragraph(lines, i, document.format + 1)
@ -293,11 +308,13 @@ spchar_rexp = re.compile(r"(.*)(\\SpecialChar.*)")
def remove_oldert(document):
"Remove old ERT inset."
ert_begin = ["\\begin_inset ERT",
ert_begin = [
"\\begin_inset ERT",
"status Collapsed",
"",
'\\layout %s' % document.default_layout,
""]
"\\layout %s" % document.default_layout,
"",
]
lines = document.body
i = 0
while True:
@ -308,8 +325,18 @@ def remove_oldert(document):
while True:
# \end_inset is for ert inside a tabular cell. The other tokens
# are obvious.
j = find_tokens(lines, ["\\latex default", "\\layout", "\\begin_inset", "\\end_inset", "\\end_float", "\\the_end"],
j)
j = find_tokens(
lines,
[
"\\latex default",
"\\layout",
"\\begin_inset",
"\\end_inset",
"\\end_float",
"\\the_end",
],
j,
)
if check_token(lines[j], "\\begin_inset"):
j = find_end_of_inset(lines, j) + 1
else:
@ -323,7 +350,7 @@ def remove_oldert(document):
new = []
new2 = []
if check_token(lines[i], "\\layout LaTeX"):
new = [r'\layout %s' % document.default_layout, "", ""]
new = [r"\layout %s" % document.default_layout, "", ""]
k = i + 1
while True:
@ -369,7 +396,9 @@ def remove_oldert(document):
if inset:
k3 = find_end_of_inset(lines, k2)
new = new+[""]+lines[k2:k3+1]+[""] # Put an empty line after \end_inset
new = (
new + [""] + lines[k2 : k3 + 1] + [""]
) # Put an empty line after \end_inset
k = k3 + 1
# Skip the empty line after \end_inset
if not is_nonempty_line(lines[k]):
@ -452,7 +481,6 @@ def combine_ert(document):
count = 0
text = []
while is_ert_paragraph(document, j):
count = count + 1
i2 = find_token(lines, "\\layout", j + 1)
k = find_token(lines, "\\end_inset", i2 + 1)
@ -470,6 +498,7 @@ def combine_ert(document):
oldunits = ["pt", "cm", "in", "text%", "col%"]
def get_length(lines, name, start, end):
"Get length."
i = find_token(lines, name, start, end)
@ -495,7 +524,7 @@ def remove_figinset(document):
break
j = find_end_of_inset(lines, i)
if ( len(lines[i].split()) > 2 ):
if len(lines[i].split()) > 2:
lyxwidth = lines[i].split()[3] + "pt"
lyxheight = lines[i].split()[4] + "pt"
else:
@ -556,11 +585,12 @@ def remove_figinset(document):
attr_re = re.compile(r' \w*="(false|0|)"')
line_re = re.compile(r'<(features|column|row|cell)')
line_re = re.compile(r"<(features|column|row|cell)")
def update_tabular(document):
"Convert tabular format 2 to 3."
regexp = re.compile(r'^\\begin_inset\s+Tabular')
regexp = re.compile(r"^\\begin_inset\s+Tabular")
lines = document.body
i = 0
while True:
@ -597,8 +627,10 @@ def update_tabular(document):
false = 0
true = 1
class row:
"Simple data structure to deal with long table info."
def __init__(self):
self.endhead = false # header row
self.endfirsthead = false # first header row
@ -619,7 +651,7 @@ def setHeaderFooterRows(hr, fhr, fr, lfr, rows_, row_info):
endfirsthead_empty = false
endlastfoot_empty = false
# set header info
while (hr > 0):
while hr > 0:
hr = hr - 1
row_info[hr].endhead = true
@ -671,8 +703,17 @@ def setHeaderFooterRows(hr, fhr, fr, lfr, rows_, row_info):
lfr = lfr - 1
row_info[lfr].endlastfoot = true
row_info[lfr].endfoot = false
elif not row_info[fr - 1].endhead and not row_info[fr - 1].endfirsthead and not row_info[fr - 1].endfoot:
while lfr > 0 and not row_info[lfr - 1].endhead and not row_info[lfr - 1].endfirsthead and not row_info[lfr - 1].endfoot:
elif (
not row_info[fr - 1].endhead
and not row_info[fr - 1].endfirsthead
and not row_info[fr - 1].endfoot
):
while (
lfr > 0
and not row_info[lfr - 1].endhead
and not row_info[lfr - 1].endfirsthead
and not row_info[lfr - 1].endfoot
):
lfr = lfr - 1
row_info[lfr].endlastfoot = true
elif haveLTFoot(row_info):
@ -683,17 +724,23 @@ def setHeaderFooterRows(hr, fhr, fr, lfr, rows_, row_info):
def insert_attribute(lines, i, attribute):
    """Splice `attribute` into lines[i], in place, ahead of its first ">".

    A single space is written in front of the attribute.  The split point
    comes from str.find(), so a line without any ">" yields -1 and the
    attribute lands just before the last character of that line.
    """
    text = lines[i]
    cut = text.find(">")
    head, tail = text[:cut], text[cut:]
    # join() keeps the str-only contract of the original concatenation.
    lines[i] = "".join((head, " ", attribute, tail))
rows_re = re.compile(r'rows="(\d*)"')
longtable_re = re.compile(r'islongtable="(\w)"')
ltvalues_re = re.compile(r'endhead="(-?\d*)" endfirsthead="(-?\d*)" endfoot="(-?\d*)" endlastfoot="(-?\d*)"')
lt_features_re = re.compile(r'(endhead="-?\d*" endfirsthead="-?\d*" endfoot="-?\d*" endlastfoot="-?\d*")')
ltvalues_re = re.compile(
r'endhead="(-?\d*)" endfirsthead="(-?\d*)" endfoot="(-?\d*)" endlastfoot="(-?\d*)"'
)
lt_features_re = re.compile(
r'(endhead="-?\d*" endfirsthead="-?\d*" endfoot="-?\d*" endlastfoot="-?\d*")'
)
def update_longtables(document):
"Update longtables to new format."
regexp = re.compile(r'^\\begin_inset\s+Tabular')
regexp = re.compile(r"^\\begin_inset\s+Tabular")
body = document.body
i = 0
while True:
@ -709,7 +756,7 @@ def update_longtables(document):
rows = int(rows_re.search(body[i]).group(1))
i = i + 1
i = find_token(body, '<features', i)
i = find_token(body, "<features", i)
if i == -1:
break
@ -732,7 +779,9 @@ def update_longtables(document):
if not res:
continue
endfirsthead_empty, endlastfoot_empty = setHeaderFooterRows(res.group(1), res.group(2), res.group(3), res.group(4), rows, row_info)
endfirsthead_empty, endlastfoot_empty = setHeaderFooterRows(
res.group(1), res.group(2), res.group(3), res.group(4), rows, row_info
)
if endfirsthead_empty:
insert_attribute(body, i, 'firstHeadEmpty="true"')
@ -742,7 +791,7 @@ def update_longtables(document):
i = i + 1
for j in range(rows):
i = find_token(body, '<row', i)
i = find_token(body, "<row", i)
row_info[i].endfoot = false # footer row
row_info[i].endlastfoot = false # last footer row
@ -807,8 +856,8 @@ def change_infoinset(document):
note_lines = [txt] + note_lines
for line in note_lines:
new = new + [r'\layout %s' % document.default_layout, ""]
tmp = line.split('\\')
new = new + [r"\layout %s" % document.default_layout, ""]
tmp = line.split("\\")
new = new + [tmp[0]]
for x in tmp[1:]:
new = new + ["\\backslash ", x]
@ -822,15 +871,29 @@ def change_header(document):
i = find_token(lines, "\\use_amsmath", 0)
if i == -1:
return
lines[i+1:i+1] = ["\\use_natbib 0",
"\\use_numerical_citations 0"]
lines[i + 1 : i + 1] = ["\\use_natbib 0", "\\use_numerical_citations 0"]
supported_versions = ["1.2.%d" % i for i in range(5)] + ["1.2"]
convert = [[220, [change_header, change_listof, fix_oldfloatinset,
update_tabular, update_longtables, remove_pextra,
remove_oldfloat, remove_figinset, remove_oldertinset,
remove_oldert, combine_ert, change_infoinset]]]
convert = [
[
220,
[
change_header,
change_listof,
fix_oldfloatinset,
update_tabular,
update_longtables,
remove_pextra,
remove_oldfloat,
remove_figinset,
remove_oldertinset,
remove_oldert,
combine_ert,
change_infoinset,
],
]
]
revert = []

View File

@ -19,12 +19,12 @@
"""Convert files to the file format generated by lyx 1.3"""
import re
from parser_tools import find_token, find_end_of, get_value,\
find_token_exact
from parser_tools import find_token, find_end_of, get_value, find_token_exact
####################################################################
# Private helper functions
def find_end_of_inset(lines, i):
    r"""Finds the matching \end_inset.

    Hands `lines` and `i` to find_end_of() together with the
    \begin_inset / \end_inset token pair and returns its result unchanged.
    """
    opening, closing = "\\begin_inset", "\\end_inset"
    return find_end_of(lines, i, opening, closing)
@ -45,6 +45,7 @@ def del_token(lines, token, start, end):
del lines[k]
return end - 1
# End of helper functions
####################################################################
@ -101,8 +102,10 @@ def change_insetgraphics(document):
j = j - 1
j = del_token(lines, "lyxwidth", i, j)
j = del_token(lines, "lyxheight", i, j)
if lyxsize_type not in ["2", "scale"] or \
get_value(lines, "lyxscale", i, j) == "100":
if (
lyxsize_type not in ["2", "scale"]
or get_value(lines, "lyxscale", i, j) == "100"
):
j = del_token(lines, "lyxscale", i, j)
i = i + 1

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -21,12 +21,7 @@ import re
# Uncomment only what you need to import, please.
from parser_tools import (
find_end_of_inset,
find_token,
find_re,
get_value
)
from parser_tools import find_end_of_inset, find_token, find_re, get_value
# count_pars_in_inset, del_complete_lines, del_token, find_end_of,
# find_end_of_layout,
# find_token_backwards, find_token_exact, get_bool_value,
@ -38,10 +33,7 @@ from parser_tools import (
# set_bool_value
# find_tokens, check_token
from lyx2lyx_tools import (
add_to_preamble,
latex_length
)
from lyx2lyx_tools import add_to_preamble, latex_length
# put_cmd_in_ert, insert_to_preamble, lyx2latex,
# revert_language, revert_flex_inset, str2bool,
# revert_font_attrs,
@ -52,23 +44,28 @@ from lyx2lyx_tools import (
# Private helper functions
###############################################################################
###
### Conversion and reversion routines
###
###############################################################################
def convert_url_escapes(document):
"""Unescape # and % in URLs with hyperref."""
hyperref = find_token(document.header, "\\use_hyperref true", 0) != -1
beamer = document.textclass in ['beamer', 'scrarticle-beamer', 'beamerposter', 'article-beamer']
beamer = document.textclass in [
"beamer",
"scrarticle-beamer",
"beamerposter",
"article-beamer",
]
if not hyperref and not beamer:
return
rurl = re.compile(r'^[%#].*')
rurl = re.compile(r"^[%#].*")
i = 0
while True:
i = find_token(document.body, "\\begin_inset Flex URL", i)
@ -93,12 +90,17 @@ def revert_url_escapes(document):
"""Escape # and % in URLs with hyperref."""
hyperref = find_token(document.header, "\\use_hyperref true", 0) != -1
beamer = document.textclass in ['beamer', 'scrarticle-beamer', 'beamerposter', 'article-beamer']
beamer = document.textclass in [
"beamer",
"scrarticle-beamer",
"beamerposter",
"article-beamer",
]
if not hyperref and not beamer:
return
rurl = re.compile(r'^(.*)([%#].*)')
rurl = re.compile(r"^(.*)([%#].*)")
i = 0
while True:
i = find_token(document.body, "\\begin_inset Flex URL", i)
@ -121,12 +123,18 @@ def revert_url_escapes(document):
document.body[surl : surl + 1] = [m.group(1), "\\backslash", m.group(2)]
i = surl
def convert_url_escapes2(document):
"""Unescape backslashes in URLs with hyperref."""
i = find_token(document.header, "\\use_hyperref true", 0)
if i == -1 and document.textclass not in ['beamer', 'scrarticle-beamer', 'beamerposter', 'article-beamer']:
if i == -1 and document.textclass not in [
"beamer",
"scrarticle-beamer",
"beamerposter",
"article-beamer",
]:
return
i = 0
@ -147,12 +155,18 @@ def convert_url_escapes2(document):
del document.body[bs + 2]
i = bs + 1
def revert_url_escapes2(document):
"""Escape backslashes in URLs with hyperref."""
i = find_token(document.header, "\\use_hyperref true", 0)
if i == -1 and document.textclass not in ['beamer', 'scrarticle-beamer', 'beamerposter', 'article-beamer']:
if i == -1 and document.textclass not in [
"beamer",
"scrarticle-beamer",
"beamerposter",
"article-beamer",
]:
return
i = 0
@ -196,6 +210,7 @@ def revert_glue_parskip(document):
document.header[i] = "\\paragraph_separation indent"
document.header[j] = "\\paragraph_indentation default"
def convert_he_letter(document):
"""Convert hebrew letter to letter document class"""
@ -211,13 +226,14 @@ supported_versions = ["2.5.0", "2.5"]
convert = [
[621, [convert_url_escapes, convert_url_escapes2]],
[622, []],
[623, [convert_he_letter]]
[623, [convert_he_letter]],
]
revert = [[622, []],
revert = [
[622, []],
[621, [revert_glue_parskip]],
[620, [revert_url_escapes2, revert_url_escapes]]
[620, [revert_url_escapes2, revert_url_escapes]],
]

View File

@ -166,6 +166,7 @@ count_pars_in_inset(lines, i):
import re
# Utilities for one line
def check_token(line, token):
"""check_token(line, token) -> bool
@ -336,7 +337,8 @@ def find_complete_lines(lines, sublines, start=0, end=0):
>>> l = [1, 1, 2]
>>> s = find_complete_lines(l, [1, 2])
>>> if s != -1:
... l[s:s+2] = [3]; l
... l[s : s + 2] = [3]
... l
[1, 3]
See also del_complete_lines().
@ -367,8 +369,9 @@ def find_across_lines(lines, sub, start=0, end=0):
if i < start + 1:
return -1
try:
if (lines[i-1].endswith(sublines[0]) and
lines[i+len(sublines)].startswith(sublines[-1])):
if lines[i - 1].endswith(sublines[0]) and lines[i + len(sublines)].startswith(
sublines[-1]
):
return i - 1
except IndexError:
pass
@ -407,7 +410,7 @@ def get_value(lines, token, start=0, end=0, default="", delete=False):
# see test_parser_tools.py
l = lines[i].split(None, 1)
if delete:
del(lines[i])
del lines[i]
if len(l) > 1:
return l[1].strip()
return default
@ -430,8 +433,8 @@ def get_quoted_value(lines, token, start=0, end=0, default="", delete=False):
return val.strip('"')
bool_values = {"true": True, "1": True,
"false": False, "0": False}
bool_values = {"true": True, "1": True, "false": False, "0": False}
def get_bool_value(lines, token, start=0, end=0, default=None, delete=False):
"""get_bool_value(lines, token, start[[, end], default]) -> string
@ -460,7 +463,7 @@ def set_bool_value(lines, token, value, start=0, end=0):
if oldvalue is value:
return oldvalue
# set to new value
if get_quoted_value(lines, token, i, i+1) in ('0', '1'):
if get_quoted_value(lines, token, i, i + 1) in ("0", "1"):
lines[i] = "%s %d" % (token, value)
else:
lines[i] = f"{token} {str(value).lower()}"
@ -478,12 +481,12 @@ def get_option_value(line, option):
def set_option_value(line, option, value):
    """Return `line` with every `option="..."` value replaced by `value`.

    `option` is interpolated into the search pattern as-is (it is not
    regex-escaped), optional whitespace is allowed around the "=", and only
    non-empty quoted values are matched.  If nothing matches, `line` is
    returned unchanged.

    `value` is inserted literally: backslashes in it are not interpreted as
    replacement-template escapes or group references.
    """
    rx = re.compile("(" + option + r'\s*=\s*")[^"]+"')
    # A callable replacement bypasses re's template processing, so a value
    # such as r"0.5\columnwidth" can neither raise "bad escape" nor be
    # silently rewritten (e.g. "\t" -> TAB).
    return rx.sub(lambda m: m.group(1) + value + '"', line)
def del_token(lines, token, start=0, end=0):
@ -499,6 +502,7 @@ def del_token(lines, token, start=0, end=0):
del lines[k]
return True
def del_complete_lines(lines, sublines, start=0, end=0):
"""Delete first occurrence of `sublines` in list `lines`.
@ -516,7 +520,7 @@ def del_complete_lines(lines, sublines, start=0, end=0):
i = find_complete_lines(lines, sublines, start, end)
if i == -1:
return False
del(lines[i:i+len(sublines)])
del lines[i : i + len(sublines)]
return True
@ -609,12 +613,12 @@ def is_in_inset(lines, i, inset, default=(-1,-1)):
def get_containing_inset(lines, i):
'''
"""
Finds out what kind of inset line i is within. Returns a
list containing (i) what follows \\begin_inset on the line
on which the inset begins, plus the starting and ending line.
Returns False on any kind of error or if it isn't in an inset.
'''
"""
j = i
while True:
stins = find_token_backwards(lines, "\\begin_inset", j)
@ -636,7 +640,7 @@ def get_containing_inset(lines, i):
def get_containing_layout(lines, i):
'''
"""
Find out what kind of layout line `i` is within.
Return a tuple
(layoutname, layoutstart, layoutend, startofcontent)
@ -646,7 +650,7 @@ def get_containing_layout(lines, i):
* end line number, and
* number of first paragraph line (after all params).
Return `False` on any kind of error.
'''
"""
j = i
while True:
stlay = find_token_backwards(lines, "\\begin_layout", j)
@ -667,27 +671,34 @@ def get_containing_layout(lines, i):
# layoutname == "Standard" # use same fallback as the LyX parser:
# raise ValueError("Missing layout name on line %d"%stlay) # diagnosis
# return False # generic error response
par_params = ["\\noindent", "\\indent", "\\indent-toggle", "\\leftindent",
"\\start_of_appendix", "\\paragraph_spacing", "\\align",
"\\labelwidthstring"]
par_params = [
"\\noindent",
"\\indent",
"\\indent-toggle",
"\\leftindent",
"\\start_of_appendix",
"\\paragraph_spacing",
"\\align",
"\\labelwidthstring",
]
stpar = stlay
while True:
stpar += 1
if lines[stpar].split(' ', 1)[0] not in par_params:
if lines[stpar].split(" ", 1)[0] not in par_params:
break
return (layoutname, stlay, endlay, stpar)
def count_pars_in_inset(lines, i):
'''
"""
Counts the paragraphs within this inset
'''
"""
ins = get_containing_inset(lines, i)
if ins == -1:
return -1
pars = 0
for j in range(ins[1], ins[2]):
m = re.match(r'\\begin_layout (.*)', lines[j])
m = re.match(r"\\begin_layout (.*)", lines[j])
found_inset = get_containing_inset(lines, j)
if m and found_inset and found_inset[1] == ins[1]:
pars += 1
@ -696,9 +707,9 @@ def count_pars_in_inset(lines, i):
def find_end_of_sequence(lines, i):
'''
"""
Returns the end of a sequence of identical layouts.
'''
"""
lay = get_containing_layout(lines, i)
if lay == False:
return -1
@ -706,7 +717,7 @@ def find_end_of_sequence(lines, i):
endlay = lay[2]
i = endlay
while True:
m = re.match(r'\\begin_layout (.*)', lines[i])
m = re.match(r"\\begin_layout (.*)", lines[i])
if m and m.group(1) != layout:
return endlay
elif lines[i] == "\\begin_deeper":

View File

@ -17,6 +17,7 @@
# We need all this because lyx2lyx does not have the .py termination
import imp
lyx2lyx = imp.load_source("lyx2lyx", "lyx2lyx", open("lyx2lyx"))
# Profiler used in the study
@ -34,6 +35,7 @@ Example:
./profiling.py -ou.lyx ../doc/UserGuide.lyx
"""
def main():
# This will only work with python >= 2.2, the version where this module was added
prof = hotshot.Profile("lyx2lyx.prof") # Use temporary file, here?
@ -43,7 +45,7 @@ def main():
# After the tests, show the profile analysis.
stats = hotshot.stats.load("lyx2lyx.prof")
stats.strip_dirs()
stats.sort_stats('time', 'calls')
stats.sort_stats("time", "calls")
stats.print_stats(20)
os.unlink("lyx2lyx.prof")

View File

@ -21,46 +21,50 @@ from lyx2lyx_tools import *
import unittest
class TestParserTools(unittest.TestCase):
class TestParserTools(unittest.TestCase):
def test_put_cmd_in_ert(self):
ert = ['\\begin_inset ERT',
'status collapsed',
'',
'\\begin_layout Plain Layout',
'',
'',
'\\backslash',
'texttt{Gr',
'\\backslash',
ert = [
"\\begin_inset ERT",
"status collapsed",
"",
"\\begin_layout Plain Layout",
"",
"",
"\\backslash",
"texttt{Gr",
"\\backslash",
'"{u}',
'\\backslash',
'ss{}e}',
'\\end_layout',
'',
'\\end_inset']
"\\backslash",
"ss{}e}",
"\\end_layout",
"",
"\\end_inset",
]
ert_open = ert[:]
ert_open[1] = 'status open'
ert_paragraph = ["\\begin_layout Standard",
'\\begin_inset ERT',
'status collapsed',
'',
'\\begin_layout Plain Layout',
'',
'',
'\\backslash',
'texttt{Gr',
'\\backslash',
ert_open[1] = "status open"
ert_paragraph = [
"\\begin_layout Standard",
"\\begin_inset ERT",
"status collapsed",
"",
"\\begin_layout Plain Layout",
"",
"",
"\\backslash",
"texttt{Gr",
"\\backslash",
'"{u}',
'\\backslash',
'ss{}e}',
'\\end_layout',
'',
'\\end_inset',
'',
'',
'\\end_layout',
'']
"\\backslash",
"ss{}e}",
"\\end_layout",
"",
"\\end_inset",
"",
"",
"\\end_layout",
"",
]
self.assertEqual(put_cmd_in_ert("\\texttt{Grüße}"), ert)
self.assertEqual(put_cmd_in_ert(["\\texttt{Grüße}"]), ert)
self.assertEqual(put_cmd_in_ert("\\texttt{Grüße}", is_open=True), ert_open)
@ -73,6 +77,5 @@ class TestParserTools(unittest.TestCase):
self.assertEqual(latex_length("-0.4pt"), (False, "-0.4pt"))
if __name__ == '__main__':
if __name__ == "__main__":
unittest.main()

View File

@ -77,45 +77,40 @@ newheader = r"""\begin_header
class TestParserTools(unittest.TestCase):
def test_check_token(self):
line = "\\begin_layout Standard"
self.assertEqual(check_token(line, '\\begin_layout'), True)
self.assertEqual(check_token(line, 'Standard'), False)
self.assertEqual(check_token(line, "\\begin_layout"), True)
self.assertEqual(check_token(line, "Standard"), False)
def test_is_nonempty_line(self):
self.assertEqual(is_nonempty_line(lines[0]), False)
self.assertEqual(is_nonempty_line(lines[1]), True)
self.assertEqual(is_nonempty_line(" " * 5), False)
def test_find_token(self):
    """Exercise find_token() with start/end bounds and the `ignorews` option."""
    self.assertEqual(find_token(lines, "\\emph", 0), 7)
    # no line starts with "emph" (without backslash):
    self.assertEqual(find_token(lines, "emph", 0), -1)
    # token on line[start] is found:
    self.assertEqual(find_token(lines, "\\emph", 7), 7)
    self.assertEqual(find_token(lines, "\\emph", 8), 9)
    # token on line[end] is not found:
    self.assertEqual(find_token(lines, "\\emph", 0, 7), -1)
    # `ignorews` looks for whitespace-separated tokens:
    self.assertEqual(find_token(lines, "\\emp", 0, ignorews=True), -1)
    self.assertEqual(find_token(lines, "\\emph", 0, ignorews=True), 7)
    self.assertEqual(find_token(lines, "\\emph", 7, ignorews=True), 7)
    self.assertEqual(find_token(lines, "\\emph", 0, 7, True), -1)
    # only first token is found:
    self.assertEqual(find_token(lines, "Quotes", 0), -1)
    self.assertEqual(find_token(lines, "Quotes", 0, ignorews=True), -1)
def test_find_tokens(self):
    """Check find_tokens() with a list of tokens, with and without an end bound."""
    tokens = ["\\emph", "\\end_inset"]
    self.assertEqual(find_tokens(lines, tokens, 0), 4)
    # with end=4 the search reports no match:
    self.assertEqual(find_tokens(lines, tokens, 0, 4), -1)
def test_find_substring(self):
# Quotes is not a "token" (substring at the start of any line):
self.assertEqual(find_token(lines, "Quotes", 0), -1)
@ -123,9 +118,8 @@ class TestParserTools(unittest.TestCase):
# return -1 on failure:
self.assertEqual(find_substring(lines, "Qualen", 0), -1)
def test_find_re(self):
regexp_object = re.compile(r'\\begin.*Quote')
regexp_object = re.compile(r"\\begin.*Quote")
# matching starts with line[start] (default: start=0)
self.assertEqual(find_re(lines, regexp_object), 3)
self.assertEqual(find_re(lines, regexp_object, start=3), 3)
@ -134,13 +128,12 @@ class TestParserTools(unittest.TestCase):
self.assertEqual(find_re(lines, regexp_object, start=4, end=11), -1)
def test_find_complete_lines(self):
sublines = ["\\begin_inset Quotes eld",
"\\end_inset"]
sublines = ["\\begin_inset Quotes eld", "\\end_inset"]
# return index of first line of sublines:
self.assertEqual(find_complete_lines(lines, sublines), 3)
self.assertEqual(find_complete_lines(lines, ["\\end_inset"]), 4)
# return -1 if sublines is not found:
self.assertEqual(find_complete_lines(lines, ['x']), -1)
self.assertEqual(find_complete_lines(lines, ["x"]), -1)
# search includes line `start`:
self.assertEqual(find_complete_lines(lines, sublines, 3), 3)
self.assertEqual(find_complete_lines(lines, sublines, 4), 20)
@ -150,7 +143,6 @@ class TestParserTools(unittest.TestCase):
# an empty list is always found
self.assertEqual(find_complete_lines(lines, []), 0)
def test_find_across_lines(self):
# sub with at least 2 line-breaks (uses find_complete_lines):
sub = "Quotes eld\n\\end_inset\n\n\n"
@ -182,7 +174,6 @@ class TestParserTools(unittest.TestCase):
self.assertEqual(find_across_lines(lines, sub, 2, 1), -1)
self.assertEqual(find_across_lines(lines, "XXX"), -1)
def test_get_value(self):
self.assertEqual(get_value(lines, "\\begin_inset"), "Quotes eld")
# TODO: do we want this:
@ -216,8 +207,7 @@ class TestParserTools(unittest.TestCase):
def test_del_complete_lines(self):
l = lines[:]
sublines = ["\\begin_inset Quotes eld",
"\\end_inset"]
sublines = ["\\begin_inset Quotes eld", "\\end_inset"]
# normal operation: remove the first occurrence of sublines:
self.assertEqual(del_complete_lines(l, sublines), True)
self.assertEqual(l[3], "")
@ -239,5 +229,5 @@ class TestParserTools(unittest.TestCase):
self.assertEqual(del_value(l, "\\end_inset", default=None), "")
# Run the test suite when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()

View File

@ -23,7 +23,7 @@ import sys, os, re, codecs
def read_unicodesymbols():
"Read the unicodesymbols list of unicode characters and corresponding commands."
pathname = os.path.abspath(os.path.dirname(__file__))
filename = os.path.join(pathname.strip('lyx2lyx'), 'unicodesymbols')
filename = os.path.join(pathname.strip("lyx2lyx"), "unicodesymbols")
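# NOTE(review): str.strip("lyx2lyx") above removes any of the characters
# l, y, x, 2 from both ends of the path, not the "lyx2lyx" suffix as a
# unit -- confirm this yields the intended directory on all platforms.
# Read as Unicode strings in both Python 2 and Python 3.
# Specify the encoding for those systems where the default is not UTF-8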
@ -35,28 +35,31 @@ def read_unicodesymbols():
# as: \"u or even \" u.
# The two backslashes in the string literal are needed to specify a literal
# backslash in the regex. Without r prefix, these would be four backslashes.
r = re.compile(r'\\(\W)\{(\w)\}')
r = re.compile(r"\\(\W)\{(\w)\}")
spec_chars = []
for line in fp.readlines():
if not line.strip() or line.startswith('#'):
if not line.strip() or line.startswith("#"):
# skip empty lines and comments
continue
# Note: backslashes in the string literals with r prefix are not escaped,
# so one backslash in the source file equals one backslash in memory.
# Without r prefix backslashes are escaped, so two backslashes in the
# source file equal one backslash in memory.
line=line.replace(' "',' ') # remove all quotation marks with spaces before
line=line.replace('" ',' ') # remove all quotation marks with spaces after
line=line.replace(r'\"','"') # unescape "
line=line.replace(r'\\','\\') # unescape \
line = line.replace(' "', " ") # remove all quotation marks with spaces before
line = line.replace('" ', " ") # remove all quotation marks with spaces after
line = line.replace(r"\"", '"') # unescape "
line = line.replace(r"\\", "\\") # unescape \
try:
[ucs4, command, dead] = line.split(None, 2)
if command[0:1] != "\\":
continue
literal_char = chr(int(ucs4, 16))
if (line.find("notermination=text") < 0 and
line.find("notermination=both") < 0 and command[-1] != "}"):
if (
line.find("notermination=text") < 0
and line.find("notermination=both") < 0
and command[-1] != "}"
):
command = command + "{}"
spec_chars.append([command, literal_char])
except:
@ -66,7 +69,7 @@ def read_unicodesymbols():
command = "\\"
commandbl = command
command += m.group(1) + m.group(2)
commandbl += m.group(1) + ' ' + m.group(2)
commandbl += m.group(1) + " " + m.group(2)
spec_chars.append([command, literal_char])
spec_chars.append([commandbl, literal_char])
fp.close()