Make lyx2lyx infrastructure python3 ready

The LyX class works now with python 3. Certain file format conversions may
still fail (convert_multiencoding() is a hot candidate), but this will need
to be fixed in the individual modules.
This commit is contained in:
Georg Baum 2016-06-12 21:21:15 +02:00
parent 881b7ec673
commit 166420d02c
4 changed files with 108 additions and 42 deletions

View File

@ -28,6 +28,7 @@ import locale
import sys
import re
import time
import io
try:
import lyx2lyx_version
@ -37,6 +38,10 @@ except: # we are running from build directory so assume the last version
default_debug__ = 2
# Provide support for both python 2 and 3
PY2 = sys.version_info[0] == 2
# End of code to support for both python 2 and 3
####################################################################
# Private helper functions
@ -158,6 +163,17 @@ def trim_eol(line):
return line[:-1]
def trim_eol_binary(line):
" Remove end of line char(s)."
if line[-1] != 10 and line[-1] != 13:
# May happen for the last line of a document
return line
if line[-2:-1] == 13:
return line[:-2]
else:
return line[:-1]
def get_encoding(language, inputencoding, format, cjk_encoding):
" Returns enconding of the lyx file"
if format > 248:
@ -200,7 +216,8 @@ class LyX_base:
error: the name of the error file, if empty use the standard error.
debug: debug level, O means no debug, as its value increases be more verbose.
"""
self.choose_io(input, output)
self.choose_input(input)
self.output = output
if error:
self.err = open(error, "w")
@ -281,25 +298,41 @@ class LyX_base:
self.body parts, from self.input."""
# First pass: Read header to determine file encoding
# If we are running under python3 then all strings are binary in this
# pass. In some cases we need to convert binary to unicode in order to
# use our parser tools. Since we do not know the true encoding yet we
# use latin1. This works since a) the parts we are interested in are
# pure ASCII (subset of latin1) and b) in contrast to pure ascii or
# utf8, one can decode any 8byte string using latin1.
while True:
line = self.input.readline()
if not line:
# eof found before end of header
self.error("Invalid LyX file: Missing body.")
line = trim_eol(line)
if check_token(line, '\\begin_preamble'):
if PY2:
line = trim_eol(line)
decoded = line
else:
line = trim_eol_binary(line)
decoded = line.decode('latin1')
if check_token(decoded, '\\begin_preamble'):
while 1:
line = self.input.readline()
if not line:
# eof found before end of header
self.error("Invalid LyX file: Missing body.")
line = trim_eol(line)
if check_token(line, '\\end_preamble'):
if PY2:
line = trim_eol(line)
decoded = line
else:
line = trim_eol_binary(line)
decoded = line.decode('latin1')
if check_token(decoded, '\\end_preamble'):
break
if line.split()[:0] in ("\\layout",
if decoded.split()[:0] in ("\\layout",
"\\begin_layout", "\\begin_body"):
self.warning("Malformed LyX file:"
@ -309,33 +342,48 @@ class LyX_base:
self.preamble.append(line)
if check_token(line, '\\end_preamble'):
if check_token(decoded, '\\end_preamble'):
continue
line = line.strip()
if not line:
continue
if line.split()[0] in ("\\layout", "\\begin_layout",
if decoded.split()[0] in ("\\layout", "\\begin_layout",
"\\begin_body", "\\begin_deeper"):
self.body.append(line)
break
self.header.append(line)
i = find_token(self.header, '\\textclass', 0)
if PY2:
i = find_token(self.header, '\\textclass', 0)
else:
i = find_token(self.header, b'\\textclass', 0)
if i == -1:
self.warning("Malformed LyX file: Missing '\\textclass'.")
i = find_token(self.header, '\\lyxformat', 0) + 1
self.header[i:i] = ['\\textclass article']
if PY2:
i = find_token(self.header, '\\lyxformat', 0) + 1
self.header[i:i] = ['\\textclass article']
else:
i = find_token(self.header, b'\\lyxformat', 0) + 1
self.header[i:i] = [b'\\textclass article']
self.textclass = get_value(self.header, "\\textclass", 0)
self.backend = get_backend(self.textclass)
self.format = self.read_format()
self.language = get_value(self.header, "\\language", 0,
default = "english")
self.inputencoding = get_value(self.header, "\\inputencoding",
0, default = "auto")
if PY2:
self.textclass = get_value(self.header, "\\textclass", 0,
default = "")
self.language = get_value(self.header, "\\language", 0,
default = "english")
self.inputencoding = get_value(self.header, "\\inputencoding", 0,
default = "auto")
else:
self.textclass = get_value(self.header, b"\\textclass", 0,
default = b"")
self.language = get_value(self.header, b"\\language", 0,
default = b"english").decode('ascii')
self.inputencoding = get_value(self.header, b"\\inputencoding", 0,
default = b"auto").decode('ascii')
self.format = self.read_format()
self.encoding = get_encoding(self.language,
self.inputencoding, self.format,
self.cjk_encoding)
@ -344,6 +392,7 @@ class LyX_base:
# Second pass over header and preamble, now we know the file encoding
# Do not forget the textclass (Debian bug #700828)
self.textclass = self.textclass.decode(self.encoding)
self.backend = get_backend(self.textclass)
for i in range(len(self.header)):
self.header[i] = self.header[i].decode(self.encoding)
for i in range(len(self.preamble)):
@ -361,6 +410,7 @@ class LyX_base:
def write(self):
" Writes the LyX file to self.output."
self.choose_output(self.output)
self.set_version()
self.set_format()
self.set_textclass()
@ -375,29 +425,53 @@ class LyX_base:
header = self.header
for line in header + [''] + self.body:
self.output.write(line.encode(self.encoding)+"\n")
self.output.write(line+u"\n")
def choose_io(self, input, output):
"""Choose input and output streams, dealing transparently with
def choose_output(self, output):
"""Choose output streams dealing transparently with
compressed files."""
if output:
self.output = open(output, "wb")
# This is a bit complicated, because we need to be compatible both with
# python 2 and python 3. Therefore we handle the encoding here and not
# when writing individual lines and may need up to 3 layered file like
# interfaces.
if self.compressed:
if output:
outputfileobj = open(output, 'wb')
else:
# We cannot not use stdout directly since it needs text, not bytes in python 3
outputfileobj = os.fdopen(sys.stdout.fileno(), 'wb')
# We cannot not use gzip.open() since it is not supported by python 2
zipbuffer = gzip.GzipFile(mode='wb', fileobj=outputfileobj)
# We do not want to use different newlines on different OSes inside zipped files
self.output = io.TextIOWrapper(zipbuffer, encoding=self.encoding, newline='\n')
else:
self.output = sys.stdout
if output:
self.output = io.open(output, 'w', encoding=self.encoding)
else:
self.output = io.open(sys.stdout.fileno(), 'w', encoding=self.encoding)
def choose_input(self, input):
"""Choose input stream, dealing transparently with
compressed files."""
# Since we do not know the encoding yet we need to read the input as
# bytes in binary mode, and convert later to unicode.
if input and input != '-':
self.dir = os.path.dirname(os.path.abspath(input))
try:
gzip.open(input).readline()
self.input = gzip.open(input)
self.output = gzip.GzipFile(mode="wb", fileobj=self.output)
self.compressed = True
except:
self.input = open(input)
self.input = open(input, 'rb')
self.compressed = False
else:
self.dir = ''
self.input = sys.stdin
self.input = os.fdopen(sys.stdin.fileno(), 'rb')
self.compressed = False
def lyxformat(self, format):
@ -470,11 +544,14 @@ class LyX_base:
def read_format(self):
" Read from the header the fileformat of the present LyX file."
for line in self.header:
result = fileformat.match(line)
if PY2:
result = fileformat.match(line)
else:
result = fileformat.match(line.decode('ascii'))
if result:
return self.lyxformat(result.group(1))
else:
self.error("Invalid LyX File.")
self.error("Invalid LyX File: Missing format.")
return None

View File

@ -33,7 +33,6 @@ if not PY2:
unichr = chr
else:
text_type = unicode
unichr = unichr
# End of code to support for both python 2 and 3
####################################################################

View File

@ -27,11 +27,7 @@ from parser_tools import find_token, find_end_of, find_tokens, get_value
# Provide support for both python 2 and 3
PY2 = sys.version_info[0] == 2
if not PY2:
text_type = str
unichr = chr
else:
text_type = unicode
unichr = unichr
# End of code to support for both python 2 and 3
####################################################################

View File

@ -41,12 +41,6 @@ from parser_tools import find_token, find_token_backwards, find_re, \
# Provide support for both python 2 and 3
PY2 = sys.version_info[0] == 2
if not PY2:
text_type = str
unichr = chr
else:
text_type = unicode
unichr = unichr
# End of code to support for both python 2 and 3
####################################################################
@ -1164,8 +1158,8 @@ def convert_origin(document):
origin = document.dir.replace('\\', '/') + '/'
else:
origin = os.path.join("/systemlyxdir", relpath).replace('\\', '/') + '/'
if os.name != 'nt':
origin = text_type(origin, sys.getfilesystemencoding())
if os.name != 'nt' and PY2:
origin = unicode(origin, sys.getfilesystemencoding())
document.header[i:i] = ["\\origin " + origin]