Add support for input encoding handling by lyx2lyx.

Support all LyX files (support now goes back to lyx-0.6, the first publicly available LyX version).

Fine-tune the guessing of the LyX version used to produce the document (only useful for documents in file formats 2.10 and 2.15).

Support for conversion to utf-8 encoding was added, but it is not (yet) active.

git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/trunk@14558 a592a061-630c-0410-9148-cb99ea01b6c8
This commit is contained in:
José Matos 2006-08-07 14:10:41 +00:00
parent 9e832550ad
commit 6785e442cf
10 changed files with 119 additions and 40 deletions

View File

@ -1,6 +1,7 @@
# This file is part of lyx2lyx
# -*- coding: utf-8 -*-
# Copyright (C) 2002-2004 Dekel Tsur <dekel@lyx.org>, José Matos <jamatos@lyx.org>
# Copyright (C) 2002-2004 Dekel Tsur <dekel@lyx.org>
# Copyright (C) 2002-2006 José Matos <jamatos@lyx.org>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
@ -36,6 +37,18 @@ def find_end_of_inset(lines, i):
" Find beginning of inset, where lines[i] is included."
return find_end_of(lines, i, "\\begin_inset", "\\end_inset")
def generate_minor_versions(major, last_minor_version):
    """ Generate minor versions, using major as prefix and minor
    versions from 0 until last_minor_version (inclusive), plus the
    generic version.

    The generic version (major itself) comes first in the returned list.

    Example:
        generate_minor_versions("1.2", 4) ->
            ["1.2", "1.2.0", "1.2.1", "1.2.2", "1.2.3", "1.2.4"]
    """
    # range() excludes its upper bound, hence the + 1 to include
    # last_minor_version itself.
    minors = ["%s.%d" % (major, i) for i in range(last_minor_version + 1)]
    return [major] + minors
# End of helper functions
####################################################################
@ -43,24 +56,24 @@ def find_end_of_inset(lines, i):
# Regular expressions used
format_re = re.compile(r"(\d)[\.,]?(\d\d)")
fileformat = re.compile(r"\\lyxformat\s*(\S*)")
original_version = re.compile(r"\#LyX (\S*)")
original_version = re.compile(r".*?LyX ([\d.]*)")
##
# file format information:
# file, supported formats, stable release versions
format_relation = [("0_08", [210], ["0.8.%d" % i for i in range(7)] + ["0.8"]),
("0_10", [210], ["0.10.%d" % i for i in range(8)] + ["0.10"]),
("0_12", [215], ["0.12.0","0.12.1","0.12"]),
("1_0_0", [215], ["1.0.0","1.0"]),
("1_0_1", [215], ["1.0.1","1.0.2","1.0.3","1.0.4", "1.1.2","1.1"]),
("1_1_4", [215], ["1.1.4","1.1"]),
("1_1_5", [216], ["1.1.5","1.1.5fix1","1.1.5fix2","1.1"]),
("1_1_6_0", [217], ["1.1.6","1.1.6fix1","1.1.6fix2","1.1"]),
("1_1_6_3", [218], ["1.1.6fix3","1.1.6fix4","1.1"]),
("1_2", [220], ["1.2.%d" % i for i in range(5)] + ["1.2"]),
("1_3", [221], ["1.3.%d" % i for i in range(8)] + ["1.3"]),
("1_4", range(222,246), ["1.4.0", "1.4.1", "1.4.2","1.4.3svn"]),
("1_5", range(246,249), ["1.5.0svn","1.5"])]
# Each entry is a tuple (file, supported formats, stable release versions):
# the first item is used as the conversion step for a document whose file
# format is found in the second item; the third item lists the LyX versions
# associated with that step.
format_relation = [("0_06", [200], generate_minor_versions("0.6" , 4)),
("0_08", [210], generate_minor_versions("0.8" , 6) + ["0.7"]),
("0_10", [210], generate_minor_versions("0.10", 7) + ["0.9"]),
("0_12", [215], generate_minor_versions("0.12", 1) + ["0.11"]),
("1_0", [215], generate_minor_versions("1.0" , 4)),
("1_1", [215], generate_minor_versions("1.1" , 4)),
("1_1_5", [216], ["1.1.5","1.1.5.1","1.1.5.2","1.1"]),
("1_1_6_0", [217], ["1.1.6","1.1.6.1","1.1.6.2","1.1"]),
("1_1_6_3", [218], ["1.1.6.3","1.1.6.4","1.1"]),
("1_2", [220], generate_minor_versions("1.2" , 4)),
("1_3", [221], generate_minor_versions("1.3" , 7)),
("1_4", range(222,246), generate_minor_versions("1.4" , 3)),
("1_5", range(246,249), generate_minor_versions("1.5" , 0))]
def formats_list():
@ -95,6 +108,17 @@ def trim_eol(line):
return line[:-1]
def get_encoding(language, inputencoding):
    """ Return the encoding name used to decode the document.

    language      -- document language; only consulted when inputencoding
                     is "auto", as the key into lyx2lyx_lang.lang.
    inputencoding -- value of the \\inputencoding header setting.

    "default" and the empty string map to "latin1"; "latin9" is translated
    to "iso-8859-15"; any other value is returned unchanged.
    """
    if inputencoding == "auto":
        # Only this branch needs the language table, so the import is kept
        # local to it; explicit encodings resolve without loading it.
        from lyx2lyx_lang import lang
        return lang[language][3]
    if inputencoding in ("default", ""):
        return "latin1"
    # python does not know the alias latin9
    if inputencoding == "latin9":
        return "iso-8859-15"
    return inputencoding
##
# Class
#
@ -189,20 +213,27 @@ class LyX_Base:
self.header.append(line)
while 1:
line = self.input.readline()
if not line:
break
self.body.append(trim_eol(line))
self.textclass = get_value(self.header, "\\textclass", 0)
self.backend = get_backend(self.textclass)
self.format = self.read_format()
self.language = get_value(self.header, "\\language", 0)
if self.language == "":
self.language = "english"
self.language = get_value(self.header, "\\language", 0, default = "english")
self.inputencoding = get_value(self.header, "\\inputencoding", 0, default = "auto")
self.encoding = get_encoding(self.language, self.inputencoding)
self.initial_version = self.read_version()
# Second pass over header and preamble, now we know the file encoding
for i in range(len(self.header)):
self.header[i] = self.header[i].decode(self.encoding)
for i in range(len(self.preamble)):
self.preamble[i] = self.preamble[i].decode(self.encoding)
# Read document body
while 1:
line = self.input.readline().decode(self.encoding)
if not line:
break
self.body.append(trim_eol(line))
def write(self):
" Writes the LyX file to self.output."
@ -220,7 +251,7 @@ class LyX_Base:
header = self.header
for line in header + [''] + self.body:
self.output.write(line+"\n")
self.output.write(line.encode(self.encoding)+"\n")
def choose_io(self, input, output):
@ -250,6 +281,8 @@ class LyX_Base:
result = format_re.match(format)
if result:
format = int(result.group(1) + result.group(2))
elif format == '2':
format = 200
else:
self.error(str(format) + ": " + "Invalid LyX file.")
@ -267,9 +300,19 @@ class LyX_Base:
if line[0] != "#":
return None
line = line.replace("fix",".")
result = original_version.match(line)
if result:
return result.group(1)
# Special known cases: reLyX and KLyX
if line.find("reLyX") != -1 or line.find("KLyX") != -1:
return "0.12"
res = result.group(1)
if not res:
self.warning(line)
#self.warning("Version %s" % result.group(1))
return res
self.warning(str(self.header[:2]))
return None
@ -374,7 +417,7 @@ class LyX_Base:
if not correct_version:
if format <= 215:
self.warning("Version does not match file format, discarding it.")
self.warning("Version does not match file format, discarding it. (Version %s, format %d)" %(self.initial_version, self.format))
for rel in format_relation:
if format in rel[1]:
initial_step = rel[0]

View File

@ -1,6 +1,6 @@
# This file is part of lyx2lyx
# -*- coding: utf-8 -*-
# Copyright (C) 2004 José Matos <jamatos@lyx.org>
# Copyright (C) 2006 José Matos <jamatos@lyx.org>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
@ -16,10 +16,10 @@
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
""" Convert files to the file format generated by lyx 1.1.4"""
""" Convert files to the file format generated by lyx 0.6"""
supported_versions = ["1.1.4","1.1"]
convert = [[215, []]]
supported_versions = ["0.6.%d" % i for i in range(5)] + ["0.6"]
convert = [[200, []]]
revert = []

View File

@ -18,8 +18,14 @@
""" Convert files to the file format generated by lyx 0.8"""
def add_inputencoding(document):
    """ Declare latin1 as the input encoding of the document.

    Sets document.inputencoding and appends the matching
    \\inputencoding line to document.header.
    """
    header_line = '\\inputencoding latin1'
    document.inputencoding = "latin1"
    document.header.append(header_line)
supported_versions = ["0.8.%d" % i for i in range(7)] + ["0.8"]
convert = [[210, []]]
convert = [[210, [add_inputencoding]]]
revert = []

View File

@ -118,6 +118,7 @@ def regularise_body(document):
j = l
continue
document.warning("unkown inset %s" % line)
assert(False)
# We are inside a latex inset, pass the text verbatim

View File

@ -308,6 +308,24 @@ def obsolete_latex_title(document):
i = i + 1
def remove_inset_latex(document):
    """ Replace every Latex inset in the body with a LaTeX layout.

    The '\\begin_inset Latex' line is rewritten in place as
    '\\layout LaTeX', and the next '\\end_inset' line found from there
    is deleted. document.body is modified in place.
    """
    lines = document.body
    pos = 0
    while True:
        start = find_token(lines, '\\begin_inset Latex', pos)
        if start == -1:
            return
        lines[start] = lines[start].replace('\\begin_inset Latex',
                                            '\\layout LaTeX')

        end = find_token(lines, '\\end_inset', start)
        if end == -1:
            # this should not happen: the inset was never closed
            return
        del lines[end]
        # Resume the search at the position of the removed line.
        pos = end
supported_versions = ["0.12.0","0.12.1","0.12"]
convert = [[215, [header_update, add_end_document, remove_cursor,
final_dot, update_inset_label, update_latexdel,
@ -315,7 +333,7 @@ convert = [[215, [header_update, add_end_document, remove_cursor,
formula_inset_space_eat, update_tabular,
update_vfill, remove_empty_insets,
remove_formula_latex, update_latexaccents,
obsolete_latex_title]]]
obsolete_latex_title, remove_inset_latex]]]
revert = []

View File

@ -76,7 +76,7 @@ def update_tabular(document):
lines[i] = lines[i].strip()
supported_versions = ["1.0.0","1.0"]
supported_versions = ["1.0.%d" % i for i in range(5)] + ["1.0"]
convert = [[215, [obsolete_latex_title, update_tabular]]]
revert = []

View File

@ -16,9 +16,9 @@
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
""" Convert files to the file format generated by lyx 1.0.1"""
""" Convert files to the file format generated by lyx 1.1 series, until 1.1.4"""
supported_versions = ["1.0.1","1.0.2","1.0.3","1.0.4", "1.1.2","1.1"]
supported_versions = ["1.1.%d" % i for i in range(5)] + ["1.1"]
convert = [[215, []]]
revert = []

View File

@ -16,7 +16,7 @@
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
""" Convert files to the file format generated by lyx 1.1.6"""
""" Convert files to the file format generated by lyx 1.1.6, until fix2"""
import re
from parser_tools import find_re, find_tokens, find_token, check_token

View File

@ -16,7 +16,7 @@
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
""" Convert files to the file format generated by lyx 1.1.6fix3"""
""" Convert files to the file format generated by lyx 1.1.6, fix3 and fix4"""
import re
from parser_tools import find_token, find_re

View File

@ -216,14 +216,25 @@ def revert_booktabs(document):
i = i + 1
def convert_utf8(document):
    """ Mark the document as utf-8 encoded.

    The \\inputencoding header line is replaced if there is one and
    appended otherwise; document.inputencoding and document.encoding
    are both set to "utf-8".
    """
    # NOTE(review): LaTeX's inputenc package spells this option "utf8";
    # confirm that "utf-8" is the value LyX expects in the header.
    pos = find_token(document.header, "\\inputencoding", 0)
    if pos != -1:
        document.header[pos] = "\\inputencoding utf-8"
    else:
        document.header.append("\\inputencoding utf-8")
    document.inputencoding = "utf-8"
    document.encoding = "utf-8"
##
# Conversion hub
#
supported_versions = ["1.5.0svn","1.5"]
supported_versions = ["1.5.0","1.5"]
convert = [[246, []],
[247, [convert_font_settings]],
[248, []]]
[248, []]
# ,[xxx, [convert_utf8]] uncomment to support conversion to utf-8
]
revert = [[247, [revert_booktabs]],
[246, [revert_font_settings]],