Add commandline switch to read files from CJK-LyX

git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/trunk@17162 a592a061-630c-0410-9148-cb99ea01b6c8
This commit is contained in:
Georg Baum 2007-02-13 16:57:48 +00:00
parent b5777b0909
commit 5b79207f3d
3 changed files with 40 additions and 16 deletions

View File

@ -21,6 +21,7 @@ from parser_tools import get_value, check_token, find_token,\
find_tokens, find_end_of
import os.path
import gzip
import locale
import sys
import re
import time
@ -108,9 +109,17 @@ def trim_eol(line):
return line[:-1]
def get_encoding(language, inputencoding, format):
def get_encoding(language, inputencoding, format, cjk_encoding):
if format > 248:
return "utf8"
# CJK-LyX encodes files using the current locale encoding.
# This means that files created by CJK-LyX can only be converted using
# the correct locale settings unless the encoding is given as a
# command-line argument.
if cjk_encoding == 'auto':
return locale.getpreferredencoding()
elif cjk_encoding != '':
return cjk_encoding
from lyx2lyx_lang import lang
if inputencoding == "auto" or inputencoding == "default":
return lang[language][3]
@ -128,9 +137,9 @@ class LyX_Base:
"""This class carries all the information of the LyX file."""
def __init__(self, end_format = 0, input = "", output = "", error
= "", debug = default_debug_level, try_hard = 0, language = "english",
encoding = "auto"):
= "", debug = default_debug_level, try_hard = 0, cjk_encoding = '',
language = "english", encoding = "auto"):
"""Arguments:
end_format: final format that the file should be converted. (integer)
input: the name of the input source, if empty resort to standard input.
@ -147,6 +156,7 @@ class LyX_Base:
self.debug = debug
self.try_hard = try_hard
self.cjk_encoding = cjk_encoding
if end_format:
self.end_format = self.lyxformat(end_format)
@ -226,7 +236,7 @@ class LyX_Base:
self.format = self.read_format()
self.language = get_value(self.header, "\\language", 0, default = "english")
self.inputencoding = get_value(self.header, "\\inputencoding", 0, default = "auto")
self.encoding = get_encoding(self.language, self.inputencoding, self.format)
self.encoding = get_encoding(self.language, self.inputencoding, self.format, self.cjk_encoding)
self.initial_version = self.read_version()
# Second pass over header and preamble, now we know the file encoding
@ -248,7 +258,7 @@ class LyX_Base:
self.set_version()
self.set_format()
if self.encoding == "auto":
self.encoding = get_encoding(self.language, self.encoding, self.format)
self.encoding = get_encoding(self.language, self.encoding, self.format, self.cjk_encoding)
if self.preamble:
i = find_token(self.header, '\\textclass', 0) + 1
@ -532,8 +542,8 @@ class LyX_Base:
class File(LyX_Base):
" This class reads existing LyX files."
def __init__(self, end_format = 0, input = "", output = "", error = "", debug = default_debug_level, try_hard = 0):
LyX_Base.__init__(self, end_format, input, output, error, debug, try_hard)
def __init__(self, end_format = 0, input = "", output = "", error = "", debug = default_debug_level, try_hard = 0, cjk_encoding = ''):
LyX_Base.__init__(self, end_format, input, output, error, debug, try_hard, cjk_encoding)
self.read()

View File

@ -36,18 +36,23 @@ Options:
-t, --to version final version (optional)
-o, --output name name of the output file or else goes to stdout
-n, --try-hard try hard (ignore any convertion errors)
-c, --cjk [encoding] files in format 248 and lower are read and
written in the format of CJK-LyX.
If encoding is not given or 'auto' the encoding
is determined from the locale.
-q, --quiet same as --debug=0"""
def parse_options(argv):
_options = ["help", "version", "list", "debug=", "err=", "from=", "to=", "output=", "try-hard", "quiet"]
_options = ["help", "version", "list", "debug=", "err=", "from=", "to=", "output=", "try-hard", "cjk", "quiet"]
try:
opts, args = getopt.getopt(argv[1:], "d:e:f:hlno:qt:v", _options)
opts, args = getopt.getopt(argv[1:], "c:d:e:f:hlno:qt:v", _options)
except getopt.error:
usage()
sys.exit(2)
end_format, input, output, error, debug, try_hard = 0, "", "", "", LyX.default_debug_level, 0
cjk_encoding = ''
for o, a in opts:
if o in ("-h", "--help"):
usage()
@ -71,15 +76,20 @@ def parse_options(argv):
error = a
if o in ("-n", "--try-hard"):
try_hard = 1
if o in ("-c", "--cjk"):
if a == '':
cjk_encoding = 'auto'
else:
cjk_encoding = a
if args:
input = args[0]
return end_format, input, output, error, debug, try_hard
return end_format, input, output, error, debug, try_hard, cjk_encoding
def main(argv):
end_format, input, output, error, debug, try_hard = parse_options(argv)
file = LyX.File(end_format, input, output, error, debug, try_hard)
end_format, input, output, error, debug, try_hard, cjk_encoding = parse_options(argv)
file = LyX.File(end_format, input, output, error, debug, try_hard, cjk_encoding)
file.convert()
file.write()

View File

@ -230,6 +230,8 @@ where at least two languages have different default encodings are encoded
in multiple encodings for file formats < 249. These files are incorrectly
read and written (as if the whole file was in the encoding of the main
language).
This is not true for files written by CJK-LyX; they are always in the locale
encoding.
This function
- converts from fake unicode values to true unicode if forward is true, and
@ -239,6 +241,8 @@ document.encoding must be set to the old value (format 248) in both cases.
We do this here and not in LyX.py because it is far easier to do the
necessary parsing in modern formats than in ancient ones.
"""
if document.cjk_encoding != '':
return
encoding_stack = [document.encoding]
lang_re = re.compile(r"^\\lang\s(\S+)")
if document.inputencoding == "auto" or document.inputencoding == "default":
@ -292,7 +296,7 @@ def revert_utf8(document):
elif get_value(document.header, "\\inputencoding", i) == "utf8":
document.header[i] = "\\inputencoding auto"
document.inputencoding = get_value(document.header, "\\inputencoding", 0)
document.encoding = get_encoding(document.language, document.inputencoding, 248)
document.encoding = get_encoding(document.language, document.inputencoding, 248, document.cjk_encoding)
convert_multiencoding(document, False)
@ -1016,11 +1020,11 @@ def revert_accent(document):
# Replace accented characters with InsetLaTeXAccent
# Do not convert characters that can be represented in the chosen
# encoding.
encoding_stack = [get_encoding(document.language, document.inputencoding, 248)]
encoding_stack = [get_encoding(document.language, document.inputencoding, 248, document.cjk_encoding)]
lang_re = re.compile(r"^\\lang\s(\S+)")
for i in range(len(document.body)):
if document.inputencoding == "auto" or document.inputencoding == "default":
if (document.inputencoding == "auto" or document.inputencoding == "default") and document.cjk_encoding != '':
# Track the encoding of the current line
result = lang_re.match(document.body[i])
if result: