Change lyx2lyx conversion and LaTeX export of documents with \inputencoding default

* src/paragraph_pimpl.C (isEncoding): Explain why bparams.inputenc == "default" is ignored
* src/bufferparams.C (BufferParams::encoding): Determine the encoding from the language for inputenc == "default"
* src/buffer.h (writeLaTeXSource): Mention inputenc == "default" in documentation
* src/bufferparams.h (inputenc): Update documentation of "default"
* src/output_latex.C (switchEncoding): Switch the encoding also for inputenc == "default", but don't output \inputencoding commands in that case
* lib/lyx2lyx/LyX.py (get_encoding): Determine the encoding from the language for inputencoding == "default"
* lib/lyx2lyx/lyx_1_5.py (convert_multiencoding): ditto
* development/FORMAT: Update documentation of \inputencoding default

git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/trunk@16667 a592a061-630c-0410-9148-cb99ea01b6c8
2024-11-09 18:31:04 +00:00 · 2007-01-13 14:36:54 +00:00 · 2007-01-13 14:36:54 +00:00 · 0ddb4d5f30
commit 0ddb4d5f30
parent 6c145ad6d8
8 changed files with 44 additions and 35 deletions
--- a/development/FORMAT
+++ b/development/FORMAT
@@ -78,11 +78,14 @@ LyX file-format changes
 	encoding of the LyX file:

 	\inputencoding       LyX file encoding
-	auto                 as determined by the document language(s)
-	default              unspecified 8bit (treated as latin1 internally,
-	                     see comment in bufferparams.h)
+	auto                 as determined by the document and character
+	                     languages
+	default              ditto
 	everything else      as determined by \inputencoding

+	The difference between auto and default is only the LaTeX output:
+	auto causes loading of the inputenc package, default does not.
+
 2006-07-03  Georg Baum  <Georg.Baum@post.rwth-aachen.de>

 	* format incremented to 248: Basic booktabs support
--- a/lib/lyx2lyx/LyX.py
+++ b/lib/lyx2lyx/LyX.py
@@ -112,9 +112,9 @@ def get_encoding(language, inputencoding, format):
    if format > 248:
        return "utf8"
    from lyx2lyx_lang import lang
-    if inputencoding == "auto":        
+    if inputencoding == "auto" or inputencoding == "default":
        return lang[language][3]
-    if inputencoding == "default" or inputencoding == "":
+    if inputencoding == "":
        return "latin1"
    # python does not know the alias latin9
    if inputencoding == "latin9":
--- a/lib/lyx2lyx/lyx_1_5.py
+++ b/lib/lyx2lyx/lyx_1_5.py
@@ -219,10 +219,11 @@ def revert_booktabs(document):

 def convert_multiencoding(document, forward):
    """ Fix files with multiple encodings.
-Files with an inputencoding of "auto" and multiple languages where at least
-two languages have different default encodings are encoded in multiple
-encodings for file formats < 249. These files are incorrectly read and
-written (as if the whole file was in the encoding of the main language).
+Files with an inputencoding of "auto" or "default" and multiple languages
+where at least two languages have different default encodings are encoded
+in multiple encodings for file formats < 249. These files are incorrectly
+read and written (as if the whole file was in the encoding of the main
+language).

 This function
 - converts from fake unicode values to true unicode if forward is true, and
@@ -234,7 +235,7 @@ necessary parsing in modern formats than in ancient ones.
 """
    encoding_stack = [document.encoding]
    lang_re = re.compile(r"^\\lang\s(\S+)")
-    if document.inputencoding == "auto":
+    if document.inputencoding == "auto" or document.inputencoding == "default":
        for i in range(len(document.body)):
            result = lang_re.match(document.body[i])
            if result:
--- a/src/buffer.h
+++ b/src/buffer.h
@@ -153,11 +153,12 @@ public:
 			   bool output_preamble = true,
 			   bool output_body = true);
 	/** Export the buffer to LaTeX.
-	    If \p os is a file stream, and params().inputenc == "auto", and
-	    the buffer contains text in different languages with more than
-	    one encoding, then this method will change the encoding
-	    associated to \p os. Therefore you must not call this method with
-	    a string stream if the output is supposed to go to a file. \code
+	    If \p os is a file stream, and params().inputenc is "auto" or
+	    "default", and the buffer contains text in different languages
+	    with more than one encoding, then this method will change the
+	    encoding associated to \p os. Therefore you must not call this
+	    method with a string stream if the output is supposed to go to a
+	    file. \code
 	    odocfstream ofs;
 	    ofs.open("test.tex");
 	    writeLaTeXSource(ofs, ...);
--- a/src/bufferparams.C
+++ b/src/bufferparams.C
@@ -1466,18 +1466,12 @@ string const BufferParams::loadFonts(LaTeXFeatures & features, string const & rm

 Encoding const & BufferParams::encoding() const
 {
-	if (inputenc == "auto")
+	if (inputenc == "auto" || inputenc == "default")
 		return *(language->encoding());
-	Encoding const * const enc = (inputenc == "default") ?
-		encodings.getFromLyXName("iso8859-1") :
+	Encoding const * const enc =
 		encodings.getFromLaTeXName(inputenc);
 	if (enc)
 		return *enc;
-	if (inputenc == "default")
-		lyxerr << "Could not find iso8859-1 encoding for inputenc "
-		          "value `default'. Using inputenc `auto' instead."
-		       << endl;
-	else
 	lyxerr << "Unknown inputenc value `" << inputenc
 	       << "'. Using `auto' instead." << endl;
 	return *(language->encoding());
--- a/src/bufferparams.h
+++ b/src/bufferparams.h
@@ -178,15 +178,18 @@ public:
 	BranchList const & branchlist() const;
 	/**
 	 * The input encoding for LaTeX. This can be one of
-	 * - auto: find out the input encoding from the used languages
-	 * - default: Don't load the inputenc package and hope that it will
-	 *   work (unlikely). The encoding is an unspecified 8bit encoding,
-	 *   the interpretation is up to the LaTeX compiler. Because we need
-	 *   a rule how to create this from our internal UCS4 encoded
-	 *   document contents we treat this as latin1 internally.
+	 * - \c auto: find out the input encoding from the used languages
+	 * - \c default: ditto
 	 * - any encoding supported by the inputenc package
 	 * The encoding of the LyX file is always utf8 and has nothing to
 	 * do with this setting.
+	 * The difference between \c auto and \c default is that \c auto also
+	 * causes loading of the inputenc package, while \c default does not.
+	 * \c default will not work unless the user takes additional measures
+	 * (such as using special environments like the CJK environment from
+	 * CJK.sty).
+	 * \c default can be seen as an unspecified 8bit encoding, since LyX
+	 * does not interpret it in any way apart from display on screen.
 	 */
 	std::string inputenc;
 	/// The main encoding used by this buffer for LaTeX output.
--- a/src/output_latex.C
+++ b/src/output_latex.C
@@ -600,16 +600,19 @@ int switchEncoding(odocstream & os, BufferParams const & bparams,
 	// ignore switches from/to tis620-0 encoding here. This does of
 	// course only work as long as the non-thai text contains ASCII
 	// only, but it is the best we can do.
-	if (bparams.inputenc == "auto" && oldEnc.name() != newEnc.name() &&
+	if ((bparams.inputenc == "auto" || bparams.inputenc == "default") &&
+	    oldEnc.name() != newEnc.name() &&
 	    oldEnc.name() != "tis620-0" && newEnc.name() != "tis620-0") {
 		lyxerr[Debug::LATEX] << "Changing LaTeX encoding from "
 		                     << oldEnc.name() << " to "
 		                     << newEnc.name() << endl;
 		os << setEncoding(newEnc.iconvName());
+		if (bparams.inputenc != "default") {
 			docstring const inputenc(from_ascii(newEnc.latexName()));
 			os << "\\inputencoding{" << inputenc << '}';
 			return 16 + inputenc.length();
 		}
+	}
 	return 0;
 }

--- a/src/paragraph_pimpl.C
+++ b/src/paragraph_pimpl.C
@@ -62,6 +62,10 @@ size_t const phrases_nr = sizeof(special_phrases)/sizeof(special_phrase);
 bool isEncoding(BufferParams const & bparams, LyXFont const & font,
 		string const & encoding)
 {
+	// We do ignore bparams.inputenc == "default" here because characters
+	// in this encoding could be treated by TeX as something different,
+	// e.g. if they are inside a CJK environment. See also
+	// http://bugzilla.lyx.org/show_bug.cgi?id=3043.
 	return (bparams.inputenc == encoding
 		|| (bparams.inputenc == "auto"
 		    && font.language()->encoding()->latexName() == encoding));