Separation of the various names for encodings.

Provide functions for translating to the LyX name of an encoding from either a LaTeX name or an iconv name, with the possibility to specify the package. This is in anticipation of switching to the LyX name of the encoding in the .lyx file format and of allowing multiple lib/encodings entries to have the same LaTeX name (but different packages!). The tex2lyx parser only needs to care about the iconv name of the input encoding, so store that instead of the LaTeX name.
2024-11-09 18:31:04 +00:00 · 2013-01-19 19:47:15 +01:00 · 2013-01-19 19:47:15 +01:00 · 2eea1590b1
commit 2eea1590b1
parent b42604c7aa
10 changed files with 79 additions and 46 deletions
--- a/src/Encoding.cpp
+++ b/src/Encoding.cpp
@ -37,6 +37,8 @@ using namespace lyx::support;

 namespace lyx {

+int const Encoding::any;
+
 Encodings encodings;

 Encodings::MathCommandSet Encodings::mathcmd;
@ -852,7 +854,7 @@ Encodings::fromLyXName(string const & name, bool allowUnsafe) const


 Encoding const *
-Encodings::fromLaTeXName(string const & n, bool allowUnsafe) const
+Encodings::fromLaTeXName(string const & n, int const & p, bool allowUnsafe) const
 {
 	string name = n;
 	// FIXME: if we have to test for too many of these synonyms,
@ -867,11 +869,21 @@ Encodings::fromLaTeXName(string const & n, bool allowUnsafe) const
 	// most at the top of lib/encodings.
 	EncodingList::const_iterator const end = encodinglist.end();
 	for (EncodingList::const_iterator it = encodinglist.begin(); it != end; ++it)
-		if (it->second.latexName() == name) {
-			if (!allowUnsafe && it->second.unsafe())
-				return 0;
+		if ((it->second.latexName() == name) && (it->second.package() & p)
+				&& (!it->second.unsafe() || allowUnsafe))
+			return &it->second;
+	return 0;
+}
+
+
+Encoding const *
+Encodings::fromIconvName(string const & n, int const & p, bool allowUnsafe) const
+{
+	EncodingList::const_iterator const end = encodinglist.end();
+	for (EncodingList::const_iterator it = encodinglist.begin(); it != end; ++it)
+		if ((it->second.iconvName() == n) && (it->second.package() & p)
+				&& (!it->second.unsafe() || allowUnsafe))
 			return &it->second;
-		}
 	return 0;
 }

--- a/src/Encoding.h
+++ b/src/Encoding.h
@ -44,11 +44,13 @@ class Encoding {
 public:
 	/// Which LaTeX package handles this encoding?
 	enum Package {
-		none,
-		inputenc,
-		CJK,
-		japanese
+		none = 1,
+		inputenc = 2,
+		CJK = 4,
+		japanese = 8
 	};
+	/// Represent any of the above packages
+	static int const any = -1;
 	///
 	Encoding() {}
 	///
@ -172,9 +174,12 @@ public:
 	/// Get encoding from LyX name \p name
 	Encoding const *
 	fromLyXName(std::string const & name, bool allowUnsafe = false) const;
-	/// Get encoding from LaTeX name \p name
-	Encoding const *
-	fromLaTeXName(std::string const & name, bool allowUnsafe = false) const;
+	/// Get encoding from LaTeX name \p name and package \p package
+	Encoding const * fromLaTeXName(std::string const & name,
+		int const & package = Encoding::any, bool allowUnsafe = false) const;
+	/// Get encoding from iconv name \p name and package \p package
+	Encoding const * fromIconvName(std::string const & name,
+		int const & package = Encoding::any, bool allowUnsafe = false) const;

 	///
 	const_iterator begin() const { return encodinglist.begin(); }
--- a/src/Layout.cpp
+++ b/src/Layout.cpp
@ -13,7 +13,6 @@
 #include <config.h>

 #include "Layout.h"
-#include "Encoding.h"
 #include "FontInfo.h"
 #include "Language.h"
 #include "Lexer.h"
--- a/src/Text.cpp
+++ b/src/Text.cpp
@ -30,7 +30,6 @@
 #include "Cursor.h"
 #include "CutAndPaste.h"
 #include "DispatchResult.h"
-#include "Encoding.h"
 #include "ErrorList.h"
 #include "FuncRequest.h"
 #include "factory.h"
--- a/src/tex2lyx/Parser.cpp
+++ b/src/tex2lyx/Parser.cpp
@ -158,7 +158,7 @@ void debugToken(std::ostream & os, Token const & t, unsigned int flags)


 Parser::Parser(idocstream & is)
-	: lineno_(0), pos_(0), iss_(0), is_(is), encoding_latex_("utf8")
+	: lineno_(0), pos_(0), iss_(0), is_(is), encoding_iconv_("UTF-8")
 {
 }

@ -166,7 +166,7 @@ Parser::Parser(idocstream & is)
 Parser::Parser(string const & s)
 	: lineno_(0), pos_(0),
 	  iss_(new idocstringstream(from_utf8(s))), is_(*iss_),
-	  encoding_latex_("utf8")
+	  encoding_iconv_("UTF-8")
 {
 }

@ -177,20 +177,26 @@ Parser::~Parser()
 }


-void Parser::setEncoding(std::string const & e)
+void Parser::setEncoding(std::string const & e, int const & p)
 {
 	// We may (and need to) use unsafe encodings here: Since the text is
 	// converted to unicode while reading from is_, we never see text in
 	// the original encoding of the parser, but operate on utf8 strings
 	// instead. Therefore, we cannot misparse high bytes as {, } or \\.
-	Encoding const * enc = encodings.fromLaTeXName(e, true);
+	Encoding const * const enc = encodings.fromLaTeXName(e, p, true);
 	if (!enc) {
 		cerr << "Unknown encoding " << e << ". Ignoring." << std::endl;
 		return;
 	}
-	//cerr << "setting encoding to " << enc->iconvName() << std::endl;
-	is_ << lyx::setEncoding(enc->iconvName());
-	encoding_latex_ = e;
+	setEncoding(enc->iconvName());
+}
+
+
+void Parser::setEncoding(std::string const & e)
+{
+	//cerr << "setting encoding to " << e << std::endl;
+	is_ << lyx::setEncoding(e);
+	encoding_iconv_ = e;
 }


--- a/src/tex2lyx/Parser.h
+++ b/src/tex2lyx/Parser.h
@ -135,10 +135,13 @@ public:
 	///
 	~Parser();

-	/// change the latex encoding of the input stream
+	/// change the iconv encoding of the input stream
+	/// according to the latex encoding and package
+	void setEncoding(std::string const & encoding, int const & package);
+	/// change the iconv encoding of the input stream
 	void setEncoding(std::string const & encoding);
-	/// get the current latex encoding of the input stream
-	std::string getEncoding() const { return encoding_latex_; }
+	/// get the current iconv encoding of the input stream
+	std::string getEncoding() const { return encoding_iconv_; }

 	///
 	int lineno() const { return lineno_; }
@ -271,8 +274,8 @@ private:
 	idocstringstream * iss_;
 	///
 	idocstream & is_;
-	/// latex name of the current encoding
-	std::string encoding_latex_;
+	/// iconv name of the current encoding
+	std::string encoding_iconv_;
 };


--- a/src/tex2lyx/Preamble.cpp
+++ b/src/tex2lyx/Preamble.cpp
@ -16,6 +16,7 @@
 #include "Preamble.h"
 #include "tex2lyx.h"

+#include "Encoding.h"
 #include "LayoutFile.h"
 #include "Layout.h"
 #include "Lexer.h"
@ -651,7 +652,7 @@ void Preamble::handle_package(Parser &p, string const & name,
 		h_use_non_tex_fonts = "true";
 		registerAutomaticallyLoadedPackage("fontspec");
 		if (h_inputencoding == "auto")
-			p.setEncoding("utf8");
+			p.setEncoding("UTF-8");
 	}

 	// roman fonts
@ -756,7 +757,7 @@ void Preamble::handle_package(Parser &p, string const & name,
 		xetex = true;
 		registerAutomaticallyLoadedPackage("xunicode");
 		if (h_inputencoding == "auto")
-			p.setEncoding("utf8");
+			p.setEncoding("UTF-8");
 	}

 	else if (name == "CJK") {
@ -769,7 +770,7 @@ void Preamble::handle_package(Parser &p, string const & name,

 	else if (name == "CJKutf8") {
 		h_inputencoding = "UTF8";
-		p.setEncoding(h_inputencoding);
+		p.setEncoding("UTF-8");
 		registerAutomaticallyLoadedPackage("CJKutf8");
 	}

@ -793,7 +794,7 @@ void Preamble::handle_package(Parser &p, string const & name,
 		if (opts.find(",") == string::npos && one_language == true)
 			h_inputencoding = opts;
 		if (!options.empty())
-			p.setEncoding(options.back());
+			p.setEncoding(options.back(), Encoding::inputenc);
 		options.clear();
 	}

@ -1421,7 +1422,7 @@ void Preamble::parse(Parser & p, string const & forceclass,
 		else if (t.cs() == "inputencoding") {
 			string const encoding = p.getArg('{','}');
 			h_inputencoding = encoding;
-			p.setEncoding(encoding);
+			p.setEncoding(encoding, Encoding::inputenc);
 		}

 		else if (t.cs() == "newenvironment") {
--- a/src/tex2lyx/tex2lyx.cpp
+++ b/src/tex2lyx/tex2lyx.cpp
@ -833,14 +833,17 @@ bool tex2lyx(idocstream & is, ostream & os, string encoding)
 {
 	// Set a sensible default encoding.
 	// This is used until an encoding command is found.
-	// For child documents use the encoding of the master, else latin1,
-	// since latin1 does not cause an iconv error if the actual encoding
-	// is different (bug 7509).
+	// For child documents use the encoding of the master, else ISO8859-1,
+	// (formerly known by its latex name latin1), since ISO8859-1 does not
+	// cause an iconv error if the actual encoding is different (bug 7509).
 	if (encoding.empty()) {
 		if (preamble.inputencoding() == "auto")
-			encoding = "latin1";
-		else
-			encoding = preamble.inputencoding();
+			encoding = "ISO8859-1";
+		else {
+			Encoding const * const enc = encodings.fromLaTeXName(
+				preamble.inputencoding(), Encoding::any, true);
+			encoding = enc->iconvName();
+		}
 	}

 	Parser p(is);
@ -1077,8 +1080,13 @@ int main(int argc, char * argv[])
 		return EXIT_FAILURE;
 	}
 	encodings.read(enc_path, symbols_path);
-	if (!default_encoding.empty() && !encodings.fromLaTeXName(default_encoding))
-		error_message("Unknown LaTeX encoding `" + default_encoding + "'");
+	if (!default_encoding.empty()) {
+		Encoding const * const enc = encodings.fromLaTeXName(
+			default_encoding, Encoding::any, true);
+		if (!enc)
+			error_message("Unknown LaTeX encoding `" + default_encoding + "'");
+		default_encoding = enc->iconvName();
+	}

 	// Load the layouts
 	LayoutFileList::get().read();
--- a/src/tex2lyx/tex2lyx.h
+++ b/src/tex2lyx/tex2lyx.h
@ -195,7 +195,7 @@ extern bool skipChildren();

 /*!
 *  Reads tex input from \a infilename and writes lyx output to \a outfilename.
- *  The (latex) encoding can be provided as \a encoding.
+ *  The iconv name of the encoding can be provided as \a encoding.
 *  Uses some common settings for the preamble, so this should only
 *  be used more than once for included documents.
 *  Caution: Overwrites the existing preamble settings if the new document
--- a/src/tex2lyx/text.cpp
+++ b/src/tex2lyx/text.cpp
@ -1465,10 +1465,10 @@ void parse_environment(Parser & p, ostream & os, bool outer,
 		// you set buggy_encoding to false for JIS.
 		bool const buggy_encoding = encoding == "JIS";
 		if (!buggy_encoding)
-			p.setEncoding(encoding);
+			p.setEncoding(encoding, Encoding::CJK);
 		else {
 			// FIXME: This will read garbage, since the data is not encoded in utf8.
-			p.setEncoding("utf8");
+			p.setEncoding("UTF-8");
 		}
 		// LyX only supports the same mapping for all CJK
 		// environments, so we might need to output everything as ERT
@ -3706,7 +3706,7 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
 		else if (t.cs() == "inputencoding") {
 			// nothing to write here
 			string const enc = subst(p.verbatim_item(), "\n", " ");
-			p.setEncoding(enc);
+			p.setEncoding(enc, Encoding::inputenc);
 		}

 		else if ((where = is_known(t.cs(), known_special_chars))) {
@ -4505,7 +4505,7 @@ string guessLanguage(Parser & p, string const & lang)
 		if (t.cat() == catEscape) {
 			if (t.cs() == "inputencoding") {
 				string const enc = subst(p.verbatim_item(), "\n", " ");
-				p.setEncoding(enc);
+				p.setEncoding(enc, Encoding::inputenc);
 				continue;
 			}
 			if (t.cs() != "begin")
@ -4535,9 +4535,9 @@ string guessLanguage(Parser & p, string const & lang)
 		char const * const * const where =
 			is_known(encoding, supported_CJK_encodings);
 		if (where)
-			p.setEncoding(encoding);
+			p.setEncoding(encoding, Encoding::CJK);
 		else
-			p.setEncoding("utf8");
+			p.setEncoding("UTF-8");
 		string const text = p.verbatimEnvironment("CJK");
 		p.setEncoding(encoding_old);
 		p.skip_spaces();