Separation of the various names for encodings.

Provide functions for translating to the LyX name
of an encoding from either a LaTeX name or an iconv
name, with the possibility to specify the package.
This is in anticipation of switching to the LyX
name of the encoding in the .lyx file format and
allowing multiple lib/encodings entries to have
the same LaTeX name (but different packages!).

The tex2lyx parser needs to worry about the iconv
name of the input encoding, so store that instead
of the LaTeX name.
This commit is contained in:
Julien Rioux 2013-01-19 19:47:15 +01:00
parent b42604c7aa
commit 2eea1590b1
10 changed files with 79 additions and 46 deletions

View File

@ -37,6 +37,8 @@ using namespace lyx::support;
namespace lyx {
int const Encoding::any;
Encodings encodings;
Encodings::MathCommandSet Encodings::mathcmd;
@ -852,7 +854,7 @@ Encodings::fromLyXName(string const & name, bool allowUnsafe) const
Encoding const *
Encodings::fromLaTeXName(string const & n, bool allowUnsafe) const
Encodings::fromLaTeXName(string const & n, int const & p, bool allowUnsafe) const
{
string name = n;
// FIXME: if we have to test for too many of these synonyms,
@ -867,11 +869,21 @@ Encodings::fromLaTeXName(string const & n, bool allowUnsafe) const
// most at the top of lib/encodings.
EncodingList::const_iterator const end = encodinglist.end();
for (EncodingList::const_iterator it = encodinglist.begin(); it != end; ++it)
if (it->second.latexName() == name) {
if (!allowUnsafe && it->second.unsafe())
return 0;
if ((it->second.latexName() == name) && (it->second.package() & p)
&& (!it->second.unsafe() || allowUnsafe))
return &it->second;
return 0;
}
Encoding const *
Encodings::fromIconvName(string const & n, int const & p, bool allowUnsafe) const
{
EncodingList::const_iterator const end = encodinglist.end();
for (EncodingList::const_iterator it = encodinglist.begin(); it != end; ++it)
if ((it->second.iconvName() == n) && (it->second.package() & p)
&& (!it->second.unsafe() || allowUnsafe))
return &it->second;
}
return 0;
}

View File

@ -44,11 +44,13 @@ class Encoding {
public:
/// Which LaTeX package handles this encoding?
enum Package {
none,
inputenc,
CJK,
japanese
none = 1,
inputenc = 2,
CJK = 4,
japanese = 8
};
/// Represent any of the above packages
static int const any = -1;
///
Encoding() {}
///
@ -172,9 +174,12 @@ public:
/// Get encoding from LyX name \p name
Encoding const *
fromLyXName(std::string const & name, bool allowUnsafe = false) const;
/// Get encoding from LaTeX name \p name
Encoding const *
fromLaTeXName(std::string const & name, bool allowUnsafe = false) const;
/// Get encoding from LaTeX name \p name and package \p package
Encoding const * fromLaTeXName(std::string const & name,
int const & package = Encoding::any, bool allowUnsafe = false) const;
/// Get encoding from iconv name \p name and package \p package
Encoding const * fromIconvName(std::string const & name,
int const & package = Encoding::any, bool allowUnsafe = false) const;
///
const_iterator begin() const { return encodinglist.begin(); }

View File

@ -13,7 +13,6 @@
#include <config.h>
#include "Layout.h"
#include "Encoding.h"
#include "FontInfo.h"
#include "Language.h"
#include "Lexer.h"

View File

@ -30,7 +30,6 @@
#include "Cursor.h"
#include "CutAndPaste.h"
#include "DispatchResult.h"
#include "Encoding.h"
#include "ErrorList.h"
#include "FuncRequest.h"
#include "factory.h"

View File

@ -158,7 +158,7 @@ void debugToken(std::ostream & os, Token const & t, unsigned int flags)
Parser::Parser(idocstream & is)
: lineno_(0), pos_(0), iss_(0), is_(is), encoding_latex_("utf8")
: lineno_(0), pos_(0), iss_(0), is_(is), encoding_iconv_("UTF-8")
{
}
@ -166,7 +166,7 @@ Parser::Parser(idocstream & is)
Parser::Parser(string const & s)
: lineno_(0), pos_(0),
iss_(new idocstringstream(from_utf8(s))), is_(*iss_),
encoding_latex_("utf8")
encoding_iconv_("UTF-8")
{
}
@ -177,20 +177,26 @@ Parser::~Parser()
}
void Parser::setEncoding(std::string const & e)
void Parser::setEncoding(std::string const & e, int const & p)
{
// We may (and need to) use unsafe encodings here: Since the text is
// converted to unicode while reading from is_, we never see text in
// the original encoding of the parser, but operate on utf8 strings
// instead. Therefore, we cannot misparse high bytes as {, } or \\.
Encoding const * enc = encodings.fromLaTeXName(e, true);
Encoding const * const enc = encodings.fromLaTeXName(e, p, true);
if (!enc) {
cerr << "Unknown encoding " << e << ". Ignoring." << std::endl;
return;
}
//cerr << "setting encoding to " << enc->iconvName() << std::endl;
is_ << lyx::setEncoding(enc->iconvName());
encoding_latex_ = e;
setEncoding(enc->iconvName());
}
void Parser::setEncoding(std::string const & e)
{
//cerr << "setting encoding to " << e << std::endl;
is_ << lyx::setEncoding(e);
encoding_iconv_ = e;
}

View File

@ -135,10 +135,13 @@ public:
///
~Parser();
/// change the latex encoding of the input stream
/// change the iconv encoding of the input stream
/// according to the latex encoding and package
void setEncoding(std::string const & encoding, int const & package);
/// change the iconv encoding of the input stream
void setEncoding(std::string const & encoding);
/// get the current latex encoding of the input stream
std::string getEncoding() const { return encoding_latex_; }
/// get the current iconv encoding of the input stream
std::string getEncoding() const { return encoding_iconv_; }
///
int lineno() const { return lineno_; }
@ -271,8 +274,8 @@ private:
idocstringstream * iss_;
///
idocstream & is_;
/// latex name of the current encoding
std::string encoding_latex_;
/// iconv name of the current encoding
std::string encoding_iconv_;
};

View File

@ -16,6 +16,7 @@
#include "Preamble.h"
#include "tex2lyx.h"
#include "Encoding.h"
#include "LayoutFile.h"
#include "Layout.h"
#include "Lexer.h"
@ -651,7 +652,7 @@ void Preamble::handle_package(Parser &p, string const & name,
h_use_non_tex_fonts = "true";
registerAutomaticallyLoadedPackage("fontspec");
if (h_inputencoding == "auto")
p.setEncoding("utf8");
p.setEncoding("UTF-8");
}
// roman fonts
@ -756,7 +757,7 @@ void Preamble::handle_package(Parser &p, string const & name,
xetex = true;
registerAutomaticallyLoadedPackage("xunicode");
if (h_inputencoding == "auto")
p.setEncoding("utf8");
p.setEncoding("UTF-8");
}
else if (name == "CJK") {
@ -769,7 +770,7 @@ void Preamble::handle_package(Parser &p, string const & name,
else if (name == "CJKutf8") {
h_inputencoding = "UTF8";
p.setEncoding(h_inputencoding);
p.setEncoding("UTF-8");
registerAutomaticallyLoadedPackage("CJKutf8");
}
@ -793,7 +794,7 @@ void Preamble::handle_package(Parser &p, string const & name,
if (opts.find(",") == string::npos && one_language == true)
h_inputencoding = opts;
if (!options.empty())
p.setEncoding(options.back());
p.setEncoding(options.back(), Encoding::inputenc);
options.clear();
}
@ -1421,7 +1422,7 @@ void Preamble::parse(Parser & p, string const & forceclass,
else if (t.cs() == "inputencoding") {
string const encoding = p.getArg('{','}');
h_inputencoding = encoding;
p.setEncoding(encoding);
p.setEncoding(encoding, Encoding::inputenc);
}
else if (t.cs() == "newenvironment") {

View File

@ -833,14 +833,17 @@ bool tex2lyx(idocstream & is, ostream & os, string encoding)
{
// Set a sensible default encoding.
// This is used until an encoding command is found.
// For child documents use the encoding of the master, else latin1,
// since latin1 does not cause an iconv error if the actual encoding
// is different (bug 7509).
// For child documents use the encoding of the master, else ISO8859-1,
// (formerly known by its latex name latin1), since ISO8859-1 does not
// cause an iconv error if the actual encoding is different (bug 7509).
if (encoding.empty()) {
if (preamble.inputencoding() == "auto")
encoding = "latin1";
else
encoding = preamble.inputencoding();
encoding = "ISO8859-1";
else {
Encoding const * const enc = encodings.fromLaTeXName(
preamble.inputencoding(), Encoding::any, true);
encoding = enc->iconvName();
}
}
Parser p(is);
@ -1077,8 +1080,13 @@ int main(int argc, char * argv[])
return EXIT_FAILURE;
}
encodings.read(enc_path, symbols_path);
if (!default_encoding.empty() && !encodings.fromLaTeXName(default_encoding))
error_message("Unknown LaTeX encoding `" + default_encoding + "'");
if (!default_encoding.empty()) {
Encoding const * const enc = encodings.fromLaTeXName(
default_encoding, Encoding::any, true);
if (!enc)
error_message("Unknown LaTeX encoding `" + default_encoding + "'");
default_encoding = enc->iconvName();
}
// Load the layouts
LayoutFileList::get().read();

View File

@ -195,7 +195,7 @@ extern bool skipChildren();
/*!
* Reads tex input from \a infilename and writes lyx output to \a outfilename.
* The (latex) encoding can be provided as \a encoding.
* The iconv name of the encoding can be provided as \a encoding.
* Uses some common settings for the preamble, so this should only
* be used more than once for included documents.
* Caution: Overwrites the existing preamble settings if the new document

View File

@ -1465,10 +1465,10 @@ void parse_environment(Parser & p, ostream & os, bool outer,
// you set buggy_encoding to false for JIS.
bool const buggy_encoding = encoding == "JIS";
if (!buggy_encoding)
p.setEncoding(encoding);
p.setEncoding(encoding, Encoding::CJK);
else {
// FIXME: This will read garbage, since the data is not encoded in utf8.
p.setEncoding("utf8");
p.setEncoding("UTF-8");
}
// LyX only supports the same mapping for all CJK
// environments, so we might need to output everything as ERT
@ -3706,7 +3706,7 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
else if (t.cs() == "inputencoding") {
// nothing to write here
string const enc = subst(p.verbatim_item(), "\n", " ");
p.setEncoding(enc);
p.setEncoding(enc, Encoding::inputenc);
}
else if ((where = is_known(t.cs(), known_special_chars))) {
@ -4505,7 +4505,7 @@ string guessLanguage(Parser & p, string const & lang)
if (t.cat() == catEscape) {
if (t.cs() == "inputencoding") {
string const enc = subst(p.verbatim_item(), "\n", " ");
p.setEncoding(enc);
p.setEncoding(enc, Encoding::inputenc);
continue;
}
if (t.cs() != "begin")
@ -4535,9 +4535,9 @@ string guessLanguage(Parser & p, string const & lang)
char const * const * const where =
is_known(encoding, supported_CJK_encodings);
if (where)
p.setEncoding(encoding);
p.setEncoding(encoding, Encoding::CJK);
else
p.setEncoding("utf8");
p.setEncoding("UTF-8");
string const text = p.verbatimEnvironment("CJK");
p.setEncoding(encoding_old);
p.skip_spaces();