mirror of
https://git.lyx.org/repos/lyx.git
synced 2024-11-25 19:07:45 +00:00
Open tex2lyx input files with correct encoding
We now open the input file twice: The first time in latin1 encoding to read the document encoding from the preamble. This always works, since traditional TeX does not allow non-ASCII contents without an encoding changing command (except for comments, but we do not need them, and using latin1 rather than utf8 ensures that they do not produce an iconv exception, but are simply recorded with wrong characters), and we do detect the utf8 based TeX engines XeTeX and LuaTeX as well. The second time we open the file directly with the document encoding. This fixes a few tex2lyx tests on OS X, since changing the encoding of an open file stream does not work with clang on OS X. Files using more than one encoding are still broken, but all single-encoding files are fixed now.
This commit is contained in:
parent
92d0835e14
commit
de3e5280f6
@ -334,6 +334,12 @@ ifdocstream::ifdocstream() : base()
|
||||
}
|
||||
|
||||
|
||||
ifdocstream::ifdocstream(SetEnc const & enc) : base()
|
||||
{
|
||||
setEncoding(*this, enc.encoding, in);
|
||||
}
|
||||
|
||||
|
||||
ifdocstream::ifdocstream(const char* s, ios_base::openmode mode,
|
||||
string const & encoding)
|
||||
: base()
|
||||
@ -349,6 +355,12 @@ ofdocstream::ofdocstream(): base()
|
||||
}
|
||||
|
||||
|
||||
ofdocstream::ofdocstream(SetEnc const & enc) : base()
|
||||
{
|
||||
setEncoding(*this, enc.encoding, out);
|
||||
}
|
||||
|
||||
|
||||
ofdocstream::ofdocstream(const char* s, ios_base::openmode mode,
|
||||
string const & encoding)
|
||||
: base()
|
||||
|
@ -40,6 +40,8 @@ typedef std::basic_istream<char_type> idocstream;
|
||||
*/
|
||||
typedef std::basic_ostream<char_type> odocstream;
|
||||
|
||||
struct SetEnc;
|
||||
|
||||
/// File stream for reading UTF8-encoded files with automatic conversion to
|
||||
/// UCS4.
|
||||
/// Buffering must be switched off if the encoding is changed after
|
||||
@ -48,6 +50,9 @@ class ifdocstream : public std::basic_ifstream<char_type> {
|
||||
typedef std::basic_ifstream<char_type> base;
|
||||
public:
|
||||
ifdocstream();
|
||||
/// Create a stream with a specific encoding \p enc.
|
||||
/// We must not pass \p enc as string, to avoid confusing it with a file name.
|
||||
explicit ifdocstream(SetEnc const & enc);
|
||||
explicit ifdocstream(const char* s,
|
||||
std::ios_base::openmode mode = std::ios_base::in,
|
||||
std::string const & encoding = "UTF-8");
|
||||
@ -61,6 +66,9 @@ class ofdocstream : public std::basic_ofstream<char_type> {
|
||||
typedef std::basic_ofstream<char_type> base;
|
||||
public:
|
||||
ofdocstream();
|
||||
/// Create a stream with a specific encoding \p enc.
|
||||
/// We must not pass \p enc as string, to avoid confusing it with a file name.
|
||||
explicit ofdocstream(SetEnc const & enc);
|
||||
explicit ofdocstream(const char* s,
|
||||
std::ios_base::openmode mode = std::ios_base::out|std::ios_base::trunc,
|
||||
std::string const & encoding = "UTF-8");
|
||||
|
@ -672,7 +672,8 @@ void Preamble::handle_geometry(vector<string> & options)
|
||||
|
||||
|
||||
void Preamble::handle_package(Parser &p, string const & name,
|
||||
string const & opts, bool in_lyx_preamble)
|
||||
string const & opts, bool in_lyx_preamble,
|
||||
bool detectEncoding)
|
||||
{
|
||||
vector<string> options = split_options(opts);
|
||||
add_package(name, options);
|
||||
@ -909,9 +910,11 @@ void Preamble::handle_package(Parser &p, string const & name,
|
||||
string const encoding = options.back();
|
||||
Encoding const * const enc = encodings.fromLaTeXName(
|
||||
encoding, Encoding::inputenc, true);
|
||||
if (!enc)
|
||||
cerr << "Unknown encoding " << encoding << ". Ignoring." << std::endl;
|
||||
else {
|
||||
if (!enc) {
|
||||
if (!detectEncoding)
|
||||
cerr << "Unknown encoding " << encoding
|
||||
<< ". Ignoring." << std::endl;
|
||||
} else {
|
||||
if (!enc->unsafe() && options.size() == 1 && one_language == true)
|
||||
h_inputencoding = enc->name();
|
||||
p.setEncoding(enc->iconvName());
|
||||
@ -1026,7 +1029,7 @@ void Preamble::handle_package(Parser &p, string const & name,
|
||||
}
|
||||
|
||||
// We need to do something with the options...
|
||||
if (!options.empty())
|
||||
if (!options.empty() && !detectEncoding)
|
||||
cerr << "Ignoring options '" << join(options, ",")
|
||||
<< "' of package " << name << '.' << endl;
|
||||
|
||||
@ -1260,6 +1263,13 @@ void Preamble::parse(Parser & p, string const & forceclass,
|
||||
{
|
||||
// initialize fixed types
|
||||
special_columns_['D'] = 3;
|
||||
parse(p, forceclass, false, tc);
|
||||
}
|
||||
|
||||
|
||||
void Preamble::parse(Parser & p, string const & forceclass,
|
||||
bool detectEncoding, TeX2LyXDocClass & tc)
|
||||
{
|
||||
bool is_full_document = false;
|
||||
bool is_lyx_file = false;
|
||||
bool in_lyx_preamble = false;
|
||||
@ -1275,11 +1285,19 @@ void Preamble::parse(Parser & p, string const & forceclass,
|
||||
}
|
||||
p.reset();
|
||||
|
||||
if (detectEncoding && !is_full_document)
|
||||
return;
|
||||
|
||||
while (is_full_document && p.good()) {
|
||||
if (detectEncoding && h_inputencoding != "auto" &&
|
||||
h_inputencoding != "default")
|
||||
return;
|
||||
|
||||
Token const & t = p.get_token();
|
||||
|
||||
#ifdef FILEDEBUG
|
||||
cerr << "t: " << t << "\n";
|
||||
if (!detectEncoding)
|
||||
cerr << "t: " << t << '\n';
|
||||
#endif
|
||||
|
||||
//
|
||||
@ -1314,7 +1332,8 @@ void Preamble::parse(Parser & p, string const & forceclass,
|
||||
if (comment.size() > magicXeLaTeX.size()
|
||||
&& comment.substr(0, magicXeLaTeX.size()) == magicXeLaTeX
|
||||
&& h_inputencoding == "auto") {
|
||||
cerr << "XeLaTeX comment found, switching to UTF8\n";
|
||||
if (!detectEncoding)
|
||||
cerr << "XeLaTeX comment found, switching to UTF8\n";
|
||||
h_inputencoding = "utf8";
|
||||
}
|
||||
smatch sub;
|
||||
@ -1657,16 +1676,18 @@ void Preamble::parse(Parser & p, string const & forceclass,
|
||||
vector<string>::const_iterator end = vecnames.end();
|
||||
for (; it != end; ++it)
|
||||
handle_package(p, trimSpaceAndEol(*it), options,
|
||||
in_lyx_preamble);
|
||||
in_lyx_preamble, detectEncoding);
|
||||
}
|
||||
|
||||
else if (t.cs() == "inputencoding") {
|
||||
string const encoding = p.getArg('{','}');
|
||||
Encoding const * const enc = encodings.fromLaTeXName(
|
||||
encoding, Encoding::inputenc, true);
|
||||
if (!enc)
|
||||
cerr << "Unknown encoding " << encoding << ". Ignoring." << std::endl;
|
||||
else {
|
||||
if (!enc) {
|
||||
if (!detectEncoding)
|
||||
cerr << "Unknown encoding " << encoding
|
||||
<< ". Ignoring." << std::endl;
|
||||
} else {
|
||||
if (!enc->unsafe())
|
||||
h_inputencoding = enc->name();
|
||||
p.setEncoding(enc->iconvName());
|
||||
@ -1949,6 +1970,16 @@ void Preamble::parse(Parser & p, string const & forceclass,
|
||||
}
|
||||
|
||||
|
||||
string Preamble::parseEncoding(Parser & p, string const & forceclass)
|
||||
{
|
||||
TeX2LyXDocClass dummy;
|
||||
parse(p, forceclass, true, dummy);
|
||||
if (h_inputencoding != "auto" && h_inputencoding != "default")
|
||||
return h_inputencoding;
|
||||
return "";
|
||||
}
|
||||
|
||||
|
||||
string babel2lyx(string const & language)
|
||||
{
|
||||
char const * const * where = is_known(language, known_languages);
|
||||
|
@ -92,6 +92,8 @@ public:
|
||||
/// Parses the LaTeX preamble into internal data
|
||||
void parse(Parser & p, std::string const & forceclass,
|
||||
TeX2LyXDocClass & tc);
|
||||
/// Parse the encoding from a preamble. *this is unusable afterwards.
|
||||
std::string parseEncoding(Parser & p, std::string const & forceclass);
|
||||
/// Writes the LyX file header from internal data
|
||||
bool writeLyXHeader(std::ostream & os, bool subdoc,
|
||||
std::string const & outfiledir);
|
||||
@ -103,6 +105,9 @@ public:
|
||||
|
||||
private:
|
||||
///
|
||||
void parse(Parser & p, std::string const & forceclass,
|
||||
bool detectEncoding, TeX2LyXDocClass & tc);
|
||||
///
|
||||
std::map<std::string, std::vector<std::string> > used_packages;
|
||||
/// Packages that will be loaded automatically by LyX
|
||||
std::set<std::string> auto_packages;
|
||||
@ -218,7 +223,8 @@ private:
|
||||
void handle_geometry(std::vector<std::string> & options);
|
||||
///
|
||||
void handle_package(Parser &p, std::string const & name,
|
||||
std::string const & opts, bool in_lyx_preamble);
|
||||
std::string const & opts, bool in_lyx_preamble,
|
||||
bool detectEncoding);
|
||||
///
|
||||
void handle_if(Parser & p, bool in_lyx_preamble);
|
||||
|
||||
|
@ -839,24 +839,9 @@ namespace {
|
||||
* You must ensure that \p parentFilePathTeX is properly set before calling
|
||||
* this function!
|
||||
*/
|
||||
bool tex2lyx(idocstream & is, ostream & os, string encoding,
|
||||
bool tex2lyx(idocstream & is, ostream & os, string const & encoding,
|
||||
string const & outfiledir)
|
||||
{
|
||||
// Set a sensible default encoding.
|
||||
// This is used until an encoding command is found.
|
||||
// For child documents use the encoding of the master, else ISO-8859-1,
|
||||
// (formerly known by its latex name latin1), since ISO-8859-1 does not
|
||||
// cause an iconv error if the actual encoding is different (bug 7509).
|
||||
if (encoding.empty()) {
|
||||
if (preamble.inputencoding() == "auto")
|
||||
encoding = "ISO-8859-1";
|
||||
else {
|
||||
Encoding const * const enc = encodings.fromLyXName(
|
||||
preamble.inputencoding(), true);
|
||||
encoding = enc->iconvName();
|
||||
}
|
||||
}
|
||||
|
||||
Parser p(is, fixed_encoding ? default_encoding : string());
|
||||
p.setEncoding(encoding);
|
||||
//p.dump();
|
||||
@ -925,12 +910,45 @@ bool tex2lyx(idocstream & is, ostream & os, string encoding,
|
||||
|
||||
|
||||
/// convert TeX from \p infilename to LyX and write it to \p os
|
||||
bool tex2lyx(FileName const & infilename, ostream & os, string const & encoding,
|
||||
bool tex2lyx(FileName const & infilename, ostream & os, string encoding,
|
||||
string const & outfiledir)
|
||||
{
|
||||
ifdocstream is;
|
||||
// Set a sensible default encoding.
|
||||
// This is used until an encoding command is found.
|
||||
// For child documents use the encoding of the master, else try to
|
||||
// detect it from the preamble, since setting an encoding of an open
|
||||
// fstream does currently not work on OS X.
|
||||
// Always start with ISO-8859-1, (formerly known by its latex name
|
||||
// latin1), since ISO-8859-1 does not cause an iconv error if the
|
||||
// actual encoding is different (bug 7509).
|
||||
if (encoding.empty()) {
|
||||
Encoding const * enc = 0;
|
||||
if (preamble.inputencoding() == "auto") {
|
||||
ifdocstream is(setEncoding("ISO-8859-1"));
|
||||
// forbid buffering on this stream
|
||||
is.rdbuf()->pubsetbuf(0, 0);
|
||||
is.open(infilename.toFilesystemEncoding().c_str());
|
||||
if (is.good()) {
|
||||
Parser ep(is, string());
|
||||
ep.setEncoding("ISO-8859-1");
|
||||
Preamble encodingpreamble;
|
||||
string const e = encodingpreamble
|
||||
.parseEncoding(ep, documentclass);
|
||||
if (!e.empty())
|
||||
enc = encodings.fromLyXName(e, true);
|
||||
}
|
||||
} else
|
||||
enc = encodings.fromLyXName(
|
||||
preamble.inputencoding(), true);
|
||||
if (enc)
|
||||
encoding = enc->iconvName();
|
||||
else
|
||||
encoding = "ISO-8859-1";
|
||||
}
|
||||
|
||||
ifdocstream is(setEncoding(encoding));
|
||||
// forbid buffering on this stream
|
||||
is.rdbuf()->pubsetbuf(0,0);
|
||||
is.rdbuf()->pubsetbuf(0, 0);
|
||||
is.open(infilename.toFilesystemEncoding().c_str());
|
||||
if (!is.good()) {
|
||||
cerr << "Could not open input file \"" << infilename
|
||||
|
Loading…
Reference in New Issue
Block a user