diff --git a/src/support/docstream.cpp b/src/support/docstream.cpp index e33dffc3c0..de5a6df035 100644 --- a/src/support/docstream.cpp +++ b/src/support/docstream.cpp @@ -334,6 +334,12 @@ ifdocstream::ifdocstream() : base() } +ifdocstream::ifdocstream(SetEnc const & enc) : base() /* Encoding-only constructor: base() is default-constructed, so no file is opened here; enc.encoding is forwarded to setEncoding() together with the `in` flag. */ +{ + setEncoding(*this, enc.encoding, in); +} + + ifdocstream::ifdocstream(const char* s, ios_base::openmode mode, string const & encoding) : base() @@ -349,6 +355,12 @@ ofdocstream::ofdocstream(): base() } +ofdocstream::ofdocstream(SetEnc const & enc) : base() /* Output counterpart of the encoding-only constructor: base() is default-constructed, so no file is opened here; enc.encoding is forwarded to setEncoding() together with the `out` flag. */ +{ + setEncoding(*this, enc.encoding, out); +} + + ofdocstream::ofdocstream(const char* s, ios_base::openmode mode, string const & encoding) : base() diff --git a/src/support/docstream.h b/src/support/docstream.h index a6197cccd6..460a9b7393 100644 --- a/src/support/docstream.h +++ b/src/support/docstream.h @@ -40,6 +40,8 @@ typedef std::basic_istream idocstream; */ typedef std::basic_ostream odocstream; +struct SetEnc; + /// File stream for reading UTF8-encoded files with automatic conversion to /// UCS4. /// Buffering must be switched off if the encoding is changed after @@ -48,6 +50,9 @@ class ifdocstream : public std::basic_ifstream { typedef std::basic_ifstream base; public: ifdocstream(); + /// Create a stream with a specific encoding \p enc. + /// We must not pass \p enc as string, to avoid confusing it with a file name. + explicit ifdocstream(SetEnc const & enc); explicit ifdocstream(const char* s, std::ios_base::openmode mode = std::ios_base::in, std::string const & encoding = "UTF-8"); @@ -61,6 +66,9 @@ class ofdocstream : public std::basic_ofstream { typedef std::basic_ofstream base; public: ofdocstream(); + /// Create a stream with a specific encoding \p enc. + /// We must not pass \p enc as string, to avoid confusing it with a file name. 
+ explicit ofdocstream(SetEnc const & enc); explicit ofdocstream(const char* s, std::ios_base::openmode mode = std::ios_base::out|std::ios_base::trunc, std::string const & encoding = "UTF-8"); diff --git a/src/tex2lyx/Preamble.cpp b/src/tex2lyx/Preamble.cpp index 2a14aeeb0a..24c403d23e 100644 --- a/src/tex2lyx/Preamble.cpp +++ b/src/tex2lyx/Preamble.cpp @@ -672,7 +672,8 @@ void Preamble::handle_geometry(vector & options) void Preamble::handle_package(Parser &p, string const & name, - string const & opts, bool in_lyx_preamble) + string const & opts, bool in_lyx_preamble, + bool detectEncoding) { vector options = split_options(opts); add_package(name, options); @@ -909,9 +910,11 @@ void Preamble::handle_package(Parser &p, string const & name, string const encoding = options.back(); Encoding const * const enc = encodings.fromLaTeXName( encoding, Encoding::inputenc, true); - if (!enc) - cerr << "Unknown encoding " << encoding << ". Ignoring." << std::endl; - else { + if (!enc) { + if (!detectEncoding) + cerr << "Unknown encoding " << encoding + << ". Ignoring." << std::endl; + } else { if (!enc->unsafe() && options.size() == 1 && one_language == true) h_inputencoding = enc->name(); p.setEncoding(enc->iconvName()); @@ -1026,7 +1029,7 @@ void Preamble::handle_package(Parser &p, string const & name, } // We need to do something with the options... - if (!options.empty()) + if (!options.empty() && !detectEncoding) cerr << "Ignoring options '" << join(options, ",") << "' of package " << name << '.' 
<< endl; @@ -1260,6 +1263,13 @@ void Preamble::parse(Parser & p, string const & forceclass, { // initialize fixed types special_columns_['D'] = 3; + parse(p, forceclass, false, tc); +} + + +void Preamble::parse(Parser & p, string const & forceclass, + bool detectEncoding, TeX2LyXDocClass & tc) +{ bool is_full_document = false; bool is_lyx_file = false; bool in_lyx_preamble = false; @@ -1275,11 +1285,19 @@ void Preamble::parse(Parser & p, string const & forceclass, } p.reset(); + if (detectEncoding && !is_full_document) + return; + while (is_full_document && p.good()) { + if (detectEncoding && h_inputencoding != "auto" && + h_inputencoding != "default") + return; + Token const & t = p.get_token(); #ifdef FILEDEBUG - cerr << "t: " << t << "\n"; + if (!detectEncoding) + cerr << "t: " << t << '\n'; #endif // @@ -1314,7 +1332,8 @@ void Preamble::parse(Parser & p, string const & forceclass, if (comment.size() > magicXeLaTeX.size() && comment.substr(0, magicXeLaTeX.size()) == magicXeLaTeX && h_inputencoding == "auto") { - cerr << "XeLaTeX comment found, switching to UTF8\n"; + if (!detectEncoding) + cerr << "XeLaTeX comment found, switching to UTF8\n"; h_inputencoding = "utf8"; } smatch sub; @@ -1657,16 +1676,18 @@ void Preamble::parse(Parser & p, string const & forceclass, vector::const_iterator end = vecnames.end(); for (; it != end; ++it) handle_package(p, trimSpaceAndEol(*it), options, - in_lyx_preamble); + in_lyx_preamble, detectEncoding); } else if (t.cs() == "inputencoding") { string const encoding = p.getArg('{','}'); Encoding const * const enc = encodings.fromLaTeXName( encoding, Encoding::inputenc, true); - if (!enc) - cerr << "Unknown encoding " << encoding << ". Ignoring." << std::endl; - else { + if (!enc) { + if (!detectEncoding) + cerr << "Unknown encoding " << encoding + << ". Ignoring." 
<< std::endl; + } else { if (!enc->unsafe()) h_inputencoding = enc->name(); p.setEncoding(enc->iconvName()); @@ -1949,6 +1970,16 @@ void Preamble::parse(Parser & p, string const & forceclass, } +string Preamble::parseEncoding(Parser & p, string const & forceclass) /* Runs the four-argument parse() overload in encoding-detection mode with a throwaway TeX2LyXDocClass, then reports h_inputencoding. Returns an empty string when h_inputencoding is still "auto" or "default". */ +{ + TeX2LyXDocClass dummy; + parse(p, forceclass, true, dummy); /* third argument is detectEncoding */ + if (h_inputencoding != "auto" && h_inputencoding != "default") + return h_inputencoding; + return ""; /* h_inputencoding is "auto" or "default": report no encoding */ +} + + string babel2lyx(string const & language) { char const * const * where = is_known(language, known_languages); diff --git a/src/tex2lyx/Preamble.h b/src/tex2lyx/Preamble.h index 01a5cd32e4..f9342efca2 100644 --- a/src/tex2lyx/Preamble.h +++ b/src/tex2lyx/Preamble.h @@ -92,6 +92,8 @@ public: /// Parses the LaTeX preamble into internal data void parse(Parser & p, std::string const & forceclass, TeX2LyXDocClass & tc); + /// Parse the encoding from a preamble. *this is unusable afterwards. + std::string parseEncoding(Parser & p, std::string const & forceclass); /// Writes the LyX file header from internal data bool writeLyXHeader(std::ostream & os, bool subdoc, std::string const & outfiledir); @@ -103,6 +105,9 @@ public: private: /// + void parse(Parser & p, std::string const & forceclass, + bool detectEncoding, TeX2LyXDocClass & tc); + /// std::map > used_packages; /// Packages that will be loaded automatically by LyX std::set auto_packages; @@ -218,7 +223,8 @@ private: void handle_geometry(std::vector & options); /// void handle_package(Parser &p, std::string const & name, - std::string const & opts, bool in_lyx_preamble); + std::string const & opts, bool in_lyx_preamble, + bool detectEncoding); /// void handle_if(Parser & p, bool in_lyx_preamble); diff --git a/src/tex2lyx/tex2lyx.cpp b/src/tex2lyx/tex2lyx.cpp index 4ce2512179..7a6bbb0412 100644 --- a/src/tex2lyx/tex2lyx.cpp +++ b/src/tex2lyx/tex2lyx.cpp @@ -839,24 +839,9 @@ namespace { * You must ensure that \p parentFilePathTeX is properly set before calling * this function! 
*/ -bool tex2lyx(idocstream & is, ostream & os, string encoding, +bool tex2lyx(idocstream & is, ostream & os, string const & encoding, string const & outfiledir) { - // Set a sensible default encoding. - // This is used until an encoding command is found. - // For child documents use the encoding of the master, else ISO-8859-1, - // (formerly known by its latex name latin1), since ISO-8859-1 does not - // cause an iconv error if the actual encoding is different (bug 7509). - if (encoding.empty()) { - if (preamble.inputencoding() == "auto") - encoding = "ISO-8859-1"; - else { - Encoding const * const enc = encodings.fromLyXName( - preamble.inputencoding(), true); - encoding = enc->iconvName(); - } - } - Parser p(is, fixed_encoding ? default_encoding : string()); p.setEncoding(encoding); //p.dump(); @@ -925,12 +910,45 @@ bool tex2lyx(idocstream & is, ostream & os, string encoding, /// convert TeX from \p infilename to LyX and write it to \p os -bool tex2lyx(FileName const & infilename, ostream & os, string const & encoding, +bool tex2lyx(FileName const & infilename, ostream & os, string encoding, string const & outfiledir) { - ifdocstream is; + // Set a sensible default encoding. + // This is used until an encoding command is found. + // For child documents use the encoding of the master, else try to + // detect it from the preamble, since setting an encoding of an open + // fstream does currently not work on OS X. + // Always start with ISO-8859-1, (formerly known by its latex name + // latin1), since ISO-8859-1 does not cause an iconv error if the + // actual encoding is different (bug 7509). 
+ if (encoding.empty()) { + Encoding const * enc = 0; + if (preamble.inputencoding() == "auto") { + ifdocstream is(setEncoding("ISO-8859-1")); + // forbid buffering on this stream + is.rdbuf()->pubsetbuf(0, 0); + is.open(infilename.toFilesystemEncoding().c_str()); + if (is.good()) { + Parser ep(is, string()); + ep.setEncoding("ISO-8859-1"); + Preamble encodingpreamble; + string const e = encodingpreamble + .parseEncoding(ep, documentclass); + if (!e.empty()) + enc = encodings.fromLyXName(e, true); + } + } else + enc = encodings.fromLyXName( + preamble.inputencoding(), true); + if (enc) + encoding = enc->iconvName(); + else + encoding = "ISO-8859-1"; + } + + ifdocstream is(setEncoding(encoding)); // forbid buffering on this stream - is.rdbuf()->pubsetbuf(0,0); + is.rdbuf()->pubsetbuf(0, 0); is.open(infilename.toFilesystemEncoding().c_str()); if (!is.good()) { cerr << "Could not open input file \"" << infilename