Now tex2lyx is able to set the encoding from what it reads in the preamble.

What works:
- parsing of inputenc should work
- \inputencoding is acted on in the preamble

What does not work:
- \inputencoding in the text
- all the corner cases I have not considered, and all buggy stuff in the
  'what works' paragraph
- InsetLatexAccent insets are still created, but I do not know when they got
  added to the code.

The only notable trick in the code is that I had to disable buffering. Otherwise
the whole text was read before I had a chance to change the encoding...

Finally, I removed the artificial limitation that forbade
  \usepackage[opt1,opt2]{package1,package2}



git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/trunk@27592 a592a061-630c-0410-9148-cb99ea01b6c8
This commit is contained in:
Jean-Marc Lasgouttes 2008-11-16 21:51:18 +00:00
parent e66cbe70b6
commit 28f43b1896
7 changed files with 76 additions and 23 deletions

View File

@ -513,6 +513,10 @@ docstring Encodings::fromLaTeXCommand(docstring const & cmd, docstring & rem)
void Encodings::initUnicodeMath(Buffer const & buffer) void Encodings::initUnicodeMath(Buffer const & buffer)
{ {
#ifdef TEX2LYX
// The code below is not needed in tex2lyx and requires additional stuff
(void)buffer;
#else
mathcmd.clear(); mathcmd.clear();
textcmd.clear(); textcmd.clear();
mathsym.clear(); mathsym.clear();
@ -523,11 +527,18 @@ void Encodings::initUnicodeMath(Buffer const & buffer)
for (; it != end; ++it) for (; it != end; ++it)
it->initUnicodeMath(); it->initUnicodeMath();
#endif
} }
void Encodings::validate(char_type c, LaTeXFeatures & features, bool for_mathed) void Encodings::validate(char_type c, LaTeXFeatures & features, bool for_mathed)
{ {
#ifdef TEX2LYX
// The code below is not needed in tex2lyx and requires additional stuff
(void)c;
(void)features;
(void)for_mathed;
#else
CharInfoMap::const_iterator const it = unicodesymbols.find(c); CharInfoMap::const_iterator const it = unicodesymbols.find(c);
if (it != unicodesymbols.end()) { if (it != unicodesymbols.end()) {
// In mathed, c could be used both in textmode and mathmode // In mathed, c could be used both in textmode and mathmode
@ -566,6 +577,7 @@ void Encodings::validate(char_type c, LaTeXFeatures & features, bool for_mathed)
features.require("relsize"); features.require("relsize");
features.require("lyxmathsym"); features.require("lyxmathsym");
} }
#endif
} }

View File

@ -41,7 +41,8 @@ LINKED_FILES = \
../Lexer.cpp \ ../Lexer.cpp \
../lengthcommon.cpp \ ../lengthcommon.cpp \
../Color.cpp \ ../Color.cpp \
../Color.h ../Color.h \
../Encoding.cpp
BUILT_SOURCES = $(PCH_FILE) BUILT_SOURCES = $(PCH_FILE)

View File

@ -10,6 +10,7 @@
#include <config.h> #include <config.h>
#include "Encoding.h"
#include "Parser.h" #include "Parser.h"
#include <iostream> #include <iostream>
@ -149,6 +150,14 @@ Parser::~Parser()
} }
/// Switch the encoding used to decode the rest of the input stream.
///
/// \param e  LaTeX name of the encoding (e.g. an inputenc option or the
///           argument of \inputencoding); it is looked up through
///           encodings.fromLaTeXName().
///
/// If the lookup does not yield an encoding, a warning is printed to
/// stderr and the stream keeps its current encoding.
void Parser::setEncoding(std::string const & e)
{
	Encoding const * enc = encodings.fromLaTeXName(e);
	// The lookup returns a pointer; presumably it is null for names that
	// are not in the encodings table (verify against Encodings). Guard
	// against that instead of dereferencing blindly: the name comes
	// straight from the user's document and may be misspelled or
	// unsupported.
	if (!enc) {
		cerr << "Unknown encoding " << e << ". Ignoring." << '\n';
		return;
	}
	// Terminate the diagnostic so it does not run into later messages.
	cerr << "setting encoding to " << enc->iconvName() << '\n';
	is_ << lyx::setEncoding(enc->iconvName());
}
void Parser::push_back(Token const & t) void Parser::push_back(Token const & t)
{ {
tokens_.push_back(t); tokens_.push_back(t);

View File

@ -119,6 +119,9 @@ public:
/// ///
~Parser(); ~Parser();
/// change the encoding of the input stream
void setEncoding(std::string const & encoding);
/// ///
int lineno() const { return lineno_; } int lineno() const { return lineno_; }
/// ///

View File

@ -248,7 +248,7 @@ string const scale_as_percentage(string const & scale)
} }
void handle_package(string const & name, string const & opts, void handle_package(Parser &p, string const & name, string const & opts,
bool in_lyx_preamble) bool in_lyx_preamble)
{ {
vector<string> options = split_options(opts); vector<string> options = split_options(opts);
@ -326,9 +326,10 @@ void handle_package(string const & name, string const & opts,
; // ignore this ; // ignore this
else if (name == "inputenc") { else if (name == "inputenc") {
// only set when there is not more than one inputenc option // only set when there is not more than one inputenc
// therefore check for the "," character // option therefore check for the "," character also
// also only set when there is not more then one babel language option // only set when there is not more then one babel
// language option
if (opts.find(",") == string::npos && one_language == true) { if (opts.find(",") == string::npos && one_language == true) {
if (opts == "ascii") if (opts == "ascii")
//change ascii to auto to be in the unicode range, see //change ascii to auto to be in the unicode range, see
@ -337,6 +338,8 @@ void handle_package(string const & name, string const & opts,
else if (!opts.empty()) else if (!opts.empty())
h_inputencoding = opts; h_inputencoding = opts;
} }
if (!options.empty())
p.setEncoding(options.back());
options.clear(); options.clear();
} }
@ -663,21 +666,19 @@ void parse_preamble(Parser & p, ostream & os,
else if (t.cs() == "usepackage") { else if (t.cs() == "usepackage") {
string const options = p.getArg('[', ']'); string const options = p.getArg('[', ']');
string const name = p.getArg('{', '}'); string const name = p.getArg('{', '}');
if (options.empty() && name.find(',')) { vector<string> vecnames;
vector<string> vecnames; split(name, vecnames, ',');
split(name, vecnames, ','); vector<string>::const_iterator it = vecnames.begin();
vector<string>::const_iterator it = vecnames.begin(); vector<string>::const_iterator end = vecnames.end();
vector<string>::const_iterator end = vecnames.end(); for (; it != end; ++it)
for (; it != end; ++it) handle_package(p, trim(*it), options,
handle_package(trim(*it), string(), in_lyx_preamble);
in_lyx_preamble);
} else {
handle_package(name, options, in_lyx_preamble);
}
} }
else if (t.cs() == "inputencoding") { else if (t.cs() == "inputencoding") {
h_inputencoding = p.getArg('{','}'); string const encoding = p.getArg('{','}');
h_inputencoding = encoding;
p.setEncoding(encoding);
} }
else if (t.cs() == "newenvironment") { else if (t.cs() == "newenvironment") {

View File

@ -15,8 +15,9 @@
#include "tex2lyx.h" #include "tex2lyx.h"
#include "Context.h" #include "Context.h"
#include "TextClass.h" #include "Encoding.h"
#include "Layout.h" #include "Layout.h"
#include "TextClass.h"
#include "support/convert.h" #include "support/convert.h"
#include "support/debug.h" #include "support/debug.h"
@ -421,7 +422,10 @@ void tex2lyx(idocstream & is, ostream & os)
/// convert TeX from \p infilename to LyX and write it to \p os /// convert TeX from \p infilename to LyX and write it to \p os
bool tex2lyx(FileName const & infilename, ostream & os) bool tex2lyx(FileName const & infilename, ostream & os)
{ {
ifdocstream is(infilename.toFilesystemEncoding().c_str()); ifdocstream is;
// forbid buffering on this stream
is.rdbuf()->pubsetbuf(0,0);
is.open(infilename.toFilesystemEncoding().c_str());
if (!is.good()) { if (!is.good()) {
cerr << "Could not open input file \"" << infilename cerr << "Could not open input file \"" << infilename
<< "\" for reading." << endl; << "\" for reading." << endl;
@ -487,11 +491,11 @@ int main(int argc, char * argv[])
try { try {
init_package(internal_path(to_utf8(from_local8bit(argv[0]))), init_package(internal_path(to_utf8(from_local8bit(argv[0]))),
cl_system_support, cl_user_support, cl_system_support, cl_user_support,
top_build_dir_is_two_levels_up); top_build_dir_is_two_levels_up);
} catch (ExceptionMessage const & message) { } catch (ExceptionMessage const & message) {
cerr << to_utf8(message.title_) << ":\n" cerr << to_utf8(message.title_) << ":\n"
<< to_utf8(message.details_) << endl; << to_utf8(message.details_) << endl;
if (message.type_ == ErrorException) if (message.type_ == ErrorException)
exit(1); exit(1);
} }
@ -509,6 +513,7 @@ int main(int argc, char * argv[])
} else } else
outfilename = changeExtension(infilename, ".lyx"); outfilename = changeExtension(infilename, ".lyx");
// Read the syntax tables
FileName const system_syntaxfile = libFileSearch("", "syntax.default"); FileName const system_syntaxfile = libFileSearch("", "syntax.default");
if (system_syntaxfile.empty()) { if (system_syntaxfile.empty()) {
cerr << "Error: Could not find syntax file \"syntax.default\"." << endl; cerr << "Error: Could not find syntax file \"syntax.default\"." << endl;
@ -518,9 +523,24 @@ int main(int argc, char * argv[])
if (!syntaxfile.empty()) if (!syntaxfile.empty())
read_syntaxfile(makeAbsPath(syntaxfile)); read_syntaxfile(makeAbsPath(syntaxfile));
// Read the encodings table.
FileName const symbols_path = libFileSearch(string(), "unicodesymbols");
if (symbols_path.empty()) {
cerr << "Error: Could not find file \"unicodesymbols\"."
<< endl;
exit(1);
}
FileName const enc_path = libFileSearch(string(), "encodings");
if (enc_path.empty()) {
cerr << "Error: Could not find file \"encodings\"."
<< endl;
exit(1);
}
encodings.read(enc_path, symbols_path);
// The real work now.
masterFilePath = onlyPath(infilename); masterFilePath = onlyPath(infilename);
parentFilePath = masterFilePath; parentFilePath = masterFilePath;
if (outfilename == "-") { if (outfilename == "-") {
if (tex2lyx(FileName(infilename), cout)) if (tex2lyx(FileName(infilename), cout))
return EXIT_SUCCESS; return EXIT_SUCCESS;

View File

@ -1763,6 +1763,7 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
p.skip_spaces(); p.skip_spaces();
context.check_layout(os); context.check_layout(os);
string const s = p.verbatim_item(); string const s = p.verbatim_item();
//FIXME: this never triggers in UTF8
if (s == "\xb1" || s == "\xb3" || s == "\xb2" || s == "\xb5") if (s == "\xb1" || s == "\xb3" || s == "\xb2" || s == "\xb5")
os << s; os << s;
else else
@ -2238,6 +2239,8 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
handle_ert(os, oss.str(), context); handle_ert(os, oss.str(), context);
} }
#if 0
//FIXME: rewrite this
else if (t.cs() == "\"") { else if (t.cs() == "\"") {
context.check_layout(os); context.check_layout(os);
string const name = p.verbatim_item(); string const name = p.verbatim_item();
@ -2249,6 +2252,7 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
else if (name == "U") os << '\xdc'; else if (name == "U") os << '\xdc';
else handle_ert(os, "\"{" + name + "}", context); else handle_ert(os, "\"{" + name + "}", context);
} }
#endif
// Problem: \= creates a tabstop inside the tabbing environment // Problem: \= creates a tabstop inside the tabbing environment
// and else an accent. In the latter case we really would want // and else an accent. In the latter case we really would want
@ -2273,11 +2277,14 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
<< "}\n"; << "}\n";
} }
#if 0
//FIXME: rewrite this
else if (t.cs() == "ss") { else if (t.cs() == "ss") {
context.check_layout(os); context.check_layout(os);
os << "\xdf"; os << "\xdf";
skip_braces(p); // eat {} skip_braces(p); // eat {}
} }
#endif
else if (t.cs() == "i" || t.cs() == "j" || t.cs() == "l" || else if (t.cs() == "i" || t.cs() == "j" || t.cs() == "l" ||
t.cs() == "L") { t.cs() == "L") {