backport the tex2lyx unicode stuff from trunk

git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/branches/BRANCH_1_6_X@28858 a592a061-630c-0410-9148-cb99ea01b6c8
This commit is contained in:
Jean-Marc Lasgouttes 2009-03-18 22:40:08 +00:00
parent 6dec0f3e0d
commit f9a4e1cc95
13 changed files with 316 additions and 239 deletions

View File

@ -530,6 +530,11 @@ docstring Encodings::fromLaTeXCommand(docstring const & cmd, docstring & rem)
void Encodings::initUnicodeMath(Buffer const & buffer, bool clear_sets)
{
#ifdef TEX2LYX
// The code below is not needed in tex2lyx and requires additional stuff
(void)buffer;
(void)clear_sets;
#else
if (clear_sets) {
mathcmd.clear();
textcmd.clear();
@ -549,11 +554,18 @@ void Encodings::initUnicodeMath(Buffer const & buffer, bool clear_sets)
for (; bit != bend; ++bit)
if (buffer.isChild(*bit))
initUnicodeMath(**bit, false);
#endif
}
void Encodings::validate(char_type c, LaTeXFeatures & features, bool for_mathed)
{
#ifdef TEX2LYX
// The code below is not needed in tex2lyx and requires additional stuff
(void)c;
(void)features;
(void)for_mathed;
#else
CharInfoMap::const_iterator const it = unicodesymbols.find(c);
if (it != unicodesymbols.end()) {
// In mathed, c could be used both in textmode and mathmode
@ -592,6 +604,7 @@ void Encodings::validate(char_type c, LaTeXFeatures & features, bool for_mathed)
features.require("relsize");
features.require("lyxmathsym");
}
#endif
}

View File

@ -41,7 +41,8 @@ LINKED_FILES = \
../Lexer.cpp \
../lengthcommon.cpp \
../Color.cpp \
../Color.h
../Color.h \
../Encoding.cpp
BUILT_SOURCES = $(PCH_FILE)

View File

@ -10,10 +10,10 @@
#include <config.h>
#include "Encoding.h"
#include "Parser.h"
#include <iostream>
#include <sstream>
using namespace std;
@ -25,6 +25,11 @@ CatCode theCatcode[256];
void catInit()
{
static bool init_done = false;
if (init_done)
return;
init_done = true;
fill(theCatcode, theCatcode + 256, catOther);
fill(theCatcode + 'a', theCatcode + 'z' + 1, catLetter);
fill(theCatcode + 'A', theCatcode + 'Z' + 1, catLetter);
@ -49,13 +54,12 @@ void catInit()
theCatcode[int('@')] = catLetter;
}
/*!
* Translate a line ending to '\n'.
* \p c must have catcode catNewline, and it must be the last character read
* from \p is.
*/
char getNewline(istream & is, char c)
char getNewline(idocstream & is, char c)
{
// we have to handle 3 different line endings:
// - UNIX (\n)
@ -63,9 +67,10 @@ char getNewline(istream & is, char c)
// - DOS (\r\n)
if (c == '\r') {
// MAC or DOS
if (is.get(c) && c != '\n') {
char_type wc;
if (is.get(wc) && wc != '\n') {
// MAC
is.putback(c);
is.putback(wc);
}
return '\n';
}
@ -73,18 +78,14 @@ char getNewline(istream & is, char c)
return c;
}
}
//
// catcodes
//
CatCode catcode(unsigned char c)
CatCode catcode(char_type c)
{
return theCatcode[c];
if (c < 256)
return theCatcode[(unsigned char)c];
return catOther;
}
}
//
@ -100,18 +101,18 @@ ostream & operator<<(ostream & os, Token const & t)
else if (t.cat() == catEscape)
os << '\\' << t.cs() << ' ';
else if (t.cat() == catLetter)
os << t.character();
os << t.cs();
else if (t.cat() == catNewline)
os << "[" << t.cs().size() << "\\n," << t.cat() << "]\n";
else
os << '[' << t.character() << ',' << t.cat() << ']';
os << '[' << t.cs() << ',' << t.cat() << ']';
return os;
}
string Token::asString() const
{
return cs_.size() ? cs_ : string(1, char_);
return cs_;
}
@ -119,9 +120,9 @@ string Token::asInput() const
{
if (cat_ == catComment)
return '%' + cs_ + '\n';
if (cat_ == catSpace || cat_ == catNewline)
return cs_;
return char_ ? string(1, char_) : '\\' + cs_;
if (cat_ == catEscape)
return '\\' + cs_;
return cs_;
}
@ -130,18 +131,32 @@ string Token::asInput() const
//
Parser::Parser(istream & is)
: lineno_(0), pos_(0)
Parser::Parser(idocstream & is)
: lineno_(0), pos_(0), iss_(0), is_(is), encoding_latex_("utf8")
{
tokenize(is);
}
Parser::Parser(string const & s)
: lineno_(0), pos_(0)
: lineno_(0), pos_(0),
iss_(new idocstringstream(from_utf8(s))), is_(*iss_),
encoding_latex_("utf8")
{
istringstream is(s);
tokenize(is);
}
/// Releases the string stream that Parser(string const &) allocates with
/// new. When the parser was built on an external idocstream, iss_ is 0
/// and the delete has no effect.
Parser::~Parser()
{
delete iss_;
}
/*!
 * Change the LaTeX encoding of the input stream.
 * \p e is the LaTeX name of the encoding; it is looked up with
 * encodings.fromLaTeXName(). On success the iconv name of the encoding
 * is sent to is_ through the lyx::setEncoding manipulator and \p e is
 * remembered in encoding_latex_. If the lookup yields no encoding, a
 * message is written to cerr and neither the stream nor the remembered
 * name is modified.
 */
void Parser::setEncoding(std::string const & e)
{
	Encoding const * const enc = encodings.fromLaTeXName(e);
	if (!enc) {
		// The name comes from the -e switch, from the inputenc package
		// options or from \inputencoding, so it may be anything.
		// NOTE(review): assumes fromLaTeXName() returns a null pointer
		// for names it does not know -- confirm against Encoding.cpp.
		std::cerr << "Unknown encoding " << e << ". Ignoring." << std::endl;
		return;
	}
	//cerr << "setting encoding to " << enc->iconvName()<<std::endl;
	is_ << lyx::setEncoding(enc->iconvName());
	encoding_latex_ = e;
}
@ -165,7 +180,7 @@ Token const & Parser::curr_token() const
}
Token const & Parser::next_token() const
Token const & Parser::next_token()
{
static const Token dummy;
return good() ? tokens_[pos_] : dummy;
@ -180,7 +195,7 @@ Token const & Parser::get_token()
}
bool Parser::isParagraph() const
bool Parser::isParagraph()
{
// A new paragraph in TeX is started
// - either by a newline, following any amount of whitespace
@ -246,8 +261,11 @@ void Parser::putback()
}
bool Parser::good() const
bool Parser::good()
{
if (pos_ < tokens_.size())
return true;
tokenize_one();
return pos_ < tokens_.size();
}
@ -256,7 +274,7 @@ char Parser::getChar()
{
if (!good())
error("The input stream is not well...");
return tokens_[pos_++].character();
return get_token().character();
}
@ -351,86 +369,80 @@ string const Parser::verbatimEnvironment(string const & name)
}
void Parser::tokenize(istream & is)
void Parser::tokenize_one()
{
static bool init_done = false;
catInit();
char_type c;
if (!is_.get(c))
return;
if (!init_done) {
catInit();
init_done = true;
switch (catcode(c)) {
case catSpace: {
docstring s(1, c);
while (is_.get(c) && catcode(c) == catSpace)
s += c;
if (catcode(c) != catSpace)
is_.putback(c);
push_back(Token(s, catSpace));
break;
}
char c;
while (is.get(c)) {
//cerr << "reading c: " << c << "\n";
switch (catcode(c)) {
case catSpace: {
string s(1, c);
while (is.get(c) && catcode(c) == catSpace)
s += c;
if (catcode(c) != catSpace)
is.putback(c);
push_back(Token(s, catSpace));
break;
}
case catNewline: {
++lineno_;
string s(1, getNewline(is, c));
while (is.get(c) && catcode(c) == catNewline) {
++lineno_;
s += getNewline(is, c);
}
if (catcode(c) != catNewline)
is.putback(c);
push_back(Token(s, catNewline));
break;
}
case catComment: {
// We don't treat "%\n" combinations here specially because
// we want to preserve them in the preamble
string s;
while (is.get(c) && catcode(c) != catNewline)
s += c;
// handle possible DOS line ending
if (catcode(c) == catNewline)
c = getNewline(is, c);
// Note: The '%' at the beginning and the '\n' at the end
// of the comment are not stored.
++lineno_;
push_back(Token(s, catComment));
break;
}
case catEscape: {
is.get(c);
if (!is) {
error("unexpected end of input");
} else {
string s(1, c);
if (catcode(c) == catLetter) {
// collect letters
while (is.get(c) && catcode(c) == catLetter)
s += c;
if (catcode(c) != catLetter)
is.putback(c);
}
push_back(Token(s, catEscape));
}
break;
}
case catIgnore: {
cerr << "ignoring a char: " << int(c) << "\n";
break;
}
default:
push_back(Token(c, catcode(c)));
case catNewline: {
++lineno_;
docstring s(1, getNewline(is_, c));
while (is_.get(c) && catcode(c) == catNewline) {
++lineno_;
s += getNewline(is_, c);
}
if (catcode(c) != catNewline)
is_.putback(c);
push_back(Token(s, catNewline));
break;
}
case catComment: {
// We don't treat "%\n" combinations here specially because
// we want to preserve them in the preamble
docstring s;
while (is_.get(c) && catcode(c) != catNewline)
s += c;
// handle possible DOS line ending
if (catcode(c) == catNewline)
c = getNewline(is_, c);
// Note: The '%' at the beginning and the '\n' at the end
// of the comment are not stored.
++lineno_;
push_back(Token(s, catComment));
break;
}
case catEscape: {
is_.get(c);
if (!is_) {
error("unexpected end of input");
} else {
docstring s(1, c);
if (catcode(c) == catLetter) {
// collect letters
while (is_.get(c) && catcode(c) == catLetter)
s += c;
if (catcode(c) != catLetter)
is_.putback(c);
}
push_back(Token(s, catEscape));
}
break;
}
case catIgnore: {
cerr << "ignoring a char: " << c << "\n";
break;
}
default:
push_back(Token(docstring(1, c), catcode(c)));
}
//cerr << tokens_.back();
}
@ -459,7 +471,7 @@ string Parser::verbatimOption()
string res;
if (next_token().character() == '[') {
Token t = get_token();
for (Token t = get_token(); t.character() != ']' && good(); t = get_token()) {
for (t = get_token(); t.character() != ']' && good(); t = get_token()) {
if (t.cat() == catBegin) {
putback();
res += '{' + verbatim_item() + '}';

View File

@ -12,10 +12,11 @@
#ifndef PARSER_H
#define PARSER_H
#include <vector>
#include <string>
#include <utility>
#include <vector>
#include "support/docstream.h"
namespace lyx {
@ -46,9 +47,6 @@ enum CatCode {
};
CatCode catcode(unsigned char c);
enum {
FLAG_BRACE_LAST = 1 << 1, // last closing brace ends the parsing
FLAG_RIGHT = 1 << 2, // next \\right ends the parsing process
@ -75,18 +73,16 @@ enum {
class Token {
public:
///
Token() : cs_(), char_(0), cat_(catIgnore) {}
Token() : cs_(), cat_(catIgnore) {}
///
Token(char c, CatCode cat) : cs_(), char_(c), cat_(cat) {}
///
Token(std::string const & cs, CatCode cat) : cs_(cs), char_(0), cat_(cat) {}
Token(docstring const & cs, CatCode cat) : cs_(to_utf8(cs)), cat_(cat) {}
///
std::string const & cs() const { return cs_; }
/// Returns the catcode of the token
CatCode cat() const { return cat_; }
///
char character() const { return char_; }
char character() const { return cs_.empty() ? 0 : cs_[0]; }
/// Returns the token as string
std::string asString() const;
/// Returns the token verbatim
@ -96,8 +92,6 @@ private:
///
std::string cs_;
///
char char_;
///
CatCode cat_;
};
@ -119,9 +113,16 @@ class Parser {
public:
///
Parser(std::istream & is);
Parser(idocstream & is);
///
Parser(std::string const & s);
///
~Parser();
/// change the latex encoding of the input stream
void setEncoding(std::string const & encoding);
/// get the current latex encoding of the input stream
std::string getEncoding() const { return encoding_latex_; }
///
int lineno() const { return lineno_; }
@ -174,8 +175,8 @@ public:
char getChar();
///
void error(std::string const & msg);
/// Parses \p is into tokens
void tokenize(std::istream & is);
/// Parses one token from the input stream is_
void tokenize_one();
///
void push_back(Token const & t);
/// The previous token.
@ -183,11 +184,11 @@ public:
/// The current token.
Token const & curr_token() const;
/// The next token.
Token const & next_token() const;
Token const & next_token();
/// Make the next token current and return that.
Token const & get_token();
/// \return whether the current token starts a new paragraph
bool isParagraph() const;
bool isParagraph();
/// skips spaces (and comments if \p skip_comments is true)
void skip_spaces(bool skip_comments = false);
/// puts back spaces (and comments if \p skip_comments is true)
@ -195,7 +196,7 @@ public:
///
void lex(std::string const & s);
///
bool good() const;
bool good();
///
std::string verbatim_item();
///
@ -214,6 +215,12 @@ private:
std::vector<Token> tokens_;
///
unsigned pos_;
///
idocstringstream * iss_;
///
idocstream & is_;
/// latex name of the current encoding
std::string encoding_latex_;
};

View File

@ -94,7 +94,7 @@ void parse_math(Parser & p, ostream & os, unsigned flags, const mode_type mode)
t.cat() == catAlign ||
t.cat() == catActive ||
t.cat() == catParameter)
os << t.character();
os << t.cs();
else if (t.cat() == catBegin) {
os << '{';

View File

@ -248,7 +248,7 @@ string const scale_as_percentage(string const & scale)
}
void handle_package(string const & name, string const & opts,
void handle_package(Parser &p, string const & name, string const & opts,
bool in_lyx_preamble)
{
vector<string> options = split_options(opts);
@ -326,9 +326,10 @@ void handle_package(string const & name, string const & opts,
; // ignore this
else if (name == "inputenc") {
// only set when there is not more than one inputenc option
// therefore check for the "," character
// also only set when there is not more then one babel language option
// only set when there is not more than one inputenc
// option, therefore check for the "," character; also
// only set when there is not more than one babel
// language option
if (opts.find(",") == string::npos && one_language == true) {
if (opts == "ascii")
//change ascii to auto to be in the unicode range, see
@ -337,6 +338,8 @@ void handle_package(string const & name, string const & opts,
else if (!opts.empty())
h_inputencoding = opts;
}
if (!options.empty())
p.setEncoding(options.back());
options.clear();
}
@ -413,7 +416,7 @@ void handle_package(string const & name, string const & opts,
void end_preamble(ostream & os, TextClass const & /*textclass*/)
{
os << "#LyX file created by tex2lyx " << PACKAGE_VERSION << "\n"
<< "\\lyxformat 247\n"
<< "\\lyxformat 249\n"
<< "\\begin_document\n"
<< "\\begin_header\n"
<< "\\textclass " << h_textclass << "\n";
@ -664,21 +667,19 @@ void parse_preamble(Parser & p, ostream & os,
else if (t.cs() == "usepackage") {
string const options = p.getArg('[', ']');
string const name = p.getArg('{', '}');
if (options.empty() && name.find(',')) {
vector<string> vecnames;
split(name, vecnames, ',');
vector<string>::const_iterator it = vecnames.begin();
vector<string>::const_iterator end = vecnames.end();
for (; it != end; ++it)
handle_package(trim(*it), string(),
in_lyx_preamble);
} else {
handle_package(name, options, in_lyx_preamble);
}
vector<string> vecnames;
split(name, vecnames, ',');
vector<string>::const_iterator it = vecnames.begin();
vector<string>::const_iterator end = vecnames.end();
for (; it != end; ++it)
handle_package(p, trim(*it), options,
in_lyx_preamble);
}
else if (t.cs() == "inputencoding") {
h_inputencoding = p.getArg('{','}');
string const encoding = p.getArg('{','}');
h_inputencoding = encoding;
p.setEncoding(encoding);
}
else if (t.cs() == "newenvironment") {

View File

@ -661,16 +661,15 @@ void parse_table(Parser & p, ostream & os, bool is_long_tabular,
}
}
else if (t.cat() == catSpace || t.cat() == catNewline)
os << t.cs();
else if (t.cat() == catLetter ||
t.cat() == catSuper ||
t.cat() == catSub ||
t.cat() == catOther ||
t.cat() == catActive ||
t.cat() == catParameter)
os << t.character();
else if (t.cat() == catSpace
|| t.cat() == catNewline
|| t.cat() == catLetter
|| t.cat() == catSuper
|| t.cat() == catSub
|| t.cat() == catOther
|| t.cat() == catActive
|| t.cat() == catParameter)
os << t.cs();
else if (t.cat() == catBegin) {
os << '{';

View File

@ -2,4 +2,5 @@
\subsection{\label{sub:External-Subsection}External Subsection}
This is a small dummy child document to show how files can be inserted
to another document.
to another document. Here are some accented characters to make sure
the encoding is passed to included files: éè

View File

@ -75,6 +75,12 @@ foo & bar \\
bar & foo
\end{tabular}
Let's try a few unicode characters: the (R) symbol \textregistered
(and the same one with braces \textregistered{} and a space after) or
maybe an accented a \'{a} or this one \'a or this \^\i.
Watch out: \textregistered should be glued to its successor here.
Final Text.
\end{document}

View File

@ -15,20 +15,20 @@
#include "tex2lyx.h"
#include "Context.h"
#include "TextClass.h"
#include "Encoding.h"
#include "Layout.h"
#include "TextClass.h"
#include "support/lassert.h"
#include "support/convert.h"
#include "support/debug.h"
#include "support/ExceptionMessage.h"
#include "support/filetools.h"
#include "support/lassert.h"
#include "support/lstrings.h"
#include "support/os.h"
#include "support/Package.h"
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <string>
#include <sstream>
@ -202,7 +202,7 @@ void read_environment(Parser & p, string const & begin,
*/
void read_syntaxfile(FileName const & file_name)
{
ifstream is(file_name.toFilesystemEncoding().c_str());
ifdocstream is(file_name.toFilesystemEncoding().c_str());
if (!is.good()) {
cerr << "Could not open syntax file \"" << file_name
<< "\" for reading." << endl;
@ -236,6 +236,7 @@ void read_syntaxfile(FileName const & file_name)
string documentclass;
string default_encoding;
string syntaxfile;
bool overwrite_files = false;
@ -253,6 +254,7 @@ int parse_help(string const &, string const &)
"\t-userdir dir try to set user directory to dir\n"
"\t-sysdir dir try to set system directory to dir\n"
"\t-c textclass declare the textclass\n"
"\t-e encoding set the default encoding (latex name)\n"
"\t-n translate a noweb (aka literate programming) file.\n"
"\t-s syntaxfile read additional syntax file" << endl;
exit(0);
@ -270,6 +272,17 @@ int parse_class(string const & arg, string const &)
}
/// Handler registered for the -e command line switch.
/// Stores \p arg in default_encoding and returns 1. An empty \p arg is
/// reported on cerr and terminates the program with exit status 1.
int parse_encoding(string const & arg, string const &)
{
	if (!arg.empty()) {
		default_encoding = arg;
		return 1;
	}
	// no encoding name was given after the switch
	cerr << "Missing encoding string after -e switch" << endl;
	exit(1);
}
int parse_syntaxfile(string const & arg, string const &)
{
if (arg.empty()) {
@ -328,6 +341,7 @@ void easyParse(int & argc, char * argv[])
map<string, cmd_helper> cmdmap;
cmdmap["-c"] = parse_class;
cmdmap["-e"] = parse_encoding;
cmdmap["-f"] = parse_force;
cmdmap["-s"] = parse_syntaxfile;
cmdmap["-help"] = parse_help;
@ -389,9 +403,11 @@ namespace {
* You must ensure that \p parentFilePath is properly set before calling
* this function!
*/
void tex2lyx(istream & is, ostream & os)
void tex2lyx(idocstream & is, ostream & os, string const & encoding)
{
Parser p(is);
if (!encoding.empty())
p.setEncoding(encoding);
//p.dump();
stringstream ss;
@ -411,7 +427,7 @@ void tex2lyx(istream & is, ostream & os)
os << ss.str();
#ifdef TEST_PARSER
p.reset();
ofstream parsertest("parsertest.tex");
ofdocstream parsertest("parsertest.tex");
while (p.good())
parsertest << p.get_token().asInput();
// <origfile> and parsertest.tex should now have identical content
@ -420,9 +436,12 @@ void tex2lyx(istream & is, ostream & os)
/// convert TeX from \p infilename to LyX and write it to \p os
bool tex2lyx(FileName const & infilename, ostream & os)
bool tex2lyx(FileName const & infilename, ostream & os, string const & encoding)
{
ifstream is(infilename.toFilesystemEncoding().c_str());
ifdocstream is;
// forbid buffering on this stream
is.rdbuf()->pubsetbuf(0,0);
is.open(infilename.toFilesystemEncoding().c_str());
if (!is.good()) {
cerr << "Could not open input file \"" << infilename
<< "\" for reading." << endl;
@ -430,7 +449,7 @@ bool tex2lyx(FileName const & infilename, ostream & os)
}
string const oldParentFilePath = parentFilePath;
parentFilePath = onlyPath(infilename.absFilename());
tex2lyx(is, os);
tex2lyx(is, os, encoding);
parentFilePath = oldParentFilePath;
return true;
}
@ -438,7 +457,8 @@ bool tex2lyx(FileName const & infilename, ostream & os)
} // anonymous namespace
bool tex2lyx(string const & infilename, FileName const & outfilename)
bool tex2lyx(string const & infilename, FileName const & outfilename,
string const & encoding)
{
if (outfilename.isReadableFile()) {
if (overwrite_files) {
@ -462,7 +482,7 @@ bool tex2lyx(string const & infilename, FileName const & outfilename)
cerr << "Input file: " << infilename << "\n";
cerr << "Output file: " << outfilename << "\n";
#endif
return tex2lyx(FileName(infilename), os);
return tex2lyx(FileName(infilename), os, encoding);
}
} // namespace lyx
@ -485,11 +505,11 @@ int main(int argc, char * argv[])
os::init(argc, argv);
try { init_package(internal_path(to_utf8(from_local8bit(argv[0]))),
cl_system_support, cl_user_support,
top_build_dir_is_two_levels_up);
cl_system_support, cl_user_support,
top_build_dir_is_two_levels_up);
} catch (ExceptionMessage const & message) {
cerr << to_utf8(message.title_) << ":\n"
<< to_utf8(message.details_) << endl;
<< to_utf8(message.details_) << endl;
if (message.type_ == ErrorException)
exit(1);
}
@ -507,6 +527,7 @@ int main(int argc, char * argv[])
} else
outfilename = changeExtension(infilename, ".lyx");
// Read the syntax tables
FileName const system_syntaxfile = libFileSearch("", "syntax.default");
if (system_syntaxfile.empty()) {
cerr << "Error: Could not find syntax file \"syntax.default\"." << endl;
@ -516,16 +537,31 @@ int main(int argc, char * argv[])
if (!syntaxfile.empty())
read_syntaxfile(makeAbsPath(syntaxfile));
// Read the encodings table.
FileName const symbols_path = libFileSearch(string(), "unicodesymbols");
if (symbols_path.empty()) {
cerr << "Error: Could not find file \"unicodesymbols\"."
<< endl;
exit(1);
}
FileName const enc_path = libFileSearch(string(), "encodings");
if (enc_path.empty()) {
cerr << "Error: Could not find file \"encodings\"."
<< endl;
exit(1);
}
encodings.read(enc_path, symbols_path);
// The real work now.
masterFilePath = onlyPath(infilename);
parentFilePath = masterFilePath;
if (outfilename == "-") {
if (tex2lyx(FileName(infilename), cout))
if (tex2lyx(FileName(infilename), cout, default_encoding))
return EXIT_SUCCESS;
else
return EXIT_FAILURE;
} else {
if (tex2lyx(infilename, FileName(outfilename)))
if (tex2lyx(infilename, FileName(outfilename), default_encoding))
return EXIT_SUCCESS;
else
return EXIT_FAILURE;

View File

@ -114,13 +114,16 @@ extern std::string getParentFilePath();
/*!
* Reads tex input from \a infilename and writes lyx output to \a outfilename.
* The (latex) encoding can be provided as \a encoding.
* Uses some common settings for the preamble, so this should only
* be used more than once for included documents.
* Caution: Overwrites the existing preamble settings if the new document
* contains a preamble.
* \return true if the conversion was successful, else false.
*/
bool tex2lyx(std::string const & infilename, support::FileName const & outfilename);
bool tex2lyx(std::string const & infilename,
support::FileName const & outfilename,
std::string const & encoding);
} // namespace lyx

View File

@ -17,6 +17,7 @@
#include "tex2lyx.h"
#include "Context.h"
#include "Encoding.h"
#include "FloatList.h"
#include "Layout.h"
#include "Length.h"
@ -344,8 +345,6 @@ void translate_box_len(string const & length, string & value, string & unit, str
string find_file(string const & name, string const & path,
char const * const * extensions)
{
// FIXME UNICODE encoding of name and path may be wrong (makeAbsPath
// expects utf8)
for (char const * const * what = extensions; *what; ++what) {
string const trial = addExtension(name, *what);
if (makeAbsPath(trial, path).exists())
@ -509,7 +508,7 @@ void output_command_layout(ostream & os, Parser & p, bool outer,
* The drawback is that the logic inside the function becomes
* complicated, and that is the reason why it is not implemented.
*/
void check_space(Parser const & p, ostream & os, Context & context)
void check_space(Parser & p, ostream & os, Context & context)
{
Token const next = p.next_token();
Token const curr = p.curr_token();
@ -1054,8 +1053,6 @@ void fix_relative_filename(string & name)
if (fname.isAbsolute())
return;
// FIXME UNICODE encoding of name may be wrong (makeAbsPath expects
// utf8)
name = to_utf8(makeRelPath(from_utf8(makeAbsPath(name, getMasterFilePath()).absFilename()),
from_utf8(getParentFilePath())));
}
@ -1262,7 +1259,7 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
t.cat() == catParameter) {
// This translates "&" to "\\&" which may be wrong...
context.check_layout(os);
os << t.character();
os << t.cs();
}
else if (p.isParagraph()) {
@ -1281,7 +1278,7 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
else
os << "\\InsetSpace ~\n";
} else
os << t.character();
os << t.cs();
}
else if (t.cat() == catBegin &&
@ -1309,7 +1306,7 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
next.character() == '*') {
p.get_token();
if (p.next_token().cat() == catEnd) {
os << next.character();
os << next.cs();
p.get_token();
} else {
p.putback();
@ -1552,8 +1549,9 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
TeXFont const oldFont = context.font;
// save the current font size
string const size = oldFont.size;
// reset the font size to default, because the font size switches don't
// affect section headings and the like
// reset the font size to default, because the
// font size switches don't affect section
// headings and the like
context.font.size = known_coded_sizes[0];
output_font_change(os, oldFont, context.font);
// write the layout
@ -1605,8 +1603,6 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
string const path = getMasterFilePath();
// We want to preserve relative / absolute filenames,
// therefore path is only used for testing
// FIXME UNICODE encoding of name and path may be
// wrong (makeAbsPath expects utf8)
if (!makeAbsPath(name, path).exists()) {
// The file extension is probably missing.
// Now try to find it out.
@ -1637,8 +1633,6 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
name = pdftex_name;
}
// FIXME UNICODE encoding of name and path may be
// wrong (makeAbsPath expects utf8)
if (makeAbsPath(name, path).exists())
fix_relative_filename(name);
else
@ -1763,6 +1757,7 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
p.skip_spaces();
context.check_layout(os);
string const s = p.verbatim_item();
//FIXME: this never triggers in UTF8
if (s == "\xb1" || s == "\xb3" || s == "\xb2" || s == "\xb5")
os << s;
else
@ -2127,25 +2122,31 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
else if (t.cs() == "selectlanguage") {
context.check_layout(os);
// save the language for the case that a \foreignlanguage is used
// save the language for the case that a
// \foreignlanguage is used
//FIXME: this is wrong, the language should
// be saved in the context. (JMarc)
selectlang = subst(p.verbatim_item(), "\n", " ");
os << "\\lang " << selectlang << "\n";
}
else if (t.cs() == "foreignlanguage") {
context.check_layout(os);
os << "\n\\lang " << subst(p.verbatim_item(), "\n", " ") << "\n";
os << subst(p.verbatim_item(), "\n", " ");
// FIXME: the second argument of \foreignlanguage
// has to be parsed (like for \textsf, for
// example).
// set back to last selectlanguage
os << "\n\\lang " << selectlang << "\n";
}
else if (t.cs() == "inputencoding")
// write nothing because this is done by LyX using the "\lang"
// information given by selectlanguage and foreignlanguage
subst(p.verbatim_item(), "\n", " ");
else if (t.cs() == "inputencoding") {
// nothing to write here
string const enc = subst(p.verbatim_item(), "\n", " ");
p.setEncoding(enc);
}
else if (t.cs() == "LyX" || t.cs() == "TeX"
|| t.cs() == "LaTeX") {
context.check_layout(os);
@ -2238,18 +2239,6 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
handle_ert(os, oss.str(), context);
}
else if (t.cs() == "\"") {
context.check_layout(os);
string const name = p.verbatim_item();
if (name == "a") os << '\xe4';
else if (name == "o") os << '\xf6';
else if (name == "u") os << '\xfc';
else if (name == "A") os << '\xc4';
else if (name == "O") os << '\xd6';
else if (name == "U") os << '\xdc';
else handle_ert(os, "\"{" + name + "}", context);
}
// Problem: \= creates a tabstop inside the tabbing environment
// and else an accent. In the latter case we really would want
// \={o} instead of \= o.
@ -2260,30 +2249,22 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
|| t.cs() == "'" || t.cs() == "`"
|| t.cs() == "~" || t.cs() == "." || t.cs() == "=") {
// we need the trim as the LyX parser chokes on such spaces
// The argument of InsetLatexAccent is parsed as a
// subset of LaTeX, so don't parse anything here,
// but use the raw argument.
// Otherwise we would convert \~{\i} wrongly.
// This will of course not translate \~{\ss} to \~{ß},
// but that does at least compile and does only look
// strange on screen.
context.check_layout(os);
os << "\\i \\" << t.cs() << "{"
<< trim(p.verbatim_item(), " ")
<< "}\n";
}
else if (t.cs() == "ss") {
context.check_layout(os);
os << "\xdf";
skip_braces(p); // eat {}
}
else if (t.cs() == "i" || t.cs() == "j" || t.cs() == "l" ||
t.cs() == "L") {
context.check_layout(os);
os << "\\i \\" << t.cs() << "{}\n";
skip_braces(p); // eat {}
// try to see whether the string is in unicodesymbols
docstring rem;
string command = t.asInput() + "{"
+ trim(p.verbatim_item())
+ "}";
docstring s = encodings.fromLaTeXCommand(from_utf8(command), rem);
if (!s.empty()) {
if (!rem.empty())
cerr << "When parsing " << command
<< ", result is " << to_utf8(s)
<< "+" << to_utf8(rem) << endl;
os << to_utf8(s);
} else
// we did not find a non-ert version
handle_ert(os, command, context);
}
else if (t.cs() == "\\") {
@ -2319,8 +2300,6 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
string const path = getMasterFilePath();
// We want to preserve relative / absolute filenames,
// therefore path is only used for testing
// FIXME UNICODE encoding of filename and path may be
// wrong (makeAbsPath expects utf8)
if ((t.cs() == "include" || t.cs() == "input") &&
!makeAbsPath(filename, path).exists()) {
// The file extension is probably missing.
@ -2331,8 +2310,6 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
if (!tex_name.empty())
filename = tex_name;
}
// FIXME UNICODE encoding of filename and path may be
// wrong (makeAbsPath expects utf8)
if (makeAbsPath(filename, path).exists()) {
string const abstexname =
makeAbsPath(filename, path).absFilename();
@ -2342,7 +2319,8 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
string const lyxname =
changeExtension(filename, ".lyx");
if (t.cs() != "verbatiminput" &&
tex2lyx(abstexname, FileName(abslyxname))) {
tex2lyx(abstexname, FileName(abslyxname),
p.getEncoding())) {
os << name << '{' << lyxname << "}\n";
} else {
os << name << '{' << filename << "}\n";
@ -2537,6 +2515,19 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
}
else {
// try to see whether the string is in unicodesymbols
docstring rem;
docstring s = encodings.fromLaTeXCommand(from_utf8(t.asInput()), rem);
if (!s.empty()) {
if (!rem.empty())
cerr << "When parsing " << t.cs()
<< ", result is " << to_utf8(s)
<< "+" << to_utf8(rem) << endl;
context.check_layout(os);
os << to_utf8(s);
p.skip_spaces();
skip_braces(p); // eat {}
}
//cerr << "#: " << t << " mode: " << mode << endl;
// heuristic: read up to next non-nested space
/*
@ -2550,14 +2541,16 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
cerr << "found ERT: " << s << endl;
handle_ert(os, s + ' ', context);
*/
string name = t.asInput();
if (p.next_token().asInput() == "*") {
// Starred commands like \vspace*{}
p.get_token(); // Eat '*'
name += '*';
else {
string name = t.asInput();
if (p.next_token().asInput() == "*") {
// Starred commands like \vspace*{}
p.get_token(); // Eat '*'
name += '*';
}
if (!parse_command(name, p, os, outer, context))
handle_ert(os, name, context);
}
if (! parse_command(name, p, os, outer, context))
handle_ert(os, name, context);
}
if (flags & FLAG_LEAVE) {

View File

@ -33,6 +33,11 @@ What's new
* DOCUMENT INPUT/OUTPUT
- tex2lyx is now able to read files in all encodings supported by LaTeX
and to transform them into the proper unicode-based format introduced in
1.5.0. This paves the way for many other improvements in LaTeX->LyX
translation (bugs 3035, 4379, 4917).
- Quotes in InsetListings are now normal quotes, rather than InsetQuote
entries (bug 5782).