mirror of
https://git.lyx.org/repos/lyx.git
synced 2025-01-18 21:45:24 +00:00
backport the tex2lyx unicode stuff from trunk
git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/branches/BRANCH_1_6_X@28858 a592a061-630c-0410-9148-cb99ea01b6c8
This commit is contained in:
parent
6dec0f3e0d
commit
f9a4e1cc95
@ -530,6 +530,11 @@ docstring Encodings::fromLaTeXCommand(docstring const & cmd, docstring & rem)
|
||||
|
||||
void Encodings::initUnicodeMath(Buffer const & buffer, bool clear_sets)
|
||||
{
|
||||
#ifdef TEX2LYX
|
||||
// The code below is not needed in tex2lyx and requires additional stuff
|
||||
(void)buffer;
|
||||
(void)clear_sets;
|
||||
#else
|
||||
if (clear_sets) {
|
||||
mathcmd.clear();
|
||||
textcmd.clear();
|
||||
@ -549,11 +554,18 @@ void Encodings::initUnicodeMath(Buffer const & buffer, bool clear_sets)
|
||||
for (; bit != bend; ++bit)
|
||||
if (buffer.isChild(*bit))
|
||||
initUnicodeMath(**bit, false);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
void Encodings::validate(char_type c, LaTeXFeatures & features, bool for_mathed)
|
||||
{
|
||||
#ifdef TEX2LYX
|
||||
// The code below is not needed in tex2lyx and requires additional stuff
|
||||
(void)c;
|
||||
(void)features;
|
||||
(void)for_mathed;
|
||||
#else
|
||||
CharInfoMap::const_iterator const it = unicodesymbols.find(c);
|
||||
if (it != unicodesymbols.end()) {
|
||||
// In mathed, c could be used both in textmode and mathmode
|
||||
@ -592,6 +604,7 @@ void Encodings::validate(char_type c, LaTeXFeatures & features, bool for_mathed)
|
||||
features.require("relsize");
|
||||
features.require("lyxmathsym");
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
|
@ -41,7 +41,8 @@ LINKED_FILES = \
|
||||
../Lexer.cpp \
|
||||
../lengthcommon.cpp \
|
||||
../Color.cpp \
|
||||
../Color.h
|
||||
../Color.h \
|
||||
../Encoding.cpp
|
||||
|
||||
BUILT_SOURCES = $(PCH_FILE)
|
||||
|
||||
|
@ -10,10 +10,10 @@
|
||||
|
||||
#include <config.h>
|
||||
|
||||
#include "Encoding.h"
|
||||
#include "Parser.h"
|
||||
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
|
||||
using namespace std;
|
||||
|
||||
@ -25,6 +25,11 @@ CatCode theCatcode[256];
|
||||
|
||||
void catInit()
|
||||
{
|
||||
static bool init_done = false;
|
||||
if (init_done)
|
||||
return;
|
||||
init_done = true;
|
||||
|
||||
fill(theCatcode, theCatcode + 256, catOther);
|
||||
fill(theCatcode + 'a', theCatcode + 'z' + 1, catLetter);
|
||||
fill(theCatcode + 'A', theCatcode + 'Z' + 1, catLetter);
|
||||
@ -49,13 +54,12 @@ void catInit()
|
||||
theCatcode[int('@')] = catLetter;
|
||||
}
|
||||
|
||||
|
||||
/*!
|
||||
* Translate a line ending to '\n'.
|
||||
* \p c must have catcode catNewline, and it must be the last character read
|
||||
* from \p is.
|
||||
*/
|
||||
char getNewline(istream & is, char c)
|
||||
char getNewline(idocstream & is, char c)
|
||||
{
|
||||
// we have to handle 3 different line endings:
|
||||
// - UNIX (\n)
|
||||
@ -63,9 +67,10 @@ char getNewline(istream & is, char c)
|
||||
// - DOS (\r\n)
|
||||
if (c == '\r') {
|
||||
// MAC or DOS
|
||||
if (is.get(c) && c != '\n') {
|
||||
char_type wc;
|
||||
if (is.get(wc) && wc != '\n') {
|
||||
// MAC
|
||||
is.putback(c);
|
||||
is.putback(wc);
|
||||
}
|
||||
return '\n';
|
||||
}
|
||||
@ -73,18 +78,14 @@ char getNewline(istream & is, char c)
|
||||
return c;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
//
|
||||
// catcodes
|
||||
//
|
||||
|
||||
CatCode catcode(unsigned char c)
|
||||
CatCode catcode(char_type c)
|
||||
{
|
||||
return theCatcode[c];
|
||||
if (c < 256)
|
||||
return theCatcode[(unsigned char)c];
|
||||
return catOther;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
//
|
||||
@ -100,18 +101,18 @@ ostream & operator<<(ostream & os, Token const & t)
|
||||
else if (t.cat() == catEscape)
|
||||
os << '\\' << t.cs() << ' ';
|
||||
else if (t.cat() == catLetter)
|
||||
os << t.character();
|
||||
os << t.cs();
|
||||
else if (t.cat() == catNewline)
|
||||
os << "[" << t.cs().size() << "\\n," << t.cat() << "]\n";
|
||||
else
|
||||
os << '[' << t.character() << ',' << t.cat() << ']';
|
||||
os << '[' << t.cs() << ',' << t.cat() << ']';
|
||||
return os;
|
||||
}
|
||||
|
||||
|
||||
string Token::asString() const
|
||||
{
|
||||
return cs_.size() ? cs_ : string(1, char_);
|
||||
return cs_;
|
||||
}
|
||||
|
||||
|
||||
@ -119,9 +120,9 @@ string Token::asInput() const
|
||||
{
|
||||
if (cat_ == catComment)
|
||||
return '%' + cs_ + '\n';
|
||||
if (cat_ == catSpace || cat_ == catNewline)
|
||||
return cs_;
|
||||
return char_ ? string(1, char_) : '\\' + cs_;
|
||||
if (cat_ == catEscape)
|
||||
return '\\' + cs_;
|
||||
return cs_;
|
||||
}
|
||||
|
||||
|
||||
@ -130,18 +131,32 @@ string Token::asInput() const
|
||||
//
|
||||
|
||||
|
||||
Parser::Parser(istream & is)
|
||||
: lineno_(0), pos_(0)
|
||||
Parser::Parser(idocstream & is)
|
||||
: lineno_(0), pos_(0), iss_(0), is_(is), encoding_latex_("utf8")
|
||||
{
|
||||
tokenize(is);
|
||||
}
|
||||
|
||||
|
||||
Parser::Parser(string const & s)
|
||||
: lineno_(0), pos_(0)
|
||||
: lineno_(0), pos_(0),
|
||||
iss_(new idocstringstream(from_utf8(s))), is_(*iss_),
|
||||
encoding_latex_("utf8")
|
||||
{
|
||||
istringstream is(s);
|
||||
tokenize(is);
|
||||
}
|
||||
|
||||
|
||||
/// Destroys the parser and releases the string stream it may own.
Parser::~Parser()
{
	// iss_ is 0 when the parser was constructed from an external
	// idocstream and points to a heap-allocated idocstringstream when it
	// was constructed from a string. Deleting a null pointer is a no-op,
	// so no check is needed.
	delete iss_;
}
|
||||
|
||||
|
||||
void Parser::setEncoding(std::string const & e)
|
||||
{
|
||||
Encoding const * enc = encodings.fromLaTeXName(e);
|
||||
//cerr << "setting encoding to " << enc->iconvName()<<std::endl;
|
||||
is_ << lyx::setEncoding(enc->iconvName());
|
||||
encoding_latex_ = e;
|
||||
}
|
||||
|
||||
|
||||
@ -165,7 +180,7 @@ Token const & Parser::curr_token() const
|
||||
}
|
||||
|
||||
|
||||
Token const & Parser::next_token() const
|
||||
Token const & Parser::next_token()
|
||||
{
|
||||
static const Token dummy;
|
||||
return good() ? tokens_[pos_] : dummy;
|
||||
@ -180,7 +195,7 @@ Token const & Parser::get_token()
|
||||
}
|
||||
|
||||
|
||||
bool Parser::isParagraph() const
|
||||
bool Parser::isParagraph()
|
||||
{
|
||||
// A new paragraph in TeX is started
|
||||
// - either by a newline, following any amount of whitespace
|
||||
@ -246,8 +261,11 @@ void Parser::putback()
|
||||
}
|
||||
|
||||
|
||||
bool Parser::good() const
|
||||
bool Parser::good()
|
||||
{
|
||||
if (pos_ < tokens_.size())
|
||||
return true;
|
||||
tokenize_one();
|
||||
return pos_ < tokens_.size();
|
||||
}
|
||||
|
||||
@ -256,7 +274,7 @@ char Parser::getChar()
|
||||
{
|
||||
if (!good())
|
||||
error("The input stream is not well...");
|
||||
return tokens_[pos_++].character();
|
||||
return get_token().character();
|
||||
}
|
||||
|
||||
|
||||
@ -351,86 +369,80 @@ string const Parser::verbatimEnvironment(string const & name)
|
||||
}
|
||||
|
||||
|
||||
void Parser::tokenize(istream & is)
|
||||
void Parser::tokenize_one()
|
||||
{
|
||||
static bool init_done = false;
|
||||
catInit();
|
||||
char_type c;
|
||||
if (!is_.get(c))
|
||||
return;
|
||||
|
||||
if (!init_done) {
|
||||
catInit();
|
||||
init_done = true;
|
||||
switch (catcode(c)) {
|
||||
case catSpace: {
|
||||
docstring s(1, c);
|
||||
while (is_.get(c) && catcode(c) == catSpace)
|
||||
s += c;
|
||||
if (catcode(c) != catSpace)
|
||||
is_.putback(c);
|
||||
push_back(Token(s, catSpace));
|
||||
break;
|
||||
}
|
||||
|
||||
char c;
|
||||
while (is.get(c)) {
|
||||
//cerr << "reading c: " << c << "\n";
|
||||
|
||||
switch (catcode(c)) {
|
||||
case catSpace: {
|
||||
string s(1, c);
|
||||
while (is.get(c) && catcode(c) == catSpace)
|
||||
s += c;
|
||||
if (catcode(c) != catSpace)
|
||||
is.putback(c);
|
||||
push_back(Token(s, catSpace));
|
||||
break;
|
||||
}
|
||||
|
||||
case catNewline: {
|
||||
++lineno_;
|
||||
string s(1, getNewline(is, c));
|
||||
while (is.get(c) && catcode(c) == catNewline) {
|
||||
++lineno_;
|
||||
s += getNewline(is, c);
|
||||
}
|
||||
if (catcode(c) != catNewline)
|
||||
is.putback(c);
|
||||
push_back(Token(s, catNewline));
|
||||
break;
|
||||
}
|
||||
|
||||
case catComment: {
|
||||
// We don't treat "%\n" combinations here specially because
|
||||
// we want to preserve them in the preamble
|
||||
string s;
|
||||
while (is.get(c) && catcode(c) != catNewline)
|
||||
s += c;
|
||||
// handle possible DOS line ending
|
||||
if (catcode(c) == catNewline)
|
||||
c = getNewline(is, c);
|
||||
// Note: The '%' at the beginning and the '\n' at the end
|
||||
// of the comment are not stored.
|
||||
++lineno_;
|
||||
push_back(Token(s, catComment));
|
||||
break;
|
||||
}
|
||||
|
||||
case catEscape: {
|
||||
is.get(c);
|
||||
if (!is) {
|
||||
error("unexpected end of input");
|
||||
} else {
|
||||
string s(1, c);
|
||||
if (catcode(c) == catLetter) {
|
||||
// collect letters
|
||||
while (is.get(c) && catcode(c) == catLetter)
|
||||
s += c;
|
||||
if (catcode(c) != catLetter)
|
||||
is.putback(c);
|
||||
}
|
||||
push_back(Token(s, catEscape));
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case catIgnore: {
|
||||
cerr << "ignoring a char: " << int(c) << "\n";
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
push_back(Token(c, catcode(c)));
|
||||
|
||||
case catNewline: {
|
||||
++lineno_;
|
||||
docstring s(1, getNewline(is_, c));
|
||||
while (is_.get(c) && catcode(c) == catNewline) {
|
||||
++lineno_;
|
||||
s += getNewline(is_, c);
|
||||
}
|
||||
if (catcode(c) != catNewline)
|
||||
is_.putback(c);
|
||||
push_back(Token(s, catNewline));
|
||||
break;
|
||||
}
|
||||
|
||||
case catComment: {
|
||||
// We don't treat "%\n" combinations here specially because
|
||||
// we want to preserve them in the preamble
|
||||
docstring s;
|
||||
while (is_.get(c) && catcode(c) != catNewline)
|
||||
s += c;
|
||||
// handle possible DOS line ending
|
||||
if (catcode(c) == catNewline)
|
||||
c = getNewline(is_, c);
|
||||
// Note: The '%' at the beginning and the '\n' at the end
|
||||
// of the comment are not stored.
|
||||
++lineno_;
|
||||
push_back(Token(s, catComment));
|
||||
break;
|
||||
}
|
||||
|
||||
case catEscape: {
|
||||
is_.get(c);
|
||||
if (!is_) {
|
||||
error("unexpected end of input");
|
||||
} else {
|
||||
docstring s(1, c);
|
||||
if (catcode(c) == catLetter) {
|
||||
// collect letters
|
||||
while (is_.get(c) && catcode(c) == catLetter)
|
||||
s += c;
|
||||
if (catcode(c) != catLetter)
|
||||
is_.putback(c);
|
||||
}
|
||||
push_back(Token(s, catEscape));
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case catIgnore: {
|
||||
cerr << "ignoring a char: " << c << "\n";
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
push_back(Token(docstring(1, c), catcode(c)));
|
||||
}
|
||||
//cerr << tokens_.back();
|
||||
}
|
||||
|
||||
|
||||
@ -459,7 +471,7 @@ string Parser::verbatimOption()
|
||||
string res;
|
||||
if (next_token().character() == '[') {
|
||||
Token t = get_token();
|
||||
for (Token t = get_token(); t.character() != ']' && good(); t = get_token()) {
|
||||
for (t = get_token(); t.character() != ']' && good(); t = get_token()) {
|
||||
if (t.cat() == catBegin) {
|
||||
putback();
|
||||
res += '{' + verbatim_item() + '}';
|
||||
|
@ -12,10 +12,11 @@
|
||||
#ifndef PARSER_H
|
||||
#define PARSER_H
|
||||
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "support/docstream.h"
|
||||
|
||||
namespace lyx {
|
||||
|
||||
@ -46,9 +47,6 @@ enum CatCode {
|
||||
};
|
||||
|
||||
|
||||
CatCode catcode(unsigned char c);
|
||||
|
||||
|
||||
enum {
|
||||
FLAG_BRACE_LAST = 1 << 1, // last closing brace ends the parsing
|
||||
FLAG_RIGHT = 1 << 2, // next \\right ends the parsing process
|
||||
@ -75,18 +73,16 @@ enum {
|
||||
class Token {
|
||||
public:
|
||||
///
|
||||
Token() : cs_(), char_(0), cat_(catIgnore) {}
|
||||
Token() : cs_(), cat_(catIgnore) {}
|
||||
///
|
||||
Token(char c, CatCode cat) : cs_(), char_(c), cat_(cat) {}
|
||||
///
|
||||
Token(std::string const & cs, CatCode cat) : cs_(cs), char_(0), cat_(cat) {}
|
||||
Token(docstring const & cs, CatCode cat) : cs_(to_utf8(cs)), cat_(cat) {}
|
||||
|
||||
///
|
||||
std::string const & cs() const { return cs_; }
|
||||
/// Returns the catcode of the token
|
||||
CatCode cat() const { return cat_; }
|
||||
///
|
||||
char character() const { return char_; }
|
||||
char character() const { return cs_.empty() ? 0 : cs_[0]; }
|
||||
/// Returns the token as string
|
||||
std::string asString() const;
|
||||
/// Returns the token verbatim
|
||||
@ -96,8 +92,6 @@ private:
|
||||
///
|
||||
std::string cs_;
|
||||
///
|
||||
char char_;
|
||||
///
|
||||
CatCode cat_;
|
||||
};
|
||||
|
||||
@ -119,9 +113,16 @@ class Parser {
|
||||
|
||||
public:
|
||||
///
|
||||
Parser(std::istream & is);
|
||||
Parser(idocstream & is);
|
||||
///
|
||||
Parser(std::string const & s);
|
||||
///
|
||||
~Parser();
|
||||
|
||||
/// change the latex encoding of the input stream
|
||||
void setEncoding(std::string const & encoding);
|
||||
/// get the current latex encoding of the input stream
|
||||
std::string getEncoding() const { return encoding_latex_; }
|
||||
|
||||
///
|
||||
int lineno() const { return lineno_; }
|
||||
@ -174,8 +175,8 @@ public:
|
||||
char getChar();
|
||||
///
|
||||
void error(std::string const & msg);
|
||||
/// Parses \p is into tokens
|
||||
void tokenize(std::istream & is);
|
||||
/// Parses one token from the input stream is_
|
||||
void tokenize_one();
|
||||
///
|
||||
void push_back(Token const & t);
|
||||
/// The previous token.
|
||||
@ -183,11 +184,11 @@ public:
|
||||
/// The current token.
|
||||
Token const & curr_token() const;
|
||||
/// The next token.
|
||||
Token const & next_token() const;
|
||||
Token const & next_token();
|
||||
/// Make the next token current and return that.
|
||||
Token const & get_token();
|
||||
/// \return whether the current token starts a new paragraph
|
||||
bool isParagraph() const;
|
||||
bool isParagraph();
|
||||
/// skips spaces (and comments if \p skip_comments is true)
|
||||
void skip_spaces(bool skip_comments = false);
|
||||
/// puts back spaces (and comments if \p skip_comments is true)
|
||||
@ -195,7 +196,7 @@ public:
|
||||
///
|
||||
void lex(std::string const & s);
|
||||
///
|
||||
bool good() const;
|
||||
bool good();
|
||||
///
|
||||
std::string verbatim_item();
|
||||
///
|
||||
@ -214,6 +215,12 @@ private:
|
||||
std::vector<Token> tokens_;
|
||||
///
|
||||
unsigned pos_;
|
||||
///
|
||||
idocstringstream * iss_;
|
||||
///
|
||||
idocstream & is_;
|
||||
/// latex name of the current encoding
|
||||
std::string encoding_latex_;
|
||||
};
|
||||
|
||||
|
||||
|
@ -94,7 +94,7 @@ void parse_math(Parser & p, ostream & os, unsigned flags, const mode_type mode)
|
||||
t.cat() == catAlign ||
|
||||
t.cat() == catActive ||
|
||||
t.cat() == catParameter)
|
||||
os << t.character();
|
||||
os << t.cs();
|
||||
|
||||
else if (t.cat() == catBegin) {
|
||||
os << '{';
|
||||
|
@ -248,7 +248,7 @@ string const scale_as_percentage(string const & scale)
|
||||
}
|
||||
|
||||
|
||||
void handle_package(string const & name, string const & opts,
|
||||
void handle_package(Parser &p, string const & name, string const & opts,
|
||||
bool in_lyx_preamble)
|
||||
{
|
||||
vector<string> options = split_options(opts);
|
||||
@ -326,9 +326,10 @@ void handle_package(string const & name, string const & opts,
|
||||
; // ignore this
|
||||
|
||||
else if (name == "inputenc") {
|
||||
// only set when there is not more than one inputenc option
|
||||
// therefore check for the "," character
|
||||
// also only set when there is not more than one babel language option
|
||||
// only set when there is not more than one inputenc
|
||||
// option therefore check for the "," character also
|
||||
// only set when there is not more than one babel
|
||||
// language option
|
||||
if (opts.find(",") == string::npos && one_language == true) {
|
||||
if (opts == "ascii")
|
||||
//change ascii to auto to be in the unicode range, see
|
||||
@ -337,6 +338,8 @@ void handle_package(string const & name, string const & opts,
|
||||
else if (!opts.empty())
|
||||
h_inputencoding = opts;
|
||||
}
|
||||
if (!options.empty())
|
||||
p.setEncoding(options.back());
|
||||
options.clear();
|
||||
}
|
||||
|
||||
@ -413,7 +416,7 @@ void handle_package(string const & name, string const & opts,
|
||||
void end_preamble(ostream & os, TextClass const & /*textclass*/)
|
||||
{
|
||||
os << "#LyX file created by tex2lyx " << PACKAGE_VERSION << "\n"
|
||||
<< "\\lyxformat 247\n"
|
||||
<< "\\lyxformat 249\n"
|
||||
<< "\\begin_document\n"
|
||||
<< "\\begin_header\n"
|
||||
<< "\\textclass " << h_textclass << "\n";
|
||||
@ -664,21 +667,19 @@ void parse_preamble(Parser & p, ostream & os,
|
||||
else if (t.cs() == "usepackage") {
|
||||
string const options = p.getArg('[', ']');
|
||||
string const name = p.getArg('{', '}');
|
||||
if (options.empty() && name.find(',')) {
|
||||
vector<string> vecnames;
|
||||
split(name, vecnames, ',');
|
||||
vector<string>::const_iterator it = vecnames.begin();
|
||||
vector<string>::const_iterator end = vecnames.end();
|
||||
for (; it != end; ++it)
|
||||
handle_package(trim(*it), string(),
|
||||
in_lyx_preamble);
|
||||
} else {
|
||||
handle_package(name, options, in_lyx_preamble);
|
||||
}
|
||||
vector<string> vecnames;
|
||||
split(name, vecnames, ',');
|
||||
vector<string>::const_iterator it = vecnames.begin();
|
||||
vector<string>::const_iterator end = vecnames.end();
|
||||
for (; it != end; ++it)
|
||||
handle_package(p, trim(*it), options,
|
||||
in_lyx_preamble);
|
||||
}
|
||||
|
||||
else if (t.cs() == "inputencoding") {
|
||||
h_inputencoding = p.getArg('{','}');
|
||||
string const encoding = p.getArg('{','}');
|
||||
h_inputencoding = encoding;
|
||||
p.setEncoding(encoding);
|
||||
}
|
||||
|
||||
else if (t.cs() == "newenvironment") {
|
||||
|
@ -661,16 +661,15 @@ void parse_table(Parser & p, ostream & os, bool is_long_tabular,
|
||||
}
|
||||
}
|
||||
|
||||
else if (t.cat() == catSpace || t.cat() == catNewline)
|
||||
os << t.cs();
|
||||
|
||||
else if (t.cat() == catLetter ||
|
||||
t.cat() == catSuper ||
|
||||
t.cat() == catSub ||
|
||||
t.cat() == catOther ||
|
||||
t.cat() == catActive ||
|
||||
t.cat() == catParameter)
|
||||
os << t.character();
|
||||
else if (t.cat() == catSpace
|
||||
|| t.cat() == catNewline
|
||||
|| t.cat() == catLetter
|
||||
|| t.cat() == catSuper
|
||||
|| t.cat() == catSub
|
||||
|| t.cat() == catOther
|
||||
|| t.cat() == catActive
|
||||
|| t.cat() == catParameter)
|
||||
os << t.cs();
|
||||
|
||||
else if (t.cat() == catBegin) {
|
||||
os << '{';
|
||||
|
@ -2,4 +2,5 @@
|
||||
\subsection{\label{sub:External-Subsection}External Subsection}
|
||||
|
||||
This is a small dummy child document to show how files can be inserted
|
||||
to another document.
|
||||
to another document. Here are some accented characters to make sure
|
||||
the encoding is passed to included files: éè
|
||||
|
@ -75,6 +75,12 @@ foo & bar \\
|
||||
bar & foo
|
||||
\end{tabular}
|
||||
|
||||
Let's try a few unicode characters: the (R) symbol \textregistered
|
||||
(and the same one with braces \textregistered{} and a space after) or
|
||||
maybe an accented a \'{a} or this one \'a or this \^\i.
|
||||
|
||||
Watch out: \textregistered should be glued to its successor here.
|
||||
|
||||
Final Text.
|
||||
\end{document}
|
||||
|
||||
|
@ -15,20 +15,20 @@
|
||||
#include "tex2lyx.h"
|
||||
|
||||
#include "Context.h"
|
||||
#include "TextClass.h"
|
||||
#include "Encoding.h"
|
||||
#include "Layout.h"
|
||||
#include "TextClass.h"
|
||||
|
||||
#include "support/lassert.h"
|
||||
#include "support/convert.h"
|
||||
#include "support/debug.h"
|
||||
#include "support/ExceptionMessage.h"
|
||||
#include "support/filetools.h"
|
||||
#include "support/lassert.h"
|
||||
#include "support/lstrings.h"
|
||||
#include "support/os.h"
|
||||
#include "support/Package.h"
|
||||
|
||||
#include <cstdlib>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
@ -202,7 +202,7 @@ void read_environment(Parser & p, string const & begin,
|
||||
*/
|
||||
void read_syntaxfile(FileName const & file_name)
|
||||
{
|
||||
ifstream is(file_name.toFilesystemEncoding().c_str());
|
||||
ifdocstream is(file_name.toFilesystemEncoding().c_str());
|
||||
if (!is.good()) {
|
||||
cerr << "Could not open syntax file \"" << file_name
|
||||
<< "\" for reading." << endl;
|
||||
@ -236,6 +236,7 @@ void read_syntaxfile(FileName const & file_name)
|
||||
|
||||
|
||||
string documentclass;
|
||||
string default_encoding;
|
||||
string syntaxfile;
|
||||
bool overwrite_files = false;
|
||||
|
||||
@ -253,6 +254,7 @@ int parse_help(string const &, string const &)
|
||||
"\t-userdir dir try to set user directory to dir\n"
|
||||
"\t-sysdir dir try to set system directory to dir\n"
|
||||
"\t-c textclass declare the textclass\n"
|
||||
"\t-e encoding set the default encoding (latex name)\n"
|
||||
"\t-n translate a noweb (aka literate programming) file.\n"
|
||||
"\t-s syntaxfile read additional syntax file" << endl;
|
||||
exit(0);
|
||||
@ -270,6 +272,17 @@ int parse_class(string const & arg, string const &)
|
||||
}
|
||||
|
||||
|
||||
/*!
 * Command line handler for the "-e" switch: stores the requested default
 * encoding (a LaTeX encoding name) in default_encoding.
 *
 * \param arg the argument following the switch; must not be empty.
 * \return 1 on success. If \p arg is empty an error is printed and the
 *         program exits with status 1.
 */
int parse_encoding(string const & arg, string const &)
{
	bool const have_encoding = !arg.empty();
	if (!have_encoding) {
		cerr << "Missing encoding string after -e switch" << endl;
		exit(1);
	}
	default_encoding = arg;
	return 1;
}
|
||||
|
||||
|
||||
int parse_syntaxfile(string const & arg, string const &)
|
||||
{
|
||||
if (arg.empty()) {
|
||||
@ -328,6 +341,7 @@ void easyParse(int & argc, char * argv[])
|
||||
map<string, cmd_helper> cmdmap;
|
||||
|
||||
cmdmap["-c"] = parse_class;
|
||||
cmdmap["-e"] = parse_encoding;
|
||||
cmdmap["-f"] = parse_force;
|
||||
cmdmap["-s"] = parse_syntaxfile;
|
||||
cmdmap["-help"] = parse_help;
|
||||
@ -389,9 +403,11 @@ namespace {
|
||||
* You must ensure that \p parentFilePath is properly set before calling
|
||||
* this function!
|
||||
*/
|
||||
void tex2lyx(istream & is, ostream & os)
|
||||
void tex2lyx(idocstream & is, ostream & os, string const & encoding)
|
||||
{
|
||||
Parser p(is);
|
||||
if (!encoding.empty())
|
||||
p.setEncoding(encoding);
|
||||
//p.dump();
|
||||
|
||||
stringstream ss;
|
||||
@ -411,7 +427,7 @@ void tex2lyx(istream & is, ostream & os)
|
||||
os << ss.str();
|
||||
#ifdef TEST_PARSER
|
||||
p.reset();
|
||||
ofstream parsertest("parsertest.tex");
|
||||
ofdocstream parsertest("parsertest.tex");
|
||||
while (p.good())
|
||||
parsertest << p.get_token().asInput();
|
||||
// <origfile> and parsertest.tex should now have identical content
|
||||
@ -420,9 +436,12 @@ void tex2lyx(istream & is, ostream & os)
|
||||
|
||||
|
||||
/// convert TeX from \p infilename to LyX and write it to \p os
|
||||
bool tex2lyx(FileName const & infilename, ostream & os)
|
||||
bool tex2lyx(FileName const & infilename, ostream & os, string const & encoding)
|
||||
{
|
||||
ifstream is(infilename.toFilesystemEncoding().c_str());
|
||||
ifdocstream is;
|
||||
// forbid buffering on this stream
|
||||
is.rdbuf()->pubsetbuf(0,0);
|
||||
is.open(infilename.toFilesystemEncoding().c_str());
|
||||
if (!is.good()) {
|
||||
cerr << "Could not open input file \"" << infilename
|
||||
<< "\" for reading." << endl;
|
||||
@ -430,7 +449,7 @@ bool tex2lyx(FileName const & infilename, ostream & os)
|
||||
}
|
||||
string const oldParentFilePath = parentFilePath;
|
||||
parentFilePath = onlyPath(infilename.absFilename());
|
||||
tex2lyx(is, os);
|
||||
tex2lyx(is, os, encoding);
|
||||
parentFilePath = oldParentFilePath;
|
||||
return true;
|
||||
}
|
||||
@ -438,7 +457,8 @@ bool tex2lyx(FileName const & infilename, ostream & os)
|
||||
} // anonymous namespace
|
||||
|
||||
|
||||
bool tex2lyx(string const & infilename, FileName const & outfilename)
|
||||
bool tex2lyx(string const & infilename, FileName const & outfilename,
|
||||
string const & encoding)
|
||||
{
|
||||
if (outfilename.isReadableFile()) {
|
||||
if (overwrite_files) {
|
||||
@ -462,7 +482,7 @@ bool tex2lyx(string const & infilename, FileName const & outfilename)
|
||||
cerr << "Input file: " << infilename << "\n";
|
||||
cerr << "Output file: " << outfilename << "\n";
|
||||
#endif
|
||||
return tex2lyx(FileName(infilename), os);
|
||||
return tex2lyx(FileName(infilename), os, encoding);
|
||||
}
|
||||
|
||||
} // namespace lyx
|
||||
@ -485,11 +505,11 @@ int main(int argc, char * argv[])
|
||||
os::init(argc, argv);
|
||||
|
||||
try { init_package(internal_path(to_utf8(from_local8bit(argv[0]))),
|
||||
cl_system_support, cl_user_support,
|
||||
top_build_dir_is_two_levels_up);
|
||||
cl_system_support, cl_user_support,
|
||||
top_build_dir_is_two_levels_up);
|
||||
} catch (ExceptionMessage const & message) {
|
||||
cerr << to_utf8(message.title_) << ":\n"
|
||||
<< to_utf8(message.details_) << endl;
|
||||
<< to_utf8(message.details_) << endl;
|
||||
if (message.type_ == ErrorException)
|
||||
exit(1);
|
||||
}
|
||||
@ -507,6 +527,7 @@ int main(int argc, char * argv[])
|
||||
} else
|
||||
outfilename = changeExtension(infilename, ".lyx");
|
||||
|
||||
// Read the syntax tables
|
||||
FileName const system_syntaxfile = libFileSearch("", "syntax.default");
|
||||
if (system_syntaxfile.empty()) {
|
||||
cerr << "Error: Could not find syntax file \"syntax.default\"." << endl;
|
||||
@ -516,16 +537,31 @@ int main(int argc, char * argv[])
|
||||
if (!syntaxfile.empty())
|
||||
read_syntaxfile(makeAbsPath(syntaxfile));
|
||||
|
||||
// Read the encodings table.
|
||||
FileName const symbols_path = libFileSearch(string(), "unicodesymbols");
|
||||
if (symbols_path.empty()) {
|
||||
cerr << "Error: Could not find file \"unicodesymbols\"."
|
||||
<< endl;
|
||||
exit(1);
|
||||
}
|
||||
FileName const enc_path = libFileSearch(string(), "encodings");
|
||||
if (enc_path.empty()) {
|
||||
cerr << "Error: Could not find file \"encodings\"."
|
||||
<< endl;
|
||||
exit(1);
|
||||
}
|
||||
encodings.read(enc_path, symbols_path);
|
||||
|
||||
// The real work now.
|
||||
masterFilePath = onlyPath(infilename);
|
||||
parentFilePath = masterFilePath;
|
||||
|
||||
if (outfilename == "-") {
|
||||
if (tex2lyx(FileName(infilename), cout))
|
||||
if (tex2lyx(FileName(infilename), cout, default_encoding))
|
||||
return EXIT_SUCCESS;
|
||||
else
|
||||
return EXIT_FAILURE;
|
||||
} else {
|
||||
if (tex2lyx(infilename, FileName(outfilename)))
|
||||
if (tex2lyx(infilename, FileName(outfilename), default_encoding))
|
||||
return EXIT_SUCCESS;
|
||||
else
|
||||
return EXIT_FAILURE;
|
||||
|
@ -114,13 +114,16 @@ extern std::string getParentFilePath();
|
||||
|
||||
/*!
|
||||
* Reads tex input from \a infilename and writes lyx output to \a outfilename.
|
||||
* The (latex) encoding can be provided as \a encoding.
|
||||
* Uses some common settings for the preamble, so this should only
|
||||
* be used more than once for included documents.
|
||||
* Caution: Overwrites the existing preamble settings if the new document
|
||||
* contains a preamble.
|
||||
* \return true if the conversion was successful, else false.
|
||||
*/
|
||||
bool tex2lyx(std::string const & infilename, support::FileName const & outfilename);
|
||||
bool tex2lyx(std::string const & infilename,
|
||||
support::FileName const & outfilename,
|
||||
std::string const & encoding);
|
||||
|
||||
|
||||
} // namespace lyx
|
||||
|
@ -17,6 +17,7 @@
|
||||
#include "tex2lyx.h"
|
||||
|
||||
#include "Context.h"
|
||||
#include "Encoding.h"
|
||||
#include "FloatList.h"
|
||||
#include "Layout.h"
|
||||
#include "Length.h"
|
||||
@ -344,8 +345,6 @@ void translate_box_len(string const & length, string & value, string & unit, str
|
||||
string find_file(string const & name, string const & path,
|
||||
char const * const * extensions)
|
||||
{
|
||||
// FIXME UNICODE encoding of name and path may be wrong (makeAbsPath
|
||||
// expects utf8)
|
||||
for (char const * const * what = extensions; *what; ++what) {
|
||||
string const trial = addExtension(name, *what);
|
||||
if (makeAbsPath(trial, path).exists())
|
||||
@ -509,7 +508,7 @@ void output_command_layout(ostream & os, Parser & p, bool outer,
|
||||
* The drawback is that the logic inside the function becomes
|
||||
* complicated, and that is the reason why it is not implemented.
|
||||
*/
|
||||
void check_space(Parser const & p, ostream & os, Context & context)
|
||||
void check_space(Parser & p, ostream & os, Context & context)
|
||||
{
|
||||
Token const next = p.next_token();
|
||||
Token const curr = p.curr_token();
|
||||
@ -1054,8 +1053,6 @@ void fix_relative_filename(string & name)
|
||||
if (fname.isAbsolute())
|
||||
return;
|
||||
|
||||
// FIXME UNICODE encoding of name may be wrong (makeAbsPath expects
|
||||
// utf8)
|
||||
name = to_utf8(makeRelPath(from_utf8(makeAbsPath(name, getMasterFilePath()).absFilename()),
|
||||
from_utf8(getParentFilePath())));
|
||||
}
|
||||
@ -1262,7 +1259,7 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
|
||||
t.cat() == catParameter) {
|
||||
// This translates "&" to "\\&" which may be wrong...
|
||||
context.check_layout(os);
|
||||
os << t.character();
|
||||
os << t.cs();
|
||||
}
|
||||
|
||||
else if (p.isParagraph()) {
|
||||
@ -1281,7 +1278,7 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
|
||||
else
|
||||
os << "\\InsetSpace ~\n";
|
||||
} else
|
||||
os << t.character();
|
||||
os << t.cs();
|
||||
}
|
||||
|
||||
else if (t.cat() == catBegin &&
|
||||
@ -1309,7 +1306,7 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
|
||||
next.character() == '*') {
|
||||
p.get_token();
|
||||
if (p.next_token().cat() == catEnd) {
|
||||
os << next.character();
|
||||
os << next.cs();
|
||||
p.get_token();
|
||||
} else {
|
||||
p.putback();
|
||||
@ -1552,8 +1549,9 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
|
||||
TeXFont const oldFont = context.font;
|
||||
// save the current font size
|
||||
string const size = oldFont.size;
|
||||
// reset the font size to default, because the font size switches don't
|
||||
// affect section headings and the like
|
||||
// reset the font size to default, because the
|
||||
// font size switches don't affect section
|
||||
// headings and the like
|
||||
context.font.size = known_coded_sizes[0];
|
||||
output_font_change(os, oldFont, context.font);
|
||||
// write the layout
|
||||
@ -1605,8 +1603,6 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
|
||||
string const path = getMasterFilePath();
|
||||
// We want to preserve relative / absolute filenames,
|
||||
// therefore path is only used for testing
|
||||
// FIXME UNICODE encoding of name and path may be
|
||||
// wrong (makeAbsPath expects utf8)
|
||||
if (!makeAbsPath(name, path).exists()) {
|
||||
// The file extension is probably missing.
|
||||
// Now try to find it out.
|
||||
@ -1637,8 +1633,6 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
|
||||
name = pdftex_name;
|
||||
}
|
||||
|
||||
// FIXME UNICODE encoding of name and path may be
|
||||
// wrong (makeAbsPath expects utf8)
|
||||
if (makeAbsPath(name, path).exists())
|
||||
fix_relative_filename(name);
|
||||
else
|
||||
@ -1763,6 +1757,7 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
|
||||
p.skip_spaces();
|
||||
context.check_layout(os);
|
||||
string const s = p.verbatim_item();
|
||||
//FIXME: this never triggers in UTF8
|
||||
if (s == "\xb1" || s == "\xb3" || s == "\xb2" || s == "\xb5")
|
||||
os << s;
|
||||
else
|
||||
@ -2127,25 +2122,31 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
|
||||
|
||||
else if (t.cs() == "selectlanguage") {
|
||||
context.check_layout(os);
|
||||
// save the language for the case that a \foreignlanguage is used
|
||||
// save the language for the case that a
|
||||
// \foreignlanguage is used
|
||||
|
||||
//FIXME: this is wrong, the language should
|
||||
// be saved in the context. (JMarc)
|
||||
selectlang = subst(p.verbatim_item(), "\n", " ");
|
||||
os << "\\lang " << selectlang << "\n";
|
||||
|
||||
}
|
||||
|
||||
else if (t.cs() == "foreignlanguage") {
|
||||
context.check_layout(os);
|
||||
os << "\n\\lang " << subst(p.verbatim_item(), "\n", " ") << "\n";
|
||||
os << subst(p.verbatim_item(), "\n", " ");
|
||||
// FIXME: the second argument of \foreignlanguage
|
||||
// has to be parsed (like for \textsf, for
|
||||
// example).
|
||||
// set back to last selectlanguage
|
||||
os << "\n\\lang " << selectlang << "\n";
|
||||
}
|
||||
|
||||
else if (t.cs() == "inputencoding")
|
||||
// write nothing because this is done by LyX using the "\lang"
|
||||
// information given by selectlanguage and foreignlanguage
|
||||
subst(p.verbatim_item(), "\n", " ");
|
||||
|
||||
else if (t.cs() == "inputencoding") {
|
||||
// nothing to write here
|
||||
string const enc = subst(p.verbatim_item(), "\n", " ");
|
||||
p.setEncoding(enc);
|
||||
}
|
||||
else if (t.cs() == "LyX" || t.cs() == "TeX"
|
||||
|| t.cs() == "LaTeX") {
|
||||
context.check_layout(os);
|
||||
@ -2238,18 +2239,6 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
|
||||
handle_ert(os, oss.str(), context);
|
||||
}
|
||||
|
||||
else if (t.cs() == "\"") {
|
||||
context.check_layout(os);
|
||||
string const name = p.verbatim_item();
|
||||
if (name == "a") os << '\xe4';
|
||||
else if (name == "o") os << '\xf6';
|
||||
else if (name == "u") os << '\xfc';
|
||||
else if (name == "A") os << '\xc4';
|
||||
else if (name == "O") os << '\xd6';
|
||||
else if (name == "U") os << '\xdc';
|
||||
else handle_ert(os, "\"{" + name + "}", context);
|
||||
}
|
||||
|
||||
// Problem: \= creates a tabstop inside the tabbing environment
|
||||
// and else an accent. In the latter case we really would want
|
||||
// \={o} instead of \= o.
|
||||
@ -2260,30 +2249,22 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
|
||||
|| t.cs() == "'" || t.cs() == "`"
|
||||
|| t.cs() == "~" || t.cs() == "." || t.cs() == "=") {
|
||||
// we need the trim as the LyX parser chokes on such spaces
|
||||
// The argument of InsetLatexAccent is parsed as a
|
||||
// subset of LaTeX, so don't parse anything here,
|
||||
// but use the raw argument.
|
||||
// Otherwise we would convert \~{\i} wrongly.
|
||||
// This will of course not translate \~{\ss} to \~{ß},
|
||||
// but that does at least compile and does only look
|
||||
// strange on screen.
|
||||
context.check_layout(os);
|
||||
os << "\\i \\" << t.cs() << "{"
|
||||
<< trim(p.verbatim_item(), " ")
|
||||
<< "}\n";
|
||||
}
|
||||
|
||||
else if (t.cs() == "ss") {
|
||||
context.check_layout(os);
|
||||
os << "\xdf";
|
||||
skip_braces(p); // eat {}
|
||||
}
|
||||
|
||||
else if (t.cs() == "i" || t.cs() == "j" || t.cs() == "l" ||
|
||||
t.cs() == "L") {
|
||||
context.check_layout(os);
|
||||
os << "\\i \\" << t.cs() << "{}\n";
|
||||
skip_braces(p); // eat {}
|
||||
// try to see whether the string is in unicodesymbols
|
||||
docstring rem;
|
||||
string command = t.asInput() + "{"
|
||||
+ trim(p.verbatim_item())
|
||||
+ "}";
|
||||
docstring s = encodings.fromLaTeXCommand(from_utf8(command), rem);
|
||||
if (!s.empty()) {
|
||||
if (!rem.empty())
|
||||
cerr << "When parsing " << command
|
||||
<< ", result is " << to_utf8(s)
|
||||
<< "+" << to_utf8(rem) << endl;
|
||||
os << to_utf8(s);
|
||||
} else
|
||||
// we did not find a non-ert version
|
||||
handle_ert(os, command, context);
|
||||
}
|
||||
|
||||
else if (t.cs() == "\\") {
|
||||
@ -2319,8 +2300,6 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
|
||||
string const path = getMasterFilePath();
|
||||
// We want to preserve relative / absolute filenames,
|
||||
// therefore path is only used for testing
|
||||
// FIXME UNICODE encoding of filename and path may be
|
||||
// wrong (makeAbsPath expects utf8)
|
||||
if ((t.cs() == "include" || t.cs() == "input") &&
|
||||
!makeAbsPath(filename, path).exists()) {
|
||||
// The file extension is probably missing.
|
||||
@ -2331,8 +2310,6 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
|
||||
if (!tex_name.empty())
|
||||
filename = tex_name;
|
||||
}
|
||||
// FIXME UNICODE encoding of filename and path may be
|
||||
// wrong (makeAbsPath expects utf8)
|
||||
if (makeAbsPath(filename, path).exists()) {
|
||||
string const abstexname =
|
||||
makeAbsPath(filename, path).absFilename();
|
||||
@ -2342,7 +2319,8 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
|
||||
string const lyxname =
|
||||
changeExtension(filename, ".lyx");
|
||||
if (t.cs() != "verbatiminput" &&
|
||||
tex2lyx(abstexname, FileName(abslyxname))) {
|
||||
tex2lyx(abstexname, FileName(abslyxname),
|
||||
p.getEncoding())) {
|
||||
os << name << '{' << lyxname << "}\n";
|
||||
} else {
|
||||
os << name << '{' << filename << "}\n";
|
||||
@ -2537,6 +2515,19 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
|
||||
}
|
||||
|
||||
else {
|
||||
// try to see whether the string is in unicodesymbols
|
||||
docstring rem;
|
||||
docstring s = encodings.fromLaTeXCommand(from_utf8(t.asInput()), rem);
|
||||
if (!s.empty()) {
|
||||
if (!rem.empty())
|
||||
cerr << "When parsing " << t.cs()
|
||||
<< ", result is " << to_utf8(s)
|
||||
<< "+" << to_utf8(rem) << endl;
|
||||
context.check_layout(os);
|
||||
os << to_utf8(s);
|
||||
p.skip_spaces();
|
||||
skip_braces(p); // eat {}
|
||||
}
|
||||
//cerr << "#: " << t << " mode: " << mode << endl;
|
||||
// heuristic: read up to next non-nested space
|
||||
/*
|
||||
@ -2550,14 +2541,16 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
|
||||
cerr << "found ERT: " << s << endl;
|
||||
handle_ert(os, s + ' ', context);
|
||||
*/
|
||||
string name = t.asInput();
|
||||
if (p.next_token().asInput() == "*") {
|
||||
// Starred commands like \vspace*{}
|
||||
p.get_token(); // Eat '*'
|
||||
name += '*';
|
||||
else {
|
||||
string name = t.asInput();
|
||||
if (p.next_token().asInput() == "*") {
|
||||
// Starred commands like \vspace*{}
|
||||
p.get_token(); // Eat '*'
|
||||
name += '*';
|
||||
}
|
||||
if (!parse_command(name, p, os, outer, context))
|
||||
handle_ert(os, name, context);
|
||||
}
|
||||
if (! parse_command(name, p, os, outer, context))
|
||||
handle_ert(os, name, context);
|
||||
}
|
||||
|
||||
if (flags & FLAG_LEAVE) {
|
||||
|
@ -33,6 +33,11 @@ What's new
|
||||
|
||||
* DOCUMENT INPUT/OUTPUT
|
||||
|
||||
- Tex2lyx is now able to read files in all LaTeX-supported encodings and
|
||||
transform them into the proper unicode-based format introduced in 1.5.0.
|
||||
This paves the way for many other improvements in LaTeX->LyX translation
|
||||
(bugs 3035, 4379, 4917).
|
||||
|
||||
- Quotes in InsetListings are now normal quotes, rather than InsetQuote
|
||||
entries (bug 5782).
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user