mirror of
https://git.lyx.org/repos/lyx.git
synced 2024-11-09 18:31:04 +00:00
First serious step for the utf8 file format. tex2lyx is now able to
read utf8 tex documents and translate them to lyxformat 249. There is still no code to discover the encoding and use it, but it is the easiest part (I hope). git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/trunk@27563 a592a061-630c-0410-9148-cb99ea01b6c8
This commit is contained in:
parent
f8f5a7b28d
commit
337c6d1577
@ -13,7 +13,6 @@
|
||||
#include "Parser.h"
|
||||
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
|
||||
using namespace std;
|
||||
|
||||
@ -54,13 +53,12 @@ void catInit()
|
||||
theCatcode[int('@')] = catLetter;
|
||||
}
|
||||
|
||||
|
||||
/*!
|
||||
* Translate a line ending to '\n'.
|
||||
* \p c must have catcode catNewline, and it must be the last character read
|
||||
* from \p is.
|
||||
*/
|
||||
char getNewline(istream & is, char c)
|
||||
char getNewline(idocstream & is, char c)
|
||||
{
|
||||
// we have to handle 3 different line endings:
|
||||
// - UNIX (\n)
|
||||
@ -68,9 +66,10 @@ char getNewline(istream & is, char c)
|
||||
// - DOS (\r\n)
|
||||
if (c == '\r') {
|
||||
// MAC or DOS
|
||||
if (is.get(c) && c != '\n') {
|
||||
char_type wc;
|
||||
if (is.get(wc) && wc != '\n') {
|
||||
// MAC
|
||||
is.putback(c);
|
||||
is.putback(wc);
|
||||
}
|
||||
return '\n';
|
||||
}
|
||||
@ -78,18 +77,14 @@ char getNewline(istream & is, char c)
|
||||
return c;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
//
|
||||
// catcodes
|
||||
//
|
||||
|
||||
CatCode catcode(unsigned char c)
|
||||
CatCode catcode(char_type c)
|
||||
{
|
||||
return theCatcode[c];
|
||||
if (c < 256)
|
||||
return theCatcode[(unsigned char)c];
|
||||
return catOther;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
//
|
||||
@ -105,18 +100,18 @@ ostream & operator<<(ostream & os, Token const & t)
|
||||
else if (t.cat() == catEscape)
|
||||
os << '\\' << t.cs() << ' ';
|
||||
else if (t.cat() == catLetter)
|
||||
os << t.character();
|
||||
os << t.cs();
|
||||
else if (t.cat() == catNewline)
|
||||
os << "[" << t.cs().size() << "\\n," << t.cat() << "]\n";
|
||||
else
|
||||
os << '[' << t.character() << ',' << t.cat() << ']';
|
||||
os << '[' << t.cs() << ',' << t.cat() << ']';
|
||||
return os;
|
||||
}
|
||||
|
||||
|
||||
string Token::asString() const
|
||||
{
|
||||
return cs_.size() ? cs_ : string(1, char_);
|
||||
return cs_;
|
||||
}
|
||||
|
||||
|
||||
@ -124,9 +119,9 @@ string Token::asInput() const
|
||||
{
|
||||
if (cat_ == catComment)
|
||||
return '%' + cs_ + '\n';
|
||||
if (cat_ == catSpace || cat_ == catNewline)
|
||||
return cs_;
|
||||
return char_ ? string(1, char_) : '\\' + cs_;
|
||||
if (cat_ == catEscape)
|
||||
return '\\' + cs_;
|
||||
return cs_;
|
||||
}
|
||||
|
||||
|
||||
@ -135,14 +130,15 @@ string Token::asInput() const
|
||||
//
|
||||
|
||||
|
||||
Parser::Parser(istream & is)
|
||||
Parser::Parser(idocstream & is)
|
||||
: lineno_(0), pos_(0), iss_(0), is_(is)
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
Parser::Parser(string const & s)
|
||||
: lineno_(0), pos_(0), iss_(new istringstream(s)), is_(*iss_)
|
||||
: lineno_(0), pos_(0),
|
||||
iss_(new idocstringstream(from_utf8(s))), is_(*iss_)
|
||||
{
|
||||
}
|
||||
|
||||
@ -267,7 +263,7 @@ char Parser::getChar()
|
||||
{
|
||||
if (!good())
|
||||
error("The input stream is not well...");
|
||||
return tokens_[pos_++].character();
|
||||
return get_token().character();
|
||||
}
|
||||
|
||||
|
||||
@ -365,14 +361,13 @@ string const Parser::verbatimEnvironment(string const & name)
|
||||
void Parser::tokenize_one()
|
||||
{
|
||||
catInit();
|
||||
char c;
|
||||
char_type c;
|
||||
if (!is_.get(c))
|
||||
return;
|
||||
//cerr << "reading c: " << c << "\n";
|
||||
|
||||
switch (catcode(c)) {
|
||||
case catSpace: {
|
||||
string s(1, c);
|
||||
docstring s(1, c);
|
||||
while (is_.get(c) && catcode(c) == catSpace)
|
||||
s += c;
|
||||
if (catcode(c) != catSpace)
|
||||
@ -383,7 +378,7 @@ void Parser::tokenize_one()
|
||||
|
||||
case catNewline: {
|
||||
++lineno_;
|
||||
string s(1, getNewline(is_, c));
|
||||
docstring s(1, getNewline(is_, c));
|
||||
while (is_.get(c) && catcode(c) == catNewline) {
|
||||
++lineno_;
|
||||
s += getNewline(is_, c);
|
||||
@ -397,7 +392,7 @@ void Parser::tokenize_one()
|
||||
case catComment: {
|
||||
// We don't treat "%\n" combinations here specially because
|
||||
// we want to preserve them in the preamble
|
||||
string s;
|
||||
docstring s;
|
||||
while (is_.get(c) && catcode(c) != catNewline)
|
||||
s += c;
|
||||
// handle possible DOS line ending
|
||||
@ -415,7 +410,7 @@ void Parser::tokenize_one()
|
||||
if (!is_) {
|
||||
error("unexpected end of input");
|
||||
} else {
|
||||
string s(1, c);
|
||||
docstring s(1, c);
|
||||
if (catcode(c) == catLetter) {
|
||||
// collect letters
|
||||
while (is_.get(c) && catcode(c) == catLetter)
|
||||
@ -429,13 +424,14 @@ void Parser::tokenize_one()
|
||||
}
|
||||
|
||||
case catIgnore: {
|
||||
cerr << "ignoring a char: " << int(c) << "\n";
|
||||
cerr << "ignoring a char: " << c << "\n";
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
push_back(Token(c, catcode(c)));
|
||||
push_back(Token(docstring(1, c), catcode(c)));
|
||||
}
|
||||
//cerr << tokens_.back();
|
||||
}
|
||||
|
||||
|
||||
@ -464,7 +460,7 @@ string Parser::verbatimOption()
|
||||
string res;
|
||||
if (next_token().character() == '[') {
|
||||
Token t = get_token();
|
||||
for (Token t = get_token(); t.character() != ']' && good(); t = get_token()) {
|
||||
for (t = get_token(); t.character() != ']' && good(); t = get_token()) {
|
||||
if (t.cat() == catBegin) {
|
||||
putback();
|
||||
res += '{' + verbatim_item() + '}';
|
||||
|
@ -12,10 +12,11 @@
|
||||
#ifndef PARSER_H
|
||||
#define PARSER_H
|
||||
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "support/docstream.h"
|
||||
|
||||
namespace lyx {
|
||||
|
||||
@ -46,9 +47,6 @@ enum CatCode {
|
||||
};
|
||||
|
||||
|
||||
CatCode catcode(unsigned char c);
|
||||
|
||||
|
||||
enum {
|
||||
FLAG_BRACE_LAST = 1 << 1, // last closing brace ends the parsing
|
||||
FLAG_RIGHT = 1 << 2, // next \\right ends the parsing process
|
||||
@ -75,18 +73,16 @@ enum {
|
||||
class Token {
|
||||
public:
|
||||
///
|
||||
Token() : cs_(), char_(0), cat_(catIgnore) {}
|
||||
Token() : cs_(), cat_(catIgnore) {}
|
||||
///
|
||||
Token(char c, CatCode cat) : cs_(), char_(c), cat_(cat) {}
|
||||
///
|
||||
Token(std::string const & cs, CatCode cat) : cs_(cs), char_(0), cat_(cat) {}
|
||||
Token(docstring const & cs, CatCode cat) : cs_(to_utf8(cs)), cat_(cat) {}
|
||||
|
||||
///
|
||||
std::string const & cs() const { return cs_; }
|
||||
/// Returns the catcode of the token
|
||||
CatCode cat() const { return cat_; }
|
||||
///
|
||||
char character() const { return char_; }
|
||||
char character() const { return cs_.empty() ? 0 : cs_[0]; }
|
||||
/// Returns the token as string
|
||||
std::string asString() const;
|
||||
/// Returns the token verbatim
|
||||
@ -96,8 +92,6 @@ private:
|
||||
///
|
||||
std::string cs_;
|
||||
///
|
||||
char char_;
|
||||
///
|
||||
CatCode cat_;
|
||||
};
|
||||
|
||||
@ -119,7 +113,7 @@ class Parser {
|
||||
|
||||
public:
|
||||
///
|
||||
Parser(std::istream & is);
|
||||
Parser(idocstream & is);
|
||||
///
|
||||
Parser(std::string const & s);
|
||||
///
|
||||
@ -217,9 +211,9 @@ private:
|
||||
///
|
||||
unsigned pos_;
|
||||
///
|
||||
std::istringstream * iss_;
|
||||
idocstringstream * iss_;
|
||||
///
|
||||
std::istream & is_;
|
||||
idocstream & is_;
|
||||
};
|
||||
|
||||
|
||||
|
@ -94,7 +94,7 @@ void parse_math(Parser & p, ostream & os, unsigned flags, const mode_type mode)
|
||||
t.cat() == catAlign ||
|
||||
t.cat() == catActive ||
|
||||
t.cat() == catParameter)
|
||||
os << t.character();
|
||||
os << t.cs();
|
||||
|
||||
else if (t.cat() == catBegin) {
|
||||
os << '{';
|
||||
|
@ -413,7 +413,7 @@ void handle_package(string const & name, string const & opts,
|
||||
void end_preamble(ostream & os, TextClass const & /*textclass*/)
|
||||
{
|
||||
os << "#LyX file created by tex2lyx " << PACKAGE_VERSION << "\n"
|
||||
<< "\\lyxformat 247\n"
|
||||
<< "\\lyxformat 249\n"
|
||||
<< "\\begin_document\n"
|
||||
<< "\\begin_header\n"
|
||||
<< "\\textclass " << h_textclass << "\n";
|
||||
|
@ -661,16 +661,15 @@ void parse_table(Parser & p, ostream & os, bool is_long_tabular,
|
||||
}
|
||||
}
|
||||
|
||||
else if (t.cat() == catSpace || t.cat() == catNewline)
|
||||
os << t.cs();
|
||||
|
||||
else if (t.cat() == catLetter ||
|
||||
t.cat() == catSuper ||
|
||||
t.cat() == catSub ||
|
||||
t.cat() == catOther ||
|
||||
t.cat() == catActive ||
|
||||
t.cat() == catParameter)
|
||||
os << t.character();
|
||||
else if (t.cat() == catSpace
|
||||
|| t.cat() == catNewline
|
||||
|| t.cat() == catLetter
|
||||
|| t.cat() == catSuper
|
||||
|| t.cat() == catSub
|
||||
|| t.cat() == catOther
|
||||
|| t.cat() == catActive
|
||||
|| t.cat() == catParameter)
|
||||
os << t.cs();
|
||||
|
||||
else if (t.cat() == catBegin) {
|
||||
os << '{';
|
||||
|
@ -18,17 +18,16 @@
|
||||
#include "TextClass.h"
|
||||
#include "Layout.h"
|
||||
|
||||
#include "support/lassert.h"
|
||||
#include "support/convert.h"
|
||||
#include "support/debug.h"
|
||||
#include "support/ExceptionMessage.h"
|
||||
#include "support/filetools.h"
|
||||
#include "support/lassert.h"
|
||||
#include "support/lstrings.h"
|
||||
#include "support/os.h"
|
||||
#include "support/Package.h"
|
||||
|
||||
#include <cstdlib>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
@ -202,7 +201,7 @@ void read_environment(Parser & p, string const & begin,
|
||||
*/
|
||||
void read_syntaxfile(FileName const & file_name)
|
||||
{
|
||||
ifstream is(file_name.toFilesystemEncoding().c_str());
|
||||
ifdocstream is(file_name.toFilesystemEncoding().c_str());
|
||||
if (!is.good()) {
|
||||
cerr << "Could not open syntax file \"" << file_name
|
||||
<< "\" for reading." << endl;
|
||||
@ -389,7 +388,7 @@ namespace {
|
||||
* You must ensure that \p parentFilePath is properly set before calling
|
||||
* this function!
|
||||
*/
|
||||
void tex2lyx(istream & is, ostream & os)
|
||||
void tex2lyx(idocstream & is, ostream & os)
|
||||
{
|
||||
Parser p(is);
|
||||
//p.dump();
|
||||
@ -411,7 +410,7 @@ void tex2lyx(istream & is, ostream & os)
|
||||
os << ss.str();
|
||||
#ifdef TEST_PARSER
|
||||
p.reset();
|
||||
ofstream parsertest("parsertest.tex");
|
||||
ofdocstream parsertest("parsertest.tex");
|
||||
while (p.good())
|
||||
parsertest << p.get_token().asInput();
|
||||
// <origfile> and parsertest.tex should now have identical content
|
||||
@ -422,7 +421,7 @@ void tex2lyx(istream & is, ostream & os)
|
||||
/// convert TeX from \p infilename to LyX and write it to \p os
|
||||
bool tex2lyx(FileName const & infilename, ostream & os)
|
||||
{
|
||||
ifstream is(infilename.toFilesystemEncoding().c_str());
|
||||
ifdocstream is(infilename.toFilesystemEncoding().c_str());
|
||||
if (!is.good()) {
|
||||
cerr << "Could not open input file \"" << infilename
|
||||
<< "\" for reading." << endl;
|
||||
|
@ -1262,7 +1262,7 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
|
||||
t.cat() == catParameter) {
|
||||
// This translates "&" to "\\&" which may be wrong...
|
||||
context.check_layout(os);
|
||||
os << t.character();
|
||||
os << t.cs();
|
||||
}
|
||||
|
||||
else if (p.isParagraph()) {
|
||||
@ -1281,7 +1281,7 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
|
||||
else
|
||||
os << "\\InsetSpace ~\n";
|
||||
} else
|
||||
os << t.character();
|
||||
os << t.cs();
|
||||
}
|
||||
|
||||
else if (t.cat() == catBegin &&
|
||||
@ -1309,7 +1309,7 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
|
||||
next.character() == '*') {
|
||||
p.get_token();
|
||||
if (p.next_token().cat() == catEnd) {
|
||||
os << next.character();
|
||||
os << next.cs();
|
||||
p.get_token();
|
||||
} else {
|
||||
p.putback();
|
||||
|
Loading…
Reference in New Issue
Block a user