lyx_mirror/src/lyxlex_pimpl.C

519 lines
10 KiB
C++
Raw Normal View History

/**
* \file lyxlex_pimpl.C
* This file is part of LyX, the document processor.
* Licence details can be found in the file COPYING.
*
* \author Lars Gullik Bjønnes
* \author Jean-Marc Lasgouttes
* \author Jürgen Vigna
*
* Full author contact details are available in file CREDITS.
*/
#include <config.h>
#include "lyxlex_pimpl.h"
#include "debug.h"
#include "support/filetools.h"
#include "support/lyxalgo.h"
#include "support/lstrings.h"
#include "support/types.h"
#include "support/unicode.h"
#include <functional>
namespace lyx {
using support::compare_ascii_no_case;
using support::FileName;
using support::getFormatFromContents;
using support::makeDisplayPath;
using support::split;
using support::subst;
using std::endl;
using std::getline;
using std::lower_bound;
using std::sort;
using std::string;
using std::ios;
using std::istream;
using std::ostream;
namespace {
class compare_tags
: public std::binary_function<keyword_item, keyword_item, bool> {
public:
// used by lower_bound, sort and sorted
bool operator()(keyword_item const & a, keyword_item const & b) const
{
// we use the ascii version, because in turkish, 'i'
// is not the lowercase version of 'I', and thus
// turkish locale breaks parsing of tags.
return compare_ascii_no_case(a.tag, b.tag) < 0;
}
};
} // end of anon namespace
// Builds the lexer implementation around the keyword table `tab` holding
// `num` entries. The input stream `is` is constructed on the member filebuf
// fb_, status and lineno start at 0 and the comment character defaults
// to '#'.
LyXLex::Pimpl::Pimpl(keyword_item * tab, int num)
: is(&fb_), table(tab), no_items(num),
status(0), lineno(0), commentChar('#')
{
// Checks that the table is sorted and sorts it in place if it is not.
verifyTable();
}
// Returns a copy of the current contents of buff.
string const LyXLex::Pimpl::getString() const
{
return buff;
}
// Returns the current contents of buff converted to a docstring
// by from_utf8().
docstring const LyXLex::Pimpl::getDocString() const
{
return from_utf8(buff);
}
/// Writes \p message to lyxerr, followed by the current line number and
/// the display path of the file being read. Every "$$Token" in the message
/// is first replaced by the current buffer contents (getString()).
void LyXLex::Pimpl::printError(string const & message) const
{
	string const expanded = subst(message, "$$Token", getString());
	string const where = to_utf8(makeDisplayPath(name));

	lyxerr << "LyX: " << expanded
	       << " [around line " << lineno
	       << " of file " << where << ']' << endl;
}
/// Dumps the active keyword table to \p os: a header line with the number
/// of entries, then one line per entry with its index, tag and code.
/// The stream is flushed at the end.
void LyXLex::Pimpl::printTable(ostream & os)
{
	os << "\nNumber of tags: " << no_items << endl;

	int idx = 0;
	while (idx < no_items) {
		keyword_item const & entry = table[idx];
		os << "table[" << idx
		   << "]: tag: `" << entry.tag
		   << "' code:" << entry.code << '\n';
		++idx;
	}

	os.flush();
}
void LyXLex::Pimpl::verifyTable()
{
// Check if the table is sorted and if not, sort it.
if (table
&& !lyx::sorted(table, table + no_items, compare_tags())) {
lyxerr << "The table passed to LyXLex is not sorted!\n"
<< "Tell the developers to fix it!" << endl;
// We sort it anyway to avoid problems.
lyxerr << "\nUnsorted:" << endl;
printTable(lyxerr);
sort(table, table + no_items, compare_tags());
lyxerr << "\nSorted:" << endl;
printTable(lyxerr);
}
}
/// Makes \p tab (with \p num entries) the active keyword table. The table
/// that was active before is saved on the `pushed` stack so popTable()
/// can restore it. The new table is verified (and sorted if necessary).
void LyXLex::Pimpl::pushTable(keyword_item * tab, int num)
{
	pushed.push(pushed_table(table, no_items));

	table = tab;
	no_items = num;

	verifyTable();
}
void LyXLex::Pimpl::popTable()
{
if (pushed.empty()) {
lyxerr << "LyXLex error: nothing to pop!" << endl;
return;
}
pushed_table tmp = pushed.top();
pushed.pop();
table = tmp.table_elem;
no_items = tmp.table_siz;
}
/// Attaches the lexer to \p filename and resets the line counter.
///
/// The file format is taken from getFormatFromContents(). For "gzip",
/// "zip" and "compress" the stream reads through the gz_ filter chain
/// (gzip decompressor on top of a file source); any other format is opened
/// directly with the member filebuf fb_.
///
/// \return true when the underlying file reports open and the stream
///         is good.
bool LyXLex::Pimpl::setFile(FileName const & filename)
{
	// Check the format of the file.
	string const format = getFormatFromContents(filename);
	bool const compressed =
		format == "gzip" || format == "zip" || format == "compress";

	if (!compressed) {
		LYXERR(Debug::LYXLEX) << "lyxlex: UNcompressed" << endl;

		// The check only outputs a debug message, because it triggers
		// a bug in compaq cxx 6.2, where is_open() returns 'true' for
		// a fresh new filebuf.  (JMarc)
		bool const inUse =
			fb_.is_open() || istream::off_type(is.tellg()) > 0;
		if (inUse) {
			LYXERR(Debug::LYXLEX) << "Error in LyXLex::setFile: "
				"file or stream already set." << endl;
		}

		fb_.open(filename.toFilesystemEncoding().c_str(), ios::in);
		is.rdbuf(&fb_);
		name = filename.absFilename();
		lineno = 0;
		return fb_.is_open() && is.good();
	}

	LYXERR(Debug::LYXLEX) << "lyxlex: compressed" << endl;

	// The check only outputs a debug message, because it triggers
	// a bug in compaq cxx 6.2, where is_open() returns 'true' for
	// a fresh new filebuf.  (JMarc)
	bool const inUse =
		!gz_.empty() || istream::off_type(is.tellg()) > -1;
	if (inUse) {
		LYXERR(Debug::LYXLEX) << "Error in LyXLex::setFile: "
			"file or stream already set." << endl;
	}

	gz_.push(io::gzip_decompressor());
	gz_.push(io::file_source(filename.toFilesystemEncoding()));
	is.rdbuf(&gz_);
	name = filename.absFilename();
	lineno = 0;
	return gz_.component<io::file_source>(1)->is_open() && is.good();
}
/// Makes the lexer read from the stream buffer of \p i and resets the line
/// counter. If a file is already open or the stream position is past the
/// start, a debug message is emitted, but the switch still takes place.
void LyXLex::Pimpl::setStream(istream & i)
{
	bool const inUse =
		fb_.is_open() || istream::off_type(is.tellg()) > 0;
	if (inUse) {
		LYXERR(Debug::LYXLEX) << "Error in LyXLex::setStream: "
			"file or stream already set." << endl;
	}

	is.rdbuf(i.rdbuf());
	lineno = 0;
}
// Sets the character that next() treats as the start of a comment;
// the rest of the line after it is discarded.
void LyXLex::Pimpl::setCommentChar(char c)
{
commentChar = c;
}
bool LyXLex::Pimpl::next(bool esc /* = false */)
{
if (!pushTok.empty()) {
// There can have been a whole line pushed so
// we extract the first word and leaves the rest
// in pushTok. (Lgb)
Fix bug 3293 by Bernhard Roider: This changes the semantics of isOK() and operator(), comments from Bernhard below: With the old version of lyxlex it was _impossible_ to check whether reading an integer, float, ... succeeded or not. The current solution to check for is.bad() in some cases and in other cases use is.good() does not give the desired information. Moreover the result of is.bad() depends on the stl implementation and behaves different for linux and windows. the bug was introduced by the patch that fixed the bug that crashed lyx when "inset-insert ert" was executed from the command buffer. The lexer has the method isOK() which reflects the status of the stream is. The operators void* and ! are not really well defined (they depend on the value of is.bad()). What is missing is a test if the last reading operation was successful and thus the returned value is valid. That's what i implemented in this patch. The new rule for using the lexer: if you want to know if the lexer still has data to read (either from the stream or from the pushed token) then use "lex.isOK()". If you want to test if the last reading operation was successful then use eg. "if (lex) {...}" or unsuccessful then use eg. "if (!lex) {...}" an example: int readParam(LyxLex &lex) { int param = 1; // default value if (lex.isOK()) { // the lexer has data to read int p; // temporary variable lex >> p; if (lex) param = p; // only use the input if the reading operation was successful } return param; } git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/trunk@17569 a592a061-630c-0410-9148-cb99ea01b6c8
2007-03-26 13:43:49 +00:00
if (pushTok[0] == '\\' && pushTok.find(' ') != string::npos) {
buff.clear();
pushTok = split(pushTok, buff, ' ');
} else {
buff = pushTok;
pushTok.clear();
}
Fix bug 3293 by Bernhard Roider: This changes the semantics of isOK() and operator(), comments from Bernhard below: With the old version of lyxlex it was _impossible_ to check whether reading an integer, float, ... succeeded or not. The current solution to check for is.bad() in some cases and in other cases use is.good() does not give the desired information. Moreover the result of is.bad() depends on the stl implementation and behaves different for linux and windows. the bug was introduced by the patch that fixed the bug that crashed lyx when "inset-insert ert" was executed from the command buffer. The lexer has the method isOK() which reflects the status of the stream is. The operators void* and ! are not really well defined (they depend on the value of is.bad()). What is missing is a test if the last reading operation was successful and thus the returned value is valid. That's what i implemented in this patch. The new rule for using the lexer: if you want to know if the lexer still has data to read (either from the stream or from the pushed token) then use "lex.isOK()". If you want to test if the last reading operation was successful then use eg. "if (lex) {...}" or unsuccessful then use eg. "if (!lex) {...}" an example: int readParam(LyxLex &lex) { int param = 1; // default value if (lex.isOK()) { // the lexer has data to read int p; // temporary variable lex >> p; if (lex) param = p; // only use the input if the reading operation was successful } return param; } git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/trunk@17569 a592a061-630c-0410-9148-cb99ea01b6c8
2007-03-26 13:43:49 +00:00
status = LEX_TOKEN;
return true;
}
if (!esc) {
unsigned char c = 0; // getc() returns an int
char cc = 0;
status = 0;
while (is && !status) {
is.get(cc);
c = cc;
if (c == commentChar) {
// Read rest of line (fast :-)
#if 1
// That is not fast... (Lgb)
string dummy;
getline(is, dummy);
LYXERR(Debug::LYXLEX) << "Comment read: `" << c
<< dummy << '\'' << endl;
#else
// unfortunately ignore is buggy (Lgb)
is.ignore(100, '\n');
#endif
++lineno;
continue;
}
if (c == '\"') {
buff.clear();
do {
is.get(cc);
c = cc;
if (c != '\r')
buff.push_back(c);
} while (c != '\"' && c != '\n' && is);
if (c != '\"') {
printError("Missing quote");
if (c == '\n')
++lineno;
}
buff.resize(buff.size()-1);
status = LEX_DATA;
break;
}
if (c == ',')
continue; /* Skip ','s */
// using relational operators with chars other
// than == and != is not safe. And if it is done
// the type _have_ to be unsigned. It usually a
// lot better to use the functions from cctype
if (c > ' ' && is) {
buff.clear();
do {
buff.push_back(c);
is.get(cc);
c = cc;
} while (c > ' ' && c != ',' && is);
status = LEX_TOKEN;
}
if (c == '\r' && is) {
// The Windows support has lead to the
// possibility of "\r\n" at the end of
// a line. This will stop LyX choking
// when it expected to find a '\n'
is.get(cc);
c = cc;
}
if (c == '\n')
++lineno;
}
if (status) return true;
status = is.eof() ? LEX_FEOF: LEX_UNDEF;
buff.clear();
return false;
} else {
unsigned char c = 0; // getc() returns an int
char cc = 0;
status = 0;
while (is && !status) {
is.get(cc);
c = cc;
// skip ','s
Fix bug 3293 by Bernhard Roider: This changes the semantics of isOK() and operator(), comments from Bernhard below: With the old version of lyxlex it was _impossible_ to check whether reading an integer, float, ... succeeded or not. The current solution to check for is.bad() in some cases and in other cases use is.good() does not give the desired information. Moreover the result of is.bad() depends on the stl implementation and behaves different for linux and windows. the bug was introduced by the patch that fixed the bug that crashed lyx when "inset-insert ert" was executed from the command buffer. The lexer has the method isOK() which reflects the status of the stream is. The operators void* and ! are not really well defined (they depend on the value of is.bad()). What is missing is a test if the last reading operation was successful and thus the returned value is valid. That's what i implemented in this patch. The new rule for using the lexer: if you want to know if the lexer still has data to read (either from the stream or from the pushed token) then use "lex.isOK()". If you want to test if the last reading operation was successful then use eg. "if (lex) {...}" or unsuccessful then use eg. "if (!lex) {...}" an example: int readParam(LyxLex &lex) { int param = 1; // default value if (lex.isOK()) { // the lexer has data to read int p; // temporary variable lex >> p; if (lex) param = p; // only use the input if the reading operation was successful } return param; } git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/trunk@17569 a592a061-630c-0410-9148-cb99ea01b6c8
2007-03-26 13:43:49 +00:00
if (c == ',')
continue;
if (c == commentChar) {
// Read rest of line (fast :-)
#if 1
// That is still not fast... (Lgb)
string dummy;
getline(is, dummy);
LYXERR(Debug::LYXLEX) << "Comment read: `" << c
<< dummy << '\'' << endl;
#else
// but ignore is also still buggy (Lgb)
// This is fast (Lgb)
is.ignore(100, '\n');
#endif
++lineno;
continue;
}
// string
if (c == '\"') {
buff.clear();
bool escaped = false;
do {
escaped = false;
is.get(cc);
c = cc;
if (c == '\r') continue;
if (c == '\\') {
// escape the next char
is.get(cc);
c = cc;
if (c == '\"' || c == '\\')
escaped = true;
else
buff.push_back('\\');
}
buff.push_back(c);
if (!escaped && c == '\"') break;
} while (c != '\n' && is);
if (c != '\"') {
printError("Missing quote");
if (c == '\n')
++lineno;
}
buff.resize(buff.size() -1);
status = LEX_DATA;
break;
}
if (c > ' ' && is) {
buff.clear();
do {
if (c == '\\') {
// escape the next char
is.get(cc);
c = cc;
//escaped = true;
}
buff.push_back(c);
is.get(cc);
c = cc;
} while (c > ' ' && c != ',' && is);
status = LEX_TOKEN;
}
// new line
if (c == '\n')
++lineno;
}
if (status) return true;
status = is.eof() ? LEX_FEOF : LEX_UNDEF;
buff.clear();
return false;
}
}
int LyXLex::Pimpl::search_kw(char const * const tag) const
{
keyword_item search_tag = { tag, 0 };
keyword_item * res =
lower_bound(table, table + no_items,
search_tag, compare_tags());
// use the compare_ascii_no_case instead of compare_no_case,
// because in turkish, 'i' is not the lowercase version of 'I',
// and thus turkish locale breaks parsing of tags.
if (res != table + no_items
&& !compare_ascii_no_case(res->tag, tag))
return res->code;
return LEX_UNDEF;
}
/// Reads the next item with next() and, if it is a token (LEX_TOKEN),
/// returns the result of looking it up with search_kw(). In every other
/// case the status left behind by next() is returned.
int LyXLex::Pimpl::lex()
{
	// NOTE: the original author flagged a possible bug here without
	// saying what it is.
	bool const gotItem = next();
	if (!gotItem || status != LEX_TOKEN)
		return status;

	return search_kw(getString().c_str());
}
/// Reads the rest of the current line into buff, dropping every '\r' and
/// the terminating '\n'.
///
/// \return true with status LEX_DATA when a newline was consumed (lineno is
///         incremented) or when the input ended after at least one character
///         was stored (last line without newline); false when nothing
///         could be read, in which case status is left unchanged.
bool LyXLex::Pimpl::eatLine()
{
	buff.clear();

	char cc = 0;
	bool sawNewline = false;
	// Only characters that were actually extracted are stored: a failed
	// get() leaves cc untouched, so storing it regardless would duplicate
	// the final character of an unterminated last line, or add a '\0'
	// when the stream is already at its end.
	while (is.get(cc)) {
		if (cc == '\n') {
			sawNewline = true;
			break;
		}
		if (cc != '\r')
			buff.push_back(cc);
	}

	if (sawNewline)
		++lineno;

	if (sawNewline || !buff.empty()) {
		status = LEX_DATA;
		return true;
	}
	return false;
}
bool LyXLex::Pimpl::nextToken()
{
if (!pushTok.empty()) {
// There can have been a whole line pushed so
// we extract the first word and leaves the rest
// in pushTok. (Lgb)
Fix bug 3293 by Bernhard Roider: This changes the semantics of isOK() and operator(), comments from Bernhard below: With the old version of lyxlex it was _impossible_ to check whether reading an integer, float, ... succeeded or not. The current solution to check for is.bad() in some cases and in other cases use is.good() does not give the desired information. Moreover the result of is.bad() depends on the stl implementation and behaves different for linux and windows. the bug was introduced by the patch that fixed the bug that crashed lyx when "inset-insert ert" was executed from the command buffer. The lexer has the method isOK() which reflects the status of the stream is. The operators void* and ! are not really well defined (they depend on the value of is.bad()). What is missing is a test if the last reading operation was successful and thus the returned value is valid. That's what i implemented in this patch. The new rule for using the lexer: if you want to know if the lexer still has data to read (either from the stream or from the pushed token) then use "lex.isOK()". If you want to test if the last reading operation was successful then use eg. "if (lex) {...}" or unsuccessful then use eg. "if (!lex) {...}" an example: int readParam(LyxLex &lex) { int param = 1; // default value if (lex.isOK()) { // the lexer has data to read int p; // temporary variable lex >> p; if (lex) param = p; // only use the input if the reading operation was successful } return param; } git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/trunk@17569 a592a061-630c-0410-9148-cb99ea01b6c8
2007-03-26 13:43:49 +00:00
if (pushTok[0] == '\\' && pushTok.find(' ') != string::npos) {
buff.clear();
pushTok = split(pushTok, buff, ' ');
} else {
buff = pushTok;
pushTok.clear();
}
Fix bug 3293 by Bernhard Roider: This changes the semantics of isOK() and operator(), comments from Bernhard below: With the old version of lyxlex it was _impossible_ to check whether reading an integer, float, ... succeeded or not. The current solution to check for is.bad() in some cases and in other cases use is.good() does not give the desired information. Moreover the result of is.bad() depends on the stl implementation and behaves different for linux and windows. the bug was introduced by the patch that fixed the bug that crashed lyx when "inset-insert ert" was executed from the command buffer. The lexer has the method isOK() which reflects the status of the stream is. The operators void* and ! are not really well defined (they depend on the value of is.bad()). What is missing is a test if the last reading operation was successful and thus the returned value is valid. That's what i implemented in this patch. The new rule for using the lexer: if you want to know if the lexer still has data to read (either from the stream or from the pushed token) then use "lex.isOK()". If you want to test if the last reading operation was successful then use eg. "if (lex) {...}" or unsuccessful then use eg. "if (!lex) {...}" an example: int readParam(LyxLex &lex) { int param = 1; // default value if (lex.isOK()) { // the lexer has data to read int p; // temporary variable lex >> p; if (lex) param = p; // only use the input if the reading operation was successful } return param; } git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/trunk@17569 a592a061-630c-0410-9148-cb99ea01b6c8
2007-03-26 13:43:49 +00:00
status = LEX_TOKEN;
return true;
}
status = 0;
while (is && !status) {
unsigned char c = 0;
char cc = 0;
is.get(cc);
c = cc;
if (c >= ' ' && is) {
buff.clear();
if (c == '\\') { // first char == '\\'
do {
buff.push_back(c);
is.get(cc);
c = cc;
} while (c > ' ' && c != '\\' && is);
} else {
do {
buff.push_back(c);
is.get(cc);
c = cc;
} while (c >= ' ' && c != '\\' && is);
}
if (c == '\\') is.putback(c); // put it back
status = LEX_TOKEN;
}
if (c == '\n')
++lineno;
}
if (status) return true;
status = is.eof() ? LEX_FEOF: LEX_UNDEF;
buff.clear();
return false;
}
// History note (svn://svn.lyx.org/lyx/lyx-devel/trunk@17569,
// 2007-03-26 13:43:49 +00:00):
//
// Fix bug 3293 by Bernhard Roider: This changes the semantics of isOK()
// and operator(). Comments from Bernhard below:
//
// With the old version of lyxlex it was _impossible_ to check whether
// reading an integer, float, ... succeeded or not. The current solution,
// to check for is.bad() in some cases and in other cases use is.good(),
// does not give the desired information. Moreover the result of is.bad()
// depends on the STL implementation and behaves differently on Linux and
// Windows. The bug was introduced by the patch that fixed the bug that
// crashed LyX when "inset-insert ert" was executed from the command buffer.
//
// The lexer has the method isOK() which reflects the status of the stream
// `is`. The operators void* and ! are not really well defined (they depend
// on the value of is.bad()). What is missing is a test whether the last
// reading operation was successful and thus the returned value is valid.
// That is what this patch implements.
//
// The new rule for using the lexer: if you want to know whether the lexer
// still has data to read (either from the stream or from the pushed token)
// then use "lex.isOK()". If you want to test whether the last reading
// operation was successful then use e.g. "if (lex) {...}", or unsuccessful
// then use e.g. "if (!lex) {...}".
//
// An example:
//
//   int readParam(LyxLex &lex)
//   {
//       int param = 1;      // default value
//       if (lex.isOK()) {   // the lexer has data to read
//           int p;          // temporary variable
//           lex >> p;
//           if (lex)
//               param = p;  // only use the input if the reading
//                           // operation was successful
//       }
//       return param;
//   }
// Reports whether the input stream is currently in the good() state.
// A pending pushed token (pushTok) is not taken into account here.
bool LyXLex::Pimpl::inputAvailable() {
return is.good();
}
// Stores pt as the pushed token, replacing any previous one. next() and
// nextToken() hand out the pushed token before reading from the stream.
void LyXLex::Pimpl::pushToken(string const & pt)
{
pushTok = pt;
}
} // namespace lyx