2003-08-19 10:04:35 +00:00
|
|
|
/**
 * \file Parser.cpp
 * This file is part of LyX, the document processor.
 * Licence details can be found in the file COPYING.
 *
 * \author André Pönitz
 *
 * Full author contact details are available in file CREDITS.
 */
|
|
|
|
|
2003-04-17 06:22:07 +00:00
|
|
|
#include <config.h>
|
|
|
|
|
2008-11-16 21:51:18 +00:00
|
|
|
#include "Encoding.h"
|
2007-04-26 04:53:06 +00:00
|
|
|
#include "Parser.h"
|
2013-02-04 14:14:30 +00:00
|
|
|
#include "support/lstrings.h"
|
2010-12-30 21:56:55 +00:00
|
|
|
#include "support/textutils.h"
|
2003-04-17 06:22:07 +00:00
|
|
|
|
2003-02-12 21:07:47 +00:00
|
|
|
#include <iostream>
|
2003-02-12 07:53:03 +00:00
|
|
|
|
2007-12-12 10:16:00 +00:00
|
|
|
using namespace std;
|
2013-02-04 14:14:30 +00:00
|
|
|
using namespace lyx::support;
|
2006-10-21 00:16:43 +00:00
|
|
|
|
|
|
|
namespace lyx {
|
|
|
|
|
2003-02-12 11:09:22 +00:00
|
|
|
namespace {
|
2003-02-12 07:53:03 +00:00
|
|
|
|
2004-11-08 08:24:43 +00:00
|
|
|
/*!
|
|
|
|
* Translate a line ending to '\n'.
|
|
|
|
* \p c must have catcode catNewline, and it must be the last character read
|
|
|
|
* from \p is.
|
|
|
|
*/
|
2013-02-15 16:03:18 +00:00
|
|
|
char_type getNewline(iparserdocstream & is, char_type c)
|
2004-11-08 08:24:43 +00:00
|
|
|
{
|
|
|
|
// we have to handle 3 different line endings:
|
|
|
|
// - UNIX (\n)
|
|
|
|
// - MAC (\r)
|
|
|
|
// - DOS (\r\n)
|
|
|
|
if (c == '\r') {
|
|
|
|
// MAC or DOS
|
2008-11-16 17:02:00 +00:00
|
|
|
char_type wc;
|
|
|
|
if (is.get(wc) && wc != '\n') {
|
2004-11-08 08:24:43 +00:00
|
|
|
// MAC
|
2008-11-16 17:02:00 +00:00
|
|
|
is.putback(wc);
|
2004-11-08 08:24:43 +00:00
|
|
|
}
|
|
|
|
return '\n';
|
|
|
|
}
|
|
|
|
// UNIX
|
|
|
|
return c;
|
|
|
|
}
|
|
|
|
|
2017-07-23 11:11:54 +00:00
|
|
|
} // namespace
|
2003-02-12 11:09:22 +00:00
|
|
|
|
2003-02-12 07:53:03 +00:00
|
|
|
//
|
|
|
|
// Token
|
|
|
|
//
|
|
|
|
|
|
|
|
/// Write a debugging representation of \p t to \p os.
/// Comments, spaces, letters and control sequences are printed roughly as
/// they would appear in the input; everything else is printed as
/// "[contents,catcode]".
ostream & operator<<(ostream & os, Token const & t)
{
	switch (t.cat()) {
	case catComment:
		os << '%' << t.cs() << '\n';
		break;
	case catSpace:
	case catLetter:
		os << t.cs();
		break;
	case catEscape:
		os << '\\' << t.cs() << ' ';
		break;
	case catNewline:
		// Only the number of collected newlines is shown.
		os << "[" << t.cs().size() << "\\n," << t.cat() << "]\n";
		break;
	default:
		os << '[' << t.cs() << ',' << t.cat() << ']';
		break;
	}
	return os;
}
|
|
|
|
|
|
|
|
|
2003-02-12 12:05:31 +00:00
|
|
|
/// Return the token text in the form it had in the input: comments get
/// their leading '%' and trailing newline back, control sequences their
/// leading backslash; all other tokens are returned verbatim.
string Token::asInput() const
{
	switch (cat_) {
	case catComment:
		return '%' + cs_ + '\n';
	case catEscape:
		return '\\' + cs_;
	default:
		return cs_;
	}
}
|
|
|
|
|
2003-02-12 11:09:22 +00:00
|
|
|
|
2010-12-30 21:56:55 +00:00
|
|
|
bool Token::isAlnumASCII() const
|
|
|
|
{
|
|
|
|
return cat_ == catLetter ||
|
|
|
|
(cat_ == catOther && cs_.length() == 1 && isDigitASCII(cs_[0]));
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2011-01-05 20:32:45 +00:00
|
|
|
#ifdef FILEDEBUG
|
|
|
|
void debugToken(std::ostream & os, Token const & t, unsigned int flags)
|
|
|
|
{
|
|
|
|
char sep = ' ';
|
|
|
|
os << "t: " << t << " flags: " << flags;
|
|
|
|
if (flags & FLAG_BRACE_LAST) { os << sep << "BRACE_LAST"; sep = '|'; }
|
|
|
|
if (flags & FLAG_RIGHT ) { os << sep << "RIGHT" ; sep = '|'; }
|
|
|
|
if (flags & FLAG_END ) { os << sep << "END" ; sep = '|'; }
|
|
|
|
if (flags & FLAG_BRACK_LAST) { os << sep << "BRACK_LAST"; sep = '|'; }
|
|
|
|
if (flags & FLAG_TEXTMODE ) { os << sep << "TEXTMODE" ; sep = '|'; }
|
|
|
|
if (flags & FLAG_ITEM ) { os << sep << "ITEM" ; sep = '|'; }
|
|
|
|
if (flags & FLAG_LEAVE ) { os << sep << "LEAVE" ; sep = '|'; }
|
|
|
|
if (flags & FLAG_SIMPLE ) { os << sep << "SIMPLE" ; sep = '|'; }
|
|
|
|
if (flags & FLAG_EQUATION ) { os << sep << "EQUATION" ; sep = '|'; }
|
|
|
|
if (flags & FLAG_SIMPLE2 ) { os << sep << "SIMPLE2" ; sep = '|'; }
|
|
|
|
if (flags & FLAG_OPTION ) { os << sep << "OPTION" ; sep = '|'; }
|
|
|
|
if (flags & FLAG_BRACED ) { os << sep << "BRACED" ; sep = '|'; }
|
|
|
|
if (flags & FLAG_CELL ) { os << sep << "CELL" ; sep = '|'; }
|
|
|
|
if (flags & FLAG_TABBING ) { os << sep << "TABBING" ; sep = '|'; }
|
|
|
|
os << "\n";
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
2013-02-15 22:18:32 +00:00
|
|
|
//
|
|
|
|
// Wrapper
|
|
|
|
//
|
|
|
|
|
2013-03-04 18:48:26 +00:00
|
|
|
void iparserdocstream::setEncoding(std::string const & e)
|
2013-02-17 13:53:56 +00:00
|
|
|
{
|
|
|
|
is_ << lyx::setEncoding(e);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2013-02-15 22:18:32 +00:00
|
|
|
/// Push the single character \p c back: it is stored at the front of the
/// internal buffer s_, from which get() reads before touching the stream.
void iparserdocstream::putback(char_type c)
{
	s_.insert(s_.begin(), c);
}
|
|
|
|
|
|
|
|
|
2013-02-22 14:32:13 +00:00
|
|
|
/// Push the whole string \p s back: it is stored in front of whatever is
/// already waiting in the internal buffer s_.
void iparserdocstream::putback(docstring s)
{
	s_.insert(0, s);
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Read one character into \p c. Characters that were put back earlier
/// (buffer s_) are delivered first; only when that buffer is empty is the
/// wrapped stream read.
/// \return *this
iparserdocstream & iparserdocstream::get(char_type &c)
{
	if (!s_.empty()) {
		//cerr << "unparsed: " << to_utf8(s_) <<endl;
		c = s_[0];
		s_.erase(0, 1);
		return *this;
	}
	is_.get(c);
	return *this;
}
|
|
|
|
|
|
|
|
|
2003-02-12 07:53:03 +00:00
|
|
|
//
|
|
|
|
// Parser
|
|
|
|
//
|
|
|
|
|
|
|
|
|
2013-11-11 20:52:14 +00:00
|
|
|
/// Construct a parser reading from the stream \p is.
/// If \p fixedenc is non-empty the stream is switched to that encoding
/// and the encoding is marked as fixed; otherwise "UTF-8" is recorded as
/// the initial encoding and later changes are allowed.
Parser::Parser(idocstream & is, std::string const & fixedenc)
	: lineno_(0),
	  pos_(0),
	  iss_(0),
	  is_(is),
	  encoding_iconv_(fixedenc.empty() ? "UTF-8" : fixedenc),
	  theCatcodesType_(NORMAL_CATCODES),
	  curr_cat_(UNDECIDED_CATCODES),
	  fixed_enc_(!fixedenc.empty())
{
	if (fixed_enc_) {
		is_.setEncoding(fixedenc);
	}
	// curr_cat_ starts out undecided, so this fills the catcode table.
	catInit();
}
|
|
|
|
|
2003-04-23 15:14:43 +00:00
|
|
|
|
2003-04-16 12:52:49 +00:00
|
|
|
/// Construct a parser reading from the UTF-8 encoded string \p s.
/// The parser allocates its own string stream (iss_), which the
/// destructor deletes.
Parser::Parser(string const & s)
	: lineno_(0),
	  pos_(0),
	  iss_(new idocstringstream(from_utf8(s))),
	  is_(*iss_),
	  encoding_iconv_("UTF-8"),
	  theCatcodesType_(NORMAL_CATCODES),
	  curr_cat_(UNDECIDED_CATCODES),
	  // An idocstringstream can not change the encoding
	  fixed_enc_(true)
{
	// curr_cat_ starts out undecided, so this fills the catcode table.
	catInit();
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Release the string stream created by the string constructor.
/// iss_ is 0 when the parser was constructed from an external stream,
/// in which case the delete is a no-op.
Parser::~Parser()
{
	delete iss_;
}
|
|
|
|
|
2003-02-12 07:53:03 +00:00
|
|
|
|
2013-02-15 16:03:18 +00:00
|
|
|
void Parser::deparse()
|
|
|
|
{
|
2013-02-15 22:18:32 +00:00
|
|
|
string s;
|
2013-02-15 16:03:18 +00:00
|
|
|
for(size_type i = pos_ ; i < tokens_.size() ; ++i) {
|
2013-02-15 22:18:32 +00:00
|
|
|
s += tokens_[i].asInput();
|
2013-02-15 16:03:18 +00:00
|
|
|
}
|
2013-02-22 14:32:13 +00:00
|
|
|
is_.putback(from_utf8(s));
|
2013-02-15 16:03:18 +00:00
|
|
|
tokens_.erase(tokens_.begin() + pos_, tokens_.end());
|
|
|
|
// make sure that next token is read
|
|
|
|
tokenize_one();
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2013-02-17 13:53:56 +00:00
|
|
|
bool Parser::setEncoding(std::string const & e, int const & p)
|
2008-11-16 21:51:18 +00:00
|
|
|
{
|
2012-12-30 19:39:33 +00:00
|
|
|
// We may (and need to) use unsafe encodings here: Since the text is
|
|
|
|
// converted to unicode while reading from is_, we never see text in
|
|
|
|
// the original encoding of the parser, but operate on utf8 strings
|
|
|
|
// instead. Therefore, we cannot misparse high bytes as {, } or \\.
|
2013-01-19 18:47:15 +00:00
|
|
|
Encoding const * const enc = encodings.fromLaTeXName(e, p, true);
|
2009-08-24 22:06:51 +00:00
|
|
|
if (!enc) {
|
|
|
|
cerr << "Unknown encoding " << e << ". Ignoring." << std::endl;
|
2013-02-17 13:53:56 +00:00
|
|
|
return false;
|
2009-08-24 22:06:51 +00:00
|
|
|
}
|
2013-02-17 13:53:56 +00:00
|
|
|
return setEncoding(enc->iconvName());
|
2013-01-19 18:47:15 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2013-01-25 11:48:52 +00:00
|
|
|
void Parser::catInit()
|
|
|
|
{
|
|
|
|
if (curr_cat_ == theCatcodesType_)
|
|
|
|
return;
|
|
|
|
curr_cat_ = theCatcodesType_;
|
|
|
|
|
|
|
|
fill(theCatcode_, theCatcode_ + 256, catOther);
|
|
|
|
fill(theCatcode_ + 'a', theCatcode_ + 'z' + 1, catLetter);
|
|
|
|
fill(theCatcode_ + 'A', theCatcode_ + 'Z' + 1, catLetter);
|
|
|
|
// This is wrong!
|
|
|
|
theCatcode_[int('@')] = catLetter;
|
|
|
|
|
|
|
|
if (theCatcodesType_ == NORMAL_CATCODES) {
|
|
|
|
theCatcode_[int('\\')] = catEscape;
|
|
|
|
theCatcode_[int('{')] = catBegin;
|
|
|
|
theCatcode_[int('}')] = catEnd;
|
|
|
|
theCatcode_[int('$')] = catMath;
|
|
|
|
theCatcode_[int('&')] = catAlign;
|
|
|
|
theCatcode_[int('\n')] = catNewline;
|
|
|
|
theCatcode_[int('#')] = catParameter;
|
|
|
|
theCatcode_[int('^')] = catSuper;
|
|
|
|
theCatcode_[int('_')] = catSub;
|
|
|
|
theCatcode_[0x7f] = catIgnore;
|
|
|
|
theCatcode_[int(' ')] = catSpace;
|
|
|
|
theCatcode_[int('\t')] = catSpace;
|
|
|
|
theCatcode_[int('\r')] = catNewline;
|
|
|
|
theCatcode_[int('~')] = catActive;
|
|
|
|
theCatcode_[int('%')] = catComment;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Return the category code of \p c. Only code points below 256 are looked
/// up in the table; every other character has category catOther.
CatCode Parser::catcode(char_type c) const
{
	return c < 256 ? theCatcode_[(unsigned char)c] : catOther;
}
|
|
|
|
|
|
|
|
|
|
|
|
void Parser::setCatcode(char c, CatCode cat)
|
|
|
|
{
|
|
|
|
theCatcode_[(unsigned char)c] = cat;
|
2013-02-15 16:03:18 +00:00
|
|
|
deparse();
|
2013-01-25 11:48:52 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Select the catcode regime \p t, then deparse() so that input which was
/// already tokenized but not consumed yet is tokenized again
/// (tokenize_one() calls catInit(), which rebuilds the table if needed).
void Parser::setCatcodes(cat_type t)
{
	theCatcodesType_ = t;
	deparse();
}
|
|
|
|
|
|
|
|
|
2013-02-17 13:53:56 +00:00
|
|
|
bool Parser::setEncoding(std::string const & e)
|
2013-01-19 18:47:15 +00:00
|
|
|
{
|
|
|
|
//cerr << "setting encoding to " << e << std::endl;
|
|
|
|
encoding_iconv_ = e;
|
2013-11-11 20:52:14 +00:00
|
|
|
// If the encoding is fixed, we must not change the stream encoding
|
|
|
|
// (because the whole input uses that encoding, e.g. if it comes from
|
|
|
|
// the clipboard). We still need to track the original encoding in
|
|
|
|
// encoding_iconv_, so that the generated output is correct.
|
|
|
|
if (!fixed_enc_)
|
|
|
|
is_.setEncoding(e);
|
2013-03-04 18:48:26 +00:00
|
|
|
return true;
|
2008-11-16 21:51:18 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2003-02-12 07:53:03 +00:00
|
|
|
/// Append the token \p t to the end of the token list.
void Parser::push_back(Token const & t)
{
	tokens_.push_back(t);
}
|
|
|
|
|
|
|
|
|
2009-05-11 11:02:25 +00:00
|
|
|
// We return a copy here because the tokens_ vector may get reallocated
|
|
|
|
Token const Parser::prev_token() const
|
2003-11-05 10:14:13 +00:00
|
|
|
{
|
|
|
|
static const Token dummy;
|
|
|
|
return pos_ > 1 ? tokens_[pos_ - 2] : dummy;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2009-05-11 11:02:25 +00:00
|
|
|
// We return a copy here because the tokens_ vector may get reallocated
|
|
|
|
Token const Parser::curr_token() const
|
2003-02-12 07:53:03 +00:00
|
|
|
{
|
|
|
|
static const Token dummy;
|
|
|
|
return pos_ > 0 ? tokens_[pos_ - 1] : dummy;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2009-05-11 11:02:25 +00:00
|
|
|
// We return a copy here because the tokens_ vector may get reallocated
|
|
|
|
Token const Parser::next_token()
|
2003-02-12 07:53:03 +00:00
|
|
|
{
|
|
|
|
static const Token dummy;
|
2013-02-17 13:53:56 +00:00
|
|
|
if (!good())
|
|
|
|
return dummy;
|
|
|
|
if (pos_ >= tokens_.size())
|
|
|
|
tokenize_one();
|
|
|
|
return pos_ < tokens_.size() ? tokens_[pos_] : dummy;
|
2003-02-12 07:53:03 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2011-11-20 20:28:55 +00:00
|
|
|
// We return a copy here because the tokens_ vector may get reallocated
|
|
|
|
Token const Parser::next_next_token()
|
|
|
|
{
|
|
|
|
static const Token dummy;
|
2013-02-17 13:53:56 +00:00
|
|
|
if (!good())
|
|
|
|
return dummy;
|
|
|
|
// If tokenize_one() has not been called after the last get_token() we
|
|
|
|
// need to tokenize two more tokens.
|
2013-12-11 20:54:26 +00:00
|
|
|
if (pos_ >= tokens_.size())
|
|
|
|
tokenize_one();
|
|
|
|
if (pos_ + 1 >= tokens_.size())
|
2011-11-20 20:28:55 +00:00
|
|
|
tokenize_one();
|
|
|
|
return pos_ + 1 < tokens_.size() ? tokens_[pos_ + 1] : dummy;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2009-05-11 11:02:25 +00:00
|
|
|
// We return a copy here because the tokens_ vector may get reallocated
|
|
|
|
Token const Parser::get_token()
|
2003-02-12 07:53:03 +00:00
|
|
|
{
|
|
|
|
static const Token dummy;
|
2013-02-17 13:53:56 +00:00
|
|
|
if (!good())
|
|
|
|
return dummy;
|
|
|
|
if (pos_ >= tokens_.size()) {
|
|
|
|
tokenize_one();
|
|
|
|
if (pos_ >= tokens_.size())
|
|
|
|
return dummy;
|
|
|
|
}
|
2017-07-03 17:53:14 +00:00
|
|
|
// cerr << "looking at token " << tokens_[pos_]
|
2013-02-17 13:53:56 +00:00
|
|
|
// << " pos: " << pos_ << '\n';
|
|
|
|
return tokens_[pos_++];
|
2003-02-12 07:53:03 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2008-11-15 20:30:45 +00:00
|
|
|
bool Parser::isParagraph()
|
2004-06-18 06:47:19 +00:00
|
|
|
{
|
|
|
|
// A new paragraph in TeX ist started
|
|
|
|
// - either by a newline, following any amount of whitespace
|
|
|
|
// characters (including zero), and another newline
|
|
|
|
// - or the token \par
|
|
|
|
if (curr_token().cat() == catNewline &&
|
|
|
|
(curr_token().cs().size() > 1 ||
|
|
|
|
(next_token().cat() == catSpace &&
|
2011-11-20 20:28:55 +00:00
|
|
|
next_next_token().cat() == catNewline)))
|
2004-06-18 06:47:19 +00:00
|
|
|
return true;
|
|
|
|
if (curr_token().cat() == catEscape && curr_token().cs() == "par")
|
|
|
|
return true;
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2010-12-19 14:54:23 +00:00
|
|
|
bool Parser::skip_spaces(bool skip_comments)
|
2003-02-12 07:53:03 +00:00
|
|
|
{
|
2003-11-05 10:14:13 +00:00
|
|
|
// We just silently return if we have no more tokens.
|
|
|
|
// skip_spaces() should be callable at any time,
|
|
|
|
// the caller must check p::good() anyway.
|
2010-12-19 14:54:23 +00:00
|
|
|
bool skipped = false;
|
2003-11-05 10:14:13 +00:00
|
|
|
while (good()) {
|
2004-06-18 06:47:19 +00:00
|
|
|
get_token();
|
|
|
|
if (isParagraph()) {
|
|
|
|
putback();
|
2003-11-05 10:14:13 +00:00
|
|
|
break;
|
2004-06-18 06:47:19 +00:00
|
|
|
}
|
2010-12-19 14:54:23 +00:00
|
|
|
if (curr_token().cat() == catSpace ||
|
|
|
|
curr_token().cat() == catNewline) {
|
|
|
|
skipped = true;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if ((curr_token().cat() == catComment && curr_token().cs().empty()))
|
2004-06-18 06:47:19 +00:00
|
|
|
continue;
|
2011-01-21 18:29:10 +00:00
|
|
|
if (skip_comments && curr_token().cat() == catComment) {
|
|
|
|
// If positions_ is not empty we are doing some kind
|
|
|
|
// of look ahead
|
|
|
|
if (!positions_.empty())
|
|
|
|
cerr << " Ignoring comment: "
|
|
|
|
<< curr_token().asInput();
|
|
|
|
} else {
|
2004-06-18 06:47:19 +00:00
|
|
|
putback();
|
|
|
|
break;
|
|
|
|
}
|
2003-11-05 10:14:13 +00:00
|
|
|
}
|
2010-12-19 14:54:23 +00:00
|
|
|
return skipped;
|
2003-11-05 10:14:13 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
void Parser::unskip_spaces(bool skip_comments)
|
|
|
|
{
|
|
|
|
while (pos_ > 0) {
|
|
|
|
if ( curr_token().cat() == catSpace ||
|
|
|
|
(curr_token().cat() == catNewline && curr_token().cs().size() == 1))
|
|
|
|
putback();
|
|
|
|
else if (skip_comments && curr_token().cat() == catComment) {
|
|
|
|
// TODO: Get rid of this
|
2011-01-21 18:29:10 +00:00
|
|
|
// If positions_ is not empty we are doing some kind
|
|
|
|
// of look ahead
|
|
|
|
if (!positions_.empty())
|
|
|
|
cerr << "Unignoring comment: "
|
|
|
|
<< curr_token().asInput();
|
2003-11-05 10:14:13 +00:00
|
|
|
putback();
|
|
|
|
}
|
2003-03-03 17:49:26 +00:00
|
|
|
else
|
|
|
|
break;
|
|
|
|
}
|
2003-02-12 07:53:03 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
void Parser::putback()
|
|
|
|
{
|
|
|
|
--pos_;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2011-01-02 18:16:23 +00:00
|
|
|
void Parser::pushPosition()
|
|
|
|
{
|
|
|
|
positions_.push_back(pos_);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
void Parser::popPosition()
|
|
|
|
{
|
|
|
|
pos_ = positions_.back();
|
|
|
|
positions_.pop_back();
|
2013-02-22 14:32:13 +00:00
|
|
|
deparse();
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
void Parser::dropPosition()
|
|
|
|
{
|
|
|
|
positions_.pop_back();
|
2011-01-02 18:16:23 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2008-11-15 20:30:45 +00:00
|
|
|
bool Parser::good()
|
2003-02-12 07:53:03 +00:00
|
|
|
{
|
2008-11-15 20:30:45 +00:00
|
|
|
if (pos_ < tokens_.size())
|
|
|
|
return true;
|
2013-02-17 13:53:56 +00:00
|
|
|
if (!is_.good())
|
|
|
|
return false;
|
|
|
|
return is_.peek() != idocstream::traits_type::eof();
|
2003-02-12 07:53:03 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2011-01-02 15:39:48 +00:00
|
|
|
bool Parser::hasOpt()
|
|
|
|
{
|
|
|
|
// An optional argument can occur in any of the following forms:
|
|
|
|
// - \foo[bar]
|
|
|
|
// - \foo [bar]
|
|
|
|
// - \foo
|
|
|
|
// [bar]
|
|
|
|
// - \foo %comment
|
|
|
|
// [bar]
|
|
|
|
|
|
|
|
// remember current position
|
|
|
|
unsigned int oldpos = pos_;
|
|
|
|
// skip spaces and comments
|
|
|
|
while (good()) {
|
|
|
|
get_token();
|
|
|
|
if (isParagraph()) {
|
|
|
|
putback();
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (curr_token().cat() == catSpace ||
|
|
|
|
curr_token().cat() == catNewline ||
|
|
|
|
curr_token().cat() == catComment)
|
|
|
|
continue;
|
|
|
|
putback();
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
bool const retval = (next_token().asInput() == "[");
|
|
|
|
pos_ = oldpos;
|
|
|
|
return retval;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2012-10-04 22:12:18 +00:00
|
|
|
/// Read an argument delimited by \p left and \p right, after skipping
/// leading spaces and comments.
/// \param allow_escaping if true, a control sequence whose character is
///        \p right (e.g. "\]" for right == ']') does not end the argument
///        but is copied into it; if false it ends the argument, and a
///        backslash is appended to the result.
/// \return (false, "") if there is no input left or the next token is not
///         \p left (that token is not consumed); otherwise (true, text
///         between the delimiters, with comments removed).
Parser::Arg Parser::getFullArg(char left, char right, bool allow_escaping)
{
	skip_spaces(true);

	// This is needed if a partial file ends with a command without arguments,
	// e. g. \medskip
	if (!good())
		return make_pair(false, string());

	Token tok = get_token();
	bool const opens = tok.cat() != catComment
		&& tok.cat() != catEscape
		&& tok.character() == left;
	if (!opens) {
		putback();
		return make_pair(false, string());
	}

	string contents;
	while (good()) {
		tok = get_token();

		// Ignore comments
		if (tok.cat() == catComment) {
			if (!tok.cs().empty())
				cerr << "Ignoring comment: " << tok.asInput();
			continue;
		}

		if (tok.character() == right) {
			bool const escaped = tok.cat() == catEscape;
			if (!allow_escaping) {
				// The delimiter ends the argument even if it
				// is escaped; keep the backslash in that case.
				if (escaped)
					contents += '\\';
				break;
			}
			if (!escaped)
				break;
		}
		contents += tok.asInput();
	}
	return make_pair(true, contents);
}
|
|
|
|
|
|
|
|
|
2012-10-04 22:12:18 +00:00
|
|
|
/// Like getFullArg(), but return only the argument text; the result is
/// empty both for an empty argument and for a missing one.
string Parser::getArg(char left, char right, bool allow_escaping)
{
	Arg const arg = getFullArg(left, right, allow_escaping);
	return arg.second;
}
|
|
|
|
|
|
|
|
|
2011-01-28 20:29:06 +00:00
|
|
|
/// Read an optional argument "[...]".
/// \return the argument including the brackets, or an empty string if
/// there is none. In the latter case the whitespace skipped while
/// looking for it is restored if \p keepws is true.
string Parser::getFullOpt(bool keepws)
{
	Arg const arg = getFullArg('[', ']');
	if (!arg.first) {
		if (keepws)
			unskip_spaces(true);
		return string();
	}
	return '[' + arg.second + ']';
}
|
|
|
|
|
|
|
|
|
2010-12-19 14:54:23 +00:00
|
|
|
/// Read an optional argument "[...]".
/// \return the argument including the brackets, or an empty string if
/// the argument is missing or empty. In the latter case unskip_spaces()
/// is called if \p keepws is true.
string Parser::getOpt(bool keepws)
{
	string const res = getArg('[', ']');
	if (!res.empty())
		return '[' + res + ']';

	if (keepws)
		unskip_spaces(true);
	return string();
}
|
|
|
|
|
2008-04-18 12:26:21 +00:00
|
|
|
|
2008-04-17 00:22:16 +00:00
|
|
|
/// Read an argument delimited by parentheses.
/// \return the argument including the parentheses, or an empty string if
/// there is no such argument.
string Parser::getFullParentheseArg()
{
	Arg const arg = getFullArg('(', ')');
	if (!arg.first)
		return string();
	return '(' + arg.second + ')';
}
|
2003-02-28 13:37:43 +00:00
|
|
|
|
2008-04-18 12:26:21 +00:00
|
|
|
|
2013-01-25 11:48:52 +00:00
|
|
|
/// Collect the contents of the environment \p name up to its "\end",
/// which is consumed but not returned. Nested environments are collected
/// recursively and brace groups are copied via verbatim_item().
/// A message is printed to cerr if the "\end" found names a different
/// environment, or if the input ends first; in both cases the text
/// collected so far is returned.
string const Parser::ertEnvironment(string const & name)
{
	if (!good())
		return string();

	ostringstream os;
	Token t = get_token();
	// NOTE: good() is tested after fetching, so a token that is the very
	// last one of the input is not processed.
	while (good()) {
		string const text = t.asInput();
		if (t.cat() == catBegin) {
			putback();
			os << '{' << verbatim_item() << '}';
		} else if (text == "\\begin") {
			string const env = getArg('{', '}');
			os << "\\begin{" << env << '}';
			os << ertEnvironment(env);
			os << "\\end{" << env << '}';
		} else if (text == "\\end") {
			string const end = getArg('{', '}');
			if (end != name)
				cerr << "\\end{" << end
				     << "} does not match \\begin{" << name
				     << "}." << endl;
			return os.str();
		} else {
			os << text;
		}
		t = get_token();
	}
	cerr << "unexpected end of input" << endl;
	return os.str();
}
|
|
|
|
|
|
|
|
|
2012-03-04 13:27:53 +00:00
|
|
|
/// Collect the input up to the "\end{name}" matching \p name, which is
/// consumed but not returned. Unlike ertEnvironment(), nested
/// environments and brace groups get no special treatment; an "\end" of
/// a different environment is copied to the result.
/// A message is printed to cerr if the input ends first; the text
/// collected so far is returned in that case.
string const Parser::plainEnvironment(string const & name)
{
	if (!good())
		return string();

	ostringstream os;
	Token t = get_token();
	// NOTE: good() is tested after fetching, so a token that is the very
	// last one of the input is not processed.
	while (good()) {
		string const text = t.asInput();
		if (text != "\\end") {
			os << text;
		} else {
			string const end = getArg('{', '}');
			if (end == name)
				return os.str();
			os << "\\end{" << end << '}';
		}
		t = get_token();
	}
	cerr << "unexpected end of input" << endl;
	return os.str();
}
|
|
|
|
|
|
|
|
|
2012-03-05 22:04:22 +00:00
|
|
|
/// Collect the argument of the command \p name, which is delimited by
/// the characters \p left and \p right; the delimiters are consumed but
/// not returned.
/// \param name only used in the error message.
/// \return the text between the delimiters. An empty string is returned
///         if there is no input or the first token is not \p left (a
///         message is printed in the latter case). If the input ends
///         before \p right is found, a message is printed and everything
///         collected so far is returned.
string const Parser::plainCommand(char left, char right, string const & name)
{
	if (!good())
		return string();

	// check if first token is really the start character
	Token const first = get_token();
	if (first.character() != left) {
		cerr << "first character does not match start character of command \\" << name << endl;
		return string();
	}

	ostringstream os;
	// good() must be tested BEFORE fetching a token, not after: otherwise
	// the last token of the input is never looked at, so that a closing
	// delimiter at the very end of the input is reported as "unexpected
	// end of input" and a trailing ordinary token is lost.
	while (good()) {
		Token const t = get_token();
		if (t.character() == right)
			return os.str();
		os << t.asInput();
	}
	cerr << "unexpected end of input" << endl;
	return os.str();
}
|
|
|
|
|
|
|
|
|
2013-02-22 14:35:38 +00:00
|
|
|
/// Read the input verbatim (with VERBATIM_CATCODES) up to \p end_string,
/// which is consumed but not returned. Normal catcodes are restored
/// before returning.
/// \param allow_linebreak if false, a newline before \p end_string is
///        treated as an error.
/// \return (true, text before end_string) on success. On failure
///         (no input, forbidden newline, or end of input before
///         \p end_string was found) a message is printed in the latter
///         two cases, the parser is rewound to where it was on entry and
///         (false, "") is returned.
/// NOTE: matching is done token by token and restarts from scratch after a
/// mismatch, so an end string that overlaps with a failed partial match
/// (e.g. "\\end{x}" in the input for the end string "\end{x}") is missed.
Parser::Arg Parser::verbatimStuff(string const & end_string, bool const allow_linebreak)
{
	if (!good())
		return Arg(false, string());

	pushPosition();
	ostringstream oss;
	size_t match_index = 0;
	bool found = false;
	setCatcodes(VERBATIM_CATCODES);
	// good() is tested BEFORE fetching a token and success is tracked
	// explicitly. Testing good() after the fetch would never look at the
	// last token of the input, so an end string located at the very end
	// of the input would be reported as missing.
	while (good()) {
		Token const t = get_token();
		string const input = t.asInput();
		// FIXME input might be longer than we need ?
		if (input == end_string.substr(match_index, input.length())) {
			match_index += input.length();
			if (match_index >= end_string.length()) {
				found = true;
				break;
			}
			continue;
		}

		if (!allow_linebreak && input == "\n") {
			cerr << "unexpected end of input" << endl;
			popPosition();
			setCatcodes(NORMAL_CATCODES);
			return Arg(false, string());
		}
		// The partial match turned out to be ordinary text.
		if (match_index) {
			oss << end_string.substr(0, match_index);
			match_index = 0;
		}
		oss << input;
	}

	if (!found) {
		cerr << "unexpected end of input" << endl;
		popPosition();
		setCatcodes(NORMAL_CATCODES);
		return Arg(false, string());
	}
	setCatcodes(NORMAL_CATCODES);
	dropPosition();
	return Arg(true, oss.str());
}
|
|
|
|
|
|
|
|
|
2013-02-04 14:14:30 +00:00
|
|
|
/// Read the contents of the verbatim-like environment \p name, i.e.
/// everything up to "\end{name}", without one newline directly after the
/// beginning and one directly before the end.
string const Parser::verbatimEnvironment(string const & name)
{
	//FIXME: do something if endstring is not found
	string const terminator = "\\end{" + name + "}";
	string contents = verbatimStuff(terminator).second;

	// ignore one newline at beginning or end of string
	if (prefixIs(contents, "\n"))
		contents.erase(0, 1);
	if (suffixIs(contents, "\n"))
		contents.erase(contents.length() - 1, 1);
	return contents;
}
|
|
|
|
|
|
|
|
|
2013-02-15 13:44:45 +00:00
|
|
|
/// Read an optional argument "[...]" verbatim. Brace groups inside it are
/// copied via verbatim_item(), so a ']' inside braces does not end the
/// argument.
/// \return the text between the brackets, or an empty string if the next
/// token is not '['.
string Parser::verbatimOption()
{
	string res;
	if (next_token().character() != '[')
		return res;

	get_token(); // skip the opening bracket
	Token t = get_token();
	while (t.character() != ']' && good()) {
		if (t.cat() == catBegin) {
			putback();
			res += '{' + verbatim_item() + '}';
		} else {
			res += t.asInput();
		}
		t = get_token();
	}
	return res;
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Read one item verbatim, after skipping leading spaces: either a brace
/// group, whose contents are returned without the outer braces (nested
/// groups keep theirs), or a single token.
/// An error is reported (but parsing continues) if the parser is not
/// good() on entry.
string Parser::verbatim_item()
{
	if (!good())
		error("stream bad");
	skip_spaces();

	// Not a group: the item is the single next token.
	if (next_token().cat() != catBegin)
		return get_token().asInput();

	get_token(); // skip brace
	string res;
	Token t = get_token();
	while (t.cat() != catEnd && good()) {
		if (t.cat() == catBegin) {
			// Nested group: recurse and restore its braces.
			putback();
			res += '{' + verbatim_item() + '}';
		} else {
			res += t.asInput();
		}
		t = get_token();
	}
	return res;
}
|
|
|
|
|
|
|
|
|
2008-11-15 15:09:59 +00:00
|
|
|
void Parser::tokenize_one()
|
2003-02-12 07:53:03 +00:00
|
|
|
{
|
2008-11-15 19:30:58 +00:00
|
|
|
catInit();
|
2008-11-16 17:02:00 +00:00
|
|
|
char_type c;
|
2012-12-14 11:30:08 +00:00
|
|
|
if (!is_.get(c))
|
2008-11-15 14:38:27 +00:00
|
|
|
return;
|
|
|
|
|
|
|
|
switch (catcode(c)) {
|
|
|
|
case catSpace: {
|
2008-11-16 17:02:00 +00:00
|
|
|
docstring s(1, c);
|
2008-11-15 15:09:59 +00:00
|
|
|
while (is_.get(c) && catcode(c) == catSpace)
|
2008-11-15 14:38:27 +00:00
|
|
|
s += c;
|
|
|
|
if (catcode(c) != catSpace)
|
2008-11-15 15:09:59 +00:00
|
|
|
is_.putback(c);
|
2008-11-15 14:38:27 +00:00
|
|
|
push_back(Token(s, catSpace));
|
|
|
|
break;
|
|
|
|
}
|
2012-10-05 20:16:46 +00:00
|
|
|
|
2008-11-15 14:38:27 +00:00
|
|
|
case catNewline: {
|
|
|
|
++lineno_;
|
2008-11-16 17:02:00 +00:00
|
|
|
docstring s(1, getNewline(is_, c));
|
2008-11-15 15:09:59 +00:00
|
|
|
while (is_.get(c) && catcode(c) == catNewline) {
|
2008-11-15 14:38:27 +00:00
|
|
|
++lineno_;
|
2008-11-15 15:09:59 +00:00
|
|
|
s += getNewline(is_, c);
|
2008-11-15 14:38:27 +00:00
|
|
|
}
|
|
|
|
if (catcode(c) != catNewline)
|
2008-11-15 15:09:59 +00:00
|
|
|
is_.putback(c);
|
2008-11-15 14:38:27 +00:00
|
|
|
push_back(Token(s, catNewline));
|
|
|
|
break;
|
|
|
|
}
|
2012-10-05 20:16:46 +00:00
|
|
|
|
2008-11-15 14:38:27 +00:00
|
|
|
case catComment: {
|
|
|
|
// We don't treat "%\n" combinations here specially because
|
|
|
|
// we want to preserve them in the preamble
|
2008-11-16 17:02:00 +00:00
|
|
|
docstring s;
|
2008-11-15 15:09:59 +00:00
|
|
|
while (is_.get(c) && catcode(c) != catNewline)
|
2008-11-15 14:38:27 +00:00
|
|
|
s += c;
|
|
|
|
// handle possible DOS line ending
|
|
|
|
if (catcode(c) == catNewline)
|
2008-11-15 15:09:59 +00:00
|
|
|
c = getNewline(is_, c);
|
2008-11-15 14:38:27 +00:00
|
|
|
// Note: The '%' at the beginning and the '\n' at the end
|
|
|
|
// of the comment are not stored.
|
|
|
|
++lineno_;
|
|
|
|
push_back(Token(s, catComment));
|
|
|
|
break;
|
|
|
|
}
|
2012-10-05 20:16:46 +00:00
|
|
|
|
2008-11-15 14:38:27 +00:00
|
|
|
case catEscape: {
|
2008-11-15 15:09:59 +00:00
|
|
|
is_.get(c);
|
|
|
|
if (!is_) {
|
2008-11-15 14:38:27 +00:00
|
|
|
error("unexpected end of input");
|
|
|
|
} else {
|
2008-11-16 17:02:00 +00:00
|
|
|
docstring s(1, c);
|
2008-11-15 14:38:27 +00:00
|
|
|
if (catcode(c) == catLetter) {
|
|
|
|
// collect letters
|
2008-11-15 15:09:59 +00:00
|
|
|
while (is_.get(c) && catcode(c) == catLetter)
|
2003-11-05 10:14:13 +00:00
|
|
|
s += c;
|
2008-11-15 14:38:27 +00:00
|
|
|
if (catcode(c) != catLetter)
|
2008-11-15 15:09:59 +00:00
|
|
|
is_.putback(c);
|
2003-02-12 07:53:03 +00:00
|
|
|
}
|
2008-11-15 14:38:27 +00:00
|
|
|
push_back(Token(s, catEscape));
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
2012-10-05 20:16:46 +00:00
|
|
|
|
2008-11-15 14:38:27 +00:00
|
|
|
case catIgnore: {
|
2008-11-16 17:02:00 +00:00
|
|
|
cerr << "ignoring a char: " << c << "\n";
|
2008-11-15 14:38:27 +00:00
|
|
|
break;
|
|
|
|
}
|
2012-10-05 20:16:46 +00:00
|
|
|
|
2008-11-15 14:38:27 +00:00
|
|
|
default:
|
2008-11-16 17:02:00 +00:00
|
|
|
push_back(Token(docstring(1, c), catcode(c)));
|
2008-11-15 14:38:27 +00:00
|
|
|
}
|
2008-11-16 17:02:00 +00:00
|
|
|
//cerr << tokens_.back();
|
2008-11-15 14:38:27 +00:00
|
|
|
}
|
2003-02-12 07:53:03 +00:00
|
|
|
|
|
|
|
|
|
|
|
void Parser::dump() const
|
|
|
|
{
|
|
|
|
cerr << "\nTokens: ";
|
|
|
|
for (unsigned i = 0; i < tokens_.size(); ++i) {
|
|
|
|
if (i == pos_)
|
|
|
|
cerr << " <#> ";
|
|
|
|
cerr << tokens_[i];
|
|
|
|
}
|
|
|
|
cerr << " pos: " << pos_ << "\n";
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Report the parse error \p msg on cerr, together with the approximate
/// line number, and dump the token list. Parsing is not aborted.
void Parser::error(string const & msg)
{
	cerr << "Line ~" << lineno_ << ": parse error: " << msg << endl;
	dump();
	//exit(1);
}
|
|
|
|
|
|
|
|
|
2003-10-23 11:46:33 +00:00
|
|
|
void Parser::reset()
|
|
|
|
{
|
|
|
|
pos_ = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2006-10-21 00:16:43 +00:00
|
|
|
} // namespace lyx
|