Make tex2lyx encoding changes more robust

This is achieved by no longer calling Parser::tokenize_one() in
Parser::good(): The status of the input can be tested without performing the
actual tokenizing. Now there are only two methods that may prevent an encoding
change: next_token() and next_next_token().
This commit is contained in:
Georg Baum 2013-02-17 14:53:56 +01:00
parent 94cb22ac43
commit 25fe87e55c
2 changed files with 68 additions and 27 deletions

View File

@ -118,6 +118,17 @@ void debugToken(std::ostream & os, Token const & t, unsigned int flags)
// Wrapper
//
bool iparserdocstream::setEncoding(std::string const & e)
{
is_ << lyx::setEncoding(e);
if (s_.empty())
return true;
cerr << "Setting encoding " << e << " too late. The encoding of `"
<< to_utf8(s_) << "´ is wrong." << std::endl;
return false;
}
void iparserdocstream::putback(char_type c)
{
s_ += c;
@ -182,7 +193,7 @@ void Parser::deparse()
}
void Parser::setEncoding(std::string const & e, int const & p)
bool Parser::setEncoding(std::string const & e, int const & p)
{
// We may (and need to) use unsafe encodings here: Since the text is
// converted to unicode while reading from is_, we never see text in
@ -191,9 +202,9 @@ void Parser::setEncoding(std::string const & e, int const & p)
Encoding const * const enc = encodings.fromLaTeXName(e, p, true);
if (!enc) {
cerr << "Unknown encoding " << e << ". Ignoring." << std::endl;
return;
return false;
}
setEncoding(enc->iconvName());
return setEncoding(enc->iconvName());
}
@ -250,11 +261,11 @@ void Parser::setCatcodes(cat_type t)
}
void Parser::setEncoding(std::string const & e)
bool Parser::setEncoding(std::string const & e)
{
//cerr << "setting encoding to " << e << std::endl;
is_.docstream() << lyx::setEncoding(e);
encoding_iconv_ = e;
return is_.setEncoding(e);
}
@ -284,7 +295,11 @@ Token const Parser::curr_token() const
Token const Parser::next_token()
{
static const Token dummy;
return good() ? tokens_[pos_] : dummy;
if (!good())
return dummy;
if (pos_ >= tokens_.size())
tokenize_one();
return pos_ < tokens_.size() ? tokens_[pos_] : dummy;
}
@ -292,11 +307,14 @@ Token const Parser::next_token()
Token const Parser::next_next_token()
{
static const Token dummy;
// If good() has not been called after the last get_token() we need
// to tokenize two more tokens.
if (pos_ + 1 >= tokens_.size()) {
tokenize_one();
if (!good())
return dummy;
// If tokenize_one() has not been called after the last get_token() we
// need to tokenize two more tokens.
if (pos_ >= tokens_.size()) {
tokenize_one();
if (pos_ + 1 >= tokens_.size())
tokenize_one();
}
return pos_ + 1 < tokens_.size() ? tokens_[pos_ + 1] : dummy;
}
@ -306,10 +324,16 @@ Token const Parser::next_next_token()
Token const Parser::get_token()
{
static const Token dummy;
// if (good())
// cerr << "looking at token " << tokens_[pos_]
// << " pos: " << pos_ << '\n';
return good() ? tokens_[pos_++] : dummy;
if (!good())
return dummy;
if (pos_ >= tokens_.size()) {
tokenize_one();
if (pos_ >= tokens_.size())
return dummy;
}
// cerr << "looking at token " << tokens_[pos_]
// << " pos: " << pos_ << '\n';
return tokens_[pos_++];
}
@ -408,8 +432,9 @@ bool Parser::good()
{
if (pos_ < tokens_.size())
return true;
tokenize_one();
return pos_ < tokens_.size();
if (!is_.good())
return false;
return is_.peek() != idocstream::traits_type::eof();
}

View File

@ -117,15 +117,19 @@ std::ostream & operator<<(std::ostream & os, Token const & t);
extern void debugToken(std::ostream & os, Token const & t, unsigned int flags);
#endif
// A docstream version that supports putback even when not buffered
/// A docstream version that supports putback even when not buffered
class iparserdocstream
{
public:
typedef idocstream::int_type int_type;
iparserdocstream(idocstream & is) : is_(is) {};
operator bool() const { return is_; };
/// Like std::istream::operator bool()
operator bool() const { return s_.empty() ? is_ : true; }
idocstream & docstream() { return is_; };
/// change the encoding of the input stream to \p e (iconv name)
bool setEncoding(std::string const & e);
// add to the list of characters to read before actually reading
// the stream
@ -135,7 +139,14 @@ public:
// the stream
void put_almost_back(docstring s);
/// Like std::istream::get()
iparserdocstream & get(char_type &c);
/// Like std::istream::good()
bool good() const { return s_.empty() ? is_.good() : true; }
/// Like std::istream::peek()
int_type peek() const { return s_.empty() ? is_.peek() : s_[0]; }
private:
///
idocstream & is_;
@ -172,11 +183,11 @@ public:
* re-reading. Useful when changing catcodes. */
void deparse();
/// change the iconv encoding of the input stream
/// according to the latex encoding and package
void setEncoding(std::string const & encoding, int const & package);
/// change the iconv encoding of the input stream
void setEncoding(std::string const & encoding);
/// change the encoding of the input stream according to \p encoding
/// (latex name) and package \p package
bool setEncoding(std::string const & encoding, int const & package);
/// change the encoding of the input stream to \p encoding (iconv name)
bool setEncoding(std::string const & encoding);
/// get the current iconv encoding of the input stream
std::string getEncoding() const { return encoding_iconv_; }
@ -288,9 +299,12 @@ public:
Token const prev_token() const;
/// The current token.
Token const curr_token() const;
/// The next token.
/// The next token. Caution: If this is called, an encoding change is
/// only possible again after get_token() has been called.
Token const next_token();
/// The next but one token.
/// The next but one token. Caution: If this is called, an encoding
/// change is only possible again after get_token() has been called
/// twice.
Token const next_next_token();
/// Make the next token current and return that.
Token const get_token();
@ -301,7 +315,9 @@ public:
bool skip_spaces(bool skip_comments = false);
/// puts back spaces (and comments if \p skip_comments is true)
void unskip_spaces(bool skip_comments = false);
///
/// Is any further input pending()? This is not like
/// std::istream::good(), which returns true if all available input
/// was read, and the next attempt to read would return EOF.
bool good();
/// resets the parser to initial state
void reset();