mirror of
https://git.lyx.org/repos/lyx.git
synced 2024-11-22 10:00:33 +00:00
Make tex2lyx encoding changes more robust
This is achieved by no longer calling Parser::tokenize_one() in Parser::good(): the status of the input can be tested without performing the actual tokenizing. Now there are only two methods that may prevent an encoding change: next_token() and next_next_token().
This commit is contained in:
parent
94cb22ac43
commit
25fe87e55c
@ -118,6 +118,17 @@ void debugToken(std::ostream & os, Token const & t, unsigned int flags)
|
||||
// Wrapper
|
||||
//
|
||||
|
||||
/// Switch the wrapped input stream is_ to the encoding named \p e.
/// \return true if the pending-character buffer s_ was empty at the time
/// of the switch; false (after printing a warning to cerr) otherwise.
bool iparserdocstream::setEncoding(std::string const & e)
|
||||
{
|
||||
// The encoding manipulator is applied unconditionally, even when the
// warning below is going to be issued.
is_ << lyx::setEncoding(e);
|
||||
// s_ holds characters that were handed back to this wrapper (see
// putback()); if there are none, the switch came in time.
if (s_.empty())
|
||||
return true;
|
||||
// Otherwise report the affected characters. Presumably they were decoded
// with the previous encoding and cannot be re-decoded -- TODO confirm.
cerr << "Setting encoding " << e << " too late. The encoding of `"
|
||||
<< to_utf8(s_) << "´ is wrong." << std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
void iparserdocstream::putback(char_type c)
|
||||
{
|
||||
s_ += c;
|
||||
@ -182,7 +193,7 @@ void Parser::deparse()
|
||||
}
|
||||
|
||||
|
||||
void Parser::setEncoding(std::string const & e, int const & p)
|
||||
bool Parser::setEncoding(std::string const & e, int const & p)
|
||||
{
|
||||
// We may (and need to) use unsafe encodings here: Since the text is
|
||||
// converted to unicode while reading from is_, we never see text in
|
||||
@ -191,9 +202,9 @@ void Parser::setEncoding(std::string const & e, int const & p)
|
||||
Encoding const * const enc = encodings.fromLaTeXName(e, p, true);
|
||||
if (!enc) {
|
||||
cerr << "Unknown encoding " << e << ". Ignoring." << std::endl;
|
||||
return;
|
||||
return false;
|
||||
}
|
||||
setEncoding(enc->iconvName());
|
||||
return setEncoding(enc->iconvName());
|
||||
}
|
||||
|
||||
|
||||
@ -250,11 +261,11 @@ void Parser::setCatcodes(cat_type t)
|
||||
}
|
||||
|
||||
|
||||
// NOTE(review): this span is a rendered diff hunk; going by the hunk's line
// counts, the `void` signature and the direct docstream() write are the
// removed lines, and the `bool` signature and the final return are the
// added ones.
// Records \p e as the current iconv encoding name and forwards the switch
// to the stream wrapper is_, whose result becomes the return value.
void Parser::setEncoding(std::string const & e)
|
||||
bool Parser::setEncoding(std::string const & e)
|
||||
{
|
||||
//cerr << "setting encoding to " << e << std::endl;
|
||||
is_.docstream() << lyx::setEncoding(e);
|
||||
// Remembered so that getEncoding() can report the name later.
encoding_iconv_ = e;
|
||||
return is_.setEncoding(e);
|
||||
}
|
||||
|
||||
|
||||
@ -284,7 +295,11 @@ Token const Parser::curr_token() const
|
||||
// NOTE(review): rendered diff hunk; the one-line `return good() ? ...` is
// the removed implementation, the statements after it are the added one.
// Returns the token at pos_ without advancing pos_, or a default-constructed
// Token when good() reports no further input or no token is available.
Token const Parser::next_token()
|
||||
{
|
||||
static const Token dummy;
|
||||
return good() ? tokens_[pos_] : dummy;
|
||||
if (!good())
|
||||
return dummy;
|
||||
// Nothing buffered at pos_ yet: tokenize on demand.
if (pos_ >= tokens_.size())
|
||||
tokenize_one();
|
||||
// Re-check the bounds rather than assuming tokenize_one() appended a token.
return pos_ < tokens_.size() ? tokens_[pos_] : dummy;
|
||||
}
|
||||
|
||||
|
||||
@ -292,11 +307,14 @@ Token const Parser::next_token()
|
||||
// NOTE(review): rendered diff hunk; by the hunk's line counts the first
// two-line comment, `if (pos_ + 1 >= tokens_.size()) {` and one
// tokenize_one() call are removed lines, the rest is the new body.
// Returns the token at pos_ + 1 without advancing pos_, or a
// default-constructed Token if that token is not available.
Token const Parser::next_next_token()
|
||||
{
|
||||
static const Token dummy;
|
||||
// If good() has not been called after the last get_token() we need
|
||||
// to tokenize two more tokens.
|
||||
if (pos_ + 1 >= tokens_.size()) {
|
||||
tokenize_one();
|
||||
if (!good())
|
||||
return dummy;
|
||||
// If tokenize_one() has not been called after the last get_token() we
|
||||
// need to tokenize two more tokens.
|
||||
// NOTE(review): in the added code this outer test is false when exactly one
// token is already buffered at pos_ (presumably the state right after
// next_token()), so no second token is read and dummy is returned below --
// the removed `pos_ + 1 >= tokens_.size()` test covered that case. Verify.
if (pos_ >= tokens_.size()) {
|
||||
tokenize_one();
|
||||
if (pos_ + 1 >= tokens_.size())
|
||||
tokenize_one();
|
||||
}
|
||||
return pos_ + 1 < tokens_.size() ? tokens_[pos_ + 1] : dummy;
|
||||
}
|
||||
@ -306,10 +324,16 @@ Token const Parser::next_next_token()
|
||||
// NOTE(review): rendered diff hunk; the first commented-out debug block and
// the one-line `return good() ? tokens_[pos_++] : dummy;` are removed lines,
// everything after them is the added implementation.
// Returns the token at pos_ and advances pos_ past it, or a
// default-constructed Token when no token can be produced.
Token const Parser::get_token()
|
||||
{
|
||||
static const Token dummy;
|
||||
// if (good())
|
||||
// cerr << "looking at token " << tokens_[pos_]
|
||||
// << " pos: " << pos_ << '\n';
|
||||
return good() ? tokens_[pos_++] : dummy;
|
||||
if (!good())
|
||||
return dummy;
|
||||
// Tokenize on demand; pos_ is left untouched if that yields nothing.
if (pos_ >= tokens_.size()) {
|
||||
tokenize_one();
|
||||
if (pos_ >= tokens_.size())
|
||||
return dummy;
|
||||
}
|
||||
// cerr << "looking at token " << tokens_[pos_]
|
||||
// << " pos: " << pos_ << '\n';
|
||||
return tokens_[pos_++];
|
||||
}
|
||||
|
||||
|
||||
@ -408,8 +432,9 @@ bool Parser::good()
|
||||
{
|
||||
if (pos_ < tokens_.size())
|
||||
return true;
|
||||
tokenize_one();
|
||||
return pos_ < tokens_.size();
|
||||
if (!is_.good())
|
||||
return false;
|
||||
return is_.peek() != idocstream::traits_type::eof();
|
||||
}
|
||||
|
||||
|
||||
|
@ -117,15 +117,19 @@ std::ostream & operator<<(std::ostream & os, Token const & t);
|
||||
extern void debugToken(std::ostream & os, Token const & t, unsigned int flags);
|
||||
#endif
|
||||
|
||||
// A docstream version that supports putback even when not buffered
|
||||
/// A docstream version that supports putback even when not buffered
|
||||
class iparserdocstream
|
||||
{
|
||||
public:
|
||||
typedef idocstream::int_type int_type;
|
||||
|
||||
iparserdocstream(idocstream & is) : is_(is) {};
|
||||
|
||||
operator bool() const { return is_; };
|
||||
/// Like std::istream::operator bool()
|
||||
operator bool() const { return s_.empty() ? is_ : true; }
|
||||
|
||||
idocstream & docstream() { return is_; };
|
||||
/// change the encoding of the input stream to \p e (iconv name)
|
||||
bool setEncoding(std::string const & e);
|
||||
|
||||
// add to the list of characters to read before actually reading
|
||||
// the stream
|
||||
@ -135,7 +139,14 @@ public:
|
||||
// the stream
|
||||
void put_almost_back(docstring s);
|
||||
|
||||
/// Like std::istream::get()
|
||||
iparserdocstream & get(char_type &c);
|
||||
|
||||
/// Like std::istream::good()
|
||||
bool good() const { return s_.empty() ? is_.good() : true; }
|
||||
|
||||
/// Like std::istream::peek()
|
||||
int_type peek() const { return s_.empty() ? is_.peek() : s_[0]; }
|
||||
private:
|
||||
///
|
||||
idocstream & is_;
|
||||
@ -172,11 +183,11 @@ public:
|
||||
* re-reading. Useful when changing catcodes. */
|
||||
void deparse();
|
||||
|
||||
/// change the iconv encoding of the input stream
|
||||
/// according to the latex encoding and package
|
||||
void setEncoding(std::string const & encoding, int const & package);
|
||||
/// change the iconv encoding of the input stream
|
||||
void setEncoding(std::string const & encoding);
|
||||
/// change the encoding of the input stream according to \p encoding
|
||||
/// (latex name) and package \p package
|
||||
bool setEncoding(std::string const & encoding, int const & package);
|
||||
/// change the encoding of the input stream to \p encoding (iconv name)
|
||||
bool setEncoding(std::string const & encoding);
|
||||
/// get the current iconv encoding of the input stream
|
||||
std::string getEncoding() const { return encoding_iconv_; }
|
||||
|
||||
@ -288,9 +299,12 @@ public:
|
||||
Token const prev_token() const;
|
||||
/// The current token.
|
||||
Token const curr_token() const;
|
||||
/// The next token.
|
||||
/// The next token. Caution: If this is called, an encoding change is
|
||||
/// only possible again after get_token() has been called.
|
||||
Token const next_token();
|
||||
/// The next but one token.
|
||||
/// The next but one token. Caution: If this is called, an encoding
|
||||
/// change is only possible again after get_token() has been called
|
||||
/// twice.
|
||||
Token const next_next_token();
|
||||
/// Make the next token current and return that.
|
||||
Token const get_token();
|
||||
@ -301,7 +315,9 @@ public:
|
||||
bool skip_spaces(bool skip_comments = false);
|
||||
/// puts back spaces (and comments if \p skip_comments is true)
|
||||
void unskip_spaces(bool skip_comments = false);
|
||||
///
|
||||
/// Is any further input pending? This is not like
|
||||
/// std::istream::good(), which returns true if all available input
|
||||
/// was read, and the next attempt to read would return EOF.
|
||||
bool good();
|
||||
/// resets the parser to initial state
|
||||
void reset();
|
||||
|
Loading…
Reference in New Issue
Block a user