improve tex2lyx paragraph and comment handling

git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/trunk@8815 a592a061-630c-0410-9148-cb99ea01b6c8
This commit is contained in:
Georg Baum 2004-06-18 06:47:19 +00:00
parent 5603df4a5b
commit cb73d69ae4
9 changed files with 131 additions and 62 deletions

View File

@ -1,3 +1,7 @@
2004-06-18 Georg Baum <Georg.Baum@post.rwth-aachen.de>
* insetgraphics.C, insettabular.C: s/wether/whether/g
2004-06-10 Georg Baum <Georg.Baum@post.rwth-aachen.de> 2004-06-10 Georg Baum <Georg.Baum@post.rwth-aachen.de>
* insetgraphics.C (stripExtension): new * insetgraphics.C (stripExtension): new

View File

@ -477,7 +477,7 @@ string const InsetGraphics::prepareFile(Buffer const & buf,
if (zipped) { if (zipped) {
if (params().noUnzip) { if (params().noUnzip) {
// We don't know wether latex can actually handle // We don't know whether latex can actually handle
// this file, but we can't check, because that would // this file, but we can't check, because that would
// mean to unzip the file and thereby making the // mean to unzip the file and thereby making the
// noUnzip parameter meaningless. // noUnzip parameter meaningless.

View File

@ -1324,7 +1324,7 @@ void InsetTabular::tabularFeatures(LCursor & cur,
#if 0 #if 0
// just multicol for one Single Cell // just multicol for one Single Cell
if (!hasSelection()) { if (!hasSelection()) {
// check wether we are completly in a multicol // check whether we are completly in a multicol
if (tabular.isMultiColumn(actcell)) if (tabular.isMultiColumn(actcell))
tabular.unsetMultiColumn(actcell); tabular.unsetMultiColumn(actcell);
else else

View File

@ -1,3 +1,12 @@
2004-06-18 Georg Baum <Georg.Baum@post.rwth-aachen.de>
* preamble.C, text.C: s/wether/whether/g
* text.C (eat_whitespace): new method
* texparser.C (getArg): always use curr_token().asInput()
* texparser.[Ch] (isParagraph): new method
* texparser.C (skip_spaces): handle "\n +\n" correctly
* texparser.[Ch] (asMode): remove, since it is unused
2004-05-27 Lars Gullik Bjonnes <larsbj@gullik.net> 2004-05-27 Lars Gullik Bjonnes <larsbj@gullik.net>
* Makefile.am (BUILT_SOURCES): move lengthcommon.C from here... * Makefile.am (BUILT_SOURCES): move lengthcommon.C from here...

View File

@ -184,7 +184,7 @@ LyXTextClass const parse_preamble(Parser & p, ostream & os, string const & force
special_columns['D'] = 3; special_columns['D'] = 3;
bool is_full_document = false; bool is_full_document = false;
// determine wether this is a full document or a fragment for inclusion // determine whether this is a full document or a fragment for inclusion
while (p.good()) { while (p.good()) {
Token const & t = p.get_token(); Token const & t = p.get_token();

View File

@ -34,6 +34,7 @@ void parse_text(Parser & p, std::ostream & os, unsigned flags, bool outer,
//std::string parse_text(Parser & p, unsigned flags, const bool outer, //std::string parse_text(Parser & p, unsigned flags, const bool outer,
// Context & context); // Context & context);
/// parses a subdocument, usually useful in insets (whence the name)
void parse_text_in_inset(Parser & p, std::ostream & os, unsigned flags, void parse_text_in_inset(Parser & p, std::ostream & os, unsigned flags,
bool outer, Context & context); bool outer, Context & context);

View File

@ -61,16 +61,6 @@ void catInit()
// catcodes // catcodes
// //
mode_type asMode(mode_type oldmode, string const & str)
{
if (str == "mathmode")
return MATH_MODE;
if (str == "textmode" || str == "forcetext")
return TEXT_MODE;
return oldmode;
}
CatCode catcode(unsigned char c) CatCode catcode(unsigned char c)
{ {
return theCatcode[c]; return theCatcode[c];
@ -177,21 +167,46 @@ Token const & Parser::get_token()
} }
bool Parser::isParagraph() const
{
// A new paragraph in TeX ist started
// - either by a newline, following any amount of whitespace
// characters (including zero), and another newline
// - or the token \par
if (curr_token().cat() == catNewline &&
(curr_token().cs().size() > 1 ||
(next_token().cat() == catSpace &&
pos_ < tokens_.size() - 1 &&
tokens_[pos_ + 1].cat() == catNewline)))
return true;
if (curr_token().cat() == catEscape && curr_token().cs() == "par")
return true;
return false;
}
void Parser::skip_spaces(bool skip_comments) void Parser::skip_spaces(bool skip_comments)
{ {
// We just silently return if we have no more tokens. // We just silently return if we have no more tokens.
// skip_spaces() should be callable at any time, // skip_spaces() should be callable at any time,
// the caller must check p::good() anyway. // the caller must check p::good() anyway.
while (good()) { while (good()) {
if ( next_token().cat() == catSpace ||
(next_token().cat() == catNewline && next_token().cs().size() == 1) ||
next_token().cat() == catComment && next_token().cs().empty())
get_token(); get_token();
else if (skip_comments && next_token().cat() == catComment) if (isParagraph()) {
cerr << " Ignoring comment: " << get_token().asInput(); putback();
else
break; break;
} }
if ( curr_token().cat() == catSpace ||
curr_token().cat() == catNewline ||
(curr_token().cat() == catComment && curr_token().cs().empty()))
continue;
if (skip_comments && curr_token().cat() == catComment)
cerr << " Ignoring comment: " << curr_token().asInput();
else {
putback();
break;
}
}
} }
@ -253,10 +268,8 @@ string Parser::getArg(char left, char right)
if (!curr_token().cs().empty()) if (!curr_token().cs().empty())
cerr << "Ignoring comment: " << curr_token().asInput(); cerr << "Ignoring comment: " << curr_token().asInput();
} }
else if (curr_token().cat() == catSpace || curr_token().cat() == catNewline)
result += curr_token().cs();
else else
result += c; result += curr_token().asInput();
} }
return result; return result;

View File

@ -79,13 +79,13 @@ public:
/// ///
std::string const & cs() const { return cs_; } std::string const & cs() const { return cs_; }
/// /// Returns the catcode of the token
CatCode cat() const { return cat_; } CatCode cat() const { return cat_; }
/// ///
char character() const { return char_; } char character() const { return char_; }
/// /// Returns the token as string
std::string asString() const; std::string asString() const;
/// /// Returns the token verbatim
std::string asInput() const; std::string asInput() const;
private: private:
@ -130,27 +130,29 @@ public:
std::string getArg(char left, char right); std::string getArg(char left, char right);
/// getArg('[', ']') including the brackets /// getArg('[', ']') including the brackets
std::string getOpt(); std::string getOpt();
/// /// Returns the character of the current token and increments the token position.
char getChar(); char getChar();
/// ///
void error(std::string const & msg); void error(std::string const & msg);
/// /// Parses \p is into tokens
void tokenize(std::istream & is); void tokenize(std::istream & is);
/// ///
void push_back(Token const & t); void push_back(Token const & t);
/// ///
void pop_back(); void pop_back();
/// /// The previous token.
Token const & prev_token() const; Token const & prev_token() const;
/// /// The current token.
Token const & curr_token() const; Token const & curr_token() const;
/// /// The next token.
Token const & next_token() const; Token const & next_token() const;
/// Make the next token current and return that. /// Make the next token current and return that.
Token const & get_token(); Token const & get_token();
/// skips spaces (and comments if \param skip_comments is true) /// \return whether the current token starts a new paragraph
bool isParagraph() const;
/// skips spaces (and comments if \p skip_comments is true)
void skip_spaces(bool skip_comments = false); void skip_spaces(bool skip_comments = false);
/// puts back spaces (and comments if \param skip_comments is true) /// puts back spaces (and comments if \p skip_comments is true)
void unskip_spaces(bool skip_comments = false); void unskip_spaces(bool skip_comments = false);
/// ///
void lex(std::string const & s); void lex(std::string const & s);

View File

@ -144,8 +144,8 @@ bool splitLatexLength(string const & len, string & value, string & unit)
} }
// A simple function to translate a latex length to something lyx can /// A simple function to translate a latex length to something lyx can
// understand. Not perfect, but rather best-effort. /// understand. Not perfect, but rather best-effort.
bool translate_len(string const & length, string & valstring, string & unit) bool translate_len(string const & length, string & valstring, string & unit)
{ {
if (!splitLatexLength(length, valstring, unit)) if (!splitLatexLength(length, valstring, unit))
@ -313,6 +313,9 @@ LyXLayout_ptr findLayout(LyXTextClass const & textclass,
} }
void eat_whitespace(Parser &, ostream &, Context &, bool);
void output_command_layout(ostream & os, Parser & p, bool outer, void output_command_layout(ostream & os, Parser & p, bool outer,
Context & parent_context, Context & parent_context,
LyXLayout_ptr newlayout) LyXLayout_ptr newlayout)
@ -323,13 +326,14 @@ void output_command_layout(ostream & os, Parser & p, bool outer,
context.check_deeper(os); context.check_deeper(os);
context.check_layout(os); context.check_layout(os);
if (context.layout->optionalargs > 0) { if (context.layout->optionalargs > 0) {
p.skip_spaces(); eat_whitespace(p, os, context, false);
if (p.next_token().character() == '[') { if (p.next_token().character() == '[') {
p.get_token(); // eat '[' p.get_token(); // eat '['
begin_inset(os, "OptArg\n"); begin_inset(os, "OptArg\n");
os << "status collapsed\n\n"; os << "status collapsed\n\n";
parse_text_in_inset(p, os, FLAG_BRACK_LAST, outer, context); parse_text_in_inset(p, os, FLAG_BRACK_LAST, outer, context);
end_inset(os); end_inset(os);
eat_whitespace(p, os, context, false);
} }
} }
parse_text_snippet(p, os, FLAG_ITEM, outer, context); parse_text_snippet(p, os, FLAG_ITEM, outer, context);
@ -378,7 +382,7 @@ void check_space(Parser const & p, ostream & os, Context & context)
/*! /*!
* Check wether \param command is a known command. If yes, * Check whether \p command is a known command. If yes,
* handle the command with all arguments. * handle the command with all arguments.
* \return true if the command was parsed, false otherwise. * \return true if the command was parsed, false otherwise.
*/ */
@ -522,8 +526,8 @@ void parse_environment(Parser & p, ostream & os, bool outer,
string const name = p.getArg('{', '}'); string const name = p.getArg('{', '}');
const bool is_starred = suffixIs(name, '*'); const bool is_starred = suffixIs(name, '*');
string const unstarred_name = rtrim(name, "*"); string const unstarred_name = rtrim(name, "*");
eat_whitespace(p, os, parent_context, false);
active_environments.push_back(name); active_environments.push_back(name);
p.skip_spaces();
if (is_math_env(name)) { if (is_math_env(name)) {
parent_context.check_layout(os); parent_context.check_layout(os);
@ -651,11 +655,54 @@ void parse_environment(Parser & p, ostream & os, bool outer,
p.skip_spaces(); p.skip_spaces();
} }
/*!
 * Handles the comment token \p t and writes the result to \p os.
 * \p t must have the category catComment (asserted).
 * A non-empty comment is passed to handle_comment() with a leading '%'.
 * If a newline token follows it, a new paragraph is started (unless
 * \p context is already at a paragraph start) and the following
 * whitespace is consumed with eat_whitespace().
 * An empty comment (the "%\n" combination) only skips the following spaces.
 */
void parse_comment(Parser & p, ostream & os, Token const & t, Context & context)
{
	BOOST_ASSERT(t.cat() == catComment);
	context.check_layout(os);

	if (t.cs().empty()) {
		// "%\n" combination
		p.skip_spaces();
		return;
	}

	handle_comment(os, '%' + t.cs(), context);

	// Nothing more to do unless a newline follows the comment line.
	if (p.next_token().cat() != catNewline)
		return;

	// A newline after a comment line starts a new paragraph.
	// Only open one if that has not been done already (we might get
	// called recursively).
	if (!context.atParagraphStart())
		context.new_paragraph(os);
	eat_whitespace(p, os, context, true);
}
/*!
 * Consumes tokens from \p p up to (not including) the first token that is
 * neither a space, a newline nor a comment.
 * Comments are not dropped: they are passed to parse_comment(), which
 * writes them to \p os. Spaces and newlines are discarded.
 * If \p eatParagraph is false, a paragraph break (as reported by
 * Parser::isParagraph(), i.e. double newlines or \\par) also stops the
 * scan and is put back; if it is true, such newlines are skipped like
 * ordinary whitespace.
 */
void eat_whitespace(Parser & p, ostream & os, Context & context,
		    bool eatParagraph)
{
	while (p.good()) {
		Token const & tok = p.get_token();
		CatCode const cat = tok.cat();

		if (cat == catComment) {
			parse_comment(p, os, tok, context);
			continue;
		}

		bool const stop_at_paragraph = !eatParagraph && p.isParagraph();
		bool const is_blank = (cat == catSpace || cat == catNewline);
		if (stop_at_paragraph || !is_blank) {
			// Leave the terminating token for the caller.
			p.putback();
			return;
		}
	}
}
} // anonymous namespace } // anonymous namespace
void parse_text(Parser & p, ostream & os, unsigned flags, bool outer, void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
Context & context) Context & context)
{ {
@ -738,7 +785,7 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
skip_braces(p); skip_braces(p);
} }
else if (t.cat() == catSpace || (t.cat() == catNewline && t.cs().size() == 1)) else if (t.cat() == catSpace || (t.cat() == catNewline && ! p.isParagraph()))
check_space(p, os, context); check_space(p, os, context);
else if (t.cat() == catLetter || else if (t.cat() == catLetter ||
@ -749,9 +796,9 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
os << t.character(); os << t.character();
} }
else if (t.cat() == catNewline || (t.cat() == catEscape && t.cs() == "par")) { else if (p.isParagraph()) {
p.skip_spaces();
context.new_paragraph(os); context.new_paragraph(os);
eat_whitespace(p, os, context, true);
} }
else if (t.cat() == catActive) { else if (t.cat() == catActive) {
@ -792,20 +839,8 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
handle_ert(os, "}", context); handle_ert(os, "}", context);
} }
else if (t.cat() == catComment) { else if (t.cat() == catComment)
context.check_layout(os); parse_comment(p, os, t, context);
if (!t.cs().empty()) {
handle_comment(os, '%' + t.cs(), context);
if (p.next_token().cat() == catNewline) {
// A newline after a comment line starts a new paragraph
context.new_paragraph(os);
p.skip_spaces();
}
} else {
// "%\n" combination
p.skip_spaces();
}
}
// //
// control sequences // control sequences
@ -865,7 +900,7 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
} else if (!s.empty()) { } else if (!s.empty()) {
// The space is needed to separate the item from the rest of the sentence. // The space is needed to separate the item from the rest of the sentence.
os << s << ' '; os << s << ' ';
p.skip_spaces(); eat_whitespace(p, os, context, false);
} }
} }
} }
@ -879,8 +914,8 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
} }
else if (t.cs() == "def") { else if (t.cs() == "def") {
p.skip_spaces();
context.check_layout(os); context.check_layout(os);
eat_whitespace(p, os, context, false);
string name = p.get_token().cs(); string name = p.get_token().cs();
while (p.next_token().cat() != catBegin) while (p.next_token().cat() != catBegin)
name += p.get_token().asString(); name += p.get_token().asString();
@ -1010,7 +1045,7 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
} }
// TODO: Handle the unknown settings better. // TODO: Handle the unknown settings better.
// Warn about invalid options. // Warn about invalid options.
// Check wether some option was given twice. // Check whether some option was given twice.
end_inset(os); end_inset(os);
} }
@ -1172,6 +1207,10 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
begin_inset(os, "Quotes "); begin_inset(os, "Quotes ");
os << known_coded_quotes[where - known_quotes]; os << known_coded_quotes[where - known_quotes];
end_inset(os); end_inset(os);
// LyX adds {} after the quote, so we have to eat
// spaces here if there are any before a possible
// {} pair.
eat_whitespace(p, os, context, false);
skip_braces(p); skip_braces(p);
} }
@ -1179,7 +1218,7 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
char const ** where = is_known(t.cs(), known_sizes); char const ** where = is_known(t.cs(), known_sizes);
context.check_layout(os); context.check_layout(os);
os << "\n\\size " << known_coded_sizes[where - known_sizes] << "\n"; os << "\n\\size " << known_coded_sizes[where - known_sizes] << "\n";
p.skip_spaces(); eat_whitespace(p, os, context, false);
} }
else if (t.cs() == "LyX" || t.cs() == "TeX" else if (t.cs() == "LyX" || t.cs() == "TeX"
@ -1374,6 +1413,7 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
begin_inset(os, "VSpace "); begin_inset(os, "VSpace ");
os << t.cs(); os << t.cs();
end_inset(os); end_inset(os);
skip_braces(p);
} }
else if (t.cs() == "vspace") { else if (t.cs() == "vspace") {