improve tex2lyx paragraph and comment handling

git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/trunk@8815 a592a061-630c-0410-9148-cb99ea01b6c8
2024-12-26 14:15:32 +00:00 · 2004-06-18 06:47:19 +00:00 · 2004-06-18 06:47:19 +00:00 · cb73d69ae4
commit cb73d69ae4
parent 5603df4a5b
9 changed files with 131 additions and 62 deletions
--- a/src/insets/ChangeLog
+++ b/src/insets/ChangeLog
@ -1,3 +1,7 @@
+2004-06-18  Georg Baum  <Georg.Baum@post.rwth-aachen.de>
+
+	* insetgraphics.C, insettabular.C: s/wether/whether/g
+
 2004-06-10  Georg Baum  <Georg.Baum@post.rwth-aachen.de>

 	* insetgraphics.C (stripExtension): new
--- a/src/insets/insetgraphics.C
+++ b/src/insets/insetgraphics.C
@ -477,7 +477,7 @@ string const InsetGraphics::prepareFile(Buffer const & buf,

 	if (zipped) {
 		if (params().noUnzip) {
-			// We don't know wether latex can actually handle
+			// We don't know whether latex can actually handle
 			// this file, but we can't check, because that would
 			// mean to unzip the file and thereby making the
 			// noUnzip parameter meaningless.
--- a/src/insets/insettabular.C
+++ b/src/insets/insettabular.C
@ -1324,7 +1324,7 @@ void InsetTabular::tabularFeatures(LCursor & cur,
 #if 0
 		// just multicol for one Single Cell
 		if (!hasSelection()) {
-			// check wether we are completly in a multicol
+			// check whether we are completely in a multicol
 			if (tabular.isMultiColumn(actcell))
 				tabular.unsetMultiColumn(actcell);
 			else
--- a/src/tex2lyx/ChangeLog
+++ b/src/tex2lyx/ChangeLog
@ -1,3 +1,12 @@
+2004-06-18  Georg Baum  <Georg.Baum@post.rwth-aachen.de>
+
+	* preamble.C, text.C: s/wether/whether/g
+	* text.C (eat_whitespace): new method
+	* texparser.C (getArg): use always curr_token().asInput()
+	* texparser.[Ch] (isParagraph): new method
+	* texparser.C (skip_spaces): handle "\n +\n" correctly
+	* texparser.[Ch] (asMode): remove, since it is unused
+
 2004-05-27  Lars Gullik Bjonnes  <larsbj@gullik.net>

 	* Makefile.am (BUILT_SOURCES): move lengthcommon.C from here...
--- a/src/tex2lyx/preamble.C
+++ b/src/tex2lyx/preamble.C
@ -184,7 +184,7 @@ LyXTextClass const parse_preamble(Parser & p, ostream & os, string const & force
 	special_columns['D'] = 3;
 	bool is_full_document = false;

-	// determine wether this is a full document or a fragment for inclusion
+	// determine whether this is a full document or a fragment for inclusion
 	while (p.good()) {
 		Token const & t = p.get_token();

--- a/src/tex2lyx/tex2lyx.h
+++ b/src/tex2lyx/tex2lyx.h
@ -34,6 +34,7 @@ void parse_text(Parser & p, std::ostream & os, unsigned flags, bool outer,
 //std::string parse_text(Parser & p, unsigned flags, const bool outer,
 //		       Context & context);

+/// parses a subdocument, usually useful in insets (whence the name)
 void parse_text_in_inset(Parser & p, std::ostream & os, unsigned flags,
 			 bool outer, Context & context);

--- a/src/tex2lyx/texparser.C
+++ b/src/tex2lyx/texparser.C
@ -61,16 +61,6 @@ void catInit()
 // catcodes
 //

-mode_type asMode(mode_type oldmode, string const & str)
-{
-	if (str == "mathmode")
-		return MATH_MODE;
-	if (str == "textmode" || str == "forcetext")
-		return TEXT_MODE;
-	return oldmode;
-}
-
-
 CatCode catcode(unsigned char c)
 {
 	return theCatcode[c];
@ -177,20 +167,45 @@ Token const & Parser::get_token()
 }


+bool Parser::isParagraph() const
+{
+	// A new paragraph in TeX is started
+	// - either by a newline, followed by any amount of whitespace
+	//   characters (including zero), and another newline
+	// - or the token \par
+	if (curr_token().cat() == catNewline &&
+	    (curr_token().cs().size() > 1 ||
+	     (next_token().cat() == catSpace &&
+	      pos_ < tokens_.size() - 1 &&
+	      tokens_[pos_ + 1].cat() == catNewline)))
+		return true;
+	if (curr_token().cat() == catEscape && curr_token().cs() == "par")
+		return true;
+	return false;
+}
+
+
 void Parser::skip_spaces(bool skip_comments)
 {
 	// We just silently return if we have no more tokens.
 	// skip_spaces() should be callable at any time,
 	// the caller must check p::good() anyway.
 	while (good()) {
-		if ( next_token().cat() == catSpace ||
-		    (next_token().cat() == catNewline && next_token().cs().size() == 1) ||
-		     next_token().cat() == catComment && next_token().cs().empty())
-			get_token();
-		else if (skip_comments && next_token().cat() == catComment)
-			cerr << "  Ignoring comment: " << get_token().asInput();
-		else
+		get_token();
+		if (isParagraph()) {
+			putback();
 			break;
+		}
+		if ( curr_token().cat() == catSpace ||
+		     curr_token().cat() == catNewline ||
+		    (curr_token().cat() == catComment && curr_token().cs().empty()))
+			continue;
+		if (skip_comments && curr_token().cat() == catComment)
+			cerr << "  Ignoring comment: " << curr_token().asInput();
+		else {
+			putback();
+			break;
+		}
 	}
 }

@ -253,10 +268,8 @@ string Parser::getArg(char left, char right)
 				if (!curr_token().cs().empty())
 					cerr << "Ignoring comment: " << curr_token().asInput();
 			}
-			else if (curr_token().cat() == catSpace || curr_token().cat() == catNewline)
-				result += curr_token().cs();
 			else
-				result += c;
+				result += curr_token().asInput();
 		}

 	return result;
--- a/src/tex2lyx/texparser.h
+++ b/src/tex2lyx/texparser.h
@ -79,13 +79,13 @@ public:

 	///
 	std::string const & cs() const { return cs_; }
-	///
+	/// Returns the catcode of the token
 	CatCode cat() const { return cat_; }
 	///
 	char character() const { return char_; }
-	///
+	/// Returns the token as string
 	std::string asString() const;
-	///
+	/// Returns the token verbatim
 	std::string asInput() const;

 private:
@ -130,27 +130,29 @@ public:
 	std::string getArg(char left, char right);
 	/// getArg('[', ']') including the brackets
 	std::string getOpt();
-	///
+	/// Returns the character of the current token and increments the token position.
 	char getChar();
 	///
 	void error(std::string const & msg);
-	///
+	/// Parses \p is into tokens
 	void tokenize(std::istream & is);
 	///
 	void push_back(Token const & t);
 	///
 	void pop_back();
-	///
+	/// The previous token.
 	Token const & prev_token() const;
-	///
+	/// The current token.
 	Token const & curr_token() const;
-	///
+	/// The next token.
 	Token const & next_token() const;
 	/// Make the next token current and return that.
 	Token const & get_token();
-	/// skips spaces (and comments if \param skip_comments is true)
+	/// \return whether the current token starts a new paragraph
+	bool isParagraph() const;
+	/// skips spaces (and comments if \p skip_comments is true)
 	void skip_spaces(bool skip_comments = false);
-	/// puts back spaces (and comments if \param skip_comments is true)
+	/// puts back spaces (and comments if \p skip_comments is true)
 	void unskip_spaces(bool skip_comments = false);
 	///
 	void lex(std::string const & s);
--- a/src/tex2lyx/text.C
+++ b/src/tex2lyx/text.C
@ -144,8 +144,8 @@ bool splitLatexLength(string const & len, string & value, string & unit)
 }


-// A simple function to translate a latex length to something lyx can
-// understand. Not perfect, but rather best-effort.
+/// A simple function to translate a latex length to something lyx can
+/// understand. Not perfect, but rather best-effort.
 bool translate_len(string const & length, string & valstring, string & unit)
 {
 	if (!splitLatexLength(length, valstring, unit))
@ -313,6 +313,9 @@ LyXLayout_ptr findLayout(LyXTextClass const & textclass,
 }


+void eat_whitespace(Parser &, ostream &, Context &, bool);
+
+
 void output_command_layout(ostream & os, Parser & p, bool outer,
 			   Context & parent_context,
 			   LyXLayout_ptr newlayout)
@ -323,13 +326,14 @@ void output_command_layout(ostream & os, Parser & p, bool outer,
 	context.check_deeper(os);
 	context.check_layout(os);
 	if (context.layout->optionalargs > 0) {
-		p.skip_spaces();
+		eat_whitespace(p, os, context, false);
 		if (p.next_token().character() == '[') {
 			p.get_token(); // eat '['
 			begin_inset(os, "OptArg\n");
 			os << "status collapsed\n\n";
 			parse_text_in_inset(p, os, FLAG_BRACK_LAST, outer, context);
 			end_inset(os);
+			eat_whitespace(p, os, context, false);
 		}
 	}
 	parse_text_snippet(p, os, FLAG_ITEM, outer, context);
@ -378,7 +382,7 @@ void check_space(Parser const & p, ostream & os, Context & context)


 /*!
- * Check wether \param command is a known command. If yes,
+ * Check whether \p command is a known command. If yes,
 * handle the command with all arguments.
 * \return true if the command was parsed, false otherwise.
 */
@ -522,8 +526,8 @@ void parse_environment(Parser & p, ostream & os, bool outer,
 	string const name = p.getArg('{', '}');
 	const bool is_starred = suffixIs(name, '*');
 	string const unstarred_name = rtrim(name, "*");
+	eat_whitespace(p, os, parent_context, false);
 	active_environments.push_back(name);
-	p.skip_spaces();

 	if (is_math_env(name)) {
 		parent_context.check_layout(os);
@ -651,11 +655,54 @@ void parse_environment(Parser & p, ostream & os, bool outer,
 		p.skip_spaces();
 }

+/// parses a comment and outputs it to \p os.
+void parse_comment(Parser & p, ostream & os, Token const & t, Context & context)
+{
+	BOOST_ASSERT(t.cat() == catComment);
+	context.check_layout(os);
+	if (!t.cs().empty()) {
+		handle_comment(os, '%' + t.cs(), context);
+		if (p.next_token().cat() == catNewline) {
+			// A newline after a comment line starts a new
+			// paragraph
+			if(!context.atParagraphStart()) {
+				// Only start a new paragraph if not already
+				// done (we might get called recursively)
+				context.new_paragraph(os);
+			}
+			eat_whitespace(p, os, context, true);
+		}
+	} else {
+		// "%\n" combination
+		p.skip_spaces();
+	}
+}
+
+
+/*!
+ * Reads spaces and comments until the first non-space, non-comment token.
+ * New paragraphs (double newlines or \\par) are handled like simple spaces
+ * if \p eatParagraph is true.
+ * Spaces are skipped, but comments are written to \p os.
+ */
+void eat_whitespace(Parser & p, ostream & os, Context & context,
+                    bool eatParagraph)
+{
+	while (p.good()) {
+		Token const & t = p.get_token();
+		if (t.cat() == catComment)
+			parse_comment(p, os, t, context);
+		else if ((! eatParagraph && p.isParagraph()) ||
+		         (t.cat() != catSpace && t.cat() != catNewline)) {
+			p.putback();
+			return;
+		}
+	}
+}
+
 } // anonymous namespace


-
-
 void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
 		Context & context)
 {
@ -738,7 +785,7 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
 			skip_braces(p);
 		}

-		else if (t.cat() == catSpace || (t.cat() == catNewline && t.cs().size() == 1))
+		else if (t.cat() == catSpace || (t.cat() == catNewline && ! p.isParagraph()))
 			check_space(p, os, context);

 		else if (t.cat() == catLetter ||
@ -749,9 +796,9 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
 			os << t.character();
 		}

-		else if (t.cat() == catNewline || (t.cat() == catEscape && t.cs() == "par")) {
-			p.skip_spaces();
+		else if (p.isParagraph()) {
 			context.new_paragraph(os);
+			eat_whitespace(p, os, context, true);
 		}

 		else if (t.cat() == catActive) {
@ -792,20 +839,8 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
 			handle_ert(os, "}", context);
 		}

-		else if (t.cat() == catComment) {
-			context.check_layout(os);
-			if (!t.cs().empty()) {
-				handle_comment(os, '%' + t.cs(), context);
-				if (p.next_token().cat() == catNewline) {
-					// A newline after a comment line starts a new paragraph
-					context.new_paragraph(os);
-					p.skip_spaces();
-				}
-			} else {
-				// "%\n" combination
-				p.skip_spaces();
-			}
-		}
+		else if (t.cat() == catComment)
+			parse_comment(p, os, t, context);

 		//
 		// control sequences
@ -865,7 +900,7 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
 				} else if (!s.empty()) {
 					// The space is needed to separate the item from the rest of the sentence.
 					os << s << ' ';
-					p.skip_spaces();
+					eat_whitespace(p, os, context, false);
 				}
 			}
 		}
@ -879,8 +914,8 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
 		}

 		else if (t.cs() == "def") {
-			p.skip_spaces();
 			context.check_layout(os);
+			eat_whitespace(p, os, context, false);
 			string name = p.get_token().cs();
 			while (p.next_token().cat() != catBegin)
 				name += p.get_token().asString();
@ -1010,7 +1045,7 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
 			}
 			// TODO: Handle the unknown settings better.
 			// Warn about invalid options.
-			// Check wether some option was given twice.
+			// Check whether some option was given twice.
 			end_inset(os);
 		}

@ -1046,7 +1081,7 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,

 		else if (t.cs() == "hfill") {
 			context.check_layout(os);
-			os << "\n\\hfill\n";
+			os << "\n\\hfill \n";
 			skip_braces(p);
 			p.skip_spaces();
 		}
@ -1172,6 +1207,10 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
 			begin_inset(os, "Quotes ");
 			os << known_coded_quotes[where - known_quotes];
 			end_inset(os);
+			// LyX adds {} after the quote, so we have to eat
+			// spaces here if there are any before a possible
+			// {} pair.
+			eat_whitespace(p, os, context, false);
 			skip_braces(p);
 		}

@ -1179,7 +1218,7 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
 			char const ** where = is_known(t.cs(), known_sizes);
 			context.check_layout(os);
 			os << "\n\\size " << known_coded_sizes[where - known_sizes] << "\n";
-			p.skip_spaces();
+			eat_whitespace(p, os, context, false);
 		}

 		else if (t.cs() == "LyX" || t.cs() == "TeX"
@ -1374,6 +1413,7 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
 			begin_inset(os, "VSpace ");
 			os << t.cs();
 			end_inset(os);
+			skip_braces(p);
 		}

 		else if (t.cs() == "vspace") {