Really fix bug #4468.

The old fix was incomplete (\verb~\~ was translated to \verb~~ in roundtrip). The real cause of this bug (and also of the mistranslation of \href{...}{\}}) was the misbehaviour of Token::character() (see comment in Parser.h): this method returns a character even if the category is catEscape, which is not wanted in most (all?) cases.
2025-01-03 08:28:25 +00:00 · 2012-10-05 00:12:18 +02:00 · 2012-10-05 00:12:18 +02:00 · 2f7f0c7631
commit 2f7f0c7631
parent 5afe35cc59
7 changed files with 73 additions and 26 deletions
--- a/src/tex2lyx/Parser.cpp
+++ b/src/tex2lyx/Parser.cpp
@ -383,7 +383,7 @@ bool Parser::hasOpt()
 }
-Parser::Arg Parser::getFullArg(char left, char right)
+Parser::Arg Parser::getFullArg(char left, char right, bool allow_escaping)
 {
 	skip_spaces(true);
@ -393,36 +393,40 @@ Parser::Arg Parser::getFullArg(char left, char right)
 		return make_pair(false, string());
 	string result;
-	char c = getChar();
+	Token t = get_token();
-	if (c != left) {
+	if (t.cat() == catComment || t.cat() == catEscape ||
 	    t.character() != left) {
 		putback();
 		return make_pair(false, string());
 	} else {
-		// a single '\' is only allowed within \verb, no matter what the delimiter is,
+		for (t = get_token(); good(); t = get_token()) {
 		// for example "\verb+\+" (reported as bug #4468)
 		// To support this, we allow single '\' if it is the only character
 		// within equal delimiters
 		if (next_token().cat() == catEscape)
 			if (next_token().character() == right && right == left)
 				result += '\\';
 		while ((c = getChar()) != right && good()) {
 			// Ignore comments
-			if (curr_token().cat() == catComment) {
+			if (t.cat() == catComment) {
-				if (!curr_token().cs().empty())
+				if (!t.cs().empty())
-					cerr << "Ignoring comment: " << curr_token().asInput();
+					cerr << "Ignoring comment: " << t.asInput();
 				continue;
 			}
-			else
+			if (allow_escaping) {
-				result += curr_token().asInput();
+				if (t.cat() != catEscape && t.character() == right)
 					break;
 			} else {
 				if (t.character() == right) {
 					if (t.cat() == catEscape)
 						result += '\\';
 					break;
 				}
 			}
 			result += t.asInput();
 		}
 	}
 	return make_pair(true, result);
 }
-string Parser::getArg(char left, char right)
+string Parser::getArg(char left, char right, bool allow_escaping)
 {
-	return getFullArg(left, right).second;
+	return getFullArg(left, right, allow_escaping).second;
 }
--- a/src/tex2lyx/Parser.h
+++ b/src/tex2lyx/Parser.h
@ -87,8 +87,8 @@ public:
 	 *        ../mathed/MathParser.cpp (which is the ancestor of this
 	 *        class) uses a separate char member for this method. I
 	 *        believe that the intended usage is to not cover tokens with
-	 *        catEscape, e.g. \code
+	 *        catEscape or catComment, e.g. \code
-	 *        return (cs_.empty() || cat_ == catEscape) ? 0 : cs_[0];
+	 *        return (cs_.empty() || cat_ == catEscape || cat_ == catComment) ? 0 : cs_[0];
 	 *        \endcode
 	 *        All usages of this method should be checked. gb 2011-01-05
 	 */
@ -157,18 +157,24 @@ public:
 	typedef std::pair<bool, std::string> Arg;
 	/*!
 	 * Get an argument enclosed by \p left and \p right.
 	 * If \p allow_escaping is true, a right delimiter escaped by a
 	 * backslash does not count as delimiter, but is included in the
 	 * argument.
 	 * \returns whether an argument was found in \p Arg.first and the
 	 * argument in \p Arg.second. \see getArg().
 	 */
-	Arg getFullArg(char left, char right);
+	Arg getFullArg(char left, char right, bool allow_escaping = true);
 	/*!
 	 * Get an argument enclosed by \p left and \p right.
 	 * If \p allow_escaping is true, a right delimiter escaped by a
 	 * backslash does not count as delimiter, but is included in the
 	 * argument.
 	 * \returns the argument (without \p left and \p right) or the empty
 	 * string if the next non-space token is not \p left. Use
 	 * getFullArg() if you need to know whether there was an empty
 	 * argument or no argument at all.
 	 */
-	std::string getArg(char left, char right);
+	std::string getArg(char left, char right, bool allow_escaping = true);
 	/*!
 	 * Like getOpt(), but distinguishes between a missing argument ""
 	 * and an empty argument "[]".
--- a/src/tex2lyx/test/test-insets.lyx.lyx
+++ b/src/tex2lyx/test/test-insets.lyx.lyx
@ -968,6 +968,19 @@ target "http://www.test.test"
 \end_inset
 \end_layout
 \begin_layout Standard
 parser test (escaped):
 \begin_inset CommandInset href
 LatexCommand href
 name "a brace } and another one { and something"
 target "http://www.test.test"
 \end_inset
 \end_layout
 \begin_layout Section
--- a/src/tex2lyx/test/test-insets.tex
+++ b/src/tex2lyx/test/test-insets.tex
@ -160,6 +160,8 @@ ftp2:\href{ftp://www.test.test}{www.test.test}
 parser test (stupid, but valid):\href{http://www.test.test}{\}}
 parser test (escaped):\href{http://www.test.test}{a brace \} and another one \{ and something}
 \section{Lists\index{Lists}}
--- a/src/tex2lyx/test/test-structure.lyx.lyx
+++ b/src/tex2lyx/test/test-structure.lyx.lyx
@ -1119,7 +1119,9 @@ status collapsed
 \backslash
-verb~~
+verb~
 \backslash
 ~
 \end_layout
 \end_inset
@ -1140,6 +1142,21 @@ item[ABC] first item+
 \end_inset
 \begin_inset ERT
 status collapsed
 \begin_layout Standard
 \backslash
 verb+something
 \backslash
 +
 \end_layout
 \end_inset
 bug 4468
 \end_layout
 \begin_layout Standard
--- a/src/tex2lyx/test/test-structure.tex
+++ b/src/tex2lyx/test/test-structure.tex
@ -321,6 +321,7 @@ zzz \section{
 \end{verbatim}
 \verb~\~
 \verb+\item[ABC] first item+
 \verb+something\+ bug 4468
 and bibliography:
 \begin{thebibliography}{9}
--- a/src/tex2lyx/text.cpp
+++ b/src/tex2lyx/text.cpp
@ -3210,8 +3210,8 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
 		else if (t.cs() == "href") {
 			context.check_layout(os);
-			string target = p.getArg('{', '}');
+			string target = convert_command_inset_arg(p.verbatim_item());
-			string name = p.getArg('{', '}');
+			string name = convert_command_inset_arg(p.verbatim_item());
 			string type;
 			size_t i = target.find(':');
 			if (i != string::npos) {
@ -3729,7 +3729,11 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
 		else if (t.cs() == "verb") {
 			context.check_layout(os);
 			char const delimiter = p.next_token().character();
-			string const arg = p.getArg(delimiter, delimiter);
+			// \verb is special: The usual escaping rules do not
 			// apply, e.g. "\verb+\+" is valid and denotes a single
 			// backslash (bug #4468). Therefore we do not allow
 			// escaping in getArg().
 			string const arg = p.getArg(delimiter, delimiter, false);
 			ostringstream oss;
 			oss << "\\verb" << delimiter << arg << delimiter;
 			handle_ert(os, oss.str(), context);