Really fix bug #4468.

The old fix was incomplete (\verb~\~ was translated to \verb~~ in roundtrip). The real cause of this bug (and also of the mistranslation of \href{...}{\}}) was the misbehaviour of Token::character() (see comment in Parser.h): This method returns a character even if the category is catEscape, which is not wanted in most (all?) cases.
2024-12-22 05:16:21 +00:00 · 2012-10-05 00:12:18 +02:00 · 2012-10-05 00:12:18 +02:00 · 2f7f0c7631
commit 2f7f0c7631
parent 5afe35cc59
7 changed files with 73 additions and 26 deletions
--- a/src/tex2lyx/Parser.cpp
+++ b/src/tex2lyx/Parser.cpp
@ -383,7 +383,7 @@ bool Parser::hasOpt()
 }


-Parser::Arg Parser::getFullArg(char left, char right)
+Parser::Arg Parser::getFullArg(char left, char right, bool allow_escaping)
 {
 	skip_spaces(true);

@ -393,36 +393,40 @@ Parser::Arg Parser::getFullArg(char left, char right)
 		return make_pair(false, string());

 	string result;
-	char c = getChar();
+	Token t = get_token();

-	if (c != left) {
+	if (t.cat() == catComment || t.cat() == catEscape ||
+	    t.character() != left) {
 		putback();
 		return make_pair(false, string());
 	} else {
-		// a single '\' is only allowed within \verb, no matter what the delimiter is,
-		// for example "\verb+\+" (reported as bug #4468)
-		// To support this, we allow single '\' if it is the only character
-		// within equal delimiters
-		if (next_token().cat() == catEscape)
-			if (next_token().character() == right && right == left)
-				result += '\\';
-		while ((c = getChar()) != right && good()) {
+		for (t = get_token(); good(); t = get_token()) {
 			// Ignore comments
-			if (curr_token().cat() == catComment) {
-				if (!curr_token().cs().empty())
-					cerr << "Ignoring comment: " << curr_token().asInput();
+			if (t.cat() == catComment) {
+				if (!t.cs().empty())
+					cerr << "Ignoring comment: " << t.asInput();
+				continue;
 			}
-			else
-				result += curr_token().asInput();
+			if (allow_escaping) {
+				if (t.cat() != catEscape && t.character() == right)
+					break;
+			} else {
+				if (t.character() == right) {
+					if (t.cat() == catEscape)
+						result += '\\';
+					break;
+				}
+			}
+			result += t.asInput();
 		}
 	}
 	return make_pair(true, result);
 }


-string Parser::getArg(char left, char right)
+string Parser::getArg(char left, char right, bool allow_escaping)
 {
-	return getFullArg(left, right).second;
+	return getFullArg(left, right, allow_escaping).second;
 }


--- a/src/tex2lyx/Parser.h
+++ b/src/tex2lyx/Parser.h
@ -87,8 +87,8 @@ public:
 	 *        ../mathed/MathParser.cpp (which is the anchestor of this
 	 *        class) uses a separate char member for this method. I
 	 *        believe that the intended usage is to not cover tokens with
-	 *        catEscape, e.g. \code
-	 *        return (cs_.empty() || cat_ == catEscape) ? 0 : cs_[0];
+	 *        catEscape or catComment, e.g. \code
+	 *        return (cs_.empty() || cat_ == catEscape || cat_ == catComment) ? 0 : cs_[0];
 	 *        \endcode
 	 *        All usages of this method should be checked. gb 2011-01-05
 	 */
@ -157,18 +157,24 @@ public:
 	typedef std::pair<bool, std::string> Arg;
 	/*!
 	 * Get an argument enclosed by \p left and \p right.
+	 * If \p allow_escaping is true, a right delimiter escaped by a
+	 * backslash does not count as delimiter, but is included in the
+	 * argument.
 	 * \returns wether an argument was found in \p Arg.first and the
 	 * argument in \p Arg.second. \see getArg().
 	 */
-	Arg getFullArg(char left, char right);
+	Arg getFullArg(char left, char right, bool allow_escaping = true);
 	/*!
 	 * Get an argument enclosed by \p left and \p right.
+	 * If \p allow_escaping is true, a right delimiter escaped by a
+	 * backslash does not count as delimiter, but is included in the
+	 * argument.
 	 * \returns the argument (without \p left and \p right) or the empty
 	 * string if the next non-space token is not \p left. Use
 	 * getFullArg() if you need to know wether there was an empty
 	 * argument or no argument at all.
 	 */
-	std::string getArg(char left, char right);
+	std::string getArg(char left, char right, bool allow_escaping = true);
 	/*!
 	 * Like getOpt(), but distinguishes between a missing argument ""
 	 * and an empty argument "[]".
--- a/src/tex2lyx/test/test-insets.lyx.lyx
+++ b/src/tex2lyx/test/test-insets.lyx.lyx
@ -968,6 +968,19 @@ target "http://www.test.test"
 \end_inset


+\end_layout
+
+\begin_layout Standard
+
+parser test (escaped):
+\begin_inset CommandInset href
+LatexCommand href
+name "a brace } and another one { and something"
+target "http://www.test.test"
+
+\end_inset
+
+
 \end_layout

 \begin_layout Section
--- a/src/tex2lyx/test/test-insets.tex
+++ b/src/tex2lyx/test/test-insets.tex
@ -160,6 +160,8 @@ ftp2:\href{ftp://www.test.test}{www.test.test}

 parser test (stupid, but valid):\href{http://www.test.test}{\}}

+parser test (escaped):\href{http://www.test.test}{a brace \} and another one \{ and something}
+

 \section{Lists\index{Lists}}

--- a/src/tex2lyx/test/test-structure.lyx.lyx
+++ b/src/tex2lyx/test/test-structure.lyx.lyx
@ -1119,7 +1119,9 @@ status collapsed


 \backslash
-verb~~
+verb~
+\backslash
+~
 \end_layout

 \end_inset
@ -1139,7 +1141,22 @@ item[ABC] first item+

 \end_inset

+ 
+\begin_inset ERT
+status collapsed

+\begin_layout Standard
+
+
+\backslash
+verb+something
+\backslash
+
+\end_layout
+
+\end_inset
+
+ bug 4468
 \end_layout

 \begin_layout Standard
--- a/src/tex2lyx/test/test-structure.tex
+++ b/src/tex2lyx/test/test-structure.tex
@ -321,6 +321,7 @@ zzz \section{
 \end{verbatim}
 \verb~\~
 \verb+\item[ABC] first item+
+\verb+something\+ bug 4468

 and bibliography:
 \begin{thebibliography}{9}
--- a/src/tex2lyx/text.cpp
+++ b/src/tex2lyx/text.cpp
@ -3210,8 +3210,8 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,

 		else if (t.cs() == "href") {
 			context.check_layout(os);
-			string target = p.getArg('{', '}');
-			string name = p.getArg('{', '}');
+			string target = convert_command_inset_arg(p.verbatim_item());
+			string name = convert_command_inset_arg(p.verbatim_item());
 			string type;
 			size_t i = target.find(':');
 			if (i != string::npos) {
@ -3729,7 +3729,11 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
 		else if (t.cs() == "verb") {
 			context.check_layout(os);
 			char const delimiter = p.next_token().character();
-			string const arg = p.getArg(delimiter, delimiter);
+			// \verb is special: The usual escaping rules do not
+			// apply, e.g. "\verb+\+" is valid and denotes a single
+			// backslash (bug #4468). Therefore we do not allow
+			// escaping in getArg().
+			string const arg = p.getArg(delimiter, delimiter, false);
 			ostringstream oss;
 			oss << "\\verb" << delimiter << arg << delimiter;
 			handle_ert(os, oss.str(), context);