Now tex2lyx is able to parse stuff that is in the unicodesymbols file, as well

as accents. Time will tell how robust this is. All support for InsetLatexAccent has been removed. I have not added support for \inputencoding yet, because it is more difficult to get right.

git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/trunk@27596 a592a061-630c-0410-9148-cb99ea01b6c8
2024-11-07 12:32:26 +00:00 · 2008-11-16 23:24:56 +00:00 · 2008-11-16 23:24:56 +00:00 · 552da336ac
commit 552da336ac
parent 22808f5265
2 changed files with 43 additions and 48 deletions
--- a/src/tex2lyx/test/test.ltx
+++ b/src/tex2lyx/test/test.ltx
@ -75,6 +75,12 @@ foo & bar \\
 bar & foo
 \end{tabular}
 Let's try a few unicode characters: the (R) symbol \textregistered
 (and the same one with braces \textregistered{} and a space after) or
 maybe an accented a \'{a} or this one \'a or this \^\i.
 Watch out: \textregistered should be glued to its successor here.
 Final Text.
 \end{document}
--- a/src/tex2lyx/text.cpp
+++ b/src/tex2lyx/text.cpp
@ -17,6 +17,7 @@
 #include "tex2lyx.h"
 #include "Context.h"
 #include "Encoding.h"
 #include "FloatList.h"
 #include "Layout.h"
 #include "Length.h"
@ -2239,21 +2240,6 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
 			handle_ert(os, oss.str(), context);
 		}
 #if 0
 //FIXME: rewrite this
 		else if (t.cs() == "\"") {
 			context.check_layout(os);
 			string const name = p.verbatim_item();
 			     if (name == "a") os << '\xe4';
 			else if (name == "o") os << '\xf6';
 			else if (name == "u") os << '\xfc';
 			else if (name == "A") os << '\xc4';
 			else if (name == "O") os << '\xd6';
 			else if (name == "U") os << '\xdc';
 			else handle_ert(os, "\"{" + name + "}", context);
 		}
 #endif
 		// Problem: \= creates a tabstop inside the tabbing environment
 		// and else an accent. In the latter case we really would want
 		// \={o} instead of \= o.
@ -2264,33 +2250,22 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
 			 || t.cs() == "'" || t.cs() == "`"
 			 || t.cs() == "~" || t.cs() == "." || t.cs() == "=") {
 			// we need the trim as the LyX parser chokes on such spaces
 			// The argument of InsetLatexAccent is parsed as a
 			// subset of LaTeX, so don't parse anything here,
 			// but use the raw argument.
 			// Otherwise we would convert \~{\i} wrongly.
 			// This will of course not translate \~{\ss} to \~{ß},
 			// but that does at least compile and does only look
 			// strange on screen.
 			context.check_layout(os);
-			os << "\\i \\" << t.cs() << "{"
+			// try to see whether the string is in unicodesymbols
-			   << trim(p.verbatim_item(), " ")
+			docstring rem;
-			   << "}\n";
+			string command = t.asInput() + "{" 
-		}
+				+ trim(p.verbatim_item())
-
+				+ "}";
-#if 0
+			docstring s = encodings.fromLaTeXCommand(from_utf8(command), rem);
-//FIXME: rewrite this
+			if (!s.empty()) {
-		else if (t.cs() == "ss") {
+				if (!rem.empty())
-			context.check_layout(os);
+					cerr << "When parsing " << command 
-			os << "\xdf";
+					     << ", result is " << to_utf8(s)
-			skip_braces(p); // eat {}
+					     << "+" << to_utf8(rem) << endl;
-		}
+				os << to_utf8(s);
-#endif
+			} else
-
+				// we did not find a non-ert version
-		else if (t.cs() == "i" || t.cs() == "j" || t.cs() == "l" ||
+				handle_ert(os, command, context);
 			 t.cs() == "L") {
 			context.check_layout(os);
 			os << "\\i \\" << t.cs() << "{}\n";
 			skip_braces(p); // eat {}
 		}
 		else if (t.cs() == "\\") {
@ -2544,6 +2519,18 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
 		}
 		else {
 			// try to see whether the string is in unicodesymbols
 			docstring rem;
 			docstring s = encodings.fromLaTeXCommand(from_utf8(t.asInput()), rem);
 			if (!s.empty()) {
 				if (!rem.empty())
 					cerr << "When parsing " << t.cs() 
 					     << ", result is " << to_utf8(s)
 					     << "+" << to_utf8(rem) << endl;
 				context.check_layout(os);
 				os << to_utf8(s);
 				skip_braces(p); // eat {}
 			}
 			//cerr << "#: " << t << " mode: " << mode << endl;
 			// heuristic: read up to next non-nested space
 			/*
@ -2557,14 +2544,16 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
 			cerr << "found ERT: " << s << endl;
 			handle_ert(os, s + ' ', context);
 			*/
-			string name = t.asInput();
+			else {
-			if (p.next_token().asInput() == "*") {
+				string name = t.asInput();
-				// Starred commands like \vspace*{}
+				if (p.next_token().asInput() == "*") {
-				p.get_token();				// Eat '*'
+					// Starred commands like \vspace*{}
-				name += '*';
+					p.get_token();	// Eat '*'
 					name += '*';
 				}
 				if (!parse_command(name, p, os, outer, context))
 					handle_ert(os, name, context);
 			}
 			if (! parse_command(name, p, os, outer, context))
 				handle_ert(os, name, context);
 		}
 		if (flags & FLAG_LEAVE) {