Small tweaks

* src/Paragraph.cpp (knownLangChars): renamed as writeScriptChars. Now only deals with characters that cannot be encoded using the current latex encoding. (latexSpecialChars): only call writeScriptChars if the character cannot be encoded. * src/Encoding.{cpp,h} (isKnownLangChar): renamed as isKnownScriptChar. git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/trunk@21183 a592a061-630c-0410-9148-cb99ea01b6c8
2024-11-22 10:00:33 +00:00 · 2007-10-24 17:22:57 +00:00 · 2007-10-24 17:22:57 +00:00 · f48d7caee3
commit f48d7caee3
parent b1d94940ee
3 changed files with 47 additions and 49 deletions
--- a/src/Encoding.cpp
+++ b/src/Encoding.cpp
@ -401,7 +401,7 @@ bool Encodings::isCombiningChar(char_type c)
 }


-bool Encodings::isKnownLangChar(char_type c, string & preamble)
+bool Encodings::isKnownScriptChar(char_type const c, string & preamble)
 {
 	CharInfoMap::const_iterator const it = unicodesymbols.find(c);

--- a/src/Encoding.h
+++ b/src/Encoding.h
@ -145,12 +145,12 @@ public:
 	/**
 	 * Is this a known char from some language?
 	 * If \p preamble is empty and code point \p c is known to belong
-	 * to a supported language, true is returned and \p preamble is set
+	 * to a supported script, true is returned and \p preamble is set
 	 * to the corresponding entry in the unicodesymbols file.
 	 * If \p preamble is not empty, a check is made whether code point
 	 * \p c is a known character matching the preamble entry.
 	 */
-	static bool isKnownLangChar(char_type c, std::string & preamble);
+	static bool isKnownScriptChar(char_type const c, std::string & preamble);
 	/**
 	 * Add the preamble snippet needed for the output of \p c to
 	 * \p features.
--- a/src/Paragraph.cpp
+++ b/src/Paragraph.cpp
@ -70,7 +70,6 @@ using support::lowercase;
 using support::prefixIs;
 using support::suffixIs;
 using support::rsplit;
-using support::rtrim;
 using support::uppercase;

 namespace {
@ -110,10 +109,10 @@ public:
 			     Font const & font,
 			     Layout const & style);

-	/// Output consecutive known unicode chars, belonging to the same
-	/// language as specified by \p preamble, to \p os starting from \p c.
+	/// Output consecutive unicode chars, belonging to the same script as
+	/// specified by the latex macro \p ltx, to \p os starting from \p c.
 	/// \return the number of characters written.
-	int knownLangChars(odocstream & os, char_type c, string & preamble,
+	int writeScriptChars(odocstream & os, char_type c, docstring const & ltx,
 			   Change &, Encoding const &, pos_type &);

 	/// This could go to ParagraphParameters if we want to.
@ -570,28 +569,34 @@ bool Paragraph::Private::simpleTeXBlanks(Encoding const & encoding,
 }


-int Paragraph::Private::knownLangChars(odocstream & os,
-				     char_type c,
-				     string & preamble,
-				     Change & runningChange,
-				     Encoding const & encoding,
-				     pos_type & i)
+int Paragraph::Private::writeScriptChars(odocstream & os,
+					 char_type c,
+					 docstring const & ltx,
+					 Change & runningChange,
+					 Encoding const & encoding,
+					 pos_type & i)
 {
-	// When the character is marked by the proper language, we simply
-	// get its code point in some encoding, otherwise we get the
-	// translation specified in the unicodesymbols file, which is
-	// something like "\textLANG{<spec>}". So, we have to retain
-	// "\textLANG{<spec>" for the first char but only "<spec>" for
-	// all subsequent chars.
-	docstring const latex1 = rtrim(encoding.latexChar(c), "}");
-	int length = latex1.length();
-	os << latex1;
+	// We only arrive here when a proper language for character c has not
+	// been specified (i.e., it could not be translated in the current
+	// latex encoding) and it belongs to a known script.
+	// Parameter ltx contains the latex translation of c as specified in
+	// the unicodesymbols file and is something like "\textXXX{<spec>}".
+	// The latex macro name "textXXX" specifies the script to which c
+	// belongs and we use it in order to check whether characters from the
+	// same script immediately follow, such that we can collect them in a
+	// single "\textXXX" macro. So, we have to retain "\textXXX{<spec>"
+	// for the first char but only "<spec>" for all subsequent chars.
+	docstring::size_type const brace1 = ltx.find_first_of(from_ascii("{"));
+	docstring::size_type const brace2 = ltx.find_last_of(from_ascii("}"));
+	string script = to_ascii(ltx.substr(1, brace1 - 1));
+	int length = ltx.substr(0, brace2).length();
+	os << ltx.substr(0, brace2);
 	int size = text_.size();
 	while (i + 1 < size) {
-		char_type next = text_[i + 1];
-		// Stop here if next character belongs to another
-		// language or there is a change tracking status.
-		if (!Encodings::isKnownLangChar(next, preamble) ||
+		char_type const next = text_[i + 1];
+		// Stop here if next character belongs to another script
+		// or there is a change in change tracking status.
+		if (!Encodings::isKnownScriptChar(next, script) ||
 		    runningChange != owner_->lookupChange(i + 1))
 			break;
 		Font prev_font;
@ -606,27 +611,21 @@ int Paragraph::Private::knownLangChars(odocstream & os,
 			if (cit->pos() >= i + 1)
 				break;
 		}
-		// Stop here if there is a font attribute change.
+		// Stop here if there is a font attribute or encoding change.
 		if (found && cit != end && prev_font != cit->font())
 			break;
-		docstring const latex = rtrim(encoding.latexChar(next), "}");
-		docstring::size_type const j =
+		docstring const latex = encoding.latexChar(next);
+		docstring::size_type const b1 =
 					latex.find_first_of(from_ascii("{"));
-		if (j == docstring::npos) {
-			os << latex;
-			length += latex.length();
-		} else {
-			os << latex.substr(j + 1);
-			length += latex.substr(j + 1).length();
-		}
+		docstring::size_type const b2 =
+					latex.find_last_of(from_ascii("}"));
+		int const len = b2 - b1 - 1;
+		os << latex.substr(b1 + 1, len);
+		length += len;
 		++i;
 	}
-	// When the proper language is set, we are simply passed a code
-	// point, so we should not try to close the \textLANG command.
-	if (prefixIs(latex1, from_ascii("\\" + preamble))) {
-		os << '}';
-		++length;
-	}
+	os << '}';
+	++length;
 	return length;
 }

@ -896,14 +895,13 @@ void Paragraph::Private::latexSpecialChar(
 				break;
 			}
 		}
-		string preamble;
-		if (Encodings::isKnownLangChar(c, preamble)) {
-			column += knownLangChars(os, c, preamble, running_change,
-				encoding, i) - 1;
-			break;
-		}
+		string script;
 		docstring const latex = encoding.latexChar(c);
-		if (latex.length() > 1 && latex[latex.length() - 1] != '}') {
+		if (Encodings::isKnownScriptChar(c, script)
+		    && prefixIs(latex, from_ascii("\\" + script)))
+			column += writeScriptChars(os, c, latex,
+					running_change, encoding, i) - 1;
+		else if (latex.length() > 1 && latex[latex.length() - 1] != '}') {
 			// Prevent eating of a following
 			// space or command corruption by
 			// following characters