Draw right-to-left text string-wise using Qt

We rely on Qt built-in unicode support for handling Arabic and Hebrew compose characters. This allows to avoid to use our homegrown machinery. This should provide a nice speedup at a low cost and will eventually allow us to get rid of: * most of our Arabic/Hebrew machinery in Encodings.cpp, * Paragraph::transformChar, * and probably more.
2024-12-22 13:18:28 +00:00 · 2014-04-10 16:37:21 +02:00 · 2014-04-10 16:37:21 +02:00 · ea1a5cb80e
commit ea1a5cb80e
parent b8170e0e01
7 changed files with 91 additions and 143 deletions
--- a/25
+++ b/25
@ -11,7 +11,7 @@ what we have with force_paint_single_char. Moreover there has been
 some code factorization in TextMetrics, where the same row-breaking
 algorithm was basically implemented 3 times.
-Currently everything is supposed to work for LTR text, and RTL text
+Currently everything is supposed to work for LTR text, and RtL text
 should work too except possibly metrics with Arabic and Hebrew fonts.
 When KEEP_OLD_METRICS_CODE is defined in TextMetrics.cpp, the new code
@ -40,13 +40,14 @@ What is done:
  lyxrc.force_paint_single_char is false. In this case, remove also
  useless workarounds which disable kerning and ligatures.
 * when lyxrc.force_paint_single_char is false, draw also RtL text
  string-wise. This both speed-up drawing and prepare for code
  removal, since we now rely on Qt to do things we use to do by
  ourselves (see isArabic* and isHebrew* code in Encodings.cpp).
 Next steps:
 * check what happens with Arabic and/or Hebrew text. There may be some
  problems related to compose characters. Investigate whether RtL
  strings could be drawn on a string-wise basis.
 * investigate whether strings could be cut at separators in RowPainter
  only in justified text. This would speed-up painting in other cases
  by lowering the number of strings to draw.
@ -76,18 +77,18 @@ Steps for later (aka out of the scope of this branch):
 Known bugs:
 * there are still difference in what breaks words. In particular,
  RowPainter breaks strings at: selection end, spell-checking
  extremities. This seems to be harmless.
 * when clicking in the right margin, getPosNearX does not return
  the same value as before. I am not sure whether this is important.
 * When selecting text, the display seems to move around. This is
  because partly selected words are drawn in two parts, and in case
  like "ef|fort" or "V|AN", there are some ligature or kerning effects
  that change the display. I am not sure yet how to fix that.
 * there are other differences in what breaks words. In particular,
  RowPainter breaks strings at spell-checking extremities. This seems
  to be harmless.
 * when clicking in the right margin, getPosNearX does not return
  the same value as before. I am not sure whether this is important.
 Other differences in behavior (aka bug fixes):
--- a/src/Encoding.cpp
+++ b/src/Encoding.cpp
@ -711,6 +711,12 @@ docstring Encodings::fromLaTeXCommand(docstring const & cmd, int cmdtype,
 }
 bool Encodings::isHebrewChar(char_type c)
 {
 	return c >= 0x0590 && c <= 0x05ff;
 }
 bool Encodings::isHebrewComposeChar(char_type c)
 {
 	return c <= 0x05c2 && c >= 0x05b0 && c != 0x05be && c != 0x05c0;
--- a/src/Encoding.h
+++ b/src/Encoding.h
@ -269,6 +269,8 @@ public:
 	///
 	static bool isHebrewComposeChar(char_type c);
 	///
 	static bool isHebrewChar(char_type c);
 	///
 	static bool isArabicComposeChar(char_type c);
 	///
 	static bool isArabicSpecialChar(char_type c);
--- a/src/rowpainter.cpp
+++ b/src/rowpainter.cpp
@ -161,72 +161,7 @@ void RowPainter::paintInset(Inset const * inset, pos_type const pos)
 }
-void RowPainter::paintHebrewComposeChar(pos_type & vpos, FontInfo const & font)
+void RowPainter::paintChars(pos_type & vpos, Font const & font)
 {
 	pos_type pos = bidi_.vis2log(vpos);
 	docstring str;
 	// first char
 	char_type c = par_.getChar(pos);
 	str += c;
 	++vpos;
 	int const width = theFontMetrics(font).width(c);
 	int dx = 0;
 	for (pos_type i = pos - 1; i >= 0; --i) {
 		c = par_.getChar(i);
 		if (!Encodings::isHebrewComposeChar(c)) {
 			if (isPrintableNonspace(c)) {
 				int const width2 = pm_.singleWidth(i,
 					text_metrics_.displayFont(pit_, i));
 				dx = (c == 0x05e8 || // resh
 				      c == 0x05d3)   // dalet
 					? width2 - width
 					: (width2 - width) / 2;
 			}
 			break;
 		}
 	}
 	// Draw nikud
 	pi_.pain.text(int(x_) + dx, yo_, str, font);
 }
 void RowPainter::paintArabicComposeChar(pos_type & vpos, FontInfo const & font)
 {
 	pos_type pos = bidi_.vis2log(vpos);
 	docstring str;
 	// first char
 	char_type c = par_.getChar(pos);
 	c = par_.transformChar(c, pos);
 	str += c;
 	++vpos;
 	int const width = theFontMetrics(font).width(c);
 	int dx = 0;
 	for (pos_type i = pos - 1; i >= 0; --i) {
 		c = par_.getChar(i);
 		if (!Encodings::isArabicComposeChar(c)) {
 			if (isPrintableNonspace(c)) {
 				int const width2 = pm_.singleWidth(i,
 						text_metrics_.displayFont(pit_, i));
 				dx = (width2 - width) / 2;
 			}
 			break;
 		}
 	}
 	// Draw nikud
 	pi_.pain.text(int(x_) + dx, yo_, str, font);
 }
 void RowPainter::paintChars(pos_type & vpos, FontInfo const & font,
 			    bool hebrew, bool arabic)
 {
 	// This method takes up 70% of time when typing
 	pos_type pos = bidi_.vis2log(vpos);
@ -236,15 +171,21 @@ void RowPainter::paintChars(pos_type & vpos, FontInfo const & font,
 	str.reserve(100);
 	str.push_back(prev_char);
 	// special case for arabic
 	string const & lang = font.language()->lang();
 	bool const swap_paren = lang == "arabic_arabtex"
 		|| lang == "arabic_arabi"
 		|| lang == "farsi";
 	// FIXME: Why only round brackets and why the difference to
 	// Hebrew? See also Paragraph::getUChar
-	if (arabic) {
+	if (swap_paren) {
 		char_type c = str[0];
 		if (c == '(')
 			c = ')';
 		else if (c == ')')
 			c = '(';
-		str[0] = par_.transformChar(c, pos);
+		str[0] = c;
 	}
 	pos_type const end = row_.endpos();
@ -260,6 +201,12 @@ void RowPainter::paintChars(pos_type & vpos, FontInfo const & font,
 	bool const spell_state =
 		lyxrc.spellcheck_continuously && par_.isMisspelled(pos);
 	// are we building a RtL string? 
 	//FIXME: I would like to use the new isRTL() from textutils.h,
 	// but it does not give the same results for some reason I do
 	// not understand.
 	bool const rtl = Encodings::isArabicChar(str[0]) || Encodings::isHebrewChar(str[0]);
 	// collect as much similar chars as we can
 	for (++vpos ; vpos < end ; ++vpos) {
 		if (lyxrc.force_paint_single_char)
@ -287,48 +234,27 @@ void RowPainter::paintChars(pos_type & vpos, FontInfo const & font,
 		char_type c = par_.getChar(pos);
 		//FIXME: I would like to use the new isRTL() from textutils.h,
 		// but it does not give the same results for some reason I do
 		// not understand.
 		bool const new_rtl = Encodings::isArabicChar(c) || Encodings::isHebrewChar(c);
 		if (new_rtl != rtl)
 			// String direction has changed
 			break;
 		if (c == '\t')
 			break;
 		if (!isPrintableNonspace(c))
 			break;
 		/* Because we do our own bidi, at this point the strings are
 		 * already in visual order. However, Qt also applies its own
 		 * bidi algorithm to strings that it paints to the screen.
 		 * Therefore, if we were to paint Hebrew/Arabic words as a
 		 * single string, the letters in the words would get reversed
 		 * again. In order to avoid that, we don't collect Hebrew/
 		 * Arabic characters, but rather paint them one at a time.
 		 * See also http://thread.gmane.org/gmane.editors.lyx.devel/79740
 		 */
 		if (hebrew)
 			break;
 		/* FIXME: these checks are irrelevant, since 'arabic' and
 		 * 'hebrew' alone are already going to trigger a break.
 		 * However, this should not be removed completely, because
 		 * if an alternative solution is found which allows grouping
 		 * of arabic and hebrew characters, then these breaks may have
 		 * to be re-applied.
 		if (arabic && Encodings::isArabicComposeChar(c))
 			break;
 		if (hebrew && Encodings::isHebrewComposeChar(c))
 			break;
 		*/
 		// FIXME: Why only round brackets and why the difference to
 		// Hebrew? See also Paragraph::getUChar
-		if (arabic) {
+		if (swap_paren) {
 			if (c == '(')
 				c = ')';
 			else if (c == ')')
 				c = '(';
 			c = par_.transformChar(c, pos);
 			/* see comment in hebrew, explaining why we break */
 			break;
 		}
 		str.push_back(c);
@ -337,15 +263,27 @@ void RowPainter::paintChars(pos_type & vpos, FontInfo const & font,
 	docstring s(&str[0], str.size());
 	/* Because we do our own bidi, at this point the strings are
 	 * already in visual order. However, Qt also applies its own
 	 * bidi algorithm to strings that it paints to the screen.
 	 * Therefore, if we were to paint Hebrew/Arabic words as a
 	 * single string, the letters in the words would get reversed
 	 * again. In order to avoid that, we reverse the string in advance.
 	 * See also http://thread.gmane.org/gmane.editors.lyx.devel/79740
 	 * for an earlier thread on the subject
 	 */
 	if (rtl)
 		reverse(s.begin(), s.end());
 	if (s[0] == '\t')
 		s.replace(0,1,from_ascii("    "));
 	if (!selection && !change_running.changed()) {
-		x_ += pi_.pain.text(int(x_), yo_, s, font);
+		x_ += pi_.pain.text(int(x_), yo_, s, font.fontInfo());
 		return;
 	}
-	FontInfo copy = font;
+	FontInfo copy = font.fontInfo();
 	if (change_running.changed())
 		copy.setPaintColor(change_running.color());
 	else if (selection)
@ -394,36 +332,14 @@ void RowPainter::paintMisspelledMark(double orig_x, bool changed)
 void RowPainter::paintFromPos(pos_type & vpos, bool changed)
 {
 	pos_type const pos = bidi_.vis2log(vpos);
-	Font const orig_font = text_metrics_.displayFont(pit_, pos);
+	Font const font = text_metrics_.displayFont(pit_, pos);
 	double const orig_x = x_;
-	// usual characters, no insets
+	paintChars(vpos, font);
-	char_type const c = par_.getChar(pos);
+	paintForeignMark(orig_x, font.language());
-	// special case languages
+	// Paint the spelling mark if needed.
-	string const & lang = orig_font.language()->lang();
+	if (lyxrc.spellcheck_continuously && par_.isMisspelled(pos)) {
 	bool const hebrew = lang == "hebrew";
 	bool const arabic = lang == "arabic_arabtex" || lang == "arabic_arabi" ||
 						lang == "farsi";
 	// spelling correct?
 	bool const misspelled =
 		lyxrc.spellcheck_continuously && par_.isMisspelled(pos);
 	// draw as many chars as we can
 	if ((!hebrew && !arabic)
 		|| (hebrew && !Encodings::isHebrewComposeChar(c))
 		|| (arabic && !Encodings::isArabicComposeChar(c))) {
 		paintChars(vpos, orig_font.fontInfo(), hebrew, arabic);
 	} else if (hebrew) {
 		paintHebrewComposeChar(vpos, orig_font.fontInfo());
 	} else if (arabic) {
 		paintArabicComposeChar(vpos, orig_font.fontInfo());
 	}
 	paintForeignMark(orig_x, orig_font.language());
 	if (lyxrc.spellcheck_continuously && misspelled) {
 		// check for cursor position
 		// don't draw misspelled marker for words at cursor position
 		// we don't want to disturb the process of text editing
--- a/src/rowpainter.h
+++ b/src/rowpainter.h
@ -61,10 +61,7 @@ private:
 	void paintSeparator(double orig_x, double width, FontInfo const & font);
 	void paintForeignMark(double orig_x, Language const * lang, int desc = 0);
 	void paintMisspelledMark(double orig_x, bool changed);
-	void paintHebrewComposeChar(pos_type & vpos, FontInfo const & font);
+	void paintChars(pos_type & vpos, Font const & font);
 	void paintArabicComposeChar(pos_type & vpos, FontInfo const & font);
 	void paintChars(pos_type & vpos, FontInfo const & font,
 			bool hebrew, bool arabic);
 	int paintAppendixStart(int y);
 	void paintFromPos(pos_type & vpos, bool changed);
 	void paintInset(Inset const * inset, pos_type const pos);
--- a/src/support/lstrings.cpp
+++ b/src/support/lstrings.cpp
@ -161,6 +161,29 @@ bool isNumber(char_type c)
 }
 bool isRTL(char_type c)
 {
 	if (!is_utf16(c))
 		// assume that no non-utf16 character is right-to-left
 		// c outside the UCS4 range is catched as well
 		return false;
 	QChar::Direction direction = ucs4_to_qchar(c).direction();
 	/**
 	 * See for example:
 	 *  http://en.wikipedia.org/wiki/Template:Bidi_Class_%28Unicode%29.
 	 * Here we only handle the easy cases:
 	 *  * R:  Hebrew alphabet and related punctuation
 	 *  * AL: Arabic, Thaana and Syriac alphabets, and most
 	 *        punctuation specific to those scripts
 	 *
 	 * FIXME: testing show that this does not work (see
 	 * RowPainter::paintChars), but my knowledge of unicode is too
 	 * poor to understand why.
 	 */
 	return direction == QChar::DirR || direction ==QChar::DirAL;
 }
 bool isDigitASCII(char_type c)
 {
 	return '0' <= c && c <= '9';
--- a/src/support/textutils.h
+++ b/src/support/textutils.h
@ -44,6 +44,9 @@ bool isSpace(char_type c);
 /// return true if a unicode char is a numeral.
 bool isNumber(char_type c);
 /// return true is a unicode char uses a right-to-left direction for layout
 bool isRTL(char_type c);
 /// return whether \p c is a digit in the ASCII range
 bool isDigitASCII(char_type c);