From ea1a5cb80eaef71bc1cb7021bc3dbf2ef7a793b7 Mon Sep 17 00:00:00 2001 From: Jean-Marc Lasgouttes Date: Thu, 10 Apr 2014 16:37:21 +0200 Subject: [PATCH] Draw right-to-left text string-wise using Qt We rely on Qt built-in unicode support for handling Arabic and Hebrew compose characters. This allows to avoid to use our homegrown machinery. This should provide a nice speedup at a low cost and will eventually allow us to get rid of: * most of our Arabic/Hebrew machinery in Encodings.cpp, * Paragraph::transformChar, * and probably more. --- 00README_STR_METRICS_BRANCH | 25 +++--- src/Encoding.cpp | 6 ++ src/Encoding.h | 2 + src/rowpainter.cpp | 170 +++++++++--------------------------- src/rowpainter.h | 5 +- src/support/lstrings.cpp | 23 +++++ src/support/textutils.h | 3 + 7 files changed, 91 insertions(+), 143 deletions(-) diff --git a/00README_STR_METRICS_BRANCH b/00README_STR_METRICS_BRANCH index 82026e895c..359bb033dd 100644 --- a/00README_STR_METRICS_BRANCH +++ b/00README_STR_METRICS_BRANCH @@ -11,7 +11,7 @@ what we have with force_paint_single_char. Moreover there has been some code factorization in TextMetrics, where the same row-breaking algorithm was basically implemented 3 times. -Currently everything is supposed to work for LTR text, and RTL text +Currently everything is supposed to work for LTR text, and RtL text should work too except possibly metrics with Arabic and Hebrew fonts. When KEEP_OLD_METRICS_CODE is defined in TextMetrics.cpp, the new code @@ -40,13 +40,14 @@ What is done: lyxrc.force_paint_single_char is false. In this case, remove also useless workarounds which disable kerning and ligatures. +* when lyxrc.force_paint_single_char is false, draw also RtL text + string-wise. This both speed-up drawing and prepare for code + removal, since we now rely on Qt to do things we use to do by + ourselves (see isArabic* and isHebrew* code in Encodings.cpp). + Next steps: -* check what happens with Arabic and/or Hebrew text. There may be some - problems related to compose characters. Investigate whether RtL - strings could be drawn on a string-wise basis. - * investigate whether strings could be cut at separators in RowPainter only in justified text. This would speed-up painting in other cases by lowering the number of strings to draw. @@ -76,18 +77,18 @@ Steps for later (aka out of the scope of this branch): Known bugs: -* there are still difference in what breaks words. In particular, - RowPainter breaks strings at: selection end, spell-checking - extremities. This seems to be harmless. - -* when clicking in the right margin, getPosNearX does not return - the same value as before. I am not sure whether this is important. - * When selecting text, the display seems to move around. This is because partly selected words are drawn in two parts, and in case like "ef|fort" or "V|AN", there are some ligature or kerning effects that change the display. I am not sure yet how to fix that. +* there are other differences in what breaks words. In particular, + RowPainter breaks strings at spell-checking extremities. This seems + to be harmless. + +* when clicking in the right margin, getPosNearX does not return + the same value as before. I am not sure whether this is important. + Other differences in behavior (aka bug fixes): diff --git a/src/Encoding.cpp b/src/Encoding.cpp index a6f6a60ea3..7c5dc87c9a 100644 --- a/src/Encoding.cpp +++ b/src/Encoding.cpp @@ -711,6 +711,12 @@ docstring Encodings::fromLaTeXCommand(docstring const & cmd, int cmdtype, } +bool Encodings::isHebrewChar(char_type c) +{ + return c >= 0x0590 && c <= 0x05ff; +} + + bool Encodings::isHebrewComposeChar(char_type c) { return c <= 0x05c2 && c >= 0x05b0 && c != 0x05be && c != 0x05c0; diff --git a/src/Encoding.h b/src/Encoding.h index ed9c27e465..63aa93b545 100644 --- a/src/Encoding.h +++ b/src/Encoding.h @@ -269,6 +269,8 @@ public: /// static bool isHebrewComposeChar(char_type c); /// + static bool isHebrewChar(char_type c); + /// static bool isArabicComposeChar(char_type c); /// static bool isArabicSpecialChar(char_type c); diff --git a/src/rowpainter.cpp b/src/rowpainter.cpp index 5b69701bf8..ea0f946a6a 100644 --- a/src/rowpainter.cpp +++ b/src/rowpainter.cpp @@ -161,72 +161,7 @@ void RowPainter::paintInset(Inset const * inset, pos_type const pos) } -void RowPainter::paintHebrewComposeChar(pos_type & vpos, FontInfo const & font) -{ - pos_type pos = bidi_.vis2log(vpos); - - docstring str; - - // first char - char_type c = par_.getChar(pos); - str += c; - ++vpos; - - int const width = theFontMetrics(font).width(c); - int dx = 0; - - for (pos_type i = pos - 1; i >= 0; --i) { - c = par_.getChar(i); - if (!Encodings::isHebrewComposeChar(c)) { - if (isPrintableNonspace(c)) { - int const width2 = pm_.singleWidth(i, - text_metrics_.displayFont(pit_, i)); - dx = (c == 0x05e8 || // resh - c == 0x05d3) // dalet - ? width2 - width - : (width2 - width) / 2; - } - break; - } - } - - // Draw nikud - pi_.pain.text(int(x_) + dx, yo_, str, font); -} - - -void RowPainter::paintArabicComposeChar(pos_type & vpos, FontInfo const & font) -{ - pos_type pos = bidi_.vis2log(vpos); - docstring str; - - // first char - char_type c = par_.getChar(pos); - c = par_.transformChar(c, pos); - str += c; - ++vpos; - - int const width = theFontMetrics(font).width(c); - int dx = 0; - - for (pos_type i = pos - 1; i >= 0; --i) { - c = par_.getChar(i); - if (!Encodings::isArabicComposeChar(c)) { - if (isPrintableNonspace(c)) { - int const width2 = pm_.singleWidth(i, - text_metrics_.displayFont(pit_, i)); - dx = (width2 - width) / 2; - } - break; - } - } - // Draw nikud - pi_.pain.text(int(x_) + dx, yo_, str, font); -} - - -void RowPainter::paintChars(pos_type & vpos, FontInfo const & font, - bool hebrew, bool arabic) +void RowPainter::paintChars(pos_type & vpos, Font const & font) { // This method takes up 70% of time when typing pos_type pos = bidi_.vis2log(vpos); @@ -236,15 +171,21 @@ void RowPainter::paintChars(pos_type & vpos, FontInfo const & font, str.reserve(100); str.push_back(prev_char); + // special case for arabic + string const & lang = font.language()->lang(); + bool const swap_paren = lang == "arabic_arabtex" + || lang == "arabic_arabi" + || lang == "farsi"; + // FIXME: Why only round brackets and why the difference to // Hebrew? See also Paragraph::getUChar - if (arabic) { + if (swap_paren) { char_type c = str[0]; if (c == '(') c = ')'; else if (c == ')') c = '('; - str[0] = par_.transformChar(c, pos); + str[0] = c; } pos_type const end = row_.endpos(); @@ -260,6 +201,12 @@ void RowPainter::paintChars(pos_type & vpos, FontInfo const & font, bool const spell_state = lyxrc.spellcheck_continuously && par_.isMisspelled(pos); + // are we building a RtL string? + //FIXME: I would like to use the new isRTL() from textutils.h, + // but it does not give the same results for some reason I do + // not understand. + bool const rtl = Encodings::isArabicChar(str[0]) || Encodings::isHebrewChar(str[0]); + // collect as much similar chars as we can for (++vpos ; vpos < end ; ++vpos) { if (lyxrc.force_paint_single_char) @@ -287,48 +234,27 @@ void RowPainter::paintChars(pos_type & vpos, FontInfo const & font, char_type c = par_.getChar(pos); + //FIXME: I would like to use the new isRTL() from textutils.h, + // but it does not give the same results for some reason I do + // not understand. + bool const new_rtl = Encodings::isArabicChar(c) || Encodings::isHebrewChar(c); + if (new_rtl != rtl) + // String direction has changed + break; + if (c == '\t') break; if (!isPrintableNonspace(c)) break; - /* Because we do our own bidi, at this point the strings are - * already in visual order. However, Qt also applies its own - * bidi algorithm to strings that it paints to the screen. - * Therefore, if we were to paint Hebrew/Arabic words as a - * single string, the letters in the words would get reversed - * again. In order to avoid that, we don't collect Hebrew/ - * Arabic characters, but rather paint them one at a time. - * See also http://thread.gmane.org/gmane.editors.lyx.devel/79740 - */ - if (hebrew) - break; - - /* FIXME: these checks are irrelevant, since 'arabic' and - * 'hebrew' alone are already going to trigger a break. - * However, this should not be removed completely, because - * if an alternative solution is found which allows grouping - * of arabic and hebrew characters, then these breaks may have - * to be re-applied. - - if (arabic && Encodings::isArabicComposeChar(c)) - break; - - if (hebrew && Encodings::isHebrewComposeChar(c)) - break; - */ - // FIXME: Why only round brackets and why the difference to // Hebrew? See also Paragraph::getUChar - if (arabic) { + if (swap_paren) { if (c == '(') c = ')'; else if (c == ')') c = '('; - c = par_.transformChar(c, pos); - /* see comment in hebrew, explaining why we break */ - break; } str.push_back(c); @@ -337,15 +263,27 @@ void RowPainter::paintChars(pos_type & vpos, FontInfo const & font, docstring s(&str[0], str.size()); + /* Because we do our own bidi, at this point the strings are + * already in visual order. However, Qt also applies its own + * bidi algorithm to strings that it paints to the screen. + * Therefore, if we were to paint Hebrew/Arabic words as a + * single string, the letters in the words would get reversed + * again. In order to avoid that, we reverse the string in advance. + * See also http://thread.gmane.org/gmane.editors.lyx.devel/79740 + * for an earlier thread on the subject + */ + if (rtl) + reverse(s.begin(), s.end()); + if (s[0] == '\t') s.replace(0,1,from_ascii(" ")); if (!selection && !change_running.changed()) { - x_ += pi_.pain.text(int(x_), yo_, s, font); + x_ += pi_.pain.text(int(x_), yo_, s, font.fontInfo()); return; } - FontInfo copy = font; + FontInfo copy = font.fontInfo(); if (change_running.changed()) copy.setPaintColor(change_running.color()); else if (selection) @@ -394,36 +332,14 @@ void RowPainter::paintMisspelledMark(double orig_x, bool changed) void RowPainter::paintFromPos(pos_type & vpos, bool changed) { pos_type const pos = bidi_.vis2log(vpos); - Font const orig_font = text_metrics_.displayFont(pit_, pos); + Font const font = text_metrics_.displayFont(pit_, pos); double const orig_x = x_; - // usual characters, no insets - char_type const c = par_.getChar(pos); + paintChars(vpos, font); + paintForeignMark(orig_x, font.language()); - // special case languages - string const & lang = orig_font.language()->lang(); - bool const hebrew = lang == "hebrew"; - bool const arabic = lang == "arabic_arabtex" || lang == "arabic_arabi" || - lang == "farsi"; - - // spelling correct? - bool const misspelled = - lyxrc.spellcheck_continuously && par_.isMisspelled(pos); - - // draw as many chars as we can - if ((!hebrew && !arabic) - || (hebrew && !Encodings::isHebrewComposeChar(c)) - || (arabic && !Encodings::isArabicComposeChar(c))) { - paintChars(vpos, orig_font.fontInfo(), hebrew, arabic); - } else if (hebrew) { - paintHebrewComposeChar(vpos, orig_font.fontInfo()); - } else if (arabic) { - paintArabicComposeChar(vpos, orig_font.fontInfo()); - } - - paintForeignMark(orig_x, orig_font.language()); - - if (lyxrc.spellcheck_continuously && misspelled) { + // Paint the spelling mark if needed. + if (lyxrc.spellcheck_continuously && par_.isMisspelled(pos)) { // check for cursor position // don't draw misspelled marker for words at cursor position // we don't want to disturb the process of text editing diff --git a/src/rowpainter.h b/src/rowpainter.h index 644cb3773d..ae1a590310 100644 --- a/src/rowpainter.h +++ b/src/rowpainter.h @@ -61,10 +61,7 @@ private: void paintSeparator(double orig_x, double width, FontInfo const & font); void paintForeignMark(double orig_x, Language const * lang, int desc = 0); void paintMisspelledMark(double orig_x, bool changed); - void paintHebrewComposeChar(pos_type & vpos, FontInfo const & font); - void paintArabicComposeChar(pos_type & vpos, FontInfo const & font); - void paintChars(pos_type & vpos, FontInfo const & font, - bool hebrew, bool arabic); + void paintChars(pos_type & vpos, Font const & font); int paintAppendixStart(int y); void paintFromPos(pos_type & vpos, bool changed); void paintInset(Inset const * inset, pos_type const pos); diff --git a/src/support/lstrings.cpp b/src/support/lstrings.cpp index 8508e4ef13..f06b2eb4cb 100644 --- a/src/support/lstrings.cpp +++ b/src/support/lstrings.cpp @@ -161,6 +161,29 @@ bool isNumber(char_type c) } +bool isRTL(char_type c) +{ + if (!is_utf16(c)) + // assume that no non-utf16 character is right-to-left + // c outside the UCS4 range is catched as well + return false; + QChar::Direction direction = ucs4_to_qchar(c).direction(); + /** + * See for example: + * http://en.wikipedia.org/wiki/Template:Bidi_Class_%28Unicode%29. + * Here we only handle the easy cases: + * * R: Hebrew alphabet and related punctuation + * * AL: Arabic, Thaana and Syriac alphabets, and most + * punctuation specific to those scripts + * + * FIXME: testing show that this does not work (see + * RowPainter::paintChars), but my knowledge of unicode is too + * poor to understand why. + */ + return direction == QChar::DirR || direction ==QChar::DirAL; +} + + bool isDigitASCII(char_type c) { return '0' <= c && c <= '9'; diff --git a/src/support/textutils.h b/src/support/textutils.h index 78e34cb489..4cb304a132 100644 --- a/src/support/textutils.h +++ b/src/support/textutils.h @@ -44,6 +44,9 @@ bool isSpace(char_type c); /// return true if a unicode char is a numeral. bool isNumber(char_type c); +/// return true is a unicode char uses a right-to-left direction for layout +bool isRTL(char_type c); + /// return whether \p c is a digit in the ASCII range bool isDigitASCII(char_type c);