mirror of
https://git.lyx.org/repos/lyx.git
synced 2024-12-22 13:18:28 +00:00
Draw right-to-left text string-wise using Qt
We rely on Qt built-in unicode support for handling Arabic and Hebrew compose characters. This allows to avoid to use our homegrown machinery. This should provide a nice speedup at a low cost and will eventually allow us to get rid of: * most of our Arabic/Hebrew machinery in Encodings.cpp, * Paragraph::transformChar, * and probably more.
This commit is contained in:
parent
b8170e0e01
commit
ea1a5cb80e
@ -11,7 +11,7 @@ what we have with force_paint_single_char. Moreover there has been
|
||||
some code factorization in TextMetrics, where the same row-breaking
|
||||
algorithm was basically implemented 3 times.
|
||||
|
||||
Currently everything is supposed to work for LTR text, and RTL text
|
||||
Currently everything is supposed to work for LTR text, and RtL text
|
||||
should work too except possibly metrics with Arabic and Hebrew fonts.
|
||||
|
||||
When KEEP_OLD_METRICS_CODE is defined in TextMetrics.cpp, the new code
|
||||
@ -40,13 +40,14 @@ What is done:
|
||||
lyxrc.force_paint_single_char is false. In this case, remove also
|
||||
useless workarounds which disable kerning and ligatures.
|
||||
|
||||
* when lyxrc.force_paint_single_char is false, draw also RtL text
|
||||
string-wise. This both speed-up drawing and prepare for code
|
||||
removal, since we now rely on Qt to do things we use to do by
|
||||
ourselves (see isArabic* and isHebrew* code in Encodings.cpp).
|
||||
|
||||
|
||||
Next steps:
|
||||
|
||||
* check what happens with Arabic and/or Hebrew text. There may be some
|
||||
problems related to compose characters. Investigate whether RtL
|
||||
strings could be drawn on a string-wise basis.
|
||||
|
||||
* investigate whether strings could be cut at separators in RowPainter
|
||||
only in justified text. This would speed-up painting in other cases
|
||||
by lowering the number of strings to draw.
|
||||
@ -76,18 +77,18 @@ Steps for later (aka out of the scope of this branch):
|
||||
|
||||
Known bugs:
|
||||
|
||||
* there are still difference in what breaks words. In particular,
|
||||
RowPainter breaks strings at: selection end, spell-checking
|
||||
extremities. This seems to be harmless.
|
||||
|
||||
* when clicking in the right margin, getPosNearX does not return
|
||||
the same value as before. I am not sure whether this is important.
|
||||
|
||||
* When selecting text, the display seems to move around. This is
|
||||
because partly selected words are drawn in two parts, and in case
|
||||
like "ef|fort" or "V|AN", there are some ligature or kerning effects
|
||||
that change the display. I am not sure yet how to fix that.
|
||||
|
||||
* there are other differences in what breaks words. In particular,
|
||||
RowPainter breaks strings at spell-checking extremities. This seems
|
||||
to be harmless.
|
||||
|
||||
* when clicking in the right margin, getPosNearX does not return
|
||||
the same value as before. I am not sure whether this is important.
|
||||
|
||||
|
||||
Other differences in behavior (aka bug fixes):
|
||||
|
||||
|
@ -711,6 +711,12 @@ docstring Encodings::fromLaTeXCommand(docstring const & cmd, int cmdtype,
|
||||
}
|
||||
|
||||
|
||||
bool Encodings::isHebrewChar(char_type c)
|
||||
{
|
||||
return c >= 0x0590 && c <= 0x05ff;
|
||||
}
|
||||
|
||||
|
||||
bool Encodings::isHebrewComposeChar(char_type c)
|
||||
{
|
||||
return c <= 0x05c2 && c >= 0x05b0 && c != 0x05be && c != 0x05c0;
|
||||
|
@ -269,6 +269,8 @@ public:
|
||||
///
|
||||
static bool isHebrewComposeChar(char_type c);
|
||||
///
|
||||
static bool isHebrewChar(char_type c);
|
||||
///
|
||||
static bool isArabicComposeChar(char_type c);
|
||||
///
|
||||
static bool isArabicSpecialChar(char_type c);
|
||||
|
@ -161,72 +161,7 @@ void RowPainter::paintInset(Inset const * inset, pos_type const pos)
|
||||
}
|
||||
|
||||
|
||||
void RowPainter::paintHebrewComposeChar(pos_type & vpos, FontInfo const & font)
|
||||
{
|
||||
pos_type pos = bidi_.vis2log(vpos);
|
||||
|
||||
docstring str;
|
||||
|
||||
// first char
|
||||
char_type c = par_.getChar(pos);
|
||||
str += c;
|
||||
++vpos;
|
||||
|
||||
int const width = theFontMetrics(font).width(c);
|
||||
int dx = 0;
|
||||
|
||||
for (pos_type i = pos - 1; i >= 0; --i) {
|
||||
c = par_.getChar(i);
|
||||
if (!Encodings::isHebrewComposeChar(c)) {
|
||||
if (isPrintableNonspace(c)) {
|
||||
int const width2 = pm_.singleWidth(i,
|
||||
text_metrics_.displayFont(pit_, i));
|
||||
dx = (c == 0x05e8 || // resh
|
||||
c == 0x05d3) // dalet
|
||||
? width2 - width
|
||||
: (width2 - width) / 2;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Draw nikud
|
||||
pi_.pain.text(int(x_) + dx, yo_, str, font);
|
||||
}
|
||||
|
||||
|
||||
void RowPainter::paintArabicComposeChar(pos_type & vpos, FontInfo const & font)
|
||||
{
|
||||
pos_type pos = bidi_.vis2log(vpos);
|
||||
docstring str;
|
||||
|
||||
// first char
|
||||
char_type c = par_.getChar(pos);
|
||||
c = par_.transformChar(c, pos);
|
||||
str += c;
|
||||
++vpos;
|
||||
|
||||
int const width = theFontMetrics(font).width(c);
|
||||
int dx = 0;
|
||||
|
||||
for (pos_type i = pos - 1; i >= 0; --i) {
|
||||
c = par_.getChar(i);
|
||||
if (!Encodings::isArabicComposeChar(c)) {
|
||||
if (isPrintableNonspace(c)) {
|
||||
int const width2 = pm_.singleWidth(i,
|
||||
text_metrics_.displayFont(pit_, i));
|
||||
dx = (width2 - width) / 2;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
// Draw nikud
|
||||
pi_.pain.text(int(x_) + dx, yo_, str, font);
|
||||
}
|
||||
|
||||
|
||||
void RowPainter::paintChars(pos_type & vpos, FontInfo const & font,
|
||||
bool hebrew, bool arabic)
|
||||
void RowPainter::paintChars(pos_type & vpos, Font const & font)
|
||||
{
|
||||
// This method takes up 70% of time when typing
|
||||
pos_type pos = bidi_.vis2log(vpos);
|
||||
@ -236,15 +171,21 @@ void RowPainter::paintChars(pos_type & vpos, FontInfo const & font,
|
||||
str.reserve(100);
|
||||
str.push_back(prev_char);
|
||||
|
||||
// special case for arabic
|
||||
string const & lang = font.language()->lang();
|
||||
bool const swap_paren = lang == "arabic_arabtex"
|
||||
|| lang == "arabic_arabi"
|
||||
|| lang == "farsi";
|
||||
|
||||
// FIXME: Why only round brackets and why the difference to
|
||||
// Hebrew? See also Paragraph::getUChar
|
||||
if (arabic) {
|
||||
if (swap_paren) {
|
||||
char_type c = str[0];
|
||||
if (c == '(')
|
||||
c = ')';
|
||||
else if (c == ')')
|
||||
c = '(';
|
||||
str[0] = par_.transformChar(c, pos);
|
||||
str[0] = c;
|
||||
}
|
||||
|
||||
pos_type const end = row_.endpos();
|
||||
@ -260,6 +201,12 @@ void RowPainter::paintChars(pos_type & vpos, FontInfo const & font,
|
||||
bool const spell_state =
|
||||
lyxrc.spellcheck_continuously && par_.isMisspelled(pos);
|
||||
|
||||
// are we building a RtL string?
|
||||
//FIXME: I would like to use the new isRTL() from textutils.h,
|
||||
// but it does not give the same results for some reason I do
|
||||
// not understand.
|
||||
bool const rtl = Encodings::isArabicChar(str[0]) || Encodings::isHebrewChar(str[0]);
|
||||
|
||||
// collect as much similar chars as we can
|
||||
for (++vpos ; vpos < end ; ++vpos) {
|
||||
if (lyxrc.force_paint_single_char)
|
||||
@ -287,48 +234,27 @@ void RowPainter::paintChars(pos_type & vpos, FontInfo const & font,
|
||||
|
||||
char_type c = par_.getChar(pos);
|
||||
|
||||
//FIXME: I would like to use the new isRTL() from textutils.h,
|
||||
// but it does not give the same results for some reason I do
|
||||
// not understand.
|
||||
bool const new_rtl = Encodings::isArabicChar(c) || Encodings::isHebrewChar(c);
|
||||
if (new_rtl != rtl)
|
||||
// String direction has changed
|
||||
break;
|
||||
|
||||
if (c == '\t')
|
||||
break;
|
||||
|
||||
if (!isPrintableNonspace(c))
|
||||
break;
|
||||
|
||||
/* Because we do our own bidi, at this point the strings are
|
||||
* already in visual order. However, Qt also applies its own
|
||||
* bidi algorithm to strings that it paints to the screen.
|
||||
* Therefore, if we were to paint Hebrew/Arabic words as a
|
||||
* single string, the letters in the words would get reversed
|
||||
* again. In order to avoid that, we don't collect Hebrew/
|
||||
* Arabic characters, but rather paint them one at a time.
|
||||
* See also http://thread.gmane.org/gmane.editors.lyx.devel/79740
|
||||
*/
|
||||
if (hebrew)
|
||||
break;
|
||||
|
||||
/* FIXME: these checks are irrelevant, since 'arabic' and
|
||||
* 'hebrew' alone are already going to trigger a break.
|
||||
* However, this should not be removed completely, because
|
||||
* if an alternative solution is found which allows grouping
|
||||
* of arabic and hebrew characters, then these breaks may have
|
||||
* to be re-applied.
|
||||
|
||||
if (arabic && Encodings::isArabicComposeChar(c))
|
||||
break;
|
||||
|
||||
if (hebrew && Encodings::isHebrewComposeChar(c))
|
||||
break;
|
||||
*/
|
||||
|
||||
// FIXME: Why only round brackets and why the difference to
|
||||
// Hebrew? See also Paragraph::getUChar
|
||||
if (arabic) {
|
||||
if (swap_paren) {
|
||||
if (c == '(')
|
||||
c = ')';
|
||||
else if (c == ')')
|
||||
c = '(';
|
||||
c = par_.transformChar(c, pos);
|
||||
/* see comment in hebrew, explaining why we break */
|
||||
break;
|
||||
}
|
||||
|
||||
str.push_back(c);
|
||||
@ -337,15 +263,27 @@ void RowPainter::paintChars(pos_type & vpos, FontInfo const & font,
|
||||
|
||||
docstring s(&str[0], str.size());
|
||||
|
||||
/* Because we do our own bidi, at this point the strings are
|
||||
* already in visual order. However, Qt also applies its own
|
||||
* bidi algorithm to strings that it paints to the screen.
|
||||
* Therefore, if we were to paint Hebrew/Arabic words as a
|
||||
* single string, the letters in the words would get reversed
|
||||
* again. In order to avoid that, we reverse the string in advance.
|
||||
* See also http://thread.gmane.org/gmane.editors.lyx.devel/79740
|
||||
* for an earlier thread on the subject
|
||||
*/
|
||||
if (rtl)
|
||||
reverse(s.begin(), s.end());
|
||||
|
||||
if (s[0] == '\t')
|
||||
s.replace(0,1,from_ascii(" "));
|
||||
|
||||
if (!selection && !change_running.changed()) {
|
||||
x_ += pi_.pain.text(int(x_), yo_, s, font);
|
||||
x_ += pi_.pain.text(int(x_), yo_, s, font.fontInfo());
|
||||
return;
|
||||
}
|
||||
|
||||
FontInfo copy = font;
|
||||
FontInfo copy = font.fontInfo();
|
||||
if (change_running.changed())
|
||||
copy.setPaintColor(change_running.color());
|
||||
else if (selection)
|
||||
@ -394,36 +332,14 @@ void RowPainter::paintMisspelledMark(double orig_x, bool changed)
|
||||
void RowPainter::paintFromPos(pos_type & vpos, bool changed)
|
||||
{
|
||||
pos_type const pos = bidi_.vis2log(vpos);
|
||||
Font const orig_font = text_metrics_.displayFont(pit_, pos);
|
||||
Font const font = text_metrics_.displayFont(pit_, pos);
|
||||
double const orig_x = x_;
|
||||
|
||||
// usual characters, no insets
|
||||
char_type const c = par_.getChar(pos);
|
||||
paintChars(vpos, font);
|
||||
paintForeignMark(orig_x, font.language());
|
||||
|
||||
// special case languages
|
||||
string const & lang = orig_font.language()->lang();
|
||||
bool const hebrew = lang == "hebrew";
|
||||
bool const arabic = lang == "arabic_arabtex" || lang == "arabic_arabi" ||
|
||||
lang == "farsi";
|
||||
|
||||
// spelling correct?
|
||||
bool const misspelled =
|
||||
lyxrc.spellcheck_continuously && par_.isMisspelled(pos);
|
||||
|
||||
// draw as many chars as we can
|
||||
if ((!hebrew && !arabic)
|
||||
|| (hebrew && !Encodings::isHebrewComposeChar(c))
|
||||
|| (arabic && !Encodings::isArabicComposeChar(c))) {
|
||||
paintChars(vpos, orig_font.fontInfo(), hebrew, arabic);
|
||||
} else if (hebrew) {
|
||||
paintHebrewComposeChar(vpos, orig_font.fontInfo());
|
||||
} else if (arabic) {
|
||||
paintArabicComposeChar(vpos, orig_font.fontInfo());
|
||||
}
|
||||
|
||||
paintForeignMark(orig_x, orig_font.language());
|
||||
|
||||
if (lyxrc.spellcheck_continuously && misspelled) {
|
||||
// Paint the spelling mark if needed.
|
||||
if (lyxrc.spellcheck_continuously && par_.isMisspelled(pos)) {
|
||||
// check for cursor position
|
||||
// don't draw misspelled marker for words at cursor position
|
||||
// we don't want to disturb the process of text editing
|
||||
|
@ -61,10 +61,7 @@ private:
|
||||
void paintSeparator(double orig_x, double width, FontInfo const & font);
|
||||
void paintForeignMark(double orig_x, Language const * lang, int desc = 0);
|
||||
void paintMisspelledMark(double orig_x, bool changed);
|
||||
void paintHebrewComposeChar(pos_type & vpos, FontInfo const & font);
|
||||
void paintArabicComposeChar(pos_type & vpos, FontInfo const & font);
|
||||
void paintChars(pos_type & vpos, FontInfo const & font,
|
||||
bool hebrew, bool arabic);
|
||||
void paintChars(pos_type & vpos, Font const & font);
|
||||
int paintAppendixStart(int y);
|
||||
void paintFromPos(pos_type & vpos, bool changed);
|
||||
void paintInset(Inset const * inset, pos_type const pos);
|
||||
|
@ -161,6 +161,29 @@ bool isNumber(char_type c)
|
||||
}
|
||||
|
||||
|
||||
bool isRTL(char_type c)
|
||||
{
|
||||
if (!is_utf16(c))
|
||||
// assume that no non-utf16 character is right-to-left
|
||||
// c outside the UCS4 range is catched as well
|
||||
return false;
|
||||
QChar::Direction direction = ucs4_to_qchar(c).direction();
|
||||
/**
|
||||
* See for example:
|
||||
* http://en.wikipedia.org/wiki/Template:Bidi_Class_%28Unicode%29.
|
||||
* Here we only handle the easy cases:
|
||||
* * R: Hebrew alphabet and related punctuation
|
||||
* * AL: Arabic, Thaana and Syriac alphabets, and most
|
||||
* punctuation specific to those scripts
|
||||
*
|
||||
* FIXME: testing show that this does not work (see
|
||||
* RowPainter::paintChars), but my knowledge of unicode is too
|
||||
* poor to understand why.
|
||||
*/
|
||||
return direction == QChar::DirR || direction ==QChar::DirAL;
|
||||
}
|
||||
|
||||
|
||||
bool isDigitASCII(char_type c)
|
||||
{
|
||||
return '0' <= c && c <= '9';
|
||||
|
@ -44,6 +44,9 @@ bool isSpace(char_type c);
|
||||
/// return true if a unicode char is a numeral.
|
||||
bool isNumber(char_type c);
|
||||
|
||||
/// return true is a unicode char uses a right-to-left direction for layout
|
||||
bool isRTL(char_type c);
|
||||
|
||||
/// return whether \p c is a digit in the ASCII range
|
||||
bool isDigitASCII(char_type c);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user