mirror of
https://git.lyx.org/repos/lyx.git
synced 2024-12-22 21:21:32 +00:00
Draw right-to-left text string-wise using Qt
We rely on Qt built-in unicode support for handling Arabic and Hebrew compose characters. This allows to avoid to use our homegrown machinery. This should provide a nice speedup at a low cost and will eventually allow us to get rid of: * most of our Arabic/Hebrew machinery in Encodings.cpp, * Paragraph::transformChar, * and probably more.
This commit is contained in:
parent
b8170e0e01
commit
ea1a5cb80e
@ -11,7 +11,7 @@ what we have with force_paint_single_char. Moreover there has been
|
|||||||
some code factorization in TextMetrics, where the same row-breaking
|
some code factorization in TextMetrics, where the same row-breaking
|
||||||
algorithm was basically implemented 3 times.
|
algorithm was basically implemented 3 times.
|
||||||
|
|
||||||
Currently everything is supposed to work for LTR text, and RTL text
|
Currently everything is supposed to work for LTR text, and RtL text
|
||||||
should work too except possibly metrics with Arabic and Hebrew fonts.
|
should work too except possibly metrics with Arabic and Hebrew fonts.
|
||||||
|
|
||||||
When KEEP_OLD_METRICS_CODE is defined in TextMetrics.cpp, the new code
|
When KEEP_OLD_METRICS_CODE is defined in TextMetrics.cpp, the new code
|
||||||
@ -40,13 +40,14 @@ What is done:
|
|||||||
lyxrc.force_paint_single_char is false. In this case, remove also
|
lyxrc.force_paint_single_char is false. In this case, remove also
|
||||||
useless workarounds which disable kerning and ligatures.
|
useless workarounds which disable kerning and ligatures.
|
||||||
|
|
||||||
|
* when lyxrc.force_paint_single_char is false, draw also RtL text
|
||||||
|
string-wise. This both speed-up drawing and prepare for code
|
||||||
|
removal, since we now rely on Qt to do things we use to do by
|
||||||
|
ourselves (see isArabic* and isHebrew* code in Encodings.cpp).
|
||||||
|
|
||||||
|
|
||||||
Next steps:
|
Next steps:
|
||||||
|
|
||||||
* check what happens with Arabic and/or Hebrew text. There may be some
|
|
||||||
problems related to compose characters. Investigate whether RtL
|
|
||||||
strings could be drawn on a string-wise basis.
|
|
||||||
|
|
||||||
* investigate whether strings could be cut at separators in RowPainter
|
* investigate whether strings could be cut at separators in RowPainter
|
||||||
only in justified text. This would speed-up painting in other cases
|
only in justified text. This would speed-up painting in other cases
|
||||||
by lowering the number of strings to draw.
|
by lowering the number of strings to draw.
|
||||||
@ -76,18 +77,18 @@ Steps for later (aka out of the scope of this branch):
|
|||||||
|
|
||||||
Known bugs:
|
Known bugs:
|
||||||
|
|
||||||
* there are still difference in what breaks words. In particular,
|
|
||||||
RowPainter breaks strings at: selection end, spell-checking
|
|
||||||
extremities. This seems to be harmless.
|
|
||||||
|
|
||||||
* when clicking in the right margin, getPosNearX does not return
|
|
||||||
the same value as before. I am not sure whether this is important.
|
|
||||||
|
|
||||||
* When selecting text, the display seems to move around. This is
|
* When selecting text, the display seems to move around. This is
|
||||||
because partly selected words are drawn in two parts, and in case
|
because partly selected words are drawn in two parts, and in case
|
||||||
like "ef|fort" or "V|AN", there are some ligature or kerning effects
|
like "ef|fort" or "V|AN", there are some ligature or kerning effects
|
||||||
that change the display. I am not sure yet how to fix that.
|
that change the display. I am not sure yet how to fix that.
|
||||||
|
|
||||||
|
* there are other differences in what breaks words. In particular,
|
||||||
|
RowPainter breaks strings at spell-checking extremities. This seems
|
||||||
|
to be harmless.
|
||||||
|
|
||||||
|
* when clicking in the right margin, getPosNearX does not return
|
||||||
|
the same value as before. I am not sure whether this is important.
|
||||||
|
|
||||||
|
|
||||||
Other differences in behavior (aka bug fixes):
|
Other differences in behavior (aka bug fixes):
|
||||||
|
|
||||||
|
@ -711,6 +711,12 @@ docstring Encodings::fromLaTeXCommand(docstring const & cmd, int cmdtype,
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
bool Encodings::isHebrewChar(char_type c)
|
||||||
|
{
|
||||||
|
return c >= 0x0590 && c <= 0x05ff;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
bool Encodings::isHebrewComposeChar(char_type c)
|
bool Encodings::isHebrewComposeChar(char_type c)
|
||||||
{
|
{
|
||||||
return c <= 0x05c2 && c >= 0x05b0 && c != 0x05be && c != 0x05c0;
|
return c <= 0x05c2 && c >= 0x05b0 && c != 0x05be && c != 0x05c0;
|
||||||
|
@ -269,6 +269,8 @@ public:
|
|||||||
///
|
///
|
||||||
static bool isHebrewComposeChar(char_type c);
|
static bool isHebrewComposeChar(char_type c);
|
||||||
///
|
///
|
||||||
|
static bool isHebrewChar(char_type c);
|
||||||
|
///
|
||||||
static bool isArabicComposeChar(char_type c);
|
static bool isArabicComposeChar(char_type c);
|
||||||
///
|
///
|
||||||
static bool isArabicSpecialChar(char_type c);
|
static bool isArabicSpecialChar(char_type c);
|
||||||
|
@ -161,72 +161,7 @@ void RowPainter::paintInset(Inset const * inset, pos_type const pos)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void RowPainter::paintHebrewComposeChar(pos_type & vpos, FontInfo const & font)
|
void RowPainter::paintChars(pos_type & vpos, Font const & font)
|
||||||
{
|
|
||||||
pos_type pos = bidi_.vis2log(vpos);
|
|
||||||
|
|
||||||
docstring str;
|
|
||||||
|
|
||||||
// first char
|
|
||||||
char_type c = par_.getChar(pos);
|
|
||||||
str += c;
|
|
||||||
++vpos;
|
|
||||||
|
|
||||||
int const width = theFontMetrics(font).width(c);
|
|
||||||
int dx = 0;
|
|
||||||
|
|
||||||
for (pos_type i = pos - 1; i >= 0; --i) {
|
|
||||||
c = par_.getChar(i);
|
|
||||||
if (!Encodings::isHebrewComposeChar(c)) {
|
|
||||||
if (isPrintableNonspace(c)) {
|
|
||||||
int const width2 = pm_.singleWidth(i,
|
|
||||||
text_metrics_.displayFont(pit_, i));
|
|
||||||
dx = (c == 0x05e8 || // resh
|
|
||||||
c == 0x05d3) // dalet
|
|
||||||
? width2 - width
|
|
||||||
: (width2 - width) / 2;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Draw nikud
|
|
||||||
pi_.pain.text(int(x_) + dx, yo_, str, font);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void RowPainter::paintArabicComposeChar(pos_type & vpos, FontInfo const & font)
|
|
||||||
{
|
|
||||||
pos_type pos = bidi_.vis2log(vpos);
|
|
||||||
docstring str;
|
|
||||||
|
|
||||||
// first char
|
|
||||||
char_type c = par_.getChar(pos);
|
|
||||||
c = par_.transformChar(c, pos);
|
|
||||||
str += c;
|
|
||||||
++vpos;
|
|
||||||
|
|
||||||
int const width = theFontMetrics(font).width(c);
|
|
||||||
int dx = 0;
|
|
||||||
|
|
||||||
for (pos_type i = pos - 1; i >= 0; --i) {
|
|
||||||
c = par_.getChar(i);
|
|
||||||
if (!Encodings::isArabicComposeChar(c)) {
|
|
||||||
if (isPrintableNonspace(c)) {
|
|
||||||
int const width2 = pm_.singleWidth(i,
|
|
||||||
text_metrics_.displayFont(pit_, i));
|
|
||||||
dx = (width2 - width) / 2;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// Draw nikud
|
|
||||||
pi_.pain.text(int(x_) + dx, yo_, str, font);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void RowPainter::paintChars(pos_type & vpos, FontInfo const & font,
|
|
||||||
bool hebrew, bool arabic)
|
|
||||||
{
|
{
|
||||||
// This method takes up 70% of time when typing
|
// This method takes up 70% of time when typing
|
||||||
pos_type pos = bidi_.vis2log(vpos);
|
pos_type pos = bidi_.vis2log(vpos);
|
||||||
@ -236,15 +171,21 @@ void RowPainter::paintChars(pos_type & vpos, FontInfo const & font,
|
|||||||
str.reserve(100);
|
str.reserve(100);
|
||||||
str.push_back(prev_char);
|
str.push_back(prev_char);
|
||||||
|
|
||||||
|
// special case for arabic
|
||||||
|
string const & lang = font.language()->lang();
|
||||||
|
bool const swap_paren = lang == "arabic_arabtex"
|
||||||
|
|| lang == "arabic_arabi"
|
||||||
|
|| lang == "farsi";
|
||||||
|
|
||||||
// FIXME: Why only round brackets and why the difference to
|
// FIXME: Why only round brackets and why the difference to
|
||||||
// Hebrew? See also Paragraph::getUChar
|
// Hebrew? See also Paragraph::getUChar
|
||||||
if (arabic) {
|
if (swap_paren) {
|
||||||
char_type c = str[0];
|
char_type c = str[0];
|
||||||
if (c == '(')
|
if (c == '(')
|
||||||
c = ')';
|
c = ')';
|
||||||
else if (c == ')')
|
else if (c == ')')
|
||||||
c = '(';
|
c = '(';
|
||||||
str[0] = par_.transformChar(c, pos);
|
str[0] = c;
|
||||||
}
|
}
|
||||||
|
|
||||||
pos_type const end = row_.endpos();
|
pos_type const end = row_.endpos();
|
||||||
@ -260,6 +201,12 @@ void RowPainter::paintChars(pos_type & vpos, FontInfo const & font,
|
|||||||
bool const spell_state =
|
bool const spell_state =
|
||||||
lyxrc.spellcheck_continuously && par_.isMisspelled(pos);
|
lyxrc.spellcheck_continuously && par_.isMisspelled(pos);
|
||||||
|
|
||||||
|
// are we building a RtL string?
|
||||||
|
//FIXME: I would like to use the new isRTL() from textutils.h,
|
||||||
|
// but it does not give the same results for some reason I do
|
||||||
|
// not understand.
|
||||||
|
bool const rtl = Encodings::isArabicChar(str[0]) || Encodings::isHebrewChar(str[0]);
|
||||||
|
|
||||||
// collect as much similar chars as we can
|
// collect as much similar chars as we can
|
||||||
for (++vpos ; vpos < end ; ++vpos) {
|
for (++vpos ; vpos < end ; ++vpos) {
|
||||||
if (lyxrc.force_paint_single_char)
|
if (lyxrc.force_paint_single_char)
|
||||||
@ -287,48 +234,27 @@ void RowPainter::paintChars(pos_type & vpos, FontInfo const & font,
|
|||||||
|
|
||||||
char_type c = par_.getChar(pos);
|
char_type c = par_.getChar(pos);
|
||||||
|
|
||||||
|
//FIXME: I would like to use the new isRTL() from textutils.h,
|
||||||
|
// but it does not give the same results for some reason I do
|
||||||
|
// not understand.
|
||||||
|
bool const new_rtl = Encodings::isArabicChar(c) || Encodings::isHebrewChar(c);
|
||||||
|
if (new_rtl != rtl)
|
||||||
|
// String direction has changed
|
||||||
|
break;
|
||||||
|
|
||||||
if (c == '\t')
|
if (c == '\t')
|
||||||
break;
|
break;
|
||||||
|
|
||||||
if (!isPrintableNonspace(c))
|
if (!isPrintableNonspace(c))
|
||||||
break;
|
break;
|
||||||
|
|
||||||
/* Because we do our own bidi, at this point the strings are
|
|
||||||
* already in visual order. However, Qt also applies its own
|
|
||||||
* bidi algorithm to strings that it paints to the screen.
|
|
||||||
* Therefore, if we were to paint Hebrew/Arabic words as a
|
|
||||||
* single string, the letters in the words would get reversed
|
|
||||||
* again. In order to avoid that, we don't collect Hebrew/
|
|
||||||
* Arabic characters, but rather paint them one at a time.
|
|
||||||
* See also http://thread.gmane.org/gmane.editors.lyx.devel/79740
|
|
||||||
*/
|
|
||||||
if (hebrew)
|
|
||||||
break;
|
|
||||||
|
|
||||||
/* FIXME: these checks are irrelevant, since 'arabic' and
|
|
||||||
* 'hebrew' alone are already going to trigger a break.
|
|
||||||
* However, this should not be removed completely, because
|
|
||||||
* if an alternative solution is found which allows grouping
|
|
||||||
* of arabic and hebrew characters, then these breaks may have
|
|
||||||
* to be re-applied.
|
|
||||||
|
|
||||||
if (arabic && Encodings::isArabicComposeChar(c))
|
|
||||||
break;
|
|
||||||
|
|
||||||
if (hebrew && Encodings::isHebrewComposeChar(c))
|
|
||||||
break;
|
|
||||||
*/
|
|
||||||
|
|
||||||
// FIXME: Why only round brackets and why the difference to
|
// FIXME: Why only round brackets and why the difference to
|
||||||
// Hebrew? See also Paragraph::getUChar
|
// Hebrew? See also Paragraph::getUChar
|
||||||
if (arabic) {
|
if (swap_paren) {
|
||||||
if (c == '(')
|
if (c == '(')
|
||||||
c = ')';
|
c = ')';
|
||||||
else if (c == ')')
|
else if (c == ')')
|
||||||
c = '(';
|
c = '(';
|
||||||
c = par_.transformChar(c, pos);
|
|
||||||
/* see comment in hebrew, explaining why we break */
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
str.push_back(c);
|
str.push_back(c);
|
||||||
@ -337,15 +263,27 @@ void RowPainter::paintChars(pos_type & vpos, FontInfo const & font,
|
|||||||
|
|
||||||
docstring s(&str[0], str.size());
|
docstring s(&str[0], str.size());
|
||||||
|
|
||||||
|
/* Because we do our own bidi, at this point the strings are
|
||||||
|
* already in visual order. However, Qt also applies its own
|
||||||
|
* bidi algorithm to strings that it paints to the screen.
|
||||||
|
* Therefore, if we were to paint Hebrew/Arabic words as a
|
||||||
|
* single string, the letters in the words would get reversed
|
||||||
|
* again. In order to avoid that, we reverse the string in advance.
|
||||||
|
* See also http://thread.gmane.org/gmane.editors.lyx.devel/79740
|
||||||
|
* for an earlier thread on the subject
|
||||||
|
*/
|
||||||
|
if (rtl)
|
||||||
|
reverse(s.begin(), s.end());
|
||||||
|
|
||||||
if (s[0] == '\t')
|
if (s[0] == '\t')
|
||||||
s.replace(0,1,from_ascii(" "));
|
s.replace(0,1,from_ascii(" "));
|
||||||
|
|
||||||
if (!selection && !change_running.changed()) {
|
if (!selection && !change_running.changed()) {
|
||||||
x_ += pi_.pain.text(int(x_), yo_, s, font);
|
x_ += pi_.pain.text(int(x_), yo_, s, font.fontInfo());
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
FontInfo copy = font;
|
FontInfo copy = font.fontInfo();
|
||||||
if (change_running.changed())
|
if (change_running.changed())
|
||||||
copy.setPaintColor(change_running.color());
|
copy.setPaintColor(change_running.color());
|
||||||
else if (selection)
|
else if (selection)
|
||||||
@ -394,36 +332,14 @@ void RowPainter::paintMisspelledMark(double orig_x, bool changed)
|
|||||||
void RowPainter::paintFromPos(pos_type & vpos, bool changed)
|
void RowPainter::paintFromPos(pos_type & vpos, bool changed)
|
||||||
{
|
{
|
||||||
pos_type const pos = bidi_.vis2log(vpos);
|
pos_type const pos = bidi_.vis2log(vpos);
|
||||||
Font const orig_font = text_metrics_.displayFont(pit_, pos);
|
Font const font = text_metrics_.displayFont(pit_, pos);
|
||||||
double const orig_x = x_;
|
double const orig_x = x_;
|
||||||
|
|
||||||
// usual characters, no insets
|
paintChars(vpos, font);
|
||||||
char_type const c = par_.getChar(pos);
|
paintForeignMark(orig_x, font.language());
|
||||||
|
|
||||||
// special case languages
|
// Paint the spelling mark if needed.
|
||||||
string const & lang = orig_font.language()->lang();
|
if (lyxrc.spellcheck_continuously && par_.isMisspelled(pos)) {
|
||||||
bool const hebrew = lang == "hebrew";
|
|
||||||
bool const arabic = lang == "arabic_arabtex" || lang == "arabic_arabi" ||
|
|
||||||
lang == "farsi";
|
|
||||||
|
|
||||||
// spelling correct?
|
|
||||||
bool const misspelled =
|
|
||||||
lyxrc.spellcheck_continuously && par_.isMisspelled(pos);
|
|
||||||
|
|
||||||
// draw as many chars as we can
|
|
||||||
if ((!hebrew && !arabic)
|
|
||||||
|| (hebrew && !Encodings::isHebrewComposeChar(c))
|
|
||||||
|| (arabic && !Encodings::isArabicComposeChar(c))) {
|
|
||||||
paintChars(vpos, orig_font.fontInfo(), hebrew, arabic);
|
|
||||||
} else if (hebrew) {
|
|
||||||
paintHebrewComposeChar(vpos, orig_font.fontInfo());
|
|
||||||
} else if (arabic) {
|
|
||||||
paintArabicComposeChar(vpos, orig_font.fontInfo());
|
|
||||||
}
|
|
||||||
|
|
||||||
paintForeignMark(orig_x, orig_font.language());
|
|
||||||
|
|
||||||
if (lyxrc.spellcheck_continuously && misspelled) {
|
|
||||||
// check for cursor position
|
// check for cursor position
|
||||||
// don't draw misspelled marker for words at cursor position
|
// don't draw misspelled marker for words at cursor position
|
||||||
// we don't want to disturb the process of text editing
|
// we don't want to disturb the process of text editing
|
||||||
|
@ -61,10 +61,7 @@ private:
|
|||||||
void paintSeparator(double orig_x, double width, FontInfo const & font);
|
void paintSeparator(double orig_x, double width, FontInfo const & font);
|
||||||
void paintForeignMark(double orig_x, Language const * lang, int desc = 0);
|
void paintForeignMark(double orig_x, Language const * lang, int desc = 0);
|
||||||
void paintMisspelledMark(double orig_x, bool changed);
|
void paintMisspelledMark(double orig_x, bool changed);
|
||||||
void paintHebrewComposeChar(pos_type & vpos, FontInfo const & font);
|
void paintChars(pos_type & vpos, Font const & font);
|
||||||
void paintArabicComposeChar(pos_type & vpos, FontInfo const & font);
|
|
||||||
void paintChars(pos_type & vpos, FontInfo const & font,
|
|
||||||
bool hebrew, bool arabic);
|
|
||||||
int paintAppendixStart(int y);
|
int paintAppendixStart(int y);
|
||||||
void paintFromPos(pos_type & vpos, bool changed);
|
void paintFromPos(pos_type & vpos, bool changed);
|
||||||
void paintInset(Inset const * inset, pos_type const pos);
|
void paintInset(Inset const * inset, pos_type const pos);
|
||||||
|
@ -161,6 +161,29 @@ bool isNumber(char_type c)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
bool isRTL(char_type c)
|
||||||
|
{
|
||||||
|
if (!is_utf16(c))
|
||||||
|
// assume that no non-utf16 character is right-to-left
|
||||||
|
// c outside the UCS4 range is catched as well
|
||||||
|
return false;
|
||||||
|
QChar::Direction direction = ucs4_to_qchar(c).direction();
|
||||||
|
/**
|
||||||
|
* See for example:
|
||||||
|
* http://en.wikipedia.org/wiki/Template:Bidi_Class_%28Unicode%29.
|
||||||
|
* Here we only handle the easy cases:
|
||||||
|
* * R: Hebrew alphabet and related punctuation
|
||||||
|
* * AL: Arabic, Thaana and Syriac alphabets, and most
|
||||||
|
* punctuation specific to those scripts
|
||||||
|
*
|
||||||
|
* FIXME: testing show that this does not work (see
|
||||||
|
* RowPainter::paintChars), but my knowledge of unicode is too
|
||||||
|
* poor to understand why.
|
||||||
|
*/
|
||||||
|
return direction == QChar::DirR || direction ==QChar::DirAL;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
bool isDigitASCII(char_type c)
|
bool isDigitASCII(char_type c)
|
||||||
{
|
{
|
||||||
return '0' <= c && c <= '9';
|
return '0' <= c && c <= '9';
|
||||||
|
@ -44,6 +44,9 @@ bool isSpace(char_type c);
|
|||||||
/// return true if a unicode char is a numeral.
|
/// return true if a unicode char is a numeral.
|
||||||
bool isNumber(char_type c);
|
bool isNumber(char_type c);
|
||||||
|
|
||||||
|
/// return true is a unicode char uses a right-to-left direction for layout
|
||||||
|
bool isRTL(char_type c);
|
||||||
|
|
||||||
/// return whether \p c is a digit in the ASCII range
|
/// return whether \p c is a digit in the ASCII range
|
||||||
bool isDigitASCII(char_type c);
|
bool isDigitASCII(char_type c);
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user