Fix parentheses with Hebrew

Patch by Guy Rutenberg, with some changes of mine.

Fixes: #11191
This commit is contained in:
Juergen Spitzmueller 2018-10-30 12:33:35 +01:00
parent f0d7432608
commit 0ec295d63e
6 changed files with 73 additions and 46 deletions

View File

@ -8,6 +8,12 @@ changes happened in particular if possible. A good example would be
----------------------- -----------------------
2018-10-29 Guy Rutenberg <guyrutenberg@gmail.com>
* format incremented to 566: Fix direction of Hebrew parentheses in the LyX file.
2018-10-18 Kornel Benko <kornel@lyx.org>
* format incremented to 565: Added Adobe Source Pro fonts.
2018-09-20 Jürgen Spitzmüller <spitz@lyx.org> 2018-09-20 Jürgen Spitzmüller <spitz@lyx.org>
* format incremented to 564: New info-inset lyxinfo subtype layoutformat. This returns * format incremented to 564: New info-inset lyxinfo subtype layoutformat. This returns
the current layout format. the current layout format.

View File

@ -83,10 +83,13 @@ insert_document_option(document, option):
remove_document_option(document, option): remove_document_option(document, option):
Remove _option_ as a document option. Remove _option_ as a document option.
get_language_for_line(document, i):
Return the language setting for line number i.
''' '''
import re import re
from parser_tools import find_token, find_end_of_inset from parser_tools import find_token, find_end_of_inset, get_containing_layout
from unicode_symbols import unicode_reps from unicode_symbols import unicode_reps
# This will accept either a list of lines or a single line. # This will accept either a list of lines or a single line.
@ -604,3 +607,15 @@ def is_document_option(document, option):
return False return False
return True return True
def get_language_for_line(document, i):
    """Return the language that applies to body line number i.

    The paragraph containing line i is looked up with
    get_containing_layout(). Starting at line i and walking backwards, the
    first line that begins with the "\\lang " marker decides the result: the
    text following the marker is returned. When line i is not inside any
    layout, or no such marker is met, the document-wide language
    (document.language) is returned instead.
    """
    marker = '\\lang '
    containing = get_containing_layout(document.body, i)
    if containing:
        # Element 3 is used as the paragraph start index.
        # NOTE(review): the line at this index itself is NOT examined (the
        # backwards slice stops just before it) -- confirm this matches what
        # get_containing_layout() reports there.
        par_start = containing[3]
        candidates = (text for text in document.body[i:par_start:-1]
                      if text.startswith(marker))
        nearest = next(candidates, None)
        if nearest is not None:
            return nearest[len(marker):]
    return document.language

View File

@ -36,7 +36,7 @@ from parser_tools import (count_pars_in_inset, find_end_of_inset, find_end_of_la
# is_in_inset, set_bool_value # is_in_inset, set_bool_value
# find_tokens, find_token_exact, check_token # find_tokens, find_token_exact, check_token
from lyx2lyx_tools import (put_cmd_in_ert, add_to_preamble) from lyx2lyx_tools import (put_cmd_in_ert, add_to_preamble, get_language_for_line)
# revert_font_attrs, insert_to_preamble, latex_length # revert_font_attrs, insert_to_preamble, latex_length
# get_ert, lyx2latex, lyx2verbatim, length_in_bp, convert_info_insets # get_ert, lyx2latex, lyx2verbatim, length_in_bp, convert_info_insets
# revert_flex_inset, hex2ratio, str2bool # revert_flex_inset, hex2ratio, str2bool
@ -1381,6 +1381,22 @@ def revert_lformatinfo(document):
i = i + 1 i = i + 1
def convert_hebrew_parentheses(document):
    """Don't reverse parentheses in Hebrew text.

    Every text line of document.body whose language (as reported by
    get_language_for_line) is 'hebrew' gets its '(' and ')' characters
    exchanged in place. Lines that start with a backslash are LyX markup,
    not text, and are left untouched.
    """
    for i, line in enumerate(document.body):
        # Markup lines start with ONE backslash (e.g. "\lang hebrew").
        # The former test looked for two backslashes and so never matched,
        # letting parentheses inside markup lines be swapped as well.
        if line.startswith('\\'):
            # not a text line, skip
            continue
        # Nothing to exchange: avoid the (comparatively costly) per-line
        # language lookup altogether.
        if '(' not in line and ')' not in line:
            continue
        if get_language_for_line(document, i) == 'hebrew':
            # Three-step swap through a NUL placeholder so that the second
            # replace does not undo the first.
            document.body[i] = line.replace('(','\x00').replace(')','(').replace('\x00',')')
def revert_hebrew_parentheses(document):
    """Store parentheses in Hebrew text reversed.

    Reverting applies exactly the same parenthesis exchange as converting,
    so this simply delegates to convert_hebrew_parentheses().
    """
    # A separate function exists only to keep the convert/revert naming
    # convention used by the conversion tables.
    convert_hebrew_parentheses(document)
## ##
# Conversion hub # Conversion hub
# #
@ -1408,9 +1424,11 @@ convert = [
[563, []], [563, []],
[564, []], [564, []],
[565, [convert_AdobeFonts]], # Handle adobe fonts in GUI [565, [convert_AdobeFonts]], # Handle adobe fonts in GUI
[566, [convert_hebrew_parentheses]],
] ]
revert = [ revert = [
[565, [revert_hebrew_parentheses]],
[564, [revert_AdobeFonts]], [564, [revert_AdobeFonts]],
[563, [revert_lformatinfo]], [563, [revert_lformatinfo]],
[562, [revert_listpargs]], [562, [revert_listpargs]],

View File

@ -1998,47 +1998,44 @@ char_type Paragraph::getUChar(BufferParams const & bparams,
{ {
char_type c = d->text_[pos]; char_type c = d->text_[pos];
// Return unchanged character in LTR languages. // Return unchanged character in LTR languages
if (!getFontSettings(bparams, pos).isRightToLeft()) // or if we use polyglossia/bidi.
if (rp.use_polyglossia || !getFontSettings(bparams, pos).isRightToLeft())
return c; return c;
// FIXME This is a complete mess due to all the language-specific // Without polyglossia/bidi, we need to account for some special cases.
// special cases. We need to unify this eventually, but this // FIXME This needs to be audited!
// requires a file format change and some thought. // Check if:
// We also need to unify the input of parentheses in different RTL // * The input is as expected for all delimiters
// languages. Currently, some have their own methods (Arabic: // => checked for Hebrew!
// 18599/lyxsvn, Hebrew: e5f42f67d/lyxgit), some don't (Urdu, Syriac). // * The output matches the display in the LyX workarea
// Also note that the representation in the LyX file is probably wrong // => checked for Hebrew!
// (see FIXME in TextMetrics::breakRow). // * The special cases below are really necessary
// Most likely, we should simply rely on Qt's unicode handling here. // => checked for Hebrew!
// * In arabic_arabi, brackets are transformed to Arabic
// Ornate Parentheses. Is this really wanted?
string const & lang = getFontSettings(bparams, pos).language()->lang(); string const & lang = getFontSettings(bparams, pos).language()->lang();
// With polyglossia, brackets and stuff need not be reversed in RTL scripts
// FIXME: The special casing for Hebrew parens is due to the special
// handling on input (for Hebrew in e5f42f67d/lyxgit); see #8251.
char_type uc = c; char_type uc = c;
if (rp.use_polyglossia) {
switch (c) {
case '(':
if (lang == "hebrew")
uc = ')';
break;
case ')':
if (lang == "hebrew")
uc = '(';
break;
}
return uc;
}
// In the following languages, brackets don't need to be reversed. // 1. In the following languages, parentheses need to be reversed.
// Furthermore, in arabic_arabi, they are transformed to Arabic bool const reverseparens = lang == "hebrew";
// Ornate Parentheses (dunno if this is really wanted)
// 2. In the following languages, brackets don't need to be reversed.
bool const reversebrackets = lang != "arabic_arabtex" bool const reversebrackets = lang != "arabic_arabtex"
&& lang != "arabic_arabi" && lang != "arabic_arabi"
&& lang != "farsi"; && lang != "farsi";
// Now swap delimiters if needed.
switch (c) { switch (c) {
case '(':
if (reverseparens)
uc = ')';
break;
case ')':
if (reverseparens)
uc = '(';
break;
case '[': case '[':
if (reversebrackets) if (reversebrackets)
uc = ']'; uc = ']';

View File

@ -892,17 +892,8 @@ bool TextMetrics::breakRow(Row & row, int const right_margin) const
// ¶ U+00B6 PILCROW SIGN // ¶ U+00B6 PILCROW SIGN
char_type const screen_char = (c == 0x2028) ? 0x2936 : 0x00B6; char_type const screen_char = (c == 0x2028) ? 0x2936 : 0x00B6;
row.add(i, screen_char, *fi, par.lookupChange(i)); row.add(i, screen_char, *fi, par.lookupChange(i));
} else { } else
// FIXME: please someone fix the Hebrew/Arabic parenthesis mess!
// see also Paragraph::getUChar.
if (fi->language()->lang() == "hebrew") {
if (c == '(')
c = ')';
else if (c == ')')
c = '(';
}
row.add(i, c, *fi, par.lookupChange(i)); row.add(i, c, *fi, par.lookupChange(i));
}
// add inline completion width // add inline completion width
// draw logically behind the previous character // draw logically behind the previous character

View File

@ -32,8 +32,8 @@ extern char const * const lyx_version_info;
// Do not remove the comment below, so we get merge conflict in // Do not remove the comment below, so we get merge conflict in
// independent branches. Instead add your own. // independent branches. Instead add your own.
#define LYX_FORMAT_LYX 565 // Kornel: Adobe fonts added #define LYX_FORMAT_LYX 566 // guyru: Fix parentheses in Hebrew
#define LYX_FORMAT_TEX2LYX 565 #define LYX_FORMAT_TEX2LYX 566
#if LYX_FORMAT_TEX2LYX != LYX_FORMAT_LYX #if LYX_FORMAT_TEX2LYX != LYX_FORMAT_LYX
#ifndef _MSC_VER #ifndef _MSC_VER