Fix parentheses with Hebrew

Patch by Guy Rutenberg, with some changes of mine.

Fixes: #11191
This commit is contained in:
Juergen Spitzmueller 2018-10-30 12:33:35 +01:00
parent f0d7432608
commit 0ec295d63e
6 changed files with 73 additions and 46 deletions

View File

@ -8,6 +8,12 @@ changes happened in particular if possible. A good example would be
-----------------------
2018-10-29 Guy Rutenberg <guyrutenberg@gmail.com>
* format incremented to 566: Fix direction of Hebrew parentheses in the LyX file.
2018-10-18 Kornel Benko <kornel@lyx.org>
* format incremented to 565: Added Adobe Source Pro fonts.
2018-09-20 Jürgen Spitzmüller <spitz@lyx.org>
* format incremented to 564: New info-inset lyxinfo subtype layoutformat. This returns
the current layout format.

View File

@ -83,10 +83,13 @@ insert_document_option(document, option):
remove_document_option(document, option):
Remove _option_ as a document option.
get_language_for_line(document, i):
Return the language setting for line number i.
'''
import re
from parser_tools import find_token, find_end_of_inset
from parser_tools import find_token, find_end_of_inset, get_containing_layout
from unicode_symbols import unicode_reps
# This will accept either a list of lines or a single line.
@ -604,3 +607,15 @@ def is_document_option(document, option):
return False
return True
def get_language_for_line(document, i):
    """ Return the language for line number i.

    Scans backwards from line i to the start of the containing paragraph
    and returns the argument of the nearest "\\lang" switch. If line i is
    not inside a layout, or no switch is found in that range, the document
    language is returned.
    """
    layout = get_containing_layout(document.body, i)
    if not layout:
        return document.language
    # NOTE(review): layout[3] is taken to be the index of the first line of
    # the paragraph -- confirm against parser_tools.get_containing_layout.
    start_of_par = layout[3]
    lang_prefix = '\\lang '
    # Walk back from line i to start_of_par *inclusive*: a language switch
    # may sit on the very first line of the paragraph. (A reversed slice
    # body[i:start_of_par:-1] would silently drop that line.)
    last = min(i, len(document.body) - 1)
    for k in range(last, start_of_par - 1, -1):
        line = document.body[k]
        if line.startswith(lang_prefix):
            return line[len(lang_prefix):]
    return document.language

View File

@ -36,7 +36,7 @@ from parser_tools import (count_pars_in_inset, find_end_of_inset, find_end_of_la
# is_in_inset, set_bool_value
# find_tokens, find_token_exact, check_token
from lyx2lyx_tools import (put_cmd_in_ert, add_to_preamble)
from lyx2lyx_tools import (put_cmd_in_ert, add_to_preamble, get_language_for_line)
# revert_font_attrs, insert_to_preamble, latex_length
# get_ert, lyx2latex, lyx2verbatim, length_in_bp, convert_info_insets
# revert_flex_inset, hex2ratio, str2bool
@ -1381,6 +1381,22 @@ def revert_lformatinfo(document):
i = i + 1
def convert_hebrew_parentheses(document):
    """ Don't reverse parentheses in Hebrew text.

    Swaps "(" and ")" on every text line of the document body whose
    language, as reported by get_language_for_line(), is 'hebrew'.
    The body is modified in place.
    """
    for i, line in enumerate(document.body):
        if line.startswith('\\'):
            # Markup lines start with a single backslash (e.g. "\lang ...");
            # they are not text, so leave them untouched.
            continue
        if '(' not in line and ')' not in line:
            # Nothing to swap: skip the per-line language lookup.
            continue
        if get_language_for_line(document, i) == 'hebrew':
            # Swap through a NUL placeholder so that the second replace()
            # does not undo the first one.
            document.body[i] = line.replace('(','\x00').replace(')','(').replace('\x00',')')
def revert_hebrew_parentheses(document):
    " Store parentheses in Hebrew text reversed"
    # Swapping "(" and ")" is its own inverse, so reverting is the very
    # same operation as converting. This wrapper exists only to keep the
    # convert/revert naming convention.
    convert_hebrew_parentheses(document)
##
# Conversion hub
#
@ -1408,9 +1424,11 @@ convert = [
[563, []],
[564, []],
[565, [convert_AdobeFonts]], # Handle adobe fonts in GUI
[566, [convert_hebrew_parentheses]],
]
revert = [
[565, [revert_hebrew_parentheses]],
[564, [revert_AdobeFonts]],
[563, [revert_lformatinfo]],
[562, [revert_listpargs]],

View File

@ -1998,47 +1998,44 @@ char_type Paragraph::getUChar(BufferParams const & bparams,
{
char_type c = d->text_[pos];
// Return unchanged character in LTR languages.
if (!getFontSettings(bparams, pos).isRightToLeft())
// Return unchanged character in LTR languages
// or if we use polyglossia/bidi.
if (rp.use_polyglossia || !getFontSettings(bparams, pos).isRightToLeft())
return c;
// FIXME This is a complete mess due to all the language-specific
// special cases. We need to unify this eventually, but this
// requires a file format change and some thought.
// We also need to unify the input of parentheses in different RTL
// languages. Currently, some have their own methods (Arabic:
// 18599/lyxsvn, Hebrew: e5f42f67d/lyxgit), some don't (Urdu, Syriac).
// Also note that the representation in the LyX file is probably wrong
// (see FIXME in TextMetrics::breakRow).
// Most likely, we should simply rely on Qt's unicode handling here.
// Without polyglossia/bidi, we need to account for some special cases.
// FIXME This needs to be audited!
// Check if:
// * The input is as expected for all delimiters
// => checked for Hebrew!
// * The output matches the display in the LyX workarea
// => checked for Hebrew!
// * The special cases below are really necessary
// => checked for Hebrew!
// * In arabic_arabi, brackets are transformed to Arabic
// Ornate Parentheses. Is this really wanted?
string const & lang = getFontSettings(bparams, pos).language()->lang();
// With polyglossia, brackets and stuff need not be reversed in RTL scripts
// FIXME: The special casing for Hebrew parens is due to the special
// handling on input (for Hebrew in e5f42f67d/lyxgit); see #8251.
char_type uc = c;
if (rp.use_polyglossia) {
switch (c) {
case '(':
if (lang == "hebrew")
uc = ')';
break;
case ')':
if (lang == "hebrew")
uc = '(';
break;
}
return uc;
}
// In the following languages, brackets don't need to be reversed.
// Furthermore, in arabic_arabi, they are transformed to Arabic
// Ornate Parentheses (dunno if this is really wanted)
// 1. In the following languages, parentheses need to be reversed.
bool const reverseparens = lang == "hebrew";
// 2. In the following languages, brackets don't need to be reversed.
bool const reversebrackets = lang != "arabic_arabtex"
&& lang != "arabic_arabi"
&& lang != "farsi";
// Now swap delimiters if needed.
switch (c) {
case '(':
if (reverseparens)
uc = ')';
break;
case ')':
if (reverseparens)
uc = '(';
break;
case '[':
if (reversebrackets)
uc = ']';

View File

@ -892,17 +892,8 @@ bool TextMetrics::breakRow(Row & row, int const right_margin) const
// ¶ U+00B6 PILCROW SIGN
char_type const screen_char = (c == 0x2028) ? 0x2936 : 0x00B6;
row.add(i, screen_char, *fi, par.lookupChange(i));
} else {
// FIXME: please someone fix the Hebrew/Arabic parenthesis mess!
// see also Paragraph::getUChar.
if (fi->language()->lang() == "hebrew") {
if (c == '(')
c = ')';
else if (c == ')')
c = '(';
}
} else
row.add(i, c, *fi, par.lookupChange(i));
}
// add inline completion width
// draw logically behind the previous character

View File

@ -32,8 +32,8 @@ extern char const * const lyx_version_info;
// Do not remove the comment below, so we get merge conflict in
// independent branches. Instead add your own.
#define LYX_FORMAT_LYX 565 // Kornel: Adobe fonts added
#define LYX_FORMAT_TEX2LYX 565
#define LYX_FORMAT_LYX 566 // guyru: Fix parentheses in Hebrew
#define LYX_FORMAT_TEX2LYX 566
#if LYX_FORMAT_TEX2LYX != LYX_FORMAT_LYX
#ifndef _MSC_VER