From 0ec295d63ef1ae59ab016214d4934585d351ccc4 Mon Sep 17 00:00:00 2001 From: Juergen Spitzmueller Date: Tue, 30 Oct 2018 12:33:35 +0100 Subject: [PATCH] Fix parentheses with Hebrew Patch by Guy Rutenberg, with some changes of mine. Fixes: #11191 --- development/FORMAT | 6 ++++ lib/lyx2lyx/lyx2lyx_tools.py | 17 +++++++++- lib/lyx2lyx/lyx_2_4.py | 20 +++++++++++- src/Paragraph.cpp | 61 +++++++++++++++++------------------- src/TextMetrics.cpp | 11 +------ src/version.h | 4 +-- 6 files changed, 73 insertions(+), 46 deletions(-) diff --git a/development/FORMAT b/development/FORMAT index 251e00ec09..b87d395bc3 100644 --- a/development/FORMAT +++ b/development/FORMAT @@ -8,6 +8,12 @@ changes happened in particular if possible. A good example would be ----------------------- +2018-10-29 Guy Rutenberg + * format incremented to 566: Fix direction of Hebrew parentheses in the LyX file. + +2018-10-18 Kornel Benko + * format incremented to 565: Added Adobe Source Pro fonts. + 2018-09-20 Jürgen Spitzmüller * format incremented to 564: New info-inset lyxinfo subtype layoutformat. This returns the current layout format. diff --git a/lib/lyx2lyx/lyx2lyx_tools.py b/lib/lyx2lyx/lyx2lyx_tools.py index cb1996ecb5..51412e5b31 100644 --- a/lib/lyx2lyx/lyx2lyx_tools.py +++ b/lib/lyx2lyx/lyx2lyx_tools.py @@ -83,10 +83,13 @@ insert_document_option(document, option): remove_document_option(document, option): Remove _option_ as a document option. + +get_language_for_line(document, i): + Return the language setting for line number i. ''' import re -from parser_tools import find_token, find_end_of_inset +from parser_tools import find_token, find_end_of_inset, get_containing_layout from unicode_symbols import unicode_reps # This will accept either a list of lines or a single line. 
@@ -604,3 +607,15 @@ def is_document_option(document, option): return False return True + + +def get_language_for_line(document, i): + " Return the language for line number i" + layout = get_containing_layout(document.body, i) + if not layout: + return document.language + start_of_par = layout[3] + for line in document.body[i:start_of_par:-1]: + if line.startswith('\\lang '): + return line[len('\\lang '):] + return document.language diff --git a/lib/lyx2lyx/lyx_2_4.py b/lib/lyx2lyx/lyx_2_4.py index 365ba6fc02..b6b626c316 100644 --- a/lib/lyx2lyx/lyx_2_4.py +++ b/lib/lyx2lyx/lyx_2_4.py @@ -36,7 +36,7 @@ from parser_tools import (count_pars_in_inset, find_end_of_inset, find_end_of_la # is_in_inset, set_bool_value # find_tokens, find_token_exact, check_token -from lyx2lyx_tools import (put_cmd_in_ert, add_to_preamble) +from lyx2lyx_tools import (put_cmd_in_ert, add_to_preamble, get_language_for_line) # revert_font_attrs, insert_to_preamble, latex_length # get_ert, lyx2latex, lyx2verbatim, length_in_bp, convert_info_insets # revert_flex_inset, hex2ratio, str2bool @@ -1381,6 +1381,22 @@ def revert_lformatinfo(document): i = i + 1 +def convert_hebrew_parentheses(document): + " Don't reverse parentheses in Hebrew text" + for i, line in enumerate(document.body): + if line.startswith('\\\\'): + # not a text line, skip + continue + if get_language_for_line(document, i) == 'hebrew': + document.body[i] = line.replace('(','\x00').replace(')','(').replace('\x00',')') + + +def revert_hebrew_parentheses(document): + " Store parentheses in Hebrew text reversed" + # This only exists to keep the convert/revert naming convention + convert_hebrew_parentheses(document) + + ## # Conversion hub # @@ -1408,9 +1424,11 @@ convert = [ [563, []], [564, []], [565, [convert_AdobeFonts]], # Handle adobe fonts in GUI + [566, [convert_hebrew_parentheses]], ] revert = [ + [565, [revert_hebrew_parentheses]], [564, [revert_AdobeFonts]], [563, [revert_lformatinfo]], [562, [revert_listpargs]], diff 
--git a/src/Paragraph.cpp b/src/Paragraph.cpp index 96f95d721c..5bad896dfd 100644 --- a/src/Paragraph.cpp +++ b/src/Paragraph.cpp @@ -1998,47 +1998,44 @@ char_type Paragraph::getUChar(BufferParams const & bparams, { char_type c = d->text_[pos]; - // Return unchanged character in LTR languages. - if (!getFontSettings(bparams, pos).isRightToLeft()) + // Return unchanged character in LTR languages + // or if we use polyglossia/bidi. + if (rp.use_polyglossia || !getFontSettings(bparams, pos).isRightToLeft()) return c; - // FIXME This is a complete mess due to all the language-specific - // special cases. We need to unify this eventually, but this - // requires a file format change and some thought. - // We also need to unify the input of parentheses in different RTL - // languages. Currently, some have their own methods (Arabic: - // 18599/lyxsvn, Hebrew: e5f42f67d/lyxgit), some don't (Urdu, Syriac). - // Also note that the representation in the LyX file is probably wrong - // (see FIXME in TextMetrics::breakRow). - // Most likely, we should simply rely on Qt's unicode handling here. + // Without polyglossia/bidi, we need to account for some special cases. + // FIXME This needs to be audited! + // Check if: + // * The input is as expected for all delimiters + // => checked for Hebrew! + // * The output matches the display in the LyX workarea + // => checked for Hebrew! + // * The special cases below are really necessary + // => checked for Hebrew! + // * In arabic_arabi, brackets are transformed to Arabic + // Ornate Parentheses. Is this really wanted? + string const & lang = getFontSettings(bparams, pos).language()->lang(); - - // With polyglossia, brackets and stuff need not be reversed in RTL scripts - // FIXME: The special casing for Hebrew parens is due to the special - // handling on input (for Hebrew in e5f42f67d/lyxgit); see #8251. 
char_type uc = c; - if (rp.use_polyglossia) { - switch (c) { - case '(': - if (lang == "hebrew") - uc = ')'; - break; - case ')': - if (lang == "hebrew") - uc = '('; - break; - } - return uc; - } - // In the following languages, brackets don't need to be reversed. - // Furthermore, in arabic_arabi, they are transformed to Arabic - // Ornate Parentheses (dunno if this is really wanted) + // 1. In the following languages, parentheses need to be reversed. + bool const reverseparens = lang == "hebrew"; + + // 2. In the following languages, brackets don't need to be reversed. bool const reversebrackets = lang != "arabic_arabtex" && lang != "arabic_arabi" - && lang != "farsi"; + && lang != "farsi"; + // Now swap delimiters if needed. switch (c) { + case '(': + if (reverseparens) + uc = ')'; + break; + case ')': + if (reverseparens) + uc = '('; + break; case '[': if (reversebrackets) uc = ']'; diff --git a/src/TextMetrics.cpp b/src/TextMetrics.cpp index 2b0903c750..a106338421 100644 --- a/src/TextMetrics.cpp +++ b/src/TextMetrics.cpp @@ -892,17 +892,8 @@ bool TextMetrics::breakRow(Row & row, int const right_margin) const // ¶ U+00B6 PILCROW SIGN char_type const screen_char = (c == 0x2028) ? 0x2936 : 0x00B6; row.add(i, screen_char, *fi, par.lookupChange(i)); - } else { - // FIXME: please someone fix the Hebrew/Arabic parenthesis mess! - // see also Paragraph::getUChar. - if (fi->language()->lang() == "hebrew") { - if (c == '(') - c = ')'; - else if (c == ')') - c = '('; - } + } else row.add(i, c, *fi, par.lookupChange(i)); - } // add inline completion width // draw logically behind the previous character diff --git a/src/version.h b/src/version.h index 59781fd32c..09d0ddc487 100644 --- a/src/version.h +++ b/src/version.h @@ -32,8 +32,8 @@ extern char const * const lyx_version_info; // Do not remove the comment below, so we get merge conflict in // independent branches. Instead add your own. 
-#define LYX_FORMAT_LYX 565 // Kornel: Adobe fonts added -#define LYX_FORMAT_TEX2LYX 565 +#define LYX_FORMAT_LYX 566 // guyru: Fix parentheses in Hebrew +#define LYX_FORMAT_TEX2LYX 566 #if LYX_FORMAT_TEX2LYX != LYX_FORMAT_LYX #ifndef _MSC_VER