FindAdv: Special handling for \dot{i} and 'ß'

Different behaviour in regexp{..} for 'İ' and 'ß': 1.) The lowercase routine maps 'İ' to 'İ' itself, so that if we are searching while ignoring case, the string '\dot{I}' is converted to '\dot{i}'. In this case we have to change it to 'İ' (instead of 'i', as one would expect). 2.) If 'ß' is inserted via keyboard into a freshly created regexp box, it appears as \lyxmathsym{ß}; if pasted from the LyX screen, it appears as \text{ß}.
2024-11-07 12:32:26 +00:00 · 2019-03-10 00:29:56 +01:00 · 2019-03-10 00:29:56 +01:00 · c041439c51
commit c041439c51
parent 99e216b78d
1 changed files with 16 additions and 10 deletions
--- a/src/lyxfind.cpp
+++ b/src/lyxfind.cpp
@ -1263,8 +1263,13 @@ static void buildaccent(string n, string param, string values)
      // get the corresponding utf8-value
      if ((values[start] & 0xc0) != 0xc0) {
        // should not happen, utf8 encoding starts at least with 11xxxxxx
-        start++;
-        continue;
+	// but value for '\dot{i}' is 'i', which is ascii
+	if ((values[start] & 0x80) == 0) {
+	  // is ascii
+	  accents[key] = values.substr(start, 1);
+	}
+	start++;
+	continue;
      }
      for (int j = 1; ;j++) {
        if (start + j >= values.size()) {
@ -1272,7 +1277,7 @@ static void buildaccent(string n, string param, string values)
          start = values.size() - 1;
          break;
        }
-        else if ((values[start+j] & 0xc0) == 0xc0) {
+        else if ((values[start+j] & 0xc0) != 0x80) {
          // This is the first byte of following utf8 char
          accents[key] = values.substr(start, j);
          start += j;
@ -1289,11 +1294,12 @@ static void buildAccentsMap()
  accents["i"] = "ı";
  accents["jmath"] = "ȷ";
  accents["lyxmathsym{ß}"] = "ß";
+  accents["text{ß}"] = "ß";
  accents["ddot{\\imath}"] = "ï";
-  buildaccent("ddot", "aAeEiIoOuUyY",
-                      "äÄëËïÏöÖüÜÿŸ");	// umlaut
-  buildaccent("dot|.", "cCeEgGiIzZaAoObBdDfFyY",
-                       "ċĊėĖġĠiİżŻȧȦȯȮḃḂḋḊḟḞẏẎ");
+  buildaccent("ddot", "aAeEiIioOuUyY",
+                      "äÄëËïÏïöÖüÜÿŸ");	// umlaut
+  buildaccent("dot|.", "cCeEGgIizZaAoObBdDfFyY",
+                       "ċĊėĖĠġİİżŻȧȦȯȮḃḂḋḊḟḞẏẎ"); // dot{i} can only happen if ignoring case, but there is no lowercase of 'İ'
  accents["acute{\\imath}"] = "í";
  buildaccent("acute", "aAcCeElLoOnNrRsSuUyYzZiI",
                       "áÁćĆéÉĺĹóÓńŃŕŔśŚúÚýÝźŹíÍ");
@ -1302,8 +1308,8 @@ static void buildAccentsMap()
                            "åÅůŮẘẙ");  // ring
  accents["check{\\imath}"] = "ǐ";
  accents["check{\\jmath}"] = "ǰ";
-  buildaccent("check|v", "cCdDaAeEiIoOuUgGkKhHlLnNrRsSTzZ",
-                         "čČďĎǎǍěĚǐǏǒǑǔǓǧǦǩǨȟȞľĽňŇřŘšŠŤžŽ");	// caron
+  buildaccent("check|v", "cCdDaAeEiIoOuUgGkKhHlLnNrRsSTtzZ",
+                         "čČďĎǎǍěĚǐǏǒǑǔǓǧǦǩǨȟȞľĽňŇřŘšŠŤťžŽ");	// caron
  accents["hat{\\imath}"] = "î";
  accents["hat{\\jmath}"] = "ĵ";
  buildaccent("hat|^", "aAeEiIcCgGhHjJsSwWyYzZoOuU",
@ -1332,7 +1338,7 @@ void Intervall::removeAccents()
 {
  if (accents.empty())
    buildAccentsMap();
-  static regex const accre("\\\\((.|grave|breve|lyxmathsym|ddot|dot|acute|dacute|mathring|check|hat|bar|tilde|subdot)\\{[^\\{\\}]+\\}|(i|imath|jmath)(?![a-zA-Z]))");
+  static regex const accre("\\\\((.|grave|breve|lyxmathsym|text|ddot|dot|acute|dacute|mathring|check|hat|bar|tilde|subdot)\\{[^\\{\\}]+\\}|(i|imath|jmath)(?![a-zA-Z]))");
  smatch sub;
  for (sregex_iterator itacc(par.begin(), par.end(), accre), end; itacc != end; ++itacc) {
    sub = *itacc;