FindAdv: Special handling for \dot{i} and 'ß'

Different behaviour in regexp{..} for 'İ' and 'ß':
1.) The lowercase routine leaves 'İ' unchanged, but when searching
  while ignoring case, the string '\dot{I}' is still converted to '\dot{i}'.
  In this case we have to map it to 'İ' (instead of 'i', as one would expect).

2.) If 'ß' is inserted via the keyboard into a freshly created regexp box, it appears as \lyxmathsym{ß};
  if pasted from the LyX screen, it appears as \text{ß}.
This commit is contained in:
Kornel Benko 2019-03-10 00:29:56 +01:00
parent 99e216b78d
commit c041439c51

View File

@ -1263,8 +1263,13 @@ static void buildaccent(string n, string param, string values)
// get the corresponding utf8-value // get the corresponding utf8-value
if ((values[start] & 0xc0) != 0xc0) { if ((values[start] & 0xc0) != 0xc0) {
// should not happen, utf8 encoding starts at least with 11xxxxxx // should not happen, utf8 encoding starts at least with 11xxxxxx
start++; // but value for '\dot{i}' is 'i', which is ascii
continue; if ((values[start] & 0x80) == 0) {
// is ascii
accents[key] = values.substr(start, 1);
}
start++;
continue;
} }
for (int j = 1; ;j++) { for (int j = 1; ;j++) {
if (start + j >= values.size()) { if (start + j >= values.size()) {
@ -1272,7 +1277,7 @@ static void buildaccent(string n, string param, string values)
start = values.size() - 1; start = values.size() - 1;
break; break;
} }
else if ((values[start+j] & 0xc0) == 0xc0) { else if ((values[start+j] & 0xc0) != 0x80) {
// This is the first byte of following utf8 char // This is the first byte of following utf8 char
accents[key] = values.substr(start, j); accents[key] = values.substr(start, j);
start += j; start += j;
@ -1289,11 +1294,12 @@ static void buildAccentsMap()
accents["i"] = "ı"; accents["i"] = "ı";
accents["jmath"] = "ȷ"; accents["jmath"] = "ȷ";
accents["lyxmathsym{ß}"] = "ß"; accents["lyxmathsym{ß}"] = "ß";
accents["text{ß}"] = "ß";
accents["ddot{\\imath}"] = "ï"; accents["ddot{\\imath}"] = "ï";
buildaccent("ddot", "aAeEiIoOuUyY", buildaccent("ddot", "aAeEiIioOuUyY",
"äÄëËïÏöÖüÜÿŸ"); // umlaut "äÄëËïÏïöÖüÜÿŸ"); // umlaut
buildaccent("dot|.", "cCeEgGiIzZaAoObBdDfFyY", buildaccent("dot|.", "cCeEGgIizZaAoObBdDfFyY",
"ċĊėĖġĠiİżŻȧȦȯȮḃḂḋḊḟḞẏẎ"); "ċĊėĖĠġİİżŻȧȦȯȮḃḂḋḊḟḞẏẎ"); // dot{i} can only happen if ignoring case, but there is no lowercase of 'İ'
accents["acute{\\imath}"] = "í"; accents["acute{\\imath}"] = "í";
buildaccent("acute", "aAcCeElLoOnNrRsSuUyYzZiI", buildaccent("acute", "aAcCeElLoOnNrRsSuUyYzZiI",
"áÁćĆéÉĺĹóÓńŃŕŔśŚúÚýÝźŹíÍ"); "áÁćĆéÉĺĹóÓńŃŕŔśŚúÚýÝźŹíÍ");
@ -1302,8 +1308,8 @@ static void buildAccentsMap()
"åÅůŮẘẙ"); // ring "åÅůŮẘẙ"); // ring
accents["check{\\imath}"] = "ǐ"; accents["check{\\imath}"] = "ǐ";
accents["check{\\jmath}"] = "ǰ"; accents["check{\\jmath}"] = "ǰ";
buildaccent("check|v", "cCdDaAeEiIoOuUgGkKhHlLnNrRsSTzZ", buildaccent("check|v", "cCdDaAeEiIoOuUgGkKhHlLnNrRsSTtzZ",
"čČďĎǎǍěĚǐǏǒǑǔǓǧǦǩǨȟȞľĽňŇřŘšŠŤžŽ"); // caron "čČďĎǎǍěĚǐǏǒǑǔǓǧǦǩǨȟȞľĽňŇřŘšŠŤťžŽ"); // caron
accents["hat{\\imath}"] = "î"; accents["hat{\\imath}"] = "î";
accents["hat{\\jmath}"] = "ĵ"; accents["hat{\\jmath}"] = "ĵ";
buildaccent("hat|^", "aAeEiIcCgGhHjJsSwWyYzZoOuU", buildaccent("hat|^", "aAeEiIcCgGhHjJsSwWyYzZoOuU",
@ -1332,7 +1338,7 @@ void Intervall::removeAccents()
{ {
if (accents.empty()) if (accents.empty())
buildAccentsMap(); buildAccentsMap();
static regex const accre("\\\\((.|grave|breve|lyxmathsym|ddot|dot|acute|dacute|mathring|check|hat|bar|tilde|subdot)\\{[^\\{\\}]+\\}|(i|imath|jmath)(?![a-zA-Z]))"); static regex const accre("\\\\((.|grave|breve|lyxmathsym|text|ddot|dot|acute|dacute|mathring|check|hat|bar|tilde|subdot)\\{[^\\{\\}]+\\}|(i|imath|jmath)(?![a-zA-Z]))");
smatch sub; smatch sub;
for (sregex_iterator itacc(par.begin(), par.end(), accre), end; itacc != end; ++itacc) { for (sregex_iterator itacc(par.begin(), par.end(), accre), end; itacc != end; ++itacc) {
sub = *itacc; sub = *itacc;