Find-Adv: An attempt to handle Cyrillic characters in regexp mode as well

2024-09-20 06:49:56 +00:00 · 2020-12-14 20:43:39 +01:00 · 2020-12-14 20:43:39 +01:00 · d384136ff9
commit d384136ff9
parent 425a846b14
1 changed files with 44 additions and 10 deletions
--- a/src/lyxfind.cpp
+++ b/src/lyxfind.cpp
@ -8,6 +8,7 @@
 * \author Jürgen Vigna
 * \author Alfredo Braunstein
 * \author Tommaso Cucinotta
+ * \author Kornel Benko
 *
 * Full author contact details are available in file CREDITS.
 */
@ -943,6 +944,10 @@ static docstring stringifySearchBuffer(Buffer & buffer, FindAndReplaceOptions co
 					    AS_STR_INSETS | AS_STR_SKIPDELETE | AS_STR_PLAINTEXT,
 					    &runparams);
 		}
+		// Even in ignore-format we have to remove "\text{}" parts
+		string t = to_utf8(str);
+		while (regex_replace(t, t, "\\\\text\\{([^\\}]*)\\}", "$1"));
+		str = from_utf8(t);
 	}
 	return str;
 }
@ -955,7 +960,13 @@ static size_t identifyLeading(string const & s)
 	// @TODO Support \item[text]
 	// Kornel: Added textsl, textsf, textit, texttt and noun
 	// + allow to search for colored text too
-	while (regex_replace(t, t, "^\\\\(((footnotesize|tiny|scriptsize|small|large|Large|LARGE|huge|Huge|emph|noun|minisec|text(bf|md|sl|sf|it|tt))|((textcolor|foreignlanguage|latexenvironment)\\{[a-z]+\\*?\\})|(u|uu)line|(s|x)out|uwave)|((sub)?(((sub)?section)|paragraph)|part|chapter)\\*?)\\{", "")
+	while (regex_replace(t, t, "^\\\\(("
+	                     "(author|title|subtitle|subject|publishers|dedication|uppertitleback|lowertitleback|extratitle|"
+	                       "lyxaddress|lyxrightaddress|"
+	                       "footnotesize|tiny|scriptsize|small|large|Large|LARGE|huge|Huge|"
+	                       "emph|noun|minisec|text(bf|md|sl|sf|it|tt))|"
+	                     "((textcolor|foreignlanguage|latexenvironment)\\{[a-z]+\\*?\\})|"
+	                     "(u|uu)line|(s|x)out|uwave)|((sub)?(((sub)?section)|paragraph)|part|chapter)\\*?)\\{", "")
 	       || regex_replace(t, t, "^\\$", "")
 	       || regex_replace(t, t, "^\\\\\\[", "")
 	       || regex_replace(t, t, "^ ?\\\\item\\{[a-z]+\\}", "")
@ -1056,6 +1067,8 @@ class KeyInfo {
    isIgnored,
    /* like \lettrine[lines=5]{}{} */
    cleanToStart,
+    // like isStandard, but always remove head
+    headRemove,
    /* End of arguments marker for lettrine,
     * so that they can be ignored */
    endArguments
@ -1142,11 +1155,15 @@ void Intervall::setForDefaultLang(KeyInfo const & defLang) const
  // Enable the use of first token again
  if (ignoreidx >= 0) {
    int value = defLang._tokenstart + defLang._tokensize;
+    int borderidx = 0;
+    if (hasTitle) {
+      borderidx = 1;
+    }
    if (value > 0) {
-      if (borders[0].low < value)
-        borders[0].low = value;
-      if (borders[0].upper < value)
-        borders[0].upper = value;
+      if (borders[borderidx].low < value)
+        borders[borderidx].low = value;
+      if (borders[borderidx].upper < value)
+        borders[borderidx].upper = value;
    }
  }
 }
@ -1368,7 +1385,7 @@ void Intervall::removeAccents()
 {
  if (accents.empty())
    buildAccentsMap();
-  static regex const accre("\\\\(([\\S]|grave|breve|lyxmathsym|text|ddot|dot|acute|dacute|mathring|check|hat|bar|tilde|subdot|ogonek|cedilla|subring|textsubring|subhat|textsubcircum|subtilde|textsubtilde|dgrave|textdoublegrave|rcap|textroundcap|slashed)\\{[^\\{\\}]+\\}|(i|imath|jmath|cdot|[a-z]+space)(?![a-zA-Z]))");
+  static regex const accre("\\\\(([\\S]|grave|breve|ddot|dot|acute|dacute|mathring|check|hat|bar|tilde|subdot|ogonek|cedilla|subring|textsubring|subhat|textsubcircum|subtilde|textsubtilde|dgrave|textdoublegrave|rcap|textroundcap|slashed)\\{[^\\{\\}]+\\}|(i|imath|jmath|cdot|[a-z]+space)(?![a-zA-Z]))");
  smatch sub;
  for (sregex_iterator itacc(par.begin(), par.end(), accre), end; itacc != end; ++itacc) {
    sub = *itacc;
@ -1478,12 +1495,11 @@ class LatexInfo {
      return -1;
    }
    if (entries_[0].keytype == KeyInfo::isTitle) {
-      if (! entries_[0].disabled) {
      interval_.hasTitle = true;
+      if (! entries_[0].disabled) {
        interval_.titleValue = entries_[0].head;
      }
      else {
-        interval_.hasTitle = false;
        interval_.titleValue = "";
      }
      removeHead(entries_[0]);
@ -1708,6 +1724,20 @@ void LatexInfo::buildEntries(bool isPatternString)
        }
      }
    };
+    if (keys.find(key) != keys.end()) {
+      if (keys[key].keytype == KeyInfo::headRemove) {
+        KeyInfo found1 = keys[key];
+        found1.disabled = true;
+        found1.head = "\\" + key + "{";
+        found1._tokenstart = sub.position(size_t(0));
+        found1._tokensize = found1.head.length();
+        found1._dataStart = found1._tokenstart + found1.head.length();
+        int endpos = interval_.findclosing(found1._dataStart, interval_.par.length(), '{', '}', 1);
+        found1._dataEnd = endpos;
+        removeHead(found1);
+        continue;
+      }
+    }
    if (evaluatingRegexp) {
      if (sub.str(1).compare("endregexp") == 0) {
        evaluatingRegexp = false;
@ -1979,6 +2009,8 @@ void LatexInfo::buildKeys(bool isPatternString)
  static bool keysBuilt = false;
  if (keysBuilt && !isPatternString) return;

+  // Keys to ignore in any case
+  makeKey("text|textcyrillic|lyxmathsym", KeyInfo(KeyInfo::headRemove, 1, true), true);
  // Known standard keys with 1 parameter.
  // Split is done, if not at start of region
  makeKey("textsf|textss|texttt", KeyInfo(KeyInfo::isStandard, 1, ignoreFormats.getFamily()), isPatternString);
@ -2291,13 +2323,14 @@ int LatexInfo::dispatch(ostringstream &os, int previousStart, KeyInfo &actual)
      }
      break;
    }
-    case KeyInfo::endArguments:
+    case KeyInfo::endArguments: {
      // Remove trailing '{}' too
      actual._dataStart += 1;
      actual._dataEnd += 1;
      interval_.addIntervall(actual._tokenstart, actual._dataEnd+1);
      nextKeyIdx = getNextKey();
      break;
+    }
    case KeyInfo::noMain:
      // fall through
    case KeyInfo::isStandard: {
@ -2481,7 +2514,8 @@ int LatexInfo::dispatch(ostringstream &os, int previousStart, KeyInfo &actual)
      break;
    }
    case KeyInfo::invalid:
-      // This cannot happen, already handled
+    case KeyInfo::headRemove:
+      // These two cases cannot happen, already handled
      // fall through
    default: {
      // LYXERR(Debug::INFO, "Unhandled keytype");