From ff933b52f5a9fe8c8453a03e096879755ff91340 Mon Sep 17 00:00:00 2001 From: Kornel Benko Date: Sat, 6 Oct 2018 23:50:50 +0200 Subject: [PATCH] Amend(2) b78bdf8 Modified language handling Still, there are problems, because sometimes the search pattern does not contain the the requested info. So the 'find' often fails for strings inside a list environment. --- src/lyxfind.cpp | 162 ++++++++++++++++++++++++++++++++++++------------ 1 file changed, 121 insertions(+), 41 deletions(-) diff --git a/src/lyxfind.cpp b/src/lyxfind.cpp index 13830d1e03..f59840cead 100644 --- a/src/lyxfind.cpp +++ b/src/lyxfind.cpp @@ -855,12 +855,19 @@ static size_t identifyLeading(string const & s) return s.find(t); } +/* + * Given a latexified string, retrieve some handled features + * The features of the regex will later be compared with the features + * of the searched text. If the regex features are not a + * subset of the analized, then, in not format ignoring search + * we can early stop the search in the relevant inset. + */ typedef map Features; static Features identifyFeatures(string const & s) { static regex const feature("\\\\(([a-z]+(\\{([a-z]+)\\}|\\*)?))\\{"); - static regex const valid("^(((emph|noun|text(bf|sl|sf|it|tt)|(textcolor|foreignlanguage)\\{[a-z]+\\})|(u|uu)line|(s|x)out|uwave)|((sub)?(((sub)?section)|paragraph)|part)\\*?)$"); + static regex const valid("^(((emph|noun|text(bf|sl|sf|it|tt)|(textcolor|foreignlanguage)\\{[a-z]+\\})|item |(u|uu)line|(s|x)out|uwave)|((sub)?(((sub)?section)|paragraph)|part)\\*?)$"); smatch sub; bool displ = true; Features info; @@ -891,6 +898,9 @@ static Features identifyFeatures(string const & s) return(info); } +/* + * Faster search for the related closing parenthesis + */ static int findclosing(string p, int start, int end) { int skip = 0; @@ -909,7 +919,67 @@ static int findclosing(string p, int start, int end) return(-1); } +/* + * Discard any info for char sizes for now. + */ +static string removefontinfo(string par) +{ + // Remove fontsizes, inputencoding + smatch sub; + list fpars; + static regex const sizescodings("(\\\\(footnotesize|tiny|scriptsize|small|large|Large|LARGE|huge|Huge|inputencoding\\{[^\\}]*})(\b|(\\{(\\{\\})?\\})?(%\\n)?))"); + for (sregex_iterator it(par.begin(), par.end(), sizescodings), end; it != end; ++it) { + sub = *it; + string token = sub.str(1); + fpars.push_back(token); + } + for (list::const_iterator li = fpars.begin(); li != fpars.end(); ++li) { + string token = *li; + int f; + int firstpos = 0; + int ic; // Position of closing part e.g. '}' + while ((f = par.find(token, firstpos)) >= 0) { + size_t ssize = token.length(); + int parcount = 0; // how many '{}' can be removed + if (f == 0) + ic = -1; + else { + if (par[f-1] != '{') + ic = -1; + else { + // here '{' preceedes + ic = findclosing(par, f + ssize, par.length()); + if (f == 1) + parcount = 1; + else if ((f == 2) && (par[f-2] == '{')) { + if ((ic < 0) || (par[ic+1] == '}')) + parcount = 2; + else + parcount = 1; + } else while (f > parcount + 1) { + if (par[f-2-parcount] != '{') + break; + parcount++; + if ((ic > 0) && (par[ic+parcount] != '}')) + break; + } + } + } + firstpos = f; + if (ic < 0) + ic = par.length() - parcount; + par = par.substr(0, f-parcount) + par.substr(f+ssize, ic+parcount-f-ssize) + par.substr(ic+parcount); + } + } + return(par); +} + +/* + * Try to unify the language specs in the latexified text. + * Resulting modified string is set to "", if + * the searched tex does not contain all the features in the search pattern + */ static string correctlanguagesetting(string par, bool from_regex, bool withformat) { static string langstart = "\\foreignlanguage{"; @@ -920,7 +990,6 @@ static string correctlanguagesetting(string par, bool from_regex, bool withforma static bool regex_with_format = false; int parlen = par.length(); - string result = par; while ((parlen > 0) && (par[parlen-1] == '\n')) { parlen--; @@ -951,45 +1020,78 @@ static string correctlanguagesetting(string par, bool from_regex, bool withforma else { // LYXERR0("No regex formats"); } - if (par.compare(0, llen, langstart) == 0) { + string result = removefontinfo(par.substr(0, parlen)); + parlen = result.length(); + if (result.compare(0, llen, langstart) == 0) { if (from_regex) { removefirstlang = false; } - int i = findclosing(par, llen, parlen); + int i = findclosing(result, llen, parlen); if (removefirstlang) { if (i < 0) result = ""; else { - int closepos = findclosing(par, i+2, parlen); + int closepos = findclosing(result, i+2, parlen); if (closepos > 0) { - result = par.substr(i+2, closepos-i-2) + par.substr(closepos+1, parlen - closepos-1); + result = result.substr(i+2, closepos-i-2) + result.substr(closepos+1, parlen - closepos-1); } else { - result = par.substr(i+2, parlen-i-2); + result = result.substr(i+2, parlen-i-2); } } } else if (i > 0) { // skip '}{' after the language spec - int closepos = findclosing(par, i+2, parlen); - size_t insertpos = par.find(langstart, i+2); + string samelang = ""; + int startpos = i+2; + int closepos; + while(true) { + closepos = findclosing(result, startpos, parlen); + if (closepos >0) { + if (result[closepos+1] == '{') { + samelang += result.substr(startpos, closepos-startpos); + startpos = closepos + 2; + } + else { + samelang += result.substr(startpos, closepos-startpos); + result = result.substr(0, i+2) + samelang + result.substr(closepos); + closepos = i+2 + samelang.length(); + break; + } + } + else { + result = result.substr(0, i+2) + samelang + result.substr(startpos) +"}"; + closepos = result.length() - 1; + break; + } + } + size_t insertpos = result.find(langstart, i+2); + + if (insertpos == string::npos) + insertpos = result.length(); if (closepos < 0) { - if (insertpos == string::npos) { + if (insertpos == result.length()) { // there are no closing in par, and no next lang spec - result = par.substr(0, parlen) + "}"; + result += "}"; } else { // Add '}' at insertpos only, because closing is missing - result = par.substr(0,insertpos) + "}" + par.substr(insertpos, parlen-insertpos); + result = result.substr(0,insertpos) + "}" + result.substr(insertpos, parlen-insertpos); } } else if ((size_t) closepos > insertpos) { // Add '}' at insertpos and remove from closepos if closepos > insertpos - result = par.substr(0,insertpos) + "}" + par.substr(insertpos, closepos - insertpos) + par.substr(closepos+1, parlen -closepos-1); + result = result.substr(0,insertpos) + "}" + result.substr(insertpos, closepos - insertpos) + result.substr(closepos+1, parlen -closepos-1); + } + else { + // here closepos < insertpos + if ((size_t) closepos +1 < insertpos) { + result = result.substr(0, closepos) + result.substr(closepos+1, insertpos-closepos-1) + "}" + result.substr(insertpos); + } } } else { - result = par; + // result not good?, no closing '}' for \foreignlanguage{ ...>>> found // For i == 0, it is empty language spec // and for i < 0 it is Error } @@ -999,31 +1101,8 @@ static string correctlanguagesetting(string par, bool from_regex, bool withforma removefirstlang = true; } } - // Remove fontsizes - static vector fontssizes = { "footnotesize", "tiny", "scriptsize", "small", "large", "Large", "LARGE", "huge", "Huge"}; - for (size_t i = 0; i < fontssizes.size(); i++) { - int f; - int firstpos = 0; - while ((f = result.find("{\\" + fontssizes[i], firstpos)) >= 0) { - if (f >= 0) { - firstpos = f; - size_t ssize = fontssizes[i].size() + 2; - int ic = findclosing(result, f + 1, result.length()); - - if ((result[f+ssize] == '{') && (result[f+ssize+1] == '}')) { - ssize += 2; - } - if (ic > 0) { - result = result.substr(0, f) + result.substr(f+ssize, ic-f-ssize) + result.substr(ic+1); - } - else { - result = result.substr(0, f) + result.substr(f+ssize); - } - } - } - } // remove possible disturbing macros - while (regex_replace(result, result, "\\\\(inputencoding\\{[^\\}]*}|noindent )", "")) + while (regex_replace(result, result, "\\\\(noindent )", "")) ; // Either not found language spec,or is single and closed spec or empty // to be removed @@ -1031,8 +1110,9 @@ static string correctlanguagesetting(string par, bool from_regex, bool withforma static regex const parreg("((\\n)?\\\\[a-z]+par)\\{"); list pars; + smatch sub; for (sregex_iterator it(result.begin(), result.end(), parreg), end; it != end; ++it) { - smatch sub = *it; + sub = *it; string token = sub.str(1); pars.push_back(token); } @@ -1310,10 +1390,10 @@ string MatchStringAdv::normalize(docstring const & s, bool hack_braces) const // Kornel: Added textsl, textsf, textit, texttt and noun // + allow to seach for colored text too LYXERR(Debug::FIND, "Removing stale empty \\emph{}, \\textbf{}, \\*section{} macros from: " << t); - while (regex_replace(t, t, "\\\\((emph|noun|text(bf|sl|sf|it|tt|color\\{[a-z]+\\})|(u|uu)line|(s|x)out|uwave)|((sub)?(((sub)?section)|paragraph)|part)\\*?)(\\{\\})+", "")) + while (regex_replace(t, t, "\\\\((emph|noun|text(bf|sl|sf|it|tt|color\\{[a-z]+\\})|(u|uu)line|(s|x)out|uwave)|((sub)?(((sub)?section)|paragraph)|part)\\*?)(\\{(\\{\\})?\\})+", "")) LYXERR(Debug::FIND, " further removing stale empty \\emph{}, \\textbf{} macros from: " << t); - while (regex_replace(t, t, "\\\\foreignlanguage\\{[a-z]+\\}(\\{(\\\\item )?\\})+", "")); + while (regex_replace(t, t, "\\\\foreignlanguage\\{[a-z]+\\}(\\{(\\\\item |\\{\\})?\\})+", "")); // FIXME - check what preceeds the brace if (hack_braces) { if (opt.ignoreformat)