Amend f500a287 (FindAdv: Try to make regex search with format enabled somehow faster)

Remove one out-of-range access.
Estimate the search result of the regular expression for further processing.
This commit is contained in:
Kornel Benko 2021-01-02 18:37:14 +01:00
parent adf62c3184
commit 8c67cb8c3a

View File

@ -854,6 +854,11 @@ bool braces_match(string const & beg,
class MatchResult { class MatchResult {
public: public:
enum range {
newIsTooFar,
newIsBetter,
newIsInvalid
};
int match_len; int match_len;
int match_prefix; int match_prefix;
int match2end; int match2end;
@ -862,6 +867,17 @@ public:
MatchResult(): match_len(0),match_prefix(0),match2end(0), pos(0),leadsize(0) {}; MatchResult(): match_len(0),match_prefix(0),match2end(0), pos(0),leadsize(0) {};
}; };
/** Classify a newly computed match result relative to a reference result.
 *
 * Only the match_len and match2end members are compared; neither argument
 * is modified.
 *
 * @param oldres the reference result the new one is compared against
 * @param newres the result to classify
 * @return MatchResult::newIsTooFar  if newres.match2end or newres.match_len
 *                                   is smaller than the value in oldres,
 *         MatchResult::newIsBetter  if both members are equal in the two
 *                                   results,
 *         MatchResult::newIsInvalid otherwise (no member is smaller and at
 *                                   least one is larger than in oldres).
 **/
static MatchResult::range interpretMatch(MatchResult const & oldres, MatchResult const & newres)
{
	// Any shrinkage of either measure means the new result went too far.
	if (newres.match2end < oldres.match2end)
		return MatchResult::newIsTooFar;
	if (newres.match_len < oldres.match_len)
		return MatchResult::newIsTooFar;
	// Both measures unchanged: the new result is accepted as the better one.
	if ((newres.match_len == oldres.match_len) && (newres.match2end == oldres.match2end))
		return MatchResult::newIsBetter;
	// At least one measure grew: not an expected outcome.
	return MatchResult::newIsInvalid;
}
/** The class performing a match between a position in the document and the FindAdvOptions. /** The class performing a match between a position in the document and the FindAdvOptions.
**/ **/
@ -1074,7 +1090,7 @@ class KeyInfo {
noContent, noContent,
/* Char, like \backslash */ /* Char, like \backslash */
isChar, isChar,
/* remove starting backslash */ /* replace starting backslash with '#' */
isText, isText,
/* \part, \section*, ... */ /* \part, \section*, ... */
isSectioning, isSectioning,
@ -1485,9 +1501,9 @@ void Intervall::removeAccents()
} }
// Remove possibly following space too // Remove possibly following space too
if (par[pos+sub.str(0).size()] == ' ') if (par[pos+sub.str(0).size()] == ' ')
addIntervall(pos+val.size(), pos + sub.str(0).size()+1); addIntervall(pos+val.size(), pos + sub.str(0).size()+1);
else else
addIntervall(pos+val.size(), pos + sub.str(0).size()); addIntervall(pos+val.size(), pos + sub.str(0).size());
for (size_t i = pos+val.size(); i < pos + sub.str(0).size(); i++) { for (size_t i = pos+val.size(); i < pos + sub.str(0).size(); i++) {
// remove traces of any remaining chars // remove traces of any remaining chars
par[i] = ' '; par[i] = ' ';
@ -1570,7 +1586,7 @@ class LatexInfo {
public: public:
LatexInfo(string const & par, bool isPatternString) LatexInfo(string const & par, bool isPatternString)
: entidx_(-1), interval_(isPatternString, par) : entidx_(-1), interval_(isPatternString, par)
{ {
buildKeys(isPatternString); buildKeys(isPatternString);
entries_ = vector<KeyInfo>(); entries_ = vector<KeyInfo>();
@ -2004,7 +2020,13 @@ void LatexInfo::buildEntries(bool isPatternString)
optionalEnd = optend; optionalEnd = optend;
} }
string token = sub.str(5); string token = sub.str(5);
int closings = found.parenthesiscount; int closings;
if (interval_.par[optend] != '{') {
closings = 0;
found.parenthesiscount = 0;
}
else
closings = found.parenthesiscount;
if (found.parenthesiscount == 1) { if (found.parenthesiscount == 1) {
found.head = "\\" + key + "{"; found.head = "\\" + key + "{";
} }
@ -2381,7 +2403,8 @@ int LatexInfo::dispatch(ostringstream &os, int previousStart, KeyInfo &actual)
break; break;
} }
case KeyInfo::isText: case KeyInfo::isText:
interval_.addIntervall(actual._tokenstart, actual._tokenstart+1); interval_.par[actual._tokenstart] = '#';
//interval_.addIntervall(actual._tokenstart, actual._tokenstart+1);
nextKeyIdx = getNextKey(); nextKeyIdx = getNextKey();
break; break;
case KeyInfo::noContent: { /* char like "\hspace{2cm}" */ case KeyInfo::noContent: { /* char like "\hspace{2cm}" */
@ -3159,7 +3182,6 @@ MatchResult MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_be
// Size of the leading string: m[1].second - m[1].first // Size of the leading string: m[1].second - m[1].first
int leadingsize = 0; int leadingsize = 0;
int result; int result;
size_t pos;
#if QTSEARCH #if QTSEARCH
if (match.lastCapturedIndex() > 0) { if (match.lastCapturedIndex() > 0) {
leadingsize = match.capturedEnd(1) - match.capturedStart(1); leadingsize = match.capturedEnd(1) - match.capturedStart(1);
@ -3173,7 +3195,6 @@ MatchResult MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_be
else else
result = match.capturedStart(lastidx + 1 - close_wildcards) - match.capturedStart(0); result = match.capturedStart(lastidx + 1 - close_wildcards) - match.capturedStart(0);
pos = match.capturedStart(1);
#else #else
if (m.size() > 2) { if (m.size() > 2) {
leadingsize = m[1].second - m[1].first; leadingsize = m[1].second - m[1].first;
@ -3185,7 +3206,6 @@ MatchResult MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_be
result = m[0].second - m[0].first; result = m[0].second - m[0].first;
else else
result = m[m.size() - close_wildcards].first - m[0].first; result = m[m.size() - close_wildcards].first - m[0].first;
pos = m.position(size_t(1));
#endif #endif
if (result > leadingsize) if (result > leadingsize)
result -= leadingsize; result -= leadingsize;
@ -3193,14 +3213,19 @@ MatchResult MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_be
result = 0; result = 0;
#if QTSEARCH #if QTSEARCH
mres.match_prefix = match.capturedEnd(2) - match.capturedStart(2); mres.match_prefix = match.capturedEnd(2) - match.capturedStart(2);
mres.match_len = computeSize(QStringRef(&qstr, pos+leadingsize,result), result) - mres.match_prefix; // mres.match_len = computeSize(QStringRef(&qstr, pos+leadingsize,result), result) - mres.match_prefix;
mres.match2end = qstr.size() - pos - leadingsize - mres.match_prefix; mres.match_len = match.capturedEnd(0) - match.capturedEnd(2);
// mres.match2end = qstr.size() - pos - leadingsize - mres.match_prefix;
mres.match2end = qstr.size() - match.capturedEnd(0);
mres.pos = match.capturedStart(2);
#else #else
mres.match_prefix = = m[2].second - m[2].first; mres.match_prefix = m[2].second - m[2].first;
mres.match_len = computeSize(str.substr(pos+leadingsize,result), result) - mres.match_prefix; // mres.match_len = computeSize(str.substr(pos+leadingsize,result), result) - mres.match_prefix;
mres.match2end = str.size() - pos - leadingsize - mres.match_prefix; mres.match_len = m[0].second - m[2].second;
// mres.match2end = str.size() - pos - leadingsize - mres.match_prefix;
mres.match2end = str.size() - m[0].second;
mres.pos = m[2].first;
#endif #endif
mres.pos = pos+leadingsize + mres.match_prefix;
mres.leadsize = leadingsize; mres.leadsize = leadingsize;
return mres; return mres;
} }
@ -3320,7 +3345,6 @@ string MatchStringAdv::normalize(docstring const & s, bool hack_braces) const
LYXERR(Debug::FIND, " further removing stale empty \\emph{}, \\textbf{} macros from: " << t); LYXERR(Debug::FIND, " further removing stale empty \\emph{}, \\textbf{} macros from: " << t);
while (regex_replace(t, t, "\\\\((sub)?(((sub)?section)|paragraph)|part)\\*?(\\{(\\{\\})?\\})+", "")) while (regex_replace(t, t, "\\\\((sub)?(((sub)?section)|paragraph)|part)\\*?(\\{(\\{\\})?\\})+", ""))
LYXERR(Debug::FIND, " further removing stale empty \\emph{}, \\textbf{} macros from: " << t); LYXERR(Debug::FIND, " further removing stale empty \\emph{}, \\textbf{} macros from: " << t);
while (regex_replace(t, t, "\\\\(foreignlanguage|textcolor|item)\\{[a-z]+\\}(\\{(\\{\\})?\\})+", "")); while (regex_replace(t, t, "\\\\(foreignlanguage|textcolor|item)\\{[a-z]+\\}(\\{(\\{\\})?\\})+", ""));
// FIXME - check what precedes the brace // FIXME - check what precedes the brace
if (hack_braces) { if (hack_braces) {
@ -3558,7 +3582,6 @@ int findAdvFinalize(DocIterator & cur, MatchStringAdv const & match)
return len; return len;
} }
#if 0 #if 0
static void displayMResult(MatchResult &mres) static void displayMResult(MatchResult &mres)
{ {
@ -3590,9 +3613,13 @@ int findForwardAdv(DocIterator & cur, MatchStringAdv const & match)
if (match_len > 0) { if (match_len > 0) {
// Try to find the begin of searched string // Try to find the begin of searched string
int increment; int increment;
increment = mres.match_prefix/2; int firstInvalid = 100000;
if (mres.match_prefix + mres.pos - mres.leadsize > 0)
increment = (mres.match_prefix + mres.pos - mres.leadsize)/2;
else
increment = 10;
LYXERR(Debug::FIND, "Set increment to " << increment); LYXERR(Debug::FIND, "Set increment to " << increment);
while (mres.match_prefix > 1 && (increment > 1)) { while (increment > 0) {
DocIterator old_cur = cur; DocIterator old_cur = cur;
for (int i = 0; i < increment && cur; cur.forwardPos(), i++) { for (int i = 0; i < increment && cur; cur.forwardPos(), i++) {
} }
@ -3605,24 +3632,31 @@ int findForwardAdv(DocIterator & cur, MatchStringAdv const & match)
else { else {
MatchResult mres2 = match(cur, -1, false); MatchResult mres2 = match(cur, -1, false);
displayMres(mres2) displayMres(mres2)
if ((mres2.match2end < mres.match2end) || switch (interpretMatch(mres, mres2)) {
(mres2.match_len < mres.match_len)) { case MatchResult::newIsTooFar:
cur = old_cur; // behind the expected match
increment /= 2; firstInvalid = increment;
} cur = old_cur;
else if ((mres2.match2end == mres.match2end) && (mres2.match_len == mres.match_len)) { increment /= 2;
// next part with the same increment as before break;
mres = mres2; case MatchResult::newIsBetter:
if (increment > mres.match_prefix/2) // not reached yet
increment = mres.match_prefix/2; mres = mres2;
} firstInvalid -= increment;
else { if (increment > firstInvalid/2)
// Something wrong here increment = firstInvalid/2;
LYXERR0( "Increment = " << increment << " match_prefix = " << mres.match_prefix); break;
break; default:
} // Handle not like MatchResult::newIsTooFar
LYXERR0( "Something is wrong: Increment = " << increment << " match_prefix = " << mres.match_prefix);
firstInvalid--;
increment = firstInvalid -1;
cur = old_cur;
break;
}
} }
} }
// LYXERR0("Leaving first loop");
int match_len_zero_count = 0; int match_len_zero_count = 0;
MatchResult mres3; MatchResult mres3;
for (int i = 0; !theApp()->longOperationCancelled() && cur; cur.forwardPos()) { for (int i = 0; !theApp()->longOperationCancelled() && cur; cur.forwardPos()) {