FindAdv: Try to make regex search with format enabled somehow faster

This commit is contained in:
Kornel Benko 2021-01-01 21:50:36 +01:00
parent eaebe404ae
commit f500a287d4

View File

@ -51,9 +51,17 @@
#include <map> #include <map>
#include <regex> #include <regex>
#include <QtCore> // sets QT_VERSION #define USE_QT_FOR_SEARCH
#if (QT_VERSION >= 0x050000) #if defined(USE_QT_FOR_SEARCH)
#include <QRegularExpression> #include <QtCore> // sets QT_VERSION
#if (QT_VERSION >= 0x050000)
#include <QRegularExpression>
#define QTSEARCH 1
#else
#define QTSEARCH 0
#endif
#else
#define QTSEARCH 0
#endif #endif
using namespace std; using namespace std;
@ -797,7 +805,7 @@ bool regex_replace(string const & s, string & t, string const & searchstr,
** @param unmatched ** @param unmatched
** Number of open braces that must remain open at the end for the verification to succeed. ** Number of open braces that must remain open at the end for the verification to succeed.
**/ **/
#if (QT_VERSION >= 0x050000) #if QTSEARCH
bool braces_match(QString const & beg, bool braces_match(QString const & beg,
int unmatched = 0) int unmatched = 0)
#else #else
@ -806,7 +814,7 @@ bool braces_match(string const & beg,
#endif #endif
{ {
int open_pars = 0; int open_pars = 0;
#if (QT_VERSION >= 0x050000) #if QTSEARCH
LYXERR(Debug::FIND, "Checking " << unmatched << " unmatched braces in '" << beg.toStdString() << "'"); LYXERR(Debug::FIND, "Checking " << unmatched << " unmatched braces in '" << beg.toStdString() << "'");
#else #else
LYXERR(Debug::FIND, "Checking " << unmatched << " unmatched braces in '" << beg << "'"); LYXERR(Debug::FIND, "Checking " << unmatched << " unmatched braces in '" << beg << "'");
@ -814,7 +822,7 @@ bool braces_match(string const & beg,
int lastidx = beg.size(); int lastidx = beg.size();
for (int i=0; i < lastidx; ++i) { for (int i=0; i < lastidx; ++i) {
// Skip escaped braces in the count // Skip escaped braces in the count
#if (QT_VERSION >= 0x050000) #if QTSEARCH
QChar c = beg.at(i); QChar c = beg.at(i);
#else #else
char c = beg.at(i); char c = beg.at(i);
@ -847,9 +855,11 @@ bool braces_match(string const & beg,
/// Plain value object describing the outcome of one search attempt.
/// Every field starts at zero; the matcher fills them in afterwards.
/// Default member initializers are used so that a newly added field can
/// never be left uninitialized by a forgotten constructor-list entry.
class MatchResult {
public:
	/// Computed size of the matched text, with match_prefix already subtracted.
	int match_len = 0;
	/// Length of the text captured between the leading part and the
	/// searched expression (second capture group of the regex).
	int match_prefix = 0;
	/// Number of characters from the match position to the end of the
	/// searched string.
	int match2end = 0;
	/// Position of the match inside the searched string (leading part and
	/// prefix already skipped).
	int pos = 0;
	/// Length of the leading part (first capture group of the regex).
	int leadsize = 0;
	/// Creates an all-zero ("nothing matched") result.
	MatchResult() {}
};
/** The class performing a match between a position in the document and the FindAdvOptions. /** The class performing a match between a position in the document and the FindAdvOptions.
@ -870,7 +880,7 @@ public:
** The length of the matching text, or zero if no match was found. ** The length of the matching text, or zero if no match was found.
**/ **/
MatchResult operator()(DocIterator const & cur, int len = -1, bool at_begin = true) const; MatchResult operator()(DocIterator const & cur, int len = -1, bool at_begin = true) const;
#if (QT_VERSION >= 0x050000) #if QTSEARCH
bool regexIsValid; bool regexIsValid;
string regexError; string regexError;
#endif #endif
@ -905,7 +915,7 @@ private:
string par_as_string; string par_as_string;
// regular expression to use for searching // regular expression to use for searching
// regexp2 is same as regexp, but prefixed with a ".*?" // regexp2 is same as regexp, but prefixed with a ".*?"
#if (QT_VERSION >= 0x050000) #if QTSEARCH
QRegularExpression regexp; QRegularExpression regexp;
QRegularExpression regexp2; QRegularExpression regexp2;
#else #else
@ -2372,6 +2382,7 @@ int LatexInfo::dispatch(ostringstream &os, int previousStart, KeyInfo &actual)
} }
case KeyInfo::isText: case KeyInfo::isText:
interval_.addIntervall(actual._tokenstart, actual._tokenstart+1); interval_.addIntervall(actual._tokenstart, actual._tokenstart+1);
nextKeyIdx = getNextKey();
break; break;
case KeyInfo::noContent: { /* char like "\hspace{2cm}" */ case KeyInfo::noContent: { /* char like "\hspace{2cm}" */
if (actual.disabled) if (actual.disabled)
@ -2952,17 +2963,17 @@ MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions const &
// TODO: Adapt '\[12345678]' in par_as_string to account for the first '()' // TODO: Adapt '\[12345678]' in par_as_string to account for the first '()'
// Unfortunately '\1', '\2', etc. do not work for strings with extra format, // Unfortunately '\1', '\2', etc. do not work for strings with extra format,
// so the conversion has no effect in that case // so the conversion has no effect in that case
for (int i = 8; i > 0; --i) { for (int i = 7; i > 0; --i) {
string orig = "\\\\" + std::to_string(i); string orig = "\\\\" + std::to_string(i);
string dest = "\\" + std::to_string(i+1); string dest = "\\" + std::to_string(i+2);
while (regex_replace(par_as_string, par_as_string, orig, dest)); while (regex_replace(par_as_string, par_as_string, orig, dest));
} }
regexp_str = "(" + lead_as_regexp + ")" + par_as_string; regexp_str = "(" + lead_as_regexp + ")()" + par_as_string;
regexp2_str = "(" + lead_as_regexp + ").*?" + par_as_string; regexp2_str = "(" + lead_as_regexp + ")(.*?)" + par_as_string;
} }
LYXERR(Debug::FIND, "Setting regexp to : '" << regexp_str << "'"); LYXERR(Debug::FIND, "Setting regexp to : '" << regexp_str << "'");
LYXERR(Debug::FIND, "Setting regexp2 to: '" << regexp2_str << "'"); LYXERR(Debug::FIND, "Setting regexp2 to: '" << regexp2_str << "'");
#if (QT_VERSION >= 0x050000) #if QTSEARCH
// Handle \w properly // Handle \w properly
QRegularExpression::PatternOptions popts = QRegularExpression::UseUnicodePropertiesOption | QRegularExpression::MultilineOption; QRegularExpression::PatternOptions popts = QRegularExpression::UseUnicodePropertiesOption | QRegularExpression::MultilineOption;
if (! opt.casesensitive) { if (! opt.casesensitive) {
@ -3022,7 +3033,7 @@ MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions const &
// \& ==> 1 // \& ==> 1
// --- ==> 1 // --- ==> 1
// \\[a-zA-Z]+ ==> 1 // \\[a-zA-Z]+ ==> 1
#if (QT_VERSION >= 0x050000) #if QTSEARCH
static int computeSize(QStringRef s, int len) static int computeSize(QStringRef s, int len)
#define isLyxAlpha(arg) arg.isLetter() #define isLyxAlpha(arg) arg.isLetter()
#else #else
@ -3103,7 +3114,7 @@ MatchResult MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_be
if (use_regexp) { if (use_regexp) {
LYXERR(Debug::FIND, "Searching in regexp mode: at_begin=" << at_begin); LYXERR(Debug::FIND, "Searching in regexp mode: at_begin=" << at_begin);
#if (QT_VERSION >= 0x050000) #if QTSEARCH
QString qstr = QString::fromStdString(str); QString qstr = QString::fromStdString(str);
QRegularExpression const *p_regexp; QRegularExpression const *p_regexp;
QRegularExpression::MatchType flags = QRegularExpression::NormalMatch; QRegularExpression::MatchType flags = QRegularExpression::NormalMatch;
@ -3117,7 +3128,7 @@ MatchResult MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_be
return mres; return mres;
// Check braces on segments that matched all (.*?) subexpressions, // Check braces on segments that matched all (.*?) subexpressions,
// except the last "padding" one inserted by lyx. // except the last "padding" one inserted by lyx.
for (int i = 1; i < match.lastCapturedIndex(); ++i) for (int i = 3; i < match.lastCapturedIndex(); ++i)
if (!braces_match(match.captured(i), open_braces)) if (!braces_match(match.captured(i), open_braces))
return mres; return mres;
#else #else
@ -3136,7 +3147,7 @@ MatchResult MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_be
match_results<string::const_iterator> const & m = *re_it; match_results<string::const_iterator> const & m = *re_it;
// Check braces on segments that matched all (.*?) subexpressions, // Check braces on segments that matched all (.*?) subexpressions,
// except the last "padding" one inserted by lyx. // except the last "padding" one inserted by lyx.
for (size_t i = 1; i < m.size() - 1; ++i) for (size_t i = 3; i < m.size() - 1; ++i)
if (!braces_match(m[i], open_braces)) if (!braces_match(m[i], open_braces))
return mres; return mres;
#endif #endif
@ -3149,10 +3160,10 @@ MatchResult MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_be
int leadingsize = 0; int leadingsize = 0;
int result; int result;
size_t pos; size_t pos;
#if (QT_VERSION >= 0x050000) #if QTSEARCH
if (match.lastCapturedIndex() > 0) if (match.lastCapturedIndex() > 0) {
leadingsize = match.capturedEnd(1) - match.capturedStart(1); leadingsize = match.capturedEnd(1) - match.capturedStart(1);
}
int lastidx = match.lastCapturedIndex(); int lastidx = match.lastCapturedIndex();
for (int i = 0; i <= lastidx; i++) { for (int i = 0; i <= lastidx; i++) {
LYXERR(Debug::FIND, "Match " << i << " is " << match.capturedEnd(i) - match.capturedStart(i) << " long"); LYXERR(Debug::FIND, "Match " << i << " is " << match.capturedEnd(i) - match.capturedStart(i) << " long");
@ -3162,17 +3173,11 @@ MatchResult MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_be
else else
result = match.capturedStart(lastidx + 1 - close_wildcards) - match.capturedStart(0); result = match.capturedStart(lastidx + 1 - close_wildcards) - match.capturedStart(0);
pos = match.capturedStart(0); pos = match.capturedStart(1);
// Ignore last closing characters
while (result > 0) {
if (qstr.at(pos+result-1) == '}')
--result;
else
break;
}
#else #else
if (m.size() > 1) if (m.size() > 2) {
leadingsize = m[1].second - m[1].first; leadingsize = m[1].second - m[1].first;
}
for (size_t i = 0; i < m.size(); i++) { for (size_t i = 0; i < m.size(); i++) {
LYXERR(Debug::FIND, "Match " << i << " is " << m[i].second - m[i].first << " long"); LYXERR(Debug::FIND, "Match " << i << " is " << m[i].second - m[i].first << " long");
} }
@ -3180,27 +3185,23 @@ MatchResult MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_be
result = m[0].second - m[0].first; result = m[0].second - m[0].first;
else else
result = m[m.size() - close_wildcards].first - m[0].first; result = m[m.size() - close_wildcards].first - m[0].first;
pos = m.position(size_t(0)); pos = m.position(size_t(1));
// Ignore last closing characters
while (result > 0) {
if (str[pos+result-1] == '}')
--result;
else
break;
}
#endif #endif
if (result > leadingsize) if (result > leadingsize)
result -= leadingsize; result -= leadingsize;
else else
result = 0; result = 0;
#if (QT_VERSION >= 0x050000) #if QTSEARCH
mres.match_len = computeSize(QStringRef(&qstr, pos+leadingsize,result), result); mres.match_prefix = match.capturedEnd(2) - match.capturedStart(2);
mres.match2end = qstr.size() - pos - leadingsize; mres.match_len = computeSize(QStringRef(&qstr, pos+leadingsize,result), result) - mres.match_prefix;
mres.match2end = qstr.size() - pos - leadingsize - mres.match_prefix;
#else #else
mres.match_len = computeSize(str.substr(pos+leadingsize,result), result); mres.match_prefix = m[2].second - m[2].first;
mres.match2end = str.size() - pos - leadingsize; mres.match_len = computeSize(str.substr(pos+leadingsize,result), result) - mres.match_prefix;
mres.match2end = str.size() - pos - leadingsize - mres.match_prefix;
#endif #endif
mres.pos = pos+leadingsize; mres.pos = pos+leadingsize + mres.match_prefix;
mres.leadsize = leadingsize;
return mres; return mres;
} }
@ -3558,6 +3559,20 @@ int findAdvFinalize(DocIterator & cur, MatchStringAdv const & match)
} }
#if 0
/// Debug helper: writes every field of a MatchResult to the log via
/// LYXERR0, one field per line.
/// @param mres  the result to dump; taken by const reference because it
///              is only read, so const objects and temporaries can be
///              passed as well.
static void displayMResult(MatchResult const & mres)
{
	LYXERR0( "pos: " << mres.pos);
	LYXERR0( "leadsize: " << mres.leadsize);
	LYXERR0( "match_len: " << mres.match_len);
	LYXERR0( "match_prefix: " << mres.match_prefix);
	LYXERR0( "match2end: " << mres.match2end);
}
#define displayMres(s) displayMResult(s);
#else
#define displayMres(s)
#endif
/// Finds forward /// Finds forward
int findForwardAdv(DocIterator & cur, MatchStringAdv const & match) int findForwardAdv(DocIterator & cur, MatchStringAdv const & match)
{ {
@ -3566,16 +3581,18 @@ int findForwardAdv(DocIterator & cur, MatchStringAdv const & match)
while (!theApp()->longOperationCancelled() && cur) { while (!theApp()->longOperationCancelled() && cur) {
LYXERR(Debug::FIND, "findForwardAdv() cur: " << cur); LYXERR(Debug::FIND, "findForwardAdv() cur: " << cur);
MatchResult mres = match(cur, -1, false); MatchResult mres = match(cur, -1, false);
displayMres(mres)
int match_len = mres.match_len; int match_len = mres.match_len;
LYXERR(Debug::FIND, "match_len: " << match_len);
if ((mres.pos > 100000) || (mres.match2end > 100000) || (match_len > 100000)) { if ((mres.pos > 100000) || (mres.match2end > 100000) || (match_len > 100000)) {
LYXERR(Debug::INFO, "BIG LENGTHS: " << mres.pos << ", " << match_len << ", " << mres.match2end); LYXERR(Debug::INFO, "BIG LENGTHS: " << mres.pos << ", " << match_len << ", " << mres.match2end);
match_len = 0; match_len = 0;
} }
if (match_len > 0) { if (match_len > 0) {
// Try to find the begin of searched string // Try to find the begin of searched string
int increment = mres.pos/2; int increment;
while (mres.pos > 5 && (increment > 5)) { increment = mres.match_prefix/2;
LYXERR(Debug::FIND, "Set increment to " << increment);
while (mres.match_prefix > 1 && (increment > 1)) {
DocIterator old_cur = cur; DocIterator old_cur = cur;
for (int i = 0; i < increment && cur; cur.forwardPos(), i++) { for (int i = 0; i < increment && cur; cur.forwardPos(), i++) {
} }
@ -3587,23 +3604,32 @@ int findForwardAdv(DocIterator & cur, MatchStringAdv const & match)
} }
else { else {
MatchResult mres2 = match(cur, -1, false); MatchResult mres2 = match(cur, -1, false);
displayMres(mres2)
if ((mres2.match2end < mres.match2end) || if ((mres2.match2end < mres.match2end) ||
(mres2.match_len < mres.match_len)) { (mres2.match_len < mres.match_len)) {
cur = old_cur; cur = old_cur;
increment /= 2; increment /= 2;
} }
else { else if ((mres2.match2end == mres.match2end) && (mres2.match_len == mres.match_len)) {
// next part with the same increment as before
mres = mres2; mres = mres2;
increment -= 2; if (increment > mres.match_prefix/2)
if (increment > mres.pos/2) increment = mres.match_prefix/2;
increment = mres.pos/2; }
else {
// Something wrong here
LYXERR0( "Increment = " << increment << " match_prefix = " << mres.match_prefix);
break;
} }
} }
} }
int match_len_zero_count = 0; int match_len_zero_count = 0;
MatchResult mres3;
for (int i = 0; !theApp()->longOperationCancelled() && cur; cur.forwardPos()) { for (int i = 0; !theApp()->longOperationCancelled() && cur; cur.forwardPos()) {
if (i++ > 10) { if (i++ > 3) {
int remaining_len = match(cur, -1, false).match_len; mres3 = match(cur, -1, false);
displayMres(mres3)
int remaining_len = mres3.match_len;
if (remaining_len <= 0) { if (remaining_len <= 0) {
// Apparently the searched string is not in the remaining part // Apparently the searched string is not in the remaining part
break; break;
@ -3613,26 +3639,29 @@ int findForwardAdv(DocIterator & cur, MatchStringAdv const & match)
} }
} }
LYXERR(Debug::FIND, "Advancing cur: " << cur); LYXERR(Debug::FIND, "Advancing cur: " << cur);
int match_len3 = match(cur, 1).match_len; mres3 = match(cur, 1);
displayMres(mres3)
int match_len3 = mres3.match_len;
if (match_len3 < 0) if (match_len3 < 0)
continue; continue;
int match_len2 = match(cur).match_len; mres3 = match(cur);
displayMres(mres3)
int match_len2 = mres3.match_len;
LYXERR(Debug::FIND, "match_len2: " << match_len2); LYXERR(Debug::FIND, "match_len2: " << match_len2);
if (match_len2 > 0) { if (match_len2 > 0) {
// Sometimes in finalize we understand it wasn't a match // Sometimes in finalize we understand it wasn't a match
// and we need to continue the outermost loop // and we need to continue the outermost loop
LYXERR(Debug::FIND, "Finalizing");
int len = findAdvFinalize(cur, match); int len = findAdvFinalize(cur, match);
if (len > 0) { if (len > 0) {
return len; return len;
} }
} }
if (match_len2 >= 0) { if (match_len2 > 0)
if (match_len2 == 0) match_len_zero_count = 0;
match_len_zero_count++; else if (match_len2 == 0)
else match_len_zero_count++;
match_len_zero_count = 0; if (match_len2 < 0) {
}
else {
if (++match_len_zero_count > 3) { if (++match_len_zero_count > 3) {
LYXERR(Debug::FIND, "match_len2_zero_count: " << match_len_zero_count << ", match_len was " << match_len); LYXERR(Debug::FIND, "match_len2_zero_count: " << match_len_zero_count << ", match_len was " << match_len);
} }
@ -3913,7 +3942,7 @@ bool findAdv(BufferView * bv, FindAndReplaceOptions const & opt)
try { try {
MatchStringAdv matchAdv(bv->buffer(), opt); MatchStringAdv matchAdv(bv->buffer(), opt);
#if (QT_VERSION >= 0x050000) #if QTSEARCH
if (!matchAdv.regexIsValid) { if (!matchAdv.regexIsValid) {
bv->message(lyx::from_utf8(matchAdv.regexError)); bv->message(lyx::from_utf8(matchAdv.regexError));
return(false); return(false);