Find: Use greedy behaviour

This change is valid for findadv too.
Patterns like '.*' now are greedy, like it is normal in regex
Searching for whole words is corrected, but can be slow.
One can speed up the search with adapted pattern.
So for instance searching for words starting and ending with 'r'
the normal pattern is 'r.*r'. The speed-up pattern could be
'\br[^\s]*r\b'. This halves the search time.

Search results are now different to that of lyx2.3, because the greedy
'.*' is now really greedy.
To achive the same results, we have to use '.*?' instead.
This commit is contained in:
Kornel Benko 2018-11-25 17:51:20 +01:00
parent e2a3dd1167
commit e96a9d6329

View File

@ -1863,7 +1863,7 @@ int LatexInfo::dispatch(ostringstream &os, int previousStart, KeyInfo &actual)
// Remove the key with all parameters and following spaces
size_t pos;
for (pos = actual._dataEnd+1; pos < interval.par.length(); pos++) {
if (interval.par[pos] != ' ')
if ((interval.par[pos] != ' ') && (interval.par[pos] != '%'))
break;
}
interval.addIntervall(actual._tokenstart, pos);
@ -2406,7 +2406,7 @@ int MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_begin) con
else
result = m[m.size() - close_wildcards].first - m[0].first;
size_t pos = m.position(size_t(0));
size_t pos = m.position(0);
// Ignore last closing characters
while (result > 0) {
if (str[pos+result-1] == '}')
@ -2451,19 +2451,28 @@ int MatchStringAdv::operator()(DocIterator const & cur, int len, bool at_begin)
<< ", inTexted=" << cur.inTexted());
if (res == 0 || !at_begin || !opt.matchword || !cur.inTexted())
return res;
if ((len > 0) && (res < len))
return 0;
Paragraph const & par = cur.paragraph();
bool ws_left = (cur.pos() > 0)
? par.isWordSeparator(cur.pos() - 1)
: true;
bool ws_right = (cur.pos() + res < par.size())
? par.isWordSeparator(cur.pos() + res)
bool ws_right = (cur.pos() + len < par.size())
? par.isWordSeparator(cur.pos() + len)
: true;
LYXERR(Debug::FIND,
"cur.pos()=" << cur.pos() << ", res=" << res
<< ", separ: " << ws_left << ", " << ws_right
<< ", len: " << len
<< endl);
if (ws_left && ws_right)
if (ws_left && ws_right) {
// Check for word separators inside the found 'word'
for (int i = 0; i < len; i++) {
if (par.isWordSeparator(cur.pos() + i))
return 0;
}
return res;
}
return 0;
}
@ -2641,11 +2650,13 @@ int findAdvFinalize(DocIterator & cur, MatchStringAdv const & match)
cur.forwardPos();
} while (cur && cur.depth() > d && match(cur) > 0);
cur = old_cur;
if (match(cur) <= 0) return 0;
int max_match = match(cur); /* match valid only if not searching whole words */
if (max_match <= 0) return 0;
LYXERR(Debug::FIND, "Ok");
// Compute the match length
int len = 1;
if (match.opt.matchword) {
if (cur.pos() + len > cur.lastpos())
return 0;
LYXERR(Debug::FIND, "verifying unmatch with len = " << len);
@ -2657,57 +2668,34 @@ int findAdvFinalize(DocIterator & cur, MatchStringAdv const & match)
int old_match = match(cur, len);
if (old_match < 0)
old_match = 0;
int prev_old_match = old_match;
int old_len = len;
int step;
int new_match;
if (match.opt.matchword)
step = 1;
else
step = 2 + (cur.lastpos() - cur.pos())/4;
while (step > 4) {
if (cur.pos() + len + step >= cur.lastpos()) {
step = 2 + step/4;
len = old_len;
old_match = prev_old_match;
}
else {
new_match = match(cur, len + step);
if (new_match > old_match) {
prev_old_match = old_match;
old_match = new_match;
old_len = len;
len += step;
}
else {
step = 2 + step/4;
len = old_len;
old_match = prev_old_match;
}
}
}
// Greedy behaviour while matching regexps
bool examining = true;
while (examining) {
examining = false;
// Kornel: The loop is needed, since it looks like
// incrementing 'cur.pos()' does not always lead to the following
// char which we could then match.
int maxcnt;
if (match.opt.matchword)
maxcnt = 2;
else
maxcnt = 4;
for (int count = 1; count < maxcnt; ++count) {
if (cur.pos() + len + count > cur.lastpos()) {
break;
}
new_match = match(cur, len + count);
if (new_match > old_match) {
len += count;
while ((new_match = match(cur, len + 1)) > old_match) {
++len;
old_match = new_match;
examining = true;
break;
LYXERR(Debug::FIND, "verifying match with len = " << len);
}
if (old_match == 0)
len = 0;
}
else {
int minl = 1;
int maxl = cur.lastpos() - cur.pos();
// Greedy behaviour while matching regexps
while (maxl > minl) {
int actual_match = match(cur, len);
if (actual_match == max_match) {
maxl = len;
len = (int)((maxl + minl)/2);
}
else if (actual_match < max_match) {
minl = len + 1;
len = (int)((maxl + minl)/2);
}
else {
// cannot happen, but in case of
LYXERR0("????");
max_match = actual_match;
}
}
}