mirror of
https://git.lyx.org/repos/lyx.git
synced 2024-12-22 13:18:28 +00:00
FindAdv: Converting patterns without regex to use regex instead
Makes the code with less exceptions (no need to differentiate beteen use_regex and !use_regex) Move the creation of regexes to own subroutine (Handles '#if QTSEARCH ... #endif') Use cursor position differences instead of length of matched string. This is important for putSelectionAt()
This commit is contained in:
parent
c77ab339c1
commit
31c3bddd6e
273
src/lyxfind.cpp
273
src/lyxfind.cpp
@ -867,8 +867,9 @@ public:
|
||||
int match2end;
|
||||
int pos;
|
||||
int leadsize;
|
||||
int pos_len;
|
||||
vector <string> result = vector <string>();
|
||||
MatchResult(int len = 0): match_len(len),match_prefix(0),match2end(0), pos(0),leadsize(0) {};
|
||||
MatchResult(int len = 0): match_len(len),match_prefix(0),match2end(0), pos(0),leadsize(0),pos_len(-1) {};
|
||||
};
|
||||
|
||||
static MatchResult::range interpretMatch(MatchResult &oldres, MatchResult &newres)
|
||||
@ -921,6 +922,7 @@ public:
|
||||
private:
|
||||
/// Auxiliary find method (does not account for opt.matchword)
|
||||
MatchResult findAux(DocIterator const & cur, int len = -1, bool at_begin = true) const;
|
||||
void CreateRegexp(FindAndReplaceOptions const & opt, string regexp_str, string regexp2_str, string par_as_string = "");
|
||||
|
||||
/** Normalize a stringified or latexified LyX paragraph.
|
||||
**
|
||||
@ -935,7 +937,7 @@ private:
|
||||
** @todo Normalization should also expand macros, if the corresponding
|
||||
** search option was checked.
|
||||
**/
|
||||
string normalize(docstring const & s, bool hack_braces) const;
|
||||
string normalize(docstring const & s) const;
|
||||
// normalized string to search
|
||||
string par_as_string;
|
||||
// regular expression to use for searching
|
||||
@ -2914,9 +2916,65 @@ static int identifyClosing(string & t)
|
||||
static int num_replaced = 0;
|
||||
static bool previous_single_replace = true;
|
||||
|
||||
void MatchStringAdv::CreateRegexp(FindAndReplaceOptions const & opt, string regexp_str, string regexp2_str, string par_as_string)
|
||||
{
|
||||
#if QTSEARCH
|
||||
// Handle \w properly
|
||||
QRegularExpression::PatternOptions popts = QRegularExpression::UseUnicodePropertiesOption | QRegularExpression::MultilineOption;
|
||||
if (! opt.casesensitive) {
|
||||
popts |= QRegularExpression::CaseInsensitiveOption;
|
||||
}
|
||||
regexp = QRegularExpression(QString::fromStdString(regexp_str), popts);
|
||||
regexp2 = QRegularExpression(QString::fromStdString(regexp2_str), popts);
|
||||
regexError = "";
|
||||
if (regexp.isValid() && regexp2.isValid()) {
|
||||
regexIsValid = true;
|
||||
// Check '{', '}' pairs inside the regex
|
||||
int balanced = 0;
|
||||
int skip = 1;
|
||||
for (unsigned i = 0; i < par_as_string.size(); i+= skip) {
|
||||
char c = par_as_string[i];
|
||||
if (c == '\\') {
|
||||
skip = 2;
|
||||
continue;
|
||||
}
|
||||
if (c == '{')
|
||||
balanced++;
|
||||
else if (c == '}') {
|
||||
balanced--;
|
||||
if (balanced < 0)
|
||||
break;
|
||||
}
|
||||
skip = 1;
|
||||
}
|
||||
if (balanced != 0) {
|
||||
regexIsValid = false;
|
||||
regexError = "Unbalanced curly brackets in regexp \"" + regexp_str + "\"";
|
||||
}
|
||||
}
|
||||
else {
|
||||
regexIsValid = false;
|
||||
if (!regexp.isValid())
|
||||
regexError += "Invalid regexp \"" + regexp_str + "\", error = " + regexp.errorString().toStdString();
|
||||
else
|
||||
regexError += "Invalid regexp2 \"" + regexp2_str + "\", error = " + regexp2.errorString().toStdString();
|
||||
}
|
||||
#else
|
||||
if (opt.casesensitive) {
|
||||
regexp = regex(regexp_str);
|
||||
regexp2 = regex(regexp2_str);
|
||||
}
|
||||
else {
|
||||
regexp = regex(regexp_str, std::regex_constants::icase);
|
||||
regexp2 = regex(regexp2_str, std::regex_constants::icase);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions const & opt)
|
||||
: p_buf(&buf), p_first_buf(&buf), opt(opt)
|
||||
{
|
||||
static std::regex specialChars { R"([-[\]{}()*+?.,\^$|#\s\\])" };
|
||||
Buffer & find_buf = *theBufferList().getBuffer(FileName(to_utf8(opt.find_buf_name)), true);
|
||||
docstring const & ds = stringifySearchBuffer(find_buf, opt);
|
||||
use_regexp = lyx::to_utf8(ds).find("\\regexp{") != std::string::npos;
|
||||
@ -2929,38 +2987,45 @@ MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions const &
|
||||
previous_single_replace = true;
|
||||
}
|
||||
// When using regexp, braces are hacked already by escape_for_regex()
|
||||
par_as_string = normalize(ds, !use_regexp);
|
||||
par_as_string = normalize(ds);
|
||||
open_braces = 0;
|
||||
close_wildcards = 0;
|
||||
|
||||
size_t lead_size = 0;
|
||||
// correct the language settings
|
||||
par_as_string = correctlanguagesetting(par_as_string, true, !opt.ignoreformat);
|
||||
if (opt.ignoreformat) {
|
||||
if (!use_regexp) {
|
||||
// if par_as_string_nolead were emty,
|
||||
// the following call to findAux will always *find* the string
|
||||
// in the checked data, and thus always using the slow
|
||||
// examining of the current text part.
|
||||
par_as_string_nolead = par_as_string;
|
||||
if (!use_regexp) {
|
||||
identifyClosing(par_as_string); // Removes math closings ($, ], ...) at end of string
|
||||
if (opt.ignoreformat) {
|
||||
lead_size = 0;
|
||||
}
|
||||
} else {
|
||||
else {
|
||||
lead_size = identifyLeading(par_as_string);
|
||||
}
|
||||
lead_as_string = par_as_string.substr(0, lead_size);
|
||||
string lead_as_regex_string = std::regex_replace(lead_as_string, specialChars, R"(\$&)" );
|
||||
par_as_string_nolead = par_as_string.substr(lead_size, par_as_string.size() - lead_size);
|
||||
string par_as_regex_string_nolead = std::regex_replace(par_as_string_nolead, specialChars, R"(\$&)" );
|
||||
string regexp_str = "(" + lead_as_regex_string + ")()" + par_as_regex_string_nolead;
|
||||
string regexp2_str = "(" + lead_as_regex_string + ")(.*?)" + par_as_regex_string_nolead;
|
||||
CreateRegexp(opt, regexp_str, regexp2_str);
|
||||
use_regexp = true;
|
||||
return;
|
||||
}
|
||||
|
||||
if (!opt.ignoreformat) {
|
||||
lead_size = identifyLeading(par_as_string);
|
||||
LYXERR(Debug::FIND, "Lead_size: " << lead_size);
|
||||
lead_as_string = par_as_string.substr(0, lead_size);
|
||||
par_as_string_nolead = par_as_string.substr(lead_size, par_as_string.size() - lead_size);
|
||||
}
|
||||
|
||||
if (!use_regexp) {
|
||||
open_braces = identifyClosing(par_as_string);
|
||||
identifyClosing(par_as_string_nolead);
|
||||
LYXERR(Debug::FIND, "Open braces: " << open_braces);
|
||||
LYXERR(Debug::FIND, "Built MatchStringAdv object: par_as_string = '" << par_as_string << "'");
|
||||
} else {
|
||||
// Here we are using regexp
|
||||
LASSERT(use_regexp, /**/);
|
||||
{
|
||||
string lead_as_regexp;
|
||||
if (lead_size > 0) {
|
||||
// @todo No need to search for \regexp{} insets in leading material
|
||||
static std::regex specialChars { R"([-[\]{}()*+?.,\^$|#\s\\])" };
|
||||
lead_as_regexp = std::regex_replace(par_as_string.substr(0, lead_size), specialChars, R"(\$&)" );
|
||||
// lead_as_regexp = escape_for_regex(par_as_string.substr(0, lead_size), !opt.ignoreformat);
|
||||
par_as_string = par_as_string_nolead;
|
||||
@ -3001,18 +3066,6 @@ MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions const &
|
||||
}
|
||||
if (lng < par_as_string.size())
|
||||
par_as_string = par_as_string.substr(0,lng);
|
||||
/*
|
||||
// save '\.'
|
||||
regex_replace(par_as_string, par_as_string, "\\\\\\.", "_xxbdotxx_");
|
||||
// handle '.' -> '[^]', replace later as '[^\}\{\\]'
|
||||
regex_replace(par_as_string, par_as_string, "\\.", "[^]");
|
||||
// replace '[^...]' with '[^...\}\{\\]'
|
||||
regex_replace(par_as_string, par_as_string, "\\[\\^([^\\\\\\]]*)\\]", "_xxbrlxx_$1\\}\\{\\\\_xxbrrxx_");
|
||||
regex_replace(par_as_string, par_as_string, "_xxbrlxx_", "[^");
|
||||
regex_replace(par_as_string, par_as_string, "_xxbrrxx_", "]");
|
||||
// restore '\.'
|
||||
regex_replace(par_as_string, par_as_string, "_xxbdotxx_", "\\.");
|
||||
*/
|
||||
}
|
||||
LYXERR(Debug::FIND, "par_as_string now is '" << par_as_string << "'");
|
||||
LYXERR(Debug::FIND, "Open braces: " << open_braces);
|
||||
@ -3037,57 +3090,7 @@ MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions const &
|
||||
}
|
||||
LYXERR(Debug::FIND, "Setting regexp to : '" << regexp_str << "'");
|
||||
LYXERR(Debug::FIND, "Setting regexp2 to: '" << regexp2_str << "'");
|
||||
#if QTSEARCH
|
||||
// Handle \w properly
|
||||
QRegularExpression::PatternOptions popts = QRegularExpression::UseUnicodePropertiesOption | QRegularExpression::MultilineOption;
|
||||
if (! opt.casesensitive) {
|
||||
popts |= QRegularExpression::CaseInsensitiveOption;
|
||||
}
|
||||
regexp = QRegularExpression(QString::fromStdString(regexp_str), popts);
|
||||
regexp2 = QRegularExpression(QString::fromStdString(regexp2_str), popts);
|
||||
regexError = "";
|
||||
if (regexp.isValid() && regexp2.isValid()) {
|
||||
regexIsValid = true;
|
||||
// Check '{', '}' pairs inside the regex
|
||||
int balanced = 0;
|
||||
int skip = 1;
|
||||
for (unsigned i = 0; i < par_as_string.size(); i+= skip) {
|
||||
char c = par_as_string[i];
|
||||
if (c == '\\') {
|
||||
skip = 2;
|
||||
continue;
|
||||
}
|
||||
if (c == '{')
|
||||
balanced++;
|
||||
else if (c == '}') {
|
||||
balanced--;
|
||||
if (balanced < 0)
|
||||
break;
|
||||
}
|
||||
skip = 1;
|
||||
}
|
||||
if (balanced != 0) {
|
||||
regexIsValid = false;
|
||||
regexError = "Unbalanced curly brackets in regexp \"" + regexp_str + "\"";
|
||||
}
|
||||
}
|
||||
else {
|
||||
regexIsValid = false;
|
||||
if (!regexp.isValid())
|
||||
regexError += "Invalid regexp \"" + regexp_str + "\", error = " + regexp.errorString().toStdString();
|
||||
if (!regexp2.isValid())
|
||||
regexError += "Invalid regexp2 \"" + regexp2_str + "\", error = " + regexp2.errorString().toStdString();
|
||||
}
|
||||
#else
|
||||
if (opt.casesensitive) {
|
||||
regexp = regex(regexp_str);
|
||||
regexp2 = regex(regexp2_str);
|
||||
}
|
||||
else {
|
||||
regexp = regex(regexp_str, std::regex_constants::icase);
|
||||
regexp2 = regex(regexp2_str, std::regex_constants::icase);
|
||||
}
|
||||
#endif
|
||||
CreateRegexp(opt, regexp_str, regexp2_str, par_as_string);
|
||||
}
|
||||
}
|
||||
|
||||
@ -3164,9 +3167,9 @@ MatchResult MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_be
|
||||
docstring docstr = stringifyFromForSearch(opt, cur, len);
|
||||
string str;
|
||||
if (use_regexp || opt.casesensitive)
|
||||
str = normalize(docstr, true);
|
||||
str = normalize(docstr);
|
||||
else
|
||||
str = normalize(lowercase(docstr), true);
|
||||
str = normalize(lowercase(docstr));
|
||||
if (!opt.ignoreformat) {
|
||||
str = correctlanguagesetting(str, false, !opt.ignoreformat);
|
||||
}
|
||||
@ -3377,6 +3380,7 @@ MatchResult MatchStringAdv::operator()(DocIterator const & cur, int len, bool at
|
||||
return mres;
|
||||
}
|
||||
|
||||
#if 0
|
||||
static bool simple_replace(string &t, string from, string to)
|
||||
{
|
||||
regex repl("(\\\\)*(" + from + ")");
|
||||
@ -3399,8 +3403,9 @@ static bool simple_replace(string &t, string from, string to)
|
||||
t = s;
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
|
||||
string MatchStringAdv::normalize(docstring const & s, bool hack_braces) const
|
||||
string MatchStringAdv::normalize(docstring const & s) const
|
||||
{
|
||||
string t;
|
||||
t = lyx::to_utf8(s);
|
||||
@ -3440,18 +3445,6 @@ string MatchStringAdv::normalize(docstring const & s, bool hack_braces) const
|
||||
while (regex_replace(t, t, "\\\\((sub)?(((sub)?section)|paragraph)|part)\\*?(\\{(\\{\\})?\\})+", ""))
|
||||
LYXERR(Debug::FIND, " further removing stale empty \\emph{}, \\textbf{} macros from: " << t);
|
||||
while (regex_replace(t, t, "\\\\(foreignlanguage|textcolor|item)\\{[a-z]+\\}(\\{(\\{\\})?\\})+", ""));
|
||||
// FIXME - check what preceeds the brace
|
||||
if (hack_braces) {
|
||||
if (opt.ignoreformat)
|
||||
while (regex_replace(t, t, "\\{", "_x_<")
|
||||
|| regex_replace(t, t, "\\}", "_x_>"))
|
||||
LYXERR(Debug::FIND, "After {} replacement: '" << t << "'");
|
||||
else {
|
||||
simple_replace(t, "\\\\\\{", "_x_<");
|
||||
simple_replace(t, "\\\\\\}", "_x_>");
|
||||
LYXERR(Debug::FIND, "After {} replacement: '" << t << "'");
|
||||
}
|
||||
}
|
||||
|
||||
return t;
|
||||
}
|
||||
@ -3581,6 +3574,7 @@ static void displayMResult(MatchResult &mres, int increment)
|
||||
LYXERR0( "match_len: " << mres.match_len);
|
||||
LYXERR0( "match_prefix: " << mres.match_prefix);
|
||||
LYXERR0( "match2end: " << mres.match2end);
|
||||
LYXERR0( "pos_len: " << mres.pos_len); // Set in finalize
|
||||
for (size_t i = 0; i < mres.result.size(); i++)
|
||||
LYXERR0( "Match " << i << " = \"" << mres.result[i] << "\"");
|
||||
}
|
||||
@ -3620,6 +3614,7 @@ static bool findAdvForwardInnermost(DocIterator & cur)
|
||||
/** Finalize an advanced find operation, advancing the cursor to the innermost
|
||||
** position that matches, plus computing the length of the matching text to
|
||||
** be selected
|
||||
** Return the cur.pos() difference between start and end of found match
|
||||
**/
|
||||
MatchResult &findAdvFinalize(DocIterator & cur, MatchStringAdv const & match, MatchResult const & expected = MatchResult(-1))
|
||||
{
|
||||
@ -3660,38 +3655,38 @@ MatchResult &findAdvFinalize(DocIterator & cur, MatchStringAdv const & match, Ma
|
||||
LYXERR(Debug::FIND, "Ok");
|
||||
|
||||
// Compute the match length
|
||||
int len = 1;
|
||||
int len = 1;
|
||||
if (cur.pos() + len > cur.lastpos())
|
||||
return fail;
|
||||
// regexp should use \w+, \S+, or \b(some string)\b
|
||||
// to search for whole words
|
||||
if (match.opt.matchword && !match.use_regexp) {
|
||||
LYXERR(Debug::FIND, "verifying unmatch with len = " << len);
|
||||
while (cur.pos() + len <= cur.lastpos() && match(cur, len).match_len <= 0) {
|
||||
++len;
|
||||
LYXERR(Debug::FIND, "verifying unmatch with len = " << len);
|
||||
}
|
||||
// Length of matched text (different from len param)
|
||||
static MatchResult old_match = match(cur, len, at_begin);
|
||||
if (old_match.match_len < 0)
|
||||
old_match = fail;
|
||||
MatchResult new_match;
|
||||
// Greedy behaviour while matching regexps
|
||||
while ((new_match = match(cur, len + 1, at_begin)).match_len > old_match.match_len) {
|
||||
++len;
|
||||
old_match = new_match;
|
||||
LYXERR(Debug::FIND, "verifying match with len = " << len);
|
||||
}
|
||||
LYXERR(Debug::FIND, "verifying unmatch with len = " << len);
|
||||
while (cur.pos() + len <= cur.lastpos() && match(cur, len).match_len <= 0) {
|
||||
++len;
|
||||
LYXERR(Debug::FIND, "verifying unmatch with len = " << len);
|
||||
}
|
||||
// Length of matched text (different from len param)
|
||||
static MatchResult old_match = match(cur, len, at_begin);
|
||||
if (old_match.match_len < 0)
|
||||
old_match = fail;
|
||||
MatchResult new_match;
|
||||
// Greedy behaviour while matching regexps
|
||||
while ((new_match = match(cur, len + 1, at_begin)).match_len > old_match.match_len) {
|
||||
++len;
|
||||
old_match = new_match;
|
||||
LYXERR(Debug::FIND, "verifying match with len = " << len);
|
||||
}
|
||||
return old_match;
|
||||
}
|
||||
else {
|
||||
}
|
||||
else {
|
||||
int minl = 1;
|
||||
int maxl = cur.lastpos() - cur.pos();
|
||||
// Greedy behaviour while matching regexps
|
||||
while (maxl > minl) {
|
||||
MatchResult mres2;
|
||||
mres2 = match(cur, len, at_begin);
|
||||
displayMres(mres2, len);
|
||||
MatchResult mres2;
|
||||
mres2 = match(cur, len, at_begin);
|
||||
displayMres(mres2, len);
|
||||
int actual_match = mres2.match_len;
|
||||
if (actual_match >= max_match.match_len) {
|
||||
// actual_match > max_match _can_ happen,
|
||||
@ -3743,15 +3738,17 @@ MatchResult &findAdvFinalize(DocIterator & cur, MatchStringAdv const & match, Ma
|
||||
actual_match = match(cur, len, at_begin);
|
||||
if (actual_match.match_len == max_match.match_len) {
|
||||
old_cur = cur;
|
||||
max_match = actual_match;
|
||||
}
|
||||
max_match = actual_match;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (len == 0)
|
||||
return fail;
|
||||
else
|
||||
return max_match;
|
||||
}
|
||||
if (len == 0)
|
||||
return fail;
|
||||
else {
|
||||
max_match.pos_len = len;
|
||||
return max_match;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Finds forward
|
||||
@ -3825,8 +3822,9 @@ int findForwardAdv(DocIterator & cur, MatchStringAdv & match)
|
||||
LYXERR(Debug::FIND, "Finalizing 1");
|
||||
MatchResult found_match = findAdvFinalize(cur, match, mres);
|
||||
if (found_match.match_len > 0) {
|
||||
LASSERT(found_match.pos_len > 0, /**/);
|
||||
match.FillResults(found_match);
|
||||
return found_match.match_len;
|
||||
return found_match.pos_len;
|
||||
}
|
||||
else {
|
||||
// try next possible match
|
||||
@ -3868,7 +3866,8 @@ int findForwardAdv(DocIterator & cur, MatchStringAdv & match)
|
||||
MatchResult mres4 = findAdvFinalize(cur, match, mres.match_len);
|
||||
if (mres4.match_len > 0) {
|
||||
match.FillResults(mres4);
|
||||
return mres4.match_len;
|
||||
LASSERT(mres4.pos_len > 0, /**/);
|
||||
return mres4.pos_len;
|
||||
}
|
||||
}
|
||||
if (match_len2 > 0)
|
||||
@ -3952,7 +3951,8 @@ int findBackwardsAdv(DocIterator & cur, MatchStringAdv & match)
|
||||
if (found_match) {
|
||||
MatchResult found_mr = findMostBackwards(cur, match);
|
||||
match.FillResults(found_mr);
|
||||
return found_mr.match_len;
|
||||
LASSERT(found_mr.pos_len > 0, /**/);
|
||||
return found_mr.pos_len;
|
||||
}
|
||||
|
||||
// Stop if begin of document reached
|
||||
@ -4182,7 +4182,7 @@ static int findAdvReplace(BufferView * bv, FindAndReplaceOptions const & opt, Ma
|
||||
bool findAdv(BufferView * bv, FindAndReplaceOptions const & opt)
|
||||
{
|
||||
DocIterator cur;
|
||||
int match_len = 0;
|
||||
int pos_len = 0;
|
||||
|
||||
// e.g., when invoking word-findadv from mini-buffer wither with
|
||||
// wrong options syntax or before ever opening advanced F&R pane
|
||||
@ -4203,15 +4203,15 @@ bool findAdv(BufferView * bv, FindAndReplaceOptions const & opt)
|
||||
num_replaced += findAdvReplace(bv, opt, matchAdv);
|
||||
cur = bv->cursor();
|
||||
if (opt.forward)
|
||||
match_len = findForwardAdv(cur, matchAdv);
|
||||
pos_len = findForwardAdv(cur, matchAdv);
|
||||
else
|
||||
match_len = findBackwardsAdv(cur, matchAdv);
|
||||
pos_len = findBackwardsAdv(cur, matchAdv);
|
||||
} catch (exception & ex) {
|
||||
bv->message(from_utf8(ex.what()));
|
||||
return false;
|
||||
}
|
||||
|
||||
if (match_len == 0) {
|
||||
if (pos_len == 0) {
|
||||
if (num_replaced > 0) {
|
||||
switch (num_replaced)
|
||||
{
|
||||
@ -4238,8 +4238,13 @@ bool findAdv(BufferView * bv, FindAndReplaceOptions const & opt)
|
||||
else
|
||||
bv->message(_("Match found."));
|
||||
|
||||
LYXERR(Debug::FIND, "Putting selection at cur=" << cur << " with len: " << match_len);
|
||||
bv->putSelectionAt(cur, match_len, !opt.forward);
|
||||
if (cur.pos() + pos_len > cur.lastpos()) {
|
||||
// Prevent crash in bv->putSelectionAt()
|
||||
// Should never happen, maybe LASSERT() here?
|
||||
pos_len = cur.lastpos() - cur.pos();
|
||||
}
|
||||
LYXERR(Debug::FIND, "Putting selection at cur=" << cur << " with len: " << pos_len);
|
||||
bv->putSelectionAt(cur, pos_len, !opt.forward);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user