FindAdv: Added handling for '\w' in regex using non-ascii chars

Also fix some 'out of range accesses' (causing crash in debug-glibc-mode)
This commit is contained in:
Kornel Benko 2020-12-31 16:53:46 +01:00
parent 46aedb4eaf
commit 4e9dc856e4

View File

@ -1064,6 +1064,8 @@ class KeyInfo {
noContent, noContent,
/* Char, like \backslash */ /* Char, like \backslash */
isChar, isChar,
/* remove starting backslash */
isText,
/* \part, \section*, ... */ /* \part, \section*, ... */
isSectioning, isSectioning,
/* title, author etc */ /* title, author etc */
@ -1719,6 +1721,7 @@ void LatexInfo::buildEntries(bool isPatternString)
bool math_end_waiting = false; bool math_end_waiting = false;
size_t math_pos = 10000; size_t math_pos = 10000;
string math_end; string math_end;
static vector<string> usedText = vector<string>();
interval_.removeAccents(); interval_.removeAccents();
@ -1768,6 +1771,11 @@ void LatexInfo::buildEntries(bool isPatternString)
} }
// Ignore language if there is math somewhere in pattern-string // Ignore language if there is math somewhere in pattern-string
if (isPatternString) { if (isPatternString) {
for (auto s: usedText) {
// Remove entries created in previous search runs
keys.erase(s);
}
usedText = vector<string>();
if (! mi.empty()) { if (! mi.empty()) {
// Disable language // Disable language
keys["foreignlanguage"].disabled = true; keys["foreignlanguage"].disabled = true;
@ -1837,12 +1845,15 @@ void LatexInfo::buildEntries(bool isPatternString)
} }
if (keys.find(key) == keys.end()) { if (keys.find(key) == keys.end()) {
found = KeyInfo(KeyInfo::isStandard, 0, true); found = KeyInfo(KeyInfo::isStandard, 0, true);
LYXERR(Debug::INFO, "Undefined key " << key << " ==> will be used as text");
found = KeyInfo(KeyInfo::isText, 0, false);
if (isPatternString) { if (isPatternString) {
found.keytype = KeyInfo::isChar; found.keytype = KeyInfo::isChar;
found.disabled = false; found.disabled = false;
found.used = true; found.used = true;
} }
keys[key] = found; keys[key] = found;
usedText.push_back(key);
} }
else else
found = keys[key]; found = keys[key];
@ -2033,7 +2044,7 @@ void LatexInfo::buildEntries(bool isPatternString)
// Disable this key, treat it as standard // Disable this key, treat it as standard
found.keytype = KeyInfo::isStandard; found.keytype = KeyInfo::isStandard;
found.disabled = true; found.disabled = true;
if ((codeEnd == interval_.par.length()) && if ((codeEnd +1 >= interval_.par.length()) &&
(found._tokenstart == codeStart)) { (found._tokenstart == codeStart)) {
// trickery, because the code inset starts // trickery, because the code inset starts
// with \selectlanguage ... // with \selectlanguage ...
@ -2117,6 +2128,7 @@ void LatexInfo::buildKeys(bool isPatternString)
makeKey("textasciicircum|textasciitilde", KeyInfo(KeyInfo::isChar, 0, false), isPatternString); makeKey("textasciicircum|textasciitilde", KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
makeKey("textasciiacute|texemdash", KeyInfo(KeyInfo::isChar, 0, false), isPatternString); makeKey("textasciiacute|texemdash", KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
makeKey("dots|ldots", KeyInfo(KeyInfo::isChar, 0, false), isPatternString); makeKey("dots|ldots", KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
makeKey("guillemotright|guillemotleft", KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
// Spaces // Spaces
makeKey("quad|qquad|hfill|dotfill", KeyInfo(KeyInfo::isChar, 0, false), isPatternString); makeKey("quad|qquad|hfill|dotfill", KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
makeKey("textvisiblespace|nobreakspace", KeyInfo(KeyInfo::isChar, 0, false), isPatternString); makeKey("textvisiblespace|nobreakspace", KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
@ -2169,6 +2181,7 @@ void LatexInfo::buildKeys(bool isPatternString)
// Survives, like known character // Survives, like known character
// makeKey("lyx|LyX|latex|LaTeX|latexe|LaTeXe|tex|TeX", KeyInfo(KeyInfo::isChar, 0, false), isPatternString); // makeKey("lyx|LyX|latex|LaTeX|latexe|LaTeXe|tex|TeX", KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
makeKey("tableofcontents", KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
makeKey("item|listitem", KeyInfo(KeyInfo::isList, 1, false), isPatternString); makeKey("item|listitem", KeyInfo(KeyInfo::isList, 1, false), isPatternString);
makeKey("begin|end", KeyInfo(KeyInfo::isMath, 1, false), isPatternString); makeKey("begin|end", KeyInfo(KeyInfo::isMath, 1, false), isPatternString);
@ -2357,6 +2370,9 @@ int LatexInfo::dispatch(ostringstream &os, int previousStart, KeyInfo &actual)
interval_.closes[0] = -1; interval_.closes[0] = -1;
break; break;
} }
case KeyInfo::isText:
interval_.addIntervall(actual._tokenstart, actual._tokenstart+1);
break;
case KeyInfo::noContent: { /* char like "\hspace{2cm}" */ case KeyInfo::noContent: { /* char like "\hspace{2cm}" */
if (actual.disabled) if (actual.disabled)
interval_.addIntervall(actual._tokenstart, actual._dataEnd); interval_.addIntervall(actual._tokenstart, actual._dataEnd);
@ -2642,8 +2658,10 @@ int LatexInfo::process(ostringstream & os, KeyInfo const & actual )
int output_end; int output_end;
if (actual._dataEnd < end) if (actual._dataEnd < end)
output_end = interval_.nextNotIgnored(actual._dataEnd); output_end = interval_.nextNotIgnored(actual._dataEnd);
else else if (interval_.par.size() > (size_t) end)
output_end = interval_.nextNotIgnored(end); output_end = interval_.nextNotIgnored(end);
else
output_end = interval_.par.size();
if ((actual.keytype == KeyInfo::isMain) && actual.disabled) { if ((actual.keytype == KeyInfo::isMain) && actual.disabled) {
interval_.addIntervall(actual._tokenstart, actual._tokenstart+actual._tokensize); interval_.addIntervall(actual._tokenstart, actual._tokenstart+actual._tokensize);
} }
@ -2945,12 +2963,10 @@ MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions const &
LYXERR(Debug::FIND, "Setting regexp to : '" << regexp_str << "'"); LYXERR(Debug::FIND, "Setting regexp to : '" << regexp_str << "'");
LYXERR(Debug::FIND, "Setting regexp2 to: '" << regexp2_str << "'"); LYXERR(Debug::FIND, "Setting regexp2 to: '" << regexp2_str << "'");
#if (QT_VERSION >= 0x050000) #if (QT_VERSION >= 0x050000)
QRegularExpression::PatternOptions popts; // Handle \w properly
QRegularExpression::PatternOptions popts = QRegularExpression::UseUnicodePropertiesOption | QRegularExpression::MultilineOption;
if (! opt.casesensitive) { if (! opt.casesensitive) {
popts = QRegularExpression::CaseInsensitiveOption; popts |= QRegularExpression::CaseInsensitiveOption;
}
else {
popts = QRegularExpression::NoPatternOption;
} }
regexp = QRegularExpression(QString::fromStdString(regexp_str), popts); regexp = QRegularExpression(QString::fromStdString(regexp_str), popts);
regexp2 = QRegularExpression(QString::fromStdString(regexp2_str), popts); regexp2 = QRegularExpression(QString::fromStdString(regexp2_str), popts);
@ -3021,15 +3037,17 @@ static int computeSize(string s, int len)
for (int i = 0; i < len; i += skip, count++) { for (int i = 0; i < len; i += skip, count++) {
if (s.at(i) == '\\') { if (s.at(i) == '\\') {
skip = 2; skip = 2;
if (isLyxAlpha(s.at(i+1))) { if (i + 1 < len && isLyxAlpha(s.at(i+1))) {
for (int j = 2; i+j < len; j++) { for (int j = 2; i+j < len; j++) {
if (isLyxAlpha(s.at(i+j))) { if (! isLyxAlpha(s.at(i+j))) {
if (s.at(i+j) == ' ') if (s.at(i+j) == ' ')
skip++; skip++;
else if ((s.at(i+j) == '{') && s.at(i+j+1) == '}') else if (s.at(i+j) == '{') {
if (i+j+1 < len && s.at(i+j+1) == '}')
skip += 2; skip += 2;
else if ((s.at(i+j) == '{') && (i + j + 1 >= len)) else if (i + j + 1 >= len)
skip++; skip++;
}
break; break;
} }
skip++; skip++;
@ -3037,14 +3055,14 @@ static int computeSize(string s, int len)
} }
} }
else if (s.at(i) == '{') { else if (s.at(i) == '{') {
if (s.at(i+1) == '}') if (i + 1 < len && s.at(i+1) == '}')
skip = 2; skip = 2;
else else
skip = 3; skip = 3;
} }
else if (s.at(i) == '-') { else if (s.at(i) == '-') {
if (s.at(i+1) == '-') { if (i+1 < len && s.at(i+1) == '-') {
if (s.at(i+2) == '-') if (i + 2 < len && s.at(i+2) == '-')
skip = 3; skip = 3;
else else
skip = 2; skip = 2;