FindAdv: Always use converted LaTeX macros

In some situations macros are not emitted in their Unicode
representation. For instance, all Greek characters appear
as \alpha, \beta, ... in the LaTeX output.
For searching, it is more convenient to use the corresponding UTF-8 characters.
This commit is contained in:
Kornel Benko 2022-03-26 19:32:33 +01:00
parent b678844481
commit 93b6fe2c48

View File

@ -917,7 +917,7 @@ string escape_for_regex(string s, bool withformat)
if (lastpos == s.size()) if (lastpos == s.size())
break; break;
} }
size_t end_pos = s.find("\\endregexp{", regex_pos + 8); size_t end_pos = s.find("\\endregexp", regex_pos + 8);
result += correctRegex(s.substr(regex_pos + 8, end_pos -(regex_pos + 8)), withformat); result += correctRegex(s.substr(regex_pos + 8, end_pos -(regex_pos + 8)), withformat);
lastpos = end_pos + 13; lastpos = end_pos + 13;
} }
@ -1089,6 +1089,42 @@ static docstring buffer_to_latex(Buffer & buffer)
return ods.str(); return ods.str();
} }
/**
 * Convert \p strIn to UTF-8 and replace the LaTeX macro names found in it
 * by the text stored for them in the accents map.
 *
 * A macro is recognised as a backslash followed by lowercase ASCII letters
 * and an optional single trailing blank (which is consumed together with
 * the macro). Macros without an entry in the accents map are copied through
 * unchanged, as is every piece of text before, between and after macros.
 *
 * \param strIn  text to convert
 * \return       the converted text as a UTF-8 encoded string
 */
static string latexNamesToUtf8(docstring strIn)
{
	string const addtmp = to_utf8(strIn);
	// sub 1: backslashes preceding the macro
	// sub 2: the macro including its backslash and the optional blank
	// sub 3: the bare macro name (lookup key for the accents map)
	static regex const rmAcc("(\\\\)*(\\\\([a-z]+) ?)");
	size_t lastpos = 0;	// first byte of addtmp not yet copied to 'add'
	string replace;
	string add;
	if (accents.empty())
		buildAccentsMap();
	for (sregex_iterator it_add(addtmp.begin(), addtmp.end(), rmAcc), end; it_add != end; ++it_add) {
		smatch const & sub = *it_add;
		// NOTE(review): the match is left untouched when the number of
		// backslashes preceding the macro is 1 modulo 3 -- presumably to
		// respect escaped backslashes; confirm the modulus.
		if ((sub.position(2) - sub.position(0)) % 3 == 1)
			continue;

		AccentsIterator it_ac = accents.find(sub.str(3));
		if (it_ac == accents.end()) {
			// Unknown macro: keep it verbatim
			replace = sub.str(2);
		}
		else {
			replace = it_ac->second;
		}
		// Copy the text between the previous replacement and this macro
		if (lastpos < (size_t) sub.position(2))
			add += addtmp.substr(lastpos, sub.position(2) - lastpos);
		add += replace;
		lastpos = sub.position(2) + sub.length(2);
	}
	// Copy whatever follows the last replaced macro. When nothing was
	// replaced (lastpos == 0) this is the entire input. Previously the tail
	// was dropped as soon as at least one macro had been processed.
	add.append(addtmp, lastpos, string::npos);
	LYXERR(Debug::FIND, "Adding to search string: '"
	       << add << "'");
	return add;
}
static docstring stringifySearchBuffer(Buffer & buffer, FindAndReplaceOptions const & opt) static docstring stringifySearchBuffer(Buffer & buffer, FindAndReplaceOptions const & opt)
{ {
@ -1113,19 +1149,17 @@ static docstring stringifySearchBuffer(Buffer & buffer, FindAndReplaceOptions co
if (ignoreFormats.getNonContent()) { if (ignoreFormats.getNonContent()) {
runparams.for_searchAdv |= OutputParams::SearchNonOutput; runparams.for_searchAdv |= OutputParams::SearchNonOutput;
} }
string t("");
for (pos_type pit = pos_type(0); pit < (pos_type)buffer.paragraphs().size(); ++pit) { for (pos_type pit = pos_type(0); pit < (pos_type)buffer.paragraphs().size(); ++pit) {
Paragraph const & par = buffer.paragraphs().at(pit); Paragraph const & par = buffer.paragraphs().at(pit);
string add = latexNamesToUtf8(par.asString(pos_type(0), par.size(),
option,
&runparams));
LYXERR(Debug::FIND, "Adding to search string: '" LYXERR(Debug::FIND, "Adding to search string: '"
<< par.asString(pos_type(0), par.size(), << add << "'");
option, t += add;
&runparams)
<< "'");
str += par.asString(pos_type(0), par.size(),
option,
&runparams);
} }
// Even in ignore-format we have to remove "\text{}, \lyxmathsym{}" parts // Even in ignore-format we have to remove "\text{}, \lyxmathsym{}" parts
string t = to_utf8(str);
while (regex_replace(t, t, "\\\\(text|lyxmathsym|ensuremath)\\{([^\\}]*)\\}", "$2")); while (regex_replace(t, t, "\\\\(text|lyxmathsym|ensuremath)\\{([^\\}]*)\\}", "$2"));
str = from_utf8(t); str = from_utf8(t);
} }
@ -1911,7 +1945,7 @@ void Intervall::removeAccents()
buildAccentsMap(); buildAccentsMap();
static regex const accre("\\\\(([\\S]|grave|breve|ddot|dot|acute|dacute|mathring|check|hat|bar|tilde|subdot|ogonek|" static regex const accre("\\\\(([\\S]|grave|breve|ddot|dot|acute|dacute|mathring|check|hat|bar|tilde|subdot|ogonek|"
"cedilla|subring|textsubring|subhat|textsubcircum|subtilde|textsubtilde|dgrave|textdoublegrave|rcap|textroundcap|slashed)\\{[^\\{\\}]+\\}" "cedilla|subring|textsubring|subhat|textsubcircum|subtilde|textsubtilde|dgrave|textdoublegrave|rcap|textroundcap|slashed)\\{[^\\{\\}]+\\}"
"|((i|imath|jmath|cdot|[a-z]+space)|((backslash )?([lL]y[xX]|[tT]e[xX]|[lL]a[tT]e[xX]e?|lyxarrow))|(textquote|brace|guillemot)(left|right)|textasciicircum|mathcircumflex|sim)(?![a-zA-Z]))"); "|((i|imath|jmath|cdot|[a-z]+(space)?)|((backslash )?([lL]y[xX]|[tT]e[xX]|[lL]a[tT]e[xX]e?|lyxarrow))|(textquote|brace|guillemot)(left|right)|textasciicircum|mathcircumflex|sim)(?![a-zA-Z]))");
smatch sub; smatch sub;
for (sregex_iterator itacc(par.begin(), par.end(), accre), end; itacc != end; ++itacc) { for (sregex_iterator itacc(par.begin(), par.end(), accre), end; itacc != end; ++itacc) {
sub = *itacc; sub = *itacc;
@ -3862,10 +3896,10 @@ docstring stringifyFromCursor(DocIterator const & cur, int len)
runparams.for_searchAdv |= OutputParams::SearchNonOutput; runparams.for_searchAdv |= OutputParams::SearchNonOutput;
} }
LYXERR(Debug::FIND, "Stringifying with cur: " LYXERR(Debug::FIND, "Stringifying with cur: "
<< cur << ", from pos: " << cur.pos() << ", end: " << end); << cur << ", from pos: " << cur.pos() << ", end: " << end);
return par.asString(cur.pos(), end, return from_utf8(latexNamesToUtf8(par.asString(cur.pos(), end,
option, option,
&runparams); &runparams)));
} else if (cur.inMathed()) { } else if (cur.inMathed()) {
CursorSlice cs = cur.top(); CursorSlice cs = cur.top();
MathData md = cs.cell(); MathData md = cs.cell();
@ -3874,10 +3908,9 @@ docstring stringifyFromCursor(DocIterator const & cur, int len)
? md.end() ? md.end()
: md.begin() + cs.pos() + len ); : md.begin() + cs.pos() + len );
MathData md2; MathData md2;
for (MathData::const_iterator it = md.begin() + cs.pos(); for (MathData::const_iterator it = md.begin() + cs.pos(); it != it_end; ++it)
it != it_end; ++it)
md2.push_back(*it); md2.push_back(*it);
docstring s = asString(md2); docstring s = from_utf8(latexNamesToUtf8(asString(md2)));
LYXERR(Debug::FIND, "Stringified math: '" << s << "'"); LYXERR(Debug::FIND, "Stringified math: '" << s << "'");
return s; return s;
} }