Remove performance bottleneck in getAuthors()

The regex is expensive, which is especially noticeable with very long author lists. This introduces a case-insensitive subst() variant, which is much faster. (cherry picked from commit 8ba74fe958)
2024-11-26 11:16:55 +00:00 · 2024-07-05 14:05:26 +02:00 · 2024-07-05 14:05:26 +02:00 · 0f35e3141b
commit 0f35e3141b
parent 46d1d6e0d3
3 changed files with 22 additions and 14 deletions
--- a/src/BiblioInfo.cpp
+++ b/src/BiblioInfo.cpp
@ -279,12 +279,7 @@ vector<docstring> const getAuthors(docstring const & author)
 	// Then, we temporarily make all " and " strings to ampersands in order
 	// to handle them later on a per-char level. Note that arbitrary casing
 	// ("And", "AND", "aNd", ...) is allowed in bibtex (#10465).
-	static regex const and_reg("(.* )([aA][nN][dD])( .*)");
+	iname = subst(iname, from_ascii(" and "), from_ascii(" & "), false);
 	smatch sub;
 	string res = to_utf8(iname);
 	while (regex_match(res, sub, and_reg))
 		res = sub.str(1) + "&" + sub.str(3);
 	iname = from_utf8(res);
 	// Now we traverse through the string and replace the "&" by the proper
 	// output in- and outside groups
 	docstring name;
--- a/src/support/lstrings.cpp
+++ b/src/support/lstrings.cpp
@ -913,17 +913,28 @@ String const subst_string(String const & a,
 /// Returns a copy of \a a in which every occurrence of \a oldstr is replaced
 /// by \a newstr. The scan runs left to right and resumes behind each inserted
 /// \a newstr, so replacement text is never rescanned.
 /// \param case_sens if false, matches are located by searching a lowercased
 ///        copy of \a a for the lowercased \a oldstr, so any casing of
 ///        \a oldstr in \a a (and in the argument itself) matches.
 ///        \a newstr is always inserted verbatim.
 /// An empty \a oldstr fails the LASSERT, whose fallback statement returns
 /// \a a unchanged.
 docstring const subst_string(docstring const & a,
-		docstring const & oldstr, docstring const & newstr)
+		docstring const & oldstr, docstring const & newstr,
 		bool const case_sens)
 {
 	LASSERT(!oldstr.empty(), return a);
 	docstring lstr = a;
 	size_t const olen = oldstr.length();
 	size_t const nlen = newstr.length();
 	if (case_sens) {
 		// Continue each search behind the inserted text so that we
 		// don't use the same position over and over again.
 		for (size_t i = lstr.find(oldstr); i != docstring::npos;
 		     i = lstr.find(oldstr, i + nlen))
 			lstr.replace(i, olen, newstr);
 		return lstr;
 	}
 	// Case-insensitive: search a lowercased shadow copy, but edit the
 	// original. The needle has to be lowercased as well, otherwise an
 	// oldstr containing uppercase characters could never be found.
 	docstring const needle = lowercase(oldstr);
 	docstring lcstr = lowercase(lstr);
 	for (size_t i = lcstr.find(needle); i != docstring::npos;
 	     i = lcstr.find(needle, i + nlen)) {
 		lstr.replace(i, olen, newstr);
 		// Mirror the edit in the shadow copy to keep the indices of
 		// both strings aligned, instead of re-lowercasing the whole
 		// string after every hit (which made this quadratic). The
 		// inserted characters are skipped by the next search, so
 		// their casing is irrelevant.
 		// NOTE(review): like the shared index before, this relies on
 		// lowercase() mapping character by character -- confirm.
 		lcstr.replace(i, olen, newstr);
 	}
 	return lstr;
 }
@ -951,9 +962,10 @@ string const subst(string const & a,
 /// docstring overload of subst(): forwards \a a, \a oldstr, \a newstr and
 /// \a case_sens unchanged to subst_string() and returns its result.
 docstring const subst(docstring const & a,
-		docstring const & oldstr, docstring const & newstr)
+		docstring const & oldstr, docstring const & newstr,
 		bool case_sens)
 {
-	return subst_string(a, oldstr, newstr);
+	return subst_string(a, oldstr, newstr, case_sens);
 }
--- a/src/support/lstrings.h
+++ b/src/support/lstrings.h
@ -196,7 +196,8 @@ std::string const subst(std::string const & a,
 /// substitutes all instances of \a oldstr with \a newstr.
 /// If \a case_sens is false, the search is done on a lowercased copy of
 /// \a a, so occurrences of a lowercase \a oldstr match regardless of
 /// their casing in \a a (e.g. " and " also replaces " AND ").
 docstring const subst(docstring const & a,
-		docstring const & oldstr, docstring const & newstr);
+		docstring const & oldstr, docstring const & newstr,
 		bool case_sens = true);
 /// Count all occurrences of char \a chr inside \a str
 int count_char(std::string const & str, char chr);