From fcd61f90e7469cb4c9a3263ce81117e0d2158864 Mon Sep 17 00:00:00 2001 From: Juergen Spitzmueller Date: Fri, 5 Jul 2024 17:19:24 +0200 Subject: [PATCH] Cut excessively long author lists before parsing them for the GUI as they will be truncated anyway. Fixes performance issues with long author lists in bib files. (cherry picked from commit 7d2d26132b6bb285e96d65da5ca8269061af8d6c) --- src/BiblioInfo.cpp | 51 +++++++++++++++++++++++++++++----------------- src/BiblioInfo.h | 8 +++++--- 2 files changed, 37 insertions(+), 22 deletions(-) diff --git a/src/BiblioInfo.cpp b/src/BiblioInfo.cpp index e2ea4bde54..b1ea8c3a49 100644 --- a/src/BiblioInfo.cpp +++ b/src/BiblioInfo.cpp @@ -264,8 +264,18 @@ docstring constructName(docstring const & name, string const & scheme) } -vector const getAuthors(docstring const & author) +vector const getAuthors(docstring const & author_in, + size_t const max_key_size) { + docstring author = author_in; + // for the GUI (not xhtml output) we cut obscenely long + // author lists as we won't display all authors anyway, + // and these long lists impact heavily on performance + // We take more than max_key_size, as we might have + // some extra characters in here + if (max_key_size < UINT_MAX && author.size() > 2 * max_key_size) + author.resize(2 * max_key_size); + // We check for goupings (via {...}) and only consider " and " // outside groups as author separator. This is to account // for cases such as {{Barnes and Noble, Inc.}}, which @@ -315,7 +325,7 @@ vector const getAuthors(docstring const & author) bool multipleAuthors(docstring const & author) { - return getAuthors(author).size() > 1; + return getAuthors(author, 128).size() > 1; } @@ -513,18 +523,20 @@ BibTeXInfo::BibTeXInfo(docstring const & key, docstring const & type) docstring const BibTeXInfo::getAuthorOrEditorList(Buffer const * buf, - bool full, bool forceshort) const + size_t const max_key_size, + bool full, bool forceshort) const { docstring author = operator[]("author"); if (author.empty()) author = operator[]("editor"); - return getAuthorList(buf, author, full, forceshort); + return getAuthorList(buf, author, max_key_size, full, forceshort); } docstring const BibTeXInfo::getAuthorList(Buffer const * buf, - docstring const & author, bool const full, bool const forceshort, + docstring const & author, size_t const max_key_size, + bool const full, bool const forceshort, bool const allnames, bool const beginning) const { // Maxnames treshold depend on engine @@ -556,7 +568,7 @@ docstring const BibTeXInfo::getAuthorList(Buffer const * buf, // OK, we've got some names. Let's format them. // Try to split the author list - vector const authors = getAuthors(author); + vector const authors = getAuthors(author, max_key_size); docstring retval; @@ -1170,14 +1182,14 @@ docstring BibTeXInfo::getValueForKey(string const & oldkey, Buffer const & buf, // with respect to maxcitenames. Suitable for Bibliography // beginnings. docstring const kind = operator[](subtype); - ret = getAuthorList(&buf, kind, false, false, true); + ret = getAuthorList(&buf, kind, ci.max_key_size, false, false, true); if (ci.forceUpperCase && isLowerCase(ret[0])) ret[0] = uppercase(ret[0]); } else if (prefixIs(key, "fullnames:")) { // Return a full name list. Suitable for Bibliography // beginnings. docstring const kind = operator[](subtype); - ret = getAuthorList(&buf, kind, true, false, true); + ret = getAuthorList(&buf, kind, ci.max_key_size, true, false, true); if (ci.forceUpperCase && isLowerCase(ret[0])) ret[0] = uppercase(ret[0]); } else if (prefixIs(key, "forceabbrvnames:")) { @@ -1185,7 +1197,7 @@ docstring BibTeXInfo::getValueForKey(string const & oldkey, Buffer const & buf, // irrespective of maxcitenames. Suitable for Bibliography // beginnings. docstring const kind = operator[](subtype); - ret = getAuthorList(&buf, kind, false, true, true); + ret = getAuthorList(&buf, kind, ci.max_key_size, false, true, true); if (ci.forceUpperCase && isLowerCase(ret[0])) ret[0] = uppercase(ret[0]); } else if (prefixIs(key, "abbrvbynames:")) { @@ -1193,14 +1205,14 @@ docstring BibTeXInfo::getValueForKey(string const & oldkey, Buffer const & buf, // with respect to maxcitenames. Suitable for further names inside a // bibliography item // (such as "ed. by ...") docstring const kind = operator[](subtype); - ret = getAuthorList(&buf, kind, false, false, true, false); + ret = getAuthorList(&buf, kind, ci.max_key_size, false, false, true, false); if (ci.forceUpperCase && isLowerCase(ret[0])) ret[0] = uppercase(ret[0]); } else if (prefixIs(key, "fullbynames:")) { // Return a full name list. Suitable for further names inside a // bibliography item // (such as "ed. by ...") docstring const kind = operator[](subtype); - ret = getAuthorList(&buf, kind, true, false, true, false); + ret = getAuthorList(&buf, kind, ci.max_key_size, true, false, true, false); if (ci.forceUpperCase && isLowerCase(ret[0])) ret[0] = uppercase(ret[0]); } else if (prefixIs(key, "forceabbrvbynames:")) { @@ -1208,26 +1220,26 @@ docstring BibTeXInfo::getValueForKey(string const & oldkey, Buffer const & buf, // irrespective of maxcitenames. Suitable for further names inside a // bibliography item // (such as "ed. by ...") docstring const kind = operator[](subtype); - ret = getAuthorList(&buf, kind, false, true, true, false); + ret = getAuthorList(&buf, kind, ci.max_key_size, false, true, true, false); if (ci.forceUpperCase && isLowerCase(ret[0])) ret[0] = uppercase(ret[0]); } else if (key == "abbrvciteauthor") { // Special key to provide abbreviated author or // editor names (suitable for citation labels), // with respect to maxcitenames. - ret = getAuthorOrEditorList(&buf, false, false); + ret = getAuthorOrEditorList(&buf, ci.max_key_size, false, false); if (ci.forceUpperCase && isLowerCase(ret[0])) ret[0] = uppercase(ret[0]); } else if (key == "fullciteauthor") { // Return a full author or editor list (for citation labels) - ret = getAuthorOrEditorList(&buf, true, false); + ret = getAuthorOrEditorList(&buf, ci.max_key_size, true, false); if (ci.forceUpperCase && isLowerCase(ret[0])) ret[0] = uppercase(ret[0]); } else if (key == "forceabbrvciteauthor") { // Special key to provide abbreviated author or // editor names (suitable for citation labels), // irrespective of maxcitenames. - ret = getAuthorOrEditorList(&buf, false, true); + ret = getAuthorOrEditorList(&buf, ci.max_key_size, false, true); if (ci.forceUpperCase && isLowerCase(ret[0])) ret[0] = uppercase(ret[0]); } else if (key == "bibentry") { @@ -1392,13 +1404,14 @@ vector const BiblioInfo::getEntries() const } -docstring const BiblioInfo::getAuthorOrEditorList(docstring const & key, Buffer const & buf) const +docstring const BiblioInfo::getAuthorOrEditorList(docstring const & key, Buffer const & buf, + size_t const max_key_size) const { BiblioInfo::const_iterator it = find(key); if (it == end()) return docstring(); BibTeXInfo const & data = it->second; - return data.getAuthorOrEditorList(&buf, false); + return data.getAuthorOrEditorList(&buf, max_key_size, false); } @@ -1686,7 +1699,7 @@ void BiblioInfo::makeCitationLabels(Buffer const & buf) if (numbers) { entry.label(entry.citeNumber()); } else { - docstring const auth = entry.getAuthorOrEditorList(&buf, false); + docstring const auth = entry.getAuthorOrEditorList(&buf, 128, false); // we do it this way so as to access the xref, if necessary // note that this also gives us the modifier docstring const year = getYear(ce, buf, true); @@ -1761,7 +1774,7 @@ void authorsToDocBookAuthorGroup(docstring const & authorsString, XMLStream & xs } // Split the input list of authors into individual authors. - vector const authors = getAuthors(authorsString); + vector const authors = getAuthors(authorsString, UINT_MAX); // Retrieve the "et al." variation. string const etal = buf.params().documentClass().getCiteMacro(buf.params().citeEngineType(), "_etal"); diff --git a/src/BiblioInfo.h b/src/BiblioInfo.h index aeaf0fcecc..d1f1a8ed8d 100644 --- a/src/BiblioInfo.h +++ b/src/BiblioInfo.h @@ -67,9 +67,10 @@ public: /// This will be translated to the UI language if buf is null /// otherwise, it will be translated to the buffer language. docstring const getAuthorOrEditorList(Buffer const * buf = nullptr, - bool full = false, bool forceshort = false) const; + size_t const max_key_size = 128, + bool full = false, bool forceshort = false) const; /// Same for a specific author role (editor, author etc.) - docstring const getAuthorList(Buffer const * buf, docstring const & author, + docstring const getAuthorList(Buffer const * buf, docstring const & author, size_t const max_key_size, bool const full = false, bool const forceshort = false, bool const allnames = false, bool const beginning = true) const; /// @@ -201,7 +202,8 @@ public: /// \return a sorted vector of BibTeX entry types in use std::vector const getEntries() const; /// \return author or editor list (abbreviated form by default) - docstring const getAuthorOrEditorList(docstring const & key, Buffer const & buf) const; + docstring const getAuthorOrEditorList(docstring const & key, Buffer const & buf, + size_t const max_key_size) const; /// \return the year from the bibtex data record for \param key /// if \param use_modifier is true, then we will also append any /// modifier for this entry (e.g., 1998b).