Improve BibTeX name parsing #3

Correctly handle name suffix ("Jr.-part")
2024-11-25 10:58:52 +00:00 · 2017-03-19 12:42:18 +01:00 · 2017-03-19 12:42:18 +01:00 · 5fdcca4c06
commit 5fdcca4c06
parent 9f4df64f23
3 changed files with 54 additions and 23 deletions
--- a/lib/citeengines/basic.citeengine
+++ b/lib/citeengines/basic.citeengine
@ -60,9 +60,9 @@ CiteFormat default
 	!sep ,
 	!close ]
 	# Modify scheme of the first author in the bibliography
-	!firstnameform %prename% %surname%
+	!firstnameform %prename% %surname%{%junior%[[, %junior%]]}
 	# Modify scheme of other authors in the bibliography
-	!othernameform %prename% %surname%
+	!othernameform %prename% %surname%{%junior%[[, %junior%]]}

 	# A link that lets us jump to the bibliography entry in LyXHTML
 	# %clean:key% will be substituted by the cite key to give a unique id
--- a/lib/layouts/stdciteformats.inc
+++ b/lib/layouts/stdciteformats.inc
@ -32,13 +32,13 @@ CiteFormat default
 	# Macros
 	#
 	# Scheme of the first author in the bibliography
-	!firstnameform %surname%{%prename%[[, %prename%]]}
+	!firstnameform %surname%{%junior%[[, %junior%]]}{%prename%[[, %prename%]]}
 	# Scheme of other authors in the bibliography
-	!othernameform %surname%{%prename%[[, %prename%]]}
+	!othernameform %surname%{%junior%[[, %junior%]]}{%prename%[[, %prename%]]}
 	# Scheme of the first name in later parts (such as book editor)
-	!firstbynameform %prename% %surname%
+	!firstbynameform %prename% %surname%{%junior%[[, %junior%]]}
 	# Scheme of other authors in later parts (such as book editor)
-	!otherbynameform %prename% %surname%
+	!otherbynameform %prename% %surname%{%junior%[[, %junior%]]}
 	# pagination
 	!pages {%pages%[[, %_pptext% %pages%]]}
 	# ed. or eds.
--- a/src/BiblioInfo.cpp
+++ b/src/BiblioInfo.cpp
@ -55,11 +55,19 @@ docstring renormalize(docstring const & input)
 }


+struct name_parts { // the components of one author-type name, as returned by nameParts()
+	docstring surname; // family ("last") name
+	docstring prename; // given name(s)
+	docstring junior; // the "jr" part; only filled in when the name has three comma-separated pieces
+};
+
+
 // gets the "prename", "family name" and "junior" parts from an author-type string
-pair<docstring, docstring> nameParts(docstring const & iname)
+name_parts nameParts(docstring const & iname)
 {
+	name_parts res;
 	if (iname.empty())
-		return make_pair(docstring(), docstring());
+		return res;

 	// First we check for groupings (via {...}) and replace blanks and
 	// commas inside groups with temporary placeholders
@ -85,21 +93,33 @@ pair<docstring, docstring> nameParts(docstring const & iname)
 	// Now we look for commas. The last name is everything preceding the
 	// first one, and the prename is everything following the last one.
 	vector<docstring> pieces = getVectorFromString(name);
-	if (pieces.size() > 1)
+	if (pieces.size() > 1) {
 		// whether we have a jr. part or not, it's always
 		// the first and last item (reversed)
-		return make_pair(renormalize(pieces.back()), renormalize(pieces.front()));
+		res.surname = renormalize(pieces.front());
+		res.prename = renormalize(pieces.back());
+		// If we have three pieces (the maximum allowed by BibTeX),
+		// the second one is the jr part.
+		if (pieces.size() > 2)
+			res.junior = renormalize(pieces.at(1));
+		return res;
+	}

 	// OK, so now we want to look for the last name. We're going to
 	// include the "von" part. This isn't perfect.
 	// Split on spaces, to get various tokens.
 	pieces = getVectorFromString(name, from_ascii(" "));
 	// No space: Only a family name given
-	if (pieces.size() < 2)
-		return make_pair(from_ascii(""), renormalize(pieces.back()));
+	if (pieces.size() < 2) {
+		res.surname = renormalize(pieces.back());
+		return res;
+	}
 	// If we get two pieces, assume the last one is the last name
-	if (pieces.size() == 2)
-		return make_pair(renormalize(pieces.front()), renormalize(pieces.back()));
+	if (pieces.size() == 2) {
+		res.surname = renormalize(pieces.back());
+		res.prename = renormalize(pieces.front());
+		return res;
+	}

 	// Three or more pieces: Now we look for the first piece that
 	// begins with a lower case letter (the "von-part").
@ -141,7 +161,9 @@ pair<docstring, docstring> nameParts(docstring const & iname)
 			first = false;
 		surname += *it;
 	}
-	return make_pair(renormalize(prename), renormalize(surname));
+	res.surname = renormalize(surname);
+	res.prename = renormalize(prename);
+	return res;
 }


@ -149,10 +171,12 @@ docstring constructName(docstring const & name, string const scheme)
 {
 	// re-constructs a name from name parts according
 	// to a given scheme
-	docstring const prename = nameParts(name).first;
-	docstring const surname = nameParts(name).second;
+	docstring const prename = nameParts(name).prename;
+	docstring const surname = nameParts(name).surname;
+	docstring const junior = nameParts(name).junior;
 	string res = scheme;
 	static regex const reg1("(.*)(\\{%prename%\\[\\[)([^\\]]+)(\\]\\]\\})(.*)");
+	static regex const reg2("(.*)(\\{%junior%\\[\\[)([^\\]]+)(\\]\\]\\})(.*)");
 	smatch sub;
 	if (regex_match(scheme, sub, reg1)) {
 		res = sub.str(1);
@ -160,9 +184,16 @@ docstring constructName(docstring const & name, string const scheme)
 			res += sub.str(3);
 		res += sub.str(5);
 	}
+	if (regex_match(res, sub, reg2)) {
+		res = sub.str(1);
+		if (!junior.empty())
+			res += sub.str(3);
+		res += sub.str(5);
+	}
 	docstring result = from_ascii(res);
 	result = subst(result, from_ascii("%prename%"), prename);
 	result = subst(result, from_ascii("%surname%"), surname);
+	result = subst(result, from_ascii("%junior%"), junior);
 	return result;
 }

@ -445,15 +476,15 @@ docstring const BibTeXInfo::getAuthorList(Buffer const * buf,
 		     : " and ";
 	string firstnameform =
 			buf ? buf->params().documentClass().getCiteMacro(engine_type, "!firstnameform")
-			     : "%surname%, %prename%";
+			     : "%surname%{%junior%[[, %junior%]]}{%prename%[[, %prename%]]}";
 	if (!beginning)
 		firstnameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!firstbynameform")
-					     : "%prename% %surname%";
+					     : "%prename% %surname%{%junior%[[, %junior%]]}";
 	string othernameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!othernameform")
-			     : "%surname%, %prename%";
+			     : "%surname%{%junior%[[, %junior%]]}{%prename%[[, %prename%]]}";
 	if (!beginning)
 		othernameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!otherbynameform")
-					     : "%prename% %surname%";
+					     : "%prename% %surname%{%junior%[[, %junior%]]}";

 	// Shorten the list (with et al.) if forceshort is set
 	// and the list can actually be shortened, else if maxcitenames
@ -481,13 +512,13 @@ docstring const BibTeXInfo::getAuthorList(Buffer const * buf,
 			retval += (i == 0) ? constructName(*it, firstnameform)
 				: constructName(*it, othernameform);
 		else
-			retval += nameParts(*it).second;
+			retval += nameParts(*it).surname;
 	}
 	if (shorten) {
 		if (allnames)
 			retval = constructName(authors[0], firstnameform) + (buf ? buf->B_(etal) : from_ascii(etal));
 		else
-			retval = nameParts(authors[0]).second + (buf ? buf->B_(etal) : from_ascii(etal));
+			retval = nameParts(authors[0]).surname + (buf ? buf->B_(etal) : from_ascii(etal));
 	}

 	return convertLaTeXCommands(retval);