Improvements to the parser that extracts the "family name". We now get the last name, with the "von" part, even when commas are not used in the BibTeX file.

git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/trunk@27989 a592a061-630c-0410-9148-cb99ea01b6c8
2024-12-25 05:55:34 +00:00 · 2009-01-04 23:35:08 +00:00 · 2009-01-04 23:35:08 +00:00 · 9d48ad4f74
commit 9d48ad4f74
parent 89034a543b
3 changed files with 50 additions and 17 deletions
--- a/src/BiblioInfo.cpp
+++ b/src/BiblioInfo.cpp
@ -28,6 +28,7 @@
 #include "support/gettext.h"
 #include "support/lassert.h"
 #include "support/lstrings.h"
 #include "support/textutils.h"
 #include "boost/regex.hpp"
@ -75,24 +76,45 @@ docstring familyName(docstring const & name)
 	if (name.empty())
 		return docstring();
-	// Very simple parser
+	// first we look for a comma, and take the last name to be everything
-	docstring fname = name;
+	// preceding the right-most one, so that we also get the "jr" part.
-
+	docstring::size_type idx = name.rfind(',');
 	// possible authorname combinations are:
 	// "Surname, FirstName"
 	// "Surname, F."
 	// "FirstName Surname"
 	// "F. Surname"
 	docstring::size_type idx = fname.find(',');
 	if (idx != docstring::npos)
-		return ltrim(fname.substr(0, idx));
+		return ltrim(name.substr(0, idx));
-	idx = fname.rfind('.');
+
-	if (idx != docstring::npos && idx + 1 < fname.size())
+	// OK, so now we want to look for the last name. We're going to
-		fname = ltrim(fname.substr(idx + 1));
+	// include the "von" part. This isn't perfect.
-	// test if we have a LaTeX Space in front
+	// Split on spaces, to get various tokens.
-	if (fname[0] == '\\')
+	vector<docstring> pieces = getVectorFromString(name, from_ascii(" "));
-		return fname.substr(2);
+	// If we only get two, assume the last one is the last name
-	return rtrim(fname);
+	if (pieces.size() <= 2)
 		return pieces.back();
 	// Now we look for the first token that begins with a lower case letter.
 	vector<docstring>::const_iterator it = pieces.begin();
 	vector<docstring>::const_iterator en = pieces.end();
 	for (; it != en; ++it) {
 		if ((*it).size() == 0)
 			continue;
 		char_type const c = (*it)[0];
 		if (isLower(c))
 			break;
 	}
 	if (it == en) // we never found a "von"
 		return pieces.back();
 	// reconstruct what we need to return
 	docstring retval;
 	bool first = true;
 	for (; it != en; ++it) {
 		if (!first)
 			retval += " ";
 		else 
 			first = false;
 		retval += *it;
 	}
 	return retval;
 }
 docstring const BibTeXInfo::getAbbreviatedAuthor() const
--- a/src/support/lstrings.cpp
+++ b/src/support/lstrings.cpp
@ -94,6 +94,14 @@ bool isLetterChar(char_type c)
 }
 bool isLower(char_type c)
 {
 	// Characters rejected by is_utf16() are never reported as lowercase;
 	// everything else is converted with ucs4_to_qchar() and the answer
 	// is delegated to the resulting object's isLower().
 	return is_utf16(c) && ucs4_to_qchar(c).isLower();
 }
 bool isAlphaASCII(char_type c)
 {
 	return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z');
--- a/src/support/textutils.h
+++ b/src/support/textutils.h
@ -26,6 +26,9 @@ inline bool isLineSeparatorChar(char_type c) { return c == ' '; }
 /// return true if a char is alphabetical (including accented chars)
 bool isLetterChar(char_type c);
 /// return true if a char is lowercase
 bool isLower(char_type c);
 /// return whether \p c is an alphabetic character in the ASCII range
 bool isAlphaASCII(char_type c);