Improvements to the parser that extracts the "family name". We now get
the last name, with the "von" part, even when commas are not used in the BibTeX file.

git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/trunk@27989 a592a061-630c-0410-9148-cb99ea01b6c8
2025-01-23 00:38:01 +00:00 · 2009-01-04 23:35:08 +00:00 · 2009-01-04 23:35:08 +00:00 · 9d48ad4f74
commit 9d48ad4f74
parent 89034a543b
3 changed files with 50 additions and 17 deletions
--- a/src/BiblioInfo.cpp
+++ b/src/BiblioInfo.cpp
@ -28,6 +28,7 @@
 #include "support/gettext.h"
 #include "support/lassert.h"
 #include "support/lstrings.h"
+#include "support/textutils.h"

 #include "boost/regex.hpp"

@ -75,24 +76,45 @@ docstring familyName(docstring const & name)
 	if (name.empty())
 		return docstring();

-	// Very simple parser
-	docstring fname = name;
-
-	// possible authorname combinations are:
-	// "Surname, FirstName"
-	// "Surname, F."
-	// "FirstName Surname"
-	// "F. Surname"
-	docstring::size_type idx = fname.find(',');
+	// first we look for a comma, and take the last name to be everything
+	// preceding the right-most one, so that we also get the "jr" part.
+	docstring::size_type idx = name.rfind(',');
 	if (idx != docstring::npos)
-		return ltrim(fname.substr(0, idx));
-	idx = fname.rfind('.');
-	if (idx != docstring::npos && idx + 1 < fname.size())
-		fname = ltrim(fname.substr(idx + 1));
-	// test if we have a LaTeX Space in front
-	if (fname[0] == '\\')
-		return fname.substr(2);
-	return rtrim(fname);
+		return ltrim(name.substr(0, idx));
+
+	// OK, so now we want to look for the last name. We're going to
+	// include the "von" part. This isn't perfect.
+	// Split on spaces, to get various tokens.
+	vector<docstring> pieces = getVectorFromString(name, from_ascii(" "));
+	// If we only get two, assume the last one is the last name
+	if (pieces.size() <= 2)
+		return pieces.back();
+
+	// Now we look for the first token that begins with a lower case letter.
+	vector<docstring>::const_iterator it = pieces.begin();
+	vector<docstring>::const_iterator en = pieces.end();
+	for (; it != en; ++it) {
+		if ((*it).size() == 0)
+			continue;
+		char_type const c = (*it)[0];
+		if (isLower(c))
+			break;
+	}
+
+	if (it == en) // we never found a "von"
+		return pieces.back();
+
+	// reconstruct what we need to return
+	docstring retval;
+	bool first = true;
+	for (; it != en; ++it) {
+		if (!first)
+			retval += " ";
+		else 
+			first = false;
+		retval += *it;
+	}
+	return retval;
 }

 docstring const BibTeXInfo::getAbbreviatedAuthor() const
--- a/src/support/lstrings.cpp
+++ b/src/support/lstrings.cpp
@ -94,6 +94,14 @@ bool isLetterChar(char_type c)
 }


+bool isLower(char_type c)
+{
+	if (!is_utf16(c))
+		return false;
+	return ucs4_to_qchar(c).isLower();
+}
+
+
 bool isAlphaASCII(char_type c)
 {
 	return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z');
--- a/src/support/textutils.h
+++ b/src/support/textutils.h
@ -26,6 +26,9 @@ inline bool isLineSeparatorChar(char_type c) { return c == ' '; }
 /// return true if a char is alphabetical (including accented chars)
 bool isLetterChar(char_type c);

+/// return true if a char is lowercase
+bool isLower(char_type c);
+
 /// return whether \p c is an alphabetic character in the ASCII range
 bool isAlphaASCII(char_type c);