Improvements to the parser that extracts the "family name". We now get
the last name, with the "von" part, even when commas are not used in the
BibTeX file.



git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/trunk@27989 a592a061-630c-0410-9148-cb99ea01b6c8
This commit is contained in:
Richard Heck 2009-01-04 23:35:08 +00:00
parent 89034a543b
commit 9d48ad4f74
3 changed files with 50 additions and 17 deletions

View File

@ -28,6 +28,7 @@
#include "support/gettext.h" #include "support/gettext.h"
#include "support/lassert.h" #include "support/lassert.h"
#include "support/lstrings.h" #include "support/lstrings.h"
#include "support/textutils.h"
#include "boost/regex.hpp" #include "boost/regex.hpp"
@ -75,24 +76,45 @@ docstring familyName(docstring const & name)
if (name.empty()) if (name.empty())
return docstring(); return docstring();
// Very simple parser // first we look for a comma, and take the last name to be everything
docstring fname = name; // preceding the right-most one, so that we also get the "jr" part.
docstring::size_type idx = name.rfind(',');
// possible authorname combinations are:
// "Surname, FirstName"
// "Surname, F."
// "FirstName Surname"
// "F. Surname"
docstring::size_type idx = fname.find(',');
if (idx != docstring::npos) if (idx != docstring::npos)
return ltrim(fname.substr(0, idx)); return ltrim(name.substr(0, idx));
idx = fname.rfind('.');
if (idx != docstring::npos && idx + 1 < fname.size()) // OK, so now we want to look for the last name. We're going to
fname = ltrim(fname.substr(idx + 1)); // include the "von" part. This isn't perfect.
// test if we have a LaTeX Space in front // Split on spaces, to get various tokens.
if (fname[0] == '\\') vector<docstring> pieces = getVectorFromString(name, from_ascii(" "));
return fname.substr(2); // If we only get two, assume the last one is the last name
return rtrim(fname); if (pieces.size() <= 2)
return pieces.back();
// Now we look for the first token that begins with a lower case letter.
vector<docstring>::const_iterator it = pieces.begin();
vector<docstring>::const_iterator en = pieces.end();
for (; it != en; ++it) {
if ((*it).size() == 0)
continue;
char_type const c = (*it)[0];
if (isLower(c))
break;
}
if (it == en) // we never found a "von"
return pieces.back();
// reconstruct what we need to return
docstring retval;
bool first = true;
for (; it != en; ++it) {
if (!first)
retval += " ";
else
first = false;
retval += *it;
}
return retval;
} }
docstring const BibTeXInfo::getAbbreviatedAuthor() const docstring const BibTeXInfo::getAbbreviatedAuthor() const

View File

@ -94,6 +94,14 @@ bool isLetterChar(char_type c)
} }
// Lowercase test for a single character. Any value that is_utf16()
// rejects is reported as not lowercase; otherwise the answer is whatever
// isLower() says about the result of ucs4_to_qchar(c).
// NOTE(review): presumably that result is a Qt QChar -- confirm.
bool isLower(char_type c)
{
	return is_utf16(c) && ucs4_to_qchar(c).isLower();
}
bool isAlphaASCII(char_type c) bool isAlphaASCII(char_type c)
{ {
return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z'); return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z');

View File

@ -26,6 +26,9 @@ inline bool isLineSeparatorChar(char_type c) { return c == ' '; }
/// return true if a char is alphabetical (including accented chars) /// return true if a char is alphabetical (including accented chars)
bool isLetterChar(char_type c); bool isLetterChar(char_type c);
/// return true if a char is lowercase
bool isLower(char_type c);
/// return whether \p c is an alphabetic character in the ASCII range /// return whether \p c is an alphabetic character in the ASCII range
bool isAlphaASCII(char_type c); bool isAlphaASCII(char_type c);