mirror of
https://git.lyx.org/repos/lyx.git
synced 2024-11-10 20:04:46 +00:00
Improvements to the parser that extracts the "family name". We now get
the last name, with the "von" part, even when commas are not used in the BibTeX file.
git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/trunk@27989 a592a061-630c-0410-9148-cb99ea01b6c8
This commit is contained in:
parent
89034a543b
commit
9d48ad4f74
@ -28,6 +28,7 @@
|
||||
#include "support/gettext.h"
|
||||
#include "support/lassert.h"
|
||||
#include "support/lstrings.h"
|
||||
#include "support/textutils.h"
|
||||
|
||||
#include "boost/regex.hpp"
|
||||
|
||||
@ -75,24 +76,45 @@ docstring familyName(docstring const & name)
|
||||
if (name.empty())
|
||||
return docstring();
|
||||
|
||||
// Very simple parser
|
||||
docstring fname = name;
|
||||
|
||||
// possible authorname combinations are:
|
||||
// "Surname, FirstName"
|
||||
// "Surname, F."
|
||||
// "FirstName Surname"
|
||||
// "F. Surname"
|
||||
docstring::size_type idx = fname.find(',');
|
||||
// first we look for a comma, and take the last name to be everything
|
||||
// preceding the right-most one, so that we also get the "jr" part.
|
||||
docstring::size_type idx = name.rfind(',');
|
||||
if (idx != docstring::npos)
|
||||
return ltrim(fname.substr(0, idx));
|
||||
idx = fname.rfind('.');
|
||||
if (idx != docstring::npos && idx + 1 < fname.size())
|
||||
fname = ltrim(fname.substr(idx + 1));
|
||||
// test if we have a LaTeX Space in front
|
||||
if (fname[0] == '\\')
|
||||
return fname.substr(2);
|
||||
return rtrim(fname);
|
||||
return ltrim(name.substr(0, idx));
|
||||
|
||||
// OK, so now we want to look for the last name. We're going to
|
||||
// include the "von" part. This isn't perfect.
|
||||
// Split on spaces, to get various tokens.
|
||||
vector<docstring> pieces = getVectorFromString(name, from_ascii(" "));
|
||||
// If we only get two, assume the last one is the last name
|
||||
if (pieces.size() <= 2)
|
||||
return pieces.back();
|
||||
|
||||
// Now we look for the first token that begins with a lower case letter.
|
||||
vector<docstring>::const_iterator it = pieces.begin();
|
||||
vector<docstring>::const_iterator en = pieces.end();
|
||||
for (; it != en; ++it) {
|
||||
if ((*it).size() == 0)
|
||||
continue;
|
||||
char_type const c = (*it)[0];
|
||||
if (isLower(c))
|
||||
break;
|
||||
}
|
||||
|
||||
if (it == en) // we never found a "von"
|
||||
return pieces.back();
|
||||
|
||||
// reconstruct what we need to return
|
||||
docstring retval;
|
||||
bool first = true;
|
||||
for (; it != en; ++it) {
|
||||
if (!first)
|
||||
retval += " ";
|
||||
else
|
||||
first = false;
|
||||
retval += *it;
|
||||
}
|
||||
return retval;
|
||||
}
|
||||
|
||||
docstring const BibTeXInfo::getAbbreviatedAuthor() const
|
||||
|
@ -94,6 +94,14 @@ bool isLetterChar(char_type c)
|
||||
}
|
||||
|
||||
|
||||
/// Return true if \p c is a lowercase character.
/// Any \p c for which is_utf16() is false is reported as not lowercase;
/// otherwise the result is whatever isLower() yields on the value
/// returned by ucs4_to_qchar(c).
bool isLower(char_type c)
|
||||
{
|
||||
if (!is_utf16(c))
|
||||
return false;
|
||||
return ucs4_to_qchar(c).isLower();
|
||||
}
|
||||
|
||||
|
||||
bool isAlphaASCII(char_type c)
|
||||
{
|
||||
return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z');
|
||||
|
@ -26,6 +26,9 @@ inline bool isLineSeparatorChar(char_type c) { return c == ' '; }
|
||||
/// return true if a char is alphabetical (including accented chars)
|
||||
bool isLetterChar(char_type c);
|
||||
|
||||
/// return true if a char is lowercase
|
||||
bool isLower(char_type c);
|
||||
|
||||
/// return whether \p c is an alphabetic character in the ASCII range
|
||||
bool isAlphaASCII(char_type c);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user