mirror of
https://git.lyx.org/repos/lyx.git
synced 2025-01-12 11:32:21 +00:00
Improvements to the parser that extracts the "family name". We now get
the last name, with the "von" part, even when commas are not used in the BibTeX file.

git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/trunk@27989 a592a061-630c-0410-9148-cb99ea01b6c8
This commit is contained in:
parent
89034a543b
commit
9d48ad4f74
@ -28,6 +28,7 @@
|
|||||||
#include "support/gettext.h"
|
#include "support/gettext.h"
|
||||||
#include "support/lassert.h"
|
#include "support/lassert.h"
|
||||||
#include "support/lstrings.h"
|
#include "support/lstrings.h"
|
||||||
|
#include "support/textutils.h"
|
||||||
|
|
||||||
#include "boost/regex.hpp"
|
#include "boost/regex.hpp"
|
||||||
|
|
||||||
@ -75,24 +76,45 @@ docstring familyName(docstring const & name)
|
|||||||
if (name.empty())
|
if (name.empty())
|
||||||
return docstring();
|
return docstring();
|
||||||
|
|
||||||
// Very simple parser
|
// first we look for a comma, and take the last name to be everything
|
||||||
docstring fname = name;
|
// preceding the right-most one, so that we also get the "jr" part.
|
||||||
|
docstring::size_type idx = name.rfind(',');
|
||||||
// possible authorname combinations are:
|
|
||||||
// "Surname, FirstName"
|
|
||||||
// "Surname, F."
|
|
||||||
// "FirstName Surname"
|
|
||||||
// "F. Surname"
|
|
||||||
docstring::size_type idx = fname.find(',');
|
|
||||||
if (idx != docstring::npos)
|
if (idx != docstring::npos)
|
||||||
return ltrim(fname.substr(0, idx));
|
return ltrim(name.substr(0, idx));
|
||||||
idx = fname.rfind('.');
|
|
||||||
if (idx != docstring::npos && idx + 1 < fname.size())
|
// OK, so now we want to look for the last name. We're going to
|
||||||
fname = ltrim(fname.substr(idx + 1));
|
// include the "von" part. This isn't perfect.
|
||||||
// test if we have a LaTeX Space in front
|
// Split on spaces, to get various tokens.
|
||||||
if (fname[0] == '\\')
|
vector<docstring> pieces = getVectorFromString(name, from_ascii(" "));
|
||||||
return fname.substr(2);
|
// If we only get two, assume the last one is the last name
|
||||||
return rtrim(fname);
|
if (pieces.size() <= 2)
|
||||||
|
return pieces.back();
|
||||||
|
|
||||||
|
// Now we look for the first token that begins with a lower case letter.
|
||||||
|
vector<docstring>::const_iterator it = pieces.begin();
|
||||||
|
vector<docstring>::const_iterator en = pieces.end();
|
||||||
|
for (; it != en; ++it) {
|
||||||
|
if ((*it).size() == 0)
|
||||||
|
continue;
|
||||||
|
char_type const c = (*it)[0];
|
||||||
|
if (isLower(c))
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (it == en) // we never found a "von"
|
||||||
|
return pieces.back();
|
||||||
|
|
||||||
|
// reconstruct what we need to return
|
||||||
|
docstring retval;
|
||||||
|
bool first = true;
|
||||||
|
for (; it != en; ++it) {
|
||||||
|
if (!first)
|
||||||
|
retval += " ";
|
||||||
|
else
|
||||||
|
first = false;
|
||||||
|
retval += *it;
|
||||||
|
}
|
||||||
|
return retval;
|
||||||
}
|
}
|
||||||
|
|
||||||
docstring const BibTeXInfo::getAbbreviatedAuthor() const
|
docstring const BibTeXInfo::getAbbreviatedAuthor() const
|
||||||
|
@ -94,6 +94,14 @@ bool isLetterChar(char_type c)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
bool isLower(char_type c)
|
||||||
|
{
|
||||||
|
if (!is_utf16(c))
|
||||||
|
return false;
|
||||||
|
return ucs4_to_qchar(c).isLower();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
bool isAlphaASCII(char_type c)
|
bool isAlphaASCII(char_type c)
|
||||||
{
|
{
|
||||||
return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z');
|
return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z');
|
||||||
|
@ -26,6 +26,9 @@ inline bool isLineSeparatorChar(char_type c) { return c == ' '; }
|
|||||||
/// return true if a char is alphabetical (including accented chars)
|
/// return true if a char is alphabetical (including accented chars)
|
||||||
bool isLetterChar(char_type c);
|
bool isLetterChar(char_type c);
|
||||||
|
|
||||||
|
/// return true if a char is lowercase
|
||||||
|
bool isLower(char_type c);
|
||||||
|
|
||||||
/// return whether \p c is an alphabetic character in the ASCII range
|
/// return whether \p c is an alphabetic character in the ASCII range
|
||||||
bool isAlphaASCII(char_type c);
|
bool isAlphaASCII(char_type c);
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user