We'll do the unicode conversion where it's actually needed: where we

calculate what to display.


git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/trunk@28191 a592a061-630c-0410-9148-cb99ea01b6c8
This commit is contained in:
Richard Heck 2009-01-16 23:42:16 +00:00
parent d8bf83a85a
commit bb24f02188

View File

@ -16,6 +16,7 @@
#include "Buffer.h"
#include "BufferParams.h"
#include "buffer_funcs.h"
#include "Encoding.h"
#include "InsetIterator.h"
#include "Paragraph.h"
@ -45,7 +46,7 @@ namespace lyx {
//////////////////////////////////////////////////////////////////////
BibTeXInfo::BibTeXInfo(docstring const & key, docstring const & type)
: is_bibtex_(true), bib_key_(key), entry_type_(type)
: is_bibtex_(true), bib_key_(key), entry_type_(type), info_()
{}
@ -173,6 +174,106 @@ docstring const BibTeXInfo::getYear() const
}
namespace {
docstring convertLaTeXCommands(docstring const & str)
{
docstring val = str;
docstring ret;
bool scanning_cmd = false;
bool scanning_math = false;
bool escaped = false; // used to catch \$, etc.
while (val.size()) {
char_type const ch = val[0];
// if we're scanning math, we output everything until we
// find an unescaped $, at which point we break out.
if (scanning_math) {
if (escaped)
escaped = false;
else if (ch == '\\')
escaped = true;
else if (ch == '$')
scanning_math = false;
ret += ch;
val = val.substr(1);
continue;
}
// if we're scanning a command name, then we just
// discard characters until we hit something that
// isn't alpha.
if (scanning_cmd) {
if (isAlphaASCII(ch)) {
val = val.substr(1);
escaped = false;
continue;
}
// so we're done with this command.
// now we fall through and check this character.
scanning_cmd = false;
}
// was the last character a \? If so, then this is something like: \\,
// or \$, so we'll just output it. That's probably not always right...
if (escaped) {
ret += ch;
val = val.substr(1);
escaped = false;
continue;
}
if (ch == '$') {
ret += ch;
val = val.substr(1);
scanning_math = true;
continue;
}
// we just ignore braces
if (ch == '{' || ch == '}') {
val = val.substr(1);
continue;
}
// we're going to check things that look like commands, so if
// this doesn't, just output it.
if (ch != '\\') {
ret += ch;
val = val.substr(1);
continue;
}
// ok, could be a command of some sort
// let's see if it corresponds to some unicode
// unicodesymbols has things in the form: \"{u},
// whereas we may see things like: \"u. So we'll
// look for that and change it, if necessary.
static boost::regex const reg("^\\\\\\W\\w");
if (boost::regex_search(to_utf8(val), reg)) {
val.insert(3, from_ascii("}"));
val.insert(2, from_ascii("{"));
}
docstring rem;
docstring const cnvtd = Encodings::fromLaTeXCommand(val, rem);
if (!cnvtd.empty()) {
// it did, so we'll take that bit and proceed with what's left
ret += cnvtd;
val = rem;
continue;
}
// it's a command of some sort
scanning_cmd = true;
escaped = true;
val = val.substr(1);
}
return ret;
}
} // anon namespace
docstring const BibTeXInfo::getInfo() const
{
if (!info_.empty())
@ -229,7 +330,7 @@ docstring const BibTeXInfo::getInfo() const
docstring const result_str = rtrim(result.str());
if (!result_str.empty()) {
info_ = result_str;
info_ = convertLaTeXCommands(result_str);
return info_;
}