From f93e4254ee80c39ee3d0f9e2e4aafdf2bb40aa58 Mon Sep 17 00:00:00 2001 From: Richard Heck Date: Mon, 29 Dec 2008 00:10:41 +0000 Subject: [PATCH] Convert LaTeX constructs to Unicode when reading BibTeX files. Also, ignore other LaTeX commands. git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/trunk@27975 a592a061-630c-0410-9148-cb99ea01b6c8 --- src/insets/InsetBibtex.cpp | 91 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 88 insertions(+), 3 deletions(-) diff --git a/src/insets/InsetBibtex.cpp b/src/insets/InsetBibtex.cpp index 0dcbf49fe9..a3affb4d93 100644 --- a/src/insets/InsetBibtex.cpp +++ b/src/insets/InsetBibtex.cpp @@ -529,15 +529,16 @@ namespace { /// the variable strings. /// @return true if reading was successfull (all single parts were delimited /// correctly) - bool readValue(docstring & val, ifdocstream & ifs, const VarMap & strings) { + bool readValue(docstring & value, ifdocstream & ifs, const VarMap & strings) { char_type ch; - val.clear(); + value.clear(); if (!ifs) return false; + docstring val; do { // skip whitespace do { @@ -593,7 +594,7 @@ namespace { lastWasWhiteSpace = false; val += ' '; } - + val += ch; // update nesting level @@ -654,6 +655,90 @@ namespace { ifs.putback(ch); + // Ok, we now have the value. Now we are going to go + // through it and replace e.g. \"a with its unicode value. + // We'll also strip commands, like \emph, and the like, so + // it will look nice in the UI. + bool scanning_cmd = false; + bool scanning_math = false; + bool escaped = false; // used to catch \$, etc. + while (val.size()) { + char_type const ch = val[0]; + + // if we're scanning math, we output everything until we + // find an unescaped $, at which point we break out. + if (scanning_math) { + if (escaped) + escaped = false; + else if (ch == '\\') + escaped = true; + else if (ch == '$') + scanning_math = false; + value += ch; + val = val.substr(1); + continue; + } + + // if we're scanning a command name, then we just + // discard characters until we hit something that + // isn't alpha. + if (scanning_cmd) { + if (isAlphaASCII(ch)) { + val = val.substr(1); + escaped = false; + continue; + } + // so we're done with this command. + // now we fall through and check this character. + scanning_cmd = false; + } + + // was the last character a \? If so, then this is something like: \\, + // or \$, so we'll just output it. That's probably not always right... + if (escaped) { + value += ch; + val = val.substr(1); + escaped = false; + continue; + } + + if (ch == '$') { + value += ch; + val = val.substr(1); + scanning_math = true; + continue; + } + + // we just ignore braces + if (ch == '{' || ch == '}') { + val = val.substr(1); + continue; + } + + // we're going to check things that look like commands, so if + // this doesn't, just output it. + if (ch != '\\') { + value += ch; + val = val.substr(1); + continue; + } + + // ok, could be a command of some sort + // let's see if it corresponds to some unicode + docstring rem; + docstring const cnvtd = Encodings::fromLaTeXCommand(val, rem); + if (!cnvtd.empty()) { + // it did, so we'll take that bit and proceed with what's left + value += cnvtd; + val = rem; + continue; + } + // it's a command of some sort + scanning_cmd = true; + escaped = true; + val = val.substr(1); + } + return true; } }