mirror of
https://git.lyx.org/repos/lyx.git
synced 2024-11-09 18:31:04 +00:00
move convertLaTeXCommands from BiblioInfo to Encodings
So it can also be used in other contexts. This also includes an improvement of math parsing.
This commit is contained in:
parent
485f5afc21
commit
9291fc465b
@ -329,141 +329,6 @@ bool multipleAuthors(docstring const & author)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// converts a string containing LaTeX commands into unicode
|
|
||||||
// for display.
|
|
||||||
docstring convertLaTeXCommands(docstring const & str)
|
|
||||||
{
|
|
||||||
docstring val = str;
|
|
||||||
docstring ret;
|
|
||||||
|
|
||||||
bool scanning_cmd = false;
|
|
||||||
bool scanning_math = false;
|
|
||||||
bool is_section = false;
|
|
||||||
bool escaped = false; // used to catch \$, etc.
|
|
||||||
while (!val.empty()) {
|
|
||||||
char_type const ch = val[0];
|
|
||||||
|
|
||||||
// if we're scanning math, we output everything until we
|
|
||||||
// find an unescaped $, at which point we break out.
|
|
||||||
if (scanning_math) {
|
|
||||||
if (escaped)
|
|
||||||
escaped = false;
|
|
||||||
else if (ch == '\\')
|
|
||||||
escaped = true;
|
|
||||||
else if (ch == '$')
|
|
||||||
scanning_math = false;
|
|
||||||
ret += ch;
|
|
||||||
val = val.substr(1);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
// if we're scanning a command name, then we just
|
|
||||||
// discard characters until we hit something that
|
|
||||||
// isn't alpha.
|
|
||||||
if (scanning_cmd) {
|
|
||||||
if (!is_section && ch == 'S') {
|
|
||||||
is_section = true;
|
|
||||||
val = val.substr(1);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if (isAlphaASCII(ch)) {
|
|
||||||
is_section = false;
|
|
||||||
val = val.substr(1);
|
|
||||||
escaped = false;
|
|
||||||
continue;
|
|
||||||
} else if (is_section) {
|
|
||||||
ret.push_back(0x00a7);
|
|
||||||
is_section = false;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
// so we're done with this command.
|
|
||||||
// now we fall through and check this character.
|
|
||||||
is_section = false;
|
|
||||||
scanning_cmd = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
// was the last character a \? If so, then this is something like:
|
|
||||||
// \\ or \$, so we'll just output it. That's probably not always right...
|
|
||||||
if (escaped) {
|
|
||||||
// exception: output \, as THIN SPACE
|
|
||||||
if (ch == ',')
|
|
||||||
ret.push_back(0x2009);
|
|
||||||
else
|
|
||||||
ret += ch;
|
|
||||||
val = val.substr(1);
|
|
||||||
escaped = false;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (ch == '~') {
|
|
||||||
ret += char_type(0x00a0);
|
|
||||||
val = val.substr(1);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (ch == '$') {
|
|
||||||
ret += ch;
|
|
||||||
val = val.substr(1);
|
|
||||||
scanning_math = true;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Change text mode accents in the form
|
|
||||||
// {\v a} to \v{a} (see #9340).
|
|
||||||
// FIXME: This is a sort of mini-tex2lyx.
|
|
||||||
// Use the real tex2lyx instead!
|
|
||||||
static regex const tma_reg("^\\{\\\\[bcCdfGhHkrtuUv]\\s\\w\\}");
|
|
||||||
if (regex_search(to_utf8(val), tma_reg)) {
|
|
||||||
val = val.substr(1);
|
|
||||||
val.replace(2, 1, from_ascii("{"));
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Apart from the above, we just ignore braces
|
|
||||||
if (ch == '{' || ch == '}') {
|
|
||||||
val = val.substr(1);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
// we're going to check things that look like commands, so if
|
|
||||||
// this doesn't, just output it.
|
|
||||||
if (ch != '\\') {
|
|
||||||
ret += ch;
|
|
||||||
val = val.substr(1);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
// ok, could be a command of some sort
|
|
||||||
// let's see if it corresponds to some unicode
|
|
||||||
// unicodesymbols has things in the form: \"{u},
|
|
||||||
// whereas we may see things like: \"u. So we'll
|
|
||||||
// look for that and change it, if necessary.
|
|
||||||
// FIXME: This is a sort of mini-tex2lyx.
|
|
||||||
// Use the real tex2lyx instead!
|
|
||||||
static regex const reg("^\\\\\\W\\w");
|
|
||||||
if (regex_search(to_utf8(val), reg)) {
|
|
||||||
val.insert(3, from_ascii("}"));
|
|
||||||
val.insert(2, from_ascii("{"));
|
|
||||||
}
|
|
||||||
bool termination;
|
|
||||||
docstring rem;
|
|
||||||
docstring const cnvtd = Encodings::fromLaTeXCommand(val,
|
|
||||||
Encodings::TEXT_CMD, termination, rem);
|
|
||||||
if (!cnvtd.empty()) {
|
|
||||||
// it did, so we'll take that bit and proceed with what's left
|
|
||||||
ret += cnvtd;
|
|
||||||
val = rem;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
// it's a command of some sort
|
|
||||||
scanning_cmd = true;
|
|
||||||
escaped = true;
|
|
||||||
val = val.substr(1);
|
|
||||||
}
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// Escape '<' and '>' and remove richtext markers (e.g. {!this is richtext!}) from a string.
|
// Escape '<' and '>' and remove richtext markers (e.g. {!this is richtext!}) from a string.
|
||||||
docstring processRichtext(docstring const & str, bool richtext)
|
docstring processRichtext(docstring const & str, bool richtext)
|
||||||
{
|
{
|
||||||
@ -639,7 +504,7 @@ docstring const BibTeXInfo::getAuthorList(Buffer const * buf,
|
|||||||
retval = constructName(authors[0], citenameform) + (buf ? buf->B_(etal) : from_ascii(etal));
|
retval = constructName(authors[0], citenameform) + (buf ? buf->B_(etal) : from_ascii(etal));
|
||||||
}
|
}
|
||||||
|
|
||||||
return convertLaTeXCommands(retval);
|
return Encodings::convertLaTeXCommands(retval);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -1068,7 +933,7 @@ docstring const & BibTeXInfo::getInfo(BibTeXInfoList const & xrefs,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (!richtext && !info_.empty()) {
|
if (!richtext && !info_.empty()) {
|
||||||
info_ = convertLaTeXCommands(processRichtext(info_, false));
|
info_ = Encodings::convertLaTeXCommands(processRichtext(info_, false));
|
||||||
return info_;
|
return info_;
|
||||||
}
|
}
|
||||||
if (richtext && !info_richtext_.empty())
|
if (richtext && !info_richtext_.empty())
|
||||||
@ -1090,11 +955,11 @@ docstring const & BibTeXInfo::getInfo(BibTeXInfoList const & xrefs,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (richtext) {
|
if (richtext) {
|
||||||
info_richtext_ = convertLaTeXCommands(processRichtext(info_, true));
|
info_richtext_ = Encodings::convertLaTeXCommands(processRichtext(info_, true));
|
||||||
return info_richtext_;
|
return info_richtext_;
|
||||||
}
|
}
|
||||||
|
|
||||||
info_ = convertLaTeXCommands(processRichtext(info_, false));
|
info_ = Encodings::convertLaTeXCommands(processRichtext(info_, false));
|
||||||
return info_;
|
return info_;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1110,7 +975,7 @@ docstring const BibTeXInfo::getLabel(BibTeXInfoList const & xrefs,
|
|||||||
|
|
||||||
if (!loclabel.empty() && !next) {
|
if (!loclabel.empty() && !next) {
|
||||||
loclabel = processRichtext(loclabel, ci.richtext);
|
loclabel = processRichtext(loclabel, ci.richtext);
|
||||||
loclabel = convertLaTeXCommands(loclabel);
|
loclabel = Encodings::convertLaTeXCommands(loclabel);
|
||||||
}
|
}
|
||||||
|
|
||||||
return loclabel;
|
return loclabel;
|
||||||
|
151
src/Encoding.cpp
151
src/Encoding.cpp
@ -26,6 +26,7 @@
|
|||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
#include <iterator>
|
#include <iterator>
|
||||||
|
#include <regex>
|
||||||
#include <sstream>
|
#include <sstream>
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
@ -613,6 +614,156 @@ docstring Encodings::fromLaTeXCommand(docstring const & cmd, int cmdtype,
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
docstring Encodings::convertLaTeXCommands(docstring const & str)
|
||||||
|
{
|
||||||
|
docstring val = str;
|
||||||
|
docstring ret;
|
||||||
|
docstring mret;
|
||||||
|
|
||||||
|
bool scanning_cmd = false;
|
||||||
|
bool scanning_math = false;
|
||||||
|
bool is_section = false;
|
||||||
|
bool escaped = false; // used to catch \$, etc.
|
||||||
|
while (!val.empty()) {
|
||||||
|
char_type const ch = val[0];
|
||||||
|
|
||||||
|
// if we're scanning math, we output everything until we
|
||||||
|
// find an unescaped $, at which point we break out.
|
||||||
|
if (scanning_math) {
|
||||||
|
if (escaped)
|
||||||
|
escaped = false;
|
||||||
|
else if (ch == '\\')
|
||||||
|
escaped = true;
|
||||||
|
else if (ch == '$') {
|
||||||
|
scanning_math = false;
|
||||||
|
bool termination;
|
||||||
|
docstring rem;
|
||||||
|
ret += fromLaTeXCommand(mret, MATH_CMD, termination, rem);
|
||||||
|
// parse remaining math
|
||||||
|
while (!rem.empty()) {
|
||||||
|
docstring rrem;
|
||||||
|
// split command from normal text
|
||||||
|
docstring cmd = split(rem, rrem, '\\');
|
||||||
|
ret += rrem;
|
||||||
|
// done if no command was found
|
||||||
|
if (cmd.empty())
|
||||||
|
break;
|
||||||
|
// go on ...
|
||||||
|
ret += fromLaTeXCommand(from_ascii("\\") + cmd, MATH_CMD, termination, rem);
|
||||||
|
}
|
||||||
|
mret = docstring();
|
||||||
|
}
|
||||||
|
mret += ch;
|
||||||
|
val = val.substr(1);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// if we're scanning a command name, then we just
|
||||||
|
// discard characters until we hit something that
|
||||||
|
// isn't alpha.
|
||||||
|
if (scanning_cmd) {
|
||||||
|
if (!is_section && ch == 'S') {
|
||||||
|
is_section = true;
|
||||||
|
val = val.substr(1);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (isAlphaASCII(ch)) {
|
||||||
|
is_section = false;
|
||||||
|
val = val.substr(1);
|
||||||
|
escaped = false;
|
||||||
|
continue;
|
||||||
|
} else if (is_section) {
|
||||||
|
ret.push_back(0x00a7);
|
||||||
|
is_section = false;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
// so we're done with this command.
|
||||||
|
// now we fall through and check this character.
|
||||||
|
is_section = false;
|
||||||
|
scanning_cmd = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// was the last character a \? If so, then this is something like:
|
||||||
|
// \\ or \$, so we'll just output it. That's probably not always right...
|
||||||
|
if (escaped) {
|
||||||
|
// exception: output \, as THIN SPACE
|
||||||
|
if (ch == ',')
|
||||||
|
ret.push_back(0x2009);
|
||||||
|
else
|
||||||
|
ret += ch;
|
||||||
|
val = val.substr(1);
|
||||||
|
escaped = false;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ch == '~') {
|
||||||
|
ret += char_type(0x00a0);
|
||||||
|
val = val.substr(1);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ch == '$') {
|
||||||
|
val = val.substr(1);
|
||||||
|
scanning_math = true;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Change text mode accents in the form
|
||||||
|
// {\v a} to \v{a} (see #9340).
|
||||||
|
// FIXME: This is a sort of mini-tex2lyx.
|
||||||
|
// Use the real tex2lyx instead!
|
||||||
|
static regex const tma_reg("^\\{\\\\[bcCdfGhHkrtuUv]\\s\\w\\}");
|
||||||
|
if (regex_search(to_utf8(val), tma_reg)) {
|
||||||
|
val = val.substr(1);
|
||||||
|
val.replace(2, 1, from_ascii("{"));
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Apart from the above, we just ignore braces
|
||||||
|
if (ch == '{' || ch == '}') {
|
||||||
|
val = val.substr(1);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// we're going to check things that look like commands, so if
|
||||||
|
// this doesn't, just output it.
|
||||||
|
if (ch != '\\') {
|
||||||
|
ret += ch;
|
||||||
|
val = val.substr(1);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// ok, could be a command of some sort
|
||||||
|
// let's see if it corresponds to some unicode
|
||||||
|
// unicodesymbols has things in the form: \"{u},
|
||||||
|
// whereas we may see things like: \"u. So we'll
|
||||||
|
// look for that and change it, if necessary.
|
||||||
|
// FIXME: This is a sort of mini-tex2lyx.
|
||||||
|
// Use the real tex2lyx instead!
|
||||||
|
static regex const reg("^\\\\\\W\\w");
|
||||||
|
if (regex_search(to_utf8(val), reg)) {
|
||||||
|
val.insert(3, from_ascii("}"));
|
||||||
|
val.insert(2, from_ascii("{"));
|
||||||
|
}
|
||||||
|
bool termination;
|
||||||
|
docstring rem;
|
||||||
|
docstring const cnvtd = fromLaTeXCommand(val,
|
||||||
|
TEXT_CMD, termination, rem);
|
||||||
|
if (!cnvtd.empty()) {
|
||||||
|
// it did, so we'll take that bit and proceed with what's left
|
||||||
|
ret += cnvtd;
|
||||||
|
val = rem;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
// it's a command of some sort
|
||||||
|
scanning_cmd = true;
|
||||||
|
escaped = true;
|
||||||
|
val = val.substr(1);
|
||||||
|
}
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
CharInfo const & Encodings::unicodeCharInfo(char_type c)
|
CharInfo const & Encodings::unicodeCharInfo(char_type c)
|
||||||
{
|
{
|
||||||
static CharInfo empty;
|
static CharInfo empty;
|
||||||
|
@ -349,6 +349,9 @@ public:
|
|||||||
static char_type fromLaTeXCommand(docstring const & cmd, int cmdtype,
|
static char_type fromLaTeXCommand(docstring const & cmd, int cmdtype,
|
||||||
bool & combining, bool & needsTermination,
|
bool & combining, bool & needsTermination,
|
||||||
std::set<std::string> * req = nullptr);
|
std::set<std::string> * req = nullptr);
|
||||||
|
/// converts a string containing LaTeX commands into unicode
|
||||||
|
/// for display.
|
||||||
|
static docstring convertLaTeXCommands(docstring const & str);
|
||||||
///
|
///
|
||||||
enum LatexCmd {
|
enum LatexCmd {
|
||||||
///
|
///
|
||||||
|
Loading…
Reference in New Issue
Block a user