MathML: Convert HTML entities to XML entities.

This commit is contained in:
Thibaut Cuvelier 2020-06-08 04:07:47 +02:00 committed by Pavel Sanda
parent 054bdc5d18
commit 718eede94b
17 changed files with 958 additions and 915 deletions

File diff suppressed because it is too large Load Diff

View File

@ -139,7 +139,7 @@ void InsetMathBig::mathmlize(MathStream & ms) const
{
ms << "<" << from_ascii(ms.namespacedTag("mo"))
<< " form='prefix' fence='true' stretchy='true' symmetric='true'>"
<< convertDelimToXMLEscape(delim_)
<< convertDelimToXMLEscape(delim_, ms.xmlMode())
<< "</" << from_ascii(ms.namespacedTag("mo")) << ">";
}
@ -154,7 +154,7 @@ void InsetMathBig::htmlize(HtmlStream & os) const
default: name = "big"; break;
}
os << MTag("span", "class='" + name + "symbol'")
<< convertDelimToXMLEscape(delim_)
<< convertDelimToXMLEscape(delim_, false)
<< ETag("span");
}

View File

@ -224,7 +224,10 @@ void InsetMathChar::mathmlize(MathStream & ms) const
case '>': entity = "&gt;"; break;
case '&': entity = "&amp;"; break;
case ' ': {
ms << from_ascii("&nbsp;");
if (ms.xmlMode())
ms << from_ascii("&#0160;");
else
ms << from_ascii("&nbsp;");
return;
}
default: break;

View File

@ -169,45 +169,46 @@ void InsetMathDecoration::infoize(odocstream & os) const
namespace {
/// Per-decoration data used when exporting a math decoration to MathML.
/// NOTE(review): this span is taken from a diff view; the two-argument
/// constructor is presumably the pre-commit form and the three-argument one
/// its replacement -- confirm against the real file before relying on both.
struct Attributes {
/// Default entry: decoration goes under the base, both strings empty.
Attributes() : over(false) {}
/// Sets the placement flag and the named entity; `entity` is left empty.
Attributes(bool o, string const & t)
: over(o), tag(t) {}
/// Sets the placement flag, the named entity and its numeric counterpart.
/// NOTE(review): the parameter `entity` shadows the member of the same name;
/// this is legal inside a mem-initializer but easy to misread.
Attributes(bool o, string const & t, string const & entity)
: over(o), tag(t), entity(entity) {}
/// true: emitted inside "mover"; false: emitted inside "munder".
bool over;
/// Named entity (e.g. "&acute;"), written when the stream is not in XML mode.
string tag;
/// Numeric character reference (e.g. "&#x00B4;"), written in XML mode.
string entity;
};
typedef map<string, Attributes> TranslationMap;
/// Fills \p t with one Attributes entry per supported decoration name.
/// Each entry records whether the decoration is placed over (true) or under
/// (false) the base, the named entity, and -- in the three-argument form --
/// the numeric character reference used in XML mode.
// NOTE(review): this span comes from a diff view, so every key is assigned
// twice: first with the two-argument form, then with the three-argument form
// (presumably the removed and the added lines of the commit). As plain C++
// the later assignment to the same key overwrites the earlier one.
// NOTE(review): the HTML named character reference list maps &OverBar; to
// U+203E, &UnderBar; to U+005F, &OverBrace; to U+23DE and &UnderBrace; to
// U+23DF, whereas the numeric references below (U+00AF, U+0332, U+FE37,
// U+FE38) look like the older MathML 2 assignments -- confirm that both
// output modes are meant to produce the same character.
void buildTranslationMap(TranslationMap & t) {
// the decorations we need to support are listed in lib/symbols
t["acute"] = Attributes(true, "&acute;");
t["bar"] = Attributes(true, "&OverBar;");
t["breve"] = Attributes(true, "&breve;");
t["check"] = Attributes(true, "&caron;");
t["ddddot"] = Attributes(true, "&DotDot;");
t["dddot"] = Attributes(true, "&TripleDot;");
t["ddot"] = Attributes(true, "&Dot;");
t["dot"] = Attributes(true, "&dot;");
t["grave"] = Attributes(true, "&grave;");
t["hat"] = Attributes(true, "&circ;");
t["mathring"] = Attributes(true, "&ring;");
t["overbrace"] = Attributes(true, "&OverBrace;");
t["overleftarrow"] = Attributes(true, "&xlarr;");
t["overleftrightarrow"] = Attributes(true, "&xharr;");
t["overline"] = Attributes(true, "&macr;");
t["overrightarrow"] = Attributes(true, "&xrarr;");
t["tilde"] = Attributes(true, "&tilde;");
t["underbar"] = Attributes(false, "&UnderBar;");
t["underbrace"] = Attributes(false, "&UnderBrace;");
t["underleftarrow"] = Attributes(false, "&xlarr;");
t["underleftrightarrow"] = Attributes(false, "&xharr;");
// Same keys again, now also carrying the numeric character reference.
t["acute"] = Attributes(true, "&acute;", "&#x00B4;");
t["bar"] = Attributes(true, "&OverBar;", "&#x00AF;");
t["breve"] = Attributes(true, "&breve;", "&#x02D8;");
t["check"] = Attributes(true, "&caron;", "&#x02C7;");
t["ddddot"] = Attributes(true, "&DotDot;", "&#x20DC;");
t["dddot"] = Attributes(true, "&TripleDot;", "&#x20DB;");
t["ddot"] = Attributes(true, "&Dot;", "&#x00A8;");
t["dot"] = Attributes(true, "&dot;", "&#x02D9;");
t["grave"] = Attributes(true, "&grave;", "&#x0060;");
t["hat"] = Attributes(true, "&circ;", "&#x02C6;");
t["mathring"] = Attributes(true, "&ring;", "&#x02DA;");
t["overbrace"] = Attributes(true, "&OverBrace;", "&#xFE37;");
t["overleftarrow"] = Attributes(true, "&xlarr;", "&#x27F5;");
t["overleftrightarrow"] = Attributes(true, "&xharr;", "&#x27F7;");
t["overline"] = Attributes(true, "&macr;", "&#x00AF;");
t["overrightarrow"] = Attributes(true, "&xrarr;", "&#x27F6;");
t["tilde"] = Attributes(true, "&tilde;", "&#x02DC;");
t["underbar"] = Attributes(false, "&UnderBar;", "&#x0332;");
t["underbrace"] = Attributes(false, "&UnderBrace;", "&#xFE38;");
t["underleftarrow"] = Attributes(false, "&xlarr;", "&#x27F5;");
t["underleftrightarrow"] = Attributes(false, "&xharr;", "&#x27F7;");
// this is the macron, again, but it works
t["underline"] = Attributes(false, "&macr;");
t["underrightarrow"] = Attributes(false, "&xrarr;");
t["undertilde"] = Attributes(false, "&Tilde;");
t["utilde"] = Attributes(false, "&Tilde;");
t["vec"] = Attributes(true, "&rarr;");
t["widehat"] = Attributes(true, "&Hat;");
t["widetilde"] = Attributes(true, "&Tilde;");
// Same keys again, now also carrying the numeric character reference.
t["underline"] = Attributes(false, "&macr;", "&#x00AF;");
t["underrightarrow"] = Attributes(false, "&xrarr;", "&#x27F6;");
t["undertilde"] = Attributes(false, "&Tilde;", "&#x223C;");
t["utilde"] = Attributes(false, "&Tilde;", "&#x223C;");
t["vec"] = Attributes(true, "&rarr;", "&#x2192;");
t["widehat"] = Attributes(true, "&Hat;", "&#x005E;");
t["widetilde"] = Attributes(true, "&Tilde;", "&#x223C;");
}
TranslationMap const & translationMap() {
@ -224,10 +225,11 @@ void InsetMathDecoration::mathmlize(MathStream & ms) const
TranslationMap::const_iterator cur = t.find(to_utf8(key_->name));
LASSERT(cur != t.end(), return);
char const * const outag = cur->second.over ? "mover" : "munder";
std::string decoration = ms.xmlMode() ? cur->second.entity : cur->second.tag;
ms << MTag(outag)
<< MTag("mrow") << cell(0) << ETag("mrow")
<< "<" << from_ascii(ms.namespacedTag("mo")) << " stretchy=\"true\">"
<< from_ascii(cur->second.tag)
<< from_ascii(decoration)
<< "</" << from_ascii(ms.namespacedTag("mo")) << ">"
<< ETag(outag);
}

View File

@ -170,7 +170,7 @@ void InsetMathDelim::mathematica(MathematicaStream & os) const
{
if (isAbs()) {
if (cell(0).size() == 1 && cell(0).front()->asMatrixInset())
os << "Det" << cell(0) << ']';
os << "Det[" << cell(0) << ']';
else
os << "Abs[" << cell(0) << ']';
}
@ -183,7 +183,7 @@ void InsetMathDelim::mathmlize(MathStream & ms) const
{
ms << "<" << from_ascii(ms.namespacedTag("mo")) << " form='prefix' fence='true' stretchy='true' symmetric='true'>"
<< "<" << from_ascii(ms.namespacedTag("mrow")) << ">"
<< convertDelimToXMLEscape(left_)
<< convertDelimToXMLEscape(left_, ms.xmlMode())
<< "</" << from_ascii(ms.namespacedTag("mrow")) << ">"
<< "</" << from_ascii(ms.namespacedTag("mo")) << ">"
<< "\n"
@ -191,7 +191,7 @@ void InsetMathDelim::mathmlize(MathStream & ms) const
<< "\n"
<< "<" << from_ascii(ms.namespacedTag("mo")) << " form='postfix' fence='true' stretchy='true' symmetric='true'>"
<< "<" << from_ascii(ms.namespacedTag("mrow")) << ">"
<< convertDelimToXMLEscape(right_)
<< convertDelimToXMLEscape(right_, ms.xmlMode())
<< "</" << from_ascii(ms.namespacedTag("mrow")) << ">"
<< "</" << from_ascii(ms.namespacedTag("mo")) << ">"
<< "\n";
@ -200,9 +200,9 @@ void InsetMathDelim::mathmlize(MathStream & ms) const
/// Writes the escaped left delimiter, the contents of cell 0 and the escaped
/// right delimiter to the HTML stream \p os.
// NOTE(review): this span is a diff view; each delimiter line appears twice,
// presumably once as the removed one-argument call and once as the added call
// that passes `false` for the xmlmode argument of convertDelimToXMLEscape
// (which makes that function return the symbol's htmlname).
void InsetMathDelim::htmlize(HtmlStream & os) const
{
os << convertDelimToXMLEscape(left_)
os << convertDelimToXMLEscape(left_, false)
<< cell(0)
<< convertDelimToXMLEscape(right_);
<< convertDelimToXMLEscape(right_, false);
}

View File

@ -91,18 +91,31 @@ void InsetMathDots::mathmlize(MathStream & ms) const
// lib/symbols as generating a dots inset
docstring const & n = key_->name;
std::string ent;
if (n == "dots" || n == "dotsc" || n == "dotso" || n == "ldots")
ent = "&hellip;";
else if (n == "adots" || n == "iddots")
ent = "&utdot;";
else if (n == "cdots" || n == "dotsb" || n == "dotsi" || n == "dotsm")
ent = "&ctdot;";
else if (n == "ddots")
ent = "&dtdot;";
else if (n == "vdots")
ent = "&vellip;";
else
LASSERT(false, ent = "&hellip;");
if (ms.xmlMode()) {
if (n == "dots" || n == "dotsc" || n == "dotso" || n == "ldots")
ent = "&#x2026;";
else if (n == "adots" || n == "iddots")
ent = "&#x22F0;";
else if (n == "cdots" || n == "dotsb" || n == "dotsi" || n == "dotsm")
ent = "&#x22EF;";
else if (n == "ddots")
ent = "&#x22F1;";
else if (n == "vdots")
ent = "&#x22EE;";
else LASSERT(false, ent = "&#x2026;");
} else {
if (n == "dots" || n == "dotsc" || n == "dotso" || n == "ldots")
ent = "&hellip;";
else if (n == "adots" || n == "iddots")
ent = "&utdot;";
else if (n == "cdots" || n == "dotsb" || n == "dotsi" || n == "dotsm")
ent = "&ctdot;";
else if (n == "ddots")
ent = "&dtdot;";
else if (n == "vdots")
ent = "&vellip;";
else LASSERT(false, ent = "&hellip;");
}
ms << MTag("mi") << from_ascii(ent) << ETag("mi");
}

View File

@ -2406,8 +2406,13 @@ int InsetMathHull::plaintext(odocstringstream & os,
int InsetMathHull::docbook(odocstream & os, OutputParams const & runparams) const
{
MathStream ms(os);
// With DocBook 5, MathML must be within its own namespace; defined in Buffer.cpp::writeDocBookSource as "m".
// Output everything in a separate stream so that this does not interfere with the standard flow of DocBook tags.
odocstringstream osmath;
MathStream ms(osmath, "m", true);
int res = 0;
// Choose the tag around the MathML equation.
docstring name;
if (getType() == hullSimple)
name = from_ascii("inlineequation");
@ -2415,8 +2420,12 @@ int InsetMathHull::docbook(odocstream & os, OutputParams const & runparams) cons
name = from_ascii("informalequation");
docstring bname = name;
if (!label(0).empty())
bname += " id='" + xml::cleanID(label(0)) + "'";
for (row_type i = 0; i < nrows(); ++i) {
if (!label(i).empty()) {
bname += " xml:id=\"" + xml::cleanID(label(i)) + "\"";
break;
}
}
++ms.tab(); ms.cr(); ms.os() << '<' << bname << '>';

View File

@ -1181,7 +1181,7 @@ void InsetMathMacro::mathmlize(MathStream & ms) const
// macro_ is 0 if this is an unknown macro
LATTEST(d->macro_ || d->displayMode_ != DISPLAY_NORMAL);
if (d->macro_) {
docstring const xmlname = d->macro_->xmlname();
docstring const xmlname = (ms.xmlMode()) ? d->macro_->xmlname() : d->macro_->htmlname();
if (!xmlname.empty()) {
char const * type = d->macro_->MathMLtype();
ms << "<" << from_ascii(ms.namespacedTag(type)) << ">"
@ -1203,7 +1203,7 @@ void InsetMathMacro::htmlize(HtmlStream & os) const
// macro_ is 0 if this is an unknown macro
LATTEST(d->macro_ || d->displayMode_ != DISPLAY_NORMAL);
if (d->macro_) {
docstring const xmlname = d->macro_->xmlname();
docstring const xmlname = d->macro_->htmlname();
if (!xmlname.empty()) {
os << ' ' << xmlname << ' ';
return;

View File

@ -57,7 +57,6 @@ public:
int plaintext(odocstringstream &, OutputParams const &, size_t) const;
///
bool inheritFont() const { return false; }
///
docstring name() const;
///

View File

@ -95,7 +95,7 @@ void InsetMathMatrix::mathematica(MathematicaStream & os) const
void InsetMathMatrix::mathmlize(MathStream & ms) const
{
ms << "<" << from_ascii(ms.namespacedTag("mo")) << " form='prefix' fence='true' stretchy='true' symmetric='true' lspace='thinmathspace'>"
<< convertDelimToXMLEscape(left_)
<< convertDelimToXMLEscape(left_, ms.xmlMode())
<< "</" << from_ascii(ms.namespacedTag("mo")) << ">"
<< MTag("mtable");
for (row_type row = 0; row < nrows(); ++row) {
@ -114,7 +114,7 @@ void InsetMathMatrix::mathmlize(MathStream & ms) const
}
ms << ETag("mtable")
<< "<" << from_ascii(ms.namespacedTag("mo")) << " form='postfix' fence='true' stretchy='true' symmetric='true' lspace='thinmathspace'>"
<< convertDelimToXMLEscape(right_)
<< convertDelimToXMLEscape(right_, ms.xmlMode())
<< "</" << from_ascii(ms.namespacedTag("mo")) << ">";
}

View File

@ -168,11 +168,13 @@ void InsetMathSymbol::mathmlize(MathStream & ms) const
// with MathMLtype.
docstring tag = from_ascii(ms.namespacedTag(sym_->MathMLtype()));
ms << '<' << tag << ">";
if (sym_->xmlname == "x")
if ((ms.xmlMode() && sym_->xmlname == "x") || (!ms.xmlMode() && sym_->htmlname == "x"))
// unknown so far
ms << name();
else
else if (ms.xmlMode())
ms << sym_->xmlname;
else
ms << sym_->htmlname;
ms << "</" << tag << '>';
}
@ -184,13 +186,13 @@ void InsetMathSymbol::htmlize(HtmlStream & os, bool spacing) const
char const * type = sym_->MathMLtype();
bool op = (std::string(type) == "mo");
if (sym_->xmlname == "x")
if (sym_->htmlname == "x")
// unknown so far
os << ' ' << name() << ' ';
else if (op && spacing)
os << ' ' << sym_->xmlname << ' ';
os << ' ' << sym_->htmlname << ' ';
else
os << sym_->xmlname;
os << sym_->htmlname;
}

View File

@ -126,6 +126,14 @@ bool MacroData::hidden() const
}
/// Returns sym_->htmlname when sym_ is set, otherwise an empty docstring.
docstring const MacroData::htmlname() const
{
	return sym_ ? sym_->htmlname : docstring();
}
docstring const MacroData::xmlname() const
{
if (sym_)

View File

@ -64,6 +64,8 @@ public:
///
bool hidden() const;
///
docstring const htmlname() const;
///
docstring const xmlname() const;
///
char const * MathMLtype() const;

View File

@ -186,23 +186,23 @@ void initSymbols()
// or
// \def\macroname{definition} requires
// or
// \def\macroname{definition} extra xmlname requires
// \def\macroname{definition} extra htmlname requires
istringstream is(line);
string macro;
string required;
string extra;
string xmlname;
string htmlname;
bool hidden = false;
is >> setw(65536) >> macro >> required;
if ((is >> xmlname)) {
if ((is >> htmlname)) {
extra = required;
if (!(is >> required))
required = "";
} else
xmlname = "";
htmlname = "";
MacroTable::iterator it = MacroTable::globalMacros().insert(
0, from_utf8(macro));
if (!extra.empty() || !xmlname.empty() || !required.empty()) {
if (!extra.empty() || !htmlname.empty() || !required.empty()) {
MathWordList::iterator wit = theMathWordList.find(it->first);
if (wit != theMathWordList.end())
LYXERR(Debug::MATHED, "readSymbols: inset "
@ -212,7 +212,7 @@ void initSymbols()
tmp.inset = "macro";
tmp.name = it->first;
tmp.extra = from_utf8(extra);
tmp.xmlname = from_utf8(xmlname);
tmp.htmlname = from_utf8(htmlname);
if (required == "hiddensymbol") {
required = "";
tmp.hidden = hidden = true;
@ -226,10 +226,10 @@ void initSymbols()
// If you change the following output, please adjust
// development/tools/generate_symbols_images.py.
LYXERR(Debug::MATHED, "read symbol '" << to_utf8(it->first)
<< " inset: macro"
<< " draw: 0"
<< " extra: " << extra
<< " xml: " << xmlname
<< " inset: macro"
<< " draw: 0"
<< " extra: " << extra
<< " html: " << htmlname
<< " requires: " << required
<< " hidden: " << hidden << '\'');
continue;
@ -241,7 +241,7 @@ void initSymbols()
is >> tmp.name >> help;
tmp.inset = to_ascii(help);
if (isFontName(tmp.inset))
is >> charid >> fallbackid >> tmp.extra >> tmp.xmlname;
is >> charid >> fallbackid >> tmp.extra >> tmp.htmlname >> tmp.xmlname;
else
is >> tmp.extra;
// requires is optional
@ -327,6 +327,7 @@ void initSymbols()
<< " inset: " << tmp.inset
<< " draw: " << int(tmp.draw.empty() ? 0 : tmp.draw[0])
<< " extra: " << to_utf8(tmp.extra)
<< " html: " << to_utf8(tmp.htmlname)
<< " xml: " << to_utf8(tmp.xmlname)
<< " requires: " << tmp.required
<< " hidden: " << tmp.hidden << '\'');

View File

@ -54,6 +54,8 @@ public:
docstring draw;
/// operator/..., fontname e
docstring extra;
/// name of the HTML entity that represents this symbol in MathML output
docstring htmlname;
/// name of the XML entity that represents this symbol in MathML output
docstring xmlname;
/// required LaTeXFeatures

View File

@ -269,8 +269,8 @@ WriteStream & operator<<(WriteStream & ws, unsigned int i)
//////////////////////////////////////////////////////////////////////
/// Builds a MathStream that writes to \p os.
/// Starts with tab_ and line_ at 0 and in_text_ false; \p xmlns is stored in
/// xmlns_ and, in the three-argument form, \p xmlMode is stored in xml_mode_.
// NOTE(review): this span is a diff view; the two-argument signature and
// initializer list are presumably the removed lines and the three-argument
// ones their replacement, both sharing the empty body below.
MathStream::MathStream(odocstream & os, std::string xmlns)
: os_(os), tab_(0), line_(0), in_text_(false), xmlns_(xmlns)
MathStream::MathStream(odocstream & os, std::string xmlns, bool xmlMode)
: os_(os), tab_(0), line_(0), in_text_(false), xmlns_(xmlns), xml_mode_(xmlMode)
{}
@ -709,8 +709,9 @@ OctaveStream & operator<<(OctaveStream & os, string const & s)
}
docstring convertDelimToXMLEscape(docstring const & name)
docstring convertDelimToXMLEscape(docstring const & name, bool xmlmode)
{
// For the basic symbols, no difference between XML and HTML.
if (name.size() == 1) {
char_type const c = name[0];
if (c == '<')
@ -729,7 +730,8 @@ docstring convertDelimToXMLEscape(docstring const & name)
MathWordList const & words = mathedWordList();
MathWordList::const_iterator it = words.find(name);
if (it != words.end()) {
docstring const escape = it->second.xmlname;
// Only difference between XML and HTML, based on the contents read by MathFactory.
docstring const escape = xmlmode ? it->second.xmlname : it->second.htmlname;
return escape;
}
LYXERR0("Unable to find `" << name <<"' in the mathWordList.");

View File

@ -340,7 +340,7 @@ class MathExportException : public std::exception {};
class MathStream {
public:
/// Builds a stream proxy for os; the MathML namespace is given by xmlns (supposed to be already defined elsewhere in the document).
explicit MathStream(odocstream & os, std::string xmlns="");
explicit MathStream(odocstream & os, std::string xmlns="", bool xmlMode=false);
///
void cr();
///
@ -361,6 +361,8 @@ public:
bool inText() const { return in_text_; }
///
std::string xmlns() const { return xmlns_; }
/// Returns the XML-mode flag given at construction. Exporters in this
/// commit use it to choose between numeric character references (true)
/// and named HTML entities (false).
bool xmlMode() const { return xml_mode_; }
/// Returns \p tag unchanged when no namespace is set, otherwise the
/// namespace, a colon and \p tag (e.g. "m:mo" for namespace "m").
std::string namespacedTag(std::string tag) const
{
	std::string const prefix = xmlns();
	if (prefix.empty())
		return tag;
	return prefix + ":" + tag;
}
private:
@ -379,6 +381,8 @@ private:
///
std::string xmlns_;
///
bool xml_mode_;
///
friend class SetMode;
};
@ -646,7 +650,7 @@ OctaveStream & operator<<(OctaveStream &, char);
OctaveStream & operator<<(OctaveStream &, int);
docstring convertDelimToXMLEscape(docstring const & name);
docstring convertDelimToXMLEscape(docstring const & name, bool xmlmode);
} // namespace lyx