Fix crash when generating MathML with InsetMathBox

Instead of generating code and parsing it to add <mtext>...</mtext> at
the right spots, this commit honors the text mode setting that was
already present in the codebase to generate it automatically.

This is the work of two helper methods in MathMLStream:

* beforeText() notices when the stream is in text mode and that a
  <mtext> has not yet been generated. In this case it inserts it, so
  that raw text can be emitted afterwards.

* beforeTag() checks whether a <mtext> needs to be closed at this
  point, and does it if needed.

To make this work, the code now tracks the nesting level in the
stream, and compares it to what the level was when text mode was
enabled using the SetMode helper class.

In order to avoid later bugs, the os() member, which gave direct access
to the underlying stream of MathMLStream, is removed. This required many
<< operators to become friends of MathMLStream.

In InsetMathBox, rename splitAndWrapInMText() to mathmlizeHelper(),
which is now just a method that sets text mode inside a <mrow>
element.

In InsetMathFont and InsetMathHull, the explicit generation of nesting
in <mtext>...</mtext> can be removed now.

Fixes bug #13069.
This commit is contained in:
Jean-Marc Lasgouttes 2024-07-20 12:15:32 +02:00
parent fbd4b0a13f
commit 216a6fb348
6 changed files with 88 additions and 90 deletions

View File

@ -204,10 +204,13 @@ void InsetMath::mathematica(MathematicaStream & os) const
void InsetMath::mathmlize(MathMLStream & ms) const
{
SetMode rawmode(ms, false);
ms << "<!-- " << from_utf8(insetName(lyxCode())) << " -->";
ms << MTagInline("mi");
NormalStream ns(ms.os());
odocstringstream ods;
NormalStream ns(ods);
normalize(ns);
ms << ods.str();
ms << ETagInline("mi");
}

View File

@ -59,69 +59,19 @@ void InsetMathBox::normalize(NormalStream & os) const
namespace {
void splitAndWrapInMText(MathMLStream & ms, MathData const & cell,
const std::string & attributes)
// Generate the MathML, making sure that anything that is outside of
// any tag is wrapped in <mtext></mtext> tags, then wrap the whole thing in an
// <mrow></mrow> tag with attributes
void mathmlizeHelper(MathMLStream & ms, MathData const & cell, const std::string & attributes)
{
// First, generate the inset into a string of its own.
docstring inset_contents;
{
odocstringstream ostmp;
MathMLStream mstmp(ostmp, ms.xmlns());
SetMode textmode(mstmp, true);
mstmp << cell;
inset_contents = ostmp.str();
}
// No tags are allowed within <m:mtext>: split the string if there are tags.
std::vector<docstring> parts;
while (true) {
std::size_t angle_pos = inset_contents.find('<');
if (angle_pos == docstring::npos)
break;
// String structure:
// - prefix: pure text, no tag
// - tag to split: something like <m:mn>1</m:mn> or more complicated
// (like nested tags), with or without name space
// - rest to be taken care of in the next iteration
// Push the part before the tag.
parts.emplace_back(inset_contents.substr(0, angle_pos));
inset_contents = inset_contents.substr(angle_pos);
// Now, inset_contents starts with the tag to isolate, so that
// inset_contents[0] == '<'
// Push the tag, up to its end. Process: find the tag name (either
// before > or the first attribute of the tag), then the matching end
// tag, then proceed with pushing.
const std::size_t tag_name_end =
std::min(inset_contents.find(' ', 1), inset_contents.find('>', 1));
const std::size_t tag_name_length = tag_name_end - 1;
const docstring tag_name = inset_contents.substr(1, tag_name_length);
const std::size_t end_tag_start =
inset_contents.find(tag_name, tag_name_end + 1);
const std::size_t end_tag = inset_contents.find('>', end_tag_start);
parts.emplace_back(inset_contents.substr(0, end_tag + 1));
inset_contents = inset_contents.substr(end_tag + 1);
}
parts.emplace_back(inset_contents);
// Finally, output the complete inset: escape the test in <m:mtext>, leave
// the other tags untouched.
ms << MTag("mrow", attributes);
for (std::size_t i = 0; i < parts.size(); i += 2) {
ms << MTag("mtext")
<< parts[i]
<< ETag("mtext");
if (parts.size() > i + 1)
ms << parts[i + 1];
{
SetMode textmode(ms, true);
ms << cell;
}
ms << ETag("mrow");
}
}
@ -131,7 +81,7 @@ void InsetMathBox::mathmlize(MathMLStream & ms) const
// Need to do something special for tags here.
// Probably will have to involve deferring them, which
// means returning something from this routine.
splitAndWrapInMText(ms, cell(0), "class='mathbox'");
mathmlizeHelper(ms, cell(0), "class='mathbox'");
}
@ -230,7 +180,7 @@ void InsetMathFBox::normalize(NormalStream & os) const
void InsetMathFBox::mathmlize(MathMLStream & ms) const
{
splitAndWrapInMText(ms, cell(0), "class='fbox'");
mathmlizeHelper(ms, cell(0), "class='fbox'");
}
@ -373,7 +323,7 @@ void InsetMathMakebox::mathmlize(MathMLStream & ms) const
{
// FIXME We could do something with the other arguments.
std::string const cssclass = framebox_ ? "framebox" : "makebox";
splitAndWrapInMText(ms, cell(2), "class='" + cssclass + "'");
mathmlizeHelper(ms, cell(2), "class='" + cssclass + "'");
}
@ -452,7 +402,7 @@ void InsetMathBoxed::infoize(odocstream & os) const
void InsetMathBoxed::mathmlize(MathMLStream & ms) const
{
splitAndWrapInMText(ms, cell(0), "class='boxed'");
mathmlizeHelper(ms, cell(0), "class='boxed'");
}

View File

@ -220,9 +220,7 @@ void InsetMathFont::mathmlize(MathMLStream & ms) const
if (tag == "text" || tag == "textnormal" || tag == "textrm" ||
tag == "textup" || tag == "textmd") {
SetMode textmode(ms, true);
ms << MTagInline("mtext");
ms << cell(0);
ms << ETagInline("mtext");
} else if (!variant.empty()) {
ms << MTag("mstyle", "mathvariant='" + variant + "'");
ms << cell(0);

View File

@ -2565,8 +2565,10 @@ void InsetMathHull::mathmlize(MathMLStream & ms) const
if (haveNumbers()) {
ms << MTag("mtd");
docstring const & num = numbers_[row];
if (!num.empty())
ms << MTagInline("mtext") << '(' << num << ')' << ETagInline("mtext");
if (!num.empty()) {
SetMode textmode(ms, true);
ms << '(' << num << ')';
}
ms << ETag("mtd");
}

View File

@ -291,7 +291,7 @@ TeXMathStream & operator<<(TeXMathStream & ws, unsigned int i)
MathMLStream::MathMLStream(odocstream & os, std::string const & xmlns)
: os_(os), xmlns_(xmlns)
{
if (in_text_)
if (inText())
font_math_style_ = TEXT_STYLE;
else
font_math_style_ = DISPLAY_STYLE;
@ -300,9 +300,9 @@ MathMLStream::MathMLStream(odocstream & os, std::string const & xmlns)
void MathMLStream::cr()
{
os() << '\n';
os_ << '\n';
for (int i = 0; i < tab(); ++i)
os() << ' ';
os_ << ' ';
}
@ -323,6 +323,23 @@ docstring MathMLStream::deferred() const
return deferred_.str();
}
// Called before raw text is written to the stream: opens an <mtext>
// element when the stream sits exactly at the nesting level at which
// text mode was enabled and no <mtext> is currently open.
void MathMLStream::beforeText()
{
	// Already inside an <mtext>, or not at the text-mode level:
	// nothing to open.
	if (in_mtext_ || nesting_level_ != text_level_)
		return;

	// Emitting the opening tag raises nesting_level_ by one, which is
	// the condition beforeTag() later tests to close the element.
	*this << MTagInline("mtext");
	in_mtext_ = true;
}
// Called before any tag is written to the stream: closes the <mtext>
// element opened by beforeText(), if there is one and the stream is
// exactly one level below the level at which text mode was enabled.
void MathMLStream::beforeTag()
{
	if (!in_mtext_)
		return;
	if (nesting_level_ != text_level_ + 1)
		return;

	// The flag must be cleared before the closing tag is emitted:
	// operator<<(ETagInline) itself calls beforeTag(), and the cleared
	// flag is what stops that nested call from closing <mtext> again.
	in_mtext_ = false;
	*this << ETagInline("mtext");
}
MathMLStream & operator<<(MathMLStream & ms, MathAtom const & at)
{
@ -340,7 +357,8 @@ MathMLStream & operator<<(MathMLStream & ms, MathData const & ar)
MathMLStream & operator<<(MathMLStream & ms, docstring const & s)
{
ms.os() << s;
ms.beforeText();
ms.os_ << s;
return ms;
}
@ -368,51 +386,65 @@ MathMLStream & operator<<(MathMLStream & ms, char_type c)
MathMLStream & operator<<(MathMLStream & ms, MTag const & t)
{
ms.beforeTag();
SetMode rawmode(ms, false);
ms.cr();
++ms.tab();
ms.os() << '<' << from_ascii(ms.namespacedTag(t.tag_));
ms.os_ << '<' << from_ascii(ms.namespacedTag(t.tag_));
if (!t.attr_.empty())
ms.os() << " " << from_ascii(t.attr_);
ms.os_ << " " << from_ascii(t.attr_);
ms << ">";
++ms.nesting_level_;
return ms;
}
MathMLStream & operator<<(MathMLStream & ms, MTagInline const & t)
{
ms.beforeTag();
SetMode rawmode(ms, false);
ms.cr();
ms.os() << '<' << from_ascii(ms.namespacedTag(t.tag_));
ms.os_ << '<' << from_ascii(ms.namespacedTag(t.tag_));
if (!t.attr_.empty())
ms.os() << " " << from_ascii(t.attr_);
ms.os_ << " " << from_ascii(t.attr_);
ms << ">";
++ms.nesting_level_;
return ms;
}
MathMLStream & operator<<(MathMLStream & ms, ETag const & t)
{
ms.beforeTag();
SetMode rawmode(ms, false);
if (ms.tab() > 0)
--ms.tab();
ms.cr();
ms.os() << "</" << from_ascii(ms.namespacedTag(t.tag_)) << ">";
ms.os_ << "</" << from_ascii(ms.namespacedTag(t.tag_)) << ">";
--ms.nesting_level_;
return ms;
}
MathMLStream & operator<<(MathMLStream & ms, ETagInline const & t)
{
ms.os() << "</" << from_ascii(ms.namespacedTag(t.tag_)) << ">";
ms.beforeTag();
SetMode rawmode(ms, false);
ms.os_ << "</" << from_ascii(ms.namespacedTag(t.tag_)) << ">";
--ms.nesting_level_;
return ms;
}
MathMLStream & operator<<(MathMLStream & ms, CTag const & t)
{
ms.beforeTag();
SetMode rawmode(ms, false);
ms.cr();
ms.os() << "<" << from_ascii(ms.namespacedTag(t.tag_));
ms.os_ << "<" << from_ascii(ms.namespacedTag(t.tag_));
if (!t.attr_.empty())
ms.os() << " " << from_utf8(t.attr_);
ms.os() << "/>";
ms.os_ << " " << from_utf8(t.attr_);
ms.os_ << "/>";
return ms;
}
@ -508,14 +540,14 @@ HtmlStream & operator<<(HtmlStream & ms, docstring const & s)
SetMode::SetMode(MathMLStream & ms, bool text)
: ms_(ms)
{
was_text_ = ms_.inText();
ms_.setTextMode(text);
old_text_level_ = ms_.text_level_;
ms_.text_level_ = text ? ms_.nesting_level_ : MathMLStream::nlevel;
}
SetMode::~SetMode()
{
ms_.setTextMode(was_text_);
ms_.text_level_ = old_text_level_;
}

View File

@ -380,8 +380,6 @@ public:
explicit MathMLStream(odocstream & os, std::string const & xmlns = "");
///
void cr();
///
odocstream & os() { return os_; }
/// Indentation when nesting tags
int & tab() { return tab_; }
///
@ -391,7 +389,7 @@ public:
///
docstring deferred() const;
///
bool inText() const { return in_text_; }
bool inText() const { return text_level_ != nlevel; }
///
std::string xmlns() const { return xmlns_; }
/// Returns the tag name prefixed by the name space if needed.
@ -403,14 +401,21 @@ public:
/// Sets the current math style in the stream.
void setFontMathStyle(const MathStyle style) { font_math_style_ = style; }
private:
///
void setTextMode(bool t) { in_text_ = t; }
/// Check whether it makes sense to start a <mtext>
void beforeText();
/// Check whether there is a <mtext> to close here
void beforeTag();
///
odocstream & os_;
///
int tab_ = 0;
///
bool in_text_ = false;
int nesting_level_ = 0;
static const int nlevel = -1000;
///
int text_level_ = nlevel;
///
bool in_mtext_ = false;
///
odocstringstream deferred_;
///
@ -419,6 +424,14 @@ private:
MathStyle font_math_style_;
///
friend class SetMode;
friend MathMLStream & operator<<(MathMLStream &, MathAtom const &);
friend MathMLStream & operator<<(MathMLStream &, MathData const &);
friend MathMLStream & operator<<(MathMLStream &, docstring const &);
friend MathMLStream & operator<<(MathMLStream &, MTag const &);
friend MathMLStream & operator<<(MathMLStream &, MTagInline const &);
friend MathMLStream & operator<<(MathMLStream &, ETag const &);
friend MathMLStream & operator<<(MathMLStream &, ETagInline const &);
friend MathMLStream & operator<<(MathMLStream &, CTag const &);
};
///
@ -456,7 +469,7 @@ private:
///
MathMLStream & ms_;
///
bool was_text_;
/// Value of MathMLStream::text_level_ saved by the constructor and
/// restored by the destructor. Must be an int: it holds either
/// MathMLStream::nlevel or an actual nesting level, and a bool would
/// collapse both to 0/1 and restore a wrong level.
int old_text_level_;
};