lyx_mirror/src/output_xhtml.cpp
Richard Heck ad132e2e99 Restore basic paragraph output for XHTML. The insets are all disabled still.
Much of the point of this is to allow us properly to handle what LyX does as:
  <em>This is <strong>bold and italic</em> and now just bold.</strong>
We output:
  <em>This is <strong>bold and italic</strong></em><strong> and now just bold.</strong>
which is valid.

Note how much easier this would have been if emphasis and boldness were insets
rather than ranges. ;-)



git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/trunk@32086 a592a061-630c-0410-9148-cb99ea01b6c8
2009-11-19 18:24:19 +00:00

735 lines
19 KiB
C++

/**
* \file output_xhtml.cpp
* This file is part of LyX, the document processor.
* Licence details can be found in the file COPYING.
*
* \author Richard Heck
*
* This code is based upon output_docbook.cpp
*
* Full author contact details are available in file CREDITS.
*/
#include <config.h>
#include "output_xhtml.h"
#include "Buffer.h"
#include "buffer_funcs.h"
#include "BufferParams.h"
#include "Counters.h"
#include "Layout.h"
#include "OutputParams.h"
#include "Paragraph.h"
#include "ParagraphList.h"
#include "ParagraphParameters.h"
#include "sgml.h"
#include "Text.h"
#include "TextClass.h"
#include "support/lassert.h"
#include "support/debug.h"
#include "support/lstrings.h"
#include <vector>
using namespace std;
using namespace lyx::support;
namespace lyx {
namespace html {
/// Returns the text to emit for a single character of document content.
/// The markup-significant characters '&', '<' and '>' are replaced by
/// their entity references; every other character, the space included,
/// is returned unchanged as a one-character string.
docstring escapeChar(char_type c)
{
	docstring escaped;
	if (c == '&')
		escaped += "&amp;";
	else if (c == '<')
		escaped += "&lt;";
	else if (c == '>')
		escaped += "&gt;";
	else
		// this also covers ' ', which is passed through verbatim
		escaped += c;
	return escaped;
}
/// Escapes what needs escaping: returns a copy of \p str in which each
/// character has been passed through escapeChar().
docstring htmlize(docstring const & str) {
	odocstringstream escaped;
	docstring::size_type const len = str.size();
	for (docstring::size_type i = 0; i != len; ++i)
		escaped << escapeChar(str[i]);
	return escaped.str();
}
/// Tells whether \p s names one of the tags treated as a font tag.
/// Currently these are exactly "em" and "strong"; the comparison is
/// case-sensitive.
bool isFontTag(string const & s)
{
	// others?
	if (s == "em")
		return true;
	if (s == "strong")
		return true;
	return false;
}
} // namespace html
/// Renders this tag as an opening tag, "<tag>" or "<tag attr>" when an
/// attribute string is present. The attribute string is inserted as is.
docstring StartTag::asTag() const
{
	string markup = "<";
	markup += tag_;
	if (!attr_.empty()) {
		markup += " ";
		markup += attr_;
	}
	markup += ">";
	return from_utf8(markup);
}
/// Renders the closing tag, "</tag>", that matches this opening tag.
docstring StartTag::asEndTag() const
{
	return from_utf8("</" + tag_ + ">");
}
/// Renders this tag as a closing tag, "</tag>".
docstring EndTag::asEndTag() const
{
	return from_utf8("</" + tag_ + ">");
}
/// Renders this tag as a self-closing tag, "<tag />" or "<tag attr />"
/// when an attribute string is present.
docstring CompTag::asTag() const
{
	string markup = "<";
	markup += tag_;
	if (!attr_.empty()) {
		markup += " ";
		markup += attr_;
	}
	markup += " />";
	return from_utf8(markup);
}
////////////////////////////////////////////////////////////////
///
/// XHTMLStream
///
////////////////////////////////////////////////////////////////
/// Binds the stream to \p os, the output stream that the other members
/// write markup and text to.
XHTMLStream::XHTMLStream(odocstream & os)
	: os_(os)
{
}
void XHTMLStream::cr()
{
// tabs?
os_ << from_ascii("\n");
}
/// Closes every font tag (see html::isFontTag) sitting on top of the
/// stack of open tags, writing the matching end tags to the output.
///
/// \return true if no font tag remains open afterwards; false if a font
/// tag is still open underneath some non-font tag (each such tag is
/// reported via LYXERR0).
bool XHTMLStream::closeFontTags()
{
	// Nothing is open at all, so no font tag can be open either.
	// Without this check the back() below would be called on an empty
	// container, which is undefined behaviour.
	if (tag_stack_.empty())
		return true;

	// first, we close any open font tags we can close
	StartTag curtag = tag_stack_.back();
	while (html::isFontTag(curtag.tag_)) {
		os_ << curtag.asEndTag();
		tag_stack_.pop_back();
		if (tag_stack_.empty())
			// this probably shouldn't happen, since then the
			// font tags weren't in any other tag. but that
			// problem will likely be caught elsewhere.
			return true;
		curtag = tag_stack_.back();
	}

	// so we've hit a non-font tag. let's see if any of the
	// remaining tags are font tags.
	TagStack::const_iterator it = tag_stack_.begin();
	TagStack::const_iterator en = tag_stack_.end();
	bool noFontTags = true;
	for (; it != en; ++it) {
		if (html::isFontTag(it->tag_)) {
			LYXERR0("Font tag `" << it->tag_ << "' still open in closeFontTags().");
			noFontTags = false;
		}
	}
	return noFontTags;
}
void XHTMLStream::clearTagDeque()
{
while (!pending_tags_.empty()) {
StartTag const & tag = pending_tags_.front();
// tabs?
os_ << tag.asTag();
tag_stack_.push_back(tag);
pending_tags_.pop_front();
}
}
/// Writes text content. Any pending start tags are emitted first, then
/// \p d is written with its characters escaped by html::htmlize().
XHTMLStream & XHTMLStream::operator<<(docstring const & d)
{
	// I'm tempted to make sure here that there are no tags in the input
	clearTagDeque();
	docstring const escaped = html::htmlize(d);
	os_ << escaped;
	return *this;
}
/// Writes a single character of text content. Any pending start tags
/// are emitted first, then \p c is written as escaped by
/// html::escapeChar().
XHTMLStream & XHTMLStream::operator<<(char_type c)
{
	clearTagDeque();
	docstring const escaped = html::escapeChar(c);
	os_ << escaped;
	return *this;
}
/// Queues a start tag. The tag is only written once something else
/// forces the queue to be flushed, unless the tag is marked keepempty_,
/// in which case the queue is flushed right away.
XHTMLStream & XHTMLStream::operator<<(StartTag const & tag)
{
	pending_tags_.push_back(tag);
	if (!tag.keepempty_)
		return *this;
	clearTagDeque();
	return *this;
}
/// Writes a self-closing tag, after emitting any pending start tags.
/// The tag is not recorded on the stack of open tags.
XHTMLStream & XHTMLStream::operator<<(CompTag const & tag)
{
	clearTagDeque();
	// tabs?
	docstring const markup = tag.asTag();
	os_ << markup;
	return *this;
}
/// Tells whether a tag named \p stag is on the stack of open tags.
/// Only the tag name is compared; attributes play no role, and tags
/// that are merely pending are not considered.
bool XHTMLStream::isTagOpen(string const & stag)
{
	TagStack::const_iterator cur = tag_stack_.begin();
	TagStack::const_iterator const last = tag_stack_.end();
	while (cur != last) {
		if (cur->tag_ == stag)
			return true;
		++cur;
	}
	return false;
}
/// Closes the tag named by \p etag, keeping the output properly nested.
///
/// this is complicated, because we want to make sure that
/// everything is properly nested. the code ought to make
/// sure of that, but we won't assert (yet) if we run into
/// a problem. we'll just output error messages and try our
/// best to make things work.
///
/// The cases handled, in order:
///  - the tag is still pending (never written): it is dropped, so that
///    no empty element is output;
///  - the tag is not open at all: the request is reported and ignored;
///  - the tag is the innermost open tag: it is closed;
///  - the tag is a font tag with only font tags opened after it: those
///    are closed, the tag is closed, and the others are queued to be
///    re-opened;
///  - otherwise every tag opened after it is closed along with it.
///
/// \return *this, to allow chaining.
XHTMLStream & XHTMLStream::operator<<(EndTag const & etag)
{
	// first make sure we're not closing an empty tag
	if (!pending_tags_.empty()) {
		StartTag const & stag = pending_tags_.back();
		if (etag.tag_ == stag.tag_) {
			// we have <tag></tag>, so we discard it and remove it
			// from the pending_tags_.
			pending_tags_.pop_back();
			return *this;
		}
		// there is a pending tag that isn't the one we are trying
		// to close.

		// is this tag itself pending?
		// non-const iterators because we may call erase().
		TagDeque::iterator dit = pending_tags_.begin();
		TagDeque::iterator const den = pending_tags_.end();
		for (; dit != den; ++dit) {
			if (dit->tag_ == etag.tag_) {
				// it was pending, so we just erase it
				LYXERR0("Tried to close pending tag `" << etag.tag_
					<< "' when other tags were pending. Tag discarded.");
				pending_tags_.erase(dit);
				return *this;
			}
		}
		// so etag isn't itself pending. is it even open?
		if (!isTagOpen(etag.tag_)) {
			LYXERR0("Tried to close `" << etag.tag_
				<< "' when tag was not open. Tag discarded.");
			return *this;
		}
		// ok, so etag is open.
		// our strategy will be as below: we will do what we need to
		// do to close this tag.
		LYXERR0("Closing tag `" << etag.tag_
			<< "' when other tags are pending. Discarded pending tags:");
		// den is still valid here: nothing has been erased on this path.
		for (dit = pending_tags_.begin(); dit != den; ++dit)
			LYXERR0(dit->tag_);
		// clear the pending tags...
		pending_tags_.clear();
		// ...and then just fall through.
	}

	// Nothing was pending and nothing is open, so there is nothing this
	// end tag could match. Bail out before back() is called on an empty
	// container, which would be undefined behaviour.
	if (tag_stack_.empty()) {
		LYXERR0("Tried to close `" << etag.tag_
			<< "' when tag was not open. Tag discarded.");
		return *this;
	}

	// is the tag we are closing the last one we opened?
	if (etag.tag_ == tag_stack_.back().tag_) {
		// output it...
		os_ << etag.asEndTag();
		// ...and forget about it
		tag_stack_.pop_back();
		return *this;
	}

	// we are trying to close a tag other than the one last opened.
	// let's first see if this particular tag is still open somehow.
	if (!isTagOpen(etag.tag_)) {
		LYXERR0("Tried to close `" << etag.tag_
			<< "' when tag was not open. Tag discarded.");
		return *this;
	}

	// so the tag was opened, but other tags have been opened since
	// and not yet closed.
	// if it's a font tag, though...
	if (html::isFontTag(etag.tag_)) {
		// it won't be a problem if the other tags open since this one
		// are also font tags.
		TagStack::const_reverse_iterator rit = tag_stack_.rbegin();
		TagStack::const_reverse_iterator ren = tag_stack_.rend();
		for (; rit != ren; ++rit) {
			if (rit->tag_ == etag.tag_)
				break;
			if (!html::isFontTag(rit->tag_)) {
				// we'll just leave it and, presumably, have to close it later.
				LYXERR0("Unable to close font tag `" << etag.tag_
					<< "' due to open non-font tag `" << rit->tag_ << "'.");
				return *this;
			}
		}

		// so we have e.g.:
		//    <em>this is <strong>bold
		// and are being asked to close em. we want:
		//    <em>this is <strong>bold</strong></em><strong>
		// first, we close the intervening tags...
		StartTag curtag = tag_stack_.back();
		// ...remembering them in a stack.
		TagStack fontstack;
		// the loop ends before the stack runs empty, because
		// isTagOpen() found etag on the stack above.
		while (curtag.tag_ != etag.tag_) {
			os_ << curtag.asEndTag();
			fontstack.push_back(curtag);
			tag_stack_.pop_back();
			curtag = tag_stack_.back();
		}
		// now close our tag...
		os_ << etag.asEndTag();
		// ...and forget about it: it has been written out as closed, so
		// it must not stay on the stack of open tags, or later calls
		// would still see it as open.
		tag_stack_.pop_back();
		// ...and restore the other tags, outermost first. they go back
		// to the pending queue, so they are re-opened only if more
		// content follows.
		rit = fontstack.rbegin();
		ren = fontstack.rend();
		for (; rit != ren; ++rit)
			pending_tags_.push_back(*rit);
		return *this;
	}

	// it wasn't a font tag.
	// so other tags were opened after this one and not properly closed.
	// so we'll close them, too. that may cause other issues later, but it
	// at least guarantees proper nesting.
	LYXERR0("Closing tag `" << etag.tag_
		<< "' when other tags are open, namely:");
	StartTag curtag = tag_stack_.back();
	while (curtag.tag_ != etag.tag_) {
		LYXERR0(curtag.tag_);
		os_ << curtag.asEndTag();
		tag_stack_.pop_back();
		curtag = tag_stack_.back();
	}
	// curtag is now the one we actually want.
	os_ << curtag.asEndTag();
	tag_stack_.pop_back();

	return *this;
}
// End code for XHTMLStream
namespace {
// convenience functions
/// Sends the start tag for layout \p lay, built from its htmltag() and
/// htmlattr(), to \p xs.
inline void openTag(XHTMLStream & xs, Layout const & lay)
{
	StartTag const start(lay.htmltag(), lay.htmlattr());
	xs << start;
}
/// Sends the end tag for layout \p lay, built from its htmltag(), to
/// \p xs.
inline void closeTag(XHTMLStream & xs, Layout const & lay)
{
	EndTag const finish(lay.htmltag());
	xs << finish;
}
/// Sends the start tag for the label of layout \p lay, built from its
/// htmllabeltag() and htmllabelattr(), to \p xs.
inline void openLabelTag(XHTMLStream & xs, Layout const & lay)
{
	StartTag const start(lay.htmllabeltag(), lay.htmllabelattr());
	xs << start;
}
/// Sends the end tag for the label of layout \p lay, built from its
/// htmllabeltag(), to \p xs.
inline void closeLabelTag(XHTMLStream & xs, Layout const & lay)
{
	EndTag const finish(lay.htmllabeltag());
	xs << finish;
}
/// Sends the start tag for an item of layout \p lay, built from its
/// htmlitemtag() and htmlitemattr(), to \p xs. The tag is constructed
/// with `true` as its third argument, unlike the other start tags built
/// by these helpers.
inline void openItemTag(XHTMLStream & xs, Layout const & lay)
{
	StartTag const start(lay.htmlitemtag(), lay.htmlitemattr(), true);
	xs << start;
}
/// Sends the end tag for an item of layout \p lay, built from its
/// htmlitemtag(), to \p xs.
inline void closeItemTag(XHTMLStream & xs, Layout const & lay)
{
	EndTag const finish(lay.htmlitemtag());
	xs << finish;
}
// end of convenience functions
/// Finds the end of a run of plain paragraphs. Starting with the
/// paragraph after \p p, skips forward while the layout's latextype is
/// LATEX_PARAGRAPH.
/// \return the first paragraph after \p p that is not of that type, or
/// \p pend if there is none.
ParagraphList::const_iterator searchParagraphHtml(
	ParagraphList::const_iterator p,
	ParagraphList::const_iterator const & pend)
{
	++p;
	while (p != pend) {
		if (p->layout().latextype != LATEX_PARAGRAPH)
			break;
		++p;
	}
	return p;
}
/// Finds the end of the environment that begins at \p pstart.
/// \return the first paragraph after \p pstart that no longer belongs
/// to that environment, or \p pend if the environment runs to the end.
ParagraphList::const_iterator searchEnvironmentHtml(
	ParagraphList::const_iterator const pstart,
	ParagraphList::const_iterator const & pend)
{
	Layout const & first_style = pstart->layout();
	size_t const base_depth = pstart->params().depth();

	ParagraphList::const_iterator cur = pstart;
	for (++cur; cur != pend; ++cur) {
		Layout const & cur_style = cur->layout();
		// It shouldn't happen that e.g. a section command occurs inside
		// a quotation environment, at a higher depth, but as of 6/2009,
		// it can happen. We pretend that it's just at lowest depth.
		if (cur_style.latextype == LATEX_COMMAND)
			return cur;

		// A shallower paragraph ends the environment...
		if (cur->params().depth() < base_depth)
			return cur;
		// ...while a deeper one is nested inside it, so keep going.
		if (cur->params().depth() > base_depth)
			continue;

		// Same depth: the environment ends at a plain paragraph or at
		// a paragraph whose layout has a different LaTeX name.
		bool const plain = cur_style.latextype == LATEX_PARAGRAPH;
		if (plain || cur_style.latexname() != first_style.latexname())
			return cur;
	}
	return pend;
}
/// Outputs the paragraphs in [\p pbegin, \p pend) to \p xs, each wrapped
/// in the tag of its layout, except where runparams.html_in_par says
/// that we are already inside a paragraph (see below).
/// Content that simpleLyXHTMLOnePar() hands back as deferred is written
/// after the paragraph.
/// \return \p pend.
ParagraphList::const_iterator makeParagraphs(Buffer const & buf,
	XHTMLStream & xs,
	OutputParams const & runparams,
	Text const & text,
	ParagraphList::const_iterator const & pbegin,
	ParagraphList::const_iterator const & pend)
{
	ParagraphList::const_iterator const first = text.paragraphs().begin();
	for (ParagraphList::const_iterator cur = pbegin; cur != pend; ++cur) {
		Layout const & style = cur->layout();
		if (!style.counter.empty())
			buf.params().documentClass().counters().step(style.counter);
		// FIXME We should see if there's a label to be output and
		// do something with it.
		bool const at_start = (cur == pbegin);
		if (!at_start)
			xs.cr();

		// FIXME Should we really allow anything other than 'p' here?

		// If we are already in a paragraph, and this is the first one,
		// then we do not want to open the paragraph tag.
		bool const opened = !(at_start && runparams.html_in_par);
		if (opened)
			openTag(xs, style);

		docstring const deferred = cur->simpleLyXHTMLOnePar(
			buf, xs, runparams, text.outerFont(distance(first, cur)));

		// We want to issue the closing tag if either:
		//   (i)  We opened it, and either html_in_par is false,
		//        or we're not in the last paragraph, anyway.
		//   (ii) We didn't open it and html_in_par is true,
		//        but we are in the first par, and there is a next par.
		ParagraphList::const_iterator following = cur;
		++following;
		bool const has_next = (following != pend);
		bool needclose;
		if (opened)
			needclose = !runparams.html_in_par || has_next;
		else
			needclose = runparams.html_in_par && at_start && has_next;

		if (needclose) {
			closeTag(xs, style);
			xs.cr();
		}
		if (!deferred.empty()) {
			xs << deferred;
			xs.cr();
		}
	}
	return pend;
}
/// Outputs a bibliography: an h2 heading holding the label string of
/// the layout of \p pbegin, followed by a div that wraps the paragraphs
/// in [\p pbegin, \p pend) as written by makeParagraphs().
/// \return \p pend.
ParagraphList::const_iterator makeBibliography(Buffer const & buf,
	XHTMLStream & xs,
	OutputParams const & runparams,
	Text const & text,
	ParagraphList::const_iterator const & pbegin,
	ParagraphList::const_iterator const & pend)
{
	// the heading
	StartTag const heading_open("h2", "class='bibliography'");
	EndTag const heading_close("h2");
	xs << heading_open;
	xs << pbegin->layout().labelstring(false);
	xs << heading_close;
	xs.cr();

	// the entries
	StartTag const body_open("div", "class='bibliography'");
	EndTag const body_close("div");
	xs << body_open;
	xs.cr();
	makeParagraphs(buf, xs, runparams, text, pbegin, pend);
	xs << body_close;

	return pend;
}
/// Tells whether \p lay is an ordinary environment, i.e. its latextype
/// is LATEX_ENVIRONMENT rather than one of the list or item types.
bool isNormalEnv(Layout const & lay)
{
	bool const ordinary = (lay.latextype == LATEX_ENVIRONMENT);
	return ordinary;
}
/// Outputs the environment spanning [\p pbegin, \p pend) to \p xs.
///
/// The whole range is wrapped in the tag of the layout of \p pbegin.
/// Paragraphs at the depth of \p pbegin are output as items of the
/// environment, with their labels where the layout asks for them.
/// Environment paragraphs at a different depth are handled by a
/// recursive call, and runs of plain paragraphs by makeParagraphs().
///
/// \param buf       the buffer being exported; supplies the counters
/// \param xs        the stream written to
/// \param runparams the output parameters, passed on to the paragraphs
/// \param text      the text that owns the paragraphs
/// \param pbegin    first paragraph of the environment
/// \param pend      one past the last paragraph of the environment
/// \return \p pend.
ParagraphList::const_iterator makeEnvironmentHtml(Buffer const & buf,
	XHTMLStream & xs,
	OutputParams const & runparams,
	Text const & text,
	ParagraphList::const_iterator const & pbegin,
	ParagraphList::const_iterator const & pend)
{
	ParagraphList::const_iterator const begin = text.paragraphs().begin();
	ParagraphList::const_iterator par = pbegin;
	Layout const & bstyle = par->layout();
	depth_type const origdepth = pbegin->params().depth();

	// open tag for this environment
	openTag(xs, bstyle);
	xs.cr();

	// we will on occasion need to remember a layout from before.
	Layout const * lastlay = 0;

	while (par != pend) {
		Layout const & style = par->layout();
		// the counter only gets stepped if we're in some kind of list,
		// or if it's the first time through.
		if (!style.counter.empty() && (par == pbegin || !isNormalEnv(style)))
			buf.params().documentClass().counters().step(style.counter);
		ParagraphList::const_iterator send;
		// this will be positive, if we want to skip the initial word
		// (if it's been taken for the label).
		pos_type sep = 0;

		switch (style.latextype) {
		case LATEX_ENVIRONMENT:
		case LATEX_LIST_ENVIRONMENT:
		case LATEX_ITEM_ENVIRONMENT: {
			// There are two possibilities in this case.
			// One is that we are still in the environment in which we
			// started---which we will be if the depth is the same.
			if (par->params().depth() == origdepth) {
				LASSERT(bstyle == style, /* */);
				if (lastlay != 0) {
					// an item was left open across nested content;
					// close it now that the next item begins.
					closeItemTag(xs, *lastlay);
					lastlay = 0;
				}
				bool const labelfirst = style.htmllabelfirst();
				if (isNormalEnv(style)) {
					// in this case, we print the label only for the first
					// paragraph (as in a theorem).
					openItemTag(xs, style);
					if (par == pbegin && style.htmllabeltag() != "NONE") {
						docstring const lbl =
							pbegin->expandLabel(style, buf.params(), false);
						if (!lbl.empty()) {
							openLabelTag(xs, style);
							xs << lbl;
							closeLabelTag(xs, style);
						}
						xs.cr();
					}
				} else { // some kind of list
					if (!labelfirst)
						openItemTag(xs, style);
					if (style.labeltype == LABEL_MANUAL
					    && style.htmllabeltag() != "NONE") {
						openLabelTag(xs, style);
						// sep = par->firstWordLyXHTML(xs, runparams);
						closeLabelTag(xs, style);
						xs.cr();
					}
					else if (style.labeltype != LABEL_NO_LABEL
					         && style.htmllabeltag() != "NONE") {
						openLabelTag(xs, style);
						xs << par->expandLabel(style, buf.params(), false);
						closeLabelTag(xs, style);
						xs.cr();
					}
					if (labelfirst)
						openItemTag(xs, style);
					else
						// The attribute string must not end in '>':
						// StartTag::asTag() adds the closing bracket
						// itself, so a '>' here would be output twice.
						xs << StartTag("span", "class='" + to_utf8(style.name()) + " inneritem'");
				}
				par->simpleLyXHTMLOnePar(buf, xs, runparams,
					text.outerFont(distance(begin, par)), sep);
				// matches the span opened above for list items
				if (!isNormalEnv(style) && !labelfirst)
					xs << EndTag("span");
				++par;
				// We may not want to close the tag yet, in particular,
				// if we're not at the end...
				if (par != pend
				    // and are doing items...
				    && style.latextype == LATEX_ITEM_ENVIRONMENT
				    // and if the depth has changed...
				    && par->params().depth() != origdepth) {
					// then we'll save this layout for later, and close it when
					// we get another item.
					lastlay = &style;
				} else
					closeItemTag(xs, style);
				xs.cr();
			}
			// The other possibility is that the depth has increased, in which
			// case we need to recurse.
			else {
				send = searchEnvironmentHtml(par, pend);
				par = makeEnvironmentHtml(buf, xs, runparams, text, par, send);
			}
			break;
		}
		case LATEX_PARAGRAPH:
			send = searchParagraphHtml(par, pend);
			par = makeParagraphs(buf, xs, runparams, text, par, send);
			break;
		// Shouldn't happen
		case LATEX_BIB_ENVIRONMENT:
			send = par;
			++send;
			par = makeParagraphs(buf, xs, runparams, text, par, send);
			break;
		// Shouldn't happen
		case LATEX_COMMAND:
			++par;
			break;
		}
	}

	// close an item that was still being held open for nested content
	if (lastlay != 0)
		closeItemTag(xs, *lastlay);
	closeTag(xs, bstyle);
	xs.cr();
	return pend;
}
/// Outputs the single paragraph \p pbegin, wrapped in the tag of its
/// layout and preceded by its label unless the layout's label type is
/// LABEL_NO_LABEL. The layout's counter, if it has one, is stepped
/// first.
void makeCommand(Buffer const & buf,
	XHTMLStream & xs,
	OutputParams const & runparams,
	Text const & text,
	ParagraphList::const_iterator const & pbegin)
{
	Layout const & layout = pbegin->layout();
	if (!layout.counter.empty())
		buf.params().documentClass().counters().step(layout.counter);

	openTag(xs, layout);

	// Label around sectioning number:
	// FIXME Probably need to account for LABEL_MANUAL
	bool const has_label = (layout.labeltype != LABEL_NO_LABEL);
	if (has_label) {
		openLabelTag(xs, layout);
		xs << pbegin->expandLabel(layout, buf.params(), false);
		closeLabelTag(xs, layout);
		// Otherwise the label might run together with the text
		xs << from_ascii(" ");
	}

	ParagraphList::const_iterator const first = text.paragraphs().begin();
	pbegin->simpleLyXHTMLOnePar(buf, xs, runparams,
		text.outerFont(distance(first, pbegin)));

	closeTag(xs, layout);
	xs.cr();
}
} // end anonymous namespace
/// Outputs all the paragraphs of \p text to \p xs. The paragraphs are
/// split into chunks according to the latextype of the layout of each
/// chunk's first paragraph, and every chunk is handed to the matching
/// routine: makeCommand(), makeEnvironmentHtml(), makeBibliography() or
/// makeParagraphs().
void xhtmlParagraphs(Text const & text,
	Buffer const & buf,
	XHTMLStream & xs,
	OutputParams const & runparams)
{
	ParagraphList const & pars = text.paragraphs();
	ParagraphList::const_iterator const last = pars.end();
	// a private copy of the parameters, passed to the routines below
	OutputParams ourparams = runparams;

	ParagraphList::const_iterator cur = pars.begin();
	while (cur != last) {
		ParagraphList::const_iterator const chunk_start = cur;
		Layout const & layout = cur->layout();

		switch (layout.latextype) {
		case LATEX_COMMAND: {
			// The files with which we are working never have more than
			// one paragraph in a command structure.
			// FIXME
			// if (ourparams.html_in_par)
			//   fix it so we don't get sections inside standard, e.g.
			// note that we may then need to make runparams not const, so we
			// can communicate that back.
			// FIXME Maybe this fix should be in the routines themselves, in case
			// they are called from elsewhere.
			makeCommand(buf, xs, ourparams, text, cur);
			++cur;
			break;
		}
		case LATEX_ENVIRONMENT:
		case LATEX_LIST_ENVIRONMENT:
		case LATEX_ITEM_ENVIRONMENT: {
			// FIXME Same fix here.
			ParagraphList::const_iterator const chunk_end =
				searchEnvironmentHtml(cur, last);
			cur = makeEnvironmentHtml(buf, xs, ourparams, text, cur, chunk_end);
			break;
		}
		case LATEX_BIB_ENVIRONMENT: {
			// FIXME Same fix here.
			ParagraphList::const_iterator const chunk_end =
				searchEnvironmentHtml(cur, last);
			cur = makeBibliography(buf, xs, ourparams, text, cur, chunk_end);
			break;
		}
		case LATEX_PARAGRAPH: {
			ParagraphList::const_iterator const chunk_end =
				searchParagraphHtml(cur, last);
			cur = makeParagraphs(buf, xs, ourparams, text, cur, chunk_end);
			break;
		}
		}
		// FIXME??
		// makeEnvironment may process more than one paragraphs and bypass pend
		if (distance(chunk_start, cur) >= distance(chunk_start, last))
			break;
	}
}
} // namespace lyx