2019-05-09 23:35:40 +00:00
|
|
|
// -*- C++ -*-
/**
 * \file xml.h
 * This file is part of LyX, the document processor.
 * License details can be found in the file COPYING.
 *
 * \author José Matos
 * \author John Levon
 *
 * Full author contact details are available in file CREDITS.
 */
|
|
|
|
|
|
|
|
#ifndef XML_H
|
|
|
|
#define XML_H
|
|
|
|
|
|
|
|
#include "support/docstring.h"
|
|
|
|
|
|
|
|
#include <deque>
|
|
|
|
#include <memory>
|
|
|
|
|
|
|
|
namespace lyx {
|
|
|
|
|
|
|
|
class Buffer;
|
|
|
|
class Paragraph;
|
|
|
|
class OutputParams;
|
|
|
|
|
|
|
|
// Inspiration for the *Tag structs and for XMLStream
// came from MathMLStream and its cousins.
|
2019-05-09 23:35:40 +00:00
|
|
|
|
|
|
|
namespace xml {
|
|
|
|
struct StartTag;
|
|
|
|
struct EndTag;
|
|
|
|
struct CompTag;
|
|
|
|
struct ParTag;
|
|
|
|
struct FontTag;
|
|
|
|
struct CR;
|
|
|
|
}
|
|
|
|
|
|
|
|
class XMLStream {
|
|
|
|
public:
|
2020-07-08 16:15:34 +00:00
|
|
|
///
|
2020-08-18 02:56:25 +00:00
|
|
|
explicit XMLStream(odocstream & os): os_(os), escape_(ESCAPE_ALL), is_last_tag_cr_(true) {}
|
2020-07-08 16:15:34 +00:00
|
|
|
///
|
|
|
|
odocstream & os() { return os_; }
|
|
|
|
///
|
|
|
|
// int & tab() { return tab_; }
|
|
|
|
/// closes any font tags that are eligible to be closed,
|
|
|
|
/// i.e., last on the tag_stack_.
|
|
|
|
/// \return false if there are open font tags we could not close.
|
|
|
|
/// because they are "blocked" by open non-font tags on the stack.
|
|
|
|
bool closeFontTags();
|
|
|
|
/// sets a mark so we know what tags to close at the end.
|
|
|
|
/// normally called at the start of a paragraph.
|
|
|
|
void startDivision(bool keep_empty);
|
|
|
|
/// clears the mark set by previous method.
|
|
|
|
/// there should not be any other tags open before it on the stack,
|
|
|
|
/// but if there are, we will close them.
|
|
|
|
void endDivision();
|
|
|
|
///
|
|
|
|
XMLStream & operator<<(docstring const &);
|
|
|
|
///
|
|
|
|
XMLStream & operator<<(const char *);
|
|
|
|
///
|
|
|
|
XMLStream & operator<<(char_type);
|
|
|
|
///
|
|
|
|
XMLStream & operator<<(int);
|
|
|
|
///
|
|
|
|
XMLStream & operator<<(char);
|
|
|
|
///
|
|
|
|
XMLStream & operator<<(xml::StartTag const &);
|
|
|
|
///
|
|
|
|
XMLStream & operator<<(xml::EndTag const &);
|
|
|
|
///
|
|
|
|
XMLStream & operator<<(xml::CompTag const &);
|
|
|
|
///
|
|
|
|
XMLStream & operator<<(xml::ParTag const &);
|
|
|
|
///
|
|
|
|
XMLStream & operator<<(xml::FontTag const &);
|
|
|
|
///
|
|
|
|
XMLStream & operator<<(xml::CR const &);
|
|
|
|
///
|
|
|
|
enum EscapeSettings {
|
|
|
|
ESCAPE_NONE,
|
|
|
|
ESCAPE_AND, // meaning &
|
2020-08-18 02:56:25 +00:00
|
|
|
ESCAPE_ALL, // meaning <, >, &, at present, except things that are forbidden in comments
|
2020-07-31 22:02:36 +00:00
|
|
|
ESCAPE_COMMENTS // Anything that is forbidden within comments
|
2020-07-08 16:15:34 +00:00
|
|
|
};
|
|
|
|
/// Sets what we are going to escape on the NEXT write.
|
|
|
|
/// Everything is reset for the next time.
|
|
|
|
XMLStream & operator<<(EscapeSettings);
|
|
|
|
/// This routine is for debugging the tag stack, etc. Code
|
|
|
|
/// for it is disabled by default, however, so you will need
|
|
|
|
/// to enable it if you want to use it.
|
|
|
|
void dumpTagStack(std::string const & msg);
|
|
|
|
///
|
|
|
|
bool isTagOpen(xml::StartTag const &, int maxdepth = -1) const;
|
|
|
|
///
|
|
|
|
bool isTagOpen(xml::EndTag const &, int maxdepth = -1) const;
|
|
|
|
///
|
|
|
|
bool isTagPending(xml::StartTag const &, int maxdepth = -1) const;
|
2020-08-18 02:56:25 +00:00
|
|
|
/// Is the last tag that was added to the stream a new line (CR)? This is mostly to known
|
|
|
|
/// whether a new line must be added. Therefore, consider that an empty stream just had a CR,
|
|
|
|
/// that simplifies the logic using this code.
|
2021-09-28 08:21:45 +00:00
|
|
|
bool isLastTagCR() const { return is_last_tag_cr_; }
|
2020-07-08 16:15:34 +00:00
|
|
|
///
|
2020-08-29 00:43:30 +00:00
|
|
|
void writeError(std::string const &);
|
2020-07-08 16:15:34 +00:00
|
|
|
///
|
2020-08-29 00:43:30 +00:00
|
|
|
void writeError(docstring const &);
|
|
|
|
///
|
|
|
|
typedef std::shared_ptr<xml::StartTag> TagPtr;
|
|
|
|
/// Returns the last element on the tag stack. XMLStream keeps ownership of the item.
|
|
|
|
TagPtr getLastStackTag();
|
2020-08-17 22:37:34 +00:00
|
|
|
private:
|
|
|
|
///
|
|
|
|
void clearTagDeque();
|
2020-07-08 16:15:34 +00:00
|
|
|
///
|
|
|
|
odocstream & os_;
|
|
|
|
///
|
|
|
|
EscapeSettings escape_;
|
|
|
|
// What we would really like to do here is simply use a
|
|
|
|
// deque<StartTag>. But we want to store both StartTags and
|
|
|
|
// sub-classes thereof on this stack, which means we run into the
|
|
|
|
// so-called polymorphic class problem with the STL. We therefore have
|
|
|
|
// to use a deque<StartTag *>, which leads to the question who will
|
|
|
|
// own these pointers and how they will be deleted, so we use shared
|
|
|
|
// pointers.
|
|
|
|
///
|
|
|
|
typedef std::deque<TagPtr> TagDeque;
|
|
|
|
///
|
|
|
|
template <typename T>
|
|
|
|
TagPtr makeTagPtr(T const & tag) { return std::make_shared<T>(tag); }
|
|
|
|
///
|
|
|
|
TagDeque pending_tags_;
|
|
|
|
///
|
|
|
|
TagDeque tag_stack_;
|
2020-08-17 21:44:02 +00:00
|
|
|
///
|
|
|
|
bool is_last_tag_cr_;
|
2019-05-09 23:35:40 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
namespace xml {
|
|
|
|
|
|
|
|
/// Escape the given character, if necessary, to an entity.
|
|
|
|
docstring escapeChar(char_type c, XMLStream::EscapeSettings e);
|
|
|
|
|
|
|
|
/// Escape the given character, if necessary, to an entity.
|
2021-09-28 17:20:57 +00:00
|
|
|
/// \param c must be ASCII
|
2019-05-09 23:35:40 +00:00
|
|
|
docstring escapeChar(char c, XMLStream::EscapeSettings e);
|
|
|
|
|
|
|
|
/// Escape a word instead of a single character
|
|
|
|
docstring escapeString(docstring const & raw, XMLStream::EscapeSettings e=XMLStream::ESCAPE_ALL);
|
|
|
|
|
|
|
|
/// cleans \param str for use as an attribute by replacing all non-altnum by "_"
|
|
|
|
docstring cleanAttr(docstring const & str);
|
|
|
|
|
|
|
|
/// replaces illegal characters from ID attributes
|
|
|
|
docstring cleanID(docstring const &orig);
|
|
|
|
|
|
|
|
/// returns a unique numeric ID
|
2020-07-31 22:02:36 +00:00
|
|
|
docstring uniqueID(docstring const & label);
|
2019-05-09 23:35:40 +00:00
|
|
|
|
2020-09-02 23:23:20 +00:00
|
|
|
/// determines whether a string only contains space characters
|
|
|
|
bool isNotOnlySpace(docstring const & str);
|
|
|
|
|
|
|
|
/// trims the string to the left, i.e. remove any space-like character at the beginning of the string
|
|
|
|
docstring trimLeft(docstring const & str);
|
|
|
|
|
2019-05-09 23:35:40 +00:00
|
|
|
struct FontTag;
|
|
|
|
struct EndFontTag;
|
|
|
|
|
|
|
|
/// Attributes will be escaped automatically and so should NOT
|
|
|
|
/// be escaped before being passed to the constructor.
|
|
|
|
struct StartTag
|
|
|
|
{
|
2020-07-08 16:15:34 +00:00
|
|
|
///
|
2020-11-18 04:29:26 +00:00
|
|
|
explicit StartTag(std::string const & tag) : tag_(from_ascii(tag)), keepempty_(false), tagtype_("none") {}
|
2020-07-08 16:15:34 +00:00
|
|
|
///
|
2020-11-18 04:29:26 +00:00
|
|
|
explicit StartTag(docstring const & tag) : tag_(tag), keepempty_(false), tagtype_("none") {}
|
2020-07-08 16:15:34 +00:00
|
|
|
///
|
|
|
|
explicit StartTag(docstring const & tag, docstring const & attr,
|
2020-11-18 04:29:26 +00:00
|
|
|
bool keepempty = false, std::string const & tagtype = "none")
|
|
|
|
: tag_(tag), attr_(attr), keepempty_(keepempty), tagtype_(tagtype) {}
|
2020-07-08 16:15:34 +00:00
|
|
|
///
|
|
|
|
explicit StartTag(std::string const & tag, std::string const & attr,
|
2020-11-18 04:29:26 +00:00
|
|
|
bool keepempty = false, std::string const & tagtype = "none")
|
|
|
|
: tag_(from_ascii(tag)), attr_(from_utf8(attr)), keepempty_(keepempty), tagtype_(tagtype) {}
|
2020-07-08 16:15:34 +00:00
|
|
|
///
|
|
|
|
explicit StartTag(std::string const & tag, docstring const & attr,
|
2020-11-18 04:29:26 +00:00
|
|
|
bool keepempty = false, std::string const & tagtype = "none")
|
|
|
|
: tag_(from_ascii(tag)), attr_(attr), keepempty_(keepempty), tagtype_(tagtype) {}
|
2020-07-08 16:15:34 +00:00
|
|
|
///
|
2020-11-18 04:29:26 +00:00
|
|
|
virtual ~StartTag() = default;
|
2020-07-08 16:15:34 +00:00
|
|
|
/// <tag_ attr_>
|
|
|
|
virtual docstring writeTag() const;
|
|
|
|
/// </tag_>
|
|
|
|
virtual docstring writeEndTag() const;
|
|
|
|
///
|
|
|
|
virtual FontTag const * asFontTag() const { return nullptr; }
|
|
|
|
///
|
|
|
|
docstring tag_;
|
|
|
|
///
|
|
|
|
docstring attr_;
|
|
|
|
/// whether to keep things like "<tag></tag>" or discard them
|
|
|
|
/// you would want this for td, e.g, but maybe not for a div
|
|
|
|
bool keepempty_;
|
2020-11-18 04:29:26 +00:00
|
|
|
/// Type of tag for new-line behaviour. Either "paragraph", "inline", "block", or "none" (default).
|
|
|
|
std::string tagtype_;
|
2019-05-09 23:35:40 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
///
|
|
|
|
struct EndTag
|
|
|
|
{
|
2020-07-08 16:15:34 +00:00
|
|
|
///
|
2020-11-18 04:29:26 +00:00
|
|
|
explicit EndTag(std::string const & tag, std::string const & tagtype = "none")
|
|
|
|
: tag_(from_ascii(tag)), tagtype_(tagtype) {}
|
2020-07-08 16:15:34 +00:00
|
|
|
///
|
2020-11-18 04:29:26 +00:00
|
|
|
explicit EndTag(docstring const & tag, std::string const & tagtype = "none")
|
|
|
|
: tag_(tag), tagtype_(tagtype) {}
|
2020-07-08 16:15:34 +00:00
|
|
|
///
|
2020-11-18 04:29:26 +00:00
|
|
|
virtual ~EndTag() = default;
|
2020-07-08 16:15:34 +00:00
|
|
|
/// </tag_>
|
|
|
|
virtual docstring writeEndTag() const;
|
|
|
|
///
|
2020-11-18 04:29:26 +00:00
|
|
|
virtual EndFontTag const * asFontTag() const { return nullptr; }
|
2020-07-08 16:15:34 +00:00
|
|
|
///
|
|
|
|
docstring tag_;
|
2020-11-18 04:29:26 +00:00
|
|
|
/// Type of tag for new-line behaviour. Either "paragraph", "inline", "block", or "none" (default).
|
|
|
|
/// The value should match that of the corresponding xml::StartTag.
|
|
|
|
std::string tagtype_;
|
2019-05-09 23:35:40 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
/// Tags like <img />
|
|
|
|
/// Attributes will be escaped automatically and so should NOT
|
|
|
|
/// be escaped before being passed to the constructor.
|
|
|
|
struct CompTag
|
|
|
|
{
|
2020-07-08 16:15:34 +00:00
|
|
|
///
|
|
|
|
explicit CompTag(std::string const & tag)
|
2020-11-18 04:29:26 +00:00
|
|
|
: tag_(from_utf8(tag)), tagtype_("none") {}
|
2020-07-08 16:15:34 +00:00
|
|
|
///
|
2020-11-18 04:38:25 +00:00
|
|
|
explicit CompTag(docstring const & tag)
|
|
|
|
: tag_(tag), tagtype_("none") {}
|
|
|
|
///
|
2020-11-18 04:29:26 +00:00
|
|
|
explicit CompTag(std::string const & tag, std::string const & attr, std::string const & tagtype = "none")
|
|
|
|
: tag_(from_utf8(tag)), attr_(from_utf8(attr)), tagtype_(tagtype) {}
|
2020-08-29 21:41:28 +00:00
|
|
|
///
|
2020-11-18 04:29:26 +00:00
|
|
|
explicit CompTag(std::string const & tag, docstring const & attr, std::string const & tagtype = "none")
|
|
|
|
: tag_(from_utf8(tag)), attr_(attr), tagtype_(tagtype) {}
|
2020-11-18 04:38:25 +00:00
|
|
|
///
|
|
|
|
explicit CompTag(docstring const & tag, std::string const & attr, std::string const & tagtype = "none")
|
|
|
|
: tag_(tag), attr_(from_utf8(attr)), tagtype_(tagtype) {}
|
|
|
|
///
|
|
|
|
explicit CompTag(docstring const & tag, docstring const & attr, std::string const & tagtype = "none")
|
|
|
|
: tag_(tag), attr_(attr), tagtype_(tagtype) {}
|
2020-07-08 16:15:34 +00:00
|
|
|
/// <tag_ attr_ />
|
|
|
|
docstring writeTag() const;
|
|
|
|
///
|
2020-08-29 21:41:28 +00:00
|
|
|
docstring tag_;
|
2020-07-08 16:15:34 +00:00
|
|
|
///
|
2020-08-29 21:41:28 +00:00
|
|
|
docstring attr_;
|
2020-11-18 04:29:26 +00:00
|
|
|
/// Type of tag for new-line behaviour. Either "paragraph", "inline", "block", or "none" (default).
|
|
|
|
std::string tagtype_;
|
2019-05-09 23:35:40 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
/// A special case of StartTag, used exclusively for tags that wrap paragraphs.
|
|
|
|
struct ParTag : public StartTag
|
|
|
|
{
|
2020-07-08 16:15:34 +00:00
|
|
|
///
|
|
|
|
explicit ParTag(std::string const & tag, const std::string & attr): StartTag(tag, from_utf8(attr)) {}
|
|
|
|
///
|
2020-11-18 04:29:26 +00:00
|
|
|
~ParTag() override = default;
|
2019-05-09 23:35:40 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
/// The kinds of font changes a FontTag / EndFontTag can stand for.
// When updating this list, also update fontToTag in both output_docbook.cpp and output_xhtml.cpp,
// fontToRole in output_docbook.cpp, and fontToAttribute in output_xhtml.cpp.
enum FontTypes {
	// ranges
	FT_EMPH,
	FT_NOUN,
	FT_UBAR,
	FT_DBAR,
	FT_WAVE,
	FT_SOUT,
	FT_XOUT,
	// bold
	FT_BOLD,
	// shapes
	FT_UPRIGHT,
	FT_ITALIC,
	FT_SLANTED,
	FT_SMALLCAPS,
	// families
	FT_ROMAN,
	FT_SANS,
	FT_TYPE,
	// sizes
	FT_SIZE_TINY,
	FT_SIZE_SCRIPT,
	FT_SIZE_FOOTNOTE,
	FT_SIZE_SMALL,
	FT_SIZE_NORMAL,
	FT_SIZE_LARGE,
	FT_SIZE_LARGER,
	FT_SIZE_LARGEST,
	FT_SIZE_HUGE,
	FT_SIZE_HUGER,
	FT_SIZE_INCREASE,
	FT_SIZE_DECREASE
};
|
|
|
|
|
|
|
|
|
|
|
|
///
|
|
|
|
struct FontTag : public StartTag
|
|
|
|
{
|
2020-07-08 16:15:34 +00:00
|
|
|
///
|
|
|
|
FontTag(docstring const & tag, FontTypes type): StartTag(tag), font_type_(type) {}
|
|
|
|
///
|
|
|
|
FontTag(std::string const & tag, FontTypes type): StartTag(from_utf8(tag)), font_type_(type) {}
|
|
|
|
///
|
|
|
|
FontTag(docstring const & tag, docstring const & attr, FontTypes type): StartTag(tag, attr), font_type_(type) {}
|
|
|
|
///
|
|
|
|
FontTag(std::string const & tag, std::string const & attr, FontTypes type): StartTag(from_utf8(tag), from_utf8(attr)), font_type_(type) {}
|
|
|
|
///
|
|
|
|
FontTag const * asFontTag() const override { return this; }
|
|
|
|
///
|
|
|
|
FontTypes font_type_;
|
2019-05-09 23:35:40 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
///
|
|
|
|
struct EndFontTag : public EndTag
|
|
|
|
{
|
2020-07-08 16:15:34 +00:00
|
|
|
///
|
|
|
|
EndFontTag(docstring const & tag, FontTypes type): EndTag(tag), font_type_(type) {}
|
|
|
|
///
|
|
|
|
EndFontTag(std::string const & tag, FontTypes type): EndTag(from_utf8(tag)), font_type_(type) {}
|
|
|
|
///
|
|
|
|
EndFontTag const * asFontTag() const override { return this; }
|
|
|
|
///
|
|
|
|
FontTypes font_type_;
|
2019-05-09 23:35:40 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
// Empty marker type: streaming one into an XMLStream asks for a newline.
struct CR {};
|
|
|
|
|
|
|
|
// an illegal tag for internal use
|
|
|
|
xml::StartTag const parsep_tag("&LyX_parsep_tag&");
|
|
|
|
|
|
|
|
/// Open tag
|
|
|
|
void openTag(odocstream & os, std::string const & name,
|
|
|
|
std::string const & attribute = std::string());
|
|
|
|
|
|
|
|
/// Open tag
|
|
|
|
void openTag(Buffer const & buf, odocstream & os,
|
|
|
|
OutputParams const & runparams, Paragraph const & par);
|
|
|
|
|
|
|
|
/// Close tag
|
|
|
|
void closeTag(odocstream & os, std::string const & name);
|
|
|
|
|
|
|
|
/// Close tag
|
|
|
|
void closeTag(odocstream & os, Paragraph const & par);
|
|
|
|
|
2020-11-18 04:38:25 +00:00
|
|
|
// Convenience functions to open and close tags. First, very low-level ones to ensure a consistent new-line behaviour.
|
|
|
|
// Block style:
|
|
|
|
// Content before
|
|
|
|
// <blocktag>
|
|
|
|
// Contents of the block.
|
|
|
|
// </blocktag>
|
|
|
|
// Content after
|
|
|
|
// Paragraph style:
|
|
|
|
// Content before
|
|
|
|
// <paratag>Contents of the paragraph.</paratag>
|
|
|
|
// Content after
|
|
|
|
// Inline style:
|
|
|
|
// Content before<inlinetag>Contents of the paragraph.</inlinetag>Content after
|
|
|
|
|
|
|
|
///
|
|
|
|
void openTag(XMLStream & xs, const docstring & tag, const docstring & attr, const std::string & tagtype);
|
|
|
|
///
|
|
|
|
void openTag(XMLStream & xs, const std::string & tag, const std::string & attr, const std::string & tagtype);
|
|
|
|
///
|
|
|
|
void openTag(XMLStream & xs, const docstring & tag, const std::string & attr, const std::string & tagtype);
|
|
|
|
///
|
|
|
|
void openTag(XMLStream & xs, const std::string & tag, const docstring & attr, const std::string & tagtype);
|
|
|
|
///
|
|
|
|
void closeTag(XMLStream & xs, const docstring & tag, const std::string & tagtype);
|
|
|
|
///
|
|
|
|
void closeTag(XMLStream & xs, const std::string & tag, const std::string & tagtype);
|
|
|
|
///
|
|
|
|
void compTag(XMLStream & xs, const docstring & tag, const docstring & attr, const std::string & tagtype);
|
|
|
|
///
|
|
|
|
void compTag(XMLStream & xs, const std::string & tag, const std::string & attr, const std::string & tagtype);
|
|
|
|
///
|
|
|
|
void compTag(XMLStream & xs, const docstring & tag, const std::string & attr, const std::string & tagtype);
|
|
|
|
///
|
|
|
|
void compTag(XMLStream & xs, const std::string & tag, const docstring & attr, const std::string & tagtype);
|
|
|
|
|
2019-05-09 23:35:40 +00:00
|
|
|
} // namespace xml
|
|
|
|
|
2022-12-27 15:52:51 +00:00
|
|
|
|
|
|
|
/// Comparison operators for tags. They are defined as free functions, otherwise comparison of casts does not work.
|
|
|
|
/// For font tags, do not only compare the XML tag, but also the font type: several fonts can be using the same tag.
|
|
|
|
/// In XHTML, <span>; in DocBook, <emphasis>.
|
|
|
|
bool operator==(xml::StartTag const & lhs, xml::StartTag const & rhs);
|
|
|
|
bool operator==(xml::EndTag const & lhs, xml::StartTag const & rhs);
|
|
|
|
bool operator!=(xml::EndTag const & lhs, xml::StartTag const & rhs);
|
|
|
|
bool operator!=(xml::StartTag const & lhs, xml::StartTag const & rhs);
|
|
|
|
|
2019-05-09 23:35:40 +00:00
|
|
|
} // namespace lyx
|
|
|
|
|
|
|
|
#endif // XML_H
|