Sanitize ids for SGML/XML.

git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/trunk@9142 a592a061-630c-0410-9148-cb99ea01b6c8
This commit is contained in:
José Matos 2004-10-29 23:08:04 +00:00
parent 6fe9e4e30d
commit 8e27aa2fb7
13 changed files with 165 additions and 42 deletions

View File

@ -1,3 +1,14 @@
2004-10-29 José Matos <jamatos@lyx.org>
* output_docbook.C (makeEnvironment):
* sgml.C (openTag):
* paragraph.[Ch] (getID): rename function, and return it enclosed in id="...".
2004-10-29 Andreas Vox <vox@isp.uni-luebeck.de>
* sgml.[Ch] (uniqueID): returns a unique id for a given label.
(cleanID): sanitize any id.
2004-10-29 Georg Baum <Georg.Baum@post.rwth-aachen.de>
* buffer.C, lyxlex_pimpl.C:

View File

@ -1,3 +1,12 @@
2004-10-29 José Matos <jamatos@lyx.org>
* insetgraphics.C (uniqueID): transferred to sgml.C
2004-10-29 Andreas Vox <vox@isp.uni-luebeck.de>
* insetlabel.C (docbook, linuxdoc):
* insetref.C (docbook, linuxdoc): sanitize id.
2004-10-29 Georg Baum <Georg.Baum@post.rwth-aachen.de>
* ExternalSupport.C: s/getExtFromContents/getFormatFromContents/

View File

@ -69,6 +69,7 @@ TODO
#include "metricsinfo.h"
#include "mover.h"
#include "outputparams.h"
#include "sgml.h"
#include "frontends/Alert.h"
#include "frontends/LyXView.h"
@ -78,7 +79,6 @@ TODO
#include "support/lyxlib.h" // float_equal
#include "support/os.h"
#include "support/systemcall.h"
#include "support/tostr.h"
#include <boost/bind.hpp>
#include <boost/tuple/tuple.hpp>
@ -126,13 +126,6 @@ string const RemoveExtension(string const & filename)
}
string const uniqueID()
{
static unsigned int seed = 1000;
return "graph" + tostr(++seed);
}
string findTargetFormat(string const & format, OutputParams const & runparams)
{
// Are we using latex or pdflatex?
@ -160,7 +153,7 @@ string findTargetFormat(string const & format, OutputParams const & runparams)
InsetGraphics::InsetGraphics()
: graphic_label(uniqueID()),
: graphic_label(sgml::uniqueID("graph")),
graphic_(new RenderGraphic(this))
{}
@ -168,7 +161,7 @@ InsetGraphics::InsetGraphics()
InsetGraphics::InsetGraphics(InsetGraphics const & ig)
: InsetOld(ig),
boost::signals::trackable(),
graphic_label(uniqueID()),
graphic_label(sgml::uniqueID("graph")),
graphic_(new RenderGraphic(*ig.graphic_, this))
{
setParams(ig.params());

View File

@ -20,6 +20,7 @@
#include "lyxtext.h"
#include "paragraph.h"
#include "pariterator.h"
#include "sgml.h"
#include "frontends/LyXView.h"
@ -135,7 +136,7 @@ int InsetLabel::plaintext(Buffer const &, ostream & os,
int InsetLabel::linuxdoc(Buffer const &, ostream & os,
OutputParams const &) const
{
os << "<label id=\"" << getContents() << "\" >";
os << "<label id=\"" << sgml::cleanID(getContents()) << "\" >";
return 0;
}
@ -143,6 +144,6 @@ int InsetLabel::linuxdoc(Buffer const &, ostream & os,
int InsetLabel::docbook(Buffer const &, ostream & os,
OutputParams const &) const
{
os << "<!-- anchor id=\"" << getContents() << "\" -->";
os << "<!-- anchor id=\"" << sgml::cleanID(getContents()) << "\" -->";
return 0;
}

View File

@ -19,6 +19,7 @@
#include "gettext.h"
#include "LaTeXFeatures.h"
#include "outputparams.h"
#include "sgml.h"
#include "frontends/LyXView.h"
@ -104,7 +105,7 @@ int InsetRef::plaintext(Buffer const &, ostream & os,
int InsetRef::linuxdoc(Buffer const &, ostream & os,
OutputParams const &) const
{
os << "<ref id=\"" << getContents()
os << "<ref id=\"" << sgml::cleanID(getContents())
<< "\" name=\"" << getOptions() << "\" >";
return 0;
}
@ -114,11 +115,11 @@ int InsetRef::docbook(Buffer const &, ostream & os,
OutputParams const & runparams) const
{
if (getOptions().empty() && runparams.flavor == OutputParams::XML) {
os << "<xref linkend=\"" << getContents() << "\" />";
os << "<xref linkend=\"" << sgml::cleanID(getContents()) << "\" />";
} else if (getOptions().empty()) {
os << "<xref linkend=\"" << getContents() << "\">";
os << "<xref linkend=\"" << sgml::cleanID(getContents()) << "\">";
} else {
os << "<link linkend=\"" << getContents()
os << "<link linkend=\"" << sgml::cleanID(getContents())
<< "\">" << getOptions() << "</link>";
}

View File

@ -1,3 +1,8 @@
2004-10-29 Andreas Vox <vox@isp.uni-luebeck.de>
* math_hullinset.C (docbook):
* ref_inset.C (docbook): sanitize ids, clean exported docbook.
2004-10-15 Georg Baum <Georg.Baum@post.rwth-aachen.de>
* math_hullinset.C (mutate): fix endless loop for unknown types

View File

@ -30,6 +30,7 @@
#include "lyx_main.h"
#include "lyxrc.h"
#include "outputparams.h"
#include "sgml.h"
#include "textpainter.h"
#include "undo.h"
@ -40,11 +41,14 @@
#include "graphics/PreviewImage.h"
#include "graphics/PreviewLoader.h"
#include "support/lstrings.h"
#include <boost/bind.hpp>
#include <sstream>
using lyx::cap::grabAndEraseSelection;
using lyx::support::subst;
using std::endl;
using std::max;
@ -1122,7 +1126,6 @@ bool MathHullInset::getStatus(LCursor & cur, FuncRequest const & cmd,
#include "frontends/LyXView.h"
#include "frontends/Dialogs.h"
#include "support/lstrings.h"
#include "support/lyxlib.h"
@ -1342,25 +1345,44 @@ int MathHullInset::docbook(Buffer const & buf, ostream & os,
name = "informalequation";
string bname = name;
if (! label(0).empty()) bname += " id=\"" + label(0)+ "\"";
if (!label(0).empty())
bname += " id=\"" + sgml::cleanID(label(0)) + "\"";
ms << MTag(bname.c_str());
ostringstream ls;
if (runparams.flavor == OutputParams::XML) {
ms << MTag("math");
MathGridInset::mathmlize(ms);
ms << ETag("math");
ms << MTag("alt role=\"tex\" ");
ostringstream ls;
ms << MTag("alt role=\"tex\" ");
// Workaround for db2latex: db2latex always includes equations with
// \ensuremath{} or \begin{display}\end{display}
// so we strip LyX' math environment
WriteStream wi(ls, false, false);
MathGridInset::write(wi);
ms << ls.str();
ms << ETag("alt");
ms << subst(subst(ls.str(), "&", "&amp;"), "<", "&lt;");
ms << ETag("alt");
ms << MTag("math");
MathGridInset::mathmlize(ms);
ms << ETag("math");
} else {
ms << MTag("alt role=\"tex\" ");
res = latex(buf, ms.os(), runparams);
ms << ETag("alt");
ms << MTag("alt role=\"tex\"");
res = latex(buf, ls, runparams);
ms << subst(subst(ls.str(), "&", "&amp;"), "<", "&lt;");
ms << ETag("alt");
}
ms << "<graphic fileref=\"eqn/";
if ( !label(0).empty())
ms << sgml::cleanID(label(0));
else {
// Some arbitrary unique number for this os.
// Note that each call of math_hullinset::docbook()
// will increase the os position by at least 60 chars or more
ms << sgml::uniqueID("anon");
}
if (runparams.flavor == OutputParams::XML)
ms << "\"/>";
else
ms << "\">";
ms << ETag(name.c_str());
return ms.line() + res;
}

View File

@ -21,6 +21,8 @@
#include "math_data.h"
#include "math_factory.h"
#include "math_support.h"
#include "outputparams.h"
#include "sgml.h"
#include "frontends/LyXView.h"
#include "frontends/Dialogs.h"
@ -131,17 +133,21 @@ int RefInset::plaintext(std::ostream & os, OutputParams const &) const
int RefInset::linuxdoc(std::ostream & os, OutputParams const &) const
{
os << "<ref id=\"" << asString(cell(0))
<< "\" name=\"" << asString(cell(1)) << "\" >";
<< "\" name=\"" << asString(cell(1)) << "\">";
return 0;
}
int RefInset::docbook(std::ostream & os, OutputParams const &) const
int RefInset::docbook(std::ostream & os, OutputParams const & runparams) const
{
if (cell(1).empty()) {
os << "<xref linkend=\"" << asString(cell(0)) << "\">";
os << "<xref linkend=\"" << sgml::cleanID(asString(cell(0)));
if (runparams.flavor == OutputParams::XML)
os << "\"/>";
else
os << "\">";
} else {
os << "<link linkend=\"" << asString(cell(0))
os << "<link linkend=\"" << sgml::cleanID(asString(cell(0)))
<< "\">" << asString(cell(1)) << "</link>";
}

View File

@ -133,8 +133,7 @@ ParagraphList::const_iterator makeEnvironment(Buffer const & buf,
while (par != pend) {
LyXLayout_ptr const & style = par->layout();
ParagraphList::const_iterator send;
string id = par->getDocbookId();
id = id.empty()? "" : " id = \"" + id + "\"";
string id = par->getID();
string wrapper = "";
pos_type sep = 0;

View File

@ -1331,14 +1331,15 @@ void Paragraph::simpleLinuxDocOnePar(Buffer const & buf,
}
string Paragraph::getDocbookId() const
string Paragraph::getID() const
{
for (pos_type i = 0; i < size(); ++i) {
if (isInset(i)) {
InsetBase const * inset = getInset(i);
InsetBase::Code lyx_code = inset->lyxCode();
if (lyx_code == InsetBase::LABEL_CODE) {
return static_cast<InsetCommand const *>(inset)->getContents();
string const id = static_cast<InsetCommand const *>(inset)->getContents();
return "id=\"" + sgml::cleanID(id) + "\"";
}
}

View File

@ -129,8 +129,8 @@ public:
OutputParams const & runparams,
lyx::depth_type depth) const;
/// Get the id of the paragraph, useful for docbook
std::string getDocbookId() const;
/// Get the id of the paragraph, useful for docbook and linuxdoc
std::string getID() const;
// Get the first word of a paragraph, return the position where it left
lyx::pos_type getFirstWord(Buffer const & buf,

View File

@ -25,12 +25,13 @@
#include <boost/tuple/tuple.hpp>
#include <map>
#include <sstream>
using lyx::support::subst;
using std::make_pair;
using std::map;
using std::ostream;
using std::ostringstream;
using std::pair;
@ -112,6 +113,75 @@ string escapeString(string const & raw)
}
/// Builds an identifier by appending the next value of a shared,
/// ever-increasing counter to \p label.
string const uniqueID(string const label)
{
	// One counter serves every caller; it is bumped before use, so the
	// first number handed out is 1001.
	static unsigned int counter = 1000;
	++counter;

	string id(label);
	id += tostr(counter);
	return id;
}
/**
 * Turn \p orig into a name usable as an SGML/XML ID attribute.
 *
 * The standard DocBook SGML declaration only allows letters, digits,
 * '-' and '.' in a name. Since users might change that declaration,
 * \p allowed lists additional characters that are passed through as is.
 *
 * - If \p allowed is empty, mangling is disabled and \p orig is returned
 *   unchanged (for callers who know what they are doing).
 * - Otherwise '_' and ' ' are replaced by '-'; ':', ',', ';' and '!' are
 *   replaced by '.'; any other illegal character is dropped.
 * - An "x" is prepended when \p orig is empty or its first character is
 *   neither a letter nor listed in \p allowed.
 * - A name that had to be mangled gets "-<n>" appended, where <n> is a
 *   running counter, to keep mangled names distinct.
 * - A name that was not mangled but ends in a digit gets a trailing "."
 *   (presumably so it cannot be confused with a "-<n>" suffix).
 *
 * Results are cached per \p orig in a function-local static map, so the
 * same label always yields the same id. Note that the cache key does not
 * include \p allowed.
 */
std::string cleanID(std::string const & orig, std::string const & allowed)
{
	if (allowed.empty())
		return orig;

	typedef std::map<std::string, std::string> MangledMap;
	static MangledMap mangledNames;
	static int mangleID = 1;

	MangledMap::const_iterator const known = mangledNames.find(orig);
	if (known != mangledNames.end())
		return known->second;

	std::string content;

	// Make sure the id starts with a letter. An empty label has no first
	// character to look at, so it gets the placeholder as well instead
	// of dereferencing end().
	if (orig.empty()
	    || (!isalpha(static_cast<unsigned char>(orig[0]))
		&& allowed.find(orig[0]) == std::string::npos))
		content += "x";

	bool mangle = false;
	std::string::const_iterator const end = orig.end();
	for (std::string::const_iterator it = orig.begin(); it != end; ++it) {
		char const c = *it;
		// The <cctype> classifiers are only defined for values
		// representable as unsigned char (or EOF).
		unsigned char const uc = static_cast<unsigned char>(c);
		if (isalpha(uc) || isdigit(uc) || c == '-' || c == '.'
		    || allowed.find(c) != std::string::npos) {
			content += c;
		} else if (c == '_' || c == ' ') {
			mangle = true;
			content += "-";
		} else if (c == ':' || c == ',' || c == ';' || c == '!') {
			mangle = true;
			content += ".";
		} else {
			// Anything else is silently dropped.
			mangle = true;
		}
	}

	if (mangle) {
		std::ostringstream suffix;
		suffix << '-' << mangleID++;
		content += suffix.str();
	} else if (!content.empty()
		   && isdigit(static_cast<unsigned char>(content[content.size() - 1]))) {
		content += ".";
	}

	mangledNames[orig] = content;
	return content;
}
void openTag(ostream & os, string const & name, string const & attribute)
{
// This should be fixed in layout files later.
@ -141,8 +211,7 @@ void openTag(Buffer const & buf, ostream & os, Paragraph const & par)
string param = style->latexparam();
Counters & counters = buf.params().getLyXTextClass().counters();
string id = par.getDocbookId();
id = id.empty()? "" : " id = \"" + id + "\"";
string id = par.getID();
string attribute;
if(!id.empty()) {

View File

@ -34,6 +34,12 @@ std::pair<bool, std::string> escapeChar(char c);
/// Escape a word instead of a single character
std::string escapeString(std::string const & raw);
/// Sanitizes \p orig for use as an SGML/XML ID attribute value.
/// With an empty \p allowed (the default) \p orig is returned unchanged;
/// otherwise characters other than letters, digits, '-', '.' and those
/// listed in \p allowed are replaced (e.g. ':' by '.', '_' by '-') or dropped.
std::string cleanID(std::string const & orig, std::string const & allowed = std::string());
/// Returns \p label with a number appended; the number comes from a
/// single counter that is incremented on every call.
std::string const uniqueID(std::string const label);
/// Opens tag
void openTag(std::ostream & os, std::string const & name,
std::string const & attribute = std::string());