2002-08-09 00:42:12 +00:00
|
|
|
/**
|
2007-04-26 04:41:58 +00:00
|
|
|
* \file sgml.cpp
|
2003-08-23 00:17:00 +00:00
|
|
|
* This file is part of LyX, the document processor.
|
|
|
|
* Licence details can be found in the file COPYING.
|
2002-08-09 00:42:12 +00:00
|
|
|
*
|
2008-11-14 15:58:50 +00:00
|
|
|
* \author José Matos
|
2003-08-23 00:17:00 +00:00
|
|
|
* \author John Levon
|
|
|
|
*
|
|
|
|
* Full author contact details are available in file CREDITS.
|
2002-08-09 00:42:12 +00:00
|
|
|
*/
|
|
|
|
|
2002-10-21 16:21:56 +00:00
|
|
|
#include <config.h>
|
2002-11-04 02:12:42 +00:00
|
|
|
|
2004-10-24 20:55:22 +00:00
|
|
|
#include "sgml.h"
|
2002-11-04 02:12:42 +00:00
|
|
|
|
2007-04-26 04:41:58 +00:00
|
|
|
#include "Buffer.h"
|
|
|
|
#include "BufferParams.h"
|
|
|
|
#include "Counters.h"
|
2007-09-29 20:02:32 +00:00
|
|
|
#include "Layout.h"
|
2007-04-26 04:41:58 +00:00
|
|
|
#include "OutputParams.h"
|
|
|
|
#include "Paragraph.h"
|
2007-11-06 21:45:24 +00:00
|
|
|
#include "Text.h"
|
2007-11-07 23:25:08 +00:00
|
|
|
#include "TextClass.h"
|
2002-11-04 02:12:42 +00:00
|
|
|
|
2007-11-06 21:45:24 +00:00
|
|
|
#include "support/convert.h"
|
2007-11-01 22:17:22 +00:00
|
|
|
#include "support/docstream.h"
|
2004-10-24 20:55:22 +00:00
|
|
|
#include "support/lstrings.h"
|
2007-04-02 15:21:36 +00:00
|
|
|
#include "support/textutils.h"
|
2004-10-24 20:55:22 +00:00
|
|
|
|
2016-07-10 15:50:19 +00:00
|
|
|
#include <atomic>
|
2004-10-29 23:08:04 +00:00
|
|
|
#include <map>
|
2014-10-26 17:02:49 +00:00
|
|
|
#include <QThreadStorage>
|
2004-10-25 00:26:05 +00:00
|
|
|
|
2007-12-12 10:16:00 +00:00
|
|
|
using namespace std;
|
2007-12-12 18:57:56 +00:00
|
|
|
using namespace lyx::support;
|
2006-10-21 00:16:43 +00:00
|
|
|
|
|
|
|
namespace lyx {
|
|
|
|
|
2004-10-24 20:55:22 +00:00
|
|
|
|
2006-10-21 11:38:43 +00:00
|
|
|
/**
 * Return the SGML/XML-safe representation of a single character.
 *
 * The markup-significant characters '&', '<' and '>' are replaced by their
 * predefined entity references; a space is appended as a plain space; every
 * other character is returned unchanged.
 *
 * \param c the character to escape.
 * \return a string holding either \p c itself or its entity reference.
 */
docstring sgml::escapeChar(char_type c)
{
	docstring str;
	switch (c) {
	case ' ':
		str += " ";
		break;
	case '&':
		// Must be an entity, otherwise it would start an entity reference.
		str += "&amp;";
		break;
	case '<':
		// Must be an entity, otherwise it would open a tag.
		str += "&lt;";
		break;
	case '>':
		str += "&gt;";
		break;
#if 0
	// Disabled: ISO entity names for further characters. Kept for
	// reference only; these characters currently pass through unchanged.
	case '$':
		str += "&dollar;";
		break;
	case '#':
		str += "&num;";
		break;
	case '%':
		str += "&percnt;";
		break;
	case '[':
		str += "&lsqb;";
		break;
	case ']':
		str += "&rsqb;";
		break;
	case '{':
		str += "&lcub;";
		break;
	case '}':
		str += "&rcub;";
		break;
	case '~':
		str += "&tilde;";
		break;
	case '"':
		str += "&quot;";
		break;
	case '\\':
		str += "&bsol;";
		break;
#endif
	default:
		str += c;
		break;
	}
	return str;
}
|
|
|
|
|
2002-11-04 02:12:42 +00:00
|
|
|
|
2006-10-21 11:38:43 +00:00
|
|
|
/**
 * Escape every character of \p raw with sgml::escapeChar() and return the
 * concatenated result.
 *
 * \param raw the unescaped text.
 * \return the escaped text.
 */
docstring sgml::escapeString(docstring const & raw)
{
	docstring escaped;
	// Escaped output may be longer than the input; doubling the size is a
	// crude but sufficient guess.
	escaped.reserve(raw.size() * 2);

	for (char_type const c : raw)
		escaped += sgml::escapeChar(c);

	return escaped;
}
|
|
|
|
|
|
|
|
|
2014-07-05 17:13:10 +00:00
|
|
|
/**
 * Build an identifier by appending a running number to \p label.
 *
 * The number comes from a function-local atomic counter that starts at 1000
 * and is incremented before use, so the first suffix handed out is 1001.
 *
 * \param label the prefix of the identifier.
 * \return \p label followed by the next counter value.
 */
docstring const sgml::uniqueID(docstring const & label)
{
	// An atomic counter keeps concurrent callers from receiving the same
	// number.
	static atomic_uint seed(1000);

	unsigned int const serial = ++seed;
	return label + convert<docstring>(serial);
}
|
|
|
|
|
|
|
|
|
2006-10-22 11:24:33 +00:00
|
|
|
/**
 * Turn \p orig into an identifier that is acceptable as an SGML/XML name.
 *
 * The standard DocBook SGML declaration only allows letters, digits, '-'
 * and '.' in a name. Since users might change that declaration, additional
 * allowed characters are taken from the document class options (or are
 * ".-_:" for the XML flavor). Illegal characters are replaced by '-' or '.'
 * or dropped, and a number is appended for uniqueness whenever something
 * had to be changed. If the set of allowed characters is empty, the
 * mangling is disabled and \p orig is returned untouched.
 *
 * Results are remembered per thread, so the same \p orig always maps to the
 * same cleaned identifier within one thread.
 *
 * \param buf       buffer whose document class supplies the allowed characters.
 * \param runparams output parameters; only the flavor is consulted.
 * \param orig      the identifier to clean.
 * \return the cleaned identifier.
 */
docstring sgml::cleanID(Buffer const & buf, OutputParams const & runparams,
	docstring const & orig)
{
	DocumentClass const & tclass = buf.params().documentClass();
	docstring const allowed = from_ascii(
		runparams.flavor == OutputParams::XML ? ".-_:" : tclass.options());

	// If you know what you are doing, you can set allowed=="" to disable
	// the mangling.
	if (allowed.empty())
		return orig;

	docstring::const_iterator it = orig.begin();
	docstring::const_iterator const end = orig.end();

	docstring content;

	typedef map<docstring, docstring> MangledMap;
	// Per-thread cache of already cleaned names and per-thread counter used
	// to make mangled names unique.
	static QThreadStorage<MangledMap> tMangledNames;
	static QThreadStorage<int> tMangleID;

	MangledMap & mangledNames = tMangledNames.localData();

	MangledMap::const_iterator const known = mangledNames.find(orig);
	if (known != mangledNames.end())
		return known->second;

	// Make sure the result starts with a letter (or an explicitly allowed
	// character). An empty input is handled here as well, so that the end
	// iterator is never dereferenced.
	if (it == end
	    || (!isAlphaASCII(*it) && allowed.find(*it) >= allowed.size()))
		content += "x";

	bool mangle = false;
	for (; it != end; ++it) {
		char_type c = *it;
		if (isAlphaASCII(c) || isDigitASCII(c) || c == '-' || c == '.'
		      || allowed.find(c) < allowed.size())
			content += c;
		else if (c == '_' || c == ' ') {
			mangle = true;
			content += "-";
		}
		else if (c == ':' || c == ',' || c == ';' || c == '!') {
			mangle = true;
			content += ".";
		}
		else {
			// Any other character is dropped.
			mangle = true;
		}
	}

	if (mangle) {
		// Something was changed: append a running number so that two
		// different originals cannot collapse into the same name.
		int & mangleID = tMangleID.localData();
		content += "-" + convert<docstring>(mangleID++);
	} else if (isDigitASCII(content[content.size() - 1]))
		// content is never empty here: it holds at least "x" or the
		// first character of orig.
		content += ".";

	mangledNames[orig] = content;

	return content;
}
|
|
|
|
|
|
|
|
|
2006-10-19 21:00:33 +00:00
|
|
|
/**
 * Write the opening tag "<name attribute>" to \p os.
 *
 * Nothing is written if \p name is empty, is the comment marker "!-- --",
 * or is "dummy". "dummy" is skipped because it is not a valid DocBook
 * element; it is the internal name given to single paragraphs in the LaTeX
 * output. Ignoring it here simplifies the callers a lot and is a reasonable
 * compromise.
 *
 * \param os        output stream.
 * \param name      element name.
 * \param attribute attribute text; '<' and '>' in it are turned into '"'.
 */
void sgml::openTag(odocstream & os, string const & name, string const & attribute)
{
	bool const skip = name.empty() || name == "!-- --" || name == "dummy";
	if (skip)
		return;

	// FIXME UNICODE
	// This should be fixed in layout files later.
	string const param = subst(subst(attribute, "<", "\""), ">", "\"");

	os << '<' << from_ascii(name);
	if (!param.empty())
		os << ' ' << from_ascii(param);
	os << '>';
}
|
|
|
|
|
|
|
|
|
2006-10-19 21:00:33 +00:00
|
|
|
/**
 * Write the closing tag "</name>" to \p os.
 *
 * Nothing is written if \p name is empty, is the comment marker "!-- --",
 * or is "dummy".
 *
 * \param os   output stream.
 * \param name element name.
 */
void sgml::closeTag(odocstream & os, string const & name)
{
	if (name.empty() || name == "!-- --" || name == "dummy")
		return;

	os << "</" << from_ascii(name) << '>';
}
|
|
|
|
|
|
|
|
|
2006-10-22 11:35:16 +00:00
|
|
|
/**
 * Write the opening tag for paragraph \p par to \p os.
 *
 * The element name and the attribute template come from the paragraph's
 * layout (latexname() / latexparam()). If the paragraph supplies its own id,
 * that id is put in front of the layout parameters and a generated
 * "id=<...>" entry is removed from them. Otherwise every '#' in the
 * parameters is replaced by a counter value after the counter was stepped.
 *
 * \param buf       buffer owning the paragraph; supplies the counters.
 * \param os        output stream.
 * \param runparams output parameters, passed on to Paragraph::getID().
 * \param par       the paragraph whose tag is opened.
 */
void sgml::openTag(Buffer const & buf, odocstream & os,
	OutputParams const & runparams, Paragraph const & par)
{
	Layout const & style = par.layout();
	string const & name = style.latexname();
	string param = style.latexparam();
	Counters & counters = buf.params().documentClass().counters();

	string const id = par.getID(buf, runparams);

	string attribute;
	if (!id.empty()) {
		if (param.find('#') != string::npos) {
			// Drop the "id=<...>" entry of the layout: the paragraph
			// brings its own id. The closing '>' is searched from the
			// start of that entry, so an earlier '>' in param cannot
			// make the erase length underflow.
			string::size_type const pos = param.find("id=<");
			if (pos != string::npos) {
				string::size_type const end = param.find('>', pos);
				if (end != string::npos)
					param.erase(pos, end - pos + 1);
			}
		}
		attribute = id + ' ' + param;
	} else {
		if (param.find('#') != string::npos) {
			// FIXME UNICODE
			if (!style.counter.empty())
				// This uses InternalUpdate at the moment because sgml output
				// does not do anything with tracked counters, and it would need
				// to track layouts if it did want to use them.
				counters.step(style.counter, InternalUpdate);
			else
				counters.step(from_ascii(name), InternalUpdate);
			// NOTE(review): the value is always read from the counter
			// called `name`, even when style.counter was the one stepped
			// above -- confirm that this is intended.
			int i = counters.value(from_ascii(name));
			attribute = subst(param, "#", convert<string>(i));
		} else {
			attribute = param;
		}
	}
	openTag(os, name, attribute);
}
|
2002-10-21 16:21:56 +00:00
|
|
|
|
|
|
|
|
2006-10-19 21:00:33 +00:00
|
|
|
/**
 * Write the closing tag for paragraph \p par to \p os.
 *
 * The element name is the latexname() of the paragraph's layout; the
 * string overload of closeTag() does the actual output.
 *
 * \param os  output stream.
 * \param par the paragraph whose tag is closed.
 */
void sgml::closeTag(odocstream & os, Paragraph const & par)
{
	closeTag(os, par.layout().latexname());
}
|
2006-10-21 00:16:43 +00:00
|
|
|
|
|
|
|
|
|
|
|
} // namespace lyx
|