lyx_mirror/src/paragraph_pimpl.C
Georg Baum feb7895965 Add machinery to output arbitrary unicode characters with LaTeX commands
read from a text file.

	* src/encoding.[Ch]

	(Encoding::latexChar): New, output a character to LaTeX
	(Encoding::validate): New, add needed preamble stuff for a character
	(Encodings::read): Read new unicodesymbols file
	(Encodings::isCombiningChar): New, is a character a combining char?

	* src/paragraph_pimpl.C
	(isEncoding): Delete, no longer needed
	(getEncoding): New, get the real encoding of a font
	(Paragraph::Pimpl::latexSurrogatePair): New, output a surrogate pair
	to LaTeX
	(Paragraph::Pimpl::simpleTeXBlanks): Use latexSurrogatePair if needed
	(Paragraph::Pimpl::simpleTeXSpecialChars): Ditto, and replace several
	hardcoded characters with a call of encoding.latexChar()
	(Paragraph::Pimpl::validate): replace several hardcoded characters
	with a call of encoding.validate()

	* src/support/debugstream.h
	(basic_debugstream::disable): New, disable the stream completely
	(basic_debugstream::enable): New, reenable the stream

	* src/lyx_main.[Ch]: Adjust to changes above

	* src/paragraph.C: Ditto

	* lib/unicodesymbols: New file with UCS4 -> LaTeX command mapping.
	It is far from complete yet, but contains most accents on latin
	characters.

	* lib/Makefile.am: add lib/unicodesymbols

	* development/scons/scons_manifest.py: ditto

	* development/tools/unicodesymbols.py: Helper script to update
	lib/unicodesymbols with new symbols


git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/trunk@16920 a592a061-630c-0410-9148-cb99ea01b6c8
2007-01-28 21:27:45 +00:00

859 lines
21 KiB
C
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/**
* \file paragraph_pimpl.C
* This file is part of LyX, the document processor.
* Licence details can be found in the file COPYING.
*
* \author Lars Gullik Bjønnes
* \author Jean-Marc Lasgouttes
* \author John Levon
* \author André Pönitz
*
* Full author contact details are available in file CREDITS.
*/
#include <config.h>
#include "paragraph_pimpl.h"
#include "paragraph.h"
#include "bufferparams.h"
#include "debug.h"
#include "encoding.h"
#include "language.h"
#include "LaTeXFeatures.h"
#include "LColor.h"
#include "lyxlength.h"
#include "lyxrc.h"
#include "outputparams.h"
#include "texrow.h"
#include <boost/next_prior.hpp>
namespace lyx {
using std::endl;
using std::upper_bound;
using std::lower_bound;
using std::string;
// Initialization of the counter for the paragraph id's,
unsigned int Paragraph::Pimpl::paragraph_id = 0;
namespace {
struct special_phrase {
string phrase;
docstring macro;
bool builtin;
};
special_phrase const special_phrases[] = {
{ "LyX", from_ascii("\\LyX{}"), false },
{ "TeX", from_ascii("\\TeX{}"), true },
{ "LaTeX2e", from_ascii("\\LaTeXe{}"), true },
{ "LaTeX", from_ascii("\\LaTeX{}"), true },
};
size_t const phrases_nr = sizeof(special_phrases)/sizeof(special_phrase);
/// Get the real encoding of a character with font \p font.
/// doc_encoding == bparams.encoding(), but we use a precomputed variable
/// since bparams.encoding() is expensive
inline Encoding const & getEncoding(BufferParams const & bparams,
Encoding const & doc_encoding, LyXFont const & font)
{
if (bparams.inputenc == "auto" || bparams.inputenc == "default")
return *(font.language()->encoding());
return doc_encoding;
}
} // namespace anon
Paragraph::Pimpl::Pimpl(Paragraph * owner)
: owner_(owner)
{
inset_owner = 0;
id_ = paragraph_id++;
}
Paragraph::Pimpl::Pimpl(Pimpl const & p, Paragraph * owner)
: params(p.params), changes_(p.changes_), owner_(owner)
{
inset_owner = p.inset_owner;
fontlist = p.fontlist;
id_ = paragraph_id++;
}
bool Paragraph::Pimpl::isChanged(pos_type start, pos_type end) const
{
BOOST_ASSERT(start >= 0 && start <= size());
BOOST_ASSERT(end > start && end <= size() + 1);
return changes_.isChanged(start, end);
}
bool Paragraph::Pimpl::isMergedOnEndOfParDeletion(bool trackChanges) const {
// keep the logic here in sync with the logic of eraseChars()
if (!trackChanges) {
return true;
}
Change change = changes_.lookup(size());
return change.type == Change::INSERTED && change.author == 0;
}
void Paragraph::Pimpl::setChange(Change const & change)
{
// beware of the imaginary end-of-par character!
changes_.set(change, 0, size() + 1);
/*
* Propagate the change recursively - but not in case of DELETED!
*
* Imagine that your co-author makes changes in an existing inset. He
* sends your document to you and you come to the conclusion that the
* inset should go completely. If you erase it, LyX must not delete all
* text within the inset. Otherwise, the change tracked insertions of
* your co-author get lost and there is no way to restore them later.
*
* Conclusion: An inset's content should remain untouched if you delete it
*/
if (change.type != Change::DELETED) {
for (pos_type pos = 0; pos < size(); ++pos) {
if (owner_->isInset(pos)) {
owner_->getInset(pos)->setChange(change);
}
}
}
}
void Paragraph::Pimpl::setChange(pos_type pos, Change const & change)
{
BOOST_ASSERT(pos >= 0 && pos <= size());
changes_.set(change, pos);
// see comment in setChange(Change const &) above
if (change.type != Change::DELETED &&
pos < size() && owner_->isInset(pos)) {
owner_->getInset(pos)->setChange(change);
}
}
Change const & Paragraph::Pimpl::lookupChange(pos_type pos) const
{
BOOST_ASSERT(pos >= 0 && pos <= size());
return changes_.lookup(pos);
}
void Paragraph::Pimpl::acceptChanges(BufferParams const & bparams, pos_type start, pos_type end)
{
BOOST_ASSERT(start >= 0 && start <= size());
BOOST_ASSERT(end > start && end <= size() + 1);
for (pos_type pos = start; pos < end; ++pos) {
switch (lookupChange(pos).type) {
case Change::UNCHANGED:
// accept changes in nested inset
if (pos < size() && owner_->isInset(pos)) {
owner_->getInset(pos)->acceptChanges(bparams);
}
break;
case Change::INSERTED:
changes_.set(Change(Change::UNCHANGED), pos);
// also accept changes in nested inset
if (pos < size() && owner_->isInset(pos)) {
owner_->getInset(pos)->acceptChanges(bparams);
}
break;
case Change::DELETED:
// Suppress access to non-existent
// "end-of-paragraph char"
if (pos < size()) {
eraseChar(pos, false);
--end;
--pos;
}
break;
}
}
}
void Paragraph::Pimpl::rejectChanges(BufferParams const & bparams, pos_type start, pos_type end)
{
BOOST_ASSERT(start >= 0 && start <= size());
BOOST_ASSERT(end > start && end <= size() + 1);
for (pos_type pos = start; pos < end; ++pos) {
switch (lookupChange(pos).type) {
case Change::UNCHANGED:
// reject changes in nested inset
if (pos < size() && owner_->isInset(pos)) {
owner_->getInset(pos)->rejectChanges(bparams);
}
break;
case Change::INSERTED:
// Suppress access to non-existent
// "end-of-paragraph char"
if (pos < size()) {
eraseChar(pos, false);
--end;
--pos;
}
break;
case Change::DELETED:
changes_.set(Change(Change::UNCHANGED), pos);
// Do NOT reject changes within a deleted inset!
// There may be insertions of a co-author inside of it!
break;
}
}
}
Paragraph::value_type Paragraph::Pimpl::getChar(pos_type pos) const
{
BOOST_ASSERT(pos >= 0 && pos <= size());
return owner_->getChar(pos);
}
void Paragraph::Pimpl::insertChar(pos_type pos, value_type c, Change const & change)
{
BOOST_ASSERT(pos >= 0 && pos <= size());
// track change
changes_.insert(change, pos);
// This is actually very common when parsing buffers (and
// maybe inserting ascii text)
if (pos == size()) {
// when appending characters, no need to update tables
owner_->text_.push_back(c);
return;
}
owner_->text_.insert(owner_->text_.begin() + pos, c);
// Update the font table.
FontTable search_font(pos, LyXFont());
for (FontList::iterator it
= lower_bound(fontlist.begin(), fontlist.end(), search_font, matchFT());
it != fontlist.end(); ++it)
{
it->pos(it->pos() + 1);
}
// Update the insets
owner_->insetlist.increasePosAfterPos(pos);
}
void Paragraph::Pimpl::insertInset(pos_type pos, InsetBase * inset,
Change const & change)
{
BOOST_ASSERT(inset);
BOOST_ASSERT(pos >= 0 && pos <= size());
insertChar(pos, META_INSET, change);
BOOST_ASSERT(owner_->text_[pos] == META_INSET);
// Add a new entry in the insetlist.
owner_->insetlist.insert(inset, pos);
}
bool Paragraph::Pimpl::eraseChar(pos_type pos, bool trackChanges)
{
BOOST_ASSERT(pos >= 0 && pos <= size());
// keep the logic here in sync with the logic of isMergedOnEndOfParDeletion()
if (trackChanges) {
Change change = changes_.lookup(pos);
// set the character to DELETED if
// a) it was previously unchanged or
// b) it was inserted by a co-author
if (change.type == Change::UNCHANGED ||
(change.type == Change::INSERTED && change.author != 0)) {
setChange(pos, Change(Change::DELETED));
return false;
}
if (change.type == Change::DELETED)
return false;
}
// Don't physically access the imaginary end-of-paragraph character.
// eraseChar() can only mark it as DELETED. A physical deletion of
// end-of-par must be handled externally.
if (pos == size()) {
return false;
}
// track change
changes_.erase(pos);
// if it is an inset, delete the inset entry
if (owner_->text_[pos] == Paragraph::META_INSET) {
owner_->insetlist.erase(pos);
}
owner_->text_.erase(owner_->text_.begin() + pos);
// Erase entries in the tables.
FontTable search_font(pos, LyXFont());
FontList::iterator it =
lower_bound(fontlist.begin(),
fontlist.end(),
search_font, matchFT());
if (it != fontlist.end() && it->pos() == pos &&
(pos == 0 ||
(it != fontlist.begin()
&& boost::prior(it)->pos() == pos - 1))) {
// If it is a multi-character font
// entry, we just make it smaller
// (see update below), otherwise we
// should delete it.
unsigned int const i = it - fontlist.begin();
fontlist.erase(fontlist.begin() + i);
it = fontlist.begin() + i;
if (i > 0 && i < fontlist.size() &&
fontlist[i - 1].font() == fontlist[i].font()) {
fontlist.erase(fontlist.begin() + i - 1);
it = fontlist.begin() + i - 1;
}
}
// Update all other entries
FontList::iterator fend = fontlist.end();
for (; it != fend; ++it)
it->pos(it->pos() - 1);
// Update the insetlist
owner_->insetlist.decreasePosAfterPos(pos);
return true;
}
int Paragraph::Pimpl::eraseChars(pos_type start, pos_type end, bool trackChanges)
{
BOOST_ASSERT(start >= 0 && start <= size());
BOOST_ASSERT(end >= start && end <= size() + 1);
pos_type i = start;
for (pos_type count = end - start; count; --count) {
if (!eraseChar(i, trackChanges))
++i;
}
return end - i;
}
int Paragraph::Pimpl::latexSurrogatePair(odocstream & os, value_type c,
value_type next, Encoding const & encoding)
{
// Writing next here may circumvent a possible font change between
// c and next. Since next is only output if it forms a surrogate pair
// with c we can ignore this:
// A font change inside a surrogate pair does not make sense and is
// hopefully impossible to input.
// FIXME: change tracking
// Is this correct WRT change tracking?
docstring const latex1 = encoding.latexChar(next);
docstring const latex2 = encoding.latexChar(c);
os << latex1 << '{' << latex2 << '}';
return latex1.length() + latex2.length() + 2;
}
bool Paragraph::Pimpl::simpleTeXBlanks(BufferParams const & bparams,
Encoding const & doc_encoding,
odocstream & os, TexRow & texrow,
pos_type & i,
unsigned int & column,
LyXFont const & font,
LyXLayout const & style)
{
if (style.pass_thru)
return false;
if (i < size() - 1) {
char_type next = getChar(i + 1);
if (Encodings::isCombiningChar(next)) {
// This space has an accent, so we must always output it.
Encoding const & encoding = getEncoding(bparams, doc_encoding, font);
column += latexSurrogatePair(os, ' ', next, encoding) - 1;
++i;
return true;
}
}
if (lyxrc.plaintext_linelen > 0
&& column > lyxrc.plaintext_linelen
&& i
&& getChar(i - 1) != ' '
&& (i < size() - 1)
// same in FreeSpacing mode
&& !owner_->isFreeSpacing()
// In typewriter mode, we want to avoid
// ! . ? : at the end of a line
&& !(font.family() == LyXFont::TYPEWRITER_FAMILY
&& (getChar(i - 1) == '.'
|| getChar(i - 1) == '?'
|| getChar(i - 1) == ':'
|| getChar(i - 1) == '!'))) {
os << '\n';
texrow.newline();
texrow.start(owner_->id(), i + 1);
column = 0;
} else if (style.free_spacing) {
os << '~';
} else {
os << ' ';
}
return false;
}
bool Paragraph::Pimpl::isTextAt(string const & str, pos_type pos) const
{
pos_type const len = str.length();
// is the paragraph large enough?
if (pos + len > size())
return false;
// does the wanted text start at point?
for (string::size_type i = 0; i < str.length(); ++i) {
// Caution: direct comparison of characters works only
// because str is pure ASCII.
if (str[i] != owner_->text_[pos + i])
return false;
}
// is there a font change in middle of the word?
FontList::const_iterator cit = fontlist.begin();
FontList::const_iterator end = fontlist.end();
for (; cit != end; ++cit) {
if (cit->pos() >= pos)
break;
}
if (cit != end && pos + len - 1 > cit->pos())
return false;
return true;
}
void Paragraph::Pimpl::simpleTeXSpecialChars(Buffer const & buf,
BufferParams const & bparams,
Encoding const & doc_encoding,
odocstream & os,
TexRow & texrow,
OutputParams const & runparams,
LyXFont & font,
LyXFont & running_font,
LyXFont & basefont,
LyXFont const & outerfont,
bool & open_font,
Change::Type & running_change,
LyXLayout const & style,
pos_type & i,
unsigned int & column,
value_type const c)
{
if (style.pass_thru) {
if (c != Paragraph::META_INSET) {
if (c != '\0')
// FIXME UNICODE: This can fail if c cannot
// be encoded in the current encoding.
os.put(c);
} else
owner_->getInset(i)->plaintext(buf, os, runparams);
return;
}
// Two major modes: LaTeX or plain
// Handle here those cases common to both modes
// and then split to handle the two modes separately.
switch (c) {
case Paragraph::META_INSET: {
InsetBase * inset = owner_->getInset(i);
// FIXME: remove this check
if (!inset)
break;
// FIXME: move this to InsetNewline::latex
if (inset->lyxCode() == InsetBase::NEWLINE_CODE) {
// newlines are handled differently here than
// the default in simpleTeXSpecialChars().
if (!style.newline_allowed) {
os << '\n';
} else {
if (open_font) {
column += running_font.latexWriteEndChanges(
os, basefont, basefont, bparams);
open_font = false;
}
basefont = owner_->getLayoutFont(bparams, outerfont);
running_font = basefont;
if (font.family() == LyXFont::TYPEWRITER_FAMILY)
os << '~';
if (runparams.moving_arg)
os << "\\protect ";
os << "\\\\\n";
}
texrow.newline();
texrow.start(owner_->id(), i + 1);
column = 0;
break;
}
// output change tracking marks only if desired,
// if dvipost is installed,
// and with dvi/ps (other formats don't work)
LaTeXFeatures features(buf, bparams, runparams);
bool const output = bparams.outputChanges
&& runparams.flavor == OutputParams::LATEX
&& features.isAvailable("dvipost");
if (inset->canTrackChanges()) {
column += Changes::latexMarkChange(os, running_change,
Change::UNCHANGED, output);
running_change = Change::UNCHANGED;
}
bool close = false;
odocstream::pos_type const len = os.tellp();
if ((inset->lyxCode() == InsetBase::GRAPHICS_CODE
|| inset->lyxCode() == InsetBase::MATH_CODE
|| inset->lyxCode() == InsetBase::URL_CODE)
&& running_font.isRightToLeft()) {
os << "\\L{";
close = true;
}
#ifdef WITH_WARNINGS
#warning Bug: we can have an empty font change here!
// if there has just been a font change, we are going to close it
// right now, which means stupid latex code like \textsf{}. AFAIK,
// this does not harm dvi output. A minor bug, thus (JMarc)
#endif
// some insets cannot be inside a font change command
if (open_font && inset->noFontChange()) {
column += running_font.latexWriteEndChanges(
os, basefont, basefont, bparams);
open_font = false;
basefont = owner_->getLayoutFont(bparams, outerfont);
running_font = basefont;
}
int tmp = inset->latex(buf, os, runparams);
if (close)
os << '}';
if (tmp) {
for (int j = 0; j < tmp; ++j) {
texrow.newline();
}
texrow.start(owner_->id(), i + 1);
column = 0;
} else {
column += os.tellp() - len;
}
}
break;
default:
// And now for the special cases within each mode
switch (c) {
case '\\':
os << "\\textbackslash{}";
column += 15;
break;
// The following characters could be written literally in latin1, but they
// would be wrongly converted on systems where char is signed, so we give
// the code points.
// This also makes us independant from the encoding of this source file.
case '|': case '<': case '>':
// In T1 encoding, these characters exist
if (lyxrc.fontenc == "T1") {
os.put(c);
//... but we should avoid ligatures
if ((c == '>' || c == '<')
&& i <= size() - 2
&& getChar(i + 1) == c) {
//os << "\\textcompwordmark{}";
//column += 19;
// Jean-Marc, have a look at
// this. I think this works
// equally well:
os << "\\,{}";
// Lgb
column += 3;
}
break;
}
// Typewriter font also has them
if (font.family() == LyXFont::TYPEWRITER_FAMILY) {
os.put(c);
break;
}
// Otherwise, we use what LaTeX
// provides us.
switch (c) {
case '<':
os << "\\textless{}";
column += 10;
break;
case '>':
os << "\\textgreater{}";
column += 13;
break;
case '|':
os << "\\textbar{}";
column += 9;
break;
}
break;
case '-': // "--" in Typewriter mode -> "-{}-"
if (i <= size() - 2
&& getChar(i + 1) == '-'
&& font.family() == LyXFont::TYPEWRITER_FAMILY) {
os << "-{}";
column += 2;
} else {
os << '-';
}
break;
case '\"':
os << "\\char`\\\"{}";
column += 9;
break;
case '$': case '&':
case '%': case '#': case '{':
case '}': case '_':
os << '\\';
os.put(c);
column += 1;
break;
case '~':
os << "\\textasciitilde{}";
column += 16;
break;
case '^':
os << "\\textasciicircum{}";
column += 17;
break;
case '*': case '[':
// avoid being mistaken for optional arguments
os << '{';
os.put(c);
os << '}';
column += 2;
break;
case ' ':
// Blanks are printed before font switching.
// Sure? I am not! (try nice-latex)
// I am sure it's correct. LyX might be smarter
// in the future, but for now, nothing wrong is
// written. (Asger)
break;
default:
// I assume this is hack treating typewriter as verbatim
// FIXME UNICODE: This can fail if c cannot be encoded
// in the current encoding.
if (font.family() == LyXFont::TYPEWRITER_FAMILY) {
if (c != '\0') {
os.put(c);
}
break;
}
// LyX, LaTeX etc.
// FIXME: if we have "LaTeX" with a font
// change in the middle (before the 'T', then
// the "TeX" part is still special cased.
// Really we should only operate this on
// "words" for some definition of word
size_t pnr = 0;
for (; pnr < phrases_nr; ++pnr) {
if (isTextAt(special_phrases[pnr].phrase, i)) {
os << special_phrases[pnr].macro;
i += special_phrases[pnr].phrase.length() - 1;
column += special_phrases[pnr].macro.length() - 1;
break;
}
}
if (pnr == phrases_nr && c != '\0') {
Encoding const & encoding = getEncoding(bparams, doc_encoding, font);
if (i < size() - 1) {
char_type next = getChar(i + 1);
if (Encodings::isCombiningChar(next)) {
column += latexSurrogatePair(os, c, next, encoding) - 1;
++i;
break;
}
}
docstring const latex = encoding.latexChar(c);
if (latex.length() > 1 &&
latex[latex.length() - 1] != '}') {
// Prevent eating of a following
// space or command corruption by
// following characters
column += latex.length() + 1;
os << latex << "{}";
} else {
column += latex.length() - 1;
os << latex;
}
}
break;
}
}
}
void Paragraph::Pimpl::validate(LaTeXFeatures & features,
LyXLayout const & layout) const
{
BufferParams const & bparams = features.bufferParams();
// check the params.
if (!params.spacing().isDefault())
features.require("setspace");
// then the layouts
features.useLayout(layout.name());
// then the fonts
Language const * doc_language = bparams.language;
FontList::const_iterator fcit = fontlist.begin();
FontList::const_iterator fend = fontlist.end();
for (; fcit != fend; ++fcit) {
if (fcit->font().noun() == LyXFont::ON) {
lyxerr[Debug::LATEX] << "font.noun: "
<< fcit->font().noun()
<< endl;
features.require("noun");
lyxerr[Debug::LATEX] << "Noun enabled. Font: "
<< to_utf8(fcit->font().stateText(0))
<< endl;
}
switch (fcit->font().color()) {
case LColor::none:
case LColor::inherit:
case LColor::ignore:
// probably we should put here all interface colors used for
// font displaying! For now I just add this ones I know of (Jug)
case LColor::latex:
case LColor::note:
break;
default:
features.require("color");
lyxerr[Debug::LATEX] << "Color enabled. Font: "
<< to_utf8(fcit->font().stateText(0))
<< endl;
}
Language const * language = fcit->font().language();
if (language->babel() != doc_language->babel() &&
language != ignore_language &&
language != latex_language)
{
features.useLanguage(language);
lyxerr[Debug::LATEX] << "Found language "
<< language->babel() << endl;
}
}
if (!params.leftIndent().zero())
features.require("ParagraphLeftIndent");
// then the insets
InsetList::const_iterator icit = owner_->insetlist.begin();
InsetList::const_iterator iend = owner_->insetlist.end();
for (; icit != iend; ++icit) {
if (icit->inset) {
icit->inset->validate(features);
if (layout.needprotect &&
icit->inset->lyxCode() == InsetBase::FOOT_CODE)
features.require("NeedLyXFootnoteCode");
}
}
// then the contents
Encoding const & doc_encoding = bparams.encoding();
for (pos_type i = 0; i < size() ; ++i) {
for (size_t pnr = 0; pnr < phrases_nr; ++pnr) {
if (!special_phrases[pnr].builtin
&& isTextAt(special_phrases[pnr].phrase, i)) {
features.require(special_phrases[pnr].phrase);
break;
}
}
// We do not need the completely realized font, since we are
// only interested in the language, and that is never inherited.
// Therefore we can use getFontSettings instead of getFont.
LyXFont const & font = owner_->getFontSettings(bparams, i);
Encoding const & encoding = getEncoding(bparams, doc_encoding, font);
encoding.validate(getChar(i), features);
}
}
} // namespace lyx