Make the output of \inputencoding commands (and the change of output

encodings) more fine grained: From paragraph level to individual character
level. The inputenc package supports that since at least 2000.

	* src/insets/insetbase.h
	(latex): Document the differences between output to a string stream
	and a file stream

	* src/buffer.h
	(writeLaTeXSource): ditto

	* src/output_latex.h
	(latexParagraphs): ditto
	(switchEncoding): new function to change the encoding of a stream
	(and output \inputencoding commands)

	* src/paragraph_pimpl.C
	(Paragraph::Pimpl::simpleTeXSpecialChars): Adjust to latexWriteEndChanges
	changes

	* src/support/docstream.[Ch]
	(setEncoding, operator<<): New stream modifier that changes the
	encoding of file streams

	* src/lyxfont.[Ch]
	(LyXFont::latexWriteStartChanges): Change stream encoding if needed
	(LyXFont::latexWriteEndChanges): Change stream encoding if needed

	* src/lyxfont.h
	(public:):

	* src/paragraph.C
	(Paragraph::simpleTeXOnePar): Adjust to latexWriteStartChanges and latexWriteEndChanges changes
	(bool Paragraph::simpleTeXOnePar):
	(bool Paragraph::simpleTeXOnePar):
	(bool Paragraph::simpleTeXOnePar):
	(bool Paragraph::simpleTeXOnePar):
	(bool Paragraph::simpleTeXOnePar):

	* src/output_latex.C
	(TeXOnePar): Remove the ugly hack for encoding changes and use
	switchEncoding instead. A nice side effect is that the old hack would
	not work if the main language encoding is latin1 and a character
	would be mapped to a cedilla in the "fake ucs4" encoding, because
	iconv refuses to convert such a character to latin1, although it
	exists in latin1 (it wants to attach it to a base character).


git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/trunk@16633 a592a061-630c-0410-9148-cb99ea01b6c8
This commit is contained in:
Georg Baum 2007-01-09 19:25:40 +00:00
parent 6ca4bd03e0
commit bcb1654b15
10 changed files with 167 additions and 111 deletions

View File

@ -146,13 +146,31 @@ public:
/// Write file. Returns \c false if unsuccessful.
bool writeFile(support::FileName const &) const;
/// Just a wrapper for the method below, first creating the ofstream.
/// Just a wrapper for writeLaTeXSource, first creating the ofstream.
bool makeLaTeXFile(support::FileName const & filename,
std::string const & original_path,
OutputParams const &,
bool output_preamble = true,
bool output_body = true);
///
/** Export the buffer to LaTeX.
If \p os is a file stream, and params().inputenc == "auto", and
the buffer contains text in different languages with more than
one encoding, then this method will change the encoding
associated to \p os. Therefore you must not call this method with
a string stream if the output is supposed to go to a file. \code
odocfstream ofs;
ofs.open("test.tex");
writeLaTeXSource(ofs, ...);
ofs.close();
\endcode is NOT equivalent to \code
odocstringstream oss;
writeLaTeXSource(oss, ...);
odocfstream ofs;
ofs.open("test.tex");
ofs << oss.str();
ofs.close();
\endcode
*/
void writeLaTeXSource(odocstream & os,
std::string const & original_path,
OutputParams const &,

View File

@ -369,7 +369,12 @@ public:
virtual void write(Buffer const &, std::ostream &) const {}
/// read inset in .lyx format
virtual void read(Buffer const &, LyXLex &) {}
/// returns the number of rows (\n's) of generated tex code.
/** Export the inset to LaTeX.
* Don't use a temporary stringstream if the final output is
* supposed to go to a file.
* \sa Buffer::writeLaTeXSource for the reason.
* \return the number of rows (\n's) of generated LaTeX code.
*/
virtual int latex(Buffer const &, odocstream &,
OutputParams const &) const { return 0; }
/// returns true to override begin and end inset in file

View File

@ -23,6 +23,7 @@
#include "LColor.h"
#include "lyxlex.h"
#include "lyxrc.h"
#include "output_latex.h"
#include "support/lstrings.h"
@ -737,11 +738,13 @@ void LyXFont::lyxWriteChanges(LyXFont const & orgfont,
/// Writes the head of the LaTeX needed to impose this font
// Returns number of chars written.
int LyXFont::latexWriteStartChanges(odocstream & os, LyXFont const & base,
LyXFont const & prev) const
LyXFont const & prev,
BufferParams const & bparams) const
{
int count = 0;
bool env = false;
int count = switchEncoding(os, bparams, *(prev.language()->encoding()),
*(language()->encoding()));
if (language()->babel() != base.language()->babel() &&
language() != prev.language()) {
if (isRightToLeft() != prev.isRightToLeft()) {
@ -833,7 +836,8 @@ int LyXFont::latexWriteStartChanges(odocstream & os, LyXFont const & base,
// Returns number of chars written
// This one corresponds to latexWriteStartChanges(). (Asger)
int LyXFont::latexWriteEndChanges(odocstream & os, LyXFont const & base,
LyXFont const & next) const
LyXFont const & next,
BufferParams const & bparams) const
{
int count = 0;
bool env = false;
@ -897,6 +901,8 @@ int LyXFont::latexWriteEndChanges(odocstream & os, LyXFont const & base,
os << '}';
++count;
}
count += switchEncoding(os, bparams, *(language()->encoding()),
*(next.language()->encoding()));
return count;
}

View File

@ -300,14 +300,17 @@ public:
font state active now.
*/
int latexWriteStartChanges(odocstream &, LyXFont const & base,
LyXFont const & prev) const;
LyXFont const & prev,
BufferParams const &) const;
/** Writes the tail of the LaTeX needed to change to this font.
Returns number of chars written. Base is the font state we want
to achieve.
*/
int latexWriteEndChanges(odocstream &, LyXFont const & base,
LyXFont const & next) const;
LyXFont const & next,
BufferParams const &) const;
/// Build GUI description of font state
docstring const stateText(BufferParams * params) const;

View File

@ -29,7 +29,6 @@
#include "insets/insetoptarg.h"
#include "support/lstrings.h"
#include "support/unicode.h"
namespace lyx {
@ -237,7 +236,7 @@ ParagraphList::const_iterator
TeXOnePar(Buffer const & buf,
ParagraphList const & paragraphs,
ParagraphList::const_iterator pit,
odocstream & ucs4, TexRow & texrow,
odocstream & os, TexRow & texrow,
OutputParams const & runparams_in,
string const & everypar)
{
@ -275,49 +274,42 @@ TeXOnePar(Buffer const & buf,
if (!lyxrc.language_command_end.empty() &&
previous_language->babel() != doc_language->babel())
{
ucs4 << from_ascii(subst(lyxrc.language_command_end,
os << from_ascii(subst(lyxrc.language_command_end,
"$$lang",
previous_language->babel()))
<< '\n';
<< '\n';
texrow.newline();
}
if (lyxrc.language_command_end.empty() ||
language->babel() != doc_language->babel())
{
ucs4 << from_ascii(subst(
os << from_ascii(subst(
lyxrc.language_command_begin,
"$$lang",
language->babel()))
<< '\n';
<< '\n';
texrow.newline();
}
}
// FIXME thailatex does not support the inputenc package, so we
// ignore switches from/to tis620-0 encoding here. This does of
// course only work as long as the non-thai text contains ASCII
// only, but it is the best we can do.
bool const use_thailatex = (language->encoding()->name() == "tis620-0" ||
previous_language->encoding()->name() == "tis620-0");
if (bparams.inputenc == "auto" &&
language->encoding() != previous_language->encoding() &&
!use_thailatex) {
ucs4 << "\\inputencoding{"
<< from_ascii(language->encoding()->latexName())
<< "}\n";
LyXFont const outerfont =
outerFont(std::distance(paragraphs.begin(), pit),
paragraphs);
// This must be identical to basefont in Paragraph::simpleTeXOnePar
LyXFont basefont = (pit->beginOfBody() > 0) ?
pit->getLabelFont(bparams, outerfont) :
pit->getLayoutFont(bparams, outerfont);
Encoding const & outer_encoding(*(outerfont.language()->encoding()));
// FIXME we switch from the outer encoding to the encoding of
// this paragraph, since I could not figure out the correct
// logic to take the encoding of the previous paragraph into
// account. This may result in some unneeded encoding changes.
if (switchEncoding(os, bparams, outer_encoding,
*(basefont.language()->encoding()))) {
os << '\n';
texrow.newline();
}
// We need to output the paragraph to a temporary stream if we
// need to change the encoding. Don't do this if the result does
// not go to a file but to the builtin source viewer.
odocstringstream par_stream;
bool const change_encoding = !runparams_in.dryrun &&
bparams.inputenc == "auto" &&
language->encoding() != doc_language->encoding() &&
!use_thailatex;
// don't trigger the copy ctor because it's private on msvc
odocstream & os = *(change_encoding ? &par_stream : &ucs4);
// In an inset with unlimited length (all in one row),
// don't allow any special options in the paragraph
@ -376,9 +368,6 @@ TeXOnePar(Buffer const & buf,
// FIXME UNICODE
os << from_utf8(everypar);
LyXFont const outerfont =
outerFont(std::distance(paragraphs.begin(), pit),
paragraphs);
bool need_par = pit->simpleTeXOnePar(buf, bparams, outerfont,
os, texrow, runparams);
@ -478,6 +467,17 @@ TeXOnePar(Buffer const & buf,
texrow.newline();
}
// FIXME we switch from the encoding of this paragraph to the
// outer encoding, since I could not figure out the correct logic
// to take the encoding of the next paragraph into account.
// This may result in some unneeded encoding changes.
basefont = pit->getLayoutFont(bparams, outerfont);
if (switchEncoding(os, bparams, *(basefont.language()->encoding()),
outer_encoding)) {
os << '\n';
texrow.newline();
}
// we don't need it for the last paragraph!!!
// Note from JMarc: we will re-add a \n explicitly in
// TeXEnvironment, because it is needed in this case
@ -490,59 +490,6 @@ TeXOnePar(Buffer const & buf,
lyxerr.debugging(Debug::LATEX))
lyxerr << "TeXOnePar...done " << &*boost::next(pit) << endl;
if (change_encoding) {
lyxerr[Debug::LATEX] << "Converting paragraph to encoding "
<< language->encoding()->iconvName() << endl;
docstring const par = par_stream.str();
// Convert the paragraph to the 8bit encoding that we need to
// output.
std::vector<char> const encoded = lyx::ucs4_to_eightbit(par.c_str(),
par.size(), language->encoding()->iconvName());
// Interpret this as if it was in the 8 bit encoding of the
// document language and convert it back to UCS4. That means
// that faked does not contain pure UCS4 anymore, but what
// will be written to the output file will be correct, because
// the real output stream will do a UCS4 -> document language
// encoding conversion.
// This is of course a hack, but not a bigger one than mixing
// two encodings in one file.
// FIXME: Catch iconv conversion errors and display an error
// dialog.
// Here follows an explanation how I (gb) came to the current
// solution:
// codecvt facets are only used by file streams -> OK, maybe
// we could use file streams and not generic streams in the
// latex() methods? No, that does not work, we use them at
// several places to write to string streams.
// Next try: Maybe we could do something else than codecvt
// in our streams, and add a setEncoding() method? That
// does not work unless we rebuild the functionality of file
// and string streams, since both odocfstream and
// odocstringstream inherit from std::basic_ostream<docstring>
// and we can neither add a method to that class nor change
// the inheritance of the file and string streams.
// What might be possible is to encapsulate the real file and
// string streams in our own version, and use a homemade
// streambuf that would do the encoding conversion and then
// forward to the real stream. That would probably work, but
// would require far more code and a good understanding of
// stream buffers to get it right.
// Another idea by JMarc is to use a modifier like
// os << setencoding("iso-8859-1");
// That currently looks like the best idea.
std::vector<char_type> const faked = lyx::eightbit_to_ucs4(&(encoded[0]),
encoded.size(), doc_language->encoding()->iconvName());
std::vector<char_type>::const_iterator const end = faked.end();
std::vector<char_type>::const_iterator it = faked.begin();
for (; it != end; ++it)
ucs4.put(*it);
}
return ++pit;
}
@ -646,4 +593,24 @@ void latexParagraphs(Buffer const & buf,
}
int switchEncoding(odocstream & os, BufferParams const & bparams,
		Encoding const & oldEnc, Encoding const & newEnc)
{
	// Nothing to do unless the encoding is chosen automatically and
	// really changes.
	// FIXME thailatex does not support the inputenc package, so we
	// ignore switches from/to tis620-0 encoding here. This does of
	// course only work as long as the non-thai text contains ASCII
	// only, but it is the best we can do.
	if (bparams.inputenc != "auto" || oldEnc.name() == newEnc.name() ||
	    oldEnc.name() == "tis620-0" || newEnc.name() == "tis620-0")
		return 0;

	lyxerr[Debug::LATEX] << "Changing LaTeX encoding from "
		<< oldEnc.name() << " to "
		<< newEnc.name() << endl;

	// Change the encoding of the stream first, so that the
	// \inputencoding command itself is written in the new encoding.
	os << setEncoding(newEnc.iconvName());

	docstring const latexName(from_ascii(newEnc.latexName()));
	os << "\\inputencoding{" << latexName << '}';

	// 16 == length of "\inputencoding{" plus the closing brace
	return 16 + latexName.length();
}
} // namespace lyx

View File

@ -20,11 +20,16 @@
namespace lyx {
class Buffer;
class BufferParams;
class Encoding;
class OutputParams;
class TexRow;
/// Just a wrapper for the method below, first creating the ofstream.
/** Export \p paragraphs of buffer \p buf to LaTeX.
Don't use a temporary stringstream for \p os if the final output is
supposed to go to a file.
\sa Buffer::writeLaTeXSource for the reason.
*/
void latexParagraphs(Buffer const & buf,
ParagraphList const & paragraphs,
odocstream & ofs,
@ -32,6 +37,10 @@ void latexParagraphs(Buffer const & buf,
OutputParams const &,
std::string const & everypar = std::string());
/** Switch the encoding of \p os from \p oldEnc to \p newEnc if needed.
    Nothing is done unless \c bparams.inputenc is "auto" and the names
    of the two encodings differ. Switches from or to the "tis620-0"
    encoding are ignored as well.
    \return the number of characters written to \p os.
 */
int switchEncoding(odocstream & os, BufferParams const & bparams,
Encoding const & oldEnc, Encoding const & newEnc);
} // namespace lyx

View File

@ -761,7 +761,7 @@ int adjust_column_count(string const & str, int oldcol)
// This could go to ParagraphParameters if we want to
int Paragraph::startTeXParParams(BufferParams const & bparams,
odocstream & os, bool moving_arg) const
odocstream & os, bool moving_arg) const
{
int column = 0;
@ -825,7 +825,7 @@ int Paragraph::startTeXParParams(BufferParams const & bparams,
// This could go to ParagraphParameters if we want to
int Paragraph::endTeXParParams(BufferParams const & bparams,
odocstream & os, bool moving_arg) const
odocstream & os, bool moving_arg) const
{
int column = 0;
@ -913,6 +913,7 @@ bool Paragraph::simpleTeXOnePar(Buffer const & buf,
// As long as we are in the label, this font is the base font of the
// label. Before the first body character it is set to the base font
// of the body.
// This must be identical to basefont in TeXOnePar().
LyXFont basefont;
LaTeXFeatures features(buf, bparams, runparams);
@ -964,7 +965,8 @@ bool Paragraph::simpleTeXOnePar(Buffer const & buf,
if (i == body_pos) {
if (body_pos > 0) {
if (open_font) {
column += running_font.latexWriteEndChanges(os, basefont, basefont);
column += running_font.latexWriteEndChanges(
os, basefont, basefont, bparams);
open_font = false;
}
basefont = getLayoutFont(bparams, outerfont);
@ -1004,9 +1006,10 @@ bool Paragraph::simpleTeXOnePar(Buffer const & buf,
(font != running_font ||
font.language() != running_font.language()))
{
column += running_font.latexWriteEndChanges(os,
basefont,
(i == body_pos-1) ? basefont : font);
column += running_font.latexWriteEndChanges(
os, basefont,
(i == body_pos-1) ? basefont : font,
bparams);
running_font = basefont;
open_font = false;
}
@ -1025,8 +1028,8 @@ bool Paragraph::simpleTeXOnePar(Buffer const & buf,
font.language() != running_font.language()) &&
i != body_pos - 1)
{
column += font.latexWriteStartChanges(os, basefont,
last_font);
column += font.latexWriteStartChanges(
os, basefont, last_font, bparams);
running_font = font;
open_font = true;
}
@ -1062,11 +1065,11 @@ bool Paragraph::simpleTeXOnePar(Buffer const & buf,
if (next_) {
running_font
.latexWriteEndChanges(os, basefont,
next_->getFont(bparams,
0, outerfont));
next_->getFont(bparams, 0, outerfont),
bparams);
} else {
running_font.latexWriteEndChanges(os, basefont,
basefont);
basefont, bparams);
}
#else
#ifdef WITH_WARNINGS
@ -1074,7 +1077,8 @@ bool Paragraph::simpleTeXOnePar(Buffer const & buf,
//#warning there as we start another \selectlanguage with the next paragraph if
//#warning we are in need of this. This should be fixed sometime (Jug)
#endif
running_font.latexWriteEndChanges(os, basefont, basefont);
running_font.latexWriteEndChanges(os, basefont, basefont,
bparams);
#endif
}

View File

@ -483,7 +483,8 @@ void Paragraph::Pimpl::simpleTeXSpecialChars(Buffer const & buf,
os << '\n';
} else {
if (open_font) {
column += running_font.latexWriteEndChanges(os, basefont, basefont);
column += running_font.latexWriteEndChanges(
os, basefont, basefont, bparams);
open_font = false;
}
basefont = owner_->getLayoutFont(bparams, outerfont);
@ -536,10 +537,8 @@ void Paragraph::Pimpl::simpleTeXSpecialChars(Buffer const & buf,
#endif
// some insets cannot be inside a font change command
if (open_font && inset->noFontChange()) {
column +=running_font.
latexWriteEndChanges(os,
basefont,
basefont);
column += running_font.latexWriteEndChanges(
os, basefont, basefont, bparams);
open_font = false;
basefont = owner_->getLayoutFont(bparams, outerfont);
running_font = basefont;

View File

@ -294,6 +294,32 @@ odocfstream::odocfstream(const char* s, std::ios_base::openmode mode,
open(s, mode);
}
/// Build the stream modifier that requests a switch to \p encoding.
SetEnc setEncoding(string const & encoding)
{
	SetEnc const modifier(encoding);
	return modifier;
}
/// Apply the encoding change requested by \p e to \p os.
odocstream & operator<<(odocstream & os, SetEnc e)
{
	// We never imbue anything but file streams with a locale having
	// an iconv_codecvt_facet, so a stream without one is not a file
	// stream and is left untouched.
	if (!std::has_facet<iconv_codecvt_facet>(os.rdbuf()->getloc()))
		return os;

	// All pending output has to be written with the old encoding,
	// therefore flush before the facet is exchanged.
	os.flush();

	// FIXME Does changing the codecvt facet of an open file
	// stream always work? It does with gcc 4.1, but I have read
	// somewhere that it does not with MSVC.
	// What does the standard say?
	os.imbue(std::locale(os.rdbuf()->getloc(),
		new iconv_codecvt_facet(e.encoding, std::ios_base::out)));
	return os;
}
}
#if (!defined(HAVE_WCHAR_T) || SIZEOF_WCHAR_T != 4) && defined(__GNUC__)

View File

@ -77,6 +77,25 @@ odocstream & operator<<(odocstream & os, char c)
return os;
}
/** Helper struct for changing stream encoding.
    Objects of this type only carry the name of the requested encoding;
    the actual switch happens when the object is written to a stream.
    The constructor is explicit on purpose: otherwise every std::string
    would be implicitly convertible to SetEnc, and writing a std::string
    to a stream would silently be taken for an encoding change.
 */
struct SetEnc {
	/// \param e name of the encoding to switch to
	explicit SetEnc(std::string const & e) : encoding(e) {}
	/// name of the encoding to switch to
	std::string encoding;
};
/** Helper function for changing stream encoding.
    \param encoding name of the encoding to switch to.
    \return a SetEnc object holding \p encoding, meant to be written
    to a stream with operator<<.
 */
SetEnc setEncoding(std::string const & encoding);
/** Change the encoding of \p os to \p e.encoding.
\p e.encoding must be a valid iconv name of an 8bit encoding.
This does nothing if the stream is not a file stream, since only
file streams do have an associated 8bit encoding.
A file stream is flushed before the encoding is changed, so that all
output that is still pending is written with the old encoding.
Usage: \code
os << setEncoding("ISO-8859-1");
\endcode
\return \p os, so that further output can be chained.
*/
odocstream & operator<<(odocstream & os, SetEnc e);
}
#endif