lyx_mirror/src/support/docstream.h
Georg Baum de3e5280f6 Open tex2lyx input files with correct encoding
We open the input file now twice: The first time in latin1 encoding to read
the document encoding from the preamble. This does always work, since
traditional TeX does not allow non-ASCII contents without an encoding changing
command (except for comments, but we do not need them, and using latin1 rather
than utf8 ensures that they do not produce an iconv exception, but are simply
recorded with wrong characters), and we do detect the utf8 based TeX engines
XeTeX and LuaTeX as well. The second time we open the file directly with the
document encoding.

This fixes a few tex2lyx tests on OS X, since changing the encoding of an
open file stream does not work with clang on OS X. Files using more than one
encoding are still broken, but all single-encoding files are fixed now.
2016-01-31 12:54:59 +01:00

120 lines
3.6 KiB
C++

// -*- C++ -*-
/**
* \file docstream.h
* This file is part of LyX, the document processor.
* Licence details can be found in the file COPYING.
*
* \author Georg Baum
*
* Full author contact details are available in file CREDITS.
*/
#ifndef LYX_DOCSTREAM_H
#define LYX_DOCSTREAM_H
#include "support/docstring.h"
#include <fstream>
#include <sstream>
namespace lyx {
/// Exception type derived from std::exception.
/// Judging by its name it reports failures of the iconv based codecvt
/// facet; the code that throws it is not part of this header.
class iconv_codecvt_facet_exception : public std::exception
{
public:
	/// Virtual destructor with an empty exception specification.
	virtual ~iconv_codecvt_facet_exception() throw() {}

	/// Description of the error; the definition is out of line.
	virtual char const * what() const throw();
};
/// Base class for UCS4 input streams: std::basic_istream instantiated
/// with char_type.
typedef std::basic_istream<char_type> idocstream;
/** Base class for UCS4 output streams.
To output a single UCS4 character, use \code
os.put(c);
\endcode and not \code
os << c;
\endcode . The latter does not output the character itself, but its code
point as a number if USE_WCHAR_T is not defined. The reason is that
operator<< cannot be overloaded for our character type, which is not always
a real type but sometimes a typedef. Narrow characters of type char can be
output as usual.
*/
typedef std::basic_ostream<char_type> odocstream;
/// Forward declaration of the encoding helper struct. The file stream
/// constructors declared next take it by const reference; the full
/// definition follows later in this header.
struct SetEnc;
/// File stream for reading files with automatic conversion to UCS4.
/// The file-opening constructor takes the name of the file encoding as
/// its last argument, which defaults to "UTF-8".
/// If the encoding is changed after construction, buffering must be
/// switched off by calling rdbuf()->pubsetbuf(0, 0).
class ifdocstream : public std::basic_ifstream<char_type>
{
	typedef std::basic_ifstream<char_type> base;

public:
	ifdocstream();

	/// Create a stream with a specific encoding \p enc.
	/// \p enc is deliberately not a plain string, so that it cannot be
	/// confused with a file name.
	explicit ifdocstream(SetEnc const & enc);

	/// Create a stream from \p s (the file name), the open mode \p mode
	/// (input by default) and the encoding name \p encoding.
	explicit ifdocstream(char const * s,
		std::ios_base::openmode mode = std::ios_base::in,
		std::string const & encoding = "UTF-8");

	~ifdocstream() {}
};
/// File stream for writing files in the 8bit encoding \p encoding, with
/// automatic conversion from UCS4.
class ofdocstream : public std::basic_ofstream<char_type>
{
	typedef std::basic_ofstream<char_type> base;

public:
	ofdocstream();

	/// Create a stream with a specific encoding \p enc.
	/// \p enc is deliberately not a plain string, so that it cannot be
	/// confused with a file name.
	explicit ofdocstream(SetEnc const & enc);

	/// Create a stream from \p s (the file name), the open mode \p mode
	/// (output with truncation by default) and the encoding name
	/// \p encoding, which defaults to "UTF-8".
	explicit ofdocstream(char const * s,
		std::ios_base::openmode mode = std::ios_base::out | std::ios_base::trunc,
		std::string const & encoding = "UTF-8");

	~ofdocstream() {}

	/// NOTE(review): presumably re-initialises the stream for
	/// \p encoding; the definition is not in this header -- confirm.
	void reset(std::string const & encoding);
};
/// UCS4 input stringstream: std::basic_istringstream instantiated with
/// char_type.
typedef std::basic_istringstream<char_type> idocstringstream;
/// UCS4 output manipulator: pointer to a function that takes an odocstream
/// by reference and returns a reference to an odocstream.
typedef odocstream & (*odocstream_manip)(odocstream &);
/// Helper struct for changing stream encoding; it simply carries the name
/// of the requested encoding.
struct SetEnc {
	/**
	 * This constructor has to stay explicit.
	 * If it were implicit, an attempt to output a std::string to an
	 * odocstream would compile, but would trigger a (probably failing)
	 * encoding change instead of outputting the string. We do not
	 * define operator<<(odocstream &, std::string) because we want to
	 * avoid outputting strings with unspecified encoding.
	 */
	explicit SetEnc(std::string const & e)
		: encoding(e)
	{}

	/// Encoding name, copied from the constructor argument.
	std::string encoding;
};
/// Helper function for changing stream encoding.
/// Takes the encoding name \p encoding and returns a SetEnc by value
/// (presumably one carrying \p encoding; the definition is not in this
/// header). The result is meant to be streamed into a docstream.
SetEnc setEncoding(std::string const & encoding);
/** Change the encoding of \p os to \p e.encoding.
\p e.encoding must be a valid iconv name of an 8bit encoding.
This does nothing if the stream is not a file stream, since only
file streams have an associated 8bit encoding.
Usage: \code
os << setEncoding("ISO-8859-1");
\endcode
\return a reference to an odocstream (presumably \p os itself, so that
calls can be chained -- the definition is not in this header).
*/
odocstream & operator<<(odocstream & os, SetEnc e);
/// Overload of the operator above for input streams.
/// NOTE(review): this overload is also spelled operator<< (not operator>>),
/// so an idocstream takes a SetEnc with the same << syntax as an
/// odocstream -- confirm that this is intentional.
idocstream & operator<<(idocstream & os, SetEnc e);
}
#endif