2006-08-13 22:54:59 +00:00
|
|
|
|
/**
|
|
|
|
|
* \file unicode.h
|
|
|
|
|
* This file is part of LyX, the document processor.
|
|
|
|
|
* Licence details can be found in the file COPYING.
|
|
|
|
|
*
|
|
|
|
|
* \author Lars Gullik Bjønnes
|
|
|
|
|
*
|
|
|
|
|
* Full author contact details are available in file CREDITS.
|
|
|
|
|
*
|
|
|
|
|
* A collection of unicode conversion functions, using iconv.
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
#ifndef LYX_SUPPORT_UNICODE_H
|
|
|
|
|
#define LYX_SUPPORT_UNICODE_H
|
|
|
|
|
|
2006-09-13 17:11:39 +00:00
|
|
|
|
#include "support/types.h"
|
|
|
|
|
|
2006-10-29 21:59:59 +00:00
|
|
|
|
#include <string>
|
2006-08-13 22:54:59 +00:00
|
|
|
|
#include <vector>
|
|
|
|
|
|
2006-10-21 00:16:43 +00:00
|
|
|
|
|
|
|
|
|
namespace lyx {
|
|
|
|
|
|
2006-10-29 21:59:59 +00:00
|
|
|
|
/**
 * Converter between two encodings, using iconv.
 *
 * The encoding names are kept in \c tocode_ and \c fromcode_; all other
 * implementation data lives behind the forward-declared \c Private struct,
 * so this header does not expose any iconv types.
 *
 * NOTE(review): the class holds a raw \c Private pointer and declares a
 * destructor, but no copy constructor or assignment operator is declared
 * here. Confirm against the implementation that copying an instance is
 * safe before relying on it.
 */
class IconvProcessor
{
public:
	/// Create a processor that converts from \p fromcode to \p tocode.
	/// Both encoding names default to the empty string.
	IconvProcessor(
		char const * tocode = "",
		char const * fromcode = "");

	~IconvProcessor();

	/// convert any data from \c fromcode to \c tocode unicode format.
	/// \param in_buffer the data to convert.
	/// \param in_size presumably the size of \p in_buffer in bytes
	///        -- TODO confirm against the implementation.
	/// \param out_buffer receives the converted data.
	/// \param max_out_size presumably the capacity of \p out_buffer in
	///        bytes -- TODO confirm against the implementation.
	/// \return the number of bytes of the converted output buffer.
	int convert(
		char const * in_buffer,
		size_t in_size,
		char * out_buffer,
		size_t max_out_size);

private:
	/// open iconv.
	/// \return true if the processor is ready to use.
	bool init();

	/// name of the encoding to convert to.
	std::string tocode_;
	/// name of the encoding to convert from.
	std::string fromcode_;

	/// opaque implementation data, defined in the implementation file.
	struct Private;
	Private * pimpl_;
};
|
|
|
|
|
|
2006-09-10 18:34:24 +00:00
|
|
|
|
// utf8_to_ucs4
|
|
|
|
|
|
|
|
|
|
// A single codepoint conversion for utf8_to_ucs4 does not make
|
|
|
|
|
// sense, so that function is left out.
|
|
|
|
|
|
2006-10-21 00:16:43 +00:00
|
|
|
|
std::vector<lyx::char_type> utf8_to_ucs4(std::vector<char> const & utf8str);
|
2006-08-13 22:54:59 +00:00
|
|
|
|
|
2006-10-21 00:16:43 +00:00
|
|
|
|
std::vector<lyx::char_type> utf8_to_ucs4(char const * utf8str, size_t ls);
|
2006-09-10 18:34:24 +00:00
|
|
|
|
|
|
|
|
|
// ucs2_to_ucs4
|
|
|
|
|
|
2006-10-21 00:16:43 +00:00
|
|
|
|
lyx::char_type ucs2_to_ucs4(unsigned short c);
|
2006-09-10 18:34:24 +00:00
|
|
|
|
|
2006-09-13 17:11:39 +00:00
|
|
|
|
std::vector<lyx::char_type>
|
2006-08-13 22:54:59 +00:00
|
|
|
|
ucs2_to_ucs4(std::vector<unsigned short> const & ucs2str);
|
|
|
|
|
|
2006-09-13 17:11:39 +00:00
|
|
|
|
std::vector<lyx::char_type>
|
2006-09-10 18:34:24 +00:00
|
|
|
|
ucs2_to_ucs4(unsigned short const * ucs2str, size_t ls);
|
|
|
|
|
|
|
|
|
|
// ucs4_to_ucs2
|
|
|
|
|
|
2006-10-21 00:16:43 +00:00
|
|
|
|
unsigned short ucs4_to_ucs2(lyx::char_type c);
|
2006-09-10 18:34:24 +00:00
|
|
|
|
|
2006-08-13 22:54:59 +00:00
|
|
|
|
std::vector<unsigned short>
|
2006-09-13 17:11:39 +00:00
|
|
|
|
ucs4_to_ucs2(std::vector<lyx::char_type> const & ucs4str);
|
2006-08-13 22:54:59 +00:00
|
|
|
|
|
2006-10-21 00:16:43 +00:00
|
|
|
|
std::vector<unsigned short> ucs4_to_ucs2(lyx::char_type const * s, size_t ls);
|
2006-08-13 22:54:59 +00:00
|
|
|
|
|
2006-09-10 18:34:24 +00:00
|
|
|
|
// ucs4_to_utf8
|
|
|
|
|
|
2006-10-21 00:16:43 +00:00
|
|
|
|
std::vector<char> ucs4_to_utf8(lyx::char_type c);
|
2006-08-13 22:54:59 +00:00
|
|
|
|
|
2006-10-21 00:16:43 +00:00
|
|
|
|
std::vector<char> ucs4_to_utf8(std::vector<lyx::char_type> const & ucs4str);
|
2006-08-13 22:54:59 +00:00
|
|
|
|
|
2006-10-21 00:16:43 +00:00
|
|
|
|
std::vector<char> ucs4_to_utf8(lyx::char_type const * ucs4str, size_t ls);
|
2006-08-13 22:54:59 +00:00
|
|
|
|
|
2006-10-26 15:01:45 +00:00
|
|
|
|
/// convert \p s from encoding \p encoding to ucs4.
|
|
|
|
|
/// \p encoding must be a valid iconv 8bit encoding
|
|
|
|
|
std::vector<lyx::char_type>
|
|
|
|
|
eightbit_to_ucs4(char const * s, size_t ls, std::string const & encoding);
|
|
|
|
|
|
|
|
|
|
/// convert \p s from ucs4 to encoding \p encoding.
|
|
|
|
|
/// \p encoding must be a valid iconv 8bit encoding
|
|
|
|
|
std::vector<char>
|
|
|
|
|
ucs4_to_eightbit(lyx::char_type const * ucs4str, size_t ls, std::string const & encoding);
|
|
|
|
|
|
2006-10-17 11:58:21 +00:00
|
|
|
|
/// Codeset name for UCS-4, defined in the implementation file.
/// NOTE(review): presumably the encoding name handed to iconv -- confirm.
extern char const * ucs4_codeset;
/// Codeset name for UCS-2, defined in the implementation file.
/// NOTE(review): presumably the encoding name handed to iconv -- confirm.
extern char const * ucs2_codeset;
|
|
|
|
|
|
2006-10-21 00:16:43 +00:00
|
|
|
|
|
|
|
|
|
} // namespace lyx
|
|
|
|
|
|
2006-08-13 22:54:59 +00:00
|
|
|
|
#endif
|