Some further work and simplification. Now the conversion engine is only restarted on error.

git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/trunk@14967 a592a061-630c-0410-9148-cb99ea01b6c8
This commit is contained in:
Lars Gullik Bjønnes 2006-09-10 16:59:36 +00:00
parent 8fd2299744
commit 2a06b31354

View File

@ -20,10 +20,8 @@
#include <cerrno>
#include <iomanip>
#include <string>
using std::endl;
using std::string;
namespace {
@ -35,15 +33,20 @@ namespace {
char const * ucs2_codeset = "UCS-2LE";
#endif
std::vector<char>
iconv_convert(std::string const & tocode, std::string const & fromcode,
std::vector<char> const & buf)
template<typename RetType, typename InType>
std::vector<RetType>
iconv_convert(iconv_t * cd,
char const * tocode,
char const * fromcode,
InType const * buf,
size_t buflen)
{
if (buf.empty())
return std::vector<char>();
if (buflen == 0)
return std::vector<RetType>();
iconv_t cd = iconv_open(tocode.c_str(), fromcode.c_str());
if (cd == (iconv_t)(-1)) {
if (*cd == (iconv_t)(-1)) {
*cd = iconv_open(tocode, fromcode);
if (*cd == (iconv_t)(-1)) {
lyxerr << "Error returned from iconv_open" << endl;
switch (errno) {
case EINVAL:
@ -57,14 +60,15 @@ iconv_convert(std::string const & tocode, std::string const & fromcode,
break;
}
}
}
char ICONV_CONST * inbuf = const_cast<char ICONV_CONST *>(&buf[0]);
size_t inbytesleft = buf.size();
char ICONV_CONST * inbuf = const_cast<char ICONV_CONST *>(reinterpret_cast<char const *>(buf));
size_t inbytesleft = buflen * sizeof(InType);
static char out[1000];
char * outbuf = out;
size_t outbytesleft = 1000;
size_t res = iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
size_t res = iconv(*cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
if (res == (size_t)(-1)) {
lyxerr << "Error returned from iconv" << endl;
@ -78,7 +82,7 @@ iconv_convert(std::string const & tocode, std::string const & fromcode,
<< "When converting from " << fromcode
<< " to " << tocode << ".\n";
lyxerr << "Input: " << std::hex;
for (size_t i = 0; i < buf.size(); ++i) {
for (size_t i = 0; i < buflen; ++i) {
unsigned char const b = buf[i];
lyxerr << "0x" << int(b) << " ";
}
@ -90,7 +94,7 @@ iconv_convert(std::string const & tocode, std::string const & fromcode,
<< "When converting from " << fromcode
<< " to " << tocode << ".\n";
lyxerr << "Input: " << std::hex;
for (size_t i = 0; i < buf.size(); ++i) {
for (size_t i = 0; i < buflen; ++i) {
unsigned char const b = buf[i];
lyxerr << "0x" << int(b) << " ";
}
@ -100,34 +104,21 @@ iconv_convert(std::string const & tocode, std::string const & fromcode,
lyxerr << "\tSome other error: " << errno << endl;
break;
}
}
if (iconv_close(cd) == -1) {
// We got an error so we close down the conversion engine
if (iconv_close(*cd) == -1) {
lyxerr << "Error returned from iconv_close("
<< errno << ")" << endl;
}
*cd = (iconv_t)(-1);
}
//lyxerr << std::dec;
//lyxerr << "Inbytesleft: " << inbytesleft << endl;
//lyxerr << "Outbytesleft: " << outbytesleft << endl;
int bytes = 1000 - outbytesleft;
std::vector<char> outvec(out, out + bytes);
return outvec;
}
std::vector<boost::uint32_t> bytes_to_ucs4(std::vector<char> const & bytes)
{
boost::uint32_t const * tmp = reinterpret_cast<uint32_t const *>(&bytes[0]);
return std::vector<boost::uint32_t>(tmp, tmp + bytes.size() / 4);
}
std::vector<unsigned short> bytes_to_ucs2(std::vector<char> const & bytes)
{
unsigned short const * tmp = reinterpret_cast<unsigned short const *>(&bytes[0]);
return std::vector<unsigned short>(tmp, tmp + bytes.size() / 2);
RetType const * tmp = reinterpret_cast<RetType const *>(out);
return std::vector<RetType>(tmp, tmp + bytes / sizeof(RetType));
}
} // anon namespace
@ -135,69 +126,55 @@ std::vector<unsigned short> bytes_to_ucs2(std::vector<char> const & bytes)
std::vector<boost::uint32_t> utf8_to_ucs4(std::vector<char> const & utf8str)
{
//lyxerr << "Buff = " << string(utf8str.begin(), utf8str.end())
// << " (" << utf8str.size() << ")" << endl;
//lyxerr << "Res = " << string(res.begin(), res.end())
// << " (" << res.size() << ")" << endl;
std::vector<char> res = iconv_convert(ucs4_codeset, "UTF-8", utf8str);
return bytes_to_ucs4(res);
static iconv_t cd = (iconv_t)(-1);
return iconv_convert<boost::uint32_t>(&cd, ucs4_codeset, "UTF-8",
&utf8str[0], utf8str.size());
}
std::vector<boost::uint32_t>
ucs2_to_ucs4(std::vector<unsigned short> const & ucs2str)
{
char const * tin = reinterpret_cast<char const *>(&ucs2str[0]);
std::vector<char> in(tin, tin + ucs2str.size() * 2);
std::vector<char> res = iconv_convert(ucs4_codeset, ucs2_codeset, in);
return bytes_to_ucs4(res);
static iconv_t cd = (iconv_t)(-1);
return iconv_convert<boost::uint32_t>(&cd, ucs4_codeset, ucs2_codeset,
&ucs2str[0], ucs2str.size());
}
std::vector<unsigned short>
ucs4_to_ucs2(std::vector<boost::uint32_t> const & ucs4str)
{
char const * tin = reinterpret_cast<char const *>(&ucs4str[0]);
std::vector<char> in(tin, tin + ucs4str.size() * 4);
std::vector<char> res = iconv_convert(ucs2_codeset, ucs4_codeset, in);
return bytes_to_ucs2(res);
return ucs4_to_ucs2(&ucs4str[0], ucs4str.size());
}
std::vector<unsigned short>
ucs4_to_ucs2(boost::uint32_t const * s, size_t ls)
{
char const * tin = reinterpret_cast<char const *>(s);
std::vector<char> in(tin, tin + ls * 4);
std::vector<char> res = iconv_convert(ucs2_codeset, ucs4_codeset, in);
return bytes_to_ucs2(res);
static iconv_t cd = (iconv_t)(-1);
return iconv_convert<unsigned short>(&cd, ucs2_codeset, ucs4_codeset,
s, ls);
}
unsigned short
ucs4_to_ucs2(boost::uint32_t c)
{
char const * tin = reinterpret_cast<char const *>(&c);
std::vector<char> in(tin, tin + 4);
std::vector<char> res = iconv_convert(ucs2_codeset, ucs4_codeset, in);
return bytes_to_ucs2(res)[0];
boost::uint32_t tmp[] = { c, 0 };
return ucs4_to_ucs2(tmp, 1)[0];
}
std::vector<char> ucs4_to_utf8(std::vector<boost::uint32_t> const & ucs4str)
{
char const * tin = reinterpret_cast<char const *>(&ucs4str[0]);
std::vector<char> in(tin, tin + ucs4str.size() * 4);
std::vector<char> res = iconv_convert("UTF-8", ucs4_codeset, in);
return res;
static iconv_t cd = (iconv_t)(-1);
return iconv_convert<char>(&cd, "UTF-8", ucs4_codeset,
&ucs4str[0], ucs4str.size());
}
std::vector<char> ucs4_to_utf8(boost::uint32_t c)
{
char const * tin = reinterpret_cast<char const *>(&c);
std::vector<char> in(tin, tin + 4);
std::vector<char> res = iconv_convert("UTF-8", ucs4_codeset, in);
return res;
static iconv_t cd = (iconv_t)(-1);
return iconv_convert<char>(&cd, "UTF-8", ucs4_codeset, &c, 1);
}