mirror of
https://git.lyx.org/repos/lyx.git
synced 2024-11-23 10:18:50 +00:00
Some further work and simplification. Now the conversion engine is only restarted on error.
git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/trunk@14967 a592a061-630c-0410-9148-cb99ea01b6c8
This commit is contained in:
parent
8fd2299744
commit
2a06b31354
@ -20,10 +20,8 @@
|
|||||||
|
|
||||||
#include <cerrno>
|
#include <cerrno>
|
||||||
#include <iomanip>
|
#include <iomanip>
|
||||||
#include <string>
|
|
||||||
|
|
||||||
using std::endl;
|
using std::endl;
|
||||||
using std::string;
|
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
@ -35,15 +33,20 @@ namespace {
|
|||||||
char const * ucs2_codeset = "UCS-2LE";
|
char const * ucs2_codeset = "UCS-2LE";
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
std::vector<char>
|
template<typename RetType, typename InType>
|
||||||
iconv_convert(std::string const & tocode, std::string const & fromcode,
|
std::vector<RetType>
|
||||||
std::vector<char> const & buf)
|
iconv_convert(iconv_t * cd,
|
||||||
|
char const * tocode,
|
||||||
|
char const * fromcode,
|
||||||
|
InType const * buf,
|
||||||
|
size_t buflen)
|
||||||
{
|
{
|
||||||
if (buf.empty())
|
if (buflen == 0)
|
||||||
return std::vector<char>();
|
return std::vector<RetType>();
|
||||||
|
|
||||||
iconv_t cd = iconv_open(tocode.c_str(), fromcode.c_str());
|
if (*cd == (iconv_t)(-1)) {
|
||||||
if (cd == (iconv_t)(-1)) {
|
*cd = iconv_open(tocode, fromcode);
|
||||||
|
if (*cd == (iconv_t)(-1)) {
|
||||||
lyxerr << "Error returned from iconv_open" << endl;
|
lyxerr << "Error returned from iconv_open" << endl;
|
||||||
switch (errno) {
|
switch (errno) {
|
||||||
case EINVAL:
|
case EINVAL:
|
||||||
@ -57,14 +60,15 @@ iconv_convert(std::string const & tocode, std::string const & fromcode,
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
char ICONV_CONST * inbuf = const_cast<char ICONV_CONST *>(&buf[0]);
|
char ICONV_CONST * inbuf = const_cast<char ICONV_CONST *>(reinterpret_cast<char const *>(buf));
|
||||||
size_t inbytesleft = buf.size();
|
size_t inbytesleft = buflen * sizeof(InType);
|
||||||
static char out[1000];
|
static char out[1000];
|
||||||
char * outbuf = out;
|
char * outbuf = out;
|
||||||
size_t outbytesleft = 1000;
|
size_t outbytesleft = 1000;
|
||||||
|
|
||||||
size_t res = iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
|
size_t res = iconv(*cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
|
||||||
|
|
||||||
if (res == (size_t)(-1)) {
|
if (res == (size_t)(-1)) {
|
||||||
lyxerr << "Error returned from iconv" << endl;
|
lyxerr << "Error returned from iconv" << endl;
|
||||||
@ -78,7 +82,7 @@ iconv_convert(std::string const & tocode, std::string const & fromcode,
|
|||||||
<< "When converting from " << fromcode
|
<< "When converting from " << fromcode
|
||||||
<< " to " << tocode << ".\n";
|
<< " to " << tocode << ".\n";
|
||||||
lyxerr << "Input: " << std::hex;
|
lyxerr << "Input: " << std::hex;
|
||||||
for (size_t i = 0; i < buf.size(); ++i) {
|
for (size_t i = 0; i < buflen; ++i) {
|
||||||
unsigned char const b = buf[i];
|
unsigned char const b = buf[i];
|
||||||
lyxerr << "0x" << int(b) << " ";
|
lyxerr << "0x" << int(b) << " ";
|
||||||
}
|
}
|
||||||
@ -90,7 +94,7 @@ iconv_convert(std::string const & tocode, std::string const & fromcode,
|
|||||||
<< "When converting from " << fromcode
|
<< "When converting from " << fromcode
|
||||||
<< " to " << tocode << ".\n";
|
<< " to " << tocode << ".\n";
|
||||||
lyxerr << "Input: " << std::hex;
|
lyxerr << "Input: " << std::hex;
|
||||||
for (size_t i = 0; i < buf.size(); ++i) {
|
for (size_t i = 0; i < buflen; ++i) {
|
||||||
unsigned char const b = buf[i];
|
unsigned char const b = buf[i];
|
||||||
lyxerr << "0x" << int(b) << " ";
|
lyxerr << "0x" << int(b) << " ";
|
||||||
}
|
}
|
||||||
@ -100,34 +104,21 @@ iconv_convert(std::string const & tocode, std::string const & fromcode,
|
|||||||
lyxerr << "\tSome other error: " << errno << endl;
|
lyxerr << "\tSome other error: " << errno << endl;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
// We got an error so we close down the conversion engine
|
||||||
|
if (iconv_close(*cd) == -1) {
|
||||||
if (iconv_close(cd) == -1) {
|
|
||||||
lyxerr << "Error returned from iconv_close("
|
lyxerr << "Error returned from iconv_close("
|
||||||
<< errno << ")" << endl;
|
<< errno << ")" << endl;
|
||||||
}
|
}
|
||||||
|
*cd = (iconv_t)(-1);
|
||||||
|
}
|
||||||
|
|
||||||
//lyxerr << std::dec;
|
//lyxerr << std::dec;
|
||||||
//lyxerr << "Inbytesleft: " << inbytesleft << endl;
|
//lyxerr << "Inbytesleft: " << inbytesleft << endl;
|
||||||
//lyxerr << "Outbytesleft: " << outbytesleft << endl;
|
//lyxerr << "Outbytesleft: " << outbytesleft << endl;
|
||||||
int bytes = 1000 - outbytesleft;
|
int bytes = 1000 - outbytesleft;
|
||||||
|
|
||||||
std::vector<char> outvec(out, out + bytes);
|
RetType const * tmp = reinterpret_cast<RetType const *>(out);
|
||||||
return outvec;
|
return std::vector<RetType>(tmp, tmp + bytes / sizeof(RetType));
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
std::vector<boost::uint32_t> bytes_to_ucs4(std::vector<char> const & bytes)
|
|
||||||
{
|
|
||||||
boost::uint32_t const * tmp = reinterpret_cast<uint32_t const *>(&bytes[0]);
|
|
||||||
return std::vector<boost::uint32_t>(tmp, tmp + bytes.size() / 4);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
std::vector<unsigned short> bytes_to_ucs2(std::vector<char> const & bytes)
|
|
||||||
{
|
|
||||||
unsigned short const * tmp = reinterpret_cast<unsigned short const *>(&bytes[0]);
|
|
||||||
return std::vector<unsigned short>(tmp, tmp + bytes.size() / 2);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
} // anon namespace
|
} // anon namespace
|
||||||
@ -135,69 +126,55 @@ std::vector<unsigned short> bytes_to_ucs2(std::vector<char> const & bytes)
|
|||||||
|
|
||||||
std::vector<boost::uint32_t> utf8_to_ucs4(std::vector<char> const & utf8str)
|
std::vector<boost::uint32_t> utf8_to_ucs4(std::vector<char> const & utf8str)
|
||||||
{
|
{
|
||||||
//lyxerr << "Buff = " << string(utf8str.begin(), utf8str.end())
|
static iconv_t cd = (iconv_t)(-1);
|
||||||
// << " (" << utf8str.size() << ")" << endl;
|
return iconv_convert<boost::uint32_t>(&cd, ucs4_codeset, "UTF-8",
|
||||||
//lyxerr << "Res = " << string(res.begin(), res.end())
|
&utf8str[0], utf8str.size());
|
||||||
// << " (" << res.size() << ")" << endl;
|
|
||||||
|
|
||||||
std::vector<char> res = iconv_convert(ucs4_codeset, "UTF-8", utf8str);
|
|
||||||
return bytes_to_ucs4(res);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
std::vector<boost::uint32_t>
|
std::vector<boost::uint32_t>
|
||||||
ucs2_to_ucs4(std::vector<unsigned short> const & ucs2str)
|
ucs2_to_ucs4(std::vector<unsigned short> const & ucs2str)
|
||||||
{
|
{
|
||||||
char const * tin = reinterpret_cast<char const *>(&ucs2str[0]);
|
static iconv_t cd = (iconv_t)(-1);
|
||||||
std::vector<char> in(tin, tin + ucs2str.size() * 2);
|
return iconv_convert<boost::uint32_t>(&cd, ucs4_codeset, ucs2_codeset,
|
||||||
std::vector<char> res = iconv_convert(ucs4_codeset, ucs2_codeset, in);
|
&ucs2str[0], ucs2str.size());
|
||||||
return bytes_to_ucs4(res);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
std::vector<unsigned short>
|
std::vector<unsigned short>
|
||||||
ucs4_to_ucs2(std::vector<boost::uint32_t> const & ucs4str)
|
ucs4_to_ucs2(std::vector<boost::uint32_t> const & ucs4str)
|
||||||
{
|
{
|
||||||
char const * tin = reinterpret_cast<char const *>(&ucs4str[0]);
|
return ucs4_to_ucs2(&ucs4str[0], ucs4str.size());
|
||||||
std::vector<char> in(tin, tin + ucs4str.size() * 4);
|
|
||||||
std::vector<char> res = iconv_convert(ucs2_codeset, ucs4_codeset, in);
|
|
||||||
return bytes_to_ucs2(res);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
std::vector<unsigned short>
|
std::vector<unsigned short>
|
||||||
ucs4_to_ucs2(boost::uint32_t const * s, size_t ls)
|
ucs4_to_ucs2(boost::uint32_t const * s, size_t ls)
|
||||||
{
|
{
|
||||||
char const * tin = reinterpret_cast<char const *>(s);
|
static iconv_t cd = (iconv_t)(-1);
|
||||||
std::vector<char> in(tin, tin + ls * 4);
|
return iconv_convert<unsigned short>(&cd, ucs2_codeset, ucs4_codeset,
|
||||||
std::vector<char> res = iconv_convert(ucs2_codeset, ucs4_codeset, in);
|
s, ls);
|
||||||
return bytes_to_ucs2(res);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
unsigned short
|
unsigned short
|
||||||
ucs4_to_ucs2(boost::uint32_t c)
|
ucs4_to_ucs2(boost::uint32_t c)
|
||||||
{
|
{
|
||||||
char const * tin = reinterpret_cast<char const *>(&c);
|
boost::uint32_t tmp[] = { c, 0 };
|
||||||
std::vector<char> in(tin, tin + 4);
|
return ucs4_to_ucs2(tmp, 1)[0];
|
||||||
std::vector<char> res = iconv_convert(ucs2_codeset, ucs4_codeset, in);
|
|
||||||
return bytes_to_ucs2(res)[0];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
std::vector<char> ucs4_to_utf8(std::vector<boost::uint32_t> const & ucs4str)
|
std::vector<char> ucs4_to_utf8(std::vector<boost::uint32_t> const & ucs4str)
|
||||||
{
|
{
|
||||||
char const * tin = reinterpret_cast<char const *>(&ucs4str[0]);
|
static iconv_t cd = (iconv_t)(-1);
|
||||||
std::vector<char> in(tin, tin + ucs4str.size() * 4);
|
return iconv_convert<char>(&cd, "UTF-8", ucs4_codeset,
|
||||||
std::vector<char> res = iconv_convert("UTF-8", ucs4_codeset, in);
|
&ucs4str[0], ucs4str.size());
|
||||||
return res;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
std::vector<char> ucs4_to_utf8(boost::uint32_t c)
|
std::vector<char> ucs4_to_utf8(boost::uint32_t c)
|
||||||
{
|
{
|
||||||
char const * tin = reinterpret_cast<char const *>(&c);
|
static iconv_t cd = (iconv_t)(-1);
|
||||||
std::vector<char> in(tin, tin + 4);
|
return iconv_convert<char>(&cd, "UTF-8", ucs4_codeset, &c, 1);
|
||||||
std::vector<char> res = iconv_convert("UTF-8", ucs4_codeset, in);
|
|
||||||
return res;
|
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user