diff --git a/src/support/unicode.C b/src/support/unicode.C index 3f431dbab6..5c09a0d73b 100644 --- a/src/support/unicode.C +++ b/src/support/unicode.C @@ -20,10 +20,8 @@ #include #include -#include using std::endl; -using std::string; namespace { @@ -35,36 +33,42 @@ namespace { char const * ucs2_codeset = "UCS-2LE"; #endif -std::vector -iconv_convert(std::string const & tocode, std::string const & fromcode, - std::vector const & buf) +template +std::vector +iconv_convert(iconv_t * cd, + char const * tocode, + char const * fromcode, + InType const * buf, + size_t buflen) { - if (buf.empty()) - return std::vector(); + if (buflen == 0) + return std::vector(); - iconv_t cd = iconv_open(tocode.c_str(), fromcode.c_str()); - if (cd == (iconv_t)(-1)) { - lyxerr << "Error returned from iconv_open" << endl; - switch (errno) { - case EINVAL: - lyxerr << "EINVAL The conversion from " << fromcode - << " to " << tocode - << " is not supported by the implementation." - << endl; - break; - default: - lyxerr << "\tSome other error: " << errno << endl; - break; + if (*cd == (iconv_t)(-1)) { + *cd = iconv_open(tocode, fromcode); + if (*cd == (iconv_t)(-1)) { + lyxerr << "Error returned from iconv_open" << endl; + switch (errno) { + case EINVAL: + lyxerr << "EINVAL The conversion from " << fromcode + << " to " << tocode + << " is not supported by the implementation." + << endl; + break; + default: + lyxerr << "\tSome other error: " << errno << endl; + break; + } } } - char ICONV_CONST * inbuf = const_cast(&buf[0]); - size_t inbytesleft = buf.size(); + char ICONV_CONST * inbuf = const_cast(reinterpret_cast(buf)); + size_t inbytesleft = buflen * sizeof(InType); static char out[1000]; char * outbuf = out; size_t outbytesleft = 1000; - size_t res = iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft); + size_t res = iconv(*cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft); if (res == (size_t)(-1)) { lyxerr << "Error returned from iconv" << endl; @@ -78,7 +82,7 @@ iconv_convert(std::string const & tocode, std::string const & fromcode, << "When converting from " << fromcode << " to " << tocode << ".\n"; lyxerr << "Input: " << std::hex; - for (size_t i = 0; i < buf.size(); ++i) { + for (size_t i = 0; i < buflen; ++i) { unsigned char const b = buf[i]; lyxerr << "0x" << int(b) << " "; } @@ -90,7 +94,7 @@ iconv_convert(std::string const & tocode, std::string const & fromcode, << "When converting from " << fromcode << " to " << tocode << ".\n"; lyxerr << "Input: " << std::hex; - for (size_t i = 0; i < buf.size(); ++i) { + for (size_t i = 0; i < buflen; ++i) { unsigned char const b = buf[i]; lyxerr << "0x" << int(b) << " "; } @@ -100,11 +104,12 @@ iconv_convert(std::string const & tocode, std::string const & fromcode, lyxerr << "\tSome other error: " << errno << endl; break; } - } - - if (iconv_close(cd) == -1) { - lyxerr << "Error returned from iconv_close(" - << errno << ")" << endl; + // We got an error so we close down the conversion engine + if (iconv_close(*cd) == -1) { + lyxerr << "Error returned from iconv_close(" + << errno << ")" << endl; + } + *cd = (iconv_t)(-1); } //lyxerr << std::dec; @@ -112,22 +117,8 @@ iconv_convert(std::string const & tocode, std::string const & fromcode, //lyxerr << "Outbytesleft: " << outbytesleft << endl; int bytes = 1000 - outbytesleft; - std::vector outvec(out, out + bytes); - return outvec; -} - - -std::vector bytes_to_ucs4(std::vector const & bytes) -{ - boost::uint32_t const * tmp = reinterpret_cast(&bytes[0]); - return std::vector(tmp, tmp + bytes.size() / 4); -} - - -std::vector bytes_to_ucs2(std::vector const & bytes) -{ - unsigned short const * tmp = reinterpret_cast(&bytes[0]); - return std::vector(tmp, tmp + bytes.size() / 2); + RetType const * tmp = reinterpret_cast(out); + return std::vector(tmp, tmp + bytes / sizeof(RetType)); } } // anon namespace @@ -135,69 +126,55 @@ std::vector bytes_to_ucs2(std::vector const & bytes) std::vector utf8_to_ucs4(std::vector const & utf8str) { - //lyxerr << "Buff = " << string(utf8str.begin(), utf8str.end()) - // << " (" << utf8str.size() << ")" << endl; - //lyxerr << "Res = " << string(res.begin(), res.end()) - // << " (" << res.size() << ")" << endl; - - std::vector res = iconv_convert(ucs4_codeset, "UTF-8", utf8str); - return bytes_to_ucs4(res); + static iconv_t cd = (iconv_t)(-1); + return iconv_convert(&cd, ucs4_codeset, "UTF-8", + &utf8str[0], utf8str.size()); } std::vector ucs2_to_ucs4(std::vector const & ucs2str) { - char const * tin = reinterpret_cast(&ucs2str[0]); - std::vector in(tin, tin + ucs2str.size() * 2); - std::vector res = iconv_convert(ucs4_codeset, ucs2_codeset, in); - return bytes_to_ucs4(res); + static iconv_t cd = (iconv_t)(-1); + return iconv_convert(&cd, ucs4_codeset, ucs2_codeset, + &ucs2str[0], ucs2str.size()); } std::vector ucs4_to_ucs2(std::vector const & ucs4str) { - char const * tin = reinterpret_cast(&ucs4str[0]); - std::vector in(tin, tin + ucs4str.size() * 4); - std::vector res = iconv_convert(ucs2_codeset, ucs4_codeset, in); - return bytes_to_ucs2(res); + return ucs4_to_ucs2(&ucs4str[0], ucs4str.size()); } std::vector ucs4_to_ucs2(boost::uint32_t const * s, size_t ls) { - char const * tin = reinterpret_cast(s); - std::vector in(tin, tin + ls * 4); - std::vector res = iconv_convert(ucs2_codeset, ucs4_codeset, in); - return bytes_to_ucs2(res); + static iconv_t cd = (iconv_t)(-1); + return iconv_convert(&cd, ucs2_codeset, ucs4_codeset, + s, ls); } unsigned short ucs4_to_ucs2(boost::uint32_t c) { - char const * tin = reinterpret_cast(&c); - std::vector in(tin, tin + 4); - std::vector res = iconv_convert(ucs2_codeset, ucs4_codeset, in); - return bytes_to_ucs2(res)[0]; + boost::uint32_t tmp[] = { c, 0 }; + return ucs4_to_ucs2(tmp, 1)[0]; } std::vector ucs4_to_utf8(std::vector const & ucs4str) { - char const * tin = reinterpret_cast(&ucs4str[0]); - std::vector in(tin, tin + ucs4str.size() * 4); - std::vector res = iconv_convert("UTF-8", ucs4_codeset, in); - return res; + static iconv_t cd = (iconv_t)(-1); + return iconv_convert(&cd, "UTF-8", ucs4_codeset, + &ucs4str[0], ucs4str.size()); } std::vector ucs4_to_utf8(boost::uint32_t c) { - char const * tin = reinterpret_cast(&c); - std::vector in(tin, tin + 4); - std::vector res = iconv_convert("UTF-8", ucs4_codeset, in); - return res; + static iconv_t cd = (iconv_t)(-1); + return iconv_convert(&cd, "UTF-8", ucs4_codeset, &c, 1); }