From bb61b2655ff6c0efd9e9ed9285c1307ef4625215 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lars=20Gullik=20Bj=C3=B8nnes?= Date: Sun, 10 Sep 2006 18:34:24 +0000 Subject: [PATCH] More unicode work. Fixup some usages. git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/trunk@14968 a592a061-630c-0410-9148-cb99ea01b6c8 --- src/frontends/qt3/QLyXKeySym.C | 12 +++--- src/frontends/qt3/qt_helpers.C | 5 +-- src/lyxlex_pimpl.C | 6 +-- src/support/docstring.C | 4 +- src/support/unicode.C | 67 ++++++++++++++++++++++++---------- src/support/unicode.h | 29 +++++++++++++-- 6 files changed, 87 insertions(+), 36 deletions(-) diff --git a/src/frontends/qt3/QLyXKeySym.C b/src/frontends/qt3/QLyXKeySym.C index 27ab911f86..fd01db4c51 100644 --- a/src/frontends/qt3/QLyXKeySym.C +++ b/src/frontends/qt3/QLyXKeySym.C @@ -202,18 +202,18 @@ string QLyXKeySym::getSymbolName() const size_t QLyXKeySym::getUCSEncoded() const { - unsigned short const * ptr = text_.ucs2(); - std::vector tmp(ptr, ptr + text_.length()); + if (text_.isEmpty()) + return 0; + + //unsigned short const * ptr = text_.ucs2(); + //std::vector tmp(ptr, ptr + text_.length()); //lyxerr << "Data is " << tmp << endl; //lyxerr << "Length is " << text_.length() << endl; - if (text_.isEmpty()) - return 0; - //size_t res = utf8_to_ucs4(tmp, tmp.length()); //lyxerr << "Res is " << res << endl; - return ucs2_to_ucs4(tmp)[0]; + return ucs2_to_ucs4(text_.ucs2(), text_.length())[0]; } diff --git a/src/frontends/qt3/qt_helpers.C b/src/frontends/qt3/qt_helpers.C index 2f89335547..0458e9a174 100644 --- a/src/frontends/qt3/qt_helpers.C +++ b/src/frontends/qt3/qt_helpers.C @@ -120,7 +120,7 @@ QString const toqstr(string const & str) QString const toqstr(docstring const & str) { std::vector ucs2 = - ucs4_to_ucs2(str.c_str(), str.length()); + ucs4_to_ucs2(str.data(), str.length()); ucs2.push_back('\0'); return QString::fromUcs2(&ucs2[0]); } @@ -149,8 +149,7 @@ string const fromqstr(QString const & str) docstring const qstring_to_ucs4(QString const & str) { unsigned short const * const ucs2 = str.ucs2(); - std::vector const ucs4 = ucs2_to_ucs4( - std::vector(ucs2, ucs2 + str.length())); + std::vector const ucs4 = ucs2_to_ucs4(ucs2, str.length()); return docstring(ucs4.begin(), ucs4.end()); } diff --git a/src/lyxlex_pimpl.C b/src/lyxlex_pimpl.C index 3d66b7efd1..9007d31c44 100644 --- a/src/lyxlex_pimpl.C +++ b/src/lyxlex_pimpl.C @@ -73,9 +73,9 @@ string const LyXLex::Pimpl::getString() const lyx::docstring const LyXLex::Pimpl::getDocString() const { - std::vector res = utf8_to_ucs4(buff); - lyx::docstring dstr(res.begin(), res.end()); - return dstr; + std::vector res = utf8_to_ucs4(buff); + lyx::docstring dstr(res.begin(), res.end()); + return dstr; } diff --git a/src/support/docstring.C b/src/support/docstring.C index de3404c41e..fb3115ba9a 100644 --- a/src/support/docstring.C +++ b/src/support/docstring.C @@ -41,7 +41,7 @@ docstring const from_ascii(std::string const & ascii) docstring const from_utf8(std::string const & utf8) { std::vector const ucs4 = - utf8_to_ucs4(std::vector(utf8.begin(), utf8.end())); + utf8_to_ucs4(utf8.data(), utf8.size()); return docstring(ucs4.begin(), ucs4.end()); } @@ -49,7 +49,7 @@ docstring const from_utf8(std::string const & utf8) std::string const to_utf8(docstring const & ucs4) { std::vector const utf8 = - ucs4_to_utf8(std::vector(ucs4.begin(), ucs4.end())); + ucs4_to_utf8(ucs4.data(), ucs4.size()); return std::string(utf8.begin(), utf8.end()); } diff --git a/src/support/unicode.C b/src/support/unicode.C index 5c09a0d73b..650962ab9b 100644 --- a/src/support/unicode.C +++ b/src/support/unicode.C @@ -125,19 +125,47 @@ iconv_convert(iconv_t * cd, std::vector utf8_to_ucs4(std::vector const & utf8str) +{ + return utf8_to_ucs4(&utf8str[0], utf8str.size()); +} + + +std::vector +utf8_to_ucs4(char const * utf8str, size_t ls) { static iconv_t cd = (iconv_t)(-1); return iconv_convert(&cd, ucs4_codeset, "UTF-8", - &utf8str[0], utf8str.size()); + utf8str, ls); +} + + +boost::uint32_t +ucs2_to_ucs4(unsigned short c) +{ + return ucs2_to_ucs4(&c, 1)[0]; } std::vector ucs2_to_ucs4(std::vector const & ucs2str) +{ + return ucs2_to_ucs4(&ucs2str[0], ucs2str.size()); +} + + +std::vector +ucs2_to_ucs4(unsigned short const * ucs2str, size_t ls) { static iconv_t cd = (iconv_t)(-1); return iconv_convert(&cd, ucs4_codeset, ucs2_codeset, - &ucs2str[0], ucs2str.size()); + ucs2str, ls); +} + + +unsigned short +ucs4_to_ucs2(boost::uint32_t c) +{ + return ucs4_to_ucs2(&c, 1)[0]; } @@ -157,24 +185,25 @@ ucs4_to_ucs2(boost::uint32_t const * s, size_t ls) } -unsigned short -ucs4_to_ucs2(boost::uint32_t c) -{ - boost::uint32_t tmp[] = { c, 0 }; - return ucs4_to_ucs2(tmp, 1)[0]; -} - - -std::vector ucs4_to_utf8(std::vector const & ucs4str) -{ - static iconv_t cd = (iconv_t)(-1); - return iconv_convert(&cd, "UTF-8", ucs4_codeset, - &ucs4str[0], ucs4str.size()); -} - - -std::vector ucs4_to_utf8(boost::uint32_t c) +std::vector +ucs4_to_utf8(boost::uint32_t c) { static iconv_t cd = (iconv_t)(-1); return iconv_convert(&cd, "UTF-8", ucs4_codeset, &c, 1); } + + +std::vector +ucs4_to_utf8(std::vector const & ucs4str) +{ + return ucs4_to_utf8(&ucs4str[0], ucs4str.size()); +} + + +std::vector +ucs4_to_utf8(boost::uint32_t const * ucs4str, size_t ls) +{ + static iconv_t cd = (iconv_t)(-1); + return iconv_convert(&cd, "UTF-8", ucs4_codeset, + ucs4str, ls); +} diff --git a/src/support/unicode.h b/src/support/unicode.h index 21ff21afb5..977ded6fd8 100644 --- a/src/support/unicode.h +++ b/src/support/unicode.h @@ -16,25 +16,48 @@ #include #include +// utf8_to_ucs4 + +// A single codepoint conversion for utf8_to_ucs4 does not make +// sense, so that function is left out. + std::vector utf8_to_ucs4(std::vector const & utf8str); +std::vector +utf8_to_ucs4(char const * utf8str, size_t ls); + +// ucs2_to_ucs4 + +boost::uint32_t +ucs2_to_ucs4(unsigned short c); + std::vector ucs2_to_ucs4(std::vector const & ucs2str); +std::vector +ucs2_to_ucs4(unsigned short const * ucs2str, size_t ls); + +// ucs4_to_ucs2 + +unsigned short +ucs4_to_ucs2(boost::uint32_t c); + std::vector ucs4_to_ucs2(std::vector const & ucs4str); std::vector ucs4_to_ucs2(boost::uint32_t const * s, size_t ls); -unsigned short -ucs4_to_ucs2(boost::uint32_t c); +// ucs4_to_utf8 + +std::vector +ucs4_to_utf8(boost::uint32_t c); std::vector ucs4_to_utf8(std::vector const & ucs4str); std::vector -ucs4_to_utf8(boost::uint32_t c); +ucs4_to_utf8(boost::uint32_t const * ucs4str, size_t ls); #endif