Fix byte order problems

* src/support/unicode.C
	(bytes_to_ucs4): make it work on big endian machines
	(bytes_to_ucs2): make it work on little endian machines
	(ucs2_to_ucs4): make it work on little endian machines
	(utf8_to_ucs4): Invoke iconv with explicit BE suffix
	(ucs2_to_ucs4): Invoke iconv with explicit BE suffix
	(ucs4_to_ucs2): Invoke iconv with explicit BE suffix
	(ucs4_to_utf8): Invoke iconv with explicit BE suffix

	* configure.ac: Check for byte order

	* development/scons/SConstruct: Ditto


git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/trunk@14890 a592a061-630c-0410-9148-cb99ea01b6c8
This commit is contained in:
Georg Baum 2006-09-04 14:43:25 +00:00
parent b05e4c7a82
commit 15be4fb8ef
3 changed files with 28 additions and 9 deletions

View File

@ -64,6 +64,9 @@ AC_PROG_CC
AC_ISC_POSIX AC_ISC_POSIX
AC_AIX AC_AIX
### we need to know the byte order for unicode conversions
AC_C_BIGENDIAN
### check which frontend we want to use ### check which frontend we want to use
LYX_USE_FRONTENDS LYX_USE_FRONTENDS

View File

@ -1100,6 +1100,10 @@ int count()
(spell_engine is not None, spell_engine, (spell_engine is not None, spell_engine,
'Spell engine to use' 'Spell engine to use'
), ),
# we need to know the byte order for unicode conversions
(sys.byteorder == 'big', 'WORDS_BIGENDIAN'
'Define to 1 if your processor stores words with the most significant byte first (like Motorola and SPARC, unlike Intel and VAX).'
),
], ],
extra_items = [ extra_items = [
('#define PACKAGE "%s%s"' % (package, program_suffix), ('#define PACKAGE "%s%s"' % (package, program_suffix),

View File

@ -122,10 +122,17 @@ std::vector<boost::uint32_t> bytes_to_ucs4(std::vector<char> const & bytes)
boost::uint32_t c; boost::uint32_t c;
char * cc = reinterpret_cast<char *>(&c); char * cc = reinterpret_cast<char *>(&c);
#ifdef WORDS_BIGENDIAN
cc[0] = b1;
cc[1] = b2;
cc[2] = b3;
cc[3] = b4;
#else
cc[3] = b1; cc[3] = b1;
cc[2] = b2; cc[2] = b2;
cc[1] = b3; cc[1] = b3;
cc[0] = b4; cc[0] = b4;
#endif
if (c > 0xffff) { if (c > 0xffff) {
lyxerr << "Strange ucs4 value encountered\n"; lyxerr << "Strange ucs4 value encountered\n";
@ -158,8 +165,13 @@ std::vector<unsigned short> bytes_to_ucs2(std::vector<char> const & bytes)
unsigned short c; unsigned short c;
char * cc = reinterpret_cast<char *>(&c); char * cc = reinterpret_cast<char *>(&c);
#ifdef WORDS_BIGENDIAN
cc[0] = b1; cc[0] = b1;
cc[1] = b2; cc[1] = b2;
#else
cc[1] = b1;
cc[0] = b2;
#endif
//lyxerr << "0x" //lyxerr << "0x"
// << std::setw(2) << std::setfill('0') << int(b2) // << std::setw(2) << std::setfill('0') << int(b2)
@ -185,7 +197,7 @@ std::vector<boost::uint32_t> utf8_to_ucs4(std::vector<char> const & utf8str)
//lyxerr << "Res = " << string(res.begin(), res.end()) //lyxerr << "Res = " << string(res.begin(), res.end())
// << " (" << res.size() << ")" << endl; // << " (" << res.size() << ")" << endl;
std::vector<char> res = iconv_convert("UCS-4", "UTF-8", utf8str); std::vector<char> res = iconv_convert("UCS-4BE", "UTF-8", utf8str);
return bytes_to_ucs4(res); return bytes_to_ucs4(res);
} }
@ -200,13 +212,13 @@ ucs2_to_ucs4(std::vector<unsigned short> const & ucs2str)
//lyxerr << std::hex; //lyxerr << std::hex;
for (; cit != end; ++cit) { for (; cit != end; ++cit) {
unsigned short s = *cit; unsigned short s = *cit;
in.push_back(static_cast<char>(s & 0x00ff));
in.push_back(static_cast<char>((s & 0xff00) >> 8)); in.push_back(static_cast<char>((s & 0xff00) >> 8));
lyxerr << std::setw(2) << std::setfill('0') << (s & 0x00ff) << endl; in.push_back(static_cast<char>(s & 0x00ff));
lyxerr << std::setw(2) << std::setfill('0') << ((s & 0xff00) >> 8) << endl; lyxerr << std::setw(2) << std::setfill('0') << ((s & 0xff00) >> 8) << endl;
lyxerr << std::setw(2) << std::setfill('0') << (s & 0x00ff) << endl;
} }
std::vector<char> res = iconv_convert("UCS-4", "UCS-2", in); std::vector<char> res = iconv_convert("UCS-4BE", "UCS-2BE", in);
return bytes_to_ucs4(res); return bytes_to_ucs4(res);
} }
@ -224,7 +236,7 @@ ucs4_to_ucs2(std::vector<boost::uint32_t> const & ucs4str)
in.push_back(static_cast<char>((s & 0x0000ff00) >> 8)); in.push_back(static_cast<char>((s & 0x0000ff00) >> 8));
in.push_back(static_cast<char>(s & 0x000000ff)); in.push_back(static_cast<char>(s & 0x000000ff));
} }
std::vector<char> res = iconv_convert("UCS-2", "UCS-4", in); std::vector<char> res = iconv_convert("UCS-2BE", "UCS-4BE", in);
return bytes_to_ucs2(res); return bytes_to_ucs2(res);
} }
@ -239,7 +251,7 @@ ucs4_to_ucs2(boost::uint32_t const * s, size_t ls)
in.push_back(static_cast<char>((s[i] & 0x0000ff00) >> 8)); in.push_back(static_cast<char>((s[i] & 0x0000ff00) >> 8));
in.push_back(static_cast<char>(s[i] & 0x000000ff)); in.push_back(static_cast<char>(s[i] & 0x000000ff));
} }
std::vector<char> res = iconv_convert("UCS-2", "UCS-4", in); std::vector<char> res = iconv_convert("UCS-2BE", "UCS-4BE", in);
return bytes_to_ucs2(res); return bytes_to_ucs2(res);
} }
@ -252,7 +264,7 @@ ucs4_to_ucs2(boost::uint32_t c)
in.push_back(static_cast<char>((c & 0x00ff0000) >> 16)); in.push_back(static_cast<char>((c & 0x00ff0000) >> 16));
in.push_back(static_cast<char>((c & 0x0000ff00) >> 8)); in.push_back(static_cast<char>((c & 0x0000ff00) >> 8));
in.push_back(static_cast<char>(c & 0x000000ff)); in.push_back(static_cast<char>(c & 0x000000ff));
std::vector<char> res = iconv_convert("UCS-2", "UCS-4", in); std::vector<char> res = iconv_convert("UCS-2BE", "UCS-4BE", in);
std::vector<unsigned short> us = bytes_to_ucs2(res); std::vector<unsigned short> us = bytes_to_ucs2(res);
if (!us.empty()) if (!us.empty())
return us[0]; return us[0];
@ -273,7 +285,7 @@ std::vector<char> ucs4_to_utf8(std::vector<boost::uint32_t> const & ucs4str)
in.push_back(static_cast<char>((s & 0x0000ff00) >> 8)); in.push_back(static_cast<char>((s & 0x0000ff00) >> 8));
in.push_back(static_cast<char>(s & 0x000000ff)); in.push_back(static_cast<char>(s & 0x000000ff));
} }
std::vector<char> res = iconv_convert("UTF-8", "UCS-4", in); std::vector<char> res = iconv_convert("UTF-8", "UCS-4BE", in);
return res; return res;
} }
@ -285,6 +297,6 @@ std::vector<char> ucs4_to_utf8(boost::uint32_t c)
in.push_back(static_cast<char>((c & 0x00ff0000) >> 16)); in.push_back(static_cast<char>((c & 0x00ff0000) >> 16));
in.push_back(static_cast<char>((c & 0x0000ff00) >> 8)); in.push_back(static_cast<char>((c & 0x0000ff00) >> 8));
in.push_back(static_cast<char>(c & 0x000000ff)); in.push_back(static_cast<char>(c & 0x000000ff));
std::vector<char> res = iconv_convert("UTF-8", "UCS-4", in); std::vector<char> res = iconv_convert("UTF-8", "UCS-4BE", in);
return res; return res;
} }