Fix character classification functions by using Qt (bugs like 3270 and 1247)

* src/support/lstrings.C
	(uppercase): Use Qt instead of the non-working libc/home-grown solution
	(lowercase): ditto
	(local_lowercase): Use qt instead of libc tolower for ucs4 chars

	* src/support/qstring_helpers.C
	(qstring_to_ucs4): Use qchar_to_ucs4 because of the assertion

	* src/support/lstrings.h: Add some documentation

	* src/support/qstring_helpers.h
	(is_utf16): New function: tests whether a UCS4 character is also a
	valid UTF-16 character (i.e. fits in a single, non-surrogate code unit)
	(qchar_to_ucs4): Assert on is_utf16()
	(ucs4_to_qchar): Replace old assertion with better is_utf16()

	* src/support/textutils.h
	(isLetterChar): Delete non-working implementation
	(isPrintable): Ditto
	(isPrintableNonspace): Ditto
	(isDigit): Ditto

	* src/support/textutils.C: New file containing the new Qt-based
	implementations of the functions declared in textutils.h

	* src/support/Makefile.am: Add textutils.C

	* development/scons/scons_manifest.py: ditto


git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/trunk@17354 a592a061-630c-0410-9148-cb99ea01b6c8
This commit is contained in:
Georg Baum 2007-02-26 09:03:21 +00:00
parent 070138899e
commit 0418d14704
7 changed files with 62 additions and 110 deletions

View File

@ -160,6 +160,7 @@ src_support_files = Split('''
socktools.C
systemcall.C
tempname.C
textutils.C
unicode.C
unlink.C
userinfo.C

View File

@ -78,6 +78,7 @@ libsupport_la_SOURCES = \
systemcall.C \
systemcall.h \
tempname.C \
textutils.C \
textutils.h \
translator.h \
types.h \

View File

@ -14,6 +14,7 @@
#include "support/lstrings.h"
#include "support/lyxlib.h"
#include "support/convert.h"
#include "support/qstring_helpers.h"
#include "debug.h"
@ -32,17 +33,6 @@
#include <algorithm>
#include <sstream>
#ifdef LIBC_WCTYPE_USES_UCS4
// We can use the libc ctype functions because we unset the LC_CTYPE
// category of the current locale in gettext.C
#include <wctype.h>
#else
// Steal some code from somewhere else, e.g. glib (look at gunicode.h)
// The code that we currently use does not really work.
#endif
using lyx::docstring;
using std::transform;
using std::string;
@ -321,38 +311,21 @@ char uppercase(char c)
}
// FIXME UNICODE
// for lowercase() and uppercase() function below when wchar_t is not used:
// 1) std::tolower() and std::toupper() are templates that
// compile fine with char_type. With the test (c >= 256) we
// do not trust these function to do the right thing with
// unicode char.
// 2) these functions use the current locale, which is wrong
// if it is not latin1 based (latin1 is a subset of UCS4).
char_type lowercase(char_type c)
{
#ifdef LIBC_WCTYPE_USES_UCS4
return towlower(c);
#else
if (c >= 256)
if (!is_utf16(c))
// We don't know how to lowercase a non-utf16 char
return c;
return tolower(c);
#endif
return qchar_to_ucs4(ucs4_to_qchar(c).toLower());
}
char_type uppercase(char_type c)
{
#ifdef LIBC_WCTYPE_USES_UCS4
return towupper(c);
#else
if (c >= 256)
if (!is_utf16(c))
// We don't know how to uppercase a non-utf16 char
return c;
return toupper(c);
#endif
return qchar_to_ucs4(ucs4_to_qchar(c).toUpper());
}
@ -361,10 +334,16 @@ namespace {
// Since we cannot use std::tolower and std::toupper directly in the
// calls to std::transform yet, we use these helper classes. (Lgb)
template<typename Char> struct local_lowercase {
Char operator()(Char c) const {
struct local_lowercase {
char operator()(char c) const {
return tolower(c);
}
char_type operator()(char_type c) const {
if (!is_utf16(c))
// We don't know how to lowercase a non-utf16 char
return c;
return qchar_to_ucs4(ucs4_to_qchar(c).toLower());
}
};
struct local_uppercase {
@ -384,7 +363,7 @@ template<typename Char> struct local_ascii_lowercase {
string const lowercase(string const & a)
{
string tmp(a);
transform(tmp.begin(), tmp.end(), tmp.begin(), local_lowercase<char>());
transform(tmp.begin(), tmp.end(), tmp.begin(), local_lowercase());
return tmp;
}
@ -392,7 +371,7 @@ string const lowercase(string const & a)
docstring const lowercase(docstring const & a)
{
docstring tmp(a);
transform(tmp.begin(), tmp.end(), tmp.begin(), local_lowercase<char_type>());
transform(tmp.begin(), tmp.end(), tmp.begin(), local_lowercase());
return tmp;
}

View File

@ -24,17 +24,22 @@
namespace lyx {
namespace support {
///
/// Compare \p s and \p s2, ignoring the case.
/// Caution: Depends on the locale
int compare_no_case(std::string const & s, std::string const & s2);
/// Compare \p s and \p s2, ignoring the case.
/// Does not depend on the locale.
int compare_no_case(docstring const & s, docstring const & s2);
///
/// Compare \p s and \p s2, ignoring the case of ASCII characters only.
int compare_ascii_no_case(std::string const & s, std::string const & s2);
///
/// Compare \p s and \p s2, ignoring the case of ASCII characters only.
int compare_ascii_no_case(docstring const & s, docstring const & s2);
///
/// Compare the first \p len characters of \p s and \p s2, ignoring the case.
/// Caution: Depends on the locale
int compare_no_case(std::string const & s, std::string const & s2, unsigned int len);
///
@ -75,28 +80,37 @@ int hexToInt(lyx::docstring const & str);
/// is \p str pure ascii?
bool isAscii(docstring const & str);
///
/// Changes the case of \p c to lowercase.
/// Caution: Depends on the locale
char lowercase(char c);
///
/// Changes the case of \p c to uppercase.
/// Caution: Depends on the locale
char uppercase(char c);
/// changes the case only if c is a one-byte char
/// Changes the case of \p c to lowercase.
/// Does not depend on the locale.
char_type lowercase(char_type c);
/// changes the case only if c is a one-byte char
/// Changes the case of \p c to uppercase.
/// Does not depend on the locale.
char_type uppercase(char_type c);
/// same as lowercase(), but ignores locale
std::string const ascii_lowercase(std::string const &);
docstring const ascii_lowercase(docstring const &);
///
std::string const lowercase(std::string const &);
docstring const lowercase(docstring const &);
/// Changes the case of \p s to lowercase.
/// Caution: Depends on the locale
std::string const lowercase(std::string const & s);
///
std::string const uppercase(std::string const &);
/// Changes the case of \p s to lowercase.
/// Does not depend on the locale.
docstring const lowercase(docstring const & s);
/// Changes the case of \p s to uppercase.
/// Caution: Depends on the locale
std::string const uppercase(std::string const & s);
/// Does the string start with this prefix?
bool prefixIs(docstring const &, char_type);

View File

@ -24,6 +24,7 @@ using std::string;
// We use QString::fromUcs4 in Qt 4.2 and higher
QString const toqstr(docstring const & str)
{
// This does not properly convert surrogate pairs
QString s;
int i = static_cast<int>(str.size());
s.resize(i);
@ -44,7 +45,7 @@ docstring const qstring_to_ucs4(QString const & qstr)
int const ls = qstr.size();
docstring ucs4;
for (int i = 0; i < ls; ++i)
ucs4 += static_cast<char_type>(qstr[i].unicode());
ucs4 += qchar_to_ucs4(qstr[i].unicode());
return ucs4;
#endif
}

View File

@ -45,6 +45,14 @@ inline QString const toqstr(std::string const & str)
}
/// Can \p c be stored in a single UTF-16 code unit?
/// True for values below 0x10000 that are not in the surrogate range.
inline bool is_utf16(char_type c)
{
	// Anything at or above 0x10000 does not fit into 16 bits.
	if (c >= 0x10000)
		return false;
	// 0xd800 ... 0xdfff is reserved for surrogates and is not a
	// character on its own.
	return c < 0xd800 || c > 0xdfff;
}
/**
* Convert a QChar into a UCS4 character.
* This is a hack (it does only make sense for the common part of the UCS4
@ -54,6 +62,7 @@ inline QString const toqstr(std::string const & str)
*/
inline char_type const qchar_to_ucs4(QChar const & qchar)
{
	// Widen the 16 bit code unit once and reuse it for the check and
	// the result.
	char_type const ucs4 = static_cast<char_type>(qchar.unicode());
	// A lone surrogate has no UCS4 equivalent, so refuse it.
	BOOST_ASSERT(is_utf16(ucs4));
	return ucs4;
}
@ -71,7 +80,7 @@ inline QChar const ucs4_to_qchar(char_type const ucs4)
// for the ucs2 subrange of unicode. Instead of an assertion we should
// return some special characters that indicates that its display is
// not supported.
BOOST_ASSERT(ucs4 < 65536);
BOOST_ASSERT(is_utf16(ucs4));
return QChar(static_cast<unsigned short>(ucs4));
}

View File

@ -17,15 +17,6 @@
#include "support/types.h"
#ifdef LIBC_WCTYPE_USES_UCS4
// We can use the libc ctype functions because we unset the LC_CTYPE
// category of the current locale in gettext.C
#include <wctype.h>
#else
// Steal some code from somewhere else, e.g. glib (look at gunicode.h)
// The code that we currently use does not really work.
#endif
namespace lyx {
@ -36,61 +27,17 @@ bool isLineSeparatorChar(char_type c)
return c == ' ';
}
/// return true if a char is alphabetical (including accented chars)
inline
bool isLetterChar(char_type c)
{
#ifdef LIBC_WCTYPE_USES_UCS4
return iswalpha(c);
#else
// FIXME UNICODE This is wrong!
return (c >= 'A' && c <= 'Z')
|| (c >= 'a' && c <= 'z')
|| (c >= 192 && c < 256); // in iso-8859-x these are accented chars
#endif
}
bool isLetterChar(char_type c);
/// return true if the char is printable
inline
bool isPrintable(char_type c)
{
#ifdef LIBC_WCTYPE_USES_UCS4
return iswprint(c);
#else
// FIXME UNICODE This is wrong!
return (c & 127) >= ' ';
#endif
}
bool isPrintable(char_type c);
/// return true if the char is printable and not a space
inline
bool isPrintableNonspace(char_type c)
{
#ifdef LIBC_WCTYPE_USES_UCS4
return iswprint(c) && !iswspace(c);
#else
// FIXME UNICODE This is wrong!
return (c & 127) > ' ';
#endif
}
bool isPrintableNonspace(char_type c);
/// return true if a unicode char is a digit.
inline
bool isDigit(char_type c)
{
#ifdef LIBC_WCTYPE_USES_UCS4
return iswdigit(c);
#else
// FIXME UNICODE This is wrong!
return c >= '0' && c <= '9';
#endif
}
bool isDigit(char_type c);
} // namespace lyx