mirror of
https://git.lyx.org/repos/lyx.git
synced 2025-01-21 23:09:40 +00:00
Fix character classification functions by using qt (bugs like 3270 and 1247)
* src/support/lstrings.C
(uppercase): Use qt instead of non-working libc/home grown solution
(lowercase): ditto
(local_lowercase): Use qt instead of libc tolower for ucs4 chars

* src/support/qstring_helpers.C
(qstring_to_ucs4): Use qchar_to_ucs4 because of the assertion

* src/support/lstrings.h: Add some documentation

* src/support/qstring_helpers.h
(is_utf16): New function: Tests whether a ucs4 character is also a valid utf16 character
(qchar_to_ucs4): Assert on is_utf16()
(ucs4_to_qchar): Replace old assertion with better is_utf16()

* src/support/textutils.h
(isLetterChar): Delete non-working implementation
(isPrintable): Ditto
(isPrintableNonspace): Ditto
(isDigit):

* src/support/textutils.C: New file, contains new implementations using qt of the functions in textutils.h

* src/support/Makefile.am: Add textutils.C

* development/scons/scons_manifest.py: ditto

git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/trunk@17354 a592a061-630c-0410-9148-cb99ea01b6c8
This commit is contained in:
parent
070138899e
commit
0418d14704
@ -160,6 +160,7 @@ src_support_files = Split('''
|
||||
socktools.C
|
||||
systemcall.C
|
||||
tempname.C
|
||||
textutils.C
|
||||
unicode.C
|
||||
unlink.C
|
||||
userinfo.C
|
||||
|
@ -78,6 +78,7 @@ libsupport_la_SOURCES = \
|
||||
systemcall.C \
|
||||
systemcall.h \
|
||||
tempname.C \
|
||||
textutils.C \
|
||||
textutils.h \
|
||||
translator.h \
|
||||
types.h \
|
||||
|
@ -14,6 +14,7 @@
|
||||
#include "support/lstrings.h"
|
||||
#include "support/lyxlib.h"
|
||||
#include "support/convert.h"
|
||||
#include "support/qstring_helpers.h"
|
||||
|
||||
#include "debug.h"
|
||||
|
||||
@ -32,17 +33,6 @@
|
||||
#include <algorithm>
|
||||
#include <sstream>
|
||||
|
||||
#ifdef LIBC_WCTYPE_USES_UCS4
|
||||
// We can use the libc ctype functions because we unset the LC_CTYPE
|
||||
// category of the current locale in gettext.C
|
||||
#include <wctype.h>
|
||||
#else
|
||||
// Steal some code from somewhere else, e.g. glib (look at gunicode.h)
|
||||
// The code that we currently use does not really work.
|
||||
#endif
|
||||
|
||||
|
||||
using lyx::docstring;
|
||||
|
||||
using std::transform;
|
||||
using std::string;
|
||||
@ -321,38 +311,21 @@ char uppercase(char c)
|
||||
}
|
||||
|
||||
|
||||
// FIXME UNICODE
|
||||
// for lowercase() and uppercase() function below when wchar_t is not used:
|
||||
// 1) std::tolower() and std::toupper() are templates that
|
||||
// compile fine with char_type. With the test (c >= 256) we
|
||||
// do not trust these function to do the right thing with
|
||||
// unicode char.
|
||||
// 2) these functions use the current locale, which is wrong
|
||||
// if it is not latin1 based (latin1 is a subset of UCS4).
|
||||
|
||||
char_type lowercase(char_type c)
|
||||
{
|
||||
#ifdef LIBC_WCTYPE_USES_UCS4
|
||||
return towlower(c);
|
||||
#else
|
||||
if (c >= 256)
|
||||
if (!is_utf16(c))
|
||||
// We don't know how to lowercase a non-utf16 char
|
||||
return c;
|
||||
|
||||
return tolower(c);
|
||||
#endif
|
||||
return qchar_to_ucs4(ucs4_to_qchar(c).toLower());
|
||||
}
|
||||
|
||||
|
||||
char_type uppercase(char_type c)
|
||||
{
|
||||
#ifdef LIBC_WCTYPE_USES_UCS4
|
||||
return towupper(c);
|
||||
#else
|
||||
if (c >= 256)
|
||||
if (!is_utf16(c))
|
||||
// We don't know how to uppercase a non-utf16 char
|
||||
return c;
|
||||
|
||||
return toupper(c);
|
||||
#endif
|
||||
return qchar_to_ucs4(ucs4_to_qchar(c).toUpper());
|
||||
}
|
||||
|
||||
|
||||
@ -361,10 +334,16 @@ namespace {
|
||||
// since we cannot use std::tolower and std::toupper directly in the
|
||||
// calls to std::transform yet, we use these helper classes. (Lgb)
|
||||
|
||||
template<typename Char> struct local_lowercase {
|
||||
Char operator()(Char c) const {
|
||||
struct local_lowercase {
|
||||
char operator()(char c) const {
|
||||
return tolower(c);
|
||||
}
|
||||
char_type operator()(char_type c) const {
|
||||
if (!is_utf16(c))
|
||||
// We don't know how to lowercase a non-utf16 char
|
||||
return c;
|
||||
return qchar_to_ucs4(ucs4_to_qchar(c).toLower());
|
||||
}
|
||||
};
|
||||
|
||||
struct local_uppercase {
|
||||
@ -384,7 +363,7 @@ template<typename Char> struct local_ascii_lowercase {
|
||||
string const lowercase(string const & a)
|
||||
{
|
||||
string tmp(a);
|
||||
transform(tmp.begin(), tmp.end(), tmp.begin(), local_lowercase<char>());
|
||||
transform(tmp.begin(), tmp.end(), tmp.begin(), local_lowercase());
|
||||
return tmp;
|
||||
}
|
||||
|
||||
@ -392,7 +371,7 @@ string const lowercase(string const & a)
|
||||
docstring const lowercase(docstring const & a)
|
||||
{
|
||||
docstring tmp(a);
|
||||
transform(tmp.begin(), tmp.end(), tmp.begin(), local_lowercase<char_type>());
|
||||
transform(tmp.begin(), tmp.end(), tmp.begin(), local_lowercase());
|
||||
return tmp;
|
||||
}
|
||||
|
||||
|
@ -24,17 +24,22 @@
|
||||
namespace lyx {
|
||||
namespace support {
|
||||
|
||||
///
|
||||
/// Compare \p s and \p s2, ignoring the case.
|
||||
/// Caution: Depends on the locale
|
||||
int compare_no_case(std::string const & s, std::string const & s2);
|
||||
|
||||
/// Compare \p s and \p s2, ignoring the case.
|
||||
/// Does not depend on the locale.
|
||||
int compare_no_case(docstring const & s, docstring const & s2);
|
||||
|
||||
///
|
||||
/// Compare \p s and \p s2, ignoring the case of ASCII characters only.
|
||||
int compare_ascii_no_case(std::string const & s, std::string const & s2);
|
||||
|
||||
///
|
||||
/// Compare \p s and \p s2, ignoring the case of ASCII characters only.
|
||||
int compare_ascii_no_case(docstring const & s, docstring const & s2);
|
||||
|
||||
///
|
||||
/// Compare the first \p len characters of \p s and \p s2, ignoring the case.
|
||||
/// Caution: Depends on the locale
|
||||
int compare_no_case(std::string const & s, std::string const & s2, unsigned int len);
|
||||
|
||||
///
|
||||
@ -75,28 +80,37 @@ int hexToInt(lyx::docstring const & str);
|
||||
/// is \p str pure ascii?
|
||||
bool isAscii(docstring const & str);
|
||||
|
||||
///
|
||||
/// Changes the case of \p c to lowercase.
|
||||
/// Caution: Depends on the locale
|
||||
char lowercase(char c);
|
||||
|
||||
///
|
||||
/// Changes the case of \p c to uppercase.
|
||||
/// Caution: Depends on the locale
|
||||
char uppercase(char c);
|
||||
|
||||
/// changes the case only if c is a one-byte char
|
||||
/// Changes the case of \p c to lowercase.
|
||||
/// Does not depend on the locale.
|
||||
char_type lowercase(char_type c);
|
||||
|
||||
/// changes the case only if c is a one-byte char
|
||||
/// Changes the case of \p c to uppercase.
|
||||
/// Does not depend on the locale.
|
||||
char_type uppercase(char_type c);
|
||||
|
||||
/// same as lowercase(), but ignores locale
|
||||
std::string const ascii_lowercase(std::string const &);
|
||||
docstring const ascii_lowercase(docstring const &);
|
||||
|
||||
///
|
||||
std::string const lowercase(std::string const &);
|
||||
docstring const lowercase(docstring const &);
|
||||
/// Changes the case of \p s to lowercase.
|
||||
/// Caution: Depends on the locale
|
||||
std::string const lowercase(std::string const & s);
|
||||
|
||||
///
|
||||
std::string const uppercase(std::string const &);
|
||||
/// Changes the case of \p s to lowercase.
|
||||
/// Does not depend on the locale.
|
||||
docstring const lowercase(docstring const & s);
|
||||
|
||||
/// Changes the case of \p s to uppercase.
|
||||
/// Caution: Depends on the locale
|
||||
std::string const uppercase(std::string const & s);
|
||||
|
||||
/// Does the string start with this prefix?
|
||||
bool prefixIs(docstring const &, char_type);
|
||||
|
@ -24,6 +24,7 @@ using std::string;
|
||||
// We use QString::fromUcs4 in Qt 4.2 and higher
|
||||
QString const toqstr(docstring const & str)
|
||||
{
|
||||
// This does not properly convert surrogate pairs
|
||||
QString s;
|
||||
int i = static_cast<int>(str.size());
|
||||
s.resize(i);
|
||||
@ -44,7 +45,7 @@ docstring const qstring_to_ucs4(QString const & qstr)
|
||||
int const ls = qstr.size();
|
||||
docstring ucs4;
|
||||
for (int i = 0; i < ls; ++i)
|
||||
ucs4 += static_cast<char_type>(qstr[i].unicode());
|
||||
ucs4 += qchar_to_ucs4(qstr[i].unicode());
|
||||
return ucs4;
|
||||
#endif
|
||||
}
|
||||
|
@ -45,6 +45,14 @@ inline QString const toqstr(std::string const & str)
|
||||
}
|
||||
|
||||
|
||||
/// Is \p c representable as a single utf16 code unit, i.e. below 0x10000
/// and not in the surrogate range?
|
||||
inline bool is_utf16(char_type c)
|
||||
{
|
||||
// 0xd800 ... 0xdfff is the range reserved for surrogate code units
// (the two halves of a utf16 surrogate pair), not for characters.
|
||||
return c < 0xd800 || (c > 0xdfff && c < 0x10000);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Convert a QChar into a UCS4 character.
|
||||
* This is a hack (it only makes sense for the common part of the UCS4
|
||||
@ -54,6 +62,7 @@ inline QString const toqstr(std::string const & str)
|
||||
*/
|
||||
inline char_type const qchar_to_ucs4(QChar const & qchar)
|
||||
{
|
||||
BOOST_ASSERT(is_utf16(static_cast<char_type>(qchar.unicode())));
|
||||
return static_cast<char_type>(qchar.unicode());
|
||||
}
|
||||
|
||||
@ -71,7 +80,7 @@ inline QChar const ucs4_to_qchar(char_type const ucs4)
|
||||
// for the ucs2 subrange of unicode. Instead of an assertion we should
|
||||
// return a special character that indicates that its display is
|
||||
// not supported.
|
||||
BOOST_ASSERT(ucs4 < 65536);
|
||||
BOOST_ASSERT(is_utf16(ucs4));
|
||||
return QChar(static_cast<unsigned short>(ucs4));
|
||||
}
|
||||
|
||||
|
@ -17,15 +17,6 @@
|
||||
|
||||
#include "support/types.h"
|
||||
|
||||
#ifdef LIBC_WCTYPE_USES_UCS4
|
||||
// We can use the libc ctype functions because we unset the LC_CTYPE
|
||||
// category of the current locale in gettext.C
|
||||
#include <wctype.h>
|
||||
#else
|
||||
// Steal some code from somewhere else, e.g. glib (look at gunicode.h)
|
||||
// The code that we currently use does not really work.
|
||||
#endif
|
||||
|
||||
|
||||
namespace lyx {
|
||||
|
||||
@ -36,61 +27,17 @@ bool isLineSeparatorChar(char_type c)
|
||||
return c == ' ';
|
||||
}
|
||||
|
||||
|
||||
/// return true if a char is alphabetical (including accented chars)
|
||||
inline
|
||||
bool isLetterChar(char_type c)
|
||||
{
|
||||
#ifdef LIBC_WCTYPE_USES_UCS4
|
||||
return iswalpha(c);
|
||||
#else
|
||||
// FIXME UNICODE This is wrong!
|
||||
return (c >= 'A' && c <= 'Z')
|
||||
|| (c >= 'a' && c <= 'z')
|
||||
|| (c >= 192 && c < 256); // in iso-8859-x these are accented chars
|
||||
#endif
|
||||
}
|
||||
|
||||
bool isLetterChar(char_type c);
|
||||
|
||||
/// return true if the char is printable
|
||||
inline
|
||||
bool isPrintable(char_type c)
|
||||
{
|
||||
#ifdef LIBC_WCTYPE_USES_UCS4
|
||||
return iswprint(c);
|
||||
#else
|
||||
// FIXME UNICODE This is wrong!
|
||||
return (c & 127) >= ' ';
|
||||
#endif
|
||||
}
|
||||
|
||||
bool isPrintable(char_type c);
|
||||
|
||||
/// return true if the char is printable and not a space
|
||||
inline
|
||||
bool isPrintableNonspace(char_type c)
|
||||
{
|
||||
#ifdef LIBC_WCTYPE_USES_UCS4
|
||||
return iswprint(c) && !iswspace(c);
|
||||
#else
|
||||
// FIXME UNICODE This is wrong!
|
||||
return (c & 127) > ' ';
|
||||
#endif
|
||||
}
|
||||
|
||||
bool isPrintableNonspace(char_type c);
|
||||
|
||||
/// return true if a unicode char is a digit.
|
||||
inline
|
||||
bool isDigit(char_type c)
|
||||
{
|
||||
#ifdef LIBC_WCTYPE_USES_UCS4
|
||||
return iswdigit(c);
|
||||
#else
|
||||
// FIXME UNICODE This is wrong!
|
||||
return c >= '0' && c <= '9';
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
bool isDigit(char_type c);
|
||||
|
||||
} // namespace lyx
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user