Use wctype character classification functions if possible

	* src/buffer.C
	(Buffer::insertStringAsLines): Uncomment isPrintable test

	* src/support/lstrings.C
	(compare_no_case): Use char_type and not int for the docstring version
	(ascii_tolower): Convert to a template
	(compare_ascii_no_case): Do not use a template anymore, because we
	need int for the string version and char_type for the docstring
	version as the intermediate type
	(lowercase): Use towlower if possible
	(uppercase): Use towupper if possible

	* src/support/textutils.h
	(isLetterChar): Use iswalpha if possible
	(isPrintable): Use iswprint if possible
	(isPrintableNonspace): Use iswprint and iswspace if possible
	(isDigit): Use iswdigit if possible

	* src/paragraph.C
	(Paragraph::asString): Remove obsolete FIXME
	(Paragraph::transformChar): Add FIXME

	* configure.ac: Add definition of LIBC_WCTYPE_USES_UCS4 to config.h

	* development/cmake/config.h.cmake: ditto

	* development/scons/SConstruct: ditto


git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/trunk@15893 a592a061-630c-0410-9148-cb99ea01b6c8
This commit is contained in:
Georg Baum 2006-11-13 09:53:25 +00:00
parent 3cc1001bd3
commit a116e04b8b
7 changed files with 98 additions and 28 deletions

View File

@ -383,6 +383,10 @@ int mkstemp(char*);
# define WANT_GETFILEATTRIBUTESEX_WRAPPER 1 # define WANT_GETFILEATTRIBUTESEX_WRAPPER 1
#endif #endif
#if defined(HAVE_WCHAR_T) && SIZEOF_WCHAR_T == 4
# define LIBC_WCTYPE_USES_UCS4
#endif
#endif #endif
]) ])

View File

@ -158,6 +158,10 @@
# define WANT_GETFILEATTRIBUTESEX_WRAPPER 1 # define WANT_GETFILEATTRIBUTESEX_WRAPPER 1
#endif #endif
#if defined(HAVE_WCHAR_T) && SIZEOF_WCHAR_T == 4
# define LIBC_WCTYPE_USES_UCS4
#endif
#if defined(MAKE_INTL_LIB) && defined(_MSC_VER) #if defined(MAKE_INTL_LIB) && defined(_MSC_VER)
#define __attribute__(x) #define __attribute__(x)
#define inline #define inline

View File

@ -895,6 +895,10 @@ utils.createConfigFile(conf,
# define WANT_GETFILEATTRIBUTESEX_WRAPPER 1 # define WANT_GETFILEATTRIBUTESEX_WRAPPER 1
#endif #endif
#if defined(HAVE_WCHAR_T) && SIZEOF_WCHAR_T == 4
# define LIBC_WCTYPE_USES_UCS4
#endif
#endif #endif
''' '''
) )

View File

@ -554,11 +554,9 @@ void Buffer::insertStringAsLines(ParagraphList & pars,
} }
space_inserted = true; space_inserted = true;
} }
/* FIXME: not needed anymore?
} else if (!isPrintable(*cit)) { } else if (!isPrintable(*cit)) {
// Ignore unprintables // Ignore unprintables
continue; continue;
*/
} else { } else {
// just insert the character // just insert the character
par.insertChar(pos, *cit, font, params().trackChanges); par.insertChar(pos, *cit, font, params().trackChanges);

View File

@ -1392,7 +1392,6 @@ docstring const Paragraph::asString(Buffer const & buffer,
for (pos_type i = beg; i < end; ++i) { for (pos_type i = beg; i < end; ++i) {
value_type const c = getUChar(buffer.params(), i); value_type const c = getUChar(buffer.params(), i);
// FIXME: isPrintable does not work for lyx::char_type
if (isPrintable(c)) if (isPrintable(c))
os.put(c); os.put(c);
else if (c == META_INSET) else if (c == META_INSET)
@ -1570,6 +1569,7 @@ char_type Paragraph::transformChar(char_type c, pos_type pos) const
{ {
if (!Encodings::is_arabic(c)) if (!Encodings::is_arabic(c))
if (lyxrc.font_norm_type == LyXRC::ISO_8859_6_8 && isDigit(c)) if (lyxrc.font_norm_type == LyXRC::ISO_8859_6_8 && isDigit(c))
// FIXME UNICODE What does this do?
return c + (0xb0 - '0'); return c + (0xb0 - '0');
else else
return c; return c;

View File

@ -32,6 +32,16 @@
#include <algorithm> #include <algorithm>
#include <sstream> #include <sstream>
#ifdef LIBC_WCTYPE_USES_UCS4
// We can use the libc ctype functions because we unset the LC_CTYPE
// category of the current locale in gettext.C
#include <wctype.h>
#else
// Steal some code from somewhere else, e.g. glib (look at gunicode.h)
// The code that we currently use does not really work.
#endif
using lyx::docstring; using lyx::docstring;
using std::transform; using std::transform;
@ -76,8 +86,8 @@ int compare_no_case(docstring const & s, docstring const & s2)
docstring::const_iterator p2 = s2.begin(); docstring::const_iterator p2 = s2.begin();
while (p != s.end() && p2 != s2.end()) { while (p != s.end() && p2 != s2.end()) {
int const lc1 = tolower(*p); char_type const lc1 = lowercase(*p);
int const lc2 = tolower(*p2); char_type const lc2 = lowercase(*p2);
if (lc1 != lc2) if (lc1 != lc2)
return (lc1 < lc2) ? -1 : 1; return (lc1 < lc2) ? -1 : 1;
++p; ++p;
@ -94,18 +104,20 @@ int compare_no_case(docstring const & s, docstring const & s2)
namespace { namespace {
int ascii_tolower(int c) { template<typename Char>
Char ascii_tolower(Char c) {
if (c >= 'A' && c <= 'Z') if (c >= 'A' && c <= 'Z')
return c - 'A' + 'a'; return c - 'A' + 'a';
return c; return c;
} }
}
template<typename String> inline
int do_compare_ascii_no_case(String const & s, String const & s2) int compare_ascii_no_case(string const & s, string const & s2)
{ {
typename String::const_iterator p = s.begin(); string::const_iterator p = s.begin();
typename String::const_iterator p2 = s2.begin(); string::const_iterator p2 = s2.begin();
while (p != s.end() && p2 != s2.end()) { while (p != s.end() && p2 != s2.end()) {
int const lc1 = ascii_tolower(*p); int const lc1 = ascii_tolower(*p);
@ -123,18 +135,26 @@ int do_compare_ascii_no_case(String const & s, String const & s2)
return 1; return 1;
} }
}
int compare_ascii_no_case(string const & s, string const & s2)
{
return do_compare_ascii_no_case(s, s2);
}
int compare_ascii_no_case(docstring const & s, docstring const & s2) int compare_ascii_no_case(docstring const & s, docstring const & s2)
{ {
return do_compare_ascii_no_case(s, s2); docstring::const_iterator p = s.begin();
docstring::const_iterator p2 = s2.begin();
while (p != s.end() && p2 != s2.end()) {
char_type const lc1 = ascii_tolower(*p);
char_type const lc2 = ascii_tolower(*p2);
if (lc1 != lc2)
return (lc1 < lc2) ? -1 : 1;
++p;
++p2;
}
if (s.size() == s2.size())
return 0;
if (s.size() < s2.size())
return -1;
return 1;
} }
@ -300,7 +320,9 @@ char uppercase(char c)
return char(toupper(c)); return char(toupper(c));
} }
// FIXME for lowercase() and uppercase() function below:
// FIXME UNICODE
// for lowercase() and uppercase() function below when wchar_t is not used:
// 1) std::tolower() and std::toupper() are templates that // 1) std::tolower() and std::toupper() are templates that
// compile fine with char_type. With the test (c >= 256) we // compile fine with char_type. With the test (c >= 256) we
// do not trust these function to do the right thing with // do not trust these function to do the right thing with
@ -310,19 +332,27 @@ char uppercase(char c)
char_type lowercase(char_type c) char_type lowercase(char_type c)
{ {
#ifdef LIBC_WCTYPE_USES_UCS4
return towlower(c);
#else
if (c >= 256) if (c >= 256)
return c; return c;
return tolower(c); return tolower(c);
#endif
} }
char_type uppercase(char_type c) char_type uppercase(char_type c)
{ {
#ifdef LIBC_WCTYPE_USES_UCS4
return towupper(c);
#else
if (c >= 256) if (c >= 256)
return c; return c;
return toupper(c); return toupper(c);
#endif
} }

View File

@ -17,12 +17,21 @@
#include "support/types.h" #include "support/types.h"
#ifdef LIBC_WCTYPE_USES_UCS4
// We can use the libc ctype functions because we unset the LC_CTYPE
// category of the current locale in gettext.C
#include <wctype.h>
#else
// Steal some code from somewhere else, e.g. glib (look at gunicode.h)
// The code that we currently use does not really work.
#endif
namespace lyx { namespace lyx {
/// return true if the char is a line separator /// return true if the char is a line separator
inline inline
bool isLineSeparatorChar(lyx::char_type c) bool isLineSeparatorChar(char_type c)
{ {
return c == ' '; return c == ' ';
} }
@ -30,34 +39,55 @@ bool isLineSeparatorChar(lyx::char_type c)
/// return true if a char is alphabetical (including accented chars) /// return true if a char is alphabetical (including accented chars)
inline inline
bool isLetterChar(lyx::char_type c) bool isLetterChar(char_type c)
{ {
#ifdef LIBC_WCTYPE_USES_UCS4
return iswalpha(c);
#else
// FIXME UNICODE This is wrong!
return (c >= 'A' && c <= 'Z') return (c >= 'A' && c <= 'Z')
|| (c >= 'a' && c <= 'z') || (c >= 'a' && c <= 'z')
|| (c >= 192 && c < 256); // in iso-8859-x these are accented chars || (c >= 192 && c < 256); // in iso-8859-x these are accented chars
#endif
} }
/// return true if the char is printable (masked to 7-bit ASCII) /// return true if the char is printable
inline inline
bool isPrintable(lyx::char_type c) bool isPrintable(char_type c)
{ {
#ifdef LIBC_WCTYPE_USES_UCS4
return iswprint(c);
#else
// FIXME UNICODE This is wrong!
return (c & 127) >= ' '; return (c & 127) >= ' ';
#endif
} }
/// return true if the char is printable and not a space (masked to 7-bit ASCII) /// return true if the char is printable and not a space
inline inline
bool isPrintableNonspace(lyx::char_type c) bool isPrintableNonspace(char_type c)
{ {
#ifdef LIBC_WCTYPE_USES_UCS4
return iswprint(c) && !iswspace(c);
#else
// FIXME UNICODE This is wrong!
return (c & 127) > ' '; return (c & 127) > ' ';
#endif
} }
/// return true if a unicode char is a digit. /// return true if a unicode char is a digit.
inline inline
bool isDigit(lyx::char_type ch) bool isDigit(char_type c)
{ {
return ch >= '0' && ch <= '9'; #ifdef LIBC_WCTYPE_USES_UCS4
return iswdigit(c);
#else
// FIXME UNICODE This is wrong!
return c >= '0' && c <= '9';
#endif
} }