diff --git a/lib/lyx2lyx/lyx_1_4.py b/lib/lyx2lyx/lyx_1_4.py index 039851b84f..5d35f1d023 100644 --- a/lib/lyx2lyx/lyx_1_4.py +++ b/lib/lyx2lyx/lyx_1_4.py @@ -202,7 +202,7 @@ def revert_space_names(document): def lyx_support_escape(lab): - " Equivalent to lyx::support::escape()" + " Equivalent to pre-unicode lyx::support::escape()" hexdigit = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'] enc = "" diff --git a/src/support/lstrings.C b/src/support/lstrings.C index 939d48531b..9761da6d34 100644 --- a/src/support/lstrings.C +++ b/src/support/lstrings.C @@ -702,21 +702,27 @@ string const rsplit(string const & a, string & piece, char delim) } -// This function escapes 8-bit characters and other problematic -// characters that cause problems in latex labels. docstring const escape(docstring const & lab) { - lyx::char_type hexdigit[16] = { '0', '1', '2', '3', '4', '5', '6', '7', - '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' }; + char_type hexdigit[16] = { '0', '1', '2', '3', '4', '5', '6', '7', + '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' }; docstring enc; for (docstring::size_type i = 0; i < lab.length(); ++i) { - lyx::char_type c = lab[i]; - // FIXME We must change the following algorithm for UCS4 - // chars, but that will be a file format change. + char_type c = lab[i]; if (c >= 128 || c == '=' || c == '%') { + // Although char_type is a 32 bit type we know that + // UCS4 occupies only 21 bits, so we don't need to + // encode bigger values. Test for 2^24 because we + // can encode that with the 6 hex digits that are + // needed for 21 bits anyway. 
+ BOOST_ASSERT(c < (1 << 24)); enc += '='; - enc += hexdigit[c>>4]; - enc += hexdigit[c & 15]; + enc += hexdigit[(c>>20) & 15]; + enc += hexdigit[(c>>16) & 15]; + enc += hexdigit[(c>>12) & 15]; + enc += hexdigit[(c>> 8) & 15]; + enc += hexdigit[(c>> 4) & 15]; + enc += hexdigit[ c & 15]; } else { enc += c; } diff --git a/src/support/lstrings.h b/src/support/lstrings.h index 16288e6f44..21d57eeb91 100644 --- a/src/support/lstrings.h +++ b/src/support/lstrings.h @@ -216,7 +216,8 @@ std::string const split(std::string const & a, char delim); /// Same as split but uses the last delim. std::string const rsplit(std::string const & a, std::string & piece, char delim); -/// Escapes non ASCII chars +/// Escapes non-ASCII chars and other characters that cause +/// problems in LaTeX labels. docstring const escape(docstring const & lab); /// gives a vector of stringparts which have the delimiter delim