mirror of
https://git.lyx.org/repos/lyx.git
synced 2024-12-25 05:55:34 +00:00
Fix output of labels and references that contain characters with a UCS4
code point > 255. I decided that this is no file format change: We already had the change to format 249 that allowed Unicode in .lyx files. The output to .tex of non-ASCII characters is now different from before (even for those that were supported previously, e.g. German umlauts in latin1), but this is only relevant if people referenced a label in ERT. Since we cannot detect this anyway, we don't need a file format change. * src/support/lstrings.C (escape): Extend the escaping algorithm from 8 bit to 24 bit. * src/support/lstrings.h (escape): Update comment * lib/lyx2lyx/lyx_1_4.py (lyx_support_escape): Update comment git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/trunk@15883 a592a061-630c-0410-9148-cb99ea01b6c8
This commit is contained in:
parent
df2aea27c6
commit
29012f2596
@ -202,7 +202,7 @@ def revert_space_names(document):
|
||||
|
||||
|
||||
def lyx_support_escape(lab):
|
||||
" Equivalent to lyx::support::escape()"
|
||||
" Equivalent to pre-unicode lyx::support::escape()"
|
||||
hexdigit = ['0', '1', '2', '3', '4', '5', '6', '7',
|
||||
'8', '9', 'A', 'B', 'C', 'D', 'E', 'F']
|
||||
enc = ""
|
||||
|
@ -702,21 +702,27 @@ string const rsplit(string const & a, string & piece, char delim)
|
||||
}
|
||||
|
||||
|
||||
// This function escapes non-ASCII characters and other problematic
|
||||
// characters that cause problems in latex labels.
|
||||
docstring const escape(docstring const & lab)
|
||||
{
|
||||
lyx::char_type hexdigit[16] = { '0', '1', '2', '3', '4', '5', '6', '7',
|
||||
'8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
|
||||
char_type hexdigit[16] = { '0', '1', '2', '3', '4', '5', '6', '7',
|
||||
'8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
|
||||
docstring enc;
|
||||
for (docstring::size_type i = 0; i < lab.length(); ++i) {
|
||||
lyx::char_type c = lab[i];
|
||||
// FIXME We must change the following algorithm for UCS4
|
||||
// chars, but that will be a file format change.
|
||||
char_type c = lab[i];
|
||||
if (c >= 128 || c == '=' || c == '%') {
|
||||
// Although char_type is a 32 bit type we know that
|
||||
// UCS4 occupies only 21 bits, so we don't need to
|
||||
// encode bigger values. Test for 2^24 because we
|
||||
// can encode that with the 6 hex digits that are
|
||||
// needed for 21 bits anyway.
|
||||
BOOST_ASSERT(c < (1 << 24));
|
||||
enc += '=';
|
||||
enc += hexdigit[c>>4];
|
||||
enc += hexdigit[c & 15];
|
||||
enc += hexdigit[(c>>20) & 15];
|
||||
enc += hexdigit[(c>>16) & 15];
|
||||
enc += hexdigit[(c>>12) & 15];
|
||||
enc += hexdigit[(c>> 8) & 15];
|
||||
enc += hexdigit[(c>> 4) & 15];
|
||||
enc += hexdigit[ c & 15];
|
||||
} else {
|
||||
enc += c;
|
||||
}
|
||||
|
@ -216,7 +216,8 @@ std::string const split(std::string const & a, char delim);
|
||||
/// Same as split but uses the last delim.
|
||||
std::string const rsplit(std::string const & a, std::string & piece, char delim);
|
||||
|
||||
/// Escapes non ASCII chars
|
||||
/// Escapes non ASCII chars and other problematic characters that cause
|
||||
/// problems in latex labels.
|
||||
docstring const escape(docstring const & lab);
|
||||
|
||||
/// gives a vector of stringparts which have the delimiter delim
|
||||
|
Loading…
Reference in New Issue
Block a user