diff --git a/lib/encodings b/lib/encodings index 3472b74186..0b66abd979 100644 --- a/lib/encodings +++ b/lib/encodings @@ -1,401 +1,59 @@ -Encoding iso8859-2 latin2 - 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f - 10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f - 20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f - 30 31 32 33 34 35 36 37 38 39 3a 3b 3c 3d 3e 3f - 40 41 42 43 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f - 50 51 52 53 54 55 56 57 58 59 5a 5b 5c 5d 5e 5f - 60 61 62 63 64 65 66 67 68 69 6a 6b 6c 6d 6e 6f - 70 71 72 73 74 75 76 77 78 79 7a 7b 7c 7d 7e 7f - 80 81 82 83 84 85 86 87 88 89 8a 8b 8c 8d 8e 8f - 90 91 92 93 94 95 96 97 98 99 9a 9b 9c 9d 9e 9f - 00a0 0104 02d8 0141 00a4 013d 015a 00a7 - 00a8 0160 015e 0164 0179 00ad 017d 017b - 00b0 0105 02db 0142 00b4 013e 015b 02c7 - 00b8 0161 015f 0165 017a 02dd 017e 017c - 0154 00c1 00c2 0102 00c4 0139 0106 00c7 - 010c 00c9 0118 00cb 011a 00cd 00ce 010e - 0110 0143 0147 00d3 00d4 0150 00d6 00d7 - 0158 016e 00da 0170 00dc 00dd 0162 00df - 0155 00e1 00e2 0103 00e4 013a 0107 00e7 - 010d 00e9 0119 00eb 011b 00ed 00ee 010f - 0111 0144 0148 00f3 00f4 0151 00f6 00f7 - 0159 016f 00fa 0171 00fc 00fd 0163 02d9 +# FIXME: Have a look at the encodings known by the inputenc package and add +# missing ones. +# FIXME: Find out whether this file is used for more than LaTeX file +# generation. If not it does not make sense to have encodings with +# "unknown" LaTeX name.
+ +# Order of names: LyX name LaTeX name iconv name + +# FIXME: Add this (file format change): +#Encoding utf8 utf8 UTF-8 +#End + +Encoding iso8859-1 latin1 ISO-8859-1 End - -Encoding iso8859-3 latin3 - 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f - 10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f - 20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f - 30 31 32 33 34 35 36 37 38 39 3a 3b 3c 3d 3e 3f - 40 41 42 43 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f - 50 51 52 53 54 55 56 57 58 59 5a 5b 5c 5d 5e 5f - 60 61 62 63 64 65 66 67 68 69 6a 6b 6c 6d 6e 6f - 70 71 72 73 74 75 76 77 78 79 7a 7b 7c 7d 7e 7f - 80 81 82 83 84 85 86 87 88 89 8a 8b 8c 8d 8e 8f - 90 91 92 93 94 95 96 97 98 99 9a 9b 9c 9d 9e 9f - 00a0 0126 02d8 00a3 00a4 ffff 0124 00a7 - 00a8 0130 015e 011e 0134 00ad ffff 017b - 00b0 0127 00b2 00b3 00b4 00b5 0125 00b7 - 00b8 0131 015f 011f 0135 00bd ffff 017c - 00c0 00c1 00c2 ffff 00c4 010a 0108 00c7 - 00c8 00c9 00ca 00cb 00cc 00cd 00ce 00cf - ffff 00d1 00d2 00d3 00d4 0120 00d6 00d7 - 011c 00d9 00da 00db 00dc 016c 015c 00df - 00e0 00e1 00e2 ffff 00e4 010b 0109 00e7 - 00e8 00e9 00ea 00eb 00ec 00ed 00ee 00ef - ffff 00f1 00f2 00f3 00f4 0121 00f6 00f7 - 011d 00f9 00fa 00fb 00fc 016d 015d 02d9 +Encoding iso8859-2 latin2 ISO-8859-2 End - -Encoding iso8859-4 latin4 - 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f - 10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f - 20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f - 30 31 32 33 34 35 36 37 38 39 3a 3b 3c 3d 3e 3f - 40 41 42 43 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f - 50 51 52 53 54 55 56 57 58 59 5a 5b 5c 5d 5e 5f - 60 61 62 63 64 65 66 67 68 69 6a 6b 6c 6d 6e 6f - 70 71 72 73 74 75 76 77 78 79 7a 7b 7c 7d 7e 7f - 80 81 82 83 84 85 86 87 88 89 8a 8b 8c 8d 8e 8f - 90 91 92 93 94 95 96 97 98 99 9a 9b 9c 9d 9e 9f - 00a0 0104 0138 0156 00a4 0128 013b 00a7 - 00a8 0160 0112 0122 0166 00ad 017d 00af - 00b0 0105 02db 0157 00b4 0129 013c 02c7 - 00b8 0161 0113 0123 0167 014a 017e 014b - 0100 00c1 00c2 00c3 00c4 00c5 00c6 012e - 010c 00c9 
0118 00cb 0116 00cd 00ce 012a - 0110 0145 014c 0136 00d4 00d5 00d6 00d7 - 00d8 0172 00da 00db 00dc 0168 016a 00df - 0101 00e1 00e2 00e3 00e4 00e5 00e6 012f - 010d 00e9 0119 00eb 0117 00ed 00ee 012b - 0111 0146 014d 0137 00f4 00f5 00f6 00f7 - 00f8 0173 00fa 00fb 00fc 0169 016b 02d9 +Encoding iso8859-3 latin3 ISO-8859-3 End - -Encoding iso8859-5 iso88595 - 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f - 10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f - 20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f - 30 31 32 33 34 35 36 37 38 39 3a 3b 3c 3d 3e 3f - 40 41 42 43 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f - 50 51 52 53 54 55 56 57 58 59 5a 5b 5c 5d 5e 5f - 60 61 62 63 64 65 66 67 68 69 6a 6b 6c 6d 6e 6f - 70 71 72 73 74 75 76 77 78 79 7a 7b 7c 7d 7e 7f - 80 81 82 83 84 85 86 87 88 89 8a 8b 8c 8d 8e 8f - 90 91 92 93 94 95 96 97 98 99 9a 9b 9c 9d 9e 9f - 00a0 0401 0402 0403 0404 0405 0406 0407 - 0408 0409 040a 040b 040c 00ad 040e 040f - 0410 0411 0412 0413 0414 0415 0416 0417 - 0418 0419 041a 041b 041c 041d 041e 041f - 0420 0421 0422 0423 0424 0425 0426 0427 - 0428 0429 042a 042b 042c 042d 042e 042f - 0430 0431 0432 0433 0434 0435 0436 0437 - 0438 0439 043a 043b 043c 043d 043e 043f - 0440 0441 0442 0443 0444 0445 0446 0447 - 0448 0449 044a 044b 044c 044d 044e 044f - 2116 0451 0452 0453 0454 0455 0456 0457 - 0458 0459 045a 045b 045c 00a7 045e 045f +Encoding iso8859-4 latin4 ISO-8859-4 End - -Encoding iso8859-6 unknown - 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f - 10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f - 20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f - 0660 0661 0662 0663 0664 0665 0666 0667 - 0668 0669 003a 003b 003c 003d 003e 061f - 0040 fe80 fe81 fe83 fe85 fe87 fe89 fe8d - fe8f fe93 fe95 fe99 fe9d fea1 fea5 fea9 - feab fead feaf feb1 feb5 feb9 febd fec1 - fec5 fec9 fecd 005d 005c 005b 005e 005f - 0640 fed1 fed5 fed9 fedd fee1 fee5 fee9 - feed feef fef1 065b 065c 064d 064e 064f - 0650 0651 0652 0073 0074 0075 fef5 fef7 - fef9 fefb 007a 007b 007c 007d 007e 
007f - 80 81 82 83 84 85 86 87 88 89 8a 8b 8c 8d 8e 8f - fe71 0091 0092 fe77 fe79 fe7b fe7f fe7d - 0098 0099 009a 009b 009c 009d 009e 009f - fe8c fe82 fe84 fe86 fe88 fe8e feaa feac - feae feb0 feee fef0 fe91 fe92 fe90 fe97 - fe98 fe94 fe97 fe98 fe96 fe9b fe9c fe9a - fe9f fea0 fe9e fea3 fea4 fea2 fea7 fea8 - fea6 feb3 feb4 feb2 feb7 feb8 feb6 febb - febc feba febf fec0 febe fec3 fec4 fec2 - fec7 fec8 fec6 fecb fecc feca fecf fed0 - fece fed3 fed4 fed2 fed7 fed8 fed6 fedb - fedc feda fedf fee0 fede fee3 fee4 fee2 - fee7 fee8 fee6 feeb feec feea fef3 fef4 - fef2 00f1 00f2 00f3 00f4 00f5 00f6 00f7 - fe8b fe8a fef6 fefa fef8 fefc 00fe 00ff +Encoding iso8859-5 iso88595 ISO-8859-5 End - -Encoding iso8859-7 iso-8859-7 - 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f - 10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f - 20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f - 30 31 32 33 34 35 36 37 38 39 3a 3b 3c 3d 3e 3f - 40 41 42 43 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f - 50 51 52 53 54 55 56 57 58 59 5a 5b 5c 5d 5e 5f - 60 61 62 63 64 65 66 67 68 69 6a 6b 6c 6d 6e 6f - 70 71 72 73 74 75 76 77 78 79 7a 7b 7c 7d 7e 7f - 80 81 82 83 84 85 86 87 88 89 8a 8b 8c 8d 8e 8f - 90 91 92 93 94 95 96 97 98 99 9a 9b 9c 9d 9e 9f - 00a0 0371 0372 00a3 ffff ffff 00a6 00a7 - 00a8 00a9 ffff 00ab 00ac 00ad ffff 2015 - 00b0 00b1 00b2 00b3 03f3 03f4 0386 00b7 - 0388 0389 038a 00bb 038c 00bd 038e 038f - 0390 0391 0392 0393 0394 0395 0396 0397 - 0398 0399 039a 039b 039c 039d 039e 039f - 03a0 03a1 ffff 03a3 03a4 03a5 03a6 03a7 - 03a8 03a9 03aa 03ab 03ac 03ad 03ae 03af - 03b0 03b1 03b2 03b3 03b4 03b5 03b6 03b7 - 03b8 03b9 03ba 03bb 03bc 03bd 03be 03bf - 03c0 03c1 03c2 03c3 03c4 03c5 03c6 03c7 - 03c8 03c9 03ca 03cb 03cc 03cd 03ce ffff +Encoding iso8859-6 unknown ISO-8859-6 End - -Encoding iso8859-9 latin5 - 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f - 10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f - 20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f - 30 31 32 33 34 35 36 37 38 39 3a 3b 3c 3d 3e 3f - 
40 41 42 43 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f - 50 51 52 53 54 55 56 57 58 59 5a 5b 5c 5d 5e 5f - 60 61 62 63 64 65 66 67 68 69 6a 6b 6c 6d 6e 6f - 70 71 72 73 74 75 76 77 78 79 7a 7b 7c 7d 7e 7f - 80 81 82 83 84 85 86 87 88 89 8a 8b 8c 8d 8e 8f - 90 91 92 93 94 95 96 97 98 99 9a 9b 9c 9d 9e 9f - 00a0 00a1 00a2 00a3 00a4 00a5 00a6 00a7 - 00a8 00a9 00aa 00ab 00ac 00ad 00ae 00af - 00b0 00b1 00b2 00b3 00b4 00b5 00b6 00b7 - 00b8 00b9 00ba 00bb 00bc 00bd 00be 00bf - 00c0 00c1 00c2 00c3 00c4 00c5 00c6 00c7 - 00c8 00c9 00ca 00cb 00cc 00cd 00ce 00cf - 011e 00d1 00d2 00d3 00d4 00d5 00d6 00d7 - 00d8 00d9 00da 00db 00dc 0130 015e 00df - 00e0 00e1 00e2 00e3 00e4 00e5 00e6 00e7 - 00e8 00e9 00ea 00eb 00ec 00ed 00ee 00ef - 011f 00f1 00f2 00f3 00f4 00f5 00f6 00f7 - 00f8 00f9 00fa 00fb 00fc 0131 015f 00ff +Encoding iso8859-7 iso-8859-7 ISO-8859-7 End - -Encoding iso8859-13 l7xenc - 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f - 10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f - 20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f - 30 31 32 33 34 35 36 37 38 39 3a 3b 3c 3d 3e 3f - 40 41 42 43 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f - 50 51 52 53 54 55 56 57 58 59 5a 5b 5c 5d 5e 5f - 60 61 62 63 64 65 66 67 68 69 6a 6b 6c 6d 6e 6f - 70 71 72 73 74 75 76 77 78 79 7a 7b 7c 7d 7e 7f - 80 81 82 83 84 85 86 87 88 89 8a 8b 8c 8d 8e 8f - 90 91 92 93 94 95 96 97 98 99 9a 9b 9c 9d 9e 9f - 00a0 201d 00a2 00a3 00a4 201e 00a6 00a7 - 00d8 00a9 0156 00ab 00ac 00ad 00ae 00c6 - 00b0 00b1 00b2 00b3 201c 00b5 00b6 00b7 - 00f8 00b9 0157 00bb 00bc 00bd 00be 00e6 - 0104 012e 0100 0106 00c4 00c5 0118 0112 - 010c 00c9 0179 0116 0122 0136 012a 013b - 0160 0143 0145 00d3 014c 00d5 00d6 00d7 - 0172 0141 015a 016a 00dc 017b 017d 00df - 0105 012f 0101 0107 00e4 00e5 0119 0113 - 010d 00e9 017a 0117 0123 0127 012b 013c - 0161 0144 0146 00f3 014d 00f5 00f6 00f7 - 0173 0142 015b 016b 00fc 017c 017e 2019 +Encoding iso8859-9 latin5 ISO-8859-9 End - -Encoding iso8859-15 latin9 - 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 
0e 0f - 10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f - 20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f - 30 31 32 33 34 35 36 37 38 39 3a 3b 3c 3d 3e 3f - 40 41 42 43 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f - 50 51 52 53 54 55 56 57 58 59 5a 5b 5c 5d 5e 5f - 60 61 62 63 64 65 66 67 68 69 6a 6b 6c 6d 6e 6f - 70 71 72 73 74 75 76 77 78 79 7a 7b 7c 7d 7e 7f - 80 81 82 83 84 85 86 87 88 89 8a 8b 8c 8d 8e 8f - 90 91 92 93 94 95 96 97 98 99 9a 9b 9c 9d 9e 9f - 00a0 00a1 00a2 00a3 20ac 00a5 0160 00a7 - 0161 00a9 00aa 00ab 00ac 00ad 00ae 00af - 00b0 00b1 00b2 00b3 017d 00b5 00b6 00b7 - 017e 00b9 00ba 00bb 0152 0153 0178 00bf - 00c0 00c1 00c2 00c3 00c4 00c5 00c6 00c7 - 00c8 00c9 00ca 00cb 00cc 00cd 00ce 00cf - 00d0 00d1 00d2 00d3 00d4 00d5 00d6 00d7 - 00d8 00d9 00da 00db 00dc 00dd 00de 00df - 00e0 00e1 00e2 00e3 00e4 00e5 00e6 00e7 - 00e8 00e9 00ea 00eb 00ec 00ed 00ee 00ef - 00f0 00f1 00f2 00f3 00f4 00f5 00f6 00f7 - 00f8 00f9 00fa 00fb 00fc 00fd 00fe 00ff +Encoding iso8859-13 l7xenc ISO-8859-13 End - -Encoding cp1255 cp1255 - 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f - 10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f - 20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f - 30 31 32 33 34 35 36 37 38 39 3a 3b 3c 3d 3e 3f - 40 41 42 43 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f - 50 51 52 53 54 55 56 57 58 59 5a 5b 5c 5d 5e 5f - 60 61 62 63 64 65 66 67 68 69 6a 6b 6c 6d 6e 6f - 70 71 72 73 74 75 76 77 78 79 7a 7b 7c 7d 7e 7f - 80 81 82 83 84 85 86 87 88 89 8a 8b 8c 8d 8e 8f - 90 91 92 93 94 95 96 97 98 99 9a 9b 9c 9d 9e 9f - 00a0 ffff 00a2 00a3 00a4 00a5 00a6 00a7 - 00a8 00a9 00d7 00ab 00ac 00ad 00ae 203e - 00b0 00b1 00b2 00b3 00b4 00b5 00b6 00b7 - 00b8 00b9 00f7 00bb 00bc 00bd 00be 00bf - 05b0 05b1 05b2 05b3 05b4 05b5 05b6 05b7 - 05b8 05b9 ffff 05bb 05bc 05bd 05be 05bf - 05c0 05c1 05c2 05c3 05f0 05f1 05f2 05f3 - 05f4 ffff ffff ffff ffff ffff ffff ffff - 05d0 05d1 05d2 05d3 05d4 05d5 05d6 05d7 - 05d8 05d9 05da 05db 05dc 05dd 05de 05df - 05e0 05e1 05e2 05e3 05e4 05e5 05e6 05e7 - 05e8 
05e9 05ea ffff ffff ffff ffff ffff +Encoding iso8859-15 latin9 ISO-8859-15 End - -Encoding cp1251 cp1251 - 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f - 10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f - 20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f - 30 31 32 33 34 35 36 37 38 39 3a 3b 3c 3d 3e 3f - 40 41 42 43 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f - 50 51 52 53 54 55 56 57 58 59 5a 5b 5c 5d 5e 5f - 60 61 62 63 64 65 66 67 68 69 6a 6b 6c 6d 6e 6f - 70 71 72 73 74 75 76 77 78 79 7a 7b 7c 7d 7e 7f - 0402 0403 201a 0453 201e 2026 2020 2021 - 20ac 2030 0409 2039 040a 040c 040b 040f - 0452 2018 2019 201c 201d 2022 2013 2014 - 0098 2122 0459 203a 045a 045c 045b 045f - 00a0 040e 045e 0408 00a4 0490 00a6 00a7 - 0401 00a9 0404 00ab 00ac 00ad 00ae 0407 - 00b0 00b1 0406 0456 0491 00b5 00b6 00b7 - 0451 2116 0454 00bb 0458 0405 0455 0457 - 0410 0411 0412 0413 0414 0415 0416 0417 - 0418 0419 041a 041b 041c 041d 041e 041f - 0420 0421 0422 0423 0424 0425 0426 0427 - 0428 0429 042a 042b 042c 042d 042e 042f - 0430 0431 0432 0433 0434 0435 0436 0437 - 0438 0439 043a 043b 043c 043d 043e 043f - 0440 0441 0442 0443 0444 0445 0446 0447 - 0448 0449 044a 044b 044c 044d 044e 044f +Encoding cp1255 cp1255 CP1255 End - -Encoding koi8 koi8-r - 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f - 10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f - 20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f - 30 31 32 33 34 35 36 37 38 39 3a 3b 3c 3d 3e 3f - 40 41 42 43 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f - 50 51 52 53 54 55 56 57 58 59 5a 5b 5c 5d 5e 5f - 60 61 62 63 64 65 66 67 68 69 6a 6b 6c 6d 6e 6f - 70 71 72 73 74 75 76 77 78 79 7a 7b 7c 7d 7e 7f - 2500 2502 250c 2510 2514 2518 251c 2524 - 252c 2534 253c 2580 2584 2588 258c 2590 - 2591 2592 2593 2320 25a0 2219 221a 2248 - 2264 2265 00a0 2321 00b0 00b2 00b7 00f7 - 2550 2551 2552 0451 2553 2554 2555 2556 - 2557 2558 2559 255a 255b 255c 255d 255e - 255f 2560 2561 0401 2562 2563 2564 2565 - 2566 2567 2568 2569 256a 256b 256c 00a9 - 044e 0430 0431 0446 0434 
0435 0444 0433 - 0445 0438 0439 043a 043b 043c 043d 043e - 043f 044f 0440 0441 0442 0443 0436 0432 - 044c 044b 0437 0448 044d 0449 0447 044a - 042e 0410 0411 0426 0414 0415 0424 0413 - 0425 0418 0419 041a 041b 041c 041d 041e - 041f 042f 0420 0421 0422 0423 0416 0412 - 042c 042b 0417 0428 042d 0429 0427 042a +Encoding cp1251 cp1251 CP1251 End - -Encoding koi8-u koi8-u - 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f - 10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f - 20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f - 30 31 32 33 34 35 36 37 38 39 3a 3b 3c 3d 3e 3f - 40 41 42 43 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f - 50 51 52 53 54 55 56 57 58 59 5a 5b 5c 5d 5e 5f - 60 61 62 63 64 65 66 67 68 69 6a 6b 6c 6d 6e 6f - 70 71 72 73 74 75 76 77 78 79 7a 7b 7c 7d 7e 7f - 2500 2502 250c 2510 2514 2518 251c 2524 - 252c 2534 253c 2580 2584 2588 258c 2590 - 2591 2592 2593 2320 25a0 2219 221a 2248 - 2264 2265 00a0 2321 00b0 00b2 00b7 00f7 - 2550 2551 2552 0451 0454 2554 0456 0457 - 2557 2558 2559 255a 255b 0491 255d 255e - 255f 2560 2561 0401 0404 2563 0406 0407 - 2566 2567 2568 2569 256a 0490 256c 00a9 - 044e 0430 0431 0446 0434 0435 0444 0433 - 0445 0438 0439 043a 043b 043c 043d 043e - 043f 044f 0440 0441 0442 0443 0436 0432 - 044c 044b 0437 0448 044d 0449 0447 044a - 042e 0410 0411 0426 0414 0415 0424 0413 - 0425 0418 0419 041a 041b 041c 041d 041e - 041f 042f 0420 0421 0422 0423 0416 0412 - 042c 042b 0417 0428 042d 0429 0427 042a +Encoding koi8 koi8-r KOI8-R End - -Encoding tis620-0 unknown - 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f - 10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f - 20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f - 30 31 32 33 34 35 36 37 38 39 3a 3b 3c 3d 3e 3f - 40 41 42 43 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f - 50 51 52 53 54 55 56 57 58 59 5a 5b 5c 5d 5e 5f - 60 61 62 63 64 65 66 67 68 69 6a 6b 6c 6d 6e 6f - 70 71 72 73 74 75 76 77 78 79 7a 7b 7c 7d 7e 7f - 80 81 82 83 84 85 86 87 88 89 8a 8b 8c 8d 8e 8f - 90 91 92 93 94 95 96 97 98 99 9a 9b 9c 9d 
9e 9f - 00a1 0e01 0e02 0e03 0e04 0e05 0e06 0e07 - 0e08 0e09 0e0a 0e0b 0e0c 0e0d 0e0e 0e0f - 0e10 0e11 0e12 0e13 0e14 0e15 0e16 0e17 - 0e18 0e19 0e1a 0e1b 0e1c 0e1d 0e1e 0e1f - 0e20 0e21 0e22 0e23 0e24 0e25 0e26 0e27 - 0e28 0e29 0e2a 0e2b 0e2c 0e2d 0e2e 0e2f - 0e30 0e31 0e32 0e33 0e34 0e35 0e36 0e37 - 0e38 0e39 0e3a 00db 00dc 00de 00de 0e3f - 0e40 0e41 0e42 0e43 0e44 0e45 0e46 0e47 - 0e48 0e49 0e4a 0e4b 0e4c 0e4d 0e4e 0e4f - 0e50 0e51 0e52 0e53 0e54 0e55 0e56 0e57 - 0e58 0e59 0e5a 0e5b 00fc 00fd 00fe 00ff +Encoding koi8-u koi8-u KOI8-U End -Encoding pt154 pt154 - 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f - 10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f - 20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f - 30 31 32 33 34 35 36 37 38 39 3a 3b 3c 3d 3e 3f - 40 41 42 43 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f - 50 51 52 53 54 55 56 57 58 59 5a 5b 5c 5d 5e 5f - 60 61 62 63 64 65 66 67 68 69 6a 6b 6c 6d 6e 6f - 70 71 72 73 74 75 76 77 78 79 7a 7b 7c 7d 7e 7f - 0496 0492 201a 0493 201e 2026 2020 04ae - 04b2 04af 04a0 2039 04a2 049a 04ba 04b8 - 0497 2018 2019 201c 201d 2022 2013 2014 - 04b3 2122 04a1 203a 04a3 049b 04bb 04b9 - 00a0 040e 045e 0408 04e8 0498 04b0 00a7 - 0401 00a9 04d8 00ab 00ac 00ad 00ae 049c - 00b0 04b1 0406 0456 0499 04e9 00b6 00b7 - 0451 2116 04d9 00bb 0458 04aa 04ab 049d - 0410 0411 0412 0413 0414 0415 0416 0417 - 0418 0419 041a 041b 041c 041d 041e 041f - 0420 0421 0422 0423 0424 0425 0426 0427 - 0428 0429 042a 042b 042c 042d 042e 042f - 0430 0431 0432 0433 0434 0435 0436 0437 - 0438 0439 043a 043b 043c 043d 043e 043f - 0440 0441 0442 0443 0444 0445 0446 0447 - 0448 0449 044a 044b 044c 044d 044e 044f +Encoding tis620-0 unknown TIS-620-0 +End + +Encoding pt154 pt154 PT154 End diff --git a/src/buffer.C b/src/buffer.C index a8e90a4b28..41185b825d 100644 --- a/src/buffer.C +++ b/src/buffer.C @@ -812,26 +812,44 @@ bool Buffer::do_writeFile(ostream & ofs) const } -void Buffer::makeLaTeXFile(string const & fname, +bool Buffer::makeLaTeXFile(string const & 
fname, string const & original_path, OutputParams const & runparams, bool output_preamble, bool output_body) { - lyxerr[Debug::LATEX] << "makeLaTeXFile..." << endl; + string const encoding = (params().inputenc == "auto") ? + params().language->encoding()->iconvName() : + encodings.getEncoding(params().inputenc)->iconvName(); + lyxerr[Debug::LATEX] << "makeLaTeXFile encoding: " + << encoding << "..." << endl; - // FIXME UNICODE - // This creates an utf8 encoded file, but the inputenc commands - // specify other encodings - odocfstream ofs; + odocfstream ofs(encoding); if (!openFileWrite(ofs, fname)) - return; + return false; - writeLaTeXSource(ofs, original_path, + try { + writeLaTeXSource(ofs, original_path, runparams, output_preamble, output_body); + } + catch (iconv_codecvt_facet_exception &) { + Alert::error(_("Encoding error"), + _("Some characters of your document are not " + "representable in the chosen encoding.\n" + "Changing the document encoding to utf8 could help.")); + return false; + } ofs.close(); - if (ofs.fail()) + if (ofs.fail()) { lyxerr << "File '" << fname << "' was not closed properly." << endl; + Alert::error(_("Error closing file"), + _("The output file could not be closed properly.\n" + " Probably some characters of your document are not " + "representable in the chosen encoding.\n" + "Changing the document encoding to utf8 could help.")); + return false; + } + return true; } diff --git a/src/buffer.h b/src/buffer.h index a58eb3f79e..e892192588 100644 --- a/src/buffer.h +++ b/src/buffer.h @@ -146,7 +146,7 @@ public: bool writeFile(std::string const &) const; /// Just a wrapper for the method below, first creating the ofstream. 
- void makeLaTeXFile(std::string const & filename, + bool makeLaTeXFile(std::string const & filename, std::string const & original_path, OutputParams const &, bool output_preamble = true, diff --git a/src/bufferparams.C b/src/bufferparams.C index 420df763da..1af925829e 100644 --- a/src/bufferparams.C +++ b/src/bufferparams.C @@ -835,32 +835,26 @@ bool BufferParams::writeLaTeX(odocstream & os, LaTeXFeatures & features, texrow.newline(); } - // TODO: Some people want to support more encodings than UTF-8. They can have a field day around here - if (true) { - os << "\\usepackage[utf8]{inputenc}\n"; + if (inputenc == "auto") { + string const doc_encoding = + language->encoding()->latexName(); + + // Create a list with all the input encodings used + // in the document + std::set encodings = + features.getEncodingSet(doc_encoding); + + os << "\\usepackage["; + std::set::const_iterator it = encodings.begin(); + std::set::const_iterator const end = encodings.end(); + for (; it != end; ++it) + os << from_ascii(*it) << ','; + os << from_ascii(doc_encoding) << "]{inputenc}\n"; + texrow.newline(); + } else if (inputenc != "default") { + os << "\\usepackage[" << from_ascii(inputenc) + << "]{inputenc}\n"; texrow.newline(); - } else { - if (inputenc == "auto") { - string const doc_encoding = - language->encoding()->latexName(); - - // Create a list with all the input encodings used - // in the document - std::set encodings = - features.getEncodingSet(doc_encoding); - - os << "\\usepackage["; - std::set::const_iterator it = encodings.begin(); - std::set::const_iterator const end = encodings.end(); - for (; it != end; ++it) - os << from_ascii(*it) << ','; - os << from_ascii(doc_encoding) << "]{inputenc}\n"; - texrow.newline(); - } else if (inputenc != "default") { - os << "\\usepackage[" << from_ascii(inputenc) - << "]{inputenc}\n"; - texrow.newline(); - } } if (use_geometry || nonstandard_papersize) { diff --git a/src/encoding.C b/src/encoding.C index 9c8f15c67d..a57fed5cc6 100644 
--- a/src/encoding.C +++ b/src/encoding.C @@ -33,79 +33,6 @@ Encodings encodings; namespace { -char_type tab_iso8859_1[256] = { - 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, - 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, - 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, - 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, - 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, - 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, - 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, - 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, - 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, - 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, - 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, - 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, - 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, - 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, - 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, - 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff -}; - -#ifdef USE_UNICODE_FOR_SYMBOLS -char_type tab_symbol[256] = { - 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, - 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, - - 0x0020, 0x0021, 0x2200, 0x0023, 
0x2203, 0x0025, 0x0026, 0x220b, - 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, - - 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, - -// 0x2245, 0x0391, 0x0392, 0x03a7, 0x0394, 0x0395, 0x03a6, 0x0393, -// 0x0397, 0x0399, 0x03d1, 0x039a, 0x039b, 0x039c, 0x039d, 0x039f, - -// 0x03a0, 0x0398, 0x03a1, 0x03a3, 0x03a4, 0x03a5, 0x03c2, 0x03a9, -// 0x039e, 0x03a8, 0x0396, 0x005b, 0x2234, 0x005d, 0x22a5, 0xffff, - 0x2245, 0x0041, 0x0042, 0x0058, 0x0394, 0x0045, 0x03c2, 0x03a9, - 0x0048, 0x0049, 0x03d1, 0x004b, 0x039b, 0x004d, 0x004e, 0x004f, - - 0x03a0, 0x0398, 0x0050, 0x03a3, 0x0054, 0x0059, 0x03c2, 0x03a9, - 0x039e, 0x03a8, 0x005a, 0x005b, 0x2234, 0x005d, 0x22a5, 0xffff, - - 0xffff, 0x03b1, 0x03b2, 0x03c7, 0x03b4, 0x03b5, 0x03d5, 0x03b3, - 0x03b7, 0x03b9, 0x03c6, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03bf, - - 0x03c0, 0x03b8, 0x03c1, 0x03c3, 0x03c4, 0x03c5, 0x03d6, 0x03c9, - 0x03be, 0x03c8, 0x03b6, 0x007b, 0x007c, 0x007d, 0x007e, 0xffff, - - 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, - 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, - - 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, - 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, - - 0xffff, 0x03d2, 0x2032, 0x2264, 0x2215, 0x221e, 0xffff, 0x2663, - 0x2666, 0x2665, 0x2660, 0x2194, 0x2190, 0x2191, 0x2192, 0x2193, - - 0x00b0, 0x00b1, 0x2033, 0x2265, 0x00d7, 0x221d, 0x2202, 0x2022, - 0x00f7, 0x2260, 0x2261, 0x2248, 0x22ef, 0xffff, 0xffff, 0x21b5, - - 0x2135, 0x2111, 0x211c, 0x2118, 0x2297, 0x2295, 0x2205, 0x2229, - 0x222a, 0x2283, 0x2287, 0x2284, 0x2282, 0x2286, 0x2208, 0x2209, - - 0x2220, 0x2207, 0x00ae, 0x00a9, 0x2122, 0x220f, 0x221a, 0x22c5, - 0x00ac, 0x2227, 0x2228, 0x21d4, 0x21d0, 0x21d1, 0x21d2, 0x21d3, - - 0x2662, 0x2329, 0x00ae, 0x00a9, 0x2122, 0x2211, 0xffff, 0xffff, - 0xffff, 0x2308, 0xffff, 0x230a, 0xffff, 0xffff, 0xffff, 0xffff, - - 0xffff, 0x232a, 0x222b, 0xffff, 0xffff, 0xffff, 
0xffff, 0xffff, - 0xffff, 0x2309, 0xffff, 0x230b, 0xffff, 0xffff, 0xffff, 0xffff -}; -#endif - - char_type arabic_table2[63][4] = { {0x41, 0x41, 0x41, 0x41}, // 0xc1 = hamza {0x42, 0xa1, 0x42, 0xa1}, // 0xc2 = ligature madda on alef @@ -252,13 +179,6 @@ char_type const arabic_start = 0xc1; -char_type Encoding::ucs(char_type c) const -{ - BOOST_ASSERT(c < 256); - return encoding_table[c]; -} - - bool Encodings::isComposeChar_hebrew(char_type c) { return c <= 0xd2 && c >= 0xc0 && @@ -315,13 +235,7 @@ Encoding const * Encodings::getEncoding(string const & encoding) const Encodings::Encodings() { - encodinglist["iso8859-1"] = Encoding("iso8859-1", "latin1", tab_iso8859_1); - symbol_encoding_ = -#ifdef USE_UNICODE_FOR_SYMBOLS - Encoding("symbol", "", tab_symbol); -#else - Encoding("symbol", "", tab_iso8859_1); -#endif + symbol_encoding_ = Encoding("symbol", "", ""); } void Encodings::read(string const & filename) @@ -347,14 +261,10 @@ void Encodings::read(string const & filename) string const name = lex.getString(); lex.next(); string const latexname = lex.getString(); + lex.next(); + string const iconvname = lex.getString(); lyxerr[Debug::INFO] << "Reading encoding " << name << endl; - char_type table[256]; - for (unsigned int i = 0; i < 256; ++i) { - lex.next(); - string const tmp = lex.getString(); - table[i] = ::strtol(tmp.c_str(), 0 , 16); - } - encodinglist[name] = Encoding(name, latexname, table); + encodinglist[name] = Encoding(name, latexname, iconvname); if (lex.lex() != et_end) lex.printError("Encodings::read: " "missing end"); diff --git a/src/encoding.h b/src/encoding.h index 1463126c03..6821f93dba 100644 --- a/src/encoding.h +++ b/src/encoding.h @@ -26,25 +26,24 @@ public: /// Encoding() {} /// - Encoding(std::string const & n, std::string const & l, char_type const * e) - : Name_(n), LatexName_(l) + Encoding(std::string const & n, std::string const & l, + std::string const & i) + : Name_(n), LatexName_(l), iconvName_(i) { - for (int i = 0; i < 256; ++i) - 
encoding_table[i] = e[i]; } /// std::string const & name() const { return Name_; } /// std::string const & latexName() const { return LatexName_; } /// - char_type ucs(char_type c) const; + std::string const & iconvName() const { return iconvName_; } private: /// std::string Name_; /// std::string LatexName_; /// - char_type encoding_table[256]; + std::string iconvName_; }; extern Encoding symbol_encoding; diff --git a/src/exporter.C b/src/exporter.C index 762428fa5d..fa32fc1e5b 100644 --- a/src/exporter.C +++ b/src/exporter.C @@ -203,7 +203,8 @@ bool Exporter::Export(Buffer * buffer, string const & format, // LaTeX backend else if (backend_format == format) { runparams.nice = true; - buffer->makeLaTeXFile(filename, string(), runparams); + if (!buffer->makeLaTeXFile(filename, string(), runparams)) + return false; } else if (!lyxrc.tex_allows_spaces && contains(buffer->filePath(), ' ')) { Alert::error(_("File name error"), @@ -211,7 +212,8 @@ bool Exporter::Export(Buffer * buffer, string const & format, return false; } else { runparams.nice = false; - buffer->makeLaTeXFile(filename, buffer->filePath(), runparams); + if (!buffer->makeLaTeXFile(filename, buffer->filePath(), runparams)) + return false; } string const error_type = (format == "program")? "Build" : bufferFormat(*buffer); diff --git a/src/frontends/qt4/QDocumentDialog.C b/src/frontends/qt4/QDocumentDialog.C index 5bd74a7af0..e4cd7bb417 100644 --- a/src/frontends/qt4/QDocumentDialog.C +++ b/src/frontends/qt4/QDocumentDialog.C @@ -58,6 +58,10 @@ namespace frontend { namespace { +// FIXME: This list is incomplete. It should not be hardcoded but come from +// the available encodings in src/encodings.C +// FIXME: "default" is no valid encoding anymore. Nevertheless it occurs also +// in other source files. 
char const * encodings[] = { "LaTeX default", "latin1", "latin2", "latin3", "latin4", "latin5", "latin9", "koi8-r", "koi8-u", "cp866", "cp1251", @@ -938,15 +942,20 @@ void QDocumentDialog::update(BufferParams const & params) if (params.inputenc != "auto") { langModule->defaultencodingCB->setChecked(false); + // FIXME: "default" is no valid encoding anymore if (params.inputenc == "default") { langModule->encodingCO->setCurrentIndex(0); } else { int i = 0; while (encodings[i]) { - if (encodings[i] == params.inputenc) + if (encodings[i] == params.inputenc) { langModule->encodingCO->setCurrentIndex(i); + break; + } ++i; } + // FIXME: possible data loss because the encodings list is + // incomplete } } diff --git a/src/output_latex.C b/src/output_latex.C index c76bd8c187..6ef28173db 100644 --- a/src/output_latex.C +++ b/src/output_latex.C @@ -29,6 +29,7 @@ #include "insets/insetoptarg.h" #include "support/lstrings.h" +#include "support/unicode.h" namespace lyx { @@ -236,7 +237,7 @@ ParagraphList::const_iterator TeXOnePar(Buffer const & buf, ParagraphList const & paragraphs, ParagraphList::const_iterator pit, - odocstream & os, TexRow & texrow, + odocstream & ucs4, TexRow & texrow, OutputParams const & runparams_in, string const & everypar) { @@ -274,34 +275,40 @@ TeXOnePar(Buffer const & buf, if (!lyxrc.language_command_end.empty() && previous_language->babel() != doc_language->babel()) { - os << from_ascii(subst(lyxrc.language_command_end, + ucs4 << from_ascii(subst(lyxrc.language_command_end, "$$lang", previous_language->babel())) - << endl; + << endl; texrow.newline(); } if (lyxrc.language_command_end.empty() || language->babel() != doc_language->babel()) { - os << from_ascii(subst( + ucs4 << from_ascii(subst( lyxrc.language_command_begin, "$$lang", language->babel())) - << endl; + << endl; texrow.newline(); } } - if (false) { - if (bparams.inputenc == "auto" && - language->encoding() != previous_language->encoding()) { - os << "\\inputencoding{" - << 
from_ascii(language->encoding()->latexName()) - << "}\n"; - texrow.newline(); - } + if (bparams.inputenc == "auto" && + language->encoding() != previous_language->encoding()) { + ucs4 << "\\inputencoding{" + << from_ascii(language->encoding()->latexName()) + << "}\n"; + texrow.newline(); } + // We need to output the paragraph to a temporary stream if we + // need to change the encoding. Don't do this if the result does + // not go to a file but to the builtin source viewer. + odocstringstream par_stream; + bool const change_encoding = !runparams_in.dryrun && + bparams.inputenc == "auto" && + language->encoding() != doc_language->encoding(); + odocstream & os(change_encoding ? par_stream : ucs4); // In an an inset with unlimited length (all in one row), // don't allow any special options in the paragraph @@ -474,6 +481,33 @@ TeXOnePar(Buffer const & buf, if (boost::next(pit) != paragraphs.end() && lyxerr.debugging(Debug::LATEX)) lyxerr << "TeXOnePar...done " << &*boost::next(pit) << endl; + + if (change_encoding) { + lyxerr[Debug::LATEX] << "Converting paragraph to encoding " + << language->encoding()->iconvName() << endl; + docstring const par = par_stream.str(); + // Convert the paragraph to the 8bit encoding that we need to + // output. + std::vector const encoded = lyx::ucs4_to_eightbit(par.c_str(), + par.size(), language->encoding()->iconvName()); + // Interpret this as if it was in the 8 bit encoding of the + // document language and convert it back to UCS4. That means + // that faked does not contain pure UCS4 anymore, but what + // will be written to the output file will be correct, because + // the real output stream will do a UCS4 -> document language + // encoding conversion. + // This is of course a hack, but not a bigger one than mixing + // two encodings in one file. + // FIXME: Catch iconv conversion errors and display an error + // dialog. 
+ std::vector const faked = lyx::eightbit_to_ucs4(encoded.data(), + encoded.size(), doc_language->encoding()->iconvName()); + std::vector::const_iterator const end = faked.end(); + std::vector::const_iterator it = faked.begin(); + for (; it != end; ++it) + ucs4.put(*it); + } + return ++pit; } diff --git a/src/support/docstream.C b/src/support/docstream.C index a2bb3c3172..03ee204633 100644 --- a/src/support/docstream.C +++ b/src/support/docstream.C @@ -22,6 +22,8 @@ using lyx::ucs4_codeset; using lyx::ucs2_codeset; +using std::string; + namespace { @@ -31,51 +33,42 @@ char const * utf8_codeset = "UTF-8"; // lyxerr in the future. -class utf8_codecvt_facet_exception : public std::exception { -public: - virtual ~utf8_codecvt_facet_exception() throw() {} - virtual const char* what() const throw() - { - return "iconv problem in utf8_codecvt_facet initialization"; - } -}; - - /// codecvt facet for conversion of UCS4 (internal representation) to UTF8 /// (external representation) or vice versa -class utf8_codecvt_facet : public std::codecvt +class iconv_codecvt_facet : public std::codecvt { typedef std::codecvt base; public: /// Constructor. You have to specify with \p inout whether you want /// to use this facet only for input, only for output or for both. 
- explicit utf8_codecvt_facet(std::ios_base::openmode inout = std::ios_base::in | std::ios_base::out, + explicit iconv_codecvt_facet(string const & encoding = "UTF-8", + std::ios_base::openmode inout = std::ios_base::in | std::ios_base::out, size_t refs = 0) - : base(refs) + : base(refs), utf8_(encoding == "UTF-8") { if (inout & std::ios_base::in) { - in_cd_ = iconv_open(ucs4_codeset, utf8_codeset); + in_cd_ = iconv_open(ucs4_codeset, encoding.c_str()); if (in_cd_ == (iconv_t)(-1)) { fprintf(stderr, "Error %d returned from iconv_open(in_cd_): %s\n", errno, strerror(errno)); fflush(stderr); - throw utf8_codecvt_facet_exception(); + throw lyx::iconv_codecvt_facet_exception(); } } else in_cd_ = (iconv_t)(-1); if (inout & std::ios_base::out) { - out_cd_ = iconv_open(utf8_codeset, ucs4_codeset); + out_cd_ = iconv_open(encoding.c_str(), ucs4_codeset); if (out_cd_ == (iconv_t)(-1)) { fprintf(stderr, "Error %d returned from iconv_open(out_cd_): %s\n", errno, strerror(errno)); fflush(stderr); - throw utf8_codecvt_facet_exception(); + throw lyx::iconv_codecvt_facet_exception(); } } else out_cd_ = (iconv_t)(-1); } protected: - virtual ~utf8_codecvt_facet() + virtual ~iconv_codecvt_facet() { if (in_cd_ != (iconv_t)(-1)) if (iconv_close(in_cd_) == -1) { @@ -155,8 +148,10 @@ protected: } virtual int do_max_length() const throw() { - // UTF8 uses at most 6 bytes to represent one code point - return 6; + // UTF8 uses at most 6 bytes to represent one UCS4 code point. + // All other encodings encode one UCS4 code point in one byte + // (and can therefore only encode a subset of UCS4) + return utf8_ ? 6 : 1; } private: /// Do the actual conversion. The interface is equivalent to that of @@ -186,6 +181,8 @@ private: } iconv_t in_cd_; iconv_t out_cd_; + /// Is the narrow encoding UTF8? 
+ bool utf8_; }; } // namespace anon @@ -194,10 +191,16 @@ private: namespace lyx { +const char * iconv_codecvt_facet_exception::what() const throw() +{ + return "iconv problem in iconv_codecvt_facet initialization"; +} + + idocfstream::idocfstream() : base() { std::locale global; - std::locale locale(global, new utf8_codecvt_facet(in)); + std::locale locale(global, new iconv_codecvt_facet(utf8_codeset, in)); imbue(locale); } @@ -207,26 +210,27 @@ idocfstream::idocfstream(const char* s, std::ios_base::openmode mode) { // We must imbue the stream before openening the file std::locale global; - std::locale locale(global, new utf8_codecvt_facet(in)); + std::locale locale(global, new iconv_codecvt_facet(utf8_codeset, in)); imbue(locale); open(s, mode); } -odocfstream::odocfstream() : base() +odocfstream::odocfstream(string const & encoding) : base() { std::locale global; - std::locale locale(global, new utf8_codecvt_facet(out)); + std::locale locale(global, new iconv_codecvt_facet(encoding, out)); imbue(locale); } - -odocfstream::odocfstream(const char* s, std::ios_base::openmode mode) + +odocfstream::odocfstream(const char* s, std::ios_base::openmode mode, + string const & encoding) : base() { // We must imbue the stream before openening the file std::locale global; - std::locale locale(global, new utf8_codecvt_facet(out)); + std::locale locale(global, new iconv_codecvt_facet(encoding, out)); imbue(locale); open(s, mode); } @@ -236,7 +240,7 @@ odocfstream::odocfstream(const char* s, std::ios_base::openmode mode) #if (!defined(HAVE_WCHAR_T) || SIZEOF_WCHAR_T != 4) && defined(__GNUC__) // We get undefined references to these virtual methods. This looks like // a bug in gcc. The implementation here does not do anything useful, since -// it is overriden in utf8_codecvt_facet. +// it is overridden in iconv_codecvt_facet. 
namespace std { template<> codecvt::result codecvt::do_out(mbstate_t &, const lyx::char_type *, const lyx::char_type *, const lyx::char_type *&, diff --git a/src/support/docstream.h b/src/support/docstream.h index c1f4f1ad8b..f5245ec521 100644 --- a/src/support/docstream.h +++ b/src/support/docstream.h @@ -19,6 +19,12 @@ namespace lyx { +class iconv_codecvt_facet_exception : public std::exception { +public: + virtual ~iconv_codecvt_facet_exception() throw() {} + virtual const char * what() const throw(); +}; + /// Base class for UCS4 input streams typedef std::basic_istream idocstream; @@ -45,14 +51,15 @@ public: ~idocfstream() {} }; -/// File stream for writing UTF8-encoded files with automatic conversion from -/// UCS4. +/// File stream for writing files in 8bit encoding \p encoding with automatic +/// conversion from UCS4. class odocfstream : public std::basic_ofstream { typedef std::basic_ofstream base; public: - odocfstream(); + odocfstream(std::string const & encoding = "UTF-8"); explicit odocfstream(const char* s, - std::ios_base::openmode mode = std::ios_base::out|std::ios_base::trunc); + std::ios_base::openmode mode = std::ios_base::out|std::ios_base::trunc, + std::string const & encoding = "UTF-8"); ~odocfstream() {} }; diff --git a/src/support/unicode.C b/src/support/unicode.C index aea453ef00..c9b9210a4e 100644 --- a/src/support/unicode.C +++ b/src/support/unicode.C @@ -20,6 +20,7 @@ #include #include +#include namespace lyx { @@ -226,4 +227,25 @@ ucs4_to_utf8(lyx::char_type const * ucs4str, size_t ls) } +std::vector +eightbit_to_ucs4(char const * s, size_t ls, std::string const & encoding) +{ + static std::map cd; + if (cd.find(encoding) == cd.end()) + cd[encoding] = (iconv_t)(-1); + return iconv_convert(&cd[encoding], ucs4_codeset, + encoding.c_str(), s, ls); +} + + +std::vector +ucs4_to_eightbit(lyx::char_type const * ucs4str, size_t ls, std::string const & encoding) +{ + static std::map cd; + if (cd.find(encoding) == cd.end()) + cd[encoding] = 
(iconv_t)(-1); + return iconv_convert(&cd[encoding], encoding.c_str(), + ucs4_codeset, ucs4str, ls); +} + } // namespace lyx diff --git a/src/support/unicode.h b/src/support/unicode.h index f1a75da189..7f99f52832 100644 --- a/src/support/unicode.h +++ b/src/support/unicode.h @@ -56,6 +56,16 @@ std::vector ucs4_to_utf8(std::vector const & ucs4str); std::vector ucs4_to_utf8(lyx::char_type const * ucs4str, size_t ls); +/// convert \p s from encoding \p encoding to ucs4. +/// \p encoding must be a valid iconv 8bit encoding +std::vector +eightbit_to_ucs4(char const * s, size_t ls, std::string const & encoding); + +/// convert \p ucs4str from ucs4 to encoding \p encoding. +/// \p encoding must be a valid iconv 8bit encoding +std::vector +ucs4_to_eightbit(lyx::char_type const * ucs4str, size_t ls, std::string const & encoding); + extern char const * ucs4_codeset; extern char const * ucs2_codeset;