Restore inputenc support

* src/encoding.h
	(encoding_table): remove, this is no longer needed with unicode
	(iconvName_): new member: name of the encoding in iconv syntax

	* src/exporter.C
	(Exporter::Export): Use return value of Buffer::makeLaTeXFile

	* src/buffer.[Ch]
	(Buffer::makeLaTeXFile): return whether the file has been created
	successfully

	* src/buffer.C
	(Buffer::makeLaTeXFile): Use a docstream that converts to the correct
	encoding
	Display an error dialog if something went wrong

	* src/bufferparams.C
	(BufferParams::writeLaTeX): Undo the utf8 inputenc hack

	* src/frontends/qt4/QDocumentDialog.C: Add some FIXMEs

	* src/support/unicode.[Ch]
	(eightbit_to_ucs4): New conversion function
	(ucs4_to_eightbit): New conversion function

	* src/support/docstream.[Ch]
	(utf8_codecvt_facet_exception): Move to header and rename to
	iconv_codecvt_facet_exception
	(odocfstream): Take the encoding as argument

	* src/support/docstream.C
	(utf8_codecvt_facet): Generalize to other encodings than utf8 and
	rename to iconv_codecvt_facet

	* src/encoding.C: Remove obsolete tables

	* src/output_latex.C
	(TeXOnePar): Convert the paragraph to different encoding if needed

	* lib/encodings: Remove obsolete tables


git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/trunk@15564 a592a061-630c-0410-9148-cb99ea01b6c8
This commit is contained in:
Georg Baum 2006-10-26 15:01:45 +00:00
parent 35847cdad8
commit fa1856440c
13 changed files with 222 additions and 555 deletions

View File

@ -1,401 +1,59 @@
Encoding iso8859-2 latin2
00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f
10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f
20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f
30 31 32 33 34 35 36 37 38 39 3a 3b 3c 3d 3e 3f
40 41 42 43 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f
50 51 52 53 54 55 56 57 58 59 5a 5b 5c 5d 5e 5f
60 61 62 63 64 65 66 67 68 69 6a 6b 6c 6d 6e 6f
70 71 72 73 74 75 76 77 78 79 7a 7b 7c 7d 7e 7f
80 81 82 83 84 85 86 87 88 89 8a 8b 8c 8d 8e 8f
90 91 92 93 94 95 96 97 98 99 9a 9b 9c 9d 9e 9f
00a0 0104 02d8 0141 00a4 013d 015a 00a7
00a8 0160 015e 0164 0179 00ad 017d 017b
00b0 0105 02db 0142 00b4 013e 015b 02c7
00b8 0161 015f 0165 017a 02dd 017e 017c
0154 00c1 00c2 0102 00c4 0139 0106 00c7
010c 00c9 0118 00cb 011a 00cd 00ce 010e
0110 0143 0147 00d3 00d4 0150 00d6 00d7
0158 016e 00da 0170 00dc 00dd 0162 00df
0155 00e1 00e2 0103 00e4 013a 0107 00e7
010d 00e9 0119 00eb 011b 00ed 00ee 010f
0111 0144 0148 00f3 00f4 0151 00f6 00f7
0159 016f 00fa 0171 00fc 00fd 0163 02d9
# FIXME: Have a look at the encodings known by the inputenc package and add
# missing ones.
# FIXME: Find out whether this file is used for more than LaTeX file
# generation. If not, it does not make sense to have encodings with
# an "unknown" LaTeX name.
# Order of names: LyX name LaTeX name iconv name
# FIXME: Add this (file format change):
#Encoding utf8 utf8 UTF-8
#End
Encoding iso8859-1 latin1 ISO-8859-1
End
Encoding iso8859-3 latin3
00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f
10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f
20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f
30 31 32 33 34 35 36 37 38 39 3a 3b 3c 3d 3e 3f
40 41 42 43 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f
50 51 52 53 54 55 56 57 58 59 5a 5b 5c 5d 5e 5f
60 61 62 63 64 65 66 67 68 69 6a 6b 6c 6d 6e 6f
70 71 72 73 74 75 76 77 78 79 7a 7b 7c 7d 7e 7f
80 81 82 83 84 85 86 87 88 89 8a 8b 8c 8d 8e 8f
90 91 92 93 94 95 96 97 98 99 9a 9b 9c 9d 9e 9f
00a0 0126 02d8 00a3 00a4 ffff 0124 00a7
00a8 0130 015e 011e 0134 00ad ffff 017b
00b0 0127 00b2 00b3 00b4 00b5 0125 00b7
00b8 0131 015f 011f 0135 00bd ffff 017c
00c0 00c1 00c2 ffff 00c4 010a 0108 00c7
00c8 00c9 00ca 00cb 00cc 00cd 00ce 00cf
ffff 00d1 00d2 00d3 00d4 0120 00d6 00d7
011c 00d9 00da 00db 00dc 016c 015c 00df
00e0 00e1 00e2 ffff 00e4 010b 0109 00e7
00e8 00e9 00ea 00eb 00ec 00ed 00ee 00ef
ffff 00f1 00f2 00f3 00f4 0121 00f6 00f7
011d 00f9 00fa 00fb 00fc 016d 015d 02d9
Encoding iso8859-2 latin2 ISO-8859-2
End
Encoding iso8859-4 latin4
00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f
10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f
20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f
30 31 32 33 34 35 36 37 38 39 3a 3b 3c 3d 3e 3f
40 41 42 43 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f
50 51 52 53 54 55 56 57 58 59 5a 5b 5c 5d 5e 5f
60 61 62 63 64 65 66 67 68 69 6a 6b 6c 6d 6e 6f
70 71 72 73 74 75 76 77 78 79 7a 7b 7c 7d 7e 7f
80 81 82 83 84 85 86 87 88 89 8a 8b 8c 8d 8e 8f
90 91 92 93 94 95 96 97 98 99 9a 9b 9c 9d 9e 9f
00a0 0104 0138 0156 00a4 0128 013b 00a7
00a8 0160 0112 0122 0166 00ad 017d 00af
00b0 0105 02db 0157 00b4 0129 013c 02c7
00b8 0161 0113 0123 0167 014a 017e 014b
0100 00c1 00c2 00c3 00c4 00c5 00c6 012e
010c 00c9 0118 00cb 0116 00cd 00ce 012a
0110 0145 014c 0136 00d4 00d5 00d6 00d7
00d8 0172 00da 00db 00dc 0168 016a 00df
0101 00e1 00e2 00e3 00e4 00e5 00e6 012f
010d 00e9 0119 00eb 0117 00ed 00ee 012b
0111 0146 014d 0137 00f4 00f5 00f6 00f7
00f8 0173 00fa 00fb 00fc 0169 016b 02d9
Encoding iso8859-3 latin3 ISO-8859-3
End
Encoding iso8859-5 iso88595
00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f
10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f
20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f
30 31 32 33 34 35 36 37 38 39 3a 3b 3c 3d 3e 3f
40 41 42 43 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f
50 51 52 53 54 55 56 57 58 59 5a 5b 5c 5d 5e 5f
60 61 62 63 64 65 66 67 68 69 6a 6b 6c 6d 6e 6f
70 71 72 73 74 75 76 77 78 79 7a 7b 7c 7d 7e 7f
80 81 82 83 84 85 86 87 88 89 8a 8b 8c 8d 8e 8f
90 91 92 93 94 95 96 97 98 99 9a 9b 9c 9d 9e 9f
00a0 0401 0402 0403 0404 0405 0406 0407
0408 0409 040a 040b 040c 00ad 040e 040f
0410 0411 0412 0413 0414 0415 0416 0417
0418 0419 041a 041b 041c 041d 041e 041f
0420 0421 0422 0423 0424 0425 0426 0427
0428 0429 042a 042b 042c 042d 042e 042f
0430 0431 0432 0433 0434 0435 0436 0437
0438 0439 043a 043b 043c 043d 043e 043f
0440 0441 0442 0443 0444 0445 0446 0447
0448 0449 044a 044b 044c 044d 044e 044f
2116 0451 0452 0453 0454 0455 0456 0457
0458 0459 045a 045b 045c 00a7 045e 045f
Encoding iso8859-4 latin4 ISO-8859-4
End
Encoding iso8859-6 unknown
00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f
10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f
20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f
0660 0661 0662 0663 0664 0665 0666 0667
0668 0669 003a 003b 003c 003d 003e 061f
0040 fe80 fe81 fe83 fe85 fe87 fe89 fe8d
fe8f fe93 fe95 fe99 fe9d fea1 fea5 fea9
feab fead feaf feb1 feb5 feb9 febd fec1
fec5 fec9 fecd 005d 005c 005b 005e 005f
0640 fed1 fed5 fed9 fedd fee1 fee5 fee9
feed feef fef1 065b 065c 064d 064e 064f
0650 0651 0652 0073 0074 0075 fef5 fef7
fef9 fefb 007a 007b 007c 007d 007e 007f
80 81 82 83 84 85 86 87 88 89 8a 8b 8c 8d 8e 8f
fe71 0091 0092 fe77 fe79 fe7b fe7f fe7d
0098 0099 009a 009b 009c 009d 009e 009f
fe8c fe82 fe84 fe86 fe88 fe8e feaa feac
feae feb0 feee fef0 fe91 fe92 fe90 fe97
fe98 fe94 fe97 fe98 fe96 fe9b fe9c fe9a
fe9f fea0 fe9e fea3 fea4 fea2 fea7 fea8
fea6 feb3 feb4 feb2 feb7 feb8 feb6 febb
febc feba febf fec0 febe fec3 fec4 fec2
fec7 fec8 fec6 fecb fecc feca fecf fed0
fece fed3 fed4 fed2 fed7 fed8 fed6 fedb
fedc feda fedf fee0 fede fee3 fee4 fee2
fee7 fee8 fee6 feeb feec feea fef3 fef4
fef2 00f1 00f2 00f3 00f4 00f5 00f6 00f7
fe8b fe8a fef6 fefa fef8 fefc 00fe 00ff
Encoding iso8859-5 iso88595 ISO-8859-5
End
Encoding iso8859-7 iso-8859-7
00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f
10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f
20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f
30 31 32 33 34 35 36 37 38 39 3a 3b 3c 3d 3e 3f
40 41 42 43 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f
50 51 52 53 54 55 56 57 58 59 5a 5b 5c 5d 5e 5f
60 61 62 63 64 65 66 67 68 69 6a 6b 6c 6d 6e 6f
70 71 72 73 74 75 76 77 78 79 7a 7b 7c 7d 7e 7f
80 81 82 83 84 85 86 87 88 89 8a 8b 8c 8d 8e 8f
90 91 92 93 94 95 96 97 98 99 9a 9b 9c 9d 9e 9f
00a0 0371 0372 00a3 ffff ffff 00a6 00a7
00a8 00a9 ffff 00ab 00ac 00ad ffff 2015
00b0 00b1 00b2 00b3 03f3 03f4 0386 00b7
0388 0389 038a 00bb 038c 00bd 038e 038f
0390 0391 0392 0393 0394 0395 0396 0397
0398 0399 039a 039b 039c 039d 039e 039f
03a0 03a1 ffff 03a3 03a4 03a5 03a6 03a7
03a8 03a9 03aa 03ab 03ac 03ad 03ae 03af
03b0 03b1 03b2 03b3 03b4 03b5 03b6 03b7
03b8 03b9 03ba 03bb 03bc 03bd 03be 03bf
03c0 03c1 03c2 03c3 03c4 03c5 03c6 03c7
03c8 03c9 03ca 03cb 03cc 03cd 03ce ffff
Encoding iso8859-6 unknown ISO-8859-6
End
Encoding iso8859-9 latin5
00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f
10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f
20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f
30 31 32 33 34 35 36 37 38 39 3a 3b 3c 3d 3e 3f
40 41 42 43 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f
50 51 52 53 54 55 56 57 58 59 5a 5b 5c 5d 5e 5f
60 61 62 63 64 65 66 67 68 69 6a 6b 6c 6d 6e 6f
70 71 72 73 74 75 76 77 78 79 7a 7b 7c 7d 7e 7f
80 81 82 83 84 85 86 87 88 89 8a 8b 8c 8d 8e 8f
90 91 92 93 94 95 96 97 98 99 9a 9b 9c 9d 9e 9f
00a0 00a1 00a2 00a3 00a4 00a5 00a6 00a7
00a8 00a9 00aa 00ab 00ac 00ad 00ae 00af
00b0 00b1 00b2 00b3 00b4 00b5 00b6 00b7
00b8 00b9 00ba 00bb 00bc 00bd 00be 00bf
00c0 00c1 00c2 00c3 00c4 00c5 00c6 00c7
00c8 00c9 00ca 00cb 00cc 00cd 00ce 00cf
011e 00d1 00d2 00d3 00d4 00d5 00d6 00d7
00d8 00d9 00da 00db 00dc 0130 015e 00df
00e0 00e1 00e2 00e3 00e4 00e5 00e6 00e7
00e8 00e9 00ea 00eb 00ec 00ed 00ee 00ef
011f 00f1 00f2 00f3 00f4 00f5 00f6 00f7
00f8 00f9 00fa 00fb 00fc 0131 015f 00ff
Encoding iso8859-7 iso-8859-7 ISO-8859-7
End
Encoding iso8859-13 l7xenc
00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f
10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f
20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f
30 31 32 33 34 35 36 37 38 39 3a 3b 3c 3d 3e 3f
40 41 42 43 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f
50 51 52 53 54 55 56 57 58 59 5a 5b 5c 5d 5e 5f
60 61 62 63 64 65 66 67 68 69 6a 6b 6c 6d 6e 6f
70 71 72 73 74 75 76 77 78 79 7a 7b 7c 7d 7e 7f
80 81 82 83 84 85 86 87 88 89 8a 8b 8c 8d 8e 8f
90 91 92 93 94 95 96 97 98 99 9a 9b 9c 9d 9e 9f
00a0 201d 00a2 00a3 00a4 201e 00a6 00a7
00d8 00a9 0156 00ab 00ac 00ad 00ae 00c6
00b0 00b1 00b2 00b3 201c 00b5 00b6 00b7
00f8 00b9 0157 00bb 00bc 00bd 00be 00e6
0104 012e 0100 0106 00c4 00c5 0118 0112
010c 00c9 0179 0116 0122 0136 012a 013b
0160 0143 0145 00d3 014c 00d5 00d6 00d7
0172 0141 015a 016a 00dc 017b 017d 00df
0105 012f 0101 0107 00e4 00e5 0119 0113
010d 00e9 017a 0117 0123 0127 012b 013c
0161 0144 0146 00f3 014d 00f5 00f6 00f7
0173 0142 015b 016b 00fc 017c 017e 2019
Encoding iso8859-9 latin5 ISO-8859-9
End
Encoding iso8859-15 latin9
00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f
10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f
20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f
30 31 32 33 34 35 36 37 38 39 3a 3b 3c 3d 3e 3f
40 41 42 43 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f
50 51 52 53 54 55 56 57 58 59 5a 5b 5c 5d 5e 5f
60 61 62 63 64 65 66 67 68 69 6a 6b 6c 6d 6e 6f
70 71 72 73 74 75 76 77 78 79 7a 7b 7c 7d 7e 7f
80 81 82 83 84 85 86 87 88 89 8a 8b 8c 8d 8e 8f
90 91 92 93 94 95 96 97 98 99 9a 9b 9c 9d 9e 9f
00a0 00a1 00a2 00a3 20ac 00a5 0160 00a7
0161 00a9 00aa 00ab 00ac 00ad 00ae 00af
00b0 00b1 00b2 00b3 017d 00b5 00b6 00b7
017e 00b9 00ba 00bb 0152 0153 0178 00bf
00c0 00c1 00c2 00c3 00c4 00c5 00c6 00c7
00c8 00c9 00ca 00cb 00cc 00cd 00ce 00cf
00d0 00d1 00d2 00d3 00d4 00d5 00d6 00d7
00d8 00d9 00da 00db 00dc 00dd 00de 00df
00e0 00e1 00e2 00e3 00e4 00e5 00e6 00e7
00e8 00e9 00ea 00eb 00ec 00ed 00ee 00ef
00f0 00f1 00f2 00f3 00f4 00f5 00f6 00f7
00f8 00f9 00fa 00fb 00fc 00fd 00fe 00ff
Encoding iso8859-13 l7xenc ISO-8859-13
End
Encoding cp1255 cp1255
00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f
10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f
20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f
30 31 32 33 34 35 36 37 38 39 3a 3b 3c 3d 3e 3f
40 41 42 43 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f
50 51 52 53 54 55 56 57 58 59 5a 5b 5c 5d 5e 5f
60 61 62 63 64 65 66 67 68 69 6a 6b 6c 6d 6e 6f
70 71 72 73 74 75 76 77 78 79 7a 7b 7c 7d 7e 7f
80 81 82 83 84 85 86 87 88 89 8a 8b 8c 8d 8e 8f
90 91 92 93 94 95 96 97 98 99 9a 9b 9c 9d 9e 9f
00a0 ffff 00a2 00a3 00a4 00a5 00a6 00a7
00a8 00a9 00d7 00ab 00ac 00ad 00ae 203e
00b0 00b1 00b2 00b3 00b4 00b5 00b6 00b7
00b8 00b9 00f7 00bb 00bc 00bd 00be 00bf
05b0 05b1 05b2 05b3 05b4 05b5 05b6 05b7
05b8 05b9 ffff 05bb 05bc 05bd 05be 05bf
05c0 05c1 05c2 05c3 05f0 05f1 05f2 05f3
05f4 ffff ffff ffff ffff ffff ffff ffff
05d0 05d1 05d2 05d3 05d4 05d5 05d6 05d7
05d8 05d9 05da 05db 05dc 05dd 05de 05df
05e0 05e1 05e2 05e3 05e4 05e5 05e6 05e7
05e8 05e9 05ea ffff ffff ffff ffff ffff
Encoding iso8859-15 latin9 ISO-8859-15
End
Encoding cp1251 cp1251
00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f
10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f
20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f
30 31 32 33 34 35 36 37 38 39 3a 3b 3c 3d 3e 3f
40 41 42 43 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f
50 51 52 53 54 55 56 57 58 59 5a 5b 5c 5d 5e 5f
60 61 62 63 64 65 66 67 68 69 6a 6b 6c 6d 6e 6f
70 71 72 73 74 75 76 77 78 79 7a 7b 7c 7d 7e 7f
0402 0403 201a 0453 201e 2026 2020 2021
20ac 2030 0409 2039 040a 040c 040b 040f
0452 2018 2019 201c 201d 2022 2013 2014
0098 2122 0459 203a 045a 045c 045b 045f
00a0 040e 045e 0408 00a4 0490 00a6 00a7
0401 00a9 0404 00ab 00ac 00ad 00ae 0407
00b0 00b1 0406 0456 0491 00b5 00b6 00b7
0451 2116 0454 00bb 0458 0405 0455 0457
0410 0411 0412 0413 0414 0415 0416 0417
0418 0419 041a 041b 041c 041d 041e 041f
0420 0421 0422 0423 0424 0425 0426 0427
0428 0429 042a 042b 042c 042d 042e 042f
0430 0431 0432 0433 0434 0435 0436 0437
0438 0439 043a 043b 043c 043d 043e 043f
0440 0441 0442 0443 0444 0445 0446 0447
0448 0449 044a 044b 044c 044d 044e 044f
Encoding cp1255 cp1255 CP1255
End
Encoding koi8 koi8-r
00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f
10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f
20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f
30 31 32 33 34 35 36 37 38 39 3a 3b 3c 3d 3e 3f
40 41 42 43 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f
50 51 52 53 54 55 56 57 58 59 5a 5b 5c 5d 5e 5f
60 61 62 63 64 65 66 67 68 69 6a 6b 6c 6d 6e 6f
70 71 72 73 74 75 76 77 78 79 7a 7b 7c 7d 7e 7f
2500 2502 250c 2510 2514 2518 251c 2524
252c 2534 253c 2580 2584 2588 258c 2590
2591 2592 2593 2320 25a0 2219 221a 2248
2264 2265 00a0 2321 00b0 00b2 00b7 00f7
2550 2551 2552 0451 2553 2554 2555 2556
2557 2558 2559 255a 255b 255c 255d 255e
255f 2560 2561 0401 2562 2563 2564 2565
2566 2567 2568 2569 256a 256b 256c 00a9
044e 0430 0431 0446 0434 0435 0444 0433
0445 0438 0439 043a 043b 043c 043d 043e
043f 044f 0440 0441 0442 0443 0436 0432
044c 044b 0437 0448 044d 0449 0447 044a
042e 0410 0411 0426 0414 0415 0424 0413
0425 0418 0419 041a 041b 041c 041d 041e
041f 042f 0420 0421 0422 0423 0416 0412
042c 042b 0417 0428 042d 0429 0427 042a
Encoding cp1251 cp1251 CP1251
End
Encoding koi8-u koi8-u
00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f
10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f
20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f
30 31 32 33 34 35 36 37 38 39 3a 3b 3c 3d 3e 3f
40 41 42 43 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f
50 51 52 53 54 55 56 57 58 59 5a 5b 5c 5d 5e 5f
60 61 62 63 64 65 66 67 68 69 6a 6b 6c 6d 6e 6f
70 71 72 73 74 75 76 77 78 79 7a 7b 7c 7d 7e 7f
2500 2502 250c 2510 2514 2518 251c 2524
252c 2534 253c 2580 2584 2588 258c 2590
2591 2592 2593 2320 25a0 2219 221a 2248
2264 2265 00a0 2321 00b0 00b2 00b7 00f7
2550 2551 2552 0451 0454 2554 0456 0457
2557 2558 2559 255a 255b 0491 255d 255e
255f 2560 2561 0401 0404 2563 0406 0407
2566 2567 2568 2569 256a 0490 256c 00a9
044e 0430 0431 0446 0434 0435 0444 0433
0445 0438 0439 043a 043b 043c 043d 043e
043f 044f 0440 0441 0442 0443 0436 0432
044c 044b 0437 0448 044d 0449 0447 044a
042e 0410 0411 0426 0414 0415 0424 0413
0425 0418 0419 041a 041b 041c 041d 041e
041f 042f 0420 0421 0422 0423 0416 0412
042c 042b 0417 0428 042d 0429 0427 042a
Encoding koi8 koi8-r KOI8-R
End
Encoding tis620-0 unknown
00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f
10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f
20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f
30 31 32 33 34 35 36 37 38 39 3a 3b 3c 3d 3e 3f
40 41 42 43 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f
50 51 52 53 54 55 56 57 58 59 5a 5b 5c 5d 5e 5f
60 61 62 63 64 65 66 67 68 69 6a 6b 6c 6d 6e 6f
70 71 72 73 74 75 76 77 78 79 7a 7b 7c 7d 7e 7f
80 81 82 83 84 85 86 87 88 89 8a 8b 8c 8d 8e 8f
90 91 92 93 94 95 96 97 98 99 9a 9b 9c 9d 9e 9f
00a1 0e01 0e02 0e03 0e04 0e05 0e06 0e07
0e08 0e09 0e0a 0e0b 0e0c 0e0d 0e0e 0e0f
0e10 0e11 0e12 0e13 0e14 0e15 0e16 0e17
0e18 0e19 0e1a 0e1b 0e1c 0e1d 0e1e 0e1f
0e20 0e21 0e22 0e23 0e24 0e25 0e26 0e27
0e28 0e29 0e2a 0e2b 0e2c 0e2d 0e2e 0e2f
0e30 0e31 0e32 0e33 0e34 0e35 0e36 0e37
0e38 0e39 0e3a 00db 00dc 00de 00de 0e3f
0e40 0e41 0e42 0e43 0e44 0e45 0e46 0e47
0e48 0e49 0e4a 0e4b 0e4c 0e4d 0e4e 0e4f
0e50 0e51 0e52 0e53 0e54 0e55 0e56 0e57
0e58 0e59 0e5a 0e5b 00fc 00fd 00fe 00ff
Encoding koi8-u koi8-u KOI8-U
End
Encoding pt154 pt154
00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f
10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f
20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f
30 31 32 33 34 35 36 37 38 39 3a 3b 3c 3d 3e 3f
40 41 42 43 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f
50 51 52 53 54 55 56 57 58 59 5a 5b 5c 5d 5e 5f
60 61 62 63 64 65 66 67 68 69 6a 6b 6c 6d 6e 6f
70 71 72 73 74 75 76 77 78 79 7a 7b 7c 7d 7e 7f
0496 0492 201a 0493 201e 2026 2020 04ae
04b2 04af 04a0 2039 04a2 049a 04ba 04b8
0497 2018 2019 201c 201d 2022 2013 2014
04b3 2122 04a1 203a 04a3 049b 04bb 04b9
00a0 040e 045e 0408 04e8 0498 04b0 00a7
0401 00a9 04d8 00ab 00ac 00ad 00ae 049c
00b0 04b1 0406 0456 0499 04e9 00b6 00b7
0451 2116 04d9 00bb 0458 04aa 04ab 049d
0410 0411 0412 0413 0414 0415 0416 0417
0418 0419 041a 041b 041c 041d 041e 041f
0420 0421 0422 0423 0424 0425 0426 0427
0428 0429 042a 042b 042c 042d 042e 042f
0430 0431 0432 0433 0434 0435 0436 0437
0438 0439 043a 043b 043c 043d 043e 043f
0440 0441 0442 0443 0444 0445 0446 0447
0448 0449 044a 044b 044c 044d 044e 044f
Encoding tis620-0 unknown TIS-620-0
End
Encoding pt154 pt154 PT154
End

View File

@ -812,26 +812,44 @@ bool Buffer::do_writeFile(ostream & ofs) const
}
void Buffer::makeLaTeXFile(string const & fname,
bool Buffer::makeLaTeXFile(string const & fname,
string const & original_path,
OutputParams const & runparams,
bool output_preamble, bool output_body)
{
lyxerr[Debug::LATEX] << "makeLaTeXFile..." << endl;
string const encoding = (params().inputenc == "auto") ?
params().language->encoding()->iconvName() :
encodings.getEncoding(params().inputenc)->iconvName();
lyxerr[Debug::LATEX] << "makeLaTeXFile encoding: "
<< encoding << "..." << endl;
// FIXME UNICODE
// This creates an utf8 encoded file, but the inputenc commands
// specify other encodings
odocfstream ofs;
odocfstream ofs(encoding);
if (!openFileWrite(ofs, fname))
return;
return false;
writeLaTeXSource(ofs, original_path,
try {
writeLaTeXSource(ofs, original_path,
runparams, output_preamble, output_body);
}
catch (iconv_codecvt_facet_exception &) {
Alert::error(_("Encoding error"),
_("Some characters of your document are not "
"representable in the chosen encoding.\n"
"Changing the document encoding to utf8 could help."));
return false;
}
ofs.close();
if (ofs.fail())
if (ofs.fail()) {
lyxerr << "File '" << fname << "' was not closed properly." << endl;
Alert::error(_("Error closing file"),
_("The output file could not be closed properly.\n"
" Probably some characters of your document are not "
"representable in the chosen encoding.\n"
"Changing the document encoding to utf8 could help."));
return false;
}
return true;
}

View File

@ -146,7 +146,7 @@ public:
bool writeFile(std::string const &) const;
/// Just a wrapper for the method below, first creating the ofstream.
void makeLaTeXFile(std::string const & filename,
bool makeLaTeXFile(std::string const & filename,
std::string const & original_path,
OutputParams const &,
bool output_preamble = true,

View File

@ -835,32 +835,26 @@ bool BufferParams::writeLaTeX(odocstream & os, LaTeXFeatures & features,
texrow.newline();
}
// TODO: Some people want to support more encodings than UTF-8. They can have a field day around here
if (true) {
os << "\\usepackage[utf8]{inputenc}\n";
if (inputenc == "auto") {
string const doc_encoding =
language->encoding()->latexName();
// Create a list with all the input encodings used
// in the document
std::set<string> encodings =
features.getEncodingSet(doc_encoding);
os << "\\usepackage[";
std::set<string>::const_iterator it = encodings.begin();
std::set<string>::const_iterator const end = encodings.end();
for (; it != end; ++it)
os << from_ascii(*it) << ',';
os << from_ascii(doc_encoding) << "]{inputenc}\n";
texrow.newline();
} else if (inputenc != "default") {
os << "\\usepackage[" << from_ascii(inputenc)
<< "]{inputenc}\n";
texrow.newline();
} else {
if (inputenc == "auto") {
string const doc_encoding =
language->encoding()->latexName();
// Create a list with all the input encodings used
// in the document
std::set<string> encodings =
features.getEncodingSet(doc_encoding);
os << "\\usepackage[";
std::set<string>::const_iterator it = encodings.begin();
std::set<string>::const_iterator const end = encodings.end();
for (; it != end; ++it)
os << from_ascii(*it) << ',';
os << from_ascii(doc_encoding) << "]{inputenc}\n";
texrow.newline();
} else if (inputenc != "default") {
os << "\\usepackage[" << from_ascii(inputenc)
<< "]{inputenc}\n";
texrow.newline();
}
}
if (use_geometry || nonstandard_papersize) {

View File

@ -33,79 +33,6 @@ Encodings encodings;
namespace {
char_type tab_iso8859_1[256] = {
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff
};
#ifdef USE_UNICODE_FOR_SYMBOLS
char_type tab_symbol[256] = {
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
0x0020, 0x0021, 0x2200, 0x0023, 0x2203, 0x0025, 0x0026, 0x220b,
0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f,
0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
// 0x2245, 0x0391, 0x0392, 0x03a7, 0x0394, 0x0395, 0x03a6, 0x0393,
// 0x0397, 0x0399, 0x03d1, 0x039a, 0x039b, 0x039c, 0x039d, 0x039f,
// 0x03a0, 0x0398, 0x03a1, 0x03a3, 0x03a4, 0x03a5, 0x03c2, 0x03a9,
// 0x039e, 0x03a8, 0x0396, 0x005b, 0x2234, 0x005d, 0x22a5, 0xffff,
0x2245, 0x0041, 0x0042, 0x0058, 0x0394, 0x0045, 0x03c2, 0x03a9,
0x0048, 0x0049, 0x03d1, 0x004b, 0x039b, 0x004d, 0x004e, 0x004f,
0x03a0, 0x0398, 0x0050, 0x03a3, 0x0054, 0x0059, 0x03c2, 0x03a9,
0x039e, 0x03a8, 0x005a, 0x005b, 0x2234, 0x005d, 0x22a5, 0xffff,
0xffff, 0x03b1, 0x03b2, 0x03c7, 0x03b4, 0x03b5, 0x03d5, 0x03b3,
0x03b7, 0x03b9, 0x03c6, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03bf,
0x03c0, 0x03b8, 0x03c1, 0x03c3, 0x03c4, 0x03c5, 0x03d6, 0x03c9,
0x03be, 0x03c8, 0x03b6, 0x007b, 0x007c, 0x007d, 0x007e, 0xffff,
0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
0xffff, 0x03d2, 0x2032, 0x2264, 0x2215, 0x221e, 0xffff, 0x2663,
0x2666, 0x2665, 0x2660, 0x2194, 0x2190, 0x2191, 0x2192, 0x2193,
0x00b0, 0x00b1, 0x2033, 0x2265, 0x00d7, 0x221d, 0x2202, 0x2022,
0x00f7, 0x2260, 0x2261, 0x2248, 0x22ef, 0xffff, 0xffff, 0x21b5,
0x2135, 0x2111, 0x211c, 0x2118, 0x2297, 0x2295, 0x2205, 0x2229,
0x222a, 0x2283, 0x2287, 0x2284, 0x2282, 0x2286, 0x2208, 0x2209,
0x2220, 0x2207, 0x00ae, 0x00a9, 0x2122, 0x220f, 0x221a, 0x22c5,
0x00ac, 0x2227, 0x2228, 0x21d4, 0x21d0, 0x21d1, 0x21d2, 0x21d3,
0x2662, 0x2329, 0x00ae, 0x00a9, 0x2122, 0x2211, 0xffff, 0xffff,
0xffff, 0x2308, 0xffff, 0x230a, 0xffff, 0xffff, 0xffff, 0xffff,
0xffff, 0x232a, 0x222b, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
0xffff, 0x2309, 0xffff, 0x230b, 0xffff, 0xffff, 0xffff, 0xffff
};
#endif
char_type arabic_table2[63][4] = {
{0x41, 0x41, 0x41, 0x41}, // 0xc1 = hamza
{0x42, 0xa1, 0x42, 0xa1}, // 0xc2 = ligature madda on alef
@ -252,13 +179,6 @@ char_type const arabic_start = 0xc1;
char_type Encoding::ucs(char_type c) const
{
BOOST_ASSERT(c < 256);
return encoding_table[c];
}
bool Encodings::isComposeChar_hebrew(char_type c)
{
return c <= 0xd2 && c >= 0xc0 &&
@ -315,13 +235,7 @@ Encoding const * Encodings::getEncoding(string const & encoding) const
Encodings::Encodings()
{
encodinglist["iso8859-1"] = Encoding("iso8859-1", "latin1", tab_iso8859_1);
symbol_encoding_ =
#ifdef USE_UNICODE_FOR_SYMBOLS
Encoding("symbol", "", tab_symbol);
#else
Encoding("symbol", "", tab_iso8859_1);
#endif
symbol_encoding_ = Encoding("symbol", "", "");
}
void Encodings::read(string const & filename)
@ -347,14 +261,10 @@ void Encodings::read(string const & filename)
string const name = lex.getString();
lex.next();
string const latexname = lex.getString();
lex.next();
string const iconvname = lex.getString();
lyxerr[Debug::INFO] << "Reading encoding " << name << endl;
char_type table[256];
for (unsigned int i = 0; i < 256; ++i) {
lex.next();
string const tmp = lex.getString();
table[i] = ::strtol(tmp.c_str(), 0 , 16);
}
encodinglist[name] = Encoding(name, latexname, table);
encodinglist[name] = Encoding(name, latexname, iconvname);
if (lex.lex() != et_end)
lex.printError("Encodings::read: "
"missing end");

View File

@ -26,25 +26,24 @@ public:
///
Encoding() {}
///
Encoding(std::string const & n, std::string const & l, char_type const * e)
: Name_(n), LatexName_(l)
Encoding(std::string const & n, std::string const & l,
std::string const & i)
: Name_(n), LatexName_(l), iconvName_(i)
{
for (int i = 0; i < 256; ++i)
encoding_table[i] = e[i];
}
///
std::string const & name() const { return Name_; }
///
std::string const & latexName() const { return LatexName_; }
///
char_type ucs(char_type c) const;
std::string const & iconvName() const { return iconvName_; }
private:
///
std::string Name_;
///
std::string LatexName_;
///
char_type encoding_table[256];
std::string iconvName_;
};
extern Encoding symbol_encoding;

View File

@ -203,7 +203,8 @@ bool Exporter::Export(Buffer * buffer, string const & format,
// LaTeX backend
else if (backend_format == format) {
runparams.nice = true;
buffer->makeLaTeXFile(filename, string(), runparams);
if (!buffer->makeLaTeXFile(filename, string(), runparams))
return false;
} else if (!lyxrc.tex_allows_spaces
&& contains(buffer->filePath(), ' ')) {
Alert::error(_("File name error"),
@ -211,7 +212,8 @@ bool Exporter::Export(Buffer * buffer, string const & format,
return false;
} else {
runparams.nice = false;
buffer->makeLaTeXFile(filename, buffer->filePath(), runparams);
if (!buffer->makeLaTeXFile(filename, buffer->filePath(), runparams))
return false;
}
string const error_type = (format == "program")? "Build" : bufferFormat(*buffer);

View File

@ -58,6 +58,10 @@ namespace frontend {
namespace {
// FIXME: This list is incomplete. It should not be hardcoded but come from
// the available encodings in src/encodings.C
// FIXME: "default" is not a valid encoding anymore. Nevertheless, it still
// occurs in other source files.
char const * encodings[] = { "LaTeX default", "latin1", "latin2",
"latin3", "latin4", "latin5", "latin9",
"koi8-r", "koi8-u", "cp866", "cp1251",
@ -938,15 +942,20 @@ void QDocumentDialog::update(BufferParams const & params)
if (params.inputenc != "auto") {
langModule->defaultencodingCB->setChecked(false);
// FIXME: "default" is no valid encoding anymore
if (params.inputenc == "default") {
langModule->encodingCO->setCurrentIndex(0);
} else {
int i = 0;
while (encodings[i]) {
if (encodings[i] == params.inputenc)
if (encodings[i] == params.inputenc) {
langModule->encodingCO->setCurrentIndex(i);
break;
}
++i;
}
// FIXME: possible data loss because the list of encodings is
// incomplete
}
}

View File

@ -29,6 +29,7 @@
#include "insets/insetoptarg.h"
#include "support/lstrings.h"
#include "support/unicode.h"
namespace lyx {
@ -236,7 +237,7 @@ ParagraphList::const_iterator
TeXOnePar(Buffer const & buf,
ParagraphList const & paragraphs,
ParagraphList::const_iterator pit,
odocstream & os, TexRow & texrow,
odocstream & ucs4, TexRow & texrow,
OutputParams const & runparams_in,
string const & everypar)
{
@ -274,34 +275,40 @@ TeXOnePar(Buffer const & buf,
if (!lyxrc.language_command_end.empty() &&
previous_language->babel() != doc_language->babel())
{
os << from_ascii(subst(lyxrc.language_command_end,
ucs4 << from_ascii(subst(lyxrc.language_command_end,
"$$lang",
previous_language->babel()))
<< endl;
<< endl;
texrow.newline();
}
if (lyxrc.language_command_end.empty() ||
language->babel() != doc_language->babel())
{
os << from_ascii(subst(
ucs4 << from_ascii(subst(
lyxrc.language_command_begin,
"$$lang",
language->babel()))
<< endl;
<< endl;
texrow.newline();
}
}
if (false) {
if (bparams.inputenc == "auto" &&
language->encoding() != previous_language->encoding()) {
os << "\\inputencoding{"
<< from_ascii(language->encoding()->latexName())
<< "}\n";
texrow.newline();
}
if (bparams.inputenc == "auto" &&
language->encoding() != previous_language->encoding()) {
ucs4 << "\\inputencoding{"
<< from_ascii(language->encoding()->latexName())
<< "}\n";
texrow.newline();
}
// We need to output the paragraph to a temporary stream if we
// need to change the encoding. Don't do this if the result does
// not go to a file but to the builtin source viewer.
odocstringstream par_stream;
bool const change_encoding = !runparams_in.dryrun &&
bparams.inputenc == "auto" &&
language->encoding() != doc_language->encoding();
odocstream & os(change_encoding ? par_stream : ucs4);
// In an an inset with unlimited length (all in one row),
// don't allow any special options in the paragraph
@ -474,6 +481,33 @@ TeXOnePar(Buffer const & buf,
if (boost::next(pit) != paragraphs.end() &&
lyxerr.debugging(Debug::LATEX))
lyxerr << "TeXOnePar...done " << &*boost::next(pit) << endl;
if (change_encoding) {
lyxerr[Debug::LATEX] << "Converting paragraph to encoding "
<< language->encoding()->iconvName() << endl;
docstring const par = par_stream.str();
// Convert the paragraph to the 8bit encoding that we need to
// output.
std::vector<char> const encoded = lyx::ucs4_to_eightbit(par.c_str(),
par.size(), language->encoding()->iconvName());
// Interpret this as if it was in the 8 bit encoding of the
// document language and convert it back to UCS4. That means
// that faked does not contain pure UCS4 anymore, but what
// will be written to the output file will be correct, because
// the real output stream will do a UCS4 -> document language
// encoding conversion.
// This is of course a hack, but not a bigger one than mixing
// two encodings in one file.
// FIXME: Catch iconv conversion errors and display an error
// dialog.
std::vector<char_type> const faked = lyx::eightbit_to_ucs4(encoded.data(),
encoded.size(), doc_language->encoding()->iconvName());
std::vector<char_type>::const_iterator const end = faked.end();
std::vector<char_type>::const_iterator it = faked.begin();
for (; it != end; ++it)
ucs4.put(*it);
}
return ++pit;
}

View File

@ -22,6 +22,8 @@
using lyx::ucs4_codeset;
using lyx::ucs2_codeset;
using std::string;
namespace {
@ -31,51 +33,42 @@ char const * utf8_codeset = "UTF-8";
// lyxerr in the future.
class utf8_codecvt_facet_exception : public std::exception {
public:
virtual ~utf8_codecvt_facet_exception() throw() {}
virtual const char* what() const throw()
{
return "iconv problem in utf8_codecvt_facet initialization";
}
};
/// codecvt facet for conversion of UCS4 (internal representation) to the
/// narrow encoding given to the constructor (external representation, UTF8
/// by default) or vice versa
class utf8_codecvt_facet : public std::codecvt<lyx::char_type, char, std::mbstate_t>
class iconv_codecvt_facet : public std::codecvt<lyx::char_type, char, std::mbstate_t>
{
typedef std::codecvt<lyx::char_type, char, std::mbstate_t> base;
public:
/// Constructor. You have to specify with \p inout whether you want
/// to use this facet only for input, only for output or for both.
explicit utf8_codecvt_facet(std::ios_base::openmode inout = std::ios_base::in | std::ios_base::out,
explicit iconv_codecvt_facet(string const & encoding = "UTF-8",
std::ios_base::openmode inout = std::ios_base::in | std::ios_base::out,
size_t refs = 0)
: base(refs)
: base(refs), utf8_(encoding == "UTF-8")
{
if (inout & std::ios_base::in) {
in_cd_ = iconv_open(ucs4_codeset, utf8_codeset);
in_cd_ = iconv_open(ucs4_codeset, encoding.c_str());
if (in_cd_ == (iconv_t)(-1)) {
fprintf(stderr, "Error %d returned from iconv_open(in_cd_): %s\n",
errno, strerror(errno));
fflush(stderr);
throw utf8_codecvt_facet_exception();
throw lyx::iconv_codecvt_facet_exception();
}
} else
in_cd_ = (iconv_t)(-1);
if (inout & std::ios_base::out) {
out_cd_ = iconv_open(utf8_codeset, ucs4_codeset);
out_cd_ = iconv_open(encoding.c_str(), ucs4_codeset);
if (out_cd_ == (iconv_t)(-1)) {
fprintf(stderr, "Error %d returned from iconv_open(out_cd_): %s\n",
errno, strerror(errno));
fflush(stderr);
throw utf8_codecvt_facet_exception();
throw lyx::iconv_codecvt_facet_exception();
}
} else
out_cd_ = (iconv_t)(-1);
}
protected:
virtual ~utf8_codecvt_facet()
virtual ~iconv_codecvt_facet()
{
if (in_cd_ != (iconv_t)(-1))
if (iconv_close(in_cd_) == -1) {
@ -155,8 +148,10 @@ protected:
}
virtual int do_max_length() const throw()
{
// UTF8 uses at most 6 bytes to represent one code point
return 6;
// UTF8 uses at most 6 bytes to represent one UCS4 code point.
// All other encodings encode one UCS4 code point in one byte
// (and can therefore only encode a subset of UCS4)
return utf8_ ? 6 : 1;
}
private:
/// Do the actual conversion. The interface is equivalent to that of
@ -186,6 +181,8 @@ private:
}
iconv_t in_cd_;
iconv_t out_cd_;
/// Is the narrow encoding UTF8?
bool utf8_;
};
} // namespace anon
@ -194,10 +191,16 @@ private:
namespace lyx {
/// Describe the failure for code that catches std::exception.
/// \return a fixed message; the text does not vary with the cause of the
/// failure.
const char * iconv_codecvt_facet_exception::what() const throw()
{
return "iconv problem in iconv_codecvt_facet initialization";
}
idocfstream::idocfstream() : base()
{
std::locale global;
std::locale locale(global, new utf8_codecvt_facet(in));
std::locale locale(global, new iconv_codecvt_facet(utf8_codeset, in));
imbue(locale);
}
@ -207,26 +210,27 @@ idocfstream::idocfstream(const char* s, std::ios_base::openmode mode)
{
// We must imbue the stream before opening the file
std::locale global;
std::locale locale(global, new utf8_codecvt_facet(in));
std::locale locale(global, new iconv_codecvt_facet(utf8_codeset, in));
imbue(locale);
open(s, mode);
}
odocfstream::odocfstream() : base()
odocfstream::odocfstream(string const & encoding) : base()
{
std::locale global;
std::locale locale(global, new utf8_codecvt_facet(out));
std::locale locale(global, new iconv_codecvt_facet(encoding, out));
imbue(locale);
}
odocfstream::odocfstream(const char* s, std::ios_base::openmode mode)
odocfstream::odocfstream(const char* s, std::ios_base::openmode mode,
string const & encoding)
: base()
{
// We must imbue the stream before opening the file
std::locale global;
std::locale locale(global, new utf8_codecvt_facet(out));
std::locale locale(global, new iconv_codecvt_facet(encoding, out));
imbue(locale);
open(s, mode);
}
@ -236,7 +240,7 @@ odocfstream::odocfstream(const char* s, std::ios_base::openmode mode)
#if (!defined(HAVE_WCHAR_T) || SIZEOF_WCHAR_T != 4) && defined(__GNUC__)
// We get undefined references to these virtual methods. This looks like
// a bug in gcc. The implementation here does not do anything useful, since
// it is overriden in utf8_codecvt_facet.
// it is overridden in iconv_codecvt_facet.
namespace std {
template<> codecvt<lyx::char_type, char, mbstate_t>::result
codecvt<lyx::char_type, char, mbstate_t>::do_out(mbstate_t &, const lyx::char_type *, const lyx::char_type *, const lyx::char_type *&,

View File

@ -19,6 +19,12 @@
namespace lyx {
/// Exception thrown by the iconv based codecvt facet when iconv_open()
/// fails to create a conversion descriptor for the requested encoding.
class iconv_codecvt_facet_exception : public std::exception {
public:
/// Empty destructor; declared with throw() like the std::exception one.
virtual ~iconv_codecvt_facet_exception() throw() {}
/// Description of the failure (implemented out of line).
virtual const char * what() const throw();
};
/// Base class for UCS4 input streams
typedef std::basic_istream<char_type> idocstream;
@ -45,14 +51,15 @@ public:
~idocfstream() {}
};
/// File stream for writing UTF8-encoded files with automatic conversion from
/// UCS4.
/// File stream for writing files in 8bit encoding \p encoding with automatic
/// conversion from UCS4.
class odocfstream : public std::basic_ofstream<char_type> {
typedef std::basic_ofstream<char_type> base;
public:
odocfstream();
odocfstream(std::string const & encoding = "UTF-8");
explicit odocfstream(const char* s,
std::ios_base::openmode mode = std::ios_base::out|std::ios_base::trunc);
std::ios_base::openmode mode = std::ios_base::out|std::ios_base::trunc,
std::string const & encoding = "UTF-8");
~odocfstream() {}
};

View File

@ -20,6 +20,7 @@
#include <cerrno>
#include <iomanip>
#include <map>
namespace lyx {
@ -226,4 +227,25 @@ ucs4_to_utf8(lyx::char_type const * ucs4str, size_t ls)
}
std::vector<lyx::char_type>
eightbit_to_ucs4(char const * s, size_t ls, std::string const & encoding)
{
static std::map<std::string, iconv_t> cd;
if (cd.find(encoding) == cd.end())
cd[encoding] = (iconv_t)(-1);
return iconv_convert<char_type>(&cd[encoding], ucs4_codeset,
encoding.c_str(), s, ls);
}
/// Convert the \p ls UCS4 code points starting at \p ucs4str to the 8bit
/// encoding \p encoding.
/// One iconv descriptor slot per encoding name is kept in a function-local
/// static map, so it survives between calls.
/// NOTE(review): presumably iconv_convert() opens the descriptor when the
/// slot still holds (iconv_t)(-1) -- confirm against its implementation.
std::vector<char>
ucs4_to_eightbit(lyx::char_type const * ucs4str, size_t ls, std::string const & encoding)
{
	typedef std::map<std::string, iconv_t> DescriptorMap;
	static DescriptorMap cd;
	// insert() leaves an already existing entry untouched and returns an
	// iterator to it, so a single lookup both finds the slot for this
	// encoding and creates it (holding (iconv_t)(-1)) on first use.
	iconv_t & descriptor =
		cd.insert(DescriptorMap::value_type(encoding, (iconv_t)(-1))).first->second;
	return iconv_convert<char>(&descriptor, encoding.c_str(),
		ucs4_codeset, ucs4str, ls);
}
} // namespace lyx

View File

@ -56,6 +56,16 @@ std::vector<char> ucs4_to_utf8(std::vector<lyx::char_type> const & ucs4str);
std::vector<char> ucs4_to_utf8(lyx::char_type const * ucs4str, size_t ls);
/// convert \p s from encoding \p encoding to ucs4.
/// \p encoding must be a valid iconv 8bit encoding
std::vector<lyx::char_type>
eightbit_to_ucs4(char const * s, size_t ls, std::string const & encoding);
/// convert \p ucs4str from ucs4 to encoding \p encoding.
/// \p encoding must be a valid iconv 8bit encoding
std::vector<char>
ucs4_to_eightbit(lyx::char_type const * ucs4str, size_t ls, std::string const & encoding);
extern char const * ucs4_codeset;
extern char const * ucs2_codeset;