Add machinery to output arbitrary unicode characters with LaTeX commands

read from a text file. * src/encoding.[Ch] (Encoding::latexChar): New, output a character to LaTeX (Encoding::validate): New, add needed preamble stuff for a character (Encodings::read): Read new unicodesymbols file (Encodings::isCombiningChar): New, is a character a combining char? * src/paragraph_pimpl.C (isEncoding): Delete, no longer needed (getEncoding): New, get the real encoding of a font (Paragraph::Pimpl::latexSurrogatePair): New, output a surrogate pair to LaTeX (Paragraph::Pimpl::simpleTeXBlanks): Use latexSurrogatePair if needed (Paragraph::Pimpl::simpleTeXSpecialChars): Ditto, and replace several hardcoded characters with a call of encoding.latexChar() (Paragraph::Pimpl::validate): replace several hardcoded characters with a call of encoding.validate() * src/support/debugstream.h (basic_debugstream::disable): New, disable the stream completely (basic_debugstream::enable): New, reenable the stream * src/lyx_main.[Ch]: Adjust to changes above * src/paragraph.C: Ditto * lib/unicodesymbols: New file with UCS4 -> LaTeX command mapping. It is far from complete yet, but contains most accents on latin characters. * lib/Makefile.am: add lib/unicodesymbols * development/scons/scons_manifest.py: ditto * development/tools/unicodesymbols.py: Helper script to update lib/unicodesymbols with new symbols git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/trunk@16920 a592a061-630c-0410-9148-cb99ea01b6c8
2024-11-22 10:00:33 +00:00 · 2007-01-28 21:27:45 +00:00 · 2007-01-28 21:27:45 +00:00 · feb7895965
commit feb7895965
parent eb495d2e98
12 changed files with 1148 additions and 147 deletions
--- a/development/scons/scons_manifest.py
+++ b/development/scons/scons_manifest.py
@ -1275,6 +1275,7 @@ lib_files = Split('''
    languages
    symbols
    syntax.default
+    unicodesymbols
    configure.py
 ''')

--- a/development/tools/unicodesymbols.py
+++ b/development/tools/unicodesymbols.py
@ -0,0 +1,119 @@
+#! /usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# file unicodesymbols.py
+# This file is part of LyX, the document processor.
+# Licence details can be found in the file COPYING.
+
+# author Georg Baum
+
+# Full author contact details are available in file CREDITS
+
+# This script reads a unicode symbol file and completes it in the given range
+
+
+import os, re, string, sys, unicodedata
+
+def usage(prog_name):
+    " Return the two line command synopsis for prog_name."
+    # First form: explicit input and output files
+    file_form = "Usage: %s start stop inputfile outputfile\n" % prog_name
+    # Second form: filter reading stdin and writing stdout
+    pipe_form = "or     %s start stop <inputfile >outputfile" % prog_name
+    return file_form + pipe_form
+
+
+def error(message):
+    " Write message and a newline to stderr, then exit with status 1."
+    text = message + '\n'
+    sys.stderr.write(text)
+    # Equivalent to sys.exit(1): never returns to the caller
+    raise SystemExit(1)
+
+
+def trim_eol(line):
+    " Return line without its trailing end of line char(s)."
+    # A carriage return in the second to last position: drop the last two chars
+    if line[-2:-1] == '\r':
+        return line[:-2]
+    # A single trailing carriage return or newline: drop one char
+    if line[-1:] in ('\r', '\n'):
+        return line[:-1]
+    # Nothing to strip, e.g. the last line of a file without EOL
+    return line
+
+
+def read(input):
+    " Read input line by line and return a list of [code point, text] entries."
+    entries = []
+    while True:
+        raw = input.readline()
+        if not raw:
+            # readline() yields an empty result at end of file
+            break
+        text = trim_eol(raw)
+        fields = text.split()
+        # -1 marks lines without a code point (blank lines, ordinary comments)
+        code = -1
+        if fields:
+            first = fields[0]
+            if first.startswith("0x"):
+                # active entry: 0xNNNN ...
+                code = int(first[2:], 16)
+            elif first.startswith("#0x"):
+                # commented out entry: #0xNNNN ...
+                code = int(first[3:], 16)
+        entries.append([code, text])
+    return entries
+
+
+def write(output, lines):
+    " Write the text of every entry to output, each followed by os.linesep."
+    eol = os.linesep
+    for entry in lines:
+        # entry is [code point, text]; only the text is written
+        output.write(entry[1] + eol)
+
+
+def complete(lines, start, stop):
+    """ Insert commented out template entries for missing code points.
+
+    lines is a list of [code point, text] entries as returned by read().
+    The skipping logic below relies on the entries that carry a code point
+    being in ascending order.  For every code point i in range(start, stop)
+    that has no entry yet and that has a unicode name, an entry of the form
+    '#0xNNNN "" "" "flags" # NAME' is inserted in place.  The list is
+    modified in place and nothing is returned.
+    """
+    l = 0
+    for i in range(start, stop):
+        # This catches both comments (lines[l][0] == -1) and code points less than i
+        # (nothing is printed here: stdout may be the output file)
+        while l < len(lines) and lines[l][0] < i:
+            l = l + 1
+        if l >= len(lines) or lines[l][0] != i:
+            c = unichr(i)
+            # Code points without a name get no template entry
+            name = unicodedata.name(c, "")
+            if name != "":
+                if unicodedata.combining(c):
+                    combining = "combining"
+                else:
+                    combining = ""
+                line = [i, '#0x%04x ""                         "" "%s" # %s' % (i, combining, name)]
+                lines.insert(l, line)
+                # Step over the entry that was just inserted
+                l = l + 1
+
+
+def main(argv):
+    """ Complete a unicode symbol file in the range given on the command line.
+
+    argv is [prog, start, stop] (read stdin, write stdout) or
+    [prog, start, stop, inputfile, outputfile].  start and stop are decimal
+    or 0x prefixed hexadecimal numbers.  Any other argument count prints the
+    usage message and exits with status 1.  Returns 0 on success.
+    """
+
+    # Check the argument count first so that a wrong call yields the usage text
+    if len(argv) != 3 and len(argv) != 5:
+        error(usage(argv[0]))
+
+    # Parse the range before opening files: a malformed number must not
+    # truncate an existing output file
+    if argv[1][:2] == "0x":
+        start = int(argv[1][2:], 16)
+    else:
+        start = int(argv[1])
+    if argv[2][:2] == "0x":
+        stop = int(argv[2][2:], 16)
+    else:
+        stop = int(argv[2])
+
+    # Open files
+    use_files = len(argv) == 5
+    if use_files:
+        infile = open(argv[3], 'rb')
+        outfile = open(argv[4], 'wb')
+    else:
+        infile = sys.stdin
+        outfile = sys.stdout
+
+    try:
+        # Do the real work
+        lines = read(infile)
+        complete(lines, start, stop)
+        write(outfile, lines)
+    finally:
+        # Close only the files opened here, never stdin/stdout
+        if use_files:
+            infile.close()
+            outfile.close()
+
+    return 0
+
+
+# Run as a script: hand main()'s return value to the shell as exit status
+if __name__ == "__main__":
+    sys.exit(main(sys.argv))
--- a/lib/Makefile.am
+++ b/lib/Makefile.am
@ -5,7 +5,7 @@ SUBDIRS = doc lyx2lyx
 CHMOD = chmod

 dist_pkgdata_DATA = CREDITS chkconfig.ltx \
-	       external_templates encodings languages symbols syntax.default
+	       external_templates encodings languages symbols syntax.default unicodesymbols

 # Note that we "chmod 755" manually this file in install-data-hook.
 dist_pkgdata_PYTHON = configure.py 
--- a/lib/unicodesymbols
+++ b/lib/unicodesymbols
@ -0,0 +1,681 @@
+#
+# file unicodesymbols
+# This file is part of LyX, the document processor.
+# Licence details can be found in the file COPYING.
+#
+# author Georg Baum
+#
+# Full author contact details are available in file CREDITS.
+
+# This file is a database of LaTeX commands for unicode characters.
+# These commands will be used by LyX for LaTeX export for all characters
+# that are not representable in the chosen encoding.
+
+# syntax:
+# ucs4 command                    preamble flags
+# preamble can either be a known feature, or a LaTeX command.
+# Known flags:
+# - combining This is a combining char that will get combined with a base char
+# - force     Always output replacement command
+
+#0x00a0 ""                         "" "" # NO-BREAK SPACE
+#0x00a1 ""                         "" "" # INVERTED EXCLAMATION MARK
+0x00a2 "\\textcent"               "textcomp" "" # CENT SIGN
+0x00a3 "\\pounds"                 "" "" # £ POUND SIGN
+0x00a4 "\\textcurrency"           "textcomp" "" # CURRENCY SYMBOL
+0x00a5 "\\textyen"                "textcomp" "" # YEN SIGN
+0x00a6 "\\textbrokenbar"          "textcomp" "" # BROKEN BAR
+0x00a7 "\\textsection"            "textcomp" "" # SECTION SIGN
+0x00a8 "\\textasciidieresis"      "textcomp" "" # DIAERESIS
+0x00a9 "\\textcopyright"          "textcomp" "" # COPYRIGHT SIGN
+0x00aa "\\textordfeminine"        "textcomp" "" # FEMININE ORDINAL INDICATOR
+#0x00ab ""                         "" "" # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+0x00ac "\\textlnot"               "textcomp" "force" # ¬ NOT SIGN
+#0x00ad ""                         "" "" # SOFT HYPHEN
+0x00ae "\\textregistered"         "textcomp" "" # REGISTERED SIGN
+0x00af "\\textasciimacron"        "textcomp" "" # MACRON
+0x00b0 "\\textdegree"             "textcomp" "" # DEGREE SIGN
+0x00b1 "\\textpm"                 "textcomp" "force" # ± PLUS-MINUS SIGN
+0x00b2 "\\texttwosuperior"        "textcomp" "force" # ² SUPERSCRIPT TWO
+0x00b3 "\\textthreesuperior"      "textcomp" "force" # ³ SUPERSCRIPT THREE
+0x00b4 "\\textasciiacute"         "textcomp" "" # ACUTE ACCENT
+0x00b5 "\\textmu"                 "textcomp" "force" # µ MICRO SIGN
+0x00b6 "\\textpilcrow"            "textcomp" "" # PILCROW SIGN
+#0x00b7 ""                         "" "" # MIDDLE DOT
+0x00b8 "\\c\\ "                   "" "" # CEDILLA (command from latin1.def)
+0x00b9 "\\textonesuperior"        "textcomp" "force" # ¹ SUPERSCRIPT ONE
+0x00ba "\\textordmasculine"       "textcomp" "" # MASCULINE ORDINAL INDICATOR
+#0x00bb ""                         "" "" # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+0x00bc "\\textonequarter"         "textcomp" "" # 1/4 FRACTION
+0x00bd "\\textonehalf"            "textcomp" "" # 1/2 FRACTION
+0x00be "\\textthreequarters"      "textcomp" "" # 3/4 FRACTION
+#0x00bf ""                         "" "" # INVERTED QUESTION MARK
+0x00c0 "\\`{A}"                   "" "" # LATIN CAPITAL LETTER A WITH GRAVE
+0x00c1 "\\'{A}"                   "" "" # LATIN CAPITAL LETTER A WITH ACUTE
+0x00c2 "\\^{A}"                   "" "" # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+0x00c3 "\\~{A}"                   "" "" # LATIN CAPITAL LETTER A WITH TILDE
+0x00c4 "\\\"{A}"                  "" "" # LATIN CAPITAL LETTER A WITH DIAERESIS
+0x00c5 "\\r{A}"                   "" "" # LATIN CAPITAL LETTER A WITH RING ABOVE
+#0x00c6 ""                         "" "" # LATIN CAPITAL LETTER AE
+0x00c7 "\\c{C}"                   "" "" # LATIN CAPITAL LETTER C WITH CEDILLA
+0x00c8 "\\`{E}"                   "" "" # LATIN CAPITAL LETTER E WITH GRAVE
+0x00c9 "\\'{E}"                   "" "" # LATIN CAPITAL LETTER E WITH ACUTE
+0x00ca "\\^{E}"                   "" "" # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+0x00cb "\\\"{E}"                  "" "" # LATIN CAPITAL LETTER E WITH DIAERESIS
+0x00cc "\\`{I}"                   "" "" # LATIN CAPITAL LETTER I WITH GRAVE
+0x00cd "\\'{I}"                   "" "" # LATIN CAPITAL LETTER I WITH ACUTE
+0x00ce "\\^{I}"                   "" "" # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+0x00cf "\\\"{I}"                  "" "" # LATIN CAPITAL LETTER I WITH DIAERESIS
+#0x00d0 ""                         "" "" # LATIN CAPITAL LETTER ETH
+0x00d1 "\\~{N}"                   "" "" # LATIN CAPITAL LETTER N WITH TILDE
+0x00d2 "\\`{O}"                   "" "" # LATIN CAPITAL LETTER O WITH GRAVE
+0x00d3 "\\'{O}"                   "" "" # LATIN CAPITAL LETTER O WITH ACUTE
+0x00d4 "\\^{O}"                   "" "" # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+0x00d5 "\\~{O}"                   "" "" # LATIN CAPITAL LETTER O WITH TILDE
+0x00d6 "\\\"{O}"                  "" "" # LATIN CAPITAL LETTER O WITH DIAERESIS
+0x00d7 "\\texttimes"              "textcomp" "force" # × MULTIPLICATION SIGN
+#0x00d8 ""                         "" "" # LATIN CAPITAL LETTER O WITH STROKE
+0x00d9 "\\`{U}"                   "" "" # LATIN CAPITAL LETTER U WITH GRAVE
+0x00da "\\'{U}"                   "" "" # LATIN CAPITAL LETTER U WITH ACUTE
+0x00db "\\^{U}"                   "" "" # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+0x00dc "\\\"{U}"                  "" "" # LATIN CAPITAL LETTER U WITH DIAERESIS
+0x00dd "\\'{Y}"                   "" "" # LATIN CAPITAL LETTER Y WITH ACUTE
+#0x00de ""                         "" "" # LATIN CAPITAL LETTER THORN
+#0x00df ""                         "" "" # LATIN SMALL LETTER SHARP S
+0x00e0 "\\`{a}"                   "" "" # LATIN SMALL LETTER A WITH GRAVE
+0x00e1 "\\'{a}"                   "" "" # LATIN SMALL LETTER A WITH ACUTE
+0x00e2 "\\^{a}"                   "" "" # LATIN SMALL LETTER A WITH CIRCUMFLEX
+0x00e3 "\\~{a}"                   "" "" # LATIN SMALL LETTER A WITH TILDE
+0x00e4 "\\\"{a}"                  "" "" # LATIN SMALL LETTER A WITH DIAERESIS
+0x00e5 "\\r{a}"                   "" "" # LATIN SMALL LETTER A WITH RING ABOVE
+#0x00e6 ""                         "" "" # LATIN SMALL LETTER AE
+0x00e7 "\\c{c}"                   "" "" # LATIN SMALL LETTER C WITH CEDILLA
+0x00e8 "\\`{e}"                   "" "" # LATIN SMALL LETTER E WITH GRAVE
+0x00e9 "\\'{e}"                   "" "" # LATIN SMALL LETTER E WITH ACUTE
+0x00ea "\\^{e}"                   "" "" # LATIN SMALL LETTER E WITH CIRCUMFLEX
+0x00eb "\\\"{e}"                  "" "" # LATIN SMALL LETTER E WITH DIAERESIS
+0x00ec "\\`{\\i}"                 "" "" # LATIN SMALL LETTER I WITH GRAVE
+0x00ed "\\'{\\i}"                 "" "" # LATIN SMALL LETTER I WITH ACUTE
+0x00ee "\\^{\\i}"                 "" "" # LATIN SMALL LETTER I WITH CIRCUMFLEX
+0x00ef "\\\"{\\i}"                "" "" # LATIN SMALL LETTER I WITH DIAERESIS
+#0x00f0 ""                         "" "" # LATIN SMALL LETTER ETH
+0x00f1 "\\~{n}"                   "" "" # LATIN SMALL LETTER N WITH TILDE
+0x00f2 "\\`{o}"                   "" "" # LATIN SMALL LETTER O WITH GRAVE
+0x00f3 "\\'{o}"                   "" "" # LATIN SMALL LETTER O WITH ACUTE
+0x00f4 "\\^{o}"                   "" "" # LATIN SMALL LETTER O WITH CIRCUMFLEX
+0x00f5 "\\~{o}"                   "" "" # LATIN SMALL LETTER O WITH TILDE
+0x00f6 "\\\"{o}"                  "" "" # LATIN SMALL LETTER O WITH DIAERESIS
+0x00f7 "\\textdiv"                "textcomp" "force" # ÷ DIVISION SIGN
+#0x00f8 ""                         "" "" # LATIN SMALL LETTER O WITH STROKE
+0x00f9 "\\`{u}"                   "" "" # LATIN SMALL LETTER U WITH GRAVE
+0x00fa "\\'{u}"                   "" "" # LATIN SMALL LETTER U WITH ACUTE
+0x00fb "\\^{u}"                   "" "" # LATIN SMALL LETTER U WITH CIRCUMFLEX
+0x00fc "\\\"{u}"                  "" "" # LATIN SMALL LETTER U WITH DIAERESIS
+0x00fd "\\'{y}"                   "" "" # LATIN SMALL LETTER Y WITH ACUTE
+#0x00fe ""                         "" "" # LATIN SMALL LETTER THORN
+0x00ff "\\\"{y}"                  "" "" # LATIN SMALL LETTER Y WITH DIAERESIS
+0x0100 "\\={A}"                   "" "" # LATIN CAPITAL LETTER A WITH MACRON
+0x0101 "\\={a}"                   "" "" # LATIN SMALL LETTER A WITH MACRON
+0x0102 "\\u{A}"                   "" "" # LATIN CAPITAL LETTER A WITH BREVE
+0x0103 "\\u{a}"                   "" "" # LATIN SMALL LETTER A WITH BREVE
+0x0104 "\\k{A}"                   "" "" # LATIN CAPITAL LETTER A WITH OGONEK
+0x0105 "\\k{a}"                   "" "" # LATIN SMALL LETTER A WITH OGONEK
+0x0106 "\\'{C}"                   "" "" # LATIN CAPITAL LETTER C WITH ACUTE
+0x0107 "\\'{c}"                   "" "" # LATIN SMALL LETTER C WITH ACUTE
+0x0108 "\\^{C}"                   "" "" # LATIN CAPITAL LETTER C WITH CIRCUMFLEX
+0x0109 "\\^{c}"                   "" "" # LATIN SMALL LETTER C WITH CIRCUMFLEX
+0x010a "\\.{C}"                   "" "" # LATIN CAPITAL LETTER C WITH DOT ABOVE
+0x010b "\\.{c}"                   "" "" # LATIN SMALL LETTER C WITH DOT ABOVE
+0x010c "\\v{C}"                   "" "" # LATIN CAPITAL LETTER C WITH CARON
+0x010d "\\v{c}"                   "" "" # LATIN SMALL LETTER C WITH CARON
+0x010e "\\v{D}"                   "" "" # LATIN CAPITAL LETTER D WITH CARON
+0x010f "\\v{d}"                   "" "" # LATIN SMALL LETTER D WITH CARON
+#0x0110 ""                         "" "" # LATIN CAPITAL LETTER D WITH STROKE
+#0x0111 ""                         "" "" # LATIN SMALL LETTER D WITH STROKE
+0x0112 "\\={E}"                   "" "" # LATIN CAPITAL LETTER E WITH MACRON
+0x0113 "\\={e}"                   "" "" # LATIN SMALL LETTER E WITH MACRON
+0x0114 "\\u{E}"                   "" "" # LATIN CAPITAL LETTER E WITH BREVE
+0x0115 "\\u{e}"                   "" "" # LATIN SMALL LETTER E WITH BREVE
+0x0116 "\\.{E}"                   "" "" # LATIN CAPITAL LETTER E WITH DOT ABOVE
+0x0117 "\\.{e}"                   "" "" # LATIN SMALL LETTER E WITH DOT ABOVE
+0x0118 "\\k{E}"                   "" "" # LATIN CAPITAL LETTER E WITH OGONEK
+0x0119 "\\k{e}"                   "" "" # LATIN SMALL LETTER E WITH OGONEK
+0x011a "\\v{E}"                   "" "" # LATIN CAPITAL LETTER E WITH CARON
+0x011b "\\v{e}"                   "" "" # LATIN SMALL LETTER E WITH CARON
+0x011c "\\^{G}"                   "" "" # LATIN CAPITAL LETTER G WITH CIRCUMFLEX
+0x011d "\\^{g}"                   "" "" # LATIN SMALL LETTER G WITH CIRCUMFLEX
+0x011e "\\u{G}"                   "" "" # LATIN CAPITAL LETTER G WITH BREVE
+0x011f "\\u{g}"                   "" "" # LATIN SMALL LETTER G WITH BREVE
+0x0120 "\\.{G}"                   "" "" # LATIN CAPITAL LETTER G WITH DOT ABOVE
+0x0121 "\\.{g}"                   "" "" # LATIN SMALL LETTER G WITH DOT ABOVE
+0x0122 "\\c{G}"                   "" "" # LATIN CAPITAL LETTER G WITH CEDILLA
+0x0123 "\\c{g}"                   "" "" # LATIN SMALL LETTER G WITH CEDILLA
+0x0124 "\\^{H}"                   "" "" # LATIN CAPITAL LETTER H WITH CIRCUMFLEX
+0x0125 "\\^{h}"                   "" "" # LATIN SMALL LETTER H WITH CIRCUMFLEX
+#0x0126 ""                         "" "" # LATIN CAPITAL LETTER H WITH STROKE
+#0x0127 ""                         "" "" # LATIN SMALL LETTER H WITH STROKE
+0x0128 "\\~{I}"                   "" "" # LATIN CAPITAL LETTER I WITH TILDE
+0x0129 "\\~{\\i}"                 "" "" # LATIN SMALL LETTER I WITH TILDE
+0x012a "\\={I}"                   "" "" # LATIN CAPITAL LETTER I WITH MACRON
+0x012b "\\={\\i}"                 "" "" # LATIN SMALL LETTER I WITH MACRON
+0x012c "\\u{I}"                   "" "" # LATIN CAPITAL LETTER I WITH BREVE
+0x012d "\\u{\\i}"                 "" "" # LATIN SMALL LETTER I WITH BREVE
+0x012e "\\k{I}"                   "" "" # LATIN CAPITAL LETTER I WITH OGONEK
+0x012f "\\k{i}"                   "" "" # LATIN SMALL LETTER I WITH OGONEK
+0x0130 "\\.{I}"                   "" "" # LATIN CAPITAL LETTER I WITH DOT ABOVE
+0x0131 "\\i"                      "" "" # LATIN SMALL LETTER DOTLESS I
+#0x0132 ""                         "" "" # LATIN CAPITAL LIGATURE IJ
+#0x0133 ""                         "" "" # LATIN SMALL LIGATURE IJ
+0x0134 "\\^{J}"                   "" "" # LATIN CAPITAL LETTER J WITH CIRCUMFLEX
+0x0135 "\\^{\\j}"                 "" "" # LATIN SMALL LETTER J WITH CIRCUMFLEX
+0x0136 "\\c{K}"                   "" "" # LATIN CAPITAL LETTER K WITH CEDILLA
+0x0137 "\\c{k}"                   "" "" # LATIN SMALL LETTER K WITH CEDILLA
+#0x0138 ""                         "" "" # LATIN SMALL LETTER KRA
+0x0139 "\\'{L}"                   "" "" # LATIN CAPITAL LETTER L WITH ACUTE
+0x013a "\\'{l}"                   "" "" # LATIN SMALL LETTER L WITH ACUTE
+0x013b "\\c{L}"                   "" "" # LATIN CAPITAL LETTER L WITH CEDILLA
+0x013c "\\c{l}"                   "" "" # LATIN SMALL LETTER L WITH CEDILLA
+0x013d "\\v{L}"                   "" "" # LATIN CAPITAL LETTER L WITH CARON
+0x013e "\\v{l}"                   "" "" # LATIN SMALL LETTER L WITH CARON
+#0x013f ""                         "" "" # LATIN CAPITAL LETTER L WITH MIDDLE DOT
+#0x0140 ""                         "" "" # LATIN SMALL LETTER L WITH MIDDLE DOT
+0x0141 "\\L"                      "" "" # LATIN CAPITAL LETTER L WITH STROKE
+0x0142 "\\l"                      "" "" # LATIN SMALL LETTER L WITH STROKE
+0x0143 "\\'{N}"                   "" "" # LATIN CAPITAL LETTER N WITH ACUTE
+0x0144 "\\'{n}"                   "" "" # LATIN SMALL LETTER N WITH ACUTE
+0x0145 "\\c{N}"                   "" "" # LATIN CAPITAL LETTER N WITH CEDILLA
+0x0146 "\\c{n}"                   "" "" # LATIN SMALL LETTER N WITH CEDILLA
+0x0147 "\\v{N}"                   "" "" # LATIN CAPITAL LETTER N WITH CARON
+0x0148 "\\v{n}"                   "" "" # LATIN SMALL LETTER N WITH CARON
+#0x0149 ""                         "" "" # LATIN SMALL LETTER N PRECEDED BY APOSTROPHE
+#0x014a ""                         "" "" # LATIN CAPITAL LETTER ENG
+#0x014b ""                         "" "" # LATIN SMALL LETTER ENG
+0x014c "\\={O}"                   "" "" # LATIN CAPITAL LETTER O WITH MACRON
+0x014d "\\={o}"                   "" "" # LATIN SMALL LETTER O WITH MACRON
+0x014e "\\u{O}"                   "" "" # LATIN CAPITAL LETTER O WITH BREVE
+0x014f "\\u{o}"                   "" "" # LATIN SMALL LETTER O WITH BREVE
+0x0150 "\\H{O}"                   "" "" # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
+0x0151 "\\H{o}"                   "" "" # LATIN SMALL LETTER O WITH DOUBLE ACUTE
+#0x0152 ""                         "" "" # LATIN CAPITAL LIGATURE OE
+#0x0153 ""                         "" "" # LATIN SMALL LIGATURE OE
+0x0154 "\\'{R}"                   "" "" # LATIN CAPITAL LETTER R WITH ACUTE
+0x0155 "\\'{r}"                   "" "" # LATIN SMALL LETTER R WITH ACUTE
+0x0156 "\\c{R}"                   "" "" # LATIN CAPITAL LETTER R WITH CEDILLA
+0x0157 "\\c{r}"                   "" "" # LATIN SMALL LETTER R WITH CEDILLA
+0x0158 "\\v{R}"                   "" "" # LATIN CAPITAL LETTER R WITH CARON
+0x0159 "\\v{r}"                   "" "" # LATIN SMALL LETTER R WITH CARON
+0x015a "\\'{S}"                   "" "" # LATIN CAPITAL LETTER S WITH ACUTE
+0x015b "\\'{s}"                   "" "" # LATIN SMALL LETTER S WITH ACUTE
+0x015c "\\^{S}"                   "" "" # LATIN CAPITAL LETTER S WITH CIRCUMFLEX
+0x015d "\\^{s}"                   "" "" # LATIN SMALL LETTER S WITH CIRCUMFLEX
+0x015e "\\c{S}"                   "" "" # LATIN CAPITAL LETTER S WITH CEDILLA
+0x015f "\\c{s}"                   "" "" # LATIN SMALL LETTER S WITH CEDILLA
+0x0160 "\\v{S}"                   "" "" # LATIN CAPITAL LETTER S WITH CARON
+0x0161 "\\v{s}"                   "" "" # LATIN SMALL LETTER S WITH CARON
+0x0162 "\\c{T}"                   "" "" # LATIN CAPITAL LETTER T WITH CEDILLA
+0x0163 "\\c{t}"                   "" "" # LATIN SMALL LETTER T WITH CEDILLA
+0x0164 "\\v{T}"                   "" "" # LATIN CAPITAL LETTER T WITH CARON
+0x0165 "\\v{t}"                   "" "" # LATIN SMALL LETTER T WITH CARON
+#0x0166 ""                         "" "" # LATIN CAPITAL LETTER T WITH STROKE
+#0x0167 ""                         "" "" # LATIN SMALL LETTER T WITH STROKE
+0x0168 "\\~{U}"                   "" "" # LATIN CAPITAL LETTER U WITH TILDE
+0x0169 "\\~{u}"                   "" "" # LATIN SMALL LETTER U WITH TILDE
+0x016a "\\={U}"                   "" "" # LATIN CAPITAL LETTER U WITH MACRON
+0x016b "\\={u}"                   "" "" # LATIN SMALL LETTER U WITH MACRON
+0x016c "\\u{U}"                   "" "" # LATIN CAPITAL LETTER U WITH BREVE
+0x016d "\\u{u}"                   "" "" # LATIN SMALL LETTER U WITH BREVE
+0x016e "\\r{U}"                   "" "" # LATIN CAPITAL LETTER U WITH RING ABOVE
+0x016f "\\r{u}"                   "" "" # LATIN SMALL LETTER U WITH RING ABOVE
+0x0170 "\\H{U}"                   "" "" # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
+0x0171 "\\H{u}"                   "" "" # LATIN SMALL LETTER U WITH DOUBLE ACUTE
+0x0172 "\\k{U}"                   "" "" # LATIN CAPITAL LETTER U WITH OGONEK
+0x0173 "\\k{u}"                   "" "" # LATIN SMALL LETTER U WITH OGONEK
+0x0174 "\\^{W}"                   "" "" # LATIN CAPITAL LETTER W WITH CIRCUMFLEX
+0x0175 "\\^{w}"                   "" "" # LATIN SMALL LETTER W WITH CIRCUMFLEX
+0x0176 "\\^{Y}"                   "" "" # LATIN CAPITAL LETTER Y WITH CIRCUMFLEX
+0x0177 "\\^{y}"                   "" "" # LATIN SMALL LETTER Y WITH CIRCUMFLEX
+0x0178 "\\\"{Y}"                  "" "" # LATIN CAPITAL LETTER Y WITH DIAERESIS
+0x0179 "\\'{Z}"                   "" "" # LATIN CAPITAL LETTER Z WITH ACUTE
+0x017a "\\'{z}"                   "" "" # LATIN SMALL LETTER Z WITH ACUTE
+0x017b "\\.{Z}"                   "" "" # LATIN CAPITAL LETTER Z WITH DOT ABOVE
+0x017c "\\.{z}"                   "" "" # LATIN SMALL LETTER Z WITH DOT ABOVE
+0x017d "\\v{Z}"                   "" "" # LATIN CAPITAL LETTER Z WITH CARON
+0x017e "\\v{z}"                   "" "" # LATIN SMALL LETTER Z WITH CARON
+#0x017f ""                         "" "" # LATIN SMALL LETTER LONG S
+#0x0180 ""                         "" "" # LATIN SMALL LETTER B WITH STROKE
+#0x0181 ""                         "" "" # LATIN CAPITAL LETTER B WITH HOOK
+#0x0182 ""                         "" "" # LATIN CAPITAL LETTER B WITH TOPBAR
+#0x0183 ""                         "" "" # LATIN SMALL LETTER B WITH TOPBAR
+#0x0184 ""                         "" "" # LATIN CAPITAL LETTER TONE SIX
+#0x0185 ""                         "" "" # LATIN SMALL LETTER TONE SIX
+#0x0186 ""                         "" "" # LATIN CAPITAL LETTER OPEN O
+#0x0187 ""                         "" "" # LATIN CAPITAL LETTER C WITH HOOK
+#0x0188 ""                         "" "" # LATIN SMALL LETTER C WITH HOOK
+#0x0189 ""                         "" "" # LATIN CAPITAL LETTER AFRICAN D
+#0x018a ""                         "" "" # LATIN CAPITAL LETTER D WITH HOOK
+#0x018b ""                         "" "" # LATIN CAPITAL LETTER D WITH TOPBAR
+#0x018c ""                         "" "" # LATIN SMALL LETTER D WITH TOPBAR
+#0x018d ""                         "" "" # LATIN SMALL LETTER TURNED DELTA
+#0x018e ""                         "" "" # LATIN CAPITAL LETTER REVERSED E
+#0x018f ""                         "" "" # LATIN CAPITAL LETTER SCHWA
+#0x0190 ""                         "" "" # LATIN CAPITAL LETTER OPEN E
+#0x0191 ""                         "" "" # LATIN CAPITAL LETTER F WITH HOOK
+#0x0192 ""                         "" "" # LATIN SMALL LETTER F WITH HOOK
+#0x0193 ""                         "" "" # LATIN CAPITAL LETTER G WITH HOOK
+#0x0194 ""                         "" "" # LATIN CAPITAL LETTER GAMMA
+#0x0195 ""                         "" "" # LATIN SMALL LETTER HV
+#0x0196 ""                         "" "" # LATIN CAPITAL LETTER IOTA
+#0x0197 ""                         "" "" # LATIN CAPITAL LETTER I WITH STROKE
+#0x0198 ""                         "" "" # LATIN CAPITAL LETTER K WITH HOOK
+#0x0199 ""                         "" "" # LATIN SMALL LETTER K WITH HOOK
+#0x019a ""                         "" "" # LATIN SMALL LETTER L WITH BAR
+#0x019b ""                         "" "" # LATIN SMALL LETTER LAMBDA WITH STROKE
+#0x019c ""                         "" "" # LATIN CAPITAL LETTER TURNED M
+#0x019d ""                         "" "" # LATIN CAPITAL LETTER N WITH LEFT HOOK
+#0x019e ""                         "" "" # LATIN SMALL LETTER N WITH LONG RIGHT LEG
+#0x019f ""                         "" "" # LATIN CAPITAL LETTER O WITH MIDDLE TILDE
+#0x01a0 ""                         "" "" # LATIN CAPITAL LETTER O WITH HORN
+#0x01a1 ""                         "" "" # LATIN SMALL LETTER O WITH HORN
+#0x01a2 ""                         "" "" # LATIN CAPITAL LETTER OI
+#0x01a3 ""                         "" "" # LATIN SMALL LETTER OI
+#0x01a4 ""                         "" "" # LATIN CAPITAL LETTER P WITH HOOK
+#0x01a5 ""                         "" "" # LATIN SMALL LETTER P WITH HOOK
+#0x01a6 ""                         "" "" # LATIN LETTER YR
+#0x01a7 ""                         "" "" # LATIN CAPITAL LETTER TONE TWO
+#0x01a8 ""                         "" "" # LATIN SMALL LETTER TONE TWO
+#0x01a9 ""                         "" "" # LATIN CAPITAL LETTER ESH
+#0x01aa ""                         "" "" # LATIN LETTER REVERSED ESH LOOP
+#0x01ab ""                         "" "" # LATIN SMALL LETTER T WITH PALATAL HOOK
+#0x01ac ""                         "" "" # LATIN CAPITAL LETTER T WITH HOOK
+#0x01ad ""                         "" "" # LATIN SMALL LETTER T WITH HOOK
+#0x01ae ""                         "" "" # LATIN CAPITAL LETTER T WITH RETROFLEX HOOK
+#0x01af ""                         "" "" # LATIN CAPITAL LETTER U WITH HORN
+#0x01b0 ""                         "" "" # LATIN SMALL LETTER U WITH HORN
+#0x01b1 ""                         "" "" # LATIN CAPITAL LETTER UPSILON
+#0x01b2 ""                         "" "" # LATIN CAPITAL LETTER V WITH HOOK
+#0x01b3 ""                         "" "" # LATIN CAPITAL LETTER Y WITH HOOK
+#0x01b4 ""                         "" "" # LATIN SMALL LETTER Y WITH HOOK
+#0x01b5 ""                         "" "" # LATIN CAPITAL LETTER Z WITH STROKE
+#0x01b6 ""                         "" "" # LATIN SMALL LETTER Z WITH STROKE
+#0x01b7 ""                         "" "" # LATIN CAPITAL LETTER EZH
+#0x01b8 ""                         "" "" # LATIN CAPITAL LETTER EZH REVERSED
+#0x01b9 ""                         "" "" # LATIN SMALL LETTER EZH REVERSED
+#0x01ba ""                         "" "" # LATIN SMALL LETTER EZH WITH TAIL
+#0x01bb ""                         "" "" # LATIN LETTER TWO WITH STROKE
+#0x01bc ""                         "" "" # LATIN CAPITAL LETTER TONE FIVE
+#0x01bd ""                         "" "" # LATIN SMALL LETTER TONE FIVE
+#0x01be ""                         "" "" # LATIN LETTER INVERTED GLOTTAL STOP WITH STROKE
+#0x01bf ""                         "" "" # LATIN LETTER WYNN
+#0x01c0 ""                         "" "" # LATIN LETTER DENTAL CLICK
+#0x01c1 ""                         "" "" # LATIN LETTER LATERAL CLICK
+#0x01c2 ""                         "" "" # LATIN LETTER ALVEOLAR CLICK
+#0x01c3 ""                         "" "" # LATIN LETTER RETROFLEX CLICK
+#0x01c4 ""                         "" "" # LATIN CAPITAL LETTER DZ WITH CARON
+#0x01c5 ""                         "" "" # LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON
+#0x01c6 ""                         "" "" # LATIN SMALL LETTER DZ WITH CARON
+#0x01c7 ""                         "" "" # LATIN CAPITAL LETTER LJ
+#0x01c8 ""                         "" "" # LATIN CAPITAL LETTER L WITH SMALL LETTER J
+#0x01c9 ""                         "" "" # LATIN SMALL LETTER LJ
+#0x01ca ""                         "" "" # LATIN CAPITAL LETTER NJ
+#0x01cb ""                         "" "" # LATIN CAPITAL LETTER N WITH SMALL LETTER J
+#0x01cc ""                         "" "" # LATIN SMALL LETTER NJ
+0x01cd "\\v{A}"                   "" "" # LATIN CAPITAL LETTER A WITH CARON
+0x01ce "\\v{a}"                   "" "" # LATIN SMALL LETTER A WITH CARON
+0x01cf "\\v{I}"                   "" "" # LATIN CAPITAL LETTER I WITH CARON
+0x01d0 "\\v{\\i}"                 "" "" # LATIN SMALL LETTER I WITH CARON
+0x01d1 "\\v{O}"                   "" "" # LATIN CAPITAL LETTER O WITH CARON
+0x01d2 "\\v{o}"                   "" "" # LATIN SMALL LETTER O WITH CARON
+0x01d3 "\\v{U}"                   "" "" # LATIN CAPITAL LETTER U WITH CARON
+0x01d4 "\\v{u}"                   "" "" # LATIN SMALL LETTER U WITH CARON
+#0x01d5 ""                         "" "" # LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
+#0x01d6 ""                         "" "" # LATIN SMALL LETTER U WITH DIAERESIS AND MACRON
+#0x01d7 ""                         "" "" # LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE
+#0x01d8 ""                         "" "" # LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE
+#0x01d9 ""                         "" "" # LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON
+#0x01da ""                         "" "" # LATIN SMALL LETTER U WITH DIAERESIS AND CARON
+#0x01db ""                         "" "" # LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE
+#0x01dc ""                         "" "" # LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE
+#0x01dd ""                         "" "" # LATIN SMALL LETTER TURNED E
+#0x01de ""                         "" "" # LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON
+#0x01df ""                         "" "" # LATIN SMALL LETTER A WITH DIAERESIS AND MACRON
+#0x01e0 ""                         "" "" # LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON
+#0x01e1 ""                         "" "" # LATIN SMALL LETTER A WITH DOT ABOVE AND MACRON
+#0x01e2 ""                         "" "" # LATIN CAPITAL LETTER AE WITH MACRON
+#0x01e3 ""                         "" "" # LATIN SMALL LETTER AE WITH MACRON
+#0x01e4 ""                         "" "" # LATIN CAPITAL LETTER G WITH STROKE
+#0x01e5 ""                         "" "" # LATIN SMALL LETTER G WITH STROKE
+0x01e6 "\\v{G}"                   "" "" # LATIN CAPITAL LETTER G WITH CARON
+0x01e7 "\\v{g}"                   "" "" # LATIN SMALL LETTER G WITH CARON
+0x01e8 "\\v{K}"                   "" "" # LATIN CAPITAL LETTER K WITH CARON
+0x01e9 "\\v{k}"                   "" "" # LATIN SMALL LETTER K WITH CARON
+0x01ea "\\k{O}"                   "" "" # LATIN CAPITAL LETTER O WITH OGONEK
+0x01eb "\\k{o}"                   "" "" # LATIN SMALL LETTER O WITH OGONEK
+#0x01ec ""                         "" "" # LATIN CAPITAL LETTER O WITH OGONEK AND MACRON
+#0x01ed ""                         "" "" # LATIN SMALL LETTER O WITH OGONEK AND MACRON
+#0x01ee ""                         "" "" # LATIN CAPITAL LETTER EZH WITH CARON
+#0x01ef ""                         "" "" # LATIN SMALL LETTER EZH WITH CARON
+0x01f0 "\\v{\\j}"                 "" "" # LATIN SMALL LETTER J WITH CARON
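+0x01ce "\\v{a}"                   "" "" # LATIN SMALL LETTER A WITH CARON -- NOTE(review): out of code point order here; presumably duplicates the entry in the 0x01cd-0x01cf run, confirm and drop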
+#0x01f1 ""                         "" "" # LATIN CAPITAL LETTER DZ
+#0x01f2 ""                         "" "" # LATIN CAPITAL LETTER D WITH SMALL LETTER Z
+#0x01f3 ""                         "" "" # LATIN SMALL LETTER DZ
+0x01f4 "\\'{G}"                   "" "" # LATIN CAPITAL LETTER G WITH ACUTE
+0x01f5 "\\'{g}"                   "" "" # LATIN SMALL LETTER G WITH ACUTE
+#0x01f6 ""                         "" "" # LATIN CAPITAL LETTER HWAIR
+#0x01f7 ""                         "" "" # LATIN CAPITAL LETTER WYNN
+0x01f8 "\\`{N}"                   "" "" # LATIN CAPITAL LETTER N WITH GRAVE
+0x01f9 "\\`{n}"                   "" "" # LATIN SMALL LETTER N WITH GRAVE
+#0x01fa ""                         "" "" # LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE
+#0x01fb ""                         "" "" # LATIN SMALL LETTER A WITH RING ABOVE AND ACUTE
+#0x01fc ""                         "" "" # LATIN CAPITAL LETTER AE WITH ACUTE
+#0x01fd ""                         "" "" # LATIN SMALL LETTER AE WITH ACUTE
+#0x01fe ""                         "" "" # LATIN CAPITAL LETTER O WITH STROKE AND ACUTE
+#0x01ff ""                         "" "" # LATIN SMALL LETTER O WITH STROKE AND ACUTE
+#0x0200 ""                         "" "" # LATIN CAPITAL LETTER A WITH DOUBLE GRAVE
+#0x0201 ""                         "" "" # LATIN SMALL LETTER A WITH DOUBLE GRAVE
+#0x0202 ""                         "" "" # LATIN CAPITAL LETTER A WITH INVERTED BREVE
+#0x0203 ""                         "" "" # LATIN SMALL LETTER A WITH INVERTED BREVE
+#0x0204 ""                         "" "" # LATIN CAPITAL LETTER E WITH DOUBLE GRAVE
+#0x0205 ""                         "" "" # LATIN SMALL LETTER E WITH DOUBLE GRAVE
+#0x0206 ""                         "" "" # LATIN CAPITAL LETTER E WITH INVERTED BREVE
+#0x0207 ""                         "" "" # LATIN SMALL LETTER E WITH INVERTED BREVE
+#0x0208 ""                         "" "" # LATIN CAPITAL LETTER I WITH DOUBLE GRAVE
+#0x0209 ""                         "" "" # LATIN SMALL LETTER I WITH DOUBLE GRAVE
+#0x020a ""                         "" "" # LATIN CAPITAL LETTER I WITH INVERTED BREVE
+#0x020b ""                         "" "" # LATIN SMALL LETTER I WITH INVERTED BREVE
+#0x020c ""                         "" "" # LATIN CAPITAL LETTER O WITH DOUBLE GRAVE
+#0x020d ""                         "" "" # LATIN SMALL LETTER O WITH DOUBLE GRAVE
+#0x020e ""                         "" "" # LATIN CAPITAL LETTER O WITH INVERTED BREVE
+#0x020f ""                         "" "" # LATIN SMALL LETTER O WITH INVERTED BREVE
+#0x0210 ""                         "" "" # LATIN CAPITAL LETTER R WITH DOUBLE GRAVE
+#0x0211 ""                         "" "" # LATIN SMALL LETTER R WITH DOUBLE GRAVE
+#0x0212 ""                         "" "" # LATIN CAPITAL LETTER R WITH INVERTED BREVE
+#0x0213 ""                         "" "" # LATIN SMALL LETTER R WITH INVERTED BREVE
+#0x0214 ""                         "" "" # LATIN CAPITAL LETTER U WITH DOUBLE GRAVE
+#0x0215 ""                         "" "" # LATIN SMALL LETTER U WITH DOUBLE GRAVE
+#0x0216 ""                         "" "" # LATIN CAPITAL LETTER U WITH INVERTED BREVE
+#0x0217 ""                         "" "" # LATIN SMALL LETTER U WITH INVERTED BREVE
+#0x0218 ""                         "" "" # LATIN CAPITAL LETTER S WITH COMMA BELOW
+#0x0219 ""                         "" "" # LATIN SMALL LETTER S WITH COMMA BELOW
+#0x021a ""                         "" "" # LATIN CAPITAL LETTER T WITH COMMA BELOW
+#0x021b ""                         "" "" # LATIN SMALL LETTER T WITH COMMA BELOW
+#0x021c ""                         "" "" # LATIN CAPITAL LETTER YOGH
+#0x021d ""                         "" "" # LATIN SMALL LETTER YOGH
+0x021e "\\v{H}"                   "" "" # LATIN CAPITAL LETTER H WITH CARON
+0x021f "\\v{h}"                   "" "" # LATIN SMALL LETTER H WITH CARON
+#0x0220 ""                         "" "" # LATIN CAPITAL LETTER N WITH LONG RIGHT LEG
+#0x0222 ""                         "" "" # LATIN CAPITAL LETTER OU
+#0x0223 ""                         "" "" # LATIN SMALL LETTER OU
+#0x0224 ""                         "" "" # LATIN CAPITAL LETTER Z WITH HOOK
+#0x0225 ""                         "" "" # LATIN SMALL LETTER Z WITH HOOK
+0x0226 "\\.{A}"                   "" "" # LATIN CAPITAL LETTER A WITH DOT ABOVE
+0x0227 "\\.{a}"                   "" "" # LATIN SMALL LETTER A WITH DOT ABOVE
+0x0228 "\\c{E}"                   "" "" # LATIN CAPITAL LETTER E WITH CEDILLA
+0x0229 "\\c{e}"                   "" "" # LATIN SMALL LETTER E WITH CEDILLA
+#0x022a ""                         "" "" # LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON
+#0x022b ""                         "" "" # LATIN SMALL LETTER O WITH DIAERESIS AND MACRON
+#0x022c ""                         "" "" # LATIN CAPITAL LETTER O WITH TILDE AND MACRON
+#0x022d ""                         "" "" # LATIN SMALL LETTER O WITH TILDE AND MACRON
+0x022e "\\.{O}"                   "" "" # LATIN CAPITAL LETTER O WITH DOT ABOVE
+0x022f "\\.{o}"                   "" "" # LATIN SMALL LETTER O WITH DOT ABOVE
+#0x0230 ""                         "" "" # LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON
+#0x0231 ""                         "" "" # LATIN SMALL LETTER O WITH DOT ABOVE AND MACRON
+0x0232 "\\={Y}"                   "" "" # LATIN CAPITAL LETTER Y WITH MACRON
+0x0233 "\\={y}"                   "" "" # LATIN SMALL LETTER Y WITH MACRON
+0x0237 "\\j"                      "" "" # LATIN SMALL LETTER DOTLESS J
+#0x0250 ""                         "" "" # LATIN SMALL LETTER TURNED A
+#0x0251 ""                         "" "" # LATIN SMALL LETTER ALPHA
+#0x0252 ""                         "" "" # LATIN SMALL LETTER TURNED ALPHA
+#0x0253 ""                         "" "" # LATIN SMALL LETTER B WITH HOOK
+#0x0254 ""                         "" "" # LATIN SMALL LETTER OPEN O
+#0x0255 ""                         "" "" # LATIN SMALL LETTER C WITH CURL
+#0x0256 ""                         "" "" # LATIN SMALL LETTER D WITH TAIL
+#0x0257 ""                         "" "" # LATIN SMALL LETTER D WITH HOOK
+#0x0258 ""                         "" "" # LATIN SMALL LETTER REVERSED E
+#0x0259 ""                         "" "" # LATIN SMALL LETTER SCHWA
+#0x025a ""                         "" "" # LATIN SMALL LETTER SCHWA WITH HOOK
+#0x025b ""                         "" "" # LATIN SMALL LETTER OPEN E
+#0x025c ""                         "" "" # LATIN SMALL LETTER REVERSED OPEN E
+#0x025d ""                         "" "" # LATIN SMALL LETTER REVERSED OPEN E WITH HOOK
+#0x025e ""                         "" "" # LATIN SMALL LETTER CLOSED REVERSED OPEN E
+#0x025f ""                         "" "" # LATIN SMALL LETTER DOTLESS J WITH STROKE
+#0x0260 ""                         "" "" # LATIN SMALL LETTER G WITH HOOK
+#0x0261 ""                         "" "" # LATIN SMALL LETTER SCRIPT G
+#0x0262 ""                         "" "" # LATIN LETTER SMALL CAPITAL G
+#0x0263 ""                         "" "" # LATIN SMALL LETTER GAMMA
+#0x0264 ""                         "" "" # LATIN SMALL LETTER RAMS HORN
+#0x0265 ""                         "" "" # LATIN SMALL LETTER TURNED H
+#0x0266 ""                         "" "" # LATIN SMALL LETTER H WITH HOOK
+#0x0267 ""                         "" "" # LATIN SMALL LETTER HENG WITH HOOK
+#0x0268 ""                         "" "" # LATIN SMALL LETTER I WITH STROKE
+#0x0269 ""                         "" "" # LATIN SMALL LETTER IOTA
+#0x026a ""                         "" "" # LATIN LETTER SMALL CAPITAL I
+#0x026b ""                         "" "" # LATIN SMALL LETTER L WITH MIDDLE TILDE
+#0x026c ""                         "" "" # LATIN SMALL LETTER L WITH BELT
+#0x026d ""                         "" "" # LATIN SMALL LETTER L WITH RETROFLEX HOOK
+#0x026e ""                         "" "" # LATIN SMALL LETTER LEZH
+#0x026f ""                         "" "" # LATIN SMALL LETTER TURNED M
+#0x0270 ""                         "" "" # LATIN SMALL LETTER TURNED M WITH LONG LEG
+#0x0271 ""                         "" "" # LATIN SMALL LETTER M WITH HOOK
+#0x0272 ""                         "" "" # LATIN SMALL LETTER N WITH LEFT HOOK
+#0x0273 ""                         "" "" # LATIN SMALL LETTER N WITH RETROFLEX HOOK
+#0x0274 ""                         "" "" # LATIN LETTER SMALL CAPITAL N
+#0x0275 ""                         "" "" # LATIN SMALL LETTER BARRED O
+#0x0276 ""                         "" "" # LATIN LETTER SMALL CAPITAL OE
+#0x0277 ""                         "" "" # LATIN SMALL LETTER CLOSED OMEGA
+#0x0278 ""                         "" "" # LATIN SMALL LETTER PHI
+#0x0279 ""                         "" "" # LATIN SMALL LETTER TURNED R
+#0x027a ""                         "" "" # LATIN SMALL LETTER TURNED R WITH LONG LEG
+#0x027b ""                         "" "" # LATIN SMALL LETTER TURNED R WITH HOOK
+#0x027c ""                         "" "" # LATIN SMALL LETTER R WITH LONG LEG
+#0x027d ""                         "" "" # LATIN SMALL LETTER R WITH TAIL
+#0x027e ""                         "" "" # LATIN SMALL LETTER R WITH FISHHOOK
+#0x027f ""                         "" "" # LATIN SMALL LETTER REVERSED R WITH FISHHOOK
+#0x0280 ""                         "" "" # LATIN LETTER SMALL CAPITAL R
+#0x0281 ""                         "" "" # LATIN LETTER SMALL CAPITAL INVERTED R
+#0x0282 ""                         "" "" # LATIN SMALL LETTER S WITH HOOK
+#0x0283 ""                         "" "" # LATIN SMALL LETTER ESH
+#0x0284 ""                         "" "" # LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK
+#0x0285 ""                         "" "" # LATIN SMALL LETTER SQUAT REVERSED ESH
+#0x0286 ""                         "" "" # LATIN SMALL LETTER ESH WITH CURL
+#0x0287 ""                         "" "" # LATIN SMALL LETTER TURNED T
+#0x0288 ""                         "" "" # LATIN SMALL LETTER T WITH RETROFLEX HOOK
+#0x0289 ""                         "" "" # LATIN SMALL LETTER U BAR
+#0x028a ""                         "" "" # LATIN SMALL LETTER UPSILON
+#0x028b ""                         "" "" # LATIN SMALL LETTER V WITH HOOK
+#0x028c ""                         "" "" # LATIN SMALL LETTER TURNED V
+#0x028d ""                         "" "" # LATIN SMALL LETTER TURNED W
+#0x028e ""                         "" "" # LATIN SMALL LETTER TURNED Y
+#0x028f ""                         "" "" # LATIN LETTER SMALL CAPITAL Y
+#0x0290 ""                         "" "" # LATIN SMALL LETTER Z WITH RETROFLEX HOOK
+#0x0291 ""                         "" "" # LATIN SMALL LETTER Z WITH CURL
+#0x0292 ""                         "" "" # LATIN SMALL LETTER EZH
+#0x0293 ""                         "" "" # LATIN SMALL LETTER EZH WITH CURL
+#0x0294 ""                         "" "" # LATIN LETTER GLOTTAL STOP
+#0x0295 ""                         "" "" # LATIN LETTER PHARYNGEAL VOICED FRICATIVE
+#0x0296 ""                         "" "" # LATIN LETTER INVERTED GLOTTAL STOP
+#0x0297 ""                         "" "" # LATIN LETTER STRETCHED C
+#0x0298 ""                         "" "" # LATIN LETTER BILABIAL CLICK
+#0x0299 ""                         "" "" # LATIN LETTER SMALL CAPITAL B
+#0x029a ""                         "" "" # LATIN SMALL LETTER CLOSED OPEN E
+#0x029b ""                         "" "" # LATIN LETTER SMALL CAPITAL G WITH HOOK
+#0x029c ""                         "" "" # LATIN LETTER SMALL CAPITAL H
+#0x029d ""                         "" "" # LATIN SMALL LETTER J WITH CROSSED-TAIL
+#0x029e ""                         "" "" # LATIN SMALL LETTER TURNED K
+#0x029f ""                         "" "" # LATIN LETTER SMALL CAPITAL L
+#0x02a0 ""                         "" "" # LATIN SMALL LETTER Q WITH HOOK
+#0x02a1 ""                         "" "" # LATIN LETTER GLOTTAL STOP WITH STROKE
+#0x02a2 ""                         "" "" # LATIN LETTER REVERSED GLOTTAL STOP WITH STROKE
+#0x02a3 ""                         "" "" # LATIN SMALL LETTER DZ DIGRAPH
+#0x02a4 ""                         "" "" # LATIN SMALL LETTER DEZH DIGRAPH
+#0x02a5 ""                         "" "" # LATIN SMALL LETTER DZ DIGRAPH WITH CURL
+#0x02a6 ""                         "" "" # LATIN SMALL LETTER TS DIGRAPH
+#0x02a7 ""                         "" "" # LATIN SMALL LETTER TESH DIGRAPH
+#0x02a8 ""                         "" "" # LATIN SMALL LETTER TC DIGRAPH WITH CURL
+#0x02a9 ""                         "" "" # LATIN SMALL LETTER FENG DIGRAPH
+#0x02aa ""                         "" "" # LATIN SMALL LETTER LS DIGRAPH
+#0x02ab ""                         "" "" # LATIN SMALL LETTER LZ DIGRAPH
+#0x02ac ""                         "" "" # LATIN LETTER BILABIAL PERCUSSIVE
+#0x02ad ""                         "" "" # LATIN LETTER BIDENTAL PERCUSSIVE
+#0x02b0 ""                         "" "" # MODIFIER LETTER SMALL H
+#0x02b1 ""                         "" "" # MODIFIER LETTER SMALL H WITH HOOK
+#0x02b2 ""                         "" "" # MODIFIER LETTER SMALL J
+#0x02b3 ""                         "" "" # MODIFIER LETTER SMALL R
+#0x02b4 ""                         "" "" # MODIFIER LETTER SMALL TURNED R
+#0x02b5 ""                         "" "" # MODIFIER LETTER SMALL TURNED R WITH HOOK
+#0x02b6 ""                         "" "" # MODIFIER LETTER SMALL CAPITAL INVERTED R
+#0x02b7 ""                         "" "" # MODIFIER LETTER SMALL W
+#0x02b8 ""                         "" "" # MODIFIER LETTER SMALL Y
+#0x02b9 ""                         "" "" # MODIFIER LETTER PRIME
+#0x02ba ""                         "" "" # MODIFIER LETTER DOUBLE PRIME
+#0x02bb ""                         "" "" # MODIFIER LETTER TURNED COMMA
+#0x02bc ""                         "" "" # MODIFIER LETTER APOSTROPHE
+#0x02bd ""                         "" "" # MODIFIER LETTER REVERSED COMMA
+#0x02be ""                         "" "" # MODIFIER LETTER RIGHT HALF RING
+#0x02bf ""                         "" "" # MODIFIER LETTER LEFT HALF RING
+#0x02c0 ""                         "" "" # MODIFIER LETTER GLOTTAL STOP
+#0x02c1 ""                         "" "" # MODIFIER LETTER REVERSED GLOTTAL STOP
+#0x02c2 ""                         "" "" # MODIFIER LETTER LEFT ARROWHEAD
+#0x02c3 ""                         "" "" # MODIFIER LETTER RIGHT ARROWHEAD
+#0x02c4 ""                         "" "" # MODIFIER LETTER UP ARROWHEAD
+#0x02c5 ""                         "" "" # MODIFIER LETTER DOWN ARROWHEAD
+#0x02c6 ""                         "" "" # MODIFIER LETTER CIRCUMFLEX ACCENT
+0x02c7 "\\v{ }"                   "" "" # CARON
+#0x02c8 ""                         "" "" # MODIFIER LETTER VERTICAL LINE
+#0x02c9 ""                         "" "" # MODIFIER LETTER MACRON
+#0x02ca ""                         "" "" # MODIFIER LETTER ACUTE ACCENT
+#0x02cb ""                         "" "" # MODIFIER LETTER GRAVE ACCENT
+#0x02cc ""                         "" "" # MODIFIER LETTER LOW VERTICAL LINE
+#0x02cd ""                         "" "" # MODIFIER LETTER LOW MACRON
+#0x02ce ""                         "" "" # MODIFIER LETTER LOW GRAVE ACCENT
+#0x02cf ""                         "" "" # MODIFIER LETTER LOW ACUTE ACCENT
+#0x02d0 ""                         "" "" # MODIFIER LETTER TRIANGULAR COLON
+#0x02d1 ""                         "" "" # MODIFIER LETTER HALF TRIANGULAR COLON
+#0x02d2 ""                         "" "" # MODIFIER LETTER CENTRED RIGHT HALF RING
+#0x02d3 ""                         "" "" # MODIFIER LETTER CENTRED LEFT HALF RING
+#0x02d4 ""                         "" "" # MODIFIER LETTER UP TACK
+#0x02d5 ""                         "" "" # MODIFIER LETTER DOWN TACK
+#0x02d6 ""                         "" "" # MODIFIER LETTER PLUS SIGN
+#0x02d7 ""                         "" "" # MODIFIER LETTER MINUS SIGN
+0x02d8 "\\u{ }"                   "" "" # BREVE
+0x02d9 "\\.{ }"                   "" "" # DOT ABOVE
+0x02da "\\r{ }"                   "" "" # RING ABOVE
+0x02db "\\k{ }"                   "" "" # OGONEK
+0x02dc "\\~{ }"                   "" "" # SMALL TILDE
+0x02dd "\\H{ }"                   "" "" # DOUBLE ACUTE ACCENT
+#0x02de ""                         "" "" # MODIFIER LETTER RHOTIC HOOK
+#0x02df ""                         "" "" # MODIFIER LETTER CROSS ACCENT
+#0x02e0 ""                         "" "" # MODIFIER LETTER SMALL GAMMA
+#0x02e1 ""                         "" "" # MODIFIER LETTER SMALL L
+#0x02e2 ""                         "" "" # MODIFIER LETTER SMALL S
+#0x02e3 ""                         "" "" # MODIFIER LETTER SMALL X
+#0x02e4 ""                         "" "" # MODIFIER LETTER SMALL REVERSED GLOTTAL STOP
+#0x02e5 ""                         "" "" # MODIFIER LETTER EXTRA-HIGH TONE BAR
+#0x02e6 ""                         "" "" # MODIFIER LETTER HIGH TONE BAR
+#0x02e7 ""                         "" "" # MODIFIER LETTER MID TONE BAR
+#0x02e8 ""                         "" "" # MODIFIER LETTER LOW TONE BAR
+#0x02e9 ""                         "" "" # MODIFIER LETTER EXTRA-LOW TONE BAR
+#0x02ea ""                         "" "" # MODIFIER LETTER YIN DEPARTING TONE MARK
+#0x02eb ""                         "" "" # MODIFIER LETTER YANG DEPARTING TONE MARK
+#0x02ec ""                         "" "" # MODIFIER LETTER VOICING
+#0x02ed ""                         "" "" # MODIFIER LETTER UNASPIRATED
+#0x02ee ""                         "" "" # MODIFIER LETTER DOUBLE APOSTROPHE
+0x0300 "\\`"                      "" "combining" # COMBINING GRAVE ACCENT
+0x0301 "\\'"                      "" "combining" # COMBINING ACUTE ACCENT
+0x0302 "\\^"                      "" "combining" # COMBINING CIRCUMFLEX ACCENT
+0x0303 "\\~"                      "" "combining" # COMBINING TILDE
+0x0304 "\\="                      "" "combining" # COMBINING MACRON
+#0x0305 ""                         "" "combining" # COMBINING OVERLINE
+0x0306 "\\u"                      "" "combining" # COMBINING BREVE
+0x0307 "\\."                      "" "combining" # COMBINING DOT ABOVE
+0x0308 "\\\""                     "" "combining" # COMBINING DIAERESIS
+#0x0309 ""                         "" "combining" # COMBINING HOOK ABOVE
+0x030a "\\r"                      "" "combining" # COMBINING RING ABOVE
+0x030b "\\H"                      "" "combining" # COMBINING DOUBLE ACUTE ACCENT
+0x030c "\\v"                      "" "combining" # COMBINING CARON
+#0x030d ""                         "" "combining" # COMBINING VERTICAL LINE ABOVE
+#0x030e ""                         "" "combining" # COMBINING DOUBLE VERTICAL LINE ABOVE
+#0x030f ""                         "" "combining" # COMBINING DOUBLE GRAVE ACCENT
+#0x0310 ""                         "" "combining" # COMBINING CANDRABINDU
+#0x0311 ""                         "" "combining" # COMBINING INVERTED BREVE
+#0x0312 ""                         "" "combining" # COMBINING TURNED COMMA ABOVE
+#0x0313 ""                         "" "combining" # COMBINING COMMA ABOVE
+#0x0314 ""                         "" "combining" # COMBINING REVERSED COMMA ABOVE
+#0x0315 ""                         "" "combining" # COMBINING COMMA ABOVE RIGHT
+#0x0316 ""                         "" "combining" # COMBINING GRAVE ACCENT BELOW
+#0x0317 ""                         "" "combining" # COMBINING ACUTE ACCENT BELOW
+#0x0318 ""                         "" "combining" # COMBINING LEFT TACK BELOW
+#0x0319 ""                         "" "combining" # COMBINING RIGHT TACK BELOW
+#0x031a ""                         "" "combining" # COMBINING LEFT ANGLE ABOVE
+#0x031b ""                         "" "combining" # COMBINING HORN
+#0x031c ""                         "" "combining" # COMBINING LEFT HALF RING BELOW
+#0x031d ""                         "" "combining" # COMBINING UP TACK BELOW
+#0x031e ""                         "" "combining" # COMBINING DOWN TACK BELOW
+#0x031f ""                         "" "combining" # COMBINING PLUS SIGN BELOW
+0x0320 "\\b"                      "" "combining" # COMBINING MINUS SIGN BELOW
+#0x0321 ""                         "" "combining" # COMBINING PALATALIZED HOOK BELOW
+#0x0322 ""                         "" "combining" # COMBINING RETROFLEX HOOK BELOW
+0x0323 "\\d"                      "" "combining" # COMBINING DOT BELOW
+#0x0324 ""                         "" "combining" # COMBINING DIAERESIS BELOW
+#0x0325 ""                         "" "combining" # COMBINING RING BELOW
+#0x0326 ""                         "" "combining" # COMBINING COMMA BELOW
+0x0327 "\\c"                      "" "combining" # COMBINING CEDILLA
+0x0328 "\\k"                      "" "combining" # COMBINING OGONEK
+#0x0329 ""                         "" "combining" # COMBINING VERTICAL LINE BELOW
+#0x032a ""                         "" "combining" # COMBINING BRIDGE BELOW
+#0x032b ""                         "" "combining" # COMBINING INVERTED DOUBLE ARCH BELOW
+#0x032c ""                         "" "combining" # COMBINING CARON BELOW
+#0x032d ""                         "" "combining" # COMBINING CIRCUMFLEX ACCENT BELOW
+#0x032e ""                         "" "combining" # COMBINING BREVE BELOW
+#0x032f ""                         "" "combining" # COMBINING INVERTED BREVE BELOW
+#0x0330 ""                         "" "combining" # COMBINING TILDE BELOW
+#0x0331 ""                         "" "combining" # COMBINING MACRON BELOW
+#0x0332 ""                         "" "combining" # COMBINING LOW LINE
+#0x0333 ""                         "" "combining" # COMBINING DOUBLE LOW LINE
+#0x0334 ""                         "" "combining" # COMBINING TILDE OVERLAY
+#0x0335 ""                         "" "combining" # COMBINING SHORT STROKE OVERLAY
+#0x0336 ""                         "" "combining" # COMBINING LONG STROKE OVERLAY
+#0x0337 ""                         "" "combining" # COMBINING SHORT SOLIDUS OVERLAY
+#0x0338 ""                         "" "combining" # COMBINING LONG SOLIDUS OVERLAY
+#0x0339 ""                         "" "combining" # COMBINING RIGHT HALF RING BELOW
+#0x033a ""                         "" "combining" # COMBINING INVERTED BRIDGE BELOW
+#0x033b ""                         "" "combining" # COMBINING SQUARE BELOW
+#0x033c ""                         "" "combining" # COMBINING SEAGULL BELOW
+#0x033d ""                         "" "combining" # COMBINING X ABOVE
+#0x033e ""                         "" "combining" # COMBINING VERTICAL TILDE
+#0x033f ""                         "" "combining" # COMBINING DOUBLE OVERLINE
+#0x0340 ""                         "" "combining" # COMBINING GRAVE TONE MARK
+#0x0341 ""                         "" "combining" # COMBINING ACUTE TONE MARK
+#0x0342 ""                         "" "combining" # COMBINING GREEK PERISPOMENI
+#0x0343 ""                         "" "combining" # COMBINING GREEK KORONIS
+#0x0344 ""                         "" "combining" # COMBINING GREEK DIALYTIKA TONOS
+#0x0345 ""                         "" "combining" # COMBINING GREEK YPOGEGRAMMENI
+#0x0346 ""                         "" "combining" # COMBINING BRIDGE ABOVE
+#0x0347 ""                         "" "combining" # COMBINING EQUALS SIGN BELOW
+#0x0348 ""                         "" "combining" # COMBINING DOUBLE VERTICAL LINE BELOW
+#0x0349 ""                         "" "combining" # COMBINING LEFT ANGLE BELOW
+#0x034a ""                         "" "combining" # COMBINING NOT TILDE ABOVE
+#0x034b ""                         "" "combining" # COMBINING HOMOTHETIC ABOVE
+#0x034c ""                         "" "combining" # COMBINING ALMOST EQUAL TO ABOVE
+#0x034d ""                         "" "combining" # COMBINING LEFT RIGHT ARROW BELOW
+#0x034e ""                         "" "combining" # COMBINING UPWARDS ARROW BELOW
+#0x034f ""                         "" "" # COMBINING GRAPHEME JOINER
+#0x0360 ""                         "" "combining" # COMBINING DOUBLE TILDE
+0x0361 "\\t"                      "" "combining" # COMBINING DOUBLE INVERTED BREVE (ligature tie)
+#0x0362 ""                         "" "combining" # COMBINING DOUBLE RIGHTWARDS ARROW BELOW
+#0x0363 ""                         "" "combining" # COMBINING LATIN SMALL LETTER A
+#0x0364 ""                         "" "combining" # COMBINING LATIN SMALL LETTER E
+#0x0365 ""                         "" "combining" # COMBINING LATIN SMALL LETTER I
+#0x0366 ""                         "" "combining" # COMBINING LATIN SMALL LETTER O
+#0x0367 ""                         "" "combining" # COMBINING LATIN SMALL LETTER U
+#0x0368 ""                         "" "combining" # COMBINING LATIN SMALL LETTER C
+#0x0369 ""                         "" "combining" # COMBINING LATIN SMALL LETTER D
+#0x036a ""                         "" "combining" # COMBINING LATIN SMALL LETTER H
+#0x036b ""                         "" "combining" # COMBINING LATIN SMALL LETTER M
+#0x036c ""                         "" "combining" # COMBINING LATIN SMALL LETTER R
+#0x036d ""                         "" "combining" # COMBINING LATIN SMALL LETTER T
+#0x036e ""                         "" "combining" # COMBINING LATIN SMALL LETTER V
+#0x036f ""                         "" "combining" # COMBINING LATIN SMALL LETTER X
+0x1ea1 "\\d{a}"                   "" "" # LATIN SMALL LETTER A WITH DOT BELOW
+0x20ac "\\texteuro"               "textcomp" "" # EURO SIGN
+
--- a/src/encoding.C
+++ b/src/encoding.C
@ -15,14 +15,19 @@
 #include "encoding.h"

 #include "debug.h"
+#include "LaTeXFeatures.h"
 #include "lyxlex.h"
 #include "lyxrc.h"

 #include "support/filename.h"
+#include "support/lstrings.h"
+#include "support/unicode.h"


 namespace lyx {

+using support::FileName;
+
 #ifndef CXX_GLOBAL_CSTD
 using std::strtol;
 #endif
@ -177,9 +182,97 @@ char_type arabic_table[63][2] = {

 char_type const arabic_start = 0xc1;

+
+/// Information about a single UCS4 character, as read from the
+/// unicodesymbols file.
+struct CharInfo {
+	/// Start with all flags cleared, so that an entry never carries
+	/// indeterminate values, whichever fields the reader fills in.
+	CharInfo() : combining(false), feature(false), force(false) {}
+	/// LaTeX command for this character
+	docstring command;
+	/// Needed LaTeX preamble (or feature)
+	string preamble;
+	/// Is this a combining character?
+	bool combining;
+	/// Is \c preamble a feature known by LaTeXFeatures, or a raw LaTeX
+	/// command? Only meaningful if \c preamble is not empty.
+	bool feature;
+	/// Always force the LaTeX command, even if the encoding contains
+	/// this character?
+	bool force;
+};
+
+
+/// Maps a UCS4 code point to the information known about it.
+typedef std::map<char_type, CharInfo> CharInfoMap;
+/// All known symbols; filled by Encodings::read() and looked up by the
+/// Encoding member functions and Encodings::isCombiningChar().
+CharInfoMap unicodesymbols;
+
 } // namespace anon


+Encoding::Encoding(string const & n, string const & l, string const & i)
+	: Name_(n), LatexName_(l), iconvName_(i)
+{
+	if (n == "utf8") {
+		// UTF8 can encode all 1<<20 + 1<<16 UCS4 code points, so the
+		// set of encodable characters is not needed at all.
+		start_encodable_ = 0x110000;
+		return;
+	}
+
+	// Probe each of the 256 byte values with iconv name \p i and
+	// remember the UCS4 code points they convert to. Symbols flagged
+	// "force" are left out of the set.
+	// lyxerr is temporarily switched off, since we will generate
+	// iconv errors.
+	lyxerr.disable();
+	for (int byte = 0; byte < 256; ++byte) {
+		char const eightbit = static_cast<char>(byte);
+		std::vector<char_type> const converted =
+			eightbit_to_ucs4(&eightbit, 1, i);
+		if (converted.size() != 1)
+			continue;
+		char_type const ucs4 = converted[0];
+		CharInfoMap::const_iterator const info = unicodesymbols.find(ucs4);
+		if (info == unicodesymbols.end() || !info->second.force)
+			encodable_.insert(ucs4);
+	}
+	lyxerr.enable();
+
+	// Take the contiguous run of code points starting at 0 out of the
+	// set again; it is represented by start_encodable_ instead.
+	start_encodable_ = 0;
+	while (encodable_.erase(start_encodable_) > 0)
+		++start_encodable_;
+}
+
+
+/// Return \p c itself if this encoding can represent it, otherwise the
+/// LaTeX command from the unicodesymbols table. If no command is known
+/// either, an error is written to lyxerr and \p c is returned unchanged.
+docstring const Encoding::latexChar(char_type c) const
+{
+	// Fast path: everything below start_encodable_ is representable
+	if (c < start_encodable_)
+		return docstring(1, c);
+	if (encodable_.find(c) == encodable_.end()) {
+		// c cannot be encoded in this encoding
+		CharInfoMap::const_iterator const it = unicodesymbols.find(c);
+		if (it != unicodesymbols.end())
+			return it->second.command;
+		// std::hex is sticky, so switch lyxerr back to decimal
+		// after the code point has been printed.
+		lyxerr << "Could not find LaTeX command for character 0x"
+		       << std::hex << c << std::dec
+		       << ".\nLaTeX export will fail."
+		       << endl;
+	}
+	return docstring(1, c);
+}
+
+
+/// Register with \p features whatever the LaTeX command for \p c needs
+/// in the preamble. Does nothing if \p c is encodable, unknown, or has
+/// no preamble entry.
+void Encoding::validate(char_type c, LaTeXFeatures & features) const
+{
+	// Characters that this encoding can represent need no command
+	if (c < start_encodable_ || encodable_.find(c) != encodable_.end())
+		return;
+
+	// c cannot be encoded in this encoding
+	CharInfoMap::const_iterator const pos = unicodesymbols.find(c);
+	if (pos == unicodesymbols.end())
+		return;
+
+	string const & needed = pos->second.preamble;
+	if (needed.empty())
+		return;
+
+	// Either a feature name known by LaTeXFeatures or raw LaTeX code
+	if (pos->second.feature)
+		features.require(needed);
+	else
+		features.addPreambleSnippet(needed);
+}
+

 bool Encodings::isComposeChar_hebrew(char_type c)
 {
@ -226,6 +319,15 @@ char_type Encodings::transformChar(char_type c,
 }


+/// \return true if \p c is listed in the unicodesymbols table with the
+/// "combining" flag; unknown characters are not combining.
+bool Encodings::isCombiningChar(char_type c)
+{
+	CharInfoMap::const_iterator const pos = unicodesymbols.find(c);
+	return pos != unicodesymbols.end() && pos->second.combining;
+}
+
+
 Encoding const * Encodings::getFromLyXName(string const & name) const
 {
 	EncodingList::const_iterator it = encodinglist.find(name);
@ -255,8 +357,68 @@ Encodings::Encodings()
 {
 }

-void Encodings::read(support::FileName const & filename)
+
+void Encodings::read(FileName const & encfile, FileName const & symbolsfile)
 {
+	// We must read the symbolsfile first, because the Encoding
+	// constructor depends on it.
+	LyXLex symbolslex(0, 0);
+	symbolslex.setFile(symbolsfile);
+	while (symbolslex.isOK()) {
+		// Each entry consists of four tokens: code point, LaTeX
+		// command, preamble (or feature) and a comma separated list
+		// of flags. Reading stops at the first token that is missing
+		// or cannot be parsed.
+		char_type symbol;
+		CharInfo info;
+		string flags;
+
+		if (symbolslex.next(true)) {
+			std::istringstream is(symbolslex.getString());
+			// reading symbol directly does not work if
+			// char_type == std::wchar_t.
+			boost::uint32_t tmp;
+			if(!(is >> std::hex >> tmp))
+				break;
+			symbol = tmp;
+		} else
+			break;
+		if (symbolslex.next(true))
+			info.command = symbolslex.getDocString();
+		else
+			break;
+		if (symbolslex.next(true))
+			info.preamble = symbolslex.getString();
+		else
+			break;
+		if (symbolslex.next(true))
+			flags = symbolslex.getString();
+		else
+			break;
+
+		info.combining = false;
+		info.force = false;
+		while (!flags.empty()) {
+			string flag;
+			flags = support::split(flags, flag, ',');
+			if (flag == "combining")
+				info.combining = true;
+			else if (flag == "force")
+				info.force = true;
+			else
+				// std::hex is sticky, so switch lyxerr back to
+				// decimal after the code point has been printed.
+				lyxerr << "Ignoring unknown flag `" << flag
+				       << "' for symbol `0x" << std::hex
+				       << symbol << std::dec << "'." << endl;
+		}
+
+		// A preamble starting with a backslash is raw LaTeX code,
+		// anything else is the name of a feature. Assign this
+		// unconditionally: info.feature is printed below and stored
+		// in the map, so it must not stay uninitialized for entries
+		// without preamble.
+		info.feature = !info.preamble.empty()
+			&& info.preamble[0] != '\\';
+
+		lyxerr[Debug::INFO]
+			<< "Read unicode symbol " << symbol << " '"
+			<< to_utf8(info.command) << "' '" << info.preamble
+			<< "' " << info.combining << ' ' << info.feature
+			<< endl;
+		unicodesymbols[symbol] = info;
+	}
+
+	// Now read the encodings
 	enum Encodingtags {
 		et_encoding = 1,
 		et_end,
@ -269,7 +431,7 @@ void Encodings::read(support::FileName const & filename)
 	};

 	LyXLex lex(encodingtags, et_last - 1);
-	lex.setFile(filename);
+	lex.setFile(encfile);
 	while (lex.isOK()) {
 		switch (lex.lex()) {
 		case et_encoding:
--- a/src/encoding.h
+++ b/src/encoding.h
@ -13,15 +13,17 @@
 #ifndef ENCODING_H
 #define ENCODING_H

-#include <map>
-#include <string>
+#include "support/docstring.h"

-#include "support/types.h"
+#include <set>

 namespace lyx {

 namespace support { class FileName; }

+class LaTeXFeatures;
+
+
 ///
 class Encoding {
 public:
@ -29,16 +31,25 @@ public:
 	Encoding() {}
 	///
 	Encoding(std::string const & n, std::string const & l,
-	         std::string const & i)
-		: Name_(n), LatexName_(l), iconvName_(i)
-	{
-	}
+	         std::string const & i);
 	///
 	std::string const & name() const { return Name_; }
 	///
 	std::string const & latexName() const { return LatexName_; }
 	///
 	std::string const & iconvName() const { return iconvName_; }
+	/**
+	 * Convert \p c to something that LaTeX can understand.
+	 * This is either the character itself (if it is representable
+	 * in this encoding), or a LaTeX macro.
+	 * If the character is not representable in this encoding, but no
+	 * LaTeX macro is known, a warning is given on lyxerr, and the
+	 * character is returned.
+	 */
+	docstring const latexChar(char_type c) const;
+	/// Add the preamble snippet needed for the output of latexChar(c)
+	/// to \p features.
+	void validate(char_type c, LaTeXFeatures & features) const;
 private:
 	///
 	std::string Name_;
@ -46,6 +57,15 @@ private:
 	std::string LatexName_;
 	///
 	std::string iconvName_;
+	///
+	typedef std::set<char_type> CharSet;
+	/// Set of UCS4 characters that we can encode (for singlebyte
+	/// encodings only)
+	CharSet encodable_;
+	/// All code points below this are encodable. This helps us to avoid
+	/// lookup of ASCII characters in encodable_ and gives about 1 sec
+	/// speedup on export of the Userguide.
+	char_type start_encodable_;
 };

 class Encodings {
@ -64,8 +84,11 @@ public:
 	};
 	///
 	Encodings();
-	///
-	void read(support::FileName const & filename);
+	/// Read the encodings.
+	/// \param encfile encodings definition file
+	/// \param symbolsfile unicode->LaTeX mapping file
+	void read(support::FileName const & encfile,
+	          support::FileName const & symbolsfile);
 	/// Get encoding from LyX name \p name
 	Encoding const * getFromLyXName(std::string const & name) const;
 	/// Get encoding from LaTeX name \p name
@ -97,6 +120,8 @@ public:
 	static bool is_arabic(char_type c);
 	///
 	static char_type transformChar(char_type c, Letter_Form form);
+	/// Is this a combining char?
+	static bool isCombiningChar(char_type c);

 private:
 	///
--- a/src/lyx_main.C
+++ b/src/lyx_main.C
@ -880,7 +880,7 @@ bool LyX::init()
 	if (!readRcFile("preferences"))
 		return false;

-	if (!readEncodingsFile("encodings"))
+	if (!readEncodingsFile("encodings", "unicodesymbols"))
 		return false;
 	if (!readLanguagesFile("languages"))
 		return false;
@ -1247,16 +1247,24 @@ bool LyX::readLanguagesFile(string const & name)


-// Read the encodings file `name'
+// Read the encodings file `enc_name' and the symbols file `symbols_name'
-bool LyX::readEncodingsFile(string const & name)
+bool LyX::readEncodingsFile(string const & enc_name,
+                            string const & symbols_name)
 {
-	lyxerr[Debug::INIT] << "About to read " << name << "..." << endl;
+	lyxerr[Debug::INIT] << "About to read " << enc_name << " and "
+	                    << symbols_name << "..." << endl;

-	FileName const enc_path = libFileSearch(string(), name);
-	if (enc_path.empty()) {
-		showFileError(name);
+	FileName const symbols_path = libFileSearch(string(), symbols_name);
+	if (symbols_path.empty()) {
+		showFileError(symbols_name);
 		return false;
 	}
-	encodings.read(enc_path);
+
+	FileName const enc_path = libFileSearch(string(), enc_name);
+	if (enc_path.empty()) {
+		showFileError(enc_name);
+		return false;
+	}
+	encodings.read(enc_path, symbols_path);
 	return true;
 }

--- a/src/lyx_main.h
+++ b/src/lyx_main.h
@ -149,8 +149,11 @@ private:
 	bool readUIFile(std::string const & name, bool include = false);
 	/// read the given languages file
 	bool readLanguagesFile(std::string const & name);
-	/// read the given encodings file
-	bool readEncodingsFile(std::string const & name);
+	/// read the encodings.
+	/// \param enc_name encodings definition file
+	/// \param symbols_name unicode->LaTeX mapping file
+	bool readEncodingsFile(std::string const & enc_name,
+	                       std::string const & symbols_name);
 	/// parsing of non-gui LyX options.
 	void easyParse(int & argc, char * argv[]);
 	/// shows up a parsing error on screen
--- a/src/paragraph.C
+++ b/src/paragraph.C
@ -1007,6 +1007,8 @@ bool Paragraph::simpleTeXOnePar(Buffer const & buf,
 						    runparams.moving_arg);
 	}

+	// Computed only once per paragraph since bparams.encoding() is expensive
+	Encoding const & doc_encoding = bparams.encoding();
 	for (pos_type i = 0; i < size(); ++i) {
 		++column;
 		// First char in paragraph or after label?
@ -1066,10 +1068,18 @@ bool Paragraph::simpleTeXOnePar(Buffer const & buf,
 		if (c == ' ') {
 			// Do not print the separation of the optional argument
 			if (i != body_pos - 1) {
-				// FIXME: change tracking
-				// Is this correct WRT change tracking?
-				pimpl_->simpleTeXBlanks(os, texrow, i,
-						       column, font, *style);
+				if (pimpl_->simpleTeXBlanks(bparams,
+						doc_encoding, os, texrow,
+						i, column, font, *style))
+					// A surrogate pair was output. We
+					// must not call simpleTeXSpecialChars
+					// in this iteration, since
+					// simpleTeXBlanks incremented i, and
+					// simpleTeXSpecialChars would output
+					// the combining character again.
+					// FIXME: change tracking
+					// Is this correct WRT change tracking?
+					continue;
 			}
 		}

@ -1101,7 +1111,7 @@ bool Paragraph::simpleTeXOnePar(Buffer const & buf,
 			rp.local_font = &font;
 			rp.intitle = style->intitle;
 			pimpl_->simpleTeXSpecialChars(buf, bparams,
-						os, texrow, rp,
+						doc_encoding, os, texrow, rp,
 						font, running_font,
 						basefont, outerfont, open_font,
 						runningChangeType,
--- a/src/paragraph_pimpl.C
+++ b/src/paragraph_pimpl.C
@ -59,16 +59,15 @@ special_phrase const special_phrases[] = {
 size_t const phrases_nr = sizeof(special_phrases)/sizeof(special_phrase);


-bool isEncoding(BufferParams const & bparams, LyXFont const & font,
-		string const & encoding)
+/// Return the encoding that applies to a character set in \p font.
+/// \p doc_encoding must be bparams.encoding(); callers pass it in
+/// precomputed because bparams.encoding() is expensive.
+inline Encoding const & getEncoding(BufferParams const & bparams,
+		Encoding const & doc_encoding, LyXFont const & font)
 {
-	// We do ignore bparams.inputenc == "default" here because characters
-	// in this encoding could be treated by TeX as something different,
-	// e.g. if they are inside a CJK environment. See also
-	// http://bugzilla.lyx.org/show_bug.cgi?id=3043.
-	return (bparams.inputenc == encoding
-		|| (bparams.inputenc == "auto"
-		    && font.language()->encoding()->latexName() == encoding));
+	bool const per_language = bparams.inputenc == "auto"
+		|| bparams.inputenc == "default";
+	return per_language ? *(font.language()->encoding()) : doc_encoding;
 }

 } // namespace anon
@ -381,14 +380,44 @@ int Paragraph::Pimpl::eraseChars(pos_type start, pos_type end, bool trackChanges
 }


-void Paragraph::Pimpl::simpleTeXBlanks(odocstream & os, TexRow & texrow,
-				       pos_type const i,
+int Paragraph::Pimpl::latexSurrogatePair(odocstream & os, value_type c,
+		value_type next, Encoding const & encoding)
+{
+	// The LaTeX form of next is written first, followed by the LaTeX
+	// form of c in braces. Emitting next from here may skip a font
+	// change between c and next; that is accepted, since a font change
+	// inside such a pair does not make sense and is hopefully
+	// impossible to input.
+	// FIXME: change tracking
+	// Is this correct WRT change tracking?
+	docstring const accent = encoding.latexChar(next);
+	docstring const base = encoding.latexChar(c);
+	os << accent << '{' << base << '}';
+	return accent.length() + base.length() + 2; // + 2 for the braces
+}
+
+
+bool Paragraph::Pimpl::simpleTeXBlanks(BufferParams const & bparams,
+                                       Encoding const & doc_encoding,
+                                       odocstream & os, TexRow & texrow,
+                                       pos_type & i,
 				       unsigned int & column,
 				       LyXFont const & font,
 				       LyXLayout const & style)
 {
 	if (style.pass_thru)
-		return;
+		return false;
+
+	if (i < size() - 1) {
+		char_type next = getChar(i + 1);
+		if (Encodings::isCombiningChar(next)) {
+			// This space has an accent, so we must always output it.
+			Encoding const & encoding = getEncoding(bparams, doc_encoding, font);
+			column += latexSurrogatePair(os, ' ', next, encoding) - 1;
+			++i;
+			return true;
+		}
+	}

 	if (lyxrc.plaintext_linelen > 0
 	    && column > lyxrc.plaintext_linelen
@ -413,6 +442,7 @@ void Paragraph::Pimpl::simpleTeXBlanks(odocstream & os, TexRow & texrow,
 	} else {
 		os << ' ';
 	}
+	return false;
 }


@ -448,6 +478,7 @@ bool Paragraph::Pimpl::isTextAt(string const & str, pos_type pos) const

 void Paragraph::Pimpl::simpleTeXSpecialChars(Buffer const & buf,
 					     BufferParams const & bparams,
+					     Encoding const & doc_encoding,
 					     odocstream & os,
 					     TexRow & texrow,
 					     OutputParams const & runparams,
@ -465,6 +496,8 @@ void Paragraph::Pimpl::simpleTeXSpecialChars(Buffer const & buf,
 	if (style.pass_thru) {
 		if (c != Paragraph::META_INSET) {
 			if (c != '\0')
+				// FIXME UNICODE: This can fail if c cannot
+				// be encoded in the current encoding.
 				os.put(c);
 		} else
 			owner_->getInset(i)->plaintext(buf, os, runparams);
@ -581,25 +614,6 @@ void Paragraph::Pimpl::simpleTeXSpecialChars(Buffer const & buf,
 		// would be wrongly converted on systems where char is signed, so we give
 		// the code points.
 		// This also makes us independant from the encoding of this source file.
-		case 0xb1:    // ± PLUS-MINUS SIGN
-		case 0xb2:    // ² SUPERSCRIPT TWO
-		case 0xb3:    // ³ SUPERSCRIPT THREE
-		case 0xd7:    // × MULTIPLICATION SIGN
-		case 0xf7:    // ÷ DIVISION SIGN
-		case 0xb9:    // ¹ SUPERSCRIPT ONE
-		case 0xac:    // ¬ NOT SIGN
-		case 0xb5:    // µ MICRO SIGN
-			if (isEncoding(bparams, font, "latin1")
-			    || isEncoding(bparams, font, "latin9")) {
-				os << "\\ensuremath{";
-				os.put(c);
-				os << '}';
-				column += 13;
-			} else {
-				os.put(c);
-			}
-			break;
-
 		case '|': case '<': case '>':
 			// In T1 encoding, these characters exist
 			if (lyxrc.fontenc == "T1") {
@ -658,82 +672,6 @@ void Paragraph::Pimpl::simpleTeXSpecialChars(Buffer const & buf,
 			column += 9;
 			break;

-		case 0xa3:    // £ POUND SIGN
-			if (bparams.inputenc == "default") {
-				os << "\\pounds{}";
-				column += 8;
-			} else {
-				os.put(c);
-			}
-			break;
-
-		case 0x20ac:    // EURO SIGN
-			if (isEncoding(bparams, font, "latin9")
-			    || isEncoding(bparams, font, "cp1251")
-			    || isEncoding(bparams, font, "utf8")
-			    || isEncoding(bparams, font, "latin10")
-			    || isEncoding(bparams, font, "cp858")) {
-				os.put(c);
-			} else {
-				os << "\\texteuro{}";
-				column += 10;
-			}
-			break;
-
-		// These characters are covered by latin1, but not
-		// by latin9 (a.o.). We have to support them because
-		// we switched the default of latin1-languages to latin9
-		case 0xa4:    // CURRENCY SYMBOL
-		case 0xa6:    // BROKEN BAR
-		case 0xa8:    // DIAERESIS
-		case 0xb4:    // ACUTE ACCENT
-		case 0xb8:    // CEDILLA
-		case 0xbd:    // 1/2 FRACTION
-		case 0xbc:    // 1/4 FRACTION
-		case 0xbe:    // 3/4 FRACTION
-			if (isEncoding(bparams, font, "latin1")
-			    || isEncoding(bparams, font, "latin5")
-			    || isEncoding(bparams, font, "utf8")) {
-				os.put(c);
-				break;
-			} else {
-				switch (c) {
-				case 0xa4:
-					os << "\\textcurrency{}";
-					column += 15;
-					break;
-				case 0xa6:
-					os << "\\textbrokenbar{}";
-					column += 16;
-					break;
-				case 0xa8:
-					os << "\\textasciidieresis{}";
-					column += 20;
-					break;
-				case 0xb4:
-					os << "\\textasciiacute{}";
-					column += 17;
-					break;
-				case 0xb8: // from latin1.def:
-					os << "\\c\\ ";
-					column += 3;
-					break;
-				case 0xbd:
-					os << "\\textonehalf{}";
-					column += 14;
-					break;
-				case 0xbc:
-					os << "\\textonequarter{}";
-					column += 17;
-					break;
-				case 0xbe:
-					os << "\\textthreequarters{}";
-					column += 20;
-					break;
-				}
-				break;
-			}
-
 		case '$': case '&':
 		case '%': case '#': case '{':
 		case '}': case '_':
@ -771,6 +709,8 @@ void Paragraph::Pimpl::simpleTeXSpecialChars(Buffer const & buf,
 		default:

 			// I assume this is hack treating typewriter as verbatim
+			// FIXME UNICODE: This can fail if c cannot be encoded
+			// in the current encoding.
 			if (font.family() == LyXFont::TYPEWRITER_FAMILY) {
 				if (c != '\0') {
 					os.put(c);
@ -798,7 +738,27 @@ void Paragraph::Pimpl::simpleTeXSpecialChars(Buffer const & buf,
 			}

 			if (pnr == phrases_nr && c != '\0') {
-				os.put(c);
+				Encoding const & encoding = getEncoding(bparams, doc_encoding, font);
+				if (i < size() - 1) {
+					char_type next = getChar(i + 1);
+					if (Encodings::isCombiningChar(next)) {
+						column += latexSurrogatePair(os, c, next, encoding) - 1;
+						++i;
+						break;
+					}
+				}
+				docstring const latex = encoding.latexChar(c);
+				if (latex.length() > 1 &&
+				    latex[latex.length() - 1] != '}') {
+					// Prevent eating of a following
+					// space or command corruption by
+					// following characters
+					column += latex.length() + 1;
+					os << latex << "{}";
+				} else {
+					column += latex.length() - 1;
+					os << latex;
+				}
 			}
 			break;
 		}
@ -876,6 +836,7 @@ void Paragraph::Pimpl::validate(LaTeXFeatures & features,
 	}

 	// then the contents
+	Encoding const & doc_encoding = bparams.encoding();
 	for (pos_type i = 0; i < size() ; ++i) {
 		for (size_t pnr = 0; pnr < phrases_nr; ++pnr) {
 			if (!special_phrases[pnr].builtin
@ -884,12 +845,12 @@ void Paragraph::Pimpl::validate(LaTeXFeatures & features,
 				break;
 			}
 		}
-		// these glyphs require the textcomp package
-		if (getChar(i) == 0x20ac || getChar(i) == 0xa4
-		    || getChar(i) == 0xa6 || getChar(i) == 0xa8
-		    || getChar(i) == 0xb4 || getChar(i) == 0xbd
-		    || getChar(i) == 0xbc || getChar(i) == 0xbe)
-			features.require("textcomp");
+		// We do not need the completely realized font, since we are
+		// only interested in the language, and that is never inherited.
+		// Therefore we can use getFontSettings instead of getFont.
+		LyXFont const & font = owner_->getFontSettings(bparams, i);
+		Encoding const & encoding = getEncoding(bparams, doc_encoding, font);
+		encoding.validate(getChar(i), features);
 	}
 }

--- a/src/paragraph_pimpl.h
+++ b/src/paragraph_pimpl.h
@ -26,6 +26,7 @@

 namespace lyx {

+class Encoding;
 class LyXLayout;


@ -123,16 +124,23 @@ public:
 	///
 	FontList fontlist;

-	///
-	void simpleTeXBlanks(odocstream &, TexRow & texrow,
-			     pos_type const i,
+	/// Output the surrogate pair formed by \p c and \p next to \p os.
+	/// \return the number of characters written.
+	int latexSurrogatePair(odocstream & os, value_type c, value_type next,
+	                       Encoding const &);
+	/// Output a space in appropriate formatting (or a surrogate pair
+	/// if the next character is a combining character).
+	/// \return whether a surrogate pair was output.
+	bool simpleTeXBlanks(BufferParams const &, Encoding const &,
+	                     odocstream &, TexRow & texrow,
+			     pos_type & i,
 			     unsigned int & column,
 			     LyXFont const & font,
 			     LyXLayout const & style);
 	///
 	void simpleTeXSpecialChars(Buffer const &, BufferParams const &,
-				   odocstream &, TexRow & texrow,
-				   OutputParams const &,
+	                           Encoding const &, odocstream &,
+	                           TexRow & texrow, OutputParams const &,
 				   LyXFont & font, LyXFont & running_font,
 				   LyXFont & basefont,
 				   LyXFont const & outerfont,
--- a/src/support/debugstream.h
+++ b/src/support/debugstream.h
@ -64,12 +64,14 @@ public:
 	typedef typename debug::type Type;

 	basic_debugstream()
-		: std::basic_ostream<charT, traits>(0), dt(debug::NONE)
+		: std::basic_ostream<charT, traits>(0), dt(debug::NONE),
+		  realbuf_(0), enabled_(true)
 	{}

 	/// Constructor, sets the debug level to t.
 	explicit basic_debugstream(std::basic_streambuf<charT, traits> * buf)
-		: std::basic_ostream<charT, traits>(buf), dt(debug::NONE)
+		: std::basic_ostream<charT, traits>(buf), dt(debug::NONE),
+		  realbuf_(0), enabled_(true)
 	{}

 	/// Sets the debug level to t.
@ -99,11 +101,32 @@ public:
 			return *this;
 		return nullstream;
 	}
+	/// Disable the stream completely; a no-op if it is already disabled
+	void disable()
+	{
+		if (enabled_) {
+			realbuf_ = this->rdbuf();
+			this->rdbuf(nullstream.rdbuf()); // dependent base: needs this->
+			enabled_ = false;
+		}
+	}
+	/// Re-attach the buffer saved by disable(); a no-op if already enabled
+	void enable()
+	{
+		if (enabled_)
+			return;
+		this->rdbuf(realbuf_);
+		enabled_ = true;
+	}
 private:
 	/// The current debug level
 	Type dt;
 	/// The no-op stream.
 	boost::basic_onullstream<charT, traits> nullstream;
+	/// The buffer of the real stream (saved by disable() for enable())
+	std::basic_streambuf<charT, traits> * realbuf_;
+	/// Is the stream enabled?
+	bool enabled_;
 };

 typedef basic_debugstream<debug_trait> debugstream;