Add machinery to output arbitrary unicode characters with LaTeX commands

read from a text file.

	* src/encoding.[Ch]

	(Encoding::latexChar): New, output a character to LaTeX
	(Encoding::validate): New, add needed preamble stuff for a character
	(Encodings::read): Read new unicodesymbols file
	(Encodings::isCombiningChar): New, is a character a combining char?

	* src/paragraph_pimpl.C
	(isEncoding): Delete, no longer needed
	(getEncoding): New, get the real encoding of a font
	(Paragraph::Pimpl::latexSurrogatePair): New, output a surrogate pair
	to LaTeX
	(Paragraph::Pimpl::simpleTeXBlanks): Use latexSurrogatePair if needed
	(Paragraph::Pimpl::simpleTeXSpecialChars): Ditto, and replace several
	hardcoded characters with a call of encoding.latexChar()
	(Paragraph::Pimpl::validate): replace several hardcoded characters
	with a call of encoding.validate()

	* src/support/debugstream.h
	(basic_debugstream::disable): New, disable the stream completely
	(basic_debugstream::enable): New, reenable the stream

	* src/lyx_main.[Ch]: Adjust to changes above

	* src/paragraph.C: Ditto

	* lib/unicodesymbols: New file with UCS4 -> LaTeX command mapping.
	It is far from complete yet, but contains most accents on latin
	characters.

	* lib/Makefile.am: add lib/unicodesymbols

	* development/scons/scons_manifest.py: ditto

	* development/tools/unicodesymbols.py: Helper script to update
	lib/unicodesymbols with new symbols


git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/trunk@16920 a592a061-630c-0410-9148-cb99ea01b6c8
This commit is contained in:
Georg Baum 2007-01-28 21:27:45 +00:00
parent eb495d2e98
commit feb7895965
12 changed files with 1148 additions and 147 deletions

View File

@ -1275,6 +1275,7 @@ lib_files = Split('''
languages languages
symbols symbols
syntax.default syntax.default
unicodesymbols
configure.py configure.py
''') ''')

View File

@ -0,0 +1,119 @@
#! /usr/bin/env python
# -*- coding: utf-8 -*-
# file unicodesymbols.py
# This file is part of LyX, the document processor.
# Licence details can be found in the file COPYING.
# author Georg Baum
# Full author contact details are available in file CREDITS
# This script reads a unicode symbol file and completes it in the given range
import os, re, string, sys, unicodedata
def usage(prog_name):
    """Return the two-line usage text, with prog_name as the program name.

    The first line describes the invocation with explicit input and output
    files, the second the invocation as a stdin/stdout filter.
    """
    with_files = "Usage: %s start stop inputfile outputfile\n" % prog_name
    as_filter = "or %s start stop <inputfile >outputfile" % prog_name
    return with_files + as_filter
def error(message):
    """Write message plus a newline to stderr, then exit with status 1."""
    text = message + '\n'
    sys.stderr.write(text)
    # sys.exit(1) is exactly "raise SystemExit(1)"
    raise SystemExit(1)
def trim_eol(line):
    """Remove one trailing end-of-line sequence from line.

    Recognised terminators are "\\r\\n", "\\n" and "\\r". A line without a
    terminator (e.g. the last line of a file with no final EOL) is returned
    unchanged.

    The terminator is matched as a whole: the previous test only looked at
    the second-to-last character, so a line such as "a\\rb" was mistaken for
    a CRLF-terminated one and lost its last character.
    """
    # Try the two-character terminator first so "\r\n" is removed as a unit.
    for eol in ('\r\n', '\n', '\r'):
        if line.endswith(eol):
            return line[:-len(eol)]
    # file with no EOL in last line
    return line
def read(input):
    """Read every line of input and return a list of [code point, text] pairs.

    input is read with readline() until it returns an empty value. Each line
    has its line ending removed by trim_eol(). The code point is parsed as
    hexadecimal from the first whitespace-separated token when that token
    starts with "0x" (active entry) or "#0x" (commented-out entry); for all
    other lines, including blank ones, it is -1.
    """
    entries = []
    while True:
        raw = input.readline()
        if not raw:
            break
        text = trim_eol(raw)
        fields = text.split()
        code = -1
        if len(fields) > 0:
            first = fields[0]
            # "0x" cannot match a token starting with "#0x", so the order of
            # the two prefixes does not matter.
            for prefix in ("0x", "#0x"):
                if first[0:len(prefix)] == prefix:
                    code = int(first[len(prefix):], 16)
                    break
        entries.append([code, text])
    return entries
def write(output, lines):
    """Write the text of each [code point, text] entry to output.

    Every entry's text is followed by the platform line separator
    (os.linesep); the code point is not written.
    """
    terminator = os.linesep
    for entry in lines:
        text = entry[1]
        output.write(text + terminator)
def complete(lines, start, stop):
    """Insert commented-out placeholder entries for missing code points.

    lines is the list of [code point, text] pairs produced by read() and is
    modified in place. For every code point i in range(start, stop) that has
    a Unicode name but no entry at the current position, a line of the form
        #0x<i> "" "" "<flags>" # <unicode name>
    is inserted, where <flags> is "combining" if unicodedata reports a
    non-zero combining class and empty otherwise.

    The scan walks lines once from the front, so it relies on the existing
    entries being sorted by code point.

    Nothing is printed: progress output on stdout would end up inside the
    generated file when the script is used as a stdin/stdout filter.
    """
    # unichr() only exists on Python 2; chr() is its Python 3 counterpart.
    try:
        to_char = unichr
    except NameError:
        to_char = chr

    l = 0
    for i in range(start, stop):
        # This catches both comments (lines[l][0] == -1) and code points less than i
        while l < len(lines) and lines[l][0] < i:
            l = l + 1
        if l >= len(lines) or lines[l][0] != i:
            c = to_char(i)
            name = unicodedata.name(c, "")
            # Unnamed (unassigned) code points get no placeholder.
            if name != "":
                if unicodedata.combining(c):
                    combining = "combining"
                else:
                    combining = ""
                line = [i, '#0x%04x "" "" "%s" # %s' % (i, combining, name)]
                lines.insert(l, line)
                # Step over the entry just inserted.
                l = l + 1
def main(argv):
    """Complete a unicode symbol file in the range given on the command line.

    argv is either [prog, start, stop] (read stdin, write stdout) or
    [prog, start, stop, inputfile, outputfile]. start and stop are decimal,
    or hexadecimal when prefixed with "0x"; stop is exclusive. Any other
    argument count prints the usage text and exits with status 1.

    Returns 0 on success.
    """
    # Reject a wrong argument count before looking at any argument.
    use_files = len(argv) == 5
    if not use_files and len(argv) != 3:
        error(usage(argv[0]))

    # Parse the range before opening anything: opening the output file
    # truncates it, so a mistyped number must not get that far.
    if argv[1][:2] == "0x":
        start = int(argv[1][2:], 16)
    else:
        start = int(argv[1])
    if argv[2][:2] == "0x":
        stop = int(argv[2][2:], 16)
    else:
        stop = int(argv[2])

    # Open files
    if use_files:
        input = open(argv[3], 'rb')
        output = open(argv[4], 'wb')
    else:
        input = sys.stdin
        output = sys.stdout

    try:
        # Do the real work
        lines = read(input)
        complete(lines, start, stop)
        write(output, lines)
    finally:
        # Close only the files opened here; stdin and stdout are left alone.
        if use_files:
            input.close()
            output.close()

    return 0


if __name__ == "__main__":
    main(sys.argv)

View File

@ -5,7 +5,7 @@ SUBDIRS = doc lyx2lyx
CHMOD = chmod CHMOD = chmod
dist_pkgdata_DATA = CREDITS chkconfig.ltx \ dist_pkgdata_DATA = CREDITS chkconfig.ltx \
external_templates encodings languages symbols syntax.default external_templates encodings languages symbols syntax.default unicodesymbols
# Note that we "chmod 755" manually this file in install-data-hook. # Note that we "chmod 755" manually this file in install-data-hook.
dist_pkgdata_PYTHON = configure.py dist_pkgdata_PYTHON = configure.py

681
lib/unicodesymbols Normal file
View File

@ -0,0 +1,681 @@
#
# file unicodesymbols
# This file is part of LyX, the document processor.
# Licence details can be found in the file COPYING.
#
# author Georg Baum
#
# Full author contact details are available in file CREDITS.
# This file is a database of LaTeX commands for unicode characters.
# These commands will be used by LyX for LaTeX export for all characters
# that are not representable in the chosen encoding.
# syntax:
# ucs4 command preamble flags
# preamble can either be a known feature, or a LaTeX command.
# Known flags:
# - combining This is a combining char that will get combined with a base char
# - force Always output replacement command
#0x00a0 "" "" "" # NO-BREAK SPACE
#0x00a1 "" "" "" # INVERTED EXCLAMATION MARK
0x00a2 "\\textcent" "textcomp" "" # CENT SIGN
0x00a3 "\\pounds" "" "" # £ POUND SIGN
0x00a4 "\\textcurrency" "textcomp" "" # CURRENCY SYMBOL
0x00a5 "\\textyen" "textcomp" "" # YEN SIGN
0x00a6 "\\textbrokenbar" "textcomp" "" # BROKEN BAR
0x00a7 "\\textsection" "textcomp" "" # SECTION SIGN
0x00a8 "\\textasciidieresis" "textcomp" "" # DIAERESIS
0x00a9 "\\textcopyright" "textcomp" "" # COPYRIGHT SIGN
0x00aa "\\textordfeminine" "textcomp" "" # FEMININE ORDINAL INDICATOR
#0x00ab "" "" "" # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00ac "\\textlnot" "textcomp" "force" # ¬ NOT SIGN
#0x00ad "" "" "" # SOFT HYPHEN
0x00ae "\\textregistered" "textcomp" "" # REGISTERED SIGN
0x00af "\\textasciimacron" "textcomp" "" # MACRON
0x00b0 "\\textdegree" "textcomp" "" # DEGREE SIGN
0x00b1 "\\textpm" "textcomp" "force" # ± PLUS-MINUS SIGN
0x00b2 "\\texttwosuperior" "textcomp" "force" # ² SUPERSCRIPT TWO
0x00b3 "\\textthreesuperior" "textcomp" "force" # ³ SUPERSCRIPT THREE
0x00b4 "\\textasciiacute" "textcomp" "" # ACUTE ACCENT
0x00b5 "\\textmu" "textcomp" "force" # µ MICRO SIGN
0x00b6 "\\textpilcrow" "textcomp" "" # PILCROW SIGN
#0x00b7 "" "" "" # MIDDLE DOT
0x00b8 "\\c\\ " "" "" # CEDILLA (command from latin1.def)
0x00b9 "\\textonesuperior" "textcomp" "force" # ¹ SUPERSCRIPT ONE
0x00ba "\\textordmasculine" "textcomp" "" # MASCULINE ORDINAL INDICATOR
#0x00bb "" "" "" # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00bc "\\textonequarter" "textcomp" "" # 1/4 FRACTION
0x00bd "\\textonehalf" "textcomp" "" # 1/2 FRACTION
0x00be "\\textthreequarters" "textcomp" "" # 3/4 FRACTION
#0x00bf "" "" "" # INVERTED QUESTION MARK
0x00c0 "\\`{A}" "" "" # LATIN CAPITAL LETTER A WITH GRAVE
0x00c1 "\\'{A}" "" "" # LATIN CAPITAL LETTER A WITH ACUTE
0x00c2 "\\^{A}" "" "" # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
0x00c3 "\\~{A}" "" "" # LATIN CAPITAL LETTER A WITH TILDE
0x00c4 "\\\"{A}" "" "" # LATIN CAPITAL LETTER A WITH DIAERESIS
0x00c5 "\\r{A}" "" "" # LATIN CAPITAL LETTER A WITH RING ABOVE
#0x00c6 "" "" "" # LATIN CAPITAL LETTER AE
0x00c7 "\\c{C}" "" "" # LATIN CAPITAL LETTER C WITH CEDILLA
0x00c8 "\\`{E}" "" "" # LATIN CAPITAL LETTER E WITH GRAVE
0x00c9 "\\'{E}" "" "" # LATIN CAPITAL LETTER E WITH ACUTE
0x00ca "\\^{E}" "" "" # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
0x00cb "\\\"{E}" "" "" # LATIN CAPITAL LETTER E WITH DIAERESIS
0x00cc "\\`{I}" "" "" # LATIN CAPITAL LETTER I WITH GRAVE
0x00cd "\\'{I}" "" "" # LATIN CAPITAL LETTER I WITH ACUTE
0x00ce "\\^{I}" "" "" # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
0x00cf "\\\"{I}" "" "" # LATIN CAPITAL LETTER I WITH DIAERESIS
#0x00d0 "" "" "" # LATIN CAPITAL LETTER ETH
0x00d1 "\\~{N}" "" "" # LATIN CAPITAL LETTER N WITH TILDE
0x00d2 "\\`{O}" "" "" # LATIN CAPITAL LETTER O WITH GRAVE
0x00d3 "\\'{O}" "" "" # LATIN CAPITAL LETTER O WITH ACUTE
0x00d4 "\\^{O}" "" "" # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
0x00d5 "\\~{O}" "" "" # LATIN CAPITAL LETTER O WITH TILDE
0x00d6 "\\\"{O}" "" "" # LATIN CAPITAL LETTER O WITH DIAERESIS
0x00d7 "\\texttimes" "textcomp" "force" # × MULTIPLICATION SIGN
#0x00d8 "" "" "" # LATIN CAPITAL LETTER O WITH STROKE
0x00d9 "\\`{U}" "" "" # LATIN CAPITAL LETTER U WITH GRAVE
0x00da "\\'{U}" "" "" # LATIN CAPITAL LETTER U WITH ACUTE
0x00db "\\^{U}" "" "" # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
0x00dc "\\\"{U}" "" "" # LATIN CAPITAL LETTER U WITH DIAERESIS
0x00dd "\\'{Y}" "" "" # LATIN CAPITAL LETTER Y WITH ACUTE
#0x00de "" "" "" # LATIN CAPITAL LETTER THORN
#0x00df "" "" "" # LATIN SMALL LETTER SHARP S
0x00e0 "\\`{a}" "" "" # LATIN SMALL LETTER A WITH GRAVE
0x00e1 "\\'{a}" "" "" # LATIN SMALL LETTER A WITH ACUTE
0x00e2 "\\^{a}" "" "" # LATIN SMALL LETTER A WITH CIRCUMFLEX
0x00e3 "\\~{a}" "" "" # LATIN SMALL LETTER A WITH TILDE
0x00e4 "\\\"{a}" "" "" # LATIN SMALL LETTER A WITH DIAERESIS
0x00e5 "\\r{a}" "" "" # LATIN SMALL LETTER A WITH RING ABOVE
#0x00e6 "" "" "" # LATIN SMALL LETTER AE
0x00e7 "\\c{c}" "" "" # LATIN SMALL LETTER C WITH CEDILLA
0x00e8 "\\`{e}" "" "" # LATIN SMALL LETTER E WITH GRAVE
0x00e9 "\\'{e}" "" "" # LATIN SMALL LETTER E WITH ACUTE
0x00ea "\\^{e}" "" "" # LATIN SMALL LETTER E WITH CIRCUMFLEX
0x00eb "\\\"{e}" "" "" # LATIN SMALL LETTER E WITH DIAERESIS
0x00ec "\\`{\\i}" "" "" # LATIN SMALL LETTER I WITH GRAVE
0x00ed "\\'{\\i}" "" "" # LATIN SMALL LETTER I WITH ACUTE
0x00ee "\\^{\\i}" "" "" # LATIN SMALL LETTER I WITH CIRCUMFLEX
0x00ef "\\\"{\\i}" "" "" # LATIN SMALL LETTER I WITH DIAERESIS
#0x00f0 "" "" "" # LATIN SMALL LETTER ETH
0x00f1 "\\~{n}" "" "" # LATIN SMALL LETTER N WITH TILDE
0x00f2 "\\`{o}" "" "" # LATIN SMALL LETTER O WITH GRAVE
0x00f3 "\\'{o}" "" "" # LATIN SMALL LETTER O WITH ACUTE
0x00f4 "\\^{o}" "" "" # LATIN SMALL LETTER O WITH CIRCUMFLEX
0x00f5 "\\~{o}" "" "" # LATIN SMALL LETTER O WITH TILDE
0x00f6 "\\\"{o}" "" "" # LATIN SMALL LETTER O WITH DIAERESIS
0x00f7 "\\textdiv" "textcomp" "force" # ÷ DIVISION SIGN
#0x00f8 "" "" "" # LATIN SMALL LETTER O WITH STROKE
0x00f9 "\\`{u}" "" "" # LATIN SMALL LETTER U WITH GRAVE
0x00fa "\\'{u}" "" "" # LATIN SMALL LETTER U WITH ACUTE
0x00fb "\\^{u}" "" "" # LATIN SMALL LETTER U WITH CIRCUMFLEX
0x00fc "\\\"{u}" "" "" # LATIN SMALL LETTER U WITH DIAERESIS
0x00fd "\\'{y}" "" "" # LATIN SMALL LETTER Y WITH ACUTE
#0x00fe "" "" "" # LATIN SMALL LETTER THORN
0x00ff "\\\"{y}" "" "" # LATIN SMALL LETTER Y WITH DIAERESIS
0x0100 "\\={A}" "" "" # LATIN CAPITAL LETTER A WITH MACRON
0x0101 "\\={a}" "" "" # LATIN SMALL LETTER A WITH MACRON
0x0102 "\\u{A}" "" "" # LATIN CAPITAL LETTER A WITH BREVE
0x0103 "\\u{a}" "" "" # LATIN SMALL LETTER A WITH BREVE
0x0104 "\\k{A}" "" "" # LATIN CAPITAL LETTER A WITH OGONEK
0x0105 "\\k{a}" "" "" # LATIN SMALL LETTER A WITH OGONEK
0x0106 "\\'{C}" "" "" # LATIN CAPITAL LETTER C WITH ACUTE
0x0107 "\\'{c}" "" "" # LATIN SMALL LETTER C WITH ACUTE
0x0108 "\\^{C}" "" "" # LATIN CAPITAL LETTER C WITH CIRCUMFLEX
0x0109 "\\^{c}" "" "" # LATIN SMALL LETTER C WITH CIRCUMFLEX
0x010a "\\.{C}" "" "" # LATIN CAPITAL LETTER C WITH DOT ABOVE
0x010b "\\.{c}" "" "" # LATIN SMALL LETTER C WITH DOT ABOVE
0x010c "\\v{C}" "" "" # LATIN CAPITAL LETTER C WITH CARON
0x010d "\\v{c}" "" "" # LATIN SMALL LETTER C WITH CARON
0x010e "\\v{D}" "" "" # LATIN CAPITAL LETTER D WITH CARON
0x010f "\\v{d}" "" "" # LATIN SMALL LETTER D WITH CARON
#0x0110 "" "" "" # LATIN CAPITAL LETTER D WITH STROKE
#0x0111 "" "" "" # LATIN SMALL LETTER D WITH STROKE
0x0112 "\\={E}" "" "" # LATIN CAPITAL LETTER E WITH MACRON
0x0113 "\\={e}" "" "" # LATIN SMALL LETTER E WITH MACRON
0x0114 "\\u{E}" "" "" # LATIN CAPITAL LETTER E WITH BREVE
0x0115 "\\u{e}" "" "" # LATIN SMALL LETTER E WITH BREVE
0x0116 "\\.{E}" "" "" # LATIN CAPITAL LETTER E WITH DOT ABOVE
0x0117 "\\.{e}" "" "" # LATIN SMALL LETTER E WITH DOT ABOVE
0x0118 "\\k{E}" "" "" # LATIN CAPITAL LETTER E WITH OGONEK
0x0119 "\\k{e}" "" "" # LATIN SMALL LETTER E WITH OGONEK
0x011a "\\v{E}" "" "" # LATIN CAPITAL LETTER E WITH CARON
0x011b "\\v{e}" "" "" # LATIN SMALL LETTER E WITH CARON
0x011c "\\^{G}" "" "" # LATIN CAPITAL LETTER G WITH CIRCUMFLEX
0x011d "\\^{g}" "" "" # LATIN SMALL LETTER G WITH CIRCUMFLEX
0x011e "\\u{G}" "" "" # LATIN CAPITAL LETTER G WITH BREVE
0x011f "\\u{g}" "" "" # LATIN SMALL LETTER G WITH BREVE
0x0120 "\\.{G}" "" "" # LATIN CAPITAL LETTER G WITH DOT ABOVE
0x0121 "\\.{g}" "" "" # LATIN SMALL LETTER G WITH DOT ABOVE
0x0122 "\\c{G}" "" "" # LATIN CAPITAL LETTER G WITH CEDILLA
0x0123 "\\c{g}" "" "" # LATIN SMALL LETTER G WITH CEDILLA
0x0124 "\\^{H}" "" "" # LATIN CAPITAL LETTER H WITH CIRCUMFLEX
0x0125 "\\^{h}" "" "" # LATIN SMALL LETTER H WITH CIRCUMFLEX
#0x0126 "" "" "" # LATIN CAPITAL LETTER H WITH STROKE
#0x0127 "" "" "" # LATIN SMALL LETTER H WITH STROKE
0x0128 "\\~{I}" "" "" # LATIN CAPITAL LETTER I WITH TILDE
0x0129 "\\~{\\i}" "" "" # LATIN SMALL LETTER I WITH TILDE
0x012a "\\={I}" "" "" # LATIN CAPITAL LETTER I WITH MACRON
0x012b "\\={\\i}" "" "" # LATIN SMALL LETTER I WITH MACRON
0x012c "\\u{I}" "" "" # LATIN CAPITAL LETTER I WITH BREVE
0x012d "\\u{\\i}" "" "" # LATIN SMALL LETTER I WITH BREVE
0x012e "\\k{I}" "" "" # LATIN CAPITAL LETTER I WITH OGONEK
0x012f "\\k{i}" "" "" # LATIN SMALL LETTER I WITH OGONEK
0x0130 "\\.{I}" "" "" # LATIN CAPITAL LETTER I WITH DOT ABOVE
0x0131 "\\i" "" "" # LATIN SMALL LETTER DOTLESS I
#0x0132 "" "" "" # LATIN CAPITAL LIGATURE IJ
#0x0133 "" "" "" # LATIN SMALL LIGATURE IJ
0x0134 "\\^{J}" "" "" # LATIN CAPITAL LETTER J WITH CIRCUMFLEX
0x0135 "\\^{\\j}" "" "" # LATIN SMALL LETTER J WITH CIRCUMFLEX
0x0136 "\\c{K}" "" "" # LATIN CAPITAL LETTER K WITH CEDILLA
0x0137 "\\c{k}" "" "" # LATIN SMALL LETTER K WITH CEDILLA
#0x0138 "" "" "" # LATIN SMALL LETTER KRA
0x0139 "\\'{L}" "" "" # LATIN CAPITAL LETTER L WITH ACUTE
0x013a "\\'{l}" "" "" # LATIN SMALL LETTER L WITH ACUTE
0x013b "\\c{L}" "" "" # LATIN CAPITAL LETTER L WITH CEDILLA
0x013c "\\c{l}" "" "" # LATIN SMALL LETTER L WITH CEDILLA
0x013d "\\v{L}" "" "" # LATIN CAPITAL LETTER L WITH CARON
0x013e "\\v{l}" "" "" # LATIN SMALL LETTER L WITH CARON
#0x013f "" "" "" # LATIN CAPITAL LETTER L WITH MIDDLE DOT
#0x0140 "" "" "" # LATIN SMALL LETTER L WITH MIDDLE DOT
0x0141 "\\L" "" "" # LATIN CAPITAL LETTER L WITH STROKE
0x0142 "\\l" "" "" # LATIN SMALL LETTER L WITH STROKE
0x0143 "\\'{N}" "" "" # LATIN CAPITAL LETTER N WITH ACUTE
0x0144 "\\'{n}" "" "" # LATIN SMALL LETTER N WITH ACUTE
0x0145 "\\c{N}" "" "" # LATIN CAPITAL LETTER N WITH CEDILLA
0x0146 "\\c{n}" "" "" # LATIN SMALL LETTER N WITH CEDILLA
0x0147 "\\v{N}" "" "" # LATIN CAPITAL LETTER N WITH CARON
0x0148 "\\v{n}" "" "" # LATIN SMALL LETTER N WITH CARON
#0x0149 "" "" "" # LATIN SMALL LETTER N PRECEDED BY APOSTROPHE
#0x014a "" "" "" # LATIN CAPITAL LETTER ENG
#0x014b "" "" "" # LATIN SMALL LETTER ENG
0x014c "\\={O}" "" "" # LATIN CAPITAL LETTER O WITH MACRON
0x014d "\\={o}" "" "" # LATIN SMALL LETTER O WITH MACRON
0x014e "\\u{O}" "" "" # LATIN CAPITAL LETTER O WITH BREVE
0x014f "\\u{o}" "" "" # LATIN SMALL LETTER O WITH BREVE
0x0150 "\\H{O}" "" "" # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
0x0151 "\\H{o}" "" "" # LATIN SMALL LETTER O WITH DOUBLE ACUTE
#0x0152 "" "" "" # LATIN CAPITAL LIGATURE OE
#0x0153 "" "" "" # LATIN SMALL LIGATURE OE
0x0154 "\\'{R}" "" "" # LATIN CAPITAL LETTER R WITH ACUTE
0x0155 "\\'{r}" "" "" # LATIN SMALL LETTER R WITH ACUTE
0x0156 "\\c{R}" "" "" # LATIN CAPITAL LETTER R WITH CEDILLA
0x0157 "\\c{r}" "" "" # LATIN SMALL LETTER R WITH CEDILLA
0x0158 "\\v{R}" "" "" # LATIN CAPITAL LETTER R WITH CARON
0x0159 "\\v{r}" "" "" # LATIN SMALL LETTER R WITH CARON
0x015a "\\'{S}" "" "" # LATIN CAPITAL LETTER S WITH ACUTE
0x015b "\\'{s}" "" "" # LATIN SMALL LETTER S WITH ACUTE
0x015c "\\^{S}" "" "" # LATIN CAPITAL LETTER S WITH CIRCUMFLEX
0x015d "\\^{s}" "" "" # LATIN SMALL LETTER S WITH CIRCUMFLEX
0x015e "\\c{S}" "" "" # LATIN CAPITAL LETTER S WITH CEDILLA
0x015f "\\c{s}" "" "" # LATIN SMALL LETTER S WITH CEDILLA
0x0160 "\\v{S}" "" "" # LATIN CAPITAL LETTER S WITH CARON
0x0161 "\\v{s}" "" "" # LATIN SMALL LETTER S WITH CARON
0x0162 "\\c{T}" "" "" # LATIN CAPITAL LETTER T WITH CEDILLA
0x0163 "\\c{t}" "" "" # LATIN SMALL LETTER T WITH CEDILLA
0x0164 "\\v{T}" "" "" # LATIN CAPITAL LETTER T WITH CARON
0x0165 "\\v{t}" "" "" # LATIN SMALL LETTER T WITH CARON
#0x0166 "" "" "" # LATIN CAPITAL LETTER T WITH STROKE
#0x0167 "" "" "" # LATIN SMALL LETTER T WITH STROKE
0x0168 "\\~{U}" "" "" # LATIN CAPITAL LETTER U WITH TILDE
0x0169 "\\~{u}" "" "" # LATIN SMALL LETTER U WITH TILDE
0x016a "\\={U}" "" "" # LATIN CAPITAL LETTER U WITH MACRON
0x016b "\\={u}" "" "" # LATIN SMALL LETTER U WITH MACRON
0x016c "\\u{U}" "" "" # LATIN CAPITAL LETTER U WITH BREVE
0x016d "\\u{u}" "" "" # LATIN SMALL LETTER U WITH BREVE
0x016e "\\r{U}" "" "" # LATIN CAPITAL LETTER U WITH RING ABOVE
0x016f "\\r{u}" "" "" # LATIN SMALL LETTER U WITH RING ABOVE
0x0170 "\\H{U}" "" "" # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
0x0171 "\\H{u}" "" "" # LATIN SMALL LETTER U WITH DOUBLE ACUTE
0x0172 "\\k{U}" "" "" # LATIN CAPITAL LETTER U WITH OGONEK
0x0173 "\\k{u}" "" "" # LATIN SMALL LETTER U WITH OGONEK
0x0174 "\\^{W}" "" "" # LATIN CAPITAL LETTER W WITH CIRCUMFLEX
0x0175 "\\^{w}" "" "" # LATIN SMALL LETTER W WITH CIRCUMFLEX
0x0176 "\\^{Y}" "" "" # LATIN CAPITAL LETTER Y WITH CIRCUMFLEX
0x0177 "\\^{y}" "" "" # LATIN SMALL LETTER Y WITH CIRCUMFLEX
0x0178 "\\\"{Y}" "" "" # LATIN CAPITAL LETTER Y WITH DIAERESIS
0x0179 "\\'{Z}" "" "" # LATIN CAPITAL LETTER Z WITH ACUTE
0x017a "\\'{z}" "" "" # LATIN SMALL LETTER Z WITH ACUTE
0x017b "\\.{Z}" "" "" # LATIN CAPITAL LETTER Z WITH DOT ABOVE
0x017c "\\.{z}" "" "" # LATIN SMALL LETTER Z WITH DOT ABOVE
0x017d "\\v{Z}" "" "" # LATIN CAPITAL LETTER Z WITH CARON
0x017e "\\v{z}" "" "" # LATIN SMALL LETTER Z WITH CARON
#0x017f "" "" "" # LATIN SMALL LETTER LONG S
#0x0180 "" "" "" # LATIN SMALL LETTER B WITH STROKE
#0x0181 "" "" "" # LATIN CAPITAL LETTER B WITH HOOK
#0x0182 "" "" "" # LATIN CAPITAL LETTER B WITH TOPBAR
#0x0183 "" "" "" # LATIN SMALL LETTER B WITH TOPBAR
#0x0184 "" "" "" # LATIN CAPITAL LETTER TONE SIX
#0x0185 "" "" "" # LATIN SMALL LETTER TONE SIX
#0x0186 "" "" "" # LATIN CAPITAL LETTER OPEN O
#0x0187 "" "" "" # LATIN CAPITAL LETTER C WITH HOOK
#0x0188 "" "" "" # LATIN SMALL LETTER C WITH HOOK
#0x0189 "" "" "" # LATIN CAPITAL LETTER AFRICAN D
#0x018a "" "" "" # LATIN CAPITAL LETTER D WITH HOOK
#0x018b "" "" "" # LATIN CAPITAL LETTER D WITH TOPBAR
#0x018c "" "" "" # LATIN SMALL LETTER D WITH TOPBAR
#0x018d "" "" "" # LATIN SMALL LETTER TURNED DELTA
#0x018e "" "" "" # LATIN CAPITAL LETTER REVERSED E
#0x018f "" "" "" # LATIN CAPITAL LETTER SCHWA
#0x0190 "" "" "" # LATIN CAPITAL LETTER OPEN E
#0x0191 "" "" "" # LATIN CAPITAL LETTER F WITH HOOK
#0x0192 "" "" "" # LATIN SMALL LETTER F WITH HOOK
#0x0193 "" "" "" # LATIN CAPITAL LETTER G WITH HOOK
#0x0194 "" "" "" # LATIN CAPITAL LETTER GAMMA
#0x0195 "" "" "" # LATIN SMALL LETTER HV
#0x0196 "" "" "" # LATIN CAPITAL LETTER IOTA
#0x0197 "" "" "" # LATIN CAPITAL LETTER I WITH STROKE
#0x0198 "" "" "" # LATIN CAPITAL LETTER K WITH HOOK
#0x0199 "" "" "" # LATIN SMALL LETTER K WITH HOOK
#0x019a "" "" "" # LATIN SMALL LETTER L WITH BAR
#0x019b "" "" "" # LATIN SMALL LETTER LAMBDA WITH STROKE
#0x019c "" "" "" # LATIN CAPITAL LETTER TURNED M
#0x019d "" "" "" # LATIN CAPITAL LETTER N WITH LEFT HOOK
#0x019e "" "" "" # LATIN SMALL LETTER N WITH LONG RIGHT LEG
#0x019f "" "" "" # LATIN CAPITAL LETTER O WITH MIDDLE TILDE
#0x01a0 "" "" "" # LATIN CAPITAL LETTER O WITH HORN
#0x01a1 "" "" "" # LATIN SMALL LETTER O WITH HORN
#0x01a2 "" "" "" # LATIN CAPITAL LETTER OI
#0x01a3 "" "" "" # LATIN SMALL LETTER OI
#0x01a4 "" "" "" # LATIN CAPITAL LETTER P WITH HOOK
#0x01a5 "" "" "" # LATIN SMALL LETTER P WITH HOOK
#0x01a6 "" "" "" # LATIN LETTER YR
#0x01a7 "" "" "" # LATIN CAPITAL LETTER TONE TWO
#0x01a8 "" "" "" # LATIN SMALL LETTER TONE TWO
#0x01a9 "" "" "" # LATIN CAPITAL LETTER ESH
#0x01aa "" "" "" # LATIN LETTER REVERSED ESH LOOP
#0x01ab "" "" "" # LATIN SMALL LETTER T WITH PALATAL HOOK
#0x01ac "" "" "" # LATIN CAPITAL LETTER T WITH HOOK
#0x01ad "" "" "" # LATIN SMALL LETTER T WITH HOOK
#0x01ae "" "" "" # LATIN CAPITAL LETTER T WITH RETROFLEX HOOK
#0x01af "" "" "" # LATIN CAPITAL LETTER U WITH HORN
#0x01b0 "" "" "" # LATIN SMALL LETTER U WITH HORN
#0x01b1 "" "" "" # LATIN CAPITAL LETTER UPSILON
#0x01b2 "" "" "" # LATIN CAPITAL LETTER V WITH HOOK
#0x01b3 "" "" "" # LATIN CAPITAL LETTER Y WITH HOOK
#0x01b4 "" "" "" # LATIN SMALL LETTER Y WITH HOOK
#0x01b5 "" "" "" # LATIN CAPITAL LETTER Z WITH STROKE
#0x01b6 "" "" "" # LATIN SMALL LETTER Z WITH STROKE
#0x01b7 "" "" "" # LATIN CAPITAL LETTER EZH
#0x01b8 "" "" "" # LATIN CAPITAL LETTER EZH REVERSED
#0x01b9 "" "" "" # LATIN SMALL LETTER EZH REVERSED
#0x01ba "" "" "" # LATIN SMALL LETTER EZH WITH TAIL
#0x01bb "" "" "" # LATIN LETTER TWO WITH STROKE
#0x01bc "" "" "" # LATIN CAPITAL LETTER TONE FIVE
#0x01bd "" "" "" # LATIN SMALL LETTER TONE FIVE
#0x01be "" "" "" # LATIN LETTER INVERTED GLOTTAL STOP WITH STROKE
#0x01bf "" "" "" # LATIN LETTER WYNN
#0x01c0 "" "" "" # LATIN LETTER DENTAL CLICK
#0x01c1 "" "" "" # LATIN LETTER LATERAL CLICK
#0x01c2 "" "" "" # LATIN LETTER ALVEOLAR CLICK
#0x01c3 "" "" "" # LATIN LETTER RETROFLEX CLICK
#0x01c4 "" "" "" # LATIN CAPITAL LETTER DZ WITH CARON
#0x01c5 "" "" "" # LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON
#0x01c6 "" "" "" # LATIN SMALL LETTER DZ WITH CARON
#0x01c7 "" "" "" # LATIN CAPITAL LETTER LJ
#0x01c8 "" "" "" # LATIN CAPITAL LETTER L WITH SMALL LETTER J
#0x01c9 "" "" "" # LATIN SMALL LETTER LJ
#0x01ca "" "" "" # LATIN CAPITAL LETTER NJ
#0x01cb "" "" "" # LATIN CAPITAL LETTER N WITH SMALL LETTER J
#0x01cc "" "" "" # LATIN SMALL LETTER NJ
0x01cd "\\v{A}" "" "" # LATIN CAPITAL LETTER A WITH CARON
0x01ce "\\v{a}" "" "" # LATIN SMALL LETTER A WITH CARON
0x01cf "\\v{I}" "" "" # LATIN CAPITAL LETTER I WITH CARON
0x01d0 "\\v{\\i}" "" "" # LATIN SMALL LETTER I WITH CARON
0x01d1 "\\v{O}" "" "" # LATIN CAPITAL LETTER O WITH CARON
0x01d2 "\\v{o}" "" "" # LATIN SMALL LETTER O WITH CARON
0x01d3 "\\v{U}" "" "" # LATIN CAPITAL LETTER U WITH CARON
0x01d4 "\\v{u}" "" "" # LATIN SMALL LETTER U WITH CARON
#0x01d5 "" "" "" # LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
#0x01d6 "" "" "" # LATIN SMALL LETTER U WITH DIAERESIS AND MACRON
#0x01d7 "" "" "" # LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE
#0x01d8 "" "" "" # LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE
#0x01d9 "" "" "" # LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON
#0x01da "" "" "" # LATIN SMALL LETTER U WITH DIAERESIS AND CARON
#0x01db "" "" "" # LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE
#0x01dc "" "" "" # LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE
#0x01dd "" "" "" # LATIN SMALL LETTER TURNED E
#0x01de "" "" "" # LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON
#0x01df "" "" "" # LATIN SMALL LETTER A WITH DIAERESIS AND MACRON
#0x01e0 "" "" "" # LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON
#0x01e1 "" "" "" # LATIN SMALL LETTER A WITH DOT ABOVE AND MACRON
#0x01e2 "" "" "" # LATIN CAPITAL LETTER AE WITH MACRON
#0x01e3 "" "" "" # LATIN SMALL LETTER AE WITH MACRON
#0x01e4 "" "" "" # LATIN CAPITAL LETTER G WITH STROKE
#0x01e5 "" "" "" # LATIN SMALL LETTER G WITH STROKE
0x01e6 "\\v{G}" "" "" # LATIN CAPITAL LETTER G WITH CARON
0x01e7 "\\v{g}" "" "" # LATIN SMALL LETTER G WITH CARON
0x01e8 "\\v{K}" "" "" # LATIN CAPITAL LETTER K WITH CARON
0x01e9 "\\v{k}" "" "" # LATIN SMALL LETTER K WITH CARON
0x01ea "\\k{O}" "" "" # LATIN CAPITAL LETTER O WITH OGONEK
0x01eb "\\k{o}" "" "" # LATIN SMALL LETTER O WITH OGONEK
#0x01ec "" "" "" # LATIN CAPITAL LETTER O WITH OGONEK AND MACRON
#0x01ed "" "" "" # LATIN SMALL LETTER O WITH OGONEK AND MACRON
#0x01ee "" "" "" # LATIN CAPITAL LETTER EZH WITH CARON
#0x01ef "" "" "" # LATIN SMALL LETTER EZH WITH CARON
0x01f0 "\\v{\\j}" "" "" # LATIN SMALL LETTER J WITH CARON
#0x01f1 "" "" "" # LATIN CAPITAL LETTER DZ
#0x01f2 "" "" "" # LATIN CAPITAL LETTER D WITH SMALL LETTER Z
#0x01f3 "" "" "" # LATIN SMALL LETTER DZ
0x01f4 "\\'{G}" "" "" # LATIN CAPITAL LETTER G WITH ACUTE
0x01f5 "\\'{g}" "" "" # LATIN SMALL LETTER G WITH ACUTE
#0x01f6 "" "" "" # LATIN CAPITAL LETTER HWAIR
#0x01f7 "" "" "" # LATIN CAPITAL LETTER WYNN
0x01f8 "\\`{N}" "" "" # LATIN CAPITAL LETTER N WITH GRAVE
0x01f9 "\\`{n}" "" "" # LATIN SMALL LETTER N WITH GRAVE
#0x01fa "" "" "" # LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE
#0x01fb "" "" "" # LATIN SMALL LETTER A WITH RING ABOVE AND ACUTE
#0x01fc "" "" "" # LATIN CAPITAL LETTER AE WITH ACUTE
#0x01fd "" "" "" # LATIN SMALL LETTER AE WITH ACUTE
#0x01fe "" "" "" # LATIN CAPITAL LETTER O WITH STROKE AND ACUTE
#0x01ff "" "" "" # LATIN SMALL LETTER O WITH STROKE AND ACUTE
#0x0200 "" "" "" # LATIN CAPITAL LETTER A WITH DOUBLE GRAVE
#0x0201 "" "" "" # LATIN SMALL LETTER A WITH DOUBLE GRAVE
#0x0202 "" "" "" # LATIN CAPITAL LETTER A WITH INVERTED BREVE
#0x0203 "" "" "" # LATIN SMALL LETTER A WITH INVERTED BREVE
#0x0204 "" "" "" # LATIN CAPITAL LETTER E WITH DOUBLE GRAVE
#0x0205 "" "" "" # LATIN SMALL LETTER E WITH DOUBLE GRAVE
#0x0206 "" "" "" # LATIN CAPITAL LETTER E WITH INVERTED BREVE
#0x0207 "" "" "" # LATIN SMALL LETTER E WITH INVERTED BREVE
#0x0208 "" "" "" # LATIN CAPITAL LETTER I WITH DOUBLE GRAVE
#0x0209 "" "" "" # LATIN SMALL LETTER I WITH DOUBLE GRAVE
#0x020a "" "" "" # LATIN CAPITAL LETTER I WITH INVERTED BREVE
#0x020b "" "" "" # LATIN SMALL LETTER I WITH INVERTED BREVE
#0x020c "" "" "" # LATIN CAPITAL LETTER O WITH DOUBLE GRAVE
#0x020d "" "" "" # LATIN SMALL LETTER O WITH DOUBLE GRAVE
#0x020e "" "" "" # LATIN CAPITAL LETTER O WITH INVERTED BREVE
#0x020f "" "" "" # LATIN SMALL LETTER O WITH INVERTED BREVE
#0x0210 "" "" "" # LATIN CAPITAL LETTER R WITH DOUBLE GRAVE
#0x0211 "" "" "" # LATIN SMALL LETTER R WITH DOUBLE GRAVE
#0x0212 "" "" "" # LATIN CAPITAL LETTER R WITH INVERTED BREVE
#0x0213 "" "" "" # LATIN SMALL LETTER R WITH INVERTED BREVE
#0x0214 "" "" "" # LATIN CAPITAL LETTER U WITH DOUBLE GRAVE
#0x0215 "" "" "" # LATIN SMALL LETTER U WITH DOUBLE GRAVE
#0x0216 "" "" "" # LATIN CAPITAL LETTER U WITH INVERTED BREVE
#0x0217 "" "" "" # LATIN SMALL LETTER U WITH INVERTED BREVE
#0x0218 "" "" "" # LATIN CAPITAL LETTER S WITH COMMA BELOW
#0x0219 "" "" "" # LATIN SMALL LETTER S WITH COMMA BELOW
#0x021a "" "" "" # LATIN CAPITAL LETTER T WITH COMMA BELOW
#0x021b "" "" "" # LATIN SMALL LETTER T WITH COMMA BELOW
#0x021c "" "" "" # LATIN CAPITAL LETTER YOGH
#0x021d "" "" "" # LATIN SMALL LETTER YOGH
0x021e "\\v{H}" "" "" # LATIN CAPITAL LETTER H WITH CARON
0x021f "\\v{h}" "" "" # LATIN SMALL LETTER H WITH CARON
#0x0220 "" "" "" # LATIN CAPITAL LETTER N WITH LONG RIGHT LEG
#0x0222 "" "" "" # LATIN CAPITAL LETTER OU
#0x0223 "" "" "" # LATIN SMALL LETTER OU
#0x0224 "" "" "" # LATIN CAPITAL LETTER Z WITH HOOK
#0x0225 "" "" "" # LATIN SMALL LETTER Z WITH HOOK
0x0226 "\\.{A}" "" "" # LATIN CAPITAL LETTER A WITH DOT ABOVE
0x0227 "\\.{a}" "" "" # LATIN SMALL LETTER A WITH DOT ABOVE
0x0228 "\\c{E}" "" "" # LATIN CAPITAL LETTER E WITH CEDILLA
0x0229 "\\c{e}" "" "" # LATIN SMALL LETTER E WITH CEDILLA
#0x022a "" "" "" # LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON
#0x022b "" "" "" # LATIN SMALL LETTER O WITH DIAERESIS AND MACRON
#0x022c "" "" "" # LATIN CAPITAL LETTER O WITH TILDE AND MACRON
#0x022d "" "" "" # LATIN SMALL LETTER O WITH TILDE AND MACRON
0x022e "\\.{O}" "" "" # LATIN CAPITAL LETTER O WITH DOT ABOVE
0x022f "\\.{o}" "" "" # LATIN SMALL LETTER O WITH DOT ABOVE
#0x0230 "" "" "" # LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON
#0x0231 "" "" "" # LATIN SMALL LETTER O WITH DOT ABOVE AND MACRON
0x0232 "\\={Y}" "" "" # LATIN CAPITAL LETTER Y WITH MACRON
0x0233 "\\={y}" "" "" # LATIN SMALL LETTER Y WITH MACRON
0x0237 "\\j" "" "" # LATIN SMALL LETTER DOTLESS J
#0x0250 "" "" "" # LATIN SMALL LETTER TURNED A
#0x0251 "" "" "" # LATIN SMALL LETTER ALPHA
#0x0252 "" "" "" # LATIN SMALL LETTER TURNED ALPHA
#0x0253 "" "" "" # LATIN SMALL LETTER B WITH HOOK
#0x0254 "" "" "" # LATIN SMALL LETTER OPEN O
#0x0255 "" "" "" # LATIN SMALL LETTER C WITH CURL
#0x0256 "" "" "" # LATIN SMALL LETTER D WITH TAIL
#0x0257 "" "" "" # LATIN SMALL LETTER D WITH HOOK
#0x0258 "" "" "" # LATIN SMALL LETTER REVERSED E
#0x0259 "" "" "" # LATIN SMALL LETTER SCHWA
#0x025a "" "" "" # LATIN SMALL LETTER SCHWA WITH HOOK
#0x025b "" "" "" # LATIN SMALL LETTER OPEN E
#0x025c "" "" "" # LATIN SMALL LETTER REVERSED OPEN E
#0x025d "" "" "" # LATIN SMALL LETTER REVERSED OPEN E WITH HOOK
#0x025e "" "" "" # LATIN SMALL LETTER CLOSED REVERSED OPEN E
#0x025f "" "" "" # LATIN SMALL LETTER DOTLESS J WITH STROKE
#0x0260 "" "" "" # LATIN SMALL LETTER G WITH HOOK
#0x0261 "" "" "" # LATIN SMALL LETTER SCRIPT G
#0x0262 "" "" "" # LATIN LETTER SMALL CAPITAL G
#0x0263 "" "" "" # LATIN SMALL LETTER GAMMA
#0x0264 "" "" "" # LATIN SMALL LETTER RAMS HORN
#0x0265 "" "" "" # LATIN SMALL LETTER TURNED H
#0x0266 "" "" "" # LATIN SMALL LETTER H WITH HOOK
#0x0267 "" "" "" # LATIN SMALL LETTER HENG WITH HOOK
#0x0268 "" "" "" # LATIN SMALL LETTER I WITH STROKE
#0x0269 "" "" "" # LATIN SMALL LETTER IOTA
#0x026a "" "" "" # LATIN LETTER SMALL CAPITAL I
#0x026b "" "" "" # LATIN SMALL LETTER L WITH MIDDLE TILDE
#0x026c "" "" "" # LATIN SMALL LETTER L WITH BELT
#0x026d "" "" "" # LATIN SMALL LETTER L WITH RETROFLEX HOOK
#0x026e "" "" "" # LATIN SMALL LETTER LEZH
#0x026f "" "" "" # LATIN SMALL LETTER TURNED M
#0x0270 "" "" "" # LATIN SMALL LETTER TURNED M WITH LONG LEG
#0x0271 "" "" "" # LATIN SMALL LETTER M WITH HOOK
#0x0272 "" "" "" # LATIN SMALL LETTER N WITH LEFT HOOK
#0x0273 "" "" "" # LATIN SMALL LETTER N WITH RETROFLEX HOOK
#0x0274 "" "" "" # LATIN LETTER SMALL CAPITAL N
#0x0275 "" "" "" # LATIN SMALL LETTER BARRED O
#0x0276 "" "" "" # LATIN LETTER SMALL CAPITAL OE
#0x0277 "" "" "" # LATIN SMALL LETTER CLOSED OMEGA
#0x0278 "" "" "" # LATIN SMALL LETTER PHI
#0x0279 "" "" "" # LATIN SMALL LETTER TURNED R
#0x027a "" "" "" # LATIN SMALL LETTER TURNED R WITH LONG LEG
#0x027b "" "" "" # LATIN SMALL LETTER TURNED R WITH HOOK
#0x027c "" "" "" # LATIN SMALL LETTER R WITH LONG LEG
#0x027d "" "" "" # LATIN SMALL LETTER R WITH TAIL
#0x027e "" "" "" # LATIN SMALL LETTER R WITH FISHHOOK
#0x027f "" "" "" # LATIN SMALL LETTER REVERSED R WITH FISHHOOK
#0x0280 "" "" "" # LATIN LETTER SMALL CAPITAL R
#0x0281 "" "" "" # LATIN LETTER SMALL CAPITAL INVERTED R
#0x0282 "" "" "" # LATIN SMALL LETTER S WITH HOOK
#0x0283 "" "" "" # LATIN SMALL LETTER ESH
#0x0284 "" "" "" # LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK
#0x0285 "" "" "" # LATIN SMALL LETTER SQUAT REVERSED ESH
#0x0286 "" "" "" # LATIN SMALL LETTER ESH WITH CURL
#0x0287 "" "" "" # LATIN SMALL LETTER TURNED T
#0x0288 "" "" "" # LATIN SMALL LETTER T WITH RETROFLEX HOOK
#0x0289 "" "" "" # LATIN SMALL LETTER U BAR
#0x028a "" "" "" # LATIN SMALL LETTER UPSILON
#0x028b "" "" "" # LATIN SMALL LETTER V WITH HOOK
#0x028c "" "" "" # LATIN SMALL LETTER TURNED V
#0x028d "" "" "" # LATIN SMALL LETTER TURNED W
#0x028e "" "" "" # LATIN SMALL LETTER TURNED Y
#0x028f "" "" "" # LATIN LETTER SMALL CAPITAL Y
#0x0290 "" "" "" # LATIN SMALL LETTER Z WITH RETROFLEX HOOK
#0x0291 "" "" "" # LATIN SMALL LETTER Z WITH CURL
#0x0292 "" "" "" # LATIN SMALL LETTER EZH
#0x0293 "" "" "" # LATIN SMALL LETTER EZH WITH CURL
#0x0294 "" "" "" # LATIN LETTER GLOTTAL STOP
#0x0295 "" "" "" # LATIN LETTER PHARYNGEAL VOICED FRICATIVE
#0x0296 "" "" "" # LATIN LETTER INVERTED GLOTTAL STOP
#0x0297 "" "" "" # LATIN LETTER STRETCHED C
#0x0298 "" "" "" # LATIN LETTER BILABIAL CLICK
#0x0299 "" "" "" # LATIN LETTER SMALL CAPITAL B
#0x029a "" "" "" # LATIN SMALL LETTER CLOSED OPEN E
#0x029b "" "" "" # LATIN LETTER SMALL CAPITAL G WITH HOOK
#0x029c "" "" "" # LATIN LETTER SMALL CAPITAL H
#0x029d "" "" "" # LATIN SMALL LETTER J WITH CROSSED-TAIL
#0x029e "" "" "" # LATIN SMALL LETTER TURNED K
#0x029f "" "" "" # LATIN LETTER SMALL CAPITAL L
#0x02a0 "" "" "" # LATIN SMALL LETTER Q WITH HOOK
#0x02a1 "" "" "" # LATIN LETTER GLOTTAL STOP WITH STROKE
#0x02a2 "" "" "" # LATIN LETTER REVERSED GLOTTAL STOP WITH STROKE
#0x02a3 "" "" "" # LATIN SMALL LETTER DZ DIGRAPH
#0x02a4 "" "" "" # LATIN SMALL LETTER DEZH DIGRAPH
#0x02a5 "" "" "" # LATIN SMALL LETTER DZ DIGRAPH WITH CURL
#0x02a6 "" "" "" # LATIN SMALL LETTER TS DIGRAPH
#0x02a7 "" "" "" # LATIN SMALL LETTER TESH DIGRAPH
#0x02a8 "" "" "" # LATIN SMALL LETTER TC DIGRAPH WITH CURL
#0x02a9 "" "" "" # LATIN SMALL LETTER FENG DIGRAPH
#0x02aa "" "" "" # LATIN SMALL LETTER LS DIGRAPH
#0x02ab "" "" "" # LATIN SMALL LETTER LZ DIGRAPH
#0x02ac "" "" "" # LATIN LETTER BILABIAL PERCUSSIVE
#0x02ad "" "" "" # LATIN LETTER BIDENTAL PERCUSSIVE
#0x02b0 "" "" "" # MODIFIER LETTER SMALL H
#0x02b1 "" "" "" # MODIFIER LETTER SMALL H WITH HOOK
#0x02b2 "" "" "" # MODIFIER LETTER SMALL J
#0x02b3 "" "" "" # MODIFIER LETTER SMALL R
#0x02b4 "" "" "" # MODIFIER LETTER SMALL TURNED R
#0x02b5 "" "" "" # MODIFIER LETTER SMALL TURNED R WITH HOOK
#0x02b6 "" "" "" # MODIFIER LETTER SMALL CAPITAL INVERTED R
#0x02b7 "" "" "" # MODIFIER LETTER SMALL W
#0x02b8 "" "" "" # MODIFIER LETTER SMALL Y
#0x02b9 "" "" "" # MODIFIER LETTER PRIME
#0x02ba "" "" "" # MODIFIER LETTER DOUBLE PRIME
#0x02bb "" "" "" # MODIFIER LETTER TURNED COMMA
#0x02bc "" "" "" # MODIFIER LETTER APOSTROPHE
#0x02bd "" "" "" # MODIFIER LETTER REVERSED COMMA
#0x02be "" "" "" # MODIFIER LETTER RIGHT HALF RING
#0x02bf "" "" "" # MODIFIER LETTER LEFT HALF RING
#0x02c0 "" "" "" # MODIFIER LETTER GLOTTAL STOP
#0x02c1 "" "" "" # MODIFIER LETTER REVERSED GLOTTAL STOP
#0x02c2 "" "" "" # MODIFIER LETTER LEFT ARROWHEAD
#0x02c3 "" "" "" # MODIFIER LETTER RIGHT ARROWHEAD
#0x02c4 "" "" "" # MODIFIER LETTER UP ARROWHEAD
#0x02c5 "" "" "" # MODIFIER LETTER DOWN ARROWHEAD
#0x02c6 "" "" "" # MODIFIER LETTER CIRCUMFLEX ACCENT
0x02c7 "\\v{ }" "" "" # CARON
#0x02c8 "" "" "" # MODIFIER LETTER VERTICAL LINE
#0x02c9 "" "" "" # MODIFIER LETTER MACRON
#0x02ca "" "" "" # MODIFIER LETTER ACUTE ACCENT
#0x02cb "" "" "" # MODIFIER LETTER GRAVE ACCENT
#0x02cc "" "" "" # MODIFIER LETTER LOW VERTICAL LINE
#0x02cd "" "" "" # MODIFIER LETTER LOW MACRON
#0x02ce "" "" "" # MODIFIER LETTER LOW GRAVE ACCENT
#0x02cf "" "" "" # MODIFIER LETTER LOW ACUTE ACCENT
#0x02d0 "" "" "" # MODIFIER LETTER TRIANGULAR COLON
#0x02d1 "" "" "" # MODIFIER LETTER HALF TRIANGULAR COLON
#0x02d2 "" "" "" # MODIFIER LETTER CENTRED RIGHT HALF RING
#0x02d3 "" "" "" # MODIFIER LETTER CENTRED LEFT HALF RING
#0x02d4 "" "" "" # MODIFIER LETTER UP TACK
#0x02d5 "" "" "" # MODIFIER LETTER DOWN TACK
#0x02d6 "" "" "" # MODIFIER LETTER PLUS SIGN
#0x02d7 "" "" "" # MODIFIER LETTER MINUS SIGN
0x02d8 "\\u{ }" "" "" # BREVE
0x02d9 "\\.{ }" "" "" # DOT ABOVE
0x02da "\\r{ }" "" "" # RING ABOVE
0x02db "\\k{ }" "" "" # OGONEK
0x02dc "\\~{ }" "" "" # SMALL TILDE
0x02dd "\\H{ }" "" "" # DOUBLE ACUTE ACCENT
#0x02de "" "" "" # MODIFIER LETTER RHOTIC HOOK
#0x02df "" "" "" # MODIFIER LETTER CROSS ACCENT
#0x02e0 "" "" "" # MODIFIER LETTER SMALL GAMMA
#0x02e1 "" "" "" # MODIFIER LETTER SMALL L
#0x02e2 "" "" "" # MODIFIER LETTER SMALL S
#0x02e3 "" "" "" # MODIFIER LETTER SMALL X
#0x02e4 "" "" "" # MODIFIER LETTER SMALL REVERSED GLOTTAL STOP
#0x02e5 "" "" "" # MODIFIER LETTER EXTRA-HIGH TONE BAR
#0x02e6 "" "" "" # MODIFIER LETTER HIGH TONE BAR
#0x02e7 "" "" "" # MODIFIER LETTER MID TONE BAR
#0x02e8 "" "" "" # MODIFIER LETTER LOW TONE BAR
#0x02e9 "" "" "" # MODIFIER LETTER EXTRA-LOW TONE BAR
#0x02ea "" "" "" # MODIFIER LETTER YIN DEPARTING TONE MARK
#0x02eb "" "" "" # MODIFIER LETTER YANG DEPARTING TONE MARK
#0x02ec "" "" "" # MODIFIER LETTER VOICING
#0x02ed "" "" "" # MODIFIER LETTER UNASPIRATED
#0x02ee "" "" "" # MODIFIER LETTER DOUBLE APOSTROPHE
0x0300 "\\`" "" "combining" # COMBINING GRAVE ACCENT
0x0301 "\\'" "" "combining" # COMBINING ACUTE ACCENT
0x0302 "\\^" "" "combining" # COMBINING CIRCUMFLEX ACCENT
0x0303 "\\~" "" "combining" # COMBINING TILDE
0x0304 "\\=" "" "combining" # COMBINING MACRON
#0x0305 "" "" "combining" # COMBINING OVERLINE
0x0306 "\\u" "" "combining" # COMBINING BREVE
0x0307 "\\." "" "combining" # COMBINING DOT ABOVE
0x0308 "\\\"" "" "combining" # COMBINING DIAERESIS
#0x0309 "" "" "combining" # COMBINING HOOK ABOVE
0x030a "\\r" "" "combining" # COMBINING RING ABOVE
0x030b "\\H" "" "combining" # COMBINING DOUBLE ACUTE ACCENT
0x030c "\\v" "" "combining" # COMBINING CARON
#0x030d "" "" "combining" # COMBINING VERTICAL LINE ABOVE
#0x030e "" "" "combining" # COMBINING DOUBLE VERTICAL LINE ABOVE
#0x030f "" "" "combining" # COMBINING DOUBLE GRAVE ACCENT
#0x0310 "" "" "combining" # COMBINING CANDRABINDU
#0x0311 "" "" "combining" # COMBINING INVERTED BREVE
#0x0312 "" "" "combining" # COMBINING TURNED COMMA ABOVE
#0x0313 "" "" "combining" # COMBINING COMMA ABOVE
#0x0314 "" "" "combining" # COMBINING REVERSED COMMA ABOVE
#0x0315 "" "" "combining" # COMBINING COMMA ABOVE RIGHT
#0x0316 "" "" "combining" # COMBINING GRAVE ACCENT BELOW
#0x0317 "" "" "combining" # COMBINING ACUTE ACCENT BELOW
#0x0318 "" "" "combining" # COMBINING LEFT TACK BELOW
#0x0319 "" "" "combining" # COMBINING RIGHT TACK BELOW
#0x031a "" "" "combining" # COMBINING LEFT ANGLE ABOVE
#0x031b "" "" "combining" # COMBINING HORN
#0x031c "" "" "combining" # COMBINING LEFT HALF RING BELOW
#0x031d "" "" "combining" # COMBINING UP TACK BELOW
#0x031e "" "" "combining" # COMBINING DOWN TACK BELOW
#0x031f "" "" "combining" # COMBINING PLUS SIGN BELOW
0x0320 "\\b" "" "combining" # COMBINING MINUS SIGN BELOW
#0x0321 "" "" "combining" # COMBINING PALATALIZED HOOK BELOW
#0x0322 "" "" "combining" # COMBINING RETROFLEX HOOK BELOW
0x0323 "\\d" "" "combining" # COMBINING DOT BELOW
#0x0324 "" "" "combining" # COMBINING DIAERESIS BELOW
#0x0325 "" "" "combining" # COMBINING RING BELOW
#0x0326 "" "" "combining" # COMBINING COMMA BELOW
0x0327 "\\c" "" "combining" # COMBINING CEDILLA
0x0328 "\\k" "" "combining" # COMBINING OGONEK
#0x0329 "" "" "combining" # COMBINING VERTICAL LINE BELOW
#0x032a "" "" "combining" # COMBINING BRIDGE BELOW
#0x032b "" "" "combining" # COMBINING INVERTED DOUBLE ARCH BELOW
#0x032c "" "" "combining" # COMBINING CARON BELOW
#0x032d "" "" "combining" # COMBINING CIRCUMFLEX ACCENT BELOW
#0x032e "" "" "combining" # COMBINING BREVE BELOW
#0x032f "" "" "combining" # COMBINING INVERTED BREVE BELOW
#0x0330 "" "" "combining" # COMBINING TILDE BELOW
#0x0331 "" "" "combining" # COMBINING MACRON BELOW
#0x0332 "" "" "combining" # COMBINING LOW LINE
#0x0333 "" "" "combining" # COMBINING DOUBLE LOW LINE
#0x0334 "" "" "combining" # COMBINING TILDE OVERLAY
#0x0335 "" "" "combining" # COMBINING SHORT STROKE OVERLAY
#0x0336 "" "" "combining" # COMBINING LONG STROKE OVERLAY
#0x0337 "" "" "combining" # COMBINING SHORT SOLIDUS OVERLAY
#0x0338 "" "" "combining" # COMBINING LONG SOLIDUS OVERLAY
#0x0339 "" "" "combining" # COMBINING RIGHT HALF RING BELOW
#0x033a "" "" "combining" # COMBINING INVERTED BRIDGE BELOW
#0x033b "" "" "combining" # COMBINING SQUARE BELOW
#0x033c "" "" "combining" # COMBINING SEAGULL BELOW
#0x033d "" "" "combining" # COMBINING X ABOVE
#0x033e "" "" "combining" # COMBINING VERTICAL TILDE
#0x033f "" "" "combining" # COMBINING DOUBLE OVERLINE
#0x0340 "" "" "combining" # COMBINING GRAVE TONE MARK
#0x0341 "" "" "combining" # COMBINING ACUTE TONE MARK
#0x0342 "" "" "combining" # COMBINING GREEK PERISPOMENI
#0x0343 "" "" "combining" # COMBINING GREEK KORONIS
#0x0344 "" "" "combining" # COMBINING GREEK DIALYTIKA TONOS
#0x0345 "" "" "combining" # COMBINING GREEK YPOGEGRAMMENI
#0x0346 "" "" "combining" # COMBINING BRIDGE ABOVE
#0x0347 "" "" "combining" # COMBINING EQUALS SIGN BELOW
#0x0348 "" "" "combining" # COMBINING DOUBLE VERTICAL LINE BELOW
#0x0349 "" "" "combining" # COMBINING LEFT ANGLE BELOW
#0x034a "" "" "combining" # COMBINING NOT TILDE ABOVE
#0x034b "" "" "combining" # COMBINING HOMOTHETIC ABOVE
#0x034c "" "" "combining" # COMBINING ALMOST EQUAL TO ABOVE
#0x034d "" "" "combining" # COMBINING LEFT RIGHT ARROW BELOW
#0x034e "" "" "combining" # COMBINING UPWARDS ARROW BELOW
#0x034f "" "" "" # COMBINING GRAPHEME JOINER
#0x0360 "" "" "combining" # COMBINING DOUBLE TILDE
0x0361 "\\t" "" "combining" # COMBINING DOUBLE INVERTED BREVE (ligature tie)
#0x0362 "" "" "combining" # COMBINING DOUBLE RIGHTWARDS ARROW BELOW
#0x0363 "" "" "combining" # COMBINING LATIN SMALL LETTER A
#0x0364 "" "" "combining" # COMBINING LATIN SMALL LETTER E
#0x0365 "" "" "combining" # COMBINING LATIN SMALL LETTER I
#0x0366 "" "" "combining" # COMBINING LATIN SMALL LETTER O
#0x0367 "" "" "combining" # COMBINING LATIN SMALL LETTER U
#0x0368 "" "" "combining" # COMBINING LATIN SMALL LETTER C
#0x0369 "" "" "combining" # COMBINING LATIN SMALL LETTER D
#0x036a "" "" "combining" # COMBINING LATIN SMALL LETTER H
#0x036b "" "" "combining" # COMBINING LATIN SMALL LETTER M
#0x036c "" "" "combining" # COMBINING LATIN SMALL LETTER R
#0x036d "" "" "combining" # COMBINING LATIN SMALL LETTER T
#0x036e "" "" "combining" # COMBINING LATIN SMALL LETTER V
#0x036f "" "" "combining" # COMBINING LATIN SMALL LETTER X
0x1ea1 "\\d{a}" "" "" # LATIN SMALL LETTER A WITH DOT BELOW
0x20ac "\\texteuro" "textcomp" "" # EURO SIGN

View File

@ -15,14 +15,19 @@
#include "encoding.h" #include "encoding.h"
#include "debug.h" #include "debug.h"
#include "LaTeXFeatures.h"
#include "lyxlex.h" #include "lyxlex.h"
#include "lyxrc.h" #include "lyxrc.h"
#include "support/filename.h" #include "support/filename.h"
#include "support/lstrings.h"
#include "support/unicode.h"
namespace lyx { namespace lyx {
using support::FileName;
#ifndef CXX_GLOBAL_CSTD #ifndef CXX_GLOBAL_CSTD
using std::strtol; using std::strtol;
#endif #endif
@ -177,9 +182,97 @@ char_type arabic_table[63][2] = {
char_type const arabic_start = 0xc1; char_type const arabic_start = 0xc1;
/// Information about a single UCS4 character
struct CharInfo {
	/// Start with all flags cleared, so that no flag is ever read
	/// uninitialized (e.g. \c feature of an entry that has no preamble).
	CharInfo() : combining(false), feature(false), force(false) {}
	/// LaTeX command for this character
	docstring command;
	/// Needed LaTeX preamble (or feature)
	string preamble;
	/// Is this a combining character?
	bool combining;
	/// Is \c preamble a feature known by LaTeXFeatures, or a raw LaTeX
	/// command?
	bool feature;
	/// Always force the LaTeX command, even if the encoding contains
	/// this character?
	bool force;
};
typedef std::map<char_type, CharInfo> CharInfoMap;
CharInfoMap unicodesymbols;
} // namespace anon } // namespace anon
/// Store the three names of the encoding and work out which UCS4 code
/// points can be represented in it.
Encoding::Encoding(string const & n, string const & l, string const & i)
	: Name_(n), LatexName_(l), iconvName_(i)
{
	if (n == "utf8") {
		// UTF8 can encode all 1<<20 + 1<<16 UCS4 code points
		start_encodable_ = 0x110000;
		return;
	}

	start_encodable_ = 0;
	// Probing every byte value produces iconv errors for the bytes that
	// are unused in this encoding, so lyxerr is switched off meanwhile.
	lyxerr.disable();
	for (unsigned short byte = 0; byte < 256; ++byte) {
		char const eightbit = byte;
		std::vector<char_type> const converted =
			eightbit_to_ucs4(&eightbit, 1, i);
		if (converted.size() != 1)
			continue;
		char_type const code = converted[0];
		CharInfoMap::const_iterator const info = unicodesymbols.find(code);
		// Symbols flagged "force" are always written as LaTeX command,
		// so they are not recorded as encodable.
		bool const forced =
			info != unicodesymbols.end() && info->second.force;
		if (!forced)
			encodable_.insert(code);
	}
	lyxerr.enable();

	// Strip the contiguous run of code points starting at 0 from the set
	// and remember only its upper bound in start_encodable_.
	for (CharSet::iterator pos = encodable_.find(start_encodable_);
	     pos != encodable_.end();
	     pos = encodable_.find(start_encodable_)) {
		encodable_.erase(pos);
		++start_encodable_;
	}
}
/// Return what has to be written to the LaTeX file for \p c: the character
/// itself if this encoding can represent it, otherwise the LaTeX command
/// registered in unicodesymbols. If no command is known either, a warning
/// is written to lyxerr and the character itself is returned.
docstring const Encoding::latexChar(char_type c) const
{
	// Everything below start_encodable_ is encodable, no lookup needed.
	if (c < start_encodable_)
		return docstring(1, c);
	if (encodable_.find(c) == encodable_.end()) {
		// c cannot be encoded in this encoding
		CharInfoMap::const_iterator const it = unicodesymbols.find(c);
		if (it != unicodesymbols.end())
			return it->second.command;
		// std::hex is sticky: switch back to decimal afterwards,
		// otherwise every later number written to lyxerr would be
		// printed in hexadecimal.
		lyxerr << "Could not find LaTeX command for character 0x"
		       << std::hex << c << std::dec
		       << ".\nLaTeX export will fail."
		       << endl;
	}
	return docstring(1, c);
}
/// Register in \p features whatever the LaTeX command for \p c needs in
/// the preamble. Nothing is done if \p c is encodable in this encoding.
void Encoding::validate(char_type c, LaTeXFeatures & features) const
{
	bool const representable = c < start_encodable_
		|| encodable_.find(c) != encodable_.end();
	if (representable)
		return;

	// c cannot be encoded in this encoding
	CharInfoMap::const_iterator const entry = unicodesymbols.find(c);
	if (entry == unicodesymbols.end())
		return;

	string const & preamble = entry->second.preamble;
	if (preamble.empty())
		return;

	// The preamble is either the name of a feature or a raw snippet.
	if (entry->second.feature)
		features.require(preamble);
	else
		features.addPreambleSnippet(preamble);
}
bool Encodings::isComposeChar_hebrew(char_type c) bool Encodings::isComposeChar_hebrew(char_type c)
{ {
@ -226,6 +319,15 @@ char_type Encodings::transformChar(char_type c,
} }
/// \return true if \p c has an entry in unicodesymbols that is flagged
/// as combining character, false otherwise (also for unknown characters).
bool Encodings::isCombiningChar(char_type c)
{
	CharInfoMap::const_iterator const entry = unicodesymbols.find(c);
	return entry != unicodesymbols.end() && entry->second.combining;
}
Encoding const * Encodings::getFromLyXName(string const & name) const Encoding const * Encodings::getFromLyXName(string const & name) const
{ {
EncodingList::const_iterator it = encodinglist.find(name); EncodingList::const_iterator it = encodinglist.find(name);
@ -255,8 +357,68 @@ Encodings::Encodings()
{ {
} }
void Encodings::read(support::FileName const & filename)
void Encodings::read(FileName const & encfile, FileName const & symbolsfile)
{ {
// We must read the symbolsfile first, because the Encoding
// constructor depends on it.
LyXLex symbolslex(0, 0);
symbolslex.setFile(symbolsfile);
while (symbolslex.isOK()) {
char_type symbol;
CharInfo info;
string flags;
if (symbolslex.next(true)) {
std::istringstream is(symbolslex.getString());
// reading symbol directly does not work if
// char_type == std::wchar_t.
boost::uint32_t tmp;
if(!(is >> std::hex >> tmp))
break;
symbol = tmp;
} else
break;
if (symbolslex.next(true))
info.command = symbolslex.getDocString();
else
break;
if (symbolslex.next(true))
info.preamble = symbolslex.getString();
else
break;
if (symbolslex.next(true))
flags = symbolslex.getString();
else
break;
info.combining = false;
info.force = false;
while (!flags.empty()) {
string flag;
flags = support::split(flags, flag, ',');
if (flag == "combining")
info.combining = true;
else if (flag == "force")
info.force = true;
else
lyxerr << "Ignoring unknown flag `" << flag
<< "' for symbol `0x" << std::hex
<< symbol << "'." << endl;
}
if (!info.preamble.empty())
info.feature = info.preamble[0] != '\\';
lyxerr[Debug::INFO]
<< "Read unicode symbol " << symbol << " '"
<< to_utf8(info.command) << "' '" << info.preamble
<< "' " << info.combining << ' ' << info.feature
<< endl;
unicodesymbols[symbol] = info;
}
// Now read the encodings
enum Encodingtags { enum Encodingtags {
et_encoding = 1, et_encoding = 1,
et_end, et_end,
@ -269,7 +431,7 @@ void Encodings::read(support::FileName const & filename)
}; };
LyXLex lex(encodingtags, et_last - 1); LyXLex lex(encodingtags, et_last - 1);
lex.setFile(filename); lex.setFile(encfile);
while (lex.isOK()) { while (lex.isOK()) {
switch (lex.lex()) { switch (lex.lex()) {
case et_encoding: case et_encoding:

View File

@ -13,15 +13,17 @@
#ifndef ENCODING_H #ifndef ENCODING_H
#define ENCODING_H #define ENCODING_H
#include <map> #include "support/docstring.h"
#include <string>
#include "support/types.h" #include <set>
namespace lyx { namespace lyx {
namespace support { class FileName; } namespace support { class FileName; }
class LaTeXFeatures;
/// ///
class Encoding { class Encoding {
public: public:
@ -29,16 +31,25 @@ public:
Encoding() {} Encoding() {}
/// ///
Encoding(std::string const & n, std::string const & l, Encoding(std::string const & n, std::string const & l,
std::string const & i) std::string const & i);
: Name_(n), LatexName_(l), iconvName_(i)
{
}
/// ///
std::string const & name() const { return Name_; } std::string const & name() const { return Name_; }
/// ///
std::string const & latexName() const { return LatexName_; } std::string const & latexName() const { return LatexName_; }
/// ///
std::string const & iconvName() const { return iconvName_; } std::string const & iconvName() const { return iconvName_; }
/**
* Convert \p c to something that LaTeX can understand.
* This is either the character itself (if it is representable
* in this encoding), or a LaTeX macro.
* If the character is not representable in this encoding, but no
* LaTeX macro is known, a warning is given on lyxerr, and the
* character is returned.
*/
docstring const latexChar(char_type c) const;
/// Add the preamble snippet needed for the output of latexChar(c)
/// to \p features.
void validate(char_type c, LaTeXFeatures & features) const;
private: private:
/// ///
std::string Name_; std::string Name_;
@ -46,6 +57,15 @@ private:
std::string LatexName_; std::string LatexName_;
/// ///
std::string iconvName_; std::string iconvName_;
///
typedef std::set<char_type> CharSet;
/// Set of UCS4 characters that we can encode (for singlebyte
/// encodings only)
CharSet encodable_;
/// All code points below this are encodable. This helps us to avoid
/// lookup of ASCII characters in encodable_ and gives about 1 sec
/// speedup on export of the Userguide.
char_type start_encodable_;
}; };
class Encodings { class Encodings {
@ -64,8 +84,11 @@ public:
}; };
/// ///
Encodings(); Encodings();
/// /// Read the encodings.
void read(support::FileName const & filename); /// \param encfile encodings definition file
/// \param symbolsfile unicode->LaTeX mapping file
void read(support::FileName const & encfile,
support::FileName const & symbolsfile);
/// Get encoding from LyX name \p name /// Get encoding from LyX name \p name
Encoding const * getFromLyXName(std::string const & name) const; Encoding const * getFromLyXName(std::string const & name) const;
/// Get encoding from LaTeX name \p name /// Get encoding from LaTeX name \p name
@ -97,6 +120,8 @@ public:
static bool is_arabic(char_type c); static bool is_arabic(char_type c);
/// ///
static char_type transformChar(char_type c, Letter_Form form); static char_type transformChar(char_type c, Letter_Form form);
/// Is this a combining char?
static bool isCombiningChar(char_type c);
private: private:
/// ///

View File

@ -880,7 +880,7 @@ bool LyX::init()
if (!readRcFile("preferences")) if (!readRcFile("preferences"))
return false; return false;
if (!readEncodingsFile("encodings")) if (!readEncodingsFile("encodings", "unicodesymbols"))
return false; return false;
if (!readLanguagesFile("languages")) if (!readLanguagesFile("languages"))
return false; return false;
@ -1247,16 +1247,24 @@ bool LyX::readLanguagesFile(string const & name)
// Read the encodings file `name' // Read the encodings file `name'
bool LyX::readEncodingsFile(string const & name) bool LyX::readEncodingsFile(string const & enc_name,
string const & symbols_name)
{ {
lyxerr[Debug::INIT] << "About to read " << name << "..." << endl; lyxerr[Debug::INIT] << "About to read " << enc_name << " and "
<< symbols_name << "..." << endl;
FileName const enc_path = libFileSearch(string(), name); FileName const symbols_path = libFileSearch(string(), symbols_name);
if (enc_path.empty()) { if (symbols_path.empty()) {
showFileError(name); showFileError(symbols_name);
return false; return false;
} }
encodings.read(enc_path);
FileName const enc_path = libFileSearch(string(), enc_name);
if (enc_path.empty()) {
showFileError(enc_name);
return false;
}
encodings.read(enc_path, symbols_path);
return true; return true;
} }

View File

@ -149,8 +149,11 @@ private:
bool readUIFile(std::string const & name, bool include = false); bool readUIFile(std::string const & name, bool include = false);
/// read the given languages file /// read the given languages file
bool readLanguagesFile(std::string const & name); bool readLanguagesFile(std::string const & name);
/// read the given encodings file /// read the encodings.
bool readEncodingsFile(std::string const & name); /// \param enc_name encodings definition file
/// \param symbols_name unicode->LaTeX mapping file
bool readEncodingsFile(std::string const & enc_name,
std::string const & symbols_name);
/// parsing of non-gui LyX options. /// parsing of non-gui LyX options.
void easyParse(int & argc, char * argv[]); void easyParse(int & argc, char * argv[]);
/// shows up a parsing error on screen /// shows up a parsing error on screen

View File

@ -1007,6 +1007,8 @@ bool Paragraph::simpleTeXOnePar(Buffer const & buf,
runparams.moving_arg); runparams.moving_arg);
} }
// Computed only once per paragraph since bparams.encoding() is expensive
Encoding const & doc_encoding = bparams.encoding();
for (pos_type i = 0; i < size(); ++i) { for (pos_type i = 0; i < size(); ++i) {
++column; ++column;
// First char in paragraph or after label? // First char in paragraph or after label?
@ -1066,10 +1068,18 @@ bool Paragraph::simpleTeXOnePar(Buffer const & buf,
if (c == ' ') { if (c == ' ') {
// Do not print the separation of the optional argument // Do not print the separation of the optional argument
if (i != body_pos - 1) { if (i != body_pos - 1) {
// FIXME: change tracking if (pimpl_->simpleTeXBlanks(bparams,
// Is this correct WRT change tracking? doc_encoding, os, texrow,
pimpl_->simpleTeXBlanks(os, texrow, i, i, column, font, *style))
column, font, *style); // A surrogate pair was output. We
// must not call simpleTeXSpecialChars
// in this iteration, since
// simpleTeXBlanks incremented i, and
// simpleTeXSpecialChars would output
// the combining character again.
// FIXME: change tracking
// Is this correct WRT change tracking?
continue;
} }
} }
@ -1101,7 +1111,7 @@ bool Paragraph::simpleTeXOnePar(Buffer const & buf,
rp.local_font = &font; rp.local_font = &font;
rp.intitle = style->intitle; rp.intitle = style->intitle;
pimpl_->simpleTeXSpecialChars(buf, bparams, pimpl_->simpleTeXSpecialChars(buf, bparams,
os, texrow, rp, doc_encoding, os, texrow, rp,
font, running_font, font, running_font,
basefont, outerfont, open_font, basefont, outerfont, open_font,
runningChangeType, runningChangeType,

View File

@ -59,16 +59,15 @@ special_phrase const special_phrases[] = {
size_t const phrases_nr = sizeof(special_phrases)/sizeof(special_phrase); size_t const phrases_nr = sizeof(special_phrases)/sizeof(special_phrase);
bool isEncoding(BufferParams const & bparams, LyXFont const & font, /// Get the real encoding of a character with font \p font.
string const & encoding) /// doc_encoding == bparams.encoding(), but we use a precomputed variable
/// since bparams.encoding() is expensive
inline Encoding const & getEncoding(BufferParams const & bparams,
Encoding const & doc_encoding, LyXFont const & font)
{ {
// We do ignore bparams.inputenc == "default" here because characters if (bparams.inputenc == "auto" || bparams.inputenc == "default")
// in this encoding could be treated by TeX as something different, return *(font.language()->encoding());
// e.g. if they are inside a CJK environment. See also return doc_encoding;
// http://bugzilla.lyx.org/show_bug.cgi?id=3043.
return (bparams.inputenc == encoding
|| (bparams.inputenc == "auto"
&& font.language()->encoding()->latexName() == encoding));
} }
} // namespace anon } // namespace anon
@ -381,14 +380,44 @@ int Paragraph::Pimpl::eraseChars(pos_type start, pos_type end, bool trackChanges
} }
void Paragraph::Pimpl::simpleTeXBlanks(odocstream & os, TexRow & texrow, int Paragraph::Pimpl::latexSurrogatePair(odocstream & os, value_type c,
pos_type const i, value_type next, Encoding const & encoding)
{
// Writing next here may circumvent a possible font change between
// c and next. Since next is only output if it forms a surrogate pair
// with c we can ignore this:
// A font change inside a surrogate pair does not make sense and is
// hopefully impossible to input.
// FIXME: change tracking
// Is this correct WRT change tracking?
docstring const latex1 = encoding.latexChar(next);
docstring const latex2 = encoding.latexChar(c);
os << latex1 << '{' << latex2 << '}';
return latex1.length() + latex2.length() + 2;
}
bool Paragraph::Pimpl::simpleTeXBlanks(BufferParams const & bparams,
Encoding const & doc_encoding,
odocstream & os, TexRow & texrow,
pos_type & i,
unsigned int & column, unsigned int & column,
LyXFont const & font, LyXFont const & font,
LyXLayout const & style) LyXLayout const & style)
{ {
if (style.pass_thru) if (style.pass_thru)
return; return false;
if (i < size() - 1) {
char_type next = getChar(i + 1);
if (Encodings::isCombiningChar(next)) {
// This space has an accent, so we must always output it.
Encoding const & encoding = getEncoding(bparams, doc_encoding, font);
column += latexSurrogatePair(os, ' ', next, encoding) - 1;
++i;
return true;
}
}
if (lyxrc.plaintext_linelen > 0 if (lyxrc.plaintext_linelen > 0
&& column > lyxrc.plaintext_linelen && column > lyxrc.plaintext_linelen
@ -413,6 +442,7 @@ void Paragraph::Pimpl::simpleTeXBlanks(odocstream & os, TexRow & texrow,
} else { } else {
os << ' '; os << ' ';
} }
return false;
} }
@ -448,6 +478,7 @@ bool Paragraph::Pimpl::isTextAt(string const & str, pos_type pos) const
void Paragraph::Pimpl::simpleTeXSpecialChars(Buffer const & buf, void Paragraph::Pimpl::simpleTeXSpecialChars(Buffer const & buf,
BufferParams const & bparams, BufferParams const & bparams,
Encoding const & doc_encoding,
odocstream & os, odocstream & os,
TexRow & texrow, TexRow & texrow,
OutputParams const & runparams, OutputParams const & runparams,
@ -465,6 +496,8 @@ void Paragraph::Pimpl::simpleTeXSpecialChars(Buffer const & buf,
if (style.pass_thru) { if (style.pass_thru) {
if (c != Paragraph::META_INSET) { if (c != Paragraph::META_INSET) {
if (c != '\0') if (c != '\0')
// FIXME UNICODE: This can fail if c cannot
// be encoded in the current encoding.
os.put(c); os.put(c);
} else } else
owner_->getInset(i)->plaintext(buf, os, runparams); owner_->getInset(i)->plaintext(buf, os, runparams);
@ -581,25 +614,6 @@ void Paragraph::Pimpl::simpleTeXSpecialChars(Buffer const & buf,
// would be wrongly converted on systems where char is signed, so we give // would be wrongly converted on systems where char is signed, so we give
// the code points. // the code points.
// This also makes us independant from the encoding of this source file. // This also makes us independant from the encoding of this source file.
case 0xb1: // ± PLUS-MINUS SIGN
case 0xb2: // ² SUPERSCRIPT TWO
case 0xb3: // ³ SUPERSCRIPT THREE
case 0xd7: // × MULTIPLICATION SIGN
case 0xf7: // ÷ DIVISION SIGN
case 0xb9: // ¹ SUPERSCRIPT ONE
case 0xac: // ¬ NOT SIGN
case 0xb5: // µ MICRO SIGN
if (isEncoding(bparams, font, "latin1")
|| isEncoding(bparams, font, "latin9")) {
os << "\\ensuremath{";
os.put(c);
os << '}';
column += 13;
} else {
os.put(c);
}
break;
case '|': case '<': case '>': case '|': case '<': case '>':
// In T1 encoding, these characters exist // In T1 encoding, these characters exist
if (lyxrc.fontenc == "T1") { if (lyxrc.fontenc == "T1") {
@ -658,82 +672,6 @@ void Paragraph::Pimpl::simpleTeXSpecialChars(Buffer const & buf,
column += 9; column += 9;
break; break;
case 0xa3: // £ POUND SIGN
if (bparams.inputenc == "default") {
os << "\\pounds{}";
column += 8;
} else {
os.put(c);
}
break;
case 0x20ac: // EURO SIGN
if (isEncoding(bparams, font, "latin9")
|| isEncoding(bparams, font, "cp1251")
|| isEncoding(bparams, font, "utf8")
|| isEncoding(bparams, font, "latin10")
|| isEncoding(bparams, font, "cp858")) {
os.put(c);
} else {
os << "\\texteuro{}";
column += 10;
}
break;
// These characters are covered by latin1, but not
// by latin9 (a.o.). We have to support them because
// we switched the default of latin1-languages to latin9
case 0xa4: // CURRENCY SYMBOL
case 0xa6: // BROKEN BAR
case 0xa8: // DIAERESIS
case 0xb4: // ACUTE ACCENT
case 0xb8: // CEDILLA
case 0xbd: // 1/2 FRACTION
case 0xbc: // 1/4 FRACTION
case 0xbe: // 3/4 FRACTION
if (isEncoding(bparams, font, "latin1")
|| isEncoding(bparams, font, "latin5")
|| isEncoding(bparams, font, "utf8")) {
os.put(c);
break;
} else {
switch (c) {
case 0xa4:
os << "\\textcurrency{}";
column += 15;
break;
case 0xa6:
os << "\\textbrokenbar{}";
column += 16;
break;
case 0xa8:
os << "\\textasciidieresis{}";
column += 20;
break;
case 0xb4:
os << "\\textasciiacute{}";
column += 17;
break;
case 0xb8: // from latin1.def:
os << "\\c\\ ";
column += 3;
break;
case 0xbd:
os << "\\textonehalf{}";
column += 14;
break;
case 0xbc:
os << "\\textonequarter{}";
column += 17;
break;
case 0xbe:
os << "\\textthreequarters{}";
column += 20;
break;
}
break;
}
case '$': case '&': case '$': case '&':
case '%': case '#': case '{': case '%': case '#': case '{':
case '}': case '_': case '}': case '_':
@ -771,6 +709,8 @@ void Paragraph::Pimpl::simpleTeXSpecialChars(Buffer const & buf,
default: default:
// I assume this is hack treating typewriter as verbatim // I assume this is hack treating typewriter as verbatim
// FIXME UNICODE: This can fail if c cannot be encoded
// in the current encoding.
if (font.family() == LyXFont::TYPEWRITER_FAMILY) { if (font.family() == LyXFont::TYPEWRITER_FAMILY) {
if (c != '\0') { if (c != '\0') {
os.put(c); os.put(c);
@ -798,7 +738,27 @@ void Paragraph::Pimpl::simpleTeXSpecialChars(Buffer const & buf,
} }
if (pnr == phrases_nr && c != '\0') { if (pnr == phrases_nr && c != '\0') {
os.put(c); Encoding const & encoding = getEncoding(bparams, doc_encoding, font);
if (i < size() - 1) {
char_type next = getChar(i + 1);
if (Encodings::isCombiningChar(next)) {
column += latexSurrogatePair(os, c, next, encoding) - 1;
++i;
break;
}
}
docstring const latex = encoding.latexChar(c);
if (latex.length() > 1 &&
latex[latex.length() - 1] != '}') {
// Prevent eating of a following
// space or command corruption by
// following characters
column += latex.length() + 1;
os << latex << "{}";
} else {
column += latex.length() - 1;
os << latex;
}
} }
break; break;
} }
@ -876,6 +836,7 @@ void Paragraph::Pimpl::validate(LaTeXFeatures & features,
} }
// then the contents // then the contents
Encoding const & doc_encoding = bparams.encoding();
for (pos_type i = 0; i < size() ; ++i) { for (pos_type i = 0; i < size() ; ++i) {
for (size_t pnr = 0; pnr < phrases_nr; ++pnr) { for (size_t pnr = 0; pnr < phrases_nr; ++pnr) {
if (!special_phrases[pnr].builtin if (!special_phrases[pnr].builtin
@ -884,12 +845,12 @@ void Paragraph::Pimpl::validate(LaTeXFeatures & features,
break; break;
} }
} }
// these glyphs require the textcomp package // We do not need the completely realized font, since we are
if (getChar(i) == 0x20ac || getChar(i) == 0xa4 // only interested in the language, and that is never inherited.
|| getChar(i) == 0xa6 || getChar(i) == 0xa8 // Therefore we can use getFontSettings instead of getFont.
|| getChar(i) == 0xb4 || getChar(i) == 0xbd LyXFont const & font = owner_->getFontSettings(bparams, i);
|| getChar(i) == 0xbc || getChar(i) == 0xbe) Encoding const & encoding = getEncoding(bparams, doc_encoding, font);
features.require("textcomp"); encoding.validate(getChar(i), features);
} }
} }

View File

@ -26,6 +26,7 @@
namespace lyx { namespace lyx {
class Encoding;
class LyXLayout; class LyXLayout;
@ -123,16 +124,23 @@ public:
/// ///
FontList fontlist; FontList fontlist;
/// /// Output the surrogate pair formed by \p c and \p next to \p os.
void simpleTeXBlanks(odocstream &, TexRow & texrow, /// \return the number of characters written.
pos_type const i, int latexSurrogatePair(odocstream & os, value_type c, value_type next,
Encoding const &);
/// Output a space in appropriate formatting (or a surrogate pair
/// if the next character is a combining character).
/// \return whether a surrogate pair was output.
bool simpleTeXBlanks(BufferParams const &, Encoding const &,
odocstream &, TexRow & texrow,
pos_type & i,
unsigned int & column, unsigned int & column,
LyXFont const & font, LyXFont const & font,
LyXLayout const & style); LyXLayout const & style);
/// ///
void simpleTeXSpecialChars(Buffer const &, BufferParams const &, void simpleTeXSpecialChars(Buffer const &, BufferParams const &,
odocstream &, TexRow & texrow, Encoding const &, odocstream &,
OutputParams const &, TexRow & texrow, OutputParams const &,
LyXFont & font, LyXFont & running_font, LyXFont & font, LyXFont & running_font,
LyXFont & basefont, LyXFont & basefont,
LyXFont const & outerfont, LyXFont const & outerfont,

View File

@ -64,12 +64,14 @@ public:
typedef typename debug::type Type; typedef typename debug::type Type;
basic_debugstream() basic_debugstream()
: std::basic_ostream<charT, traits>(0), dt(debug::NONE) : std::basic_ostream<charT, traits>(0), dt(debug::NONE),
realbuf_(0), enabled_(true)
{} {}
/// Constructor, sets the debug level to t. /// Constructor, sets the debug level to t.
explicit basic_debugstream(std::basic_streambuf<charT, traits> * buf) explicit basic_debugstream(std::basic_streambuf<charT, traits> * buf)
: std::basic_ostream<charT, traits>(buf), dt(debug::NONE) : std::basic_ostream<charT, traits>(buf), dt(debug::NONE),
realbuf_(0), enabled_(true)
{} {}
/// Sets the debug level to t. /// Sets the debug level to t.
@ -99,11 +101,32 @@ public:
return *this; return *this;
return nullstream; return nullstream;
} }
/// Disable the stream completely: remember the current stream buffer
/// and replace it by the buffer of the no-op stream.
/// Does nothing if the stream is already disabled.
void disable()
{
	if (enabled_) {
		realbuf_ = this->rdbuf();
		// rdbuf() is a member of a dependent base class, so it has
		// to be qualified with this-> to be found by name lookup
		// (as enable() does).
		this->rdbuf(nullstream.rdbuf());
		enabled_ = false;
	}
}
/// Reenable the stream after a call of disable() by reinstalling the
/// remembered stream buffer. Does nothing if the stream is enabled.
void enable()
{
	if (enabled_)
		return;
	this->rdbuf(realbuf_);
	enabled_ = true;
}
private: private:
/// The current debug level /// The current debug level
Type dt; Type dt;
/// The no-op stream. /// The no-op stream.
boost::basic_onullstream<charT, traits> nullstream; boost::basic_onullstream<charT, traits> nullstream;
/// The buffer of the real stream
std::streambuf * realbuf_;
/// Is the stream enabled?
bool enabled_;
}; };
typedef basic_debugstream<debug_trait> debugstream; typedef basic_debugstream<debug_trait> debugstream;