1
0
mirror of https://git.lyx.org/repos/lyx.git synced 2025-01-08 18:19:42 +00:00
lyx_mirror/lib/encodings
Günter Milde 197735747e Support for documents in other scripts with parts in CJK scripts.
Fix CJK support for documents using input-encoding "utf8-cjk"
when the main language is not Chinese, Korean, or Japanese.
2019-01-24 00:40:51 +01:00

254 lines
8.4 KiB
Plaintext

# FIXME: Have a look at the encodings known by the inputenc package and add
# missing ones. Caution: File format change!
# Note that you can only add singlebyte encodings to this file.
# LyX does not support the output of multibyte encodings (e.g. utf16).
# It does support singlebyte encodings with variable width (e.g. utf8).
# These are marked with the "variable" keyword.
# Fixed width encodings are marked with the "fixed" keyword.
# The code points of TeX control characters like {, } and \ can occur in the
# second byte of some variable width encodings. These encodings must not be
# set as document encodings and are marked with the "variableunsafe" keyword.
# They are only needed for proper tex2lyx import.
# Most encodings require loading a latex package such as "inputenc" or "CJK".
# There is no "japanese" latex package, rather this keyword indicates to LyX
# to switch the buffer format and use platex instead of standard (pdf)latex.
# In this case, TeX control characters in high bytes are not a problem.
# The invocation is platex -kanji=<LaTeX name>
# The set of "iconv" supported encodings is system dependent.
# For GNU libiconv, supported encodings are listed at
# https://www.gnu.org/software/libiconv/
# and available via the `iconv --list` command.
# Syntax: Encoding <LyX name> <LaTeX name> <GUI name> <iconv name> <width> <package> End
# LyX name: Name used by the file format and in lib/languages. Must be unique!
# LaTeX name: Used in the latex export or passed to platex as command-line switch.
# GUI name: Displayed in document settings.
# iconv name: Used by iconv.
# width: One of fixed, variable, or variableunsafe (see above).
# package: One of none, inputenc, CJK, or japanese (see above).
# Encodings used with inputenc.sty
# ================================
# "inputenc" is a base LaTeX package that provides an extensible framework
# for conversion of a document encoding into a "LaTeX Internal Character
# Representation" (LICR) and a set of encoding definitions
# (<LaTeX name>.def files). Additional encoding definitions are provided by
# several language support packages.
#
# The following encodings from http://www.ctan.org/pkg/latex-cyrillic are
# not included, because they are not widely used and lack iconv support:
# ctt, dbk, isoir111, koi8-ru, lcyenc, maccyr, macukr, mik, mls, mnk, mos,
# and pt254.
# inputenc's standard utf8 support:
Encoding utf8 utf8 "Unicode (utf8)" UTF-8 variable inputenc
End
# extended utf8 support from the "ucs" package:
# Larger set of supported characters but conflicts with some packages.
Encoding utf8x utf8x "Unicode (ucs-extended) (utf8x)" UTF-8 variable inputenc
End
# from http://www.ctan.org/pkg/armtex
Encoding armscii8 armscii8 "Armenian (ArmSCII8)" ARMSCII-8 fixed inputenc
End
Encoding iso8859-1 latin1 "Western European (ISO 8859-1)" ISO-8859-1 fixed inputenc
End
Encoding iso8859-2 latin2 "Central European (ISO 8859-2)" ISO-8859-2 fixed inputenc
End
Encoding iso8859-3 latin3 "South European (ISO 8859-3)" ISO-8859-3 fixed inputenc
End
Encoding iso8859-4 latin4 "Baltic (ISO 8859-4)" ISO-8859-4 fixed inputenc
End
# from http://www.ctan.org/pkg/latex-cyrillic
Encoding iso8859-5 iso88595 "Cyrillic (ISO 8859-5)" ISO-8859-5 fixed inputenc
End
# from http://www.ctan.org/pkg/arabi
Encoding iso8859-6 8859-6 "Arabic (ISO 8859-6)" ISO-8859-6 fixed inputenc
End
# from http://www.ctan.org/pkg/greek-inputenc
Encoding iso8859-7 iso-8859-7 "Greek (ISO 8859-7)" ISO-8859-7 fixed inputenc
End
Encoding iso8859-8 8859-8 "Hebrew (ISO 8859-8)" ISO-8859-8 fixed inputenc
End
Encoding iso8859-9 latin5 "Turkish (ISO 8859-9)" ISO-8859-9 fixed inputenc
End
Encoding iso8859-13 latin7 "Baltic (ISO 8859-13)" ISO-8859-13 fixed inputenc
End
Encoding iso8859-15 latin9 "Western European (ISO 8859-15)" ISO-8859-15 fixed inputenc
End
Encoding iso8859-16 latin10 "South-Eastern European (ISO 8859-16)" ISO-8859-16 fixed inputenc
End
Encoding applemac applemac "Western European (Macintosh Roman)" Macintosh fixed inputenc
End
Encoding cp437 cp437 "DOS (CP 437)" CP437 fixed inputenc
End
# like cp437, but position 225 holds sz (sharp s) instead of beta
Encoding cp437de cp437de "DOS-de (CP 437-de)" CP437 fixed inputenc
End
Encoding cp850 cp850 "Western European (CP 850)" CP850 fixed inputenc
End
Encoding cp852 cp852 "Central European (CP 852)" CP852 fixed inputenc
End
# from http://www.ctan.org/pkg/latex-cyrillic
Encoding cp855 cp855 "Cyrillic (CP 855)" CP855 fixed inputenc
End
# GNU iconv supports cp858 only if configured with "--enable-extra-encodings"
# (see https://www.gnu.org/software/libiconv/)
Encoding cp858 cp858 "Western European (CP 858)" CP858 fixed inputenc
End
Encoding cp862 cp862 "Hebrew (CP 862)" CP862 fixed inputenc
End
Encoding cp865 cp865 "Nordic languages (CP 865)" CP865 fixed inputenc
End
# from http://www.ctan.org/pkg/latex-cyrillic
Encoding cp866 cp866 "Cyrillic (CP 866)" CP866 fixed inputenc
End
Encoding cp1250 cp1250 "Central European (CP 1250)" CP1250 fixed inputenc
End
# from http://www.ctan.org/pkg/latex-cyrillic
Encoding cp1251 cp1251 "Cyrillic (CP 1251)" CP1251 fixed inputenc
End
# "ansinew" is hardcoded as a synonym of this (see Encodings::fromLaTeXName)
Encoding cp1252 cp1252 "Western European (CP 1252)" CP1252 fixed inputenc
End
Encoding cp1255 cp1255 "Hebrew (CP 1255)" CP1255 fixed inputenc
End
# from http://www.ctan.org/pkg/arabi
Encoding cp1256 cp1256 "Arabic (CP 1256)" CP1256 fixed inputenc
End
Encoding cp1257 cp1257 "Baltic (CP 1257)" CP1257 fixed inputenc
End
# from http://www.ctan.org/pkg/latex-cyrillic
Encoding koi8-r koi8-r "Cyrillic (KOI8-R)" KOI8-R fixed inputenc
End
# from http://www.ctan.org/pkg/latex-cyrillic
Encoding koi8-u koi8-u "Cyrillic (KOI8-U)" KOI8-U fixed inputenc
End
# from http://www.ctan.org/pkg/latex-cyrillic
Encoding pt154 pt154 "Cyrillic (pt 154)" PT154 fixed inputenc
End
# from https://ctan.org/pkg/babel-thai
Encoding tis620-0 tis620 "Thai (TIS 620-0)" TIS620-0 fixed inputenc
End
# encodings used by CJK.sty
# The following encodings that are supported by the CJK package are not
# included here, because they are not widely used and lack proper iconv support:
# Bg5+, GBt
# See the NOTES file of libiconv for details.
# For traditional chinese
Encoding big5 Bg5 "Chinese (traditional) (Big5)" BIG5 variableunsafe CJK
End
# For japanese
# We use CP932 and not SJIS, since it is a superset of SJIS except that it
# translates SJIS 0x5c to U+005C (REVERSE SOLIDUS aka backslash) instead of
# U+00A5 (YEN SIGN). Strictly speaking this is wrong, but there is
# long-standing confusion about which translation is correct, and using
# CP932 is also consistent with the SJIS example file
# <texmf>/doc/latex/cjk/examples/SJIS.tex of the CJK package.
# The CP932 extensions over SJIS do not matter here.
Encoding shift-jis SJIS "Japanese (CJK) (SJIS)" CP932 variableunsafe CJK
End
# The following encodings need hardcoded support of the encodable unicode
# range, but are known by iconv:
# For simplified chinese
Encoding euc-cn GB "Chinese (simplified) (EUC-CN)" EUC-CN variable CJK
End
# For simplified chinese
Encoding gbk GBK "Chinese (simplified) (GBK)" GBK variable CJK
End
# For japanese
Encoding jis JIS "Japanese (CJK) (JIS)" ISO-2022-JP variable CJK
End
# For korean
Encoding euc-kr KS "Korean (EUC-KR)" EUC-KR variable CJK
End
# The CJK package has yet another name for utf8...
Encoding utf8-cjk UTF8 "Unicode (CJK) (utf8)" UTF-8 variable CJK
End
# For traditional chinese
Encoding euc-tw EUC-TW "Chinese (traditional) (EUC-TW)" EUC-TW variable CJK
End
# For japanese
Encoding euc-jp EUC-JP "Japanese (CJK) (EUC-JP)" EUC-JP variable CJK
End
# encodings that do not use a package
# Traditional Japanese TeX programs require the japanese package,
# which is incompatible with CJK and inputenc.
Encoding euc-jp-platex euc "Japanese (pLaTeX) (EUC-JP)" EUC-JP variable japanese
End
Encoding jis-platex jis "Japanese (pLaTeX) (JIS)" ISO-2022-JP variable japanese
End
Encoding shift-jis-platex sjis "Japanese (pLaTeX) (SJIS)" CP932 variable japanese
End
Encoding utf8-platex utf8 "Japanese (pLaTeX) (UTF8)" UTF-8 variable japanese
End
# A plain utf8 encoding that does not use the inputenc package
# nor the LyX-added conversions in lib/unicodesymbols.
# Such an encoding is required for XeTeX and LuaTeX.
Encoding utf8-plain utf8-plain "Unicode (XeTeX) (utf8)" UTF-8 variable none
End
# Pure 7bit ASCII encoding (partially hardcoded in LyX)
Encoding ascii ascii "ASCII" ascii fixed none
End
# Semantic encodings
# Inherit encoding of the context (used by verbatim)
Encoding inherit inherit "" "" fixed none
End