ctests: update CJK export tests.

This commit is contained in:
Günter Milde 2019-01-28 17:24:39 +01:00
parent 4435be026e
commit 964c15f80b
9 changed files with 525 additions and 92 deletions

View File

@ -0,0 +1,126 @@
#LyX 2.4 created this file. For more info see https://www.lyx.org/
\lyxformat 566
\begin_document
\begin_header
\save_transient_properties true
\origin unavailable
\textclass article
\use_default_options true
\maintain_unincluded_children false
\language greek
\language_package default
\inputencoding utf8-cjk
\fontencoding auto
\font_roman "lmodern" "default"
\font_sans "default" "default"
\font_typewriter "default" "default"
\font_math "auto" "auto"
\font_default_family default
\use_non_tex_fonts false
\font_sc false
\font_osf false
\font_sf_scale 100 100
\font_tt_scale 100 100
\font_cjk gkai
\use_microtype false
\use_dash_ligatures true
\graphics default
\default_output_format pdf2
\output_sync 0
\bibtex_command default
\index_command default
\float_placement class
\float_alignment class
\paperfontsize default
\spacing single
\use_hyperref false
\papersize default
\use_geometry false
\use_package amsmath 1
\use_package amssymb 1
\use_package cancel 1
\use_package esint 1
\use_package mathdots 1
\use_package mathtools 1
\use_package mhchem 1
\use_package stackrel 1
\use_package stmaryrd 1
\use_package undertilde 1
\cite_engine basic
\cite_engine_type default
\biblio_style plain
\use_bibtopic false
\use_indices false
\paperorientation portrait
\suppress_date false
\justification true
\use_refstyle 1
\use_minted 0
\index Index
\shortcut idx
\color #008000
\end_index
\secnumdepth 3
\tocdepth 3
\paragraph_separation indent
\paragraph_indentation default
\is_math_indent 0
\math_numbering_side default
\quotes_style french
\dynamic_quotes 0
\papercolumns 1
\papersides 1
\paperpagestyle default
\tracking_changes false
\output_changes false
\html_math_output 0
\html_css_as_file 0
\html_be_strict false
\end_header
\begin_body
\begin_layout Enumerate
This is a Greek document with LGR fonts:
\end_layout
\begin_layout Enumerate
ASCII characters are treated as a Latin transliteration and become Greek
letters in the output: Greetings; Grüße
\end_layout
\begin_layout Enumerate
Non-ASCII works fine: Приветы; χαιρετισμός, 迎接.
\end_layout
\begin_layout Enumerate
Setting the correct language does help:
\lang english
Greetings;
\lang ngerman
Grüße
\end_layout
\begin_layout Enumerate
However, not with CJK languages (that are not given
\emph on
Babel
\emph default
language tags):
\end_layout
\begin_deeper
\begin_layout Standard
\lang chinese-simplified
Chinesisch: 你还需要关心什么东西应该出现在页面上的什么位置强调某处文本就是改变一下字体。这就是所见即所得的字处理软件的哲学根基WYSIWYG
What You See Is What You Get。不幸的是它常常变为“所见到的就是你的所有”。
\end_layout
\end_deeper
\begin_layout Enumerate
This is a pity
\end_layout
\end_body
\end_document

View File

@ -0,0 +1,54 @@
%% LyX 2.4.0dev created this file. For more info, see https://www.lyx.org/.
%% Do not edit unless you really know what you are doing.
\documentclass[ngerman,greek,russian,english]{article}
\usepackage{CJKutf8}
\usepackage{DejaVuSerif}
\usepackage[T2A,LGR,T1]{fontenc}
\usepackage[utf8]{inputenc}
\usepackage{textcomp}
\makeatletter
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% LyX specific LaTeX commands.
% \LyX: the LyX logo, typeset from the letters L, Y, X with negative kerns
% and a lowered "Y". \providecommand only defines it if \LyX does not
% exist yet, so an earlier definition is left untouched.
\providecommand{\LyX}{L\kern-.1667em\lower.25em\hbox{Y}\kern-.125emX\@}
% \lyxarrow: separator arrow used in the document body between menu names.
% The starred form (\lyxarrow*) prints a left-pointing triangle, the
% unstarred form a right-pointing one; both surround the symbol with thin
% spaces and allow a line break after it.
\DeclareRobustCommand*{\lyxarrow}{%
\@ifstar
{\leavevmode\,$\triangleleft$\,\allowbreak}
{\leavevmode\,$\triangleright$\,\allowbreak}}
\makeatother
\usepackage{babel}
\begin{document}
For a multi-lingual document, with parts in European languages, you
can use the CJK LaTeX bundle.
\begin{enumerate}
\item \begin{CJK}{UTF8}{min}\LyX は優秀な取扱説明書を同梱していますので、これをお使いください!まずは\textsf{ヘルプ\lyxarrow はじめの一歩}から始めてください。これは、各取扱説明書を簡潔に紹介しています。つぎに\textsf{ヘルプ\lyxarrow 入門篇}をお読みになれば、\LyX の使い方を学ぶことができます。
\item \end{CJK}\inputencoding{latin9}Short texts may be written without
setting the language (hyphenation will be missing and spell-checking
complain): Greetings; Gr\"u{\ss}e; \textcyrillic{\CYRP\cyrr\cyri\cyrv\cyre\cyrt\cyrery};
\textgreek{\textLambda\textomicron\textgamma\textomicron\textfinalsigma}
\item For longer text parts, it is recommended to set the correct language:
\begin{description}
\item [{English:}] Greetings from \"Osterreich 5~\textmu m snow.
\selectlanguage{ngerman}%
\item [{Deutsch:}] Gr\"u{\ss}e aus \"Osterreich 5~\textmu m Schnee.
\item [{Griechisch:}] \foreignlanguage{greek}{\textEta \textbf{\textEpsilon\textlambda\textlambda\'\textalpha\textdelta\textalpha}
(\textsigma\texttau\texteta\textnu \textkappa\textalpha\texttheta\textalpha\textrho\textepsilon\'\textupsilon\textomicron\textupsilon\textsigma\textalpha
\textEpsilon\textlambda\textlambda\'\textalpha\textfinalsigma), \textsigma\textupsilon\textnu\texttau\textalpha\textgamma\textmu\textalpha\texttau\textiota\textkappa\'\textomicron
\'\textomicron\textnu\textomicron\textmu\textalpha \textEpsilon\textlambda\textlambda\texteta\textnu\textiota\textkappa\'\texteta
\textDelta\texteta\textmu\textomicron\textkappa\textrho\textalpha\texttau\'\textiota\textalpha,
\textepsilon\'\textiota\textnu\textalpha\textiota \textchi\'\textomega\textrho\textalpha
\texttau\texteta\textfinalsigma \textnu\textomicron\texttau\textiota\textomicron\textalpha\textnu\textalpha\texttau\textomicron\textlambda\textiota\textkappa\'\texteta\textfinalsigma
\textEpsilon\textupsilon\textrho\'\textomega\textpi\texteta\textfinalsigma
\textsigma\texttau\textomicron \textnu\textomicron\texttau\textiota\'\textomicron\texttau\textepsilon\textrho\textomicron
\'\textalpha\textkappa\textrho\textomicron \texttau\texteta\textfinalsigma
\textBeta\textalpha\textlambda\textkappa\textalpha\textnu\textiota\textkappa\'\texteta\textfinalsigma
\textchi\textepsilon\textrho\textsigma\textomicron\textnu\'\texteta\textsigma\textomicron\textupsilon.}
\item [{Russisch:}] \foreignlanguage{russian}{\CYRP\cyrr\cyri\cyrv\cyre\cyrt
\cyrs \textbf{\cyrn\cyro\cyrv\cyrery\cyrm} \cyrg\cyro\cyrd\cyro\cyrm!
\CYRU \cyrn\cyra\cyrs \cyrerev\cyrs\cyrt\cyrsftsn 5~\textmu m \CYRS\cyrn\cyre\cyrg.}\selectlanguage{english}%
\end{description}
\end{enumerate}
\end{document}

View File

@ -0,0 +1,123 @@
#LyX 2.4 created this file. For more info see https://www.lyx.org/
\lyxformat 566
\begin_document
\begin_header
\save_transient_properties true
\origin unavailable
\textclass article
\use_default_options true
\maintain_unincluded_children false
\language english
\language_package default
\inputencoding utf8-cjk
\fontencoding auto
\font_roman "lmodern" "default"
\font_sans "default" "default"
\font_typewriter "default" "default"
\font_math "auto" "auto"
\font_default_family default
\use_non_tex_fonts false
\font_sc false
\font_osf false
\font_sf_scale 100 100
\font_tt_scale 100 100
\font_cjk gkai
\use_microtype false
\use_dash_ligatures true
\graphics default
\default_output_format pdf2
\output_sync 0
\bibtex_command default
\index_command default
\float_placement class
\float_alignment class
\paperfontsize default
\spacing single
\use_hyperref false
\papersize default
\use_geometry false
\use_package amsmath 1
\use_package amssymb 1
\use_package cancel 1
\use_package esint 1
\use_package mathdots 1
\use_package mathtools 1
\use_package mhchem 1
\use_package stackrel 1
\use_package stmaryrd 1
\use_package undertilde 1
\cite_engine basic
\cite_engine_type default
\biblio_style plain
\use_bibtopic false
\use_indices false
\paperorientation portrait
\suppress_date false
\justification true
\use_refstyle 1
\use_minted 0
\index Index
\shortcut idx
\color #008000
\end_index
\secnumdepth 3
\tocdepth 3
\paragraph_separation indent
\paragraph_indentation default
\is_math_indent 0
\math_numbering_side default
\quotes_style english
\dynamic_quotes 0
\papercolumns 1
\papersides 1
\paperpagestyle default
\tracking_changes false
\output_changes false
\html_math_output 0
\html_css_as_file 0
\html_be_strict false
\end_header
\begin_body
\begin_layout Enumerate
This is an English document with CJK text parts;
\begin_inset Quotes eld
\end_inset
inputencoding
\begin_inset Quotes erd
\end_inset
is utf8-cjk.
\end_layout
\begin_layout Enumerate
Short texts in other languages: Grüße; Приветы; χαιρετισμός, 迎接.
\end_layout
\begin_deeper
\begin_layout Standard
The last paragraph is Standard and nested in an environment.
LaTeX complains:
\begin_inset Newline newline
\end_inset
! LaTeX Error:
\backslash
begin{enumerate} on input line 27 ended by
\backslash
end{CJK}.
\begin_inset Newline newline
\end_inset
The output is OK.
\end_layout
\begin_layout Standard
We must not close the CJK environment in a nested context.
\end_layout
\end_deeper
\end_body
\end_document

View File

@ -5,12 +5,6 @@
\save_transient_properties true \save_transient_properties true
\origin unavailable \origin unavailable
\textclass article \textclass article
\begin_preamble
% Fix the MIKRO SIGN symbol:
% CJKutf8 overwrites \textmu with $\mu$,
% use the symbol from "textcomp" or the Greek fonts instead
%\DeclareTextSymbolDefault{\textmu}{TS1}
\end_preamble
\use_default_options true \use_default_options true
\maintain_unincluded_children false \maintain_unincluded_children false
\language english \language english
@ -109,7 +103,7 @@ CJKutf8 defines the MICRO SIGN
\end_layout \end_layout
\begin_layout Itemize \begin_layout Itemize
The package The packages
\begin_inset Quotes eld \begin_inset Quotes eld
\end_inset \end_inset
@ -117,8 +111,8 @@ textcomp
\begin_inset Quotes erd \begin_inset Quotes erd
\end_inset \end_inset
overwrites this definition with a correct looking symbol, but only if loaded and the Greek (LGR) font definition file overwrite this definition with
a correct symbol, but only if loaded
\series bold \series bold
after after
\series default \series default
@ -126,15 +120,7 @@ CJKutf8.
\end_layout \end_layout
\begin_layout Itemize \begin_layout Itemize
LyX gets this right for LyX now loads CJKutf8 before the font setup, so that
\begin_inset Quotes eld
\end_inset
textcomp
\begin_inset Quotes erd
\end_inset
but not for
\begin_inset Quotes eld \begin_inset Quotes eld
\end_inset \end_inset
@ -142,15 +128,16 @@ libertine
\begin_inset Quotes erd \begin_inset Quotes erd
\end_inset \end_inset
that internally loads textcomp and is called before CJKutf8 by LyX. (that internally loads textcomp) does not lead to wrong output.
\end_layout \end_layout
\begin_layout Itemize \begin_layout Itemize
A workaround is adding A workaround for earlier LyX-versions is adding
\end_layout \end_layout
\begin_deeper \begin_deeper
\begin_layout LyX-Code \begin_layout Verbatim
\backslash \backslash
DeclareTextSymbolDefault{ DeclareTextSymbolDefault{
@ -168,34 +155,23 @@ Example:
\end_layout \end_layout
\begin_layout Enumerate \begin_layout Enumerate
In order to include Chinese, Korean, or Japanese text, we use CJKutf8.
\lang chinese-simplified
In order to include Chinese text (强调某处文本就是改变一下字体), we use CJKutf8.
\end_layout \end_layout
\begin_layout Enumerate \begin_layout Enumerate
Short texts in Latin, Greek, and Cyrillic may be written without setting Short texts may be written without setting the language (hyphenation will
the language (hyphenation will be missing and spell-checking complain): be missing and spell-checking complain): Greetings; Grüße; Приветы; χαιρετισμός
Greetings; Grüße; Приветы; χαιρετισμός. , 迎接.
\end_layout \end_layout
\begin_layout Enumerate \begin_layout Enumerate
Setting the correct language does not help: Setting the correct language does not change the appearance of the letter
MU:
\end_layout \end_layout
\begin_deeper \begin_deeper
\begin_layout Description \begin_layout Description
\lang ngerman
Deutsch: Grüße aus 5
\begin_inset space ~
\end_inset
µm Schnee.
\end_layout
\begin_layout Description
\lang ngerman \lang ngerman
Griechisch: Griechisch:
\lang greek \lang greek

View File

@ -0,0 +1,156 @@
#LyX 2.4 created this file. For more info see https://www.lyx.org/
\lyxformat 566
\begin_document
\begin_header
\save_transient_properties true
\origin unavailable
\textclass article
\use_default_options false
\maintain_unincluded_children false
\language chinese-simplified
\language_package auto
\inputencoding utf8-cjk
\fontencoding auto
\font_roman "DejaVuSerif" "default"
\font_sans "default" "default"
\font_typewriter "default" "default"
\font_math "auto" "auto"
\font_default_family default
\use_non_tex_fonts false
\font_sc false
\font_osf false
\font_sf_scale 100 100
\font_tt_scale 100 100
\font_cjk gbsn
\use_microtype false
\use_dash_ligatures false
\graphics default
\default_output_format pdf2
\output_sync 0
\bibtex_command default
\index_command default
\float_placement class
\float_alignment class
\paperfontsize default
\spacing single
\use_hyperref false
\papersize default
\use_geometry false
\use_package amsmath 1
\use_package amssymb 1
\use_package cancel 1
\use_package esint 1
\use_package mathdots 1
\use_package mathtools 1
\use_package mhchem 1
\use_package stackrel 1
\use_package stmaryrd 1
\use_package undertilde 1
\cite_engine basic
\cite_engine_type default
\biblio_style plain
\use_bibtopic false
\use_indices false
\paperorientation portrait
\suppress_date false
\justification true
\use_refstyle 0
\use_minted 0
\index 索引
\shortcut idx
\color #008000
\end_index
\secnumdepth 3
\tocdepth 3
\paragraph_separation indent
\paragraph_indentation default
\is_math_indent 0
\math_numbering_side default
\quotes_style english
\dynamic_quotes 0
\papercolumns 1
\papersides 1
\paperpagestyle default
\tracking_changes false
\output_changes false
\html_math_output 0
\html_css_as_file 0
\html_be_strict false
\end_header
\begin_body
\begin_layout Standard
\lang english
For multi-lingual documents with parts in European languages, you can use
the CJK LaTeX bundle.
\end_layout
\begin_layout Enumerate
Chinesisch: 就是说我们都已经习惯于关心字符排版的细枝末节,几乎所有的字处理软件也都是这种理念。人们还在使用 Tab 键创建空白;你还需要关心什么东西
应该出现在页面上的什么位置;强调某处文本就是改变一下字体
\end_layout
\begin_layout Enumerate
Short texts may be written without setting the language (hyphenation will
be missing and spell-checking complain): Greetings; Grüße; Приветы; χαιρετισμός
\end_layout
\begin_layout Enumerate
For longer text parts, it is recommended to set the correct language:
\end_layout
\begin_deeper
\begin_layout Description
\lang english
English: Greetings from Österreich 5
\begin_inset space ~
\end_inset
µm snow.
\end_layout
\begin_layout Description
\lang ngerman
Deutsch: Grüße aus Österreich 5
\begin_inset space ~
\end_inset
µm Schnee.
\end_layout
\begin_layout Description
\lang ngerman
Griechisch:
\lang greek
Η
\series bold
Ελλάδα
\series default
(στην καθαρεύουσα Ελλάς), συνταγματικό όνομα Ελληνική Δημοκρατία, είναι
χώρα της νοτιοανατολικής Ευρώπης στο νοτιότερο άκρο της Βαλκανικής χερσονήσου.
\end_layout
\begin_layout Description
\lang ngerman
Russisch:
\lang russian
Привет с
\series bold
новым
\series default
годом! У нас эсть 5
\begin_inset space ~
\end_inset
µm Снег.
\end_layout
\end_deeper
\end_body
\end_document

View File

@ -5,10 +5,10 @@
\save_transient_properties true \save_transient_properties true
\origin unavailable \origin unavailable
\textclass article \textclass article
\use_default_options true \use_default_options false
\maintain_unincluded_children false \maintain_unincluded_children false
\language chinese-simplified \language chinese-simplified
\language_package default \language_package auto
\inputencoding utf8 \inputencoding utf8
\fontencoding auto \fontencoding auto
\font_roman "DejaVuSerif" "default" \font_roman "DejaVuSerif" "default"
@ -23,7 +23,7 @@
\font_tt_scale 100 100 \font_tt_scale 100 100
\font_cjk gbsn \font_cjk gbsn
\use_microtype false \use_microtype false
\use_dash_ligatures true \use_dash_ligatures false
\graphics default \graphics default
\default_output_format pdf2 \default_output_format pdf2
\output_sync 0 \output_sync 0
@ -54,9 +54,9 @@
\paperorientation portrait \paperorientation portrait
\suppress_date false \suppress_date false
\justification true \justification true
\use_refstyle 1 \use_refstyle 0
\use_minted 0 \use_minted 0
\index Index \index 索引
\shortcut idx \shortcut idx
\color #008000 \color #008000
\end_index \end_index
@ -83,28 +83,36 @@
\begin_layout Standard \begin_layout Standard
\lang english \lang english
Test CJK with the vanilla For multi-lingual documents with parts in European languages, you can use
\begin_inset Quotes eld the CJK LaTeX bundle.
\end_inset
utf8
\begin_inset Quotes erd
\end_inset
input encoding.
\end_layout \end_layout
\begin_layout Quote \begin_layout Enumerate
Chinesisch: 就是说我们都已经习惯于关心字符排版的细枝末节,几乎所有的字处理软件也都是这种理念。人们还在使用 Tab 键创建空白;你还需要关心什么东西
应该出现在页面上的什么位置;强调某处文本就是改变一下字体
\end_layout
\begin_layout Enumerate
Short texts may be written without setting the language (hyphenation will
be missing and spell-checking complain): Greetings; Grüße; Приветы; χαιρετισμός
\end_layout
\begin_layout Enumerate
For longer text parts, it is recommended to set the correct language:
\end_layout
\begin_deeper
\begin_layout Description
\lang english \lang english
English: Grüße aus Österreich 5 English: Greetings from Österreich 5
\begin_inset space ~ \begin_inset space ~
\end_inset \end_inset
µm Schnee. µm snow.
\end_layout \end_layout
\begin_layout Standard \begin_layout Description
\lang ngerman \lang ngerman
Deutsch: Grüße aus Österreich 5 Deutsch: Grüße aus Österreich 5
@ -112,38 +120,22 @@ Deutsch: Grüße aus Österreich 5
\end_inset \end_inset
µm Schnee. µm Schnee.
\begin_inset Foot
status open
\begin_layout Plain Layout
\lang ngerman
Gibts Brötchen?
\end_layout \end_layout
\end_inset \begin_layout Description
\end_layout
\begin_layout Standard
\lang ngerman \lang ngerman
Griechisch: Griechisch:
\lang greek \lang greek
Γρυσσε αυσ Οεστερρειχ 5 Η
\lang chinese-simplified \series bold
Ελλάδα
\begin_inset space ~ \series default
\end_inset (στην καθαρεύουσα Ελλάς), συνταγματικό όνομα Ελληνική Δημοκρατία, είναι
χώρα της νοτιοανατολικής Ευρώπης στο νοτιότερο άκρο της Βαλκανικής χερσονήσου.
µm
\lang greek
Σνη.
\end_layout \end_layout
\begin_layout Standard \begin_layout Description
\lang ngerman \lang ngerman
Russisch: Russisch:
@ -159,12 +151,6 @@ Russisch:
µm Снег. µm Снег.
\end_layout \end_layout
\begin_layout Standard \end_deeper
Chinesisch: 你还需要关心什么东西应该出现在页面上的什么位置强调某处文本就是改变一下字体。这就是所见即所得的字处理软件的哲学根基WYSIWYG
What You See Is What You Get。不幸的是它常常变为“所见到的就是你的所有”。Frühe
\lang ngerman
Grüße
\end_layout
\end_body \end_body
\end_document \end_document

View File

@ -14,9 +14,9 @@
\language_package default \language_package default
\inputencoding utf8-cjk \inputencoding utf8-cjk
\fontencoding auto \fontencoding auto
\font_roman "lmodern" "Noto Sans CJK SC" \font_roman "lmodern" "WenQuanYi Micro Hei"
\font_sans "default" "Noto Sans CJK SC Light" \font_sans "default" "WenQuanYi Micro Hei"
\font_typewriter "default" "Noto Sans Mono CJK SC" \font_typewriter "default" "WenQuanYi Micro Hei Mono"
\font_math "auto" "auto" \font_math "auto" "auto"
\font_default_family default \font_default_family default
\use_non_tex_fonts false \use_non_tex_fonts false

View File

@ -88,9 +88,12 @@ export/examples/ja/multilingual_.*_systemF
# input-encoding "utf8" should work for documents using a CJK language: # input-encoding "utf8" should work for documents using a CJK language:
export/export/latex/CJK/.*-en-de-el-ru_utf8_pdf2 export/export/latex/CJK/.*-en-de-el-ru_utf8_pdf2
# #
# but not so easy if the main language does not require CJK: # but that is not so easy if the main language does not require CJK:
export/export/latex/CJK/en-de-el-ru-.*_utf8_pdf2 export/export/latex/CJK/en-de-el-ru-.*_utf8_pdf2
# #
# CJK environment closes too early when the final paragraph is nested:
export/export/latex/CJK/final-paragraph-nested_utf8-cjk_pdf2
#
# "language default" legacy encodings fail (missing fonts) # "language default" legacy encodings fail (missing fonts)
export/export/latex/CJK/ko_default_pdf2 export/export/latex/CJK/ko_default_pdf2
export/export/latex/CJK/zh_CN_default_pdf2 export/export/latex/CJK/zh_CN_default_pdf2

View File

@ -117,10 +117,19 @@ export/templates/acmart_dvi.*
#export/templates/acmart_ps # not tested by ctest autotests #export/templates/acmart_ps # not tested by ctest autotests
export/templates/acmart_pdf export/templates/acmart_pdf
# lyx2lyx back-conversion of "Date" info-inset writes # lyx2lyx back-conversion of "Date" info-inset writes
# the name of the day in English instead of Japanese. # the name of the day in English instead of Japanese.
export/examples/ja/multilingual_lyx.* export/examples/ja/multilingual_lyx.*
# CJKutf8 uses $\mu$ for \textmu unless overwritten by textcomp. # CJKutf8 uses $\mu$ for \textmu unless overwritten by textcomp.
# libertine loads textcomp and is loaded before CJKutf8 # libertine loads textcomp and is loaded before CJKutf8
export/export/latex/CJK/micro-sign_utf8-cjk_libertine.* export/export/latex/CJK/micro-sign_utf8-cjk_libertine.*
# No localization of auto-strings with Chinese language:
export/export/latex/CJK/zh_CN-toc.*
# CJK languages don't change/reset the "language" (which is problematic for
# documents in a language that uses a non-standard font encoding).
# In a Greek document, Latin letters in a text part set to "Chinese"
# come out as Greek letters:
export/export/latex/CJK/el-zh_CN_utf8-cjk.*