From a3f4f2d1e3c2c9cb6907859a72e9e7e0592fbdc8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20St=C3=B6hr?= Date: Sun, 24 Jun 2012 06:38:33 +0200 Subject: [PATCH] CJK support for tex2lyx - support as best as possible; setting a document language is however not possible - fixes bug #4377 --- src/tex2lyx/Parser.h | 4 +- src/tex2lyx/Preamble.cpp | 12 +++++- src/tex2lyx/test/CJK.tex | 39 +++++++++++++++++ src/tex2lyx/tex2lyx.cpp | 1 + src/tex2lyx/tex2lyx.h | 2 + src/tex2lyx/text.cpp | 90 ++++++++++++++++++++++++++++++++++++++++ 6 files changed, 145 insertions(+), 3 deletions(-) create mode 100644 src/tex2lyx/test/CJK.tex diff --git a/src/tex2lyx/Parser.h b/src/tex2lyx/Parser.h index c0c5685bb8..3cf3dbde83 100644 --- a/src/tex2lyx/Parser.h +++ b/src/tex2lyx/Parser.h @@ -251,6 +251,8 @@ public: void setCatCode(char c, CatCode cat); /// CatCode getCatCode(char c) const; + /// latex name of the current encoding + std::string encoding_latex_; private: /// @@ -265,8 +267,6 @@ private: idocstringstream * iss_; /// idocstream & is_; - /// latex name of the current encoding - std::string encoding_latex_; }; diff --git a/src/tex2lyx/Preamble.cpp b/src/tex2lyx/Preamble.cpp index da9c2e9426..2f2499b842 100644 --- a/src/tex2lyx/Preamble.cpp +++ b/src/tex2lyx/Preamble.cpp @@ -205,7 +205,7 @@ const char * const known_xetex_packages[] = {"arabxetex", "fixlatvian", /// packages that are automatically skipped if loaded by LyX const char * const known_lyx_packages[] = {"amsbsy", "amsmath", "amssymb", -"amstext", "amsthm", "array", "booktabs", "calc", "color", "float", "fontspec", +"amstext", "amsthm", "array", "booktabs", "calc", "CJK", "color", "float", "fontspec", "graphicx", "hhline", "ifthen", "longtable", "makeidx", "multirow", "nomencl", "pdfpages", "rotating", "rotfloat", "splitidx", "setspace", "subscript", "textcomp", "ulem", "url", "varioref", "verbatim", "wrapfig", @@ -734,6 +734,16 @@ void Preamble::handle_package(Parser &p, string const & name, p.setEncoding("utf8"); } + else 
if (name == "CJK") { + // It is impossible to determine the document language if CJK is used. + // All we can do is to notify the user that the language has to be set manually. + have_CJK = true; + // set the encoding to "auto" because it might be set to "default" by the babel handling + // and this would not be correct for CJK + h_inputencoding = "auto"; + registerAutomaticallyLoadedPackage("CJK"); + } + else if (name == "fontenc") { h_fontencoding = getStringFromVector(options, ","); /* We could do the following for better round trip support, diff --git a/src/tex2lyx/test/CJK.tex b/src/tex2lyx/test/CJK.tex new file mode 100644 index 0000000000..49ee35fa56 --- /dev/null +++ b/src/tex2lyx/test/CJK.tex @@ -0,0 +1,39 @@ +\documentclass[english]{article} +\usepackage[T1]{fontenc} +\usepackage[utf8]{inputenc} +\usepackage{CJK} +\usepackage{babel} + +% It is impossible to get the document language because the document could start with a command, +% whitespace, an English word or whatever while the rest is in Japanese. Checking for the first CJK +% environment is no solution because the document could be English and contain only some Japanese. 
+ +\begin{document} + +\begin{CJK}{EUC-JP}{}% +Japanese \end{CJK} \begin{CJK}{UTF8}{}Chinese traditional\end{CJK} \begin{CJK}{EUC-JP}{} +Japanese \end{CJK} +hello +\begin{CJK}{GB}{}% +Chinese simplified \end{CJK}\begin{CJK}{EUC-JP}{hei} Japanese \end{CJK} \begin{CJK}{GB}{}Chinese simplified +\end{CJK} + +\inputencoding{latin9}% +\selectlanguage{english}% +English + +\begin{CJK}{SJIS}{} +Shift_JIS 日本語の文章 +\end{CJK} + +hello + +\begin{CJK}{JIS}{} +JIS-code $BF|K\8l$NJ8>O(B +\end{CJK} + +\begin{CJK}{KS}{}% +Korean +\end{CJK} + +\end{document} diff --git a/src/tex2lyx/tex2lyx.cpp b/src/tex2lyx/tex2lyx.cpp index d890130b6b..37bcf7d038 100644 --- a/src/tex2lyx/tex2lyx.cpp +++ b/src/tex2lyx/tex2lyx.cpp @@ -332,6 +332,7 @@ bool checkModule(string const & name, bool command) bool noweb_mode = false; bool pdflatex = false; bool xetex = false; +bool have_CJK = false; bool roundtrip = false; diff --git a/src/tex2lyx/tex2lyx.h b/src/tex2lyx/tex2lyx.h index 085bc6f4de..b448d6d46d 100644 --- a/src/tex2lyx/tex2lyx.h +++ b/src/tex2lyx/tex2lyx.h @@ -166,6 +166,8 @@ extern bool noweb_mode; extern bool pdflatex; /// Did we recognize any xetex-only construct? extern bool xetex; +/// Do we have CJK? +extern bool have_CJK; /// LyX format that is created by tex2lyx extern int const LYX_FORMAT; diff --git a/src/tex2lyx/text.cpp b/src/tex2lyx/text.cpp index b01925ea30..add0541b8f 100644 --- a/src/tex2lyx/text.cpp +++ b/src/tex2lyx/text.cpp @@ -152,6 +152,27 @@ const char * const coded_polyglossia_languages[] = { "american", "ancientgreek", "australian", "british", "greek", "newzealand", "polutonikogreek", 0}; +/** + * supported CJK encodings + */ +const char * const supported_CJK_encodings[] = { +"EUC-JP", "KS", "GB", "UTF8", 0}; + +/** + * the same as supported_CJK_encodings with .lyx names + * please keep this in sync with supported_CJK_encodings line by line! 
+ */ +const char * const coded_supported_CJK_encodings[] = { +"japanese-cjk", "korean", "chinese-simplified", "chinese-traditional", 0}; +/** returns the LyX language name paired with \p encoding in the two tables above, or \p encoding itself if is_known() finds no match */ +string CJK2lyx(string const & encoding) +{ + char const * const * where = is_known(encoding, supported_CJK_encodings); + if (where) + return coded_supported_CJK_encodings[where - supported_CJK_encodings]; + return encoding; +} + /*! * natbib commands. * The starred forms are also known except for "citefullauthor", @@ -1433,6 +1454,57 @@ void parse_environment(Parser & p, ostream & os, bool outer, os << "\n\\begin_layout Standard\n"; } + else if (name == "CJK") { + // the scheme is \begin{CJK}{encoding}{mapping}text\end{CJK} + // It is impossible to decide if a CJK environment was in its own paragraph or within + // a line. We therefore always assume a paragraph since the latter is a rare case. + eat_whitespace(p, os, parent_context, false); + parent_context.check_end_layout(os); + // store the encoding to be able to reset it + string const encoding_old = p.encoding_latex_; + string const encoding = p.getArg('{', '}'); + // SJIS and BIG5 don't work with LaTeX according to the comment in unicode.cpp + // JIS does not work with LyX's encoding conversion + if (encoding != "SJIS" && encoding != "BIG5" && encoding != "JIS") + p.setEncoding(encoding); + else + p.setEncoding("utf8"); + // LyX doesn't support the second argument so if + // this is used we need to output everything as ERT + string const mapping = p.getArg('{', '}'); + if ( (!mapping.empty() && mapping != " ") + || (!is_known(encoding, supported_CJK_encodings))) { + parent_context.check_layout(os); + handle_ert(os, "\\begin{" + name + "}{" + encoding + "}{" + mapping + "}", + parent_context); + // we must parse the content as verbatim because e.g. 
SJIS can contain + // normally invalid characters + string const s = p.plainEnvironment("CJK"); + // backslashes and dollar signs are emitted as ERT, every other character is written as is + for (string::const_iterator it = s.begin(), et = s.end(); it != et; ++it) { + if (*it == '\\') + handle_ert(os, "\\", parent_context); + else if (*it == '$') + handle_ert(os, "$", parent_context); + else + os << *it; + } + p.skip_spaces(); + handle_ert(os, "\\end{" + name + "}", + parent_context); + } else { + string const lang = CJK2lyx(encoding); + // store the language because we must reset it at the end + string const lang_old = parent_context.font.language; + parent_context.font.language = lang; + parse_text_in_inset(p, os, FLAG_END, outer, parent_context); + parent_context.font.language = lang_old; + parent_context.new_paragraph(os); + } + p.setEncoding(encoding_old); // restore via setEncoding(), mirroring the switch above; assigning encoding_latex_ alone only changes the stored name + p.skip_spaces(); + } + else if (name == "lyxgreyedout") { eat_whitespace(p, os, parent_context, false); parent_context.check_layout(os); @@ -2029,6 +2101,24 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer, while (p.good()) { Token const & t = p.get_token(); + // it is impossible to determine the correct document language if CJK is used. + // Therefore write a note at the beginning of the document + if (have_CJK) { + context.check_layout(os); + begin_inset(os, "Note Note\n"); + os << "status open\n\\begin_layout Plain Layout\n" + << "\\series bold\n" + << "Important information:\n" + << "\\end_layout\n\n" + << "\\begin_layout Plain Layout\n" + << "This document contains text in Chinese, Japanese or Korean.\n" + << " It was therefore impossible for tex2lyx to set the correct document language for your document." + << " Please set it in the document settings by yourself!\n" + << "\\end_layout\n"; + end_inset(os); + have_CJK = false; + } + #ifdef FILEDEBUG debugToken(cerr, t, flags); #endif