diff --git a/src/tex2lyx/Context.cpp b/src/tex2lyx/Context.cpp index 2db797d67d..ecf6a14199 100644 --- a/src/tex2lyx/Context.cpp +++ b/src/tex2lyx/Context.cpp @@ -113,6 +113,9 @@ void Context::begin_layout(ostream & os, Layout const * const & l) } // FIXME: This is not enough for things like // \\Huge par1 \\par par2 + // FIXME: If the document language is not english this outputs a + // superfluous language change. Fortunately this is only file format + // bloat and does not change the TeX export of LyX. output_font_change(os, normalfont, font); } diff --git a/src/tex2lyx/Parser.cpp b/src/tex2lyx/Parser.cpp index 530e73aa47..01f14b8dc9 100644 --- a/src/tex2lyx/Parser.cpp +++ b/src/tex2lyx/Parser.cpp @@ -552,7 +552,7 @@ void Parser::tokenize_one() push_back(Token(s, catSpace)); break; } - + case catNewline: { ++lineno_; docstring s(1, getNewline(is_, c)); @@ -565,7 +565,7 @@ void Parser::tokenize_one() push_back(Token(s, catNewline)); break; } - + case catComment: { // We don't treat "%\n" combinations here specially because // we want to preserve them in the preamble @@ -581,7 +581,7 @@ void Parser::tokenize_one() push_back(Token(s, catComment)); break; } - + case catEscape: { is_.get(c); if (!is_) { @@ -599,12 +599,12 @@ void Parser::tokenize_one() } break; } - + case catIgnore: { cerr << "ignoring a char: " << c << "\n"; break; } - + default: push_back(Token(docstring(1, c), catcode(c))); } diff --git a/src/tex2lyx/Preamble.cpp b/src/tex2lyx/Preamble.cpp index afa0997904..430708c0fc 100644 --- a/src/tex2lyx/Preamble.cpp +++ b/src/tex2lyx/Preamble.cpp @@ -44,9 +44,8 @@ Preamble preamble; namespace { -// "chinese-simplified", "chinese-traditional", "japanese-cjk", "korean" -// cannot be supported because it is impossible to determine the correct document -// language if CJK is used. +// CJK languages are handled in text.cpp, polyglossia languages are listed +// further down. 
/** * known babel language names (including synonyms) * not in standard babel: arabic, arabtex, armenian, belarusian, serbian-latin, thai @@ -88,6 +87,9 @@ const char * const known_coded_languages[] = {"french", "afrikaans", "albanian", "uppersorbian", "uppersorbian", "english", "english", "vietnamese", "welsh", 0}; +/// languages with danish quotes (.lyx names) +const char * const known_danish_quotes_languages[] = {"danish", 0}; + /// languages with english quotes (.lyx names) const char * const known_english_quotes_languages[] = {"american", "australian", "bahasa", "bahasam", "brazilian", "canadian", "chinese-simplified", "english", @@ -446,6 +448,7 @@ Preamble::Preamble() : one_language(true), title_layout_found(false) h_font_roman = "default"; h_font_sans = "default"; h_font_typewriter = "default"; + h_font_math = "auto"; h_font_default_family = "default"; h_use_non_tex_fonts = "false"; h_font_sc = "false"; @@ -506,7 +509,7 @@ Preamble::Preamble() : one_language(true), title_layout_found(false) h_use_indices = "false"; h_use_geometry = "false"; h_use_default_options = "false"; - h_use_hyperref = "0"; + h_use_hyperref = "false"; h_use_refstyle = "0"; h_use_packages["amsmath"] = "1"; h_use_packages["amssymb"] = "0"; @@ -522,7 +525,7 @@ void Preamble::handle_hyperref(vector<string> & options) { // FIXME swallow inputencoding changes that might surround the // hyperref setup if it was written by LyX - h_use_hyperref = "1"; + h_use_hyperref = "true"; // swallow "unicode=true", since LyX does always write that vector<string>::iterator it = find(options.begin(), options.end(), "unicode=true"); @@ -638,6 +641,7 @@ void Preamble::handle_package(Parser &p, string const & name, vector<string> options = split_options(opts); add_package(name, options); string scale; + char const * const * where = 0; if (is_known(name, known_xetex_packages)) { xetex = true; @@ -648,10 +652,8 @@ void Preamble::handle_package(Parser &p, string const & name, } // roman fonts - if (is_known(name, known_roman_fonts)) { 
+ if (is_known(name, known_roman_fonts)) h_font_roman = name; - p.skip_spaces(); - } if (name == "fourier") { h_font_roman = "utopia"; @@ -754,9 +756,6 @@ void Preamble::handle_package(Parser &p, string const & name, } else if (name == "CJK") { - // It is impossible to determine the document language if CJK is used. - // All we can do is to notify the user that he has to set this by himself. - have_CJK = true; // set the encoding to "auto" because it might be set to "default" by the babel handling // and this would not be correct for CJK if (h_inputencoding == "default") @@ -816,10 +815,16 @@ void Preamble::handle_package(Parser &p, string const & name, else if (is_known(name, known_lyx_packages) && options.empty()) { if (name == "splitidx") h_use_indices = "true"; - if (!in_lyx_preamble) + if (!in_lyx_preamble) { h_preamble << package_beg_sep << name << package_mid_sep << "\\usepackage{" - << name << "}\n" << package_end_sep; + << name << '}'; + if (p.next_token().cat() == catNewline || + (p.next_token().cat() == catSpace && + p.next_next_token().cat() == catNewline)) + h_preamble << '\n'; + h_preamble << package_end_sep; + } } else if (name == "geometry") @@ -828,8 +833,8 @@ void Preamble::handle_package(Parser &p, string const & name, else if (name == "subfig") ; // ignore this FIXME: Use the package separator mechanism instead - else if (is_known(name, known_languages)) - h_language = name; + else if ((where = is_known(name, known_languages))) + h_language = known_coded_languages[where - known_languages]; else if (name == "natbib") { h_biblio_style = "plainnat"; @@ -859,12 +864,16 @@ void Preamble::handle_package(Parser &p, string const & name, else if (!in_lyx_preamble) { if (options.empty()) - h_preamble << "\\usepackage{" << name << "}\n"; + h_preamble << "\\usepackage{" << name << '}'; else { h_preamble << "\\usepackage[" << opts << "]{" - << name << "}\n"; + << name << '}'; options.clear(); } + if (p.next_token().cat() == catNewline || + 
(p.next_token().cat() == catSpace && + p.next_next_token().cat() == catNewline)) + h_preamble << '\n'; } // We need to do something with the options... @@ -905,7 +914,7 @@ bool Preamble::writeLyXHeader(ostream & os, bool subdoc) // http://en.wikipedia.org/wiki/Quotation_mark,_non-English_usage // (quotes for kazakh and interlingua are unknown) // danish - if (h_language == "danish") + if (is_known(h_language, known_danish_quotes_languages)) h_quotes_language = "danish"; // french else if (is_known(h_language, known_french_quotes_languages)) @@ -982,6 +991,7 @@ bool Preamble::writeLyXHeader(ostream & os, bool subdoc) << "\\font_roman " << h_font_roman << "\n" << "\\font_sans " << h_font_sans << "\n" << "\\font_typewriter " << h_font_typewriter << "\n" + << "\\font_math " << h_font_math << "\n" << "\\font_default_family " << h_font_default_family << "\n" << "\\use_non_tex_fonts " << h_use_non_tex_fonts << "\n" << "\\font_sc " << h_font_sc << "\n" @@ -1000,7 +1010,7 @@ bool Preamble::writeLyXHeader(ostream & os, bool subdoc) os << "\\paperfontsize " << h_paperfontsize << "\n" << "\\spacing " << h_spacing << "\n" << "\\use_hyperref " << h_use_hyperref << '\n'; - if (h_use_hyperref == "1") { + if (h_use_hyperref == "true") { if (!h_pdf_title.empty()) os << "\\pdf_title \"" << h_pdf_title << "\"\n"; if (!h_pdf_author.empty()) @@ -1634,6 +1644,16 @@ void Preamble::parse(Parser & p, string const & forceclass, ss << tc.sides(); h_papersides = ss.str(); } + + // If the CJK package is used we cannot set the document language from + // the babel options. Instead, we guess which language is used most + // and set this one. 
+ default_language = h_language; + if (is_full_document && auto_packages.find("CJK") != auto_packages.end()) { + p.pushPosition(); + h_language = guessLanguage(p, default_language); + p.popPosition(); + } } diff --git a/src/tex2lyx/Preamble.h b/src/tex2lyx/Preamble.h index bd95e370b0..db2242f3ce 100644 --- a/src/tex2lyx/Preamble.h +++ b/src/tex2lyx/Preamble.h @@ -38,8 +38,10 @@ public: std::string inputencoding() const { return h_inputencoding; } /// std::string notefontcolor() const { return h_notefontcolor; } - /// - std::string language() const { return h_language; } + /// The document language + std::string docLanguage() const { return h_language; } + /// The language of text which is not explicitly marked + std::string defaultLanguage() const { return default_language; } /// std::string use_indices() const { return h_use_indices; } /// @@ -89,6 +91,8 @@ private: /// needed to handle encodings with babel bool one_language; + /// the main non-CJK language + std::string default_language; /// was at least one title layout found? 
bool title_layout_found; @@ -105,6 +109,7 @@ private: std::string h_float_placement; std::string h_fontcolor; std::string h_fontencoding; + std::string h_font_math; std::string h_font_roman; std::string h_font_sans; std::string h_font_typewriter; diff --git a/src/tex2lyx/TODO.txt b/src/tex2lyx/TODO.txt index a1efc09c97..2bdc1d94aa 100644 --- a/src/tex2lyx/TODO.txt +++ b/src/tex2lyx/TODO.txt @@ -62,6 +62,20 @@ Format LaTeX feature LyX feature 411 support for polyglossia \language_package (the cases of no package, of babel and of custom package is supported) 415 undertilde.sty fonts 427 tipa.sty \begin{IPA}, \textipa InsetTIPA +434 libertine-type1, libertine.sty, \font_osf, \font_roman + libertine-legacy.sty +435 txtt LaTeX font \font_typewriter +436 mathdesign.sty \font_roman +437 TeX Gyre LaTeX fonts \font_* +438 \tone{51}, \tone{15}, \tone{45}, InsetTIPA + \tone{12}, \tone{454}, \toptiebar, + \bottomtiebar +439 MinionPro.sty \font_roman, \font_osf +440 MinionPro.sty, eulervm.sty \font_math +442 newtxmath.sty, \font_math +443 unicode-math.sty InsetMath* +444 biolinum-type1.sty, libertine.sty, \font_osf, \font_sans + libertine-legacy.sty General diff --git a/src/tex2lyx/test/CJK.lyx.lyx b/src/tex2lyx/test/CJK.lyx.lyx index 60bf6fa0bd..7d29cc858c 100644 --- a/src/tex2lyx/test/CJK.lyx.lyx +++ b/src/tex2lyx/test/CJK.lyx.lyx @@ -1,5 +1,5 @@ -#LyX file created by tex2lyx 2.1.0svn -\lyxformat 433 +#LyX file created by tex2lyx 2.1.0dev +\lyxformat 444 \begin_document \begin_header \textclass article @@ -23,6 +23,7 @@ \font_roman default \font_sans default \font_typewriter default +\font_math auto \font_default_family default \use_non_tex_fonts false \font_sc false @@ -36,7 +37,7 @@ \index_command default \paperfontsize default \spacing single -\use_hyperref 0 +\use_hyperref false \papersize default \use_geometry false \use_package amsmath 1 @@ -55,6 +56,10 @@ \suppress_date false \justification true \use_refstyle 0 +\index Index +\shortcut idx +\color #008000 +\end_index 
\secnumdepth 3 \tocdepth 3 \paragraph_separation indent diff --git a/src/tex2lyx/test/DummyDocument.lyx b/src/tex2lyx/test/DummyDocument.lyx index 2625ebca29..1c78873178 100644 --- a/src/tex2lyx/test/DummyDocument.lyx +++ b/src/tex2lyx/test/DummyDocument.lyx @@ -1,5 +1,5 @@ -#LyX file created by tex2lyx 2.1.0svn -\lyxformat 433 +#LyX file created by tex2lyx 2.1.0dev +\lyxformat 444 \begin_document \begin_header \textclass article @@ -12,6 +12,7 @@ \font_roman default \font_sans default \font_typewriter default +\font_math auto \font_default_family default \use_non_tex_fonts false \font_sc false @@ -25,7 +26,7 @@ \index_command default \paperfontsize 12 \spacing onehalf -\use_hyperref 0 +\use_hyperref false \papersize a4paper \use_geometry false \use_package amsmath 2 @@ -44,6 +45,10 @@ \suppress_date false \justification true \use_refstyle 0 +\index Index +\shortcut idx +\color #008000 +\end_index \secnumdepth 3 \tocdepth 3 \paragraph_separation indent diff --git a/src/tex2lyx/test/XeTeX-polyglossia.lyx.lyx b/src/tex2lyx/test/XeTeX-polyglossia.lyx.lyx index dc6e5cda52..731ab9897b 100644 --- a/src/tex2lyx/test/XeTeX-polyglossia.lyx.lyx +++ b/src/tex2lyx/test/XeTeX-polyglossia.lyx.lyx @@ -1,5 +1,5 @@ -#LyX file created by tex2lyx 2.1.0svn -\lyxformat 433 +#LyX file created by tex2lyx 2.1.0dev +\lyxformat 444 \begin_document \begin_header \textclass article @@ -23,6 +23,7 @@ \font_roman Linux Libertine O \font_sans Linux Biolinum O \font_typewriter Linux Biolinum O +\font_math auto \font_default_family default \use_non_tex_fonts true \font_sc false @@ -36,7 +37,7 @@ \index_command default \paperfontsize default \spacing single -\use_hyperref 0 +\use_hyperref false \papersize default \use_geometry false \use_package amsmath 1 @@ -55,6 +56,10 @@ \suppress_date false \justification true \use_refstyle 0 +\index Index +\shortcut idx +\color #008000 +\end_index \secnumdepth 3 \tocdepth 3 \paragraph_separation indent @@ -74,12 +79,14 @@ \begin_layout Part +\lang british df 
\end_layout \begin_layout Standard +\lang british 1 English \lang ancientgreek diff --git a/src/tex2lyx/test/box-color-size-space-align.lyx.lyx b/src/tex2lyx/test/box-color-size-space-align.lyx.lyx index c17dd9d434..f1412b5ca3 100644 --- a/src/tex2lyx/test/box-color-size-space-align.lyx.lyx +++ b/src/tex2lyx/test/box-color-size-space-align.lyx.lyx @@ -1,5 +1,5 @@ -#LyX file created by tex2lyx 2.1.0svn -\lyxformat 433 +#LyX file created by tex2lyx 2.1.0dev +\lyxformat 444 \begin_document \begin_header \textclass article @@ -47,6 +47,7 @@ \font_roman default \font_sans default \font_typewriter default +\font_math auto \font_default_family default \use_non_tex_fonts false \font_sc false @@ -60,7 +61,7 @@ \index_command default \paperfontsize 12 \spacing other 1.2 -\use_hyperref 0 +\use_hyperref false \papersize b4paper \use_geometry true \use_package amsmath 2 @@ -82,6 +83,10 @@ \notefontcolor #0000ff \backgroundcolor #ff5500 \boxbgcolor #ffff00 +\index Index +\shortcut idx +\color #008000 +\end_index \leftmargin 3cm \topmargin 1cm \rightmargin 4cm diff --git a/src/tex2lyx/test/test-insets.lyx.lyx b/src/tex2lyx/test/test-insets.lyx.lyx index f5484ad87c..d1acb84042 100644 --- a/src/tex2lyx/test/test-insets.lyx.lyx +++ b/src/tex2lyx/test/test-insets.lyx.lyx @@ -1,5 +1,5 @@ -#LyX file created by tex2lyx 2.1.0svn -\lyxformat 433 +#LyX file created by tex2lyx 2.1.0dev +\lyxformat 444 \begin_document \begin_header \textclass article @@ -18,6 +18,7 @@ \font_roman default \font_sans default \font_typewriter default +\font_math auto \font_default_family default \use_non_tex_fonts false \font_sc false @@ -31,7 +32,7 @@ \index_command default \paperfontsize 12 \spacing onehalf -\use_hyperref 0 +\use_hyperref false \papersize a4paper \use_geometry false \use_package amsmath 2 @@ -50,6 +51,10 @@ \suppress_date false \justification true \use_refstyle 0 +\index Index +\shortcut idx +\color #008000 +\end_index \secnumdepth 3 \tocdepth 3 \paragraph_separation indent diff --git 
a/src/tex2lyx/test/test-structure.lyx.lyx b/src/tex2lyx/test/test-structure.lyx.lyx index 8fc60c923f..2bc1882474 100644 --- a/src/tex2lyx/test/test-structure.lyx.lyx +++ b/src/tex2lyx/test/test-structure.lyx.lyx @@ -1,5 +1,5 @@ -#LyX file created by tex2lyx 2.1.0svn -\lyxformat 433 +#LyX file created by tex2lyx 2.1.0dev +\lyxformat 444 \begin_document \begin_header \textclass article @@ -51,6 +51,7 @@ logicalmkup \font_roman default \font_sans default \font_typewriter default +\font_math auto \font_default_family default \use_non_tex_fonts false \font_sc false @@ -65,7 +66,7 @@ logicalmkup \index_command default \paperfontsize default \spacing single -\use_hyperref 0 +\use_hyperref false \papersize legalpaper \use_geometry false \use_package amsmath 1 @@ -84,6 +85,10 @@ logicalmkup \suppress_date true \justification true \use_refstyle 0 +\index Index +\shortcut idx +\color #008000 +\end_index \secnumdepth 3 \tocdepth 3 \paragraph_separation indent diff --git a/src/tex2lyx/test/test.lyx.lyx b/src/tex2lyx/test/test.lyx.lyx index 6327f5fb9e..a0ddb99cb4 100644 --- a/src/tex2lyx/test/test.lyx.lyx +++ b/src/tex2lyx/test/test.lyx.lyx @@ -1,5 +1,5 @@ -#LyX file created by tex2lyx 2.1.0svn -\lyxformat 433 +#LyX file created by tex2lyx 2.1.0dev +\lyxformat 444 \begin_document \begin_header \textclass article @@ -23,6 +23,7 @@ \font_roman default \font_sans default \font_typewriter default +\font_math auto \font_default_family default \use_non_tex_fonts false \font_sc false @@ -36,7 +37,7 @@ \index_command default \paperfontsize 11 \spacing single -\use_hyperref 0 +\use_hyperref false \papersize default \use_geometry false \use_package amsmath 1 @@ -55,6 +56,10 @@ \suppress_date false \justification true \use_refstyle 0 +\index Index +\shortcut idx +\color #008000 +\end_index \secnumdepth 3 \tocdepth 3 \paragraph_separation indent diff --git a/src/tex2lyx/tex2lyx.cpp b/src/tex2lyx/tex2lyx.cpp index 384c22836d..57436cbcda 100644 --- a/src/tex2lyx/tex2lyx.cpp +++ 
b/src/tex2lyx/tex2lyx.cpp @@ -332,7 +332,6 @@ bool checkModule(string const & name, bool command) bool noweb_mode = false; bool pdflatex = false; bool xetex = false; -bool have_CJK = false; bool is_nonCJKJapanese = false; bool roundtrip = false; @@ -704,7 +703,7 @@ bool tex2lyx(idocstream & is, ostream & os, string encoding) stringstream ss; // store the document language in the context to be able to handle the // commands like \foreignlanguage and \textenglish etc. - context.font.language = preamble.language(); + context.font.language = preamble.defaultLanguage(); // parse the main text parse_text(p, ss, FLAG_END, true, context); if (Context::empty) diff --git a/src/tex2lyx/tex2lyx.h b/src/tex2lyx/tex2lyx.h index a471a0f3f4..1a98a5f91a 100644 --- a/src/tex2lyx/tex2lyx.h +++ b/src/tex2lyx/tex2lyx.h @@ -67,6 +67,10 @@ void parse_text_in_inset(Parser & p, std::ostream & os, unsigned flags, bool outer, Context const & context, InsetLayout const * layout = 0); +/// Guess document language from \p p if CJK is used. +/// \p lang is used for all non-CJK contents. +std::string guessLanguage(Parser & p, std::string const & lang); + /// in math.cpp void parse_math(Parser & p, std::ostream & os, unsigned flags, mode_type mode); @@ -166,8 +170,6 @@ extern bool noweb_mode; extern bool pdflatex; /// Did we recognize any xetex-only construct? extern bool xetex; -/// Do we have CJK? -extern bool have_CJK; /// Do we have non-CJK Japanese? extern bool is_nonCJKJapanese; /// LyX format that is created by tex2lyx diff --git a/src/tex2lyx/text.cpp b/src/tex2lyx/text.cpp index 2ee3afc832..34ffbe86c1 100644 --- a/src/tex2lyx/text.cpp +++ b/src/tex2lyx/text.cpp @@ -130,17 +130,9 @@ const char * const supported_CJK_encodings[] = { * the same as supported_CJK_encodings with their corresponding LyX language name * please keep this in sync with supported_CJK_encodings line by line! 
*/ -const char * const coded_supported_CJK_encodings[] = { +const char * const supported_CJK_languages[] = { "japanese-cjk", "korean", "chinese-simplified", "chinese-traditional", 0}; -string CJK2lyx(string const & encoding) -{ - char const * const * where = is_known(encoding, supported_CJK_encodings); - if (where) - return coded_supported_CJK_encodings[where - supported_CJK_encodings]; - return encoding; -} - /*! * natbib commands. * The starred forms are also known except for "citefullauthor", @@ -1440,8 +1432,9 @@ void parse_environment(Parser & p, ostream & os, bool outer, // LyX doesn't support the second argument so if // this is used we need to output everything as ERT string const mapping = p.getArg('{', '}'); - if ((!mapping.empty() && mapping != " ") - || (!is_known(encoding, supported_CJK_encodings))) { + char const * const * const where = + is_known(encoding, supported_CJK_encodings); + if ((!mapping.empty() && mapping != " ") || !where) { parent_context.check_layout(os); handle_ert(os, "\\begin{" + name + "}{" + encoding + "}{" + mapping + "}", parent_context); @@ -1459,7 +1452,8 @@ void parse_environment(Parser & p, ostream & os, bool outer, handle_ert(os, "\\end{" + name + "}", parent_context); } else { - string const lang = CJK2lyx(encoding); + string const lang = + supported_CJK_languages[where - supported_CJK_encodings]; // store the language because we must reset it at the end string const lang_old = parent_context.font.language; parent_context.font.language = lang; @@ -2142,24 +2136,6 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer, while (p.good()) { Token const & t = p.get_token(); - // it is impossible to determine the correct document language if CJK is used. 
- // Therefore write a note at the beginning of the document - if (have_CJK) { - context.check_layout(os); - begin_inset(os, "Note Note\n"); - os << "status open\n\\begin_layout Plain Layout\n" - << "\\series bold\n" - << "Important information:\n" - << "\\end_layout\n\n" - << "\\begin_layout Plain Layout\n" - << "This document contains text in Chinese, Japanese or Korean.\n" - << " It was therefore impossible for tex2lyx to set the correct document langue for your document." - << " Please set the language manually in the document settings.\n" - << "\\end_layout\n"; - end_inset(os); - have_CJK = false; - } - // it is impossible to determine the correct encoding for non-CJK Japanese. // Therefore write a note at the beginning of the document if (is_nonCJKJapanese) { @@ -2739,8 +2715,7 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer, context.check_layout(os); p.skip_spaces(); begin_inset(os, "Caption\n"); - Context newcontext(true, context.textclass); - newcontext.font = context.font; + Context newcontext(true, context.textclass, 0, 0, context.font); newcontext.check_layout(os); if (p.next_token().cat() != catEscape && p.next_token().character() == '[') { @@ -2790,8 +2765,8 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer, os << "\n\\begin_layout Plain Layout"; p.skip_spaces(); begin_inset(os, "Caption\n"); - Context newcontext(true, context.textclass); - newcontext.font = context.font; + Context newcontext(true, context.textclass, + 0, 0, context.font); newcontext.check_layout(os); os << caption << "\n"; newcontext.check_end_layout(os); @@ -4427,6 +4402,79 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer, } } + +string guessLanguage(Parser & p, string const & lang) +{ + typedef std::map<std::string, size_t> LangMap; + // map from language names to number of characters + LangMap used; + used[lang] = 0; + for (char const * const * i = supported_CJK_languages; *i; i++) + used[string(*i)] = 0; + + while (p.good()) { + 
Token const t = p.get_token(); + // comments are not counted for any language + if (t.cat() == catComment) + continue; + // commands are not counted as well, but we need to detect + // \begin{CJK} and switch encoding if needed + if (t.cat() == catEscape) { + if (t.cs() == "inputencoding") { + string const enc = subst(p.verbatim_item(), "\n", " "); + p.setEncoding(enc); + continue; + } + if (t.cs() != "begin") + continue; + } else { + // Non-CJK content is counted for lang. + // We do not care about the real language here: + // If we have more non-CJK contents than CJK contents, + // we simply use the language that was specified as + // babel main language. + used[lang] += t.asInput().length(); + continue; + } + // Now we are starting an environment + p.pushPosition(); + string const name = p.getArg('{', '}'); + if (name != "CJK") { + p.popPosition(); + continue; + } + // It is a CJK environment + p.popPosition(); + /* name = */ p.getArg('{', '}'); + string const encoding = p.getArg('{', '}'); + /* mapping = */ p.getArg('{', '}'); + string const encoding_old = p.getEncoding(); + char const * const * const where = + is_known(encoding, supported_CJK_encodings); + if (where) + p.setEncoding(encoding); + else + p.setEncoding("utf8"); + string const text = p.verbatimEnvironment("CJK"); + p.setEncoding(encoding_old); + p.skip_spaces(); + if (!where) { + // ignore contents in unknown CJK encoding + continue; + } + // the language of the text + string const cjk = + supported_CJK_languages[where - supported_CJK_encodings]; + used[cjk] += text.length(); + } + LangMap::const_iterator use = used.begin(); + for (LangMap::const_iterator it = used.begin(); it != used.end(); ++it) { + if (it->second > use->second) + use = it; + } + return use->first; +} + // }])