From 28dc8a77ee281f606de3bd7c56d467466ee28b64 Mon Sep 17 00:00:00 2001 From: Georg Baum Date: Fri, 5 Oct 2012 22:16:46 +0200 Subject: [PATCH 1/6] Fix regression introduced in commit 7cfac95 Commit 7cfac95 got rid of empty lines that were created by removing \usepackage statements. However, it added an additional newline in case the \usepackage was not at the end of the line. This is now fixed. --- src/tex2lyx/Parser.cpp | 10 +++++----- src/tex2lyx/Preamble.cpp | 22 +++++++++++++++------- 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/src/tex2lyx/Parser.cpp b/src/tex2lyx/Parser.cpp index 530e73aa47..01f14b8dc9 100644 --- a/src/tex2lyx/Parser.cpp +++ b/src/tex2lyx/Parser.cpp @@ -552,7 +552,7 @@ void Parser::tokenize_one() push_back(Token(s, catSpace)); break; } - + case catNewline: { ++lineno_; docstring s(1, getNewline(is_, c)); @@ -565,7 +565,7 @@ void Parser::tokenize_one() push_back(Token(s, catNewline)); break; } - + case catComment: { // We don't treat "%\n" combinations here specially because // we want to preserve them in the preamble @@ -581,7 +581,7 @@ void Parser::tokenize_one() push_back(Token(s, catComment)); break; } - + case catEscape: { is_.get(c); if (!is_) { @@ -599,12 +599,12 @@ void Parser::tokenize_one() } break; } - + case catIgnore: { cerr << "ignoring a char: " << c << "\n"; break; } - + default: push_back(Token(docstring(1, c), catcode(c))); } diff --git a/src/tex2lyx/Preamble.cpp b/src/tex2lyx/Preamble.cpp index afa0997904..6486307653 100644 --- a/src/tex2lyx/Preamble.cpp +++ b/src/tex2lyx/Preamble.cpp @@ -648,10 +648,8 @@ void Preamble::handle_package(Parser &p, string const & name, } // roman fonts - if (is_known(name, known_roman_fonts)) { + if (is_known(name, known_roman_fonts)) h_font_roman = name; - p.skip_spaces(); - } if (name == "fourier") { h_font_roman = "utopia"; @@ -816,10 +814,16 @@ void Preamble::handle_package(Parser &p, string const & name, else if (is_known(name, known_lyx_packages) && options.empty()) { if 
(name == "splitidx") h_use_indices = "true"; - if (!in_lyx_preamble) + if (!in_lyx_preamble) { h_preamble << package_beg_sep << name << package_mid_sep << "\\usepackage{" - << name << "}\n" << package_end_sep; + << name << '}'; + if (p.next_token().cat() == catNewline || + (p.next_token().cat() == catSpace && + p.next_next_token().cat() == catNewline)) + h_preamble << '\n'; + h_preamble << package_end_sep; + } } else if (name == "geometry") @@ -859,12 +863,16 @@ void Preamble::handle_package(Parser &p, string const & name, else if (!in_lyx_preamble) { if (options.empty()) - h_preamble << "\\usepackage{" << name << "}\n"; + h_preamble << "\\usepackage{" << name << '}'; else { h_preamble << "\\usepackage[" << opts << "]{" - << name << "}\n"; + << name << '}'; options.clear(); } + if (p.next_token().cat() == catNewline || + (p.next_token().cat() == catSpace && + p.next_next_token().cat() == catNewline)) + h_preamble << '\n'; } // We need to do something with the options... From 87641e506ec1560dbd46c6e6da33cfb06d5cf552 Mon Sep 17 00:00:00 2001 From: Georg Baum Date: Fri, 5 Oct 2012 23:44:21 +0200 Subject: [PATCH 2/6] Fix typo --- src/tex2lyx/text.cpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/tex2lyx/text.cpp b/src/tex2lyx/text.cpp index 2ee3afc832..a97d00bd36 100644 --- a/src/tex2lyx/text.cpp +++ b/src/tex2lyx/text.cpp @@ -2153,7 +2153,7 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer, << "\\end_layout\n\n" << "\\begin_layout Plain Layout\n" << "This document contains text in Chinese, Japanese or Korean.\n" - << " It was therefore impossible for tex2lyx to set the correct document langue for your document." + << " It was therefore impossible for tex2lyx to set the correct document language for your document." 
<< " Please set the language manually in the document settings.\n" << "\\end_layout\n"; end_inset(os); @@ -2739,8 +2739,7 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer, context.check_layout(os); p.skip_spaces(); begin_inset(os, "Caption\n"); - Context newcontext(true, context.textclass); - newcontext.font = context.font; + Context newcontext(true, context.textclass, 0, 0, context.font); newcontext.check_layout(os); if (p.next_token().cat() != catEscape && p.next_token().character() == '[') { @@ -2790,8 +2789,8 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer, os << "\n\\begin_layout Plain Layout"; p.skip_spaces(); begin_inset(os, "Caption\n"); - Context newcontext(true, context.textclass); - newcontext.font = context.font; + Context newcontext(true, context.textclass, + 0, 0, context.font); newcontext.check_layout(os); os << caption << "\n"; newcontext.check_end_layout(os); From c85228432ca04bdc4e473bd5191a9ddfb8664b6b Mon Sep 17 00:00:00 2001 From: Georg Baum Date: Fri, 5 Oct 2012 23:50:28 +0200 Subject: [PATCH 3/6] Update file format of test references --- src/tex2lyx/test/CJK.lyx.lyx | 8 ++++++-- src/tex2lyx/test/DummyDocument.lyx | 8 ++++++-- src/tex2lyx/test/XeTeX-polyglossia.lyx.lyx | 8 ++++++-- src/tex2lyx/test/box-color-size-space-align.lyx.lyx | 8 ++++++-- src/tex2lyx/test/test-insets.lyx.lyx | 8 ++++++-- src/tex2lyx/test/test-structure.lyx.lyx | 8 ++++++-- src/tex2lyx/test/test.lyx.lyx | 8 ++++++-- 7 files changed, 42 insertions(+), 14 deletions(-) diff --git a/src/tex2lyx/test/CJK.lyx.lyx b/src/tex2lyx/test/CJK.lyx.lyx index 60bf6fa0bd..06488f5565 100644 --- a/src/tex2lyx/test/CJK.lyx.lyx +++ b/src/tex2lyx/test/CJK.lyx.lyx @@ -1,5 +1,5 @@ -#LyX file created by tex2lyx 2.1.0svn -\lyxformat 433 +#LyX file created by tex2lyx 2.1.0dev +\lyxformat 444 \begin_document \begin_header \textclass article @@ -55,6 +55,10 @@ \suppress_date false \justification true \use_refstyle 0 +\index Index +\shortcut idx +\color 
#008000 +\end_index \secnumdepth 3 \tocdepth 3 \paragraph_separation indent diff --git a/src/tex2lyx/test/DummyDocument.lyx b/src/tex2lyx/test/DummyDocument.lyx index 2625ebca29..d2c533f8ab 100644 --- a/src/tex2lyx/test/DummyDocument.lyx +++ b/src/tex2lyx/test/DummyDocument.lyx @@ -1,5 +1,5 @@ -#LyX file created by tex2lyx 2.1.0svn -\lyxformat 433 +#LyX file created by tex2lyx 2.1.0dev +\lyxformat 444 \begin_document \begin_header \textclass article @@ -44,6 +44,10 @@ \suppress_date false \justification true \use_refstyle 0 +\index Index +\shortcut idx +\color #008000 +\end_index \secnumdepth 3 \tocdepth 3 \paragraph_separation indent diff --git a/src/tex2lyx/test/XeTeX-polyglossia.lyx.lyx b/src/tex2lyx/test/XeTeX-polyglossia.lyx.lyx index dc6e5cda52..68b59cc56b 100644 --- a/src/tex2lyx/test/XeTeX-polyglossia.lyx.lyx +++ b/src/tex2lyx/test/XeTeX-polyglossia.lyx.lyx @@ -1,5 +1,5 @@ -#LyX file created by tex2lyx 2.1.0svn -\lyxformat 433 +#LyX file created by tex2lyx 2.1.0dev +\lyxformat 444 \begin_document \begin_header \textclass article @@ -55,6 +55,10 @@ \suppress_date false \justification true \use_refstyle 0 +\index Index +\shortcut idx +\color #008000 +\end_index \secnumdepth 3 \tocdepth 3 \paragraph_separation indent diff --git a/src/tex2lyx/test/box-color-size-space-align.lyx.lyx b/src/tex2lyx/test/box-color-size-space-align.lyx.lyx index c17dd9d434..4a8e09a28f 100644 --- a/src/tex2lyx/test/box-color-size-space-align.lyx.lyx +++ b/src/tex2lyx/test/box-color-size-space-align.lyx.lyx @@ -1,5 +1,5 @@ -#LyX file created by tex2lyx 2.1.0svn -\lyxformat 433 +#LyX file created by tex2lyx 2.1.0dev +\lyxformat 444 \begin_document \begin_header \textclass article @@ -82,6 +82,10 @@ \notefontcolor #0000ff \backgroundcolor #ff5500 \boxbgcolor #ffff00 +\index Index +\shortcut idx +\color #008000 +\end_index \leftmargin 3cm \topmargin 1cm \rightmargin 4cm diff --git a/src/tex2lyx/test/test-insets.lyx.lyx b/src/tex2lyx/test/test-insets.lyx.lyx index f5484ad87c..075f3e8839 
100644 --- a/src/tex2lyx/test/test-insets.lyx.lyx +++ b/src/tex2lyx/test/test-insets.lyx.lyx @@ -1,5 +1,5 @@ -#LyX file created by tex2lyx 2.1.0svn -\lyxformat 433 +#LyX file created by tex2lyx 2.1.0dev +\lyxformat 444 \begin_document \begin_header \textclass article @@ -50,6 +50,10 @@ \suppress_date false \justification true \use_refstyle 0 +\index Index +\shortcut idx +\color #008000 +\end_index \secnumdepth 3 \tocdepth 3 \paragraph_separation indent diff --git a/src/tex2lyx/test/test-structure.lyx.lyx b/src/tex2lyx/test/test-structure.lyx.lyx index 8fc60c923f..f9f89661be 100644 --- a/src/tex2lyx/test/test-structure.lyx.lyx +++ b/src/tex2lyx/test/test-structure.lyx.lyx @@ -1,5 +1,5 @@ -#LyX file created by tex2lyx 2.1.0svn -\lyxformat 433 +#LyX file created by tex2lyx 2.1.0dev +\lyxformat 444 \begin_document \begin_header \textclass article @@ -84,6 +84,10 @@ logicalmkup \suppress_date true \justification true \use_refstyle 0 +\index Index +\shortcut idx +\color #008000 +\end_index \secnumdepth 3 \tocdepth 3 \paragraph_separation indent diff --git a/src/tex2lyx/test/test.lyx.lyx b/src/tex2lyx/test/test.lyx.lyx index 6327f5fb9e..666a0788e6 100644 --- a/src/tex2lyx/test/test.lyx.lyx +++ b/src/tex2lyx/test/test.lyx.lyx @@ -1,5 +1,5 @@ -#LyX file created by tex2lyx 2.1.0svn -\lyxformat 433 +#LyX file created by tex2lyx 2.1.0dev +\lyxformat 444 \begin_document \begin_header \textclass article @@ -55,6 +55,10 @@ \suppress_date false \justification true \use_refstyle 0 +\index Index +\shortcut idx +\color #008000 +\end_index \secnumdepth 3 \tocdepth 3 \paragraph_separation indent From fce4f66718dc25e43da1e3f8a144b1f9cb74918e Mon Sep 17 00:00:00 2001 From: Georg Baum Date: Fri, 5 Oct 2012 23:58:16 +0200 Subject: [PATCH 4/6] Acknowledge additional \lang command --- src/tex2lyx/Context.cpp | 3 +++ src/tex2lyx/test/XeTeX-polyglossia.lyx.lyx | 2 ++ 2 files changed, 5 insertions(+) diff --git a/src/tex2lyx/Context.cpp b/src/tex2lyx/Context.cpp index 2db797d67d..ecf6a14199 
100644 --- a/src/tex2lyx/Context.cpp +++ b/src/tex2lyx/Context.cpp @@ -113,6 +113,9 @@ void Context::begin_layout(ostream & os, Layout const * const & l) } // FIXME: This is not enough for things like // \\Huge par1 \\par par2 + // FIXME: If the document language is not English this outputs a + // superfluous language change. Fortunately this is only file format + // bloat and does not change the TeX export of LyX. output_font_change(os, normalfont, font); } diff --git a/src/tex2lyx/test/XeTeX-polyglossia.lyx.lyx b/src/tex2lyx/test/XeTeX-polyglossia.lyx.lyx index 68b59cc56b..cf1a7ed58e 100644 --- a/src/tex2lyx/test/XeTeX-polyglossia.lyx.lyx +++ b/src/tex2lyx/test/XeTeX-polyglossia.lyx.lyx @@ -78,12 +78,14 @@ \begin_layout Part +\lang british df \end_layout \begin_layout Standard +\lang british 1 English \lang ancientgreek From 8863ac62aa25ea11dc9ac78c525d01aee3defc23 Mon Sep 17 00:00:00 2001 From: Georg Baum Date: Sat, 6 Oct 2012 00:29:16 +0200 Subject: [PATCH 5/6] Really output file format 444 The previous output was read correctly by LyX as well, but the new version is more similar to the files written by LyX. Also update TODO with recent file format changes. 
--- src/tex2lyx/Preamble.cpp | 8 +++++--- src/tex2lyx/Preamble.h | 1 + src/tex2lyx/TODO.txt | 14 ++++++++++++++ src/tex2lyx/test/CJK.lyx.lyx | 3 ++- src/tex2lyx/test/DummyDocument.lyx | 3 ++- src/tex2lyx/test/XeTeX-polyglossia.lyx.lyx | 3 ++- .../test/box-color-size-space-align.lyx.lyx | 3 ++- src/tex2lyx/test/test-insets.lyx.lyx | 3 ++- src/tex2lyx/test/test-structure.lyx.lyx | 3 ++- src/tex2lyx/test/test.lyx.lyx | 3 ++- 10 files changed, 34 insertions(+), 10 deletions(-) diff --git a/src/tex2lyx/Preamble.cpp b/src/tex2lyx/Preamble.cpp index 6486307653..6ea980eb1d 100644 --- a/src/tex2lyx/Preamble.cpp +++ b/src/tex2lyx/Preamble.cpp @@ -446,6 +446,7 @@ Preamble::Preamble() : one_language(true), title_layout_found(false) h_font_roman = "default"; h_font_sans = "default"; h_font_typewriter = "default"; + h_font_math = "auto"; h_font_default_family = "default"; h_use_non_tex_fonts = "false"; h_font_sc = "false"; @@ -506,7 +507,7 @@ Preamble::Preamble() : one_language(true), title_layout_found(false) h_use_indices = "false"; h_use_geometry = "false"; h_use_default_options = "false"; - h_use_hyperref = "0"; + h_use_hyperref = "false"; h_use_refstyle = "0"; h_use_packages["amsmath"] = "1"; h_use_packages["amssymb"] = "0"; @@ -522,7 +523,7 @@ void Preamble::handle_hyperref(vector & options) { // FIXME swallow inputencoding changes that might surround the // hyperref setup if it was written by LyX - h_use_hyperref = "1"; + h_use_hyperref = "true"; // swallow "unicode=true", since LyX does always write that vector::iterator it = find(options.begin(), options.end(), "unicode=true"); @@ -990,6 +991,7 @@ bool Preamble::writeLyXHeader(ostream & os, bool subdoc) << "\\font_roman " << h_font_roman << "\n" << "\\font_sans " << h_font_sans << "\n" << "\\font_typewriter " << h_font_typewriter << "\n" + << "\\font_math " << h_font_math << "\n" << "\\font_default_family " << h_font_default_family << "\n" << "\\use_non_tex_fonts " << h_use_non_tex_fonts << "\n" << "\\font_sc " << 
h_font_sc << "\n" @@ -1008,7 +1010,7 @@ bool Preamble::writeLyXHeader(ostream & os, bool subdoc) os << "\\paperfontsize " << h_paperfontsize << "\n" << "\\spacing " << h_spacing << "\n" << "\\use_hyperref " << h_use_hyperref << '\n'; - if (h_use_hyperref == "1") { + if (h_use_hyperref == "true") { if (!h_pdf_title.empty()) os << "\\pdf_title \"" << h_pdf_title << "\"\n"; if (!h_pdf_author.empty()) diff --git a/src/tex2lyx/Preamble.h b/src/tex2lyx/Preamble.h index bd95e370b0..2ce75fd065 100644 --- a/src/tex2lyx/Preamble.h +++ b/src/tex2lyx/Preamble.h @@ -105,6 +105,7 @@ private: std::string h_float_placement; std::string h_fontcolor; std::string h_fontencoding; + std::string h_font_math; std::string h_font_roman; std::string h_font_sans; std::string h_font_typewriter; diff --git a/src/tex2lyx/TODO.txt b/src/tex2lyx/TODO.txt index a1efc09c97..2bdc1d94aa 100644 --- a/src/tex2lyx/TODO.txt +++ b/src/tex2lyx/TODO.txt @@ -62,6 +62,20 @@ Format LaTeX feature LyX feature 411 support for polyglossia \language_package (the cases of no package, of babel and of custom package is supported) 415 undertilde.sty fonts 427 tipa.sty \begin{IPA}, \textipa InsetTIPA +434 libertine-type1, libertine.sty, \font_osf, \font_roman + libertine-legacy.sty +435 txtt LaTeX font \font_typewriter +436 mathdesign.sty \font_roman +437 TeX Gyre LaTeX fonts \font_* +438 \tone{51}, \tone{15}, \tone{45}, InsetTIPA + \tone{12}, \tone{454}, \toptiebar, + \bottomtiebar +439 MinionPro.sty \font_roman, \font_osf +440 MinionPro.sty, eulervm.sty \font_math +442 newtxmath.sty, \font_math +443 unicode-math.sty InsetMath* +444 biolinum-type1.sty, libertine.sty, \font_osf, \font_sans + libertine-legacy.sty General diff --git a/src/tex2lyx/test/CJK.lyx.lyx b/src/tex2lyx/test/CJK.lyx.lyx index 06488f5565..7d29cc858c 100644 --- a/src/tex2lyx/test/CJK.lyx.lyx +++ b/src/tex2lyx/test/CJK.lyx.lyx @@ -23,6 +23,7 @@ \font_roman default \font_sans default \font_typewriter default +\font_math auto \font_default_family 
default \use_non_tex_fonts false \font_sc false @@ -36,7 +37,7 @@ \index_command default \paperfontsize default \spacing single -\use_hyperref 0 +\use_hyperref false \papersize default \use_geometry false \use_package amsmath 1 diff --git a/src/tex2lyx/test/DummyDocument.lyx b/src/tex2lyx/test/DummyDocument.lyx index d2c533f8ab..1c78873178 100644 --- a/src/tex2lyx/test/DummyDocument.lyx +++ b/src/tex2lyx/test/DummyDocument.lyx @@ -12,6 +12,7 @@ \font_roman default \font_sans default \font_typewriter default +\font_math auto \font_default_family default \use_non_tex_fonts false \font_sc false @@ -25,7 +26,7 @@ \index_command default \paperfontsize 12 \spacing onehalf -\use_hyperref 0 +\use_hyperref false \papersize a4paper \use_geometry false \use_package amsmath 2 diff --git a/src/tex2lyx/test/XeTeX-polyglossia.lyx.lyx b/src/tex2lyx/test/XeTeX-polyglossia.lyx.lyx index cf1a7ed58e..731ab9897b 100644 --- a/src/tex2lyx/test/XeTeX-polyglossia.lyx.lyx +++ b/src/tex2lyx/test/XeTeX-polyglossia.lyx.lyx @@ -23,6 +23,7 @@ \font_roman Linux Libertine O \font_sans Linux Biolinum O \font_typewriter Linux Biolinum O +\font_math auto \font_default_family default \use_non_tex_fonts true \font_sc false @@ -36,7 +37,7 @@ \index_command default \paperfontsize default \spacing single -\use_hyperref 0 +\use_hyperref false \papersize default \use_geometry false \use_package amsmath 1 diff --git a/src/tex2lyx/test/box-color-size-space-align.lyx.lyx b/src/tex2lyx/test/box-color-size-space-align.lyx.lyx index 4a8e09a28f..f1412b5ca3 100644 --- a/src/tex2lyx/test/box-color-size-space-align.lyx.lyx +++ b/src/tex2lyx/test/box-color-size-space-align.lyx.lyx @@ -47,6 +47,7 @@ \font_roman default \font_sans default \font_typewriter default +\font_math auto \font_default_family default \use_non_tex_fonts false \font_sc false @@ -60,7 +61,7 @@ \index_command default \paperfontsize 12 \spacing other 1.2 -\use_hyperref 0 +\use_hyperref false \papersize b4paper \use_geometry true \use_package amsmath 
2 diff --git a/src/tex2lyx/test/test-insets.lyx.lyx b/src/tex2lyx/test/test-insets.lyx.lyx index 075f3e8839..d1acb84042 100644 --- a/src/tex2lyx/test/test-insets.lyx.lyx +++ b/src/tex2lyx/test/test-insets.lyx.lyx @@ -18,6 +18,7 @@ \font_roman default \font_sans default \font_typewriter default +\font_math auto \font_default_family default \use_non_tex_fonts false \font_sc false @@ -31,7 +32,7 @@ \index_command default \paperfontsize 12 \spacing onehalf -\use_hyperref 0 +\use_hyperref false \papersize a4paper \use_geometry false \use_package amsmath 2 diff --git a/src/tex2lyx/test/test-structure.lyx.lyx b/src/tex2lyx/test/test-structure.lyx.lyx index f9f89661be..2bc1882474 100644 --- a/src/tex2lyx/test/test-structure.lyx.lyx +++ b/src/tex2lyx/test/test-structure.lyx.lyx @@ -51,6 +51,7 @@ logicalmkup \font_roman default \font_sans default \font_typewriter default +\font_math auto \font_default_family default \use_non_tex_fonts false \font_sc false @@ -65,7 +66,7 @@ logicalmkup \index_command default \paperfontsize default \spacing single -\use_hyperref 0 +\use_hyperref false \papersize legalpaper \use_geometry false \use_package amsmath 1 diff --git a/src/tex2lyx/test/test.lyx.lyx b/src/tex2lyx/test/test.lyx.lyx index 666a0788e6..a0ddb99cb4 100644 --- a/src/tex2lyx/test/test.lyx.lyx +++ b/src/tex2lyx/test/test.lyx.lyx @@ -23,6 +23,7 @@ \font_roman default \font_sans default \font_typewriter default +\font_math auto \font_default_family default \use_non_tex_fonts false \font_sc false @@ -36,7 +37,7 @@ \index_command default \paperfontsize 11 \spacing single -\use_hyperref 0 +\use_hyperref false \papersize default \use_geometry false \use_package amsmath 1 From 23d7d6449feffd72bce486e19f0dfbd429fe9301 Mon Sep 17 00:00:00 2001 From: Georg Baum Date: Sat, 6 Oct 2012 09:38:14 +0200 Subject: [PATCH 6/6] Set CJK document language instead of adding a note. 
Instead of annoying the user with an automatically created note in the output document which she needs to delete manually, determine the document language automatically for documents that use CJK. This is done using a heuristic which roughly counts the number of characters in each language and sets the one that is used most often. This is not perfect, but it works for the two major use cases: A document with only some CJK parts (in this case the babel language is used), and a document which is mainly written in one CJK language. It is only a minor problem if the heuristic is wrong, since the TeX export is still correct, and there is no spell checking support for CJK anyway. Now all regression tests do pass except for some relative path issues depending on the location of the build directory. --- src/tex2lyx/Preamble.cpp | 28 ++++++---- src/tex2lyx/Preamble.h | 8 ++- src/tex2lyx/tex2lyx.cpp | 3 +- src/tex2lyx/tex2lyx.h | 6 ++- src/tex2lyx/text.cpp | 109 ++++++++++++++++++++++++++++----------- 5 files changed, 109 insertions(+), 45 deletions(-) diff --git a/src/tex2lyx/Preamble.cpp b/src/tex2lyx/Preamble.cpp index 6ea980eb1d..430708c0fc 100644 --- a/src/tex2lyx/Preamble.cpp +++ b/src/tex2lyx/Preamble.cpp @@ -44,9 +44,8 @@ Preamble preamble; namespace { -// "chinese-simplified", "chinese-traditional", "japanese-cjk", "korean" -// cannot be supported because it is impossible to determine the correct document -// language if CJK is used. +// CJK languages are handled in text.cpp, polyglossia languages are listed +// further down. 
/** * known babel language names (including synonyms) * not in standard babel: arabic, arabtex, armenian, belarusian, serbian-latin, thai @@ -88,6 +87,9 @@ const char * const known_coded_languages[] = {"french", "afrikaans", "albanian", "uppersorbian", "uppersorbian", "english", "english", "vietnamese", "welsh", 0}; +/// languages with danish quotes (.lyx names) +const char * const known_danish_quotes_languages[] = {"danish", 0}; + /// languages with english quotes (.lyx names) const char * const known_english_quotes_languages[] = {"american", "australian", "bahasa", "bahasam", "brazilian", "canadian", "chinese-simplified", "english", @@ -639,6 +641,7 @@ void Preamble::handle_package(Parser &p, string const & name, vector options = split_options(opts); add_package(name, options); string scale; + char const * const * where = 0; if (is_known(name, known_xetex_packages)) { xetex = true; @@ -753,9 +756,6 @@ void Preamble::handle_package(Parser &p, string const & name, } else if (name == "CJK") { - // It is impossible to determine the document language if CJK is used. - // All we can do is to notify the user that he has to set this by himself. 
- have_CJK = true; // set the encoding to "auto" because it might be set to "default" by the babel handling // and this would not be correct for CJK if (h_inputencoding == "default") @@ -833,8 +833,8 @@ void Preamble::handle_package(Parser &p, string const & name, else if (name == "subfig") ; // ignore this FIXME: Use the package separator mechanism instead - else if (is_known(name, known_languages)) - h_language = name; + else if ((where = is_known(name, known_languages))) + h_language = known_coded_languages[where - known_languages]; else if (name == "natbib") { h_biblio_style = "plainnat"; @@ -914,7 +914,7 @@ bool Preamble::writeLyXHeader(ostream & os, bool subdoc) // http://en.wikipedia.org/wiki/Quotation_mark,_non-English_usage // (quotes for kazakh and interlingua are unknown) // danish - if (h_language == "danish") + if (is_known(h_language, known_danish_quotes_languages)) h_quotes_language = "danish"; // french else if (is_known(h_language, known_french_quotes_languages)) @@ -1644,6 +1644,16 @@ void Preamble::parse(Parser & p, string const & forceclass, ss << tc.sides(); h_papersides = ss.str(); } + + // If the CJK package is used we cannot set the document language from + // the babel options. Instead, we guess which language is used most + // and set this one. 
+ default_language = h_language; + if (is_full_document && auto_packages.find("CJK") != auto_packages.end()) { + p.pushPosition(); + h_language = guessLanguage(p, default_language); + p.popPosition(); + } } diff --git a/src/tex2lyx/Preamble.h b/src/tex2lyx/Preamble.h index 2ce75fd065..db2242f3ce 100644 --- a/src/tex2lyx/Preamble.h +++ b/src/tex2lyx/Preamble.h @@ -38,8 +38,10 @@ public: std::string inputencoding() const { return h_inputencoding; } /// std::string notefontcolor() const { return h_notefontcolor; } - /// - std::string language() const { return h_language; } + /// The document language + std::string docLanguage() const { return h_language; } + /// The language of text which is not explicitly marked + std::string defaultLanguage() const { return default_language; } /// std::string use_indices() const { return h_use_indices; } /// @@ -89,6 +91,8 @@ private: /// needed to handle encodings with babel bool one_language; + /// the main non-CJK language + std::string default_language; /// was at least one title layout found? bool title_layout_found; diff --git a/src/tex2lyx/tex2lyx.cpp b/src/tex2lyx/tex2lyx.cpp index 384c22836d..57436cbcda 100644 --- a/src/tex2lyx/tex2lyx.cpp +++ b/src/tex2lyx/tex2lyx.cpp @@ -332,7 +332,6 @@ bool checkModule(string const & name, bool command) bool noweb_mode = false; bool pdflatex = false; bool xetex = false; -bool have_CJK = false; bool is_nonCJKJapanese = false; bool roundtrip = false; @@ -704,7 +703,7 @@ bool tex2lyx(idocstream & is, ostream & os, string encoding) stringstream ss; // store the document language in the context to be able to handle the // commands like \foreignlanguage and \textenglish etc. 
- context.font.language = preamble.language(); + context.font.language = preamble.defaultLanguage(); // parse the main text parse_text(p, ss, FLAG_END, true, context); if (Context::empty) diff --git a/src/tex2lyx/tex2lyx.h b/src/tex2lyx/tex2lyx.h index a471a0f3f4..1a98a5f91a 100644 --- a/src/tex2lyx/tex2lyx.h +++ b/src/tex2lyx/tex2lyx.h @@ -67,6 +67,10 @@ void parse_text_in_inset(Parser & p, std::ostream & os, unsigned flags, bool outer, Context const & context, InsetLayout const * layout = 0); +/// Guess document language from \p p if CJK is used. +/// \p lang is used for all non-CJK contents. +std::string guessLanguage(Parser & p, std::string const & lang); + /// in math.cpp void parse_math(Parser & p, std::ostream & os, unsigned flags, mode_type mode); @@ -166,8 +170,6 @@ extern bool noweb_mode; extern bool pdflatex; /// Did we recognize any xetex-only construct? extern bool xetex; -/// Do we have CJK? -extern bool have_CJK; /// Do we have non-CJK Japanese? extern bool is_nonCJKJapanese; /// LyX format that is created by tex2lyx diff --git a/src/tex2lyx/text.cpp b/src/tex2lyx/text.cpp index a97d00bd36..34ffbe86c1 100644 --- a/src/tex2lyx/text.cpp +++ b/src/tex2lyx/text.cpp @@ -130,17 +130,9 @@ const char * const supported_CJK_encodings[] = { * the same as supported_CJK_encodings with their corresponding LyX language name * please keep this in sync with supported_CJK_encodings line by line! */ -const char * const coded_supported_CJK_encodings[] = { +const char * const supported_CJK_languages[] = { "japanese-cjk", "korean", "chinese-simplified", "chinese-traditional", 0}; -string CJK2lyx(string const & encoding) -{ - char const * const * where = is_known(encoding, supported_CJK_encodings); - if (where) - return coded_supported_CJK_encodings[where - supported_CJK_encodings]; - return encoding; -} - /*! * natbib commands. 
* The starred forms are also known except for "citefullauthor", @@ -1440,8 +1432,9 @@ void parse_environment(Parser & p, ostream & os, bool outer, // LyX doesn't support the second argument so if // this is used we need to output everything as ERT string const mapping = p.getArg('{', '}'); - if ((!mapping.empty() && mapping != " ") - || (!is_known(encoding, supported_CJK_encodings))) { + char const * const * const where = + is_known(encoding, supported_CJK_encodings); + if ((!mapping.empty() && mapping != " ") || !where) { parent_context.check_layout(os); handle_ert(os, "\\begin{" + name + "}{" + encoding + "}{" + mapping + "}", parent_context); @@ -1459,7 +1452,8 @@ void parse_environment(Parser & p, ostream & os, bool outer, handle_ert(os, "\\end{" + name + "}", parent_context); } else { - string const lang = CJK2lyx(encoding); + string const lang = + supported_CJK_languages[where - supported_CJK_encodings]; // store the language because we must reset it at the end string const lang_old = parent_context.font.language; parent_context.font.language = lang; @@ -2142,24 +2136,6 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer, while (p.good()) { Token const & t = p.get_token(); - // it is impossible to determine the correct document language if CJK is used. - // Therefore write a note at the beginning of the document - if (have_CJK) { - context.check_layout(os); - begin_inset(os, "Note Note\n"); - os << "status open\n\\begin_layout Plain Layout\n" - << "\\series bold\n" - << "Important information:\n" - << "\\end_layout\n\n" - << "\\begin_layout Plain Layout\n" - << "This document contains text in Chinese, Japanese or Korean.\n" - << " It was therefore impossible for tex2lyx to set the correct document language for your document." - << " Please set the language manually in the document settings.\n" - << "\\end_layout\n"; - end_inset(os); - have_CJK = false; - } - // it is impossible to determine the correct encoding for non-CJK Japanese. 
// Therefore write a note at the beginning of the document if (is_nonCJKJapanese) { @@ -4426,6 +4402,79 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer, } } + +string guessLanguage(Parser & p, string const & lang) +{ + typedef std::map LangMap; + // map from language names to number of characters + LangMap used; + used[lang] = 0; + for (char const * const * i = supported_CJK_languages; *i; i++) + used[string(*i)] = 0; + + while (p.good()) { + Token const t = p.get_token(); + // comments are not counted for any language + if (t.cat() == catComment) + continue; + // commands are not counted as well, but we need to detect + // \begin{CJK} and switch encoding if needed + if (t.cat() == catEscape) { + if (t.cs() == "inputencoding") { + string const enc = subst(p.verbatim_item(), "\n", " "); + p.setEncoding(enc); + continue; + } + if (t.cs() != "begin") + continue; + } else { + // Non-CJK content is counted for lang. + // We do not care about the real language here: + // If we have more non-CJK contents than CJK contents, + // we simply use the language that was specified as + // babel main language. 
+ used[lang] += t.asInput().length(); + continue; + } + // Now we are starting an environment + p.pushPosition(); + string const name = p.getArg('{', '}'); + if (name != "CJK") { + p.popPosition(); + continue; + } + // It is a CJK environment + p.popPosition(); + /* name = */ p.getArg('{', '}'); + string const encoding = p.getArg('{', '}'); + /* mapping = */ p.getArg('{', '}'); + string const encoding_old = p.getEncoding(); + char const * const * const where = + is_known(encoding, supported_CJK_encodings); + if (where) + p.setEncoding(encoding); + else + p.setEncoding("utf8"); + string const text = p.verbatimEnvironment("CJK"); + p.setEncoding(encoding_old); + p.skip_spaces(); + if (!where) { + // ignore contents in unknown CJK encoding + continue; + } + // the language of the text + string const cjk = + supported_CJK_languages[where - supported_CJK_encodings]; + used[cjk] += text.length(); + } + LangMap::const_iterator use = used.begin(); + for (LangMap::const_iterator it = used.begin(); it != used.end(); ++it) { + if (it->second > use->second) + use = it; + } + return use->first; +} + // }])