From d8e0a2ba6b0f44b0219b2a6282d60264e090cdb5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrgen=20Spitzm=C3=BCller?= Date: Mon, 17 Dec 2007 10:01:57 +0000 Subject: [PATCH] Sanitize CJK support (fix bug 3561, 4349 and 4337): * src/output_latex.{cpp, h}: - Many changes in order to allow CJK in a multilingual context. * src/Paragraph.cpp: - Fix file encoding switch and language nesting with CJK. * src/Buffer.cpp: - Move the opening and closing CJK and language tags to output_latex.cpp. * src/Font.cpp: - switchEncoding call now takes runparams as argument. git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/branches/BRANCH_1_5_X@22181 a592a061-630c-0410-9148-cb99ea01b6c8 --- src/Buffer.cpp | 37 ------- src/Font.cpp | 6 +- src/Paragraph.cpp | 18 +++- src/output_latex.cpp | 249 ++++++++++++++++++++++++++++++++++++------- src/output_latex.h | 7 +- 5 files changed, 231 insertions(+), 86 deletions(-) diff --git a/src/Buffer.cpp b/src/Buffer.cpp index 093ee66a91..e4a4d4db58 100644 --- a/src/Buffer.cpp +++ b/src/Buffer.cpp @@ -1044,26 +1044,6 @@ void Buffer::writeLaTeXSource(odocstream & os, } // output_preamble LYXERR(Debug::INFO) << "preamble finished, now the body." << endl; - if (!lyxrc.language_auto_begin && - !params().language->babel().empty()) { - // FIXME UNICODE - os << from_utf8(subst(lyxrc.language_command_begin, - "$$lang", - params().language->babel())) - << '\n'; - texrow().newline(); - } - - Encoding const & encoding = params().encoding(); - if (encoding.package() == Encoding::CJK) { - // Open a CJK environment, since in contrast to the encodings - // handled by inputenc the document encoding is not set in - // the preamble if it is handled by CJK.sty. - os << "\\begin{CJK}{" << from_ascii(encoding.latexName()) - << "}{}\n"; - texrow().newline(); - } - // if we are doing a real file with body, even if this is the // child of some other buffer, let's cut the link here. // This happens for example if only a child document is printed. 
@@ -1084,23 +1064,6 @@ void Buffer::writeLaTeXSource(odocstream & os, os << endl; texrow().newline(); - if (encoding.package() == Encoding::CJK) { - // Close the open CJK environment. - // latexParagraphs will have opened one even if the last text - // was not CJK. - os << "\\end{CJK}\n"; - texrow().newline(); - } - - if (!lyxrc.language_auto_end && - !params().language->babel().empty()) { - os << from_utf8(subst(lyxrc.language_command_end, - "$$lang", - params().language->babel())) - << '\n'; - texrow().newline(); - } - if (output_preamble) { os << "\\end{document}\n"; texrow().newline(); diff --git a/src/Font.cpp b/src/Font.cpp index b0073ebeb8..fb58520d45 100644 --- a/src/Font.cpp +++ b/src/Font.cpp @@ -787,8 +787,7 @@ int Font::latexWriteStartChanges(odocstream & os, BufferParams const & bparams, if (language()->encoding()->package() == Encoding::CJK) { pair const c = switchEncoding(os, bparams, - runparams.moving_arg, *(runparams.encoding), - *(language()->encoding())); + runparams, *(language()->encoding())); if (c.first) { open_encoding_ = true; count += c.second; @@ -948,8 +947,7 @@ int Font::latexWriteEndChanges(odocstream & os, BufferParams const & bparams, // to do correct environment nesting Encoding const * const ascii = encodings.getFromLyXName("ascii"); pair const c = switchEncoding(os, bparams, - runparams.moving_arg, *(runparams.encoding), - *ascii); + runparams, *ascii); BOOST_ASSERT(c.first); count += c.second; runparams.encoding = ascii; diff --git a/src/Paragraph.cpp b/src/Paragraph.cpp index f1c7766b7b..3a94b5ea8b 100644 --- a/src/Paragraph.cpp +++ b/src/Paragraph.cpp @@ -68,6 +68,7 @@ namespace lyx { using support::contains; using support::prefixIs; +using support::subst; using support::suffixIs; using support::rsplit; @@ -2163,12 +2164,21 @@ bool Paragraph::simpleTeXOnePar(Buffer const & buf, open_font = false; } + // close babel's font environment before opening CJK. 
+ if (!running_font.language()->babel().empty() && + font.language()->encoding()->package() == Encoding::CJK) { + string end_tag = subst(lyxrc.language_command_end, + "$$lang", + running_font.language()->babel()); + os << from_ascii(end_tag); + column += end_tag.length(); + } + // Switch file encoding if necessary - if (runparams.encoding->package() == Encoding::inputenc && - font.language()->encoding()->package() == Encoding::inputenc) { + if (runparams.encoding->package() != Encoding::none && + font.language()->encoding()->package() != Encoding::none) { std::pair const enc_switch = switchEncoding(os, bparams, - runparams.moving_arg, *(runparams.encoding), - *(font.language()->encoding())); + runparams, *(font.language()->encoding())); if (enc_switch.first) { column += enc_switch.second; runparams.encoding = font.language()->encoding(); diff --git a/src/output_latex.cpp b/src/output_latex.cpp index d8481d5f52..c0aff236d4 100644 --- a/src/output_latex.cpp +++ b/src/output_latex.cpp @@ -43,6 +43,17 @@ using std::make_pair; namespace { + +enum OpenEncoding { + none, + inputenc, + CJK + }; + +static int open_encoding_ = none; +static bool cjk_inherited_ = false; + + ParagraphList::const_iterator TeXEnvironment(Buffer const & buf, ParagraphList const & paragraphs, @@ -161,6 +172,18 @@ TeXEnvironment(Buffer const & buf, os << from_ascii(style->latexparam()) << '\n'; texrow.newline(); } + + // in multilingual environments, the CJK tags have to be nested properly + bool cjk_nested = false; + if (par_language->encoding()->package() == Encoding::CJK && + open_encoding_ != CJK && pit->isMultiLingual(bparams)) { + os << "\\begin{CJK}{" << from_ascii(par_language->encoding()->latexName()) + << "}{}%\n"; + open_encoding_ = CJK; + cjk_nested = true; + texrow.newline(); + } + ParagraphList::const_iterator par = pit; do { par = TeXOnePar(buf, paragraphs, par, os, texrow, runparams); @@ -199,6 +222,14 @@ TeXEnvironment(Buffer const & buf, && par->params().depth() == 
pit->params().depth() && par->params().leftIndent() == pit->params().leftIndent()); + if (open_encoding_ == CJK && cjk_nested) { + // We need to close the encoding even if it does not change + // to do correct environment nesting + os << "\\end{CJK}\n"; + texrow.newline(); + open_encoding_ = none; + } + if (style->isEnvironment()) { os << "\\end{" << from_ascii(style->latexname()) << "}\n"; texrow.newline(); @@ -261,6 +292,32 @@ TeXOnePar(Buffer const & buf, OutputParams runparams = runparams_in; runparams.moving_arg |= style->needprotect; + // we are at the beginning of an inset and CJK is already open. + if (pit == paragraphs.begin() && runparams.local_font != 0 && + open_encoding_ == CJK) { + cjk_inherited_ = true; + open_encoding_ = none; + } + + if (pit == paragraphs.begin() && runparams.local_font == 0) { + // Open a CJK environment at the beginning of the main buffer + // if the document's language is a CJK language + if (bparams.encoding().package() == Encoding::CJK) { + os << "\\begin{CJK}{" << from_ascii(bparams.encoding().latexName()) + << "}{}%\n"; + texrow.newline(); + open_encoding_ = CJK; + } + if (!lyxrc.language_auto_begin && !bparams.language->babel().empty()) { + // FIXME UNICODE + os << from_utf8(subst(lyxrc.language_command_begin, + "$$lang", + bparams.language->babel())) + << '\n'; + texrow.newline(); + } + } + // This paragraph's language Language const * const par_language = pit->getParLanguage(bparams); // The document's language @@ -342,13 +399,16 @@ TeXOnePar(Buffer const & buf, else os << "\\L{"; } - os << from_ascii(subst( - lyxrc.language_command_begin, - "$$lang", - par_language->babel())) - // the '%' is necessary to prevent unwanted whitespace - << "%\n"; - texrow.newline(); + // With CJK, the CJK tag has to be closed first (see below) + if (runparams.encoding->package() != Encoding::CJK) { + os << from_ascii(subst( + lyxrc.language_command_begin, + "$$lang", + par_language->babel())) + // the '%' is necessary to prevent unwanted 
whitespace + << "%\n"; + texrow.newline(); + } } } @@ -356,14 +416,14 @@ TeXOnePar(Buffer const & buf, // encoding, since this only affects the position of the outputted // \inputencoding command; the encoding switch will occur when necessary if (bparams.inputenc == "auto" && - runparams.encoding->package() == Encoding::inputenc) { + runparams.encoding->package() != Encoding::none) { // Look ahead for future encoding changes. // We try to output them at the beginning of the paragraph, // since the \inputencoding command is not allowed e.g. in // sections. for (pos_type i = 0; i < pit->size(); ++i) { char_type const c = pit->getChar(i); - if (c < 0x80) + if (runparams.encoding->package() == Encoding::inputenc && c < 0x80) continue; if (pit->isInset(i)) break; @@ -372,17 +432,44 @@ TeXOnePar(Buffer const & buf, // encoding to that required by the language of c. Encoding const * const encoding = pit->getFontSettings(bparams, i).language()->encoding(); - pair enc_switch = switchEncoding(os, bparams, false, - *(runparams.encoding), *encoding); - if (encoding->package() == Encoding::inputenc && enc_switch.first) { - runparams.encoding = encoding; - if (enc_switch.second > 0) { - // the '%' is necessary to prevent unwanted whitespace - os << "%\n"; + + // with CJK, only add switch if we have CJK content at the beginning + // of the paragraph + if (encoding->package() != Encoding::CJK || i == 0) { + OutputParams tmp_rp = runparams; + runparams.moving_arg = false; + pair enc_switch = switchEncoding(os, bparams, runparams, + *encoding); + runparams = tmp_rp; + // the following is necessary after a CJK environment in a multilingual + // context (nesting issue). 
+ if (par_language->encoding()->package() == Encoding::CJK && + open_encoding_ != CJK && !cjk_inherited_) { + os << "\\begin{CJK}{" << from_ascii(par_language->encoding()->latexName()) + << "}{}%\n"; + open_encoding_ = CJK; texrow.newline(); } + if (encoding->package() != Encoding::none && enc_switch.first) { + if (enc_switch.second > 0) { + // the '%' is necessary to prevent unwanted whitespace + os << "%\n"; + texrow.newline(); + } + // With CJK, the CJK tag had to be closed first (see above) + if (runparams.encoding->package() == Encoding::CJK) { + os << from_ascii(subst( + lyxrc.language_command_begin, + "$$lang", + par_language->babel())) + // the '%' is necessary to prevent unwanted whitespace + << "%\n"; + texrow.newline(); + } + runparams.encoding = encoding; + } + break; } - break; } } @@ -540,20 +627,23 @@ TeXOnePar(Buffer const & buf, os << '\n'; texrow.newline(); } - if (lyxrc.language_command_end.empty()) { - if (!prev_language->babel().empty()) { + // when the paragraph uses CJK, the language has to be closed earlier + if (font.language()->encoding()->package() != Encoding::CJK) { + if (lyxrc.language_command_end.empty()) { + if (!prev_language->babel().empty()) { + os << from_ascii(subst( + lyxrc.language_command_begin, + "$$lang", + prev_language->babel())); + pending_newline = true; + } + } else if (!par_language->babel().empty()) { os << from_ascii(subst( - lyxrc.language_command_begin, + lyxrc.language_command_end, "$$lang", - prev_language->babel())); + par_language->babel())); pending_newline = true; } - } else if (!par_language->babel().empty()) { - os << from_ascii(subst( - lyxrc.language_command_end, - "$$lang", - par_language->babel())); - pending_newline = true; } } if (closing_rtl_ltr_environment) @@ -564,6 +654,56 @@ TeXOnePar(Buffer const & buf, texrow.newline(); } + // if this is a CJK-paragraph and the next isn't, close CJK + // also if the next paragraph is a multilingual environment (because of nesting) + if (boost::next(pit) != 
paragraphs.end() && open_encoding_ == CJK && + (boost::next(pit)->getParLanguage(bparams)->encoding()->package() != Encoding::CJK || + boost::next(pit)->layout()->isEnvironment() && boost::next(pit)->isMultiLingual(bparams)) + // in environments, CJK has to be closed later (nesting!) + && !style->isEnvironment()) { + os << "\\end{CJK}\n"; + open_encoding_ = none; + } + + // If this is the last paragraph, close the CJK environment + // if necessary. If it's an environment, we'll have to \end that first. + if (boost::next(pit) == paragraphs.end() && !style->isEnvironment()) { + switch (open_encoding_) { + case CJK: { + // end of main text + if (runparams.local_font == 0) { + os << '\n'; + texrow.newline(); + os << "\\end{CJK}\n"; + texrow.newline(); + // end of an inset + } else + os << "\\end{CJK}"; + open_encoding_ = none; + break; + } + case inputenc: { + os << "\\egroup"; + open_encoding_ = none; + break; + } + case none: + default: + // do nothing + break; + } + // auto_end tag only if the last par is in a babel language + if (runparams.local_font == 0 && !lyxrc.language_auto_end && + !bparams.language->babel().empty() && + font.language()->encoding()->package() != Encoding::CJK) { + os << from_utf8(subst(lyxrc.language_command_end, + "$$lang", + bparams.language->babel())) + << '\n'; + texrow.newline(); + } + } + // If this is the last paragraph, and a local_font was set upon entering // the inset, the encoding should be set back to that local_font's // encoding. We don't use switchEncoding(), because no explicit encoding @@ -693,13 +833,26 @@ void latexParagraphs(Buffer const & buf, } texrow.newline(); } + // If the last paragraph is an environment, we'll have to close + // CJK at the very end to do proper nesting. 
+ if (open_encoding_ == CJK) { + os << "\\end{CJK}\n"; + texrow.newline(); + open_encoding_ = none; + } + // reset inherited encoding + if (cjk_inherited_) { + open_encoding_ = CJK; + cjk_inherited_ = false; + } } pair switchEncoding(odocstream & os, BufferParams const & bparams, - bool moving_arg, Encoding const & oldEnc, - Encoding const & newEnc) + OutputParams const & runparams, Encoding const & newEnc) { + Encoding const oldEnc = *runparams.encoding; + bool moving_arg = runparams.moving_arg; if ((bparams.inputenc != "auto" && bparams.inputenc != "default") || moving_arg) return make_pair(false, 0); @@ -724,27 +877,49 @@ pair switchEncoding(odocstream & os, BufferParams const & bparams, if (bparams.inputenc == "default") return make_pair(true, 0); - docstring const inputenc(from_ascii(newEnc.latexName())); + docstring const inputenc_arg(from_ascii(newEnc.latexName())); switch (newEnc.package()) { case Encoding::none: // shouldn't ever reach here, see above return make_pair(true, 0); case Encoding::inputenc: { - int count = inputenc.length(); - if (oldEnc.package() == Encoding::CJK) { + int count = inputenc_arg.length(); + if (oldEnc.package() == Encoding::CJK && + open_encoding_ == CJK) { os << "\\end{CJK}"; + open_encoding_ = none; count += 9; } - os << "\\inputencoding{" << inputenc << '}'; + else if (oldEnc.package() == Encoding::inputenc && + open_encoding_ == inputenc) { + os << "\\egroup"; + open_encoding_ = none; + count += 7; + } + if (runparams.local_font != 0 && oldEnc.package() == Encoding::CJK) { + // within insets, \inputenc switches need to be + // embraced within \bgroup ... \egroup; else CJK fails. 
+ os << "\\bgroup"; + count += 7; + open_encoding_ = inputenc; + } + os << "\\inputencoding{" << inputenc_arg << '}'; return make_pair(true, count + 16); - } + } case Encoding::CJK: { - int count = inputenc.length(); - if (oldEnc.package() == Encoding::CJK) { + int count = inputenc_arg.length(); + if (oldEnc.package() == Encoding::CJK && + open_encoding_ == CJK) { os << "\\end{CJK}"; count += 9; } - os << "\\begin{CJK}{" << inputenc << "}{}"; + if (oldEnc.package() == Encoding::inputenc && + open_encoding_ == inputenc) { + os << "\\egroup"; + count += 7; + } + os << "\\begin{CJK}{" << inputenc_arg << "}{}"; + open_encoding_ = CJK; return make_pair(true, count + 15); } } diff --git a/src/output_latex.h b/src/output_latex.h index 008564ed79..886306fdd3 100644 --- a/src/output_latex.h +++ b/src/output_latex.h @@ -44,12 +44,11 @@ void latexParagraphs(Buffer const & buf, OutputParams const &, std::string const & everypar = std::string()); -/// Switch the encoding of \p os from \p oldEnc to \p newEnc if needed. +/// Switch the encoding of \p os from runparams.encoding to \p newEnc if needed. /// \return (did the encoding change?, number of characters written to \p os) -std::pair switchEncoding(odocstream & os, +std::pair switchEncoding(odocstream & os, BufferParams const & bparams, - bool moving_arg, Encoding const & oldEnc, - Encoding const & newEnc); + OutputParams const &, Encoding const & newEnc); } // namespace lyx