Georg's patch for bug 3043:

Introduces new encodings for the CJK-languages.
The CJK-languages can now be selected from LyX's menu.
For remaining small problems, see the thread "Help needed for bug 3043" on the devel-list.

git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/trunk@18216 a592a061-630c-0410-9148-cb99ea01b6c8
This commit is contained in:
Uwe Stöhr 2007-05-06 20:26:02 +00:00
parent 54d2810b01
commit 80b94c6bf5
15 changed files with 411 additions and 135 deletions

View File

@ -1,6 +1,18 @@
LyX file-format changes
-----------------------
2007-05-06 Uwe Stöhr <uwestoehr@web.de>
* format incremented to 268: add support for the CJK encodings
- all encodings supported by CJK.sty are now in lib/encodings
- lib/encodings has got two new flags:
- encoding package:
"inputenc" for those encodings that use the inputenc package and
"CJK" for the CJK encodings
- "fixed"/"variable" to distinguish fixed-width (single-byte) from variable-width (multi-byte) encodings
- lib/languages now allows setting an empty language for babel;
in this case babel isn't called.
2007-05-04 Jürgen Spitzmüller <j.spitzmueller@gmx.de>
* format incremented to 267: add plain utf8 encoding (for XeTeX).

View File

@ -1,134 +1,189 @@
# FIXME: Have a look at the encodings known by the inputenc package and add
# missing ones. Caution: File format change!
# Note that you can only add single byte encodings to this file without
# changing some C++ code.
# The only multibyte encoding that is currently supported is utf8, and this
# support is hardcoded to the iconv name "UTF-8".
# Note that you can only add singlebyte encodings to this file.
# LyX does not support the output of multibyte encodings (e.g. utf16).
# It does support singlebyte encodings with variable width (e.g. utf8).
# These are marked with the "variable" keyword.
# Fixed width encodings are marked with the "fixed" keyword.
# Order of names: LyX name LaTeX name iconv name
# Syntax: Encoding <LyX name> <LaTeX name> <iconv name> fixed|variable <package> End
Encoding utf8 utf8 UTF-8
# encodings used by inputenc.sty
Encoding utf8 utf8 UTF-8 variable inputenc
End
# This one is used by many CJK packages. utf8 is supposed to be the successor,
# but does not have all features of utf8x yet.
Encoding utf8x utf8x UTF-8
Encoding utf8x utf8x UTF-8 variable inputenc
End
# A plain utf8 encoding that does not use the inputenc package.
# Such an encoding is required for XeTeX.
Encoding utf8-plain utf8-plain UTF-8
Encoding utf8-plain utf8-plain UTF-8 variable none
End
# This encoding is used to typeset Armenian using the armtex package
Encoding armscii8 armscii8 ARMSCII-8
Encoding armscii8 armscii8 ARMSCII-8 fixed inputenc
End
Encoding iso8859-1 latin1 ISO-8859-1
Encoding iso8859-1 latin1 ISO-8859-1 fixed inputenc
End
Encoding iso8859-2 latin2 ISO-8859-2
Encoding iso8859-2 latin2 ISO-8859-2 fixed inputenc
End
Encoding iso8859-3 latin3 ISO-8859-3
Encoding iso8859-3 latin3 ISO-8859-3 fixed inputenc
End
Encoding iso8859-4 latin4 ISO-8859-4
Encoding iso8859-4 latin4 ISO-8859-4 fixed inputenc
End
Encoding iso8859-5 iso88595 ISO-8859-5
Encoding iso8859-5 iso88595 ISO-8859-5 fixed inputenc
End
# Not standard, see http://tug.ctan.org/tex-archive/language/arabic/arabi/arabi/texmf/latex/arabi/
Encoding iso8859-6 8859-6 ISO-8859-6
Encoding iso8859-6 8859-6 ISO-8859-6 fixed inputenc
End
Encoding iso8859-7 iso-8859-7 ISO-8859-7
Encoding iso8859-7 iso-8859-7 ISO-8859-7 fixed inputenc
End
Encoding iso8859-8 8859-8 ISO-8859-8
Encoding iso8859-8 8859-8 ISO-8859-8 fixed inputenc
End
Encoding iso8859-9 latin5 ISO-8859-9
Encoding iso8859-9 latin5 ISO-8859-9 fixed inputenc
End
# Not standard, see http://www.vtex.lt/tex/littex/index.html
Encoding iso8859-13 l7xenc ISO-8859-13
Encoding iso8859-13 l7xenc ISO-8859-13 fixed inputenc
End
Encoding iso8859-15 latin9 ISO-8859-15
Encoding iso8859-15 latin9 ISO-8859-15 fixed inputenc
End
Encoding iso8859-16 latin10 ISO-8859-16
Encoding iso8859-16 latin10 ISO-8859-16 fixed inputenc
End
Encoding cp437 cp437 CP437
Encoding cp437 cp437 CP437 fixed inputenc
End
# cp437, but on position 225 is sz instead of beta
Encoding cp437de cp437de CP437
Encoding cp437de cp437de CP437 fixed inputenc
End
Encoding cp850 cp850 CP850
Encoding cp850 cp850 CP850 fixed inputenc
End
Encoding cp852 cp852 CP852
Encoding cp852 cp852 CP852 fixed inputenc
End
Encoding cp855 cp855 CP855
Encoding cp855 cp855 CP855 fixed inputenc
End
Encoding cp858 cp858 CP858
Encoding cp858 cp858 CP858 fixed inputenc
End
Encoding cp862 cp862 CP862
Encoding cp862 cp862 CP862 fixed inputenc
End
Encoding cp865 cp865 CP865
Encoding cp865 cp865 CP865 fixed inputenc
End
Encoding cp866 cp866 CP866
Encoding cp866 cp866 CP866 fixed inputenc
End
Encoding cp1250 cp1250 CP1250
Encoding cp1250 cp1250 CP1250 fixed inputenc
End
Encoding cp1251 cp1251 CP1251
Encoding cp1251 cp1251 CP1251 fixed inputenc
End
Encoding cp1252 cp1252 CP1252
Encoding cp1252 cp1252 CP1252 fixed inputenc
End
Encoding cp1255 cp1255 CP1255
Encoding cp1255 cp1255 CP1255 fixed inputenc
End
# Not standard, see http://tug.ctan.org/tex-archive/language/arabic/arabi/arabi/texmf/latex/arabi/
Encoding cp1256 cp1256 CP1256
Encoding cp1256 cp1256 CP1256 fixed inputenc
End
Encoding cp1257 cp1257 CP1257
Encoding cp1257 cp1257 CP1257 fixed inputenc
End
Encoding koi8 koi8-r KOI8-R
Encoding koi8 koi8-r KOI8-R fixed inputenc
End
Encoding koi8-u koi8-u KOI8-U
Encoding koi8-u koi8-u KOI8-U fixed inputenc
End
Encoding pt154 pt154 PT154 fixed inputenc
End
Encoding pt254 pt254 PT254 fixed inputenc
End
# encodings used by CJK.sty
# The following encodings that are supported by the CJK package are not
# included here, because they are not widely used and lack proper iconv support:
# Bg5+, GBt
# See the NOTES file of libiconv for details.
# The following encodings can't be handled directly, because the code points
# of TeX control characters like {, } and \ can occur in the second byte:
# For traditional chinese
#Encoding big5 Bg5 BIG5 variable CJK
#End
# For japanese
#Encoding shift-jis SJIS SJIS variable CJK
#End
# The following encodings need hardcoded support of the encodable unicode
# range, but are known by iconv:
# For simplified chinese
Encoding euc-cn GB EUC-CN variable CJK
End
# For simplified chinese
Encoding gbk GBK GBK variable CJK
End
# For japanese
Encoding jis JIS JIS variable CJK
End
# For korean
Encoding euc-kr KS EUC-KR variable CJK
End
# The CJK package has yet another name for utf8...
Encoding utf8-cjk UTF8 UTF-8 variable CJK
End
# For traditional chinese
Encoding euc-tw EUC-TW EUC-TW variable CJK
End
# For japanese
Encoding euc-jp EUC-JP EUC-JP variable CJK
End
# This one needs hardcoded support, since the inputenc package does not know
# tis620-0, and thailatex sets up babel directly to use tis620-0, so the value
# for inputenc is never output to .tex files (but needed for the hardcoded
# tis620-0, and thailatex sets up babel directly to use tis620-0, so the
# LaTeX name is never output to .tex files (but needed for the hardcoded
# tis620-0 support).
Encoding tis620-0 tis620-0 TIS620-0
Encoding tis620-0 tis620-0 TIS620-0 fixed none
End
Encoding pt154 pt154 PT154
End
Encoding pt254 pt254 PT254
End
# Pure 7bit ASCII encoding (partially hardcoded in LyX)
Encoding ascii ascii ascii
Encoding ascii ascii ascii fixed none
End

View File

@ -2,10 +2,7 @@
afrikaans afrikaans "Afrikaans" false iso8859-15 af_ZA ""
american american "American" false iso8859-15 en_US ""
arabic arabic "Arabic" true cp1256 ar_SA ""
# loading babel with the option "english" as armtex overwrites this later
# armtex provides the armscii8 encoding and translates words instead of babel
# the english option can later be omitted when the patch for bug 3043 is in.
armenian english "Armenian" false armscii8 hy_AM ""
armenian "" "Armenian" false armscii8 hy_AM ""
austrian austrian "Austrian" false iso8859-15 de_AT ""
naustrian naustrian "Austrian (new spelling)" false iso8859-15 de_AT ""
bahasa bahasa "Bahasa" false iso8859-15 in_ID ""
@ -18,6 +15,8 @@ bulgarian bulgarian "Bulgarian" false cp1251 bg_BG ""
canadian canadian "Canadian" false iso8859-15 en_CA ""
canadien canadien "French Canadian" false iso8859-15 fr_CA ""
catalan catalan "Catalan" false iso8859-15 ca_ES ""
chinese-simplified "" "Chinese (simplified)" false euc-cn zh ""
chinese-traditional "" "Chinese (traditional)" false utf8-cjk zh ""
croatian croatian "Croatian" false iso8859-2 hr_HR ""
czech czech "Czech" false iso8859-2 cs_CZ ""
danish danish "Danish" false iso8859-15 da_DK ""
@ -39,7 +38,9 @@ hebrew hebrew "Hebrew" true cp1255 he_IL ""
#hungarian hungarian "Hungarian" false iso8859-2 hu_HU ""
irish irish "Irish" false iso8859-15 ga_IE ""
italian italian "Italian" false iso8859-15 it_IT ""
japanese "" "Japanese" false euc-jp ja ""
kazakh kazakh "Kazakh" false pt154 kk_KZ ""
korean "" "Korean" false euc-kr ko ""
#lsorbian lsorbian "Lower Sorbian" false iso8859-2 dsb_DE ""
lithuanian lithuanian "Lithuanian" false iso8859-13 lt_LT ""
latvian latvian "Latvian" false iso8859-13 lv_LV ""

View File

@ -74,7 +74,7 @@ format_relation = [("0_06", [200], generate_minor_versions("0.6" , 4)),
("1_2", [220], generate_minor_versions("1.2" , 4)),
("1_3", [221], generate_minor_versions("1.3" , 7)),
("1_4", range(222,246), generate_minor_versions("1.4" , 4)),
("1_5", range(246,268), generate_minor_versions("1.5" , 0))]
("1_5", range(246,269), generate_minor_versions("1.5" , 0))]
def formats_list():

View File

@ -1325,6 +1325,28 @@ def revert_armenian(document):
document.preamble.append('\\usepackage{armtex}')
def revert_CJK(document):
" Set CJK encodings to default and languages chinese, japanese and korean to english. "
# LaTeX names of the encodings that are handled by CJK.sty.
encodings = ["Bg5", "Bg5+", "GB", "GBt", "GBK", "JIS",
"KS", "SJIS", "UTF8", "EUC-TW", "EUC-JP"]
# Locate the \inputencoding line in the document header.
i = find_token(document.header, "\\inputencoding", 0)
if i == -1:
# No \inputencoding line present: add one set to "auto".
document.header.append("\\inputencoding auto")
else:
inputenc = get_value(document.header, "\\inputencoding", i)
if inputenc in encodings:
# A CJK encoding is replaced by "default".
document.header[i] = "\\inputencoding default"
# Mirror the header value in document.inputencoding.
document.inputencoding = get_value(document.header, "\\inputencoding", 0)
# The CJK languages are replaced by english, both on the document
# object and in the \language header line (if there is one).
if document.language == "chinese-simplified" or \
document.language == "chinese-traditional" or \
document.language == "japanese" or document.language == "korean":
document.language = "english"
i = find_token(document.header, "\\language", 0)
if i != -1:
document.header[i] = "\\language english"
##
# Conversion hub
#
@ -1351,9 +1373,11 @@ convert = [[246, []],
[264, [convert_cv_textclass]],
[265, [convert_tableborder]],
[266, []],
[267, []]]
[267, []],
[268, []]]
revert = [[266, [revert_utf8plain]],
revert = [[267, [revert_CJK]],
[266, [revert_utf8plain]],
[265, [revert_armenian]],
[264, [revert_tableborder]],
[263, [revert_cv_textclass]],
@ -1380,3 +1404,4 @@ revert = [[266, [revert_utf8plain]],
if __name__ == "__main__":
pass

View File

@ -141,7 +141,7 @@ using std::string;
namespace {
int const LYX_FORMAT = 267;
int const LYX_FORMAT = 268;
} // namespace anon
@ -974,6 +974,16 @@ void Buffer::writeLaTeXSource(odocstream & os,
texrow().newline();
}
Encoding const & encoding = params().encoding();
if (encoding.package() == Encoding::CJK) {
// Open a CJK environment, since in contrast to the encodings
// handled by inputenc the document encoding is not set in
// the preamble if it is handled by CJK.sty.
os << "\\begin{CJK}{" << from_ascii(encoding.latexName())
<< "}{}\n";
texrow().newline();
}
// if we are doing a real file with body, even if this is the
// child of some other buffer, let's cut the link here.
// This happens for example if only a child document is printed.
@ -994,6 +1004,14 @@ void Buffer::writeLaTeXSource(odocstream & os,
os << endl;
texrow().newline();
if (encoding.package() == Encoding::CJK) {
// Close the open CJK environment.
// latexParagraphs will have opened one even if the last text
// was not CJK.
os << "\\end{CJK}\n";
texrow().newline();
}
if (!lyxrc.language_auto_end &&
!params().language->babel().empty()) {
os << from_utf8(subst(lyxrc.language_command_end,

View File

@ -893,16 +893,15 @@ bool BufferParams::writeLaTeX(odocstream & os, LaTeXFeatures & features,
if (inputenc == "auto") {
string const doc_encoding =
language->encoding()->latexName();
Encoding::Package const package =
language->encoding()->package();
// Create a list with all the input encodings used
// in the document
std::set<string> encodings =
features.getEncodingSet(doc_encoding);
// thailatex does not use the inputenc package, but sets up
// babel directly for tis620-0 encoding, therefore we must
// not request inputenc for tis620-0 encoding
if (!encodings.empty() || doc_encoding != "tis620-0") {
if (!encodings.empty() || package == Encoding::inputenc) {
os << "\\usepackage[";
std::set<string>::const_iterator it = encodings.begin();
std::set<string>::const_iterator const end = encodings.end();
@ -912,7 +911,7 @@ bool BufferParams::writeLaTeX(odocstream & os, LaTeXFeatures & features,
}
for (; it != end; ++it)
os << ',' << from_ascii(*it);
if (doc_encoding != "tis620-0") {
if (package == Encoding::inputenc) {
if (!encodings.empty())
os << ',';
os << from_ascii(doc_encoding);
@ -920,12 +919,24 @@ bool BufferParams::writeLaTeX(odocstream & os, LaTeXFeatures & features,
os << "]{inputenc}\n";
texrow.newline();
}
// utf8-plain is for XeTeX users (inputenc not desired)
} else if (inputenc != "default" && inputenc != "tis620-0" &&
inputenc != "ascii" && inputenc != "utf8-plain") {
os << "\\usepackage[" << from_ascii(inputenc)
<< "]{inputenc}\n";
texrow.newline();
if (package == Encoding::CJK) {
os << "\\usepackage{CJK}\n";
texrow.newline();
}
} else if (inputenc != "default") {
switch (language->encoding()->package()) {
case Encoding::none:
break;
case Encoding::inputenc:
os << "\\usepackage[" << from_ascii(inputenc)
<< "]{inputenc}\n";
texrow.newline();
break;
case Encoding::CJK:
os << "\\usepackage{CJK}\n";
texrow.newline();
break;
}
}
// The encoding "armscii8" is only available when the package "armtex" is loaded.

View File

@ -123,25 +123,42 @@ struct CharInfo {
typedef std::map<char_type, CharInfo> CharInfoMap;
CharInfoMap unicodesymbols;
/// One past the highest code point in UCS4 encoding (1<<20 + 1<<16), i.e. the number of UCS4 code points
char_type const max_ucs4 = 0x110000;
} // namespace anon
Encoding::Encoding(string const & n, string const & l, string const & i)
: Name_(n), LatexName_(l), iconvName_(i)
Encoding::Encoding(string const & n, string const & l, string const & i,
bool f, Encoding::Package p)
: Name_(n), LatexName_(l), iconvName_(i), fixedwidth_(f), package_(p)
{
if (n == "ascii")
if (n == "ascii") {
// ASCII can encode 128 code points and nothing else
start_encodable_ = 128;
else if (i == "UTF-8")
// UTF8 can encode all 1<<20 + 1<<16 UCS4 code points
start_encodable_ = 0x110000;
else {
start_encodable_ = 0;
// temporarily switch off lyxerr, since we will generate iconv errors
lyxerr.disable();
complete_ = true;
} else if (i == "UTF-8") {
// UTF8 can encode all UCS4 code points
start_encodable_ = max_ucs4;
complete_ = true;
} else {
complete_ = false;
}
}
void Encoding::init() const
{
start_encodable_ = 0;
// temporarily switch off lyxerr, since we will generate iconv errors
lyxerr.disable();
if (fixedwidth_) {
// We do not need to check all UCS4 code points, it is enough
// if we check all 256 code points of this encoding.
for (unsigned short j = 0; j < 256; ++j) {
char const c = j;
std::vector<char_type> const ucs4 = eightbit_to_ucs4(&c, 1, i);
std::vector<char_type> const ucs4 = eightbit_to_ucs4(&c, 1, iconvName_);
if (ucs4.size() == 1) {
char_type const c = ucs4[0];
CharInfoMap::const_iterator const it = unicodesymbols.find(c);
@ -149,19 +166,36 @@ Encoding::Encoding(string const & n, string const & l, string const & i)
encodable_.insert(c);
}
}
lyxerr.enable();
CharSet::iterator it = encodable_.find(start_encodable_);
while (it != encodable_.end()) {
encodable_.erase(it);
++start_encodable_;
it = encodable_.find(start_encodable_);
} else {
// We do not know how many code points this encoding has, and
// they do not have a direct representation as a single byte,
// therefore we need to check all UCS4 code points.
// This is expensive!
for (char_type c = 0; c < max_ucs4; ++c) {
std::vector<char> const eightbit = ucs4_to_eightbit(&c, 1, iconvName_);
if (!eightbit.empty()) {
CharInfoMap::const_iterator const it = unicodesymbols.find(c);
if (it == unicodesymbols.end() || !it->second.force)
encodable_.insert(c);
}
}
}
lyxerr.enable();
CharSet::iterator it = encodable_.find(start_encodable_);
while (it != encodable_.end()) {
encodable_.erase(it);
++start_encodable_;
it = encodable_.find(start_encodable_);
}
complete_ = true;
}
docstring const Encoding::latexChar(char_type c) const
{
// validate() should have been called before
//BOOST_ASSERT(complete_);
if (c < start_encodable_)
return docstring(1, c);
if (encodable_.find(c) == encodable_.end()) {
@ -360,8 +394,32 @@ void Encodings::read(FileName const & encfile, FileName const & symbolsfile)
string const latexname = lex.getString();
lex.next();
string const iconvname = lex.getString();
lex.next();
string const width = lex.getString();
bool fixedwidth;
if (width == "fixed")
fixedwidth = true;
else if (width == "variable")
fixedwidth = false;
else
lex.printError("Encodings::read: "
"Unknown width: `$$Token'");
lex.next();
string const p = lex.getString();
Encoding::Package package;
if (p == "none")
package = Encoding::none;
else if (p == "inputenc")
package = Encoding::inputenc;
else if (p == "CJK")
package = Encoding::CJK;
else
lex.printError("Encodings::read: "
"Unknown package: `$$Token'");
LYXERR(Debug::INFO) << "Reading encoding " << name << endl;
encodinglist[name] = Encoding(name, latexname, iconvname);
encodinglist[name] = Encoding(name, latexname,
iconvname, fixedwidth,
package);
if (lex.lex() != et_end)
lex.printError("Encodings::read: "
"missing end");

View File

@ -28,11 +28,19 @@ class LaTeXFeatures;
///
class Encoding {
public:
/// Which LaTeX package handles this encoding?
enum Package {
none,
inputenc,
CJK
};
///
Encoding() {}
///
Encoding(std::string const & n, std::string const & l,
std::string const & i);
std::string const & i, bool f, Package p);
///
void init() const;
///
std::string const & name() const { return Name_; }
///
@ -48,6 +56,8 @@ public:
* character is returned.
*/
docstring const latexChar(char_type c) const;
/// Which LaTeX package handles this encoding?
Package package() const { return package_; }
private:
///
std::string Name_;
@ -55,15 +65,27 @@ private:
std::string LatexName_;
///
std::string iconvName_;
/// Is this a fixed width encoding?
bool fixedwidth_;
///
typedef std::set<char_type> CharSet;
/// Set of UCS4 characters that we can encode (for singlebyte
/// encodings only)
CharSet encodable_;
mutable CharSet encodable_;
/// All code points below this are encodable. This helps us to avoid
/// lookup of ASCII characters in encodable_ and gives about 1 sec
/// speedup on export of the Userguide.
char_type start_encodable_;
mutable char_type start_encodable_;
/// Which LaTeX package handles this encoding?
Package package_;
/**
* If this is true the stored information about the encoding covers
* all encodable characters. We set this to false initially so that
* we only need to query iconv for the actually used encodings.
* This is needed especially for the multibyte encodings: if we
* completed all encoding info on startup, it would take 2-3 minutes.
*/
mutable bool complete_;
};
class Encodings {

View File

@ -18,11 +18,14 @@
#include "BufferParams.h" // stateText
#include "debug.h"
#include "Encoding.h"
#include "gettext.h"
#include "Language.h"
#include "Color.h"
#include "Lexer.h"
#include "LyXRC.h"
#include "output_latex.h"
#include "OutputParams.h"
#include "support/lstrings.h"
@ -163,37 +166,37 @@ bool operator==(Font::FontBits const & lhs,
Font::Font()
: bits(sane), lang(default_language)
: bits(sane), lang(default_language), open_encoding_(false)
{}
Font::Font(Font::FONT_INIT1)
: bits(inherit), lang(default_language)
: bits(inherit), lang(default_language), open_encoding_(false)
{}
Font::Font(Font::FONT_INIT2)
: bits(ignore), lang(ignore_language)
: bits(ignore), lang(ignore_language), open_encoding_(false)
{}
Font::Font(Font::FONT_INIT3)
: bits(sane), lang(default_language)
: bits(sane), lang(default_language), open_encoding_(false)
{}
Font::Font(Font::FONT_INIT1, Language const * l)
: bits(inherit), lang(l)
: bits(inherit), lang(l), open_encoding_(false)
{}
Font::Font(Font::FONT_INIT2, Language const * l)
: bits(ignore), lang(l)
: bits(ignore), lang(l), open_encoding_(false)
{}
Font::Font(Font::FONT_INIT3, Language const * l)
: bits(sane), lang(l)
: bits(sane), lang(l), open_encoding_(false)
{}
@ -736,7 +739,9 @@ void Font::lyxWriteChanges(Font const & orgfont,
/// Writes the head of the LaTeX needed to impose this font
// Returns number of chars written.
int Font::latexWriteStartChanges(odocstream & os, Font const & base,
int Font::latexWriteStartChanges(odocstream & os, BufferParams const & bparams,
OutputParams const & runparams,
Font const & base,
Font const & prev) const
{
bool env = false;
@ -758,6 +763,20 @@ int Font::latexWriteStartChanges(odocstream & os, Font const & base,
"$$lang", language()->babel());
os << from_ascii(tmp);
count += tmp.length();
} else {
os << '{';
count += 1;
}
}
if (language()->encoding()->package() == Encoding::CJK) {
int const c = switchEncoding(os, bparams,
runparams.moving_arg, *(runparams.encoding),
*(language()->encoding()));
if (c > 0) {
open_encoding_ = true;
count += c;
runparams.encoding = language()->encoding();
}
}
@ -832,7 +851,9 @@ int Font::latexWriteStartChanges(odocstream & os, Font const & base,
/// Writes ending block of LaTeX needed to close use of this font
// Returns number of chars written
// This one corresponds to latexWriteStartChanges(). (Asger)
int Font::latexWriteEndChanges(odocstream & os, Font const & base,
int Font::latexWriteEndChanges(odocstream & os, BufferParams const & bparams,
OutputParams const & runparams,
Font const & base,
Font const & next) const
{
int count = 0;
@ -893,6 +914,19 @@ int Font::latexWriteEndChanges(odocstream & os, Font const & base,
count += 6;
}
if (open_encoding_) {
// We need to close the encoding even if it does not change
// to do correct environment nesting
Encoding const * const ascii = encodings.getFromLyXName("ascii");
int const c = switchEncoding(os, bparams,
runparams.moving_arg, *(runparams.encoding),
*ascii);
BOOST_ASSERT(c > 0);
count += c;
runparams.encoding = ascii;
open_encoding_ = false;
}
if (language() != base.language() && language() != next.language()) {
os << '}';
++count;

View File

@ -25,6 +25,7 @@ namespace lyx {
class Lexer;
class BufferParams;
class Language;
class OutputParams;
///
@ -296,14 +297,18 @@ public:
to this font. Returns number of chars written. Base is the
font state active now.
*/
int latexWriteStartChanges(odocstream &, Font const & base,
int latexWriteStartChanges(odocstream &, BufferParams const & bparams,
OutputParams const & runparams,
Font const & base,
Font const & prev) const;
/** Writes the tail of the LaTeX needed to change to this font.
Returns number of chars written. Base is the font state we want
to achieve.
*/
int latexWriteEndChanges(odocstream &, Font const & base,
int latexWriteEndChanges(odocstream &, BufferParams const & bparams,
OutputParams const & runparams,
Font const & base,
Font const & next) const;
@ -348,6 +353,8 @@ private:
/// Updates a misc setting according to request
Font::FONT_MISC_STATE setMisc(Font::FONT_MISC_STATE newfont,
Font::FONT_MISC_STATE org);
/// Did latexWriteStartChanges open an encoding environment?
mutable bool open_encoding_;
};

View File

@ -344,26 +344,29 @@ string LaTeXFeatures::getLanguages() const
{
ostringstream languages;
for (LanguageList::const_iterator cit =
UsedLanguages_.begin();
LanguageList::const_iterator const begin = UsedLanguages_.begin();
for (LanguageList::const_iterator cit = begin;
cit != UsedLanguages_.end();
++cit)
languages << (*cit)->babel() << ',';
++cit) {
if (cit != begin)
languages << ',';
languages << (*cit)->babel();
}
return languages.str();
}
set<string> LaTeXFeatures::getEncodingSet(string const & doc_encoding) const
{
// This does only find encodings of languages supported by babel, but
// that does not matter since we don't have a language with an
// encoding supported by inputenc but without babel support.
set<string> encodings;
LanguageList::const_iterator it = UsedLanguages_.begin();
LanguageList::const_iterator end = UsedLanguages_.end();
for (; it != end; ++it)
// thailatex does not use the inputenc package, but sets up
// babel directly for tis620-0 encoding, therefore we must
// not add tis620-0 to the encoding set.
if ((*it)->encoding()->latexName() != doc_encoding &&
(*it)->encoding()->name() != "tis620-0")
(*it)->encoding()->package() == Encoding::inputenc)
encodings.insert((*it)->encoding()->latexName());
return encodings;
}

View File

@ -118,7 +118,7 @@ private:
static PackagesList packages_;
///
typedef std::set<Language const *> LanguageList;
///
/// used languages (only those that are supported by babel)
LanguageList UsedLanguages_;
///
typedef std::set<std::string> UsedFloats;

View File

@ -702,7 +702,8 @@ void Paragraph::Pimpl::simpleTeXSpecialChars(Buffer const & buf,
} else {
if (open_font) {
column += running_font.latexWriteEndChanges(
os, basefont, basefont);
os, bparams, runparams,
basefont, basefont);
open_font = false;
}
@ -757,7 +758,8 @@ void Paragraph::Pimpl::simpleTeXSpecialChars(Buffer const & buf,
// some insets cannot be inside a font change command
if (open_font && inset->noFontChange()) {
column += running_font.latexWriteEndChanges(
os, basefont, basefont);
os, bparams, runparams,
basefont, basefont);
open_font = false;
basefont = owner_->getLayoutFont(bparams, outerfont);
running_font = basefont;
@ -991,7 +993,7 @@ void Paragraph::Pimpl::validate(LaTeXFeatures & features,
{
features.useLanguage(language);
LYXERR(Debug::LATEX) << "Found language "
<< language->babel() << endl;
<< language->lang() << endl;
}
}
@ -1975,7 +1977,8 @@ bool Paragraph::simpleTeXOnePar(Buffer const & buf,
if (body_pos > 0) {
if (open_font) {
column += running_font.latexWriteEndChanges(
os, basefont, basefont);
os, bparams, runparams,
basefont, basefont);
open_font = false;
}
basefont = getLayoutFont(bparams, outerfont);
@ -2027,19 +2030,22 @@ bool Paragraph::simpleTeXOnePar(Buffer const & buf,
font.language() != running_font.language()))
{
column += running_font.latexWriteEndChanges(
os, basefont,
os, bparams, runparams, basefont,
(i == body_pos-1) ? basefont : font);
running_font = basefont;
open_font = false;
}
// Switch file encoding if necessary
int const count = switchEncoding(os, bparams,
runparams.moving_arg, *(runparams.encoding),
*(font.language()->encoding()));
if (count > 0) {
column += count;
runparams.encoding = font.language()->encoding();
if (runparams.encoding->package() == Encoding::inputenc &&
font.language()->encoding()->package() == Encoding::inputenc) {
int const count = switchEncoding(os, bparams,
runparams.moving_arg, *(runparams.encoding),
*(font.language()->encoding()));
if (count > 0) {
column += count;
runparams.encoding = font.language()->encoding();
}
}
// Do we need to change font?
@ -2047,7 +2053,8 @@ bool Paragraph::simpleTeXOnePar(Buffer const & buf,
font.language() != running_font.language()) &&
i != body_pos - 1)
{
column += font.latexWriteStartChanges(os, basefont,
column += font.latexWriteStartChanges(os, bparams,
runparams, basefont,
last_font);
running_font = font;
open_font = true;
@ -2087,11 +2094,12 @@ bool Paragraph::simpleTeXOnePar(Buffer const & buf,
#ifdef FIXED_LANGUAGE_END_DETECTION
if (next_) {
running_font
.latexWriteEndChanges(os, basefont,
.latexWriteEndChanges(os, bparams, runparams,
basefont,
next_->getFont(bparams, 0, outerfont));
} else {
running_font.latexWriteEndChanges(os, basefont,
basefont);
running_font.latexWriteEndChanges(os, bparams,
runparams, basefont, basefont);
}
#else
#ifdef WITH_WARNINGS
@ -2099,7 +2107,8 @@ bool Paragraph::simpleTeXOnePar(Buffer const & buf,
//#warning there as we start another \selectlanguage with the next paragraph if
//#warning we are in need of this. This should be fixed sometime (Jug)
#endif
running_font.latexWriteEndChanges(os, basefont, basefont);
running_font.latexWriteEndChanges(os, bparams, runparams,
basefont, basefont);
#endif
}

View File

@ -297,7 +297,8 @@ TeXOnePar(Buffer const & buf,
}
// Switch file encoding if necessary
if (bparams.inputenc == "auto") {
if (bparams.inputenc == "auto" &&
runparams.encoding->package() == Encoding::inputenc) {
// Look ahead for future encoding changes.
// We try to output them at the beginning of the paragraph,
// since the \inputencoding command is not allowed e.g. in
@ -313,7 +314,8 @@ TeXOnePar(Buffer const & buf,
// encoding to that required by the language of c.
Encoding const * const encoding =
pit->getFontSettings(bparams, i).language()->encoding();
if (switchEncoding(os, bparams, false,
if (encoding->package() == Encoding::inputenc &&
switchEncoding(os, bparams, false,
*(runparams.encoding), *encoding) > 0) {
runparams.encoding = encoding;
os << '\n';
@ -601,25 +603,44 @@ int switchEncoding(odocstream & os, BufferParams const & bparams,
bool moving_arg, Encoding const & oldEnc,
Encoding const & newEnc)
{
// FIXME thailatex does not support the inputenc package, so we
// ignore switches from/to tis620-0 encoding here. This does of
// course only work as long as the non-thai text contains ASCII
// only, but it is the best we can do.
// Since the \inputencoding command does not work inside sections
// we ignore the encoding switch also in moving arguments.
// FIXME We ignore encoding switches from/to encodings that support
// neither the inputenc package nor the CJK package here.
// This does of course only work in special cases (e.g. a switch from
// tis620-0 to latin1 where the text in latin1 contains ASCII only),
// but it is the best we can do.
if (((bparams.inputenc == "auto" && !moving_arg) ||
bparams.inputenc == "default") &&
oldEnc.name() != newEnc.name() &&
oldEnc.name() != "ascii" && newEnc.name() != "ascii" &&
oldEnc.name() != "tis620-0" && newEnc.name() != "tis620-0") {
oldEnc.package() != Encoding::none &&
newEnc.package() != Encoding::none) {
LYXERR(Debug::LATEX) << "Changing LaTeX encoding from "
<< oldEnc.name() << " to "
<< newEnc.name() << endl;
os << setEncoding(newEnc.iconvName());
if (bparams.inputenc != "default") {
docstring const inputenc(from_ascii(newEnc.latexName()));
os << "\\inputencoding{" << inputenc << '}';
return 16 + inputenc.length();
switch (newEnc.package()) {
case Encoding::none:
break;
case Encoding::inputenc: {
int count = inputenc.length();
if (oldEnc.package() == Encoding::CJK) {
os << "\\end{CJK}";
count += 9;
}
os << "\\inputencoding{" << inputenc << '}';
return count + 16;
}
case Encoding::CJK: {
int count = inputenc.length();
if (oldEnc.package() == Encoding::CJK) {
os << "\\end{CJK}";
count += 9;
}
os << "\\begin{CJK}{" << inputenc << "}{}";
return count + 15;
}
}
}
}
return 0;