diff --git a/development/FORMAT b/development/FORMAT index ef67c3cc6f..5d34fd3995 100644 --- a/development/FORMAT +++ b/development/FORMAT @@ -11,6 +11,14 @@ adjustments are made to tex2lyx and bugs are fixed in lyx2lyx. ----------------------- +2013-02-18 Julien Rioux + * Format incremented to 463: + - Use the LyX name of encodings instead of the LaTeX names. + The LyX name must be unique, while the name used by LaTeX + not necessarily, e.g. different packages might implement + support for the same encoding. + - Add utf8-platex encoding. + 2013-02-15 Jürgen Spitzmüller * Format incremented to 462: Support for recent libertine LaTeX fonts a.) Support Libertine Mono: diff --git a/lib/encodings b/lib/encodings index a5bd5ccd6d..c79db405be 100644 --- a/lib/encodings +++ b/lib/encodings @@ -3,7 +3,7 @@ # Note that you can only add singlebyte encodings to this file. # LyX does not support the output of multibyte encodings (e.g. utf16). -# It does support singlebyte encodings with variable with (e.g. utf8). +# It does support singlebyte encodings with variable width (e.g. utf8). # These are marked with the "variable" keyword. # Fixed width encodings are marked with the "fixed" keyword. # The code points of TeX control characters like {, } and \ can occur in the @@ -11,7 +11,20 @@ # set as document encodings and are marked with the "variableunsafe" keyword. # They are only needed for proper tex2lyx import. -# Syntax: Encoding fixed|variable|variableunsafe End +# Most encodings require loading a latex package such as "inputenc" or "CJK". +# There is no "japanese" latex package; rather, this keyword indicates to LyX +# to switch the buffer format and use platex instead of standard (pdf)latex. +# In this case, TeX control characters in high bytes are not a problem. +# The invocation is platex -kanji=<LaTeX name> + +# Syntax: Encoding <LyX name> <LaTeX name> <GUI name> <iconv name> <width> <package> End + +# LyX name: Name used by the file format and in lib/languages. Must be unique! 
+# LaTeX name: Used in the latex export or passed to platex as command-line switch. +# GUI name: Displayed in document settings. +# iconv name: Used by iconv. +# width: One of fixed, variable, or variableunsafe (see above). +# package: One of none, inputenc, CJK, or japanese (see above). # encodings used by inputenc.sty @@ -116,7 +129,7 @@ End Encoding cp1257 cp1257 "Baltic (CP 1257)" CP1257 fixed inputenc End -Encoding koi8 koi8-r "Cyrillic (KOI8-R)" KOI8-R fixed inputenc +Encoding koi8-r koi8-r "Cyrillic (KOI8-R)" KOI8-R fixed inputenc End Encoding koi8-u koi8-u "Cyrillic (KOI8-U)" KOI8-U fixed inputenc @@ -184,12 +197,13 @@ End # Traditional Japanese TeX programs require the japanese package. # that is incompatible with CJK and inputenc. -Encoding euc-jp-plain euc "Japanese (non-CJK) (EUC-JP)" EUC-JP variable japanese +Encoding euc-jp-platex euc "Japanese (pLaTeX) (EUC-JP)" EUC-JP variable japanese End -Encoding jis-plain jis "Japanese (non-CJK) (JIS)" ISO-2022-JP variable japanese +Encoding jis-platex jis "Japanese (pLaTeX) (JIS)" ISO-2022-JP variable japanese End -# FIXME: Should use variableunsafe (would be a file format change) -Encoding shift-jis-plain sjis "Japanese (non-CJK) (SJIS)" CP932 variable japanese +Encoding shift-jis-platex sjis "Japanese (pLaTeX) (SJIS)" CP932 variable japanese +End +Encoding utf8-platex utf8 "Japanese (pLaTeX) (UTF8)" UTF-8 variable japanese End # This one needs hardcoded support, since the inputenc package does not know @@ -207,4 +221,3 @@ End # Pure 7bit ASCII encoding (partially hardcoded in LyX) Encoding ascii ascii "ASCII" ascii fixed none End - diff --git a/lib/languages b/lib/languages index 1022f3cdbe..a0ba7b8fe7 100644 --- a/lib/languages +++ b/lib/languages @@ -576,7 +576,7 @@ End Language japanese GuiName "Japanese" BabelName japanese - Encoding jis-plain + Encoding jis-platex LangCode ja_JP AsBabelOptions true Requires japanese @@ -773,7 +773,7 @@ Language russian BabelName russian PolyglossiaName russian QuoteStyle 
french - Encoding koi8 + Encoding koi8-r LangCode ru_RU End diff --git a/lib/lyx2lyx/lyx_2_1.py b/lib/lyx2lyx/lyx_2_1.py index 6c0b9c837f..bc4d23ba28 100644 --- a/lib/lyx2lyx/lyx_2_1.py +++ b/lib/lyx2lyx/lyx_2_1.py @@ -3546,6 +3546,90 @@ def revert_newframes(document): document.body[i : i + 1] = subst i = j +# known encodings that do not change their names (same LyX and LaTeX names) +known_enc_tuple = ("auto", "default", "ansinew", "applemac", "armscii8", "ascii", + "cp437", "cp437de", "cp850", "cp852", "cp855", "cp858", "cp862", "cp865", "cp866", + "cp1250", "cp1251", "cp1252", "cp1255", "cp1256", "cp1257", "koi8-r", "koi8-u", + "pt154", "pt254", "tis620-0", "utf8", "utf8x", "utf8-plain") + +def convert_encodings(document): + "Use the LyX names of the encodings instead of the LaTeX names." + LaTeX2LyX_enc_dict = { + "8859-6": "iso8859-6", + "8859-8": "iso8859-8", + "Bg5": "big5", + "euc": "euc-jp-platex", + "EUC-JP": "euc-jp", + "EUC-TW": "euc-tw", + "GB": "euc-cn", + "GBK": "gbk", + "iso88595": "iso8859-5", + "iso-8859-7": "iso8859-7", + "JIS": "jis", + "jis": "jis-platex", + "KS": "euc-kr", + "l7xenc": "iso8859-13", + "latin1": "iso8859-1", + "latin2": "iso8859-2", + "latin3": "iso8859-3", + "latin4": "iso8859-4", + "latin5": "iso8859-9", + "latin9": "iso8859-15", + "latin10": "iso8859-16", + "SJIS": "shift-jis", + "sjis": "shift-jis-platex", + "UTF8": "utf8-cjk" + } + i = find_token(document.header, "\\inputencoding" , 0) + if i == -1: + return + val = get_value(document.header, "\\inputencoding", i) + if val in LaTeX2LyX_enc_dict.keys(): + document.header[i] = "\\inputencoding %s" % LaTeX2LyX_enc_dict[val] + elif val not in known_enc_tuple: + document.warning("Ignoring unknown input encoding: `%s'" % val) + + +def revert_encodings(document): + """Revert to using the LaTeX names of the encodings instead of the LyX names. + Also revert utf8-platex to sjis, the language default when using Japanese. 
+ """ + LyX2LaTeX_enc_dict = { + "big5": "Bg5", + "euc-cn": "GB", + "euc-kr": "KS", + "euc-jp": "EUC-JP", + "euc-jp-platex": "euc", + "euc-tw": "EUC-TW", + "gbk": "GBK", + "iso8859-1": "latin1", + "iso8859-2": "latin2", + "iso8859-3": "latin3", + "iso8859-4": "latin4", + "iso8859-5": "iso88595", + "iso8859-6": "8859-6", + "iso8859-7": "iso-8859-7", + "iso8859-8": "8859-8", + "iso8859-9": "latin5", + "iso8859-13": "l7xenc", + "iso8859-15": "latin9", + "iso8859-16": "latin10", + "jis": "JIS", + "jis-platex": "jis", + "shift-jis": "SJIS", + "shift-jis-platex": "sjis", + "utf8-cjk": "UTF8", + "utf8-platex": "sjis" + } + i = find_token(document.header, "\\inputencoding" , 0) + if i == -1: + return + val = get_value(document.header, "\\inputencoding", i) + if val in LyX2LaTeX_enc_dict.keys(): + document.header[i] = "\\inputencoding %s" % LyX2LaTeX_enc_dict[val] + elif val not in known_enc_tuple: + document.warning("Ignoring unknown input encoding: `%s'" % val) + def revert_IEEEtran_3(document): ''' @@ -3699,10 +3783,12 @@ convert = [ [459, []], [460, []], [461, []], - [462, []] + [462, []], + [463, [convert_encodings]], ] revert = [ + [462, [revert_encodings]], [461, [revert_new_libertines]], [460, [revert_kurier_fonts]], [459, [revert_IEEEtran_3]], diff --git a/src/BufferParams.cpp b/src/BufferParams.cpp index 0b0965c236..60129ab00c 100644 --- a/src/BufferParams.cpp +++ b/src/BufferParams.cpp @@ -2786,7 +2786,7 @@ void BufferParams::writeEncodingPreamble(otexstream & os, // do not load inputenc if japanese is used if (features.isRequired("japanese")) break; - os << "\\usepackage[" << from_ascii(inputenc) + os << "\\usepackage[" << from_ascii(encoding().latexName()) << "]{inputenc}\n"; break; case Encoding::CJK: @@ -2913,10 +2913,10 @@ Encoding const & BufferParams::encoding() const // This check will not work with XeTeX/LuaTeX and tex fonts. // Thus we have to reset the encoding in Buffer::makeLaTeXFile. 
if (useNonTeXFonts) - return *(encodings.fromLaTeXName("utf8-plain")); + return *(encodings.fromLyXName("utf8-plain")); if (inputenc == "auto" || inputenc == "default") return *language->encoding(); - Encoding const * const enc = encodings.fromLaTeXName(inputenc); + Encoding const * const enc = encodings.fromLyXName(inputenc); if (enc) return *enc; LYXERR0("Unknown inputenc value `" << inputenc diff --git a/src/BufferParams.h b/src/BufferParams.h index 1bff132187..b9e37510ca 100644 --- a/src/BufferParams.h +++ b/src/BufferParams.h @@ -284,10 +284,10 @@ public: IndicesList & indiceslist(); IndicesList const & indiceslist() const; /** - * The input encoding for LaTeX. This can be one of + * The LyX name of the input encoding for LaTeX. This can be one of * - \c auto: find out the input encoding from the used languages * - \c default: ditto - * - any encoding supported by the inputenc package + * - any encoding defined in the file lib/encodings * The encoding of the LyX file is always utf8 and has nothing to * do with this setting. 
* The difference between \c auto and \c default is that \c auto also diff --git a/src/frontends/qt4/GuiDocument.cpp b/src/frontends/qt4/GuiDocument.cpp index 1017d5d8f1..ff63977f00 100644 --- a/src/frontends/qt4/GuiDocument.cpp +++ b/src/frontends/qt4/GuiDocument.cpp @@ -2586,7 +2586,7 @@ void GuiDocument::applyView() for (; it != end; ++it) { if (qt_(it->guiName()) == enc_gui && !it->unsafe()) { - bp_.inputenc = it->latexName(); + bp_.inputenc = it->name(); found = true; break; } @@ -2993,7 +2993,7 @@ void GuiDocument::paramsToDialog() Encodings::const_iterator it = encodings.begin(); Encodings::const_iterator const end = encodings.end(); for (; it != end; ++it) { - if (it->latexName() == bp_.inputenc && + if (it->name() == bp_.inputenc && !it->unsafe()) { enc_gui = it->guiName(); break; diff --git a/src/support/unicode.cpp b/src/support/unicode.cpp index 36cb1800f2..95415a5538 100644 --- a/src/support/unicode.cpp +++ b/src/support/unicode.cpp @@ -377,7 +377,6 @@ int max_encoded_bytes(std::string const & encoding) // The CJK encodings use (different) multibyte representation as well. // All other encodings encode one UCS4 code point in one byte // (and can therefore only encode a subset of UCS4) - // Note that BIG5 and SJIS do not work with LaTeX (see lib/encodings). // Furthermore, all encodings that use shifting (like SJIS) do not work with // iconv_codecvt_facet. 
if (encoding == "UTF-8" || diff --git a/src/tex2lyx/Preamble.cpp b/src/tex2lyx/Preamble.cpp index aaa76841fe..9d72a5e77b 100644 --- a/src/tex2lyx/Preamble.cpp +++ b/src/tex2lyx/Preamble.cpp @@ -805,7 +805,7 @@ void Preamble::handle_package(Parser &p, string const & name, Encoding const * const enc = encodings.fromIconvName( p.getEncoding(), Encoding::japanese, false); if (enc) - h_inputencoding = enc->latexName(); + h_inputencoding = enc->name(); is_nonCJKJapanese = true; // in this case babel can be removed from the preamble registerAutomaticallyLoadedPackage("babel"); @@ -844,7 +844,7 @@ void Preamble::handle_package(Parser &p, string const & name, } else if (name == "CJKutf8") { - h_inputencoding = "UTF8"; + h_inputencoding = "utf8-cjk"; p.setEncoding("UTF-8"); registerAutomaticallyLoadedPackage("CJKutf8"); } @@ -863,14 +863,22 @@ void Preamble::handle_package(Parser &p, string const & name, // h_inputencoding is only set when there is not more than one // inputenc option because otherwise h_inputencoding must be // set to "auto" (the default encoding of the document language) - // Therefore check for the "," character. + // Therefore check that exactly one option is passed to inputenc. // It is also only set when there is not more than one babel // language option. - if (opts.find(",") == string::npos && one_language == true) - h_inputencoding = opts; - if (!options.empty()) - p.setEncoding(options.back(), Encoding::inputenc); - options.clear(); + if (!options.empty()) { + string const encoding = options.back(); + Encoding const * const enc = encodings.fromLaTeXName( + encoding, Encoding::inputenc, true); + if (!enc) + cerr << "Unknown encoding " << encoding << ". Ignoring." 
<< std::endl; + else { + if (!enc->unsafe() && options.size() == 1 && one_language == true) + h_inputencoding = enc->name(); + p.setEncoding(enc->iconvName()); + } + options.clear(); + } } else if (name == "srcltx") { @@ -1624,8 +1632,15 @@ void Preamble::parse(Parser & p, string const & forceclass, else if (t.cs() == "inputencoding") { string const encoding = p.getArg('{','}'); - h_inputencoding = encoding; - p.setEncoding(encoding, Encoding::inputenc); + Encoding const * const enc = encodings.fromLaTeXName( + encoding, Encoding::inputenc, true); + if (!enc) + cerr << "Unknown encoding " << encoding << ". Ignoring." << std::endl; + else { + if (!enc->unsafe()) + h_inputencoding = enc->name(); + p.setEncoding(enc->iconvName()); + } } else if (t.cs() == "newenvironment") { diff --git a/src/tex2lyx/TODO.txt b/src/tex2lyx/TODO.txt index 60c48e1359..5c7ba2de02 100644 --- a/src/tex2lyx/TODO.txt +++ b/src/tex2lyx/TODO.txt @@ -87,10 +87,10 @@ Format LaTeX feature LyX feature \usepackage[scale|scaled=$val]{biolinum-type1} \font_sans \font_sf_scale +463 General * Use the language information provided by Language.cpp and the languages file (for babel/lyx/polyglossia name, quote style etc.) instead of hardcoding this information in Preamble.cpp. 
- diff --git a/src/tex2lyx/tex2lyx.cpp b/src/tex2lyx/tex2lyx.cpp index 3f98280cbe..9239d30984 100644 --- a/src/tex2lyx/tex2lyx.cpp +++ b/src/tex2lyx/tex2lyx.cpp @@ -840,8 +840,8 @@ bool tex2lyx(idocstream & is, ostream & os, string encoding) if (preamble.inputencoding() == "auto") encoding = "ISO8859-1"; else { - Encoding const * const enc = encodings.fromLaTeXName( - preamble.inputencoding(), Encoding::any, true); + Encoding const * const enc = encodings.fromLyXName( + preamble.inputencoding(), true); encoding = enc->iconvName(); } } diff --git a/src/version.h b/src/version.h index 8eebe850ff..ab0ee1b39a 100644 --- a/src/version.h +++ b/src/version.h @@ -30,8 +30,8 @@ extern char const * const lyx_version_info; // Do not remove the comment below, so we get merge conflict in // independent branches. Instead add your own. -#define LYX_FORMAT_LYX 462 // spitz: support for the newest libertine fonts -#define LYX_FORMAT_TEX2LYX 462 // spitz: support for the newest libertine fonts +#define LYX_FORMAT_LYX 463 // jrioux: encodings renaming +#define LYX_FORMAT_TEX2LYX 463 // jrioux: encodings renaming #if LYX_FORMAT_TEX2LYX != LYX_FORMAT_LYX #ifndef _MSC_VER