support non-CJK Japanese for tex2lyx

We cannot know the exact encoding and thus can only assume one (the one that Koji uses for the docs) and output a note at the beginning of the document.
This commit is contained in:
Uwe Stöhr 2012-06-24 18:10:36 +02:00
parent adb0d7f63f
commit 5ad84b9fbd
4 changed files with 45 additions and 14 deletions

View File

@ -47,7 +47,6 @@ namespace {
// "chinese-simplified", "chinese-traditional", "japanese-cjk", "korean" // "chinese-simplified", "chinese-traditional", "japanese-cjk", "korean"
// cannot be supported because it is impossible to determine the correct document // cannot be supported because it is impossible to determine the correct document
// language if CJK is used. // language if CJK is used.
// FIXME: missing support for "japanese" (non-CJK)
/** /**
* known babel language names (including synonyms) * known babel language names (including synonyms)
* not in standard babel: arabic, arabtex, armenian, belarusian, serbian-latin, thai * not in standard babel: arabic, arabtex, armenian, belarusian, serbian-latin, thai
@ -60,7 +59,7 @@ const char * const known_languages[] = {"acadian", "afrikaans", "albanian",
"dutch", "english", "esperanto", "estonian", "farsi", "finnish", "francais", "dutch", "english", "esperanto", "estonian", "farsi", "finnish", "francais",
"french", "frenchb", "frenchle", "frenchpro", "galician", "german", "germanb", "french", "frenchb", "frenchle", "frenchpro", "galician", "german", "germanb",
"greek", "hebrew", "hungarian", "icelandic", "indon", "indonesian", "interlingua", "greek", "hebrew", "hungarian", "icelandic", "indon", "indonesian", "interlingua",
"irish", "italian", "kazakh", "kurmanji", "latin", "latvian", "lithuanian", "irish", "italian", "japanese", "kazakh", "kurmanji", "latin", "latvian", "lithuanian",
"lowersorbian", "lsorbian", "magyar", "malay", "meyalu", "mongolian", "naustrian", "lowersorbian", "lsorbian", "magyar", "malay", "meyalu", "mongolian", "naustrian",
"newzealand", "ngerman", "ngermanb", "norsk", "nynorsk", "polutonikogreek", "polish", "newzealand", "ngerman", "ngermanb", "norsk", "nynorsk", "polutonikogreek", "polish",
"portuges", "portuguese", "romanian", "russian", "russianb", "samin", "portuges", "portuguese", "romanian", "russian", "russianb", "samin",
@ -80,7 +79,7 @@ const char * const known_coded_languages[] = {"french", "afrikaans", "albanian",
"dutch", "english", "esperanto", "estonian", "farsi", "finnish", "french", "dutch", "english", "esperanto", "estonian", "farsi", "finnish", "french",
"french", "french", "french", "french", "galician", "german", "german", "french", "french", "french", "french", "galician", "german", "german",
"greek", "hebrew", "magyar", "icelandic", "bahasa", "bahasa", "interlingua", "greek", "hebrew", "magyar", "icelandic", "bahasa", "bahasa", "interlingua",
"irish", "italian", "kazakh", "kurmanji", "latin", "latvian", "lithuanian", "irish", "italian", "japanese", "kazakh", "kurmanji", "latin", "latvian", "lithuanian",
"lowersorbian", "lowersorbian", "magyar", "bahasam", "bahasam", "mongolian", "naustrian", "lowersorbian", "lowersorbian", "magyar", "bahasam", "bahasam", "mongolian", "naustrian",
"newzealand", "ngerman", "ngerman", "norsk", "nynorsk", "polutonikogreek", "polish", "newzealand", "ngerman", "ngerman", "norsk", "nynorsk", "polutonikogreek", "polish",
"portuguese", "portuguese", "romanian", "russian", "russian", "samin", "portuguese", "portuguese", "romanian", "russian", "russian", "samin",
@ -206,8 +205,8 @@ const char * const known_xetex_packages[] = {"arabxetex", "fixlatvian",
/// packages that are automatically skipped if loaded by LyX /// packages that are automatically skipped if loaded by LyX
const char * const known_lyx_packages[] = {"amsbsy", "amsmath", "amssymb", const char * const known_lyx_packages[] = {"amsbsy", "amsmath", "amssymb",
"amstext", "amsthm", "array", "booktabs", "calc", "CJK", "color", "float", "fontspec", "amstext", "amsthm", "array", "babel", "booktabs", "calc", "CJK", "color", "float",
"graphicx", "hhline", "ifthen", "longtable", "makeidx", "multirow", "fontspec", "graphicx", "hhline", "ifthen", "longtable", "makeidx", "multirow",
"nomencl", "pdfpages", "rotating", "rotfloat", "splitidx", "setspace", "nomencl", "pdfpages", "rotating", "rotfloat", "splitidx", "setspace",
"subscript", "textcomp", "ulem", "url", "varioref", "verbatim", "wrapfig", "subscript", "textcomp", "ulem", "url", "varioref", "verbatim", "wrapfig",
"xunicode", 0}; "xunicode", 0};
@ -718,6 +717,17 @@ void Preamble::handle_package(Parser &p, string const & name,
// call as document language. If there is no such language option, the // call as document language. If there is no such language option, the
// last language in the documentclass options is used. // last language in the documentclass options is used.
handle_opt(options, known_languages, h_language); handle_opt(options, known_languages, h_language);
// translate the babel name to a LyX name
h_language = babel2lyx(h_language);
// For Japanese we assume EUC-JP as the encoding. The exact encoding cannot
// be determined, so a note is also written to the document.
if (h_language == "japanese") {
h_inputencoding = "euc";
p.setEncoding("EUC-JP");
is_nonCJKJapanese = true;
// in this case babel can be removed from the preamble
registerAutomaticallyLoadedPackage("babel");
} else {
// If babel is called with options, LyX puts them by default into the // If babel is called with options, LyX puts them by default into the
// document class options. This works for most languages, except // document class options. This works for most languages, except
// for Latvian, Lithuanian, Mongolian, Turkmen and Vietnamese and // for Latvian, Lithuanian, Mongolian, Turkmen and Vietnamese and
@ -725,9 +735,8 @@ void Preamble::handle_package(Parser &p, string const & name,
// Therefore keep the babel call as it is as the user might have // Therefore keep the babel call as it is as the user might have
// reasons for it. // reasons for it.
h_preamble << "\\usepackage[" << opts << "]{babel}\n"; h_preamble << "\\usepackage[" << opts << "]{babel}\n";
}
delete_opt(options, known_languages); delete_opt(options, known_languages);
// finally translate the babel name to a LyX name
h_language = babel2lyx(h_language);
} }
else else
h_preamble << "\\usepackage{babel}\n"; h_preamble << "\\usepackage{babel}\n";

View File

@ -333,6 +333,7 @@ bool noweb_mode = false;
bool pdflatex = false; bool pdflatex = false;
bool xetex = false; bool xetex = false;
bool have_CJK = false; bool have_CJK = false;
bool is_nonCJKJapanese = false;
bool roundtrip = false; bool roundtrip = false;

View File

@ -168,6 +168,8 @@ extern bool pdflatex;
extern bool xetex; extern bool xetex;
/// Do we have CJK? /// Do we have CJK?
extern bool have_CJK; extern bool have_CJK;
/// Do we have non-CJK Japanese?
extern bool is_nonCJKJapanese;
/// LyX format that is created by tex2lyx /// LyX format that is created by tex2lyx
extern int const LYX_FORMAT; extern int const LYX_FORMAT;

View File

@ -2119,6 +2119,25 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
have_CJK = false; have_CJK = false;
} }
// it is impossible to determine the correct encoding for non-CJK Japanese.
// Therefore write a note at the beginning of the document
if (is_nonCJKJapanese) {
context.check_layout(os);
begin_inset(os, "Note Note\n");
os << "status open\n\\begin_layout Plain Layout\n"
<< "\\series bold\n"
<< "Important information:\n"
<< "\\end_layout\n\n"
<< "\\begin_layout Plain Layout\n"
<< "This document is in Japanese (non-CJK).\n"
<< " It was therefore impossible for tex2lyx to determine the correct encoding."
<< " The encoding EUC-JP was assumed. If this is incorrect, please set the correct"
<< " encoding in the document settings.\n"
<< "\\end_layout\n";
end_inset(os);
is_nonCJKJapanese = false;
}
#ifdef FILEDEBUG #ifdef FILEDEBUG
debugToken(cerr, t, flags); debugToken(cerr, t, flags);
#endif #endif