support non-CJK Japanese for tex2lyx

We cannot know the exact encoding and thus can only assume one (the one that Koji uses for the docs) and output a note at the beginning
2025-01-21 23:09:40 +00:00 · 2012-06-24 18:10:36 +02:00 · 2012-06-24 18:10:36 +02:00 · 5ad84b9fbd
commit 5ad84b9fbd
parent adb0d7f63f
4 changed files with 45 additions and 14 deletions
--- a/src/tex2lyx/Preamble.cpp
+++ b/src/tex2lyx/Preamble.cpp
@ -47,7 +47,6 @@ namespace {
 // "chinese-simplified", "chinese-traditional", "japanese-cjk", "korean"
 // cannot be supported because it is impossible to determine the correct document
 // language if CJK is used.
-// FIXME: missing support for "japanese" (non-CJK)
 /**
 * known babel language names (including synonyms)
 * not in standard babel: arabic, arabtex, armenian, belarusian, serbian-latin, thai
@ -60,7 +59,7 @@ const char * const known_languages[] = {"acadian", "afrikaans", "albanian",
 "dutch", "english", "esperanto", "estonian", "farsi", "finnish", "francais",
 "french", "frenchb", "frenchle", "frenchpro", "galician", "german", "germanb",
 "greek", "hebrew", "hungarian", "icelandic", "indon", "indonesian", "interlingua",
-"irish", "italian", "kazakh", "kurmanji", "latin", "latvian", "lithuanian",
+"irish", "italian", "japanese", "kazakh", "kurmanji", "latin", "latvian", "lithuanian",
 "lowersorbian", "lsorbian", "magyar", "malay", "meyalu", "mongolian", "naustrian",
 "newzealand", "ngerman", "ngermanb", "norsk", "nynorsk", "polutonikogreek", "polish",
 "portuges", "portuguese", "romanian", "russian", "russianb", "samin",
@ -80,7 +79,7 @@ const char * const known_coded_languages[] = {"french", "afrikaans", "albanian",
 "dutch", "english", "esperanto", "estonian", "farsi", "finnish", "french",
 "french", "french", "french", "french", "galician", "german", "german",
 "greek", "hebrew", "magyar", "icelandic", "bahasa", "bahasa", "interlingua",
-"irish", "italian", "kazakh", "kurmanji", "latin", "latvian", "lithuanian",
+"irish", "italian", "japanese", "kazakh", "kurmanji", "latin", "latvian", "lithuanian",
 "lowersorbian", "lowersorbian", "magyar", "bahasam", "bahasam", "mongolian", "naustrian",
 "newzealand", "ngerman", "ngerman", "norsk", "nynorsk", "polutonikogreek", "polish",
 "portuguese", "portuguese", "romanian", "russian", "russian", "samin",
@ -206,8 +205,8 @@ const char * const known_xetex_packages[] = {"arabxetex", "fixlatvian",

 /// packages that are automatically skipped if loaded by LyX
 const char * const known_lyx_packages[] = {"amsbsy", "amsmath", "amssymb",
-"amstext", "amsthm", "array", "booktabs", "calc", "CJK", "color", "float", "fontspec",
-"graphicx", "hhline", "ifthen", "longtable", "makeidx", "multirow",
+"amstext", "amsthm", "array", "babel", "booktabs", "calc", "CJK", "color", "float",
+"fontspec", "graphicx", "hhline", "ifthen", "longtable", "makeidx", "multirow",
 "nomencl", "pdfpages", "rotating", "rotfloat", "splitidx", "setspace",
 "subscript", "textcomp", "ulem", "url", "varioref", "verbatim", "wrapfig",
 "xunicode", 0};
@ -718,16 +717,26 @@ void Preamble::handle_package(Parser &p, string const & name,
 			// call as document language. If there is no such language option, the
 			// last language in the documentclass options is used.
 			handle_opt(options, known_languages, h_language);
-			// If babel is called with options, LyX puts them by default into the
-			// document class options. This works for most languages, except
-			// for Latvian, Lithuanian, Mongolian, Turkmen and Vietnamese and
-			// perhaps in future others.
-			// Therefore keep the babel call as it is as the user might have
-			// reasons for it.
-			h_preamble << "\\usepackage[" << opts << "]{babel}\n";
-			delete_opt(options, known_languages);
-			// finally translate the babel name to a LyX name
+			// translate the babel name to a LyX name
 			h_language = babel2lyx(h_language);
+			// for Japanese we assume EUC-JP as encoding
+			// but we cannot determine the exact encoding and thus output also a note
+			if (h_language == "japanese") {
+				h_inputencoding = "euc";
+				p.setEncoding("EUC-JP");
+				is_nonCJKJapanese = true;
+				// in this case babel can be removed from the preamble
+				registerAutomaticallyLoadedPackage("babel");
+			} else {
+				// If babel is called with options, LyX puts them by default into the
+				// document class options. This works for most languages, except
+				// for Latvian, Lithuanian, Mongolian, Turkmen and Vietnamese and
+				// perhaps in future others.
+				// Therefore keep the babel call as it is as the user might have
+				// reasons for it.
+				h_preamble << "\\usepackage[" << opts << "]{babel}\n";
+			}
+			delete_opt(options, known_languages);
 		}
 		else
 			h_preamble << "\\usepackage{babel}\n";
--- a/src/tex2lyx/tex2lyx.cpp
+++ b/src/tex2lyx/tex2lyx.cpp
@ -333,6 +333,7 @@ bool noweb_mode = false;
 bool pdflatex = false;
 bool xetex = false;
 bool have_CJK = false;
+bool is_nonCJKJapanese = false;
 bool roundtrip = false;


--- a/src/tex2lyx/tex2lyx.h
+++ b/src/tex2lyx/tex2lyx.h
@ -168,6 +168,8 @@ extern bool pdflatex;
 extern bool xetex;
 /// Do we have CJK?
 extern bool have_CJK;
+/// Do we have non-CJK Japanese?
+extern bool is_nonCJKJapanese;
 /// LyX format that is created by tex2lyx
 extern int const LYX_FORMAT;

--- a/src/tex2lyx/text.cpp
+++ b/src/tex2lyx/text.cpp
@ -2119,6 +2119,25 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
 		have_CJK = false;
 	}

+	// it is impossible to determine the correct encoding for non-CJK Japanese.
+	// Therefore write a note at the beginning of the document
+	if (is_nonCJKJapanese) {
+		context.check_layout(os);
+		begin_inset(os, "Note Note\n");
+		os << "status open\n\\begin_layout Plain Layout\n"
+		   << "\\series bold\n"
+		   << "Important information:\n"
+		   << "\\end_layout\n\n"
+		   << "\\begin_layout Plain Layout\n"
+		   << "This document is in Japanese (non-CJK).\n"
+		   << " It was therefore impossible for tex2lyx to determine the correct encoding."
+		   << " The encoding EUC-JP was assumed. If this is incorrect, please set the correct"
+		   << " encoding in the document settings.\n"
+		   << "\\end_layout\n";
+		end_inset(os);
+		is_nonCJKJapanese = false;
+	}
+
 #ifdef FILEDEBUG
 		debugToken(cerr, t, flags);
 #endif