Fix language handling in tex2lyx:

- Add missing babel synonyms (taken from a standard TeXLive 2010 install).
  Some synonyms might still be missing (if they are defined not by babel
  itself, but additional packages). Adding these is now simple: Only
  known_languages and known_coded_languages need to be touched.
- Translate babel language name arguments of \foreignlanguage and
  \selectlanguage to LyX names.
- Translate second argument of \foreignlanguage to LyX.
- Cleanup mixed language names in the preamble parser: Now all checks are
  done with LyX names, not a mix of babel and LyX names. This sets correct
  quotes even if the language is given as an alias.


git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/trunk@37188 a592a061-630c-0410-9148-cb99ea01b6c8
This commit is contained in:
Georg Baum 2011-01-12 21:04:39 +00:00
parent c99a8a702c
commit acd57ccaaf
5 changed files with 95 additions and 98 deletions

View File

@ -517,6 +517,13 @@ Language korean
LangCode ko
End
# Currently not supported (file format change!)
# Language kurmanji
# GuiName "Kurmanji"
# BabelName kurmanji
# LangCode kmr
# End
# Currently not supported (file format change!)
# Language lao
# GuiName "Lao"

View File

@ -57,53 +57,69 @@ namespace {
// Both changes require first that support for non-babel languages (CJK,
// armtex) is added.
// add turkmen for lyxformat 383
const char * const known_languages[] = { "afrikaans", "albanian", "american",
"arabic", "arabtex", "austrian", "bahasa", "bahasai", "bahasam", "basque",
"belarusian", "brazil", "brazilian", "breton", "british", "bulgarian",
/**
* known babel language names (including synonyms)
* not in standard babel: arabic, arabtex, belarusian, serbian-latin, thai
* not yet supported by LyX: kurmanji
* please keep this in sync with known_coded_languages line by line!
*/
const char * const known_languages[] = {"acadian", "afrikaans", "albanian",
"american", "arabic", "arabtex", "austrian", "bahasa", "bahasai", "bahasam",
"basque", "belarusian", "brazil", "brazilian", "breton", "british", "bulgarian",
"canadian", "canadien", "catalan", "croatian", "czech", "danish", "dutch",
"english", "esperanto", "estonian", "finnish", "francais", "french",
"english", "esperanto", "estonian", "farsi", "finnish", "francais", "french",
"frenchb", "frenchle", "frenchpro", "galician", "german", "germanb", "greek",
"hebrew", "icelandic", "indon", "indonesian", "interlingua", "irish",
"italian", "kazakh", "latin", "latvian", "lithuanian", "lsorbian", "magyar",
"malay", "meyalu", "mongolian", "naustrian", "ngerman", "ngermanb", "norsk",
"nynorsk", "polutonikogreek", "polish", "portuges", "portuguese", "romanian",
"russian", "russianb", "samin", "scottish", "serbian", "serbian-latin",
"slovak", "slovene", "spanish", "swedish", "thai", "turkish", "ukraineb",
"ukrainian", "usorbian", "vietnam", "welsh", 0};
"hebrew", "hungarian", "icelandic", "indon", "indonesian", "interlingua",
"irish", "italian", "kazakh", "latin", "latvian", "lithuanian", "lowersorbian",
"lsorbian", "magyar", "malay", "meyalu", "mongolian", "naustrian", "newzealand",
"ngerman", "ngermanb", "norsk", "nynorsk", "polutonikogreek", "polish",
"portuges", "portuguese", "romanian", "russian", "russianb", "samin",
"scottish", "serbian", "serbian-latin", "slovak", "slovene", "spanish",
"swedish", "thai", "turkish", "ukraineb", "ukrainian", "uppersorbian",
"UKenglish", "USenglish", "usorbian", "vietnam", "welsh", 0};
const char * const known_bahasa_languages[] = {"bahasa", "bahasai",
"indon", "indonesian", 0};
const char * const known_bahasam_languages[] = {"bahasam", "malay",
"meyalu", 0};
const char * const known_brazilian_languages[] = {"brazil", "brazilian", 0};
const char * const known_french_languages[] = {"french", "frenchb", "francais",
"frenchle", "frenchpro", 0};
const char * const known_german_languages[] = {"german", "germanb", 0};
const char * const known_ngerman_languages[] = {"ngerman", "ngermanb", 0};
const char * const known_portuguese_languages[] = {"portuges", "portuguese", 0};
const char * const known_russian_languages[] = {"russian", "russianb", 0};
const char * const known_ukrainian_languages[] = {"ukrainian", "ukraineb", 0};
/**
 * the same as known_languages with .lyx names
 * please keep this in sync with known_languages line by line!
 * Entry i is the LyX name for known_languages[i]: babel2lyx() uses the
 * position of a match in known_languages as the index into this table, so
 * a single swapped or missing entry silently mistranslates a language.
 */
const char * const known_coded_languages[] = {"french", "afrikaans", "albanian",
"american", "arabic_arabi", "arabic_arabtex", "austrian", "bahasa", "bahasa", "bahasam",
"basque", "belarusian", "brazilian", "brazilian", "breton", "british", "bulgarian",
"canadian", "canadien", "catalan", "croatian", "czech", "danish", "dutch",
"english", "esperanto", "estonian", "farsi", "finnish", "french", "french",
"french", "french", "french", "galician", "german", "german", "greek",
"hebrew", "magyar", "icelandic", "bahasa", "bahasa", "interlingua",
"irish", "italian", "kazakh", "latin", "latvian", "lithuanian", "lowersorbian",
"lowersorbian", "magyar", "bahasam", "bahasam", "mongolian", "naustrian", "english",
"ngerman", "ngerman", "norsk", "nynorsk", "polutonikogreek", "polish",
"portuguese", "portuguese", "romanian", "russian", "russian", "samin",
"scottish", "serbian", "serbian-latin", "slovak", "slovene", "spanish",
"swedish", "thai", "turkish", "ukrainian", "ukrainian", "uppersorbian",
// UKenglish, USenglish, usorbian (in this order, as in known_languages)
"english", "english", "uppersorbian", "vietnamese", "welsh", 0};
//add these to known_english_quotes_languages when updating to lyxformat 268:
//"chinese-simplified", "korean"
// This requires first that support for non-babel languages (CJK) is added.
const char * const known_english_quotes_languages[] = {"american", "canadian",
"english", "esperanto", "hebrew", "irish", "scottish", "thai", 0};
/// languages with english quotes (.lyx names)
const char * const known_english_quotes_languages[] = {"american", "bahasa",
"bahasam", "brazilian", "canadian", "chinese-simplified", "english",
"esperanto", "hebrew", "irish", "korean", "portuguese", "scottish", "thai", 0};
//add this to known_french_quotes_languages when updating to
//lyxformat 383: "turkmen"
const char * const known_french_quotes_languages[] = {"albanian", "arabic",
"basque", "canadien", "catalan", "galician", "greek", "italian", "norsk",
"nynorsk", "polutonikogreek", "spanish", "spanish-mexico", "turkish",
"vietnam", 0};
/// languages with french quotes (.lyx names)
const char * const known_french_quotes_languages[] = {"albanian",
"arabic_arabi", "arabic_arabtex", "basque", "canadien", "catalan", "french",
"galician", "greek", "italian", "norsk", "nynorsk", "polutonikogreek",
"russian", "spanish", "spanish-mexico", "turkish", "ukrainian", "vietnamese", 0};
/// languages with german quotes (.lyx names)
const char * const known_german_quotes_languages[] = {"austrian", "bulgarian",
"czech", "icelandic", "lithuanian", "lsorbian", "naustrian", "serbian",
"serbian-latin", "slovak", "slovene", "usorbian", 0};
"czech", "german", "icelandic", "lithuanian", "lowersorbian", "naustrian",
"ngerman", "serbian", "serbian-latin", "slovak", "slovene", "uppersorbian", 0};
/// languages with polish quotes (.lyx names)
const char * const known_polish_quotes_languages[] = {"afrikaans", "croatian",
"dutch", "estonian", "magyar", "polish", "romanian", 0};
/// languages with swedish quotes (.lyx names)
const char * const known_swedish_quotes_languages[] = {"finnish",
"swedish", 0};
@ -137,7 +153,7 @@ const char * const known_coded_paper_margins[] = { "leftmargin", "topmargin",
ostringstream h_preamble;
string h_textclass = "article";
string h_use_default_options = "false";
string h_options = string();
string h_options;
string h_language = "english";
string h_inputencoding = "auto";
string h_font_roman = "default";
@ -182,27 +198,12 @@ string h_defskip = "medskip";
string h_paragraph_indentation = "default";
string h_quotes_language = "english";
string h_papercolumns = "1";
string h_papersides = string();
string h_papersides;
string h_paperpagestyle = "default";
string h_listings_params;
string h_tracking_changes = "false";
string h_output_changes = "false";
string h_margins = "";
/// translates a babel language name to a LyX language name
string babel2lyx(string language)
{
if (language == "arabtex")
return "arabic_arabtex";
if (language == "arabic")
return "arabic_arabi";
if (language == "lsorbian")
return "lowersorbian";
if (language == "usorbian")
return "uppersorbian";
return language;
}
string h_margins;
// returns true if at least one of the options in what has been found
@ -606,25 +607,8 @@ void handle_package(Parser &p, string const & name, string const & opts,
void end_preamble(ostream & os, TextClass const & /*textclass*/)
{
// merge synonym languages
if (is_known(h_language, known_bahasa_languages))
h_language = "bahasa";
else if (is_known(h_language, known_bahasam_languages))
h_language = "bahasam";
else if (is_known(h_language, known_brazilian_languages))
h_language = "brazilian";
else if (is_known(h_language, known_french_languages))
h_language = "french";
else if (is_known(h_language, known_german_languages))
h_language = "german";
else if (is_known(h_language, known_ngerman_languages))
h_language = "ngerman";
else if (is_known(h_language, known_portuguese_languages))
h_language = "portuguese";
else if (is_known(h_language, known_russian_languages))
h_language = "russian";
else if (is_known(h_language, known_ukrainian_languages))
h_language = "ukrainian";
// translate from babel to LyX names
h_language = babel2lyx(h_language);
// set the quote language
// LyX only knows the following quotes languages:
@ -638,15 +622,10 @@ void end_preamble(ostream & os, TextClass const & /*textclass*/)
if (h_language == "danish")
h_quotes_language = "danish";
// french
else if (is_known(h_language, known_french_quotes_languages)
|| is_known(h_language, known_french_languages)
|| is_known(h_language, known_russian_languages)
|| is_known(h_language, known_ukrainian_languages))
else if (is_known(h_language, known_french_quotes_languages))
h_quotes_language = "french";
// german
else if (is_known(h_language, known_german_quotes_languages)
|| is_known(h_language, known_german_languages)
|| is_known(h_language, known_ngerman_languages))
else if (is_known(h_language, known_german_quotes_languages))
h_quotes_language = "german";
// polish
else if (is_known(h_language, known_polish_quotes_languages))
@ -655,15 +634,9 @@ void end_preamble(ostream & os, TextClass const & /*textclass*/)
else if (is_known(h_language, known_swedish_quotes_languages))
h_quotes_language = "swedish";
//english
else if (is_known(h_language, known_english_quotes_languages)
|| is_known(h_language, known_bahasa_languages)
|| is_known(h_language, known_bahasam_languages)
|| is_known(h_language, known_brazilian_languages)
|| is_known(h_language, known_portuguese_languages))
else if (is_known(h_language, known_english_quotes_languages))
h_quotes_language = "english";
h_language = babel2lyx(h_language);
// output the LyX file settings
os << "#LyX file created by tex2lyx " << PACKAGE_VERSION << "\n"
<< "\\lyxformat " << LYX_FORMAT << '\n'
@ -745,6 +718,7 @@ void end_preamble(ostream & os, TextClass const & /*textclass*/)
} // anonymous namespace
void parse_preamble(Parser & p, ostream & os,
string const & forceclass, TeX2LyXDocClass & tc)
{
@ -1130,6 +1104,16 @@ void parse_preamble(Parser & p, ostream & os,
end_preamble(os, tc);
}
/// Translates a babel language name (or one of its synonyms) to the
/// corresponding LyX language name.
/// Names that are not listed in known_languages are returned unchanged.
string babel2lyx(string const & language)
{
	char const * const * const hit = is_known(language, known_languages);
	// unknown name: pass it through as is
	if (!hit)
		return language;
	// known_coded_languages is indexed in parallel with known_languages,
	// so the offset of the match selects the LyX name
	return known_coded_languages[hit - known_languages];
}
// }])

View File

@ -1,9 +1,10 @@
%% LyX trick_preamble_code_into_believing_that_this_was_created_by_lyx created this file. For more info, see http://www.lyx.org/.
%% Do not edit unless you really know what you are doing.
\documentclass[legalpaper]{article}
\documentclass[legalpaper,francais,german,newzealand]{article}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% LyX specific LaTeX commands.
\usepackage{babel}
\newcommand{\noun}[1]{\textsc{#1}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Textclass specific LaTeX commands.
@ -159,6 +160,15 @@ and bibliography:
\noindent blabla bla bla
switch to german:\selectlanguage{german}
Hallo!
\foreignlanguage{french}{some \emph{french}}
back to english:\selectlanguage{newzealand}
and some nested \foreignlanguage{francais}{french \foreignlanguage{german}{nested
\emph{german}} french} english stuff.
Note that we both used \texttt{french} and the \texttt{francais} alias for the
french text, but for some reason this does not work with the
\texttt{newzealand} alias and \texttt{english} for english text.
\section{Another Appendix section}

View File

@ -43,6 +43,8 @@ class TeX2LyXDocClass : public DocumentClass
/// in preamble.cpp
void parse_preamble(Parser & p, std::ostream & os,
std::string const & forceclass, TeX2LyXDocClass & tc);
/// Translate babel language name to LyX language name
extern std::string babel2lyx(std::string const & language);
/// used packages with options
extern std::map<std::string, std::vector<std::string> > used_packages;

View File

@ -2441,21 +2441,15 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
// save the language for the case that a
// \foreignlanguage is used
// FIXME: \lang needs a LyX name, but we set a LaTeX name
context.font.language = subst(p.verbatim_item(), "\n", " ");
os << "\\lang " << context.font.language << "\n";
context.font.language = babel2lyx(p.verbatim_item());
os << "\n\\lang " << context.font.language << "\n";
}
else if (t.cs() == "foreignlanguage") {
context.check_layout(os);
// FIXME: \lang needs a LyX name, but we set a LaTeX name
os << "\n\\lang " << subst(p.verbatim_item(), "\n", " ") << "\n";
os << subst(p.verbatim_item(), "\n", " ");
// FIXME: the second argument of selectlanguage
// has to be parsed (like for \textsf, for
// example).
// set back to last selectlanguage
os << "\n\\lang " << context.font.language << "\n";
string const lang = babel2lyx(p.verbatim_item());
parse_text_attributes(p, os, FLAG_ITEM, outer,
context, "\\lang",
context.font.language, lang);
}
else if (t.cs() == "inputencoding") {