Implement native reading of mo files.

Get the default language by a mix of QLocale and LyXRC::gui_language

Known limitations:
   * encoding is supposed to be UTF-8 (the charset parameter is checked);
   * context is not handled (implemented differently in LyX);
   * plural forms not implemented (not used for now in LyX);
   * the byte endianness of the machine on which the .mo file has been
     built is expected to be the same as that of the machine where this
     code is run.
This commit is contained in:
Jean-Marc Lasgouttes 2013-05-08 18:50:38 +02:00 committed by Vincent van Ravesteijn
parent 39a65bfb94
commit 19024f7255
7 changed files with 248 additions and 170 deletions

View File

@ -33,7 +33,6 @@
#include "FuncStatus.h" #include "FuncStatus.h"
#include "HunspellChecker.h" #include "HunspellChecker.h"
#include "KeyMap.h" #include "KeyMap.h"
#include "Language.h"
#include "LaTeXFonts.h" #include "LaTeXFonts.h"
#include "LayoutFile.h" #include "LayoutFile.h"
#include "Lexer.h" #include "Lexer.h"
@ -188,7 +187,7 @@ struct LyX::Impl {
bool first_start; bool first_start;
/// the parsed command line batch command if any /// the parsed command line batch command if any
vector<string> batch_commands; vector<string> batch_commands;
/// ///
LaTeXFonts * latexfonts_; LaTeXFonts * latexfonts_;
@ -272,7 +271,7 @@ int LyX::exec(int & argc, char * argv[])
try { try {
init_package(os::utf8_argv(0), string(), string()); init_package(os::utf8_argv(0), string(), string());
// we do not get to this point when init_package throws an exception // we do not get to this point when init_package throws an exception
locale_init(); setLocale();
} catch (ExceptionMessage const & message) { } catch (ExceptionMessage const & message) {
LYXERR(Debug::LOCALE, message.title_ + ", " + message.details_); LYXERR(Debug::LOCALE, message.title_ + ", " + message.details_);
} }
@ -294,7 +293,7 @@ int LyX::exec(int & argc, char * argv[])
// Reinit the messages machinery in case package() knows // Reinit the messages machinery in case package() knows
// something interesting about the locale directory. // something interesting about the locale directory.
Messages::init(); setLocale();
if (!use_gui) { if (!use_gui) {
// FIXME: create a ConsoleApplication // FIXME: create a ConsoleApplication
@ -337,7 +336,7 @@ int LyX::exec(int & argc, char * argv[])
// Reestablish our defaults, as Qt overwrites them // Reestablish our defaults, as Qt overwrites them
// after createApplication() // after createApplication()
locale_init(); setLocale();//???
// Parse and remove all known arguments in the LyX singleton // Parse and remove all known arguments in the LyX singleton
// Give an error for all remaining ones. // Give an error for all remaining ones.
@ -794,6 +793,9 @@ bool LyX::init()
if (!readRcFile("preferences", true)) if (!readRcFile("preferences", true))
return false; return false;
// The language may have been set to something useful through prefs
setLocale();
if (!readEncodingsFile("encodings", "unicodesymbols")) if (!readEncodingsFile("encodings", "unicodesymbols"))
return false; return false;
if (!readLanguagesFile("languages")) if (!readLanguagesFile("languages"))
@ -1379,19 +1381,7 @@ Messages const & getMessages(string const & language)
Messages const & getGuiMessages() Messages const & getGuiMessages()
{ {
LAPPERR(singleton_); LAPPERR(singleton_);
// A cache to translate full language name to language code return singleton_->messages(Messages::guiLanguage());
static string last_language = "auto";
static string code;
if (lyxrc.gui_language != last_language) {
if (lyxrc.gui_language == "auto")
code.clear();
else {
Language const * l = languages.getLanguage(lyxrc.gui_language);
code = l ? l->code() : string();
}
last_language = lyxrc.gui_language;
}
return singleton_->messages(code);
} }

View File

@ -259,6 +259,8 @@ std::vector<std::string> loadableImageFormats();
frontend::Application * theApp(); frontend::Application * theApp();
frontend::Application * createApplication(int & argc, char * argv[]); frontend::Application * createApplication(int & argc, char * argv[]);
void hideDialogs(std::string const & name, Inset * inset); void hideDialogs(std::string const & name, Inset * inset);
/// Set locale correctly using LyXRC::gui_language
void setLocale();
} // namespace lyx } // namespace lyx

View File

@ -175,6 +175,22 @@ frontend::Application * createApplication(int & argc, char * argv[])
return guiApp; return guiApp;
} }
void setLocale()
{
QLocale theLocale;
if (lyxrc.gui_language == "auto") {
theLocale = QLocale::system();
} else {
Language const * l = languages.getLanguage(lyxrc.gui_language);
string const code = l ? l->code() : string();
theLocale = QLocale(toqstr(code));
}
Messages::guiLanguage(fromqstr(theLocale.name()));
QLocale::setDefault(theLocale);
}
namespace frontend { namespace frontend {
@ -2212,14 +2228,10 @@ void GuiApplication::exit(int status)
void GuiApplication::setGuiLanguage() void GuiApplication::setGuiLanguage()
{ {
QString const default_language = toqstr(getGuiMessages().language()); setLocale();
LYXERR(Debug::LOCALE, "Trying to set default locale to: " << default_language); QLocale theLocale;
QLocale const default_locale(default_language);
QLocale::setDefault(default_locale);
// install translation file for Qt built-in dialogs // install translation file for Qt built-in dialogs
QString const language_name = QString("qt_") + default_locale.name(); QString const language_name = QString("qt_") + theLocale.name();
// language_name can be short (e.g. qt_zh) or long (e.g. qt_zh_CN). // language_name can be short (e.g. qt_zh) or long (e.g. qt_zh_CN).
// Short-named translator can be loaded from a long name, but not the // Short-named translator can be loaded from a long name, but not the
// opposite. Therefore, long name should be used without truncation. // opposite. Therefore, long name should be used without truncation.
@ -2233,7 +2245,7 @@ void GuiApplication::setGuiLanguage()
<< language_name); << language_name);
} }
switch (default_locale.language()) { switch (theLocale.language()) {
case QLocale::Arabic : case QLocale::Arabic :
case QLocale::Hebrew : case QLocale::Hebrew :
case QLocale::Persian : case QLocale::Persian :

View File

@ -7,28 +7,99 @@
* Full author contact details are available in file CREDITS. * Full author contact details are available in file CREDITS.
*/ */
/*
This is a limited parser for gettext's mo files. Several features are
not handled for now:
* the encoding must be UTF-8 (the charset parameter is checked and any
other value is rejected)
* context is not handled (implemented differently in LyX)
* plural forms are not implemented (not used for now in LyX)
* the byte endianness of the machine on which the .mo file has been
built is expected to be the same as that of the machine where this
code is run.
The data is loaded in a std::map object for simplicity.
*/
/*
Format of a MO file. Source: http://www.gnu.org/software/gettext/manual/html_node/MO-Files.html
byte
+------------------------------------------+
0 | magic number = 0x950412de |
| |
4 | file format revision = 0 |
| |
8 | number of strings | == N
| |
12 | offset of table with original strings | == O
| |
16 | offset of table with translation strings | == T
| |
20 | size of hashing table | == S
| |
24 | offset of hashing table | == H
| |
. .
. (possibly more entries later) .
. .
| |
O | length & offset 0th string ----------------.
O + 8 | length & offset 1st string ------------------.
... ... | |
O + ((N-1)*8)| length & offset (N-1)th string | | |
| | | |
T | length & offset 0th translation ---------------.
T + 8 | length & offset 1st translation -----------------.
... ... | | | |
T + ((N-1)*8)| length & offset (N-1)th translation | | | | |
| | | | | |
H | start hash table | | | | |
... ... | | | |
H + S * 4 | end hash table | | | | |
| | | | | |
| NUL terminated 0th string <----------------' | | |
| | | | |
| NUL terminated 1st string <------------------' | |
| | | |
... ... | |
| | | |
| NUL terminated 0th translation <---------------' |
| | |
| NUL terminated 1st translation <-----------------'
| |
... ...
| |
+------------------------------------------+
*/
#include <config.h> #include <config.h>
#include "support/Messages.h" #include "support/Messages.h"
#include "support/debug.h" #include "support/debug.h"
#include "support/docstring.h" #include "support/docstring.h"
#include "support/environment.h"
#include "support/lstrings.h" #include "support/lstrings.h"
#include "support/Package.h" #include "support/Package.h"
#include "support/unicode.h" #include "support/unicode.h"
#include "support/lassert.h" #include "support/lassert.h"
#include <cerrno> #include <boost/cstdint.hpp>
# define N_(str) (str) // for marking strings to be translated #include <cerrno>
#include <fstream>
#ifdef HAVE_SYS_STAT_H
# include <sys/stat.h>
#endif
using namespace std; using namespace std;
using boost::uint32_t;
namespace lyx { namespace lyx {
void cleanTranslation(docstring & trans) void cleanTranslation(docstring & trans)
{ {
/* /*
Some english words have different translations, depending on Some english words have different translations, depending on
@ -62,20 +133,13 @@ void cleanTranslation(docstring & trans)
#ifdef ENABLE_NLS #ifdef ENABLE_NLS
# ifdef HAVE_LOCALE_H
# include <locale.h>
# endif
# if HAVE_GETTEXT
# include <libintl.h> // use the header already in the system *EK*
# else
# include "intl/libintl.h"
# endif
using namespace lyx::support; using namespace lyx::support;
namespace lyx { namespace lyx {
std::string Messages::gui_lang_;
// This version use the traditional gettext. // This version use the traditional gettext.
Messages::Messages(string const & l) Messages::Messages(string const & l)
: lang_(l) : lang_(l)
@ -84,138 +148,166 @@ Messages::Messages(string const & l)
size_t i = lang_.find("."); size_t i = lang_.find(".");
lang_ = lang_.substr(0, i); lang_ = lang_.substr(0, i);
LYXERR(Debug::LOCALE, "language(" << lang_ << ")"); LYXERR(Debug::LOCALE, "language(" << lang_ << ")");
readMoFile();
} }
void Messages::init() namespace {
string moFile(string const & c)
{ {
errno = 0; static string const locale_dir
string const locale_dir = package().locale_dir().toFilesystemEncoding(); = package().locale_dir().toFilesystemEncoding();
char const * c = bindtextdomain(PACKAGE, locale_dir.c_str()); return locale_dir + "/" + c
int e = errno; + "/LC_MESSAGES/" PACKAGE ".mo";
if (e) {
LYXERR(Debug::LOCALE, "Error code: " << errno << '\n'
<< "Directory : " << package().locale_dir().absFileName() << '\n'
<< "Rtn value : " << c);
}
if (!bind_textdomain_codeset(PACKAGE, ucs4_codeset)) {
LYXERR(Debug::LOCALE, "Error code: " << errno << '\n'
<< "Codeset : " << ucs4_codeset);
}
textdomain(PACKAGE);
} }
string Messages::language() const // Find the code we have for a given language code. Return empty if not found.
string realCode(string const & c)
{ {
// get the language from the gmo file // Qt tries to outsmart us and transforms en_US to C.
string const test = N_("[[Replace with the code of your language]]"); string code = (c == "C") ? "en" : c;
string const trans = to_utf8(get(test)); // this loops at most twice
if (trans == test) { while (true) {
LYXERR0("Something is weird."); if (FileName(moFile(code)).isReadableFile())
return string(); return code;
} else if (contains(code, '_'))
return trans; code = token(code, '_', 0);
else
break;
}
return string();
}
} }
bool Messages::available(string const & c) bool Messages::available(string const & c)
{ {
static string locale_dir = package().locale_dir().toFilesystemEncoding(); return !realCode(c).empty();
string code = c;
// this loops at most twice
while (true) {
string const filen = locale_dir + "/" + code
+ "/LC_MESSAGES/" PACKAGE ".mo";
if (FileName(filen).isReadableFile())
return true;
if (contains(code, '_'))
code = token(code, '_', 0);
else return false;
}
return false;
} }
namespace {
// Trivial wrapper around gettext() string Messages::language() const
docstring const getText(string const & m)
{ {
// FIXME: gettext sometimes "forgets" the ucs4_codeset we set return realCode(lang_);
// in init(), which leads to severe message corruption (#7371) }
// We set it again here unconditionally. A real fix must be found!
LATTEST(bind_textdomain_codeset(PACKAGE, ucs4_codeset));
char const * m_c = m.c_str();
char const * trans_c = gettext(m_c); struct MoHeader
docstring trans; {
if (!trans_c) { // magic number = 0x950412de
LYXERR(Debug::LOCALE, "Undefined result from gettext for `" << m << "'."); uint32_t magic;
trans = from_ascii(m); // file format revision = 0
} else if (trans_c == m_c) { uint32_t rev;
//LYXERR(Debug::LOCALE, "Same as entered returned"); // number of strings
trans = from_ascii(m); uint32_t N;
} else { // offset of table with original strings
//LYXERR(Debug::LOCALE, "We got a translation"); uint32_t O;
// m is actually not a char const * but ucs4 data // offset of table with translation strings
trans = reinterpret_cast<char_type const *>(trans_c); uint32_t T;
// there is a hashing table afterwards, but we ignore it
};
// One entry of the "original strings" or "translation strings" table of
// a .mo file. The tables are read by overlaying an array of these
// entries on the loaded file data, so the order and size of the fields
// must match the on-disk "length & offset" layout.
struct StringTable
{
// string length (used as the character count when building the string)
uint32_t length;
// string offset, relative to the beginning of the loaded file data
uint32_t offset;
};
bool Messages::readMoFile()
{
// FIXME:remove
if (lang_.empty()) {
LYXERR0("No language given, nothing to load.");
return false;
} }
cleanTranslation(trans); string const code = realCode(lang_);
if (code.empty()) {
LYXERR0("Cannot find translation for language " << lang_);
return false;
}
return trans; string const filen = moFile(code);
// get file size
struct stat buf;
if (stat(filen.c_str(), &buf)) {
LYXERR0("Cannot get information for file " << filen);
return false;
}
vector<char> moData(buf.st_size);
ifstream is(filen.c_str(), ios::in | ios::binary);
if (!is.read(&moData[0], buf.st_size)) {
LYXERR0("Cannot read file " << filen);
return false;
}
MoHeader const * header = reinterpret_cast<MoHeader const *>(&moData[0]);
if (header->magic != 0x950412de) {
LYXERR0("Wrong magic number for file " << filen
<< ".\nExpected 0x950412de, got " << std::hex << header->magic);
return false;
}
StringTable const * orig = reinterpret_cast<StringTable const *>(&moData[0] + header->O);
StringTable const * trans = reinterpret_cast<StringTable const *>(&moData[0] + header->T);
// First the header
string const info = string(&moData[0] + trans[0].offset, trans[0].length);
size_t pos = info.find("charset=");
if (pos != string::npos) {
pos += 8;
string charset;
size_t pos2 = info.find("\n", pos);
if (pos2 == string::npos)
charset = info.substr(pos);
else
charset = info.substr(pos, pos2 - pos);
charset = ascii_lowercase(trim(charset));
if (charset != "utf-8") {
LYXERR0("Wrong encoding " << charset << " for file " << filen);
return false;
}
} else {
LYXERR0("Cannot find encoding encoding for file " << filen);
return false;
}
for (size_t i = 1; i < header->N; ++i) {
// Note that in theory the strings may contain NUL characters.
// This may be the case with plural forms
string const ostr(&moData[0] + orig[i].offset, orig[i].length);
docstring tstr = from_utf8(string(&moData[0] + trans[i].offset,
trans[i].length));
cleanTranslation(tstr);
trans_map_[ostr] = tstr;
//lyxerr << ostr << " ==> " << tstr << endl;
}
return true;
} }
}
docstring const Messages::get(string const & m) const docstring const Messages::get(string const & m) const
{ {
if (m.empty()) if (m.empty())
return docstring(); return docstring();
// Look for the translated string in the cache. TranslationMap::const_iterator it = trans_map_.find(m);
TranslationCache::iterator it = cache_.find(m); if (it != trans_map_.end())
if (it != cache_.end())
return it->second; return it->second;
else {
// The string was not found, use gettext to generate it docstring res = from_utf8(m);
docstring trans; cleanTranslation(res);
if (!lang_.empty()) { return res;
// This GNU extension overrides any language locale }
// wrt gettext.
LYXERR(Debug::LOCALE, "Setting LANGUAGE to " << lang_);
EnvChanger language_chg("LANGUAGE", lang_);
// However, setting LANGUAGE does nothing when the
// locale is "C". Therefore we set the locale to
// something that is believed to exist on most
// systems. The idea is that one should be able to
// load German documents even without having de_DE
// installed.
LYXERR(Debug::LOCALE, "Setting LC_ALL to en_US");
EnvChanger lc_all_chg("LC_ALL", "en_US");
#ifdef HAVE_LC_MESSAGES
setlocale(LC_MESSAGES, "");
#endif
trans = getText(m);
} else
trans = getText(m);
#ifdef HAVE_LC_MESSAGES
setlocale(LC_MESSAGES, "");
#endif
// store translation in cache
pair<TranslationCache::iterator, bool> result =
cache_.insert(make_pair(m, trans));
LASSERT(result.second, return from_utf8(m));
return result.first->second;
} }
} // namespace lyx } // namespace lyx

View File

@ -31,17 +31,20 @@ public:
/// Is an (at least partial) translation of language with code \p c available? /// Is an (at least partial) translation of language with code \p c available?
static bool available(std::string const & c); static bool available(std::string const & c);
/// ///
static void init(); static void guiLanguage(std::string const & l) { gui_lang_ = l; }
/// Return the language used by the GUI, as stored in gui_lang_.
static std::string const & guiLanguage() { return gui_lang_; }
private: private:
/// Read the strings from the .mo file. Returns true on success.
bool readMoFile();
/// ///
std::string lang_; std::string lang_;
/// ///
typedef std::map<std::string, docstring> TranslationCache; typedef std::map<std::string, docstring> TranslationMap;
/// Internal cache for gettext translated strings. TranslationMap trans_map_;
/// This is needed for performance reason within \c updateBuffer() /// The language used by the Gui
/// under Windows. static std::string gui_lang_;
mutable TranslationCache cache_;
}; };
/// Access to the unique Messages object for the passed \p language. /// Access to the unique Messages object for the passed \p language.

View File

@ -17,10 +17,6 @@
#include "support/Messages.h" #include "support/Messages.h"
#include "support/Package.h" #include "support/Package.h"
#ifdef HAVE_LOCALE_H
# include <locale.h>
#endif
using namespace std; using namespace std;
namespace lyx { namespace lyx {
@ -31,19 +27,6 @@ docstring const _(string const & str)
} }
void locale_init()
{
#ifdef ENABLE_NLS
# ifdef HAVE_LC_MESSAGES
setlocale(LC_MESSAGES, "");
# endif
setlocale(LC_CTYPE, "");
Messages::init();
#endif
setlocale(LC_NUMERIC, "C");
}
docstring const translateIfPossible(docstring const & name) docstring const translateIfPossible(docstring const & name)
{ {
if (support::isAscii(name) && !name.empty()) if (support::isAscii(name) && !name.empty())

View File

@ -78,10 +78,6 @@ docstring const translateIfPossible(docstring const & name);
* language if they come from a file in the personal directory. */ * language if they come from a file in the personal directory. */
docstring const translateIfPossible(docstring const & name, std::string const & language); docstring const translateIfPossible(docstring const & name, std::string const & language);
///
void locale_init();
} // namespace lyx } // namespace lyx
#endif #endif