Implement native reading of mo files.

Get the default language by a mix of QLocale and LyXRC::gui_language

Known limitations:
   * encoding is supposed to be UTF-8 (the charset parameter is checked);
   * context is not handled (implemented differently in LyX);
   * plural forms not implemented (not used for now in LyX);.
   * tThe byte endianness of the machine on which the .mo file have been
     built is expected to be the same as the one of the machine where this
     code is run.
This commit is contained in:
Jean-Marc Lasgouttes 2013-05-08 18:50:38 +02:00 committed by Vincent van Ravesteijn
parent 39a65bfb94
commit 19024f7255
7 changed files with 248 additions and 170 deletions

View File

@ -33,7 +33,6 @@
#include "FuncStatus.h"
#include "HunspellChecker.h"
#include "KeyMap.h"
#include "Language.h"
#include "LaTeXFonts.h"
#include "LayoutFile.h"
#include "Lexer.h"
@ -272,7 +271,7 @@ int LyX::exec(int & argc, char * argv[])
try {
init_package(os::utf8_argv(0), string(), string());
// we do not get to this point when init_package throws an exception
locale_init();
setLocale();
} catch (ExceptionMessage const & message) {
LYXERR(Debug::LOCALE, message.title_ + ", " + message.details_);
}
@ -294,7 +293,7 @@ int LyX::exec(int & argc, char * argv[])
// Reinit the messages machinery in case package() knows
// something interesting about the locale directory.
Messages::init();
setLocale();
if (!use_gui) {
// FIXME: create a ConsoleApplication
@ -337,7 +336,7 @@ int LyX::exec(int & argc, char * argv[])
// Reestablish our defaults, as Qt overwrites them
// after createApplication()
locale_init();
setLocale();//???
// Parse and remove all known arguments in the LyX singleton
// Give an error for all remaining ones.
@ -794,6 +793,9 @@ bool LyX::init()
if (!readRcFile("preferences", true))
return false;
// The language may have been set to someting useful through prefs
setLocale();
if (!readEncodingsFile("encodings", "unicodesymbols"))
return false;
if (!readLanguagesFile("languages"))
@ -1379,19 +1381,7 @@ Messages const & getMessages(string const & language)
Messages const & getGuiMessages()
{
LAPPERR(singleton_);
// A cache to translate full language name to language code
static string last_language = "auto";
static string code;
if (lyxrc.gui_language != last_language) {
if (lyxrc.gui_language == "auto")
code.clear();
else {
Language const * l = languages.getLanguage(lyxrc.gui_language);
code = l ? l->code() : string();
}
last_language = lyxrc.gui_language;
}
return singleton_->messages(code);
return singleton_->messages(Messages::guiLanguage());
}

View File

@ -259,6 +259,8 @@ std::vector<std::string> loadableImageFormats();
frontend::Application * theApp();
frontend::Application * createApplication(int & argc, char * argv[]);
void hideDialogs(std::string const & name, Inset * inset);
/// Set locale correctly using LyXRC::gui_language
void setLocale();
} // namespace lyx

View File

@ -175,6 +175,22 @@ frontend::Application * createApplication(int & argc, char * argv[])
return guiApp;
}
void setLocale()
{
QLocale theLocale;
if (lyxrc.gui_language == "auto") {
theLocale = QLocale::system();
} else {
Language const * l = languages.getLanguage(lyxrc.gui_language);
string const code = l ? l->code() : string();
theLocale = QLocale(toqstr(code));
}
Messages::guiLanguage(fromqstr(theLocale.name()));
QLocale::setDefault(theLocale);
}
namespace frontend {
@ -2212,14 +2228,10 @@ void GuiApplication::exit(int status)
void GuiApplication::setGuiLanguage()
{
QString const default_language = toqstr(getGuiMessages().language());
LYXERR(Debug::LOCALE, "Trying to set default locale to: " << default_language);
QLocale const default_locale(default_language);
QLocale::setDefault(default_locale);
setLocale();
QLocale theLocale;
// install translation file for Qt built-in dialogs
QString const language_name = QString("qt_") + default_locale.name();
QString const language_name = QString("qt_") + theLocale.name();
// language_name can be short (e.g. qt_zh) or long (e.g. qt_zh_CN).
// Short-named translator can be loaded from a long name, but not the
// opposite. Therefore, long name should be used without truncation.
@ -2233,7 +2245,7 @@ void GuiApplication::setGuiLanguage()
<< language_name);
}
switch (default_locale.language()) {
switch (theLocale.language()) {
case QLocale::Arabic :
case QLocale::Hebrew :
case QLocale::Persian :

View File

@ -7,24 +7,95 @@
* Full author contact details are available in file CREDITS.
*/
/*
This is a limited parser for gettext's po files. Several features are
not handled for now:
* encoding is supposed to be UTF-8 (the charset parameter is not honored)
* context is not handled (implemented differently in LyX)
* plural forms not implemented (not used for now in LyX).
* The byte endianness of the machine on which the .mo file have been
built is expected to be the same as the one of the machine where this
code is run.
The data is loaded in a std::map object for simplicity.
*/
/*
Format of a MO file. Source: http://www.gnu.org/software/gettext/manual/html_node/MO-Files.html
byte
+------------------------------------------+
0 | magic number = 0x950412de |
| |
4 | file format revision = 0 |
| |
8 | number of strings | == N
| |
12 | offset of table with original strings | == O
| |
16 | offset of table with translation strings | == T
| |
20 | size of hashing table | == S
| |
24 | offset of hashing table | == H
| |
. .
. (possibly more entries later) .
. .
| |
O | length & offset 0th string ----------------.
O + 8 | length & offset 1st string ------------------.
... ... | |
O + ((N-1)*8)| length & offset (N-1)th string | | |
| | | |
T | length & offset 0th translation ---------------.
T + 8 | length & offset 1st translation -----------------.
... ... | | | |
T + ((N-1)*8)| length & offset (N-1)th translation | | | | |
| | | | | |
H | start hash table | | | | |
... ... | | | |
H + S * 4 | end hash table | | | | |
| | | | | |
| NUL terminated 0th string <----------------' | | |
| | | | |
| NUL terminated 1st string <------------------' | |
| | | |
... ... | |
| | | |
| NUL terminated 0th translation <---------------' |
| | |
| NUL terminated 1st translation <-----------------'
| |
... ...
| |
+------------------------------------------+
*/
#include <config.h>
#include "support/Messages.h"
#include "support/debug.h"
#include "support/docstring.h"
#include "support/environment.h"
#include "support/lstrings.h"
#include "support/Package.h"
#include "support/unicode.h"
#include "support/lassert.h"
#include <cerrno>
#include <boost/cstdint.hpp>
# define N_(str) (str) // for marking strings to be translated
#include <cerrno>
#include <fstream>
#ifdef HAVE_SYS_STAT_H
# include <sys/stat.h>
#endif
using namespace std;
using boost::uint32_t;
namespace lyx {
@ -62,20 +133,13 @@ void cleanTranslation(docstring & trans)
#ifdef ENABLE_NLS
# ifdef HAVE_LOCALE_H
# include <locale.h>
# endif
# if HAVE_GETTEXT
# include <libintl.h> // use the header already in the system *EK*
# else
# include "intl/libintl.h"
# endif
using namespace lyx::support;
namespace lyx {
std::string Messages::gui_lang_;
// This version use the traditional gettext.
Messages::Messages(string const & l)
: lang_(l)
@ -84,138 +148,166 @@ Messages::Messages(string const & l)
size_t i = lang_.find(".");
lang_ = lang_.substr(0, i);
LYXERR(Debug::LOCALE, "language(" << lang_ << ")");
readMoFile();
}
void Messages::init()
namespace {
string moFile(string const & c)
{
errno = 0;
string const locale_dir = package().locale_dir().toFilesystemEncoding();
char const * c = bindtextdomain(PACKAGE, locale_dir.c_str());
int e = errno;
if (e) {
LYXERR(Debug::LOCALE, "Error code: " << errno << '\n'
<< "Directory : " << package().locale_dir().absFileName() << '\n'
<< "Rtn value : " << c);
}
if (!bind_textdomain_codeset(PACKAGE, ucs4_codeset)) {
LYXERR(Debug::LOCALE, "Error code: " << errno << '\n'
<< "Codeset : " << ucs4_codeset);
}
textdomain(PACKAGE);
static string const locale_dir
= package().locale_dir().toFilesystemEncoding();
return locale_dir + "/" + c
+ "/LC_MESSAGES/" PACKAGE ".mo";
}
string Messages::language() const
// Find the code we have for a given language code. Return empty if not found.
string realCode(string const & c)
{
// get the language from the gmo file
string const test = N_("[[Replace with the code of your language]]");
string const trans = to_utf8(get(test));
if (trans == test) {
LYXERR0("Something is weird.");
// Qt tries to outsmart us and transforms en_US to C.
string code = (c == "C") ? "en" : c;
// this loops at most twice
while (true) {
if (FileName(moFile(code)).isReadableFile())
return code;
if (contains(code, '_'))
code = token(code, '_', 0);
else
break;
}
return string();
} else
return trans;
}
}
bool Messages::available(string const & c)
{
static string locale_dir = package().locale_dir().toFilesystemEncoding();
string code = c;
// this loops at most twice
while (true) {
string const filen = locale_dir + "/" + code
+ "/LC_MESSAGES/" PACKAGE ".mo";
if (FileName(filen).isReadableFile())
return true;
if (contains(code, '_'))
code = token(code, '_', 0);
else return false;
}
return false;
return !realCode(c).empty();
}
namespace {
// Trivial wrapper around gettext()
docstring const getText(string const & m)
string Messages::language() const
{
// FIXME: gettext sometimes "forgets" the ucs4_codeset we set
// in init(), which leads to severe message corruption (#7371)
// We set it again here unconditionally. A real fix must be found!
LATTEST(bind_textdomain_codeset(PACKAGE, ucs4_codeset));
return realCode(lang_);
}
char const * m_c = m.c_str();
char const * trans_c = gettext(m_c);
docstring trans;
if (!trans_c) {
LYXERR(Debug::LOCALE, "Undefined result from gettext for `" << m << "'.");
trans = from_ascii(m);
} else if (trans_c == m_c) {
//LYXERR(Debug::LOCALE, "Same as entered returned");
trans = from_ascii(m);
struct MoHeader
{
// magic number = 0x950412de
uint32_t magic;
// file format revision = 0
uint32_t rev;
// number of strings
uint32_t N;
// offset of table with original strings
uint32_t O;
// offset of table with translation strings
uint32_t T;
// there is a hashing table afterwrds, but we ignore it
};
struct StringTable
{
// string length
uint32_t length;
// string offset
uint32_t offset;
};
bool Messages::readMoFile()
{
// FIXME:remove
if (lang_.empty()) {
LYXERR0("No language given, nothing to load.");
return false;
}
string const code = realCode(lang_);
if (code.empty()) {
LYXERR0("Cannot find translation for language " << lang_);
return false;
}
string const filen = moFile(code);
// get file size
struct stat buf;
if (stat(filen.c_str(), &buf)) {
LYXERR0("Cannot get information for file " << filen);
return false;
}
vector<char> moData(buf.st_size);
ifstream is(filen.c_str(), ios::in | ios::binary);
if (!is.read(&moData[0], buf.st_size)) {
LYXERR0("Cannot read file " << filen);
return false;
}
MoHeader const * header = reinterpret_cast<MoHeader const *>(&moData[0]);
if (header->magic != 0x950412de) {
LYXERR0("Wrong magic number for file " << filen
<< ".\nExpected 0x950412de, got " << std::hex << header->magic);
return false;
}
StringTable const * orig = reinterpret_cast<StringTable const *>(&moData[0] + header->O);
StringTable const * trans = reinterpret_cast<StringTable const *>(&moData[0] + header->T);
// First the header
string const info = string(&moData[0] + trans[0].offset, trans[0].length);
size_t pos = info.find("charset=");
if (pos != string::npos) {
pos += 8;
string charset;
size_t pos2 = info.find("\n", pos);
if (pos2 == string::npos)
charset = info.substr(pos);
else
charset = info.substr(pos, pos2 - pos);
charset = ascii_lowercase(trim(charset));
if (charset != "utf-8") {
LYXERR0("Wrong encoding " << charset << " for file " << filen);
return false;
}
} else {
//LYXERR(Debug::LOCALE, "We got a translation");
// m is actually not a char const * but ucs4 data
trans = reinterpret_cast<char_type const *>(trans_c);
LYXERR0("Cannot find encoding encoding for file " << filen);
return false;
}
cleanTranslation(trans);
return trans;
for (size_t i = 1; i < header->N; ++i) {
// Note that in theory the strings may contain NUL characters.
// This may be the case with plural forms
string const ostr(&moData[0] + orig[i].offset, orig[i].length);
docstring tstr = from_utf8(string(&moData[0] + trans[i].offset,
trans[i].length));
cleanTranslation(tstr);
trans_map_[ostr] = tstr;
//lyxerr << ostr << " ==> " << tstr << endl;
}
return true;
}
docstring const Messages::get(string const & m) const
{
if (m.empty())
return docstring();
// Look for the translated string in the cache.
TranslationCache::iterator it = cache_.find(m);
if (it != cache_.end())
TranslationMap::const_iterator it = trans_map_.find(m);
if (it != trans_map_.end())
return it->second;
// The string was not found, use gettext to generate it
docstring trans;
if (!lang_.empty()) {
// This GNU extension overrides any language locale
// wrt gettext.
LYXERR(Debug::LOCALE, "Setting LANGUAGE to " << lang_);
EnvChanger language_chg("LANGUAGE", lang_);
// However, setting LANGUAGE does nothing when the
// locale is "C". Therefore we set the locale to
// something that is believed to exist on most
// systems. The idea is that one should be able to
// load German documents even without having de_DE
// installed.
LYXERR(Debug::LOCALE, "Setting LC_ALL to en_US");
EnvChanger lc_all_chg("LC_ALL", "en_US");
#ifdef HAVE_LC_MESSAGES
setlocale(LC_MESSAGES, "");
#endif
trans = getText(m);
} else
trans = getText(m);
#ifdef HAVE_LC_MESSAGES
setlocale(LC_MESSAGES, "");
#endif
// store translation in cache
pair<TranslationCache::iterator, bool> result =
cache_.insert(make_pair(m, trans));
LASSERT(result.second, return from_utf8(m));
return result.first->second;
else {
docstring res = from_utf8(m);
cleanTranslation(res);
return res;
}
}
} // namespace lyx

View File

@ -31,17 +31,20 @@ public:
/// Is an (at least partial) translation of language with code \p c available?
static bool available(std::string const & c);
///
static void init();
static void guiLanguage(std::string const & l) { gui_lang_ = l; }
///
static std::string const & guiLanguage() { return gui_lang_; }
private:
/// Read the strings from the .mo file. Returns true on success.
bool readMoFile();
///
std::string lang_;
///
typedef std::map<std::string, docstring> TranslationCache;
/// Internal cache for gettext translated strings.
/// This is needed for performance reason within \c updateBuffer()
/// under Windows.
mutable TranslationCache cache_;
typedef std::map<std::string, docstring> TranslationMap;
TranslationMap trans_map_;
/// The language used by the Gui
static std::string gui_lang_;
};
/// Access to the unique Messages object for the passed \p language.

View File

@ -17,10 +17,6 @@
#include "support/Messages.h"
#include "support/Package.h"
#ifdef HAVE_LOCALE_H
# include <locale.h>
#endif
using namespace std;
namespace lyx {
@ -31,19 +27,6 @@ docstring const _(string const & str)
}
void locale_init()
{
#ifdef ENABLE_NLS
# ifdef HAVE_LC_MESSAGES
setlocale(LC_MESSAGES, "");
# endif
setlocale(LC_CTYPE, "");
Messages::init();
#endif
setlocale(LC_NUMERIC, "C");
}
docstring const translateIfPossible(docstring const & name)
{
if (support::isAscii(name) && !name.empty())

View File

@ -78,10 +78,6 @@ docstring const translateIfPossible(docstring const & name);
* language if they come from a file in the personal directory. */
docstring const translateIfPossible(docstring const & name, std::string const & language);
///
void locale_init();
} // namespace lyx
#endif