lyx_mirror/src/Language.cpp

512 lines
12 KiB
C++
Raw Normal View History

/**
* \file Language.cpp
* This file is part of LyX, the document processor.
* Licence details can be found in the file COPYING.
*
* \author Lars Gullik Bjønnes
* \author Jean-Marc Lasgouttes
* \author Jürgen Spitzmüller
* \author Dekel Tsur
*
* Full author contact details are available in file CREDITS.
*/
#include <config.h>
#include "Language.h"
#include "Encoding.h"
#include "LaTeXFonts.h"
#include "Lexer.h"
#include "LyXRC.h"
#include "support/debug.h"
Rename files in src/support, step one. src/support/package.h src/support/Package.h Package src/support/package.C.in src/support/Package.C.in Package src/support/path.h src/support/Path.h Path src/support/fs_extras.h src/support/fs_extras.h NOCLASSES src/support/RandomAccessList.h src/support/RandomAccessList.h RandomAccessList src/support/lyxmanip.h src/support/lyxmanip.h NOCLASSES src/support/rename.C src/support/rename.cpp NOCLASSES src/support/abort.C src/support/abort.cpp NOCLASSES src/support/lyxlib.h src/support/lyxlib.h NOCLASSES src/support/ExceptionMessage.h src/support/ExceptionMessage.h ExceptionMessage src/support/copy.C src/support/copy.cpp NOCLASSES src/support/limited_stack.h src/support/limited_stack.h limited_stack src/support/filefilterlist.C src/support/FileFilterList.cpp ['FileFilterList', 'Filter'] src/support/cow_ptr.h src/support/cow_ptr.h cow_ptr src/support/os_unix.C src/support/os_unix.cpp NOCLASSES src/support/socktools.h src/support/socktools.h NOCLASSES src/support/forkedcontr.h src/support/ForkedcallsController.h ForkedcallsController src/support/os.h src/support/os.h NOCLASSES src/support/FileMonitor.h src/support/FileMonitor.h FileMonitor src/support/copied_ptr.h src/support/copied_ptr.h copied_ptr src/support/translator.h src/support/Translator.h Translator src/support/filetools.C src/support/filetools.cpp NOCLASSES src/support/unlink.C src/support/unlink.cpp NOCLASSES src/support/os_win32.C src/support/os_win32.cpp GetFolderPath src/support/lstrings.C src/support/lstrings.cpp NOCLASSES src/support/qstring_helpers.C src/support/qstring_helpers.cpp NOCLASSES src/support/getcwd.C src/support/getcwd.cpp NOCLASSES src/support/systemcall.C src/support/Systemcall.cpp Systemcall src/support/lyxalgo.h src/support/lyxalgo.h NOCLASSES src/support/filefilterlist.h src/support/FileFilterList.h ['FileFilterList', 'Filter'] src/support/unicode.C src/support/unicode.cpp IconvProcessor src/support/userinfo.C src/support/userinfo.cpp NOCLASSES 
src/support/lyxtime.C src/support/lyxtime.cpp NOCLASSES src/support/kill.C src/support/kill.cpp NOCLASSES src/support/docstring.C src/support/docstring.cpp to_local8bit_failure src/support/os_cygwin.C src/support/os_cygwin.cpp NOCLASSES src/support/lyxsum.C src/support/lyxsum.cpp NOCLASSES src/support/environment.C src/support/environment.cpp NOCLASSES src/support/filetools.h src/support/filetools.h NOCLASSES src/support/textutils.C src/support/textutils.cpp NOCLASSES src/support/mkdir.C src/support/mkdir.cpp NOCLASSES src/support/forkedcall.C src/support/Forkedcall.cpp ['ForkedProcess', 'Forkedcall'] src/support/tempname.C src/support/tempname.cpp NOCLASSES src/support/os_win32.h src/support/os_win32.h GetFolderPath src/support/types.h src/support/types.h NOCLASSES src/support/lstrings.h src/support/lstrings.h NOCLASSES src/support/forkedcallqueue.C src/support/ForkedCallQueue.cpp ForkedCallQueue src/support/qstring_helpers.h src/support/qstring_helpers.h NOCLASSES src/support/convert.C src/support/convert.cpp NOCLASSES src/support/filename.C src/support/FileName.cpp ['FileName', 'DocFileName'] src/support/tests/convert.C src/support/tests/convert.cpp NOCLASSES src/support/tests/filetools.C src/support/tests/filetools.cpp NOCLASSES src/support/tests/lstrings.C src/support/tests/lstrings.cpp NOCLASSES src/support/tests/boost.C src/support/tests/boost.cpp NOCLASSES src/support/docstream.C src/support/docstream.cpp ['iconv_codecvt_facet_exception', 'idocfstream', 'odocfstream'] src/support/std_istream.h src/support/std_istream.h NOCLASSES src/support/systemcall.h src/support/Systemcall.h Systemcall src/support/chdir.C src/support/chdir.cpp NOCLASSES src/support/std_ostream.h src/support/std_ostream.h NOCLASSES src/support/unicode.h src/support/unicode.h IconvProcessor src/support/path.C src/support/Path.cpp Path src/support/fs_extras.C src/support/fs_extras.cpp NOCLASSES src/support/userinfo.h src/support/userinfo.h NOCLASSES src/support/lyxtime.h 
src/support/lyxtime.h NOCLASSES src/support/docstring.h src/support/docstring.h to_local8bit_failure src/support/debugstream.h src/support/debugstream.h basic_debugstream src/support/environment.h src/support/environment.h NOCLASSES src/support/textutils.h src/support/textutils.h NOCLASSES src/support/forkedcall.h src/support/Forkedcall.h ['ForkedProcess', 'Forkedcall'] src/support/socktools.C src/support/socktools.cpp NOCLASSES src/support/forkedcallqueue.h src/support/ForkedCallQueue.h ForkedCallQueue src/support/forkedcontr.C src/support/ForkedcallsController.cpp ForkedcallsController src/support/os.C src/support/os.cpp NOCLASSES src/support/convert.h src/support/convert.h NOCLASSES src/support/filename.h src/support/FileName.h ['FileName', 'DocFileName'] src/support/docstream.h src/support/docstream.h ['iconv_codecvt_facet_exception', 'idocfstream', 'odocfstream'] src/support/FileMonitor.C src/support/FileMonitor.cpp FileMonitor git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/trunk@18024 a592a061-630c-0410-9148-cb99ea01b6c8
2007-04-26 05:12:52 +00:00
#include "support/FileName.h"
#include "support/filetools.h"
#include "support/lassert.h"
#include "support/lstrings.h"
#include "support/qstring_helpers.h"
#include "support/Messages.h"
#include <QLocale>
#include <QString>
using namespace std;
using namespace lyx::support;
namespace lyx {
// The global Languages instance.
Languages languages;

// Special language handles; all start out null.
// ignore_language, default_language and latex_language are assigned by
// Languages::read() (from the "ignore", "english" and "latex" entries
// respectively). reset_language is not assigned anywhere in this file
// section; Languages::getLanguage() returns it for "reset" and for
// unknown language names.
Language const * ignore_language = nullptr;
Language const * default_language = nullptr;
Language const * latex_language = nullptr;
Language const * reset_language = nullptr;
// True when this language has a polyglossia name but neither a babel
// name nor a requires() entry.
bool Language::isPolyglossiaExclusive() const
{
	if (!babel().empty())
		return false;
	if (polyglossia().empty())
		return false;
	return requires().empty();
}
// True when this language has a babel name but neither a polyglossia
// name nor a requires() entry.
bool Language::isBabelExclusive() const
{
	if (babel().empty())
		return false;
	if (!polyglossia().empty())
		return false;
	return requires().empty();
}
// Look up the translation of the layout string m for this language.
//  - An empty m yields an empty docstring.
//  - A non-ASCII m is not translated: a warning is written to lyxerr and
//    m is returned converted from UTF-8.
//  - If no translation is stored, m itself (converted from ASCII and
//    passed through cleanTranslation()) is returned.
docstring const Language::translateLayout(string const & m) const
{
	if (m.empty())
		return docstring();

	if (!isAscii(m)) {
		lyxerr << "Warning: not translating `" << m
		       << "' because it is not pure ASCII.\n";
		return from_utf8(m);
	}

	TranslationMap::const_iterator const found = layoutTranslations_.find(m);
	if (found == layoutTranslations_.end()) {
		// No stored translation: fall back to the untranslated text.
		docstring fallback = from_ascii(m);
		cleanTranslation(fallback);
		return fallback;
	}
	return found->second;
}
// Choose the font encoding to use for this language with the given
// buffer parameters.
// With non-TeX fonts the first entry of fontenc_ is returned directly.
// Otherwise the encodings in fontenc_ are tried in order against those
// offered by the roman font; the first one the font supports wins.
// In both cases the pseudo-encoding "ASCII" as first entry maps to "T1"
// when nothing better was found.
string Language::fontenc(BufferParams const & params) const
{
	if (!params.useNonTeXFonts) {
		// Determine optimal font encoding: check whether the roman font
		// in use supports an encoding that our language supports.
		LaTeXFont const & romanFont =
			theLaTeXFonts().getLaTeXFont(from_ascii(params.fontsRoman()));
		vector<string> const supported = romanFont.fontencs();
		for (auto & wanted : fontenc_) {
			// ASCII means: support all T* encodings plus OT1
			if (wanted == "ASCII") {
				for (auto & candidate : supported) {
					if (candidate == "OT1" || prefixIs(candidate, "T"))
						// we found a suitable one; return that.
						return candidate;
				}
			}
			// For other encodings, just check whether the font supports it
			if (romanFont.hasFontenc(wanted))
				return wanted;
		}
	}

	// Either LaTeX fonts are not used (then this value is not really
	// used anyway), or no suitable encoding was found: take the first,
	// i.e. prioritized, entry of the list ("T1" stands in for ASCII).
	string const & first = fontenc_.front();
	if (first == "ASCII")
		return "T1";
	return first;
}
// Return the i-th date format string of this language, or an empty
// string when i is not a valid index into dateformats_.
string Language::dateFormat(size_t i) const
{
	// Valid indices are 0 .. size()-1, so i == size() must be rejected
	// as well; otherwise at() would throw std::out_of_range.
	if (i >= dateformats_.size())
		return string();
	return dateformats_.at(i);
}
// Return the decimal separator for this language.
// If the preference lyxrc.default_decimal_sep is "locale", the separator
// is taken from the QLocale constructed from this language's code();
// otherwise the preference value itself is returned.
docstring Language::decimalSeparator() const
{
	if (lyxrc.default_decimal_sep != "locale")
		return from_utf8(lyxrc.default_decimal_sep);

	QLocale const locale = QLocale(toqstr(code()));
	return qstring_to_ucs4(QString(locale.decimalPoint()));
}
// Parse the body of one language definition from lex, storing every
// recognised setting in the corresponding member.
// Parsing stops at the "end" tag, on an unknown tag, or when the lexer
// is no longer OK. Returns true only if "end" was reached and no
// unknown tag was seen.
bool Language::readLanguage(Lexer & lex)
{
	enum LanguageTags {
		LA_BABELNAME = 1,
		LA_DATEFORMATS,
		LA_ENCODING,
		LA_END,
		LA_FONTENC,
		LA_GUINAME,
		LA_HAS_GUI_SUPPORT,
		LA_INTERNAL_ENC,
		LA_LANG_CODE,
		LA_LANG_VARIETY,
		LA_POLYGLOSSIANAME,
		LA_POLYGLOSSIAOPTS,
		LA_POSTBABELPREAMBLE,
		LA_PREBABELPREAMBLE,
		LA_PROVIDES,
		LA_REQUIRES,
		LA_QUOTESTYLE,
		LA_RTL,
		LA_WORDWRAP
	};

	// Keep these sorted alphabetically!
	LexerKeyword languageTags[] = {
		{ "babelname",         LA_BABELNAME },
		{ "dateformats",       LA_DATEFORMATS },
		{ "encoding",          LA_ENCODING },
		{ "end",               LA_END },
		{ "fontencoding",      LA_FONTENC },
		{ "guiname",           LA_GUINAME },
		{ "hasguisupport",     LA_HAS_GUI_SUPPORT },
		{ "internalencoding",  LA_INTERNAL_ENC },
		{ "langcode",          LA_LANG_CODE },
		{ "langvariety",       LA_LANG_VARIETY },
		{ "polyglossianame",   LA_POLYGLOSSIANAME },
		{ "polyglossiaopts",   LA_POLYGLOSSIAOPTS },
		{ "postbabelpreamble", LA_POSTBABELPREAMBLE },
		{ "prebabelpreamble",  LA_PREBABELPREAMBLE },
		{ "provides",          LA_PROVIDES },
		{ "quotestyle",        LA_QUOTESTYLE },
		{ "requires",          LA_REQUIRES },
		{ "rtl",               LA_RTL },
		{ "wordwrap",          LA_WORDWRAP }
	};

	bool error = false;
	bool finished = false;
	lex.pushTable(languageTags);

	// parse style section
	while (!finished && lex.isOK() && !error) {
		int const token = lex.lex();

		// Lexer status codes are dealt with before the value is
		// interpreted as a LanguageTags (see also the comment in
		// LyXRC.cpp).
		if (token == Lexer::LEX_FEOF)
			continue;
		if (token == Lexer::LEX_UNDEF) {
			// parse error
			lex.printError("Unknown language tag `$$Token'");
			error = true;
			continue;
		}

		switch (static_cast<LanguageTags>(token)) {
		case LA_BABELNAME:
			lex >> babel_;
			break;
		case LA_DATEFORMATS: {
			// The rest of the line is a '|'-separated list, with
			// surrounding double quotes stripped.
			lex.eatLine();
			vector<string> const formats =
				getVectorFromString(trim(lex.getString(true), "\""), "|");
			dateformats_.insert(dateformats_.end(),
					    formats.begin(), formats.end());
			break;
		}
		case LA_ENCODING:
			lex >> encodingStr_;
			break;
		case LA_END:
			// end of structure
			finished = true;
			break;
		case LA_FONTENC: {
			// The rest of the line is a '|'-separated list.
			lex.eatLine();
			vector<string> const encs =
				getVectorFromString(lex.getString(true), "|");
			fontenc_.insert(fontenc_.end(), encs.begin(), encs.end());
			break;
		}
		case LA_GUINAME:
			lex >> display_;
			break;
		case LA_HAS_GUI_SUPPORT:
			lex >> has_gui_support_;
			break;
		case LA_INTERNAL_ENC:
			lex >> internal_enc_;
			break;
		case LA_LANG_CODE:
			lex >> code_;
			break;
		case LA_LANG_VARIETY:
			lex >> variety_;
			break;
		case LA_POLYGLOSSIANAME:
			lex >> polyglossia_name_;
			break;
		case LA_POLYGLOSSIAOPTS:
			lex >> polyglossia_opts_;
			break;
		case LA_POSTBABELPREAMBLE:
			babel_postsettings_ =
				lex.getLongString(from_ascii("EndPostBabelPreamble"));
			break;
		case LA_PREBABELPREAMBLE:
			babel_presettings_ =
				lex.getLongString(from_ascii("EndPreBabelPreamble"));
			break;
		case LA_PROVIDES:
			lex >> provides_;
			break;
		case LA_QUOTESTYLE:
			lex >> quote_style_;
			break;
		case LA_REQUIRES:
			lex >> requires_;
			break;
		case LA_RTL:
			lex >> rightToLeft_;
			break;
		case LA_WORDWRAP:
			lex >> word_wrap_;
			break;
		}
	}

	lex.popTable();
	return finished && !error;
}
// Read one complete language entry from lex: first the language name,
// then its definition via readLanguage().
// Returns false if no name token is available or the definition fails
// to parse. On success the encoding is resolved (an unknown, non-empty
// encoding name falls back to "iso8859-1" and is reported) and defaults
// are supplied for an empty font encoding list and an empty date format
// list.
bool Language::read(Lexer & lex)
{
	encoding_ = 0;
	internal_enc_ = 0;
	rightToLeft_ = 0;

	if (!lex.next()) {
		lex.printError("No name given for language: `$$Token'.");
		return false;
	}

	lang_ = lex.getString();
	LYXERR(Debug::INFO, "Reading language " << lang_);
	bool const parsed = readLanguage(lex);
	if (!parsed) {
		LYXERR0("Error parsing language `" << lang_ << '\'');
		return false;
	}

	encoding_ = encodings.fromLyXName(encodingStr_);
	if (!encoding_ && !encodingStr_.empty()) {
		// A name was given but is not known: use the fallback encoding.
		encoding_ = encodings.fromLyXName("iso8859-1");
		LYXERR0("Unknown encoding " << encodingStr_);
	}

	if (fontenc_.empty())
		fontenc_.push_back("ASCII");

	if (dateformats_.empty()) {
		static char const * const fallback_formats[] = {
			"MMMM dd, yyyy",
			"MMM dd, yyyy",
			"M/d/yyyy"
		};
		for (char const * const fmt : fallback_formats)
			dateformats_.push_back(fmt);
	}
	return true;
}
// Merge the entries of trans into this language's layout translations.
// With replace == true every entry overwrites an existing one; otherwise
// only keys that are not yet present are added.
void Language::readLayoutTranslations(Language::TranslationMap const & trans, bool replace)
{
	for (auto const & entry : trans) {
		// Keep an existing translation unless asked to replace it.
		if (!replace
		    && layoutTranslations_.find(entry.first) != layoutTranslations_.end())
			continue;
		layoutTranslations_[entry.first] = entry.second;
	}
}
// Read all language definitions from the given file.
// Each definition starts with the keyword "Language". The entries named
// "latex" and "ignore" are kept in function-local statics and published
// through latex_language / ignore_language; every other entry is stored
// in languagelist_ under its name.
// Afterwards default_language is set to the result of
// getLanguage("english"), falling back to the first entry of
// languagelist_, and the layout translations are loaded from the
// "layouttranslations" file found by libFileSearch().
void Languages::read(FileName const & filename)
{
	Lexer lex;
	lex.setFile(filename);
	lex.setContext("Languages::read");

	while (lex.isOK()) {
		int le = lex.lex();
		switch (le) {
		case Lexer::LEX_FEOF:
			continue;
		default:
			break;
		}
		if (lex.getString() != "Language") {
			lex.printError("Unknown Language tag `$$Token'");
			continue;
		}
		Language l;
		// NOTE(review): the result of l.read() is not checked, so an
		// entry whose definition failed to parse is still registered
		// below as long as the lexer is OK - confirm this is intended.
		l.read(lex);
		if (!lex)
			break;
		if (l.lang() == "latex") {
			// Check if latex language was not already defined.
			LASSERT(latex_language == 0, continue);
			static const Language latex_lang = l;
			latex_language = &latex_lang;
		} else if (l.lang() == "ignore") {
			// Check if ignore language was not already defined.
			LASSERT(ignore_language == 0, continue);
			static const Language ignore_lang = l;
			ignore_language = &ignore_lang;
		} else
			languagelist_[l.lang()] = l;
	}

	default_language = getLanguage("english");
	if (!default_language) {
		LYXERR0("Default language \"english\" not found!");
		if (languagelist_.empty()) {
			// Nothing to fall back to: dereferencing begin() of an
			// empty container would be undefined behaviour, so leave
			// default_language null and report the problem.
			LYXERR0("No languages were read; no default language available!");
		} else {
			default_language = &languagelist_.begin()->second;
			LYXERR0("Using \"" << default_language->lang() << "\" instead!");
		}
	}

	// Read layout translations
	FileName const path = libFileSearch(string(), "layouttranslations");
	readLayoutTranslations(path);
}
namespace {
// Read key/value pairs from lex into trans until the token "End" is
// found or the lexer is no longer OK.
// Returns false if a key or a value token cannot be read, true otherwise.
bool readTranslations(Lexer & lex, Language::TranslationMap & trans)
{
	while (lex.isOK() && !lex.checkFor("End")) {
		if (!lex.next(true))
			return false;
		string const original = lex.getString();

		if (!lex.next(true))
			return false;
		docstring const translated = lex.getDocString();

		trans[original] = translated;
	}
	return true;
}
// Result of comparing a language code with a Language, as computed by
// match().
enum Match {
	// The code does not fit the language at all.
	NoMatch = 0,
	// A two-letter code equals the part before '_' of a longer code.
	ApproximateMatch = 1,
	// Only the first two characters of both codes agree.
	VeryApproximateMatch = 2,
	// The code equals the language's full code.
	ExactMatch = 3
};
// Classify how well the language code `code` fits the language `lang`.
//
// We need to mimic gettext: code can be a two-letter code, which should
// match all variants, e.g. "de" should match "de_DE", "de_AT" etc.
// Special case for Chinese:
//   simplified  => code == "zh_CN", langcode == "zh_CN"
//   traditional => code == "zh_TW", langcode == "zh_CN"
Match match(string const & code, Language const & lang)
{
	// Full code of the language: "<code>" or "<code>_<variety>".
	string const variety = lang.variety();
	string langcode = lang.code();
	if (!variety.empty())
		langcode += '_' + variety;

	// "chinese-traditional" is matched by "zh_TW" only; every other
	// language is matched exactly by its own full code.
	bool const traditional = lang.lang() == "chinese-traditional";
	bool const exact = traditional ? code == "zh_TW" : code == langcode;
	if (exact)
		return ExactMatch;

	// Two-letter code against a longer "xx_..." code.
	if (code.size() == 2 && langcode.size() > 2
	    && langcode.compare(0, 3, code + '_') == 0)
		return ApproximateMatch;

	// Only the first two characters agree.
	if (code.compare(0, 2, langcode, 0, 2) == 0)
		return VeryApproximateMatch;

	return NoMatch;
}
Bulk cleanup/fix incorrect annotation at the end of namespaces. This commit does a bulk fix of incorrect annotations (comments) at the end of namespaces. The commit was generated by initially running clang-format, and then from the diff of the result extracting the hunks corresponding to fixes of namespace comments. The changes being applied and all the results have been manually reviewed. The source code successfully builds on macOS. Further details on the steps below, in case they're of interest to someone else in the future. 1. Checkout a fresh and up to date version of src/ git pull && git checkout -- src && git status src 2. Ensure there's a suitable .clang-format in place, i.e. with options to fix the comment at the end of namespaces, including: FixNamespaceComments: true SpacesBeforeTrailingComments: 1 and that clang-format is >= 5.0.0, by doing e.g.: clang-format -dump-config | grep Comments: clang-format --version 3. Apply clang-format to the source: clang-format -i $(find src -name "*.cpp" -or -name "*.h") 4. Create and filter out hunks related to fixing the namespace git diff -U0 src > tmp.patch grepdiff '^} // namespace' --output-matching=hunk tmp.patch > fix_namespace.patch 5. Filter out hunks corresponding to simple fixes into to a separate patch: pcregrep -M -e '^diff[^\n]+\nindex[^\n]+\n--- [^\n]+\n\+\+\+ [^\n]+\n' \ -e '^@@ -[0-9]+ \+[0-9]+ @@[^\n]*\n-\}[^\n]*\n\+\}[^\n]*\n' \ fix_namespace.patch > fix_namespace_simple.patch 6. Manually review the simple patch and then apply it, after first restoring the source. git checkout -- src patch -p1 < fix_namespace_simple.path 7. Manually review the (simple) changes and then stage the changes git diff src git add src 8. Again apply clang-format and filter out hunks related to any remaining fixes to the namespace, this time filter with more context. 
There will be fewer hunks as all the simple cases have already been handled: clang-format -i $(find src -name "*.cpp" -or -name "*.h") git diff src > tmp.patch grepdiff '^} // namespace' --output-matching=hunk tmp.patch > fix_namespace2.patch 9. Manually review/edit the resulting patch file to remove hunks for files which need to be dealt with manually, noting the file names and line numbers. Then restore files to as before applying clang-format and apply the patch: git checkout src patch -p1 < fix_namespace2.patch 10. Manually fix the files noted in the previous step. Stage files, review changes and commit.
2017-07-23 13:11:54 +02:00
} // namespace
// Find a language for the given language code among all known languages.
// An exact match anywhere in the list is preferred over an approximate
// one; very approximate matches are not accepted here.
// Returns a null pointer if nothing matches.
Language const * Languages::getFromCode(string const & code) const
{
	// Pass 1: exact match, pass 2: approximate match.
	Match const passes[] = { ExactMatch, ApproximateMatch };
	for (Match const wanted : passes) {
		for (auto const & entry : languagelist_) {
			if (match(code, entry.second) == wanted)
				return &entry.second;
		}
	}
	return nullptr;
}
// Find a language for the given language code, giving priority to the
// languages in tryfirst.
//  1. an exact match within tryfirst,
//  2. an approximate or very approximate match within tryfirst,
//  3. otherwise the result of the single-argument getFromCode(), which
//     searches all known languages and returns a null pointer when
//     nothing matches.
// No message is logged here when the code is unknown.
Language const * Languages::getFromCode(string const & code,
	set<Language const *> const & tryfirst) const
{
	// 1/ exact match with tryfirst list
	for (auto const * lptr : tryfirst) {
		if (match(code, *lptr) == ExactMatch)
			return lptr;
	}

	// 2/ approximate match with tryfirst list
	for (auto const * lptr : tryfirst) {
		Match const m = match(code, *lptr);
		if (m == ApproximateMatch || m == VeryApproximateMatch)
			return lptr;
	}

	// 3/ stricter match in all languages
	// (The statements that used to follow this return could never be
	// executed and have been removed.)
	return getFromCode(code);
}
// Read the layout translations file and distribute its contents to the
// known languages.
// The file consists of sections introduced by "Translation" followed by
// a language code; each section is read with readTranslations().
// Reading stops at the first unknown tag, unknown language code or
// unreadable section. Whatever was collected up to that point is still
// merged into the languages.
void Languages::readLayoutTranslations(support::FileName const & filename)
{
	Lexer lex;
	lex.setFile(filename);
	lex.setContext("Languages::read");

	// 1) read all translations (exact and approximate matches),
	//    keyed by language code
	std::map<string, Language::TranslationMap> byCode;
	while (lex.isOK()) {
		if (!lex.checkFor("Translation")) {
			if (lex.isOK())
				lex.printError("Unknown layout translation tag `$$Token'");
			break;
		}
		if (!lex.next(true))
			break;

		string const code = lex.getString();
		if (!getFromCode(code)) {
			lex.printError("Unknown language `" + code + "'");
			break;
		}
		if (!readTranslations(lex, byCode[code])) {
			lex.printError("Could not read layout translations for language `"
				+ code + "'");
			break;
		}
	}

	// 2) merge all translations into the languages
	//    exact translations overwrite approximate ones
	for (auto & section : byCode) {
		for (auto & known : languagelist_) {
			Match const fit = match(section.first, known.second);
			if (fit != NoMatch)
				known.second.readLayoutTranslations(section.second,
								    fit == ExactMatch);
		}
	}
}
// Look up a language by name.
// "ignore" yields ignore_language; "reset" and any name that is not in
// languagelist_ yield reset_language.
Language const * Languages::getLanguage(string const & language) const
{
	if (language == "ignore")
		return ignore_language;

	if (language != "reset") {
		const_iterator const pos = languagelist_.find(language);
		if (pos != languagelist_.end())
			return &pos->second;
	}
	return reset_language;
}
} // namespace lyx