2001-07-29 10:42:11 +00:00
|
|
|
/**
|
2007-04-26 04:41:58 +00:00
|
|
|
* \file Thesaurus.cpp
|
2003-08-23 00:17:00 +00:00
|
|
|
* This file is part of LyX, the document processor.
|
|
|
|
* Licence details can be found in the file COPYING.
|
2001-07-29 10:42:11 +00:00
|
|
|
*
|
|
|
|
* \author John Levon
|
2008-11-16 18:48:25 +00:00
|
|
|
* \author Jürgen Spitzmüller
|
2003-08-23 00:17:00 +00:00
|
|
|
*
|
|
|
|
* Full author contact details are available in file CREDITS.
|
2001-07-29 10:42:11 +00:00
|
|
|
*/
|
|
|
|
|
2001-12-20 15:11:51 +00:00
|
|
|
#include <config.h>
|
|
|
|
|
2001-07-29 10:42:11 +00:00
|
|
|
#include "Thesaurus.h"
|
|
|
|
|
2008-11-16 18:48:25 +00:00
|
|
|
#include "LyXRC.h"
|
2007-06-07 17:27:10 +00:00
|
|
|
|
2012-03-02 10:20:09 +00:00
|
|
|
#include "SpellChecker.h"
|
|
|
|
#include "WordLangTuple.h"
|
|
|
|
|
2008-11-16 18:48:25 +00:00
|
|
|
#include "support/FileNameList.h"
|
2010-05-29 14:36:51 +00:00
|
|
|
#include "support/Package.h"
|
2009-05-08 21:38:50 +00:00
|
|
|
#include "support/debug.h"
|
2020-10-20 11:36:59 +03:00
|
|
|
#include "support/docstring.h"
|
2012-03-02 10:20:09 +00:00
|
|
|
#include "support/docstring_list.h"
|
2008-11-16 18:48:25 +00:00
|
|
|
#include "support/filetools.h"
|
2006-12-10 11:52:46 +00:00
|
|
|
#include "support/lstrings.h"
|
2008-11-16 18:48:25 +00:00
|
|
|
#include "support/os.h"
|
2006-12-10 11:52:46 +00:00
|
|
|
|
2011-09-07 08:05:44 +00:00
|
|
|
#include <cstdio>
|
2017-03-06 17:08:38 +01:00
|
|
|
#include MYTHES_H_LOCATION
|
2007-06-07 17:27:10 +00:00
|
|
|
|
2002-01-13 01:46:33 +00:00
|
|
|
#include <algorithm>
|
2008-11-16 18:48:25 +00:00
|
|
|
#include <cstring>
|
2010-07-03 16:26:47 +00:00
|
|
|
#include <fstream>
|
2003-10-06 15:43:21 +00:00
|
|
|
|
2007-12-12 10:16:00 +00:00
|
|
|
using namespace std;
|
2008-11-16 18:48:25 +00:00
|
|
|
using namespace lyx::support;
|
|
|
|
using namespace lyx::support::os;
|
2006-10-21 00:16:43 +00:00
|
|
|
|
|
|
|
namespace lyx {
|
|
|
|
|
2008-11-16 18:48:25 +00:00
|
|
|
namespace {
|
|
|
|
|
2009-05-01 06:42:25 +00:00
|
|
|
typedef std::map<docstring, MyThes *> Thesauri;
|
|
|
|
|
2017-07-23 13:11:54 +02:00
|
|
|
} // namespace
|
2008-11-16 18:48:25 +00:00
|
|
|
|
2009-05-01 06:33:19 +00:00
|
|
|
struct Thesaurus::Private
|
2008-11-16 18:48:25 +00:00
|
|
|
{
|
2009-05-01 08:44:35 +00:00
|
|
|
~Private()
|
2009-05-01 06:33:19 +00:00
|
|
|
{
|
|
|
|
for (Thesauri::iterator it = thes_.begin();
|
|
|
|
it != thes_.end(); ++it) {
|
|
|
|
delete it->second;
|
|
|
|
}
|
2008-11-16 18:48:25 +00:00
|
|
|
}
|
2009-05-01 08:35:59 +00:00
|
|
|
///
|
2009-05-01 08:44:35 +00:00
|
|
|
bool thesaurusAvailable(docstring const & lang) const
|
|
|
|
{
|
|
|
|
for (Thesauri::const_iterator it = thes_.begin();
|
|
|
|
it != thes_.end(); ++it) {
|
|
|
|
if (it->first == lang)
|
|
|
|
if (it->second)
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
2009-05-01 08:35:59 +00:00
|
|
|
|
2010-02-10 08:10:31 +00:00
|
|
|
///
|
|
|
|
typedef std::pair<std::string, std::string> ThesFiles;
|
|
|
|
///
|
2010-05-29 14:36:51 +00:00
|
|
|
ThesFiles getThesaurus(string const & path, docstring const & lang);
|
2010-02-10 08:10:31 +00:00
|
|
|
ThesFiles getThesaurus(docstring const & lang);
|
2009-05-01 06:33:19 +00:00
|
|
|
/// add a thesaurus to the list
|
|
|
|
bool addThesaurus(docstring const & lang);
|
2008-11-16 18:48:25 +00:00
|
|
|
|
2009-05-01 06:33:19 +00:00
|
|
|
/// the thesauri
|
|
|
|
Thesauri thes_;
|
2010-06-06 19:10:10 +00:00
|
|
|
|
|
|
|
/// the location below system/user directory
|
|
|
|
/// there the data+idx files lookup will happen
|
|
|
|
const string dataDirectory(void) { return "thes"; }
|
|
|
|
|
2009-05-01 06:33:19 +00:00
|
|
|
};
|
|
|
|
|
2010-02-10 08:10:31 +00:00
|
|
|
|
2010-05-29 14:36:51 +00:00
|
|
|
/**
 * Search the directory \p path for a thesaurus matching \p lang.
 *
 * An index file (extension "idx") is selected when its base name contains
 * the language code and is not more specific than the request (a base name
 * containing '_' or '-' is skipped unless \p lang contains the same
 * separator). Readable index files are additionally checked: the first line
 * must be a non-empty encoding name without a comma and the second line must
 * parse as an integer, otherwise the file is ignored.
 *
 * If no index file matches and \p lang contains '_' or '-', the search is
 * repeated with the part of \p lang that precedes the separator.
 *
 * \return the absolute names of the index and data file. Both are empty if
 * \p path is not a directory or no index file matched; the data file name
 * alone is empty if no "dat" file name contains the index base name.
 */
pair<string,string> Thesaurus::Private::getThesaurus(string const & path, docstring const & lang)
{
	FileName dir(path);
	if (!dir.isDirectory())
		return ThesFiles();

	FileNameList const idx_files = dir.dirList("idx");
	FileNameList const data_files = dir.dirList("dat");
	string idx_file;
	// base name of the index file examined last; once an index file has
	// been selected this is the name the data file has to contain
	string stem;

	LYXERR(Debug::FILES, "thesaurus path: " << path);
	for (auto const & candidate : idx_files) {
		stem = candidate.onlyFileNameWithoutExt();
		if (!contains(stem, to_ascii(lang)))
			continue;
		// do not use more specific dicts.
		if ((contains(stem, '_') && !contains(lang, '_'))
		    || (contains(stem, '-') && !contains(lang, '-')))
			continue;

		ifstream ifs(candidate.absFileName().c_str());
		if (ifs) {
			// check for appropriate version of index file
			string encoding; // first line is encoding
			int items = 0;   // second line is no. of items
			getline(ifs, encoding);
			ifs >> items;
			if (ifs.fail()) {
				LYXERR(Debug::FILES, "ignore irregular thesaurus idx file: " << candidate.absFileName());
				continue;
			}
			if (encoding.empty() || encoding.find(',') != string::npos) {
				LYXERR(Debug::FILES, "ignore version1 thesaurus idx file: " << candidate.absFileName());
				continue;
			}
		}

		idx_file = candidate.absFileName();
		LYXERR(Debug::FILES, "selected thesaurus idx file: " << idx_file);
		break;
	}

	if (idx_file.empty()) {
		// try with a more general dictionary: '_' is tried before '-'
		char const separators[] = { '_', '-' };
		for (char const sep : separators) {
			if (!contains(lang, sep))
				continue;
			docstring shortcode;
			split(lang, shortcode, sep);
			LYXERR(Debug::FILES, "Did not find thesaurus for LANG code "
			       << lang << ". Trying with " << shortcode);
			return getThesaurus(path, shortcode);
		}
		return ThesFiles();
	}

	string data_file;
	for (auto const & candidate : data_files) {
		if (!contains(candidate.onlyFileName(), stem))
			continue;
		data_file = candidate.absFileName();
		LYXERR(Debug::FILES, "selected thesaurus data file: " << data_file);
		break;
	}
	return make_pair(idx_file, data_file);
}
|
|
|
|
|
|
|
|
|
2010-05-29 14:36:51 +00:00
|
|
|
/**
 * Locate the thesaurus files for \p lang.
 *
 * The directories are tried in this order, stopping as soon as both an index
 * and a data file have been found:
 *  -# the directory configured in lyxrc.thesaurusdir_path (if not empty),
 *  -# dataDirectory() below the system support directory,
 *  -# dataDirectory() below the user support directory.
 *
 * \return the (index, data) file names of the last directory searched, or
 * an empty pair if a thesaurus for \p lang is already loaded.
 */
pair<string,string> Thesaurus::Private::getThesaurus(docstring const & lang)
{
	string const configured_dir = external_path(lyxrc.thesaurusdir_path);
	ThesFiles found;

	// an already loaded thesaurus needs no file lookup
	if (thesaurusAvailable(lang))
		return ThesFiles();

	if (!configured_dir.empty())
		found = getThesaurus(configured_dir, lang);

	if (found.first.empty() || found.second.empty()) {
		string const sys_dir = addName(
			lyx::support::package().system_support().absFileName(),
			dataDirectory());
		found = getThesaurus(external_path(sys_dir), lang);
	}

	if (found.first.empty() || found.second.empty()) {
		string const user_dir = addName(
			lyx::support::package().user_support().absFileName(),
			dataDirectory());
		found = getThesaurus(external_path(user_dir), lang);
	}

	return found;
}
|
|
|
|
|
|
|
|
|
2010-02-10 08:10:31 +00:00
|
|
|
/**
 * Make sure a thesaurus for \p lang is loaded.
 *
 * Does nothing if one is already available. Otherwise the files are looked
 * up with getThesaurus() and a new MyThes instance is stored in thes_.
 *
 * \return true if a thesaurus for \p lang is available afterwards, false if
 * the index or the data file could not be found.
 */
bool Thesaurus::Private::addThesaurus(docstring const & lang)
{
	if (thesaurusAvailable(lang))
		return true;

	ThesFiles const files = getThesaurus(lang);
	if (files.first.empty() || files.second.empty())
		return false;

	// first = index file, second = data file
	thes_[lang] = new MyThes(files.first.c_str(), files.second.c_str());
	return true;
}
|
|
|
|
|
|
|
|
|
|
|
|
/// \return true if a thesaurus for \p lang has already been loaded.
/// The question is forwarded to the private implementation.
bool Thesaurus::thesaurusAvailable(docstring const & lang) const
{
	bool const loaded = d->thesaurusAvailable(lang);
	return loaded;
}
|
|
|
|
|
|
|
|
|
2010-02-10 08:10:31 +00:00
|
|
|
/// \return true if a thesaurus for \p lang is already loaded, or if both
/// its index and its data file can be found on disk.
bool Thesaurus::thesaurusInstalled(docstring const & lang) const
{
	if (!thesaurusAvailable(lang)) {
		// not loaded yet: check whether the files exist
		pair<string, string> const found = d->getThesaurus(lang);
		return !(found.first.empty() || found.second.empty());
	}
	return true;
}
|
|
|
|
|
|
|
|
|
2012-03-02 10:20:09 +00:00
|
|
|
/**
 * Look up the word of \p wl in the thesaurus of its language.
 *
 * The thesaurus is loaded on demand. The lowercased word is converted to the
 * encoding reported by the thesaurus and looked up. If that yields nothing,
 * the stems proposed by the spell checker are tried in order and the first
 * one with a result is used.
 *
 * \return a map from each meaning to its sorted list of synonyms. The map is
 * empty if no thesaurus could be loaded, if the word and all of its stems
 * are unknown, or if there is no spell checker to provide stems.
 */
Thesaurus::Meanings Thesaurus::lookup(WordLangTuple const & wl)
{
	Meanings meanings;

	docstring const lang_code = from_ascii(wl.lang()->code());
	docstring const & word = wl.word();

	if (!d->addThesaurus(lang_code))
		return meanings;

	Thesauri::const_iterator const loaded = d->thes_.find(lang_code);
	MyThes * const mythes =
		(loaded != d->thes_.end()) ? loaded->second : nullptr;
	if (!mythes)
		return meanings;

	string const encoding = mythes->get_th_encoding();

	mentry * pmean;
	string const text = to_iconv_encoding(support::lowercase(word), encoding);
	int len = strlen(text.c_str());
	int count = mythes->Lookup(text.c_str(), len, &pmean);
	if (count == 0) {
		// no direct hit: retry with the stems of the word
		SpellChecker * const speller = theSpellChecker();
		if (!speller)
			return meanings;
		docstring_list stems;
		speller->stem(wl, stems);
		for (docstring const & candidate : stems) {
			string const wordform =
				to_iconv_encoding(support::lowercase(candidate), encoding);
			len = strlen(wordform.c_str());
			count = mythes->Lookup(wordform.c_str(), len, &pmean);
			if (count != 0)
				break;
		}
		if (count == 0)
			return meanings;
	}

	// pmean and count must stay untouched from here on, because
	// CleanUpAfterLookup needs them; the entries are only read by index.
	for (int i = 0; i < count; ++i) {
		mentry const & entry = pmean[i];

		docstring meaning = from_iconv_encoding(string(entry.defn), encoding);
		// remove silly item
		if (support::prefixIs(meaning, '-'))
			meaning = support::ltrim(meaning, "- ");

		vector<docstring> synonyms;
		synonyms.reserve(entry.count);
		for (int j = 0; j < entry.count; ++j)
			synonyms.push_back(
				from_iconv_encoding(string(entry.psyns[j]), encoding));
		meanings[meaning] = synonyms;
	}
	// now clean up all allocated memory
	mythes->CleanUpAfterLookup(&pmean, count);

	for (auto & item : meanings)
		sort(item.second.begin(), item.second.end());

	return meanings;
}
|
|
|
|
|
2009-05-01 06:33:19 +00:00
|
|
|
|
2012-10-27 15:45:27 +02:00
|
|
|
/// Create the thesaurus front end together with its private implementation,
/// which starts out without any loaded thesaurus.
Thesaurus::Thesaurus()
	: d(new Private)
{}
|
|
|
|
|
|
|
|
|
|
|
|
/// Destroy the private implementation, which in turn deletes every MyThes
/// instance it has loaded.
Thesaurus::~Thesaurus()
{
	delete d;
}
|
2002-02-16 15:59:55 +00:00
|
|
|
|
|
|
|
// Global instance
|
2002-03-21 17:27:08 +00:00
|
|
|
// Namespace-scope Thesaurus object defined in this file. Its constructor
// allocates the private implementation; thesauri themselves are only loaded
// later, when lookup() asks for a language.
Thesaurus thesaurus;
|
2006-10-21 00:16:43 +00:00
|
|
|
|
|
|
|
|
|
|
|
} // namespace lyx
|