2001-07-29 10:42:11 +00:00
|
|
|
/**
|
2007-04-26 04:41:58 +00:00
|
|
|
* \file Thesaurus.cpp
|
2003-08-23 00:17:00 +00:00
|
|
|
* This file is part of LyX, the document processor.
|
|
|
|
* Licence details can be found in the file COPYING.
|
2001-07-29 10:42:11 +00:00
|
|
|
*
|
|
|
|
* \author John Levon
|
2008-11-16 18:48:25 +00:00
|
|
|
* \author Jürgen Spitzmüller
|
2003-08-23 00:17:00 +00:00
|
|
|
*
|
|
|
|
* Full author contact details are available in file CREDITS.
|
2001-07-29 10:42:11 +00:00
|
|
|
*/
|
|
|
|
|
2001-12-20 15:11:51 +00:00
|
|
|
#include <config.h>
|
|
|
|
|
2001-07-29 10:42:11 +00:00
|
|
|
#include "Thesaurus.h"
|
|
|
|
|
2008-11-16 18:48:25 +00:00
|
|
|
#include "support/debug.h"
|
2007-11-29 07:04:28 +00:00
|
|
|
#include "support/gettext.h"
|
2008-11-16 18:48:25 +00:00
|
|
|
#include "LyXRC.h"
|
2007-06-07 17:27:10 +00:00
|
|
|
|
2008-11-16 18:48:25 +00:00
|
|
|
#include "support/FileNameList.h"
|
|
|
|
#include "support/filetools.h"
|
2006-12-10 11:52:46 +00:00
|
|
|
#include "support/lstrings.h"
|
2008-11-16 18:48:25 +00:00
|
|
|
#include "support/os.h"
|
|
|
|
#include "support/unicode.h"
|
2006-12-10 11:52:46 +00:00
|
|
|
|
2007-06-07 17:27:10 +00:00
|
|
|
#include "frontends/alert.h"
|
|
|
|
|
2009-05-01 06:33:19 +00:00
|
|
|
#ifdef HAVE_LIBMYTHES
|
|
|
|
#include MYTHES_H_LOCATION
|
|
|
|
#else
|
|
|
|
#ifdef HAVE_LIBAIKSAURUS
|
|
|
|
#include AIKSAURUS_H_LOCATION
|
|
|
|
#endif // HAVE_LIBAIKSAURUS
|
|
|
|
#endif // !HAVE_LIBMYTHES
|
|
|
|
|
2002-01-13 01:46:33 +00:00
|
|
|
#include <algorithm>
|
2008-11-16 18:48:25 +00:00
|
|
|
#include <cstring>
|
2003-10-06 15:43:21 +00:00
|
|
|
|
2007-12-12 10:16:00 +00:00
|
|
|
using namespace std;
|
2008-11-16 18:48:25 +00:00
|
|
|
using namespace lyx::support;
|
|
|
|
using namespace lyx::support::os;
|
2006-10-21 00:16:43 +00:00
|
|
|
|
|
|
|
namespace lyx {
|
|
|
|
|
2007-05-28 07:56:38 +00:00
|
|
|
#ifdef HAVE_LIBAIKSAURUS
|
|
|
|
|
2009-05-01 06:33:19 +00:00
|
|
|
struct Thesaurus::Private
|
2001-07-29 10:42:11 +00:00
|
|
|
{
|
2009-05-01 06:33:19 +00:00
|
|
|
Private(): thes_(new Aiksaurus) {}
|
|
|
|
Aiksaurus * thes_;
|
|
|
|
};
|
2001-07-29 10:42:11 +00:00
|
|
|
|
2008-11-16 18:48:25 +00:00
|
|
|
/// Look up \p t with Aiksaurus and return its synonyms grouped by meaning.
/// The second argument (the language) is ignored by this backend.
/// An empty result is returned if \p t is not pure ASCII, if Aiksaurus
/// reports an error, or if the word is not found.
Thesaurus::Meanings Thesaurus::lookup(docstring const & t, docstring const &)
{
	Meanings result;

	// Aiksaurus only covers English, so non-ASCII input cannot match.
	// The interface nevertheless takes a docstring because a
	// non-English thesaurus is possible in theory.
	// Bail out early: to_ascii() would assert on such input.
	if (!support::isAscii(t))
		return result;

	string const word = to_ascii(t);

	docstring const problem = from_ascii(d->thes_->error());
	if (!problem.empty()) {
		// Show the alert only once per program run.
		static bool sent_error = false;
		if (!sent_error) {
			frontend::Alert::error(_("Thesaurus failure"),
				bformat(_("Aiksaurus returned the following error:\n\n%1$s."),
					problem));
			sent_error = true;
		}
		return result;
	}

	if (!d->thes_->find(word.c_str()))
		return result;

	// Weird API: next() hands back one string per call, reports the id of
	// the meaning it belongs to through its argument, and returns ""
	// once the results are exhausted.
	int last_id = -1;
	int id;
	docstring heading;

	for (string item = d->thes_->next(id); !item.empty();
	     item = d->thes_->next(id)) {
		if (id != last_id) {
			// First string of a new meaning: it names the group.
			heading = from_ascii(item);
			// The string that follows is fetched and dropped.
			item = d->thes_->next(id);
			last_id = id;
		} else if (item != word) {
			// Do not list the looked-up word as its own synonym.
			result[heading].push_back(from_ascii(item));
		}
	}

	// Present the synonyms of every meaning in sorted order.
	Meanings::iterator const stop = result.end();
	for (Meanings::iterator m = result.begin(); m != stop; ++m)
		sort(m->second.begin(), m->second.end());

	return result;
}
|
|
|
|
|
2008-11-16 18:48:25 +00:00
|
|
|
|
|
|
|
/// Tell whether a thesaurus exists for language code \p lang.
/// The Aiksaurus backend supports English only, i.e. codes starting
/// with "en_".
bool Thesaurus::thesaurusAvailable(docstring const & lang) const
{
	docstring const english_prefix = from_ascii("en_");
	return prefixIs(lang, english_prefix);
}
|
|
|
|
|
2009-05-01 06:33:19 +00:00
|
|
|
#else // HAVE_LIBAIKSAURUS
|
2008-11-16 18:48:25 +00:00
|
|
|
#ifdef HAVE_LIBMYTHES
|
|
|
|
|
|
|
|
namespace {
|
|
|
|
|
|
|
|
/// Convert the UCS4 string \p s to a byte string in the iconv
/// encoding named \p encoding.
string const to_iconv_encoding(docstring const & s, string const & encoding)
{
	std::vector<char> const bytes =
		ucs4_to_eightbit(s.data(), s.length(), encoding);
	string converted;
	converted.assign(bytes.begin(), bytes.end());
	return converted;
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Convert the byte string \p s, given in the iconv encoding named
/// \p encoding, to a UCS4 docstring.
docstring const from_iconv_encoding(string const & s, string const & encoding)
{
	std::vector<char_type> const chars =
		eightbit_to_ucs4(s.data(), s.length(), encoding);
	docstring converted;
	converted.assign(chars.begin(), chars.end());
	return converted;
}
|
|
|
|
|
2009-05-01 06:42:25 +00:00
|
|
|
/// MyThes instances keyed by the language string they were loaded for.
/// The map stores raw pointers; Thesaurus::Private deletes them in its
/// destructor.
typedef std::map<docstring, MyThes *> Thesauri;
|
|
|
|
|
2008-11-16 18:48:25 +00:00
|
|
|
} // namespace anon
|
|
|
|
|
|
|
|
|
2009-05-01 06:33:19 +00:00
|
|
|
struct Thesaurus::Private
|
2008-11-16 18:48:25 +00:00
|
|
|
{
|
2009-05-01 06:33:19 +00:00
|
|
|
~Private()
|
|
|
|
{
|
|
|
|
for (Thesauri::iterator it = thes_.begin();
|
|
|
|
it != thes_.end(); ++it) {
|
|
|
|
delete it->second;
|
|
|
|
}
|
2008-11-16 18:48:25 +00:00
|
|
|
}
|
|
|
|
|
2009-05-01 06:33:19 +00:00
|
|
|
/// add a thesaurus to the list
|
|
|
|
bool addThesaurus(docstring const & lang);
|
2008-11-16 18:48:25 +00:00
|
|
|
|
2009-05-01 06:33:19 +00:00
|
|
|
/// the thesauri
|
|
|
|
Thesauri thes_;
|
|
|
|
};
|
|
|
|
|
|
|
|
/// Make sure a MyThes thesaurus for \p lang is loaded.
/// Looks in the directory configured in lyxrc.thesaurusdir_path for one
/// "idx" and one "dat" file whose file names contain \p lang, and creates
/// a MyThes instance from the first match of each.
/// \return true if a thesaurus for \p lang is loaded (now or already),
/// false if no directory is configured, \p lang is not ASCII, or no
/// matching idx/dat pair exists.
bool Thesaurus::Private::addThesaurus(docstring const & lang)
{
	string const thes_path = external_path(lyxrc.thesaurusdir_path);
	LYXERR(Debug::FILES, "thesaurus path: " << thes_path);
	if (thes_path.empty())
		return false;

	// Already loaded? Query our own map: Thesaurus::thesaurusAvailable()
	// is a non-static member of the enclosing class and cannot be called
	// from here without a Thesaurus object.
	Thesauri::const_iterator const loaded = thes_.find(lang);
	if (loaded != thes_.end() && loaded->second)
		return true;

	// to_ascii() would assert on a non-ASCII language code, and such a
	// code cannot match an on-disk file name below anyway.
	if (!support::isAscii(lang))
		return false;
	// Convert once instead of once per directory entry.
	string const lang_code = to_ascii(lang);

	FileNameList const idx_files = FileName(thes_path).dirList("idx");
	FileNameList const data_files = FileName(thes_path).dirList("dat");
	string idx;
	string data;

	// Pick the first index file whose name mentions the language code.
	for (FileNameList::const_iterator it = idx_files.begin();
	     it != idx_files.end(); ++it) {
		LYXERR(Debug::FILES, "found thesaurus idx file: " << it->onlyFileName());
		if (contains(it->onlyFileName(), lang_code)) {
			idx = it->absFilename();
			LYXERR(Debug::FILES, "selected thesaurus idx file: " << idx);
			break;
		}
	}

	// Same for the data file.
	for (FileNameList::const_iterator it = data_files.begin();
	     it != data_files.end(); ++it) {
		LYXERR(Debug::FILES, "found thesaurus data file: " << it->onlyFileName());
		if (contains(it->onlyFileName(), lang_code)) {
			data = it->absFilename();
			LYXERR(Debug::FILES, "selected thesaurus data file: " << data);
			break;
		}
	}

	// Both files are needed to construct a MyThes instance.
	if (idx.empty() || data.empty())
		return false;

	// Ownership of the new instance passes to thes_.
	thes_[lang] = new MyThes(idx.c_str(), data.c_str());
	return true;
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Tell whether a thesaurus for \p lang has already been loaded,
/// i.e. whether thes_ holds a non-null MyThes instance under that key.
bool Thesaurus::thesaurusAvailable(docstring const & lang) const
{
	Thesauri::const_iterator const entry = d->thes_.find(lang);
	if (entry == d->thes_.end())
		return false;
	return entry->second != 0;
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Look up \p t in the MyThes thesaurus for language \p lang and return
/// its synonyms grouped by meaning, each group sorted.
/// The thesaurus for \p lang is loaded on demand. The word is lowercased
/// and converted to the thesaurus' own encoding before the query.
/// An empty result is returned if no thesaurus can be loaded for \p lang
/// or if the word is not found.
Thesaurus::Meanings Thesaurus::lookup(docstring const & t, docstring const & lang)
{
	Meanings meanings;

	if (!d->addThesaurus(lang))
		return meanings;

	Thesauri::const_iterator const entry = d->thes_.find(lang);
	if (entry == d->thes_.end() || !entry->second)
		return meanings;
	MyThes * const mythes = entry->second;

	string const encoding = mythes->get_th_encoding();

	mentry * pmean = 0;
	string const text = to_iconv_encoding(support::lowercase(t), encoding);
	int const len = strlen(text.c_str());
	int const count = mythes->Lookup(text.c_str(), len, &pmean);
	if (!count)
		return meanings;

	// pmean and count must stay untouched: they are needed for the
	// CleanUpAfterLookup routine. Walk the entries with a second pointer.
	mentry * pm = pmean;
	for (int i = 0; i < count; ++i, ++pm) {
		docstring meaning = from_iconv_encoding(string(pm->defn), encoding);
		// remove silly item
		if (support::prefixIs(meaning, '-'))
			meaning = support::ltrim(meaning, "- ");
		// Store every synonym of this meaning. Pushing once after the
		// loop kept only the last synonym, and for a meaning without
		// synonyms it stored a stale value from the previous meaning.
		for (int j = 0; j < pm->count; ++j)
			meanings[meaning].push_back(
				from_iconv_encoding(string(pm->psyns[j]), encoding));
	}
	// now clean up all allocated memory
	mythes->CleanUpAfterLookup(&pmean, count);

	for (Meanings::iterator it = meanings.begin();
	     it != meanings.end(); ++it)
		sort(it->second.begin(), it->second.end());

	return meanings;
}
|
|
|
|
|
2001-07-29 10:42:11 +00:00
|
|
|
#else
|
2008-11-16 20:17:17 +00:00
|
|
|
|
2009-05-01 06:33:19 +00:00
|
|
|
struct Thesaurus::Private
|
2008-11-16 20:17:17 +00:00
|
|
|
{
|
2009-05-01 06:33:19 +00:00
|
|
|
};
|
2008-11-16 20:17:17 +00:00
|
|
|
|
2002-03-21 17:27:08 +00:00
|
|
|
|
2008-11-16 19:18:43 +00:00
|
|
|
/// Stub used when no thesaurus library is compiled in: both arguments
/// are ignored and the result is always empty.
Thesaurus::Meanings Thesaurus::lookup(docstring const &, docstring const &)
{
	Meanings const nothing;
	return nothing;
}
|
|
|
|
|
2008-12-13 14:41:37 +00:00
|
|
|
|
2009-03-30 10:36:05 +00:00
|
|
|
/// Stub used when no thesaurus library is compiled in: no language
/// ever has a thesaurus.
bool Thesaurus::thesaurusAvailable(docstring const &) const
{
	bool const available = false;
	return available;
}
|
|
|
|
|
2008-11-16 18:48:25 +00:00
|
|
|
#endif // HAVE_LIBMYTHES
|
2009-05-01 06:33:19 +00:00
|
|
|
#endif // HAVE_LIBAIKSAURUS
|
|
|
|
|
|
|
|
/// Create the backend-specific private data.
Thesaurus::Thesaurus()
	: d(new Private)
{}
|
|
|
|
|
|
|
|
|
|
|
|
/// Destroy the private data allocated by the constructor.
Thesaurus::~Thesaurus()
{
	delete d;
}
|
2002-02-16 15:59:55 +00:00
|
|
|
|
|
|
|
// Global instance
|
2002-03-21 17:27:08 +00:00
|
|
|
// The single Thesaurus object, defined at namespace scope in lyx.
// Its constructor allocates the backend's private data and its
// destructor frees it again.
Thesaurus thesaurus;
|
2006-10-21 00:16:43 +00:00
|
|
|
|
|
|
|
|
|
|
|
} // namespace lyx
|