2009-07-12 22:46:01 +00:00
|
|
|
/**
|
2009-08-01 17:24:13 +00:00
|
|
|
* \file HunspellChecker.cpp
|
2009-07-12 22:46:01 +00:00
|
|
|
* This file is part of LyX, the document processor.
|
|
|
|
* Licence details can be found in the file COPYING.
|
|
|
|
*
|
|
|
|
* \author Abdelrazak Younes
|
|
|
|
*
|
|
|
|
* Full author contact details are available in file CREDITS.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <config.h>
|
|
|
|
|
2009-08-01 17:24:13 +00:00
|
|
|
#include "HunspellChecker.h"
|
2010-12-02 18:52:31 +00:00
|
|
|
#include "PersonalWordList.h"
|
2009-07-12 22:46:01 +00:00
|
|
|
|
|
|
|
#include "LyXRC.h"
|
|
|
|
#include "WordLangTuple.h"
|
|
|
|
|
2009-11-25 16:52:45 +00:00
|
|
|
#include "frontends/alert.h"
|
|
|
|
|
2009-07-12 22:46:01 +00:00
|
|
|
#include "support/debug.h"
|
2009-08-02 09:17:32 +00:00
|
|
|
#include "support/docstring_list.h"
|
2009-12-05 03:55:03 +00:00
|
|
|
#include "support/filetools.h"
|
2010-05-29 14:36:51 +00:00
|
|
|
#include "support/Package.h"
|
2009-08-08 17:05:31 +00:00
|
|
|
#include "support/FileName.h"
|
|
|
|
#include "support/gettext.h"
|
|
|
|
#include "support/lassert.h"
|
2009-11-25 16:52:45 +00:00
|
|
|
#include "support/lstrings.h"
|
2009-08-08 17:05:31 +00:00
|
|
|
#include "support/os.h"
|
2009-07-12 22:46:01 +00:00
|
|
|
|
|
|
|
#include <hunspell/hunspell.hxx>
|
|
|
|
|
|
|
|
#include <map>
|
|
|
|
#include <string>
|
2010-01-21 12:47:45 +00:00
|
|
|
#include <vector>
|
2009-07-12 22:46:01 +00:00
|
|
|
|
|
|
|
using namespace std;
|
2009-08-08 17:05:31 +00:00
|
|
|
using namespace lyx::support;
|
|
|
|
using namespace lyx::support::os;
|
2009-07-12 22:46:01 +00:00
|
|
|
|
|
|
|
namespace lyx {
|
|
|
|
|
|
|
|
namespace {
|
2009-08-01 18:03:26 +00:00
|
|
|
|
2009-07-12 22:46:01 +00:00
|
|
|
typedef map<std::string, Hunspell *> Spellers;
|
2010-12-02 18:52:31 +00:00
|
|
|
typedef map<std::string, PersonalWordList *> LangPersonalWordList;
|
|
|
|
|
2010-01-21 12:47:45 +00:00
|
|
|
typedef vector<WordLangTuple> IgnoreList;
|
2009-07-12 22:46:01 +00:00
|
|
|
|
2009-08-01 18:03:26 +00:00
|
|
|
} // anon namespace
|
|
|
|
|
2010-05-29 14:36:51 +00:00
|
|
|
|
2009-08-01 18:03:26 +00:00
|
|
|
struct HunspellChecker::Private
|
2009-07-12 22:46:01 +00:00
|
|
|
{
|
2010-12-02 18:52:31 +00:00
|
|
|
Private();
|
2009-08-02 09:17:32 +00:00
|
|
|
~Private();
|
|
|
|
|
2010-06-06 19:10:10 +00:00
|
|
|
const string dictPath(int selector);
|
|
|
|
bool haveLanguageFiles(string const & hpath);
|
2011-02-17 17:09:06 +00:00
|
|
|
bool haveDictionary(Language const * lang, string & hpath);
|
|
|
|
bool haveDictionary(Language const * lang);
|
|
|
|
Hunspell * addSpeller(Language const * lang, string & hpath);
|
2010-12-02 18:52:31 +00:00
|
|
|
Hunspell * addSpeller(Language const * lang);
|
|
|
|
Hunspell * speller(Language const * lang);
|
2010-01-21 12:47:45 +00:00
|
|
|
/// ignored words
|
|
|
|
bool isIgnored(WordLangTuple const & wl) const;
|
2010-12-02 18:52:31 +00:00
|
|
|
/// personal word list interface
|
|
|
|
void remove(WordLangTuple const & wl);
|
|
|
|
void insert(WordLangTuple const & wl);
|
|
|
|
bool learned(WordLangTuple const & wl);
|
2009-07-12 22:46:01 +00:00
|
|
|
/// the spellers
|
|
|
|
Spellers spellers_;
|
2010-01-21 12:47:45 +00:00
|
|
|
///
|
|
|
|
IgnoreList ignored_;
|
2010-12-02 18:52:31 +00:00
|
|
|
///
|
|
|
|
LangPersonalWordList personal_;
|
2010-06-06 19:10:10 +00:00
|
|
|
|
|
|
|
/// the location below system/user directory
|
|
|
|
/// there the aff+dic files lookup will happen
|
2011-02-17 17:09:06 +00:00
|
|
|
const string dictDirectory(void) const { return "dicts"; }
|
2010-06-08 13:38:12 +00:00
|
|
|
int maxLookupSelector(void) const { return 3; }
|
2011-02-18 11:54:14 +00:00
|
|
|
const string HunspellDictionaryName(Language const * lang) {
|
|
|
|
return lang->variety().empty()
|
|
|
|
? lang->code()
|
|
|
|
: lang->code() + "-" + lang->variety();
|
|
|
|
}
|
2009-07-12 22:46:01 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
|
2010-12-02 18:52:31 +00:00
|
|
|
/// Nothing to set up: all members are default-constructed (empty) containers.
HunspellChecker::Private::Private()
{}
|
|
|
|
|
|
|
|
|
2009-08-02 09:17:32 +00:00
|
|
|
/// Destroys all spellers, then saves and destroys all personal word lists.
HunspellChecker::Private::~Private()
{
	// Entries may hold a null pointer; deleting a null pointer is a no-op.
	for (Spellers::iterator sp = spellers_.begin(); sp != spellers_.end(); ++sp)
		delete sp->second;

	// Each existing personal word list is saved before it is released.
	for (LangPersonalWordList::const_iterator cur = personal_.begin();
	     cur != personal_.end(); ++cur) {
		PersonalWordList * const list = cur->second;
		if (list) {
			list->save();
			delete list;
		}
	}
}
|
|
|
|
|
|
|
|
|
2010-06-06 19:10:10 +00:00
|
|
|
/// Tells whether the dictionary file pair with base path \p hpath exists.
/// \return true only if \p hpath + ".aff" and \p hpath + ".dic" are both
/// readable files.
bool HunspellChecker::Private::haveLanguageFiles(string const & hpath)
{
	FileName const aff_file(hpath + ".aff");
	FileName const dic_file(hpath + ".dic");
	if (!aff_file.isReadableFile())
		return false;
	return dic_file.isReadableFile();
}
|
|
|
|
|
|
|
|
|
2010-06-06 19:10:10 +00:00
|
|
|
/// Maps a lookup selector to the directory to search for dictionaries.
/// \param selector 2: dictDirectory() below the system support directory;
///        1: dictDirectory() below the user support directory;
///        any other value: the path configured in lyxrc.hunspelldir_path.
const string HunspellChecker::Private::dictPath(int selector)
{
	if (selector == 2)
		return addName(package().system_support().absFileName(),dictDirectory());
	if (selector == 1)
		return addName(package().user_support().absFileName(),dictDirectory());
	return lyxrc.hunspelldir_path;
}
|
2010-05-29 14:36:51 +00:00
|
|
|
|
2009-11-25 16:52:45 +00:00
|
|
|
|
2011-02-17 17:09:06 +00:00
|
|
|
/// Looks for the dictionary files of \p lang below the directory \p hpath.
/// Two base names are tried in order: HunspellDictionaryName(lang), then
/// the language code with every '_' replaced by '-'.
/// \param lang  the language to look up; a null pointer yields false.
/// \param hpath in: the directory to search (an empty string yields false);
///        out: on success, the base path (without extension) of the
///        dictionary files found. Left untouched on failure.
/// \return true if a readable .aff/.dic pair was found.
bool HunspellChecker::Private::haveDictionary(Language const * lang, string & hpath)
{
	if (hpath.empty())
		return false;

	LYXERR(Debug::FILES, "check hunspell path: " << hpath
		<< " for language " << (lang ? lang->lang() : "NULL" ));

	// The debug line above tolerates a missing language; everything
	// below dereferences it, so stop here instead of crashing.
	if (!lang)
		return false;

	// first we try lang code+variety
	string h_path = addName(hpath, HunspellDictionaryName(lang));
	if (!haveLanguageFiles(h_path)) {
		// another try with code, '_' replaced by '-'
		h_path = addName(hpath, subst(lang->code(), '_', '-'));
		if (!haveLanguageFiles(h_path))
			return false;
	}

	LYXERR(Debug::FILES, "  found " << h_path);
	hpath = h_path;
	return true;
}
|
|
|
|
|
|
|
|
|
2011-02-17 17:09:06 +00:00
|
|
|
/// Looks for a dictionary of \p lang in each lookup directory in turn
/// (selectors 0 up to maxLookupSelector() - 1), stopping at the first hit.
/// \return true if any of the directories holds a matching dictionary.
bool HunspellChecker::Private::haveDictionary(Language const * lang)
{
	for (int selector = 0; selector < maxLookupSelector(); ++selector) {
		string candidate = dictPath(selector);
		if (haveDictionary(lang, candidate))
			return true;
	}
	// FIXME: when nothing was found...
	// we should indicate somehow that this language is not
	// supported, probably by popping a warning. But we'll need to
	// remember which warnings we've issued.
	return false;
}
|
|
|
|
|
|
|
|
|
2010-12-02 18:52:31 +00:00
|
|
|
/// Returns the speller registered under the name of \p lang. If no entry
/// exists yet, one is created through addSpeller(). An existing entry is
/// returned as stored, even when it holds a null pointer.
Hunspell * HunspellChecker::Private::speller(Language const * lang)
{
	Spellers::iterator const known = spellers_.find(lang->lang());
	if (known == spellers_.end())
		return addSpeller(lang);
	return known->second;
}
|
|
|
|
|
|
|
|
|
2011-02-17 17:09:06 +00:00
|
|
|
/// Creates a Hunspell instance for \p lang from a dictionary located below
/// the directory \p path and registers it under the language name.
/// \param lang the language the speller is created for.
/// \param path in: the directory to search; out: rewritten by
///        haveDictionary() to the dictionary base path when one is found.
/// \return the new speller, or 0 if no dictionary was found. In that case
///         a null entry is registered for the language.
Hunspell * HunspellChecker::Private::addSpeller(Language const * lang,string & path)
{
	if (!haveDictionary(lang, path)) {
		spellers_[lang->lang()] = 0;
		return 0;
	}

	FileName const affix(path + ".aff");
	FileName const dict(path + ".dic");
	Hunspell * h = new Hunspell(affix.absFileName().c_str(), dict.absFileName().c_str());
	// Report the language by name; streaming the pointer itself would
	// only print an address.
	LYXERR(Debug::FILES, "Hunspell speller for language " << lang->lang() << " at " << dict << " found");
	spellers_[lang->lang()] = h;
	return h;
}
|
|
|
|
|
|
|
|
|
2010-12-02 18:52:31 +00:00
|
|
|
/// Creates a speller for \p lang, trying the lookup directories in order
/// until one yields a speller. On success the personal word list of the
/// language is loaded, registered, and each of its words is added to the
/// new speller (converted to the dictionary encoding).
/// \return the new speller, or 0 if no lookup directory had a dictionary.
Hunspell * HunspellChecker::Private::addSpeller(Language const * lang)
{
	Hunspell * found = 0;
	int selector = 0;
	while (selector < maxLookupSelector() && !found) {
		string dir = dictPath(selector);
		found = addSpeller(lang, dir);
		++selector;
	}
	if (!found)
		return 0;

	string const encoding = found->get_dic_encoding();
	PersonalWordList * words = new PersonalWordList(lang->lang());
	words->load();
	personal_[lang->lang()] = words;

	// Teach the speller every word of the personal list.
	docstring_list::const_iterator const last = words->end();
	for (docstring_list::const_iterator cur = words->begin(); cur != last; ++cur) {
		string const encoded = to_iconv_encoding(*cur, encoding);
		found->add(encoded.c_str());
	}
	return found;
}
|
|
|
|
|
|
|
|
|
2010-01-21 12:47:45 +00:00
|
|
|
/// Tells whether \p wl is on the ignore list.
/// \return true if an entry has the same language code and the same word.
bool HunspellChecker::Private::isIgnored(WordLangTuple const & wl) const
{
	for (IgnoreList::const_iterator cur = ignored_.begin();
	     cur != ignored_.end(); ++cur) {
		if (cur->lang()->code() == wl.lang()->code()
		    && cur->word() == wl.word())
			return true;
	}
	return false;
}
|
|
|
|
|
2010-12-02 18:52:31 +00:00
|
|
|
/// personal word list interface
|
|
|
|
/// Removes the word of \p wl from the speller of its language and from the
/// personal word list of that language. Does nothing if the language has no
/// speller; skips the word list step if there is no list.
void HunspellChecker::Private::remove(WordLangTuple const & wl)
{
	Hunspell * const checker = speller(wl.lang());
	if (checker == 0)
		return;

	// The speller expects the word in the encoding of its dictionary.
	string const encoding = checker->get_dic_encoding();
	string const encoded = to_iconv_encoding(wl.word(), encoding);
	checker->remove(encoded.c_str());

	PersonalWordList * const list = personal_[wl.lang()->lang()];
	if (list)
		list->remove(wl.word());
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Adds the word of \p wl to the speller of its language and to the
/// personal word list of that language. Does nothing if the language has no
/// speller; skips the word list step if there is no list.
void HunspellChecker::Private::insert(WordLangTuple const & wl)
{
	Hunspell * const checker = speller(wl.lang());
	if (checker == 0)
		return;

	// The speller expects the word in the encoding of its dictionary.
	string const encoding = checker->get_dic_encoding();
	string const encoded = to_iconv_encoding(wl.word(), encoding);
	checker->add(encoded.c_str());

	PersonalWordList * const list = personal_[wl.lang()->lang()];
	if (list)
		list->insert(wl.word());
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Tells whether the word of \p wl is in the personal word list of its
/// language.
/// \return false if the language has no personal word list, otherwise the
///         result of the list's exists() query.
bool HunspellChecker::Private::learned(WordLangTuple const & wl)
{
	PersonalWordList * const list = personal_[wl.lang()->lang()];
	if (list)
		return list->exists(wl.word());
	return false;
}
|
|
|
|
|
2010-01-21 12:47:45 +00:00
|
|
|
|
2009-08-01 17:24:13 +00:00
|
|
|
/// Creates the checker together with its private implementation object.
HunspellChecker::HunspellChecker()
	: d(new Private)
{}
|
|
|
|
|
|
|
|
|
2009-08-01 17:24:13 +00:00
|
|
|
/// Destroys the private implementation object.
HunspellChecker::~HunspellChecker()
{
	delete d;
}
|
|
|
|
|
|
|
|
|
2009-08-02 09:17:32 +00:00
|
|
|
/// Checks the spelling of the word in \p wl with the speller of its language.
/// \return WORD_OK if the word is on the ignore list, if no speller is
///         available for the language, or if the speller accepts the word
///         and it is not in the personal word list; LEARNED_WORD if the
///         speller accepts the word and it is in the personal word list;
///         UNKNOWN_WORD otherwise.
SpellChecker::Result HunspellChecker::check(WordLangTuple const & wl)
{
	if (d->isIgnored(wl))
		return WORD_OK;

	Hunspell * h = d->speller(wl.lang());
	if (!h)
		return WORD_OK;

	// Start from "no flags" so that the tests below never read an
	// indeterminate value, should spell() return without storing into it.
	int info = 0;

	// The speller expects the word in the encoding of its dictionary.
	string const encoding = h->get_dic_encoding();
	string const word_to_check = to_iconv_encoding(wl.word(), encoding);

	if (h->spell(word_to_check.c_str(), &info))
		return d->learned(wl) ? LEARNED_WORD : WORD_OK;

	if (info & SPELL_COMPOUND) {
		// FIXME: What to do with that?
		LYXERR(Debug::FILES, "Hunspell compound word found " << word_to_check);
	}
	if (info & SPELL_FORBIDDEN) {
		// This was removed from personal dictionary
		LYXERR(Debug::FILES, "Hunspell explicit forbidden word found " << word_to_check);
	}

	return UNKNOWN_WORD;
}
|
|
|
|
|
|
|
|
|
2010-09-14 05:24:04 +00:00
|
|
|
void HunspellChecker::advanceChangeNumber()
|
|
|
|
{
|
|
|
|
nextChangeNumber();
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2009-08-02 09:17:32 +00:00
|
|
|
/// Learns the word of \p wl: hands it to the private implementation,
/// logs it and advances the change number.
void HunspellChecker::insert(WordLangTuple const & wl)
{
	d->insert(wl);
	LYXERR(Debug::GUI, "learn word: \"" << wl.word() << "\"");
	advanceChangeNumber();
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Unlearns the word of \p wl: hands it to the private implementation,
/// logs it and advances the change number.
void HunspellChecker::remove(WordLangTuple const & wl)
{
	d->remove(wl);
	LYXERR(Debug::GUI, "unlearn word: \"" << wl.word() << "\"");
	advanceChangeNumber();
}
|
|
|
|
|
|
|
|
|
2010-01-21 12:47:45 +00:00
|
|
|
/// Ignores the word of \p wl from now on: appends it to the ignore list,
/// logs it and advances the change number.
void HunspellChecker::accept(WordLangTuple const & wl)
{
	d->ignored_.push_back(wl);
	LYXERR(Debug::GUI, "ignore word: \"" << wl.word() << "\"");
	advanceChangeNumber();
}
|
|
|
|
|
|
|
|
|
2009-08-02 09:17:32 +00:00
|
|
|
/// Collects the speller's suggestions for the word in \p wl.
/// \param wl the word and its language.
/// \param suggestions cleared first, then filled with the suggestions
///        converted back from the dictionary encoding. Stays empty if the
///        language has no speller or the speller suggests nothing.
void HunspellChecker::suggest(WordLangTuple const & wl,
	docstring_list & suggestions)
{
	suggestions.clear();

	Hunspell * const checker = d->speller(wl.lang());
	if (!checker)
		return;

	// The speller expects the word in the encoding of its dictionary.
	string const encoding = checker->get_dic_encoding();
	string const encoded = to_iconv_encoding(wl.word(), encoding);

	char ** raw_list;
	int const count = checker->suggest(&raw_list, encoded.c_str());
	if (count > 0) {
		for (int i = 0; i < count; ++i)
			suggestions.push_back(from_iconv_encoding(raw_list[i], encoding));
		// Hand the list back to the speller for release.
		checker->free_list(&raw_list, count);
	}
}
|
|
|
|
|
|
|
|
|
2010-02-10 08:10:31 +00:00
|
|
|
/// Tells whether dictionary files for \p lang can be found.
/// \return false for a null \p lang, otherwise the result of searching all
///         lookup directories.
bool HunspellChecker::hasDictionary(Language const * lang) const
{
	return lang ? d->haveDictionary(lang) : false;
}
|
|
|
|
|
|
|
|
|
2009-08-01 17:24:13 +00:00
|
|
|
/// Always returns an empty string.
docstring const HunspellChecker::error()
{
	docstring const no_message;
	return no_message;
}
|
|
|
|
|
|
|
|
|
|
|
|
} // namespace lyx
|