lyx_mirror/src/HunspellChecker.cpp
Richard Kimberly Heck 9ffba4b72d nullptr
2021-01-04 13:47:17 -05:00

483 lines
11 KiB
C++

/**
* \file HunspellChecker.cpp
* This file is part of LyX, the document processor.
* Licence details can be found in the file COPYING.
*
* \author Abdelrazak Younes
*
* Full author contact details are available in file CREDITS.
*/
#include <config.h>
#include "HunspellChecker.h"
#include "PersonalWordList.h"
#include "LyXRC.h"
#include "WordLangTuple.h"
#include "support/debug.h"
#include "support/docstring_list.h"
#include "support/filetools.h"
#include "support/Package.h"
#include "support/FileName.h"
#include "support/lassert.h"
#include "support/lstrings.h"
#include <hunspell/hunspell.hxx>
#include <map>
#include <string>
#include <vector>
using namespace std;
using namespace lyx::support;
using namespace lyx::support::os;
namespace lyx {
namespace {
/// Hunspell instances keyed by Language::lang(); a null entry records
/// that no dictionary could be loaded for that key.
using Spellers = map<std::string, Hunspell *>;
/// Personal word lists keyed by Language::lang().
using LangPersonalWordList = map<std::string, PersonalWordList *>;
/// Word/language pairs that have been accepted (ignored) by the user.
using IgnoreList = vector<WordLangTuple>;
/// Return a copy of \p s in which every RIGHT SINGLE QUOTATION MARK
/// (U+2019) has been replaced by an APOSTROPHE (U+0027).
docstring remap_result(docstring const & s)
{
	docstring::value_type const right_single_quotation_mark = 0x2019;
	docstring::value_type const apostrophe = 0x0027;
	return subst(s, right_single_quotation_mark, apostrophe);
}
} // namespace
/// Implementation data and helpers behind HunspellChecker: the cache of
/// Hunspell objects, the per-language personal word lists, the list of
/// ignored words and the dictionary lookup locations.
struct HunspellChecker::Private
{
	Private();
	~Private();

	/// Delete every cached Hunspell object, then save and delete every
	/// personal word list.
	void cleanCache();
	/// Store \p path in user_path_; cleanCache() is run first when a
	/// change of the user path is detected.
	void setUserPath(std::string const & path);
	/// Directory searched for lookup slot \p selector:
	/// 1 = user support dir + dictDirectory(),
	/// 2 = system support dir + dictDirectory(),
	/// 3 = myspellPackageDictDirectory(),
	/// 4 = hunspellPackageDictDirectory(),
	/// any other value = user_path_.
	const string dictPath(int selector);
	/// True when both \p hpath + ".aff" and \p hpath + ".dic" are
	/// readable files.
	bool haveLanguageFiles(string const & hpath);
	/// Look for a dictionary for \p lang inside directory \p hpath. When
	/// dictionary files are found, \p hpath is replaced by their common
	/// path without extension.
	bool haveDictionary(Language const * lang, string & hpath);
	/// Try every lookup slot in turn until a dictionary for \p lang is
	/// found.
	bool haveDictionary(Language const * lang);
	/// Number of cached spellers that are not null.
	int numDictionaries() const;
	/// Create and cache a speller for \p lang from directory \p hpath;
	/// caches and returns null when no dictionary is found there.
	Hunspell * addSpeller(Language const * lang, string & hpath);
	/// Create a speller for \p lang from the first lookup slot that
	/// provides one and feed it the personal word list.
	Hunspell * addSpeller(Language const * lang);
	/// Cached speller for \p lang, created on demand.
	Hunspell * speller(Language const * lang);
	/// Cached speller for \p lang, or null when none is cached.
	Hunspell * lookup(Language const * lang);

	/// ignored words
	bool isIgnored(WordLangTuple const & wl) const;

	/// personal word list interface
	void remove(WordLangTuple const & wl);
	void insert(WordLangTuple const & wl);
	bool learned(WordLangTuple const & wl);

	/// the spellers
	Spellers spellers_;
	/// words accepted via HunspellChecker::accept()
	IgnoreList ignored_;
	/// personal word lists, one per language key
	LangPersonalWordList personal_;
	/// user dictionary directory, see setUserPath()
	std::string user_path_;

	/// Name of the sub-directory below the system/user support
	/// directory in which the aff+dic files are looked up.
	const string dictDirectory() const { return "dicts"; }
	/// Number of lookup slots; dictPath() is queried with the
	/// selectors 0 .. maxLookupSelector() - 1.
	int maxLookupSelector() const { return 5; }
	/// Dictionary base name for \p lang: the language code, followed by
	/// "-" and the variety when the variety is not empty.
	const string HunspellDictionaryName(Language const * lang) {
		if (lang->variety().empty())
			return lang->code();
		return lang->code() + "-" + lang->variety();
	}
	/// Fixed location of packaged myspell dictionaries.
	const string myspellPackageDictDirectory() {
		return "/usr/share/myspell";
	}
	/// Fixed location of packaged hunspell dictionaries.
	const string hunspellPackageDictDirectory() {
		return "/usr/share/hunspell";
	}
};
/// Initialise the user dictionary path from the hunspelldir_path
/// preference in lyxrc.
HunspellChecker::Private::Private()
{
	std::string const & configured_path = lyxrc.hunspelldir_path;
	setUserPath(configured_path);
}
/// Release everything held in the caches: cleanCache() deletes the
/// spellers and saves and deletes the personal word lists.
HunspellChecker::Private::~Private()
{
cleanCache();
}
/// Make \p path the user dictionary directory.
/// When \p path differs from the directory currently in use, all cached
/// spellers and personal word lists are dropped via cleanCache() before
/// the new value is stored. Nothing happens when the path is unchanged.
void HunspellChecker::Private::setUserPath(std::string const & path)
{
	// Compare against the value that is actually going to be stored.
	// Testing the global preference instead would only be correct as
	// long as every caller happens to pass exactly that preference.
	if (user_path_ == path)
		return;
	cleanCache();
	user_path_ = path;
}
void HunspellChecker::Private::cleanCache()
{
Spellers::iterator it = spellers_.begin();
Spellers::iterator end = spellers_.end();
for (; it != end; ++it) {
delete it->second;
it->second = nullptr;
}
LangPersonalWordList::const_iterator pdit = personal_.begin();
LangPersonalWordList::const_iterator pdet = personal_.end();
for (; pdit != pdet; ++pdit) {
if (pdit->second == nullptr)
continue;
PersonalWordList * pd = pdit->second;
pd->save();
delete pd;
}
}
/// Check for a dictionary file pair.
/// \param hpath dictionary path without extension.
/// \return true when both \p hpath + ".aff" and \p hpath + ".dic" are
/// readable files.
bool HunspellChecker::Private::haveLanguageFiles(string const & hpath)
{
	FileName const aff_file(hpath + ".aff");
	FileName const dic_file(hpath + ".dic");
	if (!aff_file.isReadableFile())
		return false;
	return dic_file.isReadableFile();
}
/// Map a lookup slot to the directory that is searched for dictionaries.
/// \param selector 1 = dictDirectory() below the user support directory,
/// 2 = dictDirectory() below the system support directory,
/// 3 = the myspell package directory, 4 = the hunspell package
/// directory; any other value selects user_path_.
const string HunspellChecker::Private::dictPath(int selector)
{
	if (selector == 1)
		return addName(package().user_support().absFileName(),dictDirectory());
	if (selector == 2)
		return addName(package().system_support().absFileName(),dictDirectory());
	if (selector == 3)
		return myspellPackageDictDirectory();
	if (selector == 4)
		return hunspellPackageDictDirectory();
	return user_path_;
}
/// Look for a dictionary for \p lang in directory \p hpath.
/// Returns false for a null \p lang or an empty \p hpath, and true
/// immediately (leaving \p hpath untouched) when a speller for \p lang is
/// already cached. Otherwise two file names are tried in order:
/// HunspellDictionaryName(lang), then the language code with '_'
/// replaced by '-'. On a match \p hpath is overwritten with the matching
/// path (without extension).
bool HunspellChecker::Private::haveDictionary(Language const * lang, string & hpath)
{
	if (!lang || hpath.empty())
		return false;
	if (lookup(lang) != nullptr)
		return true;

	string const dict_name = HunspellDictionaryName(lang);
	LYXERR(Debug::FILES, "check hunspell path: " << hpath
		<< " for language " << lang->lang() << " with name " << dict_name);

	// first candidate: lang code+variety
	string candidate = addName(hpath, dict_name);
	if (!haveLanguageFiles(candidate)) {
		// second candidate: plain code, '_' replaced by '-'
		candidate = addName(hpath, subst(lang->code(), '_', '-'));
		if (!haveLanguageFiles(candidate))
			return false;
	}
	LYXERR(Debug::FILES, " found " << candidate);
	hpath = candidate;
	return true;
}
/// Refresh the user path from lyxrc, then probe the lookup slots
/// 0 .. maxLookupSelector() - 1 in order and stop at the first one that
/// has a dictionary for \p lang.
/// \return true when some slot provides a dictionary.
bool HunspellChecker::Private::haveDictionary(Language const * lang)
{
	setUserPath(lyxrc.hunspelldir_path);
	int const selector_count = maxLookupSelector();
	for (int selector = 0; selector < selector_count; ++selector) {
		string location = dictPath(selector);
		if (haveDictionary(lang, location))
			return true;
	}
	return false;
}
/// Return the speller for \p lang. A cached, non-null speller is
/// returned directly; otherwise the user path is refreshed from lyxrc
/// and addSpeller() is asked to create one (which may yield null).
Hunspell * HunspellChecker::Private::speller(Language const * lang)
{
	if (Hunspell * const cached = lookup(lang))
		return cached;
	setUserPath(lyxrc.hunspelldir_path);
	return addSpeller(lang);
}
/// Cache lookup only: the entry stored in spellers_ under lang->lang(),
/// or null when there is no such entry. \p lang must not be null.
Hunspell * HunspellChecker::Private::lookup(Language const * lang)
{
	Spellers::iterator const pos = spellers_.find(lang->lang());
	if (pos == spellers_.end())
		return nullptr;
	return pos->second;
}
/// Create a speller for \p lang from the dictionary found in directory
/// \p path and cache it under lang->lang().
/// \param path directory to search; haveDictionary() replaces it by the
/// dictionary path without extension when files are found.
/// \return the new speller, or null when haveDictionary() fails. The
/// failure is cached too, as a null entry in spellers_.
Hunspell * HunspellChecker::Private::addSpeller(Language const * lang, string & path)
{
	if (!haveDictionary(lang, path)) {
		spellers_[lang->lang()] = nullptr;
		return nullptr;
	}
	FileName const affix(path + ".aff");
	FileName const dict(path + ".dic");
	Hunspell * h = new Hunspell(affix.absFileName().c_str(), dict.absFileName().c_str());
	// Log the language key rather than the Language pointer, whose
	// address carries no information for the reader of the log.
	LYXERR(Debug::FILES, "Hunspell speller for language " << lang->lang() << " at " << dict << " added.");
	spellers_[lang->lang()] = h;
	return h;
}
/// Create a speller for \p lang from the first lookup slot
/// (0 .. maxLookupSelector() - 1) that yields one. On success the
/// personal word list for lang->lang() is loaded, stored in personal_,
/// and each of its words is added to the new speller, converted to the
/// dictionary encoding.
/// \return the new speller, or null when no slot provides a dictionary.
Hunspell * HunspellChecker::Private::addSpeller(Language const * lang)
{
	Hunspell * created = nullptr;
	int const selector_count = maxLookupSelector();
	for (int selector = 0; selector < selector_count; ++selector) {
		string location = dictPath(selector);
		created = addSpeller(lang, location);
		if (created)
			break;
	}
	if (!created)
		return nullptr;

	string const encoding = created->get_dic_encoding();
	PersonalWordList * const words = new PersonalWordList(lang->lang());
	words->load();
	personal_[lang->lang()] = words;

	docstring_list::const_iterator const last = words->end();
	for (docstring_list::const_iterator cur = words->begin(); cur != last; ++cur) {
		string const encoded_word = to_iconv_encoding(*cur, encoding);
		created->add(encoded_word.c_str());
	}
	return created;
}
int HunspellChecker::Private::numDictionaries() const
{
int result = 0;
Spellers::const_iterator it = spellers_.begin();
Spellers::const_iterator et = spellers_.end();
for (; it != et; ++it)
result += it->second != nullptr;
return result;
}
/// True when ignored_ contains an entry with the same language code and
/// the same word as \p wl.
bool HunspellChecker::Private::isIgnored(WordLangTuple const & wl) const
{
	for (WordLangTuple const & entry : ignored_) {
		bool const same_language = entry.lang()->code() == wl.lang()->code();
		if (same_language && entry.word() == wl.word())
			return true;
	}
	return false;
}
/// personal word list interface
/// Remove the word of \p wl from the speller of its language and, when a
/// personal word list is registered for that language, from that list
/// too. Does nothing when no speller is available.
void HunspellChecker::Private::remove(WordLangTuple const & wl)
{
	Hunspell * const checker = speller(wl.lang());
	if (checker == nullptr)
		return;

	// Hunspell expects the word in the encoding of its dictionary.
	string const encoding = checker->get_dic_encoding();
	string const encoded_word = to_iconv_encoding(wl.word(), encoding);
	checker->remove(encoded_word.c_str());

	if (PersonalWordList * const words = personal_[wl.lang()->lang()])
		words->remove(wl.word());
}
/// Add the word of \p wl to the speller of its language and, when a
/// personal word list is registered for that language, to that list
/// too. Does nothing when no speller is available.
void HunspellChecker::Private::insert(WordLangTuple const & wl)
{
	Hunspell * const checker = speller(wl.lang());
	if (checker == nullptr)
		return;

	// Hunspell expects the word in the encoding of its dictionary.
	string const encoding = checker->get_dic_encoding();
	string const encoded_word = to_iconv_encoding(wl.word(), encoding);
	checker->add(encoded_word.c_str());

	if (PersonalWordList * const words = personal_[wl.lang()->lang()])
		words->insert(wl.word());
}
/// True when a personal word list is registered for the language of
/// \p wl and that list contains the word.
bool HunspellChecker::Private::learned(WordLangTuple const & wl)
{
	PersonalWordList * const words = personal_[wl.lang()->lang()];
	if (words == nullptr)
		return false;
	return words->exists(wl.word());
}
/// Allocate the private implementation object; it is released again in
/// the destructor.
HunspellChecker::HunspellChecker()
: d(new Private)
{}
/// Delete the private implementation object; its destructor runs
/// Private::cleanCache().
HunspellChecker::~HunspellChecker()
{
delete d;
}
/// Spell-check the word of \p wl in its language.
/// \return WORD_OK when the word is on the ignore list;
/// NO_DICTIONARY when no speller is available for the language;
/// LEARNED_WORD or WORD_OK when Hunspell accepts the word, depending on
/// whether it is in the personal word list; UNKNOWN_WORD otherwise.
SpellChecker::Result HunspellChecker::check(WordLangTuple const & wl)
{
	if (d->isIgnored(wl))
		return WORD_OK;

	Hunspell * h = d->speller(wl.lang());
	if (!h)
		return NO_DICTIONARY;

	// Start from 0 so that the flag tests below never read an
	// indeterminate value should spell() return without storing to it.
	int info = 0;

	string const encoding = h->get_dic_encoding();
	string const word_to_check = to_iconv_encoding(wl.word(), encoding);

	LYXERR(Debug::GUI, "spellCheck: \"" <<
		wl.word() << "\", lang = " << wl.lang()->lang()) ;
#ifdef HAVE_HUNSPELL_CXXABI
	if (h->spell(word_to_check, &info))
#else
	if (h->spell(word_to_check.c_str(), &info))
#endif
		return d->learned(wl) ? LEARNED_WORD : WORD_OK;

	if (info & SPELL_COMPOUND) {
		// FIXME: What to do with that?
		LYXERR(Debug::GUI, "Hunspell compound word found " << word_to_check);
	}
	if (info & SPELL_FORBIDDEN) {
		// This was removed from personal dictionary
		LYXERR(Debug::GUI, "Hunspell explicit forbidden word found " << word_to_check);
	}

	return UNKNOWN_WORD;
}
/// Forward to nextChangeNumber(), which is declared outside this file.
/// Called here after every change to the learned or ignored words.
void HunspellChecker::advanceChangeNumber()
{
nextChangeNumber();
}
/// Learn the word of \p wl: hand it to Private::insert(), log it, then
/// advance the change number.
void HunspellChecker::insert(WordLangTuple const & wl)
{
d->insert(wl);
LYXERR(Debug::GUI, "learn word: \"" << wl.word() << "\"") ;
advanceChangeNumber();
}
/// Unlearn the word of \p wl: hand it to Private::remove(), log it, then
/// advance the change number.
void HunspellChecker::remove(WordLangTuple const & wl)
{
d->remove(wl);
LYXERR(Debug::GUI, "unlearn word: \"" << wl.word() << "\"") ;
advanceChangeNumber();
}
/// Ignore the word of \p wl from now on: append it to the ignore list
/// consulted by Private::isIgnored(), log it, then advance the change
/// number.
void HunspellChecker::accept(WordLangTuple const & wl)
{
d->ignored_.push_back(wl);
LYXERR(Debug::GUI, "ignore word: \"" << wl.word() << "\"") ;
advanceChangeNumber();
}
/// Collect Hunspell's spelling suggestions for the word of \p wl.
/// \param suggestions cleared first, then filled with the suggestions,
/// converted from the dictionary encoding and passed through
/// remap_result(). Left empty when no speller is available.
void HunspellChecker::suggest(WordLangTuple const & wl,
	docstring_list & suggestions)
{
	suggestions.clear();

	Hunspell * const checker = d->speller(wl.lang());
	if (checker == nullptr)
		return;

	string const encoding = checker->get_dic_encoding();
	string const encoded_word = to_iconv_encoding(wl.word(), encoding);
#ifdef HAVE_HUNSPELL_CXXABI
	vector<string> const proposals = checker->suggest(encoded_word);
	for (string const & proposal : proposals)
		suggestions.push_back(remap_result(from_iconv_encoding(proposal, encoding)));
#else
	char ** raw_list;
	int const count = checker->suggest(&raw_list, encoded_word.c_str());
	if (count <= 0)
		return;
	for (int idx = 0; idx < count; ++idx)
		suggestions.push_back(remap_result(from_iconv_encoding(raw_list[idx], encoding)));
	// the list was handed out by Hunspell and is given back to it
	checker->free_list(&raw_list, count);
#endif
}
/// Collect the stems Hunspell reports for the word of \p wl.
/// \param suggestions cleared first, then filled with the stems,
/// converted from the dictionary encoding. Left empty when no speller
/// is available.
void HunspellChecker::stem(WordLangTuple const & wl,
	docstring_list & suggestions)
{
	suggestions.clear();

	Hunspell * const checker = d->speller(wl.lang());
	if (checker == nullptr)
		return;

	string const encoding = checker->get_dic_encoding();
	string const encoded_word = to_iconv_encoding(wl.word(), encoding);
#ifdef HAVE_HUNSPELL_CXXABI
	vector<string> const stems = checker->stem(encoded_word);
	for (string const & one_stem : stems)
		suggestions.push_back(from_iconv_encoding(one_stem, encoding));
#else
	char ** raw_list;
	int const count = checker->stem(&raw_list, encoded_word.c_str());
	if (count <= 0)
		return;
	for (int idx = 0; idx < count; ++idx)
		suggestions.push_back(from_iconv_encoding(raw_list[idx], encoding));
	// the list was handed out by Hunspell and is given back to it
	checker->free_list(&raw_list, count);
#endif
}
/// True when \p lang is not null and Private::haveDictionary() finds a
/// dictionary for it.
bool HunspellChecker::hasDictionary(Language const * lang) const
{
	return lang != nullptr && d->haveDictionary(lang);
}
/// Number of spellers currently cached, as counted by
/// Private::numDictionaries().
int HunspellChecker::numDictionaries() const
{
	int const loaded = d->numDictionaries();
	return loaded;
}
/// Always returns an empty string.
docstring const HunspellChecker::error()
{
	docstring const no_error;
	return no_error;
}
} // namespace lyx