/** * \file HunspellChecker.cpp * This file is part of LyX, the document processor. * Licence details can be found in the file COPYING. * * \author Abdelrazak Younes * * Full author contact details are available in file CREDITS. */ #include #include "HunspellChecker.h" #include "PersonalWordList.h" #include "LyXRC.h" #include "WordLangTuple.h" #include "support/debug.h" #include "support/docstring_list.h" #include "support/filetools.h" #include "support/Package.h" #include "support/FileName.h" #include "support/lassert.h" #include "support/lstrings.h" #include #include #include #include using namespace std; using namespace lyx::support; using namespace lyx::support::os; namespace lyx { namespace { typedef map Spellers; typedef map LangPersonalWordList; docstring remap_result(docstring const & s) { // substitute RIGHT SINGLE QUOTATION MARK // by APOSTROPHE return subst(s, 0x2019, 0x0027); } } // namespace struct HunspellChecker::Private { Private(); ~Private(); void cleanCache(); void setUserPath(std::string const & path); const string dictPath(int selector); bool haveLanguageFiles(string const & hpath); bool haveDictionary(Language const * lang, string & hpath); bool haveDictionary(Language const * lang); int numDictionaries() const; Hunspell * addSpeller(Language const * lang, string & hpath); Hunspell * addSpeller(Language const * lang); Hunspell * speller(Language const * lang); Hunspell * lookup(Language const * lang); /// ignored words bool isIgnored(WordLangTuple const & wl) const; /// personal word list interface void remove(WordLangTuple const & wl); void insert(WordLangTuple const & wl); bool learned(WordLangTuple const & wl); /// the spellers Spellers spellers_; /// WordLangTable ignored_; /// LangPersonalWordList personal_; /// std::string user_path_; /// the location below system/user directory /// there the aff+dic files lookup will happen const string dictDirectory(void) const { return "dicts"; } int maxLookupSelector(void) const { return 5; } const string HunspellDictionaryName(Language const * lang) { return lang->variety().empty() ? lang->code() : lang->code() + "-" + lang->variety(); } const string myspellPackageDictDirectory(void) { return "/usr/share/myspell"; } const string hunspellPackageDictDirectory(void) { return "/usr/share/hunspell"; } }; HunspellChecker::Private::Private() { setUserPath(lyxrc.hunspelldir_path); } HunspellChecker::Private::~Private() { cleanCache(); } void HunspellChecker::Private::setUserPath(std::string const & path) { if (user_path_ != lyxrc.hunspelldir_path) { cleanCache(); user_path_ = path; } } void HunspellChecker::Private::cleanCache() { Spellers::iterator it = spellers_.begin(); Spellers::iterator end = spellers_.end(); for (; it != end; ++it) { delete it->second; it->second = nullptr; } LangPersonalWordList::const_iterator pdit = personal_.begin(); LangPersonalWordList::const_iterator pdet = personal_.end(); for (; pdit != pdet; ++pdit) { if (pdit->second == nullptr) continue; PersonalWordList * pd = pdit->second; pd->save(); delete pd; } } bool HunspellChecker::Private::haveLanguageFiles(string const & hpath) { FileName const affix(hpath + ".aff"); FileName const dict(hpath + ".dic"); return affix.isReadableFile() && dict.isReadableFile(); } const string HunspellChecker::Private::dictPath(int selector) { switch (selector) { case 4: return hunspellPackageDictDirectory(); case 3: return myspellPackageDictDirectory(); case 2: return addName(package().system_support().absFileName(),dictDirectory()); case 1: return addName(package().user_support().absFileName(),dictDirectory()); default: return user_path_; } } bool HunspellChecker::Private::haveDictionary(Language const * lang, string & hpath) { if (hpath.empty() || !lang) return false; if (lookup(lang)) return true; string d_name = HunspellDictionaryName(lang); LYXERR(Debug::FILES, "check hunspell path: " << hpath << " for language " << lang->lang() << " with name " << d_name); string h_path = addName(hpath, d_name); // first we try lang code+variety if (haveLanguageFiles(h_path)) { LYXERR(Debug::FILES, " found " << h_path); hpath = h_path; return true; } // another try with code, '_' replaced by '-' h_path = addName(hpath, subst(lang->code(), '_', '-')); if (!haveLanguageFiles(h_path)) return false; LYXERR(Debug::FILES, " found " << h_path); hpath = h_path; return true; } bool HunspellChecker::Private::haveDictionary(Language const * lang) { bool result = false; setUserPath(lyxrc.hunspelldir_path); for (int p = 0; !result && p < maxLookupSelector(); ++p) { string lpath = dictPath(p); result = haveDictionary(lang, lpath); } return result; } Hunspell * HunspellChecker::Private::speller(Language const * lang) { Hunspell * h = lookup(lang); if (h) return h; setUserPath(lyxrc.hunspelldir_path); return addSpeller(lang); } Hunspell * HunspellChecker::Private::lookup(Language const * lang) { Spellers::iterator it = spellers_.find(lang->lang()); return it != spellers_.end() ? it->second : nullptr; } Hunspell * HunspellChecker::Private::addSpeller(Language const * lang, string & path) { if (!haveDictionary(lang, path)) { spellers_[lang->lang()] = nullptr; return nullptr; } FileName const affix(path + ".aff"); FileName const dict(path + ".dic"); Hunspell * h = new Hunspell(affix.absFileName().c_str(), dict.absFileName().c_str()); LYXERR(Debug::FILES, "Hunspell speller for langage " << lang << " at " << dict << " added."); spellers_[lang->lang()] = h; return h; } Hunspell * HunspellChecker::Private::addSpeller(Language const * lang) { Hunspell * h = nullptr; for (int p = 0; p < maxLookupSelector() && nullptr == h; ++p) { string lpath = dictPath(p); h = addSpeller(lang, lpath); } if (h) { string const encoding = h->get_dic_encoding(); PersonalWordList * pd = new PersonalWordList(lang->lang()); pd->load(); personal_[lang->lang()] = pd; docstring_list::const_iterator it = pd->begin(); docstring_list::const_iterator et = pd->end(); for (; it != et; ++it) { string const word_to_add = to_iconv_encoding(*it, encoding); h->add(word_to_add.c_str()); } } return h; } int HunspellChecker::Private::numDictionaries() const { int result = 0; Spellers::const_iterator it = spellers_.begin(); Spellers::const_iterator et = spellers_.end(); for (; it != et; ++it) result += it->second != nullptr; return result; } bool HunspellChecker::Private::isIgnored(WordLangTuple const & wl) const { WordLangTable::const_iterator it = ignored_.begin(); for (; it != ignored_.end(); ++it) { if (it->lang()->code() != wl.lang()->code()) continue; if (it->word() == wl.word()) return true; } return false; } /// personal word list interface void HunspellChecker::Private::remove(WordLangTuple const & wl) { Hunspell * h = speller(wl.lang()); if (!h) return; string const encoding = h->get_dic_encoding(); string const word_to_check = to_iconv_encoding(wl.word(), encoding); h->remove(word_to_check.c_str()); PersonalWordList * pd = personal_[wl.lang()->lang()]; if (!pd) return; pd->remove(wl.word()); } void HunspellChecker::Private::insert(WordLangTuple const & wl) { Hunspell * h = speller(wl.lang()); if (!h) return; string const encoding = h->get_dic_encoding(); string const word_to_check = to_iconv_encoding(wl.word(), encoding); h->add(word_to_check.c_str()); PersonalWordList * pd = personal_[wl.lang()->lang()]; if (!pd) return; pd->insert(wl.word()); } bool HunspellChecker::Private::learned(WordLangTuple const & wl) { PersonalWordList * pd = personal_[wl.lang()->lang()]; if (!pd) return false; return pd->exists(wl.word()); } HunspellChecker::HunspellChecker() : d(new Private) {} HunspellChecker::~HunspellChecker() { delete d; } SpellChecker::Result HunspellChecker::check(WordLangTuple const & wl, vector const & docdict) { if (d->isIgnored(wl)) return WORD_OK; WordLangTable::const_iterator it = docdict.begin(); for (; it != docdict.end(); ++it) { if (it->lang()->code() != wl.lang()->code()) continue; if (it->word() == wl.word()) return DOCUMENT_LEARNED_WORD; } Hunspell * h = d->speller(wl.lang()); if (!h) return NO_DICTIONARY; int info; string const encoding = h->get_dic_encoding(); string const word_to_check = to_iconv_encoding(wl.word(), encoding); LYXERR(Debug::GUI, "spellCheck: \"" << wl.word() << "\", lang = " << wl.lang()->lang()) ; #ifdef HAVE_HUNSPELL_CXXABI if (h->spell(word_to_check, &info)) #else if (h->spell(word_to_check.c_str(), &info)) #endif return d->learned(wl) ? LEARNED_WORD : WORD_OK; if (info & SPELL_COMPOUND) { // FIXME: What to do with that? LYXERR(Debug::GUI, "Hunspell compound word found " << word_to_check); } if (info & SPELL_FORBIDDEN) { // This was removed from personal dictionary LYXERR(Debug::GUI, "Hunspell explicit forbidden word found " << word_to_check); } return UNKNOWN_WORD; } void HunspellChecker::advanceChangeNumber() { nextChangeNumber(); } void HunspellChecker::insert(WordLangTuple const & wl) { d->insert(wl); LYXERR(Debug::GUI, "learn word: \"" << wl.word() << "\"") ; advanceChangeNumber(); } void HunspellChecker::remove(WordLangTuple const & wl) { d->remove(wl); LYXERR(Debug::GUI, "unlearn word: \"" << wl.word() << "\"") ; advanceChangeNumber(); } void HunspellChecker::accept(WordLangTuple const & wl) { d->ignored_.push_back(wl); LYXERR(Debug::GUI, "ignore word: \"" << wl.word() << "\"") ; advanceChangeNumber(); } void HunspellChecker::suggest(WordLangTuple const & wl, docstring_list & suggestions) { suggestions.clear(); Hunspell * h = d->speller(wl.lang()); if (!h) return; string const encoding = h->get_dic_encoding(); string const word_to_check = to_iconv_encoding(wl.word(), encoding); #ifdef HAVE_HUNSPELL_CXXABI vector wlst = h->suggest(word_to_check); for (auto const & s : wlst) suggestions.push_back(remap_result(from_iconv_encoding(s, encoding))); #else char ** suggestion_list; int const suggestion_number = h->suggest(&suggestion_list, word_to_check.c_str()); if (suggestion_number <= 0) return; for (int i = 0; i != suggestion_number; ++i) suggestions.push_back(remap_result(from_iconv_encoding(suggestion_list[i], encoding))); h->free_list(&suggestion_list, suggestion_number); #endif } void HunspellChecker::stem(WordLangTuple const & wl, docstring_list & suggestions) { suggestions.clear(); Hunspell * h = d->speller(wl.lang()); if (!h) return; string const encoding = h->get_dic_encoding(); string const word_to_check = to_iconv_encoding(wl.word(), encoding); #ifdef HAVE_HUNSPELL_CXXABI vector wlst = h->stem(word_to_check); for (auto const & s : wlst) suggestions.push_back(from_iconv_encoding(s, encoding)); #else char ** suggestion_list; int const suggestion_number = h->stem(&suggestion_list, word_to_check.c_str()); if (suggestion_number <= 0) return; for (int i = 0; i != suggestion_number; ++i) suggestions.push_back(from_iconv_encoding(suggestion_list[i], encoding)); h->free_list(&suggestion_list, suggestion_number); #endif } bool HunspellChecker::hasDictionary(Language const * lang) const { if (!lang) return false; return d->haveDictionary(lang); } int HunspellChecker::numDictionaries() const { return d->numDictionaries(); } docstring const HunspellChecker::error() { return docstring(); } } // namespace lyx