#7043 add the implementation of a persistent personal word list for LyX spell checker, aspell and hunspell backend

git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/trunk@36661 a592a061-630c-0410-9148-cb99ea01b6c8
This commit is contained in:
Stephan Witt 2010-12-02 18:52:31 +00:00
parent 663029f55a
commit a2287675d2
8 changed files with 435 additions and 63 deletions

View File

@ -1552,10 +1552,13 @@ Alias('tex2lyx', tex2lyx)
#
if env.has_key('USE_ASPELL') and env['USE_ASPELL']:
src_post_files.append('AspellChecker.cpp')
src_post_files.append('PersonalWordList.cpp')
elif env.has_key('USE_ENCHANT') and env['USE_ENCHANT']:
src_post_files.append('EnchantChecker.cpp')
src_post_files.append('PersonalWordList.cpp')
elif env.has_key('USE_HUNSPELL') and env['USE_HUNSPELL']:
src_post_files.append('HunspellChecker.cpp')
src_post_files.append('PersonalWordList.cpp')
# tells scons how to get these moced files, although not all moced files are needed
# (or are actually generated).

View File

@ -12,6 +12,8 @@
#include <config.h>
#include "AspellChecker.h"
#include "PersonalWordList.h"
#include "LyXRC.h"
#include "WordLangTuple.h"
@ -39,9 +41,11 @@ namespace {
/// State kept for one aspell speller; stored by value in the Spellers map.
struct Speller {
/// aspell configuration of this speller (released with delete_aspell_config())
AspellConfig * config;
/// speller creation result; addSpeller() sets it to 0 when it discards the config
AspellCanHaveError * e_speller;
/// words added by accept(); initSessionDictionary() re-adds them to the session
docstring_list ignored_words_;
};
typedef std::map<std::string, Speller> Spellers;
typedef map<std::string, PersonalWordList *> LangPersonalWordList;
} // anon namespace
@ -52,16 +56,13 @@ struct AspellChecker::Private
~Private();
/// add a speller of the given language and variety
AspellSpeller * addSpeller(string const & lang,
string const & variety = string());
AspellSpeller * addSpeller(Language const * lang);
///
AspellSpeller * speller(string const & lang,
string const & variety);
AspellSpeller * speller(Language const * lang);
/// create a unique ID from lang code and variety
string const spellerID(string const & lang,
string const & variety);
string const spellerID(Language const * lang);
bool isValidDictionary(AspellConfig * config,
string const & lang, string const & variety);
@ -70,9 +71,21 @@ struct AspellChecker::Private
string const & lang, string const & variety);
AspellConfig * getConfig(string const & lang, string const & variety);
SpellChecker::Result check(AspellSpeller * m,
string const & word) const;
void initSessionDictionary(Speller const & speller, PersonalWordList * pd);
void insert(WordLangTuple const & word);
void remove(WordLangTuple const & word);
bool learned(WordLangTuple const & word);
void accept(Speller & speller, WordLangTuple const & word);
/// the spellers
Spellers spellers_;
LangPersonalWordList personal_;
/// the location below system/user directory
/// there the rws files lookup will happen
const string dictDirectory(void) { return "dict"; }
@ -113,6 +126,17 @@ AspellChecker::Private::~Private()
}
delete_aspell_config(it->second.config);
}
LangPersonalWordList::const_iterator pdit = personal_.begin();
LangPersonalWordList::const_iterator pdet = personal_.end();
for (; pdit != pdet; ++pdit) {
if ( 0 == pdit->second)
continue;
PersonalWordList * pd = pdit->second;
pd->save();
delete pd;
}
}
@ -186,16 +210,37 @@ AspellConfig * AspellChecker::Private::getConfig(string const & lang, string con
}
AspellSpeller * AspellChecker::Private::addSpeller(string const & lang,
string const & variety)
/// Reset the aspell session of \p speller and refill it, first with all
/// words of the personal word list \p pd, then with the words the user
/// chose to ignore for this speller.
void AspellChecker::Private::initSessionDictionary(
	Speller const & speller,
	PersonalWordList * pd)
{
	AspellSpeller * const session = to_aspell_speller(speller.e_speller);
	aspell_speller_clear_session(session);

	// learned words from the personal word list
	for (docstring_list::const_iterator learned = pd->begin();
	     learned != pd->end(); ++learned) {
		string const utf8_word = to_utf8(*learned);
		aspell_speller_add_to_session(session, utf8_word.c_str(), -1);
	}

	// words ignored for this speller
	docstring_list const & ignored = speller.ignored_words_;
	for (docstring_list::const_iterator skip = ignored.begin();
	     skip != ignored.end(); ++skip) {
		string const utf8_word = to_utf8(*skip);
		aspell_speller_add_to_session(session, utf8_word.c_str(), -1);
	}
}
AspellSpeller * AspellChecker::Private::addSpeller(Language const * lang)
{
Speller m;
m.config = getConfig(lang, variety);
string const code = lang->code();
string const variety = lang->variety();
m.config = getConfig(code, variety);
// Aspell supports both languages and varieties (such as German
// old vs. new spelling). The respective naming convention is
// lang_REGION-variety (e.g. de_DE-alt).
aspell_config_replace(m.config, "lang", lang.c_str());
aspell_config_replace(m.config, "lang", code.c_str());
if (!variety.empty())
aspell_config_replace(m.config, "variety", variety.c_str());
// Set the encoding to utf-8.
@ -221,30 +266,82 @@ AspellSpeller * AspellChecker::Private::addSpeller(string const & lang,
delete_aspell_config(m.config);
m.config = 0;
m.e_speller = 0;
} else {
PersonalWordList * pd = new PersonalWordList(lang->lang());
pd->load();
personal_[lang->lang()] = pd;
initSessionDictionary(m, pd);
}
spellers_[spellerID(lang, variety)] = m;
spellers_[spellerID(lang)] = m;
return m.e_speller ? to_aspell_speller(m.e_speller) : 0;
}
AspellSpeller * AspellChecker::Private::speller(string const & lang,
string const & variety)
AspellSpeller * AspellChecker::Private::speller(Language const * lang)
{
Spellers::iterator it = spellers_.find(spellerID(lang, variety));
Spellers::iterator it = spellers_.find(spellerID(lang));
if (it != spellers_.end())
return to_aspell_speller(it->second.e_speller);
return addSpeller(lang, variety);
return addSpeller(lang);
}
string const AspellChecker::Private::spellerID(string const & lang,
string const & variety)
string const AspellChecker::Private::spellerID(Language const * lang)
{
if (variety.empty())
return lang;
return lang + "-" + variety;
return lang->code() + "-" + lang->variety();
}
/// Ask the aspell speller \p m about \p word (passed as a
/// null-terminated string, hence the length argument of -1).
/// \return WORD_OK if aspell accepts the word, UNKNOWN_WORD otherwise.
SpellChecker::Result AspellChecker::Private::check(
	AspellSpeller * m, string const & word)
	const
{
	int const word_ok = aspell_speller_check(m, word.c_str(), -1);
	// -1 is the value the assertion treats as an aspell failure
	LASSERT(word_ok != -1, /**/);
	if (word_ok == 0)
		return UNKNOWN_WORD;
	return WORD_OK;
}
/// Remember \p word as ignored for \p speller. The word is only appended
/// to the speller's ignore list here.
void AspellChecker::Private::accept(Speller & speller, WordLangTuple const & word)
{
	docstring_list & ignore_list = speller.ignored_words_;
	ignore_list.push_back(word.word());
}
/// personal word list interface
/// Remove \p word from the personal word list of its language and, if a
/// speller for that language is already registered, rebuild the speller's
/// session so the word is no longer accepted.
void AspellChecker::Private::remove(WordLangTuple const & word)
{
	PersonalWordList * const list = personal_[word.lang()->lang()];
	if (list == 0)
		return;
	list->remove(word.word());

	Spellers::iterator const found = spellers_.find(spellerID(word.lang()));
	if (found == spellers_.end())
		return;
	// the session is cleared and refilled from the updated list
	initSessionDictionary(found->second, list);
}
/// Learn \p word: add it to the running aspell session of its language
/// and to the persistent personal word list.
/// Nothing happens when no usable speller is registered for the language.
void AspellChecker::Private::insert(WordLangTuple const & word)
{
	Spellers::iterator it = spellers_.find(spellerID(word.lang()));
	if (it == spellers_.end())
		return;
	// addSpeller() also registers spellers whose creation failed; those
	// carry a null e_speller and must not be handed to aspell.
	if (!it->second.e_speller)
		return;
	AspellSpeller * speller = to_aspell_speller(it->second.e_speller);
	aspell_speller_add_to_session(speller, to_utf8(word.word()).c_str(), -1);
	PersonalWordList * pd = personal_[word.lang()->lang()];
	if (!pd)
		return;
	pd->insert(word.word());
}
/// \return true if \p word is contained in the personal word list of its
/// language; false if there is no list for that language.
bool AspellChecker::Private::learned(WordLangTuple const & word)
{
	PersonalWordList * const list = personal_[word.lang()->lang()];
	if (list != 0)
		return list->exists(word.word());
	return false;
}
@ -262,8 +359,7 @@ AspellChecker::~AspellChecker()
SpellChecker::Result AspellChecker::check(WordLangTuple const & word)
{
AspellSpeller * m =
d->speller(word.lang()->code(), word.lang()->variety());
AspellSpeller * m = d->speller(word.lang());
if (!m)
return WORD_OK;
@ -273,10 +369,8 @@ SpellChecker::Result AspellChecker::check(WordLangTuple const & word)
return WORD_OK;
string const word_str = to_utf8(word.word());
int const word_ok = aspell_speller_check(m, word_str.c_str(), -1);
LASSERT(word_ok != -1, /**/);
return (word_ok) ? WORD_OK : UNKNOWN_WORD;
SpellChecker::Result rc = d->check(m, word_str);
return (rc == WORD_OK && d->learned(word)) ? LEARNED_WORD : rc;
}
@ -288,23 +382,18 @@ void AspellChecker::advanceChangeNumber()
void AspellChecker::insert(WordLangTuple const & word)
{
Spellers::iterator it = d->spellers_.find(
d->spellerID(word.lang()->code(), word.lang()->variety()));
if (it != d->spellers_.end()) {
AspellSpeller * speller = to_aspell_speller(it->second.e_speller);
aspell_speller_add_to_personal(speller, to_utf8(word.word()).c_str(), -1);
advanceChangeNumber();
}
d->insert(word);
advanceChangeNumber();
}
void AspellChecker::accept(WordLangTuple const & word)
{
Spellers::iterator it = d->spellers_.find(
d->spellerID(word.lang()->code(), word.lang()->variety()));
Spellers::iterator it = d->spellers_.find(d->spellerID(word.lang()));
if (it != d->spellers_.end()) {
AspellSpeller * speller = to_aspell_speller(it->second.e_speller);
aspell_speller_add_to_session(speller, to_utf8(word.word()).c_str(), -1);
d->accept(it->second, word);
advanceChangeNumber();
}
}
@ -314,8 +403,7 @@ void AspellChecker::suggest(WordLangTuple const & wl,
docstring_list & suggestions)
{
suggestions.clear();
AspellSpeller * m =
d->speller(wl.lang()->code(), wl.lang()->variety());
AspellSpeller * m = d->speller(wl.lang());
if (!m)
return;
@ -337,6 +425,11 @@ void AspellChecker::suggest(WordLangTuple const & wl,
delete_aspell_string_enumeration(els);
}
/// Unlearn \p word: forward the removal to the private implementation,
/// then advance the change number.
void AspellChecker::remove(WordLangTuple const & word)
{
d->remove(word);
advanceChangeNumber();
}
bool AspellChecker::hasDictionary(Language const * lang) const
{

View File

@ -28,7 +28,7 @@ public:
enum Result check(WordLangTuple const &);
void suggest(WordLangTuple const &, docstring_list &);
void insert(WordLangTuple const &);
void remove(WordLangTuple const &) {};
void remove(WordLangTuple const &);
void accept(WordLangTuple const &);
bool hasDictionary(Language const * lang) const;
docstring const error();

View File

@ -11,6 +11,7 @@
#include <config.h>
#include "HunspellChecker.h"
#include "PersonalWordList.h"
#include "LyXRC.h"
#include "WordLangTuple.h"
@ -42,6 +43,8 @@ namespace lyx {
namespace {
typedef map<std::string, Hunspell *> Spellers;
typedef map<std::string, PersonalWordList *> LangPersonalWordList;
typedef vector<WordLangTuple> IgnoreList;
} // anon namespace
@ -49,8 +52,7 @@ typedef vector<WordLangTuple> IgnoreList;
struct HunspellChecker::Private
{
Private() {}
Private();
~Private();
const string dictPath(int selector);
@ -58,15 +60,20 @@ struct HunspellChecker::Private
bool haveDictionary(string const & lang, string & hpath);
bool haveDictionary(string const & lang);
Hunspell * addSpeller(string const & lang, string & hpath);
Hunspell * addSpeller(string const & lang);
Hunspell * speller(string const & lang);
Hunspell * addSpeller(Language const * lang);
Hunspell * speller(Language const * lang);
/// ignored words
bool isIgnored(WordLangTuple const & wl) const;
/// personal word list interface
void remove(WordLangTuple const & wl);
void insert(WordLangTuple const & wl);
bool learned(WordLangTuple const & wl);
/// the spellers
Spellers spellers_;
///
IgnoreList ignored_;
///
LangPersonalWordList personal_;
/// the location below system/user directory
/// there the aff+dic files lookup will happen
@ -75,6 +82,11 @@ struct HunspellChecker::Private
};
/// Nothing to set up here: the body is empty and all members are
/// default-constructed.
HunspellChecker::Private::Private()
{
}
HunspellChecker::Private::~Private()
{
Spellers::iterator it = spellers_.begin();
@ -83,6 +95,17 @@ HunspellChecker::Private::~Private()
for (; it != end; ++it) {
if ( 0 != it->second) delete it->second;
}
LangPersonalWordList::const_iterator pdit = personal_.begin();
LangPersonalWordList::const_iterator pdet = personal_.end();
for (; pdit != pdet; ++pdit) {
if ( 0 == pdit->second)
continue;
PersonalWordList * pd = pdit->second;
pd->save();
delete pd;
}
}
@ -98,10 +121,10 @@ const string HunspellChecker::Private::dictPath(int selector)
{
switch (selector) {
case 2:
return addName(lyx::support::package().system_support().absFileName(),dictDirectory());
return addName(package().system_support().absFileName(),dictDirectory());
break;
case 1:
return addName(lyx::support::package().user_support().absFileName(),dictDirectory());
return addName(package().user_support().absFileName(),dictDirectory());
break;
default:
return lyxrc.hunspelldir_path;
@ -143,12 +166,12 @@ bool HunspellChecker::Private::haveDictionary(string const & lang)
}
Hunspell * HunspellChecker::Private::speller(string const & lang)
Hunspell * HunspellChecker::Private::speller(Language const * lang)
{
Spellers::iterator it = spellers_.find(lang);
Spellers::iterator it = spellers_.find(lang->code());
if (it != spellers_.end())
return it->second;
return addSpeller(lang);
}
@ -169,12 +192,24 @@ Hunspell * HunspellChecker::Private::addSpeller(string const & lang,string & pat
}
Hunspell * HunspellChecker::Private::addSpeller(string const & lang)
Hunspell * HunspellChecker::Private::addSpeller(Language const * lang)
{
Hunspell * h = 0;
for ( int p = 0; p < maxLookupSelector() && 0 == h; p++ ) {
string lpath = dictPath(p);
h = addSpeller(lang, lpath);
h = addSpeller(lang->code(), lpath);
}
if (0 != h) {
string const encoding = h->get_dic_encoding();
PersonalWordList * pd = new PersonalWordList(lang->lang());
pd->load();
personal_[lang->lang()] = pd;
docstring_list::const_iterator it = pd->begin();
docstring_list::const_iterator et = pd->end();
for (; it != et; ++it) {
string const word_to_add = to_iconv_encoding(*it, encoding);
h->add(word_to_add.c_str());
}
}
return h;
}
@ -192,6 +227,45 @@ bool HunspellChecker::Private::isIgnored(WordLangTuple const & wl) const
return false;
}
/// personal word list interface
/// Unlearn \p wl: remove it from the hunspell instance of its language
/// and from the persistent personal word list.
/// Nothing happens when no speller is available for the language.
void HunspellChecker::Private::remove(WordLangTuple const & wl)
{
	Hunspell * const hunspell = speller(wl.lang());
	if (hunspell == 0)
		return;

	// convert the word to the encoding reported by the dictionary
	string const dic_encoding = hunspell->get_dic_encoding();
	string const encoded_word = to_iconv_encoding(wl.word(), dic_encoding);
	hunspell->remove(encoded_word.c_str());

	PersonalWordList * const list = personal_[wl.lang()->lang()];
	if (list != 0)
		list->remove(wl.word());
}
/// Learn \p wl: add it to the hunspell instance of its language and to
/// the persistent personal word list.
/// Nothing happens when no speller is available for the language.
void HunspellChecker::Private::insert(WordLangTuple const & wl)
{
	Hunspell * const hunspell = speller(wl.lang());
	if (hunspell == 0)
		return;

	// convert the word to the encoding reported by the dictionary
	string const dic_encoding = hunspell->get_dic_encoding();
	string const encoded_word = to_iconv_encoding(wl.word(), dic_encoding);
	hunspell->add(encoded_word.c_str());

	PersonalWordList * const list = personal_[wl.lang()->lang()];
	if (list != 0)
		list->insert(wl.word());
}
/// \return true if \p wl is contained in the personal word list of its
/// language; false if there is no list for that language.
bool HunspellChecker::Private::learned(WordLangTuple const & wl)
{
	PersonalWordList * const list = personal_[wl.lang()->lang()];
	if (list != 0)
		return list->exists(wl.word());
	return false;
}
HunspellChecker::HunspellChecker(): d(new Private)
{
@ -209,23 +283,23 @@ SpellChecker::Result HunspellChecker::check(WordLangTuple const & wl)
if (d->isIgnored(wl))
return WORD_OK;
Hunspell * h = d->speller(wl.lang()->code());
Hunspell * h = d->speller(wl.lang());
if (!h)
return WORD_OK;
int info;
string const encoding = h->get_dic_encoding();
string const word_to_check = to_iconv_encoding(wl.word(), encoding);
if (h->spell(word_to_check.c_str(), &info))
return WORD_OK;
return d->learned(wl) ? LEARNED_WORD : WORD_OK;
if (info & SPELL_COMPOUND) {
// FIXME: What to do with that?
LYXERR(Debug::FILES, "Hunspell compound word found " << word_to_check);
}
if (info & SPELL_FORBIDDEN) {
// FIXME: What to do with that?
// This was removed from personal dictionary
LYXERR(Debug::FILES, "Hunspell explicit forbidden word found " << word_to_check);
}
@ -241,11 +315,16 @@ void HunspellChecker::advanceChangeNumber()
void HunspellChecker::insert(WordLangTuple const & wl)
{
string const word_to_check = to_utf8(wl.word());
Hunspell * h = d->speller(wl.lang()->code());
if (!h)
return;
h->add(word_to_check.c_str());
d->insert(wl);
LYXERR(Debug::GUI, "learn word: \"" << wl.word() << "\"") ;
advanceChangeNumber();
}
/// Unlearn \p wl: forward the removal to the private implementation,
/// log it on the GUI debug channel and advance the change number.
void HunspellChecker::remove(WordLangTuple const & wl)
{
d->remove(wl);
LYXERR(Debug::GUI, "unlearn word: \"" << wl.word() << "\"") ;
advanceChangeNumber();
}
@ -253,6 +332,7 @@ void HunspellChecker::insert(WordLangTuple const & wl)
/// Ignore \p wl: append it to the list of ignored words, log it on the
/// GUI debug channel and advance the change number.
void HunspellChecker::accept(WordLangTuple const & wl)
{
d->ignored_.push_back(wl);
LYXERR(Debug::GUI, "ignore word: \"" << wl.word() << "\"") ;
advanceChangeNumber();
}
@ -261,7 +341,7 @@ void HunspellChecker::suggest(WordLangTuple const & wl,
docstring_list & suggestions)
{
suggestions.clear();
Hunspell * h = d->speller(wl.lang()->code());
Hunspell * h = d->speller(wl.lang());
if (!h)
return;
string const encoding = h->get_dic_encoding();

View File

@ -28,7 +28,7 @@ public:
enum Result check(WordLangTuple const &);
void suggest(WordLangTuple const &, docstring_list &);
void insert(WordLangTuple const &);
void remove(WordLangTuple const &) {};
void remove(WordLangTuple const &);
void accept(WordLangTuple const &);
bool hasDictionary(Language const * lang) const;
docstring const error();

View File

@ -49,14 +49,17 @@ endif
if USE_ASPELL
ASPELL = AspellChecker.cpp AspellChecker.h
PWL = PersonalWordList.cpp PersonalWordList.h
endif
if USE_ENCHANT
ENCHANT = EnchantChecker.cpp EnchantChecker.h
PWL = PersonalWordList.cpp PersonalWordList.h
endif
if USE_HUNSPELL
HUNSPELL = HunspellChecker.cpp HunspellChecker.h
PWL = PersonalWordList.cpp PersonalWordList.h
endif
# These four objects are linked as object files as they are not
@ -77,6 +80,7 @@ lyx_SOURCES = \
Dimension.h \
$(ENCHANT) \
$(HUNSPELL) \
$(PWL) \
PrinterParams.cpp \
PrinterParams.h \
Thesaurus.cpp \

135
src/PersonalWordList.cpp Normal file
View File

@ -0,0 +1,135 @@
/**
* \file PersonalWordList.cpp
* This file is part of LyX, the document processor.
* Licence details can be found in the file COPYING.
*
* \author Stephan Witt
*
* Full author contact details are available in file CREDITS.
*/
#include <config.h>
#include "PersonalWordList.h"
#include "support/debug.h"
#include "support/docstring_list.h"
#include "support/filetools.h"
#include "support/Package.h"
#include "support/FileName.h"
#include "support/lstrings.h"
#include "support/os.h"
#include <string>
#include <fstream>
using namespace std;
using namespace lyx::support;
using namespace lyx::support::os;
namespace lyx {
/// Build the location of the word list file: a file named
/// pwl_<language>.dict below the user support directory.
FileName PersonalWordList::dictfile() const
{
	string basename("pwl_");
	basename += lang_;
	basename += ".dict";
	return FileName(addName(package().user_support().absFileName(), basename));
}
/// \return read-only iterator to the first word of the list
docstring_list::const_iterator PersonalWordList::begin() const
{
	docstring_list::const_iterator const first = words_.begin();
	return first;
}
/// \return read-only iterator one past the last word of the list
docstring_list::const_iterator PersonalWordList::end() const
{
	docstring_list::const_iterator const past_last = words_.end();
	return past_last;
}
void PersonalWordList::load()
{
FileName fn = dictfile();
LYXERR(Debug::FILES, "load personal dictionary from: " << fn);
ifstream ifs(fn.toFilesystemEncoding().c_str());
dirty(words_.size() > 0);
words_.clear();
string line;
getline(ifs, line);
if (line == header()) {
while (ifs) {
getline(ifs, line);
if (!line.empty()) {
docstring const word = from_utf8(line);
insert(word);
}
}
LYXERR(Debug::FILES, "valid dictionary file found: " << words_.size() << " items.");
} else {
LYXERR(Debug::FILES, "invalid dictionary file found: header is \"" << line << "\".");
}
ifs.close();
dirty(false);
}
void PersonalWordList::save()
{
if (!isDirty())
return;
FileName fn = dictfile();
LYXERR(Debug::FILES, "save personal dictionary at: " << fn);
ofstream ofs(fn.toFilesystemEncoding().c_str());
docstring_list::iterator it = words_.begin();
docstring_list::const_iterator et = words_.end();
ofs << header() << "\n";
for (; it != et; ++it) {
ofs << to_utf8(*it) << "\n";
}
LYXERR(Debug::FILES, "count of saved items: " << words_.size());
}
/// Comparison used for all lookups in the word list: two words are
/// equal if operator== says so.
bool PersonalWordList::equalwords(docstring const & w1, docstring const & w2) const
{
	bool const same = (w1 == w2);
	return same;
}
/// \return true if \p word is an element of the word list.
/// The list is scanned from front to back using equalwords().
bool PersonalWordList::exists(docstring const & word) const
{
	docstring_list::const_iterator pos = words_.begin();
	docstring_list::const_iterator const last = words_.end();
	while (pos != last) {
		if (equalwords(word, *pos))
			return true;
		++pos;
	}
	return false;
}
/// Append \p word to the word list and mark the list as modified.
/// A word that is already present is not added again and leaves the
/// modified flag untouched.
void PersonalWordList::insert(docstring const & word)
{
	if (!exists(word)) {
		words_.push_back(word);
		dirty(true);
	}
}
/// Erase the first occurrence of \p word from the word list and mark
/// the list as modified. Nothing changes if the word is not present.
void PersonalWordList::remove(docstring const & word)
{
	// locate the first matching entry
	docstring_list::iterator pos = words_.begin();
	while (pos != words_.end() && !equalwords(word, *pos))
		++pos;
	if (pos == words_.end())
		return;
	words_.erase(pos);
	dirty(true);
}
} // namespace lyx

57
src/PersonalWordList.h Normal file
View File

@ -0,0 +1,57 @@
// -*- C++ -*-
/**
* \file PersonalWordList.h
* This file is part of LyX, the document processor.
* Licence details can be found in the file COPYING.
*
* \author Stephan Witt
*
* Full author contact details are available in file CREDITS.
*/
#ifndef PERSONAL_WORD_LIST_H
#define PERSONAL_WORD_LIST_H
#include "support/strfwd.h"
#include "support/docstring_list.h"
#include "support/FileName.h"
#include <string>
namespace lyx {
/// A PersonalWordList holds a word list with persistent state
class PersonalWordList {
public:
	/// the word list has an associated language
	/// (a new list is empty and not modified)
	PersonalWordList(std::string lang) : lang_(lang), dirty_(false) {}
	/// the location of the file to hold the word list
	lyx::support::FileName dictfile() const;
	/// (re)load the word list from file
	void load();
	/// save word list to file
	void save();
	/// check for presence of given word
	bool exists(docstring const & word) const;
	/// add given word to list
	void insert(docstring const & word);
	/// remove given word from list
	void remove(docstring const & word);
	/// is word list modified after load/save?
	bool isDirty() const { return dirty_; }
	/// first item in word list
	docstring_list::const_iterator begin() const;
	/// end of word list
	docstring_list::const_iterator end() const;
private:
	/// the words of the list
	docstring_list words_;
	/// the language the list belongs to; part of the file name
	std::string lang_;
	/// true if the list was changed by insert() or remove()
	bool dirty_;
	/// comparison used to look up words in the list
	bool equalwords(docstring const & w1, docstring const & w2) const;
	/// first line of a valid word list file
	std::string header() const { return "# personal word list"; }
	/// set the modification state of the list
	void dirty(bool flag) { dirty_ = flag; }
};
} // namespace lyx
#endif // PERSONAL_WORD_LIST_H