From e9be0fae66e1520c128b8005864f3b471360d85d Mon Sep 17 00:00:00 2001 From: Abdelrazak Younes Date: Fri, 8 May 2009 21:38:50 +0000 Subject: [PATCH] Integrate mythes-1.1 into our source and remove support for aiksaurus. I have tested only the Cmake build system. Please test autotools and scons. git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/trunk@29577 a592a061-630c-0410-9148-cb99ea01b6c8 --- configure.ac | 38 -- development/cmake/src/support/CMakeLists.txt | 15 +- development/scons/scons_manifest.py | 2 + src/Thesaurus.cpp | 109 +----- src/support/Makefile.am | 4 +- src/support/mythes/license.readme | 34 ++ src/support/mythes/mythes.cxx | 365 +++++++++++++++++++ src/support/mythes/mythes.hxx | 67 ++++ 8 files changed, 481 insertions(+), 153 deletions(-) create mode 100644 src/support/mythes/license.readme create mode 100644 src/support/mythes/mythes.cxx create mode 100644 src/support/mythes/mythes.hxx diff --git a/configure.ac b/configure.ac index a4a929988d..054da29f9a 100644 --- a/configure.ac +++ b/configure.ac @@ -116,44 +116,6 @@ AC_CHECK_LIB(psapi, main, [LIBPSAPI=-lpsapi]) AC_SUBST(LIBPSAPI) AC_CHECK_LIB(gdi32, main) -AC_ARG_WITH(mythes, - [ --without-mythes do not use the MyThes library], - [lyx_use_mythes=$withval]) -if test x$lyx_use_mythes != xno; then -AC_CHECK_LIB(mythes, main, - [AC_DEFINE(HAVE_LIBMYTHES,1,[Define this if you have the MyThes library]) - MYTHES_LIBS="-lmythes" - lyx_flags="mythes $lyx_flags" - ]) -AC_CHECK_HEADER(mythes.hxx,[ - ac_cv_header_mythes_h=yes - lyx_cv_mythes_h_location=""]) -AC_CHECK_HEADER(mythes/mythes.hxx,[ - ac_cv_header_mythes_h=yes - lyx_cv_mythes_h_location=""]) -AC_DEFINE_UNQUOTED(MYTHES_H_LOCATION,$lyx_cv_mythes_h_location,[Location of mythes.hxx]) -fi -AC_SUBST(MYTHES_LIBS) - -AC_ARG_WITH(aiksaurus, - [ --without-aiksaurus do not use the Aiksaurus library], - [lyx_use_aiksaurus=$withval]) -if test x$lyx_use_aiksaurus != xno; then -AC_CHECK_LIB(Aiksaurus, main, - [AC_DEFINE(HAVE_LIBAIKSAURUS,1,[Define this if you have 
the AikSaurus library]) - AIKSAURUS_LIBS="-lAiksaurus" - lyx_flags="aiksaurus $lyx_flags" - ]) -AC_CHECK_HEADER(Aiksaurus.h,[ - ac_cv_header_aiksaurus_h=yes - lyx_cv_aiksaurus_h_location=""]) -AC_CHECK_HEADER(Aiksaurus/Aiksaurus.h,[ - ac_cv_header_aiksaurus_h=yes - lyx_cv_aiksaurus_h_location=""]) -AC_DEFINE_UNQUOTED(AIKSAURUS_H_LOCATION,$lyx_cv_aiksaurus_h_location,[Location of Aiksaurus.h]) -fi -AC_SUBST(AIKSAURUS_LIBS) - LYX_USE_INCLUDED_BOOST # Needed for our char_type diff --git a/development/cmake/src/support/CMakeLists.txt b/development/cmake/src/support/CMakeLists.txt index d902a2f581..1d24fe0817 100644 --- a/development/cmake/src/support/CMakeLists.txt +++ b/development/cmake/src/support/CMakeLists.txt @@ -9,9 +9,8 @@ project(support) file(GLOB support_sources ${TOP_SRC_DIR}/src/support/${LYX_CPP_FILES}) file(GLOB support_headers ${TOP_SRC_DIR}/src/support/${LYX_HPP_FILES}) -file(GLOB support_minizip_sources ${TOP_SRC_DIR}/src/support/minizip/*.c) -file(GLOB support_minizip_cpp_sources ${TOP_SRC_DIR}/src/support/minizip/*.cpp) -file(GLOB support_minizip_headers ${TOP_SRC_DIR}/src/support/minizip/*.h) +file(GLOB support_mythes_sources ${TOP_SRC_DIR}/src/support/mythes/*.cxx) +file(GLOB support_mythes_headers ${TOP_SRC_DIR}/src/support/mythes/*.hxx) file(GLOB support_linkback_sources ${TOP_SRC_DIR}/src/support/linkback/*.m*) file(GLOB support_linkback_headers ${TOP_SRC_DIR}/src/support/linkback/*.h) @@ -23,8 +22,6 @@ list(REMOVE_ITEM support_sources ${TOP_SRC_DIR}/src/support/os_os2.C ${TOP_SRC_DIR}/src/support/atexit.c ${TOP_SRC_DIR}/src/support/strerror.c - ${TOP_SRC_DIR}/src/support/minizip/iowin32.h - ${TOP_SRC_DIR}/src/support/minizip/iowin32.c ${TOP_SRC_DIR}/src/support/gettext.cpp) if(APPLE) @@ -43,15 +40,15 @@ lyx_automoc(${support_sources}) include_directories(${TOP_SRC_DIR}/src/support ${CMAKE_BINARY_DIR}/src/support - ${TOP_SRC_DIR}/src/support/minizip + ${TOP_SRC_DIR}/src/support/mythes ${QT_INCLUDES} ${ICONV_INCLUDE_DIR} ${ZLIB_INCLUDE_DIR}) 
if(NOT MERGE_FILES) - set(support_sources ${support_sources} ${support_minizip_sources} ${support_minizip_cpp_sources} ${support_linkback_sources}) - set(support_headers ${support_headers} ${support_minizip_headers} ${support_linkback_headers}) + set(support_sources ${support_sources} ${support_mythes_sources} ${support_linkback_sources}) + set(support_headers ${support_headers} ${support_mythes_headers} ${support_linkback_headers}) add_library(support ${library_type} ${support_sources} ${support_headers} ${dont_merge}) else() lyx_const_touched_files(_allinone support_sources) @@ -61,7 +58,7 @@ else() set_source_files_properties(_allinone_touched.C PROPERTIES OBJECT_DEPENDS "${depends_moc}") add_library(support ${library_type} ${_allinone_files} - ${support_minizip_sources} ${support_minizip_cpp_sources} ${support_linkback_sources} ${support_headers} ${dont_merge}) + ${support_mythes_sources} ${support_linkback_sources} ${support_headers} ${dont_merge}) endif() target_link_libraries(support boost_signals ${QT_QTCORE_LIBRARY} ${ZLIB_LIBRARY}) diff --git a/development/scons/scons_manifest.py b/development/scons/scons_manifest.py index 4989f51493..9fbb66b475 100644 --- a/development/scons/scons_manifest.py +++ b/development/scons/scons_manifest.py @@ -316,6 +316,7 @@ src_support_header_files = Split(''' types.h unicode.h userinfo.h + mythes/mythes.hxx ''') @@ -347,6 +348,7 @@ src_support_files = Split(''' socktools.cpp unicode.cpp userinfo.cpp + mythes/mythes.hxx ''') diff --git a/src/Thesaurus.cpp b/src/Thesaurus.cpp index 42daa0c8eb..a80ef87ad7 100644 --- a/src/Thesaurus.cpp +++ b/src/Thesaurus.cpp @@ -13,25 +13,19 @@ #include "Thesaurus.h" -#include "support/debug.h" -#include "support/gettext.h" #include "LyXRC.h" #include "support/FileNameList.h" +#include "support/debug.h" #include "support/filetools.h" +#include "support/gettext.h" #include "support/lstrings.h" #include "support/os.h" #include "support/unicode.h" -#include "frontends/alert.h" +#include 
"support/mythes/mythes.hxx" -#ifdef HAVE_LIBMYTHES -#include MYTHES_H_LOCATION -#else -#ifdef HAVE_LIBAIKSAURUS -#include AIKSAURUS_H_LOCATION -#endif // HAVE_LIBAIKSAURUS -#endif // !HAVE_LIBMYTHES +#include "frontends/alert.h" #include #include @@ -42,8 +36,6 @@ using namespace lyx::support::os; namespace lyx { -#ifdef HAVE_LIBMYTHES - namespace { string const to_iconv_encoding(docstring const & s, string const & encoding) @@ -199,99 +191,6 @@ Thesaurus::Meanings Thesaurus::lookup(docstring const & t, docstring const & lan return meanings; } -#else // HAVE_LIBMYTHES -#ifdef HAVE_LIBAIKSAURUS - -struct Thesaurus::Private -{ - Private(): thes_(new Aiksaurus) {} - Aiksaurus * thes_; -}; - -Thesaurus::Meanings Thesaurus::lookup(docstring const & t, docstring const &) -{ - Meanings meanings; - - // aiksaurus is for english text only, therefore it does not work - // with non-ascii strings. - // The interface of the Thesaurus class uses docstring because a - // non-english thesaurus is possible in theory. - if (!support::isAscii(t)) - // to_ascii() would assert - return meanings; - - string const text = to_ascii(t); - - docstring error = from_ascii(d->thes_->error()); - if (!error.empty()) { - static bool sent_error = false; - if (!sent_error) { - frontend::Alert::error(_("Thesaurus failure"), - bformat(_("Aiksaurus returned the following error:\n\n%1$s."), - error)); - sent_error = true; - } - return meanings; - } - if (!d->thes_->find(text.c_str())) - return meanings; - - // weird api, but ... 
- - int prev_meaning = -1; - int cur_meaning; - docstring meaning; - - // correct, returns "" at the end - string ret = d->thes_->next(cur_meaning); - - while (!ret.empty()) { - if (cur_meaning != prev_meaning) { - meaning = from_ascii(ret); - ret = d->thes_->next(cur_meaning); - prev_meaning = cur_meaning; - } else { - if (ret != text) - meanings[meaning].push_back(from_ascii(ret)); - } - - ret = d->thes_->next(cur_meaning); - } - - for (Meanings::iterator it = meanings.begin(); - it != meanings.end(); ++it) - sort(it->second.begin(), it->second.end()); - - return meanings; -} - - -bool Thesaurus::thesaurusAvailable(docstring const & lang) const -{ - // we support English only - return prefixIs(lang, from_ascii("en_")); -} - -#else // HAVE_LIBAIKSAURUS - -struct Thesaurus::Private -{ -}; - - -Thesaurus::Meanings Thesaurus::lookup(docstring const &, docstring const &) -{ - return Meanings(); -} - - -bool Thesaurus::thesaurusAvailable(docstring const &) const -{ - return false; -} - -#endif // HAVE_LIBAIKSAURUS -#endif // HAVE_LIBMYTHES Thesaurus::Thesaurus() : d(new Thesaurus::Private) { diff --git a/src/support/Makefile.am b/src/support/Makefile.am index f6ebfe05f3..3e66033981 100644 --- a/src/support/Makefile.am +++ b/src/support/Makefile.am @@ -97,7 +97,9 @@ liblyxsupport_a_SOURCES = \ userinfo.h \ unicode.cpp \ unicode.h \ - weighted_btree.h + weighted_btree.h \ + mythes/mythes.cxx \ + mythes/mythes.hxx if INSTALL_MACOSX liblyxsupport_a_SOURCES += \ diff --git a/src/support/mythes/license.readme b/src/support/mythes/license.readme new file mode 100644 index 0000000000..b6bf70a0c7 --- /dev/null +++ b/src/support/mythes/license.readme @@ -0,0 +1,34 @@ +/* + * Copyright 2003 Kevin B. Hendricks, Stratford, Ontario, Canada + * And Contributors. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All modifications to the source code must be clearly marked as + * such. Binary redistributions based on modified source code + * must be clearly marked as modified versions in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ *
+ */
diff --git a/src/support/mythes/mythes.cxx b/src/support/mythes/mythes.cxx
new file mode 100644
index 0000000000..c530580384
--- /dev/null
+++ b/src/support/mythes/mythes.cxx
@@ -0,0 +1,438 @@
+#include "license.readme"
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <errno.h>
+
+#include "mythes.hxx"
+
+// NOTE: modified for LyX (see clause 3 of license.readme). Compared to
+// the MyThes 1.1 release this copy
+//  - initialises pdfile and releases everything through thCleanup() when
+//    initialisation fails (no wild fclose(), no double free),
+//  - validates the index size and the meaning count read from disk,
+//  - checks allocations and failed reads in thInitialize() and Lookup(),
+//  - keeps empty synonym fields from derailing the synonym parser,
+//  - refuses to binary-search an empty word list.
+
+// some basic utility routines
+
+
+// string duplication routine: returns a malloc()ed copy of p that the
+// caller must free(), or NULL if p is NULL or memory is exhausted
+char * mythesstrdup(const char * p)
+{
+    if (!p) return NULL;
+
+    size_t sl = strlen(p) + 1;
+    char * d = (char *)malloc(sl);
+    if (d) memcpy(d,p,sl);
+    return d;
+}
+
+
+// return index of char in string, -1 if it does not occur
+int mystr_indexOfChar(const char * d, int c)
+{
+    const char * p = strchr(d,c);
+    if (p) return (int)(p-d);
+    return -1;
+}
+
+
+// remove cross-platform text line end characters
+// (one trailing "\n", "\r" or "\r\n")
+void mytheschomp(char * s)
+{
+    size_t k = strlen(s);
+    if ((k > 0) && ((*(s+k-1)=='\r') || (*(s+k-1)=='\n'))) *(s+k-1) = '\0';
+    if ((k > 1) && (*(s+k-2) == '\r')) *(s+k-2) = '\0';
+}
+
+
+
+// load the index at idxpath and open the data file at datpath.
+// There is no error return: on failure a message goes to stderr and
+// the object stays empty, i.e. every Lookup() returns 0.
+MyThes::MyThes(const char* idxpath, const char * datpath)
+{
+    nw = 0;
+    encoding = NULL;
+    list = NULL;
+    offst = NULL;
+    pdfile = NULL;
+
+    if (thInitialize(idxpath, datpath) != 1) {
+        fprintf(stderr,"Error - can't open %s or %s\n",idxpath, datpath);
+        fflush(stderr);
+        // thInitialize() may have stopped half way; let thCleanup()
+        // release whatever exists and reset every member, so that
+        // nothing is freed a second time by the destructor
+        thCleanup();
+        // did not initialize properly - throw exception?
+    }
+}
+
+
+// close the data file and free the index
+MyThes::~MyThes()
+{
+    if (thCleanup() != 1) {
+        /* did not cleanup properly - throw exception? */
+    }
+}
+
+
+// read the index file into list/offst and open the data file.
+// Index layout as parsed here: line 1 names the encoding, line 2 holds
+// the number of entries, then follows one "word|offset" line per entry.
+// returns: 1 on success, 0 on failure; members may then be partially
+//          set and the caller is expected to run thCleanup()
+int MyThes::thInitialize(const char* idxpath, const char* datpath)
+{
+
+    // open the index file
+    FILE * pifile = fopen(idxpath,"r");
+    if (!pifile) return 0;
+
+    // parse in encoding and index size
+    char * wrd = (char *)calloc(1, MAX_WD_LEN);
+    if (!wrd) {
+        fprintf(stderr,"Error - bad memory allocation\n");
+        fflush(stderr);
+        fclose(pifile);
+        return 0;
+    }
+    int len = readLine(pifile,wrd,MAX_WD_LEN);
+    encoding = mythesstrdup(wrd);
+    len = readLine(pifile,wrd,MAX_WD_LEN);
+    int idxsz = (len > 0) ? atoi(wrd) : 0;
+
+    // a missing, zero or negative size cannot describe a usable index
+    if (idxsz <= 0) {
+        fprintf(stderr,"Error - bad index size in %s\n",idxpath);
+        fflush(stderr);
+        free((void *)wrd);
+        fclose(pifile);
+        return 0;
+    }
+
+    // now allocate list, offst for the given size
+    list = (char**) calloc(idxsz,sizeof(char*));
+    offst = (unsigned int*) calloc(idxsz,sizeof(unsigned int));
+
+    if ( (!(list)) || (!(offst)) ) {
+        fprintf(stderr,"Error - bad memory allocation\n");
+        fflush(stderr);
+        free((void *)wrd);
+        fclose(pifile);
+        return 0;
+    }
+
+    // now parse the remaining lines of the index
+    len = readLine(pifile,wrd,MAX_WD_LEN);
+    while (len > 0)
+    {
+        int np = mystr_indexOfChar(wrd,'|');
+        if ((nw < idxsz) && (np >= 0)) {
+            *(wrd+np) = '\0';
+            list[nw] = (char *)calloc(1,(np+1));
+            if (!list[nw]) {
+                fprintf(stderr,"Error - bad memory allocation\n");
+                fflush(stderr);
+                free((void *)wrd);
+                fclose(pifile);
+                return 0;
+            }
+            memcpy((list[nw]),wrd,np);
+            offst[nw] = atoi(wrd+np+1);
+            nw++;
+        }
+        len = readLine(pifile,wrd,MAX_WD_LEN);
+    }
+
+    free((void *)wrd);
+    fclose(pifile);
+
+    /* next open the data file */
+    pdfile = fopen(datpath,"r");
+    if (!pdfile) return 0;
+
+    return 1;
+}
+
+
+// close the data file and release the index; every member is reset, so
+// the call is safe on a partially initialised object and may be repeated
+// returns: always 1
+int MyThes::thCleanup()
+{
+    /* first close the data file */
+    if (pdfile) {
+        fclose(pdfile);
+        pdfile=NULL;
+    }
+
+    /* now free up all the allocated strings on the list */
+    if (list) {
+        for (int i=0; i < nw; i++)
+        {
+            if (list[i]) {
+                free(list[i]);
+                list[i] = 0;
+            }
+        }
+        free((void*)list);
+        list = NULL;
+    }
+
+    if (offst) free((void*)offst);
+    offst = NULL;
+
+    if (encoding) free((void*)encoding);
+    encoding = NULL;
+
+    nw = 0;
+    return 1;
+}
+
+
+
+// lookup text in index and count of meanings and a list of meaning entries
+// with each entry having a synonym count and pointer to an
+// array of char * (i.e the synonyms)
+//
+// returns: the number of meanings stored in *pme, or 0 (with *pme set
+//          to NULL) if the word is unknown or an error occurred
+//
+// note: calling routine should call CleanUpAfterLookup with the original
+// meaning point and count to properly deallocate memory
+
+int MyThes::Lookup(const char * pText, int len, mentry** pme)
+{
+
+    *pme = NULL;
+
+    // handle the case of missing file or file related errors
+    if (! pdfile) return 0;
+
+    // reject arguments that cannot name a word
+    if ((!pText) || (len < 0)) return 0;
+
+    /* copy search word and make sure null terminated */
+    char * wrd = (char *) calloc(1,(len+1));
+    if (!wrd) return 0;
+    memcpy(wrd,pText,len);
+
+    /* find it in the list */
+    int idx = binsearch(wrd,list,nw);
+    free(wrd);
+    if (idx < 0) return 0;
+
+    // now seek to the offset
+    long offset = (long) offst[idx];
+    int rc = fseek(pdfile,offset,SEEK_SET);
+    if (rc) {
+        return 0;
+    }
+
+    // grab the count of the number of meanings
+    // and allocate a list of meaning entries
+    char * buf = (char *) malloc( MAX_LN_LEN );
+    if (!buf) return 0;
+    if (readLine(pdfile, buf, (MAX_LN_LEN-1)) < 0) {
+        // buf holds nothing meaningful after a failed read
+        free(buf);
+        return 0;
+    }
+    int np = mystr_indexOfChar(buf,'|');
+    if (np < 0) {
+        free(buf);
+        return 0;
+    }
+    int nmeanings = atoi(buf+np+1);
+    if (nmeanings <= 0) {
+        // a corrupt count must not reach the allocator
+        free(buf);
+        return 0;
+    }
+    // calloc() also rejects a count whose byte size would overflow
+    *pme = (mentry*) calloc( nmeanings, sizeof(mentry) );
+    if (!(*pme)) {
+        free(buf);
+        return 0;
+    }
+
+    // give every entry a defined state first, so that
+    // CleanUpAfterLookup() is safe even if parsing stops early
+    for (int i = 0; i < nmeanings; i++) {
+        (*pme)[i].defn = NULL;
+        (*pme)[i].count = 0;
+        (*pme)[i].psyns = NULL;
+    }
+
+    // now read in each meaning and parse it to get defn, count and synonym lists
+    mentry* pm = *(pme);
+    char dfn[MAX_WD_LEN];
+    bool ok = true;
+
+    for (int j = 0; ok && (j < nmeanings); j++, pm++) {
+        if (readLine(pdfile, buf, (MAX_LN_LEN-1)) < 0) {
+            ok = false;
+            break;
+        }
+
+        // the part of speech is the text in front of the first '|';
+        // it is terminated in place and referenced inside buf
+        char * p = buf;
+        const char * pos = "";
+        np = mystr_indexOfChar(p,'|');
+        if (np >= 0) {
+            *(buf+np) = '\0';
+            pos = buf;
+            p = p + np + 1;
+        }
+
+        // count the number of fields in the remaining line
+        int nf = 1;
+        char * d = p;
+        np = mystr_indexOfChar(d,'|');
+        while ( np >= 0 ) {
+            nf++;
+            d = d + np + 1;
+            np = mystr_indexOfChar(d,'|');
+        }
+        pm->psyns = (char **) malloc(nf*sizeof(char*));
+        if (!pm->psyns) {
+            ok = false;
+            break;
+        }
+
+        // fill in the synonym list; count tracks the entries stored
+        // so far, so cleanup never touches an unset pointer
+        d = p;
+        for (int k = 0; ok && (k < nf); k++) {
+            np = mystr_indexOfChar(d,'|');
+            // np == 0 is an empty field, not the end of the line
+            if (np >= 0) *(d+np) = '\0';
+            pm->psyns[k] = mythesstrdup(d);
+            if (pm->psyns[k]) pm->count = k + 1;
+            else ok = false;
+            if (np >= 0) d = d + np + 1;
+        }
+        if (!ok) break;
+
+        // add pos to first synonym to create the definition
+        size_t plen = strlen(pos);
+        size_t slen = strlen(pm->psyns[0]);
+        if ((plen+slen) < (MAX_WD_LEN - 1)) {
+            memcpy(dfn,pos,plen);
+            *(dfn+plen) = ' ';
+            memcpy((dfn+plen+1),(pm->psyns[0]),slen+1);
+            pm->defn = mythesstrdup(dfn);
+        } else {
+            pm->defn = mythesstrdup(pm->psyns[0]);
+        }
+        if (!pm->defn) ok = false;
+    }
+    free(buf);
+
+    if (!ok) {
+        // hand out either a complete result or nothing at all
+        CleanUpAfterLookup(pme, nmeanings);
+        return 0;
+    }
+    return nmeanings;
+}
+
+
+
+// release the meaning list produced by Lookup() and reset *pme to NULL;
+// nmeanings must be the value Lookup() returned for this list
+void MyThes::CleanUpAfterLookup(mentry ** pme, int nmeanings)
+{
+
+    if (nmeanings == 0) return;
+    if ((!pme) || ((*pme) == NULL)) return;
+
+    mentry * pm = *pme;
+
+    for (int i = 0; i < nmeanings; i++) {
+        // psyns may be NULL although the entry exists
+        if (pm->psyns) {
+            int count = pm->count;
+            for (int j = 0; j < count; j++) {
+                if (pm->psyns[j]) free(pm->psyns[j]);
+                pm->psyns[j] = NULL;
+            }
+            free(pm->psyns);
+        }
+        pm->psyns = NULL;
+        if (pm->defn) free(pm->defn);
+        pm->defn = NULL;
+        pm->count = 0;
+        pm++;
+    }
+    pm = *pme;
+    free(pm);
+    *pme = NULL;
+    return;
+}
+
+
+// read a line of text from a text file stripping
+// off the line terminator and replacing it with
+// a null string terminator.
+// returns:  -1 on error or the number of characters in
+//             in the returning string
+
+// A maximum of nc-1 characters will be returned
+
+int MyThes::readLine(FILE * pf, char * buf, int nc)
+{
+
+    if (fgets(buf,nc,pf)) {
+        mytheschomp(buf);
+        return (int)strlen(buf);
+    }
+    return -1;
+}
+
+
+
+// performs a binary search on null terminated character
+// strings
+//
+// returns: -1 on not found
+//          index of wrd in the list[]
+
+int MyThes::binsearch(char * sw, char* list[], int nlst)
+{
+    // an empty or missing list has no first/last element to compare
+    if ((!list) || (nlst <= 0)) return -1;
+
+    int lp = 0;
+    int up = nlst-1;
+    if (strcmp(sw,list[lp]) < 0) return -1;
+    if (strcmp(sw,list[up]) > 0) return -1;
+    while (lp <= up) {
+        int mp = lp + ((up - lp) >> 1);
+        int j = strcmp(sw,list[mp]);
+        if ( j > 0) {
+            lp = mp + 1;
+        } else if (j < 0 ) {
+            up = mp - 1;
+        } else {
+            return mp;
+        }
+    }
+    return -1;
+}
+
+// returns the encoding name read from the index file, or NULL if the
+// thesaurus could not be loaded; the string stays owned by MyThes
+char * MyThes::get_th_encoding()
+{
+    return encoding;
+}
+
diff --git a/src/support/mythes/mythes.hxx b/src/support/mythes/mythes.hxx
new file mode 100644
index 0000000000..489481b3d7
--- /dev/null
+++ b/src/support/mythes/mythes.hxx
@@ -0,0 +1,67 @@
+#ifndef _MYTHES_HXX_
+#define _MYTHES_HXX_
+
+// Modified for LyX: <stdio.h> is included so the header is self-contained.
+#include <stdio.h>
+
+// some maximum sizes for buffers
+#define MAX_WD_LEN 200
+#define MAX_LN_LEN 16384
+
+// a meaning with definition, count of synonyms and synonym list;
+// filled in by MyThes::Lookup, released by MyThes::CleanUpAfterLookup
+struct mentry {
+  char* defn;
+  int count;
+  char** psyns;
+};
+
+
+class MyThes
+{
+
+    int nw; /* number of entries in thesaurus */
+    char** list; /* stores word list */
+    unsigned int* offst; /* stores offset list */
+    char * encoding; /* stores text encoding; */
+
+    FILE *pdfile; /* data file, NULL while not open */
+
+    // disallow copy-constructor and assignment-operator for now
+    MyThes();
+    MyThes(const MyThes &);
+    MyThes & operator = (const MyThes &);
+
+public:
+    // load the index and open the data file; a failure is reported on
+    // stderr only and leaves an object whose Lookup always returns 0
+    MyThes(const char* idxpath, const char* datpath);
+    ~MyThes();
+
+    // lookup text in index and return number of meanings
+    // each meaning entry has a definition, synonym count and pointer
+    // when complete return the *original* meaning entry and count via
+    // CleanUpAfterLookup to properly handle memory deallocation
+
+    int Lookup(const char * pText, int len, mentry** pme);
+
+    void CleanUpAfterLookup(mentry** pme, int nmean);
+
+    // encoding name from the index file, NULL if nothing was loaded
+    char* get_th_encoding();
+
+private:
+    // Open index and dat files and load list array
+    int thInitialize (const char* indxpath, const char* datpath);
+
+    // internal close and cleanup dat and idx files
+    int thCleanup ();
+
+    // read a text line (\n terminated) stripping off line terminator
+    int readLine(FILE * pf, char * buf, int nc);
+
+    // binary search on null terminated character strings
+    int binsearch(char * wrd, char* list[], int nlst);
+};
+
+#endif