From 0e50ad8b164724b8e05005c6c5e21d51cd8c8421 Mon Sep 17 00:00:00 2001 From: Jean-Marc Lasgouttes Date: Mon, 6 Mar 2017 17:08:38 +0100 Subject: [PATCH] Update bundled mythes to version 1.2.5 Move it to 3rdparty/ directory alongside the other ones. --- 3rdparty/Makefile.am | 8 +- 3rdparty/mythes/1.2.5/AUTHORS | 3 + .../mythes/1.2.5/COPYING | 0 3rdparty/mythes/1.2.5/README | 63 +++++ .../mythes/1.2.5}/mythes.cxx | 220 +++++++++--------- .../mythes/1.2.5}/mythes.hxx | 11 +- 3rdparty/mythes/Makefile.am | 12 + config/lyxinclude.m4 | 12 +- configure.ac | 1 + src/Makefile.am | 2 +- src/Thesaurus.cpp | 6 +- src/support/Makefile.am | 6 - 12 files changed, 219 insertions(+), 125 deletions(-) create mode 100644 3rdparty/mythes/1.2.5/AUTHORS rename src/support/mythes/license.readme => 3rdparty/mythes/1.2.5/COPYING (100%) create mode 100644 3rdparty/mythes/1.2.5/README rename {src/support/mythes => 3rdparty/mythes/1.2.5}/mythes.cxx (68%) rename {src/support/mythes => 3rdparty/mythes/1.2.5}/mythes.hxx (84%) create mode 100644 3rdparty/mythes/Makefile.am diff --git a/3rdparty/Makefile.am b/3rdparty/Makefile.am index 02dbfef70b..c6cc780644 100644 --- a/3rdparty/Makefile.am +++ b/3rdparty/Makefile.am @@ -1,6 +1,6 @@ include $(top_srcdir)/config/common.am -DIST_SUBDIRS = boost hunspell libiconv zlib +DIST_SUBDIRS = boost hunspell mythes libiconv zlib if USE_INCLUDED_BOOST if LYX_USE_STD_REGEX @@ -13,6 +13,10 @@ if USE_INCLUDED_HUNSPELL HUNSPELL = hunspell endif +if USE_INCLUDED_MYTHES +MYTHES = mythes +endif + if USE_INCLUDED_ICONV ICONV = libiconv endif @@ -21,4 +25,4 @@ if USE_INCLUDED_ZLIB ZLIB = zlib endif -SUBDIRS = $(BOOST) $(HUNSPELL) $(ICONV) $(ZLIB) +SUBDIRS = $(BOOST) $(HUNSPELL) $(MYTHES) $(ICONV) $(ZLIB) diff --git a/3rdparty/mythes/1.2.5/AUTHORS b/3rdparty/mythes/1.2.5/AUTHORS new file mode 100644 index 0000000000..274fd9a40f --- /dev/null +++ b/3rdparty/mythes/1.2.5/AUTHORS @@ -0,0 +1,3 @@ +Kevin Hendricks +Németh László +Caolán McNamara diff --git 
a/src/support/mythes/license.readme b/3rdparty/mythes/1.2.5/COPYING similarity index 100% rename from src/support/mythes/license.readme rename to 3rdparty/mythes/1.2.5/COPYING diff --git a/3rdparty/mythes/1.2.5/README b/3rdparty/mythes/1.2.5/README new file mode 100644 index 0000000000..35ee09441c --- /dev/null +++ b/3rdparty/mythes/1.2.5/README @@ -0,0 +1,63 @@ +MyThes is a simple thesaurus that uses a structured +text data file and an index file with binary search +to lookup words and phrases and return information +on part of speech, meanings, and synonyms + +MyThes was originally written to provide a thesaurus +for the OpenOffice.org project + +The Main features of MyThes are: + +1. written in C++ to make it easier to interface with + LibreOffice, OpenOffice, AbiWord, Pspell, etc + +2. it is stateless, uses no static variables and + should be completely reentrant with no ifdefs + +3. it compiles with -ansi and -pedantic and -Wall + with no warnings so it should be quite portable + +4. it uses a simple perl program to read the structured + text file and create the index needed for binary + searching + +5. it is very simple with *lots* of comments. + The main "smarts" are in the structure of the + text file that makes up the thesaurus data + +6. It comes with a ready-to-go structured thesaurus + data file for en_US extracted from the WordNet-2.0 data. + + Please see WordNet_license.txt and WordNet_readme.txt + for more information on the very useful project! + + See http://www.danielnaber.de/wn2ooo/ for utilities to + regenerate an up to date English thesaurus from the most + recent WordNet data. + +7. The source code has a BSD license (and no advertising clause) + + +MyThes comes with a simple example program that looks up some words and returns +meanings and synonyms. 
+ +To build it simply do the following: + +unzip mythes.zip +cd mythes +./configure +make + +To run the example program: +./example th_en_US_new.idx th_en_US_new.dat checkme.lst + +To run the example program with stemming and morphological generation: +e.g. to check mouse, mice, rodents, eats, eaten, ate, eating etc. words +./example morph.idx morph.dat morph.lst morph.aff morph.dic + +NOTE: this is only an example and test environment for dictionary developers, +full English stemming and morphological generation needs an improved +English Hunspell dictionary. + +László Németh +Kevin Hendricks diff --git a/src/support/mythes/mythes.cxx b/3rdparty/mythes/1.2.5/mythes.cxx similarity index 68% rename from src/support/mythes/mythes.cxx rename to 3rdparty/mythes/1.2.5/mythes.cxx index c530580384..675bbfe3cc 100644 --- a/src/support/mythes/mythes.cxx +++ b/3rdparty/mythes/1.2.5/mythes.cxx @@ -1,60 +1,25 @@ -#include "license.readme" +#include "COPYING" #include <stdio.h> #include <string.h> #include <stdlib.h> #include <errno.h> +#include <limits> +#include <vector> #include "mythes.hxx" -// some basic utility routines - - -// string duplication routine -char * mythesstrdup(const char * p) -{ - - int sl = strlen(p) + 1; - char * d = (char *)malloc(sl); - if (d) { - memcpy(d,p,sl); - return d; - } - return NULL; -} - - -// return index of char in string -int mystr_indexOfChar(const char * d, int c) -{ - const char * p = strchr(d,c); - if (p) return (int)(p-d); - return -1; -} - - -// remove cross-platform text line end characters -void mytheschomp(char * s) -{ - int k = strlen(s); - if ((k > 0) && ((*(s+k-1)=='\r') || (*(s+k-1)=='\n'))) *(s+k-1) = '\0'; - if ((k > 1) && (*(s+k-2) == '\r')) *(s+k-2) = '\0'; -} - - - MyThes::MyThes(const char* idxpath, const char * datpath) { nw = 0; encoding = NULL; list = NULL; offst = NULL; + pdfile = NULL; if (thInitialize(idxpath, datpath) != 1) { fprintf(stderr,"Error - can't open %s or %s\n",idxpath, datpath); fflush(stderr); - if (encoding) free((void*)encoding); - if (list) 
free((void*)list); - if (offst) free((void*)offst); + thCleanup(); // did not initialize properly - throw exception? } } @@ -62,13 +27,7 @@ MyThes::MyThes(const char* idxpath, const char * datpath) MyThes::~MyThes() { - if (thCleanup() != 1) { - /* did not cleanup properly - throw exception? */ - } - if (encoding) free((void*)encoding); - encoding = NULL; - list = NULL; - offst = NULL; + thCleanup(); } @@ -78,18 +37,22 @@ int MyThes::thInitialize(const char* idxpath, const char* datpath) // open the index file FILE * pifile = fopen(idxpath,"r"); if (!pifile) { - pifile = NULL; return 0; } // parse in encoding and index size */ - char * wrd; - wrd = (char *)calloc(1, MAX_WD_LEN); - int len = readLine(pifile,wrd,MAX_WD_LEN); - encoding = mythesstrdup(wrd); - len = readLine(pifile,wrd,MAX_WD_LEN); + std::vector<char> buffer(MAX_WD_LEN); + char * wrd = &buffer[0]; + readLine(pifile,wrd,MAX_WD_LEN); + encoding = mystrdup(wrd); + readLine(pifile,wrd,MAX_WD_LEN); int idxsz = atoi(wrd); - + + if (idxsz <= 0 || idxsz > std::numeric_limits<int>::max() / sizeof(sizeof(char*))) { + fprintf(stderr,"Error - bad index %d\n", idxsz); + fclose(pifile); + return 0; + } // now allocate list, offst for the given size list = (char**) calloc(idxsz,sizeof(char*)); @@ -97,35 +60,38 @@ int MyThes::thInitialize(const char* idxpath, const char* datpath) if ( (!(list)) || (!(offst)) ) { fprintf(stderr,"Error - bad memory allocation\n"); - fflush(stderr); + fclose(pifile); return 0; } // now parse the remaining lines of the index - len = readLine(pifile,wrd,MAX_WD_LEN); + int len = readLine(pifile,wrd,MAX_WD_LEN); while (len > 0) { int np = mystr_indexOfChar(wrd,'|'); if (nw < idxsz) { - if (np >= 0) { - *(wrd+np) = '\0'; - list[nw] = (char *)calloc(1,(np+1)); - memcpy((list[nw]),wrd,np); - offst[nw] = atoi(wrd+np+1); - nw++; - } + if (np >= 0) { + *(wrd+np) = '\0'; + list[nw] = (char *)calloc(1,(np+1)); + if (!list[nw]) { + fprintf(stderr,"Error - bad memory allocation\n"); + fflush(stderr); + 
fclose(pifile); + return 0; + } + memcpy((list[nw]),wrd,np); + offst[nw] = atoi(wrd+np+1); + nw++; + } } len = readLine(pifile,wrd,MAX_WD_LEN); } - free((void *)wrd); fclose(pifile); - pifile=NULL; /* next open the data file */ pdfile = fopen(datpath,"r"); if (!pdfile) { - pdfile = NULL; return 0; } @@ -133,7 +99,7 @@ int MyThes::thInitialize(const char* idxpath, const char* datpath) } -int MyThes::thCleanup() +void MyThes::thCleanup() { /* first close the data file */ if (pdfile) { @@ -141,20 +107,26 @@ int MyThes::thCleanup() pdfile=NULL; } - /* now free up all the allocated strings on the list */ - for (int i=0; i < nw; i++) + if (list) { - if (list[i]) { - free(list[i]); - list[i] = 0; + /* now free up all the allocated strings on the list */ + for (int i=0; i < nw; i++) + { + if (list[i]) { + free(list[i]); + list[i] = 0; + } } + free((void*)list); } - if (list) free((void*)list); + if (encoding) free((void*)encoding); if (offst) free((void*)offst); + encoding = NULL; + list = NULL; + offst = NULL; nw = 0; - return 1; } @@ -177,12 +149,12 @@ int MyThes::Lookup(const char * pText, int len, mentry** pme) long offset = 0; /* copy search word and make sure null terminated */ - char * wrd = (char *) calloc(1,(len+1)); + std::vector<char> buffer(len+1); + char * wrd = &buffer[0]; memcpy(wrd,pText,len); /* find it in the list */ - int idx = binsearch(wrd,list,nw); - free(wrd); + int idx = nw > 0 ? binsearch(wrd,list,nw) : -1; if (idx < 0) return 0; // now seek to the offset @@ -204,7 +176,9 @@ int MyThes::Lookup(const char * pText, int len, mentry** pme) return 0; } int nmeanings = atoi(buf+np+1); - *pme = (mentry*) malloc( nmeanings * sizeof(mentry) ); + if (nmeanings < 0 || nmeanings > std::numeric_limits<int>::max() / sizeof(mentry)) + nmeanings = 0; + *pme = (mentry*)(nmeanings ? 
malloc(nmeanings * sizeof(mentry)) : NULL); if (!(*pme)) { free(buf); return 0; @@ -227,10 +201,10 @@ int MyThes::Lookup(const char * pText, int len, mentry** pme) np = mystr_indexOfChar(p,'|'); if (np >= 0) { *(buf+np) = '\0'; - pos = mythesstrdup(p); + pos = mystrdup(p); p = p + np + 1; } else { - pos = mythesstrdup(""); + pos = mystrdup(""); } // count the number of fields in the remaining line @@ -247,28 +221,35 @@ int MyThes::Lookup(const char * pText, int len, mentry** pme) // fill in the synonym list d = p; - for (int j = 0; j < nf; j++) { + for (int jj = 0; jj < nf; jj++) + { np = mystr_indexOfChar(d,'|'); - if (np > 0) { - *(d+np) = '\0'; - pm->psyns[j] = mythesstrdup(d); - d = d + np + 1; - } else { - pm->psyns[j] = mythesstrdup(d); - } + if (np > 0) + { + *(d+np) = '\0'; + pm->psyns[jj] = mystrdup(d); + d = d + np + 1; + } + else + { + pm->psyns[jj] = mystrdup(d); + } } // add pos to first synonym to create the definition - int k = strlen(pos); - int m = strlen(pm->psyns[0]); - if ((k+m) < (MAX_WD_LEN - 1)) { - strncpy(dfn,pos,k); - *(dfn+k) = ' '; - strncpy((dfn+k+1),(pm->psyns[0]),m+1); - pm->defn = mythesstrdup(dfn); - } else { - pm->defn = mythesstrdup(pm->psyns[0]); - } + if (pm->psyns[0]) + { + int k = strlen(pos); + int m = strlen(pm->psyns[0]); + if ((k+m) < (MAX_WD_LEN - 1)) { + strncpy(dfn,pos,k); + *(dfn+k) = ' '; + strncpy((dfn+k+1),(pm->psyns[0]),m+1); + pm->defn = mystrdup(dfn); + } else { + pm->defn = mystrdup(pm->psyns[0]); + } + } free(pos); pm++; @@ -320,7 +301,7 @@ int MyThes::readLine(FILE * pf, char * buf, int nc) { if (fgets(buf,nc,pf)) { - mytheschomp(buf); + mychomp(buf); return strlen(buf); } return -1; @@ -334,17 +315,17 @@ int MyThes::readLine(FILE * pf, char * buf, int nc) // returns: -1 on not found // index of wrd in the list[] -int MyThes::binsearch(char * sw, char* list[], int nlst) +int MyThes::binsearch(char * sw, char* _list[], int nlst) { int lp, up, mp, j, indx; lp = 0; up = nlst-1; indx = -1; - if 
(strcmp(sw,list[lp]) < 0) return -1; - if (strcmp(sw,list[up]) > 0) return -1; + if (strcmp(sw,_list[lp]) < 0) return -1; + if (strcmp(sw,_list[up]) > 0) return -1; while (indx < 0 ) { mp = (int)((lp+up) >> 1); - j = strcmp(sw,list[mp]); + j = strcmp(sw,_list[mp]); if ( j > 0) { lp = mp + 1; } else if (j < 0 ) { @@ -359,7 +340,36 @@ int MyThes::binsearch(char * sw, char* list[], int nlst) char * MyThes::get_th_encoding() { - if (encoding) return encoding; - return NULL; + return encoding; +} + + +// string duplication routine +char * MyThes::mystrdup(const char * s) +{ + char * d = NULL; + if (s) { + int sl = strlen(s)+1; + d = (char *) malloc(sl); + if (d) memcpy(d,s,sl); + } + return d; +} + +// remove cross-platform text line end characters +void MyThes::mychomp(char * s) +{ + int k = strlen(s); + if ((k > 0) && ((*(s+k-1)=='\r') || (*(s+k-1)=='\n'))) *(s+k-1) = '\0'; + if ((k > 1) && (*(s+k-2) == '\r')) *(s+k-2) = '\0'; +} + + +// return index of char in string +int MyThes::mystr_indexOfChar(const char * d, int c) +{ + char * p = strchr((char *)d,c); + if (p) return (int)(p-d); + return -1; } diff --git a/src/support/mythes/mythes.hxx b/3rdparty/mythes/1.2.5/mythes.hxx similarity index 84% rename from src/support/mythes/mythes.hxx rename to 3rdparty/mythes/1.2.5/mythes.hxx index 489481b3d7..eff8aea1bf 100644 --- a/src/support/mythes/mythes.hxx +++ b/3rdparty/mythes/1.2.5/mythes.hxx @@ -49,7 +49,7 @@ private: int thInitialize (const char* indxpath, const char* datpath); // internal close and cleanup dat and idx files - int thCleanup (); + void thCleanup (); // read a text line (\n terminated) stripping off line terminator int readLine(FILE * pf, char * buf, int nc); @@ -57,6 +57,15 @@ private: // binary search on null terminated character strings int binsearch(char * wrd, char* list[], int nlst); + // string duplication routine + char * mystrdup(const char * p); + + // remove cross-platform text line end characters + void mychomp(char * s); + + // return index 
of char in string + int mystr_indexOfChar(const char * d, int c); + }; #endif diff --git a/3rdparty/mythes/Makefile.am b/3rdparty/mythes/Makefile.am new file mode 100644 index 0000000000..dcc2b1957d --- /dev/null +++ b/3rdparty/mythes/Makefile.am @@ -0,0 +1,12 @@ +include $(top_srcdir)/config/common.am + +noinst_LIBRARIES = liblyxmythes.a + +EXTRA_DIST = \ + 1.2.5/AUTHORS \ + 1.2.5/COPYING \ + 1.2.5/README + +liblyxmythes_a_SOURCES = \ + 1.2.5/mythes.cxx \ + 1.2.5/mythes.hxx diff --git a/config/lyxinclude.m4 b/config/lyxinclude.m4 index 8287fb2f5d..9a0bef81a2 100644 --- a/config/lyxinclude.m4 +++ b/config/lyxinclude.m4 @@ -652,14 +652,16 @@ AC_DEFUN([LYX_USE_INCLUDED_MYTHES],[ break]) AC_LANG_POP(C++) fi - if test $use_included_mythes = no ; then - AC_DEFINE(USE_EXTERNAL_MYTHES, 1, [Define as 1 to use an external MyThes library]) - AC_DEFINE_UNQUOTED(MYTHES_H_LOCATION,$mythes_h_location,[Location of mythes.hxx]) - AC_SUBST(MYTHES_LIBS) - else + if test $use_included_mythes = yes ; then + mythes_h_location="<mythes.hxx>" + MYTHES_INCLUDES='-I$(top_srcdir)/3rdparty/mythes/1.2.5/' + MYTHES_LIBS='$(top_builddir)/3rdparty/mythes/liblyxmythes.a' lyx_included_libs="$lyx_included_libs mythes" fi AM_CONDITIONAL(USE_INCLUDED_MYTHES, test x$use_included_mythes = xyes) + AC_DEFINE_UNQUOTED(MYTHES_H_LOCATION,$mythes_h_location,[Location of mythes.hxx]) + AC_SUBST(MYTHES_INCLUDES) + AC_SUBST(MYTHES_LIBS) AC_MSG_CHECKING([whether to use included MyThes library]) AC_MSG_RESULT([$use_included_mythes]) ]) diff --git a/configure.ac b/configure.ac index ce4eb96215..694ddc48fb 100644 --- a/configure.ac +++ b/configure.ac @@ -372,6 +372,7 @@ AC_CONFIG_FILES([Makefile \ 3rdparty/Makefile \ 3rdparty/boost/Makefile \ 3rdparty/hunspell/Makefile \ + 3rdparty/mythes/Makefile \ 3rdparty/libiconv/Makefile \ $ICONV_ICONV_H_IN \ 3rdparty/zlib/Makefile \ diff --git a/src/Makefile.am b/src/Makefile.am index 3f3e99a0e6..9576076d12 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -4,7 +4,7 @@ include 
$(top_srcdir)/config/common.am AM_CPPFLAGS += -I$(top_srcdir)/src AM_CPPFLAGS += $(BOOST_INCLUDES) $(ICONV_INCLUDES) $(ZLIB_INCLUDES) -AM_CPPFLAGS += $(ENCHANT_CFLAGS) $(HUNSPELL_CFLAGS) +AM_CPPFLAGS += $(ENCHANT_CFLAGS) $(HUNSPELL_CFLAGS) $(MYTHES_INCLUDES) AM_CPPFLAGS += $(QT_CPPFLAGS) $(QT_CORE_INCLUDES) if BUILD_CLIENT_SUBDIR diff --git a/src/Thesaurus.cpp b/src/Thesaurus.cpp index 4f4ad64d14..cf8567eb53 100644 --- a/src/Thesaurus.cpp +++ b/src/Thesaurus.cpp @@ -27,12 +27,8 @@ #include "support/lstrings.h" #include "support/os.h" -#ifdef USE_EXTERNAL_MYTHES -#include MYTHES_H_LOCATION -#else #include <cstdio> -#include "support/mythes/mythes.hxx" -#endif +#include MYTHES_H_LOCATION #include "frontends/alert.h" diff --git a/src/support/Makefile.am b/src/support/Makefile.am index a63bb89681..f3a8823ac6 100644 --- a/src/support/Makefile.am +++ b/src/support/Makefile.am @@ -113,12 +113,6 @@ liblyxsupport_a_SOURCES = \ unicode.cpp \ unicode.h \ weighted_btree.h -if USE_INCLUDED_MYTHES -liblyxsupport_a_SOURCES += \ - mythes/mythes.cxx \ - mythes/mythes.hxx \ - mythes/license.readme -endif #if INSTALL_MACOSX #liblyxsupport_a_SOURCES += \