Update bundled mythes to version 1.2.5

Move it to 3rdparty/ directory alongside the other ones.
This commit is contained in:
Jean-Marc Lasgouttes 2017-03-06 17:08:38 +01:00
parent 8cb021b32f
commit 0e50ad8b16
12 changed files with 219 additions and 125 deletions

View File

@ -1,6 +1,6 @@
include $(top_srcdir)/config/common.am include $(top_srcdir)/config/common.am
DIST_SUBDIRS = boost hunspell libiconv zlib DIST_SUBDIRS = boost hunspell mythes libiconv zlib
if USE_INCLUDED_BOOST if USE_INCLUDED_BOOST
if LYX_USE_STD_REGEX if LYX_USE_STD_REGEX
@ -13,6 +13,10 @@ if USE_INCLUDED_HUNSPELL
HUNSPELL = hunspell HUNSPELL = hunspell
endif endif
if USE_INCLUDED_MYTHES
MYTHES = mythes
endif
if USE_INCLUDED_ICONV if USE_INCLUDED_ICONV
ICONV = libiconv ICONV = libiconv
endif endif
@ -21,4 +25,4 @@ if USE_INCLUDED_ZLIB
ZLIB = zlib ZLIB = zlib
endif endif
SUBDIRS = $(BOOST) $(HUNSPELL) $(ICONV) $(ZLIB) SUBDIRS = $(BOOST) $(HUNSPELL) $(MYTHES) $(ICONV) $(ZLIB)

3
3rdparty/mythes/1.2.5/AUTHORS vendored Normal file
View File

@ -0,0 +1,3 @@
Kevin Hendricks <kevin.hendricks@sympatico.ca>
Németh László <nemeth@openoffice.org>
Caolán McNamara <caolanm@redhat.com>

63
3rdparty/mythes/1.2.5/README vendored Normal file
View File

@ -0,0 +1,63 @@
MyThes is a simple thesaurus that uses a structured
text data file and an index file with binary search
to lookup words and phrases and return information
on part of speech, meanings, and synonyms.
MyThes was originally written to provide a thesaurus
for the OpenOffice.org project.
The Main features of MyThes are:
1. written in C++ to make it easier to interface with
LibreOffice, OpenOffice, AbiWord, Pspell, etc
2. it is stateless, uses no static variables and
should be completely reentrant with no ifdefs
3. it compiles with -ansi and -pedantic and -Wall
with no warnings so it should be quite portable
4. it uses a simple perl program to read the structured
text file and create the index needed for binary
searching
5. it is very simple with *lots* of comments.
The main "smarts" are in the structure of the
text file that makes up the thesaurus data
6. It comes with a ready-to-go structured thesaurus
data file for en_US extracted from the WordNet-2.0 data.
Please see WordNet_license.txt and WordNet_readme.txt
for more information on the very useful project!
See http://www.danielnaber.de/wn2ooo/ for utilities to
regenerate an up to date English thesaurus from the most
recent WordNet data.
7. The source code has a BSD license (and no advertising clause)
MyThes comes with a simple example program that looks up some words and returns
meanings and synonyms.
To build it simply do the following:
unzip mythes.zip
cd mythes
./configure
make
To run the example program:
./example th_en_US_new.idx th_en_US_new.dat checkme.lst
To run the example program with stemming and morphological generation:
e.g. to check mouse, mice, rodents, eats, eaten, ate, eating etc. words
./example morph.idx morph.dat morph.lst morph.aff morph.dic
NOTE: this is only an example and test environment for dictionary developers,
full English stemming and morphological generation needs an improved
English Hunspell dictionary.
László Németh <nemeth at OO.o>
Kevin Hendricks <kevin.hendricks@sympatico.ca>

View File

@ -1,60 +1,25 @@
#include "license.readme" #include "COPYING"
#include <stdio.h> #include <stdio.h>
#include <string.h> #include <string.h>
#include <stdlib.h> #include <stdlib.h>
#include <errno.h> #include <errno.h>
#include <limits>
#include <vector>
#include "mythes.hxx" #include "mythes.hxx"
// some basic utility routines
// string duplication routine
char * mythesstrdup(const char * p)
{
int sl = strlen(p) + 1;
char * d = (char *)malloc(sl);
if (d) {
memcpy(d,p,sl);
return d;
}
return NULL;
}
// return index of char in string
int mystr_indexOfChar(const char * d, int c)
{
const char * p = strchr(d,c);
if (p) return (int)(p-d);
return -1;
}
// remove cross-platform text line end characters
void mytheschomp(char * s)
{
int k = strlen(s);
if ((k > 0) && ((*(s+k-1)=='\r') || (*(s+k-1)=='\n'))) *(s+k-1) = '\0';
if ((k > 1) && (*(s+k-2) == '\r')) *(s+k-2) = '\0';
}
MyThes::MyThes(const char* idxpath, const char * datpath) MyThes::MyThes(const char* idxpath, const char * datpath)
{ {
nw = 0; nw = 0;
encoding = NULL; encoding = NULL;
list = NULL; list = NULL;
offst = NULL; offst = NULL;
pdfile = NULL;
if (thInitialize(idxpath, datpath) != 1) { if (thInitialize(idxpath, datpath) != 1) {
fprintf(stderr,"Error - can't open %s or %s\n",idxpath, datpath); fprintf(stderr,"Error - can't open %s or %s\n",idxpath, datpath);
fflush(stderr); fflush(stderr);
if (encoding) free((void*)encoding); thCleanup();
if (list) free((void*)list);
if (offst) free((void*)offst);
// did not initialize properly - throw exception? // did not initialize properly - throw exception?
} }
} }
@ -62,13 +27,7 @@ MyThes::MyThes(const char* idxpath, const char * datpath)
MyThes::~MyThes() MyThes::~MyThes()
{ {
if (thCleanup() != 1) { thCleanup();
/* did not cleanup properly - throw exception? */
}
if (encoding) free((void*)encoding);
encoding = NULL;
list = NULL;
offst = NULL;
} }
@ -78,18 +37,22 @@ int MyThes::thInitialize(const char* idxpath, const char* datpath)
// open the index file // open the index file
FILE * pifile = fopen(idxpath,"r"); FILE * pifile = fopen(idxpath,"r");
if (!pifile) { if (!pifile) {
pifile = NULL;
return 0; return 0;
} }
// parse in encoding and index size */ // parse in encoding and index size */
char * wrd; std::vector<char> buffer(MAX_WD_LEN);
wrd = (char *)calloc(1, MAX_WD_LEN); char * wrd = &buffer[0];
int len = readLine(pifile,wrd,MAX_WD_LEN); readLine(pifile,wrd,MAX_WD_LEN);
encoding = mythesstrdup(wrd); encoding = mystrdup(wrd);
len = readLine(pifile,wrd,MAX_WD_LEN); readLine(pifile,wrd,MAX_WD_LEN);
int idxsz = atoi(wrd); int idxsz = atoi(wrd);
if (idxsz <= 0 || idxsz > std::numeric_limits<int>::max() / sizeof(sizeof(char*))) {
fprintf(stderr,"Error - bad index %d\n", idxsz);
fclose(pifile);
return 0;
}
// now allocate list, offst for the given size // now allocate list, offst for the given size
list = (char**) calloc(idxsz,sizeof(char*)); list = (char**) calloc(idxsz,sizeof(char*));
@ -97,35 +60,38 @@ int MyThes::thInitialize(const char* idxpath, const char* datpath)
if ( (!(list)) || (!(offst)) ) { if ( (!(list)) || (!(offst)) ) {
fprintf(stderr,"Error - bad memory allocation\n"); fprintf(stderr,"Error - bad memory allocation\n");
fflush(stderr); fclose(pifile);
return 0; return 0;
} }
// now parse the remaining lines of the index // now parse the remaining lines of the index
len = readLine(pifile,wrd,MAX_WD_LEN); int len = readLine(pifile,wrd,MAX_WD_LEN);
while (len > 0) while (len > 0)
{ {
int np = mystr_indexOfChar(wrd,'|'); int np = mystr_indexOfChar(wrd,'|');
if (nw < idxsz) { if (nw < idxsz) {
if (np >= 0) { if (np >= 0) {
*(wrd+np) = '\0'; *(wrd+np) = '\0';
list[nw] = (char *)calloc(1,(np+1)); list[nw] = (char *)calloc(1,(np+1));
memcpy((list[nw]),wrd,np); if (!list[nw]) {
offst[nw] = atoi(wrd+np+1); fprintf(stderr,"Error - bad memory allocation\n");
nw++; fflush(stderr);
} fclose(pifile);
return 0;
}
memcpy((list[nw]),wrd,np);
offst[nw] = atoi(wrd+np+1);
nw++;
}
} }
len = readLine(pifile,wrd,MAX_WD_LEN); len = readLine(pifile,wrd,MAX_WD_LEN);
} }
free((void *)wrd);
fclose(pifile); fclose(pifile);
pifile=NULL;
/* next open the data file */ /* next open the data file */
pdfile = fopen(datpath,"r"); pdfile = fopen(datpath,"r");
if (!pdfile) { if (!pdfile) {
pdfile = NULL;
return 0; return 0;
} }
@ -133,7 +99,7 @@ int MyThes::thInitialize(const char* idxpath, const char* datpath)
} }
int MyThes::thCleanup() void MyThes::thCleanup()
{ {
/* first close the data file */ /* first close the data file */
if (pdfile) { if (pdfile) {
@ -141,20 +107,26 @@ int MyThes::thCleanup()
pdfile=NULL; pdfile=NULL;
} }
/* now free up all the allocated strings on the list */ if (list)
for (int i=0; i < nw; i++)
{ {
if (list[i]) { /* now free up all the allocated strings on the list */
free(list[i]); for (int i=0; i < nw; i++)
list[i] = 0; {
if (list[i]) {
free(list[i]);
list[i] = 0;
}
} }
free((void*)list);
} }
if (list) free((void*)list); if (encoding) free((void*)encoding);
if (offst) free((void*)offst); if (offst) free((void*)offst);
encoding = NULL;
list = NULL;
offst = NULL;
nw = 0; nw = 0;
return 1;
} }
@ -177,12 +149,12 @@ int MyThes::Lookup(const char * pText, int len, mentry** pme)
long offset = 0; long offset = 0;
/* copy search word and make sure null terminated */ /* copy search word and make sure null terminated */
char * wrd = (char *) calloc(1,(len+1)); std::vector<char> buffer(len+1);
char * wrd = &buffer[0];
memcpy(wrd,pText,len); memcpy(wrd,pText,len);
/* find it in the list */ /* find it in the list */
int idx = binsearch(wrd,list,nw); int idx = nw > 0 ? binsearch(wrd,list,nw) : -1;
free(wrd);
if (idx < 0) return 0; if (idx < 0) return 0;
// now seek to the offset // now seek to the offset
@ -204,7 +176,9 @@ int MyThes::Lookup(const char * pText, int len, mentry** pme)
return 0; return 0;
} }
int nmeanings = atoi(buf+np+1); int nmeanings = atoi(buf+np+1);
*pme = (mentry*) malloc( nmeanings * sizeof(mentry) ); if (nmeanings < 0 || nmeanings > std::numeric_limits<int>::max() / sizeof(mentry))
nmeanings = 0;
*pme = (mentry*)(nmeanings ? malloc(nmeanings * sizeof(mentry)) : NULL);
if (!(*pme)) { if (!(*pme)) {
free(buf); free(buf);
return 0; return 0;
@ -227,10 +201,10 @@ int MyThes::Lookup(const char * pText, int len, mentry** pme)
np = mystr_indexOfChar(p,'|'); np = mystr_indexOfChar(p,'|');
if (np >= 0) { if (np >= 0) {
*(buf+np) = '\0'; *(buf+np) = '\0';
pos = mythesstrdup(p); pos = mystrdup(p);
p = p + np + 1; p = p + np + 1;
} else { } else {
pos = mythesstrdup(""); pos = mystrdup("");
} }
// count the number of fields in the remaining line // count the number of fields in the remaining line
@ -247,28 +221,35 @@ int MyThes::Lookup(const char * pText, int len, mentry** pme)
// fill in the synonym list // fill in the synonym list
d = p; d = p;
for (int j = 0; j < nf; j++) { for (int jj = 0; jj < nf; jj++)
{
np = mystr_indexOfChar(d,'|'); np = mystr_indexOfChar(d,'|');
if (np > 0) { if (np > 0)
*(d+np) = '\0'; {
pm->psyns[j] = mythesstrdup(d); *(d+np) = '\0';
d = d + np + 1; pm->psyns[jj] = mystrdup(d);
} else { d = d + np + 1;
pm->psyns[j] = mythesstrdup(d); }
} else
{
pm->psyns[jj] = mystrdup(d);
}
} }
// add pos to first synonym to create the definition // add pos to first synonym to create the definition
int k = strlen(pos); if (pm->psyns[0])
int m = strlen(pm->psyns[0]); {
if ((k+m) < (MAX_WD_LEN - 1)) { int k = strlen(pos);
strncpy(dfn,pos,k); int m = strlen(pm->psyns[0]);
*(dfn+k) = ' '; if ((k+m) < (MAX_WD_LEN - 1)) {
strncpy((dfn+k+1),(pm->psyns[0]),m+1); strncpy(dfn,pos,k);
pm->defn = mythesstrdup(dfn); *(dfn+k) = ' ';
} else { strncpy((dfn+k+1),(pm->psyns[0]),m+1);
pm->defn = mythesstrdup(pm->psyns[0]); pm->defn = mystrdup(dfn);
} } else {
pm->defn = mystrdup(pm->psyns[0]);
}
}
free(pos); free(pos);
pm++; pm++;
@ -320,7 +301,7 @@ int MyThes::readLine(FILE * pf, char * buf, int nc)
{ {
if (fgets(buf,nc,pf)) { if (fgets(buf,nc,pf)) {
mytheschomp(buf); mychomp(buf);
return strlen(buf); return strlen(buf);
} }
return -1; return -1;
@ -334,17 +315,17 @@ int MyThes::readLine(FILE * pf, char * buf, int nc)
// returns: -1 on not found // returns: -1 on not found
// index of wrd in the list[] // index of wrd in the list[]
int MyThes::binsearch(char * sw, char* list[], int nlst) int MyThes::binsearch(char * sw, char* _list[], int nlst)
{ {
int lp, up, mp, j, indx; int lp, up, mp, j, indx;
lp = 0; lp = 0;
up = nlst-1; up = nlst-1;
indx = -1; indx = -1;
if (strcmp(sw,list[lp]) < 0) return -1; if (strcmp(sw,_list[lp]) < 0) return -1;
if (strcmp(sw,list[up]) > 0) return -1; if (strcmp(sw,_list[up]) > 0) return -1;
while (indx < 0 ) { while (indx < 0 ) {
mp = (int)((lp+up) >> 1); mp = (int)((lp+up) >> 1);
j = strcmp(sw,list[mp]); j = strcmp(sw,_list[mp]);
if ( j > 0) { if ( j > 0) {
lp = mp + 1; lp = mp + 1;
} else if (j < 0 ) { } else if (j < 0 ) {
@ -359,7 +340,36 @@ int MyThes::binsearch(char * sw, char* list[], int nlst)
char * MyThes::get_th_encoding() char * MyThes::get_th_encoding()
{ {
if (encoding) return encoding; return encoding;
return NULL; }
// string duplication routine
char * MyThes::mystrdup(const char * s)
{
char * d = NULL;
if (s) {
int sl = strlen(s)+1;
d = (char *) malloc(sl);
if (d) memcpy(d,s,sl);
}
return d;
}
// remove cross-platform text line end characters
void MyThes::mychomp(char * s)
{
int k = strlen(s);
if ((k > 0) && ((*(s+k-1)=='\r') || (*(s+k-1)=='\n'))) *(s+k-1) = '\0';
if ((k > 1) && (*(s+k-2) == '\r')) *(s+k-2) = '\0';
}
// return index of char in string
int MyThes::mystr_indexOfChar(const char * d, int c)
{
char * p = strchr((char *)d,c);
if (p) return (int)(p-d);
return -1;
} }

View File

@ -49,7 +49,7 @@ private:
int thInitialize (const char* indxpath, const char* datpath); int thInitialize (const char* indxpath, const char* datpath);
// internal close and cleanup dat and idx files // internal close and cleanup dat and idx files
int thCleanup (); void thCleanup ();
// read a text line (\n terminated) stripping off line terminator // read a text line (\n terminated) stripping off line terminator
int readLine(FILE * pf, char * buf, int nc); int readLine(FILE * pf, char * buf, int nc);
@ -57,6 +57,15 @@ private:
// binary search on null terminated character strings // binary search on null terminated character strings
int binsearch(char * wrd, char* list[], int nlst); int binsearch(char * wrd, char* list[], int nlst);
// string duplication routine
char * mystrdup(const char * p);
// remove cross-platform text line end characters
void mychomp(char * s);
// return index of char in string
int mystr_indexOfChar(const char * d, int c);
}; };
#endif #endif

12
3rdparty/mythes/Makefile.am vendored Normal file
View File

@ -0,0 +1,12 @@
include $(top_srcdir)/config/common.am
noinst_LIBRARIES = liblyxmythes.a
EXTRA_DIST = \
1.2.5/AUTHORS \
1.2.5/COPYING \
1.2.5/README
liblyxmythes_a_SOURCES = \
1.2.5/mythes.cxx \
1.2.5/myspell.hxx

View File

@ -652,14 +652,16 @@ AC_DEFUN([LYX_USE_INCLUDED_MYTHES],[
break]) break])
AC_LANG_POP(C++) AC_LANG_POP(C++)
fi fi
if test $use_included_mythes = no ; then if test $use_included_mythes = yes ; then
AC_DEFINE(USE_EXTERNAL_MYTHES, 1, [Define as 1 to use an external MyThes library]) mythes_h_location="<mythes.hxx>"
AC_DEFINE_UNQUOTED(MYTHES_H_LOCATION,$mythes_h_location,[Location of mythes.hxx]) MYTHES_INCLUDES='-I$(top_srcdir)/3rdparty/mythes/1.2.5/'
AC_SUBST(MYTHES_LIBS) MYTHES_LIBS='$(top_builddir)/3rdparty/mythes/liblyxmythes.a'
else
lyx_included_libs="$lyx_included_libs mythes" lyx_included_libs="$lyx_included_libs mythes"
fi fi
AM_CONDITIONAL(USE_INCLUDED_MYTHES, test x$use_included_mythes = xyes) AM_CONDITIONAL(USE_INCLUDED_MYTHES, test x$use_included_mythes = xyes)
AC_DEFINE_UNQUOTED(MYTHES_H_LOCATION,$mythes_h_location,[Location of mythes.hxx])
AC_SUBST(MYTHES_INCLUDES)
AC_SUBST(MYTHES_LIBS)
AC_MSG_CHECKING([whether to use included MyThes library]) AC_MSG_CHECKING([whether to use included MyThes library])
AC_MSG_RESULT([$use_included_mythes]) AC_MSG_RESULT([$use_included_mythes])
]) ])

View File

@ -372,6 +372,7 @@ AC_CONFIG_FILES([Makefile \
3rdparty/Makefile \ 3rdparty/Makefile \
3rdparty/boost/Makefile \ 3rdparty/boost/Makefile \
3rdparty/hunspell/Makefile \ 3rdparty/hunspell/Makefile \
3rdparty/mythes/Makefile \
3rdparty/libiconv/Makefile \ 3rdparty/libiconv/Makefile \
$ICONV_ICONV_H_IN \ $ICONV_ICONV_H_IN \
3rdparty/zlib/Makefile \ 3rdparty/zlib/Makefile \

View File

@ -4,7 +4,7 @@ include $(top_srcdir)/config/common.am
AM_CPPFLAGS += -I$(top_srcdir)/src AM_CPPFLAGS += -I$(top_srcdir)/src
AM_CPPFLAGS += $(BOOST_INCLUDES) $(ICONV_INCLUDES) $(ZLIB_INCLUDES) AM_CPPFLAGS += $(BOOST_INCLUDES) $(ICONV_INCLUDES) $(ZLIB_INCLUDES)
AM_CPPFLAGS += $(ENCHANT_CFLAGS) $(HUNSPELL_CFLAGS) AM_CPPFLAGS += $(ENCHANT_CFLAGS) $(HUNSPELL_CFLAGS) $(MYTHES_INCLUDES)
AM_CPPFLAGS += $(QT_CPPFLAGS) $(QT_CORE_INCLUDES) AM_CPPFLAGS += $(QT_CPPFLAGS) $(QT_CORE_INCLUDES)
if BUILD_CLIENT_SUBDIR if BUILD_CLIENT_SUBDIR

View File

@ -27,12 +27,8 @@
#include "support/lstrings.h" #include "support/lstrings.h"
#include "support/os.h" #include "support/os.h"
#ifdef USE_EXTERNAL_MYTHES
#include MYTHES_H_LOCATION
#else
#include <cstdio> #include <cstdio>
#include "support/mythes/mythes.hxx" #include MYTHES_H_LOCATION
#endif
#include "frontends/alert.h" #include "frontends/alert.h"

View File

@ -113,12 +113,6 @@ liblyxsupport_a_SOURCES = \
unicode.cpp \ unicode.cpp \
unicode.h \ unicode.h \
weighted_btree.h weighted_btree.h
if USE_INCLUDED_MYTHES
liblyxsupport_a_SOURCES += \
mythes/mythes.cxx \
mythes/mythes.hxx \
mythes/license.readme
endif
#if INSTALL_MACOSX #if INSTALL_MACOSX
#liblyxsupport_a_SOURCES += \ #liblyxsupport_a_SOURCES += \