mirror of
https://git.lyx.org/repos/lyx.git
synced 2024-12-26 06:15:16 +00:00
Integrate mythes-1.1 into our source and remove support for aiksaurus. I have tested only the CMake build system. Please test autotools and scons.
git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/trunk@29577 a592a061-630c-0410-9148-cb99ea01b6c8
This commit is contained in:
parent
1f42ac598d
commit
e9be0fae66
38
configure.ac
38
configure.ac
@ -116,44 +116,6 @@ AC_CHECK_LIB(psapi, main, [LIBPSAPI=-lpsapi])
|
||||
AC_SUBST(LIBPSAPI)
|
||||
AC_CHECK_LIB(gdi32, main)
|
||||
|
||||
AC_ARG_WITH(mythes,
|
||||
[ --without-mythes do not use the MyThes library],
|
||||
[lyx_use_mythes=$withval])
|
||||
if test x$lyx_use_mythes != xno; then
|
||||
AC_CHECK_LIB(mythes, main,
|
||||
[AC_DEFINE(HAVE_LIBMYTHES,1,[Define this if you have the MyThes library])
|
||||
MYTHES_LIBS="-lmythes"
|
||||
lyx_flags="mythes $lyx_flags"
|
||||
])
|
||||
AC_CHECK_HEADER(mythes.hxx,[
|
||||
ac_cv_header_mythes_h=yes
|
||||
lyx_cv_mythes_h_location="<mythes.hxx>"])
|
||||
AC_CHECK_HEADER(mythes/mythes.hxx,[
|
||||
ac_cv_header_mythes_h=yes
|
||||
lyx_cv_mythes_h_location="<mythes/mythes.hxx>"])
|
||||
AC_DEFINE_UNQUOTED(MYTHES_H_LOCATION,$lyx_cv_mythes_h_location,[Location of mythes.hxx])
|
||||
fi
|
||||
AC_SUBST(MYTHES_LIBS)
|
||||
|
||||
AC_ARG_WITH(aiksaurus,
|
||||
[ --without-aiksaurus do not use the Aiksaurus library],
|
||||
[lyx_use_aiksaurus=$withval])
|
||||
if test x$lyx_use_aiksaurus != xno; then
|
||||
AC_CHECK_LIB(Aiksaurus, main,
|
||||
[AC_DEFINE(HAVE_LIBAIKSAURUS,1,[Define this if you have the AikSaurus library])
|
||||
AIKSAURUS_LIBS="-lAiksaurus"
|
||||
lyx_flags="aiksaurus $lyx_flags"
|
||||
])
|
||||
AC_CHECK_HEADER(Aiksaurus.h,[
|
||||
ac_cv_header_aiksaurus_h=yes
|
||||
lyx_cv_aiksaurus_h_location="<Aiksaurus.h>"])
|
||||
AC_CHECK_HEADER(Aiksaurus/Aiksaurus.h,[
|
||||
ac_cv_header_aiksaurus_h=yes
|
||||
lyx_cv_aiksaurus_h_location="<Aiksaurus/Aiksaurus.h>"])
|
||||
AC_DEFINE_UNQUOTED(AIKSAURUS_H_LOCATION,$lyx_cv_aiksaurus_h_location,[Location of Aiksaurus.h])
|
||||
fi
|
||||
AC_SUBST(AIKSAURUS_LIBS)
|
||||
|
||||
LYX_USE_INCLUDED_BOOST
|
||||
|
||||
# Needed for our char_type
|
||||
|
@ -9,9 +9,8 @@ project(support)
|
||||
file(GLOB support_sources ${TOP_SRC_DIR}/src/support/${LYX_CPP_FILES})
|
||||
file(GLOB support_headers ${TOP_SRC_DIR}/src/support/${LYX_HPP_FILES})
|
||||
|
||||
file(GLOB support_minizip_sources ${TOP_SRC_DIR}/src/support/minizip/*.c)
|
||||
file(GLOB support_minizip_cpp_sources ${TOP_SRC_DIR}/src/support/minizip/*.cpp)
|
||||
file(GLOB support_minizip_headers ${TOP_SRC_DIR}/src/support/minizip/*.h)
|
||||
file(GLOB support_mythes_sources ${TOP_SRC_DIR}/src/support/mythes/*.cxx)
|
||||
file(GLOB support_mythes_headers ${TOP_SRC_DIR}/src/support/mythes/*.hxx)
|
||||
|
||||
file(GLOB support_linkback_sources ${TOP_SRC_DIR}/src/support/linkback/*.m*)
|
||||
file(GLOB support_linkback_headers ${TOP_SRC_DIR}/src/support/linkback/*.h)
|
||||
@ -23,8 +22,6 @@ list(REMOVE_ITEM support_sources
|
||||
${TOP_SRC_DIR}/src/support/os_os2.C
|
||||
${TOP_SRC_DIR}/src/support/atexit.c
|
||||
${TOP_SRC_DIR}/src/support/strerror.c
|
||||
${TOP_SRC_DIR}/src/support/minizip/iowin32.h
|
||||
${TOP_SRC_DIR}/src/support/minizip/iowin32.c
|
||||
${TOP_SRC_DIR}/src/support/gettext.cpp)
|
||||
|
||||
if(APPLE)
|
||||
@ -43,15 +40,15 @@ lyx_automoc(${support_sources})
|
||||
|
||||
include_directories(${TOP_SRC_DIR}/src/support
|
||||
${CMAKE_BINARY_DIR}/src/support
|
||||
${TOP_SRC_DIR}/src/support/minizip
|
||||
${TOP_SRC_DIR}/src/support/mythes
|
||||
${QT_INCLUDES}
|
||||
${ICONV_INCLUDE_DIR}
|
||||
${ZLIB_INCLUDE_DIR})
|
||||
|
||||
|
||||
if(NOT MERGE_FILES)
|
||||
set(support_sources ${support_sources} ${support_minizip_sources} ${support_minizip_cpp_sources} ${support_linkback_sources})
|
||||
set(support_headers ${support_headers} ${support_minizip_headers} ${support_linkback_headers})
|
||||
set(support_sources ${support_sources} ${support_mythes_sources} ${support_linkback_sources})
|
||||
set(support_headers ${support_headers} ${support_mythes_headers} ${support_linkback_headers})
|
||||
add_library(support ${library_type} ${support_sources} ${support_headers} ${dont_merge})
|
||||
else()
|
||||
lyx_const_touched_files(_allinone support_sources)
|
||||
@ -61,7 +58,7 @@ else()
|
||||
set_source_files_properties(_allinone_touched.C
|
||||
PROPERTIES OBJECT_DEPENDS "${depends_moc}")
|
||||
add_library(support ${library_type} ${_allinone_files}
|
||||
${support_minizip_sources} ${support_minizip_cpp_sources} ${support_linkback_sources} ${support_headers} ${dont_merge})
|
||||
${support_mythes_sources} ${support_linkback_sources} ${support_headers} ${dont_merge})
|
||||
endif()
|
||||
|
||||
target_link_libraries(support boost_signals ${QT_QTCORE_LIBRARY} ${ZLIB_LIBRARY})
|
||||
|
@ -316,6 +316,7 @@ src_support_header_files = Split('''
|
||||
types.h
|
||||
unicode.h
|
||||
userinfo.h
|
||||
mythes/mythes.hxx
|
||||
''')
|
||||
|
||||
|
||||
@ -347,6 +348,7 @@ src_support_files = Split('''
|
||||
socktools.cpp
|
||||
unicode.cpp
|
||||
userinfo.cpp
|
||||
mythes/mythes.cxx
|
||||
''')
|
||||
|
||||
|
||||
|
@ -13,25 +13,19 @@
|
||||
|
||||
#include "Thesaurus.h"
|
||||
|
||||
#include "support/debug.h"
|
||||
#include "support/gettext.h"
|
||||
#include "LyXRC.h"
|
||||
|
||||
#include "support/FileNameList.h"
|
||||
#include "support/debug.h"
|
||||
#include "support/filetools.h"
|
||||
#include "support/gettext.h"
|
||||
#include "support/lstrings.h"
|
||||
#include "support/os.h"
|
||||
#include "support/unicode.h"
|
||||
|
||||
#include "frontends/alert.h"
|
||||
#include "support/mythes/mythes.hxx"
|
||||
|
||||
#ifdef HAVE_LIBMYTHES
|
||||
#include MYTHES_H_LOCATION
|
||||
#else
|
||||
#ifdef HAVE_LIBAIKSAURUS
|
||||
#include AIKSAURUS_H_LOCATION
|
||||
#endif // HAVE_LIBAIKSAURUS
|
||||
#endif // !HAVE_LIBMYTHES
|
||||
#include "frontends/alert.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstring>
|
||||
@ -42,8 +36,6 @@ using namespace lyx::support::os;
|
||||
|
||||
namespace lyx {
|
||||
|
||||
#ifdef HAVE_LIBMYTHES
|
||||
|
||||
namespace {
|
||||
|
||||
string const to_iconv_encoding(docstring const & s, string const & encoding)
|
||||
@ -199,99 +191,6 @@ Thesaurus::Meanings Thesaurus::lookup(docstring const & t, docstring const & lan
|
||||
return meanings;
|
||||
}
|
||||
|
||||
#else // HAVE_LIBMYTHES
|
||||
#ifdef HAVE_LIBAIKSAURUS
|
||||
|
||||
struct Thesaurus::Private
|
||||
{
|
||||
Private(): thes_(new Aiksaurus) {}
|
||||
Aiksaurus * thes_;
|
||||
};
|
||||
|
||||
Thesaurus::Meanings Thesaurus::lookup(docstring const & t, docstring const &)
|
||||
{
|
||||
Meanings meanings;
|
||||
|
||||
// aiksaurus is for english text only, therefore it does not work
|
||||
// with non-ascii strings.
|
||||
// The interface of the Thesaurus class uses docstring because a
|
||||
// non-english thesaurus is possible in theory.
|
||||
if (!support::isAscii(t))
|
||||
// to_ascii() would assert
|
||||
return meanings;
|
||||
|
||||
string const text = to_ascii(t);
|
||||
|
||||
docstring error = from_ascii(d->thes_->error());
|
||||
if (!error.empty()) {
|
||||
static bool sent_error = false;
|
||||
if (!sent_error) {
|
||||
frontend::Alert::error(_("Thesaurus failure"),
|
||||
bformat(_("Aiksaurus returned the following error:\n\n%1$s."),
|
||||
error));
|
||||
sent_error = true;
|
||||
}
|
||||
return meanings;
|
||||
}
|
||||
if (!d->thes_->find(text.c_str()))
|
||||
return meanings;
|
||||
|
||||
// weird api, but ...
|
||||
|
||||
int prev_meaning = -1;
|
||||
int cur_meaning;
|
||||
docstring meaning;
|
||||
|
||||
// correct, returns "" at the end
|
||||
string ret = d->thes_->next(cur_meaning);
|
||||
|
||||
while (!ret.empty()) {
|
||||
if (cur_meaning != prev_meaning) {
|
||||
meaning = from_ascii(ret);
|
||||
ret = d->thes_->next(cur_meaning);
|
||||
prev_meaning = cur_meaning;
|
||||
} else {
|
||||
if (ret != text)
|
||||
meanings[meaning].push_back(from_ascii(ret));
|
||||
}
|
||||
|
||||
ret = d->thes_->next(cur_meaning);
|
||||
}
|
||||
|
||||
for (Meanings::iterator it = meanings.begin();
|
||||
it != meanings.end(); ++it)
|
||||
sort(it->second.begin(), it->second.end());
|
||||
|
||||
return meanings;
|
||||
}
|
||||
|
||||
|
||||
bool Thesaurus::thesaurusAvailable(docstring const & lang) const
|
||||
{
|
||||
// we support English only
|
||||
return prefixIs(lang, from_ascii("en_"));
|
||||
}
|
||||
|
||||
#else // HAVE_LIBAIKSAURUS
|
||||
|
||||
struct Thesaurus::Private
|
||||
{
|
||||
};
|
||||
|
||||
|
||||
Thesaurus::Meanings Thesaurus::lookup(docstring const &, docstring const &)
|
||||
{
|
||||
return Meanings();
|
||||
}
|
||||
|
||||
|
||||
bool Thesaurus::thesaurusAvailable(docstring const &) const
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
#endif // HAVE_LIBAIKSAURUS
|
||||
#endif // HAVE_LIBMYTHES
|
||||
|
||||
Thesaurus::Thesaurus() : d(new Thesaurus::Private)
|
||||
{
|
||||
|
@ -97,7 +97,9 @@ liblyxsupport_a_SOURCES = \
|
||||
userinfo.h \
|
||||
unicode.cpp \
|
||||
unicode.h \
|
||||
weighted_btree.h
|
||||
weighted_btree.h \
|
||||
mythes/mythes.cxx \
|
||||
mythes/mythes.hxx
|
||||
|
||||
if INSTALL_MACOSX
|
||||
liblyxsupport_a_SOURCES += \
|
||||
|
34
src/support/mythes/license.readme
Normal file
34
src/support/mythes/license.readme
Normal file
@ -0,0 +1,34 @@
|
||||
/*
|
||||
* Copyright 2003 Kevin B. Hendricks, Stratford, Ontario, Canada
|
||||
* And Contributors. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. All modifications to the source code must be clearly marked as
|
||||
* such. Binary redistributions based on modified source code
|
||||
* must be clearly marked as modified versions in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
|
||||
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
|
||||
* KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
*/
|
365
src/support/mythes/mythes.cxx
Normal file
365
src/support/mythes/mythes.cxx
Normal file
@ -0,0 +1,365 @@
|
||||
#include "license.readme"
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <errno.h>
|
||||
|
||||
#include "mythes.hxx"
|
||||
|
||||
// some basic utility routines
|
||||
|
||||
|
||||
// string duplication routine
//
// Returns a malloc()ed copy of the null terminated string p, or NULL when
// p is NULL or the allocation fails.  The caller releases the copy with
// free().
char * mythesstrdup(const char * p)
{
    if (!p) return NULL;

    // keep the length in size_t: strlen() does not return an int
    size_t sl = strlen(p) + 1;
    char * d = (char *)malloc(sl);
    if (d) memcpy(d,p,sl);
    return d;
}
|
||||
|
||||
|
||||
// Locate the first occurrence of the character c in the null terminated
// string d.
// returns: the zero based position of c, or -1 when c does not occur
int mystr_indexOfChar(const char * d, int c)
{
    const char * hit = strchr(d, c);
    return hit ? (int)(hit - d) : -1;
}
|
||||
|
||||
|
||||
// remove cross-platform text line end characters
//
// Strips one trailing "\n", "\r" or "\r\n" from s in place.  A string that
// does not end in a line terminator is left untouched, even when it
// contains a '\r' just before its last character.
void mytheschomp(char * s)
{
    if (!s) return;

    size_t k = strlen(s);
    if (k == 0) return;

    char last = *(s+k-1);
    if ((last != '\r') && (last != '\n')) return;
    *(s+k-1) = '\0';

    // only look for the '\r' of a "\r\n" pair once a terminator was removed
    if ((k > 1) && (*(s+k-2) == '\r')) *(s+k-2) = '\0';
}
|
||||
|
||||
|
||||
|
||||
// Construct a thesaurus from an index file and its data file.
//
// idxpath: path of the index file (encoding, entry count, "word|offset" lines)
// datpath: path of the data file the offsets point into
//
// When initialisation fails an error is printed to stderr and the object
// is left empty: no data file is open, so Lookup() returns 0.
MyThes::MyThes(const char* idxpath, const char * datpath)
{
    nw = 0;
    encoding = NULL;
    list = NULL;
    offst = NULL;
    // must have a defined value: Lookup() and thCleanup() both test it
    pdfile = NULL;

    if (thInitialize(idxpath, datpath) != 1) {
        fprintf(stderr,"Error - can't open %s or %s\n",
                idxpath ? idxpath : "(null)", datpath ? datpath : "(null)");
        fflush(stderr);

        // release whatever thInitialize() managed to set up, including the
        // words already stored in list[0..nw)
        thCleanup();

        // reset the pointers so that the destructor does not free them again
        list = NULL;
        offst = NULL;
        if (encoding) free((void*)encoding);
        encoding = NULL;
        // did not initialize properly - throw exception?
    }
}
|
||||
|
||||
|
||||
// Close the data file and release the index and the encoding name.
MyThes::~MyThes()
{
    // the status is deliberately ignored: nothing can be done about a
    // failed cleanup from inside a destructor
    (void) thCleanup();

    // free(NULL) is a no-op, so no guard is needed
    free((void*)encoding);

    encoding = NULL;
    list = NULL;
    offst = NULL;
}
|
||||
|
||||
|
||||
int MyThes::thInitialize(const char* idxpath, const char* datpath)
|
||||
{
|
||||
|
||||
// open the index file
|
||||
FILE * pifile = fopen(idxpath,"r");
|
||||
if (!pifile) {
|
||||
pifile = NULL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
// parse in encoding and index size */
|
||||
char * wrd;
|
||||
wrd = (char *)calloc(1, MAX_WD_LEN);
|
||||
int len = readLine(pifile,wrd,MAX_WD_LEN);
|
||||
encoding = mythesstrdup(wrd);
|
||||
len = readLine(pifile,wrd,MAX_WD_LEN);
|
||||
int idxsz = atoi(wrd);
|
||||
|
||||
|
||||
// now allocate list, offst for the given size
|
||||
list = (char**) calloc(idxsz,sizeof(char*));
|
||||
offst = (unsigned int*) calloc(idxsz,sizeof(unsigned int));
|
||||
|
||||
if ( (!(list)) || (!(offst)) ) {
|
||||
fprintf(stderr,"Error - bad memory allocation\n");
|
||||
fflush(stderr);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// now parse the remaining lines of the index
|
||||
len = readLine(pifile,wrd,MAX_WD_LEN);
|
||||
while (len > 0)
|
||||
{
|
||||
int np = mystr_indexOfChar(wrd,'|');
|
||||
if (nw < idxsz) {
|
||||
if (np >= 0) {
|
||||
*(wrd+np) = '\0';
|
||||
list[nw] = (char *)calloc(1,(np+1));
|
||||
memcpy((list[nw]),wrd,np);
|
||||
offst[nw] = atoi(wrd+np+1);
|
||||
nw++;
|
||||
}
|
||||
}
|
||||
len = readLine(pifile,wrd,MAX_WD_LEN);
|
||||
}
|
||||
|
||||
free((void *)wrd);
|
||||
fclose(pifile);
|
||||
pifile=NULL;
|
||||
|
||||
/* next open the data file */
|
||||
pdfile = fopen(datpath,"r");
|
||||
if (!pdfile) {
|
||||
pdfile = NULL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
int MyThes::thCleanup()
|
||||
{
|
||||
/* first close the data file */
|
||||
if (pdfile) {
|
||||
fclose(pdfile);
|
||||
pdfile=NULL;
|
||||
}
|
||||
|
||||
/* now free up all the allocated strings on the list */
|
||||
for (int i=0; i < nw; i++)
|
||||
{
|
||||
if (list[i]) {
|
||||
free(list[i]);
|
||||
list[i] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (list) free((void*)list);
|
||||
if (offst) free((void*)offst);
|
||||
|
||||
nw = 0;
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
|
||||
// lookup text in index and count of meanings and a list of meaning entries
|
||||
// with each entry having a synonym count and pointer to an
|
||||
// array of char * (i.e the synonyms)
|
||||
//
|
||||
// note: calling routine should call CleanUpAfterLookup with the original
|
||||
// meaning point and count to properly deallocate memory
|
||||
|
||||
int MyThes::Lookup(const char * pText, int len, mentry** pme)
|
||||
{
|
||||
|
||||
*pme = NULL;
|
||||
|
||||
// handle the case of missing file or file related errors
|
||||
if (! pdfile) return 0;
|
||||
|
||||
long offset = 0;
|
||||
|
||||
/* copy search word and make sure null terminated */
|
||||
char * wrd = (char *) calloc(1,(len+1));
|
||||
memcpy(wrd,pText,len);
|
||||
|
||||
/* find it in the list */
|
||||
int idx = binsearch(wrd,list,nw);
|
||||
free(wrd);
|
||||
if (idx < 0) return 0;
|
||||
|
||||
// now seek to the offset
|
||||
offset = (long) offst[idx];
|
||||
int rc = fseek(pdfile,offset,SEEK_SET);
|
||||
if (rc) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// grab the count of the number of meanings
|
||||
// and allocate a list of meaning entries
|
||||
char * buf = NULL;
|
||||
buf = (char *) malloc( MAX_LN_LEN );
|
||||
if (!buf) return 0;
|
||||
readLine(pdfile, buf, (MAX_LN_LEN-1));
|
||||
int np = mystr_indexOfChar(buf,'|');
|
||||
if (np < 0) {
|
||||
free(buf);
|
||||
return 0;
|
||||
}
|
||||
int nmeanings = atoi(buf+np+1);
|
||||
*pme = (mentry*) malloc( nmeanings * sizeof(mentry) );
|
||||
if (!(*pme)) {
|
||||
free(buf);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// now read in each meaning and parse it to get defn, count and synonym lists
|
||||
mentry* pm = *(pme);
|
||||
char dfn[MAX_WD_LEN];
|
||||
|
||||
for (int j = 0; j < nmeanings; j++) {
|
||||
readLine(pdfile, buf, (MAX_LN_LEN-1));
|
||||
|
||||
pm->count = 0;
|
||||
pm->psyns = NULL;
|
||||
pm->defn = NULL;
|
||||
|
||||
// store away the part of speech for later use
|
||||
char * p = buf;
|
||||
char * pos = NULL;
|
||||
np = mystr_indexOfChar(p,'|');
|
||||
if (np >= 0) {
|
||||
*(buf+np) = '\0';
|
||||
pos = mythesstrdup(p);
|
||||
p = p + np + 1;
|
||||
} else {
|
||||
pos = mythesstrdup("");
|
||||
}
|
||||
|
||||
// count the number of fields in the remaining line
|
||||
int nf = 1;
|
||||
char * d = p;
|
||||
np = mystr_indexOfChar(d,'|');
|
||||
while ( np >= 0 ) {
|
||||
nf++;
|
||||
d = d + np + 1;
|
||||
np = mystr_indexOfChar(d,'|');
|
||||
}
|
||||
pm->count = nf;
|
||||
pm->psyns = (char **) malloc(nf*sizeof(char*));
|
||||
|
||||
// fill in the synonym list
|
||||
d = p;
|
||||
for (int j = 0; j < nf; j++) {
|
||||
np = mystr_indexOfChar(d,'|');
|
||||
if (np > 0) {
|
||||
*(d+np) = '\0';
|
||||
pm->psyns[j] = mythesstrdup(d);
|
||||
d = d + np + 1;
|
||||
} else {
|
||||
pm->psyns[j] = mythesstrdup(d);
|
||||
}
|
||||
}
|
||||
|
||||
// add pos to first synonym to create the definition
|
||||
int k = strlen(pos);
|
||||
int m = strlen(pm->psyns[0]);
|
||||
if ((k+m) < (MAX_WD_LEN - 1)) {
|
||||
strncpy(dfn,pos,k);
|
||||
*(dfn+k) = ' ';
|
||||
strncpy((dfn+k+1),(pm->psyns[0]),m+1);
|
||||
pm->defn = mythesstrdup(dfn);
|
||||
} else {
|
||||
pm->defn = mythesstrdup(pm->psyns[0]);
|
||||
}
|
||||
free(pos);
|
||||
pm++;
|
||||
|
||||
}
|
||||
free(buf);
|
||||
|
||||
return nmeanings;
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Release a result returned by Lookup().
//
// pme:       address of the meaning pointer that Lookup() filled in; it is
//            set to NULL once the memory has been released
// nmeanings: the count that Lookup() returned
//
// Nothing happens when nmeanings is 0 or *pme is NULL.
void MyThes::CleanUpAfterLookup(mentry ** pme, int nmeanings)
{

    if (!pme) return;
    if (nmeanings == 0) return;
    if ((*pme) == NULL) return;

    mentry * pm = *pme;

    for (int i = 0; i < nmeanings; i++) {
        // psyns is NULL when its allocation failed; count must not be
        // trusted in that case
        if (pm->psyns) {
            int count = pm->count;
            for (int j = 0; j < count; j++) {
                if (pm->psyns[j]) free(pm->psyns[j]);
                pm->psyns[j] = NULL;
            }
            free(pm->psyns);
        }
        pm->psyns = NULL;
        if (pm->defn) free(pm->defn);
        pm->defn = NULL;
        pm->count = 0;
        pm++;
    }
    pm = *pme;
    free(pm);
    *pme = NULL;
    return;
}
|
||||
|
||||
|
||||
// read a line of text from a text file stripping
|
||||
// off the line terminator and replacing it with
|
||||
// a null string terminator.
|
||||
// returns: -1 on error or the number of characters in
|
||||
// in the returning string
|
||||
|
||||
// A maximum of nc characters will be returned
|
||||
|
||||
int MyThes::readLine(FILE * pf, char * buf, int nc)
|
||||
{
|
||||
|
||||
if (fgets(buf,nc,pf)) {
|
||||
mytheschomp(buf);
|
||||
return strlen(buf);
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
|
||||
// performs a binary search on null terminated character
|
||||
// strings
|
||||
//
|
||||
// returns: -1 on not found
|
||||
// index of wrd in the list[]
|
||||
|
||||
int MyThes::binsearch(char * sw, char* list[], int nlst)
|
||||
{
|
||||
int lp, up, mp, j, indx;
|
||||
lp = 0;
|
||||
up = nlst-1;
|
||||
indx = -1;
|
||||
if (strcmp(sw,list[lp]) < 0) return -1;
|
||||
if (strcmp(sw,list[up]) > 0) return -1;
|
||||
while (indx < 0 ) {
|
||||
mp = (int)((lp+up) >> 1);
|
||||
j = strcmp(sw,list[mp]);
|
||||
if ( j > 0) {
|
||||
lp = mp + 1;
|
||||
} else if (j < 0 ) {
|
||||
up = mp - 1;
|
||||
} else {
|
||||
indx = mp;
|
||||
}
|
||||
if (lp > up) return -1;
|
||||
}
|
||||
return indx;
|
||||
}
|
||||
|
||||
char * MyThes::get_th_encoding()
|
||||
{
|
||||
if (encoding) return encoding;
|
||||
return NULL;
|
||||
}
|
||||
|
67
src/support/mythes/mythes.hxx
Normal file
67
src/support/mythes/mythes.hxx
Normal file
@ -0,0 +1,67 @@
|
||||
#ifndef _MYTHES_HXX_
|
||||
#define _MYTHES_HXX_
|
||||
|
||||
// FILE is used in the declarations below; include it here so that this
// header does not depend on what was included before it
#include <stdio.h>

// some maximum sizes for buffers
// MAX_WD_LEN: buffer size for one index line and for a definition
#define MAX_WD_LEN 200
// MAX_LN_LEN: buffer size for one line of the data file
#define MAX_LN_LEN 16384
|
||||
|
||||
|
||||
// a meaning with definition, count of synonyms and synonym list
//
// Arrays of these are allocated by MyThes::Lookup and must be released
// with MyThes::CleanUpAfterLookup.
struct mentry {
  char*  defn;    // part of speech plus the first synonym (or the first synonym alone when the two do not fit into MAX_WD_LEN)
  int  count;     // number of entries in psyns
  char** psyns;   // the synonyms, each one a separately allocated string
};
|
||||
|
||||
|
||||
// Thesaurus backed by an index file and a data file.
//
// The index is read into memory by the constructor, the data file is kept
// open and read on demand by Lookup().
class MyThes
{

       int  nw;                  /* number of entries in thesaurus */
       char**  list;               /* stores word list */
       unsigned int* offst;              /* stores offset list */
       char *  encoding;           /* stores text encoding; */

        FILE  *pdfile;             /* the open data file, NULL once it has been closed */

	// disallow default construction, copy construction and assignment
	// for now: these are private and mythes.cxx does not define them
	MyThes();
	MyThes(const MyThes &);
	MyThes & operator = (const MyThes &);

public:
	// load the index from idxpath and open the data file datpath
	MyThes(const char* idxpath, const char* datpath);
	~MyThes();

        // lookup text in index and return number of meanings
	// each meaning entry has a definition, synonym count and pointer
	// to the list of synonyms
        // when complete return the *original* meaning entry and count via
        // CleanUpAfterLookup to properly handle memory deallocation

        int Lookup(const char * pText, int len, mentry** pme);

	// release the meanings returned by Lookup and set *pme to NULL
	void CleanUpAfterLookup(mentry** pme, int nmean);

	// name of the text encoding read from the index file;
	// the string stays owned by this object
        char* get_th_encoding();

private:
        // Open index and dat files and load list array
        // returns 1 on success and 0 on failure
        int thInitialize (const char* indxpath, const char* datpath);

        // internal close and cleanup dat and idx files
        int thCleanup ();

        // read a text line (\n terminated) stripping off line terminator
        // returns the length of the line or -1 when nothing could be read
        int readLine(FILE * pf, char * buf, int nc);

        // binary search on null terminated character strings
        // returns the index of wrd in list or -1 when it is not found
        int binsearch(char * wrd, char* list[], int nlst);

};
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user