mirror of
https://git.lyx.org/repos/lyx.git
synced 2024-11-22 01:59:02 +00:00
Remove parsers in included hunspell
This commit is contained in:
parent
8d0d3ea090
commit
d04a8cf58f
18
3rdparty/hunspell/1.7.0/src/parsers/Makefile.am
vendored
18
3rdparty/hunspell/1.7.0/src/parsers/Makefile.am
vendored
@ -1,18 +0,0 @@
|
||||
|
||||
# Automake rules for the parser sources: a convenience library
# (libparsers.a) and the testparser program. Neither is installed.
AM_CPPFLAGS = -I${top_builddir}/src/hunspell

noinst_LIBRARIES = libparsers.a
libparsers_a_SOURCES = \
	firstparser.cxx \
	xmlparser.cxx \
	latexparser.cxx \
	manparser.cxx \
	textparser.cxx \
	htmlparser.cxx \
	odfparser.cxx

noinst_PROGRAMS = testparser
testparser_SOURCES = \
	firstparser.cxx \
	firstparser.hxx \
	xmlparser.cxx \
	xmlparser.hxx \
	latexparser.cxx \
	latexparser.hxx \
	manparser.cxx \
	manparser.hxx \
	testparser.cxx \
	textparser.cxx \
	textparser.hxx \
	htmlparser.cxx \
	htmlparser.hxx \
	odfparser.hxx \
	odfparser.cxx

# need mystrdup()
LDADD = ../hunspell/libhunspell-1.7.la
|
@ -1,65 +0,0 @@
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* Copyright (C) 2002-2017 Németh László
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
|
||||
*
|
||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <cstdio>
|
||||
#include <ctype.h>
|
||||
|
||||
#include "../hunspell/csutil.hxx"
|
||||
#include "firstparser.hxx"
|
||||
|
||||
#ifndef W32
|
||||
using namespace std;
|
||||
#endif
|
||||
|
||||
// Constructs the parser; `wordchars` is handed unchanged to the TextParser
// base class, which is the only thing initialised here.
FirstParser::FirstParser(const char* wordchars) : TextParser(wordchars) {}

// Destructor with an empty body.
FirstParser::~FirstParser() {
}
|
||||
|
||||
bool FirstParser::next_token(std::string& t) {
|
||||
t.clear();
|
||||
const size_t tabpos = line[actual].find('\t');
|
||||
if (tabpos != std::string::npos && tabpos > token) {
|
||||
token = tabpos;
|
||||
t = line[actual].substr(0, tabpos);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
@ -1,56 +0,0 @@
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* Copyright (C) 2002-2017 Németh László
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
|
||||
*
|
||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
#ifndef FIRSTPARSER_HXX_
|
||||
#define FIRSTPARSER_HXX_
|
||||
|
||||
#include "textparser.hxx"
|
||||
|
||||
/*
|
||||
* Check first word of the input line
|
||||
*
|
||||
*/
|
||||
|
||||
// TextParser variant whose next_token() reports the text in front of the
// first tab character of the current line.
class FirstParser : public TextParser {
 public:
  // `wordchars` is forwarded unchanged to the TextParser constructor.
  // (Named to match the definition in firstparser.cxx.)
  explicit FirstParser(const char* wordchars);
  virtual ~FirstParser();

  // Clears `t`; if the current line has a tab located after the stored token
  // offset, copies the text preceding that tab into `t` and returns true.
  // Returns false (with `t` empty) otherwise.
  virtual bool next_token(std::string& t);
};
|
||||
|
||||
#endif
|
@ -1,88 +0,0 @@
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* Copyright (C) 2002-2017 Németh László
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
|
||||
*
|
||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <cstdio>
|
||||
#include <ctype.h>
|
||||
|
||||
#include "../hunspell/csutil.hxx"
|
||||
#include "htmlparser.hxx"
|
||||
|
||||
#ifndef W32
|
||||
using namespace std;
|
||||
#endif
|
||||
|
||||
// Tables passed by HTMLParser::next_token() to XMLParser::next_token().
//
// PATTERN: {opening text, closing text} pairs. All entries are lower-case.
// NOTE(review): this assumes XMLParser lower-cases the input before comparing
// and takes the first matching row (as LaTeXParser::look_pattern does), so the
// catch-all {"<", ">"} must stay last -- confirm against xmlparser.cxx.
static const char* PATTERN[][2] = {{"<script", "</script>"},
                                   {"<style", "</style>"},
                                   {"<code", "</code>"},
                                   {"<samp", "</samp>"},
                                   {"<kbd", "</kbd>"},
                                   {"<var", "</var>"},
                                   {"<listing", "</listing>"},
                                   {"<address", "</address>"},
                                   {"<pre", "</pre>"},
                                   {"<!--", "-->"},
                                   // CDATA sections open with "<![CDATA[";
                                   // the former "<[cdata[" lacked the '!' and
                                   // could never match a real section.
                                   {"<![cdata[", "]]>"},
                                   {"<", ">"}};

// Number of rows in PATTERN.
#define PATTERN_LEN (sizeof(PATTERN) / (sizeof(char*) * 2))

// PATTERN2: {tag prefix, attribute prefix} pairs.
static const char* PATTERN2[][2] = {
    {"<img", "alt="},  // ALT and TITLE attributes are handled specially
    {"<img", "title="},
    {"<a ", "title="}};

// Number of rows in PATTERN2.
#define PATTERN_LEN2 (sizeof(PATTERN2) / (sizeof(char*) * 2))

// Third table expected by XMLParser::next_token(); HTML supplies none.
static const char* (*PATTERN3)[2] = NULL;

// Number of rows in PATTERN3 (always zero for HTML).
#define PATTERN_LEN3 0
|
||||
|
||||
// Constructs the parser from a narrow word-character string; the argument is
// forwarded unchanged to XMLParser.
HTMLParser::HTMLParser(const char* wordchars) : XMLParser(wordchars) {}

// Constructs the parser from `len` w_char word characters; both arguments are
// forwarded unchanged to XMLParser.
HTMLParser::HTMLParser(const w_char* wordchars, int len) : XMLParser(wordchars, len) {}
|
||||
|
||||
bool HTMLParser::next_token(std::string& t) {
|
||||
return XMLParser::next_token(PATTERN, PATTERN_LEN, PATTERN2, PATTERN_LEN2, PATTERN3, PATTERN_LEN3, t);
|
||||
}
|
||||
|
||||
// Destructor with an empty body; HTMLParser declares no data members itself.
HTMLParser::~HTMLParser() {}
|
@ -1,56 +0,0 @@
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* Copyright (C) 2002-2017 Németh László
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
|
||||
*
|
||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
#ifndef HTMLPARSER_HXX_
|
||||
#define HTMLPARSER_HXX_
|
||||
|
||||
#include "xmlparser.hxx"
|
||||
|
||||
/*
|
||||
* HTML Parser
|
||||
*
|
||||
*/
|
||||
|
||||
class HTMLParser : public XMLParser {
|
||||
public:
|
||||
explicit HTMLParser(const char* wc);
|
||||
HTMLParser(const w_char* wordchars, int len);
|
||||
virtual bool next_token(std::string&);
|
||||
virtual ~HTMLParser();
|
||||
};
|
||||
|
||||
#endif
|
277
3rdparty/hunspell/1.7.0/src/parsers/latexparser.cxx
vendored
277
3rdparty/hunspell/1.7.0/src/parsers/latexparser.cxx
vendored
@ -1,277 +0,0 @@
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* Copyright (C) 2002-2017 Németh László
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
|
||||
*
|
||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <cstdio>
|
||||
#include <ctype.h>
|
||||
|
||||
#include "../hunspell/csutil.hxx"
|
||||
#include "latexparser.hxx"
|
||||
|
||||
#ifndef W32
|
||||
using namespace std;
|
||||
#endif
|
||||
|
||||
// UTF-8 encoding of U+2019 (right single quotation mark) and the ASCII
// apostrophe; both are tested as possible word characters by next_token().
#define UTF8_APOS "\xe2\x80\x99"
#define APOSTROPHE "'"

// LaTeX constructs that are skipped instead of being tokenised.
//
//  - pat[0] is the opening text (lower-case; look_pattern() lower-cases the
//    input before comparing).
//  - pat[1] is the closing text, or NULL for a command whose `arg` brace
//    arguments are skipped instead.
//
// look_pattern() returns the FIRST row whose text is a prefix of the input,
// so order matters: a longer name must precede any row that is a prefix of it
// (e.g. "$$" before "$", "\\includeonly" before "\\include").
//
// NOTE(review): three rows are currently unreachable because an earlier row
// is a prefix of them: "\\bibliographystyle" (after "\\bibliography"),
// "\\inputencoding" (after "\\input") and "\\colorbox" (after "\\color").
// The first two have the same `arg` as the row that shadows them; for
// "\\colorbox" the effective argument count is 1, not 2. They are left in
// place to keep behaviour unchanged.
static struct {
  const char* pat[2];
  int arg;
} PATTERN[] = {{{"\\(", "\\)"}, 0},
               {{"$$", "$$"}, 0},
               {{"$", "$"}, 0},
               {{"\\begin{math}", "\\end{math}"}, 0},
               {{"\\[", "\\]"}, 0},
               {{"\\begin{displaymath}", "\\end{displaymath}"}, 0},
               {{"\\begin{equation}", "\\end{equation}"}, 0},
               {{"\\begin{equation*}", "\\end{equation*}"}, 0},
               {{"\\cite", NULL}, 1},
               {{"\\nocite", NULL}, 1},
               {{"\\index", NULL}, 1},
               {{"\\label", NULL}, 1},
               {{"\\ref", NULL}, 1},
               {{"\\pageref", NULL}, 1},
               {{"\\autoref", NULL}, 1},
               {{"\\parbox", NULL}, 1},
               {{"\\begin{verbatim}", "\\end{verbatim}"}, 0},
               {{"\\verb+", "+"}, 0},
               {{"\\verb|", "|"}, 0},
               {{"\\verb#", "#"}, 0},
               {{"\\verb*", "*"}, 0},
               {{"\\documentstyle", "\\begin{document}"}, 0},
               {{"\\documentclass", "\\begin{document}"}, 0},
               //	{ { "\\documentclass", NULL } , 1 },
               {{"\\usepackage", NULL}, 1},
               {{"\\includeonly", NULL}, 1},
               {{"\\include", NULL}, 1},
               {{"\\input", NULL}, 1},
               {{"\\vspace", NULL}, 1},
               {{"\\setlength", NULL}, 2},
               {{"\\addtolength", NULL}, 2},
               {{"\\settowidth", NULL}, 2},
               {{"\\rule", NULL}, 2},
               // A second, identical "\\vspace" row used to follow "\\hspace";
               // it could never be selected and has been dropped.
               {{"\\hspace", NULL}, 1},
               {{"\\\\[", "]"}, 0},
               {{"\\pagebreak[", "]"}, 0},
               {{"\\nopagebreak[", "]"}, 0},
               {{"\\enlargethispage", NULL}, 1},
               {{"\\begin{tabular}", NULL}, 1},
               {{"\\addcontentsline", NULL}, 2},
               {{"\\begin{thebibliography}", NULL}, 1},
               {{"\\bibliography", NULL}, 1},
               {{"\\bibliographystyle", NULL}, 1},
               {{"\\bibitem", NULL}, 1},
               {{"\\begin", NULL}, 1},
               {{"\\end", NULL}, 1},
               {{"\\pagestyle", NULL}, 1},
               {{"\\pagenumbering", NULL}, 1},
               {{"\\thispagestyle", NULL}, 1},
               {{"\\newtheorem", NULL}, 2},
               {{"\\newcommand", NULL}, 2},
               {{"\\renewcommand", NULL}, 2},
               {{"\\setcounter", NULL}, 2},
               {{"\\addtocounter", NULL}, 1},
               {{"\\stepcounter", NULL}, 1},
               {{"\\selectlanguage", NULL}, 1},
               {{"\\inputencoding", NULL}, 1},
               {{"\\hyphenation", NULL}, 1},
               {{"\\definecolor", NULL}, 3},
               {{"\\color", NULL}, 1},
               {{"\\textcolor", NULL}, 1},
               {{"\\pagecolor", NULL}, 1},
               {{"\\colorbox", NULL}, 2},
               {{"\\fcolorbox", NULL}, 2},
               {{"\\declaregraphicsextensions", NULL}, 1},
               {{"\\psfig", NULL}, 1},
               {{"\\url", NULL}, 1},
               {{"\\eqref", NULL}, 1},
               {{"\\vskip", NULL}, 1},
               {{"\\vglue", NULL}, 1},
               {{"\'\'", NULL}, 1}};

// Number of rows in PATTERN.
#define PATTERN_LEN (sizeof(PATTERN) / sizeof(PATTERN[0]))
|
||||
|
||||
// Constructs the parser from a narrow word-character string (forwarded to
// TextParser) and zeroes the LaTeX-specific bookkeeping members.
LaTeXParser::LaTeXParser(const char* wordchars)
    : TextParser(wordchars),
      pattern_num(0),
      depth(0),
      arg(0),
      opt(0) {}

// Same as above, but for `len` w_char word characters.
LaTeXParser::LaTeXParser(const w_char* wordchars, int len)
    : TextParser(wordchars, len),
      pattern_num(0),
      depth(0),
      arg(0),
      opt(0) {}

// Destructor with an empty body.
LaTeXParser::~LaTeXParser() {
}
|
||||
|
||||
int LaTeXParser::look_pattern(int col) {
|
||||
for (unsigned int i = 0; i < PATTERN_LEN; i++) {
|
||||
const char* j = line[actual].c_str() + head;
|
||||
const char* k = PATTERN[i].pat[col];
|
||||
if (!k)
|
||||
continue;
|
||||
while ((*k != '\0') && (tolower(*j) == *k)) {
|
||||
j++;
|
||||
k++;
|
||||
}
|
||||
if (*k == '\0')
|
||||
return i;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
/*
|
||||
* LaTeXParser
|
||||
*
|
||||
* state 0: not wordchar
|
||||
* state 1: wordchar
|
||||
* state 2: comments
|
||||
* state 3: commands
|
||||
* state 4: commands with arguments
|
||||
* state 5: % comment
|
||||
*
|
||||
*/
|
||||
|
||||
bool LaTeXParser::next_token(std::string& t) {
|
||||
t.clear();
|
||||
int i;
|
||||
int slash = 0;
|
||||
int apostrophe;
|
||||
for (;;) {
|
||||
// fprintf(stderr,"depth: %d, state: %d, , arg: %d, token:
|
||||
// %s\n",depth,state,arg,line[actual]+head);
|
||||
|
||||
switch (state) {
|
||||
case 0: // non word chars
|
||||
if ((pattern_num = look_pattern(0)) != -1) {
|
||||
if (PATTERN[pattern_num].pat[1]) {
|
||||
state = 2;
|
||||
} else {
|
||||
state = 4;
|
||||
depth = 0;
|
||||
arg = 0;
|
||||
opt = 1;
|
||||
}
|
||||
head += strlen(PATTERN[pattern_num].pat[0]) - 1;
|
||||
} else if (line[actual][head] == '%') {
|
||||
state = 5;
|
||||
} else if (is_wordchar(line[actual].c_str() + head)) {
|
||||
state = 1;
|
||||
token = head;
|
||||
} else if (line[actual][head] == '\\') {
|
||||
if (line[actual][head + 1] == '\\' || // \\ (linebreak)
|
||||
(line[actual][head + 1] == '$') || // \$ (dollar sign)
|
||||
(line[actual][head + 1] == '%')) { // \% (percent)
|
||||
head++;
|
||||
break;
|
||||
}
|
||||
state = 3;
|
||||
}
|
||||
break;
|
||||
case 1: // wordchar
|
||||
apostrophe = 0;
|
||||
if ((is_wordchar((char*)APOSTROPHE) ||
|
||||
(is_utf8() && is_wordchar((char*)UTF8_APOS))) &&
|
||||
!line[actual].empty() && line[actual][head] == '\'' &&
|
||||
is_wordchar(line[actual].c_str() + head + 1)) {
|
||||
head++;
|
||||
} else if (is_utf8() &&
|
||||
is_wordchar((char*)APOSTROPHE) && // add Unicode apostrophe
|
||||
// to the WORDCHARS, if
|
||||
// needed
|
||||
strncmp(line[actual].c_str() + head, UTF8_APOS, strlen(UTF8_APOS)) ==
|
||||
0 &&
|
||||
is_wordchar(line[actual].c_str() + head + strlen(UTF8_APOS))) {
|
||||
head += strlen(UTF8_APOS) - 1;
|
||||
} else if (!is_wordchar(line[actual].c_str() + head) ||
|
||||
(line[actual][head] == '\'' && line[actual][head + 1] == '\'' &&
|
||||
++apostrophe)) {
|
||||
state = 0;
|
||||
bool ok = alloc_token(token, &head, t);
|
||||
if (apostrophe)
|
||||
head += 2;
|
||||
if (ok)
|
||||
return true;
|
||||
}
|
||||
break;
|
||||
case 2: // comment, labels, etc
|
||||
if (((i = look_pattern(1)) != -1) &&
|
||||
(strcmp(PATTERN[i].pat[1], PATTERN[pattern_num].pat[1]) == 0)) {
|
||||
state = 0;
|
||||
head += strlen(PATTERN[pattern_num].pat[1]) - 1;
|
||||
}
|
||||
break;
|
||||
case 3: // command
|
||||
if ((tolower(line[actual][head]) < 'a') ||
|
||||
(tolower(line[actual][head]) > 'z')) {
|
||||
state = 0;
|
||||
head--;
|
||||
}
|
||||
break;
|
||||
case 4: // command with arguments
|
||||
if (slash && (line[actual][head] != '\0')) {
|
||||
slash = 0;
|
||||
head++;
|
||||
break;
|
||||
} else if (line[actual][head] == '\\') {
|
||||
slash = 1;
|
||||
} else if ((line[actual][head] == '{') ||
|
||||
((opt) && (line[actual][head] == '['))) {
|
||||
depth++;
|
||||
opt = 0;
|
||||
} else if (line[actual][head] == '}') {
|
||||
depth--;
|
||||
if (depth == 0) {
|
||||
opt = 1;
|
||||
arg++;
|
||||
}
|
||||
if (((depth == 0) && (arg == PATTERN[pattern_num].arg)) ||
|
||||
(depth < 0)) {
|
||||
state = 0; // XXX not handles the last optional arg.
|
||||
}
|
||||
} else if (line[actual][head] == ']')
|
||||
depth--;
|
||||
} // case
|
||||
if (next_char(line[actual].c_str(), &head)) {
|
||||
if (state == 5)
|
||||
state = 0;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
@ -1,65 +0,0 @@
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* Copyright (C) 2002-2017 Németh László
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
|
||||
*
|
||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
#ifndef LATEXPARSER_HXX_
|
||||
#define LATEXPARSER_HXX_
|
||||
|
||||
#include "textparser.hxx"
|
||||
|
||||
/*
|
||||
* LaTeX Parser
|
||||
*
|
||||
*/
|
||||
|
||||
class LaTeXParser : public TextParser {
|
||||
int pattern_num; // number of comment
|
||||
int depth; // depth of blocks
|
||||
int arg; // arguments's number
|
||||
int opt; // optional argument attrib.
|
||||
|
||||
public:
|
||||
explicit LaTeXParser(const char* wc);
|
||||
LaTeXParser(const w_char* wordchars, int len);
|
||||
virtual ~LaTeXParser();
|
||||
|
||||
virtual bool next_token(std::string&);
|
||||
|
||||
private:
|
||||
int look_pattern(int col);
|
||||
};
|
||||
|
||||
#endif
|
@ -1,98 +0,0 @@
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* Copyright (C) 2002-2017 Németh László
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
|
||||
*
|
||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <cstdio>
|
||||
#include <ctype.h>
|
||||
|
||||
#include "../hunspell/csutil.hxx"
|
||||
#include "manparser.hxx"
|
||||
|
||||
#ifndef W32
|
||||
using namespace std;
|
||||
#endif
|
||||
|
||||
// Constructs the parser from a narrow word-character string; the argument is
// forwarded unchanged to TextParser.
ManParser::ManParser(const char* wordchars) : TextParser(wordchars) {}

// Constructs the parser from `len` w_char word characters; both arguments are
// forwarded unchanged to TextParser.
ManParser::ManParser(const w_char* wordchars, int len) : TextParser(wordchars, len) {}

// Destructor with an empty body.
ManParser::~ManParser() {
}
|
||||
|
||||
bool ManParser::next_token(std::string& t) {
|
||||
for (;;) {
|
||||
switch (state) {
|
||||
case 1: // command arguments
|
||||
if (line[actual][head] == ' ')
|
||||
state = 2;
|
||||
break;
|
||||
case 0: // dot in begin of line
|
||||
if (line[actual][0] == '.') {
|
||||
state = 1;
|
||||
break;
|
||||
} else {
|
||||
state = 2;
|
||||
}
|
||||
/* FALLTHROUGH */
|
||||
case 2: // non word chars
|
||||
if (is_wordchar(line[actual].c_str() + head)) {
|
||||
state = 3;
|
||||
token = head;
|
||||
} else if ((line[actual][head] == '\\') &&
|
||||
(line[actual][head + 1] == 'f') &&
|
||||
(line[actual][head + 2] != '\0')) {
|
||||
head += 2;
|
||||
}
|
||||
break;
|
||||
case 3: // wordchar
|
||||
if (!is_wordchar(line[actual].c_str() + head)) {
|
||||
state = 2;
|
||||
if (alloc_token(token, &head, t))
|
||||
return true;
|
||||
}
|
||||
break;
|
||||
}
|
||||
if (next_char(line[actual].c_str(), &head)) {
|
||||
state = 0;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
@ -1,58 +0,0 @@
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* Copyright (C) 2002-2017 Németh László
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
|
||||
*
|
||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
#ifndef MANPARSER_HXX_
|
||||
#define MANPARSER_HXX_
|
||||
|
||||
#include "textparser.hxx"
|
||||
|
||||
/*
|
||||
* Man page parser
|
||||
*
|
||||
*/
|
||||
|
||||
class ManParser : public TextParser {
|
||||
protected:
|
||||
public:
|
||||
explicit ManParser(const char* wc);
|
||||
ManParser(const w_char* wordchars, int len);
|
||||
virtual ~ManParser();
|
||||
|
||||
virtual bool next_token(std::string&);
|
||||
};
|
||||
|
||||
#endif
|
@ -1,86 +0,0 @@
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* Copyright (C) 2002-2017 Németh László
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
|
||||
*
|
||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <cstdio>
|
||||
#include <ctype.h>
|
||||
|
||||
#include "../hunspell/csutil.hxx"
|
||||
#include "odfparser.hxx"
|
||||
|
||||
#ifndef W32
|
||||
using namespace std;
|
||||
#endif
|
||||
|
||||
// First pattern table passed to XMLParser::next_token(): {opening text,
// closing text} pairs. Text is lower-case because look_pattern() lower-cases
// the input before comparing.
// NOTE(review): "<[cdata[" has no '!' after '<' - confirm whether
// "<![cdata[" was intended.
static const char* PATTERN[][2] = {
    {"<office:meta>", "</office:meta>"},
    {"<office:settings>", "</office:settings>"},
    {"<office:binary-data>", "</office:binary-data>"},
    {"<!--", "-->"},  // XML comment
    {"<[cdata[", "]]>"},
    {"<", ">"}};

#define PATTERN_LEN (sizeof(PATTERN) / sizeof(PATTERN[0]))

// Second pattern table (attribute checking in XMLParser): none for ODF.
static const char* (*PATTERN2)[2] = NULL;

#define PATTERN_LEN2 0

// Third pattern table: in-word markup that may interrupt a word.
static const char* PATTERN3[][2] = {
    {"<text:span", ">"},    // part of the reedited words
    {"</text:span", ">"}};  // for example, an inserted letter

#define PATTERN_LEN3 (sizeof(PATTERN3) / sizeof(PATTERN3[0]))
|
||||
|
||||
// Constructs the parser from a C string of word characters, which is handed
// unchanged to the XMLParser base.
ODFParser::ODFParser(const char* wordchars) : XMLParser(wordchars) {}
|
||||
|
||||
// Constructs the parser from an array of `len` w_char word characters, which
// is handed unchanged to the XMLParser base.
ODFParser::ODFParser(const w_char* wordchars, int len)
    : XMLParser(wordchars, len) {}
|
||||
|
||||
bool ODFParser::next_token(std::string& t) {
|
||||
return XMLParser::next_token(PATTERN, PATTERN_LEN, PATTERN2, PATTERN_LEN2, PATTERN3, PATTERN_LEN3, t);
|
||||
}
|
||||
|
||||
std::string ODFParser::get_word(const std::string &tok) {
|
||||
return XMLParser::get_word2(PATTERN3, PATTERN_LEN3, tok);
|
||||
}
|
||||
|
||||
// Nothing to release: this class adds no state of its own.
ODFParser::~ODFParser() {
}
|
@ -1,57 +0,0 @@
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* Copyright (C) 2002-2017 Németh László
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
|
||||
*
|
||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
#ifndef ODFPARSER_HXX_
|
||||
#define ODFPARSER_HXX_
|
||||
|
||||
#include "xmlparser.hxx"
|
||||
|
||||
/*
|
||||
* HTML Parser
|
||||
*
|
||||
*/
|
||||
|
||||
class ODFParser : public XMLParser {
|
||||
public:
|
||||
explicit ODFParser(const char* wc);
|
||||
ODFParser(const w_char* wordchars, int len);
|
||||
virtual bool next_token(std::string&);
|
||||
virtual std::string get_word(const std::string &tok);
|
||||
virtual ~ODFParser();
|
||||
};
|
||||
|
||||
#endif
|
@ -1,86 +0,0 @@
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* Copyright (C) 2002-2017 Németh László
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
|
||||
*
|
||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
#include <cstring>
|
||||
#include <cstdlib>
|
||||
#include <cstdio>
|
||||
|
||||
#include "textparser.hxx"
|
||||
#include "htmlparser.hxx"
|
||||
#include "latexparser.hxx"
|
||||
#include "xmlparser.hxx"
|
||||
|
||||
#ifndef W32
|
||||
using namespace std;
|
||||
#endif
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
FILE* f;
|
||||
/* first parse the command line options */
|
||||
|
||||
if (argc < 2) {
|
||||
fprintf(stderr, "correct syntax is:\n");
|
||||
fprintf(stderr, "testparser file\n");
|
||||
fprintf(stderr, "example: testparser /dev/stdin\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
/* open the words to check list */
|
||||
f = fopen(argv[1], "r");
|
||||
if (!f) {
|
||||
fprintf(stderr, "Error - could not open file of words to check\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
TextParser* p = new TextParser(
|
||||
"qwertzuiopasdfghjklyxcvbnmQWERTZUIOPASDFGHJKLYXCVBNM");
|
||||
|
||||
char buf[MAXLNLEN];
|
||||
|
||||
while (fgets(buf, MAXLNLEN, f)) {
|
||||
p->put_line(buf);
|
||||
p->set_url_checking(1);
|
||||
std::string next;
|
||||
while (p->next_token(next)) {
|
||||
fprintf(stdout, "token: %s\n", next.c_str());
|
||||
}
|
||||
}
|
||||
|
||||
delete p;
|
||||
fclose(f);
|
||||
return 0;
|
||||
}
|
302
3rdparty/hunspell/1.7.0/src/parsers/textparser.cxx
vendored
302
3rdparty/hunspell/1.7.0/src/parsers/textparser.cxx
vendored
@ -1,302 +0,0 @@
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* Copyright (C) 2002-2017 Németh László
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
|
||||
*
|
||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <cstdio>
|
||||
#include <ctype.h>
|
||||
|
||||
#include "../hunspell/csutil.hxx"
|
||||
#include "textparser.hxx"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#ifndef W32
|
||||
using namespace std;
|
||||
#endif
|
||||
|
||||
// ISO-8859-1 HTML character entities
//
// get_latin1() only matches input that starts with '&', so every entry must
// be spelled as an entity reference ("&Agrave;"), never as the decoded
// character.

static const char* LATIN1[] = {
    "&Agrave;", "&Atilde;", "&Aring;",  "&AElig;",  "&Egrave;", "&Ecirc;",
    "&Igrave;", "&Iuml;",   "&ETH;",    "&Ntilde;", "&Ograve;", "&Oslash;",
    "&Ugrave;", "&THORN;",  "&agrave;", "&atilde;", "&aring;",  "&aelig;",
    "&egrave;", "&ecirc;",  "&igrave;", "&iuml;",   "&eth;",    "&ntilde;",
    "&ograve;", "&oslash;", "&ugrave;", "&thorn;",  "&yuml;"};

// Number of entries in LATIN1.
#define LATIN1_LEN (sizeof(LATIN1) / sizeof(char*))

// Apostrophe spellings: XML entity, UTF-8 encoded U+2019, plain ASCII.
#define ENTITY_APOS "&apos;"
#define UTF8_APOS "\xe2\x80\x99"
#define APOSTROPHE "'"
|
||||
|
||||
// Byte-oriented constructor: init(const char*) clears the utf8 flag and
// builds the 256-entry word-character table from `wordchars`.
TextParser::TextParser(const char* wordchars)
{
  init(wordchars);
}
|
||||
|
||||
// UTF-8 constructor: init(const w_char*, int) sets the utf8 flag and keeps a
// pointer to the caller's `wordchars` array of `len` entries.
TextParser::TextParser(const w_char* wordchars, int len)
{
  init(wordchars, len);
}
|
||||
|
||||
// Nothing to free explicitly; wordchars_utf16 is never deleted in this file.
TextParser::~TextParser() {
}
|
||||
|
||||
int TextParser::is_wordchar(const char* w) {
|
||||
if (*w == '\0')
|
||||
return 0;
|
||||
if (utf8) {
|
||||
std::vector<w_char> wc;
|
||||
unsigned short idx;
|
||||
u8_u16(wc, w);
|
||||
if (wc.empty())
|
||||
return 0;
|
||||
idx = (wc[0].h << 8) + wc[0].l;
|
||||
return (unicodeisalpha(idx) ||
|
||||
(wordchars_utf16 &&
|
||||
std::binary_search(wordchars_utf16, wordchars_utf16 + wclen, wc[0])));
|
||||
} else {
|
||||
return wordcharacters[(*w + 256) % 256];
|
||||
}
|
||||
}
|
||||
|
||||
const char* TextParser::get_latin1(const char* s) {
|
||||
if (s[0] == '&') {
|
||||
unsigned int i = 0;
|
||||
while ((i < LATIN1_LEN) && strncmp(LATIN1[i], s, strlen(LATIN1[i])))
|
||||
i++;
|
||||
if (i != LATIN1_LEN)
|
||||
return LATIN1[i];
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Byte-mode initialisation: resets the parser state, clears the UTF-8 flag
// and marks every byte of `wordchars` in the 256-entry table. A NULL
// `wordchars` selects the built-in ASCII letter set.
void TextParser::init(const char* wordchars) {
  utf8 = 0;
  wordchars_utf16 = NULL;
  wclen = 0;

  actual = 0;
  head = 0;
  token = 0;
  state = 0;
  checkurl = 0;

  wordcharacters.resize(256, 0);
  if (!wordchars)
    wordchars = "qwertzuiopasdfghjklyxcvbnmQWERTZUIOPASDFGHJKLYXCVBNM";
  for (const char* p = wordchars; *p != '\0'; ++p) {
    // (+256) % 256 maps a possibly negative char onto 0..255.
    wordcharacters[(*p + 256) % 256] = 1;
  }
}
|
||||
|
||||
// UTF-8-mode initialisation: resets the parser state, sets the UTF-8 flag
// and remembers the caller's word-character array. Only the pointer is
// stored, not a copy; is_wordchar() binary-searches the array.
void TextParser::init(const w_char* wc, int len) {
  utf8 = 1;
  wordchars_utf16 = wc;
  wclen = len;

  actual = 0;
  head = 0;
  token = 0;
  state = 0;
  checkurl = 0;
}
|
||||
|
||||
int TextParser::next_char(const char* ln, size_t* pos) {
|
||||
if (*(ln + *pos) == '\0')
|
||||
return 1;
|
||||
if (utf8) {
|
||||
if (*(ln + *pos) >> 7) {
|
||||
// jump to next UTF-8 character
|
||||
for ((*pos)++; (*(ln + *pos) & 0xc0) == 0x80; (*pos)++)
|
||||
;
|
||||
} else {
|
||||
(*pos)++;
|
||||
}
|
||||
} else
|
||||
(*pos)++;
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Stores `word` as the new current line in the next slot of the
// MAXPREVLINE-entry ring, rewinds the token and head positions and rebuilds
// the URL mask for the new line.
void TextParser::put_line(const char* word) {
  actual = (actual + 1) % MAXPREVLINE;
  std::string& current = line[actual];
  current.assign(word);
  head = 0;
  token = 0;
  check_urls();
}
|
||||
|
||||
std::string TextParser::get_prevline(int n) const {
|
||||
return line[(actual + MAXPREVLINE - n) % MAXPREVLINE];
|
||||
}
|
||||
|
||||
std::string TextParser::get_line() const {
|
||||
return get_prevline(0);
|
||||
}
|
||||
|
||||
bool TextParser::next_token(std::string &t) {
|
||||
const char* latin1;
|
||||
|
||||
for (;;) {
|
||||
switch (state) {
|
||||
case 0: // non word chars
|
||||
if (is_wordchar(line[actual].c_str() + head)) {
|
||||
state = 1;
|
||||
token = head;
|
||||
} else if ((latin1 = get_latin1(line[actual].c_str() + head))) {
|
||||
state = 1;
|
||||
token = head;
|
||||
head += strlen(latin1);
|
||||
}
|
||||
break;
|
||||
case 1: // wordchar
|
||||
if ((latin1 = get_latin1(line[actual].c_str() + head))) {
|
||||
head += strlen(latin1);
|
||||
} else if ((is_wordchar((char*)APOSTROPHE) ||
|
||||
(is_utf8() && is_wordchar((char*)UTF8_APOS))) &&
|
||||
!line[actual].empty() && line[actual][head] == '\'' &&
|
||||
is_wordchar(line[actual].c_str() + head + 1)) {
|
||||
head++;
|
||||
} else if (is_utf8() &&
|
||||
is_wordchar((char*)APOSTROPHE) && // add Unicode apostrophe
|
||||
// to the WORDCHARS, if
|
||||
// needed
|
||||
strncmp(line[actual].c_str() + head, UTF8_APOS, strlen(UTF8_APOS)) ==
|
||||
0 &&
|
||||
is_wordchar(line[actual].c_str() + head + strlen(UTF8_APOS))) {
|
||||
head += strlen(UTF8_APOS) - 1;
|
||||
} else if (!is_wordchar(line[actual].c_str() + head)) {
|
||||
state = 0;
|
||||
if (alloc_token(token, &head, t))
|
||||
return true;
|
||||
}
|
||||
break;
|
||||
}
|
||||
if (next_char(line[actual].c_str(), &head))
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
size_t TextParser::get_tokenpos() {
|
||||
return token;
|
||||
}
|
||||
|
||||
int TextParser::change_token(const char* word) {
|
||||
if (word) {
|
||||
std::string remainder(line[actual].substr(head));
|
||||
line[actual].resize(token);
|
||||
line[actual].append(word);
|
||||
line[actual].append(remainder);
|
||||
head = token;
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Plain text carries no in-word markup, so the token is returned as is.
// Virtual: ODFParser replaces it.
std::string TextParser::get_word(const std::string &tok) {
  std::string word(tok);
  return word;
}
|
||||
|
||||
void TextParser::check_urls() {
|
||||
urlline.resize(line[actual].size() + 1);
|
||||
int url_state = 0;
|
||||
size_t url_head = 0;
|
||||
size_t url_token = 0;
|
||||
int url = 0;
|
||||
for (;;) {
|
||||
switch (url_state) {
|
||||
case 0: // non word chars
|
||||
if (is_wordchar(line[actual].c_str() + url_head)) {
|
||||
url_state = 1;
|
||||
url_token = url_head;
|
||||
// Unix path
|
||||
} else if (line[actual][url_head] == '/') {
|
||||
url_state = 1;
|
||||
url_token = url_head;
|
||||
url = 1;
|
||||
}
|
||||
break;
|
||||
case 1: // wordchar
|
||||
char ch = line[actual][url_head];
|
||||
// e-mail address
|
||||
if ((ch == '@') ||
|
||||
// MS-DOS, Windows path
|
||||
(strncmp(line[actual].c_str() + url_head, ":\\", 2) == 0) ||
|
||||
// URL
|
||||
(strncmp(line[actual].c_str() + url_head, "://", 3) == 0)) {
|
||||
url = 1;
|
||||
} else if (!(is_wordchar(line[actual].c_str() + url_head) || (ch == '-') ||
|
||||
(ch == '_') || (ch == '\\') || (ch == '.') ||
|
||||
(ch == ':') || (ch == '/') || (ch == '~') || (ch == '%') ||
|
||||
(ch == '*') || (ch == '$') || (ch == '[') || (ch == ']') ||
|
||||
(ch == '?') || (ch == '!') ||
|
||||
((ch >= '0') && (ch <= '9')))) {
|
||||
url_state = 0;
|
||||
if (url == 1) {
|
||||
for (size_t i = url_token; i < url_head; ++i) {
|
||||
urlline[i] = true;
|
||||
}
|
||||
}
|
||||
url = 0;
|
||||
}
|
||||
break;
|
||||
}
|
||||
urlline[url_head] = false;
|
||||
if (next_char(line[actual].c_str(), &url_head))
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
int TextParser::get_url(size_t token_pos, size_t* hd) {
|
||||
for (size_t i = *hd; i < line[actual].size() && urlline[i]; i++, (*hd)++)
|
||||
;
|
||||
return checkurl ? 0 : urlline[token_pos];
|
||||
}
|
||||
|
||||
// Stores the flag read by get_url(): a non-zero value makes get_url()
// return 0, so alloc_token() no longer rejects tokens that lie in URLs.
void TextParser::set_url_checking(int check)
{
  checkurl = check;
}
|
||||
|
||||
bool TextParser::alloc_token(size_t tokn, size_t* hd, std::string& t) {
|
||||
size_t url_head = *hd;
|
||||
if (get_url(tokn, &url_head))
|
||||
return false;
|
||||
t = line[actual].substr(tokn, *hd - tokn);
|
||||
// remove colon for Finnish and Swedish language
|
||||
if (!t.empty() && t[t.size() - 1] == ':') {
|
||||
t.resize(t.size() - 1);
|
||||
if (t.empty()) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
@ -1,99 +0,0 @@
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* Copyright (C) 2002-2017 Németh László
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
|
||||
*
|
||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
#ifndef TEXTPARSER_HXX_
|
||||
#define TEXTPARSER_HXX_
|
||||
|
||||
// number of stored lines: the current line plus the previous ones
|
||||
#define MAXPREVLINE 4
|
||||
|
||||
#ifndef MAXLNLEN
|
||||
#define MAXLNLEN 8192
|
||||
#endif
|
||||
|
||||
#include "../hunspell/w_char.hxx"
|
||||
|
||||
#include <vector>
|
||||
|
||||
/*
|
||||
* Base Text Parser
|
||||
*
|
||||
*/
|
||||
|
||||
class TextParser {
|
||||
protected:
|
||||
std::vector<int> wordcharacters;// for detection of the word boundaries
|
||||
std::string line[MAXPREVLINE]; // parsed and previous lines
|
||||
std::vector<bool> urlline; // mask for url detection
|
||||
int checkurl;
|
||||
int actual; // actual line
|
||||
size_t head; // head position
|
||||
size_t token;// begin of token
|
||||
int state; // state of automata
|
||||
int utf8; // UTF-8 character encoding
|
||||
int next_char(const char* line, size_t* pos);
|
||||
const w_char* wordchars_utf16;
|
||||
int wclen;
|
||||
|
||||
public:
|
||||
TextParser(const w_char* wordchars, int len);
|
||||
explicit TextParser(const char* wc);
|
||||
virtual ~TextParser();
|
||||
|
||||
void put_line(const char* line);
|
||||
std::string get_line() const;
|
||||
std::string get_prevline(int n) const;
|
||||
virtual bool next_token(std::string&);
|
||||
virtual std::string get_word(const std::string &tok);
|
||||
virtual int change_token(const char* word);
|
||||
void set_url_checking(int check);
|
||||
|
||||
size_t get_tokenpos();
|
||||
int is_wordchar(const char* w);
|
||||
inline int is_utf8() { return utf8; }
|
||||
const char* get_latin1(const char* s);
|
||||
char* next_char();
|
||||
int tokenize_urls();
|
||||
void check_urls();
|
||||
int get_url(size_t token_pos, size_t* head);
|
||||
bool alloc_token(size_t token, size_t* head, std::string& out);
|
||||
private:
|
||||
void init(const char*);
|
||||
void init(const w_char* wordchars, int len);
|
||||
};
|
||||
|
||||
#endif
|
253
3rdparty/hunspell/1.7.0/src/parsers/xmlparser.cxx
vendored
253
3rdparty/hunspell/1.7.0/src/parsers/xmlparser.cxx
vendored
@ -1,253 +0,0 @@
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* Copyright (C) 2002-2017 Németh László
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
|
||||
*
|
||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <cstdio>
|
||||
#include <ctype.h>
|
||||
|
||||
#include "../hunspell/csutil.hxx"
|
||||
#include "xmlparser.hxx"
|
||||
|
||||
#ifndef W32
|
||||
using namespace std;
|
||||
#endif
|
||||
|
||||
// States of the XMLParser::next_token() automaton.
enum { ST_NON_WORD, ST_WORD, ST_TAG, ST_CHAR_ENTITY, ST_OTHER_TAG, ST_ATTRIB };

// Default skipped regions as {opening text, closing text} pairs. Text is
// lower-case because look_pattern() lower-cases the input before comparing.
// NOTE(review): "<[cdata[" has no '!' after '<' - confirm whether
// "<![cdata[" was intended.
static const char* __PATTERN__[][2] = {{"<!--", "-->"},  // XML comment
                                       {"<[cdata[", "]]>"},
                                       {"<", ">"}};

#define __PATTERN_LEN__ (sizeof(__PATTERN__) / (sizeof(char*) * 2))

// for checking attributes, eg. <img alt="text"> in HTML
static const char* (*__PATTERN2__)[2] = NULL;

#define __PATTERN_LEN2__ 0

// for checking words with in-word patterns
// for example, "exam<text:span>p</text:span>le" in ODT
static const char* (*__PATTERN3__)[2] = NULL;

#define __PATTERN_LEN3__ 0

// Apostrophe spellings: XML entity, UTF-8 encoded U+2019, plain ASCII.
// ENTITY_APOS must be the entity reference, not the decoded character:
// next_token() matches it against the input and change_token() writes it
// as the escaped form of APOSTROPHE.
#define ENTITY_APOS "&apos;"
#define UTF8_APOS "\xe2\x80\x99"
#define APOSTROPHE "'"
|
||||
|
||||
// Byte-oriented constructor: `wordchars` goes to TextParser, every piece of
// XML parsing state starts at zero.
XMLParser::XMLParser(const char* wordchars)
    : TextParser(wordchars),
      pattern_num(0),
      pattern2_num(0),
      pattern3_num(0),
      prevstate(0),
      checkattr(0),
      quotmark(0) {}
|
||||
|
||||
// UTF-8 constructor: the w_char array goes to TextParser, every piece of
// XML parsing state starts at zero.
XMLParser::XMLParser(const w_char* wordchars, int len)
    : TextParser(wordchars, len),
      pattern_num(0),
      pattern2_num(0),
      pattern3_num(0),
      prevstate(0),
      checkattr(0),
      quotmark(0) {}
|
||||
|
||||
// Nothing to release here.
XMLParser::~XMLParser() {
}
|
||||
|
||||
int XMLParser::look_pattern(const char* p[][2], unsigned int len, int column) {
|
||||
for (unsigned int i = 0; i < len; i++) {
|
||||
const char* j = line[actual].c_str() + head;
|
||||
const char* k = p[i][column];
|
||||
while ((*k != '\0') && (tolower(*j) == *k)) {
|
||||
j++;
|
||||
k++;
|
||||
}
|
||||
if (*k == '\0')
|
||||
return i;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
/*
|
||||
* XML parser
|
||||
*
|
||||
*/
|
||||
|
||||
bool XMLParser::next_token(const char* PATTERN[][2],
|
||||
unsigned int PATTERN_LEN,
|
||||
const char* PATTERN2[][2],
|
||||
unsigned int PATTERN_LEN2,
|
||||
const char* PATTERN3[][2],
|
||||
unsigned int PATTERN_LEN3,
|
||||
std::string& t) {
|
||||
t.clear();
|
||||
const char* latin1;
|
||||
|
||||
for (;;) {
|
||||
switch (state) {
|
||||
case ST_NON_WORD: // non word chars
|
||||
prevstate = ST_NON_WORD;
|
||||
if ((pattern_num = look_pattern(PATTERN, PATTERN_LEN, 0)) != -1) {
|
||||
checkattr = 0;
|
||||
if ((pattern2_num = look_pattern(PATTERN2, PATTERN_LEN2, 0)) != -1) {
|
||||
checkattr = 1;
|
||||
}
|
||||
state = ST_TAG;
|
||||
} else if (is_wordchar(line[actual].c_str() + head)) {
|
||||
state = ST_WORD;
|
||||
token = head;
|
||||
} else if ((latin1 = get_latin1(line[actual].c_str() + head))) {
|
||||
state = ST_WORD;
|
||||
token = head;
|
||||
head += strlen(latin1);
|
||||
} else if (line[actual][head] == '&') {
|
||||
state = ST_CHAR_ENTITY;
|
||||
}
|
||||
break;
|
||||
case ST_WORD: // wordchar
|
||||
if ((latin1 = get_latin1(line[actual].c_str() + head))) {
|
||||
head += strlen(latin1);
|
||||
} else if ((is_wordchar((char*)APOSTROPHE) ||
|
||||
(is_utf8() && is_wordchar((char*)UTF8_APOS))) &&
|
||||
strncmp(line[actual].c_str() + head, ENTITY_APOS,
|
||||
strlen(ENTITY_APOS)) == 0 &&
|
||||
is_wordchar(line[actual].c_str() + head + strlen(ENTITY_APOS))) {
|
||||
head += strlen(ENTITY_APOS) - 1;
|
||||
} else if (is_utf8() &&
|
||||
is_wordchar((char*)APOSTROPHE) && // add Unicode apostrophe
|
||||
// to the WORDCHARS, if
|
||||
// needed
|
||||
strncmp(line[actual].c_str() + head, UTF8_APOS, strlen(UTF8_APOS)) ==
|
||||
0 &&
|
||||
is_wordchar(line[actual].c_str() + head + strlen(UTF8_APOS))) {
|
||||
head += strlen(UTF8_APOS) - 1;
|
||||
} else if (!is_wordchar(line[actual].c_str() + head)) {
|
||||
// in-word patterns
|
||||
if ((pattern3_num = look_pattern(PATTERN3, PATTERN_LEN3, 0)) != -1) {
|
||||
size_t pos = line[actual].find(PATTERN3[pattern3_num][1], head);
|
||||
if (pos != std::string::npos) {
|
||||
size_t endpos = pos + strlen(PATTERN3[pattern3_num][1]) - 1;
|
||||
if (is_wordchar(line[actual].c_str() + endpos + 1)) {
|
||||
head = endpos;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
state = prevstate;
|
||||
// return with the token, except in the case of in-word patterns
|
||||
if (alloc_token(token, &head, t))
|
||||
return true;
|
||||
}
|
||||
break;
|
||||
case ST_TAG: // comment, labels, etc
|
||||
int i;
|
||||
if ((checkattr == 1) &&
|
||||
((i = look_pattern(PATTERN2, PATTERN_LEN2, 1)) != -1) &&
|
||||
(strcmp(PATTERN2[i][0], PATTERN2[pattern2_num][0]) == 0)) {
|
||||
checkattr = 2;
|
||||
} else if ((checkattr > 0) && (line[actual][head] == '>')) {
|
||||
state = ST_NON_WORD;
|
||||
} else if (((i = look_pattern(PATTERN, PATTERN_LEN, 1)) != -1) &&
|
||||
(strcmp(PATTERN[i][1], PATTERN[pattern_num][1]) == 0)) {
|
||||
state = ST_NON_WORD;
|
||||
head += strlen(PATTERN[pattern_num][1]) - 1;
|
||||
} else if ((strcmp(PATTERN[pattern_num][0], "<") == 0) &&
|
||||
((line[actual][head] == '"') ||
|
||||
(line[actual][head] == '\''))) {
|
||||
quotmark = line[actual][head];
|
||||
state = ST_ATTRIB;
|
||||
}
|
||||
break;
|
||||
case ST_ATTRIB: // non word chars
|
||||
prevstate = ST_ATTRIB;
|
||||
if (line[actual][head] == quotmark) {
|
||||
state = ST_TAG;
|
||||
if (checkattr == 2)
|
||||
checkattr = 1;
|
||||
// for IMG ALT
|
||||
} else if (is_wordchar(line[actual].c_str() + head) && (checkattr == 2)) {
|
||||
state = ST_WORD;
|
||||
token = head;
|
||||
} else if (line[actual][head] == '&') {
|
||||
state = ST_CHAR_ENTITY;
|
||||
}
|
||||
break;
|
||||
case ST_CHAR_ENTITY: // SGML element
|
||||
if ((tolower(line[actual][head]) == ';')) {
|
||||
state = prevstate;
|
||||
head--;
|
||||
}
|
||||
}
|
||||
if (next_char(line[actual].c_str(), &head))
|
||||
return false;
|
||||
}
|
||||
//FIXME No return, in function returning non-void
|
||||
}
|
||||
|
||||
bool XMLParser::next_token(std::string& t) {
|
||||
return next_token(__PATTERN__, __PATTERN_LEN__, __PATTERN2__,
|
||||
__PATTERN_LEN2__, __PATTERN3__, __PATTERN_LEN3__, t);
|
||||
}
|
||||
|
||||
// remove in-word patterns
|
||||
// remove in-word patterns
//
// Returns a copy of `tok` from which every span that starts with an opening
// text of PATTERN3 and ends with the matching closing text has been erased.
// If an opening text has no closing text after it, the word as cleaned so
// far is returned immediately.
std::string XMLParser::get_word2(
    const char* PATTERN3[][2],
    unsigned int PATTERN_LEN3,
    const std::string &tok) {
  std::string word(tok);
  for (unsigned int row = 0; row < PATTERN_LEN3; ++row) {
    for (;;) {
      const size_t start = word.find(PATTERN3[row][0]);
      if (start == std::string::npos)
        break;  // no further occurrence of this pattern
      const size_t stop = word.find(PATTERN3[row][1], start);
      if (stop == std::string::npos)
        return word;  // unterminated pattern
      word.erase(start, stop + strlen(PATTERN3[row][1]) - start);
    }
  }
  return word;
}
|
||||
|
||||
int XMLParser::change_token(const char* word) {
|
||||
if (strstr(word, APOSTROPHE) != NULL || strchr(word, '"') != NULL ||
|
||||
strchr(word, '&') != NULL || strchr(word, '<') != NULL ||
|
||||
strchr(word, '>') != NULL) {
|
||||
std::string r(word);
|
||||
mystrrep(r, "&", "__namp;__");
|
||||
mystrrep(r, "__namp;__", "&");
|
||||
mystrrep(r, APOSTROPHE, ENTITY_APOS);
|
||||
mystrrep(r, "\"", """);
|
||||
mystrrep(r, ">", ">");
|
||||
mystrrep(r, "<", "<");
|
||||
return TextParser::change_token(r.c_str());
|
||||
}
|
||||
return TextParser::change_token(word);
|
||||
}
|
@ -1,76 +0,0 @@
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* Copyright (C) 2002-2017 Németh László
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
|
||||
*
|
||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
#ifndef XMLPARSER_HXX_
|
||||
#define XMLPARSER_HXX_
|
||||
|
||||
#include "textparser.hxx"
|
||||
|
||||
/*
|
||||
* XML Parser
|
||||
*
|
||||
*/
|
||||
|
||||
class XMLParser : public TextParser {
|
||||
public:
|
||||
explicit XMLParser(const char* wc);
|
||||
XMLParser(const w_char* wordchars, int len);
|
||||
bool next_token(const char* p[][2],
|
||||
unsigned int len,
|
||||
const char* p2[][2],
|
||||
unsigned int len2,
|
||||
const char* p3[][2],
|
||||
unsigned int len3,
|
||||
std::string&);
|
||||
virtual bool next_token(std::string&);
|
||||
std::string get_word2(const char* p2[][2],
|
||||
unsigned int len2,
|
||||
const std::string &tok);
|
||||
int change_token(const char* word);
|
||||
virtual ~XMLParser();
|
||||
|
||||
private:
|
||||
int look_pattern(const char* p[][2], unsigned int len, int column);
|
||||
int pattern_num;
|
||||
int pattern2_num;
|
||||
int pattern3_num;
|
||||
int prevstate;
|
||||
int checkattr;
|
||||
char quotmark;
|
||||
};
|
||||
|
||||
#endif
|
15
3rdparty/hunspell/Makefile.am
vendored
15
3rdparty/hunspell/Makefile.am
vendored
@ -45,19 +45,4 @@ liblyxhunspell_a_SOURCES = \
|
||||
1.7.0/src/hunspell/suggestmgr.hxx \
|
||||
1.7.0/src/hunspell/utf_info.hxx \
|
||||
1.7.0/src/hunspell/w_char.hxx \
|
||||
1.7.0/src/parsers/firstparser.cxx \
|
||||
1.7.0/src/parsers/firstparser.hxx \
|
||||
1.7.0/src/parsers/htmlparser.cxx \
|
||||
1.7.0/src/parsers/htmlparser.hxx \
|
||||
1.7.0/src/parsers/latexparser.cxx \
|
||||
1.7.0/src/parsers/latexparser.hxx \
|
||||
1.7.0/src/parsers/manparser.cxx \
|
||||
1.7.0/src/parsers/manparser.hxx \
|
||||
1.7.0/src/parsers/odfparser.cxx \
|
||||
1.7.0/src/parsers/odfparser.hxx \
|
||||
1.7.0/src/parsers/testparser.cxx \
|
||||
1.7.0/src/parsers/textparser.cxx \
|
||||
1.7.0/src/parsers/textparser.hxx \
|
||||
1.7.0/src/parsers/xmlparser.cxx \
|
||||
1.7.0/src/parsers/xmlparser.hxx \
|
||||
1.7.0/src/win_api/config.h
|
||||
|
Loading…
Reference in New Issue
Block a user