mirror of
https://git.lyx.org/repos/lyx.git
synced 2024-11-09 18:31:04 +00:00
Remove parsers in included hunspell
This commit is contained in:
parent
8d0d3ea090
commit
d04a8cf58f
18
3rdparty/hunspell/1.7.0/src/parsers/Makefile.am
vendored
18
3rdparty/hunspell/1.7.0/src/parsers/Makefile.am
vendored
@ -1,18 +0,0 @@
|
|||||||
|
|
||||||
# Headers of the core library are looked up under src/hunspell.
AM_CPPFLAGS = -I${top_builddir}/src/hunspell

# Static library that is built but not installed.
noinst_LIBRARIES = libparsers.a
libparsers_a_SOURCES = \
	firstparser.cxx \
	xmlparser.cxx \
	latexparser.cxx \
	manparser.cxx \
	textparser.cxx \
	htmlparser.cxx \
	odfparser.cxx

# Test driver that is built but not installed; it lists the parser sources
# itself instead of linking against libparsers.a.
noinst_PROGRAMS = testparser
testparser_SOURCES = \
	firstparser.cxx \
	firstparser.hxx \
	xmlparser.cxx \
	xmlparser.hxx \
	latexparser.cxx \
	latexparser.hxx \
	manparser.cxx \
	manparser.hxx \
	testparser.cxx \
	textparser.cxx \
	textparser.hxx \
	htmlparser.cxx \
	htmlparser.hxx \
	odfparser.hxx \
	odfparser.cxx

# need mystrdup()
LDADD = ../hunspell/libhunspell-1.7.la
|
|
@ -1,65 +0,0 @@
|
|||||||
/* ***** BEGIN LICENSE BLOCK *****
|
|
||||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
|
||||||
*
|
|
||||||
* Copyright (C) 2002-2017 Németh László
|
|
||||||
*
|
|
||||||
* The contents of this file are subject to the Mozilla Public License Version
|
|
||||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
* http://www.mozilla.org/MPL/
|
|
||||||
*
|
|
||||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
|
||||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
|
||||||
* for the specific language governing rights and limitations under the
|
|
||||||
* License.
|
|
||||||
*
|
|
||||||
* Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
|
|
||||||
*
|
|
||||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
|
||||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
|
||||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
|
||||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
|
||||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
|
||||||
*
|
|
||||||
* Alternatively, the contents of this file may be used under the terms of
|
|
||||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
|
||||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
|
||||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
|
||||||
* of those above. If you wish to allow use of your version of this file only
|
|
||||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
|
||||||
* use your version of this file under the terms of the MPL, indicate your
|
|
||||||
* decision by deleting the provisions above and replace them with the notice
|
|
||||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
|
||||||
* the provisions above, a recipient may use your version of this file under
|
|
||||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
|
||||||
*
|
|
||||||
* ***** END LICENSE BLOCK ***** */
|
|
||||||
|
|
||||||
#include <cstdlib>
|
|
||||||
#include <cstring>
|
|
||||||
#include <cstdio>
|
|
||||||
#include <ctype.h>
|
|
||||||
|
|
||||||
#include "../hunspell/csutil.hxx"
|
|
||||||
#include "firstparser.hxx"
|
|
||||||
|
|
||||||
#ifndef W32
|
|
||||||
using namespace std;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// Construct a parser for the given word-character set; the argument is
// forwarded unchanged to the TextParser base class.
FirstParser::FirstParser(const char* wordchars) : TextParser(wordchars) {}

// Empty destructor body: this function itself performs no cleanup.
FirstParser::~FirstParser() {
}
|
|
||||||
|
|
||||||
bool FirstParser::next_token(std::string& t) {
|
|
||||||
t.clear();
|
|
||||||
const size_t tabpos = line[actual].find('\t');
|
|
||||||
if (tabpos != std::string::npos && tabpos > token) {
|
|
||||||
token = tabpos;
|
|
||||||
t = line[actual].substr(0, tabpos);
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
@ -1,56 +0,0 @@
|
|||||||
/* ***** BEGIN LICENSE BLOCK *****
|
|
||||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
|
||||||
*
|
|
||||||
* Copyright (C) 2002-2017 Németh László
|
|
||||||
*
|
|
||||||
* The contents of this file are subject to the Mozilla Public License Version
|
|
||||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
* http://www.mozilla.org/MPL/
|
|
||||||
*
|
|
||||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
|
||||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
|
||||||
* for the specific language governing rights and limitations under the
|
|
||||||
* License.
|
|
||||||
*
|
|
||||||
* Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
|
|
||||||
*
|
|
||||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
|
||||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
|
||||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
|
||||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
|
||||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
|
||||||
*
|
|
||||||
* Alternatively, the contents of this file may be used under the terms of
|
|
||||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
|
||||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
|
||||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
|
||||||
* of those above. If you wish to allow use of your version of this file only
|
|
||||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
|
||||||
* use your version of this file under the terms of the MPL, indicate your
|
|
||||||
* decision by deleting the provisions above and replace them with the notice
|
|
||||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
|
||||||
* the provisions above, a recipient may use your version of this file under
|
|
||||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
|
||||||
*
|
|
||||||
* ***** END LICENSE BLOCK ***** */
|
|
||||||
|
|
||||||
#ifndef FIRSTPARSER_HXX_
|
|
||||||
#define FIRSTPARSER_HXX_
|
|
||||||
|
|
||||||
#include "textparser.hxx"
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Check first word of the input line
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
// TextParser variant whose only customisation is the next_token() override.
class FirstParser : public TextParser {
 public:
  // wordchars is handed to the TextParser constructor.
  explicit FirstParser(const char* wordchars);
  virtual ~FirstParser();

  // Stores the next token in t and reports whether one was produced.
  virtual bool next_token(std::string& t);
};
|
|
||||||
|
|
||||||
#endif
|
|
@ -1,88 +0,0 @@
|
|||||||
/* ***** BEGIN LICENSE BLOCK *****
|
|
||||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
|
||||||
*
|
|
||||||
* Copyright (C) 2002-2017 Németh László
|
|
||||||
*
|
|
||||||
* The contents of this file are subject to the Mozilla Public License Version
|
|
||||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
* http://www.mozilla.org/MPL/
|
|
||||||
*
|
|
||||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
|
||||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
|
||||||
* for the specific language governing rights and limitations under the
|
|
||||||
* License.
|
|
||||||
*
|
|
||||||
* Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
|
|
||||||
*
|
|
||||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
|
||||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
|
||||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
|
||||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
|
||||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
|
||||||
*
|
|
||||||
* Alternatively, the contents of this file may be used under the terms of
|
|
||||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
|
||||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
|
||||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
|
||||||
* of those above. If you wish to allow use of your version of this file only
|
|
||||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
|
||||||
* use your version of this file under the terms of the MPL, indicate your
|
|
||||||
* decision by deleting the provisions above and replace them with the notice
|
|
||||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
|
||||||
* the provisions above, a recipient may use your version of this file under
|
|
||||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
|
||||||
*
|
|
||||||
* ***** END LICENSE BLOCK ***** */
|
|
||||||
|
|
||||||
#include <cstdlib>
|
|
||||||
#include <cstring>
|
|
||||||
#include <cstdio>
|
|
||||||
#include <ctype.h>
|
|
||||||
|
|
||||||
#include "../hunspell/csutil.hxx"
|
|
||||||
#include "htmlparser.hxx"
|
|
||||||
|
|
||||||
#ifndef W32
|
|
||||||
using namespace std;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// Delimiter pairs {opening, closing} handed to XMLParser::next_token() as
// its first pattern table. Every pattern is written in lower case.
// NOTE(review): this presumes XMLParser lower-cases the input before
// comparing, the way LaTeXParser::look_pattern() does - confirm.
// Order matters if the first matching row wins (as in the LaTeX parser):
// the generic "<" ... ">" row must then stay last.
static const char* PATTERN[][2] = {
    {"<script", "</script>"},
    {"<style", "</style>"},
    {"<code", "</code>"},
    {"<samp", "</samp>"},
    {"<kbd", "</kbd>"},
    {"<var", "</var>"},
    {"<listing", "</listing>"},
    {"<address", "</address>"},
    {"<pre", "</pre>"},
    {"<!--", "-->"},       // comment
    // A CDATA section opens with "<![CDATA[". The previous "<[cdata["
    // lacked the '!' and therefore never matched real markup.
    {"<![cdata[", "]]>"},  // CDATA section
    {"<", ">"}};           // any other tag

// Number of rows in PATTERN.
#define PATTERN_LEN (sizeof(PATTERN) / sizeof(PATTERN[0]))
|
|
||||||
|
|
||||||
// {tag opening, attribute} pairs passed as the second pattern table:
// the ALT and TITLE attributes get special handling.
static const char* PATTERN2[][2] = {
    {"<img", "alt="},
    {"<img", "title="},
    {"<a ", "title="},
};

// Number of rows in PATTERN2.
#define PATTERN_LEN2 (sizeof(PATTERN2) / sizeof(PATTERN2[0]))

// The HTML parser supplies no third pattern table.
static const char* (*PATTERN3)[2] = NULL;

#define PATTERN_LEN3 0
|
|
||||||
|
|
||||||
// Construct from a byte string of word characters; forwarded to XMLParser.
HTMLParser::HTMLParser(const char* wordchars) : XMLParser(wordchars) {}

// Construct from an array of len w_char word characters; forwarded to
// XMLParser.
HTMLParser::HTMLParser(const w_char* wordchars, int len)
    : XMLParser(wordchars, len) {}
|
|
||||||
|
|
||||||
bool HTMLParser::next_token(std::string& t) {
|
|
||||||
return XMLParser::next_token(PATTERN, PATTERN_LEN, PATTERN2, PATTERN_LEN2, PATTERN3, PATTERN_LEN3, t);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Empty destructor body: this function itself performs no cleanup.
HTMLParser::~HTMLParser() {
}
|
|
@ -1,56 +0,0 @@
|
|||||||
/* ***** BEGIN LICENSE BLOCK *****
|
|
||||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
|
||||||
*
|
|
||||||
* Copyright (C) 2002-2017 Németh László
|
|
||||||
*
|
|
||||||
* The contents of this file are subject to the Mozilla Public License Version
|
|
||||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
* http://www.mozilla.org/MPL/
|
|
||||||
*
|
|
||||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
|
||||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
|
||||||
* for the specific language governing rights and limitations under the
|
|
||||||
* License.
|
|
||||||
*
|
|
||||||
* Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
|
|
||||||
*
|
|
||||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
|
||||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
|
||||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
|
||||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
|
||||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
|
||||||
*
|
|
||||||
* Alternatively, the contents of this file may be used under the terms of
|
|
||||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
|
||||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
|
||||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
|
||||||
* of those above. If you wish to allow use of your version of this file only
|
|
||||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
|
||||||
* use your version of this file under the terms of the MPL, indicate your
|
|
||||||
* decision by deleting the provisions above and replace them with the notice
|
|
||||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
|
||||||
* the provisions above, a recipient may use your version of this file under
|
|
||||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
|
||||||
*
|
|
||||||
* ***** END LICENSE BLOCK ***** */
|
|
||||||
|
|
||||||
#ifndef HTMLPARSER_HXX_
|
|
||||||
#define HTMLPARSER_HXX_
|
|
||||||
|
|
||||||
#include "xmlparser.hxx"
|
|
||||||
|
|
||||||
/*
|
|
||||||
* HTML Parser
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
class HTMLParser : public XMLParser {
|
|
||||||
public:
|
|
||||||
explicit HTMLParser(const char* wc);
|
|
||||||
HTMLParser(const w_char* wordchars, int len);
|
|
||||||
virtual bool next_token(std::string&);
|
|
||||||
virtual ~HTMLParser();
|
|
||||||
};
|
|
||||||
|
|
||||||
#endif
|
|
277
3rdparty/hunspell/1.7.0/src/parsers/latexparser.cxx
vendored
277
3rdparty/hunspell/1.7.0/src/parsers/latexparser.cxx
vendored
@ -1,277 +0,0 @@
|
|||||||
/* ***** BEGIN LICENSE BLOCK *****
|
|
||||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
|
||||||
*
|
|
||||||
* Copyright (C) 2002-2017 Németh László
|
|
||||||
*
|
|
||||||
* The contents of this file are subject to the Mozilla Public License Version
|
|
||||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
* http://www.mozilla.org/MPL/
|
|
||||||
*
|
|
||||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
|
||||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
|
||||||
* for the specific language governing rights and limitations under the
|
|
||||||
* License.
|
|
||||||
*
|
|
||||||
* Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
|
|
||||||
*
|
|
||||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
|
||||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
|
||||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
|
||||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
|
||||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
|
||||||
*
|
|
||||||
* Alternatively, the contents of this file may be used under the terms of
|
|
||||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
|
||||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
|
||||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
|
||||||
* of those above. If you wish to allow use of your version of this file only
|
|
||||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
|
||||||
* use your version of this file under the terms of the MPL, indicate your
|
|
||||||
* decision by deleting the provisions above and replace them with the notice
|
|
||||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
|
||||||
* the provisions above, a recipient may use your version of this file under
|
|
||||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
|
||||||
*
|
|
||||||
* ***** END LICENSE BLOCK ***** */
|
|
||||||
|
|
||||||
#include <cstdlib>
|
|
||||||
#include <cstring>
|
|
||||||
#include <cstdio>
|
|
||||||
#include <ctype.h>
|
|
||||||
|
|
||||||
#include "../hunspell/csutil.hxx"
|
|
||||||
#include "latexparser.hxx"
|
|
||||||
|
|
||||||
#ifndef W32
|
|
||||||
using namespace std;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// The three bytes E2 80 99: UTF-8 encoding of U+2019 (typographic apostrophe).
#define UTF8_APOS "\xe2\x80\x99"
// Plain ASCII apostrophe.
#define APOSTROPHE "'"
|
|
||||||
|
|
||||||
// Table of LaTeX constructs whose content is not tokenized.
//   pat[0]  opening text (lower case; input is lower-cased when compared)
//   pat[1]  closing text, or NULL when the construct is a command that is
//           followed by brace arguments
//   arg     number of brace arguments to skip when pat[1] is NULL
// look_pattern() returns the first row that matches, so the order of the
// rows is significant.
static struct {
  const char* pat[2];
  int arg;
} PATTERN[] = {
    // math
    {{"\\(", "\\)"}, 0},
    {{"$$", "$$"}, 0},
    {{"$", "$"}, 0},
    {{"\\begin{math}", "\\end{math}"}, 0},
    {{"\\[", "\\]"}, 0},
    {{"\\begin{displaymath}", "\\end{displaymath}"}, 0},
    {{"\\begin{equation}", "\\end{equation}"}, 0},
    {{"\\begin{equation*}", "\\end{equation*}"}, 0},
    // citations, labels and references
    {{"\\cite", NULL}, 1},
    {{"\\nocite", NULL}, 1},
    {{"\\index", NULL}, 1},
    {{"\\label", NULL}, 1},
    {{"\\ref", NULL}, 1},
    {{"\\pageref", NULL}, 1},
    {{"\\autoref", NULL}, 1},
    {{"\\parbox", NULL}, 1},
    // verbatim text
    {{"\\begin{verbatim}", "\\end{verbatim}"}, 0},
    {{"\\verb+", "+"}, 0},
    {{"\\verb|", "|"}, 0},
    {{"\\verb#", "#"}, 0},
    {{"\\verb*", "*"}, 0},
    // preamble
    {{"\\documentstyle", "\\begin{document}"}, 0},
    {{"\\documentclass", "\\begin{document}"}, 0},
    // { { "\\documentclass", NULL } , 1 },
    {{"\\usepackage", NULL}, 1},
    {{"\\includeonly", NULL}, 1},
    {{"\\include", NULL}, 1},
    {{"\\input", NULL}, 1},
    // lengths and spacing
    {{"\\vspace", NULL}, 1},
    {{"\\setlength", NULL}, 2},
    {{"\\addtolength", NULL}, 2},
    {{"\\settowidth", NULL}, 2},
    {{"\\rule", NULL}, 2},
    {{"\\hspace", NULL}, 1},
    {{"\\vspace", NULL}, 1},
    {{"\\\\[", "]"}, 0},
    {{"\\pagebreak[", "]"}, 0},
    {{"\\nopagebreak[", "]"}, 0},
    {{"\\enlargethispage", NULL}, 1},
    // environments, contents and bibliography
    {{"\\begin{tabular}", NULL}, 1},
    {{"\\addcontentsline", NULL}, 2},
    {{"\\begin{thebibliography}", NULL}, 1},
    {{"\\bibliography", NULL}, 1},
    {{"\\bibliographystyle", NULL}, 1},
    {{"\\bibitem", NULL}, 1},
    {{"\\begin", NULL}, 1},
    {{"\\end", NULL}, 1},
    // page style
    {{"\\pagestyle", NULL}, 1},
    {{"\\pagenumbering", NULL}, 1},
    {{"\\thispagestyle", NULL}, 1},
    // definitions and counters
    {{"\\newtheorem", NULL}, 2},
    {{"\\newcommand", NULL}, 2},
    {{"\\renewcommand", NULL}, 2},
    {{"\\setcounter", NULL}, 2},
    {{"\\addtocounter", NULL}, 1},
    {{"\\stepcounter", NULL}, 1},
    // language and encoding
    {{"\\selectlanguage", NULL}, 1},
    {{"\\inputencoding", NULL}, 1},
    {{"\\hyphenation", NULL}, 1},
    // colour
    {{"\\definecolor", NULL}, 3},
    {{"\\color", NULL}, 1},
    {{"\\textcolor", NULL}, 1},
    {{"\\pagecolor", NULL}, 1},
    {{"\\colorbox", NULL}, 2},
    {{"\\fcolorbox", NULL}, 2},
    // graphics, links and miscellaneous
    {{"\\declaregraphicsextensions", NULL}, 1},
    {{"\\psfig", NULL}, 1},
    {{"\\url", NULL}, 1},
    {{"\\eqref", NULL}, 1},
    {{"\\vskip", NULL}, 1},
    {{"\\vglue", NULL}, 1},
    {{"\'\'", NULL}, 1}};

// Number of rows in PATTERN.
#define PATTERN_LEN (sizeof(PATTERN) / sizeof(*PATTERN))
|
|
||||||
|
|
||||||
// Construct from a byte string of word characters. The argument goes to
// TextParser and all LaTeX bookkeeping members start at zero.
LaTeXParser::LaTeXParser(const char* wordchars)
    : TextParser(wordchars),
      pattern_num(0),
      depth(0),
      arg(0),
      opt(0) {}

// Construct from an array of len w_char word characters. The arguments go
// to TextParser and all LaTeX bookkeeping members start at zero.
LaTeXParser::LaTeXParser(const w_char* wordchars, int len)
    : TextParser(wordchars, len),
      pattern_num(0),
      depth(0),
      arg(0),
      opt(0) {}

// Empty destructor body: this function itself performs no cleanup.
LaTeXParser::~LaTeXParser() {
}
|
|
||||||
|
|
||||||
int LaTeXParser::look_pattern(int col) {
|
|
||||||
for (unsigned int i = 0; i < PATTERN_LEN; i++) {
|
|
||||||
const char* j = line[actual].c_str() + head;
|
|
||||||
const char* k = PATTERN[i].pat[col];
|
|
||||||
if (!k)
|
|
||||||
continue;
|
|
||||||
while ((*k != '\0') && (tolower(*j) == *k)) {
|
|
||||||
j++;
|
|
||||||
k++;
|
|
||||||
}
|
|
||||||
if (*k == '\0')
|
|
||||||
return i;
|
|
||||||
}
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* LaTeXParser
|
|
||||||
*
|
|
||||||
* state 0: not wordchar
|
|
||||||
* state 1: wordchar
|
|
||||||
* state 2: skipped region, up to the closing text of the matched pattern
|
|
||||||
* state 3: commands
|
|
||||||
* state 4: commands with arguments
|
|
||||||
* state 5: % comment
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
bool LaTeXParser::next_token(std::string& t) {
|
|
||||||
t.clear();
|
|
||||||
int i;
|
|
||||||
int slash = 0;
|
|
||||||
int apostrophe;
|
|
||||||
for (;;) {
|
|
||||||
// fprintf(stderr,"depth: %d, state: %d, , arg: %d, token:
|
|
||||||
// %s\n",depth,state,arg,line[actual]+head);
|
|
||||||
|
|
||||||
switch (state) {
|
|
||||||
case 0: // non word chars
|
|
||||||
if ((pattern_num = look_pattern(0)) != -1) {
|
|
||||||
if (PATTERN[pattern_num].pat[1]) {
|
|
||||||
state = 2;
|
|
||||||
} else {
|
|
||||||
state = 4;
|
|
||||||
depth = 0;
|
|
||||||
arg = 0;
|
|
||||||
opt = 1;
|
|
||||||
}
|
|
||||||
head += strlen(PATTERN[pattern_num].pat[0]) - 1;
|
|
||||||
} else if (line[actual][head] == '%') {
|
|
||||||
state = 5;
|
|
||||||
} else if (is_wordchar(line[actual].c_str() + head)) {
|
|
||||||
state = 1;
|
|
||||||
token = head;
|
|
||||||
} else if (line[actual][head] == '\\') {
|
|
||||||
if (line[actual][head + 1] == '\\' || // \\ (linebreak)
|
|
||||||
(line[actual][head + 1] == '$') || // \$ (dollar sign)
|
|
||||||
(line[actual][head + 1] == '%')) { // \% (percent)
|
|
||||||
head++;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
state = 3;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case 1: // wordchar
|
|
||||||
apostrophe = 0;
|
|
||||||
if ((is_wordchar((char*)APOSTROPHE) ||
|
|
||||||
(is_utf8() && is_wordchar((char*)UTF8_APOS))) &&
|
|
||||||
!line[actual].empty() && line[actual][head] == '\'' &&
|
|
||||||
is_wordchar(line[actual].c_str() + head + 1)) {
|
|
||||||
head++;
|
|
||||||
} else if (is_utf8() &&
|
|
||||||
is_wordchar((char*)APOSTROPHE) && // add Unicode apostrophe
|
|
||||||
// to the WORDCHARS, if
|
|
||||||
// needed
|
|
||||||
strncmp(line[actual].c_str() + head, UTF8_APOS, strlen(UTF8_APOS)) ==
|
|
||||||
0 &&
|
|
||||||
is_wordchar(line[actual].c_str() + head + strlen(UTF8_APOS))) {
|
|
||||||
head += strlen(UTF8_APOS) - 1;
|
|
||||||
} else if (!is_wordchar(line[actual].c_str() + head) ||
|
|
||||||
(line[actual][head] == '\'' && line[actual][head + 1] == '\'' &&
|
|
||||||
++apostrophe)) {
|
|
||||||
state = 0;
|
|
||||||
bool ok = alloc_token(token, &head, t);
|
|
||||||
if (apostrophe)
|
|
||||||
head += 2;
|
|
||||||
if (ok)
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case 2: // comment, labels, etc
|
|
||||||
if (((i = look_pattern(1)) != -1) &&
|
|
||||||
(strcmp(PATTERN[i].pat[1], PATTERN[pattern_num].pat[1]) == 0)) {
|
|
||||||
state = 0;
|
|
||||||
head += strlen(PATTERN[pattern_num].pat[1]) - 1;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case 3: // command
|
|
||||||
if ((tolower(line[actual][head]) < 'a') ||
|
|
||||||
(tolower(line[actual][head]) > 'z')) {
|
|
||||||
state = 0;
|
|
||||||
head--;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case 4: // command with arguments
|
|
||||||
if (slash && (line[actual][head] != '\0')) {
|
|
||||||
slash = 0;
|
|
||||||
head++;
|
|
||||||
break;
|
|
||||||
} else if (line[actual][head] == '\\') {
|
|
||||||
slash = 1;
|
|
||||||
} else if ((line[actual][head] == '{') ||
|
|
||||||
((opt) && (line[actual][head] == '['))) {
|
|
||||||
depth++;
|
|
||||||
opt = 0;
|
|
||||||
} else if (line[actual][head] == '}') {
|
|
||||||
depth--;
|
|
||||||
if (depth == 0) {
|
|
||||||
opt = 1;
|
|
||||||
arg++;
|
|
||||||
}
|
|
||||||
if (((depth == 0) && (arg == PATTERN[pattern_num].arg)) ||
|
|
||||||
(depth < 0)) {
|
|
||||||
state = 0; // XXX not handles the last optional arg.
|
|
||||||
}
|
|
||||||
} else if (line[actual][head] == ']')
|
|
||||||
depth--;
|
|
||||||
} // case
|
|
||||||
if (next_char(line[actual].c_str(), &head)) {
|
|
||||||
if (state == 5)
|
|
||||||
state = 0;
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,65 +0,0 @@
|
|||||||
/* ***** BEGIN LICENSE BLOCK *****
|
|
||||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
|
||||||
*
|
|
||||||
* Copyright (C) 2002-2017 Németh László
|
|
||||||
*
|
|
||||||
* The contents of this file are subject to the Mozilla Public License Version
|
|
||||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
* http://www.mozilla.org/MPL/
|
|
||||||
*
|
|
||||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
|
||||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
|
||||||
* for the specific language governing rights and limitations under the
|
|
||||||
* License.
|
|
||||||
*
|
|
||||||
* Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
|
|
||||||
*
|
|
||||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
|
||||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
|
||||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
|
||||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
|
||||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
|
||||||
*
|
|
||||||
* Alternatively, the contents of this file may be used under the terms of
|
|
||||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
|
||||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
|
||||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
|
||||||
* of those above. If you wish to allow use of your version of this file only
|
|
||||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
|
||||||
* use your version of this file under the terms of the MPL, indicate your
|
|
||||||
* decision by deleting the provisions above and replace them with the notice
|
|
||||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
|
||||||
* the provisions above, a recipient may use your version of this file under
|
|
||||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
|
||||||
*
|
|
||||||
* ***** END LICENSE BLOCK ***** */
|
|
||||||
|
|
||||||
#ifndef LATEXPARSER_HXX_
|
|
||||||
#define LATEXPARSER_HXX_
|
|
||||||
|
|
||||||
#include "textparser.hxx"
|
|
||||||
|
|
||||||
/*
|
|
||||||
* LaTeX Parser
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
class LaTeXParser : public TextParser {
|
|
||||||
int pattern_num; // number of comment
|
|
||||||
int depth; // depth of blocks
|
|
||||||
int arg; // arguments's number
|
|
||||||
int opt; // optional argument attrib.
|
|
||||||
|
|
||||||
public:
|
|
||||||
explicit LaTeXParser(const char* wc);
|
|
||||||
LaTeXParser(const w_char* wordchars, int len);
|
|
||||||
virtual ~LaTeXParser();
|
|
||||||
|
|
||||||
virtual bool next_token(std::string&);
|
|
||||||
|
|
||||||
private:
|
|
||||||
int look_pattern(int col);
|
|
||||||
};
|
|
||||||
|
|
||||||
#endif
|
|
@ -1,98 +0,0 @@
|
|||||||
/* ***** BEGIN LICENSE BLOCK *****
|
|
||||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
|
||||||
*
|
|
||||||
* Copyright (C) 2002-2017 Németh László
|
|
||||||
*
|
|
||||||
* The contents of this file are subject to the Mozilla Public License Version
|
|
||||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
* http://www.mozilla.org/MPL/
|
|
||||||
*
|
|
||||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
|
||||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
|
||||||
* for the specific language governing rights and limitations under the
|
|
||||||
* License.
|
|
||||||
*
|
|
||||||
* Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
|
|
||||||
*
|
|
||||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
|
||||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
|
||||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
|
||||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
|
||||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
|
||||||
*
|
|
||||||
* Alternatively, the contents of this file may be used under the terms of
|
|
||||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
|
||||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
|
||||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
|
||||||
* of those above. If you wish to allow use of your version of this file only
|
|
||||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
|
||||||
* use your version of this file under the terms of the MPL, indicate your
|
|
||||||
* decision by deleting the provisions above and replace them with the notice
|
|
||||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
|
||||||
* the provisions above, a recipient may use your version of this file under
|
|
||||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
|
||||||
*
|
|
||||||
* ***** END LICENSE BLOCK ***** */
|
|
||||||
|
|
||||||
#include <cstdlib>
|
|
||||||
#include <cstring>
|
|
||||||
#include <cstdio>
|
|
||||||
#include <ctype.h>
|
|
||||||
|
|
||||||
#include "../hunspell/csutil.hxx"
|
|
||||||
#include "manparser.hxx"
|
|
||||||
|
|
||||||
#ifndef W32
|
|
||||||
using namespace std;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// Construct from a byte string of word characters; forwarded to TextParser.
ManParser::ManParser(const char* wordchars) : TextParser(wordchars) {}

// Construct from an array of len w_char word characters; forwarded to
// TextParser.
ManParser::ManParser(const w_char* wordchars, int len)
    : TextParser(wordchars, len) {}

// Empty destructor body: this function itself performs no cleanup.
ManParser::~ManParser() {
}
|
|
||||||
|
|
||||||
bool ManParser::next_token(std::string& t) {
|
|
||||||
for (;;) {
|
|
||||||
switch (state) {
|
|
||||||
case 1: // command arguments
|
|
||||||
if (line[actual][head] == ' ')
|
|
||||||
state = 2;
|
|
||||||
break;
|
|
||||||
case 0: // dot in begin of line
|
|
||||||
if (line[actual][0] == '.') {
|
|
||||||
state = 1;
|
|
||||||
break;
|
|
||||||
} else {
|
|
||||||
state = 2;
|
|
||||||
}
|
|
||||||
/* FALLTHROUGH */
|
|
||||||
case 2: // non word chars
|
|
||||||
if (is_wordchar(line[actual].c_str() + head)) {
|
|
||||||
state = 3;
|
|
||||||
token = head;
|
|
||||||
} else if ((line[actual][head] == '\\') &&
|
|
||||||
(line[actual][head + 1] == 'f') &&
|
|
||||||
(line[actual][head + 2] != '\0')) {
|
|
||||||
head += 2;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case 3: // wordchar
|
|
||||||
if (!is_wordchar(line[actual].c_str() + head)) {
|
|
||||||
state = 2;
|
|
||||||
if (alloc_token(token, &head, t))
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
if (next_char(line[actual].c_str(), &head)) {
|
|
||||||
state = 0;
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,58 +0,0 @@
|
|||||||
/* ***** BEGIN LICENSE BLOCK *****
|
|
||||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
|
||||||
*
|
|
||||||
* Copyright (C) 2002-2017 Németh László
|
|
||||||
*
|
|
||||||
* The contents of this file are subject to the Mozilla Public License Version
|
|
||||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
* http://www.mozilla.org/MPL/
|
|
||||||
*
|
|
||||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
|
||||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
|
||||||
* for the specific language governing rights and limitations under the
|
|
||||||
* License.
|
|
||||||
*
|
|
||||||
* Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
|
|
||||||
*
|
|
||||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
|
||||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
|
||||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
|
||||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
|
||||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
|
||||||
*
|
|
||||||
* Alternatively, the contents of this file may be used under the terms of
|
|
||||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
|
||||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
|
||||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
|
||||||
* of those above. If you wish to allow use of your version of this file only
|
|
||||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
|
||||||
* use your version of this file under the terms of the MPL, indicate your
|
|
||||||
* decision by deleting the provisions above and replace them with the notice
|
|
||||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
|
||||||
* the provisions above, a recipient may use your version of this file under
|
|
||||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
|
||||||
*
|
|
||||||
* ***** END LICENSE BLOCK ***** */
|
|
||||||
|
|
||||||
#ifndef MANPARSER_HXX_
|
|
||||||
#define MANPARSER_HXX_
|
|
||||||
|
|
||||||
#include "textparser.hxx"
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Man page parser
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
class ManParser : public TextParser {
|
|
||||||
protected:
|
|
||||||
public:
|
|
||||||
explicit ManParser(const char* wc);
|
|
||||||
ManParser(const w_char* wordchars, int len);
|
|
||||||
virtual ~ManParser();
|
|
||||||
|
|
||||||
virtual bool next_token(std::string&);
|
|
||||||
};
|
|
||||||
|
|
||||||
#endif
|
|
@ -1,86 +0,0 @@
|
|||||||
/* ***** BEGIN LICENSE BLOCK *****
|
|
||||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
|
||||||
*
|
|
||||||
* Copyright (C) 2002-2017 Németh László
|
|
||||||
*
|
|
||||||
* The contents of this file are subject to the Mozilla Public License Version
|
|
||||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
* http://www.mozilla.org/MPL/
|
|
||||||
*
|
|
||||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
|
||||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
|
||||||
* for the specific language governing rights and limitations under the
|
|
||||||
* License.
|
|
||||||
*
|
|
||||||
* Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
|
|
||||||
*
|
|
||||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
|
||||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
|
||||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
|
||||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
|
||||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
|
||||||
*
|
|
||||||
* Alternatively, the contents of this file may be used under the terms of
|
|
||||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
|
||||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
|
||||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
|
||||||
* of those above. If you wish to allow use of your version of this file only
|
|
||||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
|
||||||
* use your version of this file under the terms of the MPL, indicate your
|
|
||||||
* decision by deleting the provisions above and replace them with the notice
|
|
||||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
|
||||||
* the provisions above, a recipient may use your version of this file under
|
|
||||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
|
||||||
*
|
|
||||||
* ***** END LICENSE BLOCK ***** */
|
|
||||||
|
|
||||||
#include <cstdlib>
|
|
||||||
#include <cstring>
|
|
||||||
#include <cstdio>
|
|
||||||
#include <ctype.h>
|
|
||||||
|
|
||||||
#include "../hunspell/csutil.hxx"
|
|
||||||
#include "odfparser.hxx"
|
|
||||||
|
|
||||||
#ifndef W32
|
|
||||||
using namespace std;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// Regions skipped by the ODF tokenizer, as {opening, closing} delimiter pairs.
// Earlier rows take precedence over later ones, so the generic "<" ... ">"
// pair must stay last. Openers are written in lower case.
static const char* PATTERN[][2] = {
    {"<office:meta>", "</office:meta>"},
    {"<office:settings>", "</office:settings>"},
    {"<office:binary-data>", "</office:binary-data>"},
    {"<!--", "-->"},       // XML comment
    {"<![cdata[", "]]>"},  // CDATA section ("<![CDATA[" in the document)
    {"<", ">"}};

#define PATTERN_LEN (sizeof(PATTERN) / sizeof(PATTERN[0]))

// Second pattern table handed to XMLParser::next_token(); ODF supplies none.
static const char* (*PATTERN2)[2] = NULL;

#define PATTERN_LEN2 0

// Third pattern table: markup that may occur in the middle of a word.
static const char* PATTERN3[][2] = {
    {"<text:span", ">"},    // part of the reedited words
    {"</text:span", ">"}};  // for example, an inserted letter

#define PATTERN_LEN3 (sizeof(PATTERN3) / sizeof(PATTERN3[0]))
|
|
||||||
|
|
||||||
// Creates an ODF tokenizer from an 8-bit word-character list; the list is
// forwarded untouched to the XMLParser base.
ODFParser::ODFParser(const char* wordchars) : XMLParser(wordchars) {}

// Creates an ODF tokenizer from a table of `len` w_char word characters; both
// arguments are forwarded untouched to the XMLParser base.
ODFParser::ODFParser(const w_char* wordchars, int len)
    : XMLParser(wordchars, len) {}
|
|
||||||
|
|
||||||
bool ODFParser::next_token(std::string& t) {
|
|
||||||
return XMLParser::next_token(PATTERN, PATTERN_LEN, PATTERN2, PATTERN_LEN2, PATTERN3, PATTERN_LEN3, t);
|
|
||||||
}
|
|
||||||
|
|
||||||
std::string ODFParser::get_word(const std::string &tok) {
|
|
||||||
return XMLParser::get_word2(PATTERN3, PATTERN_LEN3, tok);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Intentionally empty: this file gives ODFParser nothing of its own to clean up.
ODFParser::~ODFParser() {
}
|
|
@ -1,57 +0,0 @@
|
|||||||
/* ***** BEGIN LICENSE BLOCK *****
|
|
||||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
|
||||||
*
|
|
||||||
* Copyright (C) 2002-2017 Németh László
|
|
||||||
*
|
|
||||||
* The contents of this file are subject to the Mozilla Public License Version
|
|
||||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
* http://www.mozilla.org/MPL/
|
|
||||||
*
|
|
||||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
|
||||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
|
||||||
* for the specific language governing rights and limitations under the
|
|
||||||
* License.
|
|
||||||
*
|
|
||||||
* Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
|
|
||||||
*
|
|
||||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
|
||||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
|
||||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
|
||||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
|
||||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
|
||||||
*
|
|
||||||
* Alternatively, the contents of this file may be used under the terms of
|
|
||||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
|
||||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
|
||||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
|
||||||
* of those above. If you wish to allow use of your version of this file only
|
|
||||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
|
||||||
* use your version of this file under the terms of the MPL, indicate your
|
|
||||||
* decision by deleting the provisions above and replace them with the notice
|
|
||||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
|
||||||
* the provisions above, a recipient may use your version of this file under
|
|
||||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
|
||||||
*
|
|
||||||
* ***** END LICENSE BLOCK ***** */
|
|
||||||
|
|
||||||
#ifndef ODFPARSER_HXX_
|
|
||||||
#define ODFPARSER_HXX_
|
|
||||||
|
|
||||||
#include "xmlparser.hxx"
|
|
||||||
|
|
||||||
/*
|
|
||||||
* ODF Parser
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
class ODFParser : public XMLParser {
|
|
||||||
public:
|
|
||||||
explicit ODFParser(const char* wc);
|
|
||||||
ODFParser(const w_char* wordchars, int len);
|
|
||||||
virtual bool next_token(std::string&);
|
|
||||||
virtual std::string get_word(const std::string &tok);
|
|
||||||
virtual ~ODFParser();
|
|
||||||
};
|
|
||||||
|
|
||||||
#endif
|
|
@ -1,86 +0,0 @@
|
|||||||
/* ***** BEGIN LICENSE BLOCK *****
|
|
||||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
|
||||||
*
|
|
||||||
* Copyright (C) 2002-2017 Németh László
|
|
||||||
*
|
|
||||||
* The contents of this file are subject to the Mozilla Public License Version
|
|
||||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
* http://www.mozilla.org/MPL/
|
|
||||||
*
|
|
||||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
|
||||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
|
||||||
* for the specific language governing rights and limitations under the
|
|
||||||
* License.
|
|
||||||
*
|
|
||||||
* Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
|
|
||||||
*
|
|
||||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
|
||||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
|
||||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
|
||||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
|
||||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
|
||||||
*
|
|
||||||
* Alternatively, the contents of this file may be used under the terms of
|
|
||||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
|
||||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
|
||||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
|
||||||
* of those above. If you wish to allow use of your version of this file only
|
|
||||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
|
||||||
* use your version of this file under the terms of the MPL, indicate your
|
|
||||||
* decision by deleting the provisions above and replace them with the notice
|
|
||||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
|
||||||
* the provisions above, a recipient may use your version of this file under
|
|
||||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
|
||||||
*
|
|
||||||
* ***** END LICENSE BLOCK ***** */
|
|
||||||
|
|
||||||
#include <cstring>
|
|
||||||
#include <cstdlib>
|
|
||||||
#include <cstdio>
|
|
||||||
|
|
||||||
#include "textparser.hxx"
|
|
||||||
#include "htmlparser.hxx"
|
|
||||||
#include "latexparser.hxx"
|
|
||||||
#include "xmlparser.hxx"
|
|
||||||
|
|
||||||
#ifndef W32
|
|
||||||
using namespace std;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char** argv) {
|
|
||||||
FILE* f;
|
|
||||||
/* first parse the command line options */
|
|
||||||
|
|
||||||
if (argc < 2) {
|
|
||||||
fprintf(stderr, "correct syntax is:\n");
|
|
||||||
fprintf(stderr, "testparser file\n");
|
|
||||||
fprintf(stderr, "example: testparser /dev/stdin\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* open the words to check list */
|
|
||||||
f = fopen(argv[1], "r");
|
|
||||||
if (!f) {
|
|
||||||
fprintf(stderr, "Error - could not open file of words to check\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
TextParser* p = new TextParser(
|
|
||||||
"qwertzuiopasdfghjklyxcvbnmQWERTZUIOPASDFGHJKLYXCVBNM");
|
|
||||||
|
|
||||||
char buf[MAXLNLEN];
|
|
||||||
|
|
||||||
while (fgets(buf, MAXLNLEN, f)) {
|
|
||||||
p->put_line(buf);
|
|
||||||
p->set_url_checking(1);
|
|
||||||
std::string next;
|
|
||||||
while (p->next_token(next)) {
|
|
||||||
fprintf(stdout, "token: %s\n", next.c_str());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
delete p;
|
|
||||||
fclose(f);
|
|
||||||
return 0;
|
|
||||||
}
|
|
302
3rdparty/hunspell/1.7.0/src/parsers/textparser.cxx
vendored
302
3rdparty/hunspell/1.7.0/src/parsers/textparser.cxx
vendored
@ -1,302 +0,0 @@
|
|||||||
/* ***** BEGIN LICENSE BLOCK *****
|
|
||||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
|
||||||
*
|
|
||||||
* Copyright (C) 2002-2017 Németh László
|
|
||||||
*
|
|
||||||
* The contents of this file are subject to the Mozilla Public License Version
|
|
||||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
* http://www.mozilla.org/MPL/
|
|
||||||
*
|
|
||||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
|
||||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
|
||||||
* for the specific language governing rights and limitations under the
|
|
||||||
* License.
|
|
||||||
*
|
|
||||||
* Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
|
|
||||||
*
|
|
||||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
|
||||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
|
||||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
|
||||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
|
||||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
|
||||||
*
|
|
||||||
* Alternatively, the contents of this file may be used under the terms of
|
|
||||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
|
||||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
|
||||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
|
||||||
* of those above. If you wish to allow use of your version of this file only
|
|
||||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
|
||||||
* use your version of this file under the terms of the MPL, indicate your
|
|
||||||
* decision by deleting the provisions above and replace them with the notice
|
|
||||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
|
||||||
* the provisions above, a recipient may use your version of this file under
|
|
||||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
|
||||||
*
|
|
||||||
* ***** END LICENSE BLOCK ***** */
|
|
||||||
|
|
||||||
#include <cstdlib>
|
|
||||||
#include <cstring>
|
|
||||||
#include <cstdio>
|
|
||||||
#include <ctype.h>
|
|
||||||
|
|
||||||
#include "../hunspell/csutil.hxx"
|
|
||||||
#include "textparser.hxx"
|
|
||||||
|
|
||||||
#include <algorithm>
|
|
||||||
|
|
||||||
#ifndef W32
|
|
||||||
using namespace std;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// ISO-8859-1 HTML character entities that are accepted as word characters.
// Each entry is the entity *name* as it appears in the input text (leading
// '&', trailing ';'): get_latin1() only looks at text that starts with '&'
// and compares it byte-wise against these strings, so the entries must not be
// replaced by the characters they stand for.
static const char* LATIN1[] = {
    "&Agrave;", "&Atilde;", "&Aring;",  "&AElig;",  "&Egrave;", "&Ecirc;",
    "&Igrave;", "&Iuml;",   "&ETH;",    "&Ntilde;", "&Ograve;", "&Oslash;",
    "&Ugrave;", "&THORN;",  "&agrave;", "&atilde;", "&aring;",  "&aelig;",
    "&egrave;", "&ecirc;",  "&igrave;", "&iuml;",   "&eth;",    "&ntilde;",
    "&ograve;", "&oslash;", "&ugrave;", "&thorn;",  "&yuml;"};

#define LATIN1_LEN (sizeof(LATIN1) / sizeof(char*))
|
|
||||||
|
|
||||||
// The three spellings of an apostrophe this file distinguishes.
#define ENTITY_APOS "&apos;"      // XML/HTML named entity, as written in the input
#define UTF8_APOS "\xe2\x80\x99"  // three-byte UTF-8 sequence (U+2019)
#define APOSTROPHE "'"            // plain ASCII apostrophe
|
|
||||||
|
|
||||||
// 8-bit construction: all set-up is done by init(const char*).
TextParser::TextParser(const char* wordchars) {
  this->init(wordchars);
}

// UTF-8 construction: all set-up is done by init(const w_char*, int).
TextParser::TextParser(const w_char* wordchars, int len) {
  this->init(wordchars, len);
}

// Every member cleans up after itself; the w_char table passed to the second
// constructor is only referenced, never freed here.
TextParser::~TextParser() {}
|
|
||||||
|
|
||||||
int TextParser::is_wordchar(const char* w) {
|
|
||||||
if (*w == '\0')
|
|
||||||
return 0;
|
|
||||||
if (utf8) {
|
|
||||||
std::vector<w_char> wc;
|
|
||||||
unsigned short idx;
|
|
||||||
u8_u16(wc, w);
|
|
||||||
if (wc.empty())
|
|
||||||
return 0;
|
|
||||||
idx = (wc[0].h << 8) + wc[0].l;
|
|
||||||
return (unicodeisalpha(idx) ||
|
|
||||||
(wordchars_utf16 &&
|
|
||||||
std::binary_search(wordchars_utf16, wordchars_utf16 + wclen, wc[0])));
|
|
||||||
} else {
|
|
||||||
return wordcharacters[(*w + 256) % 256];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
const char* TextParser::get_latin1(const char* s) {
|
|
||||||
if (s[0] == '&') {
|
|
||||||
unsigned int i = 0;
|
|
||||||
while ((i < LATIN1_LEN) && strncmp(LATIN1[i], s, strlen(LATIN1[i])))
|
|
||||||
i++;
|
|
||||||
if (i != LATIN1_LEN)
|
|
||||||
return LATIN1[i];
|
|
||||||
}
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Puts the parser into its initial state for 8-bit input and builds the
// 256-entry word-character flag table from `wordchars`. A NULL list selects
// the built-in ASCII letters.
void TextParser::init(const char* wordchars) {
  // scanner position and state
  actual = 0;
  head = 0;
  token = 0;
  state = 0;

  // 8-bit mode: no UTF-16 table
  utf8 = 0;
  wordchars_utf16 = NULL;
  wclen = 0;

  checkurl = 0;

  wordcharacters.resize(256, 0);
  const char* p = wordchars
                      ? wordchars
                      : "qwertzuiopasdfghjklyxcvbnmQWERTZUIOPASDFGHJKLYXCVBNM";
  for (; *p != '\0'; ++p) {
    // Index by the byte value 0..255 regardless of the signedness of char.
    wordcharacters[static_cast<unsigned char>(*p)] = 1;
  }
}
|
|
||||||
|
|
||||||
// Puts the parser into its initial state for UTF-8 input. The table `wc` of
// `len` entries is only referenced (the pointer is stored, nothing is
// copied), so it must outlive the parser.
void TextParser::init(const w_char* wc, int len) {
  // word-character table and encoding mode
  wordchars_utf16 = wc;
  wclen = len;
  utf8 = 1;

  // scanner position and state
  actual = 0;
  head = 0;
  token = 0;
  state = 0;

  checkurl = 0;
}
|
|
||||||
|
|
||||||
int TextParser::next_char(const char* ln, size_t* pos) {
|
|
||||||
if (*(ln + *pos) == '\0')
|
|
||||||
return 1;
|
|
||||||
if (utf8) {
|
|
||||||
if (*(ln + *pos) >> 7) {
|
|
||||||
// jump to next UTF-8 character
|
|
||||||
for ((*pos)++; (*(ln + *pos) & 0xc0) == 0x80; (*pos)++)
|
|
||||||
;
|
|
||||||
} else {
|
|
||||||
(*pos)++;
|
|
||||||
}
|
|
||||||
} else
|
|
||||||
(*pos)++;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Makes `word` the current line: moves to the next slot of the line ring,
// stores the text there, rewinds the scan position and recomputes the URL
// mask for the new line.
void TextParser::put_line(const char* word) {
  const int slot = (actual + 1) % MAXPREVLINE;
  actual = slot;
  line[slot] = word;

  head = 0;
  token = 0;

  check_urls();
}
|
|
||||||
|
|
||||||
std::string TextParser::get_prevline(int n) const {
|
|
||||||
return line[(actual + MAXPREVLINE - n) % MAXPREVLINE];
|
|
||||||
}
|
|
||||||
|
|
||||||
std::string TextParser::get_line() const {
|
|
||||||
return get_prevline(0);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool TextParser::next_token(std::string &t) {
|
|
||||||
const char* latin1;
|
|
||||||
|
|
||||||
for (;;) {
|
|
||||||
switch (state) {
|
|
||||||
case 0: // non word chars
|
|
||||||
if (is_wordchar(line[actual].c_str() + head)) {
|
|
||||||
state = 1;
|
|
||||||
token = head;
|
|
||||||
} else if ((latin1 = get_latin1(line[actual].c_str() + head))) {
|
|
||||||
state = 1;
|
|
||||||
token = head;
|
|
||||||
head += strlen(latin1);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case 1: // wordchar
|
|
||||||
if ((latin1 = get_latin1(line[actual].c_str() + head))) {
|
|
||||||
head += strlen(latin1);
|
|
||||||
} else if ((is_wordchar((char*)APOSTROPHE) ||
|
|
||||||
(is_utf8() && is_wordchar((char*)UTF8_APOS))) &&
|
|
||||||
!line[actual].empty() && line[actual][head] == '\'' &&
|
|
||||||
is_wordchar(line[actual].c_str() + head + 1)) {
|
|
||||||
head++;
|
|
||||||
} else if (is_utf8() &&
|
|
||||||
is_wordchar((char*)APOSTROPHE) && // add Unicode apostrophe
|
|
||||||
// to the WORDCHARS, if
|
|
||||||
// needed
|
|
||||||
strncmp(line[actual].c_str() + head, UTF8_APOS, strlen(UTF8_APOS)) ==
|
|
||||||
0 &&
|
|
||||||
is_wordchar(line[actual].c_str() + head + strlen(UTF8_APOS))) {
|
|
||||||
head += strlen(UTF8_APOS) - 1;
|
|
||||||
} else if (!is_wordchar(line[actual].c_str() + head)) {
|
|
||||||
state = 0;
|
|
||||||
if (alloc_token(token, &head, t))
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
if (next_char(line[actual].c_str(), &head))
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t TextParser::get_tokenpos() {
|
|
||||||
return token;
|
|
||||||
}
|
|
||||||
|
|
||||||
int TextParser::change_token(const char* word) {
|
|
||||||
if (word) {
|
|
||||||
std::string remainder(line[actual].substr(head));
|
|
||||||
line[actual].resize(token);
|
|
||||||
line[actual].append(word);
|
|
||||||
line[actual].append(remainder);
|
|
||||||
head = token;
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Maps a raw token to the word to be checked. For plain text the token is
// already the word, so a copy of `tok` is returned; this is a virtual member,
// so derived parsers can substitute their own mapping.
std::string TextParser::get_word(const std::string& tok) {
  std::string word(tok);
  return word;
}
|
|
||||||
|
|
||||||
void TextParser::check_urls() {
|
|
||||||
urlline.resize(line[actual].size() + 1);
|
|
||||||
int url_state = 0;
|
|
||||||
size_t url_head = 0;
|
|
||||||
size_t url_token = 0;
|
|
||||||
int url = 0;
|
|
||||||
for (;;) {
|
|
||||||
switch (url_state) {
|
|
||||||
case 0: // non word chars
|
|
||||||
if (is_wordchar(line[actual].c_str() + url_head)) {
|
|
||||||
url_state = 1;
|
|
||||||
url_token = url_head;
|
|
||||||
// Unix path
|
|
||||||
} else if (line[actual][url_head] == '/') {
|
|
||||||
url_state = 1;
|
|
||||||
url_token = url_head;
|
|
||||||
url = 1;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case 1: // wordchar
|
|
||||||
char ch = line[actual][url_head];
|
|
||||||
// e-mail address
|
|
||||||
if ((ch == '@') ||
|
|
||||||
// MS-DOS, Windows path
|
|
||||||
(strncmp(line[actual].c_str() + url_head, ":\\", 2) == 0) ||
|
|
||||||
// URL
|
|
||||||
(strncmp(line[actual].c_str() + url_head, "://", 3) == 0)) {
|
|
||||||
url = 1;
|
|
||||||
} else if (!(is_wordchar(line[actual].c_str() + url_head) || (ch == '-') ||
|
|
||||||
(ch == '_') || (ch == '\\') || (ch == '.') ||
|
|
||||||
(ch == ':') || (ch == '/') || (ch == '~') || (ch == '%') ||
|
|
||||||
(ch == '*') || (ch == '$') || (ch == '[') || (ch == ']') ||
|
|
||||||
(ch == '?') || (ch == '!') ||
|
|
||||||
((ch >= '0') && (ch <= '9')))) {
|
|
||||||
url_state = 0;
|
|
||||||
if (url == 1) {
|
|
||||||
for (size_t i = url_token; i < url_head; ++i) {
|
|
||||||
urlline[i] = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
url = 0;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
urlline[url_head] = false;
|
|
||||||
if (next_char(line[actual].c_str(), &url_head))
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
int TextParser::get_url(size_t token_pos, size_t* hd) {
|
|
||||||
for (size_t i = *hd; i < line[actual].size() && urlline[i]; i++, (*hd)++)
|
|
||||||
;
|
|
||||||
return checkurl ? 0 : urlline[token_pos];
|
|
||||||
}
|
|
||||||
|
|
||||||
void TextParser::set_url_checking(int check) {
|
|
||||||
checkurl = check;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool TextParser::alloc_token(size_t tokn, size_t* hd, std::string& t) {
|
|
||||||
size_t url_head = *hd;
|
|
||||||
if (get_url(tokn, &url_head))
|
|
||||||
return false;
|
|
||||||
t = line[actual].substr(tokn, *hd - tokn);
|
|
||||||
// remove colon for Finnish and Swedish language
|
|
||||||
if (!t.empty() && t[t.size() - 1] == ':') {
|
|
||||||
t.resize(t.size() - 1);
|
|
||||||
if (t.empty()) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
@ -1,99 +0,0 @@
|
|||||||
/* ***** BEGIN LICENSE BLOCK *****
|
|
||||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
|
||||||
*
|
|
||||||
* Copyright (C) 2002-2017 Németh László
|
|
||||||
*
|
|
||||||
* The contents of this file are subject to the Mozilla Public License Version
|
|
||||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
* http://www.mozilla.org/MPL/
|
|
||||||
*
|
|
||||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
|
||||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
|
||||||
* for the specific language governing rights and limitations under the
|
|
||||||
* License.
|
|
||||||
*
|
|
||||||
* Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
|
|
||||||
*
|
|
||||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
|
||||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
|
||||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
|
||||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
|
||||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
|
||||||
*
|
|
||||||
* Alternatively, the contents of this file may be used under the terms of
|
|
||||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
|
||||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
|
||||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
|
||||||
* of those above. If you wish to allow use of your version of this file only
|
|
||||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
|
||||||
* use your version of this file under the terms of the MPL, indicate your
|
|
||||||
* decision by deleting the provisions above and replace them with the notice
|
|
||||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
|
||||||
* the provisions above, a recipient may use your version of this file under
|
|
||||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
|
||||||
*
|
|
||||||
* ***** END LICENSE BLOCK ***** */
|
|
||||||
|
|
||||||
#ifndef TEXTPARSER_HXX_
|
|
||||||
#define TEXTPARSER_HXX_
|
|
||||||
|
|
||||||
// number of lines kept: the current line plus the previous ones
|
|
||||||
#define MAXPREVLINE 4
|
|
||||||
|
|
||||||
#ifndef MAXLNLEN
|
|
||||||
#define MAXLNLEN 8192
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#include "../hunspell/w_char.hxx"
|
|
||||||
|
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Base Text Parser
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
class TextParser {
|
|
||||||
protected:
|
|
||||||
std::vector<int> wordcharacters;// for detection of the word boundaries
|
|
||||||
std::string line[MAXPREVLINE]; // parsed and previous lines
|
|
||||||
std::vector<bool> urlline; // mask for url detection
|
|
||||||
int checkurl;
|
|
||||||
int actual; // actual line
|
|
||||||
size_t head; // head position
|
|
||||||
size_t token;// begin of token
|
|
||||||
int state; // state of automata
|
|
||||||
int utf8; // UTF-8 character encoding
|
|
||||||
int next_char(const char* line, size_t* pos);
|
|
||||||
const w_char* wordchars_utf16;
|
|
||||||
int wclen;
|
|
||||||
|
|
||||||
public:
|
|
||||||
TextParser(const w_char* wordchars, int len);
|
|
||||||
explicit TextParser(const char* wc);
|
|
||||||
virtual ~TextParser();
|
|
||||||
|
|
||||||
void put_line(const char* line);
|
|
||||||
std::string get_line() const;
|
|
||||||
std::string get_prevline(int n) const;
|
|
||||||
virtual bool next_token(std::string&);
|
|
||||||
virtual std::string get_word(const std::string &tok);
|
|
||||||
virtual int change_token(const char* word);
|
|
||||||
void set_url_checking(int check);
|
|
||||||
|
|
||||||
size_t get_tokenpos();
|
|
||||||
int is_wordchar(const char* w);
|
|
||||||
inline int is_utf8() { return utf8; }
|
|
||||||
const char* get_latin1(const char* s);
|
|
||||||
char* next_char();
|
|
||||||
int tokenize_urls();
|
|
||||||
void check_urls();
|
|
||||||
int get_url(size_t token_pos, size_t* head);
|
|
||||||
bool alloc_token(size_t token, size_t* head, std::string& out);
|
|
||||||
private:
|
|
||||||
void init(const char*);
|
|
||||||
void init(const w_char* wordchars, int len);
|
|
||||||
};
|
|
||||||
|
|
||||||
#endif
|
|
253
3rdparty/hunspell/1.7.0/src/parsers/xmlparser.cxx
vendored
253
3rdparty/hunspell/1.7.0/src/parsers/xmlparser.cxx
vendored
@ -1,253 +0,0 @@
|
|||||||
/* ***** BEGIN LICENSE BLOCK *****
|
|
||||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
|
||||||
*
|
|
||||||
* Copyright (C) 2002-2017 Németh László
|
|
||||||
*
|
|
||||||
* The contents of this file are subject to the Mozilla Public License Version
|
|
||||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
* http://www.mozilla.org/MPL/
|
|
||||||
*
|
|
||||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
|
||||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
|
||||||
* for the specific language governing rights and limitations under the
|
|
||||||
* License.
|
|
||||||
*
|
|
||||||
* Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
|
|
||||||
*
|
|
||||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
|
||||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
|
||||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
|
||||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
|
||||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
|
||||||
*
|
|
||||||
* Alternatively, the contents of this file may be used under the terms of
|
|
||||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
|
||||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
|
||||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
|
||||||
* of those above. If you wish to allow use of your version of this file only
|
|
||||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
|
||||||
* use your version of this file under the terms of the MPL, indicate your
|
|
||||||
* decision by deleting the provisions above and replace them with the notice
|
|
||||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
|
||||||
* the provisions above, a recipient may use your version of this file under
|
|
||||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
|
||||||
*
|
|
||||||
* ***** END LICENSE BLOCK ***** */
|
|
||||||
|
|
||||||
#include <cstdlib>
|
|
||||||
#include <cstring>
|
|
||||||
#include <cstdio>
|
|
||||||
#include <ctype.h>
|
|
||||||
|
|
||||||
#include "../hunspell/csutil.hxx"
|
|
||||||
#include "xmlparser.hxx"
|
|
||||||
|
|
||||||
#ifndef W32
|
|
||||||
using namespace std;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// States of the XML scanner automaton.
enum { ST_NON_WORD, ST_WORD, ST_TAG, ST_CHAR_ENTITY, ST_OTHER_TAG, ST_ATTRIB };

// Default regions to skip, as {opening, closing} delimiter pairs. Earlier
// rows take precedence, so the generic "<" ... ">" pair must stay last.
// Openers are written in lower case because look_pattern() lower-cases the
// input before comparing.
static const char* __PATTERN__[][2] = {
    {"<!--", "-->"},       // XML comment
    {"<![cdata[", "]]>"},  // CDATA section ("<![CDATA[" in the document)
    {"<", ">"}};

#define __PATTERN_LEN__ (sizeof(__PATTERN__) / (sizeof(char*) * 2))

// for checking attributes, eg. <img alt="text"> in HTML
static const char* (*__PATTERN2__)[2] = NULL;

#define __PATTERN_LEN2__ 0

// for checking words with in-word patterns
// for example, "exam<text:span>p</text:span>le" in ODT
static const char* (*__PATTERN3__)[2] = NULL;

#define __PATTERN_LEN3__ 0
|
|
||||||
|
|
||||||
#define ENTITY_APOS "'"
|
|
||||||
#define UTF8_APOS "\xe2\x80\x99"
|
|
||||||
#define APOSTROPHE "'"
|
|
||||||
|
|
||||||
// Construct from a `const char*` word-character list, which is handed on
// unchanged to the TextParser base.  All scanner bookkeeping starts at zero.
XMLParser::XMLParser(const char* wordchars)
    : TextParser(wordchars),
      pattern_num(0),
      pattern2_num(0),
      pattern3_num(0),
      prevstate(0),
      checkattr(0),
      quotmark(0) {}
|
|
||||||
|
|
||||||
// Construct from a `w_char` word-character array of `len` entries, both
// handed on unchanged to the TextParser base.  All scanner bookkeeping
// starts at zero.
XMLParser::XMLParser(const w_char* wordchars, int len)
    : TextParser(wordchars, len),
      pattern_num(0),
      pattern2_num(0),
      pattern3_num(0),
      prevstate(0),
      checkattr(0),
      quotmark(0) {}
|
|
||||||
|
|
||||||
// Empty destructor body: nothing is released explicitly here.
XMLParser::~XMLParser() {}
|
|
||||||
|
|
||||||
int XMLParser::look_pattern(const char* p[][2], unsigned int len, int column) {
|
|
||||||
for (unsigned int i = 0; i < len; i++) {
|
|
||||||
const char* j = line[actual].c_str() + head;
|
|
||||||
const char* k = p[i][column];
|
|
||||||
while ((*k != '\0') && (tolower(*j) == *k)) {
|
|
||||||
j++;
|
|
||||||
k++;
|
|
||||||
}
|
|
||||||
if (*k == '\0')
|
|
||||||
return i;
|
|
||||||
}
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* XML parser
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
bool XMLParser::next_token(const char* PATTERN[][2],
|
|
||||||
unsigned int PATTERN_LEN,
|
|
||||||
const char* PATTERN2[][2],
|
|
||||||
unsigned int PATTERN_LEN2,
|
|
||||||
const char* PATTERN3[][2],
|
|
||||||
unsigned int PATTERN_LEN3,
|
|
||||||
std::string& t) {
|
|
||||||
t.clear();
|
|
||||||
const char* latin1;
|
|
||||||
|
|
||||||
for (;;) {
|
|
||||||
switch (state) {
|
|
||||||
case ST_NON_WORD: // non word chars
|
|
||||||
prevstate = ST_NON_WORD;
|
|
||||||
if ((pattern_num = look_pattern(PATTERN, PATTERN_LEN, 0)) != -1) {
|
|
||||||
checkattr = 0;
|
|
||||||
if ((pattern2_num = look_pattern(PATTERN2, PATTERN_LEN2, 0)) != -1) {
|
|
||||||
checkattr = 1;
|
|
||||||
}
|
|
||||||
state = ST_TAG;
|
|
||||||
} else if (is_wordchar(line[actual].c_str() + head)) {
|
|
||||||
state = ST_WORD;
|
|
||||||
token = head;
|
|
||||||
} else if ((latin1 = get_latin1(line[actual].c_str() + head))) {
|
|
||||||
state = ST_WORD;
|
|
||||||
token = head;
|
|
||||||
head += strlen(latin1);
|
|
||||||
} else if (line[actual][head] == '&') {
|
|
||||||
state = ST_CHAR_ENTITY;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case ST_WORD: // wordchar
|
|
||||||
if ((latin1 = get_latin1(line[actual].c_str() + head))) {
|
|
||||||
head += strlen(latin1);
|
|
||||||
} else if ((is_wordchar((char*)APOSTROPHE) ||
|
|
||||||
(is_utf8() && is_wordchar((char*)UTF8_APOS))) &&
|
|
||||||
strncmp(line[actual].c_str() + head, ENTITY_APOS,
|
|
||||||
strlen(ENTITY_APOS)) == 0 &&
|
|
||||||
is_wordchar(line[actual].c_str() + head + strlen(ENTITY_APOS))) {
|
|
||||||
head += strlen(ENTITY_APOS) - 1;
|
|
||||||
} else if (is_utf8() &&
|
|
||||||
is_wordchar((char*)APOSTROPHE) && // add Unicode apostrophe
|
|
||||||
// to the WORDCHARS, if
|
|
||||||
// needed
|
|
||||||
strncmp(line[actual].c_str() + head, UTF8_APOS, strlen(UTF8_APOS)) ==
|
|
||||||
0 &&
|
|
||||||
is_wordchar(line[actual].c_str() + head + strlen(UTF8_APOS))) {
|
|
||||||
head += strlen(UTF8_APOS) - 1;
|
|
||||||
} else if (!is_wordchar(line[actual].c_str() + head)) {
|
|
||||||
// in-word patterns
|
|
||||||
if ((pattern3_num = look_pattern(PATTERN3, PATTERN_LEN3, 0)) != -1) {
|
|
||||||
size_t pos = line[actual].find(PATTERN3[pattern3_num][1], head);
|
|
||||||
if (pos != std::string::npos) {
|
|
||||||
size_t endpos = pos + strlen(PATTERN3[pattern3_num][1]) - 1;
|
|
||||||
if (is_wordchar(line[actual].c_str() + endpos + 1)) {
|
|
||||||
head = endpos;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
state = prevstate;
|
|
||||||
// return with the token, except in the case of in-word patterns
|
|
||||||
if (alloc_token(token, &head, t))
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case ST_TAG: // comment, labels, etc
|
|
||||||
int i;
|
|
||||||
if ((checkattr == 1) &&
|
|
||||||
((i = look_pattern(PATTERN2, PATTERN_LEN2, 1)) != -1) &&
|
|
||||||
(strcmp(PATTERN2[i][0], PATTERN2[pattern2_num][0]) == 0)) {
|
|
||||||
checkattr = 2;
|
|
||||||
} else if ((checkattr > 0) && (line[actual][head] == '>')) {
|
|
||||||
state = ST_NON_WORD;
|
|
||||||
} else if (((i = look_pattern(PATTERN, PATTERN_LEN, 1)) != -1) &&
|
|
||||||
(strcmp(PATTERN[i][1], PATTERN[pattern_num][1]) == 0)) {
|
|
||||||
state = ST_NON_WORD;
|
|
||||||
head += strlen(PATTERN[pattern_num][1]) - 1;
|
|
||||||
} else if ((strcmp(PATTERN[pattern_num][0], "<") == 0) &&
|
|
||||||
((line[actual][head] == '"') ||
|
|
||||||
(line[actual][head] == '\''))) {
|
|
||||||
quotmark = line[actual][head];
|
|
||||||
state = ST_ATTRIB;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case ST_ATTRIB: // non word chars
|
|
||||||
prevstate = ST_ATTRIB;
|
|
||||||
if (line[actual][head] == quotmark) {
|
|
||||||
state = ST_TAG;
|
|
||||||
if (checkattr == 2)
|
|
||||||
checkattr = 1;
|
|
||||||
// for IMG ALT
|
|
||||||
} else if (is_wordchar(line[actual].c_str() + head) && (checkattr == 2)) {
|
|
||||||
state = ST_WORD;
|
|
||||||
token = head;
|
|
||||||
} else if (line[actual][head] == '&') {
|
|
||||||
state = ST_CHAR_ENTITY;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case ST_CHAR_ENTITY: // SGML element
|
|
||||||
if ((tolower(line[actual][head]) == ';')) {
|
|
||||||
state = prevstate;
|
|
||||||
head--;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (next_char(line[actual].c_str(), &head))
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
//FIXME No return, in function returning non-void
|
|
||||||
}
|
|
||||||
|
|
||||||
bool XMLParser::next_token(std::string& t) {
|
|
||||||
return next_token(__PATTERN__, __PATTERN_LEN__, __PATTERN2__,
|
|
||||||
__PATTERN_LEN2__, __PATTERN3__, __PATTERN_LEN3__, t);
|
|
||||||
}
|
|
||||||
|
|
||||||
// remove in-word patterns
|
|
||||||
std::string XMLParser::get_word2(
|
|
||||||
const char* PATTERN3[][2],
|
|
||||||
unsigned int PATTERN_LEN3,
|
|
||||||
const std::string &tok) {
|
|
||||||
std::string word = tok;
|
|
||||||
for (unsigned int i = 0; i < PATTERN_LEN3; i++) {
|
|
||||||
size_t pos;
|
|
||||||
while ((pos = word.find(PATTERN3[i][0])) != word.npos) {
|
|
||||||
size_t endpos = word.find(PATTERN3[i][1], pos);
|
|
||||||
if (endpos != word.npos) {
|
|
||||||
word.erase(pos, endpos + strlen(PATTERN3[i][1]) - pos);
|
|
||||||
} else
|
|
||||||
return word;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return word;
|
|
||||||
}
|
|
||||||
|
|
||||||
int XMLParser::change_token(const char* word) {
|
|
||||||
if (strstr(word, APOSTROPHE) != NULL || strchr(word, '"') != NULL ||
|
|
||||||
strchr(word, '&') != NULL || strchr(word, '<') != NULL ||
|
|
||||||
strchr(word, '>') != NULL) {
|
|
||||||
std::string r(word);
|
|
||||||
mystrrep(r, "&", "__namp;__");
|
|
||||||
mystrrep(r, "__namp;__", "&");
|
|
||||||
mystrrep(r, APOSTROPHE, ENTITY_APOS);
|
|
||||||
mystrrep(r, "\"", """);
|
|
||||||
mystrrep(r, ">", ">");
|
|
||||||
mystrrep(r, "<", "<");
|
|
||||||
return TextParser::change_token(r.c_str());
|
|
||||||
}
|
|
||||||
return TextParser::change_token(word);
|
|
||||||
}
|
|
@ -1,76 +0,0 @@
|
|||||||
/* ***** BEGIN LICENSE BLOCK *****
|
|
||||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
|
||||||
*
|
|
||||||
* Copyright (C) 2002-2017 Németh László
|
|
||||||
*
|
|
||||||
* The contents of this file are subject to the Mozilla Public License Version
|
|
||||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
* http://www.mozilla.org/MPL/
|
|
||||||
*
|
|
||||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
|
||||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
|
||||||
* for the specific language governing rights and limitations under the
|
|
||||||
* License.
|
|
||||||
*
|
|
||||||
* Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
|
|
||||||
*
|
|
||||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
|
||||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
|
||||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
|
||||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
|
||||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
|
||||||
*
|
|
||||||
* Alternatively, the contents of this file may be used under the terms of
|
|
||||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
|
||||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
|
||||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
|
||||||
* of those above. If you wish to allow use of your version of this file only
|
|
||||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
|
||||||
* use your version of this file under the terms of the MPL, indicate your
|
|
||||||
* decision by deleting the provisions above and replace them with the notice
|
|
||||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
|
||||||
* the provisions above, a recipient may use your version of this file under
|
|
||||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
|
||||||
*
|
|
||||||
* ***** END LICENSE BLOCK ***** */
|
|
||||||
|
|
||||||
#ifndef XMLPARSER_HXX_
|
|
||||||
#define XMLPARSER_HXX_
|
|
||||||
|
|
||||||
#include "textparser.hxx"
|
|
||||||
|
|
||||||
/*
|
|
||||||
* XML Parser
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
class XMLParser : public TextParser {
|
|
||||||
public:
|
|
||||||
explicit XMLParser(const char* wc);
|
|
||||||
XMLParser(const w_char* wordchars, int len);
|
|
||||||
bool next_token(const char* p[][2],
|
|
||||||
unsigned int len,
|
|
||||||
const char* p2[][2],
|
|
||||||
unsigned int len2,
|
|
||||||
const char* p3[][2],
|
|
||||||
unsigned int len3,
|
|
||||||
std::string&);
|
|
||||||
virtual bool next_token(std::string&);
|
|
||||||
std::string get_word2(const char* p2[][2],
|
|
||||||
unsigned int len2,
|
|
||||||
const std::string &tok);
|
|
||||||
int change_token(const char* word);
|
|
||||||
virtual ~XMLParser();
|
|
||||||
|
|
||||||
private:
|
|
||||||
int look_pattern(const char* p[][2], unsigned int len, int column);
|
|
||||||
int pattern_num;
|
|
||||||
int pattern2_num;
|
|
||||||
int pattern3_num;
|
|
||||||
int prevstate;
|
|
||||||
int checkattr;
|
|
||||||
char quotmark;
|
|
||||||
};
|
|
||||||
|
|
||||||
#endif
|
|
15
3rdparty/hunspell/Makefile.am
vendored
15
3rdparty/hunspell/Makefile.am
vendored
@ -45,19 +45,4 @@ liblyxhunspell_a_SOURCES = \
|
|||||||
1.7.0/src/hunspell/suggestmgr.hxx \
|
1.7.0/src/hunspell/suggestmgr.hxx \
|
||||||
1.7.0/src/hunspell/utf_info.hxx \
|
1.7.0/src/hunspell/utf_info.hxx \
|
||||||
1.7.0/src/hunspell/w_char.hxx \
|
1.7.0/src/hunspell/w_char.hxx \
|
||||||
1.7.0/src/parsers/firstparser.cxx \
|
|
||||||
1.7.0/src/parsers/firstparser.hxx \
|
|
||||||
1.7.0/src/parsers/htmlparser.cxx \
|
|
||||||
1.7.0/src/parsers/htmlparser.hxx \
|
|
||||||
1.7.0/src/parsers/latexparser.cxx \
|
|
||||||
1.7.0/src/parsers/latexparser.hxx \
|
|
||||||
1.7.0/src/parsers/manparser.cxx \
|
|
||||||
1.7.0/src/parsers/manparser.hxx \
|
|
||||||
1.7.0/src/parsers/odfparser.cxx \
|
|
||||||
1.7.0/src/parsers/odfparser.hxx \
|
|
||||||
1.7.0/src/parsers/testparser.cxx \
|
|
||||||
1.7.0/src/parsers/textparser.cxx \
|
|
||||||
1.7.0/src/parsers/textparser.hxx \
|
|
||||||
1.7.0/src/parsers/xmlparser.cxx \
|
|
||||||
1.7.0/src/parsers/xmlparser.hxx \
|
|
||||||
1.7.0/src/win_api/config.h
|
1.7.0/src/win_api/config.h
|
||||||
|
Loading…
Reference in New Issue
Block a user