diff --git a/3rdparty/hunspell/1.6.2/AUTHORS b/3rdparty/hunspell/1.6.2/AUTHORS deleted file mode 100644 index f137fa26b8..0000000000 --- a/3rdparty/hunspell/1.6.2/AUTHORS +++ /dev/null @@ -1,5 +0,0 @@ -Author of Hunspell: -Németh László nemeth (at) numbertext.org - -Hunspell based on OpenOffice.org's Myspell. MySpell's author: -Kevin Hendricks kevin.hendricks (at) sympatico.ca diff --git a/3rdparty/hunspell/1.6.2/BUGS b/3rdparty/hunspell/1.6.2/BUGS deleted file mode 100644 index 6a5468e0f3..0000000000 --- a/3rdparty/hunspell/1.6.2/BUGS +++ /dev/null @@ -1,5 +0,0 @@ -* Interactive interface has some visualization problem with long lines - -* Experimental -U, -u options don't support Unicode. - -* Compound handling is not thread safe in Hungarian specific code. diff --git a/3rdparty/hunspell/1.6.2/COPYING b/3rdparty/hunspell/1.6.2/COPYING deleted file mode 100644 index 94a9ed024d..0000000000 --- a/3rdparty/hunspell/1.6.2/COPYING +++ /dev/null @@ -1,674 +0,0 @@ - GNU GENERAL PUBLIC LICENSE - Version 3, 29 June 2007 - - Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/> - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - - Preamble - - The GNU General Public License is a free, copyleft license for -software and other kinds of works. - - The licenses for most software and other practical works are designed -to take away your freedom to share and change the works. By contrast, -the GNU General Public License is intended to guarantee your freedom to -share and change all versions of a program--to make sure it remains free -software for all its users. We, the Free Software Foundation, use the -GNU General Public License for most of our software; it applies also to -any other work released this way by its authors. You can apply it to -your programs, too. - - When we speak of free software, we are referring to freedom, not -price.
Our General Public Licenses are designed to make sure that you -have the freedom to distribute copies of free software (and charge for -them if you wish), that you receive source code or can get it if you -want it, that you can change the software or use pieces of it in new -free programs, and that you know you can do these things. - - To protect your rights, we need to prevent others from denying you -these rights or asking you to surrender the rights. Therefore, you have -certain responsibilities if you distribute copies of the software, or if -you modify it: responsibilities to respect the freedom of others. - - For example, if you distribute copies of such a program, whether -gratis or for a fee, you must pass on to the recipients the same -freedoms that you received. You must make sure that they, too, receive -or can get the source code. And you must show them these terms so they -know their rights. - - Developers that use the GNU GPL protect your rights with two steps: -(1) assert copyright on the software, and (2) offer you this License -giving you legal permission to copy, distribute and/or modify it. - - For the developers' and authors' protection, the GPL clearly explains -that there is no warranty for this free software. For both users' and -authors' sake, the GPL requires that modified versions be marked as -changed, so that their problems will not be attributed erroneously to -authors of previous versions. - - Some devices are designed to deny users access to install or run -modified versions of the software inside them, although the manufacturer -can do so. This is fundamentally incompatible with the aim of -protecting users' freedom to change the software. The systematic -pattern of such abuse occurs in the area of products for individuals to -use, which is precisely where it is most unacceptable. Therefore, we -have designed this version of the GPL to prohibit the practice for those -products. 
If such problems arise substantially in other domains, we -stand ready to extend this provision to those domains in future versions -of the GPL, as needed to protect the freedom of users. - - Finally, every program is threatened constantly by software patents. -States should not allow patents to restrict development and use of -software on general-purpose computers, but in those that do, we wish to -avoid the special danger that patents applied to a free program could -make it effectively proprietary. To prevent this, the GPL assures that -patents cannot be used to render the program non-free. - - The precise terms and conditions for copying, distribution and -modification follow. - - TERMS AND CONDITIONS - - 0. Definitions. - - "This License" refers to version 3 of the GNU General Public License. - - "Copyright" also means copyright-like laws that apply to other kinds of -works, such as semiconductor masks. - - "The Program" refers to any copyrightable work licensed under this -License. Each licensee is addressed as "you". "Licensees" and -"recipients" may be individuals or organizations. - - To "modify" a work means to copy from or adapt all or part of the work -in a fashion requiring copyright permission, other than the making of an -exact copy. The resulting work is called a "modified version" of the -earlier work or a work "based on" the earlier work. - - A "covered work" means either the unmodified Program or a work based -on the Program. - - To "propagate" a work means to do anything with it that, without -permission, would make you directly or secondarily liable for -infringement under applicable copyright law, except executing it on a -computer or modifying a private copy. Propagation includes copying, -distribution (with or without modification), making available to the -public, and in some countries other activities as well. - - To "convey" a work means any kind of propagation that enables other -parties to make or receive copies. 
Mere interaction with a user through -a computer network, with no transfer of a copy, is not conveying. - - An interactive user interface displays "Appropriate Legal Notices" -to the extent that it includes a convenient and prominently visible -feature that (1) displays an appropriate copyright notice, and (2) -tells the user that there is no warranty for the work (except to the -extent that warranties are provided), that licensees may convey the -work under this License, and how to view a copy of this License. If -the interface presents a list of user commands or options, such as a -menu, a prominent item in the list meets this criterion. - - 1. Source Code. - - The "source code" for a work means the preferred form of the work -for making modifications to it. "Object code" means any non-source -form of a work. - - A "Standard Interface" means an interface that either is an official -standard defined by a recognized standards body, or, in the case of -interfaces specified for a particular programming language, one that -is widely used among developers working in that language. - - The "System Libraries" of an executable work include anything, other -than the work as a whole, that (a) is included in the normal form of -packaging a Major Component, but which is not part of that Major -Component, and (b) serves only to enable use of the work with that -Major Component, or to implement a Standard Interface for which an -implementation is available to the public in source code form. A -"Major Component", in this context, means a major essential component -(kernel, window system, and so on) of the specific operating system -(if any) on which the executable work runs, or a compiler used to -produce the work, or an object code interpreter used to run it. 
- - The "Corresponding Source" for a work in object code form means all -the source code needed to generate, install, and (for an executable -work) run the object code and to modify the work, including scripts to -control those activities. However, it does not include the work's -System Libraries, or general-purpose tools or generally available free -programs which are used unmodified in performing those activities but -which are not part of the work. For example, Corresponding Source -includes interface definition files associated with source files for -the work, and the source code for shared libraries and dynamically -linked subprograms that the work is specifically designed to require, -such as by intimate data communication or control flow between those -subprograms and other parts of the work. - - The Corresponding Source need not include anything that users -can regenerate automatically from other parts of the Corresponding -Source. - - The Corresponding Source for a work in source code form is that -same work. - - 2. Basic Permissions. - - All rights granted under this License are granted for the term of -copyright on the Program, and are irrevocable provided the stated -conditions are met. This License explicitly affirms your unlimited -permission to run the unmodified Program. The output from running a -covered work is covered by this License only if the output, given its -content, constitutes a covered work. This License acknowledges your -rights of fair use or other equivalent, as provided by copyright law. - - You may make, run and propagate covered works that you do not -convey, without conditions so long as your license otherwise remains -in force. You may convey covered works to others for the sole purpose -of having them make modifications exclusively for you, or provide you -with facilities for running those works, provided that you comply with -the terms of this License in conveying all material for which you do -not control copyright. 
Those thus making or running the covered works -for you must do so exclusively on your behalf, under your direction -and control, on terms that prohibit them from making any copies of -your copyrighted material outside their relationship with you. - - Conveying under any other circumstances is permitted solely under -the conditions stated below. Sublicensing is not allowed; section 10 -makes it unnecessary. - - 3. Protecting Users' Legal Rights From Anti-Circumvention Law. - - No covered work shall be deemed part of an effective technological -measure under any applicable law fulfilling obligations under article -11 of the WIPO copyright treaty adopted on 20 December 1996, or -similar laws prohibiting or restricting circumvention of such -measures. - - When you convey a covered work, you waive any legal power to forbid -circumvention of technological measures to the extent such circumvention -is effected by exercising rights under this License with respect to -the covered work, and you disclaim any intention to limit operation or -modification of the work as a means of enforcing, against the work's -users, your or third parties' legal rights to forbid circumvention of -technological measures. - - 4. Conveying Verbatim Copies. - - You may convey verbatim copies of the Program's source code as you -receive it, in any medium, provided that you conspicuously and -appropriately publish on each copy an appropriate copyright notice; -keep intact all notices stating that this License and any -non-permissive terms added in accord with section 7 apply to the code; -keep intact all notices of the absence of any warranty; and give all -recipients a copy of this License along with the Program. - - You may charge any price or no price for each copy that you convey, -and you may offer support or warranty protection for a fee. - - 5. Conveying Modified Source Versions. 
- - You may convey a work based on the Program, or the modifications to -produce it from the Program, in the form of source code under the -terms of section 4, provided that you also meet all of these conditions: - - a) The work must carry prominent notices stating that you modified - it, and giving a relevant date. - - b) The work must carry prominent notices stating that it is - released under this License and any conditions added under section - 7. This requirement modifies the requirement in section 4 to - "keep intact all notices". - - c) You must license the entire work, as a whole, under this - License to anyone who comes into possession of a copy. This - License will therefore apply, along with any applicable section 7 - additional terms, to the whole of the work, and all its parts, - regardless of how they are packaged. This License gives no - permission to license the work in any other way, but it does not - invalidate such permission if you have separately received it. - - d) If the work has interactive user interfaces, each must display - Appropriate Legal Notices; however, if the Program has interactive - interfaces that do not display Appropriate Legal Notices, your - work need not make them do so. - - A compilation of a covered work with other separate and independent -works, which are not by their nature extensions of the covered work, -and which are not combined with it such as to form a larger program, -in or on a volume of a storage or distribution medium, is called an -"aggregate" if the compilation and its resulting copyright are not -used to limit the access or legal rights of the compilation's users -beyond what the individual works permit. Inclusion of a covered work -in an aggregate does not cause this License to apply to the other -parts of the aggregate. - - 6. Conveying Non-Source Forms. 
- - You may convey a covered work in object code form under the terms -of sections 4 and 5, provided that you also convey the -machine-readable Corresponding Source under the terms of this License, -in one of these ways: - - a) Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by the - Corresponding Source fixed on a durable physical medium - customarily used for software interchange. - - b) Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by a - written offer, valid for at least three years and valid for as - long as you offer spare parts or customer support for that product - model, to give anyone who possesses the object code either (1) a - copy of the Corresponding Source for all the software in the - product that is covered by this License, on a durable physical - medium customarily used for software interchange, for a price no - more than your reasonable cost of physically performing this - conveying of source, or (2) access to copy the - Corresponding Source from a network server at no charge. - - c) Convey individual copies of the object code with a copy of the - written offer to provide the Corresponding Source. This - alternative is allowed only occasionally and noncommercially, and - only if you received the object code with such an offer, in accord - with subsection 6b. - - d) Convey the object code by offering access from a designated - place (gratis or for a charge), and offer equivalent access to the - Corresponding Source in the same way through the same place at no - further charge. You need not require recipients to copy the - Corresponding Source along with the object code. 
If the place to - copy the object code is a network server, the Corresponding Source - may be on a different server (operated by you or a third party) - that supports equivalent copying facilities, provided you maintain - clear directions next to the object code saying where to find the - Corresponding Source. Regardless of what server hosts the - Corresponding Source, you remain obligated to ensure that it is - available for as long as needed to satisfy these requirements. - - e) Convey the object code using peer-to-peer transmission, provided - you inform other peers where the object code and Corresponding - Source of the work are being offered to the general public at no - charge under subsection 6d. - - A separable portion of the object code, whose source code is excluded -from the Corresponding Source as a System Library, need not be -included in conveying the object code work. - - A "User Product" is either (1) a "consumer product", which means any -tangible personal property which is normally used for personal, family, -or household purposes, or (2) anything designed or sold for incorporation -into a dwelling. In determining whether a product is a consumer product, -doubtful cases shall be resolved in favor of coverage. For a particular -product received by a particular user, "normally used" refers to a -typical or common use of that class of product, regardless of the status -of the particular user or of the way in which the particular user -actually uses, or expects or is expected to use, the product. A product -is a consumer product regardless of whether the product has substantial -commercial, industrial or non-consumer uses, unless such uses represent -the only significant mode of use of the product. 
- - "Installation Information" for a User Product means any methods, -procedures, authorization keys, or other information required to install -and execute modified versions of a covered work in that User Product from -a modified version of its Corresponding Source. The information must -suffice to ensure that the continued functioning of the modified object -code is in no case prevented or interfered with solely because -modification has been made. - - If you convey an object code work under this section in, or with, or -specifically for use in, a User Product, and the conveying occurs as -part of a transaction in which the right of possession and use of the -User Product is transferred to the recipient in perpetuity or for a -fixed term (regardless of how the transaction is characterized), the -Corresponding Source conveyed under this section must be accompanied -by the Installation Information. But this requirement does not apply -if neither you nor any third party retains the ability to install -modified object code on the User Product (for example, the work has -been installed in ROM). - - The requirement to provide Installation Information does not include a -requirement to continue to provide support service, warranty, or updates -for a work that has been modified or installed by the recipient, or for -the User Product in which it has been modified or installed. Access to a -network may be denied when the modification itself materially and -adversely affects the operation of the network or violates the rules and -protocols for communication across the network. - - Corresponding Source conveyed, and Installation Information provided, -in accord with this section must be in a format that is publicly -documented (and with an implementation available to the public in -source code form), and must require no special password or key for -unpacking, reading or copying. - - 7. Additional Terms. 
- - "Additional permissions" are terms that supplement the terms of this -License by making exceptions from one or more of its conditions. -Additional permissions that are applicable to the entire Program shall -be treated as though they were included in this License, to the extent -that they are valid under applicable law. If additional permissions -apply only to part of the Program, that part may be used separately -under those permissions, but the entire Program remains governed by -this License without regard to the additional permissions. - - When you convey a copy of a covered work, you may at your option -remove any additional permissions from that copy, or from any part of -it. (Additional permissions may be written to require their own -removal in certain cases when you modify the work.) You may place -additional permissions on material, added by you to a covered work, -for which you have or can give appropriate copyright permission. - - Notwithstanding any other provision of this License, for material you -add to a covered work, you may (if authorized by the copyright holders of -that material) supplement the terms of this License with terms: - - a) Disclaiming warranty or limiting liability differently from the - terms of sections 15 and 16 of this License; or - - b) Requiring preservation of specified reasonable legal notices or - author attributions in that material or in the Appropriate Legal - Notices displayed by works containing it; or - - c) Prohibiting misrepresentation of the origin of that material, or - requiring that modified versions of such material be marked in - reasonable ways as different from the original version; or - - d) Limiting the use for publicity purposes of names of licensors or - authors of the material; or - - e) Declining to grant rights under trademark law for use of some - trade names, trademarks, or service marks; or - - f) Requiring indemnification of licensors and authors of that - material by anyone who conveys the 
material (or modified versions of - it) with contractual assumptions of liability to the recipient, for - any liability that these contractual assumptions directly impose on - those licensors and authors. - - All other non-permissive additional terms are considered "further -restrictions" within the meaning of section 10. If the Program as you -received it, or any part of it, contains a notice stating that it is -governed by this License along with a term that is a further -restriction, you may remove that term. If a license document contains -a further restriction but permits relicensing or conveying under this -License, you may add to a covered work material governed by the terms -of that license document, provided that the further restriction does -not survive such relicensing or conveying. - - If you add terms to a covered work in accord with this section, you -must place, in the relevant source files, a statement of the -additional terms that apply to those files, or a notice indicating -where to find the applicable terms. - - Additional terms, permissive or non-permissive, may be stated in the -form of a separately written license, or stated as exceptions; -the above requirements apply either way. - - 8. Termination. - - You may not propagate or modify a covered work except as expressly -provided under this License. Any attempt otherwise to propagate or -modify it is void, and will automatically terminate your rights under -this License (including any patent licenses granted under the third -paragraph of section 11). - - However, if you cease all violation of this License, then your -license from a particular copyright holder is reinstated (a) -provisionally, unless and until the copyright holder explicitly and -finally terminates your license, and (b) permanently, if the copyright -holder fails to notify you of the violation by some reasonable means -prior to 60 days after the cessation. 
- - Moreover, your license from a particular copyright holder is -reinstated permanently if the copyright holder notifies you of the -violation by some reasonable means, this is the first time you have -received notice of violation of this License (for any work) from that -copyright holder, and you cure the violation prior to 30 days after -your receipt of the notice. - - Termination of your rights under this section does not terminate the -licenses of parties who have received copies or rights from you under -this License. If your rights have been terminated and not permanently -reinstated, you do not qualify to receive new licenses for the same -material under section 10. - - 9. Acceptance Not Required for Having Copies. - - You are not required to accept this License in order to receive or -run a copy of the Program. Ancillary propagation of a covered work -occurring solely as a consequence of using peer-to-peer transmission -to receive a copy likewise does not require acceptance. However, -nothing other than this License grants you permission to propagate or -modify any covered work. These actions infringe copyright if you do -not accept this License. Therefore, by modifying or propagating a -covered work, you indicate your acceptance of this License to do so. - - 10. Automatic Licensing of Downstream Recipients. - - Each time you convey a covered work, the recipient automatically -receives a license from the original licensors, to run, modify and -propagate that work, subject to this License. You are not responsible -for enforcing compliance by third parties with this License. - - An "entity transaction" is a transaction transferring control of an -organization, or substantially all assets of one, or subdividing an -organization, or merging organizations. 
If propagation of a covered -work results from an entity transaction, each party to that -transaction who receives a copy of the work also receives whatever -licenses to the work the party's predecessor in interest had or could -give under the previous paragraph, plus a right to possession of the -Corresponding Source of the work from the predecessor in interest, if -the predecessor has it or can get it with reasonable efforts. - - You may not impose any further restrictions on the exercise of the -rights granted or affirmed under this License. For example, you may -not impose a license fee, royalty, or other charge for exercise of -rights granted under this License, and you may not initiate litigation -(including a cross-claim or counterclaim in a lawsuit) alleging that -any patent claim is infringed by making, using, selling, offering for -sale, or importing the Program or any portion of it. - - 11. Patents. - - A "contributor" is a copyright holder who authorizes use under this -License of the Program or a work on which the Program is based. The -work thus licensed is called the contributor's "contributor version". - - A contributor's "essential patent claims" are all patent claims -owned or controlled by the contributor, whether already acquired or -hereafter acquired, that would be infringed by some manner, permitted -by this License, of making, using, or selling its contributor version, -but do not include claims that would be infringed only as a -consequence of further modification of the contributor version. For -purposes of this definition, "control" includes the right to grant -patent sublicenses in a manner consistent with the requirements of -this License. - - Each contributor grants you a non-exclusive, worldwide, royalty-free -patent license under the contributor's essential patent claims, to -make, use, sell, offer for sale, import and otherwise run, modify and -propagate the contents of its contributor version. 
- - In the following three paragraphs, a "patent license" is any express -agreement or commitment, however denominated, not to enforce a patent -(such as an express permission to practice a patent or covenant not to -sue for patent infringement). To "grant" such a patent license to a -party means to make such an agreement or commitment not to enforce a -patent against the party. - - If you convey a covered work, knowingly relying on a patent license, -and the Corresponding Source of the work is not available for anyone -to copy, free of charge and under the terms of this License, through a -publicly available network server or other readily accessible means, -then you must either (1) cause the Corresponding Source to be so -available, or (2) arrange to deprive yourself of the benefit of the -patent license for this particular work, or (3) arrange, in a manner -consistent with the requirements of this License, to extend the patent -license to downstream recipients. "Knowingly relying" means you have -actual knowledge that, but for the patent license, your conveying the -covered work in a country, or your recipient's use of the covered work -in a country, would infringe one or more identifiable patents in that -country that you have reason to believe are valid. - - If, pursuant to or in connection with a single transaction or -arrangement, you convey, or propagate by procuring conveyance of, a -covered work, and grant a patent license to some of the parties -receiving the covered work authorizing them to use, propagate, modify -or convey a specific copy of the covered work, then the patent license -you grant is automatically extended to all recipients of the covered -work and works based on it. - - A patent license is "discriminatory" if it does not include within -the scope of its coverage, prohibits the exercise of, or is -conditioned on the non-exercise of one or more of the rights that are -specifically granted under this License. 
You may not convey a covered -work if you are a party to an arrangement with a third party that is -in the business of distributing software, under which you make payment -to the third party based on the extent of your activity of conveying -the work, and under which the third party grants, to any of the -parties who would receive the covered work from you, a discriminatory -patent license (a) in connection with copies of the covered work -conveyed by you (or copies made from those copies), or (b) primarily -for and in connection with specific products or compilations that -contain the covered work, unless you entered into that arrangement, -or that patent license was granted, prior to 28 March 2007. - - Nothing in this License shall be construed as excluding or limiting -any implied license or other defenses to infringement that may -otherwise be available to you under applicable patent law. - - 12. No Surrender of Others' Freedom. - - If conditions are imposed on you (whether by court order, agreement or -otherwise) that contradict the conditions of this License, they do not -excuse you from the conditions of this License. If you cannot convey a -covered work so as to satisfy simultaneously your obligations under this -License and any other pertinent obligations, then as a consequence you may -not convey it at all. For example, if you agree to terms that obligate you -to collect a royalty for further conveying from those to whom you convey -the Program, the only way you could satisfy both those terms and this -License would be to refrain entirely from conveying the Program. - - 13. Use with the GNU Affero General Public License. - - Notwithstanding any other provision of this License, you have -permission to link or combine any covered work with a work licensed -under version 3 of the GNU Affero General Public License into a single -combined work, and to convey the resulting work. 
The terms of this -License will continue to apply to the part which is the covered work, -but the special requirements of the GNU Affero General Public License, -section 13, concerning interaction through a network will apply to the -combination as such. - - 14. Revised Versions of this License. - - The Free Software Foundation may publish revised and/or new versions of -the GNU General Public License from time to time. Such new versions will -be similar in spirit to the present version, but may differ in detail to -address new problems or concerns. - - Each version is given a distinguishing version number. If the -Program specifies that a certain numbered version of the GNU General -Public License "or any later version" applies to it, you have the -option of following the terms and conditions either of that numbered -version or of any later version published by the Free Software -Foundation. If the Program does not specify a version number of the -GNU General Public License, you may choose any version ever published -by the Free Software Foundation. - - If the Program specifies that a proxy can decide which future -versions of the GNU General Public License can be used, that proxy's -public statement of acceptance of a version permanently authorizes you -to choose that version for the Program. - - Later license versions may give you additional or different -permissions. However, no additional obligations are imposed on any -author or copyright holder as a result of your choosing to follow a -later version. - - 15. Disclaimer of Warranty. - - THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY -APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT -HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY -OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, -THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -PURPOSE. 
THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM -IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF -ALL NECESSARY SERVICING, REPAIR OR CORRECTION. - - 16. Limitation of Liability. - - IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING -WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS -THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY -GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE -USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF -DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD -PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), -EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF -SUCH DAMAGES. - - 17. Interpretation of Sections 15 and 16. - - If the disclaimer of warranty and limitation of liability provided -above cannot be given local legal effect according to their terms, -reviewing courts shall apply local law that most closely approximates -an absolute waiver of all civil liability in connection with the -Program, unless a warranty or assumption of liability accompanies a -copy of the Program in return for a fee. - - END OF TERMS AND CONDITIONS - - How to Apply These Terms to Your New Programs - - If you develop a new program, and you want it to be of the greatest -possible use to the public, the best way to achieve this is to make it -free software which everyone can redistribute and change under these terms. - - To do so, attach the following notices to the program. It is safest -to attach them to the start of each source file to most effectively -state the exclusion of warranty; and each file should have at least -the "copyright" line and a pointer to where the full notice is found. 
- - <one line to give the program's name and a brief idea of what it does.> - Copyright (C) <year> <name of author> - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. - -Also add information on how to contact you by electronic and paper mail. - - If the program does terminal interaction, make it output a short -notice like this when it starts in an interactive mode: - - <program> Copyright (C) <year> <name of author> - This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. - This is free software, and you are welcome to redistribute it - under certain conditions; type `show c' for details. - -The hypothetical commands `show w' and `show c' should show the appropriate -parts of the General Public License. Of course, your program's commands -might be different; for a GUI interface, you would use an "about box". - - You should also get your employer (if you work as a programmer) or school, -if any, to sign a "copyright disclaimer" for the program, if necessary. -For more information on this, and how to apply and follow the GNU GPL, see -<http://www.gnu.org/licenses/>. - - The GNU General Public License does not permit incorporating your program -into proprietary programs. If your program is a subroutine library, you -may consider it more useful to permit linking proprietary applications with -the library. If this is what you want to do, use the GNU Lesser General -Public License instead of this License. But first, please read -<http://www.gnu.org/philosophy/why-not-lgpl.html>.
diff --git a/3rdparty/hunspell/1.6.2/COPYING.LESSER b/3rdparty/hunspell/1.6.2/COPYING.LESSER deleted file mode 100644 index 65c5ca88a6..0000000000 --- a/3rdparty/hunspell/1.6.2/COPYING.LESSER +++ /dev/null @@ -1,165 +0,0 @@ - GNU LESSER GENERAL PUBLIC LICENSE - Version 3, 29 June 2007 - - Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/> - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - - - This version of the GNU Lesser General Public License incorporates -the terms and conditions of version 3 of the GNU General Public -License, supplemented by the additional permissions listed below. - - 0. Additional Definitions. - - As used herein, "this License" refers to version 3 of the GNU Lesser -General Public License, and the "GNU GPL" refers to version 3 of the GNU -General Public License. - - "The Library" refers to a covered work governed by this License, -other than an Application or a Combined Work as defined below. - - An "Application" is any work that makes use of an interface provided -by the Library, but which is not otherwise based on the Library. -Defining a subclass of a class defined by the Library is deemed a mode -of using an interface provided by the Library. - - A "Combined Work" is a work produced by combining or linking an -Application with the Library. The particular version of the Library -with which the Combined Work was made is also called the "Linked -Version". - - The "Minimal Corresponding Source" for a Combined Work means the -Corresponding Source for the Combined Work, excluding any source code -for portions of the Combined Work that, considered in isolation, are -based on the Application, and not on the Linked Version.
- - The "Corresponding Application Code" for a Combined Work means the -object code and/or source code for the Application, including any data -and utility programs needed for reproducing the Combined Work from the -Application, but excluding the System Libraries of the Combined Work. - - 1. Exception to Section 3 of the GNU GPL. - - You may convey a covered work under sections 3 and 4 of this License -without being bound by section 3 of the GNU GPL. - - 2. Conveying Modified Versions. - - If you modify a copy of the Library, and, in your modifications, a -facility refers to a function or data to be supplied by an Application -that uses the facility (other than as an argument passed when the -facility is invoked), then you may convey a copy of the modified -version: - - a) under this License, provided that you make a good faith effort to - ensure that, in the event an Application does not supply the - function or data, the facility still operates, and performs - whatever part of its purpose remains meaningful, or - - b) under the GNU GPL, with none of the additional permissions of - this License applicable to that copy. - - 3. Object Code Incorporating Material from Library Header Files. - - The object code form of an Application may incorporate material from -a header file that is part of the Library. You may convey such object -code under terms of your choice, provided that, if the incorporated -material is not limited to numerical parameters, data structure -layouts and accessors, or small macros, inline functions and templates -(ten or fewer lines in length), you do both of the following: - - a) Give prominent notice with each copy of the object code that the - Library is used in it and that the Library and its use are - covered by this License. - - b) Accompany the object code with a copy of the GNU GPL and this license - document. - - 4. Combined Works. 
- - You may convey a Combined Work under terms of your choice that, -taken together, effectively do not restrict modification of the -portions of the Library contained in the Combined Work and reverse -engineering for debugging such modifications, if you also do each of -the following: - - a) Give prominent notice with each copy of the Combined Work that - the Library is used in it and that the Library and its use are - covered by this License. - - b) Accompany the Combined Work with a copy of the GNU GPL and this license - document. - - c) For a Combined Work that displays copyright notices during - execution, include the copyright notice for the Library among - these notices, as well as a reference directing the user to the - copies of the GNU GPL and this license document. - - d) Do one of the following: - - 0) Convey the Minimal Corresponding Source under the terms of this - License, and the Corresponding Application Code in a form - suitable for, and under terms that permit, the user to - recombine or relink the Application with a modified version of - the Linked Version to produce a modified Combined Work, in the - manner specified by section 6 of the GNU GPL for conveying - Corresponding Source. - - 1) Use a suitable shared library mechanism for linking with the - Library. A suitable mechanism is one that (a) uses at run time - a copy of the Library already present on the user's computer - system, and (b) will operate properly with a modified version - of the Library that is interface-compatible with the Linked - Version. - - e) Provide Installation Information, but only if you would otherwise - be required to provide such information under section 6 of the - GNU GPL, and only to the extent that such information is - necessary to install and execute a modified version of the - Combined Work produced by recombining or relinking the - Application with a modified version of the Linked Version. 
(If - you use option 4d0, the Installation Information must accompany - the Minimal Corresponding Source and Corresponding Application - Code. If you use option 4d1, you must provide the Installation - Information in the manner specified by section 6 of the GNU GPL - for conveying Corresponding Source.) - - 5. Combined Libraries. - - You may place library facilities that are a work based on the -Library side by side in a single library together with other library -facilities that are not Applications and are not covered by this -License, and convey such a combined library under terms of your -choice, if you do both of the following: - - a) Accompany the combined library with a copy of the same work based - on the Library, uncombined with any other library facilities, - conveyed under the terms of this License. - - b) Give prominent notice with the combined library that part of it - is a work based on the Library, and explaining where to find the - accompanying uncombined form of the same work. - - 6. Revised Versions of the GNU Lesser General Public License. - - The Free Software Foundation may publish revised and/or new versions -of the GNU Lesser General Public License from time to time. Such new -versions will be similar in spirit to the present version, but may -differ in detail to address new problems or concerns. - - Each version is given a distinguishing version number. If the -Library as you received it specifies that a certain numbered version -of the GNU Lesser General Public License "or any later version" -applies to it, you have the option of following the terms and -conditions either of that published version or of any later version -published by the Free Software Foundation. If the Library as you -received it does not specify a version number of the GNU Lesser -General Public License, you may choose any version of the GNU Lesser -General Public License ever published by the Free Software Foundation. 
- - If the Library as you received it specifies that a proxy can decide -whether future versions of the GNU Lesser General Public License shall -apply, that proxy's public statement of acceptance of any version is -permanent authorization for you to choose that version for the -Library. diff --git a/3rdparty/hunspell/1.6.2/COPYING.MPL b/3rdparty/hunspell/1.6.2/COPYING.MPL deleted file mode 100644 index 7714141d15..0000000000 --- a/3rdparty/hunspell/1.6.2/COPYING.MPL +++ /dev/null @@ -1,470 +0,0 @@ - MOZILLA PUBLIC LICENSE - Version 1.1 - - --------------- - -1. Definitions. - - 1.0.1. "Commercial Use" means distribution or otherwise making the - Covered Code available to a third party. - - 1.1. "Contributor" means each entity that creates or contributes to - the creation of Modifications. - - 1.2. "Contributor Version" means the combination of the Original - Code, prior Modifications used by a Contributor, and the Modifications - made by that particular Contributor. - - 1.3. "Covered Code" means the Original Code or Modifications or the - combination of the Original Code and Modifications, in each case - including portions thereof. - - 1.4. "Electronic Distribution Mechanism" means a mechanism generally - accepted in the software development community for the electronic - transfer of data. - - 1.5. "Executable" means Covered Code in any form other than Source - Code. - - 1.6. "Initial Developer" means the individual or entity identified - as the Initial Developer in the Source Code notice required by Exhibit - A. - - 1.7. "Larger Work" means a work which combines Covered Code or - portions thereof with code not governed by the terms of this License. - - 1.8. "License" means this document. - - 1.8.1. "Licensable" means having the right to grant, to the maximum - extent possible, whether at the time of the initial grant or - subsequently acquired, any and all of the rights conveyed herein. - - 1.9. 
"Modifications" means any addition to or deletion from the - substance or structure of either the Original Code or any previous - Modifications. When Covered Code is released as a series of files, a - Modification is: - A. Any addition to or deletion from the contents of a file - containing Original Code or previous Modifications. - - B. Any new file that contains any part of the Original Code or - previous Modifications. - - 1.10. "Original Code" means Source Code of computer software code - which is described in the Source Code notice required by Exhibit A as - Original Code, and which, at the time of its release under this - License is not already Covered Code governed by this License. - - 1.10.1. "Patent Claims" means any patent claim(s), now owned or - hereafter acquired, including without limitation, method, process, - and apparatus claims, in any patent Licensable by grantor. - - 1.11. "Source Code" means the preferred form of the Covered Code for - making modifications to it, including all modules it contains, plus - any associated interface definition files, scripts used to control - compilation and installation of an Executable, or source code - differential comparisons against either the Original Code or another - well known, available Covered Code of the Contributor's choice. The - Source Code can be in a compressed or archival form, provided the - appropriate decompression or de-archiving software is widely available - for no charge. - - 1.12. "You" (or "Your") means an individual or a legal entity - exercising rights under, and complying with all of the terms of, this - License or a future version of this License issued under Section 6.1. - For legal entities, "You" includes any entity which controls, is - controlled by, or is under common control with You. 
For purposes of - this definition, "control" means (a) the power, direct or indirect, - to cause the direction or management of such entity, whether by - contract or otherwise, or (b) ownership of more than fifty percent - (50%) of the outstanding shares or beneficial ownership of such - entity. - -2. Source Code License. - - 2.1. The Initial Developer Grant. - The Initial Developer hereby grants You a world-wide, royalty-free, - non-exclusive license, subject to third party intellectual property - claims: - (a) under intellectual property rights (other than patent or - trademark) Licensable by Initial Developer to use, reproduce, - modify, display, perform, sublicense and distribute the Original - Code (or portions thereof) with or without Modifications, and/or - as part of a Larger Work; and - - (b) under Patents Claims infringed by the making, using or - selling of Original Code, to make, have made, use, practice, - sell, and offer for sale, and/or otherwise dispose of the - Original Code (or portions thereof). - - (c) the licenses granted in this Section 2.1(a) and (b) are - effective on the date Initial Developer first distributes - Original Code under the terms of this License. - - (d) Notwithstanding Section 2.1(b) above, no patent license is - granted: 1) for code that You delete from the Original Code; 2) - separate from the Original Code; or 3) for infringements caused - by: i) the modification of the Original Code or ii) the - combination of the Original Code with other software or devices. - - 2.2. Contributor Grant. 
- Subject to third party intellectual property claims, each Contributor - hereby grants You a world-wide, royalty-free, non-exclusive license - - (a) under intellectual property rights (other than patent or - trademark) Licensable by Contributor, to use, reproduce, modify, - display, perform, sublicense and distribute the Modifications - created by such Contributor (or portions thereof) either on an - unmodified basis, with other Modifications, as Covered Code - and/or as part of a Larger Work; and - - (b) under Patent Claims infringed by the making, using, or - selling of Modifications made by that Contributor either alone - and/or in combination with its Contributor Version (or portions - of such combination), to make, use, sell, offer for sale, have - made, and/or otherwise dispose of: 1) Modifications made by that - Contributor (or portions thereof); and 2) the combination of - Modifications made by that Contributor with its Contributor - Version (or portions of such combination). - - (c) the licenses granted in Sections 2.2(a) and 2.2(b) are - effective on the date Contributor first makes Commercial Use of - the Covered Code. - - (d) Notwithstanding Section 2.2(b) above, no patent license is - granted: 1) for any code that Contributor has deleted from the - Contributor Version; 2) separate from the Contributor Version; - 3) for infringements caused by: i) third party modifications of - Contributor Version or ii) the combination of Modifications made - by that Contributor with other software (except as part of the - Contributor Version) or other devices; or 4) under Patent Claims - infringed by Covered Code in the absence of Modifications made by - that Contributor. - -3. Distribution Obligations. - - 3.1. Application of License. - The Modifications which You create or to which You contribute are - governed by the terms of this License, including without limitation - Section 2.2. 
The Source Code version of Covered Code may be - distributed only under the terms of this License or a future version - of this License released under Section 6.1, and You must include a - copy of this License with every copy of the Source Code You - distribute. You may not offer or impose any terms on any Source Code - version that alters or restricts the applicable version of this - License or the recipients' rights hereunder. However, You may include - an additional document offering the additional rights described in - Section 3.5. - - 3.2. Availability of Source Code. - Any Modification which You create or to which You contribute must be - made available in Source Code form under the terms of this License - either on the same media as an Executable version or via an accepted - Electronic Distribution Mechanism to anyone to whom you made an - Executable version available; and if made available via Electronic - Distribution Mechanism, must remain available for at least twelve (12) - months after the date it initially became available, or at least six - (6) months after a subsequent version of that particular Modification - has been made available to such recipients. You are responsible for - ensuring that the Source Code version remains available even if the - Electronic Distribution Mechanism is maintained by a third party. - - 3.3. Description of Modifications. - You must cause all Covered Code to which You contribute to contain a - file documenting the changes You made to create that Covered Code and - the date of any change. You must include a prominent statement that - the Modification is derived, directly or indirectly, from Original - Code provided by the Initial Developer and including the name of the - Initial Developer in (a) the Source Code, and (b) in any notice in an - Executable version or related documentation in which You describe the - origin or ownership of the Covered Code. - - 3.4. Intellectual Property Matters - (a) Third Party Claims. 
- If Contributor has knowledge that a license under a third party's - intellectual property rights is required to exercise the rights - granted by such Contributor under Sections 2.1 or 2.2, - Contributor must include a text file with the Source Code - distribution titled "LEGAL" which describes the claim and the - party making the claim in sufficient detail that a recipient will - know whom to contact. If Contributor obtains such knowledge after - the Modification is made available as described in Section 3.2, - Contributor shall promptly modify the LEGAL file in all copies - Contributor makes available thereafter and shall take other steps - (such as notifying appropriate mailing lists or newsgroups) - reasonably calculated to inform those who received the Covered - Code that new knowledge has been obtained. - - (b) Contributor APIs. - If Contributor's Modifications include an application programming - interface and Contributor has knowledge of patent licenses which - are reasonably necessary to implement that API, Contributor must - also include this information in the LEGAL file. - - (c) Representations. - Contributor represents that, except as disclosed pursuant to - Section 3.4(a) above, Contributor believes that Contributor's - Modifications are Contributor's original creation(s) and/or - Contributor has sufficient rights to grant the rights conveyed by - this License. - - 3.5. Required Notices. - You must duplicate the notice in Exhibit A in each file of the Source - Code. If it is not possible to put such notice in a particular Source - Code file due to its structure, then You must include such notice in a - location (such as a relevant directory) where a user would be likely - to look for such a notice. If You created one or more Modification(s) - You may add your name as a Contributor to the notice described in - Exhibit A. 
You must also duplicate this License in any documentation - for the Source Code where You describe recipients' rights or ownership - rights relating to Covered Code. You may choose to offer, and to - charge a fee for, warranty, support, indemnity or liability - obligations to one or more recipients of Covered Code. However, You - may do so only on Your own behalf, and not on behalf of the Initial - Developer or any Contributor. You must make it absolutely clear than - any such warranty, support, indemnity or liability obligation is - offered by You alone, and You hereby agree to indemnify the Initial - Developer and every Contributor for any liability incurred by the - Initial Developer or such Contributor as a result of warranty, - support, indemnity or liability terms You offer. - - 3.6. Distribution of Executable Versions. - You may distribute Covered Code in Executable form only if the - requirements of Section 3.1-3.5 have been met for that Covered Code, - and if You include a notice stating that the Source Code version of - the Covered Code is available under the terms of this License, - including a description of how and where You have fulfilled the - obligations of Section 3.2. The notice must be conspicuously included - in any notice in an Executable version, related documentation or - collateral in which You describe recipients' rights relating to the - Covered Code. You may distribute the Executable version of Covered - Code or ownership rights under a license of Your choice, which may - contain terms different from this License, provided that You are in - compliance with the terms of this License and that the license for the - Executable version does not attempt to limit or alter the recipient's - rights in the Source Code version from the rights set forth in this - License. 
If You distribute the Executable version under a different - license You must make it absolutely clear that any terms which differ - from this License are offered by You alone, not by the Initial - Developer or any Contributor. You hereby agree to indemnify the - Initial Developer and every Contributor for any liability incurred by - the Initial Developer or such Contributor as a result of any such - terms You offer. - - 3.7. Larger Works. - You may create a Larger Work by combining Covered Code with other code - not governed by the terms of this License and distribute the Larger - Work as a single product. In such a case, You must make sure the - requirements of this License are fulfilled for the Covered Code. - -4. Inability to Comply Due to Statute or Regulation. - - If it is impossible for You to comply with any of the terms of this - License with respect to some or all of the Covered Code due to - statute, judicial order, or regulation then You must: (a) comply with - the terms of this License to the maximum extent possible; and (b) - describe the limitations and the code they affect. Such description - must be included in the LEGAL file described in Section 3.4 and must - be included with all distributions of the Source Code. Except to the - extent prohibited by statute or regulation, such description must be - sufficiently detailed for a recipient of ordinary skill to be able to - understand it. - -5. Application of this License. - - This License applies to code to which the Initial Developer has - attached the notice in Exhibit A and to related Covered Code. - -6. Versions of the License. - - 6.1. New Versions. - Netscape Communications Corporation ("Netscape") may publish revised - and/or new versions of the License from time to time. Each version - will be given a distinguishing version number. - - 6.2. Effect of New Versions. 
- Once Covered Code has been published under a particular version of the - License, You may always continue to use it under the terms of that - version. You may also choose to use such Covered Code under the terms - of any subsequent version of the License published by Netscape. No one - other than Netscape has the right to modify the terms applicable to - Covered Code created under this License. - - 6.3. Derivative Works. - If You create or use a modified version of this License (which you may - only do in order to apply it to code which is not already Covered Code - governed by this License), You must (a) rename Your license so that - the phrases "Mozilla", "MOZILLAPL", "MOZPL", "Netscape", - "MPL", "NPL" or any confusingly similar phrase do not appear in your - license (except to note that your license differs from this License) - and (b) otherwise make it clear that Your version of the license - contains terms which differ from the Mozilla Public License and - Netscape Public License. (Filling in the name of the Initial - Developer, Original Code or Contributor in the notice described in - Exhibit A shall not of themselves be deemed to be modifications of - this License.) - -7. DISCLAIMER OF WARRANTY. - - COVERED CODE IS PROVIDED UNDER THIS LICENSE ON AN "AS IS" BASIS, - WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, - WITHOUT LIMITATION, WARRANTIES THAT THE COVERED CODE IS FREE OF - DEFECTS, MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE OR NON-INFRINGING. - THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE COVERED CODE - IS WITH YOU. SHOULD ANY COVERED CODE PROVE DEFECTIVE IN ANY RESPECT, - YOU (NOT THE INITIAL DEVELOPER OR ANY OTHER CONTRIBUTOR) ASSUME THE - COST OF ANY NECESSARY SERVICING, REPAIR OR CORRECTION. THIS DISCLAIMER - OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS LICENSE. NO USE OF - ANY COVERED CODE IS AUTHORIZED HEREUNDER EXCEPT UNDER THIS DISCLAIMER. - -8. TERMINATION. - - 8.1. 
This License and the rights granted hereunder will terminate - automatically if You fail to comply with terms herein and fail to cure - such breach within 30 days of becoming aware of the breach. All - sublicenses to the Covered Code which are properly granted shall - survive any termination of this License. Provisions which, by their - nature, must remain in effect beyond the termination of this License - shall survive. - - 8.2. If You initiate litigation by asserting a patent infringement - claim (excluding declatory judgment actions) against Initial Developer - or a Contributor (the Initial Developer or Contributor against whom - You file such action is referred to as "Participant") alleging that: - - (a) such Participant's Contributor Version directly or indirectly - infringes any patent, then any and all rights granted by such - Participant to You under Sections 2.1 and/or 2.2 of this License - shall, upon 60 days notice from Participant terminate prospectively, - unless if within 60 days after receipt of notice You either: (i) - agree in writing to pay Participant a mutually agreeable reasonable - royalty for Your past and future use of Modifications made by such - Participant, or (ii) withdraw Your litigation claim with respect to - the Contributor Version against such Participant. If within 60 days - of notice, a reasonable royalty and payment arrangement are not - mutually agreed upon in writing by the parties or the litigation claim - is not withdrawn, the rights granted by Participant to You under - Sections 2.1 and/or 2.2 automatically terminate at the expiration of - the 60 day notice period specified above. 
- - (b) any software, hardware, or device, other than such Participant's - Contributor Version, directly or indirectly infringes any patent, then - any rights granted to You by such Participant under Sections 2.1(b) - and 2.2(b) are revoked effective as of the date You first made, used, - sold, distributed, or had made, Modifications made by that - Participant. - - 8.3. If You assert a patent infringement claim against Participant - alleging that such Participant's Contributor Version directly or - indirectly infringes any patent where such claim is resolved (such as - by license or settlement) prior to the initiation of patent - infringement litigation, then the reasonable value of the licenses - granted by such Participant under Sections 2.1 or 2.2 shall be taken - into account in determining the amount or value of any payment or - license. - - 8.4. In the event of termination under Sections 8.1 or 8.2 above, - all end user license agreements (excluding distributors and resellers) - which have been validly granted by You or any distributor hereunder - prior to termination shall survive termination. - -9. LIMITATION OF LIABILITY. - - UNDER NO CIRCUMSTANCES AND UNDER NO LEGAL THEORY, WHETHER TORT - (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE, SHALL YOU, THE INITIAL - DEVELOPER, ANY OTHER CONTRIBUTOR, OR ANY DISTRIBUTOR OF COVERED CODE, - OR ANY SUPPLIER OF ANY OF SUCH PARTIES, BE LIABLE TO ANY PERSON FOR - ANY INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES OF ANY - CHARACTER INCLUDING, WITHOUT LIMITATION, DAMAGES FOR LOSS OF GOODWILL, - WORK STOPPAGE, COMPUTER FAILURE OR MALFUNCTION, OR ANY AND ALL OTHER - COMMERCIAL DAMAGES OR LOSSES, EVEN IF SUCH PARTY SHALL HAVE BEEN - INFORMED OF THE POSSIBILITY OF SUCH DAMAGES. THIS LIMITATION OF - LIABILITY SHALL NOT APPLY TO LIABILITY FOR DEATH OR PERSONAL INJURY - RESULTING FROM SUCH PARTY'S NEGLIGENCE TO THE EXTENT APPLICABLE LAW - PROHIBITS SUCH LIMITATION. 
SOME JURISDICTIONS DO NOT ALLOW THE - EXCLUSION OR LIMITATION OF INCIDENTAL OR CONSEQUENTIAL DAMAGES, SO - THIS EXCLUSION AND LIMITATION MAY NOT APPLY TO YOU. - -10. U.S. GOVERNMENT END USERS. - - The Covered Code is a "commercial item," as that term is defined in - 48 C.F.R. 2.101 (Oct. 1995), consisting of "commercial computer - software" and "commercial computer software documentation," as such - terms are used in 48 C.F.R. 12.212 (Sept. 1995). Consistent with 48 - C.F.R. 12.212 and 48 C.F.R. 227.7202-1 through 227.7202-4 (June 1995), - all U.S. Government End Users acquire Covered Code with only those - rights set forth herein. - -11. MISCELLANEOUS. - - This License represents the complete agreement concerning subject - matter hereof. If any provision of this License is held to be - unenforceable, such provision shall be reformed only to the extent - necessary to make it enforceable. This License shall be governed by - California law provisions (except to the extent applicable law, if - any, provides otherwise), excluding its conflict-of-law provisions. - With respect to disputes in which at least one party is a citizen of, - or an entity chartered or registered to do business in the United - States of America, any litigation relating to this License shall be - subject to the jurisdiction of the Federal Courts of the Northern - District of California, with venue lying in Santa Clara County, - California, with the losing party responsible for costs, including - without limitation, court costs and reasonable attorneys' fees and - expenses. The application of the United Nations Convention on - Contracts for the International Sale of Goods is expressly excluded. - Any law or regulation which provides that the language of a contract - shall be construed against the drafter shall not apply to this - License. - -12. RESPONSIBILITY FOR CLAIMS. 
- - As between Initial Developer and the Contributors, each party is - responsible for claims and damages arising, directly or indirectly, - out of its utilization of rights under this License and You agree to - work with Initial Developer and Contributors to distribute such - responsibility on an equitable basis. Nothing herein is intended or - shall be deemed to constitute any admission of liability. - -13. MULTIPLE-LICENSED CODE. - - Initial Developer may designate portions of the Covered Code as - "Multiple-Licensed". "Multiple-Licensed" means that the Initial - Developer permits you to utilize portions of the Covered Code under - Your choice of the NPL or the alternative licenses, if any, specified - by the Initial Developer in the file described in Exhibit A. - -EXHIBIT A -Mozilla Public License. - - ``The contents of this file are subject to the Mozilla Public License - Version 1.1 (the "License"); you may not use this file except in - compliance with the License. You may obtain a copy of the License at - http://www.mozilla.org/MPL/ - - Software distributed under the License is distributed on an "AS IS" - basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the - License for the specific language governing rights and limitations - under the License. - - The Original Code is ______________________________________. - - The Initial Developer of the Original Code is ________________________. - Portions created by ______________________ are Copyright (C) ______ - _______________________. All Rights Reserved. - - Contributor(s): ______________________________________. - - Alternatively, the contents of this file may be used under the terms - of the _____ license (the "[___] License"), in which case the - provisions of [______] License are applicable instead of those - above. 
If you wish to allow use of your version of this file only - under the terms of the [____] License and not to allow others to use - your version of this file under the MPL, indicate your decision by - deleting the provisions above and replace them with the notice and - other provisions required by the [___] License. If you do not delete - the provisions above, a recipient may use your version of this file - under either the MPL or the [___] License." - - [NOTE: The text of this Exhibit A may differ slightly from the text of - the notices in the Source Code files of the Original Code. You should - use the text of this Exhibit A rather than the text found in the - Original Code Source Code for Your Modifications.] - diff --git a/3rdparty/hunspell/1.6.2/README.md b/3rdparty/hunspell/1.6.2/README.md deleted file mode 100644 index 13bac95c78..0000000000 --- a/3rdparty/hunspell/1.6.2/README.md +++ /dev/null @@ -1,182 +0,0 @@ -About Hunspell -============== - -NOTICE: Version 2 is in the works. For contributing see -[version 2 specification][v2spec] and the folder `src/hunspell2`. - -[v2spec]: https://github.com/hunspell/hunspell/wiki/Version-2-Specification - -Hunspell is a spell checker and morphological analyzer library and program -designed for languages with rich morphology and complex word compounding or -character encoding. Hunspell interfaces: Ispell-like terminal interface -using Curses library, Ispell pipe interface, C++ class and C functions. - -Hunspell's code base comes from the OpenOffice.org MySpell -(http://lingucomponent.openoffice.org/MySpell-3.zip). See README.MYSPELL, -AUTHORS.MYSPELL and license.myspell files. -Hunspell is designed to eventually replace Myspell in OpenOffice.org. - -Main features of Hunspell spell checker and morphological analyzer: - -- Unicode support (affix rules work only with the first 65535 Unicode - characters) -- Morphological analysis (in custom item and arrangement style) and stemming -- Max. 
65535 affix classes and twofold affix stripping (for agglutinative - languages, like Azeri, Basque, Estonian, Finnish, Hungarian, Turkish, etc.) -- Support complex compoundings (for example, Hungarian and German) -- Support language specific features (for example, special casing of - Azeri and Turkish dotted i, or German sharp s) -- Handle conditional affixes, circumfixes, fogemorphemes, - forbidden words, pseudoroots and homonyms. -- Free software. Versions 1.x are licenced under LGPL, GPL, MPL tri-license. - Version 2 is licenced only under GNU LGPL. - -Compiling on GNU/Linux and Unixes -================================= - - autoreconf -vfi - ./configure - make - sudo make install - sudo ldconfig - -For dictionary development, use the `--with-warnings` option of configure. - -For interactive user interface of Hunspell executable, use the `--with-ui` option. - -The developer packages you need to compile Hunspell's interface: - - autoconf automake autopoint libtool g++ - -Optional developer packages: - -- ncurses (needed for --with-ui), e.g. libncursesw5 for UTF-8 -- readline (for fancy input line editing, - configure parameter: --with-readline) -- locale and gettext (but you can also use the - --with-included-gettext configure parameter) - -Compiling on Windows -==================== - -## 1. Compiling with Mingw64 and MSYS2 - -Download Msys2, update everything and install the following packages: - - pacman -S base-devel mingw-w64-x86_64-toolchain mingw-w64-x86_64-libtool - -Open Mingw-w64 Win64 prompt and compile the same way as on Linux, see above. - -## 2. Compiling in Cygwin environment - -Download and install Cygwin environment for Windows with the following -extra packages: - -- make -- automake -- autoconf -- libtool -- gcc-g++ development package -- ncurses, readline (for user interface) -- iconv (character conversion) - -Then compile the same way as on Linux. Cygwin builds depend on Cygwin1.dll.
- -Debugging -========= - -For debugging we need to create a debug build and then we need to start `gdb`. - - make clean - make CXXFLAGS='-g -O0' - libtool --mode=execute gdb src/tools/hunspell - -Testing -======= - -Testing Hunspell (see tests in tests/ subdirectory): - - make check - -or with Valgrind debugger: - - make check - VALGRIND=[Valgrind_tool] make check - -For example: - - make check - VALGRIND=memcheck make check - -Documentation -============= - -Features and dictionary format: - - man 5 hunspell - man hunspell - hunspell -h - -http://hunspell.github.io/ - -Usage -===== - -The src/tools directory contains eleven executables after compiling: - -- affixcompress: dictionary generation from large (millions of words) - vocabularies -- analyze: example of spell checking, stemming and morphological analysis -- chmorph: example of automatic morphological generation and conversion -- example: example of spell checking and suggestion -- hunspell: main program for spell checking and others (see manual) -- hunzip: decompressor of hzip format -- hzip: compressor of hzip format -- makealias: alias compression (Hunspell only, not backward compatible with MySpell) -- munch: dictionary generation from vocabularies (it needs an affix file, too).
-- unmunch: list all recognized words of a MySpell dictionary -- wordforms: word generation (Hunspell version of unmunch) - -After compiling and installing (see INSTALL) you can -run the Hunspell spell checker (compiled with user interface) -with a Hunspell or Myspell dictionary: - - hunspell -d en_US text.txt - -or without interface: - - hunspell - hunspell -d en_UK -l - -Linking with Hunspell static library: - - g++ -lhunspell example.cxx - -Dictionaries ------------- - -Myspell & Hunspell dictionaries: - -- http://extensions.libreoffice.org -- http://cgit.freedesktop.org/libreoffice/dictionaries -- http://extensions.openoffice.org -- http://wiki.services.openoffice.org/wiki/Dictionaries - -Aspell dictionaries (need some conversion): - -- ftp://ftp.gnu.org/gnu/aspell/dict - -Conversion steps: see relevant feature request at http://hunspell.github.io/ . - -László Németh -nemeth at numbertext org diff --git a/3rdparty/hunspell/1.6.2/THANKS b/3rdparty/hunspell/1.6.2/THANKS deleted file mode 100644 index 761fa77438..0000000000 --- a/3rdparty/hunspell/1.6.2/THANKS +++ /dev/null @@ -1,136 +0,0 @@ -Many thanks to the following contributors and supporters: - -Mehmet Akin -Göran Andersson -Lars Aronsson -Ruud Baars -Bartkó Zoltán -Mathias Bauer -Bencsáth Boldizsár -Bíró Árpád -Ingo H. de Boer -Simon Brouwer -Jeppe Bundsgaard -Ginn Chen -Tomáš Chvátal -Aaron Digulla -Dmitri Gabinski -Dvornik László -David Einstein -Rene Engelhard -Frederik Fouvry -Flemming Frandsen -Serge Gautherie -Marek Gleń -Gavins at OOo -Gefferth András -Godó Ferenc -Goldman Eleonóra -Steinar H. 
Gunderson -Halácsy Péter -Chris Halls -Khaled Hosny -Izsók András -Björn Jacke -Mike Tian-Jian Jiang -Dafydd Jones -Ryan Jones -Jean-Christophe Helary -Kevin Hendricks -Martin Hollmichel -Pavel Janík -John Winters -Mohamed Kebdani -Kelemen Gábor -Shewangizaw Gulilat -Kéménczy Kálmán -Dan Kenigsberg -Pham Ngoc Khanh -Khiraly László -Koblinger Egmont -Kornai András -Tor Lillqvist -Christian Lohmaier -Robert Longson -Marot at SF dot net -Mark McClain -Caolan McNamara -Michael Meeks -Moheb Mekhaiel -Laurie Mercer -Ladislav Michnovič -Ellis Miller -Giuseppe Modugno -János Mohácsi -Bram Moolenaar -Daniel Naber -Nagy Viktor -John Nisly -Noll János -S Page -Christophe Paris -Malcolm Parsons -Sylvain Paschein -Volkov Peter -Bryan Petty -Harri Pitkänen -Davide Prina -Kevin F. Quinn -Erdal Ronahi -Olivier Ronez -Bernhard Rosenkraenzer -Sarlós Tamás -Thobias Schlemmer -Jan Seeger -Jose da Silva -Paulo Ney de Souza -Roland Smith -Munzir Taha -Timeless at bemail dot org -Tímár András -Tonal at OOo -Török László -Trón Viktor -Gianluca Turconi -Ryan VanderMeulen -Varga Dániel -Elio Voci -Miha Vrhovnik -Martijn Wargers -Michel Weimerskirch -Brett Wilson -Friedel Wolff -Daniel Yacob -Gábor Zahemszky -Taha Zerrouki -and others (see also AUTHORS.myspell) - -FSF.hu Foundation -http://www.fsf.hu - -LibreOffice community -http://www.libreoffice.org - -MOKK Research Centre -Budapest University of Technology and Economics -Sociology and Communications Department -http://www.mokk.bme.hu - -Hungarian Ministry of Informatics and Telecommunications - -IMEDIA Kft. -http://www.imedia.hu - -OpenOffice.org community -http://www.openoffice.org - -OpenTaal Foundation, Netherlands and -Dutch Language Union (Nederlandse Taalunie) -http://opentaal.org - -UHU-Linux Kft. 
- -Thanks, - -Németh László -nemeth at numbertext org diff --git a/3rdparty/hunspell/1.6.2/TODO b/3rdparty/hunspell/1.6.2/TODO deleted file mode 100644 index fb32e7ec89..0000000000 --- a/3rdparty/hunspell/1.6.2/TODO +++ /dev/null @@ -1,4 +0,0 @@ -* shared dictionaries for multi-user environment -* improve compound handling -* Unicode unmunch (munch) -* forbiddenword and pseudoword support in unmunch diff --git a/3rdparty/hunspell/1.6.2/src/parsers/.gitignore b/3rdparty/hunspell/1.6.2/src/parsers/.gitignore deleted file mode 100644 index f2d014662d..0000000000 --- a/3rdparty/hunspell/1.6.2/src/parsers/.gitignore +++ /dev/null @@ -1 +0,0 @@ -testparser diff --git a/3rdparty/hunspell/1.6.2/src/win_api/config.h b/3rdparty/hunspell/1.6.2/src/win_api/config.h deleted file mode 100644 index f3b64fb819..0000000000 --- a/3rdparty/hunspell/1.6.2/src/win_api/config.h +++ /dev/null @@ -1,205 +0,0 @@ -/* config.h.in. Generated from configure.ac by autoheader. */ - -/* Define to one of `_getb67', `GETB67', `getb67' for Cray-2 and Cray-YMP - systems. This function is required for `alloca.c' support on those systems. - */ -#define CRAY_STACKSEG_END 1 - -/* Define to 1 if using `alloca.c'. */ -#define C_ALLOCA 1 - -/* Define to 1 if translation of program messages to the user's native - language is requested. */ -#undef ENABLE_NLS - -/* Define to 1 if you have `alloca', as a function or macro. */ -#define HAVE_ALLOCA 1 - -/* Define to 1 if you have and it should be used (not on Ultrix). - */ -#define HAVE_ALLOCA_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_ARGZ_H 1 - -/* "Define if you have the header" */ -#undef HAVE_CURSES_H - -/* Define if the GNU dcgettext() function is already present or preinstalled. - */ -#define HAVE_DCGETTEXT 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_DLFCN_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_ERROR_H 1 - -/* Define to 1 if you have the header file. 
*/ -#define HAVE_FCNTL_H 1 - -/* Define to 1 if you have the `feof_unlocked' function. */ -#define HAVE_FEOF_UNLOCKED 1 - -/* Define to 1 if you have the `fgets_unlocked' function. */ -#define HAVE_FGETS_UNLOCKED 1 - -/* Define to 1 if you have the `getcwd' function. */ -#define HAVE_GETCWD 1 - -/* Define to 1 if you have the `getc_unlocked' function. */ -#define HAVE_GETC_UNLOCKED 1 - -/* Define to 1 if you have the `getegid' function. */ -#define HAVE_GETEGID 1 - -/* Define to 1 if you have the `geteuid' function. */ -#define HAVE_GETEUID 1 - -/* Define to 1 if you have the `getgid' function. */ -#define HAVE_GETGID 1 - -/* Define to 1 if you have the `getpagesize' function. */ -#define HAVE_GETPAGESIZE 1 - -/* Define if the GNU gettext() function is already present or preinstalled. */ -#define HAVE_GETTEXT 1 - -/* Define to 1 if you have the `getuid' function. */ -#define HAVE_GETUID 1 - -/* Define if you have the iconv() function. */ -#undef HAVE_ICONV - -/* Define to 1 if you have the header file. */ -#define HAVE_INTTYPES_H 1 - -/* Define if you have and nl_langinfo(CODESET). */ -#define HAVE_LANGINFO_CODESET 1 - -/* Define if your file defines LC_MESSAGES. */ -#define HAVE_LC_MESSAGES 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_LIBINTL_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_LIMITS_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_LOCALE_H 1 - -/* Define to 1 if you have the `memchr' function. */ -#define HAVE_MEMCHR 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_MEMORY_H 1 - -/* Define to 1 if you have the `mempcpy' function. */ -#define HAVE_MEMPCPY 1 - -/* Define to 1 if you have a working `mmap' system call. */ -#define HAVE_MMAP 1 - -/* Define to 1 if you have the `munmap' function. */ -#define HAVE_MUNMAP 1 - -/* "Define if you have the header" */ -#define HAVE_NCURSESW_H 1 - -/* Define to 1 if you have the header file. 
*/ -#define HAVE_NL_TYPES_H 1 - -/* Define to 1 if you have the `putenv' function. */ -#define HAVE_PUTENV 1 - -/* "Define if you have fancy command input editing with Readline" */ -#undef HAVE_READLINE - -/* Define to 1 if you have the `setenv' function. */ -#define HAVE_SETENV 1 - -/* Define to 1 if you have the `setlocale' function. */ -#define HAVE_SETLOCALE 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_STDDEF_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_STDINT_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_STDLIB_H 1 - -/* Define to 1 if you have the `stpcpy' function. */ -#define HAVE_STPCPY 1 - -/* Define to 1 if you have the `strcasecmp' function. */ -#define HAVE_STRCASECMP 1 - -/* Define to 1 if you have the `strchr' function. */ -#define HAVE_STRCHR 1 - -/* Define to 1 if you have the `strdup' function. */ -#define HAVE_STRDUP 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_STRINGS_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_STRING_H 1 - -/* Define to 1 if you have the `strstr' function. */ -#define HAVE_STRSTR 1 - -/* Define to 1 if you have the `strtoul' function. */ -#define HAVE_STRTOUL 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_SYS_PARAM_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_SYS_STAT_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_SYS_TYPES_H 1 - -/* Define to 1 if you have the `tsearch' function. */ -#define HAVE_TSEARCH 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_UNISTD_H 1 - -/* Define to 1 if you have the `__argz_count' function. */ -#define HAVE___ARGZ_COUNT 1 - -/* Define to 1 if you have the `__argz_next' function. */ -#define HAVE___ARGZ_NEXT 1 - -/* Define to 1 if you have the `__argz_stringify' function. 
*/ -#define HAVE___ARGZ_STRINGIFY 1 - -/* "Define if you need warning messages" */ -#define HUNSPELL_WARNING_ON - -/* Define as const if the declaration of iconv() needs const. */ -#define ICONV_CONST 1 - -/* Name of package */ -#define PACKAGE - -/* Define to the address where bug reports for this package should be sent. */ -#define PACKAGE_BUGREPORT - -/* Define to the full name of this package. */ -#define PACKAGE_NAME - -/* Define to the full name and version of this package. */ -#define PACKAGE_STRING - -/* Define to the one symbol short name of this package. */ -#define PACKAGE_TARNAME - -/* Define to the version of this package. */ -#define PACKAGE_VERSION "1.6.2" -#define VERSION "1.6.2" diff --git a/3rdparty/hunspell/1.7.0/AUTHORS b/3rdparty/hunspell/1.7.0/AUTHORS new file mode 100644 index 0000000000..65ceecad2d --- /dev/null +++ b/3rdparty/hunspell/1.7.0/AUTHORS @@ -0,0 +1,23 @@ +Author: László Németh + +MySpell's (Hunspell's code base) author: Kevin Hendricks + +Maintainer, distinguished contributor: Caolán McNamara + +Author of rule-based transformation code for phonetic suggestions +(PHONE/phonet.cxx, used by English dictionaries): Björn Jacke + + +Hunspell/MySpell code base is result of work of several +contributors. See git log, ./THANKS, ./Changelog, ./Changelog.O, +MySpell's README and CONTRIBUTORS files for their contributions. + + +Note: Following contributors are not owners and +not representatives of Hunspell, and they haven't got +permission from Hunspell's author to act on the behalf +of Hunspell project: Sander van Geloven, Dimitri Mijoski + + +If you would like to support Hunspell, join the +development or contact the author. 
diff --git a/3rdparty/hunspell/1.7.0/COPYING b/3rdparty/hunspell/1.7.0/COPYING new file mode 100644 index 0000000000..1f963da0d1 --- /dev/null +++ b/3rdparty/hunspell/1.7.0/COPYING @@ -0,0 +1,340 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. 
And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. 
The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. 
(Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. 
You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. 
+ +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. 
If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version.
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + <signature of Ty Coon>, 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License.
+ diff --git a/3rdparty/hunspell/1.7.0/COPYING.LESSER b/3rdparty/hunspell/1.7.0/COPYING.LESSER new file mode 100644 index 0000000000..67cd97bbc2 --- /dev/null +++ b/3rdparty/hunspell/1.7.0/COPYING.LESSER @@ -0,0 +1,503 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 2.1, February 1999 + + Copyright (C) 1991, 1999 Free Software Foundation, Inc. + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + +[This is the first released version of the Lesser GPL. It also counts + as the successor of the GNU Library Public License, version 2, hence + the version number 2.1.] + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +Licenses are intended to guarantee your freedom to share and change +free software--to make sure the software is free for all its users. + + This license, the Lesser General Public License, applies to some +specially designated software packages--typically libraries--of the +Free Software Foundation and other authors who decide to use it. You +can use it too, but we suggest you first think carefully about whether +this license or the ordinary General Public License is the better +strategy to use in any particular case, based on the explanations below. + + When we speak of free software, we are referring to freedom of use, +not price. Our General Public Licenses are designed to make sure that +you have the freedom to distribute copies of free software (and charge +for this service if you wish); that you receive source code or can get +it if you want it; that you can change the software and use pieces of +it in new free programs; and that you are informed that you can do +these things. + + To protect your rights, we need to make restrictions that forbid +distributors to deny you these rights or to ask you to surrender these +rights. 
These restrictions translate to certain responsibilities for +you if you distribute copies of the library or if you modify it. + + For example, if you distribute copies of the library, whether gratis +or for a fee, you must give the recipients all the rights that we gave +you. You must make sure that they, too, receive or can get the source +code. If you link other code with the library, you must provide +complete object files to the recipients, so that they can relink them +with the library after making changes to the library and recompiling +it. And you must show them these terms so they know their rights. + + We protect your rights with a two-step method: (1) we copyright the +library, and (2) we offer you this license, which gives you legal +permission to copy, distribute and/or modify the library. + + To protect each distributor, we want to make it very clear that +there is no warranty for the free library. Also, if the library is +modified by someone else and passed on, the recipients should know +that what they have is not the original version, so that the original +author's reputation will not be affected by problems that might be +introduced by others. + + Finally, software patents pose a constant threat to the existence of +any free program. We wish to make sure that a company cannot +effectively restrict the users of a free program by obtaining a +restrictive license from a patent holder. Therefore, we insist that +any patent license obtained for a version of the library must be +consistent with the full freedom of use specified in this license. + + Most GNU software, including some libraries, is covered by the +ordinary GNU General Public License. This license, the GNU Lesser +General Public License, applies to certain designated libraries, and +is quite different from the ordinary General Public License. We use +this license for certain libraries in order to permit linking those +libraries into non-free programs. 
+ + When a program is linked with a library, whether statically or using +a shared library, the combination of the two is legally speaking a +combined work, a derivative of the original library. The ordinary +General Public License therefore permits such linking only if the +entire combination fits its criteria of freedom. The Lesser General +Public License permits more lax criteria for linking other code with +the library. + + We call this license the "Lesser" General Public License because it +does Less to protect the user's freedom than the ordinary General +Public License. It also provides other free software developers Less +of an advantage over competing non-free programs. These disadvantages +are the reason we use the ordinary General Public License for many +libraries. However, the Lesser license provides advantages in certain +special circumstances. + + For example, on rare occasions, there may be a special need to +encourage the widest possible use of a certain library, so that it becomes +a de-facto standard. To achieve this, non-free programs must be +allowed to use the library. A more frequent case is that a free +library does the same job as widely used non-free libraries. In this +case, there is little to gain by limiting the free library to free +software only, so we use the Lesser General Public License. + + In other cases, permission to use a particular library in non-free +programs enables a greater number of people to use a large body of +free software. For example, permission to use the GNU C Library in +non-free programs enables many more people to use the whole GNU +operating system, as well as its variant, the GNU/Linux operating +system. + + Although the Lesser General Public License is Less protective of the +users' freedom, it does ensure that the user of a program that is +linked with the Library has the freedom and the wherewithal to run +that program using a modified version of the Library. 
+ + The precise terms and conditions for copying, distribution and +modification follow. Pay close attention to the difference between a +"work based on the library" and a "work that uses the library". The +former contains code derived from the library, whereas the latter must +be combined with the library in order to run. + + GNU LESSER GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License Agreement applies to any software library or other +program which contains a notice placed by the copyright holder or +other authorized party saying it may be distributed under the terms of +this Lesser General Public License (also called "this License"). +Each licensee is addressed as "you". + + A "library" means a collection of software functions and/or data +prepared so as to be conveniently linked with application programs +(which use some of those functions and data) to form executables. + + The "Library", below, refers to any such software library or work +which has been distributed under these terms. A "work based on the +Library" means either the Library or any derivative work under +copyright law: that is to say, a work containing the Library or a +portion of it, either verbatim or with modifications and/or translated +straightforwardly into another language. (Hereinafter, translation is +included without limitation in the term "modification".) + + "Source code" for a work means the preferred form of the work for +making modifications to it. For a library, complete source code means +all the source code for all modules it contains, plus any associated +interface definition files, plus the scripts used to control compilation +and installation of the library. + + Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. 
The act of +running a program using the Library is not restricted, and output from +such a program is covered only if its contents constitute a work based +on the Library (independent of the use of the Library in a tool for +writing it). Whether that is true depends on what the Library does +and what the program that uses the Library does. + + 1. You may copy and distribute verbatim copies of the Library's +complete source code as you receive it, in any medium, provided that +you conspicuously and appropriately publish on each copy an +appropriate copyright notice and disclaimer of warranty; keep intact +all the notices that refer to this License and to the absence of any +warranty; and distribute a copy of this License along with the +Library. + + You may charge a fee for the physical act of transferring a copy, +and you may at your option offer warranty protection in exchange for a +fee. + + 2. You may modify your copy or copies of the Library or any portion +of it, thus forming a work based on the Library, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) The modified work must itself be a software library. + + b) You must cause the files modified to carry prominent notices + stating that you changed the files and the date of any change. + + c) You must cause the whole of the work to be licensed at no + charge to all third parties under the terms of this License. + + d) If a facility in the modified Library refers to a function or a + table of data to be supplied by an application program that uses + the facility, other than as an argument passed when the facility + is invoked, then you must make a good faith effort to ensure that, + in the event an application does not supply such function or + table, the facility still operates, and performs whatever part of + its purpose remains meaningful. 
+ + (For example, a function in a library to compute square roots has + a purpose that is entirely well-defined independent of the + application. Therefore, Subsection 2d requires that any + application-supplied function or table used by this function must + be optional: if the application does not supply it, the square + root function must still compute square roots.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Library, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Library, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote +it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Library. + +In addition, mere aggregation of another work not based on the Library +with the Library (or with a work based on the Library) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may opt to apply the terms of the ordinary GNU General Public +License instead of this License to a given copy of the Library. To do +this, you must alter all the notices that refer to this License, so +that they refer to the ordinary GNU General Public License, version 2, +instead of to this License. (If a newer version than version 2 of the +ordinary GNU General Public License has appeared, then you can specify +that version instead if you wish.) Do not make any other change in +these notices. 
+ + Once this change is made in a given copy, it is irreversible for +that copy, so the ordinary GNU General Public License applies to all +subsequent copies and derivative works made from that copy. + + This option is useful when you wish to copy part of the code of +the Library into a program that is not a library. + + 4. You may copy and distribute the Library (or a portion or +derivative of it, under Section 2) in object code or executable form +under the terms of Sections 1 and 2 above provided that you accompany +it with the complete corresponding machine-readable source code, which +must be distributed under the terms of Sections 1 and 2 above on a +medium customarily used for software interchange. + + If distribution of object code is made by offering access to copy +from a designated place, then offering equivalent access to copy the +source code from the same place satisfies the requirement to +distribute the source code, even though third parties are not +compelled to copy the source along with the object code. + + 5. A program that contains no derivative of any portion of the +Library, but is designed to work with the Library by being compiled or +linked with it, is called a "work that uses the Library". Such a +work, in isolation, is not a derivative work of the Library, and +therefore falls outside the scope of this License. + + However, linking a "work that uses the Library" with the Library +creates an executable that is a derivative of the Library (because it +contains portions of the Library), rather than a "work that uses the +library". The executable is therefore covered by this License. +Section 6 states terms for distribution of such executables. + + When a "work that uses the Library" uses material from a header file +that is part of the Library, the object code for the work may be a +derivative work of the Library even though the source code is not. 
+Whether this is true is especially significant if the work can be +linked without the Library, or if the work is itself a library. The +threshold for this to be true is not precisely defined by law. + + If such an object file uses only numerical parameters, data +structure layouts and accessors, and small macros and small inline +functions (ten lines or less in length), then the use of the object +file is unrestricted, regardless of whether it is legally a derivative +work. (Executables containing this object code plus portions of the +Library will still fall under Section 6.) + + Otherwise, if the work is a derivative of the Library, you may +distribute the object code for the work under the terms of Section 6. +Any executables containing that work also fall under Section 6, +whether or not they are linked directly with the Library itself. + + 6. As an exception to the Sections above, you may also combine or +link a "work that uses the Library" with the Library to produce a +work containing portions of the Library, and distribute that work +under terms of your choice, provided that the terms permit +modification of the work for the customer's own use and reverse +engineering for debugging such modifications. + + You must give prominent notice with each copy of the work that the +Library is used in it and that the Library and its use are covered by +this License. You must supply a copy of this License. If the work +during execution displays copyright notices, you must include the +copyright notice for the Library among them, as well as a reference +directing the user to the copy of this License. 
Also, you must do one +of these things: + + a) Accompany the work with the complete corresponding + machine-readable source code for the Library including whatever + changes were used in the work (which must be distributed under + Sections 1 and 2 above); and, if the work is an executable linked + with the Library, with the complete machine-readable "work that + uses the Library", as object code and/or source code, so that the + user can modify the Library and then relink to produce a modified + executable containing the modified Library. (It is understood + that the user who changes the contents of definitions files in the + Library will not necessarily be able to recompile the application + to use the modified definitions.) + + b) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (1) uses at run time a + copy of the library already present on the user's computer system, + rather than copying library functions into the executable, and (2) + will operate properly with a modified version of the library, if + the user installs one, as long as the modified version is + interface-compatible with the version that the work was made with. + + c) Accompany the work with a written offer, valid for at + least three years, to give the same user the materials + specified in Subsection 6a, above, for a charge no more + than the cost of performing this distribution. + + d) If distribution of the work is made by offering access to copy + from a designated place, offer equivalent access to copy the above + specified materials from the same place. + + e) Verify that the user has already received a copy of these + materials or that you have already sent this user a copy. + + For an executable, the required form of the "work that uses the +Library" must include any data and utility programs needed for +reproducing the executable from it. 
However, as a special exception, +the materials to be distributed need not include anything that is +normally distributed (in either source or binary form) with the major +components (compiler, kernel, and so on) of the operating system on +which the executable runs, unless that component itself accompanies +the executable. + + It may happen that this requirement contradicts the license +restrictions of other proprietary libraries that do not normally +accompany the operating system. Such a contradiction means you cannot +use both them and the Library together in an executable that you +distribute. + + 7. You may place library facilities that are a work based on the +Library side-by-side in a single library together with other library +facilities not covered by this License, and distribute such a combined +library, provided that the separate distribution of the work based on +the Library and of the other library facilities is otherwise +permitted, and provided that you do these two things: + + a) Accompany the combined library with a copy of the same work + based on the Library, uncombined with any other library + facilities. This must be distributed under the terms of the + Sections above. + + b) Give prominent notice with the combined library of the fact + that part of it is a work based on the Library, and explaining + where to find the accompanying uncombined form of the same work. + + 8. You may not copy, modify, sublicense, link with, or distribute +the Library except as expressly provided under this License. Any +attempt otherwise to copy, modify, sublicense, link with, or +distribute the Library is void, and will automatically terminate your +rights under this License. However, parties who have received copies, +or rights, from you under this License will not have their licenses +terminated so long as such parties remain in full compliance. + + 9. You are not required to accept this License, since you have not +signed it. 
However, nothing else grants you permission to modify or +distribute the Library or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Library (or any work based on the +Library), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Library or works based on it. + + 10. Each time you redistribute the Library (or any work based on the +Library), the recipient automatically receives a license from the +original licensor to copy, distribute, link with or modify the Library +subject to these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties with +this License. + + 11. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Library at all. For example, if a patent +license would not permit royalty-free redistribution of the Library by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Library. + +If any portion of this section is held invalid or unenforceable under any +particular circumstance, the balance of the section is intended to apply, +and the section as a whole is intended to apply in other circumstances. 
+ +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 12. If the distribution and/or use of the Library is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Library under this License may add +an explicit geographical distribution limitation excluding those countries, +so that distribution is permitted only in or among countries not thus +excluded. In such case, this License incorporates the limitation as if +written in the body of this License. + + 13. The Free Software Foundation may publish revised and/or new +versions of the Lesser General Public License from time to time. +Such new versions will be similar in spirit to the present version, +but may differ in detail to address new problems or concerns. + +Each version is given a distinguishing version number. If the Library +specifies a version number of this License which applies to it and +"any later version", you have the option of following the terms and +conditions either of that version or of any later version published by +the Free Software Foundation. If the Library does not specify a +license version number, you may choose any version ever published by +the Free Software Foundation. + + 14. 
If you wish to incorporate parts of the Library into other free +programs whose distribution conditions are incompatible with these, +write to the author to ask for permission. For software which is +copyrighted by the Free Software Foundation, write to the Free +Software Foundation; we sometimes make exceptions for this. Our +decision will be guided by the two goals of preserving the free status +of all derivatives of our free software and of promoting the sharing +and reuse of software generally. + + NO WARRANTY + + 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO +WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. +EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR +OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY +KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE +LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME +THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN +WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY +AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU +FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR +CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE +LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING +RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A +FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF +SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH +DAMAGES. 
+ + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Libraries + + If you develop a new library, and you want it to be of the greatest +possible use to the public, we recommend making it free software that +everyone can redistribute and change. You can do so by permitting +redistribution under these terms (or, alternatively, under the terms of the +ordinary General Public License). + + To apply these terms, attach the following notices to the library. It is +safest to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least the +"copyright" line and a pointer to where the full notice is found. + + <one line to give the library's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +Also add information on how to contact you by electronic and paper mail. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the library, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the + library `Frob' (a library for tweaking knobs) written by James Random Hacker. + + <signature of Ty Coon>, 1 April 1990 + Ty Coon, President of Vice + +That's all there is to it!
+ diff --git a/3rdparty/hunspell/1.6.2/ChangeLog b/3rdparty/hunspell/1.7.0/ChangeLog similarity index 99% rename from 3rdparty/hunspell/1.6.2/ChangeLog rename to 3rdparty/hunspell/1.7.0/ChangeLog index 1f6e774a63..e7dc98ed38 100644 --- a/3rdparty/hunspell/1.6.2/ChangeLog +++ b/3rdparty/hunspell/1.7.0/ChangeLog @@ -1683,7 +1683,7 @@ changes: * src/hunspell/suggestmgr.cxx: - fix missing suggestions for words with crossed prefix and suffix - - fix redundant non compound word checking + - fix redundant non-compound word checking - fix losing suggestions problem. Bug reported by Dmitri Gabinski. * src/hunspell/dictmgr.*: diff --git a/3rdparty/hunspell/1.7.0/NEWS b/3rdparty/hunspell/1.7.0/NEWS new file mode 100644 index 0000000000..97750bf5d4 --- /dev/null +++ b/3rdparty/hunspell/1.7.0/NEWS @@ -0,0 +1,839 @@ +2018-11-12: Hunspell 1.7.0 release: + + New features and bug fixes by László Németh, supported by FSF.hu Foundation: + + - No annoying suggestion times any more, especially in languages with + compound word handling and complex morphology. By adding balanced + multi-level time limits, now the guaranteed suggestion time is there + within half a second, not seconds (nor dozen of seconds or more + in extreme cases) for longer misspellings, too. + + - add SPELLML support for run-time dictionary extension with optional + affixation of user words.
See new "Grammar By" feature of + language-specific user dictionaries of LibreOffice 6.0: + + News: https://wiki.documentfoundation.org/ReleaseNotes/6.0#.E2.80.9CGrammar_By.E2.80.9D_spell_checking + + Screencast with English example: https://www.youtube.com/watch?v=EsS3gaBTfOo + + Screencast with German example: https://www.youtube.com/watch?v=aYVFDqCUb6I + + - Improved, highly customizable suggestions on level of dictionary words: + Pronunciations and typical misspellings defined by optional "ph:" fields of + the dictionary words are used not only in n-gram suggestions, but as + elements of the REP replacement list getting the highest priority in normal + suggestions, also giving the best suggestions for short words, too. + More information: see "ph:" in man 5 hunspell. + + - Handling multiple word suggestions is much easier. Like in a + traditional spelling dictionary, for example, to get the correct suggestion + "a lot" for the typical misspelling "alot" at the first place, now it's + enough to put the following line to the dic(tionary) file: + + a lot + + - Limit compound overgeneration by dictionary based word pairs: + Now it's possible to filter bad compound words by listing + the correct word pairs with space in the dictionary, as in a traditional + spelling dictionary. + + - clean-up suggestion: + + - no n-gram and compound word suggestions, if "good" suggestion + exists, ie. uppercase, REP, ph: or dictionary word pair suggestions + + - word pairs are always suggested, if they exist in the dic file + + - word pairs have top priority in suggestions, and + these are the only suggestions if there is no other good suggestion.
+ + - also dictionary word pairs separated by dash instead of space + are handled specially in two-word suggestion (depending from the + language) + + - limit bad suggestions by improved n-gram suggestion rules: + + don't suggest capitalized dictionary words for lower + case misspellings in n-gram suggestions, except + + - PHONE usage, or + - in the case of German, where not only proper + nouns are capitalized, or + - the capitalized word has special pronunciation + + and don't suggest if the difference of lengths of misspellings and + suggestions is 5 or more characters. + + - Extend dotless i and dotted I rules to Crimean Tatar language + Allow dotted I in dictionary, and disable bad capitalization of i. + + - BREAK: extended recursive word breaking algorithm to handle words or + words with suffixes when they already contain word break characters, + for example, "e-mail" is a dictionary word with a word break character, and + it wasn't accepted before in compounds in some languages. + + - FORBIDDENWORD precedes BREAK: Now it's possible to forbid compound + forms recognized by BREAK word breaking by adding the bad compounds to + the dictionary with FORBIDDENWORD flags. + + - lower limit for "doubletwochars" suggestion algorithm: + one of the typical misspellings recognized by Hunspell suggestion + mechanism is the syllable duplication. Along the old pattern + ABABA -> ABA, for example nutrITITIon -> nutrITIon, now also the + simpler ABAB -> AB pattern is recognized in non-starting position, + for example, regretTETEd -> regretTEd. + + - lower limit for longswapchar and movechar: recognized only max. + 4-character distances to avoid slow and bad suggestions. 
+ + - fix compound handling for new Hungarian orthography reform + + - Allow suggestion search for prefix + *two suffixes*: + Remove artificial performance limit to get correct + suggestions for relatively simple misspellings in + Hungarian, etc., when the word form contains prefix + and both derivative and inflectional suffixes, too: + + lefikszálása -> lefixálása + + Improvements for command-line Hunspell: + + - Remove false alarms during checking OpenDocument (ODF) + documents by ignoring <text:span> elements. (LibreOffice + creates a lot of <text:span> elements also within words + during text reediting, which often resulted in a huge amount of broken + words before this fix.) + + - List filenames during filtering multiple files in command-line: + + Examples: + + $ hunspell -l *.odt + a.odt: mispelling + b.odt: egzample + + $ hunspell -l -G *.odt + a.odt: good + b.odt: words + + - Dictionary search by option -D doesn't wait for the standard input + (fixed by Siva Mahadevan) + + Other improvements: + + - makealias dictionary compression: add option --minimize-diff + to reuse free positions of alias lists to create minimal and + readable diffs for alias compressed dictionaries stored in + revision control systems, as dictionaries of LibreOffice. + + - Brazilian-Portuguese translation by Rafael Fontenelle + + - Catalan translation by robert dot buj at gmail + + - Minor bug fixes by several contributors, see git log + +2017-09-03: Hunspell 1.6.2 release: + - Library changes: no. Same as 1.6.1. + - Command line tool: + - Added German translation + - Fixed bug with wrong output encoding, not respecting system locale. + +2017-03-25: Hunspell 1.6.1 release: + - Library changes: + - Performance improvements in suggest() + - Fixes regressions for Hungarian related to compounding. + - Fixes regressions for Korean related to ICONV. + - Command line tool: + - Added Tajik translation + - Fix regarding searching of OOo dicts installed in user folder + - Manpages: + - Fix microsoft-cp1251 to cp1251.
Dicts should not use the first. + - Typos. + +2016-12-22: Hunspell 1.6.0 release: + - Library changes: + - Performance improvement in ngsuggest(), suggestions should be faster. + - Revert MAXWORDLEN to 100 as in 1.3.3 for performance reasons. + - MAXWORDLEN can be set during build time with -D defines. + - Fix crash when word with 102 consecutive X is spelled. + - Command line tool: + - -D shows all loaded dictionaries instead of only the first. + - -D properly lists all available dictionaries on Windows. + +2016-11-30: Hunspell 1.5.4 release: + - Fixes the command COMPOUNDSYLLABLE used in Hungarian dictionary. + +2016-11-28: Hunspell 1.5.3 release: + - Removed a #include from hunspell.hxx that was creating trouble + +2016-11-27: Hunspell 1.5.2 release: + - Reverted full backward compatibility with 1.4 public API, again + +2016-11-27: Hunspell 1.5.1 release: + - Reverted full backward compatibility with 1.4 public API + +2016-11-18: Hunspell 1.5.0 release: + - Lot of stability fixes + - Fixed compilation errors on various systems (Windows, FreeBSD) + - Small performance improvement compared to 1.4.0 + - The C++ API is updated to use modern C++ types (string, vector).
+ Backward compatibility is kept for most of the functions except for + the following: + - get_wordchars(); + - get_version(); + - input_conv(string, string); + - removed get_csconv(); + +2016-04-15: Hunspell 1.4.0 release: + - various abi changes due to moving away from char* to std::string + +2014-06-02: Hunspell 1.3.3 release: + - OpenDocument (ODF and Flat ODF) support (ODF needs unzip program) + - various bug fixes + +2011-02-02: Hunspell 1.3.2 release: + - fix library versioning + - improved manual + +2011-02-02: Hunspell 1.3.1 release: + - bug fixes + +2011-01-26: Hunspell 1.2.15/1.3 release: + - new features: MAXDIFF, ONLYMAXDIFF, MAXCPDSUGS, FORBIDWARN, see manual + - bug fixes + +2011-01-21: + - new features: FORCEUCASE and WARN, see manual + - new options: -r to filter potential mistakes (rare words + signed by flag WARN in the dictionary) + - limited and optimized suggestions + +2011-01-06: Hunspell 1.2.14 release: + - bug fix +2011-01-03: Hunspell 1.2.13 release: + - bug fixes + - improved compound handling and + other improvements supported by OpenTaal Foundation, Netherlands +2010-07-15: Hunspell 1.2.12 release +2010-05-06: Hunspell 1.2.11 release: + - Maintenance release bug fixes +2010-04-30: Hunspell 1.2.10 release: + - Maintenance release bug fixes +2010-03-03: Hunspell 1.2.9 release: + - Maintenance release bug fixes and warnings + - MAP support for composed characters or character sequences +2008-11-01: Hunspell 1.2.8 release: + - Default BREAK feature and better hyphenated word suggestion to accept + and fix (compound) words with hyphen characters by spell checker + instead of by work breaking code of OpenOffice.org. With this feature + it's possible to accept hyphenated compound words, such as "scot-free", + where "scot" is not a correct English word. + + - ICONV & OCONV: input and output conversion tables for optional character + handling or using special inner format. 
Example: + + # Accepting de facto replacements of the Romanian comma acuted letters + SET UTF-8 + ICONV 4 + ICONV ş ș + ICONV ţ ț + ICONV Ş Ș + ICONV Ţ Ț + + Typical usage of ICONV/OCONV is to manage an inner format for a segmental + writing system, like the Ethiopic script of the Amharic language. + + - Extended CHECKCOMPOUNDPATTERN to handle conpound word alternations, like + sandhi feature of Telugu and other writing systems. + + - SIMPLIFIEDTRIPLE compound word feature: allow simplified Swedish and + Norwegian compound word forms, like tillåta (till|låta) and + bussjåfør (buss|sjåfør) + + - wordforms: word generator script for dictionary developers (Hunspell + version of unmunch). + + - bug fixes + +2008-08-15: Hunspell 1.2.7 release: + - FULLSTRIP: new option for affix handling. With FULLSTRIP, affix rules can + strip full words, not only one less characters. + - COMPOUNDRULE works with all flag types. (COMPOUNDRULE is for pattern + matching. For example, en_US dictionary of OpenOffice.org uses COMPOUNDRULE + for ordinal number recognition: 1st, 2nd, 11th, 12th, 22nd, 112th, 1000122nd + etc.). + - optimized suggestions: + - modified 1-character distance suggestion algorithms: search a TRY character + in all position instead of all TRY characters in a character position + (it can give more readable suggestion order, also better suggestions + in the first positions, when TRY characters are sorted by frequency.) + For example, suggestions for "moze": + ooze, doze, Roze, maze, more etc. (Hunspell 1.2.6), + maze, more, mote, ooze, mole etc. (Hunspell 1.2.7). + - extended compound word checking for better COMPOUNDRULE related + suggestions, for example English ordinal numbers: 121323th -> 121323rd + (it needs also a th->rd REP definition). 
+ - bug fixes + +2008-07-15: Hunspell 1.2.6 release: + - bug fix release (fix affix rule condition checking of sk_SK dictionary, + iconv support in stemming and morphological analysis of the Hunspell + utility, see also Changelog) + +2008-07-09: Hunspell 1.2.5 release: + - bug fix release (fix affix rule condition checking of en_GB dictionary, + also morphological analysis by dictionaries with two-level suffixes) + +2008-06-18: Hunspell 1.2.4-2 release: + - fix GCC compiler warnings + +2008-06-17: Hunspell 1.2.4 release: + - add free_list() for C, C++ interfaces to deallocate suggestion lists + + - bug fixes + +2008-06-17: Hunspell 1.2.3 release: + - extended XML interface to use morphological functions by standard + spell checking interface, spell() and suggest(). See hunspell.3 manual page. + + - default dash suggestions for compound words: newword-> new word and new-word + + - new manual pages: hunspell.3, hzip.1, hunzip.1. + + - bug fixes + +2008-04-12: Hunspell 1.2.2 release: + - extended dictionary (dic file) support to use multiple base and + special dictionaries. + + - new and improved options of command line hunspell: + -m: morphological analysis or flag debug mode (without affix + rule data it signs the flag of the affix rules) + -s: stemming mode + -D: list available dictionaries and search path + -d: support extra dictionaries by comma separated list. Example: + + hunspell -d en_US,en_med,de_DE,de_med,de_geo UNESCO.txt + + - forbidding in personal dictionary (with asterisk, / signs affixation) + + - optional compressed dictionary format "hzip" for aff and dic files + usage: + hzip example.aff example.dic + mv example.aff example.dic /tmp + hunspell -d example + hunzip example.aff.hz >example.aff + hunzip example.dic.hz >example.dic + + - new affix compression tool "affixcompress": compression tool for + large (millions of words) dictionaries. 
+ + - support encrypted dictionaries for closed OpenOffice.org extensions or + other commercial programs + + - improved manual + + - bug fixes + +2007-11-01: Hunspell 1.2.1 release: + - new memory efficient condition checking algorithm for affix rules + + - new morphological functions: + - stem() for stemming + - analyze() for morphological analysis + - generate() for morphological generation + + - new demos: + - analyze: stemming, morphological analysis and generation + - chmorph: morphological conversion of texts + +2007-09-05: Hunspell 1.1.12 release: + - dictionary based phonetic suggestion for words with + special or foreign pronunciation or alternative (bad) transliteration + (see Changelog, tests/phone.* and manual). + + - improved data structure and memory optimization for dictionaries + with variable count fields + + - bug fixes for Unicode encoding dictionaries and ngram suggestions + + - improved REP suggestions with space: it works without dictionary + modification + + - updated and new project files for Windows API + +2007-08-27: Hunspell 1.1.11 release: + - portability fixes + +2007-08-23: Hunspell 1.1.10 release: + - pronunciation based suggestion using Björn Jacke's original Aspell + phonetic transcription algorithm (http://aspell.net), relicensed under + GPL/LGPL/MPL tri-license with the permission of the author + + - keyboard base suggestion by KEY (see manual) + + - better time limits for suggestion search + + - test environment for suggestion based on Wikipedia data + + - bug fixes for non standard Mozilla platforms etc.
+ +2007-07-25: Hunspell 1.1.9 release: + - better tokenization: + - for URLs, mail addresses and directory paths (default: skip these tokens) + - for colons in words (for Finnish and Swedish) + + - new examples: + - affixation of personal dictionary words + - digits in words + + - bug fixes (see ChangeLog) + +2007-07-16: Hunspell 1.1.8 release: + - better Mac OS X/Cygwin and Windows compatibility + + - fix Hunspell's Valgrind environment and memory handling errors + detected by Valgrind + + - other bug fixes (see ChangeLog) + +2007-07-06: Hunspell 1.1.7 release: + - fix warning messages of OpenOffice.org build + +2007-06-29: Hunspell 1.1.6 release: + - check capitalization of the following word forms + - words with mixed capitalisation: OpenOffice.org - OPENOFFICE.ORG + - allcap words and suffixes: UNICEF's - UNICEF'S + - prefixes with apostrophe and proper names: Sant'Elia - SANT'ELIA + + - suggestion for missing sentence spacing: something.The -> something. The + + - Hunspell executable: improved locale support + - -i option: custom input encoding + - use locale data for default dictionary names. + - tools/hunspell.cxx: fix 8-bit tokenization (letters without + casing, like ß or Hebrew characters now are handled well) + - dictionary search path (automatic detection of OpenOffice.org directories) + - DICPATH environmental variable + - -D option: show directory path of loaded dictionary + + - patches and bug fixes for Mozilla, OpenOffice.org. 
+ +2007-03-19: Hunspell 1.1.5 release: + - optimizations: 10-100% speed up, smaller code size and memory footprint + (conditional experimental code and warning messages) + + - extended Unicode support: + - non BMP Unicode characters in dictionary words and affixes (except + affix rules and conditions) + - support BOM sequence in aff and dic files + + - IGNORE feature for Arabic diacritics and other optional characters + + - New edit distance suggestion methods: + - capitalisation: nasa -> NASA + - long swap: permenant -> permanent + - long move: Ghandi -> Gandhi, greatful -> grateful + - double two characters: vacacation -> vacation + - spaces in REP sug.: REP alot a_lot (NOTE: "a lot" must be a dictionary word) + + - patches and bug fixes for Mozilla, OpenOffice.org, Emacs, MinGW, Aqua, + German and Arabic language, etc. + +2006-02-01: Hunspell 1.1.4 release: + - Improved suggestion for typical OCR bugs (missing spaces between + capitalized words). For example: "aNew" -> "a New". + http://qa.openoffice.org/issues/show_bug.cgi?id=58202 + + - tokenization fixes (fix incomplete tokenization of input texts on big-endian + platforms, and locale-dependent tokenization of dictionary entries) + +2006-01-06: Hunspell 1.1.3.2 release: + - fix Visual C++ compiling errors + +2006-01-05: Hunspell 1.1.3 release: + - GPL/LGPL/MPL tri-license for Mozilla integration + + - Alias compression of flag sets and morphological descriptions. + (For example, 16 MB Arabic dic file can be compressed to 1 MB.) + + - Improved suggestion. + + - Improved, language independent German sharp s casing with CHECKSHARPS + declaration. + + - Unicode tokenization in Hunspell program. + + - Bug fixes (at new and old compound word handling methods), etc. 
+ +2005-11-11: Hunspell 1.1.2 release: + + - Bug fixes (MAP Unicode, COMPOUND pattern matching, ONLYINCOMPOUND + suggestions) + + - Checked with 51 regression tests in Valgrind debugging environment, + and tested with 52 OOo dictionaries on i686-pc-linux platform. + +2005-11-09: Hunspell 1.1.1 release: + + - Compound word patterns for complex compound word handling and + simple word-level lexical scanning. Ideal for checking + Arabic and Roman numbers, ordinal numbers in English, affixed + numbers in agglutinative languages, etc. + http://qa.openoffice.org/issues/show_bug.cgi?id=53643 + + - Support ISO-8859-15 encoding for French (French oe ligatures are + missing from the latin-1 encoding). + http://qa.openoffice.org/issues/show_bug.cgi?id=54980 + + - Implemented a flag to forbid obscene word suggestion: + http://qa.openoffice.org/issues/show_bug.cgi?id=55498 + + - Checked with 50 regression tests in Valgrind debugging environment, + and tested with 52 OOo dictionaries. + + - other improvements and bug fixes (see ChangeLog) + +2005-09-19: Hunspell 1.1.0 release + +* complete comparison with MySpell 3.2 (from OpenOffice.org 2 beta) + +* improved ngram suggestion with swap character detection and + case insensitivity + +------ examples for ngram improvement (input word and suggestions) ----- + +1. pernament (instead of permanent) + +MySpell 3.2: tournaments, tournament, ornaments, ornament's, ornamenting, ornamented, + ornament, ornamentals, ornamental, ornamentally + +Hunspell 1.0.9: ornamental, ornament, tournament + +Hunspell 1.1.0: permanent + +Note: swap character detection + + +2. PERNAMENT (instead of PERMANENT) + +MySpell 3.2: - + +Hunspell 1.0.9: - + +Hunspell 1.1.0: PERMANENT + + +3. Unesco (instead of UNESCO) + +MySpell 3.2: Genesco, Ionesco, Genesco's, Ionesco's, Frescoing, Fresco's, + Frescoed, Fresco, Escorts, Escorting + +Hunspell 1.0.9: Genesco, Ionesco, Fresco + +Hunspell 1.1.0: UNESCO + + +4. 
siggraph's (instead of SIGGRAPH's) + +MySpell 3.2: serigraph's, photograph's, serigraphs, physiography's, + physiography, digraphs, serigraph, stratigraphy's, stratigraphy + epigraphs + +Hunspell 1.0.9: serigraph's, epigraph's, digraph's + +Hunspell 1.1.0: SIGGRAPH's + +--------------- end of examples -------------------- + +* improved testing environment with suggestion checking and memory debugging + + memory debugging of all tests with a simple command: + + VALGRIND=memcheck make check + +* lots of other improvements and bug fixes (see ChangeLog) + + +2005-08-26: Hunspell 1.0.9 release + +* improved related character map suggestion + +* improved ngram suggestion + +------ examples for ngram improvement (O=old, N = new ngram suggestions) -- + +1. Permenant (instead of Permanent) + +O: Endangerment, Ferment, Fermented, Deferment's, Empowerment, + Ferment's, Ferments, Fermenting, Countermen, Weathermen + +N: Permanent, Supermen, Preferment + +Note: Ngram suggestions was case sensitive. + +2. permenant (instead of permanent) + +O: supermen, newspapermen, empowerment, endangerment, preferments, + preferment, permanent, preferment's, permanently, impermanent + +N: permanent, supermen, preferment + +Note: new suggestions are also weighted with longest common subsequence, +first letter and common character positions + +3. pernemant (instead of permanent) + +O: pimpernel's, pimpernel, pimpernels, permanently, permanents, permanent, + supernatant, impermanent, semipermanent, impermanently + +N: permanent, supernatant, pimpernel + +Note: new method also prefers root word instead of not +relevant affixes ('s, s and ly) + + +4. pernament (instead of permanent) + +O: tournaments, tournament, ornaments, ornament's, ornamenting, ornamented, + ornament, ornamentals, ornamental, ornamentally + +N: ornamental, ornament, tournament + +Note: Both ngram methods misses here. + + +5. 
obvus (instad of obvious): + +O: obvious, Corvus, obverse, obviously, Jacobus, obtuser, obtuse, + obviates, obviate, Travus + +N: obvious, obtuse, obverse + +Note: new method also prefers common first letters. + + +6. unambigus (instead of unambiguous) + +O: unambiguous, unambiguity, unambiguously, ambiguously, ambiguous, + unambitious, ambiguities, ambiguousness + +N: unambiguous, unambiguity, unambitious + + + +7. consecvence (instead of consequence) + +O: consecutive, consecutively, consecutiveness, nonconsecutive, consequence, + consecutiveness's, convenience's, consistences, consistence + +N: consequence, consecutive, consecrates + + +An example in a language with rich morphology: + +8. Misisipiben (instead of Mississippiben [`in Mississippi' in Hungarian]): + +O: Misikdiben, Pisisediben, Misikiiben, Pisisekiben, Misikiben, + Misikidiben, Misikkiben, Misikikiben, Misikimiben, Mississippiiben + +N: Mississippiben, Mississippiiben, Misiiben + +Note: Suggesting not relevant affixes was the biggest fault in ngram + suggestion for languages with a lot of affixes. + +--------------- end of examples -------------------- + +* support twofold prefix cutting + +* lots of other improvements and bug fixes (see ChangeLog) + +* test Hunspell with 54 OpenOffice.org dictionaries: + +source: ftp://ftp.services.openoffice.org/pub/OpenOffice.org/contrib/dictionaries + +testing shell script: +------------------------------------------------------- +for i in `ls *zip | grep '^[a-z]*_[A-Z]*[.]'` +do + dic=`basename $i .zip` + mkdir $dic + echo unzip $dic + unzip -d $dic $i 2>/dev/null + cd $dic + echo unmunch and test $dic + unmunch $dic.dic $dic.aff 2>/dev/null | awk '{print$0"\t"}' | + hunspell -d $dic -l -1 >$dic.result 2>$dic.err || rm -f $dic.result + cd .. 
+done +-------------------------------------------------------- + +test result (0 size is o.k.): + +$ for i in *_*/*.result; do wc -c $i; done +0 af_ZA/af_ZA.result +0 bg_BG/bg_BG.result +0 ca_ES/ca_ES.result +0 cy_GB/cy_GB.result +0 cs_CZ/cs_CZ.result +0 da_DK/da_DK.result +0 de_AT/de_AT.result +0 de_CH/de_CH.result +0 de_DE/de_DE.result +0 el_GR/el_GR.result +6 en_AU/en_AU.result +0 en_CA/en_CA.result +0 en_GB/en_GB.result +0 en_NZ/en_NZ.result +0 en_US/en_US.result +0 eo_EO/eo_EO.result +0 es_ES/es_ES.result +0 es_MX/es_MX.result +0 es_NEW/es_NEW.result +0 fo_FO/fo_FO.result +0 fr_FR/fr_FR.result +0 ga_IE/ga_IE.result +0 gd_GB/gd_GB.result +0 gl_ES/gl_ES.result +0 he_IL/he_IL.result +0 hr_HR/hr_HR.result +200694989 hu_HU/hu_HU.result +0 id_ID/id_ID.result +0 it_IT/it_IT.result +0 ku_TR/ku_TR.result +0 lt_LT/lt_LT.result +0 lv_LV/lv_LV.result +0 mg_MG/mg_MG.result +0 mi_NZ/mi_NZ.result +0 ms_MY/ms_MY.result +0 nb_NO/nb_NO.result +0 nl_NL/nl_NL.result +0 nn_NO/nn_NO.result +0 ny_MW/ny_MW.result +0 pl_PL/pl_PL.result +0 pt_BR/pt_BR.result +0 pt_PT/pt_PT.result +0 ro_RO/ro_RO.result +0 ru_RU/ru_RU.result +0 rw_RW/rw_RW.result +0 sk_SK/sk_SK.result +0 sl_SI/sl_SI.result +0 sv_SE/sv_SE.result +0 sw_KE/sw_KE.result +0 tet_ID/tet_ID.result +0 tl_PH/tl_PH.result +0 tn_ZA/tn_ZA.result +0 uk_UA/uk_UA.result +0 zu_ZA/zu_ZA.result + +In en_AU dictionary, there is an abbreviation with two dots (`eqn..'), but +`eqn.' is missing. Presumably it is a dictionary bug. MySpell also +hasn't accepted it. + +Hungarian dictionary contains pseudoroots and forbidden words. +Unmunch hasn't supported these features yet, and generates bad words, too. + +* check affix rules and OOo dictionaries. Detected bugs in cs_CZ, +es_ES, es_NEW, es_MX, lt_LT, nn_NO, pt_PT, ro_RO, sk_SK and sv_SE dictionaries.
+ +Details: +-------------------------------------------------------- +cs_CZ +warning - incompatible stripping characters and condition: +SFX D us ech [^ighk]os +SFX D us y [^i]os +SFX Q os ech [^ghk]es +SFX M o ech [^ghkei]a +SFX J m ej m +SFX J m ejme m +SFX J m ejte m +SFX A ouit up oupit +SFX A ouit upme oupit +SFX A ouit upte oupit +SFX A nout l [aeiouyr][^aeiouyrl][^aeiouy +SFX A nout l [aeiouyr][^aeiouyrl][^aeiouy + +es_ES +warning - incompatible stripping characters and condition: +SFX W umar se [ae]husar +SFX W emir iis eir + +es_NEW +warning - incompatible stripping characters and condition: +SFX I unan nen unar + +es_MX +warning - incompatible stripping characters and condition: +SFX A a ote e +SFX W umar se [ae]husar +SFX W emir iis eir + +lt_LT +warning - incompatible stripping characters and condition: +SFX U ti siuosi tis +SFX U ti siuosi tis +SFX U ti siesi tis +SFX U ti siesi tis +SFX U ti sis tis +SFX U ti sis tis +SFX U ti sims tis +SFX U ti sims tis +SFX U ti sits tis +SFX U ti sits tis + +nn_NO +warning - incompatible stripping characters and condition: +SFX D ar rar [^fmk]er +SFX U re orde ere +SFX U re ort ere + +pt_PT +warning - incompatible stripping characters and condition: +SFX g os oas o +SFX g os oas o + +ro_RO +warning - bad field number: +SFX L 0 le [^cg] i +SFX L 0 i [cg] i +SFX U 0 i [^i] ii +warning - incompatible stripping characters and condition: +SFX P l i l [<- there is an unnecessary tabulator here) +SFX I a ii [gc] a +warning - bad field number: +SFX I a ii [gc] a +SFX I a ei [^cg] a + +sk_SK +warning - incompatible stripping characters and condition: +SFX T a ol kla +SFX T a olc kla +SFX T sa l sla +SFX T sa lc sla +SFX R c liem c +SFX R is tie mias +SFX R iez iem [^i]ez +SFX R iez ie [^i]ez +SFX R iez ie [^i]ez +SFX R iez eme [^i]ez +SFX R iez ete [^i]ez +SFX R iez [^i]ez +SFX R iez c [^i]ez +SFX R iez z [^i]ez +SFX R iez me [^i]ez +SFX R iez te [^i]ez + +sv_SE +warning - bad field number: +SFX C 0 net nets [^e]n 
+-------------------------------------------------------- + +2005-08-01: Hunspell 1.0.8 release + +- improved compound word support +- fix German S handling +- port MySpell files and MAP feature + +2005-07-22: Hunspell 1.0.7 release + +2005-07-21: new home page: http://hunspell.sourceforge.net diff --git a/3rdparty/hunspell/1.7.0/README b/3rdparty/hunspell/1.7.0/README new file mode 100644 index 0000000000..27240e7880 --- /dev/null +++ b/3rdparty/hunspell/1.7.0/README @@ -0,0 +1,315 @@ +# About Hunspell + +Hunspell is a free spell checker and morphological analyzer library +and command-line tool, licensed under LGPL/GPL/MPL tri-license. + +Hunspell is used by LibreOffice office suite, free browsers, like +Mozilla Firefox and Google Chrome, and other tools and OSes, like +Linux distributions and macOS. It is also a command-line tool for +Linux, Unix-like and other OSes. + +It is designed for quick and high quality spell checking and +correcting for languages with word-level writing system, +including languages with rich morphology, complex word compounding +and character encoding. + +Hunspell interfaces: Ispell-like terminal interface using Curses +library, Ispell pipe interface, C++/C APIs and shared library, also +with existing language bindings for other programming languages. + +Hunspell's code base comes from OpenOffice.org's MySpell library, +developed by Kevin Hendricks (originally a C++ reimplementation of +spell checking and affixation of Geoff Kuenning's International +Ispell from scratch, later extended with eg. n-gram suggestions), +see http://lingucomponent.openoffice.org/MySpell-3.zip, and +its README, CONTRIBUTORS and license.readme (here: license.myspell) files. 
+ +Main features of Hunspell library, developed by László Németh: + + - Unicode support + - Highly customizable suggestions: word-part replacement tables and + stem-level phonetic and other alternative transcriptions to recognize + and fix all typical misspellings, don't suggest offensive words etc. + - Complex morphology: dictionary and affix homonyms; twofold affix + stripping to handle inflectional and derivational morpheme groups for + agglutinative languages, like Azeri, Basque, Estonian, Finnish, Hungarian, + Turkish; 64 thousand affix classes with arbitrary number of affixes; + conditional affixes, circumfixes, fogemorphemes, zero morphemes, + virtual dictionary stems, forbidden words to avoid overgeneration etc. + - Handling complex compounds (for example, for Finno-Ugric, German and + Indo-Aryan languages): recognizing compounds made of arbitrary + number of words, handle affixation within compounds etc. + - Custom dictionaries with affixation + - Stemming + - Morphological analysis (in custom item and arrangement style) + - Morphological generation + - SPELLML XML API over plain spell() API function for easier integration + of stemming, morpological generation and custom dictionaries with affixation + - Language specific algorithms, like special casing of Azeri or Turkish + dotted i and German sharp s, and special compound rules of Hungarian. + +Main features of Hunspell command line tool, developed by László Németh: + + - Reimplementation of quick interactive interface of Geoff Kuenning's Ispell + - Parsing formats: text, OpenDocument, TeX/LaTeX, HTML/SGML/XML, nroff/troff + - Custom dictionaries with optional affixation, specified by a model word + - Multiple dictionary usage (for example hunspell -d en_US,de_DE,de_medical) + - Various filtering options (bad or good words/lines) + - Morphological analysis (option -m) + - Stemming (option -s) + +See man hunspell, man 3 hunspell, man 5 hunspell for complete manual. 
+ +# Dependencies + +Build only dependencies: + + g++ make autoconf automake autopoint libtool + +Runtime dependencies: + +| | Mandatory | Optional | +|---------------|------------------|------------------| +|libhunspell | | | +|hunspell tool | libiconv gettext | ncurses readline | + +# Compiling on GNU/Linux and Unixes + +We first need to download the dependencies. On Linux, `gettext` and +`libiconv` are part of the standard library. On other Unixes we +need to manually install them. + +For Ubuntu: + + sudo apt install autoconf automake autopoint libtool + +Then run the following commands: + + autoreconf -vfi + ./configure + make + sudo make install + sudo ldconfig + +For dictionary development, use the `--with-warnings` option of +configure. + +For interactive user interface of Hunspell executable, use the +`--with-ui` option. + +Optional developer packages: + + - ncurses (needed for --with-ui), eg. libncursesw5 for UTF-8 + - readline (for fancy input line editing, configure parameter: + --with-readline) + +In Ubuntu, the packages are: + + libncurses5-dev libreadline-dev + +# Compiling on OSX and macOS + +On macOS for compiler always use `clang` and not `g++` because Homebrew +dependencies are built with that. + + brew install autoconf automake libtool gettext + brew link gettext --force + +Then run autoreconf, configure, make. See above. + +# Compiling on Windows + +## Compiling with Mingw64 and MSYS2 + +Download Msys2, update everything and install the following + packages: + + pacman -S base-devel mingw-w64-x86_64-toolchain mingw-w64-x86_64-libtool + +Open Mingw-w64 Win64 prompt and compile the same way as on Linux, see +above. + +## Compiling in Cygwin environment + +Download and install Cygwin environment for Windows with the following +extra packages: + + - make + - automake + - autoconf + - libtool + - gcc-g++ development package + - ncurses, readline (for user interface) + - iconv (character conversion) + +Then compile the same way as on Linux.
Cygwin builds depend on +Cygwin1.dll. + +# Debugging + +It is recommended to install a debug build of the standard library: + + libstdc++6-6-dbg + +For debugging we need to create a debug build and then we need to start +`gdb`. + + ./configure CXXFLAGS='-g -O0 -Wall -Wextra' + make + ./libtool --mode=execute gdb src/tools/hunspell + +You can also pass the `CXXFLAGS` directly to `make` without calling +`./configure`, but we don't recommend this way during long development +sessions. + +If you like to develop and debug with an IDE, see documentation at +https://github.com/hunspell/hunspell/wiki/IDE-Setup + +# Testing + +Testing Hunspell (see tests in tests/ subdirectory): + + make check + +or with Valgrind debugger: + + make check + VALGRIND=[Valgrind_tool] make check + +For example: + + make check + VALGRIND=memcheck make check + +# Documentation + +features and dictionary format: + + man 5 hunspell + man hunspell + hunspell -h + +http://hunspell.github.io/ + +# Usage + +After compiling and installing (see INSTALL) you can run the Hunspell +spell checker (compiled with user interface) with a Hunspell or Myspell +dictionary: + + hunspell -d en_US text.txt + +or without interface: + + hunspell + hunspell -d en_GB -l + +Linking with Hunspell static library: + + g++ -lhunspell-1.7 example.cxx + # or better, use pkg-config + g++ $(pkg-config --cflags --libs hunspell) example.cxx + +## Dictionaries + +Hunspell (MySpell) dictionaries: + + - https://wiki.documentfoundation.org/Language_support_of_LibreOffice + - http://cgit.freedesktop.org/libreoffice/dictionaries + - http://extensions.libreoffice.org + - http://extensions.openoffice.org + - http://wiki.services.openoffice.org/wiki/Dictionaries + +Aspell dictionaries (conversion: man 5 hunspell): + + - ftp://ftp.gnu.org/gnu/aspell/dict + +László Németh, nemeth at numbertext org + diff --git a/3rdparty/hunspell/1.7.0/THANKS b/3rdparty/hunspell/1.7.0/THANKS new file mode 100644 index 0000000000..a256d4e0ca --- /dev/null +++ 
b/3rdparty/hunspell/1.7.0/THANKS @@ -0,0 +1,206 @@ +Many thanks to the following contributors and supporters: + +Mehmet Akin +Göran Andersson +Lars Aronsson +Ruud Baars +Bartkó Zoltán +Mathias Bauer +Bencsáth Boldizsár +Bíró Árpád +Ingo H. de Boer +Simon Brouwer +Jeppe Bundsgaard +Ginn Chen +Tomáš Chvátal +Aaron Digulla +Dmitri Gabinski +Dvornik László +David Einstein +Rene Engelhard +Frederik Fouvry +Flemming Frandsen +Serge Gautherie +Marek Gleń +Gavins at OOo +Gefferth András +Godó Ferenc +Goldman Eleonóra +Steinar H. Gunderson +Halácsy Péter +Chris Halls +Khaled Hosny +Izsók András +Björn Jacke +Mike Tian-Jian Jiang +Dafydd Jones +Ryan Jones +Jean-Christophe Helary +Kevin Hendricks +Martin Hollmichel +Pavel Janík +John Winters +Mohamed Kebdani +Kelemen Gábor +Shewangizaw Gulilat +Kéménczy Kálmán +Dan Kenigsberg +Pham Ngoc Khanh +Khiraly László +Koblinger Egmont +Kornai András +Tor Lillqvist +Christian Lohmaier +Robert Longson +Marot at SF dot net +Mark McClain +Caolan McNamara +Michael Meeks +Moheb Mekhaiel +Laurie Mercer +Ladislav Michnovič +Ellis Miller +Giuseppe Modugno +János Mohácsi +Bram Moolenaar +Daniel Naber +Nagy Viktor +John Nisly +Noll János +S Page +Christophe Paris +Malcolm Parsons +Sylvain Paschein +Volkov Peter +Bryan Petty +Harri Pitkänen +Davide Prina +Kevin F. 
Quinn +Erdal Ronahi +Olivier Ronez +Bernhard Rosenkraenzer +Sarlós Tamás +Thobias Schlemmer +Jan Seeger +Jose da Silva +Paulo Ney de Souza +Roland Smith +Munzir Taha +Timeless at bemail dot org +Tímár András +Tonal at OOo +Török László +Trón Viktor +Gianluca Turconi +Ryan VanderMeulen +Varga Dániel +Elio Voci +Miha Vrhovnik +Martijn Wargers +Michel Weimerskirch +Brett Wilson +Friedel Wolff +Daniel Yacob +Gábor Zahemszky +Taha Zerrouki +and others (see also MySpell authors below) + +FSF.hu Foundation +http://www.fsf.hu + +LibreOffice community +http://www.libreoffice.org + +MOKK Research Centre +Budapest University of Technology and Economics +Sociology and Communications Department +http://www.mokk.bme.hu + +Hungarian Ministry of Informatics and Telecommunications + +IMEDIA Kft. +http://www.imedia.hu + +OpenOffice.org community +http://www.openoffice.org + +OpenTaal Foundation, Netherlands and +Dutch Language Union (Nederlandse Taalunie) +http://opentaal.org + +UHU-Linux Kft. + +Thanks, + +Németh László +nemeth at numbertext org + +-------------------------- + +MySpell Developer Credits: + +Special credit and thanks go to ispell's creator Geoff Kuenning. +Ispell affix compression code was used as the basis for the +affix code used in MySpell. Specifically Geoff's use of a +conds[] array that makes it easy to check if the conditions +required for a particular affix are present was very +ingenious! Kudos to Geoff. Very nicely done. +BTW: ispell is available under a BSD style license +from Geoff Kuenning's ispell website: +http://www.cs.ucla.edu/ficus-members/geoff/ispell.html + + +Kevin Hendricks is the original +author and now maintainer of the MySpell codebase. Recent +additions include ngram support, and related character maps +to help improve and create suggestions for very poorly +spelled words. + +Please send any and all contributions or improvements +to him or to dev@lingucomponent.openoffice.org. 
+ + +David Einstein (Deinst@world.std.com) developed an almost +complete rewrite of MySpell for use by the Mozilla project. +David and I are now working on parallel development tracks +to help our respective projects (Mozilla and OpenOffice.org) +and we will maintain full affix file and dictionary file +compatibility and work on merging our versions of MySpell +back into a single tree. David has been a significant help +in improving MySpell. + + +Németh László is the author of +the Hungarian dictionary and he developed and contributed +extensive changes to MySpell including ... + * code to support compound words in MySpell + * fixed numerous problems with encoding case conversion tables. + * designed/developed replacement tables to improve suggestions + * changed affix file parsing to trees to greatly speed loading + * removed the need for malloc/free pairs in suffix_check which + speeds up spell checking in suffix rich languages by 20% + +Davide Prina , Giuseppe Modugno +, Gianluca Turconi +all from the it_IT OpenOffice.org team performed an +extremely detailed code review of MySpell and generated +fixes for bugs, leaks, and speedup improvements. + +Simon Brouwer for fixes and enhancements +that have greatly improved MySpell suggestions + * n-gram suggestions for an initcap word have an init. cap. 
+ * fix for too many n-gram suggestions from specialized dictionary, + * fix for long suggestions rather than close ones in case of + dictionaries with many compound words (kompuuter) + * optionally disabling split-word suggestions (controlled + by NOSPLITSUGS line in affix file) + + +Special Thanks to all others who have either contributed ideas or +testing for MySpell + + +Thanks, + +Kevin Hendricks +kevin.hendricks@sympatico.ca diff --git a/3rdparty/hunspell/1.6.2/license.hunspell b/3rdparty/hunspell/1.7.0/license.hunspell similarity index 100% rename from 3rdparty/hunspell/1.6.2/license.hunspell rename to 3rdparty/hunspell/1.7.0/license.hunspell diff --git a/3rdparty/hunspell/1.6.2/src/hunspell/affentry.cxx b/3rdparty/hunspell/1.7.0/src/hunspell/affentry.cxx similarity index 99% rename from 3rdparty/hunspell/1.6.2/src/hunspell/affentry.cxx rename to 3rdparty/hunspell/1.7.0/src/hunspell/affentry.cxx index 4ef0c00d9b..ffcdb21be2 100644 --- a/3rdparty/hunspell/1.6.2/src/hunspell/affentry.cxx +++ b/3rdparty/hunspell/1.7.0/src/hunspell/affentry.cxx @@ -399,28 +399,28 @@ std::string PfxEntry::check_morph(const char* word, ((!needflag) || TESTAFF(he->astr, needflag, he->alen) || (contclass && TESTAFF(contclass, needflag, contclasslen)))) { if (morphcode) { - result.append(" "); + result.push_back(MSEP_FLD); result.append(morphcode); } else result.append(getKey()); if (!HENTRY_FIND(he, MORPH_STEM)) { - result.append(" "); + result.push_back(MSEP_FLD); result.append(MORPH_STEM); result.append(HENTRY_WORD(he)); } // store the pointer of the hash entry if (HENTRY_DATA(he)) { - result.append(" "); + result.push_back(MSEP_FLD); result.append(HENTRY_DATA2(he)); } else { // return with debug information char* flag = pmyMgr->encode_flag(getFlag()); - result.append(" "); + result.push_back(MSEP_FLD); result.append(MORPH_FLAG); result.append(flag); free(flag); } - result.append("\n"); + result.push_back(MSEP_REC); } he = he->next_homonym; } while (he); @@ -804,7 +804,7 @@ 
std::string SfxEntry::check_twosfx_morph(const char* word, if (!st.empty()) { if (ppfx->getMorph()) { result.append(ppfx->getMorph()); - result.append(" "); + result.push_back(MSEP_FLD); } result.append(st); mychomp(result); diff --git a/3rdparty/hunspell/1.6.2/src/hunspell/affentry.hxx b/3rdparty/hunspell/1.7.0/src/hunspell/affentry.hxx similarity index 100% rename from 3rdparty/hunspell/1.6.2/src/hunspell/affentry.hxx rename to 3rdparty/hunspell/1.7.0/src/hunspell/affentry.hxx diff --git a/3rdparty/hunspell/1.6.2/src/hunspell/affixmgr.cxx b/3rdparty/hunspell/1.7.0/src/hunspell/affixmgr.cxx similarity index 96% rename from 3rdparty/hunspell/1.6.2/src/hunspell/affixmgr.cxx rename to 3rdparty/hunspell/1.7.0/src/hunspell/affixmgr.cxx index ffce7bb1bd..87ab6c5ab8 100644 --- a/3rdparty/hunspell/1.6.2/src/hunspell/affixmgr.cxx +++ b/3rdparty/hunspell/1.7.0/src/hunspell/affixmgr.cxx @@ -72,6 +72,7 @@ #include #include #include +#include #include #include @@ -96,7 +97,6 @@ AffixMgr::AffixMgr(const char* affpath, complexprefixes = 0; parsedmaptable = false; parsedbreaktable = false; - parsedrep = false; iconvtable = NULL; oconvtable = NULL; // allow simplified compound forms (see 3rd field of CHECKCOMPOUNDPATTERN) @@ -113,7 +113,7 @@ AffixMgr::AffixMgr(const char* affpath, compoundforbidflag = FLAG_NULL; // compound fordidden flag for suffixed word compoundmoresuffixes = 0; // allow more suffixes within compound words checkcompounddup = 0; // forbid double words in compounds - checkcompoundrep = 0; // forbid bad compounds (may be non compound word with + checkcompoundrep = 0; // forbid bad compounds (may be non-compound word with // a REP substitution) checkcompoundcase = 0; // forbid upper and lowercase combinations at word bounds @@ -439,7 +439,7 @@ int AffixMgr::parse_file(const char* affpath, const char* key) { } } - /* parse in the flag used by forbidden words */ + /* parse in the flag used by forbidden words (is deprecated) */ if (line.compare(0, 13, "LEMMA_PRESENT", 
13) == 0) { if (!parse_flag(line, &lemma_present, afflst)) { finishFileMgr(afflst); @@ -463,7 +463,7 @@ int AffixMgr::parse_file(const char* affpath, const char* key) { } } - /* parse in the flag used by `needaffixs' */ + /* parse in the flag used by `needaffixs' (is deprecated) */ if (line.compare(0, 10, "PSEUDOROOT", 10) == 0) { if (!parse_flag(line, &needaffix, afflst)) { finishFileMgr(afflst); @@ -529,14 +529,6 @@ int AffixMgr::parse_file(const char* affpath, const char* key) { } } - /* parse in the typical fault correcting table */ - if (line.compare(0, 3, "REP", 3) == 0) { - if (!parse_reptable(line, afflst)) { - finishFileMgr(afflst); - return 1; - } - } - /* parse in the input conversion table */ if (line.compare(0, 5, "ICONV", 5) == 0) { if (!parse_convtable(line, afflst, &iconvtable, "ICONV")) { @@ -545,7 +537,7 @@ int AffixMgr::parse_file(const char* affpath, const char* key) { } } - /* parse in the input conversion table */ + /* parse in the output conversion table */ if (line.compare(0, 5, "OCONV", 5) == 0) { if (!parse_convtable(line, afflst, &oconvtable, "OCONV")) { finishFileMgr(afflst); @@ -1023,7 +1015,7 @@ int AffixMgr::process_sfx_order() { // add flags to the result for dictionary debugging std::string& AffixMgr::debugflag(std::string& result, unsigned short flag) { char* st = encode_flag(flag); - result.append(" "); + result.push_back(MSEP_FLD); result.append(MORPH_FLAG); if (st) { result.append(st); @@ -1146,7 +1138,7 @@ struct hentry* AffixMgr::prefix_check(const char* word, return NULL; } -// check word for prefixes +// check word for prefixes and two-level suffixes struct hentry* AffixMgr::prefix_check_twosfx(const char* word, int len, char in_compound, @@ -1187,7 +1179,7 @@ struct hentry* AffixMgr::prefix_check_twosfx(const char* word, return NULL; } -// check word for prefixes +// check word for prefixes and morph std::string AffixMgr::prefix_check_morph(const char* word, int len, char in_compound, @@ -1234,7 +1226,7 @@ std::string 
AffixMgr::prefix_check_morph(const char* word, return result; } -// check word for prefixes +// check word for prefixes and morph and two-level suffixes std::string AffixMgr::prefix_check_twosfx_morph(const char* word, int len, char in_compound, @@ -1275,25 +1267,44 @@ std::string AffixMgr::prefix_check_twosfx_morph(const char* word, return result; } -// Is word a non compound with a REP substitution (see checkcompoundrep)? +// Is word a non-compound with a REP substitution (see checkcompoundrep)? int AffixMgr::cpdrep_check(const char* word, int wl) { - if ((wl < 2) || reptable.empty()) + if ((wl < 2) || get_reptable().empty()) return 0; - for (size_t i = 0; i < reptable.size(); ++i) { - const char* r = word; - const size_t lenp = reptable[i].pattern.size(); - // search every occurence of the pattern in the word - while ((r = strstr(r, reptable[i].pattern.c_str())) != NULL) { - std::string candidate(word); - size_t type = r == word && langnum != LANG_hu ? 1 : 0; - if (r - word + reptable[i].pattern.size() == lenp && langnum != LANG_hu) - type += 2; - candidate.replace(r - word, lenp, reptable[i].outstrings[type]); + for (size_t i = 0; i < get_reptable().size(); ++i) { + // use only available mid patterns + if (!get_reptable()[i].outstrings[0].empty()) { + const char* r = word; + const size_t lenp = get_reptable()[i].pattern.size(); + // search every occurence of the pattern in the word + while ((r = strstr(r, get_reptable()[i].pattern.c_str())) != NULL) { + std::string candidate(word); + candidate.replace(r - word, lenp, get_reptable()[i].outstrings[0]); + if (candidate_check(candidate.c_str(), candidate.size())) + return 1; + ++r; // search for the next letter + } + } + } + + return 0; +} + +// forbid compound words, if they are in the dictionary as a +// word pair separated by space +int AffixMgr::cpdwordpair_check(const char * word, int wl) { + if (wl > 2) { + std::string candidate(word); + for (size_t i = 1; i < candidate.size(); i++) { + // go to end of the 
UTF-8 character + if (utf8 && ((word[i] & 0xc0) == 0x80)) + continue; + candidate.insert(i, 1, ' '); if (candidate_check(candidate.c_str(), candidate.size())) return 1; - ++r; // search for the next letter + candidate.erase(i, 1); } } @@ -1584,6 +1595,17 @@ struct hentry* AffixMgr::compound_check(const std::string& word, int checked_prefix; + // add a time limit to handle possible + // combinatorical explosion of the overlapping words + + HUNSPELL_THREAD_LOCAL clock_t timelimit; + + if (wordnum == 0) + timelimit = clock(); + else if (timelimit != 0 && (clock() > timelimit + TIMELIMIT)) { + timelimit = 0; + } + setcminmax(&cmin, &cmax, word.c_str(), len); st.assign(word); @@ -1608,6 +1630,9 @@ struct hentry* AffixMgr::compound_check(const std::string& word, do { // simplified checkcompoundpattern loop + if (timelimit == 0) + return 0; + if (scpd > 0) { for (; scpd <= checkcpdtable.size() && (checkcpdtable[scpd - 1].pattern3.empty() || @@ -1647,6 +1672,12 @@ struct hentry* AffixMgr::compound_check(const std::string& word, affixed = 1; rv = lookup(st.c_str()); // perhaps without prefix + // forbid dictionary stems with COMPOUNDFORBIDFLAG in + // compound words, overriding the effect of COMPOUNDPERMITFLAG + if ((rv) && compoundforbidflag && + TESTAFF(rv->astr, compoundforbidflag, rv->alen) && !hu_mov_rule) + continue; + // search homonym with compound flag while ((rv) && !hu_mov_rule && ((needaffix && TESTAFF(rv->astr, needaffix, rv->alen)) || @@ -1909,9 +1940,10 @@ struct hentry* AffixMgr::compound_check(const std::string& word, && (scpd == 0 || checkcpdtable[scpd - 1].cond2 == FLAG_NULL || TESTAFF(rv->astr, checkcpdtable[scpd - 1].cond2, rv->alen))) { - // forbid compound word, if it is a non compound word with typical + // forbid compound word, if it is a non-compound word with typical // fault - if (checkcompoundrep && cpdrep_check(word.c_str(), len)) + if ((checkcompoundrep && cpdrep_check(word.c_str(), len)) || + cpdwordpair_check(word.c_str(), len)) return NULL; 
return rv_first; } @@ -1989,7 +2021,9 @@ struct hentry* AffixMgr::compound_check(const std::string& word, if (sfxappnd) { std::string tmp(sfxappnd); reverseword(tmp); - numsyllable -= get_syllable(tmp) + sfxextra; + numsyllable -= short(get_syllable(tmp) + sfxextra); + } else { + numsyllable -= short(sfxextra); } // + 1 word, if syllable number of the prefix > 1 (hungarian @@ -2024,7 +2058,6 @@ struct hentry* AffixMgr::compound_check(const std::string& word, (TESTAFF(rv->astr, compoundroot, rv->alen))) { wordnum++; } - // second word is acceptable, as a word with prefix or/and suffix? // hungarian conventions: compounding is acceptable, // when compound forms consist 2 word, otherwise @@ -2033,9 +2066,10 @@ struct hentry* AffixMgr::compound_check(const std::string& word, (((cpdwordmax == -1) || (wordnum + 1 < cpdwordmax)) || ((cpdmaxsyllable != 0) && (numsyllable <= cpdmaxsyllable))) && ((!checkcompounddup || (rv != rv_first)))) { - // forbid compound word, if it is a non compound word with typical + // forbid compound word, if it is a non-compound word with typical // fault - if (checkcompoundrep && cpdrep_check(word.c_str(), len)) + if ((checkcompoundrep && cpdrep_check(word.c_str(), len)) || + cpdwordpair_check(word.c_str(), len)) return NULL; return rv_first; } @@ -2059,8 +2093,12 @@ struct hentry* AffixMgr::compound_check(const std::string& word, rv = NULL; } if (rv) { - // forbid compound word, if it is a non compound word with typical - // fault + // forbid compound word, if it is a non-compound word with typical + // fault, or a dictionary word pair + + if (cpdwordpair_check(word.c_str(), len)) + return NULL; + if (checkcompoundrep || forbiddenword) { if (checkcompoundrep && cpdrep_check(word.c_str(), len)) @@ -2071,7 +2109,8 @@ struct hentry* AffixMgr::compound_check(const std::string& word, char r = st[i + rv->blen]; st[i + rv->blen] = '\0'; - if (checkcompoundrep && cpdrep_check(st.c_str(), i + rv->blen)) { + if ((checkcompoundrep && 
cpdrep_check(st.c_str(), i + rv->blen)) || + cpdwordpair_check(st.c_str(), i + rv->blen)) { st[ + i + rv->blen] = r; continue; } @@ -2162,6 +2201,17 @@ int AffixMgr::compound_check_morph(const char* word, char affixed = 0; hentry** oldwords = words; + // add a time limit to handle possible + // combinatorical explosion of the overlapping words + + HUNSPELL_THREAD_LOCAL clock_t timelimit; + + if (wordnum == 0) + timelimit = clock(); + else if (timelimit != 0 && (clock() > timelimit + TIMELIMIT)) { + timelimit = 0; + } + setcminmax(&cmin, &cmax, word, len); st.assign(word); @@ -2180,6 +2230,9 @@ int AffixMgr::compound_check_morph(const char* word, do { // onlycpdrule loop + if (timelimit == 0) + return 0; + oldnumsyllable = numsyllable; oldwordnum = wordnum; checked_prefix = 0; @@ -2198,6 +2251,12 @@ int AffixMgr::compound_check_morph(const char* word, rv = lookup(st.c_str()); // perhaps without prefix + // forbid dictionary stems with COMPOUNDFORBIDFLAG in + // compound words, overriding the effect of COMPOUNDPERMITFLAG + if ((rv) && compoundforbidflag && + TESTAFF(rv->astr, compoundforbidflag, rv->alen) && !hu_mov_rule) + continue; + // search homonym with compound flag while ((rv) && !hu_mov_rule && ((needaffix && TESTAFF(rv->astr, needaffix, rv->alen)) || @@ -2215,6 +2274,9 @@ int AffixMgr::compound_check_morph(const char* word, rv = rv->next_homonym; } + if (timelimit == 0) + return 0; + if (rv) affixed = 0; @@ -2405,22 +2467,22 @@ int AffixMgr::compound_check_morph(const char* word, if (rv && words && words[wnum + 1]) { result.append(presult); - result.append(" "); + result.push_back(MSEP_FLD); result.append(MORPH_PART); result.append(word + i); if (complexprefixes && HENTRY_DATA(rv)) result.append(HENTRY_DATA2(rv)); if (!HENTRY_FIND(rv, MORPH_STEM)) { - result.append(" "); + result.push_back(MSEP_FLD); result.append(MORPH_STEM); result.append(HENTRY_WORD(rv)); } // store the pointer of the hash entry if (!complexprefixes && HENTRY_DATA(rv)) { - result.append(" 
"); + result.push_back(MSEP_FLD); result.append(HENTRY_DATA2(rv)); } - result.append("\n"); + result.push_back(MSEP_REC); return 0; } @@ -2462,7 +2524,7 @@ int AffixMgr::compound_check_morph(const char* word, ((!checkcompounddup || (rv != rv_first)))) { // bad compound word result.append(presult); - result.append(" "); + result.push_back(MSEP_FLD); result.append(MORPH_PART); result.append(word + i); @@ -2470,17 +2532,17 @@ int AffixMgr::compound_check_morph(const char* word, if (complexprefixes) result.append(HENTRY_DATA2(rv)); if (!HENTRY_FIND(rv, MORPH_STEM)) { - result.append(" "); + result.push_back(MSEP_FLD); result.append(MORPH_STEM); result.append(HENTRY_WORD(rv)); } // store the pointer of the hash entry if (!complexprefixes) { - result.append(" "); + result.push_back(MSEP_FLD); result.append(HENTRY_DATA2(rv)); } } - result.append("\n"); + result.push_back(MSEP_REC); ok = 1; } @@ -2519,7 +2581,7 @@ int AffixMgr::compound_check_morph(const char* word, line_uniq_app(m, MSEP_REC); result.append(m); } - result.append("\n"); + result.push_back(MSEP_REC); ok = 1; } } @@ -2552,7 +2614,9 @@ int AffixMgr::compound_check_morph(const char* word, if (sfxappnd) { std::string tmp(sfxappnd); reverseword(tmp); - numsyllable -= get_syllable(tmp) + sfxextra; + numsyllable -= short(get_syllable(tmp) + sfxextra); + } else { + numsyllable -= short(sfxextra); } // + 1 word, if syllable number of the prefix > 1 (hungarian @@ -2605,8 +2669,9 @@ int AffixMgr::compound_check_morph(const char* word, if (!m.empty()) { result.push_back(MSEP_FLD); result.append(MORPH_PART); - result.append(word + 1); + result.append(word + i); line_uniq_app(m, MSEP_REC); + result.push_back(MSEP_FLD); result.append(m); } result.push_back(MSEP_REC); @@ -2769,7 +2834,6 @@ struct hentry* AffixMgr::suffix_check(const char* word, } // check word for two-level suffixes - struct hentry* AffixMgr::suffix_check_twosfx(const char* word, int len, int sfxopts, @@ -2814,6 +2878,7 @@ struct hentry* 
AffixMgr::suffix_check_twosfx(const char* word, return NULL; } +// check word for two-level suffixes and morph std::string AffixMgr::suffix_check_twosfx_morph(const char* word, int len, int sfxopts, @@ -2832,17 +2897,17 @@ std::string AffixMgr::suffix_check_twosfx_morph(const char* word, if (ppfx) { if (ppfx->getMorph()) { result.append(ppfx->getMorph()); - result.append(" "); + result.push_back(MSEP_FLD); } else debugflag(result, ppfx->getFlag()); } result.append(st); if (se->getMorph()) { - result.append(" "); + result.push_back(MSEP_FLD); result.append(se->getMorph()); } else debugflag(result, se->getFlag()); - result.append("\n"); + result.push_back(MSEP_REC); } } se = se->getNext(); @@ -2867,12 +2932,12 @@ std::string AffixMgr::suffix_check_twosfx_morph(const char* word, result3.clear(); if (sptr->getMorph()) { - result3.append(" "); + result3.push_back(MSEP_FLD); result3.append(sptr->getMorph()); } else debugflag(result3, sptr->getFlag()); strlinecat(result2, result3); - result2.append("\n"); + result2.push_back(MSEP_REC); result.append(result2); } } @@ -2935,28 +3000,28 @@ std::string AffixMgr::suffix_check_morph(const char* word, if (ppfx) { if (ppfx->getMorph()) { result.append(ppfx->getMorph()); - result.append(" "); + result.push_back(MSEP_FLD); } else debugflag(result, ppfx->getFlag()); } if (complexprefixes && HENTRY_DATA(rv)) result.append(HENTRY_DATA2(rv)); if (!HENTRY_FIND(rv, MORPH_STEM)) { - result.append(" "); + result.push_back(MSEP_FLD); result.append(MORPH_STEM); result.append(HENTRY_WORD(rv)); } if (!complexprefixes && HENTRY_DATA(rv)) { - result.append(" "); + result.push_back(MSEP_FLD); result.append(HENTRY_DATA2(rv)); } if (se->getMorph()) { - result.append(" "); + result.push_back(MSEP_FLD); result.append(se->getMorph()); } else debugflag(result, se->getFlag()); - result.append("\n"); + result.push_back(MSEP_REC); rv = se->get_next_homonym(rv, sfxopts, ppfx, cclass, needflag); } } @@ -3002,29 +3067,29 @@ std::string 
AffixMgr::suffix_check_morph(const char* word, if (ppfx) { if (ppfx->getMorph()) { result.append(ppfx->getMorph()); - result.append(" "); + result.push_back(MSEP_FLD); } else debugflag(result, ppfx->getFlag()); } if (complexprefixes && HENTRY_DATA(rv)) result.append(HENTRY_DATA2(rv)); if (!HENTRY_FIND(rv, MORPH_STEM)) { - result.append(" "); + result.push_back(MSEP_FLD); result.append(MORPH_STEM); result.append(HENTRY_WORD(rv)); } if (!complexprefixes && HENTRY_DATA(rv)) { - result.append(" "); + result.push_back(MSEP_FLD); result.append(HENTRY_DATA2(rv)); } if (sptr->getMorph()) { - result.append(" "); + result.push_back(MSEP_FLD); result.append(sptr->getMorph()); } else debugflag(result, sptr->getFlag()); - result.append("\n"); + result.push_back(MSEP_REC); rv = sptr->get_next_homonym(rv, sfxopts, ppfx, cclass, needflag); } sptr = sptr->getNextEQ(); @@ -3213,7 +3278,7 @@ std::string AffixMgr::morphgen(const char* ts, // use input suffix fields, if exist if (strstr(morph, MORPH_INFL_SFX) || strstr(morph, MORPH_DERI_SFX)) { mymorph.assign(morph); - mymorph.append(" "); + mymorph.push_back(MSEP_FLD); stemmorphcatpos = mymorph.size(); } else { stemmorphcatpos = std::string::npos; @@ -3414,7 +3479,7 @@ int AffixMgr::expand_rootword(struct guessword* wlst, // return replacing table const std::vector& AffixMgr::get_reptable() const { - return reptable; + return pHMgr->get_reptable(); } // return iconv table @@ -3554,6 +3619,11 @@ FLAG AffixMgr::get_nongramsuggest() const { return nongramsuggest; } +// return the substandard root/affix control flag +FLAG AffixMgr::get_substandard() const { + return substandard; +} + // return the forbidden words flag modify flag FLAG AffixMgr::get_needaffix() const { return needaffix; @@ -3692,103 +3762,6 @@ bool AffixMgr::parse_cpdsyllable(const std::string& line, FileMgr* af) { return true; } -/* parse in the typical fault correcting table */ -bool AffixMgr::parse_reptable(const std::string& line, FileMgr* af) { - if (parsedrep) { - 
HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n", - af->getlinenum()); - return false; - } - parsedrep = true; - int numrep = -1; - int i = 0; - int np = 0; - std::string::const_iterator iter = line.begin(); - std::string::const_iterator start_piece = mystrsep(line, iter); - while (start_piece != line.end()) { - switch (i) { - case 0: { - np++; - break; - } - case 1: { - numrep = atoi(std::string(start_piece, iter).c_str()); - if (numrep < 1) { - HUNSPELL_WARNING(stderr, "error: line %d: incorrect entry number\n", - af->getlinenum()); - return false; - } - reptable.reserve(numrep); - np++; - break; - } - default: - break; - } - ++i; - start_piece = mystrsep(line, iter); - } - if (np != 2) { - HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", - af->getlinenum()); - return false; - } - - /* now parse the numrep lines to read in the remainder of the table */ - for (int j = 0; j < numrep; ++j) { - std::string nl; - if (!af->getline(nl)) - return false; - mychomp(nl); - reptable.push_back(replentry()); - iter = nl.begin(); - i = 0; - int type = 0; - start_piece = mystrsep(nl, iter); - while (start_piece != nl.end()) { - switch (i) { - case 0: { - if (nl.compare(start_piece - nl.begin(), 3, "REP", 3) != 0) { - HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", - af->getlinenum()); - reptable.clear(); - return false; - } - break; - } - case 1: { - if (*start_piece == '^') - type = 1; - reptable.back().pattern.assign(start_piece + type, iter); - mystrrep(reptable.back().pattern, "_", " "); - if (!reptable.back().pattern.empty() && reptable.back().pattern[reptable.back().pattern.size() - 1] == '$') { - type += 2; - reptable.back().pattern.resize(reptable.back().pattern.size() - 1); - } - break; - } - case 2: { - reptable.back().outstrings[type].assign(start_piece, iter); - mystrrep(reptable.back().outstrings[type], "_", " "); - break; - } - default: - break; - } - ++i; - start_piece = mystrsep(nl, iter); - } - if 
(reptable.back().pattern.empty() || reptable.back().outstrings[type].empty()) { - HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", - af->getlinenum()); - reptable.clear(); - return false; - } - } - return true; -} - -/* parse in the typical fault correcting table */ bool AffixMgr::parse_convtable(const std::string& line, FileMgr* af, RepList** rl, @@ -4617,7 +4590,7 @@ bool AffixMgr::parse_affix(const std::string& line, entry->appnd = std::string(start_piece, dash); std::string dash_str(dash + 1, iter); - if (!ignorechars.empty()) { + if (!ignorechars.empty() && !has_no_ignored_chars(entry->appnd, ignorechars)) { if (utf8) { remove_ignored_chars_utf(entry->appnd, ignorechars_utf16); } else { @@ -4653,7 +4626,7 @@ bool AffixMgr::parse_affix(const std::string& line, } else { entry->appnd = std::string(start_piece, iter); - if (!ignorechars.empty()) { + if (!ignorechars.empty() && !has_no_ignored_chars(entry->appnd, ignorechars)) { if (utf8) { remove_ignored_chars_utf(entry->appnd, ignorechars_utf16); } else { diff --git a/3rdparty/hunspell/1.6.2/src/hunspell/affixmgr.hxx b/3rdparty/hunspell/1.7.0/src/hunspell/affixmgr.hxx similarity index 99% rename from 3rdparty/hunspell/1.6.2/src/hunspell/affixmgr.hxx rename to 3rdparty/hunspell/1.7.0/src/hunspell/affixmgr.hxx index d41e69cfd2..38842a3ddc 100644 --- a/3rdparty/hunspell/1.6.2/src/hunspell/affixmgr.hxx +++ b/3rdparty/hunspell/1.7.0/src/hunspell/affixmgr.hxx @@ -120,8 +120,6 @@ class AffixMgr { FLAG nongramsuggest; FLAG needaffix; int cpdmin; - bool parsedrep; - std::vector reptable; RepList* iconvtable; RepList* oconvtable; bool parsedmaptable; @@ -251,6 +249,7 @@ class AffixMgr { short get_syllable(const std::string& word); int cpdrep_check(const char* word, int len); + int cpdwordpair_check(const char * word, int len); int cpdpat_check(const char* word, int len, hentry* r1, @@ -311,6 +310,7 @@ class AffixMgr { FLAG get_forbiddenword() const; FLAG get_nosuggest() const; FLAG get_nongramsuggest() const; 
+ FLAG get_substandard() const; FLAG get_needaffix() const; FLAG get_onlyincompound() const; const char* get_derived() const; @@ -338,7 +338,6 @@ class AffixMgr { bool parse_flag(const std::string& line, unsigned short* out, FileMgr* af); bool parse_num(const std::string& line, int* out, FileMgr* af); bool parse_cpdsyllable(const std::string& line, FileMgr* af); - bool parse_reptable(const std::string& line, FileMgr* af); bool parse_convtable(const std::string& line, FileMgr* af, RepList** rl, diff --git a/3rdparty/hunspell/1.6.2/src/hunspell/atypes.hxx b/3rdparty/hunspell/1.7.0/src/hunspell/atypes.hxx similarity index 89% rename from 3rdparty/hunspell/1.6.2/src/hunspell/atypes.hxx rename to 3rdparty/hunspell/1.7.0/src/hunspell/atypes.hxx index f841523189..38396db943 100644 --- a/3rdparty/hunspell/1.6.2/src/hunspell/atypes.hxx +++ b/3rdparty/hunspell/1.7.0/src/hunspell/atypes.hxx @@ -95,6 +95,16 @@ static inline void HUNSPELL_WARNING(FILE*, const char*, ...) {} #define TESTAFF(a, b, c) (std::binary_search(a, a + c, b)) +// timelimit: max. ~1/4 sec (process time on Linux) for +// for a suggestion, including max. 
~/10 sec for a case +// sensitive plain or compound word suggestion, within +// ~1/20 sec long time consuming suggestion functions +#define TIMELIMIT_GLOBAL (CLOCKS_PER_SEC / 4) +#define TIMELIMIT_SUGGESTION (CLOCKS_PER_SEC / 10) +#define TIMELIMIT (CLOCKS_PER_SEC / 20) +#define MINTIMER 100 +#define MAXPLUSTIMER 100 + struct guessword { char* word; bool allow; diff --git a/3rdparty/hunspell/1.6.2/src/hunspell/baseaffix.hxx b/3rdparty/hunspell/1.7.0/src/hunspell/baseaffix.hxx similarity index 100% rename from 3rdparty/hunspell/1.6.2/src/hunspell/baseaffix.hxx rename to 3rdparty/hunspell/1.7.0/src/hunspell/baseaffix.hxx diff --git a/3rdparty/hunspell/1.6.2/src/hunspell/csutil.cxx b/3rdparty/hunspell/1.7.0/src/hunspell/csutil.cxx similarity index 98% rename from 3rdparty/hunspell/1.6.2/src/hunspell/csutil.cxx rename to 3rdparty/hunspell/1.7.0/src/hunspell/csutil.cxx index df97b577aa..deb1a4e25b 100644 --- a/3rdparty/hunspell/1.6.2/src/hunspell/csutil.cxx +++ b/3rdparty/hunspell/1.7.0/src/hunspell/csutil.cxx @@ -69,6 +69,7 @@ */ #include +#include #include #include #include @@ -79,13 +80,6 @@ #include "atypes.hxx" #include "langnum.hxx" -// Unicode character encoding information -struct unicode_info { - unsigned short c; - unsigned short cupper; - unsigned short clower; -}; - #ifdef _WIN32 #include #include @@ -95,19 +89,17 @@ struct unicode_info { #include #else #ifndef MOZILLA_CLIENT -#include "utf_info.cxx" +#include "utf_info.hxx" #define UTF_LST_LEN (sizeof(utf_lst) / (sizeof(unicode_info))) #endif #endif #ifdef MOZILLA_CLIENT #include "nsCOMPtr.h" -#include "nsIUnicodeEncoder.h" -#include "nsIUnicodeDecoder.h" #include "nsUnicharUtils.h" -#include "mozilla/dom/EncodingUtils.h" +#include "mozilla/Encoding.h" -using mozilla::dom::EncodingUtils; +using namespace mozilla; #endif struct unicode_info2 { @@ -495,20 +487,17 @@ void uniqlist(std::vector& list) { namespace { unsigned char cupper(const struct cs_info* csconv, int nIndex) { - if (nIndex < 0 || nIndex > 255) 
- return nIndex; + assert(nIndex >= 0 && nIndex <= 255); return csconv[nIndex].cupper; } unsigned char clower(const struct cs_info* csconv, int nIndex) { - if (nIndex < 0 || nIndex > 255) - return nIndex; + assert(nIndex >= 0 && nIndex <= 255); return csconv[nIndex].clower; } unsigned char ccase(const struct cs_info* csconv, int nIndex) { - if (nIndex < 0 || nIndex > 255) - return nIndex; + assert(nIndex >= 0 && nIndex <= 255); return csconv[nIndex].ccase; } } @@ -2306,20 +2295,12 @@ struct cs_info* get_current_cs(const std::string& es) { ccs[i].cupper = i; } - nsCOMPtr encoder; - nsCOMPtr decoder; - - nsresult rv; - - nsAutoCString label(es.c_str()); - nsAutoCString encoding; - if (!EncodingUtils::FindEncodingForLabelNoReplacement(label, encoding)) { + auto encoding = Encoding::ForLabelNoReplacement(es); + if (!encoding) { return ccs; } - encoder = EncodingUtils::EncoderForEncoding(encoding); - decoder = EncodingUtils::DecoderForEncoding(encoding); - encoder->SetOutputErrorBehavior(encoder->kOnError_Signal, nullptr, '?'); - decoder->SetInputErrorBehavior(decoder->kOnError_Signal); + auto encoder = encoding->NewEncoder(); + auto decoder = encoding->NewDecoderWithoutBOMHandling(); for (unsigned int i = 0; i <= 0xff; ++i) { bool success = false; @@ -2327,36 +2308,50 @@ struct cs_info* get_current_cs(const std::string& es) { // in this 1-byte character encoding. Call our encoding/decoding // APIs separately for each byte since they may reject some of the // bytes, and we want to handle errors separately for each byte. 
- char lower, upper; + uint8_t lower, upper; do { if (i == 0) break; - const char source = char(i); - char16_t uni, uniCased; - int32_t charLength = 1, uniLength = 1; + uint8_t source = uint8_t(i); + char16_t uni[2]; + char16_t uniCased; + uint8_t destination[4]; + auto src1 = MakeSpan(&source, 1); + auto dst1 = MakeSpan(uni); + auto src2 = MakeSpan(&uniCased, 1); + auto dst2 = MakeSpan(destination); - rv = decoder->Convert(&source, &charLength, &uni, &uniLength); - // Explicitly check NS_OK because we don't want to allow - // NS_OK_UDEC_MOREOUTPUT or NS_OK_UDEC_MOREINPUT. - if (rv != NS_OK || charLength != 1 || uniLength != 1) - break; - uniCased = ToLowerCase(uni); - rv = encoder->Convert(&uniCased, &uniLength, &lower, &charLength); - // Explicitly check NS_OK because we don't want to allow - // NS_OK_UDEC_MOREOUTPUT or NS_OK_UDEC_MOREINPUT. - if (rv != NS_OK || charLength != 1 || uniLength != 1) + uint32_t result; + size_t read; + size_t written; + Tie(result, read, written) = + decoder->DecodeToUTF16WithoutReplacement(src1, dst1, true); + if (result != kInputEmpty || read != 1 || written != 1) { break; + } - uniCased = ToUpperCase(uni); - rv = encoder->Convert(&uniCased, &uniLength, &upper, &charLength); - // Explicitly check NS_OK because we don't want to allow - // NS_OK_UDEC_MOREOUTPUT or NS_OK_UDEC_MOREINPUT. 
- if (rv != NS_OK || charLength != 1 || uniLength != 1) + uniCased = ToLowerCase(uni[0]); + Tie(result, read, written) = + encoder->EncodeFromUTF16WithoutReplacement(src2, dst2, true); + if (result != kInputEmpty || read != 1 || written != 1) { break; + } + lower = destination[0]; + + uniCased = ToUpperCase(uni[0]); + Tie(result, read, written) = + encoder->EncodeFromUTF16WithoutReplacement(src2, dst2, true); + if (result != kInputEmpty || read != 1 || written != 1) { + break; + } + upper = destination[0]; success = true; } while (0); + encoding->NewEncoderInto(*encoder); + encoding->NewDecoderWithoutBOMHandlingInto(*decoder); + if (success) { ccs[i].cupper = upper; ccs[i].clower = lower; @@ -2401,6 +2396,7 @@ static struct lang_map lang2enc[] = {{"ar", LANG_ar}, {"az", LANG_az}, {"az_AZ", LANG_az}, // for back-compatibility {"bg", LANG_bg}, {"ca", LANG_ca}, + {"crh", LANG_crh}, {"cs", LANG_cs}, {"da", LANG_da}, {"de", LANG_de}, {"el", LANG_el}, {"en", LANG_en}, {"es", LANG_es}, @@ -2458,7 +2454,7 @@ unsigned short unicodetoupper(unsigned short c, int langnum) { // In Azeri and Turkish, I and i dictinct letters: // There are a dotless lower case i pair of upper `I', // and an upper I with dot pair of lower `i'. - if (c == 0x0069 && ((langnum == LANG_az) || (langnum == LANG_tr))) + if (c == 0x0069 && ((langnum == LANG_az) || (langnum == LANG_tr) || (langnum == LANG_crh))) return 0x0130; #ifdef OPENOFFICEORG return static_cast(u_toupper(c)); @@ -2475,7 +2471,7 @@ unsigned short unicodetolower(unsigned short c, int langnum) { // In Azeri and Turkish, I and i dictinct letters: // There are a dotless lower case i pair of upper `I', // and an upper I with dot pair of lower `i'. 
- if (c == 0x0049 && ((langnum == LANG_az) || (langnum == LANG_tr))) + if (c == 0x0049 && ((langnum == LANG_az) || (langnum == LANG_tr) || (langnum == LANG_crh))) return 0x0131; #ifdef OPENOFFICEORG return static_cast(u_tolower(c)); diff --git a/3rdparty/hunspell/1.6.2/src/hunspell/csutil.hxx b/3rdparty/hunspell/1.7.0/src/hunspell/csutil.hxx similarity index 93% rename from 3rdparty/hunspell/1.6.2/src/hunspell/csutil.hxx rename to 3rdparty/hunspell/1.7.0/src/hunspell/csutil.hxx index 5d83f80970..739e2299f3 100644 --- a/3rdparty/hunspell/1.6.2/src/hunspell/csutil.hxx +++ b/3rdparty/hunspell/1.7.0/src/hunspell/csutil.hxx @@ -269,10 +269,23 @@ LIBHUNSPELL_DLL_EXPORTED void store_pointer(char* dest, char* source); // conversion function for protected memory LIBHUNSPELL_DLL_EXPORTED char* get_stored_pointer(const char* s); + +// to avoid unnecessary string copies and Unicode conversions +// we simply check the ignored_chars characters in the word +// (in the case of UTF-8 encoded strings, "false" means +// "likely false", if ignored_chars characters are not ASCII) +inline bool has_no_ignored_chars(const std::string& word, + const std::string& ignored_chars) { + for (std::string::const_iterator it = ignored_chars.begin(), end = ignored_chars.end(); it != end; ++it) + if (word.find(*it) != std::string::npos) + return false; + return true; +} + // hash entry macros -LIBHUNSPELL_DLL_EXPORTED inline char* HENTRY_DATA(struct hentry* h) { +inline char* HENTRY_DATA(struct hentry* h) { char* ret; - if (!h->var) + if (!(h->var & H_OPT)) ret = NULL; else if (h->var & H_OPT_ALIASM) ret = get_stored_pointer(HENTRY_WORD(h) + h->blen + 1); @@ -281,10 +294,10 @@ LIBHUNSPELL_DLL_EXPORTED inline char* HENTRY_DATA(struct hentry* h) { return ret; } -LIBHUNSPELL_DLL_EXPORTED inline const char* HENTRY_DATA( +inline const char* HENTRY_DATA( const struct hentry* h) { const char* ret; - if (!h->var) + if (!(h->var & H_OPT)) ret = NULL; else if (h->var & H_OPT_ALIASM) ret = 
get_stored_pointer(HENTRY_WORD(h) + h->blen + 1); @@ -294,10 +307,10 @@ LIBHUNSPELL_DLL_EXPORTED inline const char* HENTRY_DATA( } // NULL-free version for warning-free OOo build -LIBHUNSPELL_DLL_EXPORTED inline const char* HENTRY_DATA2( +inline const char* HENTRY_DATA2( const struct hentry* h) { const char* ret; - if (!h->var) + if (!(h->var & H_OPT)) ret = ""; else if (h->var & H_OPT_ALIASM) ret = get_stored_pointer(HENTRY_WORD(h) + h->blen + 1); @@ -306,7 +319,7 @@ LIBHUNSPELL_DLL_EXPORTED inline const char* HENTRY_DATA2( return ret; } -LIBHUNSPELL_DLL_EXPORTED inline char* HENTRY_FIND(struct hentry* h, +inline char* HENTRY_FIND(struct hentry* h, const char* p) { return (HENTRY_DATA(h) ? strstr(HENTRY_DATA(h), p) : NULL); } diff --git a/3rdparty/hunspell/1.6.2/src/hunspell/filemgr.cxx b/3rdparty/hunspell/1.7.0/src/hunspell/filemgr.cxx similarity index 100% rename from 3rdparty/hunspell/1.6.2/src/hunspell/filemgr.cxx rename to 3rdparty/hunspell/1.7.0/src/hunspell/filemgr.cxx diff --git a/3rdparty/hunspell/1.6.2/src/hunspell/filemgr.hxx b/3rdparty/hunspell/1.7.0/src/hunspell/filemgr.hxx similarity index 100% rename from 3rdparty/hunspell/1.6.2/src/hunspell/filemgr.hxx rename to 3rdparty/hunspell/1.7.0/src/hunspell/filemgr.hxx diff --git a/3rdparty/hunspell/1.6.2/src/hunspell/hashmgr.cxx b/3rdparty/hunspell/1.7.0/src/hunspell/hashmgr.cxx similarity index 79% rename from 3rdparty/hunspell/1.6.2/src/hunspell/hashmgr.cxx rename to 3rdparty/hunspell/1.7.0/src/hunspell/hashmgr.cxx index 23421b567a..7e843c3e76 100644 --- a/3rdparty/hunspell/1.6.2/src/hunspell/hashmgr.cxx +++ b/3rdparty/hunspell/1.7.0/src/hunspell/hashmgr.cxx @@ -78,6 +78,7 @@ #include "hashmgr.hxx" #include "csutil.hxx" #include "atypes.hxx" +#include "langnum.hxx" // build a hash table from a munched word list @@ -182,13 +183,14 @@ int HashMgr::add_word(const std::string& in_word, unsigned short* aff, int al, const std::string* in_desc, - bool onlyupcase) { + bool onlyupcase, + int captype) { const 
std::string* word = &in_word; const std::string* desc = in_desc; std::string *word_copy = NULL; std::string *desc_copy = NULL; - if (!ignorechars.empty() || complexprefixes) { + if ((!ignorechars.empty() && !has_no_ignored_chars(in_word, ignorechars)) || complexprefixes) { word_copy = new std::string(in_word); if (!ignorechars.empty()) { @@ -243,20 +245,119 @@ int HashMgr::add_word(const std::string& in_word, hp->astr = aff; hp->next = NULL; hp->next_homonym = NULL; + hp->var = (captype == INITCAP) ? H_OPT_INITCAP : 0; // store the description string or its pointer if (desc) { - hp->var = H_OPT; + hp->var += H_OPT; if (aliasm) { hp->var += H_OPT_ALIASM; store_pointer(hpw + word->size() + 1, get_aliasm(atoi(desc->c_str()))); } else { strcpy(hpw + word->size() + 1, desc->c_str()); } - if (strstr(HENTRY_DATA(hp), MORPH_PHON)) + if (strstr(HENTRY_DATA(hp), MORPH_PHON)) { hp->var += H_OPT_PHON; - } else - hp->var = 0; + // store ph: fields (pronounciation, misspellings, old orthography etc.) + // of a morphological description in reptable to use in REP replacements. + if (reptable.capacity() < (unsigned int)(tablesize/MORPH_PHON_RATIO)) + reptable.reserve(tablesize/MORPH_PHON_RATIO); + std::string fields = HENTRY_DATA(hp); + std::string::const_iterator iter = fields.begin(); + std::string::const_iterator start_piece = mystrsep(fields, iter); + while (start_piece != fields.end()) { + if (std::string(start_piece, iter).find(MORPH_PHON) == 0) { + std::string ph = std::string(start_piece, iter).substr(sizeof MORPH_PHON - 1); + if (ph.size() > 0) { + std::vector w; + size_t strippatt; + std::string wordpart; + // dictionary based REP replacement, separated by "->" + // for example "pretty ph:prity ph:priti->pretti" to handle + // both prity -> pretty and pritier -> prettiest suggestions. 
+ if (((strippatt = ph.find("->")) != std::string::npos) && + (strippatt > 0) && (strippatt < ph.size() - 2)) { + wordpart = ph.substr(strippatt + 2); + ph.erase(ph.begin() + strippatt, ph.end()); + } else + wordpart = in_word; + // when the ph: field ends with the character *, + // strip last character of the pattern and the replacement + // to match in REP suggestions also at character changes, + // for example, "pretty ph:prity*" results "prit->prett" + // REP replacement instead of "prity->pretty", to get + // prity->pretty and pritiest->prettiest suggestions. + if (ph.at(ph.size()-1) == '*') { + strippatt = 1; + size_t stripword = 0; + if (utf8) { + while ((strippatt < ph.size()) && + ((ph.at(ph.size()-strippatt-1) & 0xc0) == 0x80)) + ++strippatt; + while ((stripword < wordpart.size()) && + ((wordpart.at(wordpart.size()-stripword-1) & 0xc0) == 0x80)) + ++stripword; + } + ++strippatt; + ++stripword; + if ((ph.size() > strippatt) && (wordpart.size() > stripword)) { + ph.erase(ph.size()-strippatt, strippatt); + wordpart.erase(in_word.size()-stripword, stripword); + } + } + // capitalize lowercase pattern for capitalized words to support + // good suggestions also for capitalized misspellings, eg. + // Wednesday ph:wendsay + // results wendsay -> Wednesday and Wendsay -> Wednesday, too. 
+ if (captype==INITCAP) { + std::string ph_capitalized; + if (utf8) { + u8_u16(w, ph); + if (get_captype_utf8(w, langnum) == NOCAP) { + mkinitcap_utf(w, langnum); + u16_u8(ph_capitalized, w); + } + } else if (get_captype(ph, csconv) == NOCAP) + mkinitcap(ph_capitalized, csconv); + + if (ph_capitalized.size() > 0) { + // add also lowercase word in the case of German or + // Hungarian to support lowercase suggestions lowercased by + // compound word generation or derivational suffixes + // (for example by adjectival suffix "-i" of geographical + // names in Hungarian: + // Massachusetts ph:messzecsuzec + // messzecsuzeci -> massachusettsi (adjective) + // For lowercasing by conditional PFX rules, see + // tests/germancompounding test example or the + // Hungarian dictionary.) + if (langnum == LANG_de || langnum == LANG_hu) { + std::string wordpart_lower(wordpart); + if (utf8) { + u8_u16(w, wordpart_lower); + mkallsmall_utf(w, langnum); + u16_u8(wordpart_lower, w); + } else { + mkallsmall(wordpart_lower, csconv); + } + reptable.push_back(replentry()); + reptable.back().pattern.assign(ph); + reptable.back().outstrings[0].assign(wordpart_lower); + } + reptable.push_back(replentry()); + reptable.back().pattern.assign(ph_capitalized); + reptable.back().outstrings[0].assign(wordpart); + } + } + reptable.push_back(replentry()); + reptable.back().pattern.assign(ph); + reptable.back().outstrings[0].assign(wordpart); + } + } + start_piece = mystrsep(fields, iter); + } + } + } struct hentry* dp = tableptr[i]; if (!dp) { @@ -347,12 +448,12 @@ int HashMgr::add_hidden_capitalized_word(const std::string& word, mkallsmall_utf(w, langnum); mkinitcap_utf(w, langnum); u16_u8(st, w); - return add_word(st, wcl, flags2, flagslen + 1, dp, true); + return add_word(st, wcl, flags2, flagslen + 1, dp, true, INITCAP); } else { std::string new_word(word); mkallsmall(new_word, csconv); mkinitcap(new_word, csconv); - int ret = add_word(new_word, wcl, flags2, flagslen + 1, dp, true); + int ret = 
add_word(new_word, wcl, flags2, flagslen + 1, dp, true, INITCAP); return ret; } } @@ -405,24 +506,8 @@ int HashMgr::remove_forbidden_flag(const std::string& word) { if (!dp) return 1; while (dp) { - if (dp->astr && TESTAFF(dp->astr, forbiddenword, dp->alen)) { - if (dp->alen == 1) - dp->alen = 0; // XXX forbidden words of personal dic. - else { - unsigned short* flags2 = - (unsigned short*)malloc(sizeof(unsigned short) * (dp->alen - 1)); - if (!flags2) - return 1; - int i, j = 0; - for (i = 0; i < dp->alen; i++) { - if (dp->astr[i] != forbiddenword) - flags2[j++] = dp->astr[i]; - } - dp->alen--; - free(dp->astr); - dp->astr = flags2; // XXX allowed forbidden words - } - } + if (dp->astr && TESTAFF(dp->astr, forbiddenword, dp->alen)) + dp->alen = 0; // XXX forbidden words of personal dic. dp = dp->next_homonym; } return 0; @@ -435,7 +520,7 @@ int HashMgr::add(const std::string& word) { int al = 0; unsigned short* flags = NULL; int wcl = get_clen_and_captype(word, &captype); - add_word(word, wcl, flags, al, NULL, false); + add_word(word, wcl, flags, al, NULL, false, captype); return add_hidden_capitalized_word(word, wcl, flags, al, NULL, captype); } @@ -450,14 +535,14 @@ int HashMgr::add_with_affix(const std::string& word, const std::string& example) int captype; int wcl = get_clen_and_captype(word, &captype); if (aliasf) { - add_word(word, wcl, dp->astr, dp->alen, NULL, false); + add_word(word, wcl, dp->astr, dp->alen, NULL, false, captype); } else { unsigned short* flags = (unsigned short*)malloc(dp->alen * sizeof(unsigned short)); if (flags) { memcpy((void*)flags, (void*)dp->astr, dp->alen * sizeof(unsigned short)); - add_word(word, wcl, flags, dp->alen, NULL, false); + add_word(word, wcl, flags, dp->alen, NULL, false, captype); } else return 1; } @@ -605,7 +690,7 @@ int HashMgr::load_tables(const char* tpath, const char* key) { int wcl = get_clen_and_captype(ts, &captype, workbuf); const std::string *dp_str = dp.empty() ? 
NULL : &dp; // add the word and its index plus its capitalized form optionally - if (add_word(ts, wcl, flags, al, dp_str, false) || + if (add_word(ts, wcl, flags, al, dp_str, false, captype) || add_hidden_capitalized_word(ts, wcl, flags, al, dp_str, captype)) { delete dict; return 5; @@ -940,8 +1025,19 @@ int HashMgr::load_config(const char* affpath, const char* key) { if (line.compare(0, 15, "COMPLEXPREFIXES", 15) == 0) complexprefixes = 1; + /* parse in the typical fault correcting table */ + if (line.compare(0, 3, "REP", 3) == 0) { + if (!parse_reptable(line, afflst)) { + delete afflst; + return 1; + } + } + + // don't check the full affix file, yet if (((line.compare(0, 3, "SFX", 3) == 0) || - (line.compare(0, 3, "PFX", 3) == 0)) && line.size() > 3 && isspace(line[3])) + (line.compare(0, 3, "PFX", 3) == 0)) && + line.size() > 3 && isspace(line[3]) && + !reptable.empty()) // (REP table is in the end of Afrikaans aff file) break; } @@ -1191,3 +1287,103 @@ char* HashMgr::get_aliasm(int index) const { HUNSPELL_WARNING(stderr, "error: bad morph. 
alias index: %d\n", index); return NULL; } + +/* parse in the typical fault correcting table */ +bool HashMgr::parse_reptable(const std::string& line, FileMgr* af) { + if (!reptable.empty()) { + HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n", + af->getlinenum()); + return false; + } + int numrep = -1; + int i = 0; + int np = 0; + std::string::const_iterator iter = line.begin(); + std::string::const_iterator start_piece = mystrsep(line, iter); + while (start_piece != line.end()) { + switch (i) { + case 0: { + np++; + break; + } + case 1: { + numrep = atoi(std::string(start_piece, iter).c_str()); + if (numrep < 1) { + HUNSPELL_WARNING(stderr, "error: line %d: incorrect entry number\n", + af->getlinenum()); + return false; + } + reptable.reserve(numrep); + np++; + break; + } + default: + break; + } + ++i; + start_piece = mystrsep(line, iter); + } + if (np != 2) { + HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", + af->getlinenum()); + return false; + } + + /* now parse the numrep lines to read in the remainder of the table */ + for (int j = 0; j < numrep; ++j) { + std::string nl; + if (!af->getline(nl)) + return false; + mychomp(nl); + reptable.push_back(replentry()); + iter = nl.begin(); + i = 0; + int type = 0; + start_piece = mystrsep(nl, iter); + while (start_piece != nl.end()) { + switch (i) { + case 0: { + if (nl.compare(start_piece - nl.begin(), 3, "REP", 3) != 0) { + HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", + af->getlinenum()); + reptable.clear(); + return false; + } + break; + } + case 1: { + if (*start_piece == '^') + type = 1; + reptable.back().pattern.assign(start_piece + type, iter); + mystrrep(reptable.back().pattern, "_", " "); + if (!reptable.back().pattern.empty() && reptable.back().pattern[reptable.back().pattern.size() - 1] == '$') { + type += 2; + reptable.back().pattern.resize(reptable.back().pattern.size() - 1); + } + break; + } + case 2: { + 
reptable.back().outstrings[type].assign(start_piece, iter); + mystrrep(reptable.back().outstrings[type], "_", " "); + break; + } + default: + break; + } + ++i; + start_piece = mystrsep(nl, iter); + } + if (reptable.back().pattern.empty() || reptable.back().outstrings[type].empty()) { + HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", + af->getlinenum()); + reptable.clear(); + return false; + } + } + return true; +} + +// return replacing table +const std::vector& HashMgr::get_reptable() const { + return reptable; +} diff --git a/3rdparty/hunspell/1.6.2/src/hunspell/hashmgr.hxx b/3rdparty/hunspell/1.7.0/src/hunspell/hashmgr.hxx similarity index 89% rename from 3rdparty/hunspell/1.6.2/src/hunspell/hashmgr.hxx rename to 3rdparty/hunspell/1.7.0/src/hunspell/hashmgr.hxx index da485d7afa..b6eadddecc 100644 --- a/3rdparty/hunspell/1.6.2/src/hunspell/hashmgr.hxx +++ b/3rdparty/hunspell/1.7.0/src/hunspell/hashmgr.hxx @@ -81,6 +81,12 @@ enum flag { FLAG_CHAR, FLAG_LONG, FLAG_NUM, FLAG_UNI }; +// morphological description of a dictionary item can contain +// arbitrary number "ph:" (MORPH_PHON) fields to store typical +// phonetic or other misspellings of that word. +// ratio of lines/lines with "ph:" in the dic file: 1/MORPH_PHON_RATIO +#define MORPH_PHON_RATIO 500 + class HashMgr { int tablesize; struct hentry** tableptr; @@ -99,6 +105,10 @@ class HashMgr { unsigned short* aliasflen; int numaliasm; // morphological desciption `compression' with aliases char** aliasm; + // reptable created from REP table of aff file and from "ph:" fields + // of the dic file. 
It contains phonetic and other common misspellings + // (letters, letter groups and words) for better suggestions + std::vector reptable; public: HashMgr(const char* tpath, const char* apath, const char* key = NULL); @@ -119,6 +129,7 @@ class HashMgr { int get_aliasf(int index, unsigned short** fvec, FileMgr* af) const; int is_aliasm() const; char* get_aliasm(int index) const; + const std::vector& get_reptable() const; private: int get_clen_and_captype(const std::string& word, int* captype); @@ -129,7 +140,8 @@ class HashMgr { unsigned short* ap, int al, const std::string* desc, - bool onlyupcase); + bool onlyupcase, + int captype); int load_config(const char* affpath, const char* key); bool parse_aliasf(const std::string& line, FileMgr* af); int add_hidden_capitalized_word(const std::string& word, @@ -139,6 +151,7 @@ class HashMgr { const std::string* dp, int captype); bool parse_aliasm(const std::string& line, FileMgr* af); + bool parse_reptable(const std::string& line, FileMgr* af); int remove_forbidden_flag(const std::string& word); }; diff --git a/3rdparty/hunspell/1.6.2/src/hunspell/htypes.hxx b/3rdparty/hunspell/1.7.0/src/hunspell/htypes.hxx similarity index 84% rename from 3rdparty/hunspell/1.6.2/src/hunspell/htypes.hxx rename to 3rdparty/hunspell/1.7.0/src/hunspell/htypes.hxx index 8f66a0080e..8e03a03466 100644 --- a/3rdparty/hunspell/1.6.2/src/hunspell/htypes.hxx +++ b/3rdparty/hunspell/1.7.0/src/hunspell/htypes.hxx @@ -44,9 +44,10 @@ (v) = ((v) << (q)) | (((v) >> (32 - q)) & ((1 << (q)) - 1)); // hentry options -#define H_OPT (1 << 0) -#define H_OPT_ALIASM (1 << 1) -#define H_OPT_PHON (1 << 2) +#define H_OPT (1 << 0) // is there optional morphological data? +#define H_OPT_ALIASM (1 << 1) // using alias compression? +#define H_OPT_PHON (1 << 2) // is there ph: field in the morphological data? +#define H_OPT_INITCAP (1 << 3) // is dictionary word capitalized? // see also csutil.hxx #define HENTRY_WORD(h) &(h->word[0]) @@ -54,6 +55,12 @@ // approx. 
number of user defined words #define USERWORD 1000 +#if __cplusplus >= 201103L || (defined(_MSC_VER) && _MSC_VER >= 1900) +# define HUNSPELL_THREAD_LOCAL thread_local +#else +# define HUNSPELL_THREAD_LOCAL static +#endif + struct hentry { unsigned char blen; // word length in bytes unsigned char clen; // word length in characters (different for UTF-8 enc.) @@ -61,7 +68,7 @@ struct hentry { unsigned short* astr; // affix flag vector struct hentry* next; // next word with same hash code struct hentry* next_homonym; // next homonym word (with same hash code) - char var; // variable fields (only for special pronounciation yet) + char var; // bit vector of H_OPT hentry options char word[1]; // variable-length word (8-bit or UTF-8 encoding) }; diff --git a/3rdparty/hunspell/1.6.2/src/hunspell/hunspell.cxx b/3rdparty/hunspell/1.7.0/src/hunspell/hunspell.cxx similarity index 82% rename from 3rdparty/hunspell/1.6.2/src/hunspell/hunspell.cxx rename to 3rdparty/hunspell/1.7.0/src/hunspell/hunspell.cxx index 1ef11df341..35266c980d 100644 --- a/3rdparty/hunspell/1.6.2/src/hunspell/hunspell.cxx +++ b/3rdparty/hunspell/1.7.0/src/hunspell/hunspell.cxx @@ -71,6 +71,7 @@ #include #include #include +#include #include "affixmgr.hxx" #include "hunspell.hxx" @@ -86,9 +87,9 @@ class HunspellImpl { public: - HunspellImpl(const char* affpath, const char* dpath, const char* key); + HunspellImpl(const char* affpath, const char* dpath, const char* key = NULL); ~HunspellImpl(); - int add_dic(const char* dpath, const char* key); + int add_dic(const char* dpath, const char* key = NULL); std::vector suffix_suggest(const std::string& root_word); std::vector generate(const std::string& word, const std::vector& pl); std::vector generate(const std::string& word, const std::string& pattern); @@ -99,15 +100,28 @@ public: bool input_conv(const std::string& word, std::string& dest); bool spell(const std::string& word, int* info = NULL, std::string* root = NULL); std::vector suggest(const std::string& 
word); - const std::string& get_wordchars() const; + const std::string& get_wordchars_cpp() const; const std::vector& get_wordchars_utf16() const; const std::string& get_dict_encoding() const; int add(const std::string& word); int add_with_affix(const std::string& word, const std::string& example); int remove(const std::string& word); - const std::string& get_version() const; + const std::string& get_version_cpp() const; struct cs_info* get_csconv(); - std::vector dic_encoding_vec; + + int spell(const char* word, int* info = NULL, char** root = NULL); + int suggest(char*** slst, const char* word); + int suffix_suggest(char*** slst, const char* root_word); + void free_list(char*** slst, int n); + char* get_dic_encoding(); + int analyze(char*** slst, const char* word); + int stem(char*** slst, const char* word); + int stem(char*** slst, char** morph, int n); + int generate(char*** slst, const char* word, const char* word2); + int generate(char*** slst, const char* word, char** desc, int n); + const char* get_wordchars() const; + const char* get_version() const; + int input_conv(const char* word, char* dest, size_t destsize); private: AffixMgr* pAMgr; @@ -122,12 +136,17 @@ private: std::vector wordbreak; private: + std::vector analyze_internal(const std::string& word); + bool spell_internal(const std::string& word, int* info = NULL, std::string* root = NULL); + std::vector suggest_internal(const std::string& word, + bool& capitalized, size_t& abbreviated, int& captype); void cleanword(std::string& dest, const std::string&, int* pcaptype, int* pabbrev); size_t cleanword2(std::string& dest, std::vector& dest_u, const std::string& src, int* pcaptype, size_t* pabbrev); + void clean_ignore(std::string& dest, const std::string& src); void mkinitcap(std::string& u8); int mkinitcap2(std::string& u8, std::vector& u16); int mkinitsmall2(std::string& u8, std::vector& u16); @@ -150,10 +169,6 @@ private: HunspellImpl& operator=(const HunspellImpl&); }; -Hunspell::Hunspell(const 
char* affpath, const char* dpath, const char* key) - : m_Impl(new HunspellImpl(affpath, dpath, key)) { -} - HunspellImpl::HunspellImpl(const char* affpath, const char* dpath, const char* key) { csconv = NULL; utf8 = 0; @@ -178,19 +193,12 @@ HunspellImpl::HunspellImpl(const char* affpath, const char* dpath, const char* k complexprefixes = pAMgr->get_complexprefixes(); wordbreak = pAMgr->get_breaktable(); - dic_encoding_vec.resize(encoding.size()+1); - strcpy(&dic_encoding_vec[0], encoding.c_str()); - /* and finally set up the suggestion manager */ pSMgr = new SuggestMgr(try_string, MAXSUGGESTION, pAMgr); if (try_string) free(try_string); } -Hunspell::~Hunspell() { - delete m_Impl; -} - HunspellImpl::~HunspellImpl() { delete pSMgr; delete pAMgr; @@ -207,11 +215,6 @@ HunspellImpl::~HunspellImpl() { affixpath = NULL; } -// load extra dictionaries -int Hunspell::add_dic(const char* dpath, const char* key) { - return m_Impl->add_dic(dpath, key); -} - // load extra dictionaries int HunspellImpl::add_dic(const char* dpath, const char* key) { if (!affixpath) @@ -220,6 +223,26 @@ int HunspellImpl::add_dic(const char* dpath, const char* key) { return 0; } + +// make a copy of src at dest while removing all characters +// specified in IGNORE rule +void HunspellImpl::clean_ignore(std::string& dest, + const std::string& src) { + dest.clear(); + dest.assign(src); + const char* ignoredchars = pAMgr ? 
pAMgr->get_ignore() : NULL; + if (ignoredchars != NULL) { + if (utf8) { + const std::vector& ignoredchars_utf16 = + pAMgr->get_ignore_utf16(); + remove_ignored_chars_utf(dest, ignoredchars_utf16); + } else { + remove_ignored_chars(dest, ignoredchars); + } + } +} + + // make a copy of src at destination while removing all leading // blanks and removing any trailing periods after recording // their presence with the abbreviation flag @@ -235,7 +258,11 @@ size_t HunspellImpl::cleanword2(std::string& dest, dest.clear(); dest_utf.clear(); - const char* q = src.c_str(); + // remove IGNORE characters from the string + std::string w2; + clean_ignore(w2, src); + + const char* q = w2.c_str(); // first skip over any leading blanks while (*q == ' ') @@ -407,11 +434,22 @@ void HunspellImpl::insert_sug(std::vector& slst, const std::string& slst.insert(slst.begin(), word); } -bool Hunspell::spell(const std::string& word, int* info, std::string* root) { - return m_Impl->spell(word, info, root); +bool HunspellImpl::spell(const std::string& word, int* info, std::string* root) { + bool r = spell_internal(word, info, root); + if (r && root) { + // output conversion + RepList* rl = (pAMgr) ? 
pAMgr->get_oconvtable() : NULL; + if (rl) { + std::string wspace; + if (rl->conv(*root, wspace)) { + *root = wspace; + } + } + } + return r; } -bool HunspellImpl::spell(const std::string& word, int* info, std::string* root) { +bool HunspellImpl::spell_internal(const std::string& word, int* info, std::string* root) { struct hentry* rv = NULL; int info2 = 0; @@ -561,12 +599,17 @@ bool HunspellImpl::spell(const std::string& word, int* info, std::string* root) break; } } + /* FALLTHROUGH */ case INITCAP: { - + // handle special capitalization of dotted I + bool Idot = (utf8 && (unsigned char) scw[0] == 0xc4 && (unsigned char) scw[1] == 0xb0); *info += SPELL_ORIGCAP; - mkallsmall2(scw, sunicw); - std::string u8buffer(scw); - mkinitcap2(scw, sunicw); + if (captype == ALLCAP) { + mkallsmall2(scw, sunicw); + mkinitcap2(scw, sunicw); + if (Idot) + scw.replace(0, 1, "\xc4\xb0"); + } if (captype == INITCAP) *info += SPELL_INITCAP; rv = checkword(scw, info, root); @@ -581,9 +624,13 @@ bool HunspellImpl::spell(const std::string& word, int* info, std::string* root) } if (rv && is_keepcase(rv) && (captype == ALLCAP)) rv = NULL; - if (rv) + if (rv || (Idot && langnum != LANG_az && langnum != LANG_tr && langnum != LANG_crh)) break; + mkallsmall2(scw, sunicw); + std::string u8buffer(scw); + mkinitcap2(scw, sunicw); + rv = checkword(u8buffer, info, root); if (abbv && !rv) { u8buffer.push_back('.'); @@ -625,7 +672,7 @@ bool HunspellImpl::spell(const std::string& word, int* info, std::string* root) } // recursive breaking at break points - if (!wordbreak.empty()) { + if (!wordbreak.empty() && !(*info & SPELL_FORBIDDEN)) { int nbr = 0; wl = scw.size(); @@ -662,6 +709,37 @@ bool HunspellImpl::spell(const std::string& word, int* info, std::string* root) } // other patterns + for (size_t j = 0; j < wordbreak.size(); ++j) { + size_t plen = wordbreak[j].size(); + size_t found = scw.find(wordbreak[j]); + if ((found > 0) && (found < wl - plen)) { + size_t found2 = scw.find(wordbreak[j], found 
+ 1); + // try to break at the second occurance + // to recognize dictionary words with wordbreak + if (found2 > 0 && (found2 < wl - plen)) + found = found2; + if (!spell(scw.substr(found + plen))) + continue; + std::string suffix(scw.substr(found)); + scw.resize(found); + // examine 2 sides of the break point + if (spell(scw)) + return true; + scw.append(suffix); + + // LANG_hu: spec. dash rule + if (langnum == LANG_hu && wordbreak[j] == "-") { + suffix = scw.substr(found + 1); + scw.resize(found + 1); + if (spell(scw)) + return true; // check the first part with dash + scw.append(suffix); + } + // end of LANG specific region + } + } + + // other patterns (break at first break point) for (size_t j = 0; j < wordbreak.size(); ++j) { size_t plen = wordbreak[j].size(); size_t found = scw.find(wordbreak[j]); @@ -697,23 +775,12 @@ struct hentry* HunspellImpl::checkword(const std::string& w, int* info, std::str const char* word; int len; - const char* ignoredchars = pAMgr ? pAMgr->get_ignore() : NULL; - if (ignoredchars != NULL) { - w2.assign(w); - if (utf8) { - const std::vector& ignoredchars_utf16 = - pAMgr->get_ignore_utf16(); - remove_ignored_chars_utf(w2, ignoredchars_utf16); - } else { - remove_ignored_chars(w2, ignoredchars); - } - word = w2.c_str(); - len = w2.size(); - usebuffer = true; - } else { - word = w.c_str(); - len = w.size(); - } + // remove IGNORE characters from the string + clean_ignore(w2, w); + + word = w2.c_str(); + len = w2.size(); + usebuffer = true; if (!len) return NULL; @@ -825,255 +892,11 @@ struct hentry* HunspellImpl::checkword(const std::string& w, int* info, std::str return he; } -std::vector Hunspell::suggest(const std::string& word) { - return m_Impl->suggest(word); -} - std::vector HunspellImpl::suggest(const std::string& word) { - std::vector slst; - - int onlycmpdsug = 0; - if (!pSMgr || m_HMgrs.empty()) - return slst; - - // process XML input of the simplified API (see manual) - if (word.compare(0, sizeof(SPELL_XML) - 3, SPELL_XML, 
sizeof(SPELL_XML) - 3) == 0) { - return spellml(word); - } - if (utf8) { - if (word.size() >= MAXWORDUTF8LEN) - return slst; - } else { - if (word.size() >= MAXWORDLEN) - return slst; - } - int captype = NOCAP; - size_t abbv = 0; - size_t wl = 0; - - std::string scw; - std::vector sunicw; - - // input conversion - RepList* rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL; - { - std::string wspace; - - bool convstatus = rl ? rl->conv(word, wspace) : false; - if (convstatus) - wl = cleanword2(scw, sunicw, wspace, &captype, &abbv); - else - wl = cleanword2(scw, sunicw, word, &captype, &abbv); - - if (wl == 0) - return slst; - } - - int capwords = 0; - - // check capitalized form for FORCEUCASE - if (pAMgr && captype == NOCAP && pAMgr->get_forceucase()) { - int info = SPELL_ORIGCAP; - if (checkword(scw, &info, NULL)) { - std::string form(scw); - mkinitcap(form); - slst.push_back(form); - return slst; - } - } - - switch (captype) { - case NOCAP: { - pSMgr->suggest(slst, scw.c_str(), &onlycmpdsug); - break; - } - - case INITCAP: { - capwords = 1; - pSMgr->suggest(slst, scw.c_str(), &onlycmpdsug); - std::string wspace(scw); - mkallsmall2(wspace, sunicw); - pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug); - break; - } - case HUHINITCAP: - capwords = 1; - case HUHCAP: { - pSMgr->suggest(slst, scw.c_str(), &onlycmpdsug); - // something.The -> something. 
The - size_t dot_pos = scw.find('.'); - if (dot_pos != std::string::npos) { - std::string postdot = scw.substr(dot_pos + 1); - int captype_; - if (utf8) { - std::vector postdotu; - u8_u16(postdotu, postdot); - captype_ = get_captype_utf8(postdotu, langnum); - } else { - captype_ = get_captype(postdot, csconv); - } - if (captype_ == INITCAP) { - std::string str(scw); - str.insert(dot_pos + 1, 1, ' '); - insert_sug(slst, str); - } - } - - std::string wspace; - - if (captype == HUHINITCAP) { - // TheOpenOffice.org -> The OpenOffice.org - wspace = scw; - mkinitsmall2(wspace, sunicw); - pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug); - } - wspace = scw; - mkallsmall2(wspace, sunicw); - if (spell(wspace.c_str())) - insert_sug(slst, wspace); - size_t prevns = slst.size(); - pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug); - if (captype == HUHINITCAP) { - mkinitcap2(wspace, sunicw); - if (spell(wspace.c_str())) - insert_sug(slst, wspace); - pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug); - } - // aNew -> "a New" (instead of "a new") - for (size_t j = prevns; j < slst.size(); ++j) { - const char* space = strchr(slst[j].c_str(), ' '); - if (space) { - size_t slen = strlen(space + 1); - // different case after space (need capitalisation) - if ((slen < wl) && strcmp(scw.c_str() + wl - slen, space + 1)) { - std::string first(slst[j].c_str(), space + 1); - std::string second(space + 1); - std::vector w; - if (utf8) - u8_u16(w, second); - mkinitcap2(second, w); - // set as first suggestion - slst.erase(slst.begin() + j); - slst.insert(slst.begin(), first + second); - } - } - } - break; - } - - case ALLCAP: { - std::string wspace(scw); - mkallsmall2(wspace, sunicw); - pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug); - if (pAMgr && pAMgr->get_keepcase() && spell(wspace.c_str())) - insert_sug(slst, wspace); - mkinitcap2(wspace, sunicw); - pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug); - for (size_t j = 0; j < slst.size(); ++j) { - mkallcap(slst[j]); - if (pAMgr && 
pAMgr->get_checksharps()) { - if (utf8) { - mystrrep(slst[j], "\xC3\x9F", "SS"); - } else { - mystrrep(slst[j], "\xDF", "SS"); - } - } - } - break; - } - } - - // LANG_hu section: replace '-' with ' ' in Hungarian - if (langnum == LANG_hu) { - for (size_t j = 0; j < slst.size(); ++j) { - size_t pos = slst[j].find('-'); - if (pos != std::string::npos) { - int info; - std::string w(slst[j].substr(0, pos)); - w.append(slst[j].substr(pos + 1)); - (void)spell(w, &info, NULL); - if ((info & SPELL_COMPOUND) && (info & SPELL_FORBIDDEN)) { - slst[j][pos] = ' '; - } else - slst[j][pos] = '-'; - } - } - } - // END OF LANG_hu section - - // try ngram approach since found nothing or only compound words - if (pAMgr && (slst.empty() || onlycmpdsug) && (pAMgr->get_maxngramsugs() != 0)) { - switch (captype) { - case NOCAP: { - pSMgr->ngsuggest(slst, scw.c_str(), m_HMgrs); - break; - } - case HUHINITCAP: - capwords = 1; - case HUHCAP: { - std::string wspace(scw); - mkallsmall2(wspace, sunicw); - pSMgr->ngsuggest(slst, wspace.c_str(), m_HMgrs); - break; - } - case INITCAP: { - capwords = 1; - std::string wspace(scw); - mkallsmall2(wspace, sunicw); - pSMgr->ngsuggest(slst, wspace.c_str(), m_HMgrs); - break; - } - case ALLCAP: { - std::string wspace(scw); - mkallsmall2(wspace, sunicw); - size_t oldns = slst.size(); - pSMgr->ngsuggest(slst, wspace.c_str(), m_HMgrs); - for (size_t j = oldns; j < slst.size(); ++j) { - mkallcap(slst[j]); - } - break; - } - } - } - - // try dash suggestion (Afo-American -> Afro-American) - size_t dash_pos = scw.find('-'); - if (dash_pos != std::string::npos) { - int nodashsug = 1; - for (size_t j = 0; j < slst.size() && nodashsug == 1; ++j) { - if (slst[j].find('-') != std::string::npos) - nodashsug = 0; - } - - size_t prev_pos = 0; - bool last = false; - - while (nodashsug && !last) { - if (dash_pos == scw.size()) - last = 1; - std::string chunk = scw.substr(prev_pos, dash_pos - prev_pos); - if (!spell(chunk.c_str())) { - std::vector nlst = 
suggest(chunk.c_str()); - for (std::vector::reverse_iterator j = nlst.rbegin(); j != nlst.rend(); ++j) { - std::string wspace = scw.substr(0, prev_pos); - wspace.append(*j); - if (!last) { - wspace.append("-"); - wspace.append(scw.substr(dash_pos + 1)); - } - insert_sug(slst, wspace); - } - nodashsug = 0; - } - if (!last) { - prev_pos = dash_pos + 1; - dash_pos = scw.find('-', prev_pos); - } - if (dash_pos == std::string::npos) - dash_pos = scw.size(); - } - } - + bool capwords; + size_t abbv; + int captype; + std::vector slst = suggest_internal(word, capwords, abbv, captype); // word reversing wrapper for complex prefixes if (complexprefixes) { for (size_t j = 0; j < slst.size(); ++j) { @@ -1148,29 +971,322 @@ std::vector HunspellImpl::suggest(const std::string& word) { slst.resize(l); // output conversion - rl = (pAMgr) ? pAMgr->get_oconvtable() : NULL; - for (size_t j = 0; rl && j < slst.size(); ++j) { - std::string wspace; - if (rl->conv(slst[j], wspace)) { - slst[j] = wspace; + RepList* rl = (pAMgr) ? pAMgr->get_oconvtable() : NULL; + if (rl) { + for (size_t i = 0; rl && i < slst.size(); ++i) { + std::string wspace; + if (rl->conv(slst[i], wspace)) { + slst[i] = wspace; + } } } - return slst; } -const std::string& Hunspell::get_dict_encoding() const { - return m_Impl->get_dict_encoding(); +std::vector HunspellImpl::suggest_internal(const std::string& word, + bool& capwords, size_t& abbv, int& captype) { + captype = NOCAP; + abbv = 0; + capwords = false; + + std::vector slst; + + int onlycmpdsug = 0; + if (!pSMgr || m_HMgrs.empty()) + return slst; + + // process XML input of the simplified API (see manual) + if (word.compare(0, sizeof(SPELL_XML) - 3, SPELL_XML, sizeof(SPELL_XML) - 3) == 0) { + return spellml(word); + } + if (utf8) { + if (word.size() >= MAXWORDUTF8LEN) + return slst; + } else { + if (word.size() >= MAXWORDLEN) + return slst; + } + size_t wl = 0; + + std::string scw; + std::vector sunicw; + + // input conversion + RepList* rl = (pAMgr) ? 
pAMgr->get_iconvtable() : NULL; + { + std::string wspace; + + bool convstatus = rl ? rl->conv(word, wspace) : false; + if (convstatus) + wl = cleanword2(scw, sunicw, wspace, &captype, &abbv); + else + wl = cleanword2(scw, sunicw, word, &captype, &abbv); + + if (wl == 0) + return slst; + } + + bool good = false; + + clock_t timelimit; + // initialize in every suggestion call + timelimit = clock(); + + // check capitalized form for FORCEUCASE + if (pAMgr && captype == NOCAP && pAMgr->get_forceucase()) { + int info = SPELL_ORIGCAP; + if (checkword(scw, &info, NULL)) { + std::string form(scw); + mkinitcap(form); + slst.push_back(form); + return slst; + } + } + + switch (captype) { + case NOCAP: { + good |= pSMgr->suggest(slst, scw.c_str(), &onlycmpdsug); + if (clock() > timelimit + TIMELIMIT_GLOBAL) + return slst; + if (abbv) { + std::string wspace(scw); + wspace.push_back('.'); + good |= pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug); + if (clock() > timelimit + TIMELIMIT_GLOBAL) + return slst; + } + break; + } + + case INITCAP: { + capwords = true; + good |= pSMgr->suggest(slst, scw.c_str(), &onlycmpdsug); + if (clock() > timelimit + TIMELIMIT_GLOBAL) + return slst; + std::string wspace(scw); + mkallsmall2(wspace, sunicw); + good |= pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug); + if (clock() > timelimit + TIMELIMIT_GLOBAL) + return slst; + break; + } + case HUHINITCAP: + capwords = true; + /* FALLTHROUGH */ + case HUHCAP: { + good |= pSMgr->suggest(slst, scw.c_str(), &onlycmpdsug); + if (clock() > timelimit + TIMELIMIT_GLOBAL) + return slst; + // something.The -> something. 
The + size_t dot_pos = scw.find('.'); + if (dot_pos != std::string::npos) { + std::string postdot = scw.substr(dot_pos + 1); + int captype_; + if (utf8) { + std::vector postdotu; + u8_u16(postdotu, postdot); + captype_ = get_captype_utf8(postdotu, langnum); + } else { + captype_ = get_captype(postdot, csconv); + } + if (captype_ == INITCAP) { + std::string str(scw); + str.insert(dot_pos + 1, 1, ' '); + insert_sug(slst, str); + } + } + + std::string wspace; + + if (captype == HUHINITCAP) { + // TheOpenOffice.org -> The OpenOffice.org + wspace = scw; + mkinitsmall2(wspace, sunicw); + good |= pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug); + if (clock() > timelimit + TIMELIMIT_GLOBAL) + return slst; + } + wspace = scw; + mkallsmall2(wspace, sunicw); + if (spell(wspace.c_str())) + insert_sug(slst, wspace); + size_t prevns = slst.size(); + good |= pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug); + if (clock() > timelimit + TIMELIMIT_GLOBAL) + return slst; + if (captype == HUHINITCAP) { + mkinitcap2(wspace, sunicw); + if (spell(wspace.c_str())) + insert_sug(slst, wspace); + good |= pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug); + if (clock() > timelimit + TIMELIMIT_GLOBAL) + return slst; + } + // aNew -> "a New" (instead of "a new") + for (size_t j = prevns; j < slst.size(); ++j) { + const char* space = strchr(slst[j].c_str(), ' '); + if (space) { + size_t slen = strlen(space + 1); + // different case after space (need capitalisation) + if ((slen < wl) && strcmp(scw.c_str() + wl - slen, space + 1)) { + std::string first(slst[j].c_str(), space + 1); + std::string second(space + 1); + std::vector w; + if (utf8) + u8_u16(w, second); + mkinitcap2(second, w); + // set as first suggestion + slst.erase(slst.begin() + j); + slst.insert(slst.begin(), first + second); + } + } + } + break; + } + + case ALLCAP: { + std::string wspace(scw); + mkallsmall2(wspace, sunicw); + good |= pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug); + if (clock() > timelimit + 
TIMELIMIT_GLOBAL) + return slst; + if (pAMgr && pAMgr->get_keepcase() && spell(wspace.c_str())) + insert_sug(slst, wspace); + mkinitcap2(wspace, sunicw); + good |= pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug); + if (clock() > timelimit + TIMELIMIT_GLOBAL) + return slst; + for (size_t j = 0; j < slst.size(); ++j) { + mkallcap(slst[j]); + if (pAMgr && pAMgr->get_checksharps()) { + if (utf8) { + mystrrep(slst[j], "\xC3\x9F", "SS"); + } else { + mystrrep(slst[j], "\xDF", "SS"); + } + } + } + break; + } + } + + // LANG_hu section: replace '-' with ' ' in Hungarian + if (langnum == LANG_hu) { + for (size_t j = 0; j < slst.size(); ++j) { + size_t pos = slst[j].find('-'); + if (pos != std::string::npos) { + int info; + std::string w(slst[j].substr(0, pos)); + w.append(slst[j].substr(pos + 1)); + (void)spell(w, &info, NULL); + if ((info & SPELL_COMPOUND) && (info & SPELL_FORBIDDEN)) { + slst[j][pos] = ' '; + } else + slst[j][pos] = '-'; + } + } + } + // END OF LANG_hu section + // try ngram approach since found nothing good suggestion + if (!good && pAMgr && (slst.empty() || onlycmpdsug) && (pAMgr->get_maxngramsugs() != 0)) { + switch (captype) { + case NOCAP: { + pSMgr->ngsuggest(slst, scw.c_str(), m_HMgrs, NOCAP); + if (clock() > timelimit + TIMELIMIT_GLOBAL) + return slst; + break; + } + /* FALLTHROUGH */ + case HUHINITCAP: + capwords = true; + /* FALLTHROUGH */ + case HUHCAP: { + std::string wspace(scw); + mkallsmall2(wspace, sunicw); + pSMgr->ngsuggest(slst, wspace.c_str(), m_HMgrs, HUHCAP); + if (clock() > timelimit + TIMELIMIT_GLOBAL) + return slst; + break; + } + case INITCAP: { + capwords = true; + std::string wspace(scw); + mkallsmall2(wspace, sunicw); + pSMgr->ngsuggest(slst, wspace.c_str(), m_HMgrs, INITCAP); + if (clock() > timelimit + TIMELIMIT_GLOBAL) + return slst; + break; + } + case ALLCAP: { + std::string wspace(scw); + mkallsmall2(wspace, sunicw); + size_t oldns = slst.size(); + pSMgr->ngsuggest(slst, wspace.c_str(), m_HMgrs, ALLCAP); + if (clock() 
> timelimit + TIMELIMIT_GLOBAL) + return slst; + for (size_t j = oldns; j < slst.size(); ++j) { + mkallcap(slst[j]); + } + break; + } + } + } + + // try dash suggestion (Afo-American -> Afro-American) + // Note: LibreOffice was modified to treat dashes as word + // characters to check "scot-free" etc. word forms, but + // we need to handle suggestions for "Afo-American", etc., + // while "Afro-American" is missing from the dictionary. + // TODO avoid possible overgeneration + size_t dash_pos = scw.find('-'); + if (dash_pos != std::string::npos) { + int nodashsug = 1; + for (size_t j = 0; j < slst.size() && nodashsug == 1; ++j) { + if (slst[j].find('-') != std::string::npos) + nodashsug = 0; + } + + size_t prev_pos = 0; + bool last = false; + + while (!good && nodashsug && !last) { + if (dash_pos == scw.size()) + last = 1; + std::string chunk = scw.substr(prev_pos, dash_pos - prev_pos); + if (!spell(chunk.c_str())) { + std::vector nlst = suggest(chunk.c_str()); + if (clock() > timelimit + TIMELIMIT_GLOBAL) + return slst; + for (std::vector::reverse_iterator j = nlst.rbegin(); j != nlst.rend(); ++j) { + std::string wspace = scw.substr(0, prev_pos); + wspace.append(*j); + if (!last) { + wspace.append("-"); + wspace.append(scw.substr(dash_pos + 1)); + } + int info = 0; + if (pAMgr && pAMgr->get_forbiddenword()) + checkword(wspace, &info, NULL); + if (!(info & SPELL_FORBIDDEN)) + insert_sug(slst, wspace); + } + nodashsug = 0; + } + if (!last) { + prev_pos = dash_pos + 1; + dash_pos = scw.find('-', prev_pos); + } + if (dash_pos == std::string::npos) + dash_pos = scw.size(); + } + } + return slst; } const std::string& HunspellImpl::get_dict_encoding() const { return encoding; } -std::vector Hunspell::stem(const std::vector& desc) { - return m_Impl->stem(desc); -} - std::vector HunspellImpl::stem(const std::vector& desc) { std::vector slst; @@ -1239,30 +1355,14 @@ std::vector HunspellImpl::stem(const std::vector& desc return slst; } -std::vector Hunspell::stem(const 
std::string& word) { - return m_Impl->stem(word); -} - std::vector HunspellImpl::stem(const std::string& word) { return stem(analyze(word)); } -const char* Hunspell::get_wordchars() const { - return m_Impl->get_wordchars().c_str(); -} - -const std::string& Hunspell::get_wordchars_cpp() const { - return m_Impl->get_wordchars(); -} - -const std::string& HunspellImpl::get_wordchars() const { +const std::string& HunspellImpl::get_wordchars_cpp() const { return pAMgr->get_wordchars(); } -const std::vector& Hunspell::get_wordchars_utf16() const { - return m_Impl->get_wordchars_utf16(); -} - const std::vector& HunspellImpl::get_wordchars_utf16() const { return pAMgr->get_wordchars_utf16(); } @@ -1298,45 +1398,25 @@ int HunspellImpl::mkinitsmall2(std::string& u8, std::vector& u16) { return u8.size(); } -int Hunspell::add(const std::string& word) { - return m_Impl->add(word); -} - int HunspellImpl::add(const std::string& word) { if (!m_HMgrs.empty()) return m_HMgrs[0]->add(word); return 0; } -int Hunspell::add_with_affix(const std::string& word, const std::string& example) { - return m_Impl->add_with_affix(word, example); -} - int HunspellImpl::add_with_affix(const std::string& word, const std::string& example) { if (!m_HMgrs.empty()) return m_HMgrs[0]->add_with_affix(word, example); return 0; } -int Hunspell::remove(const std::string& word) { - return m_Impl->remove(word); -} - int HunspellImpl::remove(const std::string& word) { if (!m_HMgrs.empty()) return m_HMgrs[0]->remove(word); return 0; } -const char* Hunspell::get_version() const { - return m_Impl->get_version().c_str(); -} - -const std::string& Hunspell::get_version_cpp() const { - return m_Impl->get_version(); -} - -const std::string& HunspellImpl::get_version() const { +const std::string& HunspellImpl::get_version_cpp() const { return pAMgr->get_version(); } @@ -1344,10 +1424,6 @@ struct cs_info* HunspellImpl::get_csconv() { return csconv; } -struct cs_info* Hunspell::get_csconv() { - return m_Impl->get_csconv(); 
-} - void HunspellImpl::cat_result(std::string& result, const std::string& st) { if (!st.empty()) { if (!result.empty()) @@ -1356,11 +1432,22 @@ void HunspellImpl::cat_result(std::string& result, const std::string& st) { } } -std::vector Hunspell::analyze(const std::string& word) { - return m_Impl->analyze(word); +std::vector HunspellImpl::analyze(const std::string& word) { + std::vector slst = analyze_internal(word); + // output conversion + RepList* rl = (pAMgr) ? pAMgr->get_oconvtable() : NULL; + if (rl) { + for (size_t i = 0; rl && i < slst.size(); ++i) { + std::string wspace; + if (rl->conv(slst[i], wspace)) { + slst[i] = wspace; + } + } + } + return slst; } -std::vector HunspellImpl::analyze(const std::string& word) { +std::vector HunspellImpl::analyze_internal(const std::string& word) { std::vector slst; if (!pSMgr || m_HMgrs.empty()) return slst; @@ -1593,10 +1680,6 @@ std::vector HunspellImpl::analyze(const std::string& word) { return slst; } -std::vector Hunspell::generate(const std::string& word, const std::vector& pl) { - return m_Impl->generate(word, pl); -} - std::vector HunspellImpl::generate(const std::string& word, const std::vector& pl) { std::vector slst; if (!pSMgr || pl.empty()) @@ -1641,10 +1724,6 @@ std::vector HunspellImpl::generate(const std::string& word, const s return slst; } -std::vector Hunspell::generate(const std::string& word, const std::string& pattern) { - return m_Impl->generate(word, pattern); -} - std::vector HunspellImpl::generate(const std::string& word, const std::string& pattern) { std::vector pl = analyze(pattern); std::vector slst = generate(word, pl); @@ -1670,28 +1749,10 @@ std::string HunspellImpl::get_xml_par(const char* par) { return dest; } -int Hunspell::get_langnum() const { - return m_Impl->get_langnum(); -} - int HunspellImpl::get_langnum() const { return langnum; } -bool Hunspell::input_conv(const std::string& word, std::string& dest) { - return m_Impl->input_conv(word, dest); -} - -int 
Hunspell::input_conv(const char* word, char* dest, size_t destsize) { - std::string d; - bool ret = input_conv(word, d); - if (ret && d.size() < destsize) { - strncpy(dest, d.c_str(), destsize); - return 1; - } - return 0; -} - bool HunspellImpl::input_conv(const std::string& word, std::string& dest) { RepList* rl = pAMgr ? pAMgr->get_iconvtable() : NULL; if (rl) { @@ -1804,186 +1865,25 @@ std::vector HunspellImpl::spellml(const std::string& in_word) { } } } + } else if (check_xml_par(q, "type=", "add")) { + std::string cw = get_xml_par(strchr(q2, '>')); + if (cw.empty()) + return slst; + const char* q3 = strstr(q2 + 1, "')); + if (!cw2.empty()) { + add_with_affix(cw, cw2); + } else { + add(cw); + } + } else { + add(cw); + } } return slst; } -int Hunspell::spell(const char* word, int* info, char** root) { - std::string sroot; - bool ret = m_Impl->spell(word, info, root ? &sroot : NULL); - if (root) { - if (sroot.empty()) { - *root = NULL; - } else { - *root = mystrdup(sroot.c_str()); - } - } - return ret; -} - -namespace { - int munge_vector(char*** slst, const std::vector& items) { - if (items.empty()) { - *slst = NULL; - return 0; - } else { - *slst = (char**)malloc(sizeof(char*) * items.size()); - if (!*slst) - return 0; - for (size_t i = 0; i < items.size(); ++i) - (*slst)[i] = mystrdup(items[i].c_str()); - } - return items.size(); - } -} - -void Hunspell::free_list(char*** slst, int n) { - Hunspell_free_list((Hunhandle*)(this), slst, n); -} - -int Hunspell::suggest(char*** slst, const char* word) { - return Hunspell_suggest((Hunhandle*)(this), slst, word); -} - -int Hunspell::suffix_suggest(char*** slst, const char* root_word) { - std::vector stems = m_Impl->suffix_suggest(root_word); - return munge_vector(slst, stems); -} - -char* Hunspell::get_dic_encoding() { - return &(m_Impl->dic_encoding_vec[0]); -} - -int Hunspell::stem(char*** slst, char** desc, int n) { - return Hunspell_stem2((Hunhandle*)(this), slst, desc, n); -} - -int Hunspell::stem(char*** slst, 
const char* word) { - return Hunspell_stem((Hunhandle*)(this), slst, word); -} - -int Hunspell::analyze(char*** slst, const char* word) { - return Hunspell_analyze((Hunhandle*)(this), slst, word); -} - -int Hunspell::generate(char*** slst, const char* word, char** pl, int pln) { - return Hunspell_generate2((Hunhandle*)(this), slst, word, pl, pln); -} - -int Hunspell::generate(char*** slst, const char* word, const char* pattern) { - return Hunspell_generate((Hunhandle*)(this), slst, word, pattern); -} - -Hunhandle* Hunspell_create(const char* affpath, const char* dpath) { - return (Hunhandle*)(new Hunspell(affpath, dpath)); -} - -Hunhandle* Hunspell_create_key(const char* affpath, - const char* dpath, - const char* key) { - return reinterpret_cast(new Hunspell(affpath, dpath, key)); -} - -void Hunspell_destroy(Hunhandle* pHunspell) { - delete reinterpret_cast(pHunspell); -} - -int Hunspell_add_dic(Hunhandle* pHunspell, const char* dpath) { - return reinterpret_cast(pHunspell)->add_dic(dpath); -} - -int Hunspell_spell(Hunhandle* pHunspell, const char* word) { - return reinterpret_cast(pHunspell)->spell(std::string(word)); -} - -char* Hunspell_get_dic_encoding(Hunhandle* pHunspell) { - return reinterpret_cast(pHunspell)->get_dic_encoding(); -} - -int Hunspell_suggest(Hunhandle* pHunspell, char*** slst, const char* word) { - std::vector suggests = reinterpret_cast(pHunspell)->suggest(word); - return munge_vector(slst, suggests); -} - -int Hunspell_analyze(Hunhandle* pHunspell, char*** slst, const char* word) { - std::vector stems = reinterpret_cast(pHunspell)->analyze(word); - return munge_vector(slst, stems); -} - -int Hunspell_stem(Hunhandle* pHunspell, char*** slst, const char* word) { - - std::vector stems = reinterpret_cast(pHunspell)->stem(word); - return munge_vector(slst, stems); -} - -int Hunspell_stem2(Hunhandle* pHunspell, char*** slst, char** desc, int n) { - std::vector morph; - for (int i = 0; i < n; ++i) - morph.push_back(desc[i]); - - std::vector stems 
= reinterpret_cast(pHunspell)->stem(morph); - return munge_vector(slst, stems); -} - -int Hunspell_generate(Hunhandle* pHunspell, - char*** slst, - const char* word, - const char* pattern) { - std::vector stems = reinterpret_cast(pHunspell)->generate(word, pattern); - return munge_vector(slst, stems); -} - -int Hunspell_generate2(Hunhandle* pHunspell, - char*** slst, - const char* word, - char** desc, - int n) { - std::vector morph; - for (int i = 0; i < n; ++i) - morph.push_back(desc[i]); - - std::vector stems = reinterpret_cast(pHunspell)->generate(word, morph); - return munge_vector(slst, stems); -} - -/* functions for run-time modification of the dictionary */ - -/* add word to the run-time dictionary */ - -int Hunspell_add(Hunhandle* pHunspell, const char* word) { - return reinterpret_cast(pHunspell)->add(word); -} - -/* add word to the run-time dictionary with affix flags of - * the example (a dictionary word): Hunspell will recognize - * affixed forms of the new word, too. - */ - -int Hunspell_add_with_affix(Hunhandle* pHunspell, - const char* word, - const char* example) { - return reinterpret_cast(pHunspell)->add_with_affix(word, example); -} - -/* remove word from the run-time dictionary */ - -int Hunspell_remove(Hunhandle* pHunspell, const char* word) { - return reinterpret_cast(pHunspell)->remove(word); -} - -void Hunspell_free_list(Hunhandle*, char*** list, int n) { - if (list && *list) { - for (int i = 0; i < n; i++) - free((*list)[i]); - free(*list); - *list = NULL; - } -} - -std::vector Hunspell::suffix_suggest(const std::string& root_word) { - return m_Impl->suffix_suggest(root_word); -} - std::vector HunspellImpl::suffix_suggest(const std::string& root_word) { std::vector slst; struct hentry* he = NULL; @@ -2017,3 +1917,331 @@ std::vector HunspellImpl::suffix_suggest(const std::string& root_wo } return slst; } + +namespace { + int munge_vector(char*** slst, const std::vector& items) { + if (items.empty()) { + *slst = NULL; + return 0; + } else { + 
*slst = (char**)malloc(sizeof(char*) * items.size()); + if (!*slst) + return 0; + for (size_t i = 0; i < items.size(); ++i) + (*slst)[i] = mystrdup(items[i].c_str()); + } + return items.size(); + } +} + +int HunspellImpl::spell(const char* word, int* info, char** root) { + std::string sroot; + bool ret = spell(word, info, root ? &sroot : NULL); + if (root) { + if (sroot.empty()) { + *root = NULL; + } else { + *root = mystrdup(sroot.c_str()); + } + } + return ret; +} + +int HunspellImpl::suggest(char*** slst, const char* word) { + std::vector suggests = suggest(word); + return munge_vector(slst, suggests); +} + +int HunspellImpl::suffix_suggest(char*** slst, const char* root_word) { + std::vector stems = suffix_suggest(root_word); + return munge_vector(slst, stems); +} + +void HunspellImpl::free_list(char*** slst, int n) { + if (slst && *slst) { + for (int i = 0; i < n; i++) + free((*slst)[i]); + free(*slst); + *slst = NULL; + } +} + +char* HunspellImpl::get_dic_encoding() { + return &encoding[0]; +} + +int HunspellImpl::analyze(char*** slst, const char* word) { + std::vector stems = analyze(word); + return munge_vector(slst, stems); +} + +int HunspellImpl::stem(char*** slst, const char* word) { + std::vector stems = stem(word); + return munge_vector(slst, stems); +} + +int HunspellImpl::stem(char*** slst, char** desc, int n) { + std::vector morph; + for (int i = 0; i < n; ++i) + morph.push_back(desc[i]); + + std::vector stems = stem(morph); + return munge_vector(slst, stems); +} + +int HunspellImpl::generate(char*** slst, const char* word, const char* pattern) { + std::vector stems = generate(word, pattern); + return munge_vector(slst, stems); +} + +int HunspellImpl::generate(char*** slst, const char* word, char** pl, int pln) { + std::vector morph; + for (int i = 0; i < pln; ++i) + morph.push_back(pl[i]); + + std::vector stems = generate(word, morph); + return munge_vector(slst, stems); +} + +const char* HunspellImpl::get_wordchars() const { + return 
get_wordchars_cpp().c_str(); +} + +const char* HunspellImpl::get_version() const { + return get_version_cpp().c_str(); +} + +int HunspellImpl::input_conv(const char* word, char* dest, size_t destsize) { + std::string d; + bool ret = input_conv(word, d); + if (ret && d.size() < destsize) { + strncpy(dest, d.c_str(), destsize); + return 1; + } + return 0; +} + +Hunspell::Hunspell(const char* affpath, const char* dpath, const char* key) + : m_Impl(new HunspellImpl(affpath, dpath, key)) { +} + +Hunspell::~Hunspell() { + delete m_Impl; +} + +// load extra dictionaries +int Hunspell::add_dic(const char* dpath, const char* key) { + return m_Impl->add_dic(dpath, key); +} + +bool Hunspell::spell(const std::string& word, int* info, std::string* root) { + return m_Impl->spell(word, info, root); +} + +std::vector Hunspell::suggest(const std::string& word) { + return m_Impl->suggest(word); +} + +std::vector Hunspell::suffix_suggest(const std::string& root_word) { + return m_Impl->suffix_suggest(root_word); +} + +const std::string& Hunspell::get_dict_encoding() const { + return m_Impl->get_dict_encoding(); +} + +std::vector Hunspell::stem(const std::vector& desc) { + return m_Impl->stem(desc); +} + +std::vector Hunspell::stem(const std::string& word) { + return m_Impl->stem(word); +} + +const std::string& Hunspell::get_wordchars_cpp() const { + return m_Impl->get_wordchars_cpp(); +} + +const std::vector& Hunspell::get_wordchars_utf16() const { + return m_Impl->get_wordchars_utf16(); +} + +int Hunspell::add(const std::string& word) { + return m_Impl->add(word); +} + +int Hunspell::add_with_affix(const std::string& word, const std::string& example) { + return m_Impl->add_with_affix(word, example); +} + +int Hunspell::remove(const std::string& word) { + return m_Impl->remove(word); +} + +const std::string& Hunspell::get_version_cpp() const { + return m_Impl->get_version_cpp(); +} + +struct cs_info* Hunspell::get_csconv() { + return m_Impl->get_csconv(); +} + +std::vector 
Hunspell::analyze(const std::string& word) { + return m_Impl->analyze(word); +} + +std::vector Hunspell::generate(const std::string& word, const std::vector& pl) { + return m_Impl->generate(word, pl); +} + +std::vector Hunspell::generate(const std::string& word, const std::string& pattern) { + return m_Impl->generate(word, pattern); +} + +int Hunspell::get_langnum() const { + return m_Impl->get_langnum(); +} + +bool Hunspell::input_conv(const std::string& word, std::string& dest) { + return m_Impl->input_conv(word, dest); +} + +int Hunspell::spell(const char* word, int* info, char** root) { + return m_Impl->spell(word, info, root); +} + +int Hunspell::suggest(char*** slst, const char* word) { + return m_Impl->suggest(slst, word); +} + +int Hunspell::suffix_suggest(char*** slst, const char* root_word) { + return m_Impl->suffix_suggest(slst, root_word); +} + +void Hunspell::free_list(char*** slst, int n) { + m_Impl->free_list(slst, n); +} + +char* Hunspell::get_dic_encoding() { + return m_Impl->get_dic_encoding(); +} + +int Hunspell::analyze(char*** slst, const char* word) { + return m_Impl->analyze(slst, word); +} + +int Hunspell::stem(char*** slst, const char* word) { + return m_Impl->stem(slst, word); +} + +int Hunspell::stem(char*** slst, char** desc, int n) { + return m_Impl->stem(slst, desc, n); +} + +int Hunspell::generate(char*** slst, const char* word, const char* pattern) { + return m_Impl->generate(slst, word, pattern); +} + +int Hunspell::generate(char*** slst, const char* word, char** pl, int pln) { + return m_Impl->generate(slst, word, pl, pln); +} + +const char* Hunspell::get_wordchars() const { + return m_Impl->get_wordchars(); +} + +const char* Hunspell::get_version() const { + return m_Impl->get_version(); +} + +int Hunspell::input_conv(const char* word, char* dest, size_t destsize) { + return m_Impl->input_conv(word, dest, destsize); +} + +Hunhandle* Hunspell_create(const char* affpath, const char* dpath) { + return reinterpret_cast(new 
HunspellImpl(affpath, dpath)); +} + +Hunhandle* Hunspell_create_key(const char* affpath, + const char* dpath, + const char* key) { + return reinterpret_cast(new HunspellImpl(affpath, dpath, key)); +} + +void Hunspell_destroy(Hunhandle* pHunspell) { + delete reinterpret_cast(pHunspell); +} + +int Hunspell_add_dic(Hunhandle* pHunspell, const char* dpath) { + return reinterpret_cast(pHunspell)->add_dic(dpath); +} + +int Hunspell_spell(Hunhandle* pHunspell, const char* word) { + return reinterpret_cast(pHunspell)->spell(word); +} + +char* Hunspell_get_dic_encoding(Hunhandle* pHunspell) { + return reinterpret_cast(pHunspell)->get_dic_encoding(); +} + +int Hunspell_suggest(Hunhandle* pHunspell, char*** slst, const char* word) { + return reinterpret_cast(pHunspell)->suggest(slst, word); +} + +int Hunspell_analyze(Hunhandle* pHunspell, char*** slst, const char* word) { + return reinterpret_cast(pHunspell)->analyze(slst, word); +} + +int Hunspell_stem(Hunhandle* pHunspell, char*** slst, const char* word) { + return reinterpret_cast(pHunspell)->stem(slst, word); +} + +int Hunspell_stem2(Hunhandle* pHunspell, char*** slst, char** desc, int n) { + return reinterpret_cast(pHunspell)->stem(slst, desc, n); +} + +int Hunspell_generate(Hunhandle* pHunspell, + char*** slst, + const char* word, + const char* pattern) +{ + return reinterpret_cast(pHunspell)->generate(slst, word, pattern); +} + +int Hunspell_generate2(Hunhandle* pHunspell, + char*** slst, + const char* word, + char** desc, + int n) +{ + return reinterpret_cast(pHunspell)->generate(slst, word, desc, n); +} + +/* functions for run-time modification of the dictionary */ + +/* add word to the run-time dictionary */ + +int Hunspell_add(Hunhandle* pHunspell, const char* word) { + return reinterpret_cast(pHunspell)->add(word); +} + +/* add word to the run-time dictionary with affix flags of + * the example (a dictionary word): Hunspell will recognize + * affixed forms of the new word, too. 
+ */ + +int Hunspell_add_with_affix(Hunhandle* pHunspell, + const char* word, + const char* example) { + return reinterpret_cast(pHunspell)->add_with_affix(word, example); +} + +/* remove word from the run-time dictionary */ + +int Hunspell_remove(Hunhandle* pHunspell, const char* word) { + return reinterpret_cast(pHunspell)->remove(word); +} + +void Hunspell_free_list(Hunhandle* pHunspell, char*** list, int n) { + reinterpret_cast(pHunspell)->free_list(list, n); +} diff --git a/3rdparty/hunspell/1.6.2/src/hunspell/hunspell.h b/3rdparty/hunspell/1.7.0/src/hunspell/hunspell.h similarity index 100% rename from 3rdparty/hunspell/1.6.2/src/hunspell/hunspell.h rename to 3rdparty/hunspell/1.7.0/src/hunspell/hunspell.h diff --git a/3rdparty/hunspell/1.6.2/src/hunspell/hunspell.hxx b/3rdparty/hunspell/1.7.0/src/hunspell/hunspell.hxx similarity index 100% rename from 3rdparty/hunspell/1.6.2/src/hunspell/hunspell.hxx rename to 3rdparty/hunspell/1.7.0/src/hunspell/hunspell.hxx diff --git a/3rdparty/hunspell/1.6.2/src/hunspell/hunvisapi.h b/3rdparty/hunspell/1.7.0/src/hunspell/hunvisapi.h similarity index 95% rename from 3rdparty/hunspell/1.6.2/src/hunspell/hunvisapi.h rename to 3rdparty/hunspell/1.7.0/src/hunspell/hunvisapi.h index eb2b348091..ed0a502ba2 100644 --- a/3rdparty/hunspell/1.6.2/src/hunspell/hunvisapi.h +++ b/3rdparty/hunspell/1.7.0/src/hunspell/hunvisapi.h @@ -3,7 +3,7 @@ #if defined(HUNSPELL_STATIC) # define LIBHUNSPELL_DLL_EXPORTED -#elif defined(_MSC_VER) +#elif defined(_WIN32) # if defined(BUILDING_LIBHUNSPELL) # define LIBHUNSPELL_DLL_EXPORTED __declspec(dllexport) # else diff --git a/3rdparty/hunspell/1.6.2/src/hunspell/hunvisapi.h.in b/3rdparty/hunspell/1.7.0/src/hunspell/hunvisapi.h.in similarity index 95% rename from 3rdparty/hunspell/1.6.2/src/hunspell/hunvisapi.h.in rename to 3rdparty/hunspell/1.7.0/src/hunspell/hunvisapi.h.in index a1020c8048..d8bacc6765 100644 --- a/3rdparty/hunspell/1.6.2/src/hunspell/hunvisapi.h.in +++ 
b/3rdparty/hunspell/1.7.0/src/hunspell/hunvisapi.h.in @@ -3,7 +3,7 @@ #if defined(HUNSPELL_STATIC) # define LIBHUNSPELL_DLL_EXPORTED -#elif defined(_MSC_VER) +#elif defined(_WIN32) # if defined(BUILDING_LIBHUNSPELL) # define LIBHUNSPELL_DLL_EXPORTED __declspec(dllexport) # else diff --git a/3rdparty/hunspell/1.6.2/src/hunspell/hunzip.cxx b/3rdparty/hunspell/1.7.0/src/hunspell/hunzip.cxx similarity index 99% rename from 3rdparty/hunspell/1.6.2/src/hunspell/hunzip.cxx rename to 3rdparty/hunspell/1.7.0/src/hunspell/hunzip.cxx index 8962b100b1..a88e9749f2 100644 --- a/3rdparty/hunspell/1.6.2/src/hunspell/hunzip.cxx +++ b/3rdparty/hunspell/1.7.0/src/hunspell/hunzip.cxx @@ -178,7 +178,7 @@ int Hunzip::getbuf() { do { if (inc == 0) { fin.read(in, BUFSIZE); - inbits = fin.gcount() * 8; + inbits = int(fin.gcount() * 8); } for (; inc < inbits; inc++) { int b = (in[inc / 8] & (1 << (7 - (inc % 8)))) ? 1 : 0; diff --git a/3rdparty/hunspell/1.6.2/src/hunspell/hunzip.hxx b/3rdparty/hunspell/1.7.0/src/hunspell/hunzip.hxx similarity index 100% rename from 3rdparty/hunspell/1.6.2/src/hunspell/hunzip.hxx rename to 3rdparty/hunspell/1.7.0/src/hunspell/hunzip.hxx diff --git a/3rdparty/hunspell/1.6.2/src/hunspell/langnum.hxx b/3rdparty/hunspell/1.7.0/src/hunspell/langnum.hxx similarity index 98% rename from 3rdparty/hunspell/1.6.2/src/hunspell/langnum.hxx rename to 3rdparty/hunspell/1.7.0/src/hunspell/langnum.hxx index a64d3d7869..f09de4041b 100644 --- a/3rdparty/hunspell/1.6.2/src/hunspell/langnum.hxx +++ b/3rdparty/hunspell/1.7.0/src/hunspell/langnum.hxx @@ -48,6 +48,7 @@ enum { LANG_az = 100, // custom number LANG_bg = 41, LANG_ca = 37, + LANG_crh = 102, // custom number LANG_cs = 42, LANG_da = 45, LANG_de = 49, diff --git a/3rdparty/hunspell/1.6.2/src/hunspell/phonet.cxx b/3rdparty/hunspell/1.7.0/src/hunspell/phonet.cxx similarity index 100% rename from 3rdparty/hunspell/1.6.2/src/hunspell/phonet.cxx rename to 3rdparty/hunspell/1.7.0/src/hunspell/phonet.cxx diff --git 
a/3rdparty/hunspell/1.6.2/src/hunspell/phonet.hxx b/3rdparty/hunspell/1.7.0/src/hunspell/phonet.hxx similarity index 100% rename from 3rdparty/hunspell/1.6.2/src/hunspell/phonet.hxx rename to 3rdparty/hunspell/1.7.0/src/hunspell/phonet.hxx diff --git a/3rdparty/hunspell/1.6.2/src/hunspell/replist.cxx b/3rdparty/hunspell/1.7.0/src/hunspell/replist.cxx similarity index 100% rename from 3rdparty/hunspell/1.6.2/src/hunspell/replist.cxx rename to 3rdparty/hunspell/1.7.0/src/hunspell/replist.cxx diff --git a/3rdparty/hunspell/1.6.2/src/hunspell/replist.hxx b/3rdparty/hunspell/1.7.0/src/hunspell/replist.hxx similarity index 100% rename from 3rdparty/hunspell/1.6.2/src/hunspell/replist.hxx rename to 3rdparty/hunspell/1.7.0/src/hunspell/replist.hxx diff --git a/3rdparty/hunspell/1.6.2/src/hunspell/suggestmgr.cxx b/3rdparty/hunspell/1.7.0/src/hunspell/suggestmgr.cxx similarity index 89% rename from 3rdparty/hunspell/1.6.2/src/hunspell/suggestmgr.cxx rename to 3rdparty/hunspell/1.7.0/src/hunspell/suggestmgr.cxx index 73ea91e3a3..dba084e9bd 100644 --- a/3rdparty/hunspell/1.6.2/src/hunspell/suggestmgr.cxx +++ b/3rdparty/hunspell/1.7.0/src/hunspell/suggestmgr.cxx @@ -72,6 +72,7 @@ #include #include #include +#include #include "suggestmgr.hxx" #include "htypes.hxx" @@ -79,6 +80,8 @@ const w_char W_VLINE = {'\0', '|'}; +#define MAX_CHAR_DISTANCE 4 + SuggestMgr::SuggestMgr(const char* tryme, unsigned int maxn, AffixMgr* aptr) { // register affix manager and check in string of chars to // try when building candidate suggestions @@ -132,6 +135,11 @@ SuggestMgr::SuggestMgr(const char* tryme, unsigned int maxn, AffixMgr* aptr) { ctryl = u8_u16(ctry_utf, tryme); } } + + // language with possible dash usage + // (latin letters or dash in TRY characters) + lang_with_dash_usage = (ctry && + ((strchr(ctry, '-') != NULL) || (strchr(ctry, 'a') != NULL))); } SuggestMgr::~SuggestMgr() { @@ -169,10 +177,13 @@ void SuggestMgr::testsug(std::vector& wlst, } } -// generate suggestions for a 
misspelled word -// pass in address of array of char * pointers -// onlycompoundsug: probably bad suggestions (need for ngram sugs, too) -void SuggestMgr::suggest(std::vector& slst, +/* generate suggestions for a misspelled word + * pass in address of array of char * pointers + * onlycompoundsug: probably bad suggestions (need for ngram sugs, too) + * return value: true, if there is a good suggestion + * (REP, ph: or a dictionary word pair) + */ +bool SuggestMgr::suggest(std::vector& slst, const char* w, int* onlycompoundsug) { int nocompoundtwowords = 0; @@ -182,6 +193,7 @@ void SuggestMgr::suggest(std::vector& slst, std::string w2; const char* word = w; size_t oldSug = 0; + bool good_suggestion = false; // word reversing wrapper for complex prefixes if (complexprefixes) { @@ -196,34 +208,49 @@ void SuggestMgr::suggest(std::vector& slst, if (utf8) { wl = u8_u16(word_utf, word); if (wl == -1) { - return; + return false; } } - for (int cpdsuggest = 0; (cpdsuggest < 2) && (nocompoundtwowords == 0); + for (int cpdsuggest = 0; (cpdsuggest < 2) && (nocompoundtwowords == 0) && !good_suggestion; cpdsuggest++) { + + clock_t timelimit; + // initialize both in non-compound and compound cycles + timelimit = clock(); + // limit compound suggestion if (cpdsuggest > 0) oldSug = slst.size(); // suggestions for an uppercase word (html -> HTML) if (slst.size() < maxSug) { + size_t i = slst.size(); if (utf8) capchars_utf(slst, &word_utf[0], wl, cpdsuggest); else capchars(slst, word, cpdsuggest); + if (slst.size() > i) + good_suggestion = true; } // perhaps we made a typical fault of spelling if ((slst.size() < maxSug) && (!cpdsuggest || (slst.size() < oldSug + maxcpdsugs))) { + size_t i = slst.size(); replchars(slst, word, cpdsuggest); + if (slst.size() > i) + good_suggestion = true; } + if (clock() > timelimit + TIMELIMIT_SUGGESTION) + return good_suggestion; // perhaps we made chose the wrong char from a related set if ((slst.size() < maxSug) && (!cpdsuggest || (slst.size() < 
oldSug + maxcpdsugs))) { mapchars(slst, word, cpdsuggest); } + if (clock() > timelimit + TIMELIMIT_SUGGESTION) + return good_suggestion; // only suggest compound words when no other suggestion if ((cpdsuggest == 0) && (slst.size() > nsugorig)) @@ -236,6 +263,8 @@ void SuggestMgr::suggest(std::vector& slst, else swapchar(slst, word, cpdsuggest); } + if (clock() > timelimit + TIMELIMIT_SUGGESTION) + return good_suggestion; // did we swap the order of non adjacent chars by mistake if ((slst.size() < maxSug) && (!cpdsuggest || (slst.size() < oldSug + maxcpdsugs))) { @@ -244,6 +273,8 @@ void SuggestMgr::suggest(std::vector& slst, else longswapchar(slst, word, cpdsuggest); } + if (clock() > timelimit + TIMELIMIT_SUGGESTION) + return good_suggestion; // did we just hit the wrong key in place of a good char (case and keyboard) if ((slst.size() < maxSug) && (!cpdsuggest || (slst.size() < oldSug + maxcpdsugs))) { @@ -252,6 +283,8 @@ void SuggestMgr::suggest(std::vector& slst, else badcharkey(slst, word, cpdsuggest); } + if (clock() > timelimit + TIMELIMIT_SUGGESTION) + return good_suggestion; // did we add a char that should not be there if ((slst.size() < maxSug) && (!cpdsuggest || (slst.size() < oldSug + maxcpdsugs))) { @@ -260,6 +293,8 @@ void SuggestMgr::suggest(std::vector& slst, else extrachar(slst, word, cpdsuggest); } + if (clock() > timelimit + TIMELIMIT_SUGGESTION) + return good_suggestion; // did we forgot a char if ((slst.size() < maxSug) && (!cpdsuggest || (slst.size() < oldSug + maxcpdsugs))) { @@ -268,6 +303,8 @@ void SuggestMgr::suggest(std::vector& slst, else forgotchar(slst, word, cpdsuggest); } + if (clock() > timelimit + TIMELIMIT_SUGGESTION) + return good_suggestion; // did we move a char if ((slst.size() < maxSug) && (!cpdsuggest || (slst.size() < oldSug + maxcpdsugs))) { @@ -276,6 +313,8 @@ void SuggestMgr::suggest(std::vector& slst, else movechar(slst, word, cpdsuggest); } + if (clock() > timelimit + TIMELIMIT_SUGGESTION) + return good_suggestion; // 
did we just hit the wrong key in place of a good char if ((slst.size() < maxSug) && (!cpdsuggest || (slst.size() < oldSug + maxcpdsugs))) { @@ -284,6 +323,8 @@ void SuggestMgr::suggest(std::vector& slst, else badchar(slst, word, cpdsuggest); } + if (clock() > timelimit + TIMELIMIT_SUGGESTION) + return good_suggestion; // did we double two characters if ((slst.size() < maxSug) && (!cpdsuggest || (slst.size() < oldSug + maxcpdsugs))) { @@ -292,17 +333,25 @@ void SuggestMgr::suggest(std::vector& slst, else doubletwochars(slst, word, cpdsuggest); } + if (clock() > timelimit + TIMELIMIT_SUGGESTION) + return good_suggestion; // perhaps we forgot to hit space and two words ran together - if (!nosplitsugs && (slst.size() < maxSug) && - (!cpdsuggest || (slst.size() < oldSug + maxcpdsugs))) { - twowords(slst, word, cpdsuggest); + // (dictionary word pairs have top priority here, so + // we always suggest them, in despite of nosplitsugs, and + // drop compound word and other suggestions) + if (!cpdsuggest || (!nosplitsugs && slst.size() < oldSug + maxcpdsugs)) { + good_suggestion = twowords(slst, word, cpdsuggest, good_suggestion); } + if (clock() > timelimit + TIMELIMIT_SUGGESTION) + return good_suggestion; } // repeating ``for'' statement compounding support if (!nocompoundtwowords && (!slst.empty()) && onlycompoundsug) *onlycompoundsug = 1; + + return good_suggestion; } // suggestions for an uppercase word (html -> HTML) @@ -450,8 +499,11 @@ int SuggestMgr::replchars(std::vector& wlst, return wlst.size(); } -// perhaps we doubled two characters (pattern aba -> ababa, for example vacation -// -> vacacation) +// perhaps we doubled two characters +// (for example vacation -> vacacation) +// The recognized pattern with regex back-references: +// "(.)(.)\1\2\1" or "..(.)(.)\1\2" + int SuggestMgr::doubletwochars(std::vector& wlst, const char* word, int cpdsuggest) { @@ -462,7 +514,7 @@ int SuggestMgr::doubletwochars(std::vector& wlst, for (int i = 2; i < wl; i++) { if (word[i] 
== word[i - 2]) { state++; - if (state == 3) { + if (state == 3 || (state == 2 && i >= 4)) { std::string candidate(word, word + i - 1); candidate.insert(candidate.end(), word + i + 1, word + wl); testsug(wlst, candidate, cpdsuggest, NULL, NULL); @@ -475,8 +527,11 @@ int SuggestMgr::doubletwochars(std::vector& wlst, return wlst.size(); } -// perhaps we doubled two characters (pattern aba -> ababa, for example vacation -// -> vacacation) +// perhaps we doubled two characters +// (for example vacation -> vacacation) +// The recognized pattern with regex back-references: +// "(.)(.)\1\2\1" or "..(.)(.)\1\2" + int SuggestMgr::doubletwochars_utf(std::vector& wlst, const w_char* word, int wl, @@ -487,7 +542,7 @@ int SuggestMgr::doubletwochars_utf(std::vector& wlst, for (int i = 2; i < wl; i++) { if (word[i] == word[i - 2]) { state++; - if (state == 3) { + if (state == 3 || (state == 2 && i >= 4)) { std::vector candidate_utf(word, word + i - 1); candidate_utf.insert(candidate_utf.end(), word + i + 1, word + wl); std::string candidate; @@ -721,17 +776,22 @@ int SuggestMgr::forgotchar_utf(std::vector& wlst, return wlst.size(); } -/* error is should have been two words */ -int SuggestMgr::twowords(std::vector& wlst, +/* error is should have been two words + * return value is true, if there is a dictionary word pair, + * or there was already a good suggestion before calling + * this function. + */ +bool SuggestMgr::twowords(std::vector& wlst, const char* word, - int cpdsuggest) { + int cpdsuggest, + bool good) { int c2; int forbidden = 0; int cwrd; int wl = strlen(word); if (wl < 3) - return wlst.size(); + return false; if (langnum == LANG_hu) forbidden = check_forbidden(word, wl); @@ -750,63 +810,87 @@ int SuggestMgr::twowords(std::vector& wlst, } if (utf8 && p[1] == '\0') break; // last UTF-8 character - *p = '\0'; - int c1 = checkword(candidate, cpdsuggest, NULL, NULL); - if (c1) { - c2 = checkword((p + 1), cpdsuggest, NULL, NULL); - if (c2) { - *p = ' '; - // spec. 
Hungarian code (need a better compound word support) - if ((langnum == LANG_hu) && !forbidden && - // if 3 repeating letter, use - instead of space - (((p[-1] == p[1]) && + // Suggest only word pairs, if they are listed in the dictionary. + // For example, adding "a lot" to the English dic file will + // result only "alot" -> "a lot" suggestion instead of + // "alto, slot, alt, lot, allot, aloft, aloe, clot, plot, blot, a lot". + // Note: using "ph:alot" keeps the other suggestions: + // a lot ph:alot + // alot -> a lot, alto, slot... + *p = ' '; + if (!cpdsuggest && checkword(candidate, cpdsuggest, NULL, NULL)) { + // remove not word pair suggestions + if (!good) { + good = true; + wlst.clear(); + } + wlst.insert(wlst.begin(), candidate); + } + + // word pairs with dash? + if (lang_with_dash_usage) { + *p = '-'; + + if (!cpdsuggest && checkword(candidate, cpdsuggest, NULL, NULL)) { + // remove not word pair suggestions + if (!good) { + good = true; + wlst.clear(); + } + wlst.insert(wlst.begin(), candidate); + } + } + + if (wlst.size() < maxSug && !nosplitsugs && !good) { + *p = '\0'; + int c1 = checkword(candidate, cpdsuggest, NULL, NULL); + if (c1) { + c2 = checkword((p + 1), cpdsuggest, NULL, NULL); + if (c2) { + // spec. 
Hungarian code (TODO need a better compound word support) + if ((langnum == LANG_hu) && !forbidden && + // if 3 repeating letter, use - instead of space + (((p[-1] == p[1]) && (((p > candidate + 1) && (p[-1] == p[-2])) || (p[-1] == p[2]))) || - // or multiple compounding, with more, than 6 syllables - ((c1 == 3) && (c2 >= 2)))) - *p = '-'; + // or multiple compounding, with more, than 6 syllables + ((c1 == 3) && (c2 >= 2)))) + *p = '-'; + else + *p = ' '; - cwrd = 1; - for (size_t k = 0; k < wlst.size(); ++k) { - if (wlst[k] == candidate) { - cwrd = 0; - break; - } - } - if (wlst.size() < maxSug) { - if (cwrd) { - wlst.push_back(candidate); - } - } else { - free(candidate); - return wlst.size(); - } - // add two word suggestion with dash, if TRY string contains - // "a" or "-" - // NOTE: cwrd doesn't modified for REP twoword sugg. - if (ctry && (strchr(ctry, 'a') || strchr(ctry, '-')) && - mystrlen(p + 1) > 1 && mystrlen(candidate) - mystrlen(p) > 1) { - *p = '-'; + cwrd = 1; for (size_t k = 0; k < wlst.size(); ++k) { if (wlst[k] == candidate) { cwrd = 0; break; } } - if (wlst.size() < maxSug) { - if (cwrd) { + + if (cwrd && (wlst.size() < maxSug)) wlst.push_back(candidate); + + // add two word suggestion with dash, depending on the language + // Note that cwrd doesn't modified for REP twoword sugg. 
+ if ( !nosplitsugs && lang_with_dash_usage && + mystrlen(p + 1) > 1 && mystrlen(candidate) - mystrlen(p) > 1) { + *p = '-'; + for (size_t k = 0; k < wlst.size(); ++k) { + if (wlst[k] == candidate) { + cwrd = 0; + break; + } } - } else { - free(candidate); - return wlst.size(); + + if ((wlst.size() < maxSug) && cwrd) + wlst.push_back(candidate); } } } } } free(candidate); - return wlst.size(); + return good; } // error is adjacent letter were swapped @@ -891,7 +975,8 @@ int SuggestMgr::longswapchar(std::vector& wlst, // try swapping not adjacent chars one by one for (std::string::iterator p = candidate.begin(); p < candidate.end(); ++p) { for (std::string::iterator q = candidate.begin(); q < candidate.end(); ++q) { - if (std::abs(std::distance(q, p)) > 1) { + size_t distance = std::abs(std::distance(q, p)); + if (distance > 1 && distance <= MAX_CHAR_DISTANCE) { std::swap(*p, *q); testsug(wlst, candidate, cpdsuggest, NULL, NULL); std::swap(*p, *q); @@ -910,7 +995,8 @@ int SuggestMgr::longswapchar_utf(std::vector& wlst, // try swapping not adjacent chars for (std::vector::iterator p = candidate_utf.begin(); p < candidate_utf.end(); ++p) { for (std::vector::iterator q = candidate_utf.begin(); q < candidate_utf.end(); ++q) { - if (std::abs(std::distance(q, p)) > 1) { + size_t distance = std::abs(std::distance(q, p)); + if (distance > 1 && distance <= MAX_CHAR_DISTANCE) { std::swap(*p, *q); std::string candidate; u16_u8(candidate, candidate_utf); @@ -932,7 +1018,7 @@ int SuggestMgr::movechar(std::vector& wlst, // try moving a char for (std::string::iterator p = candidate.begin(); p < candidate.end(); ++p) { - for (std::string::iterator q = p + 1; q < candidate.end() && std::distance(p, q) < 10; ++q) { + for (std::string::iterator q = p + 1; q < candidate.end() && std::distance(p, q) <= MAX_CHAR_DISTANCE; ++q) { std::swap(*q, *(q - 1)); if (std::distance(p, q) < 2) continue; // omit swap char @@ -942,7 +1028,7 @@ int SuggestMgr::movechar(std::vector& wlst, } for 
(std::string::reverse_iterator p = candidate.rbegin(), pEnd = candidate.rend() - 1; p != pEnd; ++p) { - for (std::string::reverse_iterator q = p + 1, qEnd = candidate.rend(); q != qEnd && std::distance(p, q) < 10; ++q) { + for (std::string::reverse_iterator q = p + 1, qEnd = candidate.rend(); q != qEnd && std::distance(p, q) <= MAX_CHAR_DISTANCE; ++q) { std::swap(*q, *(q - 1)); if (std::distance(p, q) < 2) continue; // omit swap char @@ -965,7 +1051,7 @@ int SuggestMgr::movechar_utf(std::vector& wlst, // try moving a char for (std::vector::iterator p = candidate_utf.begin(); p < candidate_utf.end(); ++p) { - for (std::vector::iterator q = p + 1; q < candidate_utf.end() && std::distance(p, q) < 10; ++q) { + for (std::vector::iterator q = p + 1; q < candidate_utf.end() && std::distance(p, q) <= MAX_CHAR_DISTANCE; ++q) { std::swap(*q, *(q - 1)); if (std::distance(p, q) < 2) continue; // omit swap char @@ -977,7 +1063,7 @@ int SuggestMgr::movechar_utf(std::vector& wlst, } for (std::vector::reverse_iterator p = candidate_utf.rbegin(); p < candidate_utf.rend(); ++p) { - for (std::vector::reverse_iterator q = p + 1; q < candidate_utf.rend() && std::distance(p, q) < 10; ++q) { + for (std::vector::reverse_iterator q = p + 1; q < candidate_utf.rend() && std::distance(p, q) <= MAX_CHAR_DISTANCE; ++q) { std::swap(*q, *(q - 1)); if (std::distance(p, q) < 2) continue; // omit swap char @@ -994,7 +1080,8 @@ int SuggestMgr::movechar_utf(std::vector& wlst, // generate a set of suggestions for very poorly spelled words void SuggestMgr::ngsuggest(std::vector& wlst, const char* w, - const std::vector& rHMgr) { + const std::vector& rHMgr, + int captype) { int lval; int sc; int lp, lpphon; @@ -1071,18 +1158,34 @@ void SuggestMgr::ngsuggest(std::vector& wlst, u8_u16(w_word, word); u8_u16(w_target, target); } - + std::string f; std::vector w_f; - + for (size_t i = 0; i < rHMgr.size(); ++i) { while (0 != (hp = rHMgr[i]->walk_hashtable(col, hp))) { - if ((hp->astr) && (pAMgr) && - 
(TESTAFF(hp->astr, forbiddenword, hp->alen) || - TESTAFF(hp->astr, ONLYUPCASEFLAG, hp->alen) || - TESTAFF(hp->astr, nosuggest, hp->alen) || - TESTAFF(hp->astr, nongramsuggest, hp->alen) || - TESTAFF(hp->astr, onlyincompound, hp->alen))) + // skip exceptions + if ( + // skip it, if the word length different by 5 or + // more characters (to avoid strange suggestions) + // (except Unicode characters over BMP) + (((abs(n - hp->clen) > 4) && !nonbmp)) || + // don't suggest capitalized dictionary words for + // lower case misspellings in ngram suggestions, except + // - PHONE usage, or + // - in the case of German, where not only proper + // nouns are capitalized, or + // - the capitalized word has special pronunciation + ((captype == NOCAP) && (hp->var & H_OPT_INITCAP) && + !ph && (langnum != LANG_de) && !(hp->var & H_OPT_PHON)) || + // or it has one of the following special flags + ((hp->astr) && (pAMgr) && + (TESTAFF(hp->astr, forbiddenword, hp->alen) || + TESTAFF(hp->astr, ONLYUPCASEFLAG, hp->alen) || + TESTAFF(hp->astr, nosuggest, hp->alen) || + TESTAFF(hp->astr, nongramsuggest, hp->alen) || + TESTAFF(hp->astr, onlyincompound, hp->alen))) + ) continue; if (utf8) { @@ -1105,7 +1208,7 @@ void SuggestMgr::ngsuggest(std::vector& wlst, sc = ngram(3, word, f, NGRAM_LONGER_WORSE) + leftcommon; } - // check special pronounciation + // check special pronunciation f.clear(); if ((hp->var & H_OPT_PHON) && copy_field(f, HENTRY_DATA(hp), MORPH_PHON)) { @@ -1559,7 +1662,8 @@ int SuggestMgr::checkword(const std::string& word, if (rv) { if ((rv->astr) && (TESTAFF(rv->astr, pAMgr->get_forbiddenword(), rv->alen) || - TESTAFF(rv->astr, pAMgr->get_nosuggest(), rv->alen))) + TESTAFF(rv->astr, pAMgr->get_nosuggest(), rv->alen) || + TESTAFF(rv->astr, pAMgr->get_substandard(), rv->alen))) return 0; while (rv) { if (rv->astr && @@ -1584,7 +1688,7 @@ int SuggestMgr::checkword(const std::string& word, if (!rv && pAMgr->have_contclass()) { rv = pAMgr->suffix_check_twosfx(word.c_str(), 
word.size(), 0, NULL, FLAG_NULL); if (!rv) - rv = pAMgr->prefix_check_twosfx(word.c_str(), word.size(), 1, FLAG_NULL); + rv = pAMgr->prefix_check_twosfx(word.c_str(), word.size(), 0, FLAG_NULL); } // check forbidden words @@ -1649,15 +1753,15 @@ std::string SuggestMgr::suggest_morph(const std::string& in_w) { TESTAFF(rv->astr, pAMgr->get_needaffix(), rv->alen) || TESTAFF(rv->astr, pAMgr->get_onlyincompound(), rv->alen))) { if (!HENTRY_FIND(rv, MORPH_STEM)) { - result.append(" "); + result.push_back(MSEP_FLD); result.append(MORPH_STEM); result.append(w); } if (HENTRY_DATA(rv)) { - result.append(" "); + result.push_back(MSEP_FLD); result.append(HENTRY_DATA2(rv)); } - result.append("\n"); + result.push_back(MSEP_REC); } rv = rv->next_homonym; } @@ -1713,7 +1817,7 @@ std::string SuggestMgr::suggest_hentry_gen(hentry* rv, const char* pattern) { HENTRY_DATA(rv), pattern, 0); if (!aff.empty()) { result.append(aff); - result.append("\n"); + result.push_back(MSEP_REC); } } @@ -1737,7 +1841,7 @@ std::string SuggestMgr::suggest_hentry_gen(hentry* rv, const char* pattern) { rv2->alen, HENTRY_DATA(rv2), pattern, 0); if (!aff.empty()) { result.append(aff); - result.append("\n"); + result.push_back(MSEP_REC); } } } diff --git a/3rdparty/hunspell/1.6.2/src/hunspell/suggestmgr.hxx b/3rdparty/hunspell/1.7.0/src/hunspell/suggestmgr.hxx similarity index 95% rename from 3rdparty/hunspell/1.6.2/src/hunspell/suggestmgr.hxx rename to 3rdparty/hunspell/1.7.0/src/hunspell/suggestmgr.hxx index 19ffc03a84..7137be279b 100644 --- a/3rdparty/hunspell/1.6.2/src/hunspell/suggestmgr.hxx +++ b/3rdparty/hunspell/1.7.0/src/hunspell/suggestmgr.hxx @@ -78,11 +78,6 @@ #define MAXPHONSUGS 2 #define MAXCOMPOUNDSUGS 3 -// timelimit: max ~1/4 sec (process time on Linux) for a time consuming function -#define TIMELIMIT (CLOCKS_PER_SEC >> 2) -#define MINTIMER 100 -#define MAXPLUSTIMER 100 - #define NGRAM_LONGER_WORSE (1 << 0) #define NGRAM_ANY_MISMATCH (1 << 1) #define NGRAM_LOWERING (1 << 2) @@ -92,7 +87,6 @@ 
#include "affixmgr.hxx" #include "hashmgr.hxx" #include "langnum.hxx" -#include enum { LCS_UP, LCS_LEFT, LCS_UPLEFT }; @@ -109,6 +103,7 @@ class SuggestMgr { char* ctry; size_t ctryl; std::vector ctry_utf; + bool lang_with_dash_usage; AffixMgr* pAMgr; unsigned int maxSug; @@ -124,8 +119,8 @@ class SuggestMgr { SuggestMgr(const char* tryme, unsigned int maxn, AffixMgr* aptr); ~SuggestMgr(); - void suggest(std::vector& slst, const char* word, int* onlycmpdsug); - void ngsuggest(std::vector& slst, const char* word, const std::vector& rHMgr); + bool suggest(std::vector& slst, const char* word, int* onlycmpdsug); + void ngsuggest(std::vector& slst, const char* word, const std::vector& rHMgr, int captype); std::string suggest_morph(const std::string& word); std::string suggest_gen(const std::vector& pl, const std::string& pattern); @@ -149,7 +144,7 @@ class SuggestMgr { int extrachar(std::vector&, const char*, int); int badcharkey(std::vector&, const char*, int); int badchar(std::vector&, const char*, int); - int twowords(std::vector&, const char*, int); + bool twowords(std::vector&, const char*, int, bool); void capchars_utf(std::vector&, const w_char*, int wl, int); int doubletwochars_utf(std::vector&, const w_char*, int wl, int); diff --git a/3rdparty/hunspell/1.6.2/src/hunspell/utf_info.cxx b/3rdparty/hunspell/1.7.0/src/hunspell/utf_info.hxx similarity index 99% rename from 3rdparty/hunspell/1.6.2/src/hunspell/utf_info.cxx rename to 3rdparty/hunspell/1.7.0/src/hunspell/utf_info.hxx index 6bb847f2a6..ddf3c8c5ba 100644 --- a/3rdparty/hunspell/1.6.2/src/hunspell/utf_info.cxx +++ b/3rdparty/hunspell/1.7.0/src/hunspell/utf_info.hxx @@ -35,9 +35,15 @@ * * ***** END LICENSE BLOCK ***** */ -#include "csutil.hxx" +// Unicode character encoding information +struct unicode_info { + unsigned short c; + unsigned short cupper; + unsigned short clower; +}; + /* fields: Unicode letter, toupper, tolower */ -static struct unicode_info utf_lst[] = { +static const struct unicode_info 
utf_lst[] = { {0x0041, 0x0041, 0x0061}, {0x0042, 0x0042, 0x0062}, {0x0043, 0x0043, 0x0063}, {0x0044, 0x0044, 0x0064}, {0x0045, 0x0045, 0x0065}, {0x0046, 0x0046, 0x0066}, diff --git a/3rdparty/hunspell/1.6.2/src/hunspell/w_char.hxx b/3rdparty/hunspell/1.7.0/src/hunspell/w_char.hxx similarity index 100% rename from 3rdparty/hunspell/1.6.2/src/hunspell/w_char.hxx rename to 3rdparty/hunspell/1.7.0/src/hunspell/w_char.hxx diff --git a/3rdparty/hunspell/1.6.2/src/parsers/Makefile.am b/3rdparty/hunspell/1.7.0/src/parsers/Makefile.am similarity index 93% rename from 3rdparty/hunspell/1.6.2/src/parsers/Makefile.am rename to 3rdparty/hunspell/1.7.0/src/parsers/Makefile.am index 5f001a5d5f..b27551498b 100644 --- a/3rdparty/hunspell/1.6.2/src/parsers/Makefile.am +++ b/3rdparty/hunspell/1.7.0/src/parsers/Makefile.am @@ -15,4 +15,4 @@ testparser_SOURCES=firstparser.cxx firstparser.hxx xmlparser.cxx \ htmlparser.hxx odfparser.hxx odfparser.cxx # need mystrdup() -LDADD = ../hunspell/libhunspell-1.6.la +LDADD = ../hunspell/libhunspell-1.7.la diff --git a/3rdparty/hunspell/1.6.2/src/parsers/firstparser.cxx b/3rdparty/hunspell/1.7.0/src/parsers/firstparser.cxx similarity index 100% rename from 3rdparty/hunspell/1.6.2/src/parsers/firstparser.cxx rename to 3rdparty/hunspell/1.7.0/src/parsers/firstparser.cxx diff --git a/3rdparty/hunspell/1.6.2/src/parsers/firstparser.hxx b/3rdparty/hunspell/1.7.0/src/parsers/firstparser.hxx similarity index 100% rename from 3rdparty/hunspell/1.6.2/src/parsers/firstparser.hxx rename to 3rdparty/hunspell/1.7.0/src/parsers/firstparser.hxx diff --git a/3rdparty/hunspell/1.6.2/src/parsers/htmlparser.cxx b/3rdparty/hunspell/1.7.0/src/parsers/htmlparser.cxx similarity index 96% rename from 3rdparty/hunspell/1.6.2/src/parsers/htmlparser.cxx rename to 3rdparty/hunspell/1.7.0/src/parsers/htmlparser.cxx index c23d373736..1b6573368f 100644 --- a/3rdparty/hunspell/1.6.2/src/parsers/htmlparser.cxx +++ b/3rdparty/hunspell/1.7.0/src/parsers/htmlparser.cxx @@ -69,6 
+69,10 @@ static const char* PATTERN2[][2] = { #define PATTERN_LEN2 (sizeof(PATTERN2) / (sizeof(char*) * 2)) +static const char* (*PATTERN3)[2] = NULL; + +#define PATTERN_LEN3 0 + HTMLParser::HTMLParser(const char* wordchars) : XMLParser(wordchars) { } @@ -78,7 +82,7 @@ HTMLParser::HTMLParser(const w_char* wordchars, int len) } bool HTMLParser::next_token(std::string& t) { - return XMLParser::next_token(PATTERN, PATTERN_LEN, PATTERN2, PATTERN_LEN2, t); + return XMLParser::next_token(PATTERN, PATTERN_LEN, PATTERN2, PATTERN_LEN2, PATTERN3, PATTERN_LEN3, t); } HTMLParser::~HTMLParser() {} diff --git a/3rdparty/hunspell/1.6.2/src/parsers/htmlparser.hxx b/3rdparty/hunspell/1.7.0/src/parsers/htmlparser.hxx similarity index 100% rename from 3rdparty/hunspell/1.6.2/src/parsers/htmlparser.hxx rename to 3rdparty/hunspell/1.7.0/src/parsers/htmlparser.hxx diff --git a/3rdparty/hunspell/1.6.2/src/parsers/latexparser.cxx b/3rdparty/hunspell/1.7.0/src/parsers/latexparser.cxx similarity index 91% rename from 3rdparty/hunspell/1.6.2/src/parsers/latexparser.cxx rename to 3rdparty/hunspell/1.7.0/src/parsers/latexparser.cxx index c387b2df2f..c70236e8be 100644 --- a/3rdparty/hunspell/1.6.2/src/parsers/latexparser.cxx +++ b/3rdparty/hunspell/1.7.0/src/parsers/latexparser.cxx @@ -47,6 +47,9 @@ using namespace std; #endif +#define UTF8_APOS "\xe2\x80\x99" +#define APOSTROPHE "'" + static struct { const char* pat[2]; int arg; @@ -203,7 +206,20 @@ bool LaTeXParser::next_token(std::string& t) { break; case 1: // wordchar apostrophe = 0; - if (!is_wordchar(line[actual].c_str() + head) || + if ((is_wordchar((char*)APOSTROPHE) || + (is_utf8() && is_wordchar((char*)UTF8_APOS))) && + !line[actual].empty() && line[actual][head] == '\'' && + is_wordchar(line[actual].c_str() + head + 1)) { + head++; + } else if (is_utf8() && + is_wordchar((char*)APOSTROPHE) && // add Unicode apostrophe + // to the WORDCHARS, if + // needed + strncmp(line[actual].c_str() + head, UTF8_APOS, strlen(UTF8_APOS)) == + 0 
&& + is_wordchar(line[actual].c_str() + head + strlen(UTF8_APOS))) { + head += strlen(UTF8_APOS) - 1; + } else if (!is_wordchar(line[actual].c_str() + head) || (line[actual][head] == '\'' && line[actual][head + 1] == '\'' && ++apostrophe)) { state = 0; diff --git a/3rdparty/hunspell/1.6.2/src/parsers/latexparser.hxx b/3rdparty/hunspell/1.7.0/src/parsers/latexparser.hxx similarity index 100% rename from 3rdparty/hunspell/1.6.2/src/parsers/latexparser.hxx rename to 3rdparty/hunspell/1.7.0/src/parsers/latexparser.hxx diff --git a/3rdparty/hunspell/1.6.2/src/parsers/manparser.cxx b/3rdparty/hunspell/1.7.0/src/parsers/manparser.cxx similarity index 99% rename from 3rdparty/hunspell/1.6.2/src/parsers/manparser.cxx rename to 3rdparty/hunspell/1.7.0/src/parsers/manparser.cxx index 8a0f760be0..bf3bdac4c3 100644 --- a/3rdparty/hunspell/1.6.2/src/parsers/manparser.cxx +++ b/3rdparty/hunspell/1.7.0/src/parsers/manparser.cxx @@ -71,7 +71,7 @@ bool ManParser::next_token(std::string& t) { } else { state = 2; } - // no break + /* FALLTHROUGH */ case 2: // non word chars if (is_wordchar(line[actual].c_str() + head)) { state = 3; diff --git a/3rdparty/hunspell/1.6.2/src/parsers/manparser.hxx b/3rdparty/hunspell/1.7.0/src/parsers/manparser.hxx similarity index 100% rename from 3rdparty/hunspell/1.6.2/src/parsers/manparser.hxx rename to 3rdparty/hunspell/1.7.0/src/parsers/manparser.hxx diff --git a/3rdparty/hunspell/1.6.2/src/parsers/odfparser.cxx b/3rdparty/hunspell/1.7.0/src/parsers/odfparser.cxx similarity index 87% rename from 3rdparty/hunspell/1.6.2/src/parsers/odfparser.cxx rename to 3rdparty/hunspell/1.7.0/src/parsers/odfparser.cxx index 6bca75587d..caa10e3f3a 100644 --- a/3rdparty/hunspell/1.6.2/src/parsers/odfparser.cxx +++ b/3rdparty/hunspell/1.7.0/src/parsers/odfparser.cxx @@ -61,6 +61,12 @@ static const char* (*PATTERN2)[2] = NULL; #define PATTERN_LEN2 0 +static const char* PATTERN3[][2] = { + {""}, // part of the reedited words + {""}}; // for example, an inserted letter 
+ +#define PATTERN_LEN3 (sizeof(PATTERN3) / (sizeof(char*) * 2)) + ODFParser::ODFParser(const char* wordchars) : XMLParser(wordchars) { } @@ -70,7 +76,11 @@ ODFParser::ODFParser(const w_char* wordchars, int len) } bool ODFParser::next_token(std::string& t) { - return XMLParser::next_token(PATTERN, PATTERN_LEN, PATTERN2, PATTERN_LEN2, t); + return XMLParser::next_token(PATTERN, PATTERN_LEN, PATTERN2, PATTERN_LEN2, PATTERN3, PATTERN_LEN3, t); +} + +std::string ODFParser::get_word(const std::string &tok) { + return XMLParser::get_word2(PATTERN3, PATTERN_LEN3, tok); } ODFParser::~ODFParser() {} diff --git a/3rdparty/hunspell/1.6.2/src/parsers/odfparser.hxx b/3rdparty/hunspell/1.7.0/src/parsers/odfparser.hxx similarity index 97% rename from 3rdparty/hunspell/1.6.2/src/parsers/odfparser.hxx rename to 3rdparty/hunspell/1.7.0/src/parsers/odfparser.hxx index 02c85a0c94..e96956a0b7 100644 --- a/3rdparty/hunspell/1.6.2/src/parsers/odfparser.hxx +++ b/3rdparty/hunspell/1.7.0/src/parsers/odfparser.hxx @@ -50,6 +50,7 @@ class ODFParser : public XMLParser { explicit ODFParser(const char* wc); ODFParser(const w_char* wordchars, int len); virtual bool next_token(std::string&); + virtual std::string get_word(const std::string &tok); virtual ~ODFParser(); }; diff --git a/3rdparty/hunspell/1.6.2/src/parsers/testparser.cxx b/3rdparty/hunspell/1.7.0/src/parsers/testparser.cxx similarity index 100% rename from 3rdparty/hunspell/1.6.2/src/parsers/testparser.cxx rename to 3rdparty/hunspell/1.7.0/src/parsers/testparser.cxx diff --git a/3rdparty/hunspell/1.6.2/src/parsers/textparser.cxx b/3rdparty/hunspell/1.7.0/src/parsers/textparser.cxx similarity index 99% rename from 3rdparty/hunspell/1.6.2/src/parsers/textparser.cxx rename to 3rdparty/hunspell/1.7.0/src/parsers/textparser.cxx index 6f2ccaf3d9..d7ac599bb1 100644 --- a/3rdparty/hunspell/1.6.2/src/parsers/textparser.cxx +++ b/3rdparty/hunspell/1.7.0/src/parsers/textparser.cxx @@ -222,6 +222,10 @@ int TextParser::change_token(const char* 
word) { return 0; } +std::string TextParser::get_word(const std::string &tok) { + return tok; +} + void TextParser::check_urls() { urlline.resize(line[actual].size() + 1); int url_state = 0; diff --git a/3rdparty/hunspell/1.6.2/src/parsers/textparser.hxx b/3rdparty/hunspell/1.7.0/src/parsers/textparser.hxx similarity index 98% rename from 3rdparty/hunspell/1.6.2/src/parsers/textparser.hxx rename to 3rdparty/hunspell/1.7.0/src/parsers/textparser.hxx index ac5e82ca75..c063723318 100644 --- a/3rdparty/hunspell/1.6.2/src/parsers/textparser.hxx +++ b/3rdparty/hunspell/1.7.0/src/parsers/textparser.hxx @@ -78,6 +78,7 @@ class TextParser { std::string get_line() const; std::string get_prevline(int n) const; virtual bool next_token(std::string&); + virtual std::string get_word(const std::string &tok); virtual int change_token(const char* word); void set_url_checking(int check); diff --git a/3rdparty/hunspell/1.6.2/src/parsers/xmlparser.cxx b/3rdparty/hunspell/1.7.0/src/parsers/xmlparser.cxx similarity index 81% rename from 3rdparty/hunspell/1.6.2/src/parsers/xmlparser.cxx rename to 3rdparty/hunspell/1.7.0/src/parsers/xmlparser.cxx index 1315011df4..abd3a7578d 100644 --- a/3rdparty/hunspell/1.6.2/src/parsers/xmlparser.cxx +++ b/3rdparty/hunspell/1.7.0/src/parsers/xmlparser.cxx @@ -55,22 +55,29 @@ static const char* __PATTERN__[][2] = {{""}, #define __PATTERN_LEN__ (sizeof(__PATTERN__) / (sizeof(char*) * 2)) +// for checking attributes, eg. 
text in HTML static const char* (*__PATTERN2__)[2] = NULL; #define __PATTERN_LEN2__ 0 +// for checking words with in-word patterns +// for example, "example" in ODT +static const char* (*__PATTERN3__)[2] = NULL; + +#define __PATTERN_LEN3__ 0 + #define ENTITY_APOS "'" #define UTF8_APOS "\xe2\x80\x99" #define APOSTROPHE "'" XMLParser::XMLParser(const char* wordchars) : TextParser(wordchars) - , pattern_num(0), pattern2_num(0), prevstate(0), checkattr(0), quotmark(0) { + , pattern_num(0), pattern2_num(0), pattern3_num(0), prevstate(0), checkattr(0), quotmark(0) { } XMLParser::XMLParser(const w_char* wordchars, int len) : TextParser(wordchars, len) - , pattern_num(0), pattern2_num(0), prevstate(0), checkattr(0), quotmark(0) { + , pattern_num(0), pattern2_num(0), pattern3_num(0), prevstate(0), checkattr(0), quotmark(0) { } XMLParser::~XMLParser() {} @@ -98,6 +105,8 @@ bool XMLParser::next_token(const char* PATTERN[][2], unsigned int PATTERN_LEN, const char* PATTERN2[][2], unsigned int PATTERN_LEN2, + const char* PATTERN3[][2], + unsigned int PATTERN_LEN3, std::string& t) { t.clear(); const char* latin1; @@ -141,7 +150,19 @@ bool XMLParser::next_token(const char* PATTERN[][2], is_wordchar(line[actual].c_str() + head + strlen(UTF8_APOS))) { head += strlen(UTF8_APOS) - 1; } else if (!is_wordchar(line[actual].c_str() + head)) { + // in-word patterns + if ((pattern3_num = look_pattern(PATTERN3, PATTERN_LEN3, 0)) != -1) { + size_t pos = line[actual].find(PATTERN3[pattern3_num][1], head); + if (pos != std::string::npos) { + size_t endpos = pos + strlen(PATTERN3[pattern3_num][1]) - 1; + if (is_wordchar(line[actual].c_str() + endpos + 1)) { + head = endpos; + break; + } + } + } state = prevstate; + // return with the token, except in the case of in-word patterns if (alloc_token(token, &head, t)) return true; } @@ -193,7 +214,26 @@ bool XMLParser::next_token(const char* PATTERN[][2], bool XMLParser::next_token(std::string& t) { return next_token(__PATTERN__, __PATTERN_LEN__, 
__PATTERN2__, - __PATTERN_LEN2__, t); + __PATTERN_LEN2__, __PATTERN3__, __PATTERN_LEN3__, t); +} + +// remove in-word patterns +std::string XMLParser::get_word2( + const char* PATTERN3[][2], + unsigned int PATTERN_LEN3, + const std::string &tok) { + std::string word = tok; + for (unsigned int i = 0; i < PATTERN_LEN3; i++) { + size_t pos; + while ((pos = word.find(PATTERN3[i][0])) != word.npos) { + size_t endpos = word.find(PATTERN3[i][1], pos); + if (endpos != word.npos) { + word.erase(pos, endpos + strlen(PATTERN3[i][1]) - pos); + } else + return word; + } + } + return word; } int XMLParser::change_token(const char* word) { diff --git a/3rdparty/hunspell/1.6.2/src/parsers/xmlparser.hxx b/3rdparty/hunspell/1.7.0/src/parsers/xmlparser.hxx similarity index 92% rename from 3rdparty/hunspell/1.6.2/src/parsers/xmlparser.hxx rename to 3rdparty/hunspell/1.7.0/src/parsers/xmlparser.hxx index 212de6217b..03f024574d 100644 --- a/3rdparty/hunspell/1.6.2/src/parsers/xmlparser.hxx +++ b/3rdparty/hunspell/1.7.0/src/parsers/xmlparser.hxx @@ -53,8 +53,13 @@ class XMLParser : public TextParser { unsigned int len, const char* p2[][2], unsigned int len2, + const char* p3[][2], + unsigned int len3, std::string&); virtual bool next_token(std::string&); + std::string get_word2(const char* p2[][2], + unsigned int len2, + const std::string &tok); int change_token(const char* word); virtual ~XMLParser(); @@ -62,6 +67,7 @@ class XMLParser : public TextParser { int look_pattern(const char* p[][2], unsigned int len, int column); int pattern_num; int pattern2_num; + int pattern3_num; int prevstate; int checkattr; char quotmark; diff --git a/3rdparty/hunspell/Makefile.am b/3rdparty/hunspell/Makefile.am index fe0502dd21..cb9341f19b 100644 --- a/3rdparty/hunspell/Makefile.am +++ b/3rdparty/hunspell/Makefile.am @@ -4,62 +4,59 @@ noinst_LIBRARIES = liblyxhunspell.a EXTRA_DIST = \ CMakeLists.txt \ - 1.6.2/AUTHORS \ - 1.6.2/BUGS \ - 1.6.2/ChangeLog \ - 1.6.2/COPYING \ - 1.6.2/COPYING.LESSER \ - 
1.6.2/license.hunspell \ - 1.6.2/README.md \ - 1.6.2/THANKS \ - 1.6.2/TODO \ - 1.6.2/src/hunspell/hunvisapi.h.in \ - 1.6.2/src/hunspell/utf_info.cxx + 1.7.0/AUTHORS \ + 1.7.0/ChangeLog \ + 1.7.0/COPYING \ + 1.7.0/COPYING.LESSER \ + 1.7.0/license.hunspell \ + 1.7.0/NEWS \ + 1.7.0/README \ + 1.7.0/THANKS \ + 1.7.0/src/hunspell/hunvisapi.h.in AM_CPPFLAGS += -DHUNSPELL_STATIC @STDLIB_DEBUG@ liblyxhunspell_a_SOURCES = \ - 1.6.2/src/hunspell/affentry.cxx \ - 1.6.2/src/hunspell/affentry.hxx \ - 1.6.2/src/hunspell/affixmgr.cxx \ - 1.6.2/src/hunspell/affixmgr.hxx \ - 1.6.2/src/hunspell/atypes.hxx \ - 1.6.2/src/hunspell/baseaffix.hxx \ - 1.6.2/src/hunspell/csutil.cxx \ - 1.6.2/src/hunspell/csutil.hxx \ - 1.6.2/src/hunspell/filemgr.cxx \ - 1.6.2/src/hunspell/filemgr.hxx \ - 1.6.2/src/hunspell/hashmgr.cxx \ - 1.6.2/src/hunspell/hashmgr.hxx \ - 1.6.2/src/hunspell/htypes.hxx \ - 1.6.2/src/hunspell/hunspell.cxx \ - 1.6.2/src/hunspell/hunspell.h \ - 1.6.2/src/hunspell/hunspell.hxx \ - 1.6.2/src/hunspell/hunvisapi.h \ - 1.6.2/src/hunspell/hunzip.cxx \ - 1.6.2/src/hunspell/hunzip.hxx \ - 1.6.2/src/hunspell/langnum.hxx \ - 1.6.2/src/hunspell/phonet.cxx \ - 1.6.2/src/hunspell/phonet.hxx \ - 1.6.2/src/hunspell/replist.cxx \ - 1.6.2/src/hunspell/replist.hxx \ - 1.6.2/src/hunspell/suggestmgr.cxx \ - 1.6.2/src/hunspell/suggestmgr.hxx \ - 1.6.2/src/hunspell/w_char.hxx \ - 1.6.2/src/parsers/firstparser.cxx \ - 1.6.2/src/parsers/firstparser.hxx \ - 1.6.2/src/parsers/htmlparser.cxx \ - 1.6.2/src/parsers/htmlparser.hxx \ - 1.6.2/src/parsers/latexparser.cxx \ - 1.6.2/src/parsers/latexparser.hxx \ - 1.6.2/src/parsers/manparser.cxx \ - 1.6.2/src/parsers/manparser.hxx \ - 1.6.2/src/parsers/odfparser.cxx \ - 1.6.2/src/parsers/odfparser.hxx \ - 1.6.2/src/parsers/testparser.cxx \ - 1.6.2/src/parsers/textparser.cxx \ - 1.6.2/src/parsers/textparser.hxx \ - 1.6.2/src/parsers/xmlparser.cxx \ - 1.6.2/src/parsers/xmlparser.hxx \ - 1.6.2/src/win_api/config.h - + 1.7.0/src/hunspell/affentry.cxx \ + 
1.7.0/src/hunspell/affentry.hxx \ + 1.7.0/src/hunspell/affixmgr.cxx \ + 1.7.0/src/hunspell/affixmgr.hxx \ + 1.7.0/src/hunspell/atypes.hxx \ + 1.7.0/src/hunspell/baseaffix.hxx \ + 1.7.0/src/hunspell/csutil.cxx \ + 1.7.0/src/hunspell/csutil.hxx \ + 1.7.0/src/hunspell/filemgr.cxx \ + 1.7.0/src/hunspell/filemgr.hxx \ + 1.7.0/src/hunspell/hashmgr.cxx \ + 1.7.0/src/hunspell/hashmgr.hxx \ + 1.7.0/src/hunspell/htypes.hxx \ + 1.7.0/src/hunspell/hunspell.cxx \ + 1.7.0/src/hunspell/hunspell.h \ + 1.7.0/src/hunspell/hunspell.hxx \ + 1.7.0/src/hunspell/hunvisapi.h \ + 1.7.0/src/hunspell/hunzip.cxx \ + 1.7.0/src/hunspell/hunzip.hxx \ + 1.7.0/src/hunspell/langnum.hxx \ + 1.7.0/src/hunspell/phonet.cxx \ + 1.7.0/src/hunspell/phonet.hxx \ + 1.7.0/src/hunspell/replist.cxx \ + 1.7.0/src/hunspell/replist.hxx \ + 1.7.0/src/hunspell/suggestmgr.cxx \ + 1.7.0/src/hunspell/suggestmgr.hxx \ + 1.7.0/src/hunspell/utf_info.hxx \ + 1.7.0/src/hunspell/w_char.hxx \ + 1.7.0/src/parsers/firstparser.cxx \ + 1.7.0/src/parsers/firstparser.hxx \ + 1.7.0/src/parsers/htmlparser.cxx \ + 1.7.0/src/parsers/htmlparser.hxx \ + 1.7.0/src/parsers/latexparser.cxx \ + 1.7.0/src/parsers/latexparser.hxx \ + 1.7.0/src/parsers/manparser.cxx \ + 1.7.0/src/parsers/manparser.hxx \ + 1.7.0/src/parsers/odfparser.cxx \ + 1.7.0/src/parsers/odfparser.hxx \ + 1.7.0/src/parsers/testparser.cxx \ + 1.7.0/src/parsers/textparser.cxx \ + 1.7.0/src/parsers/textparser.hxx \ + 1.7.0/src/parsers/xmlparser.cxx \ + 1.7.0/src/parsers/xmlparser.hxx diff --git a/config/spell.m4 b/config/spell.m4 index 4d949aa572..c40eafce80 100644 --- a/config/spell.m4 +++ b/config/spell.m4 @@ -121,7 +121,7 @@ AC_DEFUN([LYX_USE_INCLUDED_HUNSPELL],[ AC_MSG_RESULT([$lyx_cv_with_included_hunspell]) if test x$lyx_cv_with_included_hunspell = xyes ; then lyx_included_libs="$lyx_included_libs hunspell" - HUNSPELL_CFLAGS='-I$(top_srcdir)/3rdparty/hunspell/1.6.2/src' + HUNSPELL_CFLAGS='-I$(top_srcdir)/3rdparty/hunspell/1.7.0/src' 
HUNSPELL_LIBS='$(top_builddir)/3rdparty/hunspell/liblyxhunspell.a' AC_SUBST(HUNSPELL_CFLAGS) AC_SUBST(HUNSPELL_LIBS)