Bug Summary

File: root/firefox-clang/extensions/spellcheck/hunspell/src/hunspell.cxx
Warning: line 780, column 3
Value stored to 'word' is never read

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name Unified_cpp_hunspell_src0.cpp -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -fhalf-no-semantic-interposition -mframe-pointer=all -relaxed-aliasing -ffp-contract=off -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fdebug-compilation-dir=/root/firefox-clang/obj-x86_64-pc-linux-gnu/extensions/spellcheck/hunspell/src -fcoverage-compilation-dir=/root/firefox-clang/obj-x86_64-pc-linux-gnu/extensions/spellcheck/hunspell/src -resource-dir /usr/lib/llvm-21/lib/clang/21 -include /root/firefox-clang/config/gcc_hidden.h -include /root/firefox-clang/obj-x86_64-pc-linux-gnu/mozilla-config.h -include hunspell_alloc_hooks.h -I /root/firefox-clang/obj-x86_64-pc-linux-gnu/dist/stl_wrappers -I /root/firefox-clang/obj-x86_64-pc-linux-gnu/dist/system_wrappers -U _FORTIFY_SOURCE -D _FORTIFY_SOURCE=2 -D _GLIBCXX_ASSERTIONS -D DEBUG=1 -D HUNSPELL_STATIC -D MOZ_HAS_MOZGLUE -D MOZILLA_INTERNAL_API -D IMPL_LIBXUL -D MOZ_SUPPORT_LEAKCHECKING -D STATIC_EXPORTABLE_JS_API -I /root/firefox-clang/extensions/spellcheck/hunspell/src -I /root/firefox-clang/obj-x86_64-pc-linux-gnu/extensions/spellcheck/hunspell/src -I /root/firefox-clang/extensions/spellcheck/hunspell/glue -I 
/root/firefox-clang/obj-x86_64-pc-linux-gnu/ipc/ipdl/_ipdlheaders -I /root/firefox-clang/ipc/chromium/src -I /root/firefox-clang/obj-x86_64-pc-linux-gnu/dist/include -I /root/firefox-clang/obj-x86_64-pc-linux-gnu/dist/include/nspr -I /root/firefox-clang/obj-x86_64-pc-linux-gnu/dist/include/nss -D MOZILLA_CLIENT -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/14/../../../../include/c++/14 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/14/../../../../include/x86_64-linux-gnu/c++/14 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/14/../../../../include/c++/14/backward -internal-isystem /usr/lib/llvm-21/lib/clang/21/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/14/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-error=pessimizing-move -Wno-error=large-by-value-copy=128 -Wno-error=implicit-int-float-conversion -Wno-error=thread-safety-analysis -Wno-error=tautological-type-limit-compare -Wno-invalid-offsetof -Wno-range-loop-analysis -Wno-deprecated-anon-enum-enum-conversion -Wno-deprecated-enum-enum-conversion -Wno-deprecated-this-capture -Wno-inline-new-delete -Wno-error=deprecated-declarations -Wno-error=array-bounds -Wno-error=free-nonheap-object -Wno-error=atomic-alignment -Wno-error=deprecated-builtins -Wno-psabi -Wno-error=builtin-macro-redefined -Wno-vla-cxx-extension -Wno-unknown-warning-option -Wno-implicit-fallthrough -fdeprecated-macro -ferror-limit 19 -fstrict-flex-arrays=1 -stack-protector 2 -fstack-clash-protection -ftrivial-auto-var-init=pattern -fno-rtti -fgnuc-version=4.2.1 -fskip-odr-check-in-gmf -fno-sized-deallocation -fno-aligned-allocation -vectorize-loops -vectorize-slp -analyzer-checker optin.performance.Padding -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2025-06-27-100320-3286336-1 -x c++ 
Unified_cpp_hunspell_src0.cpp
1/* ***** BEGIN LICENSE BLOCK *****
2 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
3 *
4 * Copyright (C) 2002-2022 Németh László
5 *
6 * The contents of this file are subject to the Mozilla Public License Version
7 * 1.1 (the "License"); you may not use this file except in compliance with
8 * the License. You may obtain a copy of the License at
9 * http://www.mozilla.org/MPL/
10 *
11 * Software distributed under the License is distributed on an "AS IS" basis,
12 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13 * for the specific language governing rights and limitations under the
14 * License.
15 *
16 * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
17 *
18 * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
19 * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
20 * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
21 * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
22 * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
23 *
24 * Alternatively, the contents of this file may be used under the terms of
25 * either the GNU General Public License Version 2 or later (the "GPL"), or
26 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
27 * in which case the provisions of the GPL or the LGPL are applicable instead
28 * of those above. If you wish to allow use of your version of this file only
29 * under the terms of either the GPL or the LGPL, and not to allow others to
30 * use your version of this file under the terms of the MPL, indicate your
31 * decision by deleting the provisions above and replace them with the notice
32 * and other provisions required by the GPL or the LGPL. If you do not delete
33 * the provisions above, a recipient may use your version of this file under
34 * the terms of any one of the MPL, the GPL or the LGPL.
35 *
36 * ***** END LICENSE BLOCK ***** */
37/*
38 * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
39 * And Contributors. All rights reserved.
40 *
41 * Redistribution and use in source and binary forms, with or without
42 * modification, are permitted provided that the following conditions
43 * are met:
44 *
45 * 1. Redistributions of source code must retain the above copyright
46 * notice, this list of conditions and the following disclaimer.
47 *
48 * 2. Redistributions in binary form must reproduce the above copyright
49 * notice, this list of conditions and the following disclaimer in the
50 * documentation and/or other materials provided with the distribution.
51 *
52 * 3. All modifications to the source code must be clearly marked as
53 * such. Binary redistributions based on modified source code
54 * must be clearly marked as modified versions in the documentation
55 * and/or other materials provided with the distribution.
56 *
57 * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
58 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
59 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
60 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
61 * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
62 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
63 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
64 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
65 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
66 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
67 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
68 * SUCH DAMAGE.
69 */
70
71#include <stdlib.h>
72#include <string.h>
73#include <stdio.h>
74#include <time.h>
75
76#include "affixmgr.hxx"
77#include "hunspell.hxx"
78#include "suggestmgr.hxx"
79#include "hunspell.h"
80#include "csutil.hxx"
81
82#include <limits>
83#include <string>
84
85#define MAXWORDUTF8LEN(100 * 3) (MAXWORDLEN100 * 3)
86
87class HunspellImpl
88{
89public:
90 HunspellImpl(const char* affpath, const char* dpath, const char* key = NULL__null);
91 ~HunspellImpl();
92 int add_dic(const char* dpath, const char* key = NULL__null);
93 std::vector<std::string> suffix_suggest(const std::string& root_word);
94 std::vector<std::string> generate(const std::string& word, const std::vector<std::string>& pl);
95 std::vector<std::string> generate(const std::string& word, const std::string& pattern);
96 std::vector<std::string> stem(const std::string& word);
97 std::vector<std::string> stem(const std::vector<std::string>& morph);
98 std::vector<std::string> analyze(const std::string& word);
99 int get_langnum() const;
100 bool input_conv(const std::string& word, std::string& dest);
101 bool spell(const std::string& word, int* info = NULL__null, std::string* root = NULL__null);
102 std::vector<std::string> suggest(const std::string& word);
103 const std::string& get_wordchars_cpp() const;
104 const std::vector<w_char>& get_wordchars_utf16() const;
105 const std::string& get_dict_encoding() const;
106 int add(const std::string& word);
107 int add_with_affix(const std::string& word, const std::string& example);
108 int remove(const std::string& word);
109 const std::string& get_version_cpp() const;
110 struct cs_info* get_csconv();
111
112 int spell(const char* word, int* info = NULL__null, char** root = NULL__null);
113 int suggest(char*** slst, const char* word);
114 int suffix_suggest(char*** slst, const char* root_word);
115 void free_list(char*** slst, int n);
116 char* get_dic_encoding();
117 int analyze(char*** slst, const char* word);
118 int stem(char*** slst, const char* word);
119 int stem(char*** slst, char** morph, int n);
120 int generate(char*** slst, const char* word, const char* word2);
121 int generate(char*** slst, const char* word, char** desc, int n);
122 const char* get_wordchars() const;
123 const char* get_version() const;
124 int input_conv(const char* word, char* dest, size_t destsize);
125
126private:
127 AffixMgr* pAMgr;
128 std::vector<HashMgr*> m_HMgrs;
129 SuggestMgr* pSMgr;
130 char* affixpath;
131 std::string encoding;
132 struct cs_info* csconv;
133 int langnum;
134 int utf8;
135 int complexprefixes;
136 std::vector<std::string> wordbreak;
137
138private:
139 std::vector<std::string> analyze_internal(const std::string& word);
140 bool spell_internal(const std::string& word, int* info = NULL__null, std::string* root = NULL__null);
141 std::vector<std::string> suggest_internal(const std::string& word,
142 bool& capitalized, size_t& abbreviated, int& captype);
143 void cleanword(std::string& dest, const std::string&, int* pcaptype, int* pabbrev);
144 size_t cleanword2(std::string& dest,
145 std::vector<w_char>& dest_u,
146 const std::string& src,
147 int* pcaptype,
148 size_t* pabbrev);
149 void clean_ignore(std::string& dest, const std::string& src);
150 void mkinitcap(std::string& u8);
151 int mkinitcap2(std::string& u8, std::vector<w_char>& u16);
152 int mkinitsmall2(std::string& u8, std::vector<w_char>& u16);
153 void mkallcap(std::string& u8);
154 int mkallsmall2(std::string& u8, std::vector<w_char>& u16);
155 struct hentry* checkword(const std::string& source, int* info, std::string* root);
156 std::string sharps_u8_l1(const std::string& source);
157 hentry*
158 spellsharps(std::string& base, size_t start_pos, int, int, int* info, std::string* root);
159 int is_keepcase(const hentry* rv);
160 void insert_sug(std::vector<std::string>& slst, const std::string& word);
161 void cat_result(std::string& result, const std::string& st);
162 std::vector<std::string> spellml(const std::string& word);
163 std::string get_xml_par(const std::string& par, std::string::size_type pos);
164 std::string::size_type get_xml_pos(const std::string& s, std::string::size_type pos, const char* attr);
165 std::vector<std::string> get_xml_list(const std::string& list, std::string::size_type pos, const char* tag);
166 int check_xml_par(const std::string& q, std::string::size_type pos, const char* attr, const char* value);
167private:
168 HunspellImpl(const HunspellImpl&);
169 HunspellImpl& operator=(const HunspellImpl&);
170};
171
172HunspellImpl::HunspellImpl(const char* affpath, const char* dpath, const char* key) {
173 csconv = NULL__null;
174 utf8 = 0;
175 complexprefixes = 0;
176 affixpath = mystrdup(affpath);
177
178 /* first set up the hash manager */
179 m_HMgrs.push_back(new HashMgr(dpath, affpath, key));
180
181 /* next set up the affix manager */
182 /* it needs access to the hash manager lookup methods */
183 pAMgr = new AffixMgr(affpath, m_HMgrs, key);
184
185 /* get the preferred try string and the dictionary */
186 /* encoding from the Affix Manager for that dictionary */
187 char* try_string = pAMgr->get_try_string();
188 encoding = pAMgr->get_encoding();
189 langnum = pAMgr->get_langnum();
190 utf8 = pAMgr->get_utf8();
191 if (!utf8)
192 csconv = get_current_cs(encoding);
193 complexprefixes = pAMgr->get_complexprefixes();
194 wordbreak = pAMgr->get_breaktable();
195
196 /* and finally set up the suggestion manager */
197 pSMgr = new SuggestMgr(try_string, MAXSUGGESTION15, pAMgr);
198 if (try_string)
199 free(try_string)HunspellAllocator::CountingFree(try_string);
200}
201
202HunspellImpl::~HunspellImpl() {
203 delete pSMgr;
204 delete pAMgr;
205 for (size_t i = 0; i < m_HMgrs.size(); ++i)
206 delete m_HMgrs[i];
207 pSMgr = NULL__null;
208 pAMgr = NULL__null;
209#ifdef MOZILLA_CLIENT1
210 delete[] csconv;
211#endif
212 csconv = NULL__null;
213 if (affixpath)
214 free(affixpath)HunspellAllocator::CountingFree(affixpath);
215 affixpath = NULL__null;
216}
217
218// load extra dictionaries
219int HunspellImpl::add_dic(const char* dpath, const char* key) {
220 if (!affixpath)
221 return 1;
222 m_HMgrs.push_back(new HashMgr(dpath, affixpath, key));
223 return 0;
224}
225
226
227// make a copy of src at dest while removing all characters
228// specified in IGNORE rule
229void HunspellImpl::clean_ignore(std::string& dest,
230 const std::string& src) {
231 dest.clear();
232 dest.assign(src);
233 const char* ignoredchars = pAMgr ? pAMgr->get_ignore() : NULL__null;
234 if (ignoredchars != NULL__null) {
235 if (utf8) {
236 const std::vector<w_char>& ignoredchars_utf16 =
237 pAMgr->get_ignore_utf16();
238 remove_ignored_chars_utf(dest, ignoredchars_utf16);
239 } else {
240 remove_ignored_chars(dest, ignoredchars);
241 }
242 }
243}
244
245
246// make a copy of src at destination while removing all leading
247// blanks and removing any trailing periods after recording
248// their presence with the abbreviation flag
249// also since already going through character by character,
250// set the capitalization type
251// return the length of the "cleaned" (and UTF-8 encoded) word
252
253size_t HunspellImpl::cleanword2(std::string& dest,
254 std::vector<w_char>& dest_utf,
255 const std::string& src,
256 int* pcaptype,
257 size_t* pabbrev) {
258 dest.clear();
259 dest_utf.clear();
260
261 // remove IGNORE characters from the string
262 std::string w2;
263 clean_ignore(w2, src);
264
265 const char* q = w2.c_str();
266
267 // first skip over any leading blanks
268 while (*q == ' ')
269 ++q;
270
271 // now strip off any trailing periods (recording their presence)
272 *pabbrev = 0;
273 int nl = strlen(q);
274 while ((nl > 0) && (*(q + nl - 1) == '.')) {
275 nl--;
276 (*pabbrev)++;
277 }
278
279 // if no characters are left it can't be capitalized
280 if (nl <= 0) {
281 *pcaptype = NOCAP0;
282 return 0;
283 }
284
285 dest.append(q, nl);
286 nl = dest.size();
287 if (utf8) {
288 u8_u16(dest_utf, dest);
289 *pcaptype = get_captype_utf8(dest_utf, langnum);
290 } else {
291 *pcaptype = get_captype(dest, csconv);
292 }
293 return nl;
294}
295
296void HunspellImpl::cleanword(std::string& dest,
297 const std::string& src,
298 int* pcaptype,
299 int* pabbrev) {
300 dest.clear();
301 const unsigned char* q = (const unsigned char*)src.c_str();
302 int firstcap = 0;
303
304 // first skip over any leading blanks
305 while (*q == ' ')
306 ++q;
307
308 // now strip off any trailing periods (recording their presence)
309 *pabbrev = 0;
310 int nl = strlen((const char*)q);
311 while ((nl > 0) && (*(q + nl - 1) == '.')) {
312 nl--;
313 (*pabbrev)++;
314 }
315
316 // if no characters are left it can't be capitalized
317 if (nl <= 0) {
318 *pcaptype = NOCAP0;
319 return;
320 }
321
322 // now determine the capitalization type of the first nl letters
323 int ncap = 0;
324 int nneutral = 0;
325 int nc = 0;
326
327 if (!utf8) {
328 while (nl > 0) {
329 nc++;
330 if (csconv[(*q)].ccase)
331 ncap++;
332 if (csconv[(*q)].cupper == csconv[(*q)].clower)
333 nneutral++;
334 dest.push_back(*q++);
335 nl--;
336 }
337 // remember to terminate the destination string
338 firstcap = csconv[static_cast<unsigned char>(dest[0])].ccase;
339 } else {
340 std::vector<w_char> t;
341 u8_u16(t, src);
342 for (size_t i = 0; i < t.size(); ++i) {
343 unsigned short idx = (t[i].h << 8) + t[i].l;
344 unsigned short low = unicodetolower(idx, langnum);
345 if (idx != low)
346 ncap++;
347 if (unicodetoupper(idx, langnum) == low)
348 nneutral++;
349 }
350 u16_u8(dest, t);
351 if (ncap) {
352 unsigned short idx = (t[0].h << 8) + t[0].l;
353 firstcap = (idx != unicodetolower(idx, langnum));
354 }
355 }
356
357 // now finally set the captype
358 if (ncap == 0) {
359 *pcaptype = NOCAP0;
360 } else if ((ncap == 1) && firstcap) {
361 *pcaptype = INITCAP1;
362 } else if ((ncap == nc) || ((ncap + nneutral) == nc)) {
363 *pcaptype = ALLCAP2;
364 } else if ((ncap > 1) && firstcap) {
365 *pcaptype = HUHINITCAP4;
366 } else {
367 *pcaptype = HUHCAP3;
368 }
369}
370
371void HunspellImpl::mkallcap(std::string& u8) {
372 if (utf8) {
373 std::vector<w_char> u16;
374 u8_u16(u16, u8);
375 ::mkallcap_utf(u16, langnum);
376 u16_u8(u8, u16);
377 } else {
378 ::mkallcap(u8, csconv);
379 }
380}
381
382int HunspellImpl::mkallsmall2(std::string& u8, std::vector<w_char>& u16) {
383 if (utf8) {
384 ::mkallsmall_utf(u16, langnum);
385 u16_u8(u8, u16);
386 } else {
387 ::mkallsmall(u8, csconv);
388 }
389 return u8.size();
390}
391
392// convert UTF-8 sharp S codes to latin 1
393std::string HunspellImpl::sharps_u8_l1(const std::string& source) {
394 std::string dest(source);
395 mystrrep(dest, "\xC3\x9F", "\xDF");
396 return dest;
397}
398
399// recursive search for right ss - sharp s permutations
400hentry* HunspellImpl::spellsharps(std::string& base,
401 size_t n_pos,
402 int n,
403 int repnum,
404 int* info,
405 std::string* root) {
406 size_t pos = base.find("ss", n_pos);
407 if (pos != std::string::npos && (n < MAXSHARPS5)) {
408 base[pos] = '\xC3';
409 base[pos + 1] = '\x9F';
410 hentry* h = spellsharps(base, pos + 2, n + 1, repnum + 1, info, root);
411 if (h)
412 return h;
413 base[pos] = 's';
414 base[pos + 1] = 's';
415 h = spellsharps(base, pos + 2, n + 1, repnum, info, root);
416 if (h)
417 return h;
418 } else if (repnum > 0) {
419 if (utf8)
420 return checkword(base, info, root);
421 std::string tmp(sharps_u8_l1(base));
422 return checkword(tmp, info, root);
423 }
424 return NULL__null;
425}
426
427int HunspellImpl::is_keepcase(const hentry* rv) {
428 return pAMgr && rv->astr && pAMgr->get_keepcase() &&
429 TESTAFF(rv->astr, pAMgr->get_keepcase(), rv->alen)(std::binary_search(rv->astr, rv->astr + rv->alen, pAMgr
->get_keepcase()))
;
430}
431
432/* insert a word to the beginning of the suggestion array */
433void HunspellImpl::insert_sug(std::vector<std::string>& slst, const std::string& word) {
434 slst.insert(slst.begin(), word);
435}
436
437bool HunspellImpl::spell(const std::string& word, int* info, std::string* root) {
438 bool r = spell_internal(word, info, root);
439 if (r && root) {
440 // output conversion
441 RepList* rl = (pAMgr) ? pAMgr->get_oconvtable() : NULL__null;
442 if (rl) {
443 std::string wspace;
444 if (rl->conv(*root, wspace)) {
445 *root = wspace;
446 }
447 }
448 }
449 return r;
450}
451
452bool HunspellImpl::spell_internal(const std::string& word, int* info, std::string* root) {
453 struct hentry* rv = NULL__null;
454
455 int info2 = 0;
456 if (!info)
457 info = &info2;
458 else
459 *info = 0;
460
461 // Hunspell supports XML input of the simplified API (see manual)
462 if (word == SPELL_XML"<?xml?>")
463 return true;
464 if (utf8) {
465 if (word.size() >= MAXWORDUTF8LEN(100 * 3))
466 return false;
467 } else {
468 if (word.size() >= MAXWORDLEN100)
469 return false;
470 }
471 int captype = NOCAP0;
472 size_t abbv = 0;
473 size_t wl = 0;
474
475 std::string scw;
476 std::vector<w_char> sunicw;
477
478 // input conversion
479 RepList* rl = pAMgr ? pAMgr->get_iconvtable() : NULL__null;
480 {
481 std::string wspace;
482
483 bool convstatus = rl ? rl->conv(word, wspace) : false;
484 if (convstatus)
485 wl = cleanword2(scw, sunicw, wspace, &captype, &abbv);
486 else
487 wl = cleanword2(scw, sunicw, word, &captype, &abbv);
488 }
489
490#ifdef MOZILLA_CLIENT1
491 // accept the abbreviated words without dots
492 // workaround for the incomplete tokenization of Mozilla
493 abbv = 1;
494#endif
495
496 if (wl == 0 || m_HMgrs.empty())
497 return true;
498 if (root)
499 root->clear();
500
501 // allow numbers with dots, dashes and commas (but forbid double separators:
502 // "..", "--" etc.)
503 enum { NBEGIN, NNUM, NSEP };
504 int nstate = NBEGIN;
505 size_t i;
506
507 for (i = 0; (i < wl); i++) {
508 if ((scw[i] <= '9') && (scw[i] >= '0')) {
509 nstate = NNUM;
510 } else if ((scw[i] == ',') || (scw[i] == '.') || (scw[i] == '-')) {
511 if ((nstate == NSEP) || (i == 0))
512 break;
513 nstate = NSEP;
514 } else
515 break;
516 }
517 if ((i == wl) && (nstate == NNUM))
518 return true;
519
520 switch (captype) {
521 case HUHCAP3:
522 /* FALLTHROUGH */
523 case HUHINITCAP4:
524 *info |= SPELL_ORIGCAP(1 << 5);
525 /* FALLTHROUGH */
526 case NOCAP0:
527 rv = checkword(scw, info, root);
528 if ((abbv) && !(rv)) {
529 std::string u8buffer(scw);
530 u8buffer.push_back('.');
531 rv = checkword(u8buffer, info, root);
532 }
533 break;
534 case ALLCAP2: {
535 *info |= SPELL_ORIGCAP(1 << 5);
536 rv = checkword(scw, info, root);
537 if (rv)
538 break;
539 if (abbv) {
540 std::string u8buffer(scw);
541 u8buffer.push_back('.');
542 rv = checkword(u8buffer, info, root);
543 if (rv)
544 break;
545 }
546 // Spec. prefix handling for Catalan, French, Italian:
547 // prefixes separated by apostrophe (SANT'ELIA -> Sant'+Elia).
548 size_t apos = pAMgr ? scw.find('\'') : std::string::npos;
549 if (apos != std::string::npos) {
550 mkallsmall2(scw, sunicw);
551 //conversion may result in string with different len to pre-mkallsmall2
552 //so re-scan
553 if (apos != std::string::npos && apos < scw.size() - 1) {
554 std::string part1 = scw.substr(0, apos+1);
555 std::string part2 = scw.substr(apos+1);
556 if (utf8) {
557 std::vector<w_char> part1u, part2u;
558 u8_u16(part1u, part1);
559 u8_u16(part2u, part2);
560 mkinitcap2(part2, part2u);
561 scw = part1 + part2;
562 sunicw = part1u;
563 sunicw.insert(sunicw.end(), part2u.begin(), part2u.end());
564 rv = checkword(scw, info, root);
565 if (rv)
566 break;
567 } else {
568 mkinitcap2(part2, sunicw);
569 scw = part1 + part2;
570 rv = checkword(scw, info, root);
571 if (rv)
572 break;
573 }
574 mkinitcap2(scw, sunicw);
575 rv = checkword(scw, info, root);
576 if (rv)
577 break;
578 }
579 }
580 if (pAMgr && pAMgr->get_checksharps() && scw.find("SS") != std::string::npos) {
581
582 mkallsmall2(scw, sunicw);
583 std::string u8buffer(scw);
584 rv = spellsharps(u8buffer, 0, 0, 0, info, root);
585 if (!rv) {
586 mkinitcap2(scw, sunicw);
587 rv = spellsharps(scw, 0, 0, 0, info, root);
588 }
589 if ((abbv) && !(rv)) {
590 u8buffer.push_back('.');
591 rv = spellsharps(u8buffer, 0, 0, 0, info, root);
592 if (!rv) {
593 u8buffer = std::string(scw);
594 u8buffer.push_back('.');
595 rv = spellsharps(u8buffer, 0, 0, 0, info, root);
596 }
597 }
598 if (rv)
599 break;
600 }
601 }
602 /* FALLTHROUGH */
603 case INITCAP1: {
604 // handle special capitalization of dotted I
605 bool Idot = (utf8 && (unsigned char) scw[0] == 0xc4 && (unsigned char) scw[1] == 0xb0);
606 *info |= SPELL_ORIGCAP(1 << 5);
607 if (captype == ALLCAP2) {
608 mkallsmall2(scw, sunicw);
609 mkinitcap2(scw, sunicw);
610 if (Idot)
611 scw.replace(0, 1, "\xc4\xb0");
612 }
613 if (captype == INITCAP1)
614 *info |= SPELL_INITCAP(1 << 4);
615 rv = checkword(scw, info, root);
616 if (captype == INITCAP1)
617 *info &= ~SPELL_INITCAP(1 << 4);
618 // forbid bad capitalization
619 // (for example, ijs -> Ijs instead of IJs in Dutch)
620 // use explicit forms in dic: Ijs/F (F = FORBIDDENWORD flag)
621 if (*info & SPELL_FORBIDDEN(1 << 1)) {
622 rv = NULL__null;
623 break;
624 }
625 if (rv && is_keepcase(rv) && (captype == ALLCAP2))
626 rv = NULL__null;
627 if (rv || (Idot && langnum != LANG_az && langnum != LANG_tr && langnum != LANG_crh))
628 break;
629
630 mkallsmall2(scw, sunicw);
631 std::string u8buffer(scw);
632 mkinitcap2(scw, sunicw);
633
634 rv = checkword(u8buffer, info, root);
635 if (abbv && !rv) {
636 u8buffer.push_back('.');
637 rv = checkword(u8buffer, info, root);
638 if (!rv) {
639 u8buffer = scw;
640 u8buffer.push_back('.');
641 if (captype == INITCAP1)
642 *info |= SPELL_INITCAP(1 << 4);
643 rv = checkword(u8buffer, info, root);
644 if (captype == INITCAP1)
645 *info &= ~SPELL_INITCAP(1 << 4);
646 if (rv && is_keepcase(rv) && (captype == ALLCAP2))
647 rv = NULL__null;
648 break;
649 }
650 }
651 if (rv && is_keepcase(rv) &&
652 ((captype == ALLCAP2) ||
653 // if CHECKSHARPS: KEEPCASE words with \xDF are allowed
654 // in INITCAP form, too.
655 !(pAMgr->get_checksharps() &&
656 ((utf8 && u8buffer.find("\xC3\x9F") != std::string::npos) ||
657 (!utf8 && u8buffer.find('\xDF') != std::string::npos)))))
658 rv = NULL__null;
659 break;
660 }
661 }
662
663 if (rv) {
664 if (pAMgr && pAMgr->get_warn() && rv->astr &&
665 TESTAFF(rv->astr, pAMgr->get_warn(), rv->alen)(std::binary_search(rv->astr, rv->astr + rv->alen, pAMgr
->get_warn()))
) {
666 *info |= SPELL_WARN(1 << 6);
667 if (pAMgr->get_forbidwarn())
668 return false;
669 return true;
670 }
671 return true;
672 }
673
674 // recursive breaking at break points
675 if (!wordbreak.empty() && !(*info & SPELL_FORBIDDEN(1 << 1))) {
676
677 int nbr = 0;
678 wl = scw.size();
679
680 // calculate break points for recursion limit
681 for (size_t j = 0; j < wordbreak.size(); ++j) {
682 size_t pos = 0;
683 while ((pos = scw.find(wordbreak[j], pos)) != std::string::npos) {
684 ++nbr;
685 pos += wordbreak[j].size();
686 }
687 }
688 if (nbr >= 10)
689 return false;
690
691 // check boundary patterns (^begin and end$)
692 for (size_t j = 0; j < wordbreak.size(); ++j) {
693 size_t plen = wordbreak[j].size();
694 if (plen == 1 || plen > wl)
695 continue;
696
697 if (wordbreak[j][0] == '^' &&
698 scw.compare(0, plen - 1, wordbreak[j], 1, plen -1) == 0 && spell(scw.substr(plen - 1)))
699 return true;
700
701 if (wordbreak[j][plen - 1] == '$' &&
702 scw.compare(wl - plen + 1, plen - 1, wordbreak[j], 0, plen - 1) == 0) {
703 std::string suffix(scw.substr(wl - plen + 1));
704 scw.resize(wl - plen + 1);
705 if (spell(scw))
706 return true;
707 scw.append(suffix);
708 }
709 }
710
711 // other patterns
712 for (size_t j = 0; j < wordbreak.size(); ++j) {
713 size_t plen = wordbreak[j].size();
714 size_t found = scw.find(wordbreak[j]);
715 if ((found > 0) && (found < wl - plen)) {
716 size_t found2 = scw.find(wordbreak[j], found + 1);
717 // try to break at the second occurance
718 // to recognize dictionary words with wordbreak
719 if (found2 > 0 && (found2 < wl - plen))
720 found = found2;
721 if (!spell(scw.substr(found + plen)))
722 continue;
723 std::string suffix(scw.substr(found));
724 scw.resize(found);
725 // examine 2 sides of the break point
726 if (spell(scw))
727 return true;
728 scw.append(suffix);
729
730 // LANG_hu: spec. dash rule
731 if (langnum == LANG_hu && wordbreak[j] == "-") {
732 suffix = scw.substr(found + 1);
733 scw.resize(found + 1);
734 if (spell(scw))
735 return true; // check the first part with dash
736 scw.append(suffix);
737 }
738 // end of LANG specific region
739 }
740 }
741
742 // other patterns (break at first break point)
743 for (size_t j = 0; j < wordbreak.size(); ++j) {
744 size_t plen = wordbreak[j].size();
745 size_t found = scw.find(wordbreak[j]);
746 if ((found > 0) && (found < wl - plen)) {
747 if (!spell(scw.substr(found + plen)))
748 continue;
749 std::string suffix(scw.substr(found));
750 scw.resize(found);
751 // examine 2 sides of the break point
752 if (spell(scw))
753 return true;
754 scw.append(suffix);
755
756 // LANG_hu: spec. dash rule
757 if (langnum == LANG_hu && wordbreak[j] == "-") {
758 suffix = scw.substr(found + 1);
759 scw.resize(found + 1);
760 if (spell(scw))
761 return true; // check the first part with dash
762 scw.append(suffix);
763 }
764 // end of LANG specific region
765 }
766 }
767 }
768
769 return false;
770}
771
772struct hentry* HunspellImpl::checkword(const std::string& w, int* info, std::string* root) {
773 std::string w2;
774 const char* word;
775 int len;
776
777 // remove IGNORE characters from the string
778 clean_ignore(w2, w);
779
780 word = w2.c_str();
Value stored to 'word' is never read
781 len = w2.size();
782
783 if (!len)
784 return NULL__null;
785
786 // word reversing wrapper for complex prefixes
787 if (complexprefixes) {
788 if (utf8)
789 reverseword_utf(w2);
790 else
791 reverseword(w2);
792 }
793
794 word = w2.c_str();
795
796 // look word in hash table
797 struct hentry* he = NULL__null;
798 for (size_t i = 0; (i < m_HMgrs.size()) && !he; ++i) {
799 he = m_HMgrs[i]->lookup(word);
800
801 // check forbidden and onlyincompound words
802 if ((he) && (he->astr) && (pAMgr) &&
803 TESTAFF(he->astr, pAMgr->get_forbiddenword(), he->alen)(std::binary_search(he->astr, he->astr + he->alen, pAMgr
->get_forbiddenword()))
) {
804 if (info)
805 *info |= SPELL_FORBIDDEN(1 << 1);
806 // LANG_hu section: set dash information for suggestions
807 if (langnum == LANG_hu) {
808 if (pAMgr->get_compoundflag() &&
809 TESTAFF(he->astr, pAMgr->get_compoundflag(), he->alen)(std::binary_search(he->astr, he->astr + he->alen, pAMgr
->get_compoundflag()))
) {
810 if (info)
811 *info |= SPELL_COMPOUND(1 << 0);
812 }
813 }
814 return NULL__null;
815 }
816
817 // he = next not needaffix, onlyincompound homonym or onlyupcase word
818 while (he && (he->astr) && pAMgr &&
819 ((pAMgr->get_needaffix() &&
820 TESTAFF(he->astr, pAMgr->get_needaffix(), he->alen)(std::binary_search(he->astr, he->astr + he->alen, pAMgr
->get_needaffix()))
) ||
821 (pAMgr->get_onlyincompound() &&
822 TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen)(std::binary_search(he->astr, he->astr + he->alen, pAMgr
->get_onlyincompound()))
) ||
823 (info && (*info & SPELL_INITCAP(1 << 4)) &&
824 TESTAFF(he->astr, ONLYUPCASEFLAG, he->alen)(std::binary_search(he->astr, he->astr + he->alen, 65511
))
)))
825 he = he->next_homonym;
826 }
827
828 // check with affixes
829 if (!he && pAMgr) {
830 // try stripping off affixes */
831 he = pAMgr->affix_check(word, len, 0);
832
833 // check compound restriction and onlyupcase
834 if (he && he->astr &&
835 ((pAMgr->get_onlyincompound() &&
836 TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen)(std::binary_search(he->astr, he->astr + he->alen, pAMgr
->get_onlyincompound()))
) ||
837 (info && (*info & SPELL_INITCAP(1 << 4)) &&
838 TESTAFF(he->astr, ONLYUPCASEFLAG, he->alen)(std::binary_search(he->astr, he->astr + he->alen, 65511
))
))) {
839 he = NULL__null;
840 }
841
842 if (he) {
843 if ((he->astr) && (pAMgr) &&
844 TESTAFF(he->astr, pAMgr->get_forbiddenword(), he->alen)(std::binary_search(he->astr, he->astr + he->alen, pAMgr
->get_forbiddenword()))
) {
845 if (info)
846 *info |= SPELL_FORBIDDEN(1 << 1);
847 return NULL__null;
848 }
849 if (root) {
850 root->assign(he->word);
851 if (complexprefixes) {
852 if (utf8)
853 reverseword_utf(*root);
854 else
855 reverseword(*root);
856 }
857 }
858 // try check compound word
859 } else if (pAMgr->get_compound()) {
860 struct hentry* rwords[100]; // buffer for COMPOUND pattern checking
861 he = pAMgr->compound_check(word, 0, 0, 100, 0, NULL__null, (hentry**)&rwords, 0, 0, info);
862 // LANG_hu section: `moving rule' with last dash
863 if ((!he) && (langnum == LANG_hu) && (word[len - 1] == '-')) {
864 std::string dup(word, len - 1);
865 he = pAMgr->compound_check(dup, -5, 0, 100, 0, NULL__null, (hentry**)&rwords, 1, 0, info);
866 }
867 // end of LANG specific region
868 if (he) {
869 if (root) {
870 root->assign(he->word);
871 if (complexprefixes) {
872 if (utf8)
873 reverseword_utf(*root);
874 else
875 reverseword(*root);
876 }
877 }
878 if (info)
879 *info |= SPELL_COMPOUND(1 << 0);
880 }
881 }
882 }
883
884 return he;
885}
886
887std::vector<std::string> HunspellImpl::suggest(const std::string& word) {
888 bool capwords;
889 size_t abbv;
890 int captype;
891 std::vector<std::string> slst = suggest_internal(word, capwords, abbv, captype);
892 // word reversing wrapper for complex prefixes
893 if (complexprefixes) {
894 for (size_t j = 0; j < slst.size(); ++j) {
895 if (utf8)
896 reverseword_utf(slst[j]);
897 else
898 reverseword(slst[j]);
899 }
900 }
901
902 // capitalize
903 if (capwords)
904 for (size_t j = 0; j < slst.size(); ++j) {
905 mkinitcap(slst[j]);
906 }
907
908 // expand suggestions with dot(s)
909 if (abbv && pAMgr && pAMgr->get_sugswithdots()) {
910 for (size_t j = 0; j < slst.size(); ++j) {
911 slst[j].append(word.substr(word.size() - abbv));
912 }
913 }
914
915 // remove bad capitalized and forbidden forms
916 if (pAMgr && (pAMgr->get_keepcase() || pAMgr->get_forbiddenword())) {
917 switch (captype) {
918 case INITCAP1:
919 case ALLCAP2: {
920 size_t l = 0;
921 for (size_t j = 0; j < slst.size(); ++j) {
922 if (slst[j].find(' ') == std::string::npos && !spell(slst[j])) {
923 std::string s;
924 std::vector<w_char> w;
925 if (utf8) {
926 u8_u16(w, slst[j]);
927 } else {
928 s = slst[j];
929 }
930 mkallsmall2(s, w);
931 if (spell(s)) {
932 slst[l] = s;
933 ++l;
934 } else {
935 mkinitcap2(s, w);
936 if (spell(s)) {
937 slst[l] = s;
938 ++l;
939 }
940 }
941 } else {
942 slst[l] = slst[j];
943 ++l;
944 }
945 }
946 slst.resize(l);
947 }
948 }
949 }
950
951 // remove duplications
952 size_t l = 0;
953 for (size_t j = 0; j < slst.size(); ++j) {
954 slst[l] = slst[j];
955 for (size_t k = 0; k < l; ++k) {
956 if (slst[k] == slst[j]) {
957 --l;
958 break;
959 }
960 }
961 ++l;
962 }
963 slst.resize(l);
964
965 // output conversion
966 RepList* rl = (pAMgr) ? pAMgr->get_oconvtable() : NULL__null;
967 if (rl) {
968 for (size_t i = 0; rl && i < slst.size(); ++i) {
969 std::string wspace;
970 if (rl->conv(slst[i], wspace)) {
971 slst[i] = wspace;
972 }
973 }
974 }
975 return slst;
976}
977
// Core suggestion routine behind suggest().
//
// The three out-parameters are always reset first (captype = NOCAP, abbv = 0,
// capwords = false). An empty list is returned when pSMgr is null, when no
// dictionary manager is loaded, when the word reaches the MAXWORDLEN /
// MAXWORDUTF8LEN limit, or when cleanword2() reports a zero length. Input that
// starts with the SPELL_XML prefix is handed to spellml() instead.
//
// Otherwise candidates come from pSMgr->suggest(), using one strategy per
// capitalization class, optionally followed by pSMgr->ngsuggest() and by a
// per-chunk retry for words that contain '-'. After most passes the function
// returns early with what it has collected so far once clock() exceeds
// timelimit + TIMELIMIT_GLOBAL.
978std::vector<std::string> HunspellImpl::suggest_internal(const std::string& word,
979 bool& capwords, size_t& abbv, int& captype) {
980 captype = NOCAP0;
981 abbv = 0;
982 capwords = false;
983
984 std::vector<std::string> slst;
985
986 int onlycmpdsug = 0;
987 if (!pSMgr || m_HMgrs.empty())
988 return slst;
989
990 // process XML input of the simplified API (see manual)
991 if (word.compare(0, sizeof(SPELL_XML"<?xml?>") - 3, SPELL_XML"<?xml?>", sizeof(SPELL_XML"<?xml?>") - 3) == 0) {
992 return spellml(word);
993 }
994 if (utf8) {
995 if (word.size() >= MAXWORDUTF8LEN(100 * 3))
996 return slst;
997 } else {
998 if (word.size() >= MAXWORDLEN100)
999 return slst;
1000 }
1001 size_t wl = 0;
1002
1003 std::string scw;
1004 std::vector<w_char> sunicw;
1005
1006 // input conversion
1007 RepList* rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL__null;
1008 {
1009 std::string wspace;
1010
// use the converted word only when the conversion table reports success
1011 bool convstatus = rl ? rl->conv(word, wspace) : false;
1012 if (convstatus)
1013 wl = cleanword2(scw, sunicw, wspace, &captype, &abbv);
1014 else
1015 wl = cleanword2(scw, sunicw, word, &captype, &abbv);
1016
1017 if (wl == 0)
1018 return slst;
1019 }
1020
// accumulates the return values of the pSMgr->suggest() calls; the ngram and
// dash fallbacks further down only run while it is still false
1021 bool good = false;
1022
1023 clock_t timelimit;
1024 // initialize in every suggestion call
1025 timelimit = clock();
1026
1027 // check capitalized form for FORCEUCASE
1028 if (pAMgr && captype == NOCAP0 && pAMgr->get_forceucase()) {
1029 int info = SPELL_ORIGCAP(1 << 5);
1030 if (checkword(scw, &info, NULL__null)) {
1031 std::string form(scw);
1032 mkinitcap(form);
1033 slst.push_back(form);
1034 return slst;
1035 }
1036 }
1037
// regular suggestion pass, one strategy per capitalization class
1038 switch (captype) {
1039 case NOCAP0: {
1040 good |= pSMgr->suggest(slst, scw.c_str(), &onlycmpdsug);
1041 if (clock() > timelimit + TIMELIMIT_GLOBAL(((__clock_t) 1000000) / 4))
1042 return slst;
1043 if (abbv) {
// also try the word with one trailing dot re-attached
1044 std::string wspace(scw);
1045 wspace.push_back('.');
1046 good |= pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug);
1047 if (clock() > timelimit + TIMELIMIT_GLOBAL(((__clock_t) 1000000) / 4))
1048 return slst;
1049 }
1050 break;
1051 }
1052
1053 case INITCAP1: {
1054 capwords = true;
1055 good |= pSMgr->suggest(slst, scw.c_str(), &onlycmpdsug);
1056 if (clock() > timelimit + TIMELIMIT_GLOBAL(((__clock_t) 1000000) / 4))
1057 return slst;
1058 std::string wspace(scw);
1059 mkallsmall2(wspace, sunicw);
1060 good |= pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug);
1061 if (clock() > timelimit + TIMELIMIT_GLOBAL(((__clock_t) 1000000) / 4))
1062 return slst;
1063 break;
1064 }
1065 case HUHINITCAP4:
1066 capwords = true;
1067 /* FALLTHROUGH */
1068 case HUHCAP3: {
1069 good |= pSMgr->suggest(slst, scw.c_str(), &onlycmpdsug);
1070 if (clock() > timelimit + TIMELIMIT_GLOBAL(((__clock_t) 1000000) / 4))
1071 return slst;
1072 // something.The -> something. The
1073 size_t dot_pos = scw.find('.');
1074 if (dot_pos != std::string::npos) {
1075 std::string postdot = scw.substr(dot_pos + 1);
1076 int captype_;
1077 if (utf8) {
1078 std::vector<w_char> postdotu;
1079 u8_u16(postdotu, postdot);
1080 captype_ = get_captype_utf8(postdotu, langnum);
1081 } else {
1082 captype_ = get_captype(postdot, csconv);
1083 }
1084 if (captype_ == INITCAP1) {
1085 std::string str(scw);
1086 str.insert(dot_pos + 1, 1, ' ');
1087 insert_sug(slst, str);
1088 }
1089 }
1090
1091 std::string wspace;
1092
1093 if (captype == HUHINITCAP4) {
1094 // TheOpenOffice.org -> The OpenOffice.org
1095 wspace = scw;
1096 mkinitsmall2(wspace, sunicw);
1097 good |= pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug);
1098 if (clock() > timelimit + TIMELIMIT_GLOBAL(((__clock_t) 1000000) / 4))
1099 return slst;
1100 }
1101 wspace = scw;
1102 mkallsmall2(wspace, sunicw);
1103 if (spell(wspace.c_str()))
1104 insert_sug(slst, wspace);
// suggestions added from this index on are inspected by the "aNew" loop below
1105 size_t prevns = slst.size();
1106 good |= pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug);
1107 if (clock() > timelimit + TIMELIMIT_GLOBAL(((__clock_t) 1000000) / 4))
1108 return slst;
1109 if (captype == HUHINITCAP4) {
1110 mkinitcap2(wspace, sunicw);
1111 if (spell(wspace.c_str()))
1112 insert_sug(slst, wspace);
1113 good |= pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug);
1114 if (clock() > timelimit + TIMELIMIT_GLOBAL(((__clock_t) 1000000) / 4))
1115 return slst;
1116 }
1117 // aNew -> "a New" (instead of "a new")
1118 for (size_t j = prevns; j < slst.size(); ++j) {
1119 const char* space = strchr(slst[j].c_str(), ' ');
1120 if (space) {
1121 size_t slen = strlen(space + 1);
1122 // different case after space (need capitalisation)
1123 if ((slen < wl) && strcmp(scw.c_str() + wl - slen, space + 1)) {
// both halves are copied out before slst[j], which `space` points into, is erased
1124 std::string first(slst[j].c_str(), space + 1);
1125 std::string second(space + 1);
1126 std::vector<w_char> w;
1127 if (utf8)
1128 u8_u16(w, second);
1129 mkinitcap2(second, w);
1130 // set as first suggestion
1131 slst.erase(slst.begin() + j);
1132 slst.insert(slst.begin(), first + second);
1133 }
1134 }
1135 }
1136 break;
1137 }
1138
1139 case ALLCAP2: {
1140 std::string wspace(scw);
1141 mkallsmall2(wspace, sunicw);
1142 good |= pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug);
1143 if (clock() > timelimit + TIMELIMIT_GLOBAL(((__clock_t) 1000000) / 4))
1144 return slst;
1145 if (pAMgr && pAMgr->get_keepcase() && spell(wspace.c_str()))
1146 insert_sug(slst, wspace);
1147 mkinitcap2(wspace, sunicw);
1148 good |= pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug);
1149 if (clock() > timelimit + TIMELIMIT_GLOBAL(((__clock_t) 1000000) / 4))
1150 return slst;
// convert every suggestion back to upper case; with CHECKSHARPS the sharp s
// byte sequence is replaced by "SS"
1151 for (size_t j = 0; j < slst.size(); ++j) {
1152 mkallcap(slst[j]);
1153 if (pAMgr && pAMgr->get_checksharps()) {
1154 if (utf8) {
1155 mystrrep(slst[j], "\xC3\x9F", "SS");
1156 } else {
1157 mystrrep(slst[j], "\xDF", "SS");
1158 }
1159 }
1160 }
1161 break;
1162 }
1163 }
1164
1165 // LANG_hu section: replace '-' with ' ' in Hungarian
1166 if (langnum == LANG_hu) {
1167 for (size_t j = 0; j < slst.size(); ++j) {
1168 size_t pos = slst[j].find('-');
1169 if (pos != std::string::npos) {
// NOTE(review): info is not initialized here; the test below relies on spell()
// storing into *info before returning - confirm against spell()
1170 int info;
1171 std::string w(slst[j].substr(0, pos));
1172 w.append(slst[j].substr(pos + 1));
1173 (void)spell(w, &info, NULL__null);
1174 if ((info & SPELL_COMPOUND(1 << 0)) && (info & SPELL_FORBIDDEN(1 << 1))) {
1175 slst[j][pos] = ' ';
1176 } else
1177 slst[j][pos] = '-';
1178 }
1179 }
1180 }
1181 // END OF LANG_hu section
1182 // try ngram approach since found nothing good suggestion
1183 if (!good && pAMgr && (slst.empty() || onlycmpdsug) && (pAMgr->get_maxngramsugs() != 0)) {
1184 switch (captype) {
1185 case NOCAP0: {
1186 pSMgr->ngsuggest(slst, scw.c_str(), m_HMgrs, NOCAP0);
1187 if (clock() > timelimit + TIMELIMIT_GLOBAL(((__clock_t) 1000000) / 4))
1188 return slst;
1189 break;
1190 }
// NOTE(review): the FALLTHROUGH marker on the next line is stale - the NOCAP
// case above ends in break, so nothing falls through into HUHINITCAP
1191 /* FALLTHROUGH */
1192 case HUHINITCAP4:
1193 capwords = true;
1194 /* FALLTHROUGH */
1195 case HUHCAP3: {
1196 std::string wspace(scw);
1197 mkallsmall2(wspace, sunicw);
1198 pSMgr->ngsuggest(slst, wspace.c_str(), m_HMgrs, HUHCAP3);
1199 if (clock() > timelimit + TIMELIMIT_GLOBAL(((__clock_t) 1000000) / 4))
1200 return slst;
1201 break;
1202 }
1203 case INITCAP1: {
1204 capwords = true;
1205 std::string wspace(scw);
1206 mkallsmall2(wspace, sunicw);
1207 pSMgr->ngsuggest(slst, wspace.c_str(), m_HMgrs, INITCAP1);
1208 if (clock() > timelimit + TIMELIMIT_GLOBAL(((__clock_t) 1000000) / 4))
1209 return slst;
1210 break;
1211 }
1212 case ALLCAP2: {
1213 std::string wspace(scw);
1214 mkallsmall2(wspace, sunicw);
// only the suggestions appended by ngsuggest() are upper-cased afterwards
1215 size_t oldns = slst.size();
1216 pSMgr->ngsuggest(slst, wspace.c_str(), m_HMgrs, ALLCAP2);
1217 if (clock() > timelimit + TIMELIMIT_GLOBAL(((__clock_t) 1000000) / 4))
1218 return slst;
1219 for (size_t j = oldns; j < slst.size(); ++j) {
1220 mkallcap(slst[j]);
1221 }
1222 break;
1223 }
1224 }
1225 }
1226
1227 // try dash suggestion (Afo-American -> Afro-American)
1228 // Note: LibreOffice was modified to treat dashes as word
1229 // characters to check "scot-free" etc. word forms, but
1230 // we need to handle suggestions for "Afo-American", etc.,
1231 // while "Afro-American" is missing from the dictionary.
1232 // TODO avoid possible overgeneration
1233 size_t dash_pos = scw.find('-');
1234 if (dash_pos != std::string::npos) {
// nodashsug stays 1 only while no existing suggestion contains a dash
1235 int nodashsug = 1;
1236 for (size_t j = 0; j < slst.size() && nodashsug == 1; ++j) {
1237 if (slst[j].find('-') != std::string::npos)
1238 nodashsug = 0;
1239 }
1240
1241 size_t prev_pos = 0;
1242 bool last = false;
1243
// walk the dash-separated chunks from left to right; the loop ends after the
// first misspelled chunk (nodashsug is cleared) or after the final chunk
1244 while (!good && nodashsug && !last) {
1245 if (dash_pos == scw.size())
1246 last = 1;
1247 std::string chunk = scw.substr(prev_pos, dash_pos - prev_pos);
1248 if (!spell(chunk.c_str())) {
1249 std::vector<std::string> nlst = suggest(chunk.c_str());
1250 if (clock() > timelimit + TIMELIMIT_GLOBAL(((__clock_t) 1000000) / 4))
1251 return slst;
1252 for (std::vector<std::string>::reverse_iterator j = nlst.rbegin(); j != nlst.rend(); ++j) {
// rebuild the whole word with the chunk replaced by the candidate
1253 std::string wspace = scw.substr(0, prev_pos);
1254 wspace.append(*j);
1255 if (!last) {
1256 wspace.append("-");
1257 wspace.append(scw.substr(dash_pos + 1));
1258 }
1259 int info = 0;
1260 if (pAMgr && pAMgr->get_forbiddenword())
1261 checkword(wspace, &info, NULL__null);
1262 if (!(info & SPELL_FORBIDDEN(1 << 1)))
1263 insert_sug(slst, wspace);
1264 }
1265 nodashsug = 0;
1266 }
1267 if (!last) {
1268 prev_pos = dash_pos + 1;
1269 dash_pos = scw.find('-', prev_pos);
1270 }
// no further dash: the final chunk extends to the end of the word
1271 if (dash_pos == std::string::npos)
1272 dash_pos = scw.size();
1273 }
1274 }
1275 return slst;
1276}
1277
// Returns a const reference to the `encoding` member.
1278const std::string& HunspellImpl::get_dict_encoding() const {
1279 return encoding;
1280}
1281
1282std::vector<std::string> HunspellImpl::stem(const std::vector<std::string>& desc) {
1283 std::vector<std::string> slst;
1284
1285 std::string result2;
1286 if (desc.empty())
1287 return slst;
1288 for (size_t i = 0; i < desc.size(); ++i) {
1289
1290 std::string result;
1291
1292 // add compound word parts (except the last one)
1293 const char* s = desc[i].c_str();
1294 const char* part = strstr(s, MORPH_PART"pa:");
1295 if (part) {
1296 const char* nextpart = strstr(part + 1, MORPH_PART"pa:");
1297 while (nextpart) {
1298 std::string field;
1299 copy_field(field, part, MORPH_PART"pa:");
1300 result.append(field);
1301 part = nextpart;
1302 nextpart = strstr(part + 1, MORPH_PART"pa:");
1303 }
1304 s = part;
1305 }
1306
1307 std::string tok(s);
1308 size_t alt = 0;
1309 while ((alt = tok.find(" | ", alt)) != std::string::npos) {
1310 tok[alt + 1] = MSEP_ALT'\v';
1311 }
1312 std::vector<std::string> pl = line_tok(tok, MSEP_ALT'\v');
1313 for (size_t k = 0; k < pl.size(); ++k) {
1314 // add derivational suffixes
1315 if (pl[k].find(MORPH_DERI_SFX"ds:") != std::string::npos) {
1316 // remove inflectional suffixes
1317 const size_t is = pl[k].find(MORPH_INFL_SFX"is:");
1318 if (is != std::string::npos)
1319 pl[k].resize(is);
1320 std::vector<std::string> singlepl;
1321 singlepl.push_back(pl[k]);
1322 std::string sg = pSMgr->suggest_gen(singlepl, pl[k]);
1323 if (!sg.empty()) {
1324 std::vector<std::string> gen = line_tok(sg, MSEP_REC'\n');
1325 for (size_t j = 0; j < gen.size(); ++j) {
1326 result2.push_back(MSEP_REC'\n');
1327 result2.append(result);
1328 result2.append(gen[j]);
1329 }
1330 }
1331 } else {
1332 result2.push_back(MSEP_REC'\n');
1333 result2.append(result);
1334 if (pl[k].find(MORPH_SURF_PFX"sp:") != std::string::npos) {
1335 std::string field;
1336 copy_field(field, pl[k], MORPH_SURF_PFX"sp:");
1337 result2.append(field);
1338 }
1339 std::string field;
1340 copy_field(field, pl[k], MORPH_STEM"st:");
1341 result2.append(field);
1342 }
1343 }
1344 }
1345 slst = line_tok(result2, MSEP_REC'\n');
1346 uniqlist(slst);
1347 return slst;
1348}
1349
// Convenience overload: stems `word` by feeding the result of analyze(word)
// into the description-list overload of stem().
1350std::vector<std::string> HunspellImpl::stem(const std::string& word) {
1351 return stem(analyze(word));
1352}
1353
// Forwards to pAMgr->get_wordchars() and returns its reference.
// NOTE(review): pAMgr is dereferenced without the null check other methods in
// this file perform - callers presumably guarantee it is set; confirm.
1354const std::string& HunspellImpl::get_wordchars_cpp() const {
1355 return pAMgr->get_wordchars();
1356}
1357
// Forwards to pAMgr->get_wordchars_utf16() and returns its reference.
// NOTE(review): pAMgr is dereferenced without a null check - confirm callers
// guarantee it is set.
1358const std::vector<w_char>& HunspellImpl::get_wordchars_utf16() const {
1359 return pAMgr->get_wordchars_utf16();
1360}
1361
1362void HunspellImpl::mkinitcap(std::string& u8) {
1363 if (utf8) {
1364 std::vector<w_char> u16;
1365 u8_u16(u16, u8);
1366 ::mkinitcap_utf(u16, langnum);
1367 u16_u8(u8, u16);
1368 } else {
1369 ::mkinitcap(u8, csconv);
1370 }
1371}
1372
1373int HunspellImpl::mkinitcap2(std::string& u8, std::vector<w_char>& u16) {
1374 if (utf8) {
1375 ::mkinitcap_utf(u16, langnum);
1376 u16_u8(u8, u16);
1377 } else {
1378 ::mkinitcap(u8, csconv);
1379 }
1380 return u8.size();
1381}
1382
1383int HunspellImpl::mkinitsmall2(std::string& u8, std::vector<w_char>& u16) {
1384 if (utf8) {
1385 ::mkinitsmall_utf(u16, langnum);
1386 u16_u8(u8, u16);
1387 } else {
1388 ::mkinitsmall(u8, csconv);
1389 }
1390 return u8.size();
1391}
1392
1393int HunspellImpl::add(const std::string& word) {
1394 if (!m_HMgrs.empty())
1395 return m_HMgrs[0]->add(word);
1396 return 0;
1397}
1398
1399int HunspellImpl::add_with_affix(const std::string& word, const std::string& example) {
1400 if (!m_HMgrs.empty())
1401 return m_HMgrs[0]->add_with_affix(word, example);
1402 return 0;
1403}
1404
1405int HunspellImpl::remove(const std::string& word) {
1406 if (!m_HMgrs.empty())
1407 return m_HMgrs[0]->remove(word);
1408 return 0;
1409}
1410
// Forwards to pAMgr->get_version() and returns its reference.
// NOTE(review): pAMgr is dereferenced without a null check - confirm callers
// guarantee it is set.
1411const std::string& HunspellImpl::get_version_cpp() const {
1412 return pAMgr->get_version();
1413}
1414
// Returns the `csconv` member, the table the non-UTF-8 case helpers in this
// file receive.
1415struct cs_info* HunspellImpl::get_csconv() {
1416 return csconv;
1417}
1418
1419void HunspellImpl::cat_result(std::string& result, const std::string& st) {
1420 if (!st.empty()) {
1421 if (!result.empty())
1422 result.append("\n");
1423 result.append(st);
1424 }
1425}
1426
1427std::vector<std::string> HunspellImpl::analyze(const std::string& word) {
1428 std::vector<std::string> slst = analyze_internal(word);
1429 // output conversion
1430 RepList* rl = (pAMgr) ? pAMgr->get_oconvtable() : NULL__null;
1431 if (rl) {
1432 for (size_t i = 0; rl && i < slst.size(); ++i) {
1433 std::string wspace;
1434 if (rl->conv(slst[i], wspace)) {
1435 slst[i] = wspace;
1436 }
1437 }
1438 }
1439 return slst;
1440}
1441
// Morphological analysis of one word, without output conversion.
//
// Returns an empty list when pSMgr is null, when no dictionary manager is
// loaded, when the word reaches the MAXWORDLEN / MAXWORDUTF8LEN limit, or when
// cleanword2() yields a zero length and no abbreviation dots were counted.
// The analyses come from pSMgr->suggest_morph(), tried on several case
// variants depending on the detected capitalization. For LANG_hu there is
// additional handling of numbers and of words containing a dash. Results are
// accumulated in `result` as MSEP_REC separated records and split by
// line_tok() on return.
1442std::vector<std::string> HunspellImpl::analyze_internal(const std::string& word) {
1443 std::vector<std::string> slst;
1444 if (!pSMgr || m_HMgrs.empty())
1445 return slst;
1446 if (utf8) {
1447 if (word.size() >= MAXWORDUTF8LEN(100 * 3))
1448 return slst;
1449 } else {
1450 if (word.size() >= MAXWORDLEN100)
1451 return slst;
1452 }
1453 int captype = NOCAP0;
1454 size_t abbv = 0;
1455 size_t wl = 0;
1456
1457 std::string scw;
1458 std::vector<w_char> sunicw;
1459
1460 // input conversion
1461 RepList* rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL__null;
1462 {
1463 std::string wspace;
1464
// use the converted word only when the conversion table reports success
1465 bool convstatus = rl ? rl->conv(word, wspace) : false;
1466 if (convstatus)
1467 wl = cleanword2(scw, sunicw, wspace, &captype, &abbv);
1468 else
1469 wl = cleanword2(scw, sunicw, word, &captype, &abbv);
1470 }
1471
1472 if (wl == 0) {
1473 if (abbv) {
// nothing but dots was counted: analyze a string of `abbv` dots instead
1474 scw.clear();
1475 for (wl = 0; wl < abbv; wl++)
1476 scw.push_back('.');
1477 abbv = 0;
1478 } else
1479 return slst;
1480 }
1481
1482 std::string result;
1483
1484 size_t n = 0;
1485 // test numbers
1486 // LANG_hu section: set dash information for suggestions
1487 if (langnum == LANG_hu) {
// n2 counts the '.'/',' separators seen, n3 is the index of the last one
1488 size_t n2 = 0;
1489 size_t n3 = 0;
1490
1491 while ((n < wl) && (((scw[n] <= '9') && (scw[n] >= '0')) ||
1492 (((scw[n] == '.') || (scw[n] == ',')) && (n > 0)))) {
1493 n++;
// NOTE(review): after the increment n may equal wl, so scw[n] can read index
// scw.size() here - confirm this is intended
1494 if ((scw[n] == '.') || (scw[n] == ',')) {
1495 if (((n2 == 0) && (n > 3)) ||
1496 ((n2 > 0) && ((scw[n - 1] == '.') || (scw[n - 1] == ','))))
1497 break;
1498 n2++;
1499 n3 = n;
1500 }
1501 }
1502
// whole word consumed, but more than three characters follow the last separator
1503 if ((n == wl) && (n3 > 0) && (n - n3 > 3))
1504 return slst;
1505 if ((n == wl) || ((n > 0) && ((scw[n] == '%') || (scw[n] == '\xB0')) &&
1506 checkword(scw.substr(n), NULL__null, NULL__null))) {
// result keeps the first n - 1 characters; the n-th character (and the rest of
// the word, if any) is analyzed through suggest_morph()
1507 result.append(scw);
1508 result.resize(n - 1);
1509 if (n == wl)
1510 cat_result(result, pSMgr->suggest_morph(scw.substr(n - 1)));
1511 else {
1512 std::string chunk = scw.substr(n - 1, 1);
1513 cat_result(result, pSMgr->suggest_morph(chunk));
1514 result.push_back('+'); // XXX SPEC. MORPHCODE
1515 cat_result(result, pSMgr->suggest_morph(scw.substr(n)));
1516 }
1517 return line_tok(result, MSEP_REC'\n');
1518 }
1519 }
1520 // END OF LANG_hu section
1521
// general case: analyze the case variants that fit the detected capitalization
1522 switch (captype) {
1523 case HUHCAP3:
1524 case HUHINITCAP4:
1525 case NOCAP0: {
1526 cat_result(result, pSMgr->suggest_morph(scw));
1527 if (abbv) {
1528 std::string u8buffer(scw);
1529 u8buffer.push_back('.');
1530 cat_result(result, pSMgr->suggest_morph(u8buffer));
1531 }
1532 break;
1533 }
1534 case INITCAP1: {
// u8buffer holds the all-lowercase form, scw the initial-capital form
1535 mkallsmall2(scw, sunicw);
1536 std::string u8buffer(scw);
1537 mkinitcap2(scw, sunicw);
1538 cat_result(result, pSMgr->suggest_morph(u8buffer));
1539 cat_result(result, pSMgr->suggest_morph(scw));
1540 if (abbv) {
1541 u8buffer.push_back('.');
1542 cat_result(result, pSMgr->suggest_morph(u8buffer));
1543
1544 u8buffer = scw;
1545 u8buffer.push_back('.');
1546
1547 cat_result(result, pSMgr->suggest_morph(u8buffer));
1548 }
1549 break;
1550 }
1551 case ALLCAP2: {
// first the word as cleaned, then the lowercase and initial-capital forms
1552 cat_result(result, pSMgr->suggest_morph(scw));
1553 if (abbv) {
1554 std::string u8buffer(scw);
1555 u8buffer.push_back('.');
1556 cat_result(result, pSMgr->suggest_morph(u8buffer));
1557 }
1558 mkallsmall2(scw, sunicw);
1559 std::string u8buffer(scw);
1560 mkinitcap2(scw, sunicw);
1561
1562 cat_result(result, pSMgr->suggest_morph(u8buffer));
1563 cat_result(result, pSMgr->suggest_morph(scw));
1564 if (abbv) {
1565 u8buffer.push_back('.');
1566 cat_result(result, pSMgr->suggest_morph(u8buffer));
1567
1568 u8buffer = scw;
1569 u8buffer.push_back('.');
1570
1571 cat_result(result, pSMgr->suggest_morph(u8buffer));
1572 }
1573 break;
1574 }
1575 }
1576
1577 if (!result.empty()) {
1578 // word reversing wrapper for complex prefixes
1579 if (complexprefixes) {
1580 if (utf8)
1581 reverseword_utf(result);
1582 else
1583 reverseword(result);
1584 }
1585 return line_tok(result, MSEP_REC'\n');
1586 }
1587
1588 // compound word with dash (HU) I18n
1589 // LANG_hu section: set dash information for suggestions
1590
// the dash handling below is only entered for LANG_hu
1591 size_t dash_pos = langnum == LANG_hu ? scw.find('-') : std::string::npos;
1592 if (dash_pos != std::string::npos) {
1593 int nresult = 0;
1594
1595 std::string part1 = scw.substr(0, dash_pos);
1596 std::string part2 = scw.substr(dash_pos+1);
1597
1598 // examine 2 sides of the dash
1599 if (part2.empty()) { // base word ending with dash
1600 if (spell(part1)) {
1601 std::string p = pSMgr->suggest_morph(part1);
1602 if (!p.empty()) {
1603 slst = line_tok(p, MSEP_REC'\n');
1604 return slst;
1605 }
1606 }
1607 } else if (part2.size() == 1 && part2[0] == 'e') { // XXX (HU) -e hat.
1608 if (spell(part1) && (spell("-e"))) {
1609 std::string st = pSMgr->suggest_morph(part1);
1610 if (!st.empty()) {
1611 result.append(st);
1612 }
1613 result.push_back('+'); // XXX spec. separator in MORPHCODE
1614 st = pSMgr->suggest_morph("-e");
1615 if (!st.empty()) {
1616 result.append(st);
1617 }
1618 return line_tok(result, MSEP_REC'\n');
1619 }
1620 } else {
1621 // first word ending with dash: word- XXX ???
// part1 is spell-checked with a temporary trailing space, which is removed again
1622 part1.push_back(' ');
1623 nresult = spell(part1);
1624 part1.erase(part1.size() - 1);
// NOTE(review): the strict comparisons accept only '1'..'8' as a
// single-character second part - confirm '0' and '9' are excluded on purpose
1625 if (nresult && spell(part2) &&
1626 ((part2.size() > 1) || ((part2[0] > '0') && (part2[0] < '9')))) {
1627 std::string st = pSMgr->suggest_morph(part1);
1628 if (!st.empty()) {
1629 result.append(st);
1630 result.push_back('+'); // XXX spec. separator in MORPHCODE
1631 }
1632 st = pSMgr->suggest_morph(part2);
1633 if (!st.empty()) {
1634 result.append(st);
1635 }
1636 return line_tok(result, MSEP_REC'\n');
1637 }
1638 }
1639 // affixed number in correct word
1640 if (nresult && (dash_pos > 0) &&
1641 (((scw[dash_pos - 1] <= '9') && (scw[dash_pos - 1] >= '0')) ||
1642 (scw[dash_pos - 1] == '.'))) {
// from here on n is a distance counted leftwards from the dash
1643 n = 1;
1644 if (scw[dash_pos - n] == '.')
1645 n++;
1646 // search first not a number character to left from dash
1647 while ((dash_pos >= n) && ((scw[dash_pos - n] == '0') || (n < 3)) &&
1648 (n < 6)) {
1649 n++;
1650 }
// step back if the scan moved past the start of the word
1651 if (dash_pos < n)
1652 n--;
1653 // numbers: valami1000000-hoz
1654 // examine 100000-hoz, 10000-hoz 1000-hoz, 10-hoz,
1655 // 56-hoz, 6-hoz
1656 for (; n >= 1; n--) {
// candidates must start with a digit
1657 if (scw[dash_pos - n] < '0' || scw[dash_pos - n] > '9') {
1658 continue;
1659 }
1660 std::string chunk = scw.substr(dash_pos - n);
1661 if (checkword(chunk, NULL__null, NULL__null)) {
1662 result.append(chunk);
1663 std::string st = pSMgr->suggest_morph(chunk);
1664 if (!st.empty()) {
1665 result.append(st);
1666 }
1667 return line_tok(result, MSEP_REC'\n');
1668 }
1669 }
1670 }
1671 }
1672 return slst;
1673}
1674
1675std::vector<std::string> HunspellImpl::generate(const std::string& word, const std::vector<std::string>& pl) {
1676 std::vector<std::string> slst;
1677 if (!pSMgr || pl.empty())
1678 return slst;
1679 std::vector<std::string> pl2 = analyze(word);
1680 int captype = NOCAP0;
1681 int abbv = 0;
1682 std::string cw;
1683 cleanword(cw, word, &captype, &abbv);
1684 std::string result;
1685
1686 for (size_t i = 0; i < pl.size(); ++i) {
1687 cat_result(result, pSMgr->suggest_gen(pl2, pl[i]));
1688 }
1689
1690 if (!result.empty()) {
1691 // allcap
1692 if (captype == ALLCAP2)
1693 mkallcap(result);
1694
1695 // line split
1696 slst = line_tok(result, MSEP_REC'\n');
1697
1698 // capitalize
1699 if (captype == INITCAP1 || captype == HUHINITCAP4) {
1700 for (size_t j = 0; j < slst.size(); ++j) {
1701 mkinitcap(slst[j]);
1702 }
1703 }
1704
1705 // temporary filtering of prefix related errors (eg.
1706 // generate("undrinkable", "eats") --> "undrinkables" and "*undrinks")
1707 std::vector<std::string>::iterator it = slst.begin();
1708 while (it != slst.end()) {
1709 if (!spell(*it)) {
1710 it = slst.erase(it);
1711 } else {
1712 ++it;
1713 }
1714 }
1715 }
1716 return slst;
1717}
1718
1719std::vector<std::string> HunspellImpl::generate(const std::string& word, const std::string& pattern) {
1720 std::vector<std::string> pl = analyze(pattern);
1721 std::vector<std::string> slst = generate(word, pl);
1722 uniqlist(slst);
1723 return slst;
1724}
1725
1726// minimal XML parser functions
1727std::string HunspellImpl::get_xml_par(const std::string& in_par, std::string::size_type pos) {
1728 std::string dest;
1729 if (pos == std::string::npos)
1730 return dest;
1731 const char* par = in_par.c_str() + pos;
1732 char end = *par;
1733 if (end == '>')
1734 end = '<';
1735 else if (end != '\'' && end != '"')
1736 return dest; // bad XML
1737 for (par++; *par != '\0' && *par != end; ++par) {
1738 dest.push_back(*par);
1739 }
1740 mystrrep(dest, "&lt;", "<");
1741 mystrrep(dest, "&amp;", "&");
1742 return dest;
1743}
1744
// Returns the `langnum` member, the value this file compares against LANG_hu.
1745int HunspellImpl::get_langnum() const {
1746 return langnum;
1747}
1748
1749bool HunspellImpl::input_conv(const std::string& word, std::string& dest) {
1750 RepList* rl = pAMgr ? pAMgr->get_iconvtable() : NULL__null;
1751 if (rl) {
1752 return rl->conv(word, dest);
1753 }
1754 dest.assign(word);
1755 return false;
1756}
1757
1758// return the beginning of the element (attr == NULL) or the attribute
1759std::string::size_type HunspellImpl::get_xml_pos(const std::string& s, std::string::size_type pos, const char* attr) {
1760 if (pos == std::string::npos)
1761 return std::string::npos;
1762
1763 std::string::size_type endpos = s.find('>', pos);
1764 if (attr == NULL__null)
1765 return endpos;
1766 while (true) {
1767 pos = s.find(attr, pos);
1768 if (pos == std::string::npos || pos >= endpos)
1769 return std::string::npos;
1770 if (s[pos - 1] == ' ' || s[pos - 1] == '\n')
1771 break;
1772 pos += strlen(attr);
1773 }
1774 return pos + strlen(attr);
1775}
1776
1777int HunspellImpl::check_xml_par(const std::string& q, std::string::size_type pos,
1778 const char* attr,
1779 const char* value) {
1780 std::string cw = get_xml_par(q, get_xml_pos(q, pos, attr));
1781 if (cw == value)
1782 return 1;
1783 return 0;
1784}
1785
1786std::vector<std::string> HunspellImpl::get_xml_list(const std::string& list, std::string::size_type pos, const char* tag) {
1787 std::vector<std::string> slst;
1788 if (pos == std::string::npos)
1789 return slst;
1790 while (true) {
1791 pos = list.find(tag, pos);
1792 if (pos == std::string::npos)
1793 break;
1794 std::string cw = get_xml_par(list, pos + strlen(tag) - 1);
1795 if (cw.empty()) {
1796 break;
1797 }
1798 slst.push_back(cw);
1799 ++pos;
1800 }
1801 return slst;
1802}
1803
1804std::vector<std::string> HunspellImpl::spellml(const std::string& in_word) {
1805 std::vector<std::string> slst;
1806
1807 std::string::size_type qpos = in_word.find("<query");
1808 if (qpos == std::string::npos)
1809 return slst; // bad XML input
1810
1811 std::string::size_type q2pos = in_word.find('>', qpos);
1812 if (q2pos == std::string::npos)
1813 return slst; // bad XML input
1814
1815 q2pos = in_word.find("<word", q2pos);
1816 if (q2pos == std::string::npos)
1817 return slst; // bad XML input
1818
1819 if (check_xml_par(in_word, qpos, "type=", "analyze")) {
1820 std::string cw = get_xml_par(in_word, in_word.find('>', q2pos));
1821 if (!cw.empty())
1822 slst = analyze(cw);
1823 if (slst.empty())
1824 return slst;
1825 // convert the result to <code><a>ana1</a><a>ana2</a></code> format
1826 std::string r;
1827 r.append("<code>");
1828 for (size_t i = 0; i < slst.size(); ++i) {
1829 r.append("<a>");
1830
1831 std::string entry(slst[i]);
1832 mystrrep(entry, "\t", " ");
1833 mystrrep(entry, "&", "&amp;");
1834 mystrrep(entry, "<", "&lt;");
1835 r.append(entry);
1836
1837 r.append("</a>");
1838 }
1839 r.append("</code>");
1840 slst.clear();
1841 slst.push_back(r);
1842 return slst;
1843 } else if (check_xml_par(in_word, qpos, "type=", "stem")) {
1844 std::string cw = get_xml_par(in_word, in_word.find('>', q2pos));
1845 if (!cw.empty())
1846 return stem(cw);
1847 } else if (check_xml_par(in_word, qpos, "type=", "generate")) {
1848 std::string cw = get_xml_par(in_word, in_word.find('>', q2pos));
1849 if (cw.empty())
1850 return slst;
1851 std::string::size_type q3pos = in_word.find("<word", q2pos + 1);
1852 if (q3pos != std::string::npos) {
1853 std::string cw2 = get_xml_par(in_word, in_word.find('>', q3pos));
1854 if (!cw2.empty()) {
1855 return generate(cw, cw2);
1856 }
1857 } else {
1858 q2pos = in_word.find("<code", q2pos + 1);
1859 if (q2pos != std::string::npos) {
1860 std::vector<std::string> slst2 = get_xml_list(in_word, in_word.find('>', q2pos), "<a>");
1861 if (!slst2.empty()) {
1862 slst = generate(cw, slst2);
1863 uniqlist(slst);
1864 return slst;
1865 }
1866 }
1867 }
1868 } else if (check_xml_par(in_word, qpos, "type=", "add")) {
1869 std::string cw = get_xml_par(in_word, in_word.find('>', q2pos));
1870 if (cw.empty())
1871 return slst;
1872 std::string::size_type q3pos = in_word.find("<word", q2pos + 1);
1873 if (q3pos != std::string::npos) {
1874 std::string cw2 = get_xml_par(in_word, in_word.find('>', q3pos));
1875 if (!cw2.empty()) {
1876 add_with_affix(cw, cw2);
1877 } else {
1878 add(cw);
1879 }
1880 } else {
1881 add(cw);
1882 }
1883 }
1884 return slst;
1885}
1886
1887std::vector<std::string> HunspellImpl::suffix_suggest(const std::string& root_word) {
1888 std::vector<std::string> slst;
1889 struct hentry* he = NULL__null;
1890 int len;
1891 std::string w2;
1892 const char* word;
1893 const char* ignoredchars = pAMgr->get_ignore();
1894 if (ignoredchars != NULL__null) {
1895 w2.assign(root_word);
1896 if (utf8) {
1897 const std::vector<w_char>& ignoredchars_utf16 =
1898 pAMgr->get_ignore_utf16();
1899 remove_ignored_chars_utf(w2, ignoredchars_utf16);
1900 } else {
1901 remove_ignored_chars(w2, ignoredchars);
1902 }
1903 word = w2.c_str();
1904 } else
1905 word = root_word.c_str();
1906
1907 len = strlen(word);
1908
1909 if (!len)
1910 return slst;
1911
1912 for (size_t i = 0; (i < m_HMgrs.size()) && !he; ++i) {
1913 he = m_HMgrs[i]->lookup(word);
1914 }
1915 if (he) {
1916 slst = pAMgr->get_suffix_words(he->astr, he->alen, root_word.c_str());
1917 }
1918 return slst;
1919}
1920
1921namespace {
1922 int munge_vector(char*** slst, const std::vector<std::string>& items) {
1923 if (items.empty()) {
1924 *slst = NULL__null;
1925 return 0;
1926 } else {
1927 *slst = (char**)malloc(sizeof(char*) * items.size())HunspellAllocator::CountingMalloc(sizeof(char*) * items.size(
))
;
1928 if (!*slst)
1929 return 0;
1930 for (size_t i = 0; i < items.size(); ++i)
1931 (*slst)[i] = mystrdup(items[i].c_str());
1932 }
1933 return items.size();
1934 }
1935}
1936
1937int HunspellImpl::spell(const char* word, int* info, char** root) {
1938 std::string sroot;
1939 bool ret = spell(word, info, root ? &sroot : NULL__null);
1940 if (root) {
1941 if (sroot.empty()) {
1942 *root = NULL__null;
1943 } else {
1944 *root = mystrdup(sroot.c_str());
1945 }
1946 }
1947 return ret;
1948}
1949
1950int HunspellImpl::suggest(char*** slst, const char* word) {
1951 std::vector<std::string> suggests = suggest(word);
1952 return munge_vector(slst, suggests);
1953}
1954
1955int HunspellImpl::suffix_suggest(char*** slst, const char* root_word) {
1956 std::vector<std::string> stems = suffix_suggest(root_word);
1957 return munge_vector(slst, stems);
1958}
1959
1960void HunspellImpl::free_list(char*** slst, int n) {
1961 if (slst && *slst) {
1962 for (int i = 0; i < n; i++)
1963 free((*slst)[i])HunspellAllocator::CountingFree((*slst)[i]);
1964 free(*slst)HunspellAllocator::CountingFree(*slst);
1965 *slst = NULL__null;
1966 }
1967}
1968
1969char* HunspellImpl::get_dic_encoding() {
1970 return &encoding[0];
1971}
1972
1973int HunspellImpl::analyze(char*** slst, const char* word) {
1974 std::vector<std::string> stems = analyze(word);
1975 return munge_vector(slst, stems);
1976}
1977
1978int HunspellImpl::stem(char*** slst, const char* word) {
1979 std::vector<std::string> stems = stem(word);
1980 return munge_vector(slst, stems);
1981}
1982
1983int HunspellImpl::stem(char*** slst, char** desc, int n) {
1984 std::vector<std::string> morph;
1985 morph.reserve(n);
1986 for (int i = 0; i < n; ++i)
1987 morph.push_back(desc[i]);
1988
1989 std::vector<std::string> stems = stem(morph);
1990 return munge_vector(slst, stems);
1991}
1992
1993int HunspellImpl::generate(char*** slst, const char* word, const char* pattern) {
1994 std::vector<std::string> stems = generate(word, pattern);
1995 return munge_vector(slst, stems);
1996}
1997
1998int HunspellImpl::generate(char*** slst, const char* word, char** pl, int pln) {
1999 std::vector<std::string> morph;
2000 morph.reserve(pln);
2001 for (int i = 0; i < pln; ++i)
2002 morph.push_back(pl[i]);
2003
2004 std::vector<std::string> stems = generate(word, morph);
2005 return munge_vector(slst, stems);
2006}
2007
2008const char* HunspellImpl::get_wordchars() const {
2009 return get_wordchars_cpp().c_str();
2010}
2011
2012const char* HunspellImpl::get_version() const {
2013 return get_version_cpp().c_str();
2014}
2015
2016int HunspellImpl::input_conv(const char* word, char* dest, size_t destsize) {
2017 std::string d;
2018 bool ret = input_conv(word, d);
2019 if (ret && d.size() < destsize) {
2020 strncpy(dest, d.c_str(), destsize);
2021 return 1;
2022 }
2023 return 0;
2024}
2025
2026Hunspell::Hunspell(const char* affpath, const char* dpath, const char* key)
2027 : m_Impl(new HunspellImpl(affpath, dpath, key)) {
2028}
2029
2030Hunspell::~Hunspell() {
2031 delete m_Impl;
2032}
2033
2034// load extra dictionaries
2035int Hunspell::add_dic(const char* dpath, const char* key) {
2036 return m_Impl->add_dic(dpath, key);
2037}
2038
2039bool Hunspell::spell(const std::string& word, int* info, std::string* root) {
2040 return m_Impl->spell(word, info, root);
2041}
2042
2043std::vector<std::string> Hunspell::suggest(const std::string& word) {
2044 return m_Impl->suggest(word);
2045}
2046
2047std::vector<std::string> Hunspell::suffix_suggest(const std::string& root_word) {
2048 return m_Impl->suffix_suggest(root_word);
2049}
2050
2051const std::string& Hunspell::get_dict_encoding() const {
2052 return m_Impl->get_dict_encoding();
2053}
2054
2055std::vector<std::string> Hunspell::stem(const std::vector<std::string>& desc) {
2056 return m_Impl->stem(desc);
2057}
2058
2059std::vector<std::string> Hunspell::stem(const std::string& word) {
2060 return m_Impl->stem(word);
2061}
2062
2063const std::string& Hunspell::get_wordchars_cpp() const {
2064 return m_Impl->get_wordchars_cpp();
2065}
2066
2067const std::vector<w_char>& Hunspell::get_wordchars_utf16() const {
2068 return m_Impl->get_wordchars_utf16();
2069}
2070
2071int Hunspell::add(const std::string& word) {
2072 return m_Impl->add(word);
2073}
2074
2075int Hunspell::add_with_affix(const std::string& word, const std::string& example) {
2076 return m_Impl->add_with_affix(word, example);
2077}
2078
2079int Hunspell::remove(const std::string& word) {
2080 return m_Impl->remove(word);
2081}
2082
2083const std::string& Hunspell::get_version_cpp() const {
2084 return m_Impl->get_version_cpp();
2085}
2086
2087struct cs_info* Hunspell::get_csconv() {
2088 return m_Impl->get_csconv();
2089}
2090
2091std::vector<std::string> Hunspell::analyze(const std::string& word) {
2092 return m_Impl->analyze(word);
2093}
2094
2095std::vector<std::string> Hunspell::generate(const std::string& word, const std::vector<std::string>& pl) {
2096 return m_Impl->generate(word, pl);
2097}
2098
2099std::vector<std::string> Hunspell::generate(const std::string& word, const std::string& pattern) {
2100 return m_Impl->generate(word, pattern);
2101}
2102
2103int Hunspell::get_langnum() const {
2104 return m_Impl->get_langnum();
2105}
2106
2107bool Hunspell::input_conv(const std::string& word, std::string& dest) {
2108 return m_Impl->input_conv(word, dest);
2109}
2110
2111int Hunspell::spell(const char* word, int* info, char** root) {
2112 return m_Impl->spell(word, info, root);
2113}
2114
2115int Hunspell::suggest(char*** slst, const char* word) {
2116 return m_Impl->suggest(slst, word);
2117}
2118
2119int Hunspell::suffix_suggest(char*** slst, const char* root_word) {
2120 return m_Impl->suffix_suggest(slst, root_word);
2121}
2122
2123void Hunspell::free_list(char*** slst, int n) {
2124 m_Impl->free_list(slst, n);
2125}
2126
2127char* Hunspell::get_dic_encoding() {
2128 return m_Impl->get_dic_encoding();
2129}
2130
2131int Hunspell::analyze(char*** slst, const char* word) {
2132 return m_Impl->analyze(slst, word);
2133}
2134
2135int Hunspell::stem(char*** slst, const char* word) {
2136 return m_Impl->stem(slst, word);
2137}
2138
2139int Hunspell::stem(char*** slst, char** desc, int n) {
2140 return m_Impl->stem(slst, desc, n);
2141}
2142
2143int Hunspell::generate(char*** slst, const char* word, const char* pattern) {
2144 return m_Impl->generate(slst, word, pattern);
2145}
2146
2147int Hunspell::generate(char*** slst, const char* word, char** pl, int pln) {
2148 return m_Impl->generate(slst, word, pl, pln);
2149}
2150
2151const char* Hunspell::get_wordchars() const {
2152 return m_Impl->get_wordchars();
2153}
2154
2155const char* Hunspell::get_version() const {
2156 return m_Impl->get_version();
2157}
2158
2159int Hunspell::input_conv(const char* word, char* dest, size_t destsize) {
2160 return m_Impl->input_conv(word, dest, destsize);
2161}
2162
2163Hunhandle* Hunspell_create(const char* affpath, const char* dpath) {
2164 return reinterpret_cast<Hunhandle*>(new HunspellImpl(affpath, dpath));
2165}
2166
2167Hunhandle* Hunspell_create_key(const char* affpath,
2168 const char* dpath,
2169 const char* key) {
2170 return reinterpret_cast<Hunhandle*>(new HunspellImpl(affpath, dpath, key));
2171}
2172
2173void Hunspell_destroy(Hunhandle* pHunspell) {
2174 delete reinterpret_cast<HunspellImpl*>(pHunspell);
2175}
2176
2177int Hunspell_add_dic(Hunhandle* pHunspell, const char* dpath) {
2178 return reinterpret_cast<HunspellImpl*>(pHunspell)->add_dic(dpath);
2179}
2180
2181int Hunspell_spell(Hunhandle* pHunspell, const char* word) {
2182 return reinterpret_cast<HunspellImpl*>(pHunspell)->spell(word);
2183}
2184
2185char* Hunspell_get_dic_encoding(Hunhandle* pHunspell) {
2186 return reinterpret_cast<HunspellImpl*>(pHunspell)->get_dic_encoding();
2187}
2188
2189int Hunspell_suggest(Hunhandle* pHunspell, char*** slst, const char* word) {
2190 return reinterpret_cast<HunspellImpl*>(pHunspell)->suggest(slst, word);
2191}
2192
2193int Hunspell_analyze(Hunhandle* pHunspell, char*** slst, const char* word) {
2194 return reinterpret_cast<HunspellImpl*>(pHunspell)->analyze(slst, word);
2195}
2196
2197int Hunspell_stem(Hunhandle* pHunspell, char*** slst, const char* word) {
2198 return reinterpret_cast<HunspellImpl*>(pHunspell)->stem(slst, word);
2199}
2200
2201int Hunspell_stem2(Hunhandle* pHunspell, char*** slst, char** desc, int n) {
2202 return reinterpret_cast<HunspellImpl*>(pHunspell)->stem(slst, desc, n);
2203}
2204
2205int Hunspell_generate(Hunhandle* pHunspell,
2206 char*** slst,
2207 const char* word,
2208 const char* pattern)
2209{
2210 return reinterpret_cast<HunspellImpl*>(pHunspell)->generate(slst, word, pattern);
2211}
2212
2213int Hunspell_generate2(Hunhandle* pHunspell,
2214 char*** slst,
2215 const char* word,
2216 char** desc,
2217 int n)
2218{
2219 return reinterpret_cast<HunspellImpl*>(pHunspell)->generate(slst, word, desc, n);
2220}
2221
2222/* functions for run-time modification of the dictionary */
2223
2224/* add word to the run-time dictionary */
2225
2226int Hunspell_add(Hunhandle* pHunspell, const char* word) {
2227 return reinterpret_cast<HunspellImpl*>(pHunspell)->add(word);
2228}
2229
2230/* add word to the run-time dictionary with affix flags of
2231 * the example (a dictionary word): Hunspell will recognize
2232 * affixed forms of the new word, too.
2233 */
2234
2235int Hunspell_add_with_affix(Hunhandle* pHunspell,
2236 const char* word,
2237 const char* example) {
2238 return reinterpret_cast<HunspellImpl*>(pHunspell)->add_with_affix(word, example);
2239}
2240
2241/* remove word from the run-time dictionary */
2242
2243int Hunspell_remove(Hunhandle* pHunspell, const char* word) {
2244 return reinterpret_cast<HunspellImpl*>(pHunspell)->remove(word);
2245}
2246
2247void Hunspell_free_list(Hunhandle* pHunspell, char*** list, int n) {
2248 reinterpret_cast<HunspellImpl*>(pHunspell)->free_list(list, n);
2249}