/root/firefox-clang/extensions/spellcheck/hunspell/src/affixmgr.cxx

Bug Summary

File:	root/firefox-clang/extensions/spellcheck/hunspell/src/affixmgr.cxx
Warning:	line 4320, column 13 Value stored to 'numbreak' is never read

Annotated Source Code

Press '?' to see keyboard shortcuts

Show analyzer invocation

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name Unified_cpp_hunspell_src0.cpp -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -fhalf-no-semantic-interposition -mframe-pointer=all -relaxed-aliasing -ffp-contract=off -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fdebug-compilation-dir=/root/firefox-clang/obj-x86_64-pc-linux-gnu/extensions/spellcheck/hunspell/src -fcoverage-compilation-dir=/root/firefox-clang/obj-x86_64-pc-linux-gnu/extensions/spellcheck/hunspell/src -resource-dir /usr/lib/llvm-21/lib/clang/21 -include /root/firefox-clang/config/gcc_hidden.h -include /root/firefox-clang/obj-x86_64-pc-linux-gnu/mozilla-config.h -include hunspell_alloc_hooks.h -I /root/firefox-clang/obj-x86_64-pc-linux-gnu/dist/stl_wrappers -I /root/firefox-clang/obj-x86_64-pc-linux-gnu/dist/system_wrappers -U _FORTIFY_SOURCE -D _FORTIFY_SOURCE=2 -D _GLIBCXX_ASSERTIONS -D DEBUG=1 -D HUNSPELL_STATIC -D MOZ_HAS_MOZGLUE -D MOZILLA_INTERNAL_API -D IMPL_LIBXUL -D MOZ_SUPPORT_LEAKCHECKING -D STATIC_EXPORTABLE_JS_API -I /root/firefox-clang/extensions/spellcheck/hunspell/src -I /root/firefox-clang/obj-x86_64-pc-linux-gnu/extensions/spellcheck/hunspell/src -I /root/firefox-clang/extensions/spellcheck/hunspell/glue -I 
/root/firefox-clang/obj-x86_64-pc-linux-gnu/ipc/ipdl/_ipdlheaders -I /root/firefox-clang/ipc/chromium/src -I /root/firefox-clang/obj-x86_64-pc-linux-gnu/dist/include -I /root/firefox-clang/obj-x86_64-pc-linux-gnu/dist/include/nspr -I /root/firefox-clang/obj-x86_64-pc-linux-gnu/dist/include/nss -D MOZILLA_CLIENT -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/14/../../../../include/c++/14 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/14/../../../../include/x86_64-linux-gnu/c++/14 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/14/../../../../include/c++/14/backward -internal-isystem /usr/lib/llvm-21/lib/clang/21/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/14/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-error=pessimizing-move -Wno-error=large-by-value-copy=128 -Wno-error=implicit-int-float-conversion -Wno-error=thread-safety-analysis -Wno-error=tautological-type-limit-compare -Wno-invalid-offsetof -Wno-range-loop-analysis -Wno-deprecated-anon-enum-enum-conversion -Wno-deprecated-enum-enum-conversion -Wno-deprecated-this-capture -Wno-inline-new-delete -Wno-error=deprecated-declarations -Wno-error=array-bounds -Wno-error=free-nonheap-object -Wno-error=atomic-alignment -Wno-error=deprecated-builtins -Wno-psabi -Wno-error=builtin-macro-redefined -Wno-vla-cxx-extension -Wno-unknown-warning-option -Wno-implicit-fallthrough -fdeprecated-macro -ferror-limit 19 -fstrict-flex-arrays=1 -stack-protector 2 -fstack-clash-protection -ftrivial-auto-var-init=pattern -fno-rtti -fgnuc-version=4.2.1 -fskip-odr-check-in-gmf -fno-sized-deallocation -fno-aligned-allocation -vectorize-loops -vectorize-slp -analyzer-checker optin.performance.Padding -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2025-06-30-093548-1913035-1 -x c++ 
Unified_cpp_hunspell_src0.cpp

1	/* *** BEGIN LICENSE BLOCK ***
2	* Version: MPL 1.1/GPL 2.0/LGPL 2.1
3	*
4	* Copyright (C) 2002-2022 Németh László
5	*
6	* The contents of this file are subject to the Mozilla Public License Version
7	* 1.1 (the "License"); you may not use this file except in compliance with
8	* the License. You may obtain a copy of the License at
9	* http://www.mozilla.org/MPL/
10	*
11	* Software distributed under the License is distributed on an "AS IS" basis,
12	* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13	* for the specific language governing rights and limitations under the
14	* License.
15	*
16	* Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
17	*
18	* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
19	* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
20	* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
21	* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
22	* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
23	*
24	* Alternatively, the contents of this file may be used under the terms of
25	* either the GNU General Public License Version 2 or later (the "GPL"), or
26	* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
27	* in which case the provisions of the GPL or the LGPL are applicable instead
28	* of those above. If you wish to allow use of your version of this file only
29	* under the terms of either the GPL or the LGPL, and not to allow others to
30	* use your version of this file under the terms of the MPL, indicate your
31	* decision by deleting the provisions above and replace them with the notice
32	* and other provisions required by the GPL or the LGPL. If you do not delete
33	* the provisions above, a recipient may use your version of this file under
34	* the terms of any one of the MPL, the GPL or the LGPL.
35	*
36	* *** END LICENSE BLOCK *** */
37	/*
38	* Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
39	* And Contributors. All rights reserved.
40	*
41	* Redistribution and use in source and binary forms, with or without
42	* modification, are permitted provided that the following conditions
43	* are met:
44	*
45	* 1. Redistributions of source code must retain the above copyright
46	* notice, this list of conditions and the following disclaimer.
47	*
48	* 2. Redistributions in binary form must reproduce the above copyright
49	* notice, this list of conditions and the following disclaimer in the
50	* documentation and/or other materials provided with the distribution.
51	*
52	* 3. All modifications to the source code must be clearly marked as
53	* such. Binary redistributions based on modified source code
54	* must be clearly marked as modified versions in the documentation
55	* and/or other materials provided with the distribution.
56	*
57	* THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
58	* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
59	* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
60	* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
61	* KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
62	* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
63	* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
64	* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
65	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
66	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
67	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
68	* SUCH DAMAGE.
69	*/
70
71	#include <stdlib.h>
72	#include <string.h>
73	#include <stdio.h>
74	#include <ctype.h>
75	#include <time.h>
76
77	#include <algorithm>
78	#include <limits>
79	#include <string>
80	#include <vector>
81
82	#include "affixmgr.hxx"
83	#include "affentry.hxx"
84	#include "langnum.hxx"
85
86	#include "csutil.hxx"
87
88	AffixMgr::AffixMgr(const char* affpath,
89	const std::vector<HashMgr*>& ptr,
90	const char* key)
91	: alldic(ptr)
92	, pHMgr(ptr[0]) {
93
94	// register hash manager and load affix data from aff file
95	csconv = NULL__null;
96	utf8 = 0;
97	complexprefixes = 0;
98	parsedmaptable = false;
99	parsedbreaktable = false;
100	iconvtable = NULL__null;
101	oconvtable = NULL__null;
102	// allow simplified compound forms (see 3rd field of CHECKCOMPOUNDPATTERN)
103	simplifiedcpd = 0;
104	parsedcheckcpd = false;
105	parseddefcpd = false;
106	phone = NULL__null;
107	compoundflag = FLAG_NULL0x00; // permits word in compound forms
108	compoundbegin = FLAG_NULL0x00; // may be first word in compound forms
109	compoundmiddle = FLAG_NULL0x00; // may be middle word in compound forms
110	compoundend = FLAG_NULL0x00; // may be last word in compound forms
111	compoundroot = FLAG_NULL0x00; // compound word signing flag
112	compoundpermitflag = FLAG_NULL0x00; // compound permitting flag for suffixed word
113	compoundforbidflag = FLAG_NULL0x00; // compound fordidden flag for suffixed word
114	compoundmoresuffixes = 0; // allow more suffixes within compound words
115	checkcompounddup = 0; // forbid double words in compounds
116	checkcompoundrep = 0; // forbid bad compounds (may be non-compound word with
117	// a REP substitution)
118	checkcompoundcase =
119	0; // forbid upper and lowercase combinations at word bounds
120	checkcompoundtriple = 0; // forbid compounds with triple letters
121	simplifiedtriple = 0; // allow simplified triple letters in compounds
122	// (Schiff+fahrt -> Schiffahrt)
123	forbiddenword = FORBIDDENWORD65510; // forbidden word signing flag
124	nosuggest = FLAG_NULL0x00; // don't suggest words signed with NOSUGGEST flag
125	nongramsuggest = FLAG_NULL0x00;
126	langnum = 0; // language code (see http://l10n.openoffice.org/languages.html)
127	needaffix = FLAG_NULL0x00; // forbidden root, allowed only with suffixes
128	cpdwordmax = -1; // default: unlimited wordcount in compound words
129	cpdmin = -1; // undefined
130	cpdmaxsyllable = 0; // default: unlimited syllablecount in compound words
131	pfxappnd = NULL__null; // previous prefix for counting syllables of the prefix BUG
132	sfxappnd = NULL__null; // previous suffix for counting syllables of the suffix BUG
133	sfxextra = 0; // modifier for syllable count of sfxappnd BUG
134	checknum = 0; // checking numbers, and word with numbers
135	havecontclass = 0; // flags of possible continuing classes (double affix)
136	// LEMMA_PRESENT: not put root into the morphological output. Lemma presents
137	// in morhological description in dictionary file. It's often combined with
138	// PSEUDOROOT.
139	lemma_present = FLAG_NULL0x00;
140	circumfix = FLAG_NULL0x00;
141	onlyincompound = FLAG_NULL0x00;
142	maxngramsugs = -1; // undefined
143	maxdiff = -1; // undefined
144	onlymaxdiff = 0;
145	maxcpdsugs = -1; // undefined
146	nosplitsugs = 0;
147	sugswithdots = 0;
148	keepcase = 0;
149	forceucase = 0;
150	warn = 0;
151	forbidwarn = 0;
152	checksharps = 0;
153	substandard = FLAG_NULL0x00;
154	fullstrip = 0;
155
156	sfx = NULL__null;
157	pfx = NULL__null;
158
159	for (int i = 0; i < SETSIZE256; i++) {
160	pStart[i] = NULL__null;
161	sStart[i] = NULL__null;
162	pFlag[i] = NULL__null;
163	sFlag[i] = NULL__null;
164	}
165
166	for (int j = 0; j < CONTSIZE65536; j++) {
167	contclasses[j] = 0;
168	}
169
170	if (parse_file(affpath, key)) {
171	HUNSPELL_WARNING(stderrstderr, "Failure loading aff file %s\n", affpath);
172	}
173
174	if (cpdmin == -1)
175	cpdmin = MINCPDLEN3;
176	}
177
178	AffixMgr::~AffixMgr() {
179	// pass through linked prefix entries and clean up
180	for (int i = 0; i < SETSIZE256; i++) {
181	pFlag[i] = NULL__null;
182	PfxEntry* ptr = pStart[i];
183	PfxEntry* nptr = NULL__null;
184	while (ptr) {
185	nptr = ptr->getNext();
186	delete (ptr);
187	ptr = nptr;
188	nptr = NULL__null;
189	}
190	}
191
192	// pass through linked suffix entries and clean up
193	for (int j = 0; j < SETSIZE256; j++) {
194	sFlag[j] = NULL__null;
195	SfxEntry* ptr = sStart[j];
196	SfxEntry* nptr = NULL__null;
197	while (ptr) {
198	nptr = ptr->getNext();
199	delete (ptr);
200	ptr = nptr;
201	nptr = NULL__null;
202	}
203	sStart[j] = NULL__null;
204	}
205
206	delete iconvtable;
207	delete oconvtable;
208	delete phone;
209
210	FREE_FLAG(compoundflag)compoundflag = 0;
211	FREE_FLAG(compoundbegin)compoundbegin = 0;
212	FREE_FLAG(compoundmiddle)compoundmiddle = 0;
213	FREE_FLAG(compoundend)compoundend = 0;
214	FREE_FLAG(compoundpermitflag)compoundpermitflag = 0;
215	FREE_FLAG(compoundforbidflag)compoundforbidflag = 0;
216	FREE_FLAG(compoundroot)compoundroot = 0;
217	FREE_FLAG(forbiddenword)forbiddenword = 0;
218	FREE_FLAG(nosuggest)nosuggest = 0;
219	FREE_FLAG(nongramsuggest)nongramsuggest = 0;
220	FREE_FLAG(needaffix)needaffix = 0;
221	FREE_FLAG(lemma_present)lemma_present = 0;
222	FREE_FLAG(circumfix)circumfix = 0;
223	FREE_FLAG(onlyincompound)onlyincompound = 0;
224
225	cpdwordmax = 0;
226	pHMgr = NULL__null;
227	cpdmin = 0;
228	cpdmaxsyllable = 0;
229	free_utf_tbl();
230	checknum = 0;
231	#ifdef MOZILLA_CLIENT1
232	delete[] csconv;
233	#endif
234	}
235
236	void AffixMgr::finishFileMgr(FileMgr* afflst) {
237	delete afflst;
238
239	// convert affix trees to sorted list
240	process_pfx_tree_to_list();
241	process_sfx_tree_to_list();
242	}
243
244	// read in aff file and build up prefix and suffix entry objects
245	int AffixMgr::parse_file(const char* affpath, const char* key) {
246
247	// checking flag duplication
248	char dupflags[CONTSIZE65536];
249	char dupflags_ini = 1;
250
251	// first line indicator for removing byte order mark
252	int firstline = 1;
253
254	// open the affix file
255	FileMgr* afflst = new FileMgr(affpath, key);
256	if (!afflst) {
257	HUNSPELL_WARNING(
258	stderrstderr, "error: could not open affix description file %s\n", affpath);
259	return 1;
260	}
261
262	// step one is to parse the affix file building up the internal
263	// affix data structures
264
265	// read in each line ignoring any that do not
266	// start with a known line type indicator
267	std::string line;
268	while (afflst->getline(line)) {
269	mychomp(line);
270
271	/* remove byte order mark */
272	if (firstline) {
273	firstline = 0;
274	// Affix file begins with byte order mark: possible incompatibility with
275	// old Hunspell versions
276	if (line.compare(0, 3, "\xEF\xBB\xBF", 3) == 0) {
277	line.erase(0, 3);
278	}
279	}
280
281	/* parse in the keyboard string */
282	if (line.compare(0, 3, "KEY", 3) == 0) {
283	if (!parse_string(line, keystring, afflst->getlinenum())) {
284	finishFileMgr(afflst);
285	return 1;
286	}
287	}
288
289	/* parse in the try string */
290	if (line.compare(0, 3, "TRY", 3) == 0) {
291	if (!parse_string(line, trystring, afflst->getlinenum())) {
292	finishFileMgr(afflst);
293	return 1;
294	}
295	}
296
297	/* parse in the name of the character set used by the .dict and .aff */
298	if (line.compare(0, 3, "SET", 3) == 0) {
299	if (!parse_string(line, encoding, afflst->getlinenum())) {
300	finishFileMgr(afflst);
301	return 1;
302	}
303	if (encoding == "UTF-8") {
304	utf8 = 1;
305	#ifndef OPENOFFICEORG
306	#ifndef MOZILLA_CLIENT1
307	initialize_utf_tbl();
308	#endif
309	#endif
310	}
311	}
312
313	/* parse COMPLEXPREFIXES for agglutinative languages with right-to-left
314	* writing system */
315	if (line.compare(0, 15, "COMPLEXPREFIXES", 15) == 0)
316	complexprefixes = 1;
317
318	/* parse in the flag used by the controlled compound words */
319	if (line.compare(0, 12, "COMPOUNDFLAG", 12) == 0) {
320	if (!parse_flag(line, &compoundflag, afflst)) {
321	finishFileMgr(afflst);
322	return 1;
323	}
324	}
325
326	/* parse in the flag used by compound words */
327	if (line.compare(0, 13, "COMPOUNDBEGIN", 13) == 0) {
328	if (complexprefixes) {
329	if (!parse_flag(line, &compoundend, afflst)) {
330	finishFileMgr(afflst);
331	return 1;
332	}
333	} else {
334	if (!parse_flag(line, &compoundbegin, afflst)) {
335	finishFileMgr(afflst);
336	return 1;
337	}
338	}
339	}
340
341	/* parse in the flag used by compound words */
342	if (line.compare(0, 14, "COMPOUNDMIDDLE", 14) == 0) {
343	if (!parse_flag(line, &compoundmiddle, afflst)) {
344	finishFileMgr(afflst);
345	return 1;
346	}
347	}
348
349	/* parse in the flag used by compound words */
350	if (line.compare(0, 11, "COMPOUNDEND", 11) == 0) {
351	if (complexprefixes) {
352	if (!parse_flag(line, &compoundbegin, afflst)) {
353	finishFileMgr(afflst);
354	return 1;
355	}
356	} else {
357	if (!parse_flag(line, &compoundend, afflst)) {
358	finishFileMgr(afflst);
359	return 1;
360	}
361	}
362	}
363
364	/* parse in the data used by compound_check() method */
365	if (line.compare(0, 15, "COMPOUNDWORDMAX", 15) == 0) {
366	if (!parse_num(line, &cpdwordmax, afflst)) {
367	finishFileMgr(afflst);
368	return 1;
369	}
370	}
371
372	/* parse in the flag sign compounds in dictionary */
373	if (line.compare(0, 12, "COMPOUNDROOT", 12) == 0) {
374	if (!parse_flag(line, &compoundroot, afflst)) {
375	finishFileMgr(afflst);
376	return 1;
377	}
378	}
379
380	/* parse in the flag used by compound_check() method */
381	if (line.compare(0, 18, "COMPOUNDPERMITFLAG", 18) == 0) {
382	if (!parse_flag(line, &compoundpermitflag, afflst)) {
383	finishFileMgr(afflst);
384	return 1;
385	}
386	}
387
388	/* parse in the flag used by compound_check() method */
389	if (line.compare(0, 18, "COMPOUNDFORBIDFLAG", 18) == 0) {
390	if (!parse_flag(line, &compoundforbidflag, afflst)) {
391	finishFileMgr(afflst);
392	return 1;
393	}
394	}
395
396	if (line.compare(0, 20, "COMPOUNDMORESUFFIXES", 20) == 0) {
397	compoundmoresuffixes = 1;
398	}
399
400	if (line.compare(0, 16, "CHECKCOMPOUNDDUP", 16) == 0) {
401	checkcompounddup = 1;
402	}
403
404	if (line.compare(0, 16, "CHECKCOMPOUNDREP", 16) == 0) {
405	checkcompoundrep = 1;
406	}
407
408	if (line.compare(0, 19, "CHECKCOMPOUNDTRIPLE", 19) == 0) {
409	checkcompoundtriple = 1;
410	}
411
412	if (line.compare(0, 16, "SIMPLIFIEDTRIPLE", 16) == 0) {
413	simplifiedtriple = 1;
414	}
415
416	if (line.compare(0, 17, "CHECKCOMPOUNDCASE", 17) == 0) {
417	checkcompoundcase = 1;
418	}
419
420	if (line.compare(0, 9, "NOSUGGEST", 9) == 0) {
421	if (!parse_flag(line, &nosuggest, afflst)) {
422	finishFileMgr(afflst);
423	return 1;
424	}
425	}
426
427	if (line.compare(0, 14, "NONGRAMSUGGEST", 14) == 0) {
428	if (!parse_flag(line, &nongramsuggest, afflst)) {
429	finishFileMgr(afflst);
430	return 1;
431	}
432	}
433
434	/* parse in the flag used by forbidden words */
435	if (line.compare(0, 13, "FORBIDDENWORD", 13) == 0) {
436	if (!parse_flag(line, &forbiddenword, afflst)) {
437	finishFileMgr(afflst);
438	return 1;
439	}
440	}
441
442	/* parse in the flag used by forbidden words (is deprecated) */
443	if (line.compare(0, 13, "LEMMA_PRESENT", 13) == 0) {
444	if (!parse_flag(line, &lemma_present, afflst)) {
445	finishFileMgr(afflst);
446	return 1;
447	}
448	}
449
450	/* parse in the flag used by circumfixes */
451	if (line.compare(0, 9, "CIRCUMFIX", 9) == 0) {
452	if (!parse_flag(line, &circumfix, afflst)) {
453	finishFileMgr(afflst);
454	return 1;
455	}
456	}
457
458	/* parse in the flag used by fogemorphemes */
459	if (line.compare(0, 14, "ONLYINCOMPOUND", 14) == 0) {
460	if (!parse_flag(line, &onlyincompound, afflst)) {
461	finishFileMgr(afflst);
462	return 1;
463	}
464	}
465
466	/* parse in the flag used by `needaffixs' (is deprecated) */
467	if (line.compare(0, 10, "PSEUDOROOT", 10) == 0) {
468	if (!parse_flag(line, &needaffix, afflst)) {
469	finishFileMgr(afflst);
470	return 1;
471	}
472	}
473
474	/* parse in the flag used by `needaffixs' */
475	if (line.compare(0, 9, "NEEDAFFIX", 9) == 0) {
476	if (!parse_flag(line, &needaffix, afflst)) {
477	finishFileMgr(afflst);
478	return 1;
479	}
480	}
481
482	/* parse in the minimal length for words in compounds */
483	if (line.compare(0, 11, "COMPOUNDMIN", 11) == 0) {
484	if (!parse_num(line, &cpdmin, afflst)) {
485	finishFileMgr(afflst);
486	return 1;
487	}
488	if (cpdmin < 1)
489	cpdmin = 1;
490	}
491
492	/* parse in the max. words and syllables in compounds */
493	if (line.compare(0, 16, "COMPOUNDSYLLABLE", 16) == 0) {
494	if (!parse_cpdsyllable(line, afflst)) {
495	finishFileMgr(afflst);
496	return 1;
497	}
498	}
499
500	/* parse in the flag used by compound_check() method */
501	if (line.compare(0, 11, "SYLLABLENUM", 11) == 0) {
502	if (!parse_string(line, cpdsyllablenum, afflst->getlinenum())) {
503	finishFileMgr(afflst);
504	return 1;
505	}
506	}
507
508	/* parse in the flag used by the controlled compound words */
509	if (line.compare(0, 8, "CHECKNUM", 8) == 0) {
510	checknum = 1;
511	}
512
513	/* parse in the extra word characters */
514	if (line.compare(0, 9, "WORDCHARS", 9) == 0) {
515	if (!parse_array(line, wordchars, wordchars_utf16,
516	utf8, afflst->getlinenum())) {
517	finishFileMgr(afflst);
518	return 1;
519	}
520	}
521
522	/* parse in the ignored characters (for example, Arabic optional diacretics
523	* charachters */
524	if (line.compare(0, 6, "IGNORE", 6) == 0) {
525	if (!parse_array(line, ignorechars, ignorechars_utf16,
526	utf8, afflst->getlinenum())) {
527	finishFileMgr(afflst);
528	return 1;
529	}
530	}
531
532	/* parse in the input conversion table */
533	if (line.compare(0, 5, "ICONV", 5) == 0) {
534	if (!parse_convtable(line, afflst, &iconvtable, "ICONV")) {
535	finishFileMgr(afflst);
536	return 1;
537	}
538	}
539
540	/* parse in the output conversion table */
541	if (line.compare(0, 5, "OCONV", 5) == 0) {
542	if (!parse_convtable(line, afflst, &oconvtable, "OCONV")) {
543	finishFileMgr(afflst);
544	return 1;
545	}
546	}
547
548	/* parse in the phonetic translation table */
549	if (line.compare(0, 5, "PHONE", 5) == 0) {
550	if (!parse_phonetable(line, afflst)) {
551	finishFileMgr(afflst);
552	return 1;
553	}
554	}
555
556	/* parse in the checkcompoundpattern table */
557	if (line.compare(0, 20, "CHECKCOMPOUNDPATTERN", 20) == 0) {
558	if (!parse_checkcpdtable(line, afflst)) {
559	finishFileMgr(afflst);
560	return 1;
561	}
562	}
563
564	/* parse in the defcompound table */
565	if (line.compare(0, 12, "COMPOUNDRULE", 12) == 0) {
566	if (!parse_defcpdtable(line, afflst)) {
567	finishFileMgr(afflst);
568	return 1;
569	}
570	}
571
572	/* parse in the related character map table */
573	if (line.compare(0, 3, "MAP", 3) == 0) {
574	if (!parse_maptable(line, afflst)) {
575	finishFileMgr(afflst);
576	return 1;
577	}
578	}
579
580	/* parse in the word breakpoints table */
581	if (line.compare(0, 5, "BREAK", 5) == 0) {
582	if (!parse_breaktable(line, afflst)) {
583	finishFileMgr(afflst);
584	return 1;
585	}
586	}
587
588	/* parse in the language for language specific codes */
589	if (line.compare(0, 4, "LANG", 4) == 0) {
590	if (!parse_string(line, lang, afflst->getlinenum())) {
591	finishFileMgr(afflst);
592	return 1;
593	}
594	langnum = get_lang_num(lang);
595	}
596
597	if (line.compare(0, 7, "VERSION", 7) == 0) {
598	size_t startpos = line.find_first_not_of(" \t", 7);
599	if (startpos != std::string::npos) {
600	version = line.substr(startpos);
601	}
602	}
603
604	if (line.compare(0, 12, "MAXNGRAMSUGS", 12) == 0) {
605	if (!parse_num(line, &maxngramsugs, afflst)) {
606	finishFileMgr(afflst);
607	return 1;
608	}
609	}
610
611	if (line.compare(0, 11, "ONLYMAXDIFF", 11) == 0)
612	onlymaxdiff = 1;
613
614	if (line.compare(0, 7, "MAXDIFF", 7) == 0) {
615	if (!parse_num(line, &maxdiff, afflst)) {
616	finishFileMgr(afflst);
617	return 1;
618	}
619	}
620
621	if (line.compare(0, 10, "MAXCPDSUGS", 10) == 0) {
622	if (!parse_num(line, &maxcpdsugs, afflst)) {
623	finishFileMgr(afflst);
624	return 1;
625	}
626	}
627
628	if (line.compare(0, 11, "NOSPLITSUGS", 11) == 0) {
629	nosplitsugs = 1;
630	}
631
632	if (line.compare(0, 9, "FULLSTRIP", 9) == 0) {
633	fullstrip = 1;
634	}
635
636	if (line.compare(0, 12, "SUGSWITHDOTS", 12) == 0) {
637	sugswithdots = 1;
638	}
639
640	/* parse in the flag used by forbidden words */
641	if (line.compare(0, 8, "KEEPCASE", 8) == 0) {
642	if (!parse_flag(line, &keepcase, afflst)) {
643	finishFileMgr(afflst);
644	return 1;
645	}
646	}
647
648	/* parse in the flag used by `forceucase' */
649	if (line.compare(0, 10, "FORCEUCASE", 10) == 0) {
650	if (!parse_flag(line, &forceucase, afflst)) {
651	finishFileMgr(afflst);
652	return 1;
653	}
654	}
655
656	/* parse in the flag used by `warn' */
657	if (line.compare(0, 4, "WARN", 4) == 0) {
658	if (!parse_flag(line, &warn, afflst)) {
659	finishFileMgr(afflst);
660	return 1;
661	}
662	}
663
664	if (line.compare(0, 10, "FORBIDWARN", 10) == 0) {
665	forbidwarn = 1;
666	}
667
668	/* parse in the flag used by the affix generator */
669	if (line.compare(0, 11, "SUBSTANDARD", 11) == 0) {
670	if (!parse_flag(line, &substandard, afflst)) {
671	finishFileMgr(afflst);
672	return 1;
673	}
674	}
675
676	if (line.compare(0, 11, "CHECKSHARPS", 11) == 0) {
677	checksharps = 1;
678	}
679
680	/* parse this affix: P - prefix, S - suffix */
681	// affix type
682	char ft = ' ';
683	if (line.compare(0, 3, "PFX", 3) == 0)
684	ft = complexprefixes ? 'S' : 'P';
685	if (line.compare(0, 3, "SFX", 3) == 0)
686	ft = complexprefixes ? 'P' : 'S';
687	if (ft != ' ') {
688	if (dupflags_ini) {
689	memset(dupflags, 0, sizeof(dupflags));
690	dupflags_ini = 0;
691	}
692	if (!parse_affix(line, ft, afflst, dupflags)) {
693	finishFileMgr(afflst);
694	return 1;
695	}
696	}
697	}
698
699	finishFileMgr(afflst);
700	// affix trees are sorted now
701
702	// now we can speed up performance greatly taking advantage of the
703	// relationship between the affixes and the idea of "subsets".
704
705	// View each prefix as a potential leading subset of another and view
706	// each suffix (reversed) as a potential trailing subset of another.
707
708	// To illustrate this relationship if we know the prefix "ab" is found in the
709	// word to examine, only prefixes that "ab" is a leading subset of need be
710	// examined.
711	// Furthermore is "ab" is not present then none of the prefixes that "ab" is
712	// is a subset need be examined.
713	// The same argument goes for suffix string that are reversed.
714
715	// Then to top this off why not examine the first char of the word to quickly
716	// limit the set of prefixes to examine (i.e. the prefixes to examine must
717	// be leading supersets of the first character of the word (if they exist)
718
719	// To take advantage of this "subset" relationship, we need to add two links
720	// from entry. One to take next if the current prefix is found (call it
721	// nexteq)
722	// and one to take next if the current prefix is not found (call it nextne).
723
724	// Since we have built ordered lists, all that remains is to properly
725	// initialize
726	// the nextne and nexteq pointers that relate them
727
728	process_pfx_order();
729	process_sfx_order();
730
731	/* get encoding for CHECKCOMPOUNDCASE */
732	if (!utf8) {
733	csconv = get_current_cs(get_encoding());
734	for (int i = 0; i <= 255; i++) {
735	if ((csconv[i].cupper != csconv[i].clower) &&
736	(wordchars.find((char)i) == std::string::npos)) {
737	wordchars.push_back((char)i);
738	}
739	}
740
741	}
742
743	// default BREAK definition
744	if (!parsedbreaktable) {
745	breaktable.push_back("-");
746	breaktable.push_back("^-");
747	breaktable.push_back("-$");
748	parsedbreaktable = true;
749	}
750	return 0;
751	}
752
753	// we want to be able to quickly access prefix information
754	// both by prefix flag, and sorted by prefix string itself
755	// so we need to set up two indexes
756
757	int AffixMgr::build_pfxtree(PfxEntry* pfxptr) {
758	PfxEntry* ptr;
759	PfxEntry* pptr;
760	PfxEntry* ep = pfxptr;
761
762	// get the right starting points
763	const char* key = ep->getKey();
764	const unsigned char flg = (unsigned char)(ep->getFlag() & 0x00FF);
765
766	// first index by flag which must exist
767	ptr = pFlag[flg];
768	ep->setFlgNxt(ptr);
769	pFlag[flg] = ep;
770
771	// handle the special case of null affix string
772	if (strlen(key) == 0) {
773	// always inset them at head of list at element 0
774	ptr = pStart[0];
775	ep->setNext(ptr);
776	pStart[0] = ep;
777	return 0;
778	}
779
780	// now handle the normal case
781	ep->setNextEQ(NULL__null);
782	ep->setNextNE(NULL__null);
783
784	unsigned char sp = ((const unsigned char)key);
785	ptr = pStart[sp];
786
787	// handle the first insert
788	if (!ptr) {
789	pStart[sp] = ep;
790	return 0;
791	}
792
793	// otherwise use binary tree insertion so that a sorted
794	// list can easily be generated later
795	pptr = NULL__null;
796	for (;;) {
797	pptr = ptr;
798	if (strcmp(ep->getKey(), ptr->getKey()) <= 0) {
799	ptr = ptr->getNextEQ();
800	if (!ptr) {
801	pptr->setNextEQ(ep);
802	break;
803	}
804	} else {
805	ptr = ptr->getNextNE();
806	if (!ptr) {
807	pptr->setNextNE(ep);
808	break;
809	}
810	}
811	}
812	return 0;
813	}
814
815	// we want to be able to quickly access suffix information
816	// both by suffix flag, and sorted by the reverse of the
817	// suffix string itself; so we need to set up two indexes
818	int AffixMgr::build_sfxtree(SfxEntry* sfxptr) {
819
820	sfxptr->initReverseWord();
821
822	SfxEntry* ptr;
823	SfxEntry* pptr;
824	SfxEntry* ep = sfxptr;
825
826	/* get the right starting point */
827	const char* key = ep->getKey();
828	const unsigned char flg = (unsigned char)(ep->getFlag() & 0x00FF);
829
830	// first index by flag which must exist
831	ptr = sFlag[flg];
832	ep->setFlgNxt(ptr);
833	sFlag[flg] = ep;
834
835	// next index by affix string
836
837	// handle the special case of null affix string
838	if (strlen(key) == 0) {
839	// always inset them at head of list at element 0
840	ptr = sStart[0];
841	ep->setNext(ptr);
842	sStart[0] = ep;
843	return 0;
844	}
845
846	// now handle the normal case
847	ep->setNextEQ(NULL__null);
848	ep->setNextNE(NULL__null);
849
850	unsigned char sp = ((const unsigned char)key);
851	ptr = sStart[sp];
852
853	// handle the first insert
854	if (!ptr) {
855	sStart[sp] = ep;
856	return 0;
857	}
858
859	// otherwise use binary tree insertion so that a sorted
860	// list can easily be generated later
861	pptr = NULL__null;
862	for (;;) {
863	pptr = ptr;
864	if (strcmp(ep->getKey(), ptr->getKey()) <= 0) {
865	ptr = ptr->getNextEQ();
866	if (!ptr) {
867	pptr->setNextEQ(ep);
868	break;
869	}
870	} else {
871	ptr = ptr->getNextNE();
872	if (!ptr) {
873	pptr->setNextNE(ep);
874	break;
875	}
876	}
877	}
878	return 0;
879	}
880
881	// convert from binary tree to sorted list
882	int AffixMgr::process_pfx_tree_to_list() {
883	for (int i = 1; i < SETSIZE256; i++) {
884	pStart[i] = process_pfx_in_order(pStart[i], NULL__null);
885	}
886	return 0;
887	}
888
889	PfxEntry* AffixMgr::process_pfx_in_order(PfxEntry* ptr, PfxEntry* nptr) {
890	if (ptr) {
891	nptr = process_pfx_in_order(ptr->getNextNE(), nptr);
892	ptr->setNext(nptr);
893	nptr = process_pfx_in_order(ptr->getNextEQ(), ptr);
894	}
895	return nptr;
896	}
897
898	// convert from binary tree to sorted list
899	int AffixMgr::process_sfx_tree_to_list() {
900	for (int i = 1; i < SETSIZE256; i++) {
901	sStart[i] = process_sfx_in_order(sStart[i], NULL__null);
902	}
903	return 0;
904	}
905
906	SfxEntry* AffixMgr::process_sfx_in_order(SfxEntry* ptr, SfxEntry* nptr) {
907	if (ptr) {
908	nptr = process_sfx_in_order(ptr->getNextNE(), nptr);
909	ptr->setNext(nptr);
910	nptr = process_sfx_in_order(ptr->getNextEQ(), ptr);
911	}
912	return nptr;
913	}
914
915	// reinitialize the PfxEntry links NextEQ and NextNE to speed searching
916	// using the idea of leading subsets this time
917	int AffixMgr::process_pfx_order() {
918	PfxEntry* ptr;
919
920	// loop through each prefix list starting point
921	for (int i = 1; i < SETSIZE256; i++) {
922	ptr = pStart[i];
923
924	// look through the remainder of the list
925	// and find next entry with affix that
926	// the current one is not a subset of
927	// mark that as destination for NextNE
928	// use next in list that you are a subset
929	// of as NextEQ
930
931	for (; ptr != NULL__null; ptr = ptr->getNext()) {
932	PfxEntry* nptr = ptr->getNext();
933	for (; nptr != NULL__null; nptr = nptr->getNext()) {
934	if (!isSubset(ptr->getKey(), nptr->getKey()))
935	break;
936	}
937	ptr->setNextNE(nptr);
938	ptr->setNextEQ(NULL__null);
939	if ((ptr->getNext()) &&
940	isSubset(ptr->getKey(), (ptr->getNext())->getKey()))
941	ptr->setNextEQ(ptr->getNext());
942	}
943
944	// now clean up by adding smart search termination strings:
945	// if you are already a superset of the previous prefix
946	// but not a subset of the next, search can end here
947	// so set NextNE properly
948
949	ptr = pStart[i];
950	for (; ptr != NULL__null; ptr = ptr->getNext()) {
951	PfxEntry* nptr = ptr->getNext();
952	PfxEntry* mptr = NULL__null;
953	for (; nptr != NULL__null; nptr = nptr->getNext()) {
954	if (!isSubset(ptr->getKey(), nptr->getKey()))
955	break;
956	mptr = nptr;
957	}
958	if (mptr)
959	mptr->setNextNE(NULL__null);
960	}
961	}
962	return 0;
963	}
964
965	// initialize the SfxEntry links NextEQ and NextNE to speed searching
966	// using the idea of leading subsets this time
967	int AffixMgr::process_sfx_order() {
968	SfxEntry* ptr;
969
970	// loop through each prefix list starting point
971	for (int i = 1; i < SETSIZE256; i++) {
972	ptr = sStart[i];
973
974	// look through the remainder of the list
975	// and find next entry with affix that
976	// the current one is not a subset of
977	// mark that as destination for NextNE
978	// use next in list that you are a subset
979	// of as NextEQ
980
981	for (; ptr != NULL__null; ptr = ptr->getNext()) {
982	SfxEntry* nptr = ptr->getNext();
983	for (; nptr != NULL__null; nptr = nptr->getNext()) {
984	if (!isSubset(ptr->getKey(), nptr->getKey()))
985	break;
986	}
987	ptr->setNextNE(nptr);
988	ptr->setNextEQ(NULL__null);
989	if ((ptr->getNext()) &&
990	isSubset(ptr->getKey(), (ptr->getNext())->getKey()))
991	ptr->setNextEQ(ptr->getNext());
992	}
993
994	// now clean up by adding smart search termination strings:
995	// if you are already a superset of the previous suffix
996	// but not a subset of the next, search can end here
997	// so set NextNE properly
998
999	ptr = sStart[i];
1000	for (; ptr != NULL__null; ptr = ptr->getNext()) {
1001	SfxEntry* nptr = ptr->getNext();
1002	SfxEntry* mptr = NULL__null;
1003	for (; nptr != NULL__null; nptr = nptr->getNext()) {
1004	if (!isSubset(ptr->getKey(), nptr->getKey()))
1005	break;
1006	mptr = nptr;
1007	}
1008	if (mptr)
1009	mptr->setNextNE(NULL__null);
1010	}
1011	}
1012	return 0;
1013	}
1014
1015	// add flags to the result for dictionary debugging
1016	std::string& AffixMgr::debugflag(std::string& result, unsigned short flag) {
1017	char* st = encode_flag(flag);
1018	result.push_back(MSEP_FLD' ');
1019	result.append(MORPH_FLAG"fl:");
1020	if (st) {
1021	result.append(st);
1022	free(st)HunspellAllocator::CountingFree(st);
1023	}
1024	return result;
1025	}
1026
1027	// calculate the character length of the condition
1028	int AffixMgr::condlen(const char* st) {
1029	int l = 0;
1030	bool group = false;
1031	for (; *st; st++) {
1032	if (*st == '[') {
1033	group = true;
1034	l++;
1035	} else if (*st == ']')
1036	group = false;
1037	else if (!group && (!utf8 \|\| (!(st & 0x80) \|\| ((st & 0xc0) == 0x80))))
1038	l++;
1039	}
1040	return l;
1041	}
1042
1043	int AffixMgr::encodeit(AffEntry& entry, const char* cs) {
1044	if (strcmp(cs, ".") != 0) {
1045	entry.numconds = (char)condlen(cs);
1046	const size_t cslen = strlen(cs);
1047	const size_t short_part = std::min<size_t>(MAXCONDLEN20, cslen);
1048	memcpy(entry.c.conds, cs, short_part);
1049	if (short_part < MAXCONDLEN20) {
1050	//blank out the remaining space
1051	memset(entry.c.conds + short_part, 0, MAXCONDLEN20 - short_part);
1052	} else if (cs[MAXCONDLEN20]) {
1053	//there is more conditions than fit in fixed space, so its
1054	//a long condition
1055	entry.opts \|= aeLONGCOND(1 << 4);
1056	entry.c.l.conds2 = mystrdup(cs + MAXCONDLEN_1(20 - sizeof(char*)));
1057	if (!entry.c.l.conds2)
1058	return 1;
1059	}
1060	} else {
1061	entry.numconds = 0;
1062	entry.c.conds[0] = '\0';
1063	}
1064	return 0;
1065	}
1066
1067	// return 1 if s1 is a leading subset of s2 (dots are for infixes)
1068	inline int AffixMgr::isSubset(const char* s1, const char* s2) {
1069	while (((s1 == s2) \|\| (s1 == '.')) && (s1 != '\0')) {
1070	s1++;
1071	s2++;
1072	}
1073	return (*s1 == '\0');
1074	}
1075
1076	// check word for prefixes
1077	struct hentry* AffixMgr::prefix_check(const char* word,
1078	int len,
1079	char in_compound,
1080	const FLAGunsigned short needflag) {
1081	struct hentry* rv = NULL__null;
1082
1083	pfx = NULL__null;
1084	pfxappnd = NULL__null;
1085	sfxappnd = NULL__null;
1086	sfxextra = 0;
1087
1088	// first handle the special case of 0 length prefixes
1089	PfxEntry* pe = pStart[0];
1090	while (pe) {
1091	if (
1092	// fogemorpheme
1093	((in_compound != IN_CPD_NOT0) \|\|
1094	!(pe->getCont() &&
1095	(TESTAFF(pe->getCont(), onlyincompound, pe->getContLen())(std::binary_search(pe->getCont(), pe->getCont() + pe-> getContLen(), onlyincompound))))) &&
1096	// permit prefixes in compounds
1097	((in_compound != IN_CPD_END2) \|\|
1098	(pe->getCont() &&
1099	(TESTAFF(pe->getCont(), compoundpermitflag, pe->getContLen())(std::binary_search(pe->getCont(), pe->getCont() + pe-> getContLen(), compoundpermitflag)))))) {
1100	// check prefix
1101	rv = pe->checkword(word, len, in_compound, needflag);
1102	if (rv) {
1103	pfx = pe; // BUG: pfx not stateless
1104	return rv;
1105	}
1106	}
1107	pe = pe->getNext();
1108	}
1109
1110	// now handle the general case
1111	unsigned char sp = ((const unsigned char)word);
1112	PfxEntry* pptr = pStart[sp];
1113
1114	while (pptr) {
1115	if (isSubset(pptr->getKey(), word)) {
1116	if (
1117	// fogemorpheme
1118	((in_compound != IN_CPD_NOT0) \|\|
1119	!(pptr->getCont() &&
1120	(TESTAFF(pptr->getCont(), onlyincompound, pptr->getContLen())(std::binary_search(pptr->getCont(), pptr->getCont() + pptr ->getContLen(), onlyincompound))))) &&
1121	// permit prefixes in compounds
1122	((in_compound != IN_CPD_END2) \|\|
1123	(pptr->getCont() && (TESTAFF(pptr->getCont(), compoundpermitflag,(std::binary_search(pptr->getCont(), pptr->getCont() + pptr ->getContLen(), compoundpermitflag))
1124	pptr->getContLen())(std::binary_search(pptr->getCont(), pptr->getCont() + pptr ->getContLen(), compoundpermitflag)))))) {
1125	// check prefix
1126	rv = pptr->checkword(word, len, in_compound, needflag);
1127	if (rv) {
1128	pfx = pptr; // BUG: pfx not stateless
1129	return rv;
1130	}
1131	}
1132	pptr = pptr->getNextEQ();
1133	} else {
1134	pptr = pptr->getNextNE();
1135	}
1136	}
1137
1138	return NULL__null;
1139	}
1140
1141	// check word for prefixes and two-level suffixes
1142	struct hentry* AffixMgr::prefix_check_twosfx(const char* word,
1143	int len,
1144	char in_compound,
1145	const FLAGunsigned short needflag) {
1146	struct hentry* rv = NULL__null;
1147
1148	pfx = NULL__null;
1149	sfxappnd = NULL__null;
1150	sfxextra = 0;
1151
1152	// first handle the special case of 0 length prefixes
1153	PfxEntry* pe = pStart[0];
1154
1155	while (pe) {
1156	rv = pe->check_twosfx(word, len, in_compound, needflag);
1157	if (rv)
1158	return rv;
1159	pe = pe->getNext();
1160	}
1161
1162	// now handle the general case
1163	unsigned char sp = ((const unsigned char)word);
1164	PfxEntry* pptr = pStart[sp];
1165
1166	while (pptr) {
1167	if (isSubset(pptr->getKey(), word)) {
1168	rv = pptr->check_twosfx(word, len, in_compound, needflag);
1169	if (rv) {
1170	pfx = pptr;
1171	return rv;
1172	}
1173	pptr = pptr->getNextEQ();
1174	} else {
1175	pptr = pptr->getNextNE();
1176	}
1177	}
1178
1179	return NULL__null;
1180	}
1181
1182	// check word for prefixes and morph
1183	std::string AffixMgr::prefix_check_morph(const char* word,
1184	int len,
1185	char in_compound,
1186	const FLAGunsigned short needflag) {
1187
1188	std::string result;
1189
1190	pfx = NULL__null;
1191	sfxappnd = NULL__null;
1192	sfxextra = 0;
1193
1194	// first handle the special case of 0 length prefixes
1195	PfxEntry* pe = pStart[0];
1196	while (pe) {
1197	std::string st = pe->check_morph(word, len, in_compound, needflag);
1198	if (!st.empty()) {
1199	result.append(st);
1200	}
1201	pe = pe->getNext();
1202	}
1203
1204	// now handle the general case
1205	unsigned char sp = ((const unsigned char)word);
1206	PfxEntry* pptr = pStart[sp];
1207
1208	while (pptr) {
1209	if (isSubset(pptr->getKey(), word)) {
1210	std::string st = pptr->check_morph(word, len, in_compound, needflag);
1211	if (!st.empty()) {
1212	// fogemorpheme
1213	if ((in_compound != IN_CPD_NOT0) \|\|
1214	!((pptr->getCont() && (TESTAFF(pptr->getCont(), onlyincompound,(std::binary_search(pptr->getCont(), pptr->getCont() + pptr ->getContLen(), onlyincompound))
1215	pptr->getContLen())(std::binary_search(pptr->getCont(), pptr->getCont() + pptr ->getContLen(), onlyincompound)))))) {
1216	result.append(st);
1217	pfx = pptr;
1218	}
1219	}
1220	pptr = pptr->getNextEQ();
1221	} else {
1222	pptr = pptr->getNextNE();
1223	}
1224	}
1225
1226	return result;
1227	}
1228
1229	// check word for prefixes and morph and two-level suffixes
1230	std::string AffixMgr::prefix_check_twosfx_morph(const char* word,
1231	int len,
1232	char in_compound,
1233	const FLAGunsigned short needflag) {
1234	std::string result;
1235
1236	pfx = NULL__null;
1237	sfxappnd = NULL__null;
1238	sfxextra = 0;
1239
1240	// first handle the special case of 0 length prefixes
1241	PfxEntry* pe = pStart[0];
1242	while (pe) {
1243	std::string st = pe->check_twosfx_morph(word, len, in_compound, needflag);
1244	if (!st.empty()) {
1245	result.append(st);
1246	}
1247	pe = pe->getNext();
1248	}
1249
1250	// now handle the general case
1251	unsigned char sp = ((const unsigned char)word);
1252	PfxEntry* pptr = pStart[sp];
1253
1254	while (pptr) {
1255	if (isSubset(pptr->getKey(), word)) {
1256	std::string st = pptr->check_twosfx_morph(word, len, in_compound, needflag);
1257	if (!st.empty()) {
1258	result.append(st);
1259	pfx = pptr;
1260	}
1261	pptr = pptr->getNextEQ();
1262	} else {
1263	pptr = pptr->getNextNE();
1264	}
1265	}
1266
1267	return result;
1268	}
1269
1270	// Is word a non-compound with a REP substitution (see checkcompoundrep)?
1271	int AffixMgr::cpdrep_check(const char* word, int wl) {
1272
1273	if ((wl < 2) \|\| get_reptable().empty())
1274	return 0;
1275
1276	for (size_t i = 0; i < get_reptable().size(); ++i) {
1277	// use only available mid patterns
1278	if (!get_reptable()[i].outstrings[0].empty()) {
1279	const char* r = word;
1280	const size_t lenp = get_reptable()[i].pattern.size();
1281	// search every occurence of the pattern in the word
1282	while ((r = strstr(r, get_reptable()[i].pattern.c_str())) != NULL__null) {
1283	std::string candidate(word);
1284	candidate.replace(r - word, lenp, get_reptable()[i].outstrings[0]);
1285	if (candidate_check(candidate.c_str(), candidate.size()))
1286	return 1;
1287	++r; // search for the next letter
1288	}
1289	}
1290	}
1291
1292	return 0;
1293	}
1294
1295	// forbid compound words, if they are in the dictionary as a
1296	// word pair separated by space
1297	int AffixMgr::cpdwordpair_check(const char * word, int wl) {
1298	if (wl > 2) {
1299	std::string candidate(word);
1300	for (size_t i = 1; i < candidate.size(); i++) {
1301	// go to end of the UTF-8 character
1302	if (utf8 && ((word[i] & 0xc0) == 0x80))
1303	continue;
1304	candidate.insert(i, 1, ' ');
1305	if (candidate_check(candidate.c_str(), candidate.size()))
1306	return 1;
1307	candidate.erase(i, 1);
1308	}
1309	}
1310
1311	return 0;
1312	}
1313
1314	// forbid compoundings when there are special patterns at word bound
1315	int AffixMgr::cpdpat_check(const char* word,
1316	int pos,
1317	hentry* r1,
1318	hentry* r2,
1319	const char /affixed/) {
1320	for (size_t i = 0; i < checkcpdtable.size(); ++i) {
1321	size_t len;
1322	if (isSubset(checkcpdtable[i].pattern2.c_str(), word + pos) &&
1323	(!r1 \|\| !checkcpdtable[i].cond \|\|
1324	(r1->astr && TESTAFF(r1->astr, checkcpdtable[i].cond, r1->alen)(std::binary_search(r1->astr, r1->astr + r1->alen, checkcpdtable [i].cond)))) &&
1325	(!r2 \|\| !checkcpdtable[i].cond2 \|\|
1326	(r2->astr && TESTAFF(r2->astr, checkcpdtable[i].cond2, r2->alen)(std::binary_search(r2->astr, r2->astr + r2->alen, checkcpdtable [i].cond2)))) &&
1327	// zero length pattern => only TESTAFF
1328	// zero pattern (0/flag) => unmodified stem (zero affixes allowed)
1329	(checkcpdtable[i].pattern.empty() \|\|
1330	((checkcpdtable[i].pattern[0] == '0' && r1->blen <= pos &&
1331	strncmp(word + pos - r1->blen, r1->word, r1->blen) == 0) \|\|
1332	(checkcpdtable[i].pattern[0] != '0' &&
1333	((len = checkcpdtable[i].pattern.size()) != 0) &&
1334	strncmp(word + pos - len, checkcpdtable[i].pattern.c_str(), len) == 0)))) {
1335	return 1;
1336	}
1337	}
1338	return 0;
1339	}
1340
1341	// forbid compounding with neighbouring upper and lower case characters at word
1342	// bounds
1343	int AffixMgr::cpdcase_check(const char* word, int pos) {
1344	if (utf8) {
1345	const char* p;
1346	for (p = word + pos - 1; (*p & 0xc0) == 0x80; p--)
1347	;
1348	std::string pair(p);
1349	std::vector<w_char> pair_u;
1350	u8_u16(pair_u, pair);
1351	unsigned short a = pair_u.size() > 1 ? ((pair_u[1].h << 8) + pair_u[1].l) : 0;
1352	unsigned short b = !pair_u.empty() ? ((pair_u[0].h << 8) + pair_u[0].l) : 0;
1353	if (((unicodetoupper(a, langnum) == a) \|\|
1354	(unicodetoupper(b, langnum) == b)) &&
1355	(a != '-') && (b != '-'))
1356	return 1;
1357	} else {
1358	unsigned char a = *(word + pos - 1);
1359	unsigned char b = *(word + pos);
1360	if ((csconv[a].ccase \|\| csconv[b].ccase) && (a != '-') && (b != '-'))
1361	return 1;
1362	}
1363	return 0;
1364	}
1365
// Backtracking record used by AffixMgr::defcpd_check(): one entry is kept
// per '*' / '?' metacharacter that has consumed at least one word, so the
// matcher can later retry with one word fewer.
1366	struct metachar_data {
1367	signed short btpp; // metacharacter (*, ?) position for backtracking
1368	signed short btwp; // word position for metacharacters
1369	int btnum; // number of matched characters in metacharacter
1370	};
1371
// Match the sequence of compound parts against the patterns stored in
// defcpdtable. Each pattern is a sequence of flags in which a flag may be
// followed by '*' or '?'.
//   words - in/out: address of the array of parts collected so far; if
//           *words is NULL the array `def` is used instead
//   wnum  - index at which the new part `rv` is stored
//   rv    - dictionary entry of the newest part
//   def   - fallback array used when *words is NULL
//   all   - non-zero: the pattern must be consumed completely (trailing
//           flag+'*' / flag+'?' pairs may match nothing); zero: a
//           successful partial match is enough
// Returns 1 on a match. On failure returns 0 after clearing
// (*words)[wnum] and, if the fallback array was installed here, resetting
// *words to NULL.
1372	// check compound patterns
1373	int AffixMgr::defcpd_check(hentry*** words,
1374	short wnum,
1375	hentry* rv,
1376	hentry** def,
1377	char all) {
// w remembers that *words was NULL on entry and `def` was installed
1378	int w = 0;
1379
1380	if (!*words) {
1381	w = 1;
1382	*words = def;
1383	}
1384
// no array supplied by either route: nothing to match against
1385	if (!*words) {
1386	return 0;
1387	}
1388
// backtracking stack; bt is the number of live entries and btinfo[bt] is
// the scratch slot that is filled next
1389	std::vector<metachar_data> btinfo(1);
1390
1391	short bt = 0;
1392
1393	(*words)[wnum] = rv;
1394
1395	// has the last word COMPOUNDRULE flag?
1396	if (rv->alen == 0) {
1397	(*words)[wnum] = NULL__null;
1398	if (w)
1399	*words = NULL__null;
1400	return 0;
1401	}
// quick reject: rv must carry at least one flag that occurs (as a
// non-metacharacter) somewhere in the table; the break only leaves the
// inner loop, so the remaining rows are still scanned
1402	int ok = 0;
1403	for (size_t i = 0; i < defcpdtable.size(); ++i) {
1404	for (size_t j = 0; j < defcpdtable[i].size(); ++j) {
1405	if (defcpdtable[i][j] != '*' && defcpdtable[i][j] != '?' &&
1406	TESTAFF(rv->astr, defcpdtable[i][j], rv->alen)(std::binary_search(rv->astr, rv->astr + rv->alen, defcpdtable [i][j]))) {
1407	ok = 1;
1408	break;
1409	}
1410	}
1411	}
1412	if (ok == 0) {
1413	(*words)[wnum] = NULL__null;
1414	if (w)
1415	*words = NULL__null;
1416	return 0;
1417	}
1418
// try every pattern in turn
1419	for (size_t i = 0; i < defcpdtable.size(); ++i) {
1420	size_t pp = 0; // pattern position
1421	signed short wp = 0; // "words" position
1422	int ok2;
1423	ok = 1;
1424	ok2 = 1;
1425	do {
1426	while ((pp < defcpdtable[i].size()) && (wp <= wnum)) {
// flag followed by '*' or '?': consume as many words as allowed ('?' at
// most one, '*' up to wnum) and remember the position for backtracking
// NOTE(review): pp is a size_t stored into the signed short btpp below --
// presumably patterns are short enough; confirm.
// NOTE(review): unlike the plain-flag branch further down, the inner loop
// here reads (*words)[wp]->alen without first testing (*words)[wp] for
// NULL -- confirm that all slots up to wnum are always populated.
1427	if (((pp + 1) < defcpdtable[i].size()) &&
1428	((defcpdtable[i][pp + 1] == '*') \|\|
1429	(defcpdtable[i][pp + 1] == '?'))) {
1430	int wend = (defcpdtable[i][pp + 1] == '?') ? wp : wnum;
1431	ok2 = 1;
1432	pp += 2;
1433	btinfo[bt].btpp = pp;
1434	btinfo[bt].btwp = wp;
1435	while (wp <= wend) {
1436	if (!(*words)[wp]->alen \|\|
1437	!TESTAFF((words)[wp]->astr, defcpdtable[i][pp - 2],(std::binary_search((words)[wp]->astr, (words)[wp]->astr + (words)[wp]->alen, defcpdtable[i][pp - 2]))
1438	(words)[wp]->alen)(std::binary_search((words)[wp]->astr, (words)[wp]->astr + (words)[wp]->alen, defcpdtable[i][pp - 2]))) {
1439	ok2 = 0;
1440	break;
1441	}
1442	wp++;
1443	}
// words left over after the metacharacter: not a full match yet
1444	if (wp <= wnum)
1445	ok2 = 0;
// keep a backtracking entry only if the metacharacter consumed something
1446	btinfo[bt].btnum = wp - btinfo[bt].btwp;
1447	if (btinfo[bt].btnum > 0) {
1448	++bt;
1449	btinfo.resize(bt+1);
1450	}
1451	if (ok2)
1452	break;
1453	} else {
// plain flag: the current word must exist and carry exactly this flag
1454	ok2 = 1;
1455	if (!(words)[wp] \|\| !(words)[wp]->alen \|\|
1456	!TESTAFF((words)[wp]->astr, defcpdtable[i][pp],(std::binary_search((words)[wp]->astr, (words)[wp]->astr + (words)[wp]->alen, defcpdtable[i][pp]))
1457	(words)[wp]->alen)(std::binary_search((words)[wp]->astr, (words)[wp]->astr + (words)[wp]->alen, defcpdtable[i][pp]))) {
1458	ok = 0;
1459	break;
1460	}
1461	pp++;
1462	wp++;
// pattern exhausted while words remain: mismatch
1463	if ((defcpdtable[i].size() == pp) && !(wp > wnum))
1464	ok = 0;
1465	}
1466	}
// matched so far: skip trailing flag+'*' / flag+'?' pairs (they may match
// nothing); if that reaches the end of the pattern, it is a full match
1467	if (ok && ok2) {
1468	size_t r = pp;
1469	while ((defcpdtable[i].size() > r) && ((r + 1) < defcpdtable[i].size()) &&
1470	((defcpdtable[i][r + 1] == '*') \|\|
1471	(defcpdtable[i][r + 1] == '?')))
1472	r += 2;
1473	if (defcpdtable[i].size() <= r)
1474	return 1;
1475	}
// give the most recent metacharacter one word fewer and resume after it;
// entries whose count drops below zero are popped
1476	// backtrack
1477	if (bt)
1478	do {
1479	ok = 1;
1480	btinfo[bt - 1].btnum--;
1481	pp = btinfo[bt - 1].btpp;
1482	wp = btinfo[bt - 1].btwp + (signed short)btinfo[bt - 1].btnum;
1483	} while ((btinfo[bt - 1].btnum < 0) && --bt);
1484	} while (bt);
1485
// without `all`, a successful partial match is accepted
1486	if (ok && ok2 && (!all \|\| (defcpdtable[i].size() <= pp)))
1487	return 1;
1488
1489	// check zero ending
1490	while (ok && ok2 && (defcpdtable[i].size() > pp) &&
1491	((pp + 1) < defcpdtable[i].size()) &&
1492	((defcpdtable[i][pp + 1] == '*') \|\|
1493	(defcpdtable[i][pp + 1] == '?')))
1494	pp += 2;
1495	if (ok && ok2 && (defcpdtable[i].size() <= pp))
1496	return 1;
1497	}
// no pattern matched: undo the changes made on entry
1498	(*words)[wnum] = NULL__null;
1499	if (w)
1500	*words = NULL__null;
1501	return 0;
1502	}
1503
1504	inline int AffixMgr::candidate_check(const char* word, int len) {
1505
1506	struct hentry* rv = lookup(word);
1507	if (rv)
1508	return 1;
1509
1510	// rv = prefix_check(word,len,1);
1511	// if (rv) return 1;
1512
1513	rv = affix_check(word, len);
1514	if (rv)
1515	return 1;
1516	return 0;
1517	}
1518
1519	// calculate number of syllable for compound-checking
1520	short AffixMgr::get_syllable(const std::string& word) {
1521	if (cpdmaxsyllable == 0)
1522	return 0;
1523
1524	short num = 0;
1525
1526	if (!utf8) {
1527	for (size_t i = 0; i < word.size(); ++i) {
1528	if (std::binary_search(cpdvowels.begin(), cpdvowels.end(),
1529	word[i])) {
1530	++num;
1531	}
1532	}
1533	} else if (!cpdvowels_utf16.empty()) {
1534	std::vector<w_char> w;
1535	u8_u16(w, word);
1536	for (size_t i = 0; i < w.size(); ++i) {
1537	if (std::binary_search(cpdvowels_utf16.begin(),
1538	cpdvowels_utf16.end(),
1539	w[i])) {
1540	++num;
1541	}
1542	}
1543	}
1544
1545	return num;
1546	}
1547
1548	void AffixMgr::setcminmax(int* cmin, int* cmax, const char* word, int len) {
1549	if (utf8) {
1550	int i;
1551	for (cmin = 0, i = 0; (i < cpdmin) && cmin < len; i++) {
1552	for ((cmin)++; cmin < len && (word[cmin] & 0xc0) == 0x80; (cmin)++)
1553	;
1554	}
1555	for (cmax = len, i = 0; (i < (cpdmin - 1)) && cmax >= 0; i++) {
1556	for ((cmax)--; cmax >= 0 && (word[cmax] & 0xc0) == 0x80; (cmax)--)
1557	;
1558	}
1559	} else {
1560	*cmin = cpdmin;
1561	*cmax = len - cpdmin + 1;
1562	}
1563	}
1564
1565	// check if compound word is correctly spelled
1566	// hu_mov_rule = spec. Hungarian rule (XXX)
1567	struct hentry* AffixMgr::compound_check(const std::string& word,
1568	short wordnum,
1569	short numsyllable,
1570	short maxwordnum,
1571	short wnum,
1572	hentry** words = NULL__null,
1573	hentry** rwords = NULL__null,
1574	char hu_mov_rule = 0,
1575	char is_sug = 0,
1576	int* info = NULL__null) {
1577	int i;
1578	short oldnumsyllable, oldnumsyllable2, oldwordnum, oldwordnum2;
1579	struct hentry* rv = NULL__null;
1580	struct hentry* rv_first;
1581	std::string st;
1582	char ch = '\0';
1583	int cmin;
1584	int cmax;
1585	int striple = 0;
1586	size_t scpd = 0;
1587	int soldi = 0;
1588	int oldcmin = 0;
1589	int oldcmax = 0;
1590	int oldlen = 0;
1591	int checkedstriple = 0;
1592	char affixed = 0;
1593	hentry** oldwords = words;
1594	size_t len = word.size();
1595
1596	int checked_prefix;
1597
1598	// add a time limit to handle possible
1599	// combinatorical explosion of the overlapping words
1600
1601	HUNSPELL_THREAD_LOCALthread_local clock_t timelimit;
1602
1603	if (wordnum == 0) {
1604	// get the start time, seeing as we're reusing this set to 0
1605	// to flag timeout, use clock() + 1 to avoid start clock()
1606	// of 0 as being a timeout
1607	timelimit = clock() + 1;
1608	}
1609	else if (timelimit != 0 && (clock() > timelimit + TIMELIMIT(((__clock_t) 1000000) / 20))) {
1610	timelimit = 0;
1611	}
1612
1613	setcminmax(&cmin, &cmax, word.c_str(), len);
1614
1615	st.assign(word);
1616
1617	for (i = cmin; i < cmax; i++) {
1618	// go to end of the UTF-8 character
1619	if (utf8) {
1620	for (; (st[i] & 0xc0) == 0x80; i++)
1621	;
1622	if (i >= cmax)
1623	return NULL__null;
1624	}
1625
1626	words = oldwords;
1627	int onlycpdrule = (words) ? 1 : 0;
1628
1629	do { // onlycpdrule loop
1630
1631	oldnumsyllable = numsyllable;
1632	oldwordnum = wordnum;
1633	checked_prefix = 0;
1634
1635	do { // simplified checkcompoundpattern loop
1636
1637	if (timelimit == 0)
1638	return 0;
1639
1640	if (scpd > 0) {
1641	for (; scpd <= checkcpdtable.size() &&
1642	(checkcpdtable[scpd - 1].pattern3.empty() \|\|
1643	strncmp(word.c_str() + i, checkcpdtable[scpd - 1].pattern3.c_str(),
1644	checkcpdtable[scpd - 1].pattern3.size()) != 0);
1645	scpd++)
1646	;
1647
1648	if (scpd > checkcpdtable.size())
1649	break; // break simplified checkcompoundpattern loop
1650	st.replace(i, std::string::npos, checkcpdtable[scpd - 1].pattern);
1651	soldi = i;
1652	i += checkcpdtable[scpd - 1].pattern.size();
1653	st.replace(i, std::string::npos, checkcpdtable[scpd - 1].pattern2);
1654	st.replace(i + checkcpdtable[scpd - 1].pattern2.size(), std::string::npos,
1655	word.substr(soldi + checkcpdtable[scpd - 1].pattern3.size()));
1656
1657	oldlen = len;
1658	len += checkcpdtable[scpd - 1].pattern.size() +
1659	checkcpdtable[scpd - 1].pattern2.size() -
1660	checkcpdtable[scpd - 1].pattern3.size();
1661	oldcmin = cmin;
1662	oldcmax = cmax;
1663	setcminmax(&cmin, &cmax, st.c_str(), len);
1664
1665	cmax = len - cpdmin + 1;
1666	}
1667
1668	ch = st[i];
1669	st[i] = '\0';
1670
1671	sfx = NULL__null;
1672	pfx = NULL__null;
1673
1674	// FIRST WORD
1675
1676	affixed = 1;
1677	rv = lookup(st.c_str()); // perhaps without prefix
1678
1679	// forbid dictionary stems with COMPOUNDFORBIDFLAG in
1680	// compound words, overriding the effect of COMPOUNDPERMITFLAG
1681	if ((rv) && compoundforbidflag &&
1682	TESTAFF(rv->astr, compoundforbidflag, rv->alen)(std::binary_search(rv->astr, rv->astr + rv->alen, compoundforbidflag )) && !hu_mov_rule)
1683	continue;
1684
1685	// search homonym with compound flag
1686	while ((rv) && !hu_mov_rule &&
1687	((needaffix && TESTAFF(rv->astr, needaffix, rv->alen)(std::binary_search(rv->astr, rv->astr + rv->alen, needaffix ))) \|\|
1688	!((compoundflag && !words && !onlycpdrule &&
1689	TESTAFF(rv->astr, compoundflag, rv->alen)(std::binary_search(rv->astr, rv->astr + rv->alen, compoundflag ))) \|\|
1690	(compoundbegin && !wordnum && !onlycpdrule &&
1691	TESTAFF(rv->astr, compoundbegin, rv->alen)(std::binary_search(rv->astr, rv->astr + rv->alen, compoundbegin ))) \|\|
1692	(compoundmiddle && wordnum && !words && !onlycpdrule &&
1693	TESTAFF(rv->astr, compoundmiddle, rv->alen)(std::binary_search(rv->astr, rv->astr + rv->alen, compoundmiddle ))) \|\|
1694	(!defcpdtable.empty() && onlycpdrule &&
1695	((!words && !wordnum &&
1696	defcpd_check(&words, wnum, rv, rwords, 0)) \|\|
1697	(words &&
1698	defcpd_check(&words, wnum, rv, rwords, 0))))) \|\|
1699	(scpd != 0 && checkcpdtable[scpd - 1].cond != FLAG_NULL0x00 &&
1700	!TESTAFF(rv->astr, checkcpdtable[scpd - 1].cond, rv->alen)(std::binary_search(rv->astr, rv->astr + rv->alen, checkcpdtable [scpd - 1].cond))))) {
1701	rv = rv->next_homonym;
1702	}
1703
1704	if (rv)
1705	affixed = 0;
1706
1707	if (!rv) {
1708	if (onlycpdrule)
1709	break;
1710	if (compoundflag &&
1711	!(rv = prefix_check(st.c_str(), i,
1712	hu_mov_rule ? IN_CPD_OTHER3 : IN_CPD_BEGIN1,
1713	compoundflag))) {
1714	if (((rv = suffix_check(
1715	st.c_str(), i, 0, NULL__null, FLAG_NULL0x00, compoundflag,
1716	hu_mov_rule ? IN_CPD_OTHER3 : IN_CPD_BEGIN1)) \|\|
1717	(compoundmoresuffixes &&
1718	(rv = suffix_check_twosfx(st.c_str(), i, 0, NULL__null, compoundflag)))) &&
1719	!hu_mov_rule && sfx->getCont() &&
1720	((compoundforbidflag &&
1721	TESTAFF(sfx->getCont(), compoundforbidflag,(std::binary_search(sfx->getCont(), sfx->getCont() + sfx ->getContLen(), compoundforbidflag))
1722	sfx->getContLen())(std::binary_search(sfx->getCont(), sfx->getCont() + sfx ->getContLen(), compoundforbidflag))) \|\|
1723	(compoundend &&
1724	TESTAFF(sfx->getCont(), compoundend, sfx->getContLen())(std::binary_search(sfx->getCont(), sfx->getCont() + sfx ->getContLen(), compoundend))))) {
1725	rv = NULL__null;
1726	}
1727	}
1728
1729	if (rv \|\|
1730	(((wordnum == 0) && compoundbegin &&
1731	((rv = suffix_check(
1732	st.c_str(), i, 0, NULL__null, FLAG_NULL0x00, compoundbegin,
1733	hu_mov_rule ? IN_CPD_OTHER3 : IN_CPD_BEGIN1)) \|\|
1734	(compoundmoresuffixes &&
1735	(rv = suffix_check_twosfx(
1736	st.c_str(), i, 0, NULL__null,
1737	compoundbegin))) \|\| // twofold suffixes + compound
1738	(rv = prefix_check(st.c_str(), i,
1739	hu_mov_rule ? IN_CPD_OTHER3 : IN_CPD_BEGIN1,
1740	compoundbegin)))) \|\|
1741	((wordnum > 0) && compoundmiddle &&
1742	((rv = suffix_check(
1743	st.c_str(), i, 0, NULL__null, FLAG_NULL0x00, compoundmiddle,
1744	hu_mov_rule ? IN_CPD_OTHER3 : IN_CPD_BEGIN1)) \|\|
1745	(compoundmoresuffixes &&
1746	(rv = suffix_check_twosfx(
1747	st.c_str(), i, 0, NULL__null,
1748	compoundmiddle))) \|\| // twofold suffixes + compound
1749	(rv = prefix_check(st.c_str(), i,
1750	hu_mov_rule ? IN_CPD_OTHER3 : IN_CPD_BEGIN1,
1751	compoundmiddle))))))
1752	checked_prefix = 1;
1753	// else check forbiddenwords and needaffix
1754	} else if (rv->astr && (TESTAFF(rv->astr, forbiddenword, rv->alen)(std::binary_search(rv->astr, rv->astr + rv->alen, forbiddenword )) \|\|
1755	TESTAFF(rv->astr, needaffix, rv->alen)(std::binary_search(rv->astr, rv->astr + rv->alen, needaffix )) \|\|
1756	TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen)(std::binary_search(rv->astr, rv->astr + rv->alen, 65511 )) \|\|
1757	(is_sug && nosuggest &&
1758	TESTAFF(rv->astr, nosuggest, rv->alen)(std::binary_search(rv->astr, rv->astr + rv->alen, nosuggest ))))) {
1759	st[i] = ch;
1760	// continue;
1761	break;
1762	}
1763
1764	// check non_compound flag in suffix and prefix
1765	if ((rv) && !hu_mov_rule &&
1766	((pfx && pfx->getCont() &&
1767	TESTAFF(pfx->getCont(), compoundforbidflag, pfx->getContLen())(std::binary_search(pfx->getCont(), pfx->getCont() + pfx ->getContLen(), compoundforbidflag))) \|\|
1768	(sfx && sfx->getCont() &&
1769	TESTAFF(sfx->getCont(), compoundforbidflag,(std::binary_search(sfx->getCont(), sfx->getCont() + sfx ->getContLen(), compoundforbidflag))
1770	sfx->getContLen())(std::binary_search(sfx->getCont(), sfx->getCont() + sfx ->getContLen(), compoundforbidflag))))) {
1771	rv = NULL__null;
1772	}
1773
1774	// check compoundend flag in suffix and prefix
1775	if ((rv) && !checked_prefix && compoundend && !hu_mov_rule &&
1776	((pfx && pfx->getCont() &&
1777	TESTAFF(pfx->getCont(), compoundend, pfx->getContLen())(std::binary_search(pfx->getCont(), pfx->getCont() + pfx ->getContLen(), compoundend))) \|\|
1778	(sfx && sfx->getCont() &&
1779	TESTAFF(sfx->getCont(), compoundend, sfx->getContLen())(std::binary_search(sfx->getCont(), sfx->getCont() + sfx ->getContLen(), compoundend))))) {
1780	rv = NULL__null;
1781	}
1782
1783	// check compoundmiddle flag in suffix and prefix
1784	if ((rv) && !checked_prefix && (wordnum == 0) && compoundmiddle &&
1785	!hu_mov_rule &&
1786	((pfx && pfx->getCont() &&
1787	TESTAFF(pfx->getCont(), compoundmiddle, pfx->getContLen())(std::binary_search(pfx->getCont(), pfx->getCont() + pfx ->getContLen(), compoundmiddle))) \|\|
1788	(sfx && sfx->getCont() &&
1789	TESTAFF(sfx->getCont(), compoundmiddle, sfx->getContLen())(std::binary_search(sfx->getCont(), sfx->getCont() + sfx ->getContLen(), compoundmiddle))))) {
1790	rv = NULL__null;
1791	}
1792
1793	// check forbiddenwords
1794	if ((rv) && (rv->astr) &&
1795	(TESTAFF(rv->astr, forbiddenword, rv->alen)(std::binary_search(rv->astr, rv->astr + rv->alen, forbiddenword )) \|\|
1796	TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen)(std::binary_search(rv->astr, rv->astr + rv->alen, 65511 )) \|\|
1797	(is_sug && nosuggest && TESTAFF(rv->astr, nosuggest, rv->alen)(std::binary_search(rv->astr, rv->astr + rv->alen, nosuggest ))))) {
1798	return NULL__null;
1799	}
1800
1801	// increment word number, if the second root has a compoundroot flag
1802	if ((rv) && compoundroot &&
1803	(TESTAFF(rv->astr, compoundroot, rv->alen)(std::binary_search(rv->astr, rv->astr + rv->alen, compoundroot )))) {
1804	wordnum++;
1805	}
1806
1807	// first word is acceptable in compound words?
1808	if (((rv) &&
1809	(checked_prefix \|\| (words && words[wnum]) \|\|
1810	(compoundflag && TESTAFF(rv->astr, compoundflag, rv->alen)(std::binary_search(rv->astr, rv->astr + rv->alen, compoundflag ))) \|\|
1811	((oldwordnum == 0) && compoundbegin &&
1812	TESTAFF(rv->astr, compoundbegin, rv->alen)(std::binary_search(rv->astr, rv->astr + rv->alen, compoundbegin ))) \|\|
1813	((oldwordnum > 0) && compoundmiddle &&
1814	TESTAFF(rv->astr, compoundmiddle, rv->alen)(std::binary_search(rv->astr, rv->astr + rv->alen, compoundmiddle )))
1815
1816	// LANG_hu section: spec. Hungarian rule
1817	\|\| ((langnum == LANG_hu) && hu_mov_rule &&
1818	(TESTAFF((std::binary_search(rv->astr, rv->astr + rv->alen, 'F' ))
1819	rv->astr, 'F',(std::binary_search(rv->astr, rv->astr + rv->alen, 'F' ))
1820	rv->alen)(std::binary_search(rv->astr, rv->astr + rv->alen, 'F' )) \|\| // XXX hardwired Hungarian dictionary codes
1821	TESTAFF(rv->astr, 'G', rv->alen)(std::binary_search(rv->astr, rv->astr + rv->alen, 'G' )) \|\|
1822	TESTAFF(rv->astr, 'H', rv->alen)(std::binary_search(rv->astr, rv->astr + rv->alen, 'H' ))))
1823	// END of LANG_hu section
1824	) &&
1825	(
1826	// test CHECKCOMPOUNDPATTERN conditions
1827	scpd == 0 \|\| checkcpdtable[scpd - 1].cond == FLAG_NULL0x00 \|\|
1828	TESTAFF(rv->astr, checkcpdtable[scpd - 1].cond, rv->alen)(std::binary_search(rv->astr, rv->astr + rv->alen, checkcpdtable [scpd - 1].cond))) &&
1829	!((checkcompoundtriple && scpd == 0 &&
1830	!words && // test triple letters
1831	(word[i - 1] == word[i]) &&
1832	(((i > 1) && (word[i - 1] == word[i - 2])) \|\|
1833	((word[i - 1] == word[i + 1])) // may be word[i+1] == '\0'
1834	)) \|\|
1835	(checkcompoundcase && scpd == 0 && !words &&
1836	cpdcase_check(word.c_str(), i))))
1837	// LANG_hu section: spec. Hungarian rule
1838	\|\| ((!rv) && (langnum == LANG_hu) && hu_mov_rule &&
1839	(rv = affix_check(st.c_str(), i)) &&
1840	(sfx && sfx->getCont() &&
1841	( // XXX hardwired Hungarian dic. codes
1842	TESTAFF(sfx->getCont(), (unsigned short)'x',(std::binary_search(sfx->getCont(), sfx->getCont() + sfx ->getContLen(), (unsigned short)'x'))
1843	sfx->getContLen())(std::binary_search(sfx->getCont(), sfx->getCont() + sfx ->getContLen(), (unsigned short)'x')) \|\|
1844	TESTAFF((std::binary_search(sfx->getCont(), sfx->getCont() + sfx ->getContLen(), (unsigned short)'%'))
1845	sfx->getCont(), (unsigned short)'%',(std::binary_search(sfx->getCont(), sfx->getCont() + sfx ->getContLen(), (unsigned short)'%'))
1846	sfx->getContLen())(std::binary_search(sfx->getCont(), sfx->getCont() + sfx ->getContLen(), (unsigned short)'%')))))) { // first word is ok condition
1847
1848	// LANG_hu section: spec. Hungarian rule
1849	if (langnum == LANG_hu) {
1850	// calculate syllable number of the word
1851	numsyllable += get_syllable(st.substr(0, i));
1852	// + 1 word, if syllable number of the prefix > 1 (hungarian
1853	// convention)
1854	if (pfx && (get_syllable(pfx->getKey()) > 1))
1855	wordnum++;
1856	}
1857	// END of LANG_hu section
1858
1859	// NEXT WORD(S)
1860	rv_first = rv;
1861	st[i] = ch;
1862
1863	do { // striple loop
1864
1865	// check simplifiedtriple
1866	if (simplifiedtriple) {
1867	if (striple) {
1868	checkedstriple = 1;
1869	i--; // check "fahrt" instead of "ahrt" in "Schiffahrt"
1870	} else if (i > 2 && word[i - 1] == word[i - 2])
1871	striple = 1;
1872	}
1873
1874	rv = lookup(st.c_str() + i); // perhaps without prefix
1875
1876	// search homonym with compound flag
1877	while ((rv) &&
1878	((needaffix && TESTAFF(rv->astr, needaffix, rv->alen)(std::binary_search(rv->astr, rv->astr + rv->alen, needaffix ))) \|\|
1879	!((compoundflag && !words &&
1880	TESTAFF(rv->astr, compoundflag, rv->alen)(std::binary_search(rv->astr, rv->astr + rv->alen, compoundflag ))) \|\|
1881	(compoundend && !words &&
1882	TESTAFF(rv->astr, compoundend, rv->alen)(std::binary_search(rv->astr, rv->astr + rv->alen, compoundend ))) \|\|
1883	(!defcpdtable.empty() && words &&
1884	defcpd_check(&words, wnum + 1, rv, NULL__null, 1))) \|\|
1885	(scpd != 0 && checkcpdtable[scpd - 1].cond2 != FLAG_NULL0x00 &&
1886	!TESTAFF(rv->astr, checkcpdtable[scpd - 1].cond2,(std::binary_search(rv->astr, rv->astr + rv->alen, checkcpdtable [scpd - 1].cond2))
1887	rv->alen)(std::binary_search(rv->astr, rv->astr + rv->alen, checkcpdtable [scpd - 1].cond2))))) {
1888	rv = rv->next_homonym;
1889	}
1890
1891	// check FORCEUCASE
1892	if (rv && forceucase && (rv) &&
1893	(TESTAFF(rv->astr, forceucase, rv->alen)(std::binary_search(rv->astr, rv->astr + rv->alen, forceucase ))) &&
1894	!(info && *info & SPELL_ORIGCAP(1 << 5)))
1895	rv = NULL__null;
1896
1897	if (rv && words && words[wnum + 1])
1898	return rv_first;
1899
1900	oldnumsyllable2 = numsyllable;
1901	oldwordnum2 = wordnum;
1902
1903	// LANG_hu section: spec. Hungarian rule, XXX hardwired dictionary
1904	// code
1905	if ((rv) && (langnum == LANG_hu) &&
1906	(TESTAFF(rv->astr, 'I', rv->alen)(std::binary_search(rv->astr, rv->astr + rv->alen, 'I' ))) &&
1907	!(TESTAFF(rv->astr, 'J', rv->alen)(std::binary_search(rv->astr, rv->astr + rv->alen, 'J' )))) {
1908	numsyllable--;
1909	}
1910	// END of LANG_hu section
1911
1912	// increment word number, if the second root has a compoundroot flag
1913	if ((rv) && (compoundroot) &&
1914	(TESTAFF(rv->astr, compoundroot, rv->alen)(std::binary_search(rv->astr, rv->astr + rv->alen, compoundroot )))) {
1915	wordnum++;
1916	}
1917
1918	// check forbiddenwords
1919	if ((rv) && (rv->astr) &&
1920	(TESTAFF(rv->astr, forbiddenword, rv->alen)(std::binary_search(rv->astr, rv->astr + rv->alen, forbiddenword )) \|\|
1921	TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen)(std::binary_search(rv->astr, rv->astr + rv->alen, 65511 )) \|\|
1922	(is_sug && nosuggest &&
1923	TESTAFF(rv->astr, nosuggest, rv->alen)(std::binary_search(rv->astr, rv->astr + rv->alen, nosuggest )))))
1924	return NULL__null;
1925
1926	// second word is acceptable, as a root?
1927	// hungarian conventions: compounding is acceptable,
1928	// when compound forms consist of 2 words, or if more,
1929	// then the syllable number of root words must be 6, or lesser.
1930
1931	if ((rv) &&
1932	((compoundflag && TESTAFF(rv->astr, compoundflag, rv->alen)(std::binary_search(rv->astr, rv->astr + rv->alen, compoundflag ))) \|\|
1933	(compoundend && TESTAFF(rv->astr, compoundend, rv->alen)(std::binary_search(rv->astr, rv->astr + rv->alen, compoundend )))) &&
1934	(((cpdwordmax == -1) \|\| (wordnum + 1 < cpdwordmax)) \|\|
1935	((cpdmaxsyllable != 0) &&
1936	(numsyllable + get_syllable(std::string(HENTRY_WORD(rv)&(rv->word[0]), rv->blen)) <=
1937	cpdmaxsyllable))) &&
1938	(
1939	// test CHECKCOMPOUNDPATTERN
1940	checkcpdtable.empty() \|\| scpd != 0 \|\|
1941	!cpdpat_check(word.c_str(), i, rv_first, rv, 0)) &&
1942	((!checkcompounddup \|\| (rv != rv_first)))
1943	// test CHECKCOMPOUNDPATTERN conditions
1944	&&
1945	(scpd == 0 \|\| checkcpdtable[scpd - 1].cond2 == FLAG_NULL0x00 \|\|
1946	TESTAFF(rv->astr, checkcpdtable[scpd - 1].cond2, rv->alen)(std::binary_search(rv->astr, rv->astr + rv->alen, checkcpdtable [scpd - 1].cond2)))) {
1947	// forbid compound word, if it is a non-compound word with typical
1948	// fault
1949	if ((checkcompoundrep && cpdrep_check(word.c_str(), len)) \|\|
1950	cpdwordpair_check(word.c_str(), len))
1951	return NULL__null;
1952	return rv_first;
1953	}
1954
1955	numsyllable = oldnumsyllable2;
1956	wordnum = oldwordnum2;
1957
1958	// perhaps second word has prefix or/and suffix
1959	sfx = NULL__null;
1960	sfxflag = FLAG_NULL0x00;
1961	rv = (compoundflag && !onlycpdrule)
1962	? affix_check((word.c_str() + i), strlen(word.c_str() + i), compoundflag,
1963	IN_CPD_END2)
1964	: NULL__null;
1965	if (!rv && compoundend && !onlycpdrule) {
1966	sfx = NULL__null;
1967	pfx = NULL__null;
1968	rv = affix_check((word.c_str() + i), strlen(word.c_str() + i), compoundend,
1969	IN_CPD_END2);
1970	}
1971
1972	if (!rv && !defcpdtable.empty() && words) {
1973	rv = affix_check((word.c_str() + i), strlen(word.c_str() + i), 0, IN_CPD_END2);
1974	if (rv && defcpd_check(&words, wnum + 1, rv, NULL__null, 1))
1975	return rv_first;
1976	rv = NULL__null;
1977	}
1978
1979	// test CHECKCOMPOUNDPATTERN conditions (allowed forms)
1980	if (rv &&
1981	!(scpd == 0 \|\| checkcpdtable[scpd - 1].cond2 == FLAG_NULL0x00 \|\|
1982	TESTAFF(rv->astr, checkcpdtable[scpd - 1].cond2, rv->alen)(std::binary_search(rv->astr, rv->astr + rv->alen, checkcpdtable [scpd - 1].cond2))))
1983	rv = NULL__null;
1984
1985	// test CHECKCOMPOUNDPATTERN conditions (forbidden compounds)
1986	if (rv && !checkcpdtable.empty() && scpd == 0 &&
1987	cpdpat_check(word.c_str(), i, rv_first, rv, affixed))
1988	rv = NULL__null;
1989
1990	// check non_compound flag in suffix and prefix
1991	if ((rv) && ((pfx && pfx->getCont() &&
1992	TESTAFF(pfx->getCont(), compoundforbidflag,(std::binary_search(pfx->getCont(), pfx->getCont() + pfx ->getContLen(), compoundforbidflag))
1993	pfx->getContLen())(std::binary_search(pfx->getCont(), pfx->getCont() + pfx ->getContLen(), compoundforbidflag))) \|\|
1994	(sfx && sfx->getCont() &&
1995	TESTAFF(sfx->getCont(), compoundforbidflag,(std::binary_search(sfx->getCont(), sfx->getCont() + sfx ->getContLen(), compoundforbidflag))
1996	sfx->getContLen())(std::binary_search(sfx->getCont(), sfx->getCont() + sfx ->getContLen(), compoundforbidflag))))) {
1997	rv = NULL__null;
1998	}
1999
2000	// check FORCEUCASE
2001	if (rv && forceucase && (rv) &&
2002	(TESTAFF(rv->astr, forceucase, rv->alen)(std::binary_search(rv->astr, rv->astr + rv->alen, forceucase ))) &&
2003	!(info && *info & SPELL_ORIGCAP(1 << 5)))
2004	rv = NULL__null;
2005
2006	// check forbiddenwords
2007	if ((rv) && (rv->astr) &&
2008	(TESTAFF(rv->astr, forbiddenword, rv->alen)(std::binary_search(rv->astr, rv->astr + rv->alen, forbiddenword )) \|\|
2009	TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen)(std::binary_search(rv->astr, rv->astr + rv->alen, 65511 )) \|\|
2010	(is_sug && nosuggest &&
2011	TESTAFF(rv->astr, nosuggest, rv->alen)(std::binary_search(rv->astr, rv->astr + rv->alen, nosuggest )))))
2012	return NULL__null;
2013
2014	// pfxappnd = prefix of word+i, or NULL
2015	// calculate syllable number of prefix.
2016	// hungarian convention: when syllable number of prefix is more,
2017	// than 1, the prefix+word counts as two words.
2018
2019	if (langnum == LANG_hu) {
2020	// calculate syllable number of the word
2021	numsyllable += get_syllable(word.c_str() + i);
2022
2023	// - affix syllable num.
2024	// XXX only second suffix (inflections, not derivations)
2025	if (sfxappnd) {
2026	std::string tmp(sfxappnd);
2027	reverseword(tmp);
2028	numsyllable -= short(get_syllable(tmp) + sfxextra);
2029	} else {
2030	numsyllable -= short(sfxextra);
2031	}
2032
2033	// + 1 word, if syllable number of the prefix > 1 (hungarian
2034	// convention)
2035	if (pfx && (get_syllable(pfx->getKey()) > 1))
2036	wordnum++;
2037
2038	// increment syllable num, if last word has a SYLLABLENUM flag
2039	// and the suffix is beginning `s'
2040
2041	if (!cpdsyllablenum.empty()) {
2042	switch (sfxflag) {
2043	case 'c': {
2044	numsyllable += 2;
2045	break;
2046	}
2047	case 'J': {
2048	numsyllable += 1;
2049	break;
2050	}
2051	case 'I': {
2052	if (rv && TESTAFF(rv->astr, 'J', rv->alen)(std::binary_search(rv->astr, rv->astr + rv->alen, 'J' )))
2053	numsyllable += 1;
2054	break;
2055	}
2056	}
2057	}
2058	}
2059
2060	// increment word number, if the second word has a compoundroot flag
2061	if ((rv) && (compoundroot) &&
2062	(TESTAFF(rv->astr, compoundroot, rv->alen)(std::binary_search(rv->astr, rv->astr + rv->alen, compoundroot )))) {
2063	wordnum++;
2064	}
2065	// second word is acceptable, as a word with prefix or/and suffix?
2066	// hungarian conventions: compounding is acceptable,
2067	// when compound forms consist 2 word, otherwise
2068	// the syllable number of root words is 6, or lesser.
2069	if ((rv) &&
2070	(((cpdwordmax == -1) \|\| (wordnum + 1 < cpdwordmax)) \|\|
2071	((cpdmaxsyllable != 0) && (numsyllable <= cpdmaxsyllable))) &&
2072	((!checkcompounddup \|\| (rv != rv_first)))) {
2073	// forbid compound word, if it is a non-compound word with typical
2074	// fault
2075	if ((checkcompoundrep && cpdrep_check(word.c_str(), len)) \|\|
2076	cpdwordpair_check(word.c_str(), len))
2077	return NULL__null;
2078	return rv_first;
2079	}
2080
2081	numsyllable = oldnumsyllable2;
2082	wordnum = oldwordnum2;
2083
2084	// perhaps second word is a compound word (recursive call)
2085	if (wordnum + 2 < maxwordnum) {
2086	rv = compound_check(st.substr(i), wordnum + 1,
2087	numsyllable, maxwordnum, wnum + 1, words, rwords, 0,
2088	is_sug, info);
2089
2090	if (rv && !checkcpdtable.empty() &&
2091	((scpd == 0 &&
2092	cpdpat_check(word.c_str(), i, rv_first, rv, affixed)) \|\|
2093	(scpd != 0 &&
2094	!cpdpat_check(word.c_str(), i, rv_first, rv, affixed))))
2095	rv = NULL__null;
2096	} else {
2097	rv = NULL__null;
2098	}
2099	if (rv) {
2100	// forbid compound word, if it is a non-compound word with typical
2101	// fault, or a dictionary word pair
2102
2103	if (cpdwordpair_check(word.c_str(), len))
2104	return NULL__null;
2105
2106	if (checkcompoundrep \|\| forbiddenword) {
2107
2108	if (checkcompoundrep && cpdrep_check(word.c_str(), len))
2109	return NULL__null;
2110
2111	// check first part
2112	if (strncmp(rv->word, word.c_str() + i, rv->blen) == 0) {
2113	char r = st[i + rv->blen];
2114	st[i + rv->blen] = '\0';
2115
2116	if ((checkcompoundrep && cpdrep_check(st.c_str(), i + rv->blen)) \|\|
2117	cpdwordpair_check(st.c_str(), i + rv->blen)) {
2118	st[ + i + rv->blen] = r;
2119	continue;
2120	}
2121
2122	if (forbiddenword) {
2123	struct hentry* rv2 = lookup(word.c_str());
2124	if (!rv2)
2125	rv2 = affix_check(word.c_str(), len);
2126	if (rv2 && rv2->astr &&
2127	TESTAFF(rv2->astr, forbiddenword, rv2->alen)(std::binary_search(rv2->astr, rv2->astr + rv2->alen , forbiddenword)) &&
2128	(strncmp(rv2->word, st.c_str(), i + rv->blen) == 0)) {
2129	return NULL__null;
2130	}
2131	}
2132	st[i + rv->blen] = r;
2133	}
2134	}
2135	return rv_first;
2136	}
2137	} while (striple && !checkedstriple); // end of striple loop
2138
2139	if (checkedstriple) {
2140	i++;
2141	checkedstriple = 0;
2142	striple = 0;
2143	}
2144
2145	} // first word is ok condition
2146
2147	if (soldi != 0) {
2148	i = soldi;
2149	soldi = 0;
2150	len = oldlen;
2151	cmin = oldcmin;
2152	cmax = oldcmax;
2153	}
2154	scpd++;
2155
2156	} while (!onlycpdrule && simplifiedcpd &&
2157	scpd <= checkcpdtable.size()); // end of simplifiedcpd loop
2158
2159	scpd = 0;
2160	wordnum = oldwordnum;
2161	numsyllable = oldnumsyllable;
2162
2163	if (soldi != 0) {
2164	i = soldi;
2165	st.assign(word); // XXX add more optim.
2166	soldi = 0;
2167	} else
2168	st[i] = ch;
2169
2170	} while (!defcpdtable.empty() && oldwordnum == 0 &&
2171	onlycpdrule++ < 1); // end of onlycpd loop
2172	}
2173
2174	return NULL__null;
2175	}
2176
2177	// check if compound word is correctly spelled
2178	// hu_mov_rule = spec. Hungarian rule (XXX)
	//
	// Variant of the compound check that writes morphological descriptions.
	// For each split position i in [cmin, cmax) of `word` it looks for an
	// acceptable first part (st cut at i) and second part (word + i). For every
	// accepted pair it appends presult, a MORPH_PART field for the second part
	// (where available) and any stem / entry data to `result`, closing the
	// record with MSEP_REC.
	//
	// word, len    - NUL-terminated text to analyse; len is handed to setcminmax
	// wordnum      - running word count; when it is 0 the time limit is
	//                (re)started, and recursive calls pass wordnum + 1
	// numsyllable  - running syllable count (only changed when
	//                langnum == LANG_hu)
	// maxwordnum   - the remainder is analysed recursively only while
	//                wordnum + 2 < maxwordnum
	// wnum         - index used with words (words[wnum], words[wnum + 1]) and
	//                passed to defcpd_check
	// words/rwords - forwarded to defcpd_check; a non-null words makes
	//                onlycpdrule start at 1
	// hu_mov_rule  - switches on the special Hungarian branches
	// result       - output string; analysis records are appended to it
	// partresult   - optional text copied to the front of presult
	//
	// Every return statement in this function returns 0; findings are reported
	// only through `result`.
2179	int AffixMgr::compound_check_morph(const char* word,
2180	int len,
2181	short wordnum,
2182	short numsyllable,
2183	short maxwordnum,
2184	short wnum,
2185	hentry** words,
2186	hentry** rwords,
2187	char hu_mov_rule,
2188	std::string& result,
2189	const std::string* partresult) {
2190	int i;
2191	short oldnumsyllable, oldnumsyllable2, oldwordnum, oldwordnum2;
	// ok becomes 1 once a record has been appended to result; it gates the
	// recursive attempt near the end of the loop body.
	// NOTE(review): ok is never set back to 0 inside the loops, so after the
	// first record no later split position recurses - confirm this is intended.
2192	int ok = 0;
2193
2194	struct hentry* rv = NULL__null;
2195	struct hentry* rv_first;
2196	std::string st;
2197	char ch;
2198
2199	int checked_prefix;
2200	std::string presult;
2201
2202	int cmin;
2203	int cmax;
2204
2205	char affixed = 0;
2206	hentry** oldwords = words;
2207
2208	// add a time limit to handle possible
2209	// combinatorical explosion of the overlapping words
2210
2211	HUNSPELL_THREAD_LOCALthread_local clock_t timelimit;
2212
2213	if (wordnum == 0) {
2214	// get the start time, seeing as we're reusing this set to 0
2215	// to flag timeout, use clock() + 1 to avoid start clock()
2216	// of 0 as being a timeout
2217	timelimit = clock() + 1;
2218	}
2219	else if (timelimit != 0 && (clock() > timelimit + TIMELIMIT(((__clock_t) 1000000) / 20))) {
2220	timelimit = 0;
2221	}
2222
2223	setcminmax(&cmin, &cmax, word, len);
2224
2225	st.assign(word);
2226
2227	for (i = cmin; i < cmax; i++) {
2228	// go to end of the UTF-8 character
2229	if (utf8) {
2230	for (; (st[i] & 0xc0) == 0x80; i++)
2231	;
2232	if (i >= cmax)
2233	return 0;
2234	}
2235
	// every split position starts again from the caller's words pointer
	// (defcpd_check receives &words below)
2236	words = oldwords;
2237	int onlycpdrule = (words) ? 1 : 0;
2238
2239	do { // onlycpdrule loop
2240
2241	if (timelimit == 0)
2242	return 0;
2243
2244	oldnumsyllable = numsyllable;
2245	oldwordnum = wordnum;
2246	checked_prefix = 0;
2247
	// cut st at i so st.c_str() is the candidate first part; ch keeps the
	// overwritten byte so it can be put back with st[i] = ch
2248	ch = st[i];
2249	st[i] = '\0';
2250	sfx = NULL__null;
2251
2252	// FIRST WORD
2253
2254	affixed = 1;
2255
2256	presult.clear();
2257	if (partresult)
2258	presult.append(*partresult);
2259
2260	rv = lookup(st.c_str()); // perhaps without prefix
2261
2262	// forbid dictionary stems with COMPOUNDFORBIDFLAG in
2263	// compound words, overriding the effect of COMPOUNDPERMITFLAG
	// NOTE(review): continue inside this do/while goes straight to the loop
	// condition. Some later continue sites first restore st[i] = ch; this one
	// (and a few others below) leave st cut at i - confirm this is intended.
2264	if ((rv) && compoundforbidflag &&
2265	TESTAFF(rv->astr, compoundforbidflag, rv->alen)(std::binary_search(rv->astr, rv->astr + rv->alen, compoundforbidflag )) && !hu_mov_rule)
2266	continue;
2267
2268	// search homonym with compound flag
2269	while ((rv) && !hu_mov_rule &&
2270	((needaffix && TESTAFF(rv->astr, needaffix, rv->alen)(std::binary_search(rv->astr, rv->astr + rv->alen, needaffix ))) \|\|
2271	!((compoundflag && !words && !onlycpdrule &&
2272	TESTAFF(rv->astr, compoundflag, rv->alen)(std::binary_search(rv->astr, rv->astr + rv->alen, compoundflag ))) \|\|
2273	(compoundbegin && !wordnum && !onlycpdrule &&
2274	TESTAFF(rv->astr, compoundbegin, rv->alen)(std::binary_search(rv->astr, rv->astr + rv->alen, compoundbegin ))) \|\|
2275	(compoundmiddle && wordnum && !words && !onlycpdrule &&
2276	TESTAFF(rv->astr, compoundmiddle, rv->alen)(std::binary_search(rv->astr, rv->astr + rv->alen, compoundmiddle ))) \|\|
2277	(!defcpdtable.empty() && onlycpdrule &&
2278	((!words && !wordnum &&
2279	defcpd_check(&words, wnum, rv, rwords, 0)) \|\|
2280	(words &&
2281	defcpd_check(&words, wnum, rv, rwords, 0))))))) {
2282	rv = rv->next_homonym;
2283	}
2284
2285	if (timelimit == 0)
2286	return 0;
2287
2288	if (rv)
2289	affixed = 0;
2290
2291	if (rv) {
2292	presult.push_back(MSEP_FLD' ');
2293	presult.append(MORPH_PART"pa:");
2294	presult.append(st.c_str());
2295	if (!HENTRY_FIND(rv, MORPH_STEM"st:")) {
2296	presult.push_back(MSEP_FLD' ');
2297	presult.append(MORPH_STEM"st:");
2298	presult.append(st.c_str());
2299	}
2300	if (HENTRY_DATA(rv)) {
2301	presult.push_back(MSEP_FLD' ');
2302	presult.append(HENTRY_DATA2(rv));
2303	}
2304	}
2305
2306	if (!rv) {
2307	if (compoundflag &&
2308	!(rv =
2309	prefix_check(st.c_str(), i, hu_mov_rule ? IN_CPD_OTHER3 : IN_CPD_BEGIN1,
2310	compoundflag))) {
2311	if (((rv = suffix_check(st.c_str(), i, 0, NULL__null, FLAG_NULL0x00,
2312	compoundflag,
2313	hu_mov_rule ? IN_CPD_OTHER3 : IN_CPD_BEGIN1)) \|\|
2314	(compoundmoresuffixes &&
2315	(rv = suffix_check_twosfx(st.c_str(), i, 0, NULL__null, compoundflag)))) &&
	// NOTE(review): sfx is dereferenced on the next line without the
	// "sfx &&" test used by the other sfx->getCont() checks in this
	// function - confirm a non-null rv from the calls above implies sfx is set
2316	!hu_mov_rule && sfx->getCont() &&
2317	((compoundforbidflag &&
2318	TESTAFF(sfx->getCont(), compoundforbidflag,(std::binary_search(sfx->getCont(), sfx->getCont() + sfx ->getContLen(), compoundforbidflag))
2319	sfx->getContLen())(std::binary_search(sfx->getCont(), sfx->getCont() + sfx ->getContLen(), compoundforbidflag))) \|\|
2320	(compoundend &&
2321	TESTAFF(sfx->getCont(), compoundend, sfx->getContLen())(std::binary_search(sfx->getCont(), sfx->getCont() + sfx ->getContLen(), compoundend))))) {
2322	rv = NULL__null;
2323	}
2324	}
2325
2326	if (rv \|\|
2327	(((wordnum == 0) && compoundbegin &&
2328	((rv = suffix_check(st.c_str(), i, 0, NULL__null, FLAG_NULL0x00,
2329	compoundbegin,
2330	hu_mov_rule ? IN_CPD_OTHER3 : IN_CPD_BEGIN1)) \|\|
2331	(compoundmoresuffixes &&
2332	(rv = suffix_check_twosfx(
2333	st.c_str(), i, 0, NULL__null,
2334	compoundbegin))) \|\| // twofold suffix+compound
2335	(rv = prefix_check(st.c_str(), i,
2336	hu_mov_rule ? IN_CPD_OTHER3 : IN_CPD_BEGIN1,
2337	compoundbegin)))) \|\|
2338	((wordnum > 0) && compoundmiddle &&
2339	((rv = suffix_check(st.c_str(), i, 0, NULL__null, FLAG_NULL0x00,
2340	compoundmiddle,
2341	hu_mov_rule ? IN_CPD_OTHER3 : IN_CPD_BEGIN1)) \|\|
2342	(compoundmoresuffixes &&
2343	(rv = suffix_check_twosfx(
2344	st.c_str(), i, 0, NULL__null,
2345	compoundmiddle))) \|\| // twofold suffix+compound
2346	(rv = prefix_check(st.c_str(), i,
2347	hu_mov_rule ? IN_CPD_OTHER3 : IN_CPD_BEGIN1,
2348	compoundmiddle)))))) {
2349	std::string p;
2350	if (compoundflag)
2351	p = affix_check_morph(st.c_str(), i, compoundflag);
2352	if (p.empty()) {
2353	if ((wordnum == 0) && compoundbegin) {
2354	p = affix_check_morph(st.c_str(), i, compoundbegin);
2355	} else if ((wordnum > 0) && compoundmiddle) {
2356	p = affix_check_morph(st.c_str(), i, compoundmiddle);
2357	}
2358	}
2359	if (!p.empty()) {
2360	presult.push_back(MSEP_FLD' ');
2361	presult.append(MORPH_PART"pa:");
2362	presult.append(st.c_str());
2363	line_uniq_app(p, MSEP_REC'\n');
2364	presult.append(p);
2365	}
2366	checked_prefix = 1;
2367	}
2368	// else check forbiddenwords
2369	} else if (rv->astr && (TESTAFF(rv->astr, forbiddenword, rv->alen)(std::binary_search(rv->astr, rv->astr + rv->alen, forbiddenword )) \|\|
2370	TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen)(std::binary_search(rv->astr, rv->astr + rv->alen, 65511 )) \|\|
2371	TESTAFF(rv->astr, needaffix, rv->alen)(std::binary_search(rv->astr, rv->astr + rv->alen, needaffix )))) {
2372	st[i] = ch;
2373	continue;
2374	}
2375
2376	// check non_compound flag in suffix and prefix
2377	if ((rv) && !hu_mov_rule &&
2378	((pfx && pfx->getCont() &&
2379	TESTAFF(pfx->getCont(), compoundforbidflag, pfx->getContLen())(std::binary_search(pfx->getCont(), pfx->getCont() + pfx ->getContLen(), compoundforbidflag))) \|\|
2380	(sfx && sfx->getCont() &&
2381	TESTAFF(sfx->getCont(), compoundforbidflag, sfx->getContLen())(std::binary_search(sfx->getCont(), sfx->getCont() + sfx ->getContLen(), compoundforbidflag))))) {
2382	continue;
2383	}
2384
2385	// check compoundend flag in suffix and prefix
2386	if ((rv) && !checked_prefix && compoundend && !hu_mov_rule &&
2387	((pfx && pfx->getCont() &&
2388	TESTAFF(pfx->getCont(), compoundend, pfx->getContLen())(std::binary_search(pfx->getCont(), pfx->getCont() + pfx ->getContLen(), compoundend))) \|\|
2389	(sfx && sfx->getCont() &&
2390	TESTAFF(sfx->getCont(), compoundend, sfx->getContLen())(std::binary_search(sfx->getCont(), sfx->getCont() + sfx ->getContLen(), compoundend))))) {
2391	continue;
2392	}
2393
2394	// check compoundmiddle flag in suffix and prefix
2395	if ((rv) && !checked_prefix && (wordnum == 0) && compoundmiddle &&
2396	!hu_mov_rule &&
2397	((pfx && pfx->getCont() &&
2398	TESTAFF(pfx->getCont(), compoundmiddle, pfx->getContLen())(std::binary_search(pfx->getCont(), pfx->getCont() + pfx ->getContLen(), compoundmiddle))) \|\|
2399	(sfx && sfx->getCont() &&
2400	TESTAFF(sfx->getCont(), compoundmiddle, sfx->getContLen())(std::binary_search(sfx->getCont(), sfx->getCont() + sfx ->getContLen(), compoundmiddle))))) {
2401	rv = NULL__null;
2402	}
2403
2404	// check forbiddenwords
2405	if ((rv) && (rv->astr) && (TESTAFF(rv->astr, forbiddenword, rv->alen)(std::binary_search(rv->astr, rv->astr + rv->alen, forbiddenword )) \|\|
2406	TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen)(std::binary_search(rv->astr, rv->astr + rv->alen, 65511 ))))
2407	continue;
2408
2409	// increment word number, if the second root has a compoundroot flag
2410	if ((rv) && (compoundroot) &&
2411	(TESTAFF(rv->astr, compoundroot, rv->alen)(std::binary_search(rv->astr, rv->astr + rv->alen, compoundroot )))) {
2412	wordnum++;
2413	}
2414
2415	// first word is acceptable in compound words?
2416	if (((rv) &&
2417	(checked_prefix \|\| (words && words[wnum]) \|\|
2418	(compoundflag && TESTAFF(rv->astr, compoundflag, rv->alen)(std::binary_search(rv->astr, rv->astr + rv->alen, compoundflag ))) \|\|
2419	((oldwordnum == 0) && compoundbegin &&
2420	TESTAFF(rv->astr, compoundbegin, rv->alen)(std::binary_search(rv->astr, rv->astr + rv->alen, compoundbegin ))) \|\|
2421	((oldwordnum > 0) && compoundmiddle &&
2422	TESTAFF(rv->astr, compoundmiddle, rv->alen)(std::binary_search(rv->astr, rv->astr + rv->alen, compoundmiddle )))
2423	// LANG_hu section: spec. Hungarian rule
2424	\|\| ((langnum == LANG_hu) && // hu_mov_rule
2425	hu_mov_rule && (TESTAFF(rv->astr, 'F', rv->alen)(std::binary_search(rv->astr, rv->astr + rv->alen, 'F' )) \|\|
2426	TESTAFF(rv->astr, 'G', rv->alen)(std::binary_search(rv->astr, rv->astr + rv->alen, 'G' )) \|\|
2427	TESTAFF(rv->astr, 'H', rv->alen)(std::binary_search(rv->astr, rv->astr + rv->alen, 'H' ))))
2428	// END of LANG_hu section
2429	) &&
2430	!((checkcompoundtriple && !words && // test triple letters
2431	(word[i - 1] == word[i]) &&
2432	(((i > 1) && (word[i - 1] == word[i - 2])) \|\|
2433	((word[i - 1] == word[i + 1])) // may be word[i+1] == '\0'
2434	)) \|\|
2435	(
2436	// test CHECKCOMPOUNDPATTERN
2437	!checkcpdtable.empty() && !words &&
2438	cpdpat_check(word, i, rv, NULL__null, affixed)) \|\|
2439	(checkcompoundcase && !words && cpdcase_check(word, i))))
2440	// LANG_hu section: spec. Hungarian rule
2441	\|\|
2442	((!rv) && (langnum == LANG_hu) && hu_mov_rule &&
2443	(rv = affix_check(st.c_str(), i)) &&
2444	(sfx && sfx->getCont() &&
2445	(TESTAFF(sfx->getCont(), (unsigned short)'x', sfx->getContLen())(std::binary_search(sfx->getCont(), sfx->getCont() + sfx ->getContLen(), (unsigned short)'x')) \|\|
2446	TESTAFF(sfx->getCont(), (unsigned short)'%', sfx->getContLen())(std::binary_search(sfx->getCont(), sfx->getCont() + sfx ->getContLen(), (unsigned short)'%')))))
2447	// END of LANG_hu section
2448	) {
2449	// LANG_hu section: spec. Hungarian rule
2450	if (langnum == LANG_hu) {
2451	// calculate syllable number of the word
2452	numsyllable += get_syllable(st.substr(0, i));
2453
2454	// + 1 word, if syllable number of the prefix > 1 (hungarian
2455	// convention)
2456	if (pfx && (get_syllable(pfx->getKey()) > 1))
2457	wordnum++;
2458	}
2459	// END of LANG_hu section
2460
2461	// NEXT WORD(S)
2462	rv_first = rv;
2463	rv = lookup((word + i)); // perhaps without prefix
2464
2465	// search homonym with compound flag
2466	while ((rv) && ((needaffix && TESTAFF(rv->astr, needaffix, rv->alen)(std::binary_search(rv->astr, rv->astr + rv->alen, needaffix ))) \|\|
2467	!((compoundflag && !words &&
2468	TESTAFF(rv->astr, compoundflag, rv->alen)(std::binary_search(rv->astr, rv->astr + rv->alen, compoundflag ))) \|\|
2469	(compoundend && !words &&
2470	TESTAFF(rv->astr, compoundend, rv->alen)(std::binary_search(rv->astr, rv->astr + rv->alen, compoundend ))) \|\|
2471	(!defcpdtable.empty() && words &&
2472	defcpd_check(&words, wnum + 1, rv, NULL__null, 1))))) {
2473	rv = rv->next_homonym;
2474	}
2475
2476	if (rv && words && words[wnum + 1]) {
2477	result.append(presult);
2478	result.push_back(MSEP_FLD' ');
2479	result.append(MORPH_PART"pa:");
2480	result.append(word + i);
2481	if (complexprefixes && HENTRY_DATA(rv))
2482	result.append(HENTRY_DATA2(rv));
2483	if (!HENTRY_FIND(rv, MORPH_STEM"st:")) {
2484	result.push_back(MSEP_FLD' ');
2485	result.append(MORPH_STEM"st:");
2486	result.append(HENTRY_WORD(rv)&(rv->word[0]));
2487	}
2488	// store the pointer of the hash entry
2489	if (!complexprefixes && HENTRY_DATA(rv)) {
2490	result.push_back(MSEP_FLD' ');
2491	result.append(HENTRY_DATA2(rv));
2492	}
2493	result.push_back(MSEP_REC'\n');
2494	return 0;
2495	}
2496
	// snapshot the counters so the trial adjustments made for the second
	// part can be undone before the next attempt
2497	oldnumsyllable2 = numsyllable;
2498	oldwordnum2 = wordnum;
2499
2500	// LANG_hu section: spec. Hungarian rule
2501	if ((rv) && (langnum == LANG_hu) &&
2502	(TESTAFF(rv->astr, 'I', rv->alen)(std::binary_search(rv->astr, rv->astr + rv->alen, 'I' ))) &&
2503	!(TESTAFF(rv->astr, 'J', rv->alen)(std::binary_search(rv->astr, rv->astr + rv->alen, 'J' )))) {
2504	numsyllable--;
2505	}
2506	// END of LANG_hu section
2507	// increment word number, if the second root has a compoundroot flag
2508	if ((rv) && (compoundroot) &&
2509	(TESTAFF(rv->astr, compoundroot, rv->alen)(std::binary_search(rv->astr, rv->astr + rv->alen, compoundroot )))) {
2510	wordnum++;
2511	}
2512
2513	// check forbiddenwords
2514	if ((rv) && (rv->astr) &&
2515	(TESTAFF(rv->astr, forbiddenword, rv->alen)(std::binary_search(rv->astr, rv->astr + rv->alen, forbiddenword )) \|\|
2516	TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen)(std::binary_search(rv->astr, rv->astr + rv->alen, 65511 )))) {
2517	st[i] = ch;
2518	continue;
2519	}
2520
2521	// second word is acceptable, as a root?
2522	// hungarian conventions: compounding is acceptable,
2523	// when compound forms consist of 2 words, or if more,
2524	// then the syllable number of root words must be 6, or lesser.
2525	if ((rv) &&
2526	((compoundflag && TESTAFF(rv->astr, compoundflag, rv->alen)(std::binary_search(rv->astr, rv->astr + rv->alen, compoundflag ))) \|\|
2527	(compoundend && TESTAFF(rv->astr, compoundend, rv->alen)(std::binary_search(rv->astr, rv->astr + rv->alen, compoundend )))) &&
2528	(((cpdwordmax == -1) \|\| (wordnum + 1 < cpdwordmax)) \|\|
2529	((cpdmaxsyllable != 0) &&
2530	(numsyllable + get_syllable(std::string(HENTRY_WORD(rv)&(rv->word[0]), rv->blen)) <=
2531	cpdmaxsyllable))) &&
2532	((!checkcompounddup \|\| (rv != rv_first)))) {
2533	// bad compound word
2534	result.append(presult);
2535	result.push_back(MSEP_FLD' ');
2536	result.append(MORPH_PART"pa:");
2537	result.append(word + i);
2538
2539	if (HENTRY_DATA(rv)) {
2540	if (complexprefixes)
2541	result.append(HENTRY_DATA2(rv));
2542	if (!HENTRY_FIND(rv, MORPH_STEM"st:")) {
2543	result.push_back(MSEP_FLD' ');
2544	result.append(MORPH_STEM"st:");
2545	result.append(HENTRY_WORD(rv)&(rv->word[0]));
2546	}
2547	// store the pointer of the hash entry
2548	if (!complexprefixes) {
2549	result.push_back(MSEP_FLD' ');
2550	result.append(HENTRY_DATA2(rv));
2551	}
2552	}
2553	result.push_back(MSEP_REC'\n');
2554	ok = 1;
2555	}
2556
2557	numsyllable = oldnumsyllable2;
2558	wordnum = oldwordnum2;
2559
2560	// perhaps second word has prefix or/and suffix
2561	sfx = NULL__null;
2562	sfxflag = FLAG_NULL0x00;
2563
2564	if (compoundflag && !onlycpdrule)
2565	rv = affix_check((word + i), strlen(word + i), compoundflag);
2566	else
2567	rv = NULL__null;
2568
2569	if (!rv && compoundend && !onlycpdrule) {
2570	sfx = NULL__null;
2571	pfx = NULL__null;
2572	rv = affix_check((word + i), strlen(word + i), compoundend);
2573	}
2574
2575	if (!rv && !defcpdtable.empty() && words) {
2576	rv = affix_check((word + i), strlen(word + i), 0, IN_CPD_END2);
2577	if (rv && words && defcpd_check(&words, wnum + 1, rv, NULL__null, 1)) {
2578	std::string m;
2579	if (compoundflag)
2580	m = affix_check_morph((word + i), strlen(word + i), compoundflag);
2581	if (m.empty() && compoundend) {
2582	m = affix_check_morph((word + i), strlen(word + i), compoundend);
2583	}
2584	result.append(presult);
2585	if (!m.empty()) {
2586	result.push_back(MSEP_FLD' ');
2587	result.append(MORPH_PART"pa:");
2588	result.append(word + i);
2589	line_uniq_app(m, MSEP_REC'\n');
2590	result.append(m);
2591	}
2592	result.push_back(MSEP_REC'\n');
2593	ok = 1;
2594	}
2595	}
2596
2597	// check non_compound flag in suffix and prefix
2598	if ((rv) &&
2599	((pfx && pfx->getCont() &&
2600	TESTAFF(pfx->getCont(), compoundforbidflag, pfx->getContLen())(std::binary_search(pfx->getCont(), pfx->getCont() + pfx ->getContLen(), compoundforbidflag))) \|\|
2601	(sfx && sfx->getCont() &&
2602	TESTAFF(sfx->getCont(), compoundforbidflag,(std::binary_search(sfx->getCont(), sfx->getCont() + sfx ->getContLen(), compoundforbidflag))
2603	sfx->getContLen())(std::binary_search(sfx->getCont(), sfx->getCont() + sfx ->getContLen(), compoundforbidflag))))) {
2604	rv = NULL__null;
2605	}
2606
2607	// check forbiddenwords
2608	if ((rv) && (rv->astr) &&
2609	(TESTAFF(rv->astr, forbiddenword, rv->alen)(std::binary_search(rv->astr, rv->astr + rv->alen, forbiddenword )) \|\|
2610	TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen)(std::binary_search(rv->astr, rv->astr + rv->alen, 65511 ))) &&
2611	(!TESTAFF(rv->astr, needaffix, rv->alen)(std::binary_search(rv->astr, rv->astr + rv->alen, needaffix )))) {
2612	st[i] = ch;
2613	continue;
2614	}
2615
2616	if (langnum == LANG_hu) {
2617	// calculate syllable number of the word
2618	numsyllable += get_syllable(word + i);
2619
2620	// - affix syllable num.
2621	// XXX only second suffix (inflections, not derivations)
2622	if (sfxappnd) {
2623	std::string tmp(sfxappnd);
2624	reverseword(tmp);
2625	numsyllable -= short(get_syllable(tmp) + sfxextra);
2626	} else {
2627	numsyllable -= short(sfxextra);
2628	}
2629
2630	// + 1 word, if syllable number of the prefix > 1 (hungarian
2631	// convention)
2632	if (pfx && (get_syllable(pfx->getKey()) > 1))
2633	wordnum++;
2634
2635	// increment syllable num, if last word has a SYLLABLENUM flag
2636	// and the suffix is beginning `s'
2637
2638	if (!cpdsyllablenum.empty()) {
2639	switch (sfxflag) {
2640	case 'c': {
2641	numsyllable += 2;
2642	break;
2643	}
2644	case 'J': {
2645	numsyllable += 1;
2646	break;
2647	}
2648	case 'I': {
2649	if (rv && TESTAFF(rv->astr, 'J', rv->alen)(std::binary_search(rv->astr, rv->astr + rv->alen, 'J' )))
2650	numsyllable += 1;
2651	break;
2652	}
2653	}
2654	}
2655	}
2656
2657	// increment word number, if the second word has a compoundroot flag
2658	if ((rv) && (compoundroot) &&
2659	(TESTAFF(rv->astr, compoundroot, rv->alen)(std::binary_search(rv->astr, rv->astr + rv->alen, compoundroot )))) {
2660	wordnum++;
2661	}
2662	// second word is acceptable, as a word with prefix or/and suffix?
2663	// hungarian conventions: compounding is acceptable,
2664	// when compound forms consist 2 word, otherwise
2665	// the syllable number of root words is 6, or lesser.
2666	if ((rv) &&
2667	(((cpdwordmax == -1) \|\| (wordnum + 1 < cpdwordmax)) \|\|
2668	((cpdmaxsyllable != 0) && (numsyllable <= cpdmaxsyllable))) &&
2669	((!checkcompounddup \|\| (rv != rv_first)))) {
2670	std::string m;
2671	if (compoundflag)
2672	m = affix_check_morph((word + i), strlen(word + i), compoundflag);
2673	if (m.empty() && compoundend) {
2674	m = affix_check_morph((word + i), strlen(word + i), compoundend);
2675	}
2676	result.append(presult);
2677	if (!m.empty()) {
2678	result.push_back(MSEP_FLD' ');
2679	result.append(MORPH_PART"pa:");
2680	result.append(word + i);
2681	line_uniq_app(m, MSEP_REC'\n');
2682	result.push_back(MSEP_FLD' ');
2683	result.append(m);
2684	}
2685	result.push_back(MSEP_REC'\n');
2686	ok = 1;
2687	}
2688
2689	numsyllable = oldnumsyllable2;
2690	wordnum = oldwordnum2;
2691
2692	// perhaps second word is a compound word (recursive call)
	// presult is passed as partresult, so records written by the recursive
	// call begin with the parts collected so far; its return value is unused
2693	if ((wordnum + 2 < maxwordnum) && (ok == 0)) {
2694	compound_check_morph((word + i), strlen(word + i), wordnum + 1,
2695	numsyllable, maxwordnum, wnum + 1, words, rwords, 0,
2696	result, &presult);
2697	} else {
2698	rv = NULL__null;
2699	}
2700	}
	// undo the cut and the counter changes made for this split position
2701	st[i] = ch;
2702	wordnum = oldwordnum;
2703	numsyllable = oldnumsyllable;
2704
2705	} while (!defcpdtable.empty() && oldwordnum == 0 &&
2706	onlycpdrule++ < 1); // end of onlycpd loop
2707	}
2708	return 0;
2709	}
2710
2711
2712	inline int AffixMgr::isRevSubset(const char* s1,
2713	const char* end_of_s2,
2714	int len) {
2715	while ((len > 0) && (s1 != '\0') && ((s1 == end_of_s2) \|\| (s1 == '.'))) {
2716	s1++;
2717	end_of_s2--;
2718	len--;
2719	}
2720	return (*s1 == '\0');
2721	}
2722
2723	// check word for suffixes
2724	struct hentry* AffixMgr::suffix_check(const char* word,
2725	int len,
2726	int sfxopts,
2727	PfxEntry* ppfx,
2728	const FLAGunsigned short cclass,
2729	const FLAGunsigned short needflag,
2730	char in_compound) {
2731	struct hentry* rv = NULL__null;
2732	PfxEntry* ep = ppfx;
2733
2734	// first handle the special case of 0 length suffixes
2735	SfxEntry* se = sStart[0];
2736
2737	while (se) {
2738	if (!cclass \|\| se->getCont()) {
2739	// suffixes are not allowed in beginning of compounds
2740	if ((((in_compound != IN_CPD_BEGIN1)) \|\| // && !cclass
2741	// except when signed with compoundpermitflag flag
2742	(se->getCont() && compoundpermitflag &&
2743	TESTAFF(se->getCont(), compoundpermitflag, se->getContLen())(std::binary_search(se->getCont(), se->getCont() + se-> getContLen(), compoundpermitflag)))) &&
2744	(!circumfix \|\|
2745	// no circumfix flag in prefix and suffix
2746	((!ppfx \|\| !(ep->getCont()) \|\|
2747	!TESTAFF(ep->getCont(), circumfix, ep->getContLen())(std::binary_search(ep->getCont(), ep->getCont() + ep-> getContLen(), circumfix))) &&
2748	(!se->getCont() \|\|
2749	!(TESTAFF(se->getCont(), circumfix, se->getContLen())(std::binary_search(se->getCont(), se->getCont() + se-> getContLen(), circumfix))))) \|\|
2750	// circumfix flag in prefix AND suffix
2751	((ppfx && (ep->getCont()) &&
2752	TESTAFF(ep->getCont(), circumfix, ep->getContLen())(std::binary_search(ep->getCont(), ep->getCont() + ep-> getContLen(), circumfix))) &&
2753	(se->getCont() &&
2754	(TESTAFF(se->getCont(), circumfix, se->getContLen())(std::binary_search(se->getCont(), se->getCont() + se-> getContLen(), circumfix)))))) &&
2755	// fogemorpheme
2756	(in_compound \|\|
2757	!(se->getCont() &&
2758	(TESTAFF(se->getCont(), onlyincompound, se->getContLen())(std::binary_search(se->getCont(), se->getCont() + se-> getContLen(), onlyincompound))))) &&
2759	// needaffix on prefix or first suffix
2760	(cclass \|\|
2761	!(se->getCont() &&
2762	TESTAFF(se->getCont(), needaffix, se->getContLen())(std::binary_search(se->getCont(), se->getCont() + se-> getContLen(), needaffix))) \|\|
2763	(ppfx &&
2764	!((ep->getCont()) &&
2765	TESTAFF(ep->getCont(), needaffix, ep->getContLen())(std::binary_search(ep->getCont(), ep->getCont() + ep-> getContLen(), needaffix)))))) {
2766	rv = se->checkword(word, len, sfxopts, ppfx,
2767	(FLAGunsigned short)cclass, needflag,
2768	(in_compound ? 0 : onlyincompound));
2769	if (rv) {
2770	sfx = se; // BUG: sfx not stateless
2771	return rv;
2772	}
2773	}
2774	}
2775	se = se->getNext();
2776	}
2777
2778	// now handle the general case
2779	if (len == 0)
2780	return NULL__null; // FULLSTRIP
2781	unsigned char sp = ((const unsigned char)(word + len - 1));
2782	SfxEntry* sptr = sStart[sp];
2783
2784	while (sptr) {
2785	if (isRevSubset(sptr->getKey(), word + len - 1, len)) {
2786	// suffixes are not allowed in beginning of compounds
2787	if ((((in_compound != IN_CPD_BEGIN1)) \|\| // && !cclass
2788	// except when signed with compoundpermitflag flag
2789	(sptr->getCont() && compoundpermitflag &&
2790	TESTAFF(sptr->getCont(), compoundpermitflag,(std::binary_search(sptr->getCont(), sptr->getCont() + sptr ->getContLen(), compoundpermitflag))
2791	sptr->getContLen())(std::binary_search(sptr->getCont(), sptr->getCont() + sptr ->getContLen(), compoundpermitflag)))) &&
2792	(!circumfix \|\|
2793	// no circumfix flag in prefix and suffix
2794	((!ppfx \|\| !(ep->getCont()) \|\|
2795	!TESTAFF(ep->getCont(), circumfix, ep->getContLen())(std::binary_search(ep->getCont(), ep->getCont() + ep-> getContLen(), circumfix))) &&
2796	(!sptr->getCont() \|\|
2797	!(TESTAFF(sptr->getCont(), circumfix, sptr->getContLen())(std::binary_search(sptr->getCont(), sptr->getCont() + sptr ->getContLen(), circumfix))))) \|\|
2798	// circumfix flag in prefix AND suffix
2799	((ppfx && (ep->getCont()) &&
2800	TESTAFF(ep->getCont(), circumfix, ep->getContLen())(std::binary_search(ep->getCont(), ep->getCont() + ep-> getContLen(), circumfix))) &&
2801	(sptr->getCont() &&
2802	(TESTAFF(sptr->getCont(), circumfix, sptr->getContLen())(std::binary_search(sptr->getCont(), sptr->getCont() + sptr ->getContLen(), circumfix)))))) &&
2803	// fogemorpheme
2804	(in_compound \|\|
2805	!((sptr->getCont() && (TESTAFF(sptr->getCont(), onlyincompound,(std::binary_search(sptr->getCont(), sptr->getCont() + sptr ->getContLen(), onlyincompound))
2806	sptr->getContLen())(std::binary_search(sptr->getCont(), sptr->getCont() + sptr ->getContLen(), onlyincompound)))))) &&
2807	// needaffix on prefix or first suffix
2808	(cclass \|\|
2809	!(sptr->getCont() &&
2810	TESTAFF(sptr->getCont(), needaffix, sptr->getContLen())(std::binary_search(sptr->getCont(), sptr->getCont() + sptr ->getContLen(), needaffix))) \|\|
2811	(ppfx &&
2812	!((ep->getCont()) &&
2813	TESTAFF(ep->getCont(), needaffix, ep->getContLen())(std::binary_search(ep->getCont(), ep->getCont() + ep-> getContLen(), needaffix))))))
2814	if (in_compound != IN_CPD_END2 \|\| ppfx \|\|
2815	!(sptr->getCont() &&
2816	TESTAFF(sptr->getCont(), onlyincompound, sptr->getContLen())(std::binary_search(sptr->getCont(), sptr->getCont() + sptr ->getContLen(), onlyincompound)))) {
2817	rv = sptr->checkword(word, len, sfxopts, ppfx,
2818	cclass, needflag,
2819	(in_compound ? 0 : onlyincompound));
2820	if (rv) {
2821	sfx = sptr; // BUG: sfx not stateless
2822	sfxflag = sptr->getFlag(); // BUG: sfxflag not stateless
2823	if (!sptr->getCont())
2824	sfxappnd = sptr->getKey(); // BUG: sfxappnd not stateless
2825	// LANG_hu section: spec. Hungarian rule
2826	else if (langnum == LANG_hu && sptr->getKeyLen() &&
2827	sptr->getKey()[0] == 'i' && sptr->getKey()[1] != 'y' &&
2828	sptr->getKey()[1] != 't') {
2829	sfxextra = 1;
2830	}
2831	// END of LANG_hu section
2832	return rv;
2833	}
2834	}
2835	sptr = sptr->getNextEQ();
2836	} else {
2837	sptr = sptr->getNextNE();
2838	}
2839	}
2840
2841	return NULL__null;
2842	}
2843
2844	// check word for two-level suffixes
2845	struct hentry* AffixMgr::suffix_check_twosfx(const char* word,
2846	int len,
2847	int sfxopts,
2848	PfxEntry* ppfx,
2849	const FLAGunsigned short needflag) {
2850	struct hentry* rv = NULL__null;
2851
2852	// first handle the special case of 0 length suffixes
2853	SfxEntry* se = sStart[0];
2854	while (se) {
2855	if (contclasses[se->getFlag()]) {
2856	rv = se->check_twosfx(word, len, sfxopts, ppfx, needflag);
2857	if (rv)
2858	return rv;
2859	}
2860	se = se->getNext();
2861	}
2862
2863	// now handle the general case
2864	if (len == 0)
2865	return NULL__null; // FULLSTRIP
2866	unsigned char sp = ((const unsigned char)(word + len - 1));
2867	SfxEntry* sptr = sStart[sp];
2868
2869	while (sptr) {
2870	if (isRevSubset(sptr->getKey(), word + len - 1, len)) {
2871	if (contclasses[sptr->getFlag()]) {
2872	rv = sptr->check_twosfx(word, len, sfxopts, ppfx, needflag);
2873	if (rv) {
2874	sfxflag = sptr->getFlag(); // BUG: sfxflag not stateless
2875	if (!sptr->getCont())
2876	sfxappnd = sptr->getKey(); // BUG: sfxappnd not stateless
2877	return rv;
2878	}
2879	}
2880	sptr = sptr->getNextEQ();
2881	} else {
2882	sptr = sptr->getNextNE();
2883	}
2884	}
2885
2886	return NULL__null;
2887	}
2888
2889	// check word for two-level suffixes and morph
2890	std::string AffixMgr::suffix_check_twosfx_morph(const char* word,
2891	int len,
2892	int sfxopts,
2893	PfxEntry* ppfx,
2894	const FLAGunsigned short needflag) {
2895	std::string result;
2896	std::string result2;
2897	std::string result3;
2898
2899	// first handle the special case of 0 length suffixes
2900	SfxEntry* se = sStart[0];
2901	while (se) {
2902	if (contclasses[se->getFlag()]) {
2903	std::string st = se->check_twosfx_morph(word, len, sfxopts, ppfx, needflag);
2904	if (!st.empty()) {
2905	if (ppfx) {
2906	if (ppfx->getMorph()) {
2907	result.append(ppfx->getMorph());
2908	result.push_back(MSEP_FLD' ');
2909	} else
2910	debugflag(result, ppfx->getFlag());
2911	}
2912	result.append(st);
2913	if (se->getMorph()) {
2914	result.push_back(MSEP_FLD' ');
2915	result.append(se->getMorph());
2916	} else
2917	debugflag(result, se->getFlag());
2918	result.push_back(MSEP_REC'\n');
2919	}
2920	}
2921	se = se->getNext();
2922	}
2923
2924	// now handle the general case
2925	if (len == 0)
2926	return std::string(); // FULLSTRIP
2927	unsigned char sp = ((const unsigned char)(word + len - 1));
2928	SfxEntry* sptr = sStart[sp];
2929
2930	while (sptr) {
2931	if (isRevSubset(sptr->getKey(), word + len - 1, len)) {
2932	if (contclasses[sptr->getFlag()]) {
2933	std::string st = sptr->check_twosfx_morph(word, len, sfxopts, ppfx, needflag);
2934	if (!st.empty()) {
2935	sfxflag = sptr->getFlag(); // BUG: sfxflag not stateless
2936	if (!sptr->getCont())
2937	sfxappnd = sptr->getKey(); // BUG: sfxappnd not stateless
2938	result2.assign(st);
2939
2940	result3.clear();
2941
2942	if (sptr->getMorph()) {
2943	result3.push_back(MSEP_FLD' ');
2944	result3.append(sptr->getMorph());
2945	} else
2946	debugflag(result3, sptr->getFlag());
2947	strlinecat(result2, result3);
2948	result2.push_back(MSEP_REC'\n');
2949	result.append(result2);
2950	}
2951	}
2952	sptr = sptr->getNextEQ();
2953	} else {
2954	sptr = sptr->getNextNE();
2955	}
2956	}
2957
2958	return result;
2959	}
2960
2961	std::string AffixMgr::suffix_check_morph(const char* word,
2962	int len,
2963	int sfxopts,
2964	PfxEntry* ppfx,
2965	const FLAGunsigned short cclass,
2966	const FLAGunsigned short needflag,
2967	char in_compound) {
2968	std::string result;
2969
2970	struct hentry* rv = NULL__null;
2971
2972	PfxEntry* ep = ppfx;
2973
2974	// first handle the special case of 0 length suffixes
2975	SfxEntry* se = sStart[0];
2976	while (se) {
2977	if (!cclass \|\| se->getCont()) {
2978	// suffixes are not allowed in beginning of compounds
2979	if (((((in_compound != IN_CPD_BEGIN1)) \|\| // && !cclass
2980	// except when signed with compoundpermitflag flag
2981	(se->getCont() && compoundpermitflag &&
2982	TESTAFF(se->getCont(), compoundpermitflag, se->getContLen())(std::binary_search(se->getCont(), se->getCont() + se-> getContLen(), compoundpermitflag)))) &&
2983	(!circumfix \|\|
2984	// no circumfix flag in prefix and suffix
2985	((!ppfx \|\| !(ep->getCont()) \|\|
2986	!TESTAFF(ep->getCont(), circumfix, ep->getContLen())(std::binary_search(ep->getCont(), ep->getCont() + ep-> getContLen(), circumfix))) &&
2987	(!se->getCont() \|\|
2988	!(TESTAFF(se->getCont(), circumfix, se->getContLen())(std::binary_search(se->getCont(), se->getCont() + se-> getContLen(), circumfix))))) \|\|
2989	// circumfix flag in prefix AND suffix
2990	((ppfx && (ep->getCont()) &&
2991	TESTAFF(ep->getCont(), circumfix, ep->getContLen())(std::binary_search(ep->getCont(), ep->getCont() + ep-> getContLen(), circumfix))) &&
2992	(se->getCont() &&
2993	(TESTAFF(se->getCont(), circumfix, se->getContLen())(std::binary_search(se->getCont(), se->getCont() + se-> getContLen(), circumfix)))))) &&
2994	// fogemorpheme
2995	(in_compound \|\|
2996	!((se->getCont() &&
2997	(TESTAFF(se->getCont(), onlyincompound, se->getContLen())(std::binary_search(se->getCont(), se->getCont() + se-> getContLen(), onlyincompound)))))) &&
2998	// needaffix on prefix or first suffix
2999	(cclass \|\|
3000	!(se->getCont() &&
3001	TESTAFF(se->getCont(), needaffix, se->getContLen())(std::binary_search(se->getCont(), se->getCont() + se-> getContLen(), needaffix))) \|\|
3002	(ppfx &&
3003	!((ep->getCont()) &&
3004	TESTAFF(ep->getCont(), needaffix, ep->getContLen())(std::binary_search(ep->getCont(), ep->getCont() + ep-> getContLen(), needaffix)))))))
3005	rv = se->checkword(word, len, sfxopts, ppfx, cclass,
3006	needflag, FLAG_NULL0x00);
3007	while (rv) {
3008	if (ppfx) {
3009	if (ppfx->getMorph()) {
3010	result.append(ppfx->getMorph());
3011	result.push_back(MSEP_FLD' ');
3012	} else
3013	debugflag(result, ppfx->getFlag());
3014	}
3015	if (complexprefixes && HENTRY_DATA(rv))
3016	result.append(HENTRY_DATA2(rv));
3017	if (!HENTRY_FIND(rv, MORPH_STEM"st:")) {
3018	result.push_back(MSEP_FLD' ');
3019	result.append(MORPH_STEM"st:");
3020	result.append(HENTRY_WORD(rv)&(rv->word[0]));
3021	}
3022
3023	if (!complexprefixes && HENTRY_DATA(rv)) {
3024	result.push_back(MSEP_FLD' ');
3025	result.append(HENTRY_DATA2(rv));
3026	}
3027	if (se->getMorph()) {
3028	result.push_back(MSEP_FLD' ');
3029	result.append(se->getMorph());
3030	} else
3031	debugflag(result, se->getFlag());
3032	result.push_back(MSEP_REC'\n');
3033	rv = se->get_next_homonym(rv, sfxopts, ppfx, cclass, needflag);
3034	}
3035	}
3036	se = se->getNext();
3037	}
3038
3039	// now handle the general case
3040	if (len == 0)
3041	return std::string(); // FULLSTRIP
3042	unsigned char sp = ((const unsigned char)(word + len - 1));
3043	SfxEntry* sptr = sStart[sp];
3044
3045	while (sptr) {
3046	if (isRevSubset(sptr->getKey(), word + len - 1, len)) {
3047	// suffixes are not allowed in beginning of compounds
3048	if (((((in_compound != IN_CPD_BEGIN1)) \|\| // && !cclass
3049	// except when signed with compoundpermitflag flag
3050	(sptr->getCont() && compoundpermitflag &&
3051	TESTAFF(sptr->getCont(), compoundpermitflag,(std::binary_search(sptr->getCont(), sptr->getCont() + sptr ->getContLen(), compoundpermitflag))
3052	sptr->getContLen())(std::binary_search(sptr->getCont(), sptr->getCont() + sptr ->getContLen(), compoundpermitflag)))) &&
3053	(!circumfix \|\|
3054	// no circumfix flag in prefix and suffix
3055	((!ppfx \|\| !(ep->getCont()) \|\|
3056	!TESTAFF(ep->getCont(), circumfix, ep->getContLen())(std::binary_search(ep->getCont(), ep->getCont() + ep-> getContLen(), circumfix))) &&
3057	(!sptr->getCont() \|\|
3058	!(TESTAFF(sptr->getCont(), circumfix, sptr->getContLen())(std::binary_search(sptr->getCont(), sptr->getCont() + sptr ->getContLen(), circumfix))))) \|\|
3059	// circumfix flag in prefix AND suffix
3060	((ppfx && (ep->getCont()) &&
3061	TESTAFF(ep->getCont(), circumfix, ep->getContLen())(std::binary_search(ep->getCont(), ep->getCont() + ep-> getContLen(), circumfix))) &&
3062	(sptr->getCont() &&
3063	(TESTAFF(sptr->getCont(), circumfix, sptr->getContLen())(std::binary_search(sptr->getCont(), sptr->getCont() + sptr ->getContLen(), circumfix)))))) &&
3064	// fogemorpheme
3065	(in_compound \|\|
3066	!((sptr->getCont() && (TESTAFF(sptr->getCont(), onlyincompound,(std::binary_search(sptr->getCont(), sptr->getCont() + sptr ->getContLen(), onlyincompound))
3067	sptr->getContLen())(std::binary_search(sptr->getCont(), sptr->getCont() + sptr ->getContLen(), onlyincompound)))))) &&
3068	// needaffix on first suffix
3069	(cclass \|\|
3070	!(sptr->getCont() &&
3071	TESTAFF(sptr->getCont(), needaffix, sptr->getContLen())(std::binary_search(sptr->getCont(), sptr->getCont() + sptr ->getContLen(), needaffix))))))
3072	rv = sptr->checkword(word, len, sfxopts, ppfx, cclass,
3073	needflag, FLAG_NULL0x00);
3074	while (rv) {
3075	if (ppfx) {
3076	if (ppfx->getMorph()) {
3077	result.append(ppfx->getMorph());
3078	result.push_back(MSEP_FLD' ');
3079	} else
3080	debugflag(result, ppfx->getFlag());
3081	}
3082	if (complexprefixes && HENTRY_DATA(rv))
3083	result.append(HENTRY_DATA2(rv));
3084	if (!HENTRY_FIND(rv, MORPH_STEM"st:")) {
3085	result.push_back(MSEP_FLD' ');
3086	result.append(MORPH_STEM"st:");
3087	result.append(HENTRY_WORD(rv)&(rv->word[0]));
3088	}
3089
3090	if (!complexprefixes && HENTRY_DATA(rv)) {
3091	result.push_back(MSEP_FLD' ');
3092	result.append(HENTRY_DATA2(rv));
3093	}
3094
3095	if (sptr->getMorph()) {
3096	result.push_back(MSEP_FLD' ');
3097	result.append(sptr->getMorph());
3098	} else
3099	debugflag(result, sptr->getFlag());
3100	result.push_back(MSEP_REC'\n');
3101	rv = sptr->get_next_homonym(rv, sfxopts, ppfx, cclass, needflag);
3102	}
3103	sptr = sptr->getNextEQ();
3104	} else {
3105	sptr = sptr->getNextNE();
3106	}
3107	}
3108
3109	return result;
3110	}
3111
3112	// check if word with affixes is correctly spelled
3113	struct hentry* AffixMgr::affix_check(const char* word,
3114	int len,
3115	const FLAGunsigned short needflag,
3116	char in_compound) {
3117
3118	// check all prefixes (also crossed with suffixes if allowed)
3119	struct hentry* rv = prefix_check(word, len, in_compound, needflag);
3120	if (rv)
3121	return rv;
3122
3123	// if still not found check all suffixes
3124	rv = suffix_check(word, len, 0, NULL__null, FLAG_NULL0x00, needflag, in_compound);
3125
3126	if (havecontclass) {
3127	sfx = NULL__null;
3128	pfx = NULL__null;
3129
3130	if (rv)
3131	return rv;
3132	// if still not found check all two-level suffixes
3133	rv = suffix_check_twosfx(word, len, 0, NULL__null, needflag);
3134
3135	if (rv)
3136	return rv;
3137	// if still not found check all two-level suffixes
3138	rv = prefix_check_twosfx(word, len, IN_CPD_NOT0, needflag);
3139	}
3140
3141	return rv;
3142	}
3143
3144	// check if word with affixes is correctly spelled
3145	std::string AffixMgr::affix_check_morph(const char* word,
3146	int len,
3147	const FLAGunsigned short needflag,
3148	char in_compound) {
3149	std::string result;
3150
3151	// check all prefixes (also crossed with suffixes if allowed)
3152	std::string st = prefix_check_morph(word, len, in_compound);
3153	if (!st.empty()) {
3154	result.append(st);
3155	}
3156
3157	// if still not found check all suffixes
3158	st = suffix_check_morph(word, len, 0, NULL__null, '\0', needflag, in_compound);
3159	if (!st.empty()) {
3160	result.append(st);
3161	}
3162
3163	if (havecontclass) {
3164	sfx = NULL__null;
3165	pfx = NULL__null;
3166	// if still not found check all two-level suffixes
3167	st = suffix_check_twosfx_morph(word, len, 0, NULL__null, needflag);
3168	if (!st.empty()) {
3169	result.append(st);
3170	}
3171
3172	// if still not found check all two-level suffixes
3173	st = prefix_check_twosfx_morph(word, len, IN_CPD_NOT0, needflag);
3174	if (!st.empty()) {
3175	result.append(st);
3176	}
3177	}
3178
3179	return result;
3180	}
3181
3182	// morphcmp(): compare MORPH_DERI_SFX, MORPH_INFL_SFX and MORPH_TERM_SFX fields
3183	// in the first line of the inputs
3184	// return 0, if inputs equal
3185	// return 1, if inputs may equal with a secondary suffix
3186	// otherwise return -1
3187	static int morphcmp(const char* s, const char* t) {
3188	int se = 0;
3189	int te = 0;
3190	const char* sl;
3191	const char* tl;
3192	const char* olds;
3193	const char* oldt;
3194	if (!s \|\| !t)
3195	return 1;
3196	olds = s;
3197	sl = strchr(s, '\n');
3198	s = strstr(s, MORPH_DERI_SFX"ds:");
3199	if (!s \|\| (sl && sl < s))
3200	s = strstr(olds, MORPH_INFL_SFX"is:");
3201	if (!s \|\| (sl && sl < s)) {
3202	s = strstr(olds, MORPH_TERM_SFX"ts:");
3203	olds = NULL__null;
3204	}
3205	oldt = t;
3206	tl = strchr(t, '\n');
3207	t = strstr(t, MORPH_DERI_SFX"ds:");
3208	if (!t \|\| (tl && tl < t))
3209	t = strstr(oldt, MORPH_INFL_SFX"is:");
3210	if (!t \|\| (tl && tl < t)) {
3211	t = strstr(oldt, MORPH_TERM_SFX"ts:");
3212	oldt = NULL__null;
3213	}
3214	while (s && t && (!sl \|\| sl > s) && (!tl \|\| tl > t)) {
3215	s += MORPH_TAG_LENstrlen("st:");
3216	t += MORPH_TAG_LENstrlen("st:");
3217	se = 0;
3218	te = 0;
3219	while ((s == t) && !se && !te) {
3220	s++;
3221	t++;
3222	switch (*s) {
3223	case ' ':
3224	case '\n':
3225	case '\t':
3226	case '\0':
3227	se = 1;
3228	}
3229	switch (*t) {
3230	case ' ':
3231	case '\n':
3232	case '\t':
3233	case '\0':
3234	te = 1;
3235	}
3236	}
3237	if (!se \|\| !te) {
3238	// not terminal suffix difference
3239	if (olds)
3240	return -1;
3241	return 1;
3242	}
3243	olds = s;
3244	s = strstr(s, MORPH_DERI_SFX"ds:");
3245	if (!s \|\| (sl && sl < s))
3246	s = strstr(olds, MORPH_INFL_SFX"is:");
3247	if (!s \|\| (sl && sl < s)) {
3248	s = strstr(olds, MORPH_TERM_SFX"ts:");
3249	olds = NULL__null;
3250	}
3251	oldt = t;
3252	t = strstr(t, MORPH_DERI_SFX"ds:");
3253	if (!t \|\| (tl && tl < t))
3254	t = strstr(oldt, MORPH_INFL_SFX"is:");
3255	if (!t \|\| (tl && tl < t)) {
3256	t = strstr(oldt, MORPH_TERM_SFX"ts:");
3257	oldt = NULL__null;
3258	}
3259	}
3260	if (!s && !t && se && te)
3261	return 0;
3262	return 1;
3263	}
3264
3265	std::string AffixMgr::morphgen(const char* ts,
3266	int wl,
3267	const unsigned short* ap,
3268	unsigned short al,
3269	const char* morph,
3270	const char* targetmorph,
3271	int level) {
3272	// handle suffixes
3273	if (!morph)
3274	return std::string();
3275
3276	// check substandard flag
3277	if (TESTAFF(ap, substandard, al)(std::binary_search(ap, ap + al, substandard)))
3278	return std::string();
3279
3280	if (morphcmp(morph, targetmorph) == 0)
3281	return ts;
3282
3283	size_t stemmorphcatpos;
3284	std::string mymorph;
3285
3286	// use input suffix fields, if exist
3287	if (strstr(morph, MORPH_INFL_SFX"is:") \|\| strstr(morph, MORPH_DERI_SFX"ds:")) {
3288	mymorph.assign(morph);
3289	mymorph.push_back(MSEP_FLD' ');
3290	stemmorphcatpos = mymorph.size();
3291	} else {
3292	stemmorphcatpos = std::string::npos;
3293	}
3294
3295	for (int i = 0; i < al; i++) {
3296	const unsigned char c = (unsigned char)(ap[i] & 0x00FF);
3297	SfxEntry* sptr = sFlag[c];
3298	while (sptr) {
3299	if (sptr->getFlag() == ap[i] && sptr->getMorph() &&
3300	((sptr->getContLen() == 0) \|\|
3301	// don't generate forms with substandard affixes
3302	!TESTAFF(sptr->getCont(), substandard, sptr->getContLen())(std::binary_search(sptr->getCont(), sptr->getCont() + sptr ->getContLen(), substandard)))) {
3303	const char* stemmorph;
3304	if (stemmorphcatpos != std::string::npos) {
3305	mymorph.replace(stemmorphcatpos, std::string::npos, sptr->getMorph());
3306	stemmorph = mymorph.c_str();
3307	} else {
3308	stemmorph = sptr->getMorph();
3309	}
3310
3311	int cmp = morphcmp(stemmorph, targetmorph);
3312
3313	if (cmp == 0) {
3314	std::string newword = sptr->add(ts, wl);
3315	if (!newword.empty()) {
3316	hentry* check = pHMgr->lookup(newword.c_str()); // XXX extra dic
3317	if (!check \|\| !check->astr \|\|
3318	!(TESTAFF(check->astr, forbiddenword, check->alen)(std::binary_search(check->astr, check->astr + check-> alen, forbiddenword)) \|\|
3319	TESTAFF(check->astr, ONLYUPCASEFLAG, check->alen)(std::binary_search(check->astr, check->astr + check-> alen, 65511)))) {
3320	return newword;
3321	}
3322	}
3323	}
3324
3325	// recursive call for secondary suffixes
3326	if ((level == 0) && (cmp == 1) && (sptr->getContLen() > 0) &&
3327	!TESTAFF(sptr->getCont(), substandard, sptr->getContLen())(std::binary_search(sptr->getCont(), sptr->getCont() + sptr ->getContLen(), substandard))) {
3328	std::string newword = sptr->add(ts, wl);
3329	if (!newword.empty()) {
3330	std::string newword2 =
3331	morphgen(newword.c_str(), newword.size(), sptr->getCont(),
3332	sptr->getContLen(), stemmorph, targetmorph, 1);
3333
3334	if (!newword2.empty()) {
3335	return newword2;
3336	}
3337	}
3338	}
3339	}
3340	sptr = sptr->getFlgNxt();
3341	}
3342	}
3343	return std::string();
3344	}
3345
3346	int AffixMgr::expand_rootword(struct guessword* wlst,
3347	int maxn,
3348	const char* ts,
3349	int wl,
3350	const unsigned short* ap,
3351	unsigned short al,
3352	const char* bad,
3353	int badl,
3354	const char* phon) {
3355	int nh = 0;
3356	// first add root word to list
3357	if ((nh < maxn) &&
3358	!(al && ((needaffix && TESTAFF(ap, needaffix, al)(std::binary_search(ap, ap + al, needaffix))) \|\|
3359	(onlyincompound && TESTAFF(ap, onlyincompound, al)(std::binary_search(ap, ap + al, onlyincompound)))))) {
3360	wlst[nh].word = mystrdup(ts);
3361	if (!wlst[nh].word)
3362	return 0;
3363	wlst[nh].allow = false;
3364	wlst[nh].orig = NULL__null;
3365	nh++;
3366	// add special phonetic version
3367	if (phon && (nh < maxn)) {
3368	wlst[nh].word = mystrdup(phon);
3369	if (!wlst[nh].word)
3370	return nh - 1;
3371	wlst[nh].allow = false;
3372	wlst[nh].orig = mystrdup(ts);
3373	if (!wlst[nh].orig)
3374	return nh - 1;
3375	nh++;
3376	}
3377	}
3378
3379	// handle suffixes
3380	for (int i = 0; i < al; i++) {
3381	const unsigned char c = (unsigned char)(ap[i] & 0x00FF);
3382	SfxEntry* sptr = sFlag[c];
3383	while (sptr) {
3384	if ((sptr->getFlag() == ap[i]) &&
3385	(!sptr->getKeyLen() \|\|
3386	((badl > sptr->getKeyLen()) &&
3387	(strcmp(sptr->getAffix(), bad + badl - sptr->getKeyLen()) == 0))) &&
3388	// check needaffix flag
3389	!(sptr->getCont() &&
3390	((needaffix &&
3391	TESTAFF(sptr->getCont(), needaffix, sptr->getContLen())(std::binary_search(sptr->getCont(), sptr->getCont() + sptr ->getContLen(), needaffix))) \|\|
3392	(circumfix &&
3393	TESTAFF(sptr->getCont(), circumfix, sptr->getContLen())(std::binary_search(sptr->getCont(), sptr->getCont() + sptr ->getContLen(), circumfix))) \|\|
3394	(onlyincompound &&
3395	TESTAFF(sptr->getCont(), onlyincompound, sptr->getContLen())(std::binary_search(sptr->getCont(), sptr->getCont() + sptr ->getContLen(), onlyincompound)))))) {
3396	std::string newword = sptr->add(ts, wl);
3397	if (!newword.empty()) {
3398	if (nh < maxn) {
3399	wlst[nh].word = mystrdup(newword.c_str());
3400	wlst[nh].allow = sptr->allowCross();
3401	wlst[nh].orig = NULL__null;
3402	nh++;
3403	// add special phonetic version
3404	if (phon && (nh < maxn)) {
3405	std::string prefix(phon);
3406	std::string key(sptr->getKey());
3407	reverseword(key);
3408	prefix.append(key);
3409	wlst[nh].word = mystrdup(prefix.c_str());
3410	if (!wlst[nh].word)
3411	return nh - 1;
3412	wlst[nh].allow = false;
3413	wlst[nh].orig = mystrdup(newword.c_str());
3414	if (!wlst[nh].orig)
3415	return nh - 1;
3416	nh++;
3417	}
3418	}
3419	}
3420	}
3421	sptr = sptr->getFlgNxt();
3422	}
3423	}
3424
3425	int n = nh;
3426
3427	// handle cross products of prefixes and suffixes
3428	for (int j = 1; j < n; j++)
3429	if (wlst[j].allow) {
3430	for (int k = 0; k < al; k++) {
3431	const unsigned char c = (unsigned char)(ap[k] & 0x00FF);
3432	PfxEntry* cptr = pFlag[c];
3433	while (cptr) {
3434	if ((cptr->getFlag() == ap[k]) && cptr->allowCross() &&
3435	(!cptr->getKeyLen() \|\|
3436	((badl > cptr->getKeyLen()) &&
3437	(strncmp(cptr->getKey(), bad, cptr->getKeyLen()) == 0)))) {
3438	int l1 = strlen(wlst[j].word);
3439	std::string newword = cptr->add(wlst[j].word, l1);
3440	if (!newword.empty()) {
3441	if (nh < maxn) {
3442	wlst[nh].word = mystrdup(newword.c_str());
3443	wlst[nh].allow = cptr->allowCross();
3444	wlst[nh].orig = NULL__null;
3445	nh++;
3446	}
3447	}
3448	}
3449	cptr = cptr->getFlgNxt();
3450	}
3451	}
3452	}
3453
3454	// now handle pure prefixes
3455	for (int m = 0; m < al; m++) {
3456	const unsigned char c = (unsigned char)(ap[m] & 0x00FF);
3457	PfxEntry* ptr = pFlag[c];
3458	while (ptr) {
3459	if ((ptr->getFlag() == ap[m]) &&
3460	(!ptr->getKeyLen() \|\|
3461	((badl > ptr->getKeyLen()) &&
3462	(strncmp(ptr->getKey(), bad, ptr->getKeyLen()) == 0))) &&
3463	// check needaffix flag
3464	!(ptr->getCont() &&
3465	((needaffix &&
3466	TESTAFF(ptr->getCont(), needaffix, ptr->getContLen())(std::binary_search(ptr->getCont(), ptr->getCont() + ptr ->getContLen(), needaffix))) \|\|
3467	(circumfix &&
3468	TESTAFF(ptr->getCont(), circumfix, ptr->getContLen())(std::binary_search(ptr->getCont(), ptr->getCont() + ptr ->getContLen(), circumfix))) \|\|
3469	(onlyincompound &&
3470	TESTAFF(ptr->getCont(), onlyincompound, ptr->getContLen())(std::binary_search(ptr->getCont(), ptr->getCont() + ptr ->getContLen(), onlyincompound)))))) {
3471	std::string newword = ptr->add(ts, wl);
3472	if (!newword.empty()) {
3473	if (nh < maxn) {
3474	wlst[nh].word = mystrdup(newword.c_str());
3475	wlst[nh].allow = ptr->allowCross();
3476	wlst[nh].orig = NULL__null;
3477	nh++;
3478	}
3479	}
3480	}
3481	ptr = ptr->getFlgNxt();
3482	}
3483	}
3484
3485	return nh;
3486	}
3487
3488	// return replacing table
3489	const std::vector<replentry>& AffixMgr::get_reptable() const {
3490	return pHMgr->get_reptable();
3491	}
3492
3493	// return iconv table
3494	RepList* AffixMgr::get_iconvtable() const {
3495	if (!iconvtable)
3496	return NULL__null;
3497	return iconvtable;
3498	}
3499
3500	// return oconv table
3501	RepList* AffixMgr::get_oconvtable() const {
3502	if (!oconvtable)
3503	return NULL__null;
3504	return oconvtable;
3505	}
3506
3507	// return replacing table
3508	struct phonetable* AffixMgr::get_phonetable() const {
3509	if (!phone)
3510	return NULL__null;
3511	return phone;
3512	}
3513
3514	// return character map table
3515	const std::vector<mapentry>& AffixMgr::get_maptable() const {
3516	return maptable;
3517	}
3518
3519	// return character map table
3520	const std::vector<std::string>& AffixMgr::get_breaktable() const {
3521	return breaktable;
3522	}
3523
3524	// return text encoding of dictionary
3525	const std::string& AffixMgr::get_encoding() {
3526	if (encoding.empty())
3527	encoding = SPELL_ENCODING"ISO8859-1";
3528	return encoding;
3529	}
3530
3531	// return text encoding of dictionary
3532	int AffixMgr::get_langnum() const {
3533	return langnum;
3534	}
3535
3536	// return double prefix option
3537	int AffixMgr::get_complexprefixes() const {
3538	return complexprefixes;
3539	}
3540
3541	// return FULLSTRIP option
3542	int AffixMgr::get_fullstrip() const {
3543	return fullstrip;
3544	}
3545
3546	FLAGunsigned short AffixMgr::get_keepcase() const {
3547	return keepcase;
3548	}
3549
3550	FLAGunsigned short AffixMgr::get_forceucase() const {
3551	return forceucase;
3552	}
3553
3554	FLAGunsigned short AffixMgr::get_warn() const {
3555	return warn;
3556	}
3557
3558	int AffixMgr::get_forbidwarn() const {
3559	return forbidwarn;
3560	}
3561
3562	int AffixMgr::get_checksharps() const {
3563	return checksharps;
3564	}
3565
3566	char* AffixMgr::encode_flag(unsigned short aflag) const {
3567	return pHMgr->encode_flag(aflag);
3568	}
3569
3570	// return the preferred ignore string for suggestions
3571	const char* AffixMgr::get_ignore() const {
3572	if (ignorechars.empty())
3573	return NULL__null;
3574	return ignorechars.c_str();
3575	}
3576
3577	// return the preferred ignore string for suggestions
3578	const std::vector<w_char>& AffixMgr::get_ignore_utf16() const {
3579	return ignorechars_utf16;
3580	}
3581
3582	// return the keyboard string for suggestions
3583	char* AffixMgr::get_key_string() {
3584	if (keystring.empty())
3585	keystring = SPELL_KEYSTRING"qwertyuiop\|asdfghjkl\|zxcvbnm";
3586	return mystrdup(keystring.c_str());
3587	}
3588
3589	// return the preferred try string for suggestions
3590	char* AffixMgr::get_try_string() const {
3591	if (trystring.empty())
3592	return NULL__null;
3593	return mystrdup(trystring.c_str());
3594	}
3595
3596	// return the preferred try string for suggestions
3597	const std::string& AffixMgr::get_wordchars() const {
3598	return wordchars;
3599	}
3600
3601	const std::vector<w_char>& AffixMgr::get_wordchars_utf16() const {
3602	return wordchars_utf16;
3603	}
3604
3605	// is there compounding?
3606	int AffixMgr::get_compound() const {
3607	return compoundflag \|\| compoundbegin \|\| !defcpdtable.empty();
3608	}
3609
3610	// return the compound words control flag
3611	FLAGunsigned short AffixMgr::get_compoundflag() const {
3612	return compoundflag;
3613	}
3614
3615	// return the forbidden words control flag
3616	FLAGunsigned short AffixMgr::get_forbiddenword() const {
3617	return forbiddenword;
3618	}
3619
3620	// return the forbidden words control flag
3621	FLAGunsigned short AffixMgr::get_nosuggest() const {
3622	return nosuggest;
3623	}
3624
3625	// return the forbidden words control flag
3626	FLAGunsigned short AffixMgr::get_nongramsuggest() const {
3627	return nongramsuggest;
3628	}
3629
3630	// return the substandard root/affix control flag
3631	FLAGunsigned short AffixMgr::get_substandard() const {
3632	return substandard;
3633	}
3634
3635	// return the forbidden words flag modify flag
3636	FLAGunsigned short AffixMgr::get_needaffix() const {
3637	return needaffix;
3638	}
3639
3640	// return the onlyincompound flag
3641	FLAGunsigned short AffixMgr::get_onlyincompound() const {
3642	return onlyincompound;
3643	}
3644
3645	// return the value of suffix
3646	const std::string& AffixMgr::get_version() const {
3647	return version;
3648	}
3649
3650	// utility method to look up root words in hash table
3651	struct hentry* AffixMgr::lookup(const char* word) {
3652	struct hentry* he = NULL__null;
3653	for (size_t i = 0; i < alldic.size() && !he; ++i) {
3654	he = alldic[i]->lookup(word);
3655	}
3656	return he;
3657	}
3658
3659	// return the value of suffix
3660	int AffixMgr::have_contclass() const {
3661	return havecontclass;
3662	}
3663
3664	// return utf8
3665	int AffixMgr::get_utf8() const {
3666	return utf8;
3667	}
3668
3669	int AffixMgr::get_maxngramsugs(void) const {
3670	return maxngramsugs;
3671	}
3672
3673	int AffixMgr::get_maxcpdsugs(void) const {
3674	return maxcpdsugs;
3675	}
3676
3677	int AffixMgr::get_maxdiff(void) const {
3678	return maxdiff;
3679	}
3680
3681	int AffixMgr::get_onlymaxdiff(void) const {
3682	return onlymaxdiff;
3683	}
3684
3685	// return nosplitsugs
3686	int AffixMgr::get_nosplitsugs(void) const {
3687	return nosplitsugs;
3688	}
3689
3690	// return sugswithdots
3691	int AffixMgr::get_sugswithdots(void) const {
3692	return sugswithdots;
3693	}
3694
3695	/* parse flag */
3696	bool AffixMgr::parse_flag(const std::string& line, unsigned short* out, FileMgr* af) {
3697	if (out != FLAG_NULL0x00 && !(out >= DEFAULTFLAGS65510)) {
3698	HUNSPELL_WARNING(
3699	stderrstderr,
3700	"error: line %d: multiple definitions of an affix file parameter\n",
3701	af->getlinenum());
3702	return false;
3703	}
3704	std::string s;
3705	if (!parse_string(line, s, af->getlinenum()))
3706	return false;
3707	*out = pHMgr->decode_flag(s.c_str());
3708	return true;
3709	}
3710
3711	/* parse num */
3712	bool AffixMgr::parse_num(const std::string& line, int* out, FileMgr* af) {
3713	if (*out != -1) {
3714	HUNSPELL_WARNING(
3715	stderrstderr,
3716	"error: line %d: multiple definitions of an affix file parameter\n",
3717	af->getlinenum());
3718	return false;
3719	}
3720	std::string s;
3721	if (!parse_string(line, s, af->getlinenum()))
3722	return false;
3723	*out = atoi(s.c_str());
3724	return true;
3725	}
3726
3727	/* parse in the max syllablecount of compound words and */
3728	bool AffixMgr::parse_cpdsyllable(const std::string& line, FileMgr* af) {
3729	int i = 0;
3730	int np = 0;
3731	std::string::const_iterator iter = line.begin();
3732	std::string::const_iterator start_piece = mystrsep(line, iter);
3733	while (start_piece != line.end()) {
3734	switch (i) {
3735	case 0: {
3736	np++;
3737	break;
3738	}
3739	case 1: {
3740	cpdmaxsyllable = atoi(std::string(start_piece, iter).c_str());
3741	np++;
3742	break;
3743	}
3744	case 2: {
3745	if (!utf8) {
3746	cpdvowels.assign(start_piece, iter);
3747	std::sort(cpdvowels.begin(), cpdvowels.end());
3748	} else {
3749	std::string piece(start_piece, iter);
3750	u8_u16(cpdvowels_utf16, piece);
3751	std::sort(cpdvowels_utf16.begin(), cpdvowels_utf16.end());
3752	}
3753	np++;
3754	break;
3755	}
3756	default:
3757	break;
3758	}
3759	++i;
3760	start_piece = mystrsep(line, iter);
3761	}
3762	if (np < 2) {
3763	HUNSPELL_WARNING(stderrstderr,
3764	"error: line %d: missing compoundsyllable information\n",
3765	af->getlinenum());
3766	return false;
3767	}
3768	if (np == 2)
3769	cpdvowels = "AEIOUaeiou";
3770	return true;
3771	}
3772
3773	bool AffixMgr::parse_convtable(const std::string& line,
3774	FileMgr* af,
3775	RepList** rl,
3776	const std::string& keyword) {
3777	if (*rl) {
3778	HUNSPELL_WARNING(stderrstderr, "error: line %d: multiple table definitions\n",
3779	af->getlinenum());
3780	return false;
3781	}
3782	int i = 0;
3783	int np = 0;
3784	int numrl = 0;
3785	std::string::const_iterator iter = line.begin();
3786	std::string::const_iterator start_piece = mystrsep(line, iter);
3787	while (start_piece != line.end()) {
3788	switch (i) {
3789	case 0: {
3790	np++;
3791	break;
3792	}
3793	case 1: {
3794	numrl = atoi(std::string(start_piece, iter).c_str());
3795	if (numrl < 1) {
3796	HUNSPELL_WARNING(stderrstderr, "error: line %d: incorrect entry number\n",
3797	af->getlinenum());
3798	return false;
3799	}
3800	*rl = new RepList(numrl);
3801	if (!*rl)
3802	return false;
3803	np++;
3804	break;
3805	}
3806	default:
3807	break;
3808	}
3809	++i;
3810	start_piece = mystrsep(line, iter);
3811	}
3812	if (np != 2) {
3813	HUNSPELL_WARNING(stderrstderr, "error: line %d: missing data\n",
3814	af->getlinenum());
3815	return false;
3816	}
3817
3818	/* now parse the num lines to read in the remainder of the table */
3819	for (int j = 0; j < numrl; j++) {
3820	std::string nl;
3821	if (!af->getline(nl))
3822	return false;
3823	mychomp(nl);
3824	i = 0;
3825	std::string pattern;
3826	std::string pattern2;
3827	iter = nl.begin();
3828	start_piece = mystrsep(nl, iter);
3829	while (start_piece != nl.end()) {
3830	{
3831	switch (i) {
3832	case 0: {
3833	if (nl.compare(start_piece - nl.begin(), keyword.size(), keyword, 0, keyword.size()) != 0) {
3834	HUNSPELL_WARNING(stderrstderr, "error: line %d: table is corrupt\n",
3835	af->getlinenum());
3836	delete *rl;
3837	*rl = NULL__null;
3838	return false;
3839	}
3840	break;
3841	}
3842	case 1: {
3843	pattern.assign(start_piece, iter);
3844	break;
3845	}
3846	case 2: {
3847	pattern2.assign(start_piece, iter);
3848	break;
3849	}
3850	default:
3851	break;
3852	}
3853	++i;
3854	}
3855	start_piece = mystrsep(nl, iter);
3856	}
3857	if (pattern.empty() \|\| pattern2.empty()) {
3858	HUNSPELL_WARNING(stderrstderr, "error: line %d: table is corrupt\n",
3859	af->getlinenum());
3860	return false;
3861	}
3862	(*rl)->add(pattern, pattern2);
3863	}
3864	return true;
3865	}
3866
3867	/* parse in the typical fault correcting table */
3868	bool AffixMgr::parse_phonetable(const std::string& line, FileMgr* af) {
3869	if (phone) {
3870	HUNSPELL_WARNING(stderrstderr, "error: line %d: multiple table definitions\n",
3871	af->getlinenum());
3872	return false;
3873	}
3874	int num = -1;
3875	int i = 0;
3876	int np = 0;
3877	std::string::const_iterator iter = line.begin();
3878	std::string::const_iterator start_piece = mystrsep(line, iter);
3879	while (start_piece != line.end()) {
3880	switch (i) {
3881	case 0: {
3882	np++;
3883	break;
3884	}
3885	case 1: {
3886	num = atoi(std::string(start_piece, iter).c_str());
3887	if (num < 1) {
3888	HUNSPELL_WARNING(stderrstderr, "error: line %d: bad entry number\n",
3889	af->getlinenum());
3890	return false;
3891	}
3892	phone = new phonetable;
3893	phone->utf8 = (char)utf8;
3894	np++;
3895	break;
3896	}
3897	default:
3898	break;
3899	}
3900	++i;
3901	start_piece = mystrsep(line, iter);
3902	}
3903	if (np != 2) {
3904	HUNSPELL_WARNING(stderrstderr, "error: line %d: missing data\n",
3905	af->getlinenum());
3906	return false;
3907	}
3908
3909	/* now parse the phone->num lines to read in the remainder of the table */
3910	for (int j = 0; j < num; ++j) {
3911	std::string nl;
3912	if (!af->getline(nl))
3913	return false;
3914	mychomp(nl);
3915	i = 0;
3916	const size_t old_size = phone->rules.size();
3917	iter = nl.begin();
3918	start_piece = mystrsep(nl, iter);
3919	while (start_piece != nl.end()) {
3920	{
3921	switch (i) {
3922	case 0: {
3923	if (nl.compare(start_piece - nl.begin(), 5, "PHONE", 5) != 0) {
3924	HUNSPELL_WARNING(stderrstderr, "error: line %d: table is corrupt\n",
3925	af->getlinenum());
3926	return false;
3927	}
3928	break;
3929	}
3930	case 1: {
3931	phone->rules.push_back(std::string(start_piece, iter));
3932	break;
3933	}
3934	case 2: {
3935	phone->rules.push_back(std::string(start_piece, iter));
3936	mystrrep(phone->rules.back(), "_", "");
3937	break;
3938	}
3939	default:
3940	break;
3941	}
3942	++i;
3943	}
3944	start_piece = mystrsep(nl, iter);
3945	}
3946	if (phone->rules.size() != old_size + 2) {
3947	HUNSPELL_WARNING(stderrstderr, "error: line %d: table is corrupt\n",
3948	af->getlinenum());
3949	phone->rules.clear();
3950	return false;
3951	}
3952	}
3953	phone->rules.push_back("");
3954	phone->rules.push_back("");
3955	init_phonet_hash(*phone);
3956	return true;
3957	}
3958
3959	/* parse in the checkcompoundpattern table */
3960	bool AffixMgr::parse_checkcpdtable(const std::string& line, FileMgr* af) {
3961	if (parsedcheckcpd) {
3962	HUNSPELL_WARNING(stderrstderr, "error: line %d: multiple table definitions\n",
3963	af->getlinenum());
3964	return false;
3965	}
3966	parsedcheckcpd = true;
3967	int numcheckcpd = -1;
3968	int i = 0;
3969	int np = 0;
3970	std::string::const_iterator iter = line.begin();
3971	std::string::const_iterator start_piece = mystrsep(line, iter);
3972	while (start_piece != line.end()) {
3973	switch (i) {
3974	case 0: {
3975	np++;
3976	break;
3977	}
3978	case 1: {
3979	numcheckcpd = atoi(std::string(start_piece, iter).c_str());
3980	if (numcheckcpd < 1) {
3981	HUNSPELL_WARNING(stderrstderr, "error: line %d: bad entry number\n",
3982	af->getlinenum());
3983	return false;
3984	}
3985	checkcpdtable.reserve(numcheckcpd);
3986	np++;
3987	break;
3988	}
3989	default:
3990	break;
3991	}
3992	++i;
3993	start_piece = mystrsep(line, iter);
3994	}
3995	if (np != 2) {
3996	HUNSPELL_WARNING(stderrstderr, "error: line %d: missing data\n",
3997	af->getlinenum());
3998	return false;
3999	}
4000
4001	/* now parse the numcheckcpd lines to read in the remainder of the table */
4002	for (int j = 0; j < numcheckcpd; ++j) {
4003	std::string nl;
4004	if (!af->getline(nl))
4005	return false;
4006	mychomp(nl);
4007	i = 0;
4008	checkcpdtable.push_back(patentry());
4009	iter = nl.begin();
4010	start_piece = mystrsep(nl, iter);
4011	while (start_piece != nl.end()) {
4012	switch (i) {
4013	case 0: {
4014	if (nl.compare(start_piece - nl.begin(), 20, "CHECKCOMPOUNDPATTERN", 20) != 0) {
4015	HUNSPELL_WARNING(stderrstderr, "error: line %d: table is corrupt\n",
4016	af->getlinenum());
4017	return false;
4018	}
4019	break;
4020	}
4021	case 1: {
4022	checkcpdtable.back().pattern.assign(start_piece, iter);
4023	size_t slash_pos = checkcpdtable.back().pattern.find('/');
4024	if (slash_pos != std::string::npos) {
4025	std::string chunk(checkcpdtable.back().pattern, slash_pos + 1);
4026	checkcpdtable.back().pattern.resize(slash_pos);
4027	checkcpdtable.back().cond = pHMgr->decode_flag(chunk.c_str());
4028	}
4029	break;
4030	}
4031	case 2: {
4032	checkcpdtable.back().pattern2.assign(start_piece, iter);
4033	size_t slash_pos = checkcpdtable.back().pattern2.find('/');
4034	if (slash_pos != std::string::npos) {
4035	std::string chunk(checkcpdtable.back().pattern2, slash_pos + 1);
4036	checkcpdtable.back().pattern2.resize(slash_pos);
4037	checkcpdtable.back().cond2 = pHMgr->decode_flag(chunk.c_str());
4038	}
4039	break;
4040	}
4041	case 3: {
4042	checkcpdtable.back().pattern3.assign(start_piece, iter);
4043	simplifiedcpd = 1;
4044	break;
4045	}
4046	default:
4047	break;
4048	}
4049	i++;
4050	start_piece = mystrsep(nl, iter);
4051	}
4052	}
4053	return true;
4054	}
4055
4056	/* parse in the compound rule table */
4057	bool AffixMgr::parse_defcpdtable(const std::string& line, FileMgr* af) {
4058	if (parseddefcpd) {
4059	HUNSPELL_WARNING(stderrstderr, "error: line %d: multiple table definitions\n",
4060	af->getlinenum());
4061	return false;
4062	}
4063	parseddefcpd = true;
4064	int numdefcpd = -1;
4065	int i = 0;
4066	int np = 0;
4067	std::string::const_iterator iter = line.begin();
4068	std::string::const_iterator start_piece = mystrsep(line, iter);
4069	while (start_piece != line.end()) {
4070	switch (i) {
4071	case 0: {
4072	np++;
4073	break;
4074	}
4075	case 1: {
4076	numdefcpd = atoi(std::string(start_piece, iter).c_str());
4077	if (numdefcpd < 1) {
4078	HUNSPELL_WARNING(stderrstderr, "error: line %d: bad entry number\n",
4079	af->getlinenum());
4080	return false;
4081	}
4082	defcpdtable.reserve(numdefcpd);
4083	np++;
4084	break;
4085	}
4086	default:
4087	break;
4088	}
4089	++i;
4090	start_piece = mystrsep(line, iter);
4091	}
4092	if (np != 2) {
4093	HUNSPELL_WARNING(stderrstderr, "error: line %d: missing data\n",
4094	af->getlinenum());
4095	return false;
4096	}
4097
4098	/* now parse the numdefcpd lines to read in the remainder of the table */
4099	for (int j = 0; j < numdefcpd; ++j) {
4100	std::string nl;
4101	if (!af->getline(nl))
4102	return false;
4103	mychomp(nl);
4104	i = 0;
4105	defcpdtable.push_back(flagentry());
4106	iter = nl.begin();
4107	start_piece = mystrsep(nl, iter);
4108	while (start_piece != nl.end()) {
4109	switch (i) {
4110	case 0: {
4111	if (nl.compare(start_piece - nl.begin(), 12, "COMPOUNDRULE", 12) != 0) {
4112	HUNSPELL_WARNING(stderrstderr, "error: line %d: table is corrupt\n",
4113	af->getlinenum());
4114	numdefcpd = 0;
4115	return false;
4116	}
4117	break;
4118	}
4119	case 1: { // handle parenthesized flags
4120	if (std::find(start_piece, iter, '(') != iter) {
4121	for (std::string::const_iterator k = start_piece; k != iter; ++k) {
4122	std::string::const_iterator chb = k;
4123	std::string::const_iterator che = k + 1;
4124	if (*k == '(') {
4125	std::string::const_iterator parpos = std::find(k, iter, ')');
4126	if (parpos != iter) {
4127	chb = k + 1;
4128	che = parpos;
4129	k = parpos;
4130	}
4131	}
4132
4133	if (chb == '' \|\| *chb == '?') {
4134	defcpdtable.back().push_back((FLAGunsigned short)*chb);
4135	} else {
4136	pHMgr->decode_flags(defcpdtable.back(), std::string(chb, che), af);
4137	}
4138	}
4139	} else {
4140	pHMgr->decode_flags(defcpdtable.back(), std::string(start_piece, iter), af);
4141	}
4142	break;
4143	}
4144	default:
4145	break;
4146	}
4147	++i;
4148	start_piece = mystrsep(nl, iter);
4149	}
4150	if (defcpdtable.back().empty()) {
4151	HUNSPELL_WARNING(stderrstderr, "error: line %d: table is corrupt\n",
4152	af->getlinenum());
4153	return false;
4154	}
4155	}
4156	return true;
4157	}
4158
4159	/* parse in the character map table */
4160	bool AffixMgr::parse_maptable(const std::string& line, FileMgr* af) {
4161	if (parsedmaptable) {
4162	HUNSPELL_WARNING(stderrstderr, "error: line %d: multiple table definitions\n",
4163	af->getlinenum());
4164	return false;
4165	}
4166	parsedmaptable = true;
4167	int nummap = -1;
4168	int i = 0;
4169	int np = 0;
4170	std::string::const_iterator iter = line.begin();
4171	std::string::const_iterator start_piece = mystrsep(line, iter);
4172	while (start_piece != line.end()) {
4173	switch (i) {
4174	case 0: {
4175	np++;
4176	break;
4177	}
4178	case 1: {
4179	nummap = atoi(std::string(start_piece, iter).c_str());
4180	if (nummap < 1) {
4181	HUNSPELL_WARNING(stderrstderr, "error: line %d: bad entry number\n",
4182	af->getlinenum());
4183	return false;
4184	}
4185	maptable.reserve(nummap);
4186	np++;
4187	break;
4188	}
4189	default:
4190	break;
4191	}
4192	++i;
4193	start_piece = mystrsep(line, iter);
4194	}
4195	if (np != 2) {
4196	HUNSPELL_WARNING(stderrstderr, "error: line %d: missing data\n",
4197	af->getlinenum());
4198	return false;
4199	}
4200
4201	/* now parse the nummap lines to read in the remainder of the table */
4202	for (int j = 0; j < nummap; ++j) {
4203	std::string nl;
4204	if (!af->getline(nl))
4205	return false;
4206	mychomp(nl);
4207	i = 0;
4208	maptable.push_back(mapentry());
4209	iter = nl.begin();
4210	start_piece = mystrsep(nl, iter);
4211	while (start_piece != nl.end()) {
4212	switch (i) {
4213	case 0: {
4214	if (nl.compare(start_piece - nl.begin(), 3, "MAP", 3) != 0) {
4215	HUNSPELL_WARNING(stderrstderr, "error: line %d: table is corrupt\n",
4216	af->getlinenum());
4217	nummap = 0;
4218	return false;
4219	}
4220	break;
4221	}
4222	case 1: {
4223	for (std::string::const_iterator k = start_piece; k != iter; ++k) {
4224	std::string::const_iterator chb = k;
4225	std::string::const_iterator che = k + 1;
4226	if (*k == '(') {
4227	std::string::const_iterator parpos = std::find(k, iter, ')');
4228	if (parpos != iter) {
4229	chb = k + 1;
4230	che = parpos;
4231	k = parpos;
4232	}
4233	} else {
4234	if (utf8 && (*k & 0xc0) == 0xc0) {
4235	++k;
4236	while (k != iter && (*k & 0xc0) == 0x80)
4237	++k;
4238	che = k;
4239	--k;
4240	}
4241	}
4242	maptable.back().push_back(std::string(chb, che));
4243	}
4244	break;
4245	}
4246	default:
4247	break;
4248	}
4249	++i;
4250	start_piece = mystrsep(nl, iter);
4251	}
4252	if (maptable.back().empty()) {
4253	HUNSPELL_WARNING(stderrstderr, "error: line %d: table is corrupt\n",
4254	af->getlinenum());
4255	return false;
4256	}
4257	}
4258	return true;
4259	}
4260
4261	/* parse in the word breakpoint table */
4262	bool AffixMgr::parse_breaktable(const std::string& line, FileMgr* af) {
4263	if (parsedbreaktable) {
4264	HUNSPELL_WARNING(stderrstderr, "error: line %d: multiple table definitions\n",
4265	af->getlinenum());
4266	return false;
4267	}
4268	parsedbreaktable = true;
4269	int numbreak = -1;
4270	int i = 0;
4271	int np = 0;
4272	std::string::const_iterator iter = line.begin();
4273	std::string::const_iterator start_piece = mystrsep(line, iter);
4274	while (start_piece != line.end()) {
4275	switch (i) {
4276	case 0: {
4277	np++;
4278	break;
4279	}
4280	case 1: {
4281	numbreak = atoi(std::string(start_piece, iter).c_str());
4282	if (numbreak < 0) {
4283	HUNSPELL_WARNING(stderrstderr, "error: line %d: bad entry number\n",
4284	af->getlinenum());
4285	return false;
4286	}
4287	if (numbreak == 0)
4288	return true;
4289	breaktable.reserve(numbreak);
4290	np++;
4291	break;
4292	}
4293	default:
4294	break;
4295	}
4296	++i;
4297	start_piece = mystrsep(line, iter);
4298	}
4299	if (np != 2) {
4300	HUNSPELL_WARNING(stderrstderr, "error: line %d: missing data\n",
4301	af->getlinenum());
4302	return false;
4303	}
4304
4305	/* now parse the numbreak lines to read in the remainder of the table */
4306	for (int j = 0; j < numbreak; ++j) {
4307	std::string nl;
4308	if (!af->getline(nl))
4309	return false;
4310	mychomp(nl);
4311	i = 0;
4312	iter = nl.begin();
4313	start_piece = mystrsep(nl, iter);
4314	while (start_piece != nl.end()) {
4315	switch (i) {
4316	case 0: {
4317	if (nl.compare(start_piece - nl.begin(), 5, "BREAK", 5) != 0) {
4318	HUNSPELL_WARNING(stderrstderr, "error: line %d: table is corrupt\n",
4319	af->getlinenum());
4320	numbreak = 0;
	Value stored to 'numbreak' is never read
4321	return false;
4322	}
4323	break;
4324	}
4325	case 1: {
4326	breaktable.push_back(std::string(start_piece, iter));
4327	break;
4328	}
4329	default:
4330	break;
4331	}
4332	++i;
4333	start_piece = mystrsep(nl, iter);
4334	}
4335	}
4336
4337	if (breaktable.size() != static_cast<size_t>(numbreak)) {
4338	HUNSPELL_WARNING(stderrstderr, "error: line %d: table is corrupt\n",
4339	af->getlinenum());
4340	return false;
4341	}
4342
4343	return true;
4344	}
4345
4346	void AffixMgr::reverse_condition(std::string& piece) {
4347	if (piece.empty())
4348	return;
4349
4350	int neg = 0;
4351	for (std::string::reverse_iterator k = piece.rbegin(); k != piece.rend(); ++k) {
4352	switch (*k) {
4353	case '[': {
4354	if (neg)
4355	*(k - 1) = '[';
4356	else
4357	*k = ']';
4358	break;
4359	}
4360	case ']': {
4361	*k = '[';
4362	if (neg)
4363	*(k - 1) = '^';
4364	neg = 0;
4365	break;
4366	}
4367	case '^': {
4368	if (*(k - 1) == ']')
4369	neg = 1;
4370	else if (neg)
4371	(k - 1) = k;
4372	break;
4373	}
4374	default: {
4375	if (neg)
4376	(k - 1) = k;
4377	}
4378	}
4379	}
4380	}
4381
4382	class entries_container {
4383	std::vector<AffEntry*> entries;
4384	AffixMgr* m_mgr;
4385	char m_at;
4386	public:
4387	entries_container(char at, AffixMgr* mgr)
4388	: m_mgr(mgr)
4389	, m_at(at) {
4390	}
4391	void release() {
4392	entries.clear();
4393	}
4394	void initialize(int numents,
4395	char opts, unsigned short aflag) {
4396	entries.reserve(numents);
4397
4398	if (m_at == 'P') {
4399	entries.push_back(new PfxEntry(m_mgr));
4400	} else {
4401	entries.push_back(new SfxEntry(m_mgr));
4402	}
4403
4404	entries.back()->opts = opts;
4405	entries.back()->aflag = aflag;
4406	}
4407
4408	AffEntry* add_entry(char opts) {
4409	if (m_at == 'P') {
4410	entries.push_back(new PfxEntry(m_mgr));
4411	} else {
4412	entries.push_back(new SfxEntry(m_mgr));
4413	}
4414	AffEntry* ret = entries.back();
4415	ret->opts = entries[0]->opts & opts;
4416	return ret;
4417	}
4418
4419	AffEntry* first_entry() {
4420	return entries.empty() ? NULL__null : entries[0];
4421	}
4422
4423	~entries_container() {
4424	for (size_t i = 0; i < entries.size(); ++i) {
4425	delete entries[i];
4426	}
4427	}
4428
4429	std::vector<AffEntry*>::iterator begin() { return entries.begin(); }
4430	std::vector<AffEntry*>::iterator end() { return entries.end(); }
4431	};
4432
4433	bool AffixMgr::parse_affix(const std::string& line,
4434	const char at,
4435	FileMgr* af,
4436	char* dupflags) {
4437	int numents = 0; // number of AffEntry structures to parse
4438
4439	unsigned short aflag = 0; // affix char identifier
4440
4441	char ff = 0;
4442	entries_container affentries(at, this);
4443
4444	int i = 0;
4445
4446	// checking lines with bad syntax
4447	#ifdef DEBUG1
4448	int basefieldnum = 0;
4449	#endif
4450
4451	// split affix header line into pieces
4452
4453	int np = 0;
4454	std::string::const_iterator iter = line.begin();
4455	std::string::const_iterator start_piece = mystrsep(line, iter);
4456	while (start_piece != line.end()) {
4457	switch (i) {
4458	// piece 1 - is type of affix
4459	case 0: {
4460	np++;
4461	break;
4462	}
4463
4464	// piece 2 - is affix char
4465	case 1: {
4466	np++;
4467	aflag = pHMgr->decode_flag(std::string(start_piece, iter).c_str());
4468	if (((at == 'S') && (dupflags[aflag] & dupSFX(1 << 0))) \|\|
4469	((at == 'P') && (dupflags[aflag] & dupPFX(1 << 1)))) {
4470	HUNSPELL_WARNING(
4471	stderrstderr,
4472	"error: line %d: multiple definitions of an affix flag\n",
4473	af->getlinenum());
4474	}
4475	dupflags[aflag] += (char)((at == 'S') ? dupSFX(1 << 0) : dupPFX(1 << 1));
4476	break;
4477	}
4478	// piece 3 - is cross product indicator
4479	case 2: {
4480	np++;
4481	if (*start_piece == 'Y')
4482	ff = aeXPRODUCT(1 << 0);
4483	break;
4484	}
4485
4486	// piece 4 - is number of affentries
4487	case 3: {
4488	np++;
4489	numents = atoi(std::string(start_piece, iter).c_str());
4490	if ((numents <= 0) \|\| ((std::numeric_limits<size_t>::max() /
4491	sizeof(AffEntry)) < static_cast<size_t>(numents))) {
4492	char* err = pHMgr->encode_flag(aflag);
4493	if (err) {
4494	HUNSPELL_WARNING(stderrstderr, "error: line %d: bad entry number\n",
4495	af->getlinenum());
4496	free(err)HunspellAllocator::CountingFree(err);
4497	}
4498	return false;
4499	}
4500
4501	char opts = ff;
4502	if (utf8)
4503	opts \|= aeUTF8(1 << 1);
4504	if (pHMgr->is_aliasf())
4505	opts \|= aeALIASF(1 << 2);
4506	if (pHMgr->is_aliasm())
4507	opts \|= aeALIASM(1 << 3);
4508	affentries.initialize(numents, opts, aflag);
4509	}
4510
4511	default:
4512	break;
4513	}
4514	++i;
4515	start_piece = mystrsep(line, iter);
4516	}
4517	// check to make sure we parsed enough pieces
4518	if (np != 4) {
4519	char* err = pHMgr->encode_flag(aflag);
4520	if (err) {
4521	HUNSPELL_WARNING(stderrstderr, "error: line %d: missing data\n",
4522	af->getlinenum());
4523	free(err)HunspellAllocator::CountingFree(err);
4524	}
4525	return false;
4526	}
4527
4528	// now parse numents affentries for this affix
4529	AffEntry* entry = affentries.first_entry();
4530	for (int ent = 0; ent < numents; ++ent) {
4531	std::string nl;
4532	if (!af->getline(nl))
4533	return false;
4534	mychomp(nl);
4535
4536	iter = nl.begin();
4537	i = 0;
4538	np = 0;
4539
4540	// split line into pieces
4541	start_piece = mystrsep(nl, iter);
4542	while (start_piece != nl.end()) {
4543	switch (i) {
4544	// piece 1 - is type
4545	case 0: {
4546	np++;
4547	if (ent != 0)
4548	entry = affentries.add_entry((char)(aeXPRODUCT(1 << 0) + aeUTF8(1 << 1) + aeALIASF(1 << 2) + aeALIASM(1 << 3)));
4549	break;
4550	}
4551
4552	// piece 2 - is affix char
4553	case 1: {
4554	np++;
4555	std::string chunk(start_piece, iter);
4556	if (pHMgr->decode_flag(chunk.c_str()) != aflag) {
4557	char* err = pHMgr->encode_flag(aflag);
4558	if (err) {
4559	HUNSPELL_WARNING(stderrstderr,
4560	"error: line %d: affix %s is corrupt\n",
4561	af->getlinenum(), err);
4562	free(err)HunspellAllocator::CountingFree(err);
4563	}
4564	return false;
4565	}
4566
4567	if (ent != 0) {
4568	AffEntry* start_entry = affentries.first_entry();
4569	entry->aflag = start_entry->aflag;
4570	}
4571	break;
4572	}
4573
4574	// piece 3 - is string to strip or 0 for null
4575	case 2: {
4576	np++;
4577	entry->strip = std::string(start_piece, iter);
4578	if (complexprefixes) {
4579	if (utf8)
4580	reverseword_utf(entry->strip);
4581	else
4582	reverseword(entry->strip);
4583	}
4584	if (entry->strip.compare("0") == 0) {
4585	entry->strip.clear();
4586	}
4587	break;
4588	}
4589
4590	// piece 4 - is affix string or 0 for null
4591	case 3: {
4592	entry->morphcode = NULL__null;
4593	entry->contclass = NULL__null;
4594	entry->contclasslen = 0;
4595	np++;
4596	std::string::const_iterator dash = std::find(start_piece, iter, '/');
4597	if (dash != iter) {
4598	entry->appnd = std::string(start_piece, dash);
4599	std::string dash_str(dash + 1, iter);
4600
4601	if (!ignorechars.empty() && !has_no_ignored_chars(entry->appnd, ignorechars)) {
4602	if (utf8) {
4603	remove_ignored_chars_utf(entry->appnd, ignorechars_utf16);
4604	} else {
4605	remove_ignored_chars(entry->appnd, ignorechars);
4606	}
4607	}
4608
4609	if (complexprefixes) {
4610	if (utf8)
4611	reverseword_utf(entry->appnd);
4612	else
4613	reverseword(entry->appnd);
4614	}
4615
4616	if (pHMgr->is_aliasf()) {
4617	int index = atoi(dash_str.c_str());
4618	entry->contclasslen = (unsigned short)pHMgr->get_aliasf(
4619	index, &(entry->contclass), af);
4620	if (!entry->contclasslen)
4621	HUNSPELL_WARNING(stderrstderr,
4622	"error: bad affix flag alias: \"%s\"\n",
4623	dash_str.c_str());
4624	} else {
4625	entry->contclasslen = (unsigned short)pHMgr->decode_flags(
4626	&(entry->contclass), dash_str.c_str(), af);
4627	std::sort(entry->contclass, entry->contclass + entry->contclasslen);
4628	}
4629
4630	havecontclass = 1;
4631	for (unsigned short _i = 0; _i < entry->contclasslen; _i++) {
4632	contclasses[(entry->contclass)[_i]] = 1;
4633	}
4634	} else {
4635	entry->appnd = std::string(start_piece, iter);
4636
4637	if (!ignorechars.empty() && !has_no_ignored_chars(entry->appnd, ignorechars)) {
4638	if (utf8) {
4639	remove_ignored_chars_utf(entry->appnd, ignorechars_utf16);
4640	} else {
4641	remove_ignored_chars(entry->appnd, ignorechars);
4642	}
4643	}
4644
4645	if (complexprefixes) {
4646	if (utf8)
4647	reverseword_utf(entry->appnd);
4648	else
4649	reverseword(entry->appnd);
4650	}
4651	}
4652
4653	if (entry->appnd.compare("0") == 0) {
4654	entry->appnd.clear();
4655	}
4656	break;
4657	}
4658
4659	// piece 5 - is the conditions descriptions
4660	case 4: {
4661	std::string chunk(start_piece, iter);
4662	np++;
4663	if (complexprefixes) {
4664	if (utf8)
4665	reverseword_utf(chunk);
4666	else
4667	reverseword(chunk);
4668	reverse_condition(chunk);
4669	}
4670	if (!entry->strip.empty() && chunk != "." &&
4671	redundant_condition(at, entry->strip.c_str(), entry->strip.size(), chunk.c_str(),
4672	af->getlinenum()))
4673	chunk = ".";
4674	if (at == 'S') {
4675	reverseword(chunk);
4676	reverse_condition(chunk);
4677	}
4678	if (encodeit(*entry, chunk.c_str()))
4679	return false;
4680	break;
4681	}
4682
4683	case 5: {
4684	std::string chunk(start_piece, iter);
4685	np++;
4686	if (pHMgr->is_aliasm()) {
4687	int index = atoi(chunk.c_str());
4688	entry->morphcode = pHMgr->get_aliasm(index);
4689	} else {
4690	if (complexprefixes) { // XXX - fix me for morph. gen.
4691	if (utf8)
4692	reverseword_utf(chunk);
4693	else
4694	reverseword(chunk);
4695	}
4696	// add the remaining of the line
4697	std::string::const_iterator end = nl.end();
4698	if (iter != end) {
4699	chunk.append(iter, end);
4700	}
4701	entry->morphcode = mystrdup(chunk.c_str());
4702	if (!entry->morphcode)
4703	return false;
4704	}
4705	break;
4706	}
4707	default:
4708	break;
4709	}
4710	i++;
4711	start_piece = mystrsep(nl, iter);
4712	}
4713	// check to make sure we parsed enough pieces
4714	if (np < 4) {
4715	char* err = pHMgr->encode_flag(aflag);
4716	if (err) {
4717	HUNSPELL_WARNING(stderrstderr, "error: line %d: affix %s is corrupt\n",
4718	af->getlinenum(), err);
4719	free(err)HunspellAllocator::CountingFree(err);
4720	}
4721	return false;
4722	}
4723
4724	#ifdef DEBUG1
4725	// detect unnecessary fields, excepting comments
4726	if (basefieldnum) {
4727	int fieldnum =
4728	!(entry->morphcode) ? 5 : ((*(entry->morphcode) == '#') ? 5 : 6);
4729	if (fieldnum != basefieldnum)
4730	HUNSPELL_WARNING(stderrstderr, "warning: line %d: bad field number\n",
4731	af->getlinenum());
4732	} else {
4733	basefieldnum =
4734	!(entry->morphcode) ? 5 : ((*(entry->morphcode) == '#') ? 5 : 6);
4735	}
4736	#endif
4737	}
4738
4739	// now create SfxEntry or PfxEntry objects and use links to
4740	// build an ordered (sorted by affix string) list
4741	std::vector<AffEntry*>::iterator start = affentries.begin();
4742	std::vector<AffEntry*>::iterator end = affentries.end();
4743	for (std::vector<AffEntry*>::iterator affentry = start; affentry != end; ++affentry) {
4744	if (at == 'P') {
4745	build_pfxtree(static_cast<PfxEntry>(affentry));
4746	} else {
4747	build_sfxtree(static_cast<SfxEntry>(affentry));
4748	}
4749	}
4750
4751	//contents belong to AffixMgr now
4752	affentries.release();
4753
4754	return true;
4755	}
4756
4757	int AffixMgr::redundant_condition(char ft,
4758	const char* strip,
4759	int stripl,
4760	const char* cond,
4761	int linenum) {
4762	int condl = strlen(cond);
4763	int i;
4764	int j;
4765	int neg;
4766	int in;
4767	if (ft == 'P') { // prefix
4768	if (strncmp(strip, cond, condl) == 0)
4769	return 1;
4770	if (utf8) {
4771	} else {
4772	for (i = 0, j = 0; (i < stripl) && (j < condl); i++, j++) {
4773	if (cond[j] != '[') {
4774	if (cond[j] != strip[i]) {
4775	HUNSPELL_WARNING(stderrstderr,
4776	"warning: line %d: incompatible stripping "
4777	"characters and condition\n",
4778	linenum);
4779	return 0;
4780	}
4781	} else {
4782	neg = (cond[j + 1] == '^') ? 1 : 0;
4783	in = 0;
4784	do {
4785	j++;
4786	if (strip[i] == cond[j])
4787	in = 1;
4788	} while ((j < (condl - 1)) && (cond[j] != ']'));
4789	if (j == (condl - 1) && (cond[j] != ']')) {
4790	HUNSPELL_WARNING(stderrstderr,
4791	"error: line %d: missing ] in condition:\n%s\n",
4792	linenum, cond);
4793	return 0;
4794	}
4795	if ((!neg && !in) \|\| (neg && in)) {
4796	HUNSPELL_WARNING(stderrstderr,
4797	"warning: line %d: incompatible stripping "
4798	"characters and condition\n",
4799	linenum);
4800	return 0;
4801	}
4802	}
4803	}
4804	if (j >= condl)
4805	return 1;
4806	}
4807	} else { // suffix
4808	if ((stripl >= condl) && strcmp(strip + stripl - condl, cond) == 0)
4809	return 1;
4810	if (utf8) {
4811	} else {
4812	for (i = stripl - 1, j = condl - 1; (i >= 0) && (j >= 0); i--, j--) {
4813	if (cond[j] != ']') {
4814	if (cond[j] != strip[i]) {
4815	HUNSPELL_WARNING(stderrstderr,
4816	"warning: line %d: incompatible stripping "
4817	"characters and condition\n",
4818	linenum);
4819	return 0;
4820	}
4821	} else {
4822	in = 0;
4823	do {
4824	j--;
4825	if (strip[i] == cond[j])
4826	in = 1;
4827	} while ((j > 0) && (cond[j] != '['));
4828	if ((j == 0) && (cond[j] != '[')) {
4829	HUNSPELL_WARNING(stderrstderr,
4830	"error: line: %d: missing ] in condition:\n%s\n",
4831	linenum, cond);
4832	return 0;
4833	}
4834	neg = (cond[j + 1] == '^') ? 1 : 0;
4835	if ((!neg && !in) \|\| (neg && in)) {
4836	HUNSPELL_WARNING(stderrstderr,
4837	"warning: line %d: incompatible stripping "
4838	"characters and condition\n",
4839	linenum);
4840	return 0;
4841	}
4842	}
4843	}
4844	if (j < 0)
4845	return 1;
4846	}
4847	}
4848	return 0;
4849	}
4850
4851	std::vector<std::string> AffixMgr::get_suffix_words(short unsigned* suff,
4852	int len,
4853	const char* root_word) {
4854	std::vector<std::string> slst;
4855	short unsigned* start_ptr = suff;
4856	for (int j = 0; j < SETSIZE256; j++) {
4857	SfxEntry* ptr = sStart[j];
4858	while (ptr) {
4859	suff = start_ptr;
4860	for (int i = 0; i < len; i++) {
4861	if ((*suff) == ptr->getFlag()) {
4862	std::string nw(root_word);
4863	nw.append(ptr->getAffix());
4864	hentry* ht = ptr->checkword(nw.c_str(), nw.size(), 0, NULL__null, 0, 0, 0);
4865	if (ht) {
4866	slst.push_back(nw);
4867	}
4868	}
4869	suff++;
4870	}
4871	ptr = ptr->getNext();
4872	}
4873	}
4874	return slst;
4875	}