Bug Summary

File:root/firefox-clang/intl/icu/source/i18n/uspoof.cpp
Warning:line 174, column 12
Called C++ object pointer is null

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name uspoof.cpp -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -fhalf-no-semantic-interposition -mframe-pointer=all -relaxed-aliasing -ffp-contract=off -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fdebug-compilation-dir=/root/firefox-clang/obj-x86_64-pc-linux-gnu/config/external/icu/i18n -fcoverage-compilation-dir=/root/firefox-clang/obj-x86_64-pc-linux-gnu/config/external/icu/i18n -resource-dir /usr/lib/llvm-21/lib/clang/21 -include /root/firefox-clang/config/gcc_hidden.h -include /root/firefox-clang/obj-x86_64-pc-linux-gnu/mozilla-config.h -I /root/firefox-clang/obj-x86_64-pc-linux-gnu/dist/system_wrappers -U _FORTIFY_SOURCE -D _FORTIFY_SOURCE=2 -D _GLIBCXX_ASSERTIONS -D DEBUG=1 -D U_I18N_IMPLEMENTATION -D _LIBCPP_DISABLE_DEPRECATION_WARNINGS -D U_USING_ICU_NAMESPACE=0 -D U_NO_DEFAULT_INCLUDE_UTF_HEADERS=1 -D U_HIDE_OBSOLETE_UTF_OLD_H=1 -D UCONFIG_NO_LEGACY_CONVERSION -D UCONFIG_NO_TRANSLITERATION -D UCONFIG_NO_REGULAR_EXPRESSIONS -D UCONFIG_NO_BREAK_ITERATION -D UCONFIG_NO_IDNA -D UCONFIG_NO_MF2 -D U_CHARSET_IS_UTF8 -D UNISTR_FROM_CHAR_EXPLICIT=explicit -D UNISTR_FROM_STRING_EXPLICIT=explicit -D U_ENABLE_DYLOAD=0 -D U_DEBUG=1 -I /root/firefox-clang/config/external/icu/i18n -I /root/firefox-clang/obj-x86_64-pc-linux-gnu/config/external/icu/i18n -I /root/firefox-clang/intl/icu/source/common -I /root/firefox-clang/mfbt/double-conversion -I /root/firefox-clang/obj-x86_64-pc-linux-gnu/dist/include -I /root/firefox-clang/obj-x86_64-pc-linux-gnu/dist/include/nspr -I /root/firefox-clang/obj-x86_64-pc-linux-gnu/dist/include/nss -D MOZILLA_CLIENT -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/14/../../../../include/c++/14 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/14/../../../../include/x86_64-linux-gnu/c++/14 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/14/../../../../include/c++/14/backward -internal-isystem /usr/lib/llvm-21/lib/clang/21/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/14/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-error=pessimizing-move -Wno-error=large-by-value-copy=128 -Wno-error=implicit-int-float-conversion -Wno-error=thread-safety-analysis -Wno-error=tautological-type-limit-compare -Wno-invalid-offsetof -Wno-range-loop-analysis -Wno-deprecated-anon-enum-enum-conversion -Wno-deprecated-enum-enum-conversion -Wno-deprecated-this-capture -Wno-inline-new-delete -Wno-error=deprecated-declarations -Wno-error=array-bounds -Wno-error=free-nonheap-object -Wno-error=atomic-alignment -Wno-error=deprecated-builtins -Wno-psabi -Wno-error=builtin-macro-redefined -Wno-vla-cxx-extension -Wno-unknown-warning-option -Wno-comma -Wno-implicit-const-int-float-conversion -Wno-macro-redefined -Wno-microsoft-include -Wno-tautological-unsigned-enum-zero-compare -Wno-unreachable-code-loop-increment -Wno-unreachable-code-return -fdeprecated-macro -ferror-limit 19 -fstrict-flex-arrays=1 -stack-protector 2 -fstack-clash-protection -ftrivial-auto-var-init=pattern -fgnuc-version=4.2.1 -fskip-odr-check-in-gmf -fno-sized-deallocation -fno-aligned-allocation -vectorize-loops -vectorize-slp -analyzer-checker optin.performance.Padding -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2025-06-27-100320-3286336-1 -x c++ /root/firefox-clang/intl/icu/source/i18n/uspoof.cpp
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4***************************************************************************
5* Copyright (C) 2008-2015, International Business Machines Corporation
6* and others. All Rights Reserved.
7***************************************************************************
8* file name: uspoof.cpp
9* encoding: UTF-8
10* tab size: 8 (not used)
11* indentation:4
12*
13* created on: 2008Feb13
14* created by: Andy Heninger
15*
16* Unicode Spoof Detection
17*/
18#include "unicode/ubidi.h"
19#include "unicode/utypes.h"
20#include "unicode/normalizer2.h"
21#include "unicode/uspoof.h"
22#include "unicode/ustring.h"
23#include "unicode/utf16.h"
24#include "cmemory.h"
25#include "cstring.h"
26#include "mutex.h"
27#include "scriptset.h"
28#include "uassert.h"
29#include "ucln_in.h"
30#include "uspoof_impl.h"
31#include "umutex.h"
32
33
34#if !UCONFIG_NO_NORMALIZATION0
35
36U_NAMESPACE_USEusing namespace icu_77;
37
38
39//
40// Static Objects used by the spoof impl, their thread safe initialization and their cleanup.
41//
42static UnicodeSet *gInclusionSet = nullptr;
43static UnicodeSet *gRecommendedSet = nullptr;
44static const Normalizer2 *gNfdNormalizer = nullptr;
45static UInitOnce gSpoofInitStaticsOnce {};
46
47namespace {
48
49UBool U_CALLCONV
50uspoof_cleanup() {
51 delete gInclusionSet;
52 gInclusionSet = nullptr;
53 delete gRecommendedSet;
54 gRecommendedSet = nullptr;
55 gNfdNormalizer = nullptr;
56 gSpoofInitStaticsOnce.reset();
57 return true;
58}
59
60void U_CALLCONV initializeStatics(UErrorCode &status) {
61 gInclusionSet = new UnicodeSet();
62 gRecommendedSet = new UnicodeSet();
63 if (gInclusionSet == nullptr || gRecommendedSet == nullptr) {
64 status = U_MEMORY_ALLOCATION_ERROR;
65 delete gInclusionSet;
66 gInclusionSet = nullptr;
67 delete gRecommendedSet;
68 gRecommendedSet = nullptr;
69 return;
70 }
71 gInclusionSet->applyIntPropertyValue(UCHAR_IDENTIFIER_TYPE, U_ID_TYPE_INCLUSION, status);
72 gRecommendedSet->applyIntPropertyValue(UCHAR_IDENTIFIER_TYPE, U_ID_TYPE_RECOMMENDED, status);
73 if (U_FAILURE(status)) {
74 delete gInclusionSet;
75 gInclusionSet = nullptr;
76 delete gRecommendedSet;
77 gRecommendedSet = nullptr;
78 return;
79 }
80 gInclusionSet->freeze();
81 gRecommendedSet->freeze();
82 gNfdNormalizer = Normalizer2::getNFDInstance(status);
83 ucln_i18n_registerCleanupucln_i18n_registerCleanup_77(UCLN_I18N_SPOOF, uspoof_cleanup);
84}
85
86} // namespace
87
88U_CFUNCextern "C" void uspoof_internalInitStaticsuspoof_internalInitStatics_77(UErrorCode *status) {
89 umtx_initOnce(gSpoofInitStaticsOnce, &initializeStatics, *status);
90}
91
92U_CAPIextern "C" USpoofChecker * U_EXPORT2
93uspoof_openuspoof_open_77(UErrorCode *status) {
94 umtx_initOnce(gSpoofInitStaticsOnce, &initializeStatics, *status);
95 if (U_FAILURE(*status)) {
96 return nullptr;
97 }
98 SpoofImpl *si = new SpoofImpl(*status);
99 if (si == nullptr) {
100 *status = U_MEMORY_ALLOCATION_ERROR;
101 return nullptr;
102 }
103 if (U_FAILURE(*status)) {
104 delete si;
105 return nullptr;
106 }
107 return si->asUSpoofChecker();
108}
109
110
111U_CAPIextern "C" USpoofChecker * U_EXPORT2
112uspoof_openFromSerializeduspoof_openFromSerialized_77(const void *data, int32_t length, int32_t *pActualLength,
113 UErrorCode *status) {
114 if (U_FAILURE(*status)) {
115 return nullptr;
116 }
117
118 if (data == nullptr) {
119 *status = U_ILLEGAL_ARGUMENT_ERROR;
120 return nullptr;
121 }
122
123 umtx_initOnce(gSpoofInitStaticsOnce, &initializeStatics, *status);
124 if (U_FAILURE(*status))
125 {
126 return nullptr;
127 }
128
129 SpoofData *sd = new SpoofData(data, length, *status);
130 if (sd == nullptr) {
131 *status = U_MEMORY_ALLOCATION_ERROR;
132 return nullptr;
133 }
134
135 if (U_FAILURE(*status)) {
136 delete sd;
137 return nullptr;
138 }
139
140 SpoofImpl *si = new SpoofImpl(sd, *status);
141 if (si == nullptr) {
142 *status = U_MEMORY_ALLOCATION_ERROR;
143 delete sd; // explicit delete as the destructor for si won't be called.
144 return nullptr;
145 }
146
147 if (U_FAILURE(*status)) {
148 delete si; // no delete for sd, as the si destructor will delete it.
149 return nullptr;
150 }
151
152 if (pActualLength != nullptr) {
153 *pActualLength = sd->size();
154 }
155 return si->asUSpoofChecker();
156}
157
158
159U_CAPIextern "C" USpoofChecker * U_EXPORT2
160uspoof_cloneuspoof_clone_77(const USpoofChecker *sc, UErrorCode *status) {
161 const SpoofImpl *src = SpoofImpl::validateThis(sc, *status);
162 if (src == nullptr) {
1
Assuming the condition is false
2
Taking false branch
163 return nullptr;
164 }
165 SpoofImpl *result = new SpoofImpl(*src, *status); // copy constructor
166 if (result == nullptr) {
3
Assuming the condition is false
4
Taking false branch
167 *status = U_MEMORY_ALLOCATION_ERROR;
168 return nullptr;
169 }
170 if (U_FAILURE(*status)) {
5
Taking true branch
171 delete result;
172 result = nullptr;
6
Null pointer value stored to 'result'
173 }
174 return result->asUSpoofChecker();
7
Called C++ object pointer is null
175}
176
177
178U_CAPIextern "C" void U_EXPORT2
179uspoof_closeuspoof_close_77(USpoofChecker *sc) {
180 UErrorCode status = U_ZERO_ERROR;
181 SpoofImpl *This = SpoofImpl::validateThis(sc, status);
182 delete This;
183}
184
185
186U_CAPIextern "C" void U_EXPORT2
187uspoof_setChecksuspoof_setChecks_77(USpoofChecker *sc, int32_t checks, UErrorCode *status) {
188 SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
189 if (This == nullptr) {
190 return;
191 }
192
193 // Verify that the requested checks are all ones (bits) that
194 // are acceptable, known values.
195 if (checks & ~(USPOOF_ALL_CHECKS | USPOOF_AUX_INFO)) {
196 *status = U_ILLEGAL_ARGUMENT_ERROR;
197 return;
198 }
199
200 This->fChecks = checks;
201}
202
203
204U_CAPIextern "C" int32_t U_EXPORT2
205uspoof_getChecksuspoof_getChecks_77(const USpoofChecker *sc, UErrorCode *status) {
206 const SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
207 if (This == nullptr) {
208 return 0;
209 }
210 return This->fChecks;
211}
212
213U_CAPIextern "C" void U_EXPORT2
214uspoof_setRestrictionLeveluspoof_setRestrictionLevel_77(USpoofChecker *sc, URestrictionLevel restrictionLevel) {
215 UErrorCode status = U_ZERO_ERROR;
216 SpoofImpl *This = SpoofImpl::validateThis(sc, status);
217 if (This != nullptr) {
218 This->fRestrictionLevel = restrictionLevel;
219 This->fChecks |= USPOOF_RESTRICTION_LEVEL;
220 }
221}
222
223U_CAPIextern "C" URestrictionLevel U_EXPORT2
224uspoof_getRestrictionLeveluspoof_getRestrictionLevel_77(const USpoofChecker *sc) {
225 UErrorCode status = U_ZERO_ERROR;
226 const SpoofImpl *This = SpoofImpl::validateThis(sc, status);
227 if (This == nullptr) {
228 return USPOOF_UNRESTRICTIVE;
229 }
230 return This->fRestrictionLevel;
231}
232
233U_CAPIextern "C" void U_EXPORT2
234uspoof_setAllowedLocalesuspoof_setAllowedLocales_77(USpoofChecker *sc, const char *localesList, UErrorCode *status) {
235 SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
236 if (This == nullptr) {
237 return;
238 }
239 This->setAllowedLocales(localesList, *status);
240}
241
242U_CAPIextern "C" const char * U_EXPORT2
243uspoof_getAllowedLocalesuspoof_getAllowedLocales_77(USpoofChecker *sc, UErrorCode *status) {
244 SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
245 if (This == nullptr) {
246 return nullptr;
247 }
248 return This->getAllowedLocales(*status);
249}
250
251
252U_CAPIextern "C" const USet * U_EXPORT2
253uspoof_getAllowedCharsuspoof_getAllowedChars_77(const USpoofChecker *sc, UErrorCode *status) {
254 const UnicodeSet *result = uspoof_getAllowedUnicodeSetuspoof_getAllowedUnicodeSet_77(sc, status);
255 return result->toUSet();
256}
257
258U_CAPIextern "C" const UnicodeSet * U_EXPORT2
259uspoof_getAllowedUnicodeSetuspoof_getAllowedUnicodeSet_77(const USpoofChecker *sc, UErrorCode *status) {
260 const SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
261 if (This == nullptr) {
262 return nullptr;
263 }
264 return This->fAllowedCharsSet;
265}
266
267
268U_CAPIextern "C" void U_EXPORT2
269uspoof_setAllowedCharsuspoof_setAllowedChars_77(USpoofChecker *sc, const USet *chars, UErrorCode *status) {
270 const UnicodeSet *set = UnicodeSet::fromUSet(chars);
271 uspoof_setAllowedUnicodeSetuspoof_setAllowedUnicodeSet_77(sc, set, status);
272}
273
274
275U_CAPIextern "C" void U_EXPORT2
276uspoof_setAllowedUnicodeSetuspoof_setAllowedUnicodeSet_77(USpoofChecker *sc, const UnicodeSet *chars, UErrorCode *status) {
277 SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
278 if (This == nullptr) {
279 return;
280 }
281 if (chars->isBogus()) {
282 *status = U_ILLEGAL_ARGUMENT_ERROR;
283 return;
284 }
285 UnicodeSet *clonedSet = chars->clone();
286 if (clonedSet == nullptr || clonedSet->isBogus()) {
287 *status = U_MEMORY_ALLOCATION_ERROR;
288 return;
289 }
290 clonedSet->freeze();
291 delete This->fAllowedCharsSet;
292 This->fAllowedCharsSet = clonedSet;
293 This->fChecks |= USPOOF_CHAR_LIMIT;
294}
295
296
297U_CAPIextern "C" int32_t U_EXPORT2
298uspoof_checkuspoof_check_77(const USpoofChecker *sc,
299 const char16_t *id, int32_t length,
300 int32_t *position,
301 UErrorCode *status) {
302
303 // Backwards compatibility:
304 if (position != nullptr) {
305 *position = 0;
306 }
307
308 // Delegate to uspoof_check2
309 return uspoof_check2uspoof_check2_77(sc, id, length, nullptr, status);
310}
311
312
313U_CAPIextern "C" int32_t U_EXPORT2
314uspoof_check2uspoof_check2_77(const USpoofChecker *sc,
315 const char16_t* id, int32_t length,
316 USpoofCheckResult* checkResult,
317 UErrorCode *status) {
318
319 const SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
320 if (This == nullptr) {
321 return 0;
322 }
323 if (length < -1) {
324 *status = U_ILLEGAL_ARGUMENT_ERROR;
325 return 0;
326 }
327 UnicodeString idStr((length == -1), id, length); // Aliasing constructor.
328 int32_t result = uspoof_check2UnicodeStringuspoof_check2UnicodeString_77(sc, idStr, checkResult, status);
329 return result;
330}
331
332
333U_CAPIextern "C" int32_t U_EXPORT2
334uspoof_checkUTF8uspoof_checkUTF8_77(const USpoofChecker *sc,
335 const char *id, int32_t length,
336 int32_t *position,
337 UErrorCode *status) {
338
339 // Backwards compatibility:
340 if (position != nullptr) {
341 *position = 0;
342 }
343
344 // Delegate to uspoof_check2
345 return uspoof_check2UTF8uspoof_check2UTF8_77(sc, id, length, nullptr, status);
346}
347
348
349U_CAPIextern "C" int32_t U_EXPORT2
350uspoof_check2UTF8uspoof_check2UTF8_77(const USpoofChecker *sc,
351 const char *id, int32_t length,
352 USpoofCheckResult* checkResult,
353 UErrorCode *status) {
354
355 if (U_FAILURE(*status)) {
356 return 0;
357 }
358 UnicodeString idStr = UnicodeString::fromUTF8(StringPiece(id, length>=0 ? length : static_cast<int32_t>(uprv_strlen(id):: strlen(id))));
359 int32_t result = uspoof_check2UnicodeStringuspoof_check2UnicodeString_77(sc, idStr, checkResult, status);
360 return result;
361}
362
363
364U_CAPIextern "C" int32_t U_EXPORT2
365uspoof_areConfusableuspoof_areConfusable_77(const USpoofChecker *sc,
366 const char16_t *id1, int32_t length1,
367 const char16_t *id2, int32_t length2,
368 UErrorCode *status) {
369 SpoofImpl::validateThis(sc, *status);
370 if (U_FAILURE(*status)) {
371 return 0;
372 }
373 if (length1 < -1 || length2 < -1) {
374 *status = U_ILLEGAL_ARGUMENT_ERROR;
375 return 0;
376 }
377
378 UnicodeString id1Str((length1==-1), id1, length1); // Aliasing constructor
379 UnicodeString id2Str((length2==-1), id2, length2); // Aliasing constructor
380 return uspoof_areConfusableUnicodeStringuspoof_areConfusableUnicodeString_77(sc, id1Str, id2Str, status);
381}
382
383
384U_CAPIextern "C" int32_t U_EXPORT2
385uspoof_areConfusableUTF8uspoof_areConfusableUTF8_77(const USpoofChecker *sc,
386 const char *id1, int32_t length1,
387 const char *id2, int32_t length2,
388 UErrorCode *status) {
389 SpoofImpl::validateThis(sc, *status);
390 if (U_FAILURE(*status)) {
391 return 0;
392 }
393 if (length1 < -1 || length2 < -1) {
394 *status = U_ILLEGAL_ARGUMENT_ERROR;
395 return 0;
396 }
397 UnicodeString id1Str = UnicodeString::fromUTF8(StringPiece(id1, length1>=0? length1 : static_cast<int32_t>(uprv_strlen(id1):: strlen(id1))));
398 UnicodeString id2Str = UnicodeString::fromUTF8(StringPiece(id2, length2>=0? length2 : static_cast<int32_t>(uprv_strlen(id2):: strlen(id2))));
399 int32_t results = uspoof_areConfusableUnicodeStringuspoof_areConfusableUnicodeString_77(sc, id1Str, id2Str, status);
400 return results;
401}
402
403
404U_CAPIextern "C" int32_t U_EXPORT2
405uspoof_areConfusableUnicodeStringuspoof_areConfusableUnicodeString_77(const USpoofChecker *sc,
406 const icu::UnicodeString &id1,
407 const icu::UnicodeString &id2,
408 UErrorCode *status) {
409 const SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
410 if (U_FAILURE(*status)) {
411 return 0;
412 }
413 //
414 // See section 4 of UAX 39 for the algorithm for checking whether two strings are confusable,
415 // and for definitions of the types (single, whole, mixed-script) of confusables.
416
417 // We only care about a few of the check flags. Ignore the others.
418 // If no tests relevant to this function have been specified, return an error.
419 // TODO: is this really the right thing to do? It's probably an error on the caller's part,
420 // but logically we would just return 0 (no error).
421 if ((This->fChecks & USPOOF_CONFUSABLE) == 0) {
422 *status = U_INVALID_STATE_ERROR;
423 return 0;
424 }
425
426 // Compute the skeletons and check for confusability.
427 UnicodeString id1Skeleton;
428 uspoof_getSkeletonUnicodeStringuspoof_getSkeletonUnicodeString_77(sc, 0 /* deprecated */, id1, id1Skeleton, status);
429 UnicodeString id2Skeleton;
430 uspoof_getSkeletonUnicodeStringuspoof_getSkeletonUnicodeString_77(sc, 0 /* deprecated */, id2, id2Skeleton, status);
431 if (U_FAILURE(*status)) { return 0; }
432 if (id1Skeleton != id2Skeleton) {
433 return 0;
434 }
435
436 // If we get here, the strings are confusable. Now we just need to set the flags for the appropriate classes
437 // of confusables according to UTS 39 section 4.
438 // Start by computing the resolved script sets of id1 and id2.
439 ScriptSet id1RSS;
440 This->getResolvedScriptSet(id1, id1RSS, *status);
441 ScriptSet id2RSS;
442 This->getResolvedScriptSet(id2, id2RSS, *status);
443
444 // Turn on all applicable flags
445 int32_t result = 0;
446 if (id1RSS.intersects(id2RSS)) {
447 result |= USPOOF_SINGLE_SCRIPT_CONFUSABLE;
448 } else {
449 result |= USPOOF_MIXED_SCRIPT_CONFUSABLE;
450 if (!id1RSS.isEmpty() && !id2RSS.isEmpty()) {
451 result |= USPOOF_WHOLE_SCRIPT_CONFUSABLE;
452 }
453 }
454
455 // Turn off flags that the user doesn't want
456 if ((This->fChecks & USPOOF_SINGLE_SCRIPT_CONFUSABLE) == 0) {
457 result &= ~USPOOF_SINGLE_SCRIPT_CONFUSABLE;
458 }
459 if ((This->fChecks & USPOOF_MIXED_SCRIPT_CONFUSABLE) == 0) {
460 result &= ~USPOOF_MIXED_SCRIPT_CONFUSABLE;
461 }
462 if ((This->fChecks & USPOOF_WHOLE_SCRIPT_CONFUSABLE) == 0) {
463 result &= ~USPOOF_WHOLE_SCRIPT_CONFUSABLE;
464 }
465
466 return result;
467}
468
469U_CAPIextern "C" uint32_t U_EXPORT2 uspoof_areBidiConfusableuspoof_areBidiConfusable_77(const USpoofChecker *sc, UBiDiDirection direction,
470 const char16_t *id1, int32_t length1,
471 const char16_t *id2, int32_t length2,
472 UErrorCode *status) {
473 UnicodeString id1Str((length1 == -1), id1, length1); // Aliasing constructor
474 UnicodeString id2Str((length2 == -1), id2, length2); // Aliasing constructor
475 if (id1Str.isBogus() || id2Str.isBogus()) {
476 *status = U_ILLEGAL_ARGUMENT_ERROR;
477 return 0;
478 }
479 return uspoof_areBidiConfusableUnicodeStringuspoof_areBidiConfusableUnicodeString_77(sc, direction, id1Str, id2Str, status);
480}
481
482U_CAPIextern "C" uint32_t U_EXPORT2 uspoof_areBidiConfusableUTF8uspoof_areBidiConfusableUTF8_77(const USpoofChecker *sc, UBiDiDirection direction,
483 const char *id1, int32_t length1, const char *id2,
484 int32_t length2, UErrorCode *status) {
485 if (length1 < -1 || length2 < -1) {
486 *status = U_ILLEGAL_ARGUMENT_ERROR;
487 return 0;
488 }
489 UnicodeString id1Str = UnicodeString::fromUTF8(
490 StringPiece(id1, length1 >= 0 ? length1 : static_cast<int32_t>(uprv_strlen(id1):: strlen(id1))));
491 UnicodeString id2Str = UnicodeString::fromUTF8(
492 StringPiece(id2, length2 >= 0 ? length2 : static_cast<int32_t>(uprv_strlen(id2):: strlen(id2))));
493 return uspoof_areBidiConfusableUnicodeStringuspoof_areBidiConfusableUnicodeString_77(sc, direction, id1Str, id2Str, status);
494}
495
496U_CAPIextern "C" uint32_t U_EXPORT2 uspoof_areBidiConfusableUnicodeStringuspoof_areBidiConfusableUnicodeString_77(const USpoofChecker *sc,
497 UBiDiDirection direction,
498 const icu::UnicodeString &id1,
499 const icu::UnicodeString &id2,
500 UErrorCode *status) {
501 const SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
502 if (U_FAILURE(*status)) {
503 return 0;
504 }
505 //
506 // See section 4 of UTS 39 for the algorithm for checking whether two strings are confusable,
507 // and for definitions of the types (single, whole, mixed-script) of confusables.
508
509 // We only care about a few of the check flags. Ignore the others.
510 // If no tests relevant to this function have been specified, return an error.
511 // TODO: is this really the right thing to do? It's probably an error on the caller's part,
512 // but logically we would just return 0 (no error).
513 if ((This->fChecks & USPOOF_CONFUSABLE) == 0) {
514 *status = U_INVALID_STATE_ERROR;
515 return 0;
516 }
517
518 // Compute the skeletons and check for confusability.
519 UnicodeString id1Skeleton;
520 uspoof_getBidiSkeletonUnicodeStringuspoof_getBidiSkeletonUnicodeString_77(sc, direction, id1, id1Skeleton, status);
521 UnicodeString id2Skeleton;
522 uspoof_getBidiSkeletonUnicodeStringuspoof_getBidiSkeletonUnicodeString_77(sc, direction, id2, id2Skeleton, status);
523 if (U_FAILURE(*status)) {
524 return 0;
525 }
526 if (id1Skeleton != id2Skeleton) {
527 return 0;
528 }
529
530 // If we get here, the strings are confusable. Now we just need to set the flags for the appropriate
531 // classes of confusables according to UTS 39 section 4. Start by computing the resolved script sets
532 // of id1 and id2.
533 ScriptSet id1RSS;
534 This->getResolvedScriptSet(id1, id1RSS, *status);
535 ScriptSet id2RSS;
536 This->getResolvedScriptSet(id2, id2RSS, *status);
537
538 // Turn on all applicable flags
539 uint32_t result = 0;
540 if (id1RSS.intersects(id2RSS)) {
541 result |= USPOOF_SINGLE_SCRIPT_CONFUSABLE;
542 } else {
543 result |= USPOOF_MIXED_SCRIPT_CONFUSABLE;
544 if (!id1RSS.isEmpty() && !id2RSS.isEmpty()) {
545 result |= USPOOF_WHOLE_SCRIPT_CONFUSABLE;
546 }
547 }
548
549 // Turn off flags that the user doesn't want
550 return result & This->fChecks;
551}
552
553
554U_CAPIextern "C" int32_t U_EXPORT2
555uspoof_checkUnicodeStringuspoof_checkUnicodeString_77(const USpoofChecker *sc,
556 const icu::UnicodeString &id,
557 int32_t *position,
558 UErrorCode *status) {
559
560 // Backwards compatibility:
561 if (position != nullptr) {
562 *position = 0;
563 }
564
565 // Delegate to uspoof_check2
566 return uspoof_check2UnicodeStringuspoof_check2UnicodeString_77(sc, id, nullptr, status);
567}
568
569namespace {
570
571int32_t checkImpl(const SpoofImpl* This, const UnicodeString& id, CheckResult* checkResult, UErrorCode* status) {
572 U_ASSERT(This != nullptr)(static_cast <bool> (This != nullptr) ? void (0) : __assert_fail
("This != nullptr", __builtin_FILE (), __builtin_LINE (), __extension__
__PRETTY_FUNCTION__))
;
573 U_ASSERT(checkResult != nullptr)(static_cast <bool> (checkResult != nullptr) ? void (0)
: __assert_fail ("checkResult != nullptr", __builtin_FILE ()
, __builtin_LINE (), __extension__ __PRETTY_FUNCTION__))
;
574 checkResult->clear();
575 int32_t result = 0;
576
577 if (0 != (This->fChecks & USPOOF_RESTRICTION_LEVEL)) {
578 URestrictionLevel idRestrictionLevel = This->getRestrictionLevel(id, *status);
579 if (idRestrictionLevel > This->fRestrictionLevel) {
580 result |= USPOOF_RESTRICTION_LEVEL;
581 }
582 checkResult->fRestrictionLevel = idRestrictionLevel;
583 }
584
585 if (0 != (This->fChecks & USPOOF_MIXED_NUMBERS)) {
586 UnicodeSet numerics;
587 This->getNumerics(id, numerics, *status);
588 if (numerics.size() > 1) {
589 result |= USPOOF_MIXED_NUMBERS;
590 }
591 checkResult->fNumerics = numerics; // UnicodeSet::operator=
592 }
593
594 if (0 != (This->fChecks & USPOOF_HIDDEN_OVERLAY)) {
595 int32_t index = This->findHiddenOverlay(id, *status);
596 if (index != -1) {
597 result |= USPOOF_HIDDEN_OVERLAY;
598 }
599 }
600
601
602 if (0 != (This->fChecks & USPOOF_CHAR_LIMIT)) {
603 int32_t i;
604 UChar32 c;
605 int32_t length = id.length();
606 for (i=0; i<length ;) {
607 c = id.char32At(i);
608 i += U16_LENGTH(c)((uint32_t)(c)<=0xffff ? 1 : 2);
609 if (!This->fAllowedCharsSet->contains(c)) {
610 result |= USPOOF_CHAR_LIMIT;
611 break;
612 }
613 }
614 }
615
616 if (0 != (This->fChecks & USPOOF_INVISIBLE)) {
617 // This check needs to be done on NFD input
618 UnicodeString nfdText;
619 gNfdNormalizer->normalize(id, nfdText, *status);
620 int32_t nfdLength = nfdText.length();
621
622 // scan for more than one occurrence of the same non-spacing mark
623 // in a sequence of non-spacing marks.
624 int32_t i;
625 UChar32 c;
626 UChar32 firstNonspacingMark = 0;
627 UBool haveMultipleMarks = false;
628 UnicodeSet marksSeenSoFar; // Set of combining marks in a single combining sequence.
629
630 for (i=0; i<nfdLength ;) {
631 c = nfdText.char32At(i);
632 i += U16_LENGTH(c)((uint32_t)(c)<=0xffff ? 1 : 2);
633 if (u_charTypeu_charType_77(c) != U_NON_SPACING_MARK) {
634 firstNonspacingMark = 0;
635 if (haveMultipleMarks) {
636 marksSeenSoFar.clear();
637 haveMultipleMarks = false;
638 }
639 continue;
640 }
641 if (firstNonspacingMark == 0) {
642 firstNonspacingMark = c;
643 continue;
644 }
645 if (!haveMultipleMarks) {
646 marksSeenSoFar.add(firstNonspacingMark);
647 haveMultipleMarks = true;
648 }
649 if (marksSeenSoFar.contains(c)) {
650 // report the error, and stop scanning.
651 // No need to find more than the first failure.
652 result |= USPOOF_INVISIBLE;
653 break;
654 }
655 marksSeenSoFar.add(c);
656 }
657 }
658
659 checkResult->fChecks = result;
660 return checkResult->toCombinedBitmask(This->fChecks);
661}
662
663} // namespace
664
665U_CAPIextern "C" int32_t U_EXPORT2
666uspoof_check2UnicodeStringuspoof_check2UnicodeString_77(const USpoofChecker *sc,
667 const icu::UnicodeString &id,
668 USpoofCheckResult* checkResult,
669 UErrorCode *status) {
670 const SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
671 if (This == nullptr) {
672 return false;
673 }
674
675 if (checkResult != nullptr) {
676 CheckResult* ThisCheckResult = CheckResult::validateThis(checkResult, *status);
677 if (ThisCheckResult == nullptr) {
678 return false;
679 }
680 return checkImpl(This, id, ThisCheckResult, status);
681 } else {
682 // Stack-allocate the checkResult since this method doesn't return it
683 CheckResult stackCheckResult;
684 return checkImpl(This, id, &stackCheckResult, status);
685 }
686}
687
688
689U_CAPIextern "C" int32_t U_EXPORT2
690uspoof_getSkeletonuspoof_getSkeleton_77(const USpoofChecker *sc,
691 uint32_t type,
692 const char16_t *id, int32_t length,
693 char16_t *dest, int32_t destCapacity,
694 UErrorCode *status) {
695
696 SpoofImpl::validateThis(sc, *status);
697 if (U_FAILURE(*status)) {
698 return 0;
699 }
700 if (length<-1 || destCapacity<0 || (destCapacity==0 && dest!=nullptr)) {
701 *status = U_ILLEGAL_ARGUMENT_ERROR;
702 return 0;
703 }
704
705 UnicodeString idStr((length==-1), id, length); // Aliasing constructor
706 UnicodeString destStr;
707 uspoof_getSkeletonUnicodeStringuspoof_getSkeletonUnicodeString_77(sc, type, idStr, destStr, status);
708 destStr.extract(dest, destCapacity, *status);
709 return destStr.length();
710}
711
712U_CAPIextern "C" int32_t U_EXPORT2 uspoof_getBidiSkeletonuspoof_getBidiSkeleton_77(const USpoofChecker *sc, UBiDiDirection direction,
713 const UChar *id, int32_t length, UChar *dest,
714 int32_t destCapacity, UErrorCode *status) {
715 UnicodeString idStr((length == -1), id, length); // Aliasing constructor
716 if (idStr.isBogus()) {
717 *status = U_ILLEGAL_ARGUMENT_ERROR;
718 return 0;
719 }
720 UnicodeString destStr;
721 uspoof_getBidiSkeletonUnicodeStringuspoof_getBidiSkeletonUnicodeString_77(sc, direction, idStr, destStr, status);
722 return destStr.extract(dest, destCapacity, *status);
723}
724
725
726
727U_I18N_API UnicodeString &U_EXPORT2 uspoof_getBidiSkeletonUnicodeStringuspoof_getBidiSkeletonUnicodeString_77(const USpoofChecker *sc,
728 UBiDiDirection direction,
729 const UnicodeString &id,
730 UnicodeString &dest,
731 UErrorCode *status) {
732 dest.remove();
733 if (direction != UBIDI_LTR && direction != UBIDI_RTL) {
734 *status = U_ILLEGAL_ARGUMENT_ERROR;
735 return dest;
736 }
737 UBiDi *bidi = ubidi_openubidi_open_77();
738 ubidi_setParaubidi_setPara_77(bidi, id.getBuffer(), id.length(), direction,
739 /*embeddingLevels*/ nullptr, status);
740 if (U_FAILURE(*status)) {
741 ubidi_closeubidi_close_77(bidi);
742 return dest;
743 }
744 UnicodeString reordered;
745 int32_t const size = ubidi_getProcessedLengthubidi_getProcessedLength_77(bidi);
746 UChar* const reorderedBuffer = reordered.getBuffer(size);
747 if (reorderedBuffer == nullptr) {
748 *status = U_MEMORY_ALLOCATION_ERROR;
749 ubidi_closeubidi_close_77(bidi);
750 return dest;
751 }
752 ubidi_writeReorderedubidi_writeReordered_77(bidi, reorderedBuffer, size,
753 UBIDI_KEEP_BASE_COMBINING1 | UBIDI_DO_MIRRORING2, status);
754 reordered.releaseBuffer(size);
755 ubidi_closeubidi_close_77(bidi);
756
757 if (U_FAILURE(*status)) {
758 return dest;
759 }
760
761 // The type parameter is deprecated since ICU 58; any number may be passed.
762 constexpr uint32_t deprecatedType = 58;
763 return uspoof_getSkeletonUnicodeStringuspoof_getSkeletonUnicodeString_77(sc, deprecatedType, reordered, dest, status);
764}
765
766
767
768U_I18N_API UnicodeString & U_EXPORT2
769uspoof_getSkeletonUnicodeStringuspoof_getSkeletonUnicodeString_77(const USpoofChecker *sc,
770 uint32_t /*type*/,
771 const UnicodeString &id,
772 UnicodeString &dest,
773 UErrorCode *status) {
774 const SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
775 if (U_FAILURE(*status)) {
776 return dest;
777 }
778
779 UnicodeString nfdId;
780 gNfdNormalizer->normalize(id, nfdId, *status);
781
782 // Apply the skeleton mapping to the NFD normalized input string
783 // Accumulate the skeleton, possibly unnormalized, in a UnicodeString.
784 int32_t inputIndex = 0;
785 UnicodeString skelStr;
786 int32_t normalizedLen = nfdId.length();
787 for (inputIndex=0; inputIndex < normalizedLen; ) {
788 UChar32 c = nfdId.char32At(inputIndex);
789 inputIndex += U16_LENGTH(c)((uint32_t)(c)<=0xffff ? 1 : 2);
790 if (!u_hasBinaryPropertyu_hasBinaryProperty_77(c, UCHAR_DEFAULT_IGNORABLE_CODE_POINT)) {
791 This->fSpoofData->confusableLookup(c, skelStr);
792 }
793 }
794
795 gNfdNormalizer->normalize(skelStr, dest, *status);
796 return dest;
797}
798
799U_CAPIextern "C" int32_t U_EXPORT2 uspoof_getSkeletonUTF8uspoof_getSkeletonUTF8_77(const USpoofChecker *sc, uint32_t type, const char *id,
800 int32_t length, char *dest, int32_t destCapacity,
801 UErrorCode *status) {
802 SpoofImpl::validateThis(sc, *status);
803 if (U_FAILURE(*status)) {
804 return 0;
805 }
806 if (length<-1 || destCapacity<0 || (destCapacity==0 && dest!=nullptr)) {
807 *status = U_ILLEGAL_ARGUMENT_ERROR;
808 return 0;
809 }
810
811 UnicodeString srcStr = UnicodeString::fromUTF8(
812 StringPiece(id, length >= 0 ? length : static_cast<int32_t>(uprv_strlen(id):: strlen(id))));
813 UnicodeString destStr;
814 uspoof_getSkeletonUnicodeStringuspoof_getSkeletonUnicodeString_77(sc, type, srcStr, destStr, status);
815 if (U_FAILURE(*status)) {
816 return 0;
817 }
818
819 int32_t lengthInUTF8 = 0;
820 u_strToUTF8u_strToUTF8_77(dest, destCapacity, &lengthInUTF8, destStr.getBuffer(), destStr.length(), status);
821 return lengthInUTF8;
822}
823
824U_CAPIextern "C" int32_t U_EXPORT2 uspoof_getBidiSkeletonUTF8uspoof_getBidiSkeletonUTF8_77(const USpoofChecker *sc, UBiDiDirection direction,
825 const char *id, int32_t length, char *dest,
826 int32_t destCapacity, UErrorCode *status) {
827 if (length < -1) {
828 *status = U_ILLEGAL_ARGUMENT_ERROR;
829 return 0;
830 }
831
832 UnicodeString srcStr = UnicodeString::fromUTF8(
833 StringPiece(id, length >= 0 ? length : static_cast<int32_t>(uprv_strlen(id):: strlen(id))));
834 UnicodeString destStr;
835 uspoof_getBidiSkeletonUnicodeStringuspoof_getBidiSkeletonUnicodeString_77(sc, direction, srcStr, destStr, status);
836 if (U_FAILURE(*status)) {
837 return 0;
838 }
839
840 int32_t lengthInUTF8 = 0;
841 u_strToUTF8u_strToUTF8_77(dest, destCapacity, &lengthInUTF8, destStr.getBuffer(), destStr.length(), status);
842 return lengthInUTF8;
843}
844
845
846U_CAPIextern "C" int32_t U_EXPORT2
847uspoof_serializeuspoof_serialize_77(USpoofChecker *sc,void *buf, int32_t capacity, UErrorCode *status) {
848 SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
849 if (This == nullptr) {
850 U_ASSERT(U_FAILURE(*status))(static_cast <bool> (U_FAILURE(*status)) ? void (0) : __assert_fail
("U_FAILURE(*status)", __builtin_FILE (), __builtin_LINE (),
__extension__ __PRETTY_FUNCTION__))
;
851 return 0;
852 }
853
854 return This->fSpoofData->serialize(buf, capacity, *status);
855}
856
857U_CAPIextern "C" const USet * U_EXPORT2
858uspoof_getInclusionSetuspoof_getInclusionSet_77(UErrorCode *status) {
859 umtx_initOnce(gSpoofInitStaticsOnce, &initializeStatics, *status);
860 return gInclusionSet->toUSet();
861}
862
863U_CAPIextern "C" const USet * U_EXPORT2
864uspoof_getRecommendedSetuspoof_getRecommendedSet_77(UErrorCode *status) {
865 umtx_initOnce(gSpoofInitStaticsOnce, &initializeStatics, *status);
866 return gRecommendedSet->toUSet();
867}
868
869U_I18N_API const UnicodeSet * U_EXPORT2
870uspoof_getInclusionUnicodeSetuspoof_getInclusionUnicodeSet_77(UErrorCode *status) {
871 umtx_initOnce(gSpoofInitStaticsOnce, &initializeStatics, *status);
872 return gInclusionSet;
873}
874
875U_I18N_API const UnicodeSet * U_EXPORT2
876uspoof_getRecommendedUnicodeSetuspoof_getRecommendedUnicodeSet_77(UErrorCode *status) {
877 umtx_initOnce(gSpoofInitStaticsOnce, &initializeStatics, *status);
878 return gRecommendedSet;
879}
880
881//------------------
882// CheckResult APIs
883//------------------
884
885U_CAPIextern "C" USpoofCheckResult* U_EXPORT2
886uspoof_openCheckResultuspoof_openCheckResult_77(UErrorCode *status) {
887 CheckResult* checkResult = new CheckResult();
888 if (checkResult == nullptr) {
889 *status = U_MEMORY_ALLOCATION_ERROR;
890 return nullptr;
891 }
892 return checkResult->asUSpoofCheckResult();
893}
894
895U_CAPIextern "C" void U_EXPORT2
896uspoof_closeCheckResultuspoof_closeCheckResult_77(USpoofCheckResult* checkResult) {
897 UErrorCode status = U_ZERO_ERROR;
898 CheckResult* This = CheckResult::validateThis(checkResult, status);
899 delete This;
900}
901
902U_CAPIextern "C" int32_t U_EXPORT2
903uspoof_getCheckResultChecksuspoof_getCheckResultChecks_77(const USpoofCheckResult *checkResult, UErrorCode *status) {
904 const CheckResult* This = CheckResult::validateThis(checkResult, *status);
905 if (U_FAILURE(*status)) { return 0; }
906 return This->fChecks;
907}
908
909U_CAPIextern "C" URestrictionLevel U_EXPORT2
910uspoof_getCheckResultRestrictionLeveluspoof_getCheckResultRestrictionLevel_77(const USpoofCheckResult *checkResult, UErrorCode *status) {
911 const CheckResult* This = CheckResult::validateThis(checkResult, *status);
912 if (U_FAILURE(*status)) { return USPOOF_UNRESTRICTIVE; }
913 return This->fRestrictionLevel;
914}
915
916U_CAPIextern "C" const USet* U_EXPORT2
917uspoof_getCheckResultNumericsuspoof_getCheckResultNumerics_77(const USpoofCheckResult *checkResult, UErrorCode *status) {
918 const CheckResult* This = CheckResult::validateThis(checkResult, *status);
919 if (U_FAILURE(*status)) { return nullptr; }
920 return This->fNumerics.toUSet();
921}
922
923
924
925#endif // !UCONFIG_NO_NORMALIZATION