Bug Summary

File: root/firefox-clang/intl/icu/source/i18n/rulebasedcollator.cpp
Warning: line 669, column 13
Null pointer passed to 1st parameter expecting 'nonnull'

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name rulebasedcollator.cpp -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -fhalf-no-semantic-interposition -mframe-pointer=all -relaxed-aliasing -ffp-contract=off -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fdebug-compilation-dir=/root/firefox-clang/obj-x86_64-pc-linux-gnu/config/external/icu/i18n -fcoverage-compilation-dir=/root/firefox-clang/obj-x86_64-pc-linux-gnu/config/external/icu/i18n -resource-dir /usr/lib/llvm-21/lib/clang/21 -include /root/firefox-clang/config/gcc_hidden.h -include /root/firefox-clang/obj-x86_64-pc-linux-gnu/mozilla-config.h -I /root/firefox-clang/obj-x86_64-pc-linux-gnu/dist/system_wrappers -U _FORTIFY_SOURCE -D _FORTIFY_SOURCE=2 -D _GLIBCXX_ASSERTIONS -D DEBUG=1 -D U_I18N_IMPLEMENTATION -D _LIBCPP_DISABLE_DEPRECATION_WARNINGS -D U_USING_ICU_NAMESPACE=0 -D U_NO_DEFAULT_INCLUDE_UTF_HEADERS=1 -D U_HIDE_OBSOLETE_UTF_OLD_H=1 -D UCONFIG_NO_LEGACY_CONVERSION -D UCONFIG_NO_TRANSLITERATION -D UCONFIG_NO_REGULAR_EXPRESSIONS -D UCONFIG_NO_BREAK_ITERATION -D UCONFIG_NO_IDNA -D UCONFIG_NO_MF2 -D U_CHARSET_IS_UTF8 -D UNISTR_FROM_CHAR_EXPLICIT=explicit -D UNISTR_FROM_STRING_EXPLICIT=explicit -D U_ENABLE_DYLOAD=0 -D U_DEBUG=1 -I 
/root/firefox-clang/config/external/icu/i18n -I /root/firefox-clang/obj-x86_64-pc-linux-gnu/config/external/icu/i18n -I /root/firefox-clang/intl/icu/source/common -I /root/firefox-clang/mfbt/double-conversion -I /root/firefox-clang/obj-x86_64-pc-linux-gnu/dist/include -I /root/firefox-clang/obj-x86_64-pc-linux-gnu/dist/include/nspr -I /root/firefox-clang/obj-x86_64-pc-linux-gnu/dist/include/nss -D MOZILLA_CLIENT -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/14/../../../../include/c++/14 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/14/../../../../include/x86_64-linux-gnu/c++/14 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/14/../../../../include/c++/14/backward -internal-isystem /usr/lib/llvm-21/lib/clang/21/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/14/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-error=pessimizing-move -Wno-error=large-by-value-copy=128 -Wno-error=implicit-int-float-conversion -Wno-error=thread-safety-analysis -Wno-error=tautological-type-limit-compare -Wno-invalid-offsetof -Wno-range-loop-analysis -Wno-deprecated-anon-enum-enum-conversion -Wno-deprecated-enum-enum-conversion -Wno-deprecated-this-capture -Wno-inline-new-delete -Wno-error=deprecated-declarations -Wno-error=array-bounds -Wno-error=free-nonheap-object -Wno-error=atomic-alignment -Wno-error=deprecated-builtins -Wno-psabi -Wno-error=builtin-macro-redefined -Wno-vla-cxx-extension -Wno-unknown-warning-option -Wno-comma -Wno-implicit-const-int-float-conversion -Wno-macro-redefined -Wno-microsoft-include -Wno-tautological-unsigned-enum-zero-compare -Wno-unreachable-code-loop-increment -Wno-unreachable-code-return -fdeprecated-macro -ferror-limit 19 -fstrict-flex-arrays=1 -stack-protector 2 -fstack-clash-protection -ftrivial-auto-var-init=pattern -fgnuc-version=4.2.1 -fskip-odr-check-in-gmf 
-fno-sized-deallocation -fno-aligned-allocation -vectorize-loops -vectorize-slp -analyzer-checker optin.performance.Padding -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2025-06-27-100320-3286336-1 -x c++ /root/firefox-clang/intl/icu/source/i18n/rulebasedcollator.cpp
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4*******************************************************************************
5* Copyright (C) 1996-2015, International Business Machines
6* Corporation and others. All Rights Reserved.
7*******************************************************************************
8* rulebasedcollator.cpp
9*
10* (replaced the former tblcoll.cpp)
11*
12* created on: 2012feb14 with new and old collation code
13* created by: Markus W. Scherer
14*/
15
16#include "unicode/utypes.h"
17
18#if !UCONFIG_NO_COLLATION0
19
20#include "unicode/coll.h"
21#include "unicode/coleitr.h"
22#include "unicode/localpointer.h"
23#include "unicode/locid.h"
24#include "unicode/sortkey.h"
25#include "unicode/tblcoll.h"
26#include "unicode/ucol.h"
27#include "unicode/uiter.h"
28#include "unicode/uloc.h"
29#include "unicode/uniset.h"
30#include "unicode/unistr.h"
31#include "unicode/usetiter.h"
32#include "unicode/utf8.h"
33#include "unicode/uversion.h"
34#include "bocsu.h"
35#include "charstr.h"
36#include "cmemory.h"
37#include "collation.h"
38#include "collationcompare.h"
39#include "collationdata.h"
40#include "collationdatareader.h"
41#include "collationfastlatin.h"
42#include "collationiterator.h"
43#include "collationkeys.h"
44#include "collationroot.h"
45#include "collationsets.h"
46#include "collationsettings.h"
47#include "collationtailoring.h"
48#include "cstring.h"
49#include "uassert.h"
50#include "ucol_imp.h"
51#include "uhash.h"
52#include "uitercollationiterator.h"
53#include "ulocimp.h"
54#include "ustr_imp.h"
55#include "utf16collationiterator.h"
56#include "utf8collationiterator.h"
57#include "uvectr64.h"
58
59U_NAMESPACE_BEGINnamespace icu_77 {
60
61namespace {
62
// Sort key byte sink over a caller-supplied buffer (dest, destCapacity).
// Both constructor arguments are forwarded unchanged to SortKeyByteSink.
// The two private overrides are the hooks for appends that do not fit
// and for growth requests.
63class FixedSortKeyByteSink : public SortKeyByteSink {
64public:
65 FixedSortKeyByteSink(char *dest, int32_t destCapacity)
66 : SortKeyByteSink(dest, destCapacity) {}
67 virtual ~FixedSortKeyByteSink();
68
69private:
70 virtual void AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length) override;
71 virtual UBool Resize(int32_t appendCapacity, int32_t length) override;
72};
73
// Out-of-line destructor with an empty body.
74FixedSortKeyByteSink::~FixedSortKeyByteSink() {}
75
// Handles an append that does not fit into the buffer.
// Copies only the first (capacity_ - length) bytes of `bytes` to
// buffer_ + length, and only when that count is positive; the remaining
// bytes are not stored. The parameter n is unused.
// (The text after the uprv_memcpy call is the macro expansion as shown by
// the analyzer listing.)
76void
77FixedSortKeyByteSink::AppendBeyondCapacity(const char *bytes, int32_t /*n*/, int32_t length) {
78 // buffer_ != nullptr && bytes != nullptr && n > 0 && appended_ > capacity_
79 // Fill the buffer completely.
80 int32_t available = capacity_ - length;
81 if (available > 0) {
82 uprv_memcpy(buffer_ + length, bytes, available)do { clang diagnostic push clang diagnostic ignored "-Waddress"
(static_cast <bool> (buffer_ + length != __null) ? void
(0) : __assert_fail ("buffer_ + length != __null", __builtin_FILE
(), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__)); (
static_cast <bool> (bytes != __null) ? void (0) : __assert_fail
("bytes != __null", __builtin_FILE (), __builtin_LINE (), __extension__
__PRETTY_FUNCTION__)); clang diagnostic pop :: memcpy(buffer_
+ length, bytes, available); } while (false)
;
83 }
84}
85
// Always returns false and ignores both arguments: this sink never
// changes its buffer.
86UBool
87FixedSortKeyByteSink::Resize(int32_t /*appendCapacity*/, int32_t /*length*/) {
88 return false;
89}
90
91} // namespace
92
93// Not in an anonymous namespace, so that it can be a friend of CollationKey.
// Sort key byte sink that writes into a CollationKey.
// The base sink is initialized with the key's byte pointer (reinterpreted
// as char *) and the key's capacity. A reference to the key is kept in
// key_ so that Resize() can call back into it.
94class CollationKeyByteSink : public SortKeyByteSink {
95public:
96 CollationKeyByteSink(CollationKey &key)
97 : SortKeyByteSink(reinterpret_cast<char *>(key.getBytes()), key.getCapacity()),
98 key_(key) {}
99 virtual ~CollationKeyByteSink();
100
101private:
102 virtual void AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length) override;
103 virtual UBool Resize(int32_t appendCapacity, int32_t length) override;
104
105 CollationKey &key_;
106};
107
// Out-of-line destructor with an empty body.
108CollationKeyByteSink::~CollationKeyByteSink() {}
109
// Handles an append that does not fit into the current buffer.
// Calls Resize(n, length); if that succeeds, copies all n bytes of `bytes`
// to buffer_ + length. If Resize() fails, nothing is copied.
// (The text after the uprv_memcpy call is the macro expansion as shown by
// the analyzer listing.)
110void
111CollationKeyByteSink::AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length) {
112 // buffer_ != nullptr && bytes != nullptr && n > 0 && appended_ > capacity_
113 if (Resize(n, length)) {
114 uprv_memcpy(buffer_ + length, bytes, n)do { clang diagnostic push clang diagnostic ignored "-Waddress"
(static_cast <bool> (buffer_ + length != __null) ? void
(0) : __assert_fail ("buffer_ + length != __null", __builtin_FILE
(), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__)); (
static_cast <bool> (bytes != __null) ? void (0) : __assert_fail
("bytes != __null", __builtin_FILE (), __builtin_LINE (), __extension__
__PRETTY_FUNCTION__)); clang diagnostic pop :: memcpy(buffer_
+ length, bytes, n); } while (false)
;
115 }
116}
117
// Tries to grow the key's buffer so that appendCapacity more bytes fit
// after the first `length` bytes.
// Returns false without doing anything if buffer_ is already nullptr.
// The requested capacity is the largest of 2 * capacity_,
// length + 2 * appendCapacity, and 200.
// On a nullptr result from key_.reallocate(), calls SetNotOk() and returns
// false; otherwise updates buffer_ and capacity_ and returns true.
// NOTE(review): presumably key_.reallocate() preserves the first `length`
// bytes — confirm against CollationKey.
118UBool
119CollationKeyByteSink::Resize(int32_t appendCapacity, int32_t length) {
120 if (buffer_ == nullptr) {
121 return false; // allocation failed before already
122 }
123 int32_t newCapacity = 2 * capacity_;
124 int32_t altCapacity = length + 2 * appendCapacity;
125 if (newCapacity < altCapacity) {
126 newCapacity = altCapacity;
127 }
128 if (newCapacity < 200) {
129 newCapacity = 200;
130 }
131 uint8_t *newBuffer = key_.reallocate(newCapacity, length);
132 if (newBuffer == nullptr) {
133 SetNotOk();
134 return false;
135 }
136 buffer_ = reinterpret_cast<char *>(newBuffer);
137 capacity_ = newCapacity;
138 return true;
139}
140
// Copy constructor: copies every member from `other`, then adds a
// reference to the shared settings and cacheEntry objects.
// NOTE(review): settings and cacheEntry are dereferenced unconditionally.
// The binary-data constructor leaves both as nullptr when it returns early
// on failure, so copying such an object would dereference nullptr here —
// confirm that callers never copy a collator whose construction failed.
141RuleBasedCollator::RuleBasedCollator(const RuleBasedCollator &other)
142 : Collator(other),
143 data(other.data),
144 settings(other.settings),
145 tailoring(other.tailoring),
146 cacheEntry(other.cacheEntry),
147 validLocale(other.validLocale),
148 explicitlySetAttributes(other.explicitlySetAttributes),
149 actualLocaleIsSameAsValid(other.actualLocaleIsSameAsValid) {
150 settings->addRef();
151 cacheEntry->addRef();
152}
153
// Constructs a collator from binary collation data.
//   bin, length: the binary data; bin == nullptr or length == 0 yields
//                U_ILLEGAL_ARGUMENT_ERROR.
//   base:        must be non-null (else U_ILLEGAL_ARGUMENT_ERROR) and its
//                tailoring must be the root tailoring (else
//                U_UNSUPPORTED_ERROR).
//   errorCode:   in/out status; nothing is done if it already indicates
//                failure.
// All pointer members start as nullptr and remain so on every early return.
// On success, ownership of the new tailoring passes to adoptTailoring().
154RuleBasedCollator::RuleBasedCollator(const uint8_t *bin, int32_t length,
155 const RuleBasedCollator *base, UErrorCode &errorCode)
156 : data(nullptr),
157 settings(nullptr),
158 tailoring(nullptr),
159 cacheEntry(nullptr),
160 validLocale(""),
161 explicitlySetAttributes(0),
162 actualLocaleIsSameAsValid(false) {
163 if(U_FAILURE(errorCode)) { return; }
164 if(bin == nullptr || length == 0 || base == nullptr) {
165 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
166 return;
167 }
168 const CollationTailoring *root = CollationRoot::getRoot(errorCode);
169 if(U_FAILURE(errorCode)) { return; }
170 if(base->tailoring != root) {
171 errorCode = U_UNSUPPORTED_ERROR;
172 return;
173 }
// The LocalPointer owns the new tailoring until it is orphaned below.
174 LocalPointer<CollationTailoring> t(new CollationTailoring(base->tailoring->settings));
175 if(t.isNull() || t->isBogus()) {
176 errorCode = U_MEMORY_ALLOCATION_ERROR;
177 return;
178 }
179 CollationDataReader::read(base->tailoring, bin, length, *t, errorCode);
180 if(U_FAILURE(errorCode)) { return; }
181 t->actualLocale.setToBogus();
182 adoptTailoring(t.orphan(), errorCode);
183}
184
// Constructs a collator from a cache entry.
// data, settings and tailoring are taken from entry->tailoring, and
// validLocale from entry->validLocale. A reference is then added to the
// settings and to the cache entry.
// `entry` and entry->tailoring are dereferenced without a null check.
185RuleBasedCollator::RuleBasedCollator(const CollationCacheEntry *entry)
186 : data(entry->tailoring->data),
187 settings(entry->tailoring->settings),
188 tailoring(entry->tailoring),
189 cacheEntry(entry),
190 validLocale(entry->validLocale),
191 explicitlySetAttributes(0),
192 actualLocaleIsSameAsValid(false) {
193 settings->addRef();
194 cacheEntry->addRef();
195}
196
// Destructor: passes settings and cacheEntry to SharedObject::clearPtr().
// NOTE(review): presumably this drops the references taken by the
// constructors and tolerates nullptr — confirm in SharedObject.
197RuleBasedCollator::~RuleBasedCollator() {
198 SharedObject::clearPtr(settings);
199 SharedObject::clearPtr(cacheEntry);
200}
201
// Installs tailoring `t` into a collator whose pointer members are all
// still nullptr (asserted below).
//   t:         the tailoring to install; dereferenced without a null check.
//   errorCode: in/out status. If it already indicates failure, only
//              t->deleteIfZeroRefCount() is called.
// Creates a CollationCacheEntry for (t->actualLocale, t). If that
// allocation yields nullptr, sets U_MEMORY_ALLOCATION_ERROR and calls
// t->deleteIfZeroRefCount().
// On success, data/settings/tailoring point at t's objects, a reference is
// added to the settings and to the cache entry, validLocale becomes
// t->actualLocale and actualLocaleIsSameAsValid is cleared.
202void
203RuleBasedCollator::adoptTailoring(CollationTailoring *t, UErrorCode &errorCode) {
204 if(U_FAILURE(errorCode)) {
205 t->deleteIfZeroRefCount();
206 return;
207 }
208 U_ASSERT(settings == nullptr && data == nullptr && tailoring == nullptr && cacheEntry == nullptr)(static_cast <bool> (settings == nullptr && data
== nullptr && tailoring == nullptr && cacheEntry
== nullptr) ? void (0) : __assert_fail ("settings == nullptr && data == nullptr && tailoring == nullptr && cacheEntry == nullptr"
, __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__
))
;
209 cacheEntry = new CollationCacheEntry(t->actualLocale, t);
210 if(cacheEntry == nullptr) {
211 errorCode = U_MEMORY_ALLOCATION_ERROR;
212 t->deleteIfZeroRefCount();
213 return;
214 }
215 data = t->data;
216 settings = t->settings;
217 settings->addRef();
218 tailoring = t;
219 cacheEntry->addRef();
220 validLocale = t->actualLocale;
221 actualLocaleIsSameAsValid = false;
222}
223
// Returns a heap-allocated copy of this collator, made with the copy
// constructor. The result of `new` is returned as is.
224RuleBasedCollator *
225RuleBasedCollator::clone() const {
226 return new RuleBasedCollator(*this);
227}
228
// Copy assignment. Self-assignment returns immediately.
// settings and cacheEntry are transferred via SharedObject::copyPtr();
// tailoring, validLocale, explicitlySetAttributes and
// actualLocaleIsSameAsValid are copied directly. data is re-read from the
// newly assigned tailoring rather than copied from other.data.
229RuleBasedCollator &RuleBasedCollator::operator=(const RuleBasedCollator &other) {
230 if(this == &other) { return *this; }
231 SharedObject::copyPtr(other.settings, settings);
232 tailoring = other.tailoring;
233 SharedObject::copyPtr(other.cacheEntry, cacheEntry);
234 data = tailoring->data;
235 validLocale = other.validLocale;
236 explicitlySetAttributes = other.explicitlySetAttributes;
237 actualLocaleIsSameAsValid = other.actualLocaleIsSameAsValid;
238 return *this;
239}
240
// Class-ID boilerplate. As the expansion shown in the listing spells out,
// it defines getStaticClassID(), which returns the address of a
// function-local static char, and getDynamicClassID(), which forwards to it.
241UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedCollator)UClassID RuleBasedCollator::getStaticClassID() { static char classID
= 0; return (UClassID)&classID; } UClassID RuleBasedCollator
::getDynamicClassID() const { return RuleBasedCollator::getStaticClassID
(); }
242
// Equality test against another Collator.
// The checks run in this order:
//   1. same object -> true;
//   2. Collator::operator== fails -> false (the static_cast below is only
//      reached after that check has passed);
//   3. settings differ -> false;
//   4. same data pointer -> true;
//   5. exactly one side is root (data->base == nullptr) -> false;
//   6. both sides have usable rule strings and they are equal -> true;
//   7. otherwise compare the tailored sets; any error or a difference
//      -> false, else true.
// As the closing comments say, step 7 does not compare the mappings
// themselves.
243bool
244RuleBasedCollator::operator==(const Collator& other) const {
245 if(this == &other) { return true; }
246 if(!Collator::operator==(other)) { return false; }
247 const RuleBasedCollator &o = static_cast<const RuleBasedCollator &>(other);
248 if(*settings != *o.settings) { return false; }
249 if(data == o.data) { return true; }
250 UBool thisIsRoot = data->base == nullptr;
251 UBool otherIsRoot = o.data->base == nullptr;
252 U_ASSERT(!thisIsRoot || !otherIsRoot)(static_cast <bool> (!thisIsRoot || !otherIsRoot) ? void
(0) : __assert_fail ("!thisIsRoot || !otherIsRoot", __builtin_FILE
(), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__))
; // otherwise their data pointers should be ==
253 if(thisIsRoot != otherIsRoot) { return false; }
254 if((thisIsRoot || !tailoring->rules.isEmpty()) &&
255 (otherIsRoot || !o.tailoring->rules.isEmpty())) {
256 // Shortcut: If both collators have valid rule strings, then compare those.
257 if(tailoring->rules == o.tailoring->rules) { return true; }
258 }
259 // Different rule strings can result in the same or equivalent tailoring.
260 // The rule strings are optional in ICU resource bundles, although included by default.
261 // cloneBinary() drops the rule string.
262 UErrorCode errorCode = U_ZERO_ERROR;
263 LocalPointer<UnicodeSet> thisTailored(getTailoredSet(errorCode));
264 LocalPointer<UnicodeSet> otherTailored(o.getTailoredSet(errorCode));
265 if(U_FAILURE(errorCode)) { return false; }
266 if(*thisTailored != *otherTailored) { return false; }
267 // For completeness, we should compare all of the mappings;
268 // or we should create a list of strings, sort it with one collator,
269 // and check if both collators compare adjacent strings the same
270 // (order & strength, down to quaternary); or similar.
271 // Testing equality of collators seems unusual.
272 return true;
273}
274
// Hash code for this collator.
// Starts from settings->hashCode(). For the root collator
// (data->base == nullptr) that value is returned directly. Otherwise the
// CE32 of each code point yielded by iterating the tailored set is XORed
// in; the loop stops at the first element for which iter.isString() is
// true. Returns 0 if the tailored set cannot be built.
275int32_t
276RuleBasedCollator::hashCode() const {
277 int32_t h = settings->hashCode();
278 if(data->base == nullptr) { return h; } // root collator
279 // Do not rely on the rule string, see comments in operator==().
280 UErrorCode errorCode = U_ZERO_ERROR;
281 LocalPointer<UnicodeSet> set(getTailoredSet(errorCode));
282 if(U_FAILURE(errorCode)) { return 0; }
283 UnicodeSetIterator iter(*set);
284 while(iter.next() && !iter.isString()) {
285 h ^= data->getCE32(iter.getCodepoint());
286 }
287 return h;
288}
289
// Records the valid locale and how the actual locale relates to it.
//   requested: ignored.
//   valid:     stored in validLocale.
//   actual:    if it equals tailoring->actualLocale,
//              actualLocaleIsSameAsValid is cleared; otherwise it is
//              asserted to equal `valid` and the flag is set.
// The tailoring itself is not modified.
290void
291RuleBasedCollator::setLocales(const Locale &requested, const Locale &valid,
292 const Locale &actual) {
293 if(actual == tailoring->actualLocale) {
294 actualLocaleIsSameAsValid = false;
295 } else {
296 U_ASSERT(actual == valid)(static_cast <bool> (actual == valid) ? void (0) : __assert_fail
("actual == valid", __builtin_FILE (), __builtin_LINE (), __extension__
__PRETTY_FUNCTION__))
;
297 actualLocaleIsSameAsValid = true;
298 }
299 // Do not modify tailoring.actualLocale:
300 // We cannot be sure that that would be thread-safe.
301 validLocale = valid;
302 (void)requested; // Ignore, see also ticket #10477.
303}
304
// Returns the locale of the requested type, by value.
//   ULOC_ACTUAL_LOCALE: validLocale if actualLocaleIsSameAsValid is set,
//                       otherwise tailoring->actualLocale.
//   ULOC_VALID_LOCALE:  validLocale.
//   any other type (including ULOC_REQUESTED_LOCALE): sets
//                       U_ILLEGAL_ARGUMENT_ERROR, returns Locale::getRoot().
// Also returns Locale::getRoot() if errorCode already indicates failure.
305Locale
306RuleBasedCollator::getLocale(ULocDataLocaleType type, UErrorCode& errorCode) const {
307 if(U_FAILURE(errorCode)) {
308 return Locale::getRoot();
309 }
310 switch(type) {
311 case ULOC_ACTUAL_LOCALE:
312 return actualLocaleIsSameAsValid ? validLocale : tailoring->actualLocale;
313 case ULOC_VALID_LOCALE:
314 return validLocale;
315 case ULOC_REQUESTED_LOCALE:
316 default:
317 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
318 return Locale::getRoot();
319 }
320}
321
// Returns the name of the locale of the requested type as a C string.
// The locale is selected as in getLocale(); unsupported types set
// U_ILLEGAL_ARGUMENT_ERROR and return nullptr.
// Returns nullptr if errorCode already indicates failure or if the selected
// locale is bogus. An empty name is reported as "root".
// The returned pointer is either that literal or the result of
// Locale::getName() on a member locale.
322const char *
323RuleBasedCollator::internalGetLocaleID(ULocDataLocaleType type, UErrorCode &errorCode) const {
324 if(U_FAILURE(errorCode)) {
325 return nullptr;
326 }
327 const Locale *result;
328 switch(type) {
329 case ULOC_ACTUAL_LOCALE:
330 result = actualLocaleIsSameAsValid ? &validLocale : &tailoring->actualLocale;
331 break;
332 case ULOC_VALID_LOCALE:
333 result = &validLocale;
334 break;
335 case ULOC_REQUESTED_LOCALE:
336 default:
337 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
338 return nullptr;
339 }
340 if(result->isBogus()) { return nullptr; }
341 const char *id = result->getName();
342 return id[0] == 0 ? "root" : id;
343}
344
// Returns a reference to the rule string stored in the tailoring.
345const UnicodeString&
346RuleBasedCollator::getRules() const {
347 return tailoring->rules;
348}
349
// Writes rules into `buffer`.
//   delta == UCOL_TAILORING_ONLY: buffer receives tailoring->rules.
//   otherwise: buffer is emptied, the root rules are appended via
//              CollationLoader::appendRootRules(), then tailoring->rules.
// In the second case getTerminatedBuffer() is called on the result and its
// return value is discarded.
350void
351RuleBasedCollator::getRules(UColRuleOption delta, UnicodeString &buffer) const {
352 if(delta == UCOL_TAILORING_ONLY) {
353 buffer = tailoring->rules;
354 return;
355 }
356 // UCOL_FULL_RULES
357 buffer.remove();
358 CollationLoader::appendRootRules(buffer);
359 buffer.append(tailoring->rules).getTerminatedBuffer();
360}
361
// Fills `version` with the collator version.
// Copies U_MAX_VERSION_LENGTH bytes (4 in the expansion shown) from
// tailoring->version, then adds
// (UCOL_RUNTIME_VERSION << 4) + (UCOL_RUNTIME_VERSION >> 4) to version[0].
// The trailing 9 in the listing is the expanded value of
// UCOL_RUNTIME_VERSION.
362void
363RuleBasedCollator::getVersion(UVersionInfo version) const {
364 uprv_memcpy(version, tailoring->version, U_MAX_VERSION_LENGTH)do { clang diagnostic push clang diagnostic ignored "-Waddress"
(static_cast <bool> (version != __null) ? void (0) : __assert_fail
("version != __null", __builtin_FILE (), __builtin_LINE (), __extension__
__PRETTY_FUNCTION__)); (static_cast <bool> (tailoring->
version != __null) ? void (0) : __assert_fail ("tailoring->version != __null"
, __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__
)); clang diagnostic pop :: memcpy(version, tailoring->version
, 4); } while (false)
;
365 version[0] += (UCOL_RUNTIME_VERSION9 << 4) + (UCOL_RUNTIME_VERSION9 >> 4);
366}
367
// Returns a newly allocated UnicodeSet, or nullptr on failure.
// For the root collator (data->base == nullptr) the set is left empty;
// otherwise it is filled by TailoredSet(...).forData(data, errorCode).
// Returns nullptr if errorCode already indicates failure, if the allocation
// yields nullptr (U_MEMORY_ALLOCATION_ERROR), or if forData() fails, in
// which case the set is deleted first.
// The caller receives a raw owning pointer.
368UnicodeSet *
369RuleBasedCollator::getTailoredSet(UErrorCode &errorCode) const {
370 if(U_FAILURE(errorCode)) { return nullptr; }
371 UnicodeSet *tailored = new UnicodeSet();
372 if(tailored == nullptr) {
373 errorCode = U_MEMORY_ALLOCATION_ERROR;
374 return nullptr;
375 }
376 if(data->base != nullptr) {
377 TailoredSet(tailored).forData(data, errorCode);
378 if(U_FAILURE(errorCode)) {
379 delete tailored;
380 return nullptr;
381 }
382 }
383 return tailored;
384}
385
// Collects contractions and/or expansions from this collator's data.
//   contractions, expansions: optional output sets; each non-null set is
//                             cleared before collection.
//   addPrefixes:              forwarded to ContractionsAndExpansions.
//   errorCode:                in/out status; nothing is done on entry
//                             failure.
386void
387RuleBasedCollator::internalGetContractionsAndExpansions(
388 UnicodeSet *contractions, UnicodeSet *expansions,
389 UBool addPrefixes, UErrorCode &errorCode) const {
390 if(U_FAILURE(errorCode)) { return; }
391 if(contractions != nullptr) {
392 contractions->clear();
393 }
394 if(expansions != nullptr) {
395 expansions->clear();
396 }
397 ContractionsAndExpansions(contractions, expansions, nullptr, addPrefixes).forData(data, errorCode);
398}
399
// Runs ContractionsAndExpansions::forCodePoint() for code point c with
// `set` as the contractions sink. No expansions sink is supplied and
// addPrefixes is false. `set` is not cleared first.
// Does nothing if errorCode already indicates failure.
400void
401RuleBasedCollator::internalAddContractions(UChar32 c, UnicodeSet &set, UErrorCode &errorCode) const {
402 if(U_FAILURE(errorCode)) { return; }
403 ContractionsAndExpansions(&set, nullptr, nullptr, false).forCodePoint(data, c, errorCode);
404}
405
// Returns the settings object that belongs to the tailoring. The setters
// in this file compare the collator's current `settings` against it.
406const CollationSettings &
407RuleBasedCollator::getDefaultSettings() const {
408 return *tailoring->settings;
409}
410
// Returns the current value of collation attribute `attr`.
// Alternate handling, case-first and strength are read through dedicated
// getters on the settings object. The flag-style attributes (French
// collation, case level, normalization mode, numeric collation) are mapped
// to an option bit and reported as UCOL_ON or UCOL_OFF.
// UCOL_HIRAGANA_QUATERNARY_MODE always reports UCOL_OFF.
// Unknown attributes set U_ILLEGAL_ARGUMENT_ERROR and return UCOL_DEFAULT,
// as does an errorCode that already indicates failure.
411UColAttributeValue
412RuleBasedCollator::getAttribute(UColAttribute attr, UErrorCode &errorCode) const {
413 if(U_FAILURE(errorCode)) { return UCOL_DEFAULT; }
414 int32_t option;
415 switch(attr) {
416 case UCOL_FRENCH_COLLATION:
417 option = CollationSettings::BACKWARD_SECONDARY;
418 break;
419 case UCOL_ALTERNATE_HANDLING:
420 return settings->getAlternateHandling();
421 case UCOL_CASE_FIRST:
422 return settings->getCaseFirst();
423 case UCOL_CASE_LEVEL:
424 option = CollationSettings::CASE_LEVEL;
425 break;
426 case UCOL_NORMALIZATION_MODE:
427 option = CollationSettings::CHECK_FCD;
428 break;
429 case UCOL_STRENGTH:
430 return static_cast<UColAttributeValue>(settings->getStrength());
431 case UCOL_HIRAGANA_QUATERNARY_MODE:
432 // Deprecated attribute, unsettable.
433 return UCOL_OFF;
434 case UCOL_NUMERIC_COLLATION:
435 option = CollationSettings::NUMERIC;
436 break;
437 default:
438 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
439 return UCOL_DEFAULT;
440 }
441 return ((settings->options & option) == 0) ? UCOL_OFF : UCOL_ON;
442}
443
// Sets collation attribute `attr` to `value`.
//   errorCode: in/out status. getAttribute() performs the entry check and
//              rejects unknown attributes.
// Two fast paths return without touching the settings:
//   - value equals the current value: only marks the attribute as
//     explicitly set;
//   - the collator still uses the default settings object and value is
//     UCOL_DEFAULT: only marks the attribute as default.
// Otherwise a writable settings object is obtained via
// SharedObject::copyOnWrite(), the attribute-specific setter is applied
// with the default options as reference, the fast-Latin options are
// refreshed, and the attribute is marked default or explicit.
// NOTE(review): if a setter reports an error, the function returns after
// copyOnWrite() has already run; presumably the writable settings then
// still hold the old values — confirm in CollationSettings.
444void
445RuleBasedCollator::setAttribute(UColAttribute attr, UColAttributeValue value,
446 UErrorCode &errorCode) {
447 UColAttributeValue oldValue = getAttribute(attr, errorCode);
448 if(U_FAILURE(errorCode)) { return; }
449 if(value == oldValue) {
450 setAttributeExplicitly(attr);
451 return;
452 }
453 const CollationSettings &defaultSettings = getDefaultSettings();
454 if(settings == &defaultSettings) {
455 if(value == UCOL_DEFAULT) {
456 setAttributeDefault(attr);
457 return;
458 }
459 }
460 CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);
461 if(ownedSettings == nullptr) {
462 errorCode = U_MEMORY_ALLOCATION_ERROR;
463 return;
464 }
465
466 switch(attr) {
467 case UCOL_FRENCH_COLLATION:
468 ownedSettings->setFlag(CollationSettings::BACKWARD_SECONDARY, value,
469 defaultSettings.options, errorCode);
470 break;
471 case UCOL_ALTERNATE_HANDLING:
472 ownedSettings->setAlternateHandling(value, defaultSettings.options, errorCode);
473 break;
474 case UCOL_CASE_FIRST:
475 ownedSettings->setCaseFirst(value, defaultSettings.options, errorCode);
476 break;
477 case UCOL_CASE_LEVEL:
478 ownedSettings->setFlag(CollationSettings::CASE_LEVEL, value,
479 defaultSettings.options, errorCode);
480 break;
481 case UCOL_NORMALIZATION_MODE:
482 ownedSettings->setFlag(CollationSettings::CHECK_FCD, value,
483 defaultSettings.options, errorCode);
484 break;
485 case UCOL_STRENGTH:
486 ownedSettings->setStrength(value, defaultSettings.options, errorCode);
487 break;
488 case UCOL_HIRAGANA_QUATERNARY_MODE:
489 // Deprecated attribute. Check for valid values but do not change anything.
490 if(value != UCOL_OFF && value != UCOL_ON && value != UCOL_DEFAULT) {
491 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
492 }
493 break;
494 case UCOL_NUMERIC_COLLATION:
495 ownedSettings->setFlag(CollationSettings::NUMERIC, value, defaultSettings.options, errorCode);
496 break;
497 default:
498 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
499 break;
500 }
501 if(U_FAILURE(errorCode)) { return; }
502 setFastLatinOptions(*ownedSettings);
503 if(value == UCOL_DEFAULT) {
504 setAttributeDefault(attr);
505 } else {
506 setAttributeExplicitly(attr);
507 }
508}
509
// Sets the maximum variable reordering group and the matching variable top.
//   group:     UCOL_REORDER_CODE_DEFAULT, or a code in
//              [UCOL_REORDER_CODE_FIRST, UCOL_REORDER_CODE_CURRENCY];
//              anything else yields U_ILLEGAL_ARGUMENT_ERROR.
//   errorCode: in/out status; nothing is done on entry failure.
// Always returns *this.
// Has the same fast paths as setAttribute(): an unchanged value only marks
// ATTR_VARIABLE_TOP as explicit, and UCOL_DEFAULT on the default settings
// only marks it as default.
// Otherwise the settings are made writable, the default group is
// substituted when requested, variableTop becomes the last primary of the
// group, and the fast-Latin options are refreshed.
510Collator &
511RuleBasedCollator::setMaxVariable(UColReorderCode group, UErrorCode &errorCode) {
512 if(U_FAILURE(errorCode)) { return *this; }
513 // Convert the reorder code into a MaxVariable number, or UCOL_DEFAULT=-1.
514 int32_t value;
515 if(group == UCOL_REORDER_CODE_DEFAULT) {
516 value = UCOL_DEFAULT;
517 } else if(UCOL_REORDER_CODE_FIRST <= group && group <= UCOL_REORDER_CODE_CURRENCY) {
518 value = group - UCOL_REORDER_CODE_FIRST;
519 } else {
520 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
521 return *this;
522 }
523 CollationSettings::MaxVariable oldValue = settings->getMaxVariable();
524 if(value == oldValue) {
525 setAttributeExplicitly(ATTR_VARIABLE_TOP);
526 return *this;
527 }
528 const CollationSettings &defaultSettings = getDefaultSettings();
529 if(settings == &defaultSettings) {
530 if(value == UCOL_DEFAULT) {
531 setAttributeDefault(ATTR_VARIABLE_TOP);
532 return *this;
533 }
534 }
535 CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);
536 if(ownedSettings == nullptr) {
537 errorCode = U_MEMORY_ALLOCATION_ERROR;
538 return *this;
539 }
540
// Resolve "default" to the concrete group configured in the default settings.
541 if(group == UCOL_REORDER_CODE_DEFAULT) {
542 group = static_cast<UColReorderCode>(
543 UCOL_REORDER_CODE_FIRST + int32_t{defaultSettings.getMaxVariable()});
544 }
545 uint32_t varTop = data->getLastPrimaryForGroup(group);
546 U_ASSERT(varTop != 0)(static_cast <bool> (varTop != 0) ? void (0) : __assert_fail
("varTop != 0", __builtin_FILE (), __builtin_LINE (), __extension__
__PRETTY_FUNCTION__))
;
547 ownedSettings->setMaxVariable(value, defaultSettings.options, errorCode);
548 if(U_FAILURE(errorCode)) { return *this; }
549 ownedSettings->variableTop = varTop;
550 setFastLatinOptions(*ownedSettings);
551 if(value == UCOL_DEFAULT) {
552 setAttributeDefault(ATTR_VARIABLE_TOP);
553 } else {
554 setAttributeExplicitly(ATTR_VARIABLE_TOP);
555 }
556 return *this;
557}
558
// Returns the current max-variable setting as a reorder code:
// UCOL_REORDER_CODE_FIRST plus the value of settings->getMaxVariable().
559UColReorderCode
560RuleBasedCollator::getMaxVariable() const {
561 return static_cast<UColReorderCode>(UCOL_REORDER_CODE_FIRST + int32_t{settings->getMaxVariable()});
562}
563
// Returns settings->variableTop. The error code parameter is neither read
// nor written.
564uint32_t
565RuleBasedCollator::getVariableTop(UErrorCode & /*errorCode*/) const {
566 return settings->variableTop;
567}
568
// Sets the variable top from a string that must map to exactly one
// collation element.
//   varTop, len: the string; len < 0 means NUL-terminated (measured with
//                u_strlen). A null pointer with non-zero len, or an empty
//                string, yields U_ILLEGAL_ARGUMENT_ERROR.
//   errorCode:   in/out status; nothing is done on entry failure.
// Returns the resulting settings->variableTop, or 0 on any error.
// Two collation elements are fetched, with the FCD-checking iterator unless
// settings->dontCheckFCD() is true. If the first is NO_CE or the second is
// not NO_CE, U_CE_NOT_FOUND_ERROR is set.
// The upper 32 bits of the single CE are handed to the uint32_t overload.
// (u_strlenu_strlen_77 below is the macro name followed by its expansion,
// as shown by the listing.)
569uint32_t
570RuleBasedCollator::setVariableTop(const char16_t *varTop, int32_t len, UErrorCode &errorCode) {
571 if(U_FAILURE(errorCode)) { return 0; }
572 if(varTop == nullptr && len !=0) {
573 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
574 return 0;
575 }
576 if(len < 0) { len = u_strlenu_strlen_77(varTop); }
577 if(len == 0) {
578 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
579 return 0;
580 }
581 UBool numeric = settings->isNumeric();
582 int64_t ce1, ce2;
583 if(settings->dontCheckFCD()) {
584 UTF16CollationIterator ci(data, numeric, varTop, varTop, varTop + len);
585 ce1 = ci.nextCE(errorCode);
586 ce2 = ci.nextCE(errorCode);
587 } else {
588 FCDUTF16CollationIterator ci(data, numeric, varTop, varTop, varTop + len);
589 ce1 = ci.nextCE(errorCode);
590 ce2 = ci.nextCE(errorCode);
591 }
592 if(ce1 == Collation::NO_CE || ce2 != Collation::NO_CE) {
593 errorCode = U_CE_NOT_FOUND_ERROR;
594 return 0;
595 }
596 setVariableTop(static_cast<uint32_t>(ce1 >> 32), errorCode);
597 return settings->variableTop;
598}
599
// Convenience overload: forwards the string's buffer and length to the
// (const char16_t *, int32_t, UErrorCode &) overload and returns its result.
600uint32_t
601RuleBasedCollator::setVariableTop(const UnicodeString &varTop, UErrorCode &errorCode) {
602 return setVariableTop(varTop.getBuffer(), varTop.length(), errorCode);
603}
604
// Sets the variable top from a primary weight.
//   varTop:    the requested primary. If it differs from the current
//              variableTop, it is replaced by the last primary of the
//              reordering group that contains it. Groups outside
//              [UCOL_REORDER_CODE_FIRST, UCOL_REORDER_CODE_CURRENCY] yield
//              U_ILLEGAL_ARGUMENT_ERROR.
//   errorCode: in/out status; nothing is done on entry failure.
// The settings are made writable and updated (max variable, variableTop,
// fast-Latin options) only if the pinned value still differs from the
// current one.
// Finally ATTR_VARIABLE_TOP is marked default or explicit, depending on
// whether the pinned value equals the default settings' variableTop.
605void
606RuleBasedCollator::setVariableTop(uint32_t varTop, UErrorCode &errorCode) {
607 if(U_FAILURE(errorCode)) { return; }
608 if(varTop != settings->variableTop) {
609 // Pin the variable top to the end of the reordering group which contains it.
610 // Only a few special groups are supported.
611 int32_t group = data->getGroupForPrimary(varTop);
612 if(group < UCOL_REORDER_CODE_FIRST || UCOL_REORDER_CODE_CURRENCY < group) {
613 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
614 return;
615 }
616 uint32_t v = data->getLastPrimaryForGroup(group);
617 U_ASSERT(v != 0 && v >= varTop)(static_cast <bool> (v != 0 && v >= varTop) ?
void (0) : __assert_fail ("v != 0 && v >= varTop"
, __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__
))
;
618 varTop = v;
619 if(varTop != settings->variableTop) {
620 CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);
621 if(ownedSettings == nullptr) {
622 errorCode = U_MEMORY_ALLOCATION_ERROR;
623 return;
624 }
625 ownedSettings->setMaxVariable(group - UCOL_REORDER_CODE_FIRST,
626 getDefaultSettings().options, errorCode);
627 if(U_FAILURE(errorCode)) { return; }
628 ownedSettings->variableTop = varTop;
629 setFastLatinOptions(*ownedSettings);
630 }
631 }
632 if(varTop == getDefaultSettings().variableTop) {
633 setAttributeDefault(ATTR_VARIABLE_TOP);
634 } else {
635 setAttributeExplicitly(ATTR_VARIABLE_TOP);
636 }
637}
638
639int32_t
640RuleBasedCollator::getReorderCodes(int32_t *dest, int32_t capacity,
641 UErrorCode &errorCode) const {
642 if(U_FAILURE(errorCode)) { return 0; }
643 if(capacity < 0 || (dest == nullptr && capacity > 0)) {
644 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
645 return 0;
646 }
647 int32_t length = settings->reorderCodesLength;
648 if(length == 0) { return 0; }
649 if(length > capacity) {
650 errorCode = U_BUFFER_OVERFLOW_ERROR;
651 return length;
652 }
653 uprv_memcpy(dest, settings->reorderCodes, length * 4)do { clang diagnostic push clang diagnostic ignored "-Waddress"
(static_cast <bool> (dest != __null) ? void (0) : __assert_fail
("dest != __null", __builtin_FILE (), __builtin_LINE (), __extension__
__PRETTY_FUNCTION__)); (static_cast <bool> (settings->
reorderCodes != __null) ? void (0) : __assert_fail ("settings->reorderCodes != __null"
, __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__
)); clang diagnostic pop :: memcpy(dest, settings->reorderCodes
, length * 4); } while (false)
;
654 return length;
655}
656
657void
658RuleBasedCollator::setReorderCodes(const int32_t *reorderCodes, int32_t length,
659 UErrorCode &errorCode) {
660 if(U_FAILURE(errorCode)) { return; }
661 if(length < 0 || (reorderCodes == nullptr && length > 0)) {
1
Assuming 'length' is >= 0
2
Assuming pointer value is null
3
Assuming 'length' is <= 0
662 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
663 return;
664 }
665 if(length
3.1
'length' is not equal to 1
== 1 && reorderCodes[0] == UCOL_REORDER_CODE_NONE) {
666 length = 0;
667 }
668 if(length == settings->reorderCodesLength &&
4
Assuming 'length' is equal to field 'reorderCodesLength'
669 uprv_memcmp(reorderCodes, settings->reorderCodes, length * 4):: memcmp(reorderCodes, settings->reorderCodes,length * 4) == 0) {
5
Null pointer passed to 1st parameter expecting 'nonnull'
670 return;
671 }
672 const CollationSettings &defaultSettings = getDefaultSettings();
673 if(length == 1 && reorderCodes[0] == UCOL_REORDER_CODE_DEFAULT) {
674 if(settings != &defaultSettings) {
675 CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);
676 if(ownedSettings == nullptr) {
677 errorCode = U_MEMORY_ALLOCATION_ERROR;
678 return;
679 }
680 ownedSettings->copyReorderingFrom(defaultSettings, errorCode);
681 setFastLatinOptions(*ownedSettings);
682 }
683 return;
684 }
685 CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);
686 if(ownedSettings == nullptr) {
687 errorCode = U_MEMORY_ALLOCATION_ERROR;
688 return;
689 }
690 ownedSettings->setReordering(*data, reorderCodes, length, errorCode);
691 setFastLatinOptions(*ownedSettings);
692}
693
694void
695RuleBasedCollator::setFastLatinOptions(CollationSettings &ownedSettings) const {
696 ownedSettings.fastLatinOptions = CollationFastLatin::getOptions(
697 data, ownedSettings,
698 ownedSettings.fastLatinPrimaries, UPRV_LENGTHOF(ownedSettings.fastLatinPrimaries)(int32_t)(sizeof(ownedSettings.fastLatinPrimaries)/sizeof((ownedSettings
.fastLatinPrimaries)[0]))
);
699}
700
701UCollationResult
702RuleBasedCollator::compare(const UnicodeString &left, const UnicodeString &right,
703 UErrorCode &errorCode) const {
704 if(U_FAILURE(errorCode)) { return UCOL_EQUAL; }
705 return doCompare(left.getBuffer(), left.length(),
706 right.getBuffer(), right.length(), errorCode);
707}
708
709UCollationResult
710RuleBasedCollator::compare(const UnicodeString &left, const UnicodeString &right,
711 int32_t length, UErrorCode &errorCode) const {
712 if(U_FAILURE(errorCode) || length == 0) { return UCOL_EQUAL; }
713 if(length < 0) {
714 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
715 return UCOL_EQUAL;
716 }
717 int32_t leftLength = left.length();
718 int32_t rightLength = right.length();
719 if(leftLength > length) { leftLength = length; }
720 if(rightLength > length) { rightLength = length; }
721 return doCompare(left.getBuffer(), leftLength,
722 right.getBuffer(), rightLength, errorCode);
723}
724
725UCollationResult
726RuleBasedCollator::compare(const char16_t *left, int32_t leftLength,
727 const char16_t *right, int32_t rightLength,
728 UErrorCode &errorCode) const {
729 if(U_FAILURE(errorCode)) { return UCOL_EQUAL; }
730 if((left == nullptr && leftLength != 0) || (right == nullptr && rightLength != 0)) {
731 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
732 return UCOL_EQUAL;
733 }
734 // Make sure both or neither strings have a known length.
735 // We do not optimize for mixed length/termination.
736 if(leftLength >= 0) {
737 if(rightLength < 0) { rightLength = u_strlenu_strlen_77(right); }
738 } else {
739 if(rightLength >= 0) { leftLength = u_strlenu_strlen_77(left); }
740 }
741 return doCompare(left, leftLength, right, rightLength, errorCode);
742}
743
744UCollationResult
745RuleBasedCollator::compareUTF8(const StringPiece &left, const StringPiece &right,
746 UErrorCode &errorCode) const {
747 if(U_FAILURE(errorCode)) { return UCOL_EQUAL; }
748 const uint8_t *leftBytes = reinterpret_cast<const uint8_t *>(left.data());
749 const uint8_t *rightBytes = reinterpret_cast<const uint8_t *>(right.data());
750 if((leftBytes == nullptr && !left.empty()) || (rightBytes == nullptr && !right.empty())) {
751 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
752 return UCOL_EQUAL;
753 }
754 return doCompare(leftBytes, left.length(), rightBytes, right.length(), errorCode);
755}
756
757UCollationResult
758RuleBasedCollator::internalCompareUTF8(const char *left, int32_t leftLength,
759 const char *right, int32_t rightLength,
760 UErrorCode &errorCode) const {
761 if(U_FAILURE(errorCode)) { return UCOL_EQUAL; }
762 if((left == nullptr && leftLength != 0) || (right == nullptr && rightLength != 0)) {
763 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
764 return UCOL_EQUAL;
765 }
766 // Make sure both or neither strings have a known length.
767 // We do not optimize for mixed length/termination.
768 if(leftLength >= 0) {
769 if(rightLength < 0) { rightLength = static_cast<int32_t>(uprv_strlen(right):: strlen(right)); }
770 } else {
771 if(rightLength >= 0) { leftLength = static_cast<int32_t>(uprv_strlen(left):: strlen(left)); }
772 }
773 return doCompare(reinterpret_cast<const uint8_t *>(left), leftLength,
774 reinterpret_cast<const uint8_t *>(right), rightLength, errorCode);
775}
776
777namespace {
778
779/**
780 * Abstract iterator for identical-level string comparisons.
781 * Returns FCD code points and handles temporary switching to NFD.
782 */
783class NFDIterator : public UObject {
784public:
785 NFDIterator() : index(-1), length(0) {}
786 virtual ~NFDIterator() {}
787 /**
788 * Returns the next code point from the internal normalization buffer,
789 * or else the next text code point.
790 * Returns -1 at the end of the text.
791 */
792 UChar32 nextCodePoint() {
793 if(index >= 0) {
794 if(index == length) {
795 index = -1;
796 } else {
797 UChar32 c;
798 U16_NEXT_UNSAFE(decomp, index, c)do { (c)=(decomp)[(index)++]; if((((c)&0xfffffc00)==0xd800
)) { (c)=(((UChar32)((c))<<10UL)+(UChar32)((decomp)[(index
)++])-((0xd800<<10UL)+0xdc00-0x10000)); } } while (false
)
;
799 return c;
800 }
801 }
802 return nextRawCodePoint();
803 }
804 /**
805 * @param nfcImpl
806 * @param c the last code point returned by nextCodePoint() or nextDecomposedCodePoint()
807 * @return the first code point in c's decomposition,
808 * or c itself if it was decomposed already or if it does not decompose
809 */
810 UChar32 nextDecomposedCodePoint(const Normalizer2Impl &nfcImpl, UChar32 c) {
811 if(index >= 0) { return c; }
812 decomp = nfcImpl.getDecomposition(c, buffer, length);
813 if(decomp == nullptr) { return c; }
814 index = 0;
815 U16_NEXT_UNSAFE(decomp, index, c)do { (c)=(decomp)[(index)++]; if((((c)&0xfffffc00)==0xd800
)) { (c)=(((UChar32)((c))<<10UL)+(UChar32)((decomp)[(index
)++])-((0xd800<<10UL)+0xdc00-0x10000)); } } while (false
)
;
816 return c;
817 }
818protected:
819 /**
820 * Returns the next text code point in FCD order.
821 * Returns -1 at the end of the text.
822 */
823 virtual UChar32 nextRawCodePoint() = 0;
824private:
825 const char16_t *decomp;
826 char16_t buffer[4];
827 int32_t index;
828 int32_t length;
829};
830
831class UTF16NFDIterator : public NFDIterator {
832public:
833 UTF16NFDIterator(const char16_t *text, const char16_t *textLimit) : s(text), limit(textLimit) {}
834protected:
835 virtual UChar32 nextRawCodePoint() override {
836 if(s == limit) { return U_SENTINEL(-1); }
837 UChar32 c = *s++;
838 if(limit == nullptr && c == 0) {
839 s = nullptr;
840 return U_SENTINEL(-1);
841 }
842 char16_t trail;
843 if(U16_IS_LEAD(c)(((c)&0xfffffc00)==0xd800) && s != limit && U16_IS_TRAIL(trail = *s)(((trail = *s)&0xfffffc00)==0xdc00)) {
844 ++s;
845 c = U16_GET_SUPPLEMENTARY(c, trail)(((UChar32)(c)<<10UL)+(UChar32)(trail)-((0xd800<<
10UL)+0xdc00-0x10000))
;
846 }
847 return c;
848 }
849
850 const char16_t *s;
851 const char16_t *limit;
852};
853
854class FCDUTF16NFDIterator : public UTF16NFDIterator {
855public:
856 FCDUTF16NFDIterator(const Normalizer2Impl &nfcImpl, const char16_t *text, const char16_t *textLimit)
857 : UTF16NFDIterator(nullptr, nullptr) {
858 UErrorCode errorCode = U_ZERO_ERROR;
859 const char16_t *spanLimit = nfcImpl.makeFCD(text, textLimit, nullptr, errorCode);
860 if(U_FAILURE(errorCode)) { return; }
861 if(spanLimit == textLimit || (textLimit == nullptr && *spanLimit == 0)) {
862 s = text;
863 limit = spanLimit;
864 } else {
865 str.setTo(text, static_cast<int32_t>(spanLimit - text));
866 {
867 ReorderingBuffer r_buffer(nfcImpl, str);
868 if(r_buffer.init(str.length(), errorCode)) {
869 nfcImpl.makeFCD(spanLimit, textLimit, &r_buffer, errorCode);
870 }
871 }
872 if(U_SUCCESS(errorCode)) {
873 s = str.getBuffer();
874 limit = s + str.length();
875 }
876 }
877 }
878private:
879 UnicodeString str;
880};
881
882class UTF8NFDIterator : public NFDIterator {
883public:
884 UTF8NFDIterator(const uint8_t *text, int32_t textLength)
885 : s(text), pos(0), length(textLength) {}
886protected:
887 virtual UChar32 nextRawCodePoint() override {
888 if(pos == length || (s[pos] == 0 && length < 0)) { return U_SENTINEL(-1); }
889 UChar32 c;
890 U8_NEXT_OR_FFFD(s, pos, length, c)do { (c)=(uint8_t)(s)[(pos)++]; if(!(((c)&0x80)==0)) { uint8_t
__t = 0; if((pos)!=(length) && ((c)>=0xe0 ? ((c)<
0xf0 ? "\x20\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x10\x30\x30"
[(c)&=0xf]&(1<<((__t=(s)[pos])>>5)) &&
(__t&=0x3f, 1) : ((c)-=0xf0)<=4 && "\x00\x00\x00\x00\x00\x00\x00\x00\x1E\x0F\x0F\x0F\x00\x00\x00\x00"
[(__t=(s)[pos])>>4]&(1<<(c)) && ((c)=
((c)<<6)|(__t&0x3f), ++(pos)!=(length)) && (
__t=(s)[pos]-0x80)<=0x3f) && ((c)=((c)<<6)|__t
, ++(pos)!=(length)) : (c)>=0xc2 && ((c)&=0x1f
, 1)) && (__t=(s)[pos]-0x80)<=0x3f && ((c)
=((c)<<6)|__t, ++(pos), 1)) { } else { (c)=(0xfffd); } }
} while (false)
;
891 return c;
892 }
893
894 const uint8_t *s;
895 int32_t pos;
896 int32_t length;
897};
898
899class FCDUTF8NFDIterator : public NFDIterator {
900public:
901 FCDUTF8NFDIterator(const CollationData *data, const uint8_t *text, int32_t textLength)
902 : u8ci(data, false, text, 0, textLength) {}
903protected:
904 virtual UChar32 nextRawCodePoint() override {
905 UErrorCode errorCode = U_ZERO_ERROR;
906 return u8ci.nextCodePoint(errorCode);
907 }
908private:
909 FCDUTF8CollationIterator u8ci;
910};
911
912class UIterNFDIterator : public NFDIterator {
913public:
914 UIterNFDIterator(UCharIterator &it) : iter(it) {}
915protected:
916 virtual UChar32 nextRawCodePoint() override {
917 return uiter_next32uiter_next32_77(&iter);
918 }
919private:
920 UCharIterator &iter;
921};
922
923class FCDUIterNFDIterator : public NFDIterator {
924public:
925 FCDUIterNFDIterator(const CollationData *data, UCharIterator &it, int32_t startIndex)
926 : uici(data, false, it, startIndex) {}
927protected:
928 virtual UChar32 nextRawCodePoint() override {
929 UErrorCode errorCode = U_ZERO_ERROR;
930 return uici.nextCodePoint(errorCode);
931 }
932private:
933 FCDUIterCollationIterator uici;
934};
935
936UCollationResult compareNFDIter(const Normalizer2Impl &nfcImpl,
937 NFDIterator &left, NFDIterator &right) {
938 for(;;) {
939 // Fetch the next FCD code point from each string.
940 UChar32 leftCp = left.nextCodePoint();
941 UChar32 rightCp = right.nextCodePoint();
942 if(leftCp == rightCp) {
943 if(leftCp < 0) { break; }
944 continue;
945 }
946 // If they are different, then decompose each and compare again.
947 if(leftCp < 0) {
948 leftCp = -2; // end of string
949 } else if(leftCp == 0xfffe) {
950 leftCp = -1; // U+FFFE: merge separator
951 } else {
952 leftCp = left.nextDecomposedCodePoint(nfcImpl, leftCp);
953 }
954 if(rightCp < 0) {
955 rightCp = -2; // end of string
956 } else if(rightCp == 0xfffe) {
957 rightCp = -1; // U+FFFE: merge separator
958 } else {
959 rightCp = right.nextDecomposedCodePoint(nfcImpl, rightCp);
960 }
961 if(leftCp < rightCp) { return UCOL_LESS; }
962 if(leftCp > rightCp) { return UCOL_GREATER; }
963 }
964 return UCOL_EQUAL;
965}
966
967} // namespace
968
969UCollationResult
970RuleBasedCollator::doCompare(const char16_t *left, int32_t leftLength,
971 const char16_t *right, int32_t rightLength,
972 UErrorCode &errorCode) const {
973 // U_FAILURE(errorCode) checked by caller.
974 if(left == right && leftLength == rightLength) {
975 return UCOL_EQUAL;
976 }
977
978 // Identical-prefix test.
979 const char16_t *leftLimit;
980 const char16_t *rightLimit;
981 int32_t equalPrefixLength = 0;
982 if(leftLength < 0) {
983 leftLimit = nullptr;
984 rightLimit = nullptr;
985 char16_t c;
986 while((c = left[equalPrefixLength]) == right[equalPrefixLength]) {
987 if(c == 0) { return UCOL_EQUAL; }
988 ++equalPrefixLength;
989 }
990 } else {
991 leftLimit = left + leftLength;
992 rightLimit = right + rightLength;
993 for(;;) {
994 if(equalPrefixLength == leftLength) {
995 if(equalPrefixLength == rightLength) { return UCOL_EQUAL; }
996 break;
997 } else if(equalPrefixLength == rightLength ||
998 left[equalPrefixLength] != right[equalPrefixLength]) {
999 break;
1000 }
1001 ++equalPrefixLength;
1002 }
1003 }
1004
1005 UBool numeric = settings->isNumeric();
1006 if(equalPrefixLength > 0) {
1007 if((equalPrefixLength != leftLength &&
1008 data->isUnsafeBackward(left[equalPrefixLength], numeric)) ||
1009 (equalPrefixLength != rightLength &&
1010 data->isUnsafeBackward(right[equalPrefixLength], numeric))) {
1011 // Identical prefix: Back up to the start of a contraction or reordering sequence.
1012 while(--equalPrefixLength > 0 &&
1013 data->isUnsafeBackward(left[equalPrefixLength], numeric)) {}
1014 }
1015 // Notes:
1016 // - A longer string can compare equal to a prefix of it if only ignorables follow.
1017 // - With a backward level, a longer string can compare less-than a prefix of it.
1018
1019 // Pass the actual start of each string into the CollationIterators,
1020 // plus the equalPrefixLength position,
1021 // so that prefix matches back into the equal prefix work.
1022 }
1023
1024 int32_t result;
1025 int32_t fastLatinOptions = settings->fastLatinOptions;
1026 if(fastLatinOptions >= 0 &&
1027 (equalPrefixLength == leftLength ||
1028 left[equalPrefixLength] <= CollationFastLatin::LATIN_MAX) &&
1029 (equalPrefixLength == rightLength ||
1030 right[equalPrefixLength] <= CollationFastLatin::LATIN_MAX)) {
1031 if(leftLength >= 0) {
1032 result = CollationFastLatin::compareUTF16(data->fastLatinTable,
1033 settings->fastLatinPrimaries,
1034 fastLatinOptions,
1035 left + equalPrefixLength,
1036 leftLength - equalPrefixLength,
1037 right + equalPrefixLength,
1038 rightLength - equalPrefixLength);
1039 } else {
1040 result = CollationFastLatin::compareUTF16(data->fastLatinTable,
1041 settings->fastLatinPrimaries,
1042 fastLatinOptions,
1043 left + equalPrefixLength, -1,
1044 right + equalPrefixLength, -1);
1045 }
1046 } else {
1047 result = CollationFastLatin::BAIL_OUT_RESULT;
1048 }
1049
1050 if(result == CollationFastLatin::BAIL_OUT_RESULT) {
1051 if(settings->dontCheckFCD()) {
1052 UTF16CollationIterator leftIter(data, numeric,
1053 left, left + equalPrefixLength, leftLimit);
1054 UTF16CollationIterator rightIter(data, numeric,
1055 right, right + equalPrefixLength, rightLimit);
1056 result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
1057 } else {
1058 FCDUTF16CollationIterator leftIter(data, numeric,
1059 left, left + equalPrefixLength, leftLimit);
1060 FCDUTF16CollationIterator rightIter(data, numeric,
1061 right, right + equalPrefixLength, rightLimit);
1062 result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
1063 }
1064 }
1065 if(result != UCOL_EQUAL || settings->getStrength() < UCOL_IDENTICAL || U_FAILURE(errorCode)) {
1066 return static_cast<UCollationResult>(result);
1067 }
1068
1069 // Note: If NUL-terminated, we could get the actual limits from the iterators now.
1070 // That would complicate the iterators a bit, NUL-terminated strings are only a C convenience,
1071 // and the benefit seems unlikely to be measurable.
1072
1073 // Compare identical level.
1074 const Normalizer2Impl &nfcImpl = data->nfcImpl;
1075 left += equalPrefixLength;
1076 right += equalPrefixLength;
1077 if(settings->dontCheckFCD()) {
1078 UTF16NFDIterator leftIter(left, leftLimit);
1079 UTF16NFDIterator rightIter(right, rightLimit);
1080 return compareNFDIter(nfcImpl, leftIter, rightIter);
1081 } else {
1082 FCDUTF16NFDIterator leftIter(nfcImpl, left, leftLimit);
1083 FCDUTF16NFDIterator rightIter(nfcImpl, right, rightLimit);
1084 return compareNFDIter(nfcImpl, leftIter, rightIter);
1085 }
1086}
1087
1088UCollationResult
1089RuleBasedCollator::doCompare(const uint8_t *left, int32_t leftLength,
1090 const uint8_t *right, int32_t rightLength,
1091 UErrorCode &errorCode) const {
1092 // U_FAILURE(errorCode) checked by caller.
1093 if(left == right && leftLength == rightLength) {
1094 return UCOL_EQUAL;
1095 }
1096
1097 // Identical-prefix test.
1098 int32_t equalPrefixLength = 0;
1099 if(leftLength < 0) {
1100 uint8_t c;
1101 while((c = left[equalPrefixLength]) == right[equalPrefixLength]) {
1102 if(c == 0) { return UCOL_EQUAL; }
1103 ++equalPrefixLength;
1104 }
1105 } else {
1106 for(;;) {
1107 if(equalPrefixLength == leftLength) {
1108 if(equalPrefixLength == rightLength) { return UCOL_EQUAL; }
1109 break;
1110 } else if(equalPrefixLength == rightLength ||
1111 left[equalPrefixLength] != right[equalPrefixLength]) {
1112 break;
1113 }
1114 ++equalPrefixLength;
1115 }
1116 }
1117 // Back up to the start of a partially-equal code point.
1118 if(equalPrefixLength > 0 &&
1119 ((equalPrefixLength != leftLength && U8_IS_TRAIL(left[equalPrefixLength])((int8_t)(left[equalPrefixLength])<-0x40)) ||
1120 (equalPrefixLength != rightLength && U8_IS_TRAIL(right[equalPrefixLength])((int8_t)(right[equalPrefixLength])<-0x40)))) {
1121 while(--equalPrefixLength > 0 && U8_IS_TRAIL(left[equalPrefixLength])((int8_t)(left[equalPrefixLength])<-0x40)) {}
1122 }
1123
1124 UBool numeric = settings->isNumeric();
1125 if(equalPrefixLength > 0) {
1126 UBool unsafe = false;
1127 if(equalPrefixLength != leftLength) {
1128 int32_t i = equalPrefixLength;
1129 UChar32 c;
1130 U8_NEXT_OR_FFFD(left, i, leftLength, c)do { (c)=(uint8_t)(left)[(i)++]; if(!(((c)&0x80)==0)) { uint8_t
__t = 0; if((i)!=(leftLength) && ((c)>=0xe0 ? ((c
)<0xf0 ? "\x20\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x10\x30\x30"
[(c)&=0xf]&(1<<((__t=(left)[i])>>5)) &&
(__t&=0x3f, 1) : ((c)-=0xf0)<=4 && "\x00\x00\x00\x00\x00\x00\x00\x00\x1E\x0F\x0F\x0F\x00\x00\x00\x00"
[(__t=(left)[i])>>4]&(1<<(c)) && ((c)
=((c)<<6)|(__t&0x3f), ++(i)!=(leftLength)) &&
(__t=(left)[i]-0x80)<=0x3f) && ((c)=((c)<<6
)|__t, ++(i)!=(leftLength)) : (c)>=0xc2 && ((c)&=
0x1f, 1)) && (__t=(left)[i]-0x80)<=0x3f &&
((c)=((c)<<6)|__t, ++(i), 1)) { } else { (c)=(0xfffd);
} } } while (false)
;
1131 unsafe = data->isUnsafeBackward(c, numeric);
1132 }
1133 if(!unsafe && equalPrefixLength != rightLength) {
1134 int32_t i = equalPrefixLength;
1135 UChar32 c;
1136 U8_NEXT_OR_FFFD(right, i, rightLength, c)do { (c)=(uint8_t)(right)[(i)++]; if(!(((c)&0x80)==0)) { uint8_t
__t = 0; if((i)!=(rightLength) && ((c)>=0xe0 ? ((
c)<0xf0 ? "\x20\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x10\x30\x30"
[(c)&=0xf]&(1<<((__t=(right)[i])>>5)) &&
(__t&=0x3f, 1) : ((c)-=0xf0)<=4 && "\x00\x00\x00\x00\x00\x00\x00\x00\x1E\x0F\x0F\x0F\x00\x00\x00\x00"
[(__t=(right)[i])>>4]&(1<<(c)) && ((c
)=((c)<<6)|(__t&0x3f), ++(i)!=(rightLength)) &&
(__t=(right)[i]-0x80)<=0x3f) && ((c)=((c)<<
6)|__t, ++(i)!=(rightLength)) : (c)>=0xc2 && ((c)&=
0x1f, 1)) && (__t=(right)[i]-0x80)<=0x3f &&
((c)=((c)<<6)|__t, ++(i), 1)) { } else { (c)=(0xfffd);
} } } while (false)
;
1137 unsafe = data->isUnsafeBackward(c, numeric);
1138 }
1139 if(unsafe) {
1140 // Identical prefix: Back up to the start of a contraction or reordering sequence.
1141 UChar32 c;
1142 do {
1143 U8_PREV_OR_FFFD(left, 0, equalPrefixLength, c)do { (c)=(uint8_t)(left)[--(equalPrefixLength)]; if(!(((c)&
0x80)==0)) { (c)=utf8_prevCharSafeBody_77((const uint8_t *)left
, 0, &(equalPrefixLength), c, -3); } } while (false)
;
1144 } while(equalPrefixLength > 0 && data->isUnsafeBackward(c, numeric));
1145 }
1146 // See the notes in the UTF-16 version.
1147
1148 // Pass the actual start of each string into the CollationIterators,
1149 // plus the equalPrefixLength position,
1150 // so that prefix matches back into the equal prefix work.
1151 }
1152
1153 int32_t result;
1154 int32_t fastLatinOptions = settings->fastLatinOptions;
1155 if(fastLatinOptions >= 0 &&
1156 (equalPrefixLength == leftLength ||
1157 left[equalPrefixLength] <= CollationFastLatin::LATIN_MAX_UTF8_LEAD) &&
1158 (equalPrefixLength == rightLength ||
1159 right[equalPrefixLength] <= CollationFastLatin::LATIN_MAX_UTF8_LEAD)) {
1160 if(leftLength >= 0) {
1161 result = CollationFastLatin::compareUTF8(data->fastLatinTable,
1162 settings->fastLatinPrimaries,
1163 fastLatinOptions,
1164 left + equalPrefixLength,
1165 leftLength - equalPrefixLength,
1166 right + equalPrefixLength,
1167 rightLength - equalPrefixLength);
1168 } else {
1169 result = CollationFastLatin::compareUTF8(data->fastLatinTable,
1170 settings->fastLatinPrimaries,
1171 fastLatinOptions,
1172 left + equalPrefixLength, -1,
1173 right + equalPrefixLength, -1);
1174 }
1175 } else {
1176 result = CollationFastLatin::BAIL_OUT_RESULT;
1177 }
1178
1179 if(result == CollationFastLatin::BAIL_OUT_RESULT) {
1180 if(settings->dontCheckFCD()) {
1181 UTF8CollationIterator leftIter(data, numeric, left, equalPrefixLength, leftLength);
1182 UTF8CollationIterator rightIter(data, numeric, right, equalPrefixLength, rightLength);
1183 result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
1184 } else {
1185 FCDUTF8CollationIterator leftIter(data, numeric, left, equalPrefixLength, leftLength);
1186 FCDUTF8CollationIterator rightIter(data, numeric, right, equalPrefixLength, rightLength);
1187 result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
1188 }
1189 }
1190 if(result != UCOL_EQUAL || settings->getStrength() < UCOL_IDENTICAL || U_FAILURE(errorCode)) {
1191 return static_cast<UCollationResult>(result);
1192 }
1193
1194 // Note: If NUL-terminated, we could get the actual limits from the iterators now.
1195 // That would complicate the iterators a bit, NUL-terminated strings are only a C convenience,
1196 // and the benefit seems unlikely to be measurable.
1197
1198 // Compare identical level.
1199 const Normalizer2Impl &nfcImpl = data->nfcImpl;
1200 left += equalPrefixLength;
1201 right += equalPrefixLength;
1202 if(leftLength > 0) {
1203 leftLength -= equalPrefixLength;
1204 rightLength -= equalPrefixLength;
1205 }
1206 if(settings->dontCheckFCD()) {
1207 UTF8NFDIterator leftIter(left, leftLength);
1208 UTF8NFDIterator rightIter(right, rightLength);
1209 return compareNFDIter(nfcImpl, leftIter, rightIter);
1210 } else {
1211 FCDUTF8NFDIterator leftIter(data, left, leftLength);
1212 FCDUTF8NFDIterator rightIter(data, right, rightLength);
1213 return compareNFDIter(nfcImpl, leftIter, rightIter);
1214 }
1215}
1216
1217UCollationResult
1218RuleBasedCollator::compare(UCharIterator &left, UCharIterator &right,
1219 UErrorCode &errorCode) const {
1220 if(U_FAILURE(errorCode) || &left == &right) { return UCOL_EQUAL; }
1221 UBool numeric = settings->isNumeric();
1222
1223 // Identical-prefix test.
1224 int32_t equalPrefixLength = 0;
1225 {
1226 UChar32 leftUnit;
1227 UChar32 rightUnit;
1228 while((leftUnit = left.next(&left)) == (rightUnit = right.next(&right))) {
1229 if(leftUnit < 0) { return UCOL_EQUAL; }
1230 ++equalPrefixLength;
1231 }
1232
1233 // Back out the code units that differed, for the real collation comparison.
1234 if(leftUnit >= 0) { left.previous(&left); }
1235 if(rightUnit >= 0) { right.previous(&right); }
1236
1237 if(equalPrefixLength > 0) {
1238 if((leftUnit >= 0 && data->isUnsafeBackward(leftUnit, numeric)) ||
1239 (rightUnit >= 0 && data->isUnsafeBackward(rightUnit, numeric))) {
1240 // Identical prefix: Back up to the start of a contraction or reordering sequence.
1241 do {
1242 --equalPrefixLength;
1243 leftUnit = left.previous(&left);
1244 right.previous(&right);
1245 } while(equalPrefixLength > 0 && data->isUnsafeBackward(leftUnit, numeric));
1246 }
1247 // See the notes in the UTF-16 version.
1248 }
1249 }
1250
1251 UCollationResult result;
1252 if(settings->dontCheckFCD()) {
1253 UIterCollationIterator leftIter(data, numeric, left);
1254 UIterCollationIterator rightIter(data, numeric, right);
1255 result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
1256 } else {
1257 FCDUIterCollationIterator leftIter(data, numeric, left, equalPrefixLength);
1258 FCDUIterCollationIterator rightIter(data, numeric, right, equalPrefixLength);
1259 result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
1260 }
1261 if(result != UCOL_EQUAL || settings->getStrength() < UCOL_IDENTICAL || U_FAILURE(errorCode)) {
1262 return result;
1263 }
1264
1265 // Compare identical level.
1266 left.move(&left, equalPrefixLength, UITER_ZERO);
1267 right.move(&right, equalPrefixLength, UITER_ZERO);
1268 const Normalizer2Impl &nfcImpl = data->nfcImpl;
1269 if(settings->dontCheckFCD()) {
1270 UIterNFDIterator leftIter(left);
1271 UIterNFDIterator rightIter(right);
1272 return compareNFDIter(nfcImpl, leftIter, rightIter);
1273 } else {
1274 FCDUIterNFDIterator leftIter(data, left, equalPrefixLength);
1275 FCDUIterNFDIterator rightIter(data, right, equalPrefixLength);
1276 return compareNFDIter(nfcImpl, leftIter, rightIter);
1277 }
1278}
1279
1280CollationKey &
1281RuleBasedCollator::getCollationKey(const UnicodeString &s, CollationKey &key,
1282 UErrorCode &errorCode) const {
1283 return getCollationKey(s.getBuffer(), s.length(), key, errorCode);
1284}
1285
1286CollationKey &
1287RuleBasedCollator::getCollationKey(const char16_t *s, int32_t length, CollationKey& key,
1288 UErrorCode &errorCode) const {
1289 if(U_FAILURE(errorCode)) {
1290 return key.setToBogus();
1291 }
1292 if(s == nullptr && length != 0) {
1293 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
1294 return key.setToBogus();
1295 }
1296 key.reset(); // resets the "bogus" state
1297 CollationKeyByteSink sink(key);
1298 writeSortKey(s, length, sink, errorCode);
1299 if(U_FAILURE(errorCode)) {
1300 key.setToBogus();
1301 } else if(key.isBogus()) {
1302 errorCode = U_MEMORY_ALLOCATION_ERROR;
1303 } else {
1304 key.setLength(sink.NumberOfBytesAppended());
1305 }
1306 return key;
1307}
1308
1309int32_t
1310RuleBasedCollator::getSortKey(const UnicodeString &s,
1311 uint8_t *dest, int32_t capacity) const {
1312 return getSortKey(s.getBuffer(), s.length(), dest, capacity);
1313}
1314
1315int32_t
1316RuleBasedCollator::getSortKey(const char16_t *s, int32_t length,
1317 uint8_t *dest, int32_t capacity) const {
1318 if((s == nullptr && length != 0) || capacity < 0 || (dest == nullptr && capacity > 0)) {
1319 return 0;
1320 }
1321 uint8_t noDest[1] = { 0 };
1322 if(dest == nullptr) {
1323 // Distinguish pure preflighting from an allocation error.
1324 dest = noDest;
1325 capacity = 0;
1326 }
1327 FixedSortKeyByteSink sink(reinterpret_cast<char *>(dest), capacity);
1328 UErrorCode errorCode = U_ZERO_ERROR;
1329 writeSortKey(s, length, sink, errorCode);
1330 return U_SUCCESS(errorCode) ? sink.NumberOfBytesAppended() : 0;
1331}
1332
1333void
1334RuleBasedCollator::writeSortKey(const char16_t *s, int32_t length,
1335 SortKeyByteSink &sink, UErrorCode &errorCode) const {
1336 if(U_FAILURE(errorCode)) { return; }
1337 const char16_t *limit = (length >= 0) ? s + length : nullptr;
1338 UBool numeric = settings->isNumeric();
1339 CollationKeys::LevelCallback callback;
1340 if(settings->dontCheckFCD()) {
1341 UTF16CollationIterator iter(data, numeric, s, s, limit);
1342 CollationKeys::writeSortKeyUpToQuaternary(iter, data->compressibleBytes, *settings,
1343 sink, Collation::PRIMARY_LEVEL,
1344 callback, true, errorCode);
1345 } else {
1346 FCDUTF16CollationIterator iter(data, numeric, s, s, limit);
1347 CollationKeys::writeSortKeyUpToQuaternary(iter, data->compressibleBytes, *settings,
1348 sink, Collation::PRIMARY_LEVEL,
1349 callback, true, errorCode);
1350 }
1351 if(settings->getStrength() == UCOL_IDENTICAL) {
1352 writeIdenticalLevel(s, limit, sink, errorCode);
1353 }
1354 static const char terminator = 0; // TERMINATOR_BYTE
1355 sink.Append(&terminator, 1);
1356}
1357
1358void
1359RuleBasedCollator::writeIdenticalLevel(const char16_t *s, const char16_t *limit,
1360 SortKeyByteSink &sink, UErrorCode &errorCode) const {
1361 // NFD quick check
1362 const char16_t *nfdQCYesLimit = data->nfcImpl.decompose(s, limit, nullptr, errorCode);
1363 if(U_FAILURE(errorCode)) { return; }
1364 sink.Append(Collation::LEVEL_SEPARATOR_BYTE);
1365 UChar32 prev = 0;
1366 if(nfdQCYesLimit != s) {
1367 prev = u_writeIdenticalLevelRunu_writeIdenticalLevelRun_77(prev, s, static_cast<int32_t>(nfdQCYesLimit - s), sink);
1368 }
1369 // Is there non-NFD text?
1370 int32_t destLengthEstimate;
1371 if(limit != nullptr) {
1372 if(nfdQCYesLimit == limit) { return; }
1373 destLengthEstimate = static_cast<int32_t>(limit - nfdQCYesLimit);
1374 } else {
1375 // s is NUL-terminated
1376 if(*nfdQCYesLimit == 0) { return; }
1377 destLengthEstimate = -1;
1378 }
1379 UnicodeString nfd;
1380 data->nfcImpl.decompose(nfdQCYesLimit, limit, nfd, destLengthEstimate, errorCode);
1381 u_writeIdenticalLevelRunu_writeIdenticalLevelRun_77(prev, nfd.getBuffer(), nfd.length(), sink);
1382}
1383
1384namespace {
1385
1386/**
1387 * internalNextSortKeyPart() calls CollationKeys::writeSortKeyUpToQuaternary()
1388 * with an instance of this callback class.
1389 * When another level is about to be written, the callback
1390 * records the level and the number of bytes that will be written until
1391 * the sink (which is actually a FixedSortKeyByteSink) fills up.
1392 *
1393 * When internalNextSortKeyPart() is called again, it restarts with the last level
1394 * and ignores as many bytes as were written previously for that level.
1395 */
1396class PartLevelCallback : public CollationKeys::LevelCallback {
1397public:
1398 PartLevelCallback(const SortKeyByteSink &s)
1399 : sink(s), level(Collation::PRIMARY_LEVEL) {
1400 levelCapacity = sink.GetRemainingCapacity();
1401 }
1402 virtual ~PartLevelCallback() {}
1403 virtual UBool needToWrite(Collation::Level l) override {
1404 if(!sink.Overflowed()) {
1405 // Remember a level that will be at least partially written.
1406 level = l;
1407 levelCapacity = sink.GetRemainingCapacity();
1408 return true;
1409 } else {
1410 return false;
1411 }
1412 }
1413 Collation::Level getLevel() const { return level; }
1414 int32_t getLevelCapacity() const { return levelCapacity; }
1415
1416private:
1417 const SortKeyByteSink &sink;
1418 Collation::Level level;
1419 int32_t levelCapacity;
1420};
1421
1422} // namespace
1423
1424int32_t
1425RuleBasedCollator::internalNextSortKeyPart(UCharIterator *iter, uint32_t state[2],
1426 uint8_t *dest, int32_t count, UErrorCode &errorCode) const {
1427 if(U_FAILURE(errorCode)) { return 0; }
1428 if(iter == nullptr || state == nullptr || count < 0 || (count > 0 && dest == nullptr)) {
1429 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
1430 return 0;
1431 }
1432 if(count == 0) { return 0; }
1433
1434 FixedSortKeyByteSink sink(reinterpret_cast<char *>(dest), count);
1435 sink.IgnoreBytes(static_cast<int32_t>(state[1]));
1436 iter->move(iter, 0, UITER_START);
1437
1438 Collation::Level level = static_cast<Collation::Level>(state[0]);
1439 if(level <= Collation::QUATERNARY_LEVEL) {
1440 UBool numeric = settings->isNumeric();
1441 PartLevelCallback callback(sink);
1442 if(settings->dontCheckFCD()) {
1443 UIterCollationIterator ci(data, numeric, *iter);
1444 CollationKeys::writeSortKeyUpToQuaternary(ci, data->compressibleBytes, *settings,
1445 sink, level, callback, false, errorCode);
1446 } else {
1447 FCDUIterCollationIterator ci(data, numeric, *iter, 0);
1448 CollationKeys::writeSortKeyUpToQuaternary(ci, data->compressibleBytes, *settings,
1449 sink, level, callback, false, errorCode);
1450 }
1451 if(U_FAILURE(errorCode)) { return 0; }
1452 if(sink.NumberOfBytesAppended() > count) {
1453 state[0] = static_cast<uint32_t>(callback.getLevel());
1454 state[1] = static_cast<uint32_t>(callback.getLevelCapacity());
1455 return count;
1456 }
1457 // All of the normal levels are done.
1458 if(settings->getStrength() == UCOL_IDENTICAL) {
1459 level = Collation::IDENTICAL_LEVEL;
1460 iter->move(iter, 0, UITER_START);
1461 }
1462 // else fall through to setting ZERO_LEVEL
1463 }
1464
1465 if(level == Collation::IDENTICAL_LEVEL) {
1466 int32_t levelCapacity = sink.GetRemainingCapacity();
1467 UnicodeString s;
1468 for(;;) {
1469 UChar32 c = iter->next(iter);
1470 if(c < 0) { break; }
1471 s.append(static_cast<char16_t>(c));
1472 }
1473 const char16_t *sArray = s.getBuffer();
1474 writeIdenticalLevel(sArray, sArray + s.length(), sink, errorCode);
1475 if(U_FAILURE(errorCode)) { return 0; }
1476 if(sink.NumberOfBytesAppended() > count) {
1477 state[0] = static_cast<uint32_t>(level);
1478 state[1] = static_cast<uint32_t>(levelCapacity);
1479 return count;
1480 }
1481 }
1482
1483 // ZERO_LEVEL: Fill the remainder of dest with 00 bytes.
1484 state[0] = static_cast<uint32_t>(Collation::ZERO_LEVEL);
1485 state[1] = 0;
1486 int32_t length = sink.NumberOfBytesAppended();
1487 int32_t i = length;
1488 while(i < count) { dest[i++] = 0; }
1489 return length;
1490}
1491
1492void
1493RuleBasedCollator::internalGetCEs(const UnicodeString &str, UVector64 &ces,
1494 UErrorCode &errorCode) const {
1495 if(U_FAILURE(errorCode)) { return; }
1496 const char16_t *s = str.getBuffer();
1497 const char16_t *limit = s + str.length();
1498 UBool numeric = settings->isNumeric();
1499 if(settings->dontCheckFCD()) {
1500 UTF16CollationIterator iter(data, numeric, s, s, limit);
1501 int64_t ce;
1502 while((ce = iter.nextCE(errorCode)) != Collation::NO_CE) {
1503 ces.addElement(ce, errorCode);
1504 }
1505 } else {
1506 FCDUTF16CollationIterator iter(data, numeric, s, s, limit);
1507 int64_t ce;
1508 while((ce = iter.nextCE(errorCode)) != Collation::NO_CE) {
1509 ces.addElement(ce, errorCode);
1510 }
1511 }
1512}
1513
1514namespace {
1515
1516void appendSubtag(CharString &s, char letter, const char *subtag, int32_t length,
1517 UErrorCode &errorCode) {
1518 if(U_FAILURE(errorCode) || length == 0) { return; }
1519 if(!s.isEmpty()) {
1520 s.append('_', errorCode);
1521 }
1522 s.append(letter, errorCode);
1523 for(int32_t i = 0; i < length; ++i) {
1524 s.append(uprv_toupperuprv_toupper_77(subtag[i]), errorCode);
1525 }
1526}
1527
1528void appendAttribute(CharString &s, char letter, UColAttributeValue value,
1529 UErrorCode &errorCode) {
1530 if(U_FAILURE(errorCode)) { return; }
1531 if(!s.isEmpty()) {
1532 s.append('_', errorCode);
1533 }
1534 static const char *valueChars = "1234...........IXO..SN..LU......";
1535 s.append(letter, errorCode);
1536 s.append(valueChars[value], errorCode);
1537}
1538
1539} // namespace
1540
1541int32_t
1542RuleBasedCollator::internalGetShortDefinitionString(const char *locale,
1543 char *buffer, int32_t capacity,
1544 UErrorCode &errorCode) const {
1545 if(U_FAILURE(errorCode)) { return 0; }
1546 if(buffer == nullptr ? capacity != 0 : capacity < 0) {
1547 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
1548 return 0;
1549 }
1550 if(locale == nullptr) {
1551 locale = internalGetLocaleID(ULOC_VALID_LOCALE, errorCode);
1552 }
1553
1554 char resultLocale[ULOC_FULLNAME_CAPACITY157 + 1];
1555 int32_t length = ucol_getFunctionalEquivalentucol_getFunctionalEquivalent_77(resultLocale, ULOC_FULLNAME_CAPACITY157,
1556 "collation", locale,
1557 nullptr, &errorCode);
1558 if(U_FAILURE(errorCode)) { return 0; }
1559 resultLocale[length] = 0;
1560
1561 // Append items in alphabetic order of their short definition letters.
1562 CharString result;
1563
1564 if(attributeHasBeenSetExplicitly(UCOL_ALTERNATE_HANDLING)) {
1565 appendAttribute(result, 'A', getAttribute(UCOL_ALTERNATE_HANDLING, errorCode), errorCode);
1566 }
1567 // ATTR_VARIABLE_TOP not supported because 'B' was broken.
1568 // See ICU tickets #10372 and #10386.
1569 if(attributeHasBeenSetExplicitly(UCOL_CASE_FIRST)) {
1570 appendAttribute(result, 'C', getAttribute(UCOL_CASE_FIRST, errorCode), errorCode);
1571 }
1572 if(attributeHasBeenSetExplicitly(UCOL_NUMERIC_COLLATION)) {
1573 appendAttribute(result, 'D', getAttribute(UCOL_NUMERIC_COLLATION, errorCode), errorCode);
1574 }
1575 if(attributeHasBeenSetExplicitly(UCOL_CASE_LEVEL)) {
1576 appendAttribute(result, 'E', getAttribute(UCOL_CASE_LEVEL, errorCode), errorCode);
1577 }
1578 if(attributeHasBeenSetExplicitly(UCOL_FRENCH_COLLATION)) {
1579 appendAttribute(result, 'F', getAttribute(UCOL_FRENCH_COLLATION, errorCode), errorCode);
1580 }
1581 // Note: UCOL_HIRAGANA_QUATERNARY_MODE is deprecated and never changes away from default.
1582 CharString collation = ulocimp_getKeywordValueulocimp_getKeywordValue_77(resultLocale, "collation", errorCode);
1583 appendSubtag(result, 'K', collation.data(), collation.length(), errorCode);
1584 CharString language;
1585 CharString script;
1586 CharString region;
1587 CharString variant;
1588 ulocimp_getSubtagsulocimp_getSubtags_77(resultLocale, &language, &script, &region, &variant, nullptr, errorCode);
1589 if (language.isEmpty()) {
1590 appendSubtag(result, 'L', "root", 4, errorCode);
1591 } else {
1592 appendSubtag(result, 'L', language.data(), language.length(), errorCode);
1593 }
1594 if(attributeHasBeenSetExplicitly(UCOL_NORMALIZATION_MODE)) {
1595 appendAttribute(result, 'N', getAttribute(UCOL_NORMALIZATION_MODE, errorCode), errorCode);
1596 }
1597 appendSubtag(result, 'R', region.data(), region.length(), errorCode);
1598 if(attributeHasBeenSetExplicitly(UCOL_STRENGTH)) {
1599 appendAttribute(result, 'S', getAttribute(UCOL_STRENGTH, errorCode), errorCode);
1600 }
1601 appendSubtag(result, 'V', variant.data(), variant.length(), errorCode);
1602 appendSubtag(result, 'Z', script.data(), script.length(), errorCode);
1603
1604 if(U_FAILURE(errorCode)) { return 0; }
1605 return result.extract(buffer, capacity, errorCode);
1606}
1607
1608UBool
1609RuleBasedCollator::isUnsafe(UChar32 c) const {
1610 return data->isUnsafeBackward(c, settings->isNumeric());
1611}
1612
1613void U_CALLCONV
1614RuleBasedCollator::computeMaxExpansions(const CollationTailoring *t, UErrorCode &errorCode) {
1615 t->maxExpansions = CollationElementIterator::computeMaxExpansions(t->data, errorCode);
1616}
1617
1618UBool
1619RuleBasedCollator::initMaxExpansions(UErrorCode &errorCode) const {
1620 umtx_initOnce(tailoring->maxExpansionsInitOnce, computeMaxExpansions, tailoring, errorCode);
1621 return U_SUCCESS(errorCode);
1622}
1623
1624CollationElementIterator *
1625RuleBasedCollator::createCollationElementIterator(const UnicodeString& source) const {
1626 UErrorCode errorCode = U_ZERO_ERROR;
1627 if(!initMaxExpansions(errorCode)) { return nullptr; }
1628 CollationElementIterator *cei = new CollationElementIterator(source, this, errorCode);
1629 if(U_FAILURE(errorCode)) {
1630 delete cei;
1631 return nullptr;
1632 }
1633 return cei;
1634}
1635
1636CollationElementIterator *
1637RuleBasedCollator::createCollationElementIterator(const CharacterIterator& source) const {
1638 UErrorCode errorCode = U_ZERO_ERROR;
1639 if(!initMaxExpansions(errorCode)) { return nullptr; }
1640 CollationElementIterator *cei = new CollationElementIterator(source, this, errorCode);
1641 if(U_FAILURE(errorCode)) {
1642 delete cei;
1643 return nullptr;
1644 }
1645 return cei;
1646}
1647
1648int32_t
1649RuleBasedCollator::getMaxExpansion(int32_t order) const {
1650 UErrorCode errorCode = U_ZERO_ERROR;
1651 (void)initMaxExpansions(errorCode);
1652 return CollationElementIterator::getMaxExpansion(tailoring->maxExpansions, order);
1653}
1654
1655U_NAMESPACE_END}
1656
1657#endif // !UCONFIG_NO_COLLATION