Bug Summary

File: /root/firefox-clang/intl/icu/source/common/uniset_props.cpp
Warning: line 388, column 17
Value stored to 'lastItem' is never read

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name uniset_props.cpp -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -fhalf-no-semantic-interposition -mframe-pointer=all -relaxed-aliasing -ffp-contract=off -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fdebug-compilation-dir=/root/firefox-clang/obj-x86_64-pc-linux-gnu/config/external/icu/common -fcoverage-compilation-dir=/root/firefox-clang/obj-x86_64-pc-linux-gnu/config/external/icu/common -resource-dir /usr/lib/llvm-21/lib/clang/21 -include /root/firefox-clang/config/gcc_hidden.h -include /root/firefox-clang/obj-x86_64-pc-linux-gnu/mozilla-config.h -I /root/firefox-clang/obj-x86_64-pc-linux-gnu/dist/system_wrappers -U _FORTIFY_SOURCE -D _FORTIFY_SOURCE=2 -D _GLIBCXX_ASSERTIONS -D DEBUG=1 -D U_COMMON_IMPLEMENTATION -D _LIBCPP_DISABLE_DEPRECATION_WARNINGS -D U_USING_ICU_NAMESPACE=0 -D U_NO_DEFAULT_INCLUDE_UTF_HEADERS=1 -D U_HIDE_OBSOLETE_UTF_OLD_H=1 -D UCONFIG_NO_LEGACY_CONVERSION -D UCONFIG_NO_TRANSLITERATION -D UCONFIG_NO_REGULAR_EXPRESSIONS -D UCONFIG_NO_BREAK_ITERATION -D UCONFIG_NO_IDNA -D UCONFIG_NO_MF2 -D U_CHARSET_IS_UTF8 -D UNISTR_FROM_CHAR_EXPLICIT=explicit -D UNISTR_FROM_STRING_EXPLICIT=explicit -D U_ENABLE_DYLOAD=0 -D U_DEBUG=1 -I 
/root/firefox-clang/config/external/icu/common -I /root/firefox-clang/obj-x86_64-pc-linux-gnu/config/external/icu/common -I /root/firefox-clang/intl/icu/source/i18n -I /root/firefox-clang/obj-x86_64-pc-linux-gnu/dist/include -I /root/firefox-clang/obj-x86_64-pc-linux-gnu/dist/include/nspr -I /root/firefox-clang/obj-x86_64-pc-linux-gnu/dist/include/nss -D MOZILLA_CLIENT -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/14/../../../../include/c++/14 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/14/../../../../include/x86_64-linux-gnu/c++/14 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/14/../../../../include/c++/14/backward -internal-isystem /usr/lib/llvm-21/lib/clang/21/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/14/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-error=pessimizing-move -Wno-error=large-by-value-copy=128 -Wno-error=implicit-int-float-conversion -Wno-error=thread-safety-analysis -Wno-error=tautological-type-limit-compare -Wno-invalid-offsetof -Wno-range-loop-analysis -Wno-deprecated-anon-enum-enum-conversion -Wno-deprecated-enum-enum-conversion -Wno-deprecated-this-capture -Wno-inline-new-delete -Wno-error=deprecated-declarations -Wno-error=array-bounds -Wno-error=free-nonheap-object -Wno-error=atomic-alignment -Wno-error=deprecated-builtins -Wno-psabi -Wno-error=builtin-macro-redefined -Wno-vla-cxx-extension -Wno-unknown-warning-option -Wno-comma -Wno-implicit-const-int-float-conversion -Wno-macro-redefined -Wno-microsoft-include -Wno-tautological-unsigned-enum-zero-compare -Wno-unreachable-code-loop-increment -Wno-unreachable-code-return -fdeprecated-macro -ferror-limit 19 -fstrict-flex-arrays=1 -stack-protector 2 -fstack-clash-protection -ftrivial-auto-var-init=pattern -fgnuc-version=4.2.1 -fskip-odr-check-in-gmf -fno-sized-deallocation -fno-aligned-allocation 
-vectorize-loops -vectorize-slp -analyzer-checker optin.performance.Padding -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2025-06-27-100320-3286336-1 -x c++ /root/firefox-clang/intl/icu/source/common/uniset_props.cpp
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4*******************************************************************************
5*
6* Copyright (C) 1999-2014, International Business Machines
7* Corporation and others. All Rights Reserved.
8*
9*******************************************************************************
10* file name: uniset_props.cpp
11* encoding: UTF-8
12* tab size: 8 (not used)
13* indentation:4
14*
15* created on: 2004aug25
16* created by: Markus W. Scherer
17*
18* Character property dependent functions moved here from uniset.cpp
19*/
20
21#include "unicode/utypes.h"
22#include "unicode/uniset.h"
23#include "unicode/parsepos.h"
24#include "unicode/uchar.h"
25#include "unicode/uscript.h"
26#include "unicode/symtable.h"
27#include "unicode/uset.h"
28#include "unicode/locid.h"
29#include "unicode/brkiter.h"
30#include "uset_imp.h"
31#include "ruleiter.h"
32#include "cmemory.h"
33#include "ucln_cmn.h"
34#include "util.h"
35#include "uvector.h"
36#include "uprops.h"
37#include "propname.h"
38#include "normalizer2impl.h"
39#include "uinvchar.h"
40#include "uprops.h"
41#include "charstr.h"
42#include "cstring.h"
43#include "mutex.h"
44#include "umutex.h"
45#include "uassert.h"
46#include "hash.h"
47
48U_NAMESPACE_USEusing namespace icu_77;
49
// File-local constants (the anonymous namespace gives them internal linkage).
// Their uses are not visible in this part of the file.
50namespace {
51
52// Special property set IDs
53constexpr char ANY[] = "ANY"; // [\u0000-\U0010FFFF]
54constexpr char ASCII[] = "ASCII"; // [\u0000-\u007F]
55constexpr char ASSIGNED[] = "Assigned"; // [:^Cn:]
56
57// Unicode name property alias
58constexpr char16_t NAME_PROP[] = u"na";
59
60} // namespace
61
62// Cached sets ------------------------------------------------------------- ***
63
// C-linkage section. The cleanup callback declared here is handed to
// ucln_common_registerCleanup() by createUni32Set().
64U_CDECL_BEGINextern "C" {
65static UBool U_CALLCONV uset_cleanup();
66
// Cached set built from the pattern "[:age=3.2:]" (see createUni32Set()),
// and the init-once guard used by uniset_getUnicode32Instance().
67static UnicodeSet *uni32Singleton;
68static icu::UInitOnce uni32InitOnce {};
69
70/**
71 * Cleanup function for UnicodeSet
72 */
// Deletes the cached singleton, nulls the pointer and resets the init-once
// guard. Always returns true.
73static UBool U_CALLCONV uset_cleanup() {
74 delete uni32Singleton;
75 uni32Singleton = nullptr;
76 uni32InitOnce.reset();
77 return true;
78}
79
80U_CDECL_END}
81
82U_NAMESPACE_BEGINnamespace icu_77 {
83
84namespace {
85
86// Cache some sets for other services -------------------------------------- ***
// One-time initializer for uni32Singleton; passed to umtx_initOnce() by
// uniset_getUnicode32Instance().
// @param errorCode receives the status of the UnicodeSet construction, or
//        U_MEMORY_ALLOCATION_ERROR if the allocation returned nullptr.
87void U_CALLCONV createUni32Set(UErrorCode &errorCode) {
88 U_ASSERT(uni32Singleton == nullptr)(static_cast <bool> (uni32Singleton == nullptr) ? void (
0) : __assert_fail ("uni32Singleton == nullptr", __builtin_FILE
(), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__))
;
// Build the set from the property pattern "[:age=3.2:]".
89 uni32Singleton = new UnicodeSet(UnicodeString(u"[:age=3.2:]"), errorCode);
90 if(uni32Singleton==nullptr) {
91 errorCode=U_MEMORY_ALLOCATION_ERROR;
92 } else {
// NOTE(review): errorCode is not inspected before freezing, so a set whose
// pattern failed to apply would still be frozen and cached.
93 uni32Singleton->freeze();
94 }
// Register uset_cleanup(), which deletes the singleton and resets the
// init-once guard.
95 ucln_common_registerCleanupucln_common_registerCleanup_77(UCLN_COMMON_USET, uset_cleanup);
96}
97
98
// Returns the cached "[:age=3.2:]" set, creating it on first use through
// umtx_initOnce(). The returned pointer is the file-level singleton, which
// uset_cleanup() deletes; it is nullptr if createUni32Set() could not
// allocate the set.
99U_CFUNCextern "C" UnicodeSet *
100uniset_getUnicode32Instanceuniset_getUnicode32Instance_77(UErrorCode &errorCode) {
101 umtx_initOnce(uni32InitOnce, &createUni32Set, errorCode);
102 return uni32Singleton;
103}
104
105// helper functions for matching of pattern syntax pieces ------------------ ***
106// these functions are parallel to the PERL_OPEN etc. strings above
107
108// using these functions is not only faster than UnicodeString::compare() and
109// caseCompare(), but they also make UnicodeSet work for simple patterns when
110// no Unicode properties data is available - when caseCompare() fails
111
// Returns true if the two code units at pos are a backslash followed by
// 'p' or 'P'.
112inline UBool
113isPerlOpen(const UnicodeString &pattern, int32_t pos) {
114 char16_t c;
// c caches the second code unit so it is fetched only once.
115 return pattern.charAt(pos)==u'\\' && ((c=pattern.charAt(pos+1))==u'p' || c==u'P');
116}
117
118/*static inline UBool
119isPerlClose(const UnicodeString &pattern, int32_t pos) {
120 return pattern.charAt(pos)==u'}';
121}*/
122
// Returns true if the two code units at pos are a backslash followed by 'N'.
123inline UBool
124isNameOpen(const UnicodeString &pattern, int32_t pos) {
125 return pattern.charAt(pos)==u'\\' && pattern.charAt(pos+1)==u'N';
126}
127
// Returns true if the two code units at pos are "[:".
128inline UBool
129isPOSIXOpen(const UnicodeString &pattern, int32_t pos) {
130 return pattern.charAt(pos)==u'[' && pattern.charAt(pos+1)==u':';
131}
132
133/*static inline UBool
134isPOSIXClose(const UnicodeString &pattern, int32_t pos) {
135 return pattern.charAt(pos)==u':' && pattern.charAt(pos+1)==u']';
136}*/
137
138// TODO memory debugging provided inside uniset.cpp
139// could be made available here but probably obsolete with use of modern
140// memory leak checker tools
141#define _dbgct(me)
142
143} // namespace
144
145//----------------------------------------------------------------
146// Constructors &c
147//----------------------------------------------------------------
148
149/**
150 * Constructs a set from the given pattern, optionally ignoring
151 * white space. See the class description for the syntax of the
152 * pattern language.
153 * @param pattern a string specifying what characters are in the set
154 */
// Delegates to applyPattern(pattern, status); any parse failure is reported
// through status.
155UnicodeSet::UnicodeSet(const UnicodeString& pattern,
156 UErrorCode& status) {
157 applyPattern(pattern, status);
// _dbgct is defined as an empty macro in this file, so this line is a no-op.
158 _dbgct(this);
159}
160
161//----------------------------------------------------------------
162// Public API
163//----------------------------------------------------------------
164
// Parses the whole of `pattern` into this set, ignoring white space.
// @param pattern the set pattern; only trailing white space may follow it
// @param status  set to U_ILLEGAL_ARGUMENT_ERROR if other characters remain
//                after the parsed pattern, or to whatever
//                applyPatternIgnoreSpace() reports
// @return *this
165UnicodeSet& UnicodeSet::applyPattern(const UnicodeString& pattern,
166 UErrorCode& status) {
167 // Equivalent to
168 // return applyPattern(pattern, USET_IGNORE_SPACE, nullptr, status);
169 // but without dependency on closeOver().
170 ParsePosition pos(0);
171 applyPatternIgnoreSpace(pattern, pos, nullptr, status);
172 if (U_FAILURE(status)) return *this;
173
174 int32_t i = pos.getIndex();
175 // Skip over trailing whitespace
176 ICU_Utility::skipWhitespace(pattern, i, true);
// Leftover input is an error; no code here restores the set's previous
// contents in that case.
177 if (i != pattern.length()) {
178 status = U_ILLEGAL_ARGUMENT_ERROR;
179 }
180 return *this;
181}
182
// Parses a pattern starting at `pos`, with the USET_IGNORE_SPACE option and
// no case closure, and stores the rebuilt pattern string on success.
// @param pattern the text containing the pattern
// @param pos     parse position handed to the RuleCharacterIterator
// @param symbols symbol table for variables, or nullptr
// @param status  U_NO_WRITE_PERMISSION if this set is frozen,
//                U_MALFORMED_SET if parsing stops inside a variable value,
//                otherwise whatever the parser reports
183void
184UnicodeSet::applyPatternIgnoreSpace(const UnicodeString& pattern,
185 ParsePosition& pos,
186 const SymbolTable* symbols,
187 UErrorCode& status) {
188 if (U_FAILURE(status)) {
189 return;
190 }
191 if (isFrozen()) {
192 status = U_NO_WRITE_PERMISSION;
193 return;
194 }
195 // Need to build the pattern in a temporary string because
196 // _applyPattern calls add() etc., which set pat to empty.
197 UnicodeString rebuiltPat;
198 RuleCharacterIterator chars(pattern, symbols, pos);
// caseClosure is nullptr and depth starts at 0 for this top-level call.
199 applyPattern(chars, symbols, rebuiltPat, USET_IGNORE_SPACE, nullptr, 0, status);
200 if (U_FAILURE(status)) return;
201 if (chars.inVariable()) {
202 // syntaxError(chars, "Extra chars in variable value");
203 status = U_MALFORMED_SET;
204 return;
205 }
206 setPattern(rebuiltPat);
207}
208
209/**
210 * Return true if the given position, in the given pattern, appears
211 * to be the start of a UnicodeSet pattern.
212 */
// True if the code unit at pos is '[' (code 91) and at least one more code
// unit follows it, or if resemblesPropertyPattern() accepts the position.
213UBool UnicodeSet::resemblesPattern(const UnicodeString& pattern, int32_t pos) {
214 return ((pos+1) < pattern.length() &&
215 pattern.charAt(pos) == static_cast<char16_t>(91)/*[*/) ||
216 resemblesPropertyPattern(pattern, pos);
217}
218
219//----------------------------------------------------------------
220// Implementation: Pattern parsing
221//----------------------------------------------------------------
222
223namespace {
224
225/**
226 * A small all-inline class to manage a UnicodeSet pointer. Add
227 * operator->() etc. as needed.
228 */
// The destructor deletes the held set. NOTE(review): copy operations are not
// declared, so copying an instance would delete the same set twice; the only
// use visible here is a default-constructed local.
229class UnicodeSetPointer {
230 UnicodeSet* p;
231public:
232 inline UnicodeSetPointer() : p(nullptr) {}
233 inline ~UnicodeSetPointer() { delete p; }
// Returns the held pointer without transferring ownership (nullptr until
// allocate() has succeeded).
234 inline UnicodeSet* pointer() { return p; }
// Creates the set on first call and reuses it afterwards; returns whether a
// set is available.
235 inline UBool allocate() {
236 if (p == nullptr) {
237 p = new UnicodeSet();
238 }
239 return p != nullptr;
240 }
241};
242
// Maximum recursion depth accepted by UnicodeSet::applyPattern() for nested
// sets.
243constexpr int32_t MAX_DEPTH = 100;
244
245} // namespace
246
247/**
248 * Parse the pattern from the given RuleCharacterIterator. The
249 * iterator is advanced over the parsed pattern.
250 * @param chars iterator over the pattern characters. Upon return
251 * it will be advanced to the first character after the parsed
252 * pattern, or the end of the iteration if all characters are
253 * parsed.
254 * @param symbols symbol table to use to parse and dereference
255 * variables, or null if none.
256 * @param rebuiltPat the pattern that was parsed, rebuilt or
257 * copied from the input pattern, as appropriate.
258 * @param options a bit mask of zero or more of the following:
259 * IGNORE_SPACE, CASE.
260 */
261void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
262 const SymbolTable* symbols,
263 UnicodeString& rebuiltPat,
264 uint32_t options,
265 UnicodeSet& (UnicodeSet::*caseClosure)(int32_t attribute),
266 int32_t depth,
267 UErrorCode& ec) {
// Do nothing if the caller already has an error. The depth check bounds the
// recursion through nested sets (see the depth + 1 call below).
268 if (U_FAILURE(ec)) return;
269 if (depth > MAX_DEPTH) {
270 ec = U_ILLEGAL_ARGUMENT_ERROR;
271 return;
272 }
273
274 // Syntax characters: [ ] ^ - & { }
275
276 // Recognized special forms for chars, sets: c-c s-s s&s
277
// Iterator options used for every chars.next()/skipIgnored() call in this
// function.
278 int32_t opts = RuleCharacterIterator::PARSE_VARIABLES |
279 RuleCharacterIterator::PARSE_ESCAPES;
280 if ((options & USET_IGNORE_SPACE) != 0) {
281 opts |= RuleCharacterIterator::SKIP_WHITESPACE;
282 }
283
// patLocal accumulates the rebuilt pattern text; buf holds the contents of a
// {multicharacter string}; scratch lazily owns the set used for nested
// patterns; backup is a saved iterator position for un-reading a character.
284 UnicodeString patLocal, buf;
285 UBool usePat = false;
286 UnicodeSetPointer scratch;
287 RuleCharacterIterator::Pos backup;
288
289 // mode: 0=before [, 1=between [...], 2=after ]
290 // lastItem: 0=none, 1=char, 2=set
291 int8_t lastItem = 0, mode = 0;
292 UChar32 lastChar = 0;
293 char16_t op = 0;
294
295 UBool invert = false;
296
// Start from an empty set; contents are accumulated while parsing.
297 clear();
298
299 while (mode != 2 && !chars.atEnd()) {
// Loop invariant: a pending operator is only recorded after an item, and
// '&' only after a set.
300 U_ASSERT((lastItem == 0 && op == 0) ||(static_cast <bool> ((lastItem == 0 && op == 0)
|| (lastItem == 1 && (op == 0 || op == u'-')) || (lastItem
== 2 && (op == 0 || op == u'-' || op == u'&'))) ?
void (0) : __assert_fail ("(lastItem == 0 && op == 0) || (lastItem == 1 && (op == 0 || op == u'-')) || (lastItem == 2 && (op == 0 || op == u'-' || op == u'&'))"
, __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__
))
301 (lastItem == 1 && (op == 0 || op == u'-')) ||(static_cast <bool> ((lastItem == 0 && op == 0)
|| (lastItem == 1 && (op == 0 || op == u'-')) || (lastItem
== 2 && (op == 0 || op == u'-' || op == u'&'))) ?
void (0) : __assert_fail ("(lastItem == 0 && op == 0) || (lastItem == 1 && (op == 0 || op == u'-')) || (lastItem == 2 && (op == 0 || op == u'-' || op == u'&'))"
, __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__
))
302 (lastItem == 2 && (op == 0 || op == u'-' || op == u'&')))(static_cast <bool> ((lastItem == 0 && op == 0)
|| (lastItem == 1 && (op == 0 || op == u'-')) || (lastItem
== 2 && (op == 0 || op == u'-' || op == u'&'))) ?
void (0) : __assert_fail ("(lastItem == 0 && op == 0) || (lastItem == 1 && (op == 0 || op == u'-')) || (lastItem == 2 && (op == 0 || op == u'-' || op == u'&'))"
, __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__
))
;
303
304 UChar32 c = 0;
305 UBool literal = false;
306 UnicodeSet* nested = nullptr; // alias - do not delete
307
308 // -------- Check for property pattern
309
310 // setMode: 0=none, 1=unicodeset, 2=propertypat, 3=preparsed
311 int8_t setMode = 0;
312 if (resemblesPropertyPattern(chars, opts)) {
313 setMode = 2;
314 }
315
316 // -------- Parse '[' of opening delimiter OR nested set.
317 // If there is a nested set, use `setMode' to define how
318 // the set should be parsed. If the '[' is part of the
319 // opening delimiter for this pattern, parse special
320 // strings "[", "[^", "[-", and "[^-". Check for stand-in
321 // characters representing a nested set in the symbol
322 // table.
323
324 else {
325 // Prepare to backup if necessary
326 chars.getPos(backup);
327 c = chars.next(opts, literal, ec);
328 if (U_FAILURE(ec)) return;
329
330 if (c == u'[' && !literal) {
331 if (mode == 1) {
// A '[' inside the brackets starts a nested set: un-read it so the
// recursive call sees the whole nested pattern.
332 chars.setPos(backup); // backup
333 setMode = 1;
334 } else {
335 // Handle opening '[' delimiter
336 mode = 1;
337 patLocal.append(u'[');
338 chars.getPos(backup); // prepare to backup
339 c = chars.next(opts, literal, ec);
340 if (U_FAILURE(ec)) return;
341 if (c == u'^' && !literal) {
342 invert = true;
343 patLocal.append(u'^');
344 chars.getPos(backup); // prepare to backup
345 c = chars.next(opts, literal, ec);
346 if (U_FAILURE(ec)) return;
347 }
348 // Fall through to handle special leading '-';
349 // otherwise restart loop for nested [], \p{}, etc.
350 if (c == u'-') {
351 literal = true;
352 // Fall through to handle literal '-' below
353 } else {
354 chars.setPos(backup); // backup
355 continue;
356 }
357 }
358 } else if (symbols != nullptr) {
// A character that the symbol table maps to a matcher stands in for a
// previously parsed set; any matcher that is not a UnicodeSet is rejected.
359 const UnicodeFunctor *m = symbols->lookupMatcher(c);
360 if (m != nullptr) {
361 const UnicodeSet *ms = dynamic_cast<const UnicodeSet *>(m);
362 if (ms == nullptr) {
363 ec = U_MALFORMED_SET;
364 return;
365 }
366 // casting away const, but `nested' won't be modified
367 // (important not to modify stored set)
368 nested = const_cast<UnicodeSet*>(ms);
369 setMode = 3;
370 }
371 }
372 }
373
374 // -------- Handle a nested set. This either is inline in
375 // the pattern or represented by a stand-in that has
376 // previously been parsed and was looked up in the symbol
377 // table.
378
379 if (setMode != 0) {
380 if (lastItem == 1) {
381 if (op != 0) {
382 // syntaxError(chars, "Char expected after operator");
383 ec = U_MALFORMED_SET;
384 return;
385 }
// Flush the pending single character before handling the set.
386 add(lastChar, lastChar);
387 _appendToPat(patLocal, lastChar, false);
// NOTE(review): the next store is the one the analyzer flags. It is redundant
// because every path from here returns on error, leaves the loop through the
// `break` in the mode == 0 case below, or reaches `lastItem = 2` before
// lastItem is read again.
388 lastItem = 0;
Value stored to 'lastItem' is never read
389 op = 0;
390 }
391
392 if (op == u'-' || op == u'&') {
393 patLocal.append(op);
394 }
395
396 if (nested == nullptr) {
397 // lazy allocation
398 if (!scratch.allocate()) {
399 ec = U_MEMORY_ALLOCATION_ERROR;
400 return;
401 }
402 nested = scratch.pointer();
403 }
404 switch (setMode) {
405 case 1:
406 nested->applyPattern(chars, symbols, patLocal, options, caseClosure, depth + 1, ec);
407 break;
408 case 2:
409 chars.skipIgnored(opts);
410 nested->applyPropertyPattern(chars, patLocal, ec);
411 if (U_FAILURE(ec)) return;
412 break;
413 case 3: // `nested' already parsed
414 nested->_toPattern(patLocal, false);
415 break;
416 }
417
// A nested set was involved, so the rebuilt text in patLocal is used at the
// end instead of a generated pattern.
418 usePat = true;
419
420 if (mode == 0) {
421 // Entire pattern is a category; leave parse loop
422 *this = *nested;
423 mode = 2;
424 break;
425 }
426
// Combine the nested set with this set according to the pending operator.
427 switch (op) {
428 case u'-':
429 removeAll(*nested);
430 break;
431 case u'&':
432 retainAll(*nested);
433 break;
434 case 0:
435 addAll(*nested);
436 break;
437 }
438
439 op = 0;
440 lastItem = 2;
441
442 continue;
443 }
444
445 if (mode == 0) {
446 // syntaxError(chars, "Missing '['");
447 ec = U_MALFORMED_SET;
448 return;
449 }
450
451 // -------- Parse special (syntax) characters. If the
452 // current character is not special, or if it is escaped,
453 // then fall through and handle it below.
454
455 if (!literal) {
456 switch (c) {
457 case u']':
458 if (lastItem == 1) {
459 add(lastChar, lastChar);
460 _appendToPat(patLocal, lastChar, false);
461 }
462 // Treat final trailing '-' as a literal
463 if (op == u'-') {
464 add(op, op);
465 patLocal.append(op);
466 } else if (op == u'&') {
467 // syntaxError(chars, "Trailing '&'");
468 ec = U_MALFORMED_SET;
469 return;
470 }
471 patLocal.append(u']');
472 mode = 2;
473 continue;
474 case u'-':
475 if (op == 0) {
476 if (lastItem != 0) {
// '-' after an item becomes the pending operator.
477 op = static_cast<char16_t>(c);
478 continue;
479 } else {
480 // Treat final trailing '-' as a literal
481 add(c, c);
482 c = chars.next(opts, literal, ec);
483 if (U_FAILURE(ec)) return;
484 if (c == u']' && !literal) {
485 patLocal.append(u"-]", 2);
486 mode = 2;
487 continue;
488 }
489 }
490 }
491 // syntaxError(chars, "'-' not after char or set");
492 ec = U_MALFORMED_SET;
493 return;
494 case u'&':
495 if (lastItem == 2 && op == 0) {
496 op = static_cast<char16_t>(c);
497 continue;
498 }
499 // syntaxError(chars, "'&' not after set");
500 ec = U_MALFORMED_SET;
501 return;
502 case u'^':
503 // syntaxError(chars, "'^' not after '['");
504 ec = U_MALFORMED_SET;
505 return;
506 case u'{':
507 if (op != 0) {
508 // syntaxError(chars, "Missing operand after operator");
509 ec = U_MALFORMED_SET;
510 return;
511 }
512 if (lastItem == 1) {
513 add(lastChar, lastChar);
514 _appendToPat(patLocal, lastChar, false);
515 }
516 lastItem = 0;
517 buf.truncate(0);
518 {
// Collect everything up to the closing unescaped '}' into buf.
519 UBool ok = false;
520 while (!chars.atEnd()) {
521 c = chars.next(opts, literal, ec);
522 if (U_FAILURE(ec)) return;
523 if (c == u'}' && !literal) {
524 ok = true;
525 break;
526 }
527 buf.append(c);
528 }
529 if (!ok) {
530 // syntaxError(chars, "Invalid multicharacter string");
531 ec = U_MALFORMED_SET;
532 return;
533 }
534 }
535 // We have new string. Add it to set and continue;
536 // we don't need to drop through to the further
537 // processing
538 add(buf);
539 patLocal.append(u'{');
540 _appendToPat(patLocal, buf, false);
541 patLocal.append(u'}');
542 continue;
543 case SymbolTable::SYMBOL_REF:
544 // symbols nosymbols
545 // [a-$] error error (ambiguous)
546 // [a$] anchor anchor
547 // [a-$x] var "x"* literal '$'
548 // [a-$.] error literal '$'
549 // *We won't get here in the case of var "x"
550 {
551 chars.getPos(backup);
552 c = chars.next(opts, literal, ec);
553 if (U_FAILURE(ec)) return;
554 UBool anchor = (c == u']' && !literal);
555 if (symbols == nullptr && !anchor) {
556 c = SymbolTable::SYMBOL_REF;
557 chars.setPos(backup);
558 break; // literal '$'
559 }
560 if (anchor && op == 0) {
561 if (lastItem == 1) {
562 add(lastChar, lastChar);
563 _appendToPat(patLocal, lastChar, false);
564 }
// The anchor is represented in the set by U_ETHER (0xFFFF).
565 add(U_ETHER((char16_t)0xFFFF));
566 usePat = true;
567 patLocal.append(static_cast<char16_t>(SymbolTable::SYMBOL_REF));
568 patLocal.append(u']');
569 mode = 2;
570 continue;
571 }
572 // syntaxError(chars, "Unquoted '$'");
573 ec = U_MALFORMED_SET;
574 return;
575 }
576 default:
577 break;
578 }
579 }
580
581 // -------- Parse literal characters. This includes both
582 // escaped chars ("\u4E01") and non-syntax characters
583 // ("a").
584
585 switch (lastItem) {
586 case 0:
587 lastItem = 1;
588 lastChar = c;
589 break;
590 case 1:
591 if (op == u'-') {
592 if (lastChar >= c) {
593 // Don't allow redundant (a-a) or empty (b-a) ranges;
594 // these are most likely typos.
595 // syntaxError(chars, "Invalid range");
596 ec = U_MALFORMED_SET;
597 return;
598 }
599 add(lastChar, c);
600 _appendToPat(patLocal, lastChar, false);
601 patLocal.append(op);
602 _appendToPat(patLocal, c, false);
603 lastItem = 0;
604 op = 0;
605 } else {
// No operator pending: flush the previous character and remember this one.
606 add(lastChar, lastChar);
607 _appendToPat(patLocal, lastChar, false);
608 lastChar = c;
609 }
610 break;
611 case 2:
612 if (op != 0) {
613 // syntaxError(chars, "Set expected after operator");
614 ec = U_MALFORMED_SET;
615 return;
616 }
617 lastChar = c;
618 lastItem = 1;
619 break;
620 }
621 }
622
623 if (mode != 2) {
624 // syntaxError(chars, "Missing ']'");
625 ec = U_MALFORMED_SET;
626 return;
627 }
628
629 chars.skipIgnored(opts);
630
631 /**
632 * Handle global flags (invert, case insensitivity). If this
633 * pattern should be compiled case-insensitive, then we need
634 * to close over case BEFORE COMPLEMENTING. This makes
635 * patterns like /[^abc]/i work.
636 */
// NOTE(review): caseClosure is not null-checked. applyPatternIgnoreSpace()
// passes nullptr with USET_IGNORE_SPACE, which presumably lies outside
// USET_CASE_MASK - confirm.
637 if ((options & USET_CASE_MASK) != 0) {
638 (this->*caseClosure)(options);
639 }
640 if (invert) {
641 complement().removeAllStrings(); // code point complement
642 }
643
644 // Use the rebuilt pattern (patLocal) only if necessary. Prefer the
645 // generated pattern.
646 if (usePat) {
647 rebuiltPat.append(patLocal);
648 } else {
649 _generatePattern(rebuiltPat, false);
650 }
651 if (isBogus() && U_SUCCESS(ec)) {
652 // We likely ran out of memory. AHHH!
653 ec = U_MEMORY_ALLOCATION_ERROR;
654 }
655}
656
657//----------------------------------------------------------------
658// Property set implementation
659//----------------------------------------------------------------
660
// File-local predicate callbacks passed to UnicodeSet::applyFilter().
// Each receives a code point and an untyped context pointer, which it casts
// to the type its caller supplied.
661namespace {
662
// context: double* holding the numeric value to match exactly.
663UBool numericValueFilter(UChar32 ch, void* context) {
664 return u_getNumericValueu_getNumericValue_77(ch) == *static_cast<double*>(context);
665}
666
// context: int32_t* holding a general-category bit mask; true if the bit for
// ch's category is set in that mask.
667UBool generalCategoryMaskFilter(UChar32 ch, void* context) {
668 int32_t value = *static_cast<int32_t*>(context);
669 return (U_GET_GC_MASK((UChar32) ch)((uint32_t)1<<(u_charType_77((UChar32) ch))) & value) != 0;
670}
671
// context: UVersionInfo* holding a version. True if ch's age compares
// (bytewise, via memcmp) greater than the all-zero version and less than or
// equal to the given version.
672UBool versionFilter(UChar32 ch, void* context) {
673 static const UVersionInfo none = { 0, 0, 0, 0 };
674 UVersionInfo v;
675 u_charAgeu_charAge_77(ch, v);
676 UVersionInfo* version = static_cast<UVersionInfo*>(context);
677 return uprv_memcmp(&v, &none, sizeof(v)):: memcmp(&v, &none,sizeof(v)) > 0 && uprv_memcmp(&v, version, sizeof(v)):: memcmp(&v, version,sizeof(v)) <= 0;
678}
679
// Context for intPropertyFilter: the property to query and the value to match.
680typedef struct {
681 UProperty prop;
682 int32_t value;
683} IntPropertyContext;
684
// context: IntPropertyContext*; true if ch's value for c->prop equals c->value.
685UBool intPropertyFilter(UChar32 ch, void* context) {
686 IntPropertyContext* c = static_cast<IntPropertyContext*>(context);
687 return u_getIntPropertyValueu_getIntPropertyValue_77(ch, c->prop) == c->value;
688}
689
// context: UScriptCode* holding the script passed to uscript_hasScript().
690UBool scriptExtensionsFilter(UChar32 ch, void* context) {
691 return uscript_hasScriptuscript_hasScript_77(ch, *static_cast<UScriptCode*>(context));
692}
693
// context: UIdentifierType* holding the type passed to u_hasIDType().
694UBool idTypeFilter(UChar32 ch, void* context) {
695 return u_hasIDTypeu_hasIDType_77(ch, *static_cast<UIdentifierType*>(context));
696}
697
698} // namespace
699
700/**
701 * Generic filter-based scanning code for UCD property UnicodeSets.
702 */
// Replaces this set's contents with the code points for which `filter`
// returns true, testing only the code points contained in `inclusions`.
// @param filter     predicate called as (*filter)(ch, context)
// @param context    passed through to the filter unchanged
// @param inclusions set of code points to test
// @param status     nothing is done if already a failure; set to
//                   U_MEMORY_ALLOCATION_ERROR if the set ends up bogus
703void UnicodeSet::applyFilter(UnicodeSet::Filter filter,
704 void* context,
705 const UnicodeSet* inclusions,
706 UErrorCode &status) {
707 if (U_FAILURE(status)) return;
708
709 // Logically, walk through all Unicode characters, noting the start
710 // and end of each range for which filter.contain(c) is
711 // true. Add each range to a set.
712 //
713 // To improve performance, use an inclusions set which
714 // encodes information about character ranges that are known
715 // to have identical properties.
716 // inclusions contains the first characters of
717 // same-value ranges for the given property.
718
719 clear();
720
// Start of the currently open run of matching code points, or -1 if none.
// It persists across inclusion ranges, so an open run extends over the
// untested code points up to the next tested one that fails the filter.
721 UChar32 startHasProperty = -1;
// NOTE(review): inclusions is dereferenced without a null check; the
// U_FAILURE(status) return above is the only guard visible here.
722 int32_t limitRange = inclusions->getRangeCount();
723
724 for (int j=0; j<limitRange; ++j) {
725 // get current range
726 UChar32 start = inclusions->getRangeStart(j);
727 UChar32 end = inclusions->getRangeEnd(j);
728
729 // for all the code points in the range, process
730 for (UChar32 ch = start; ch <= end; ++ch) {
731 // only add to this UnicodeSet on inflection points --
732 // where the hasProperty value changes to false
733 if ((*filter)(ch, context)) {
734 if (startHasProperty < 0) {
735 startHasProperty = ch;
736 }
737 } else if (startHasProperty >= 0) {
738 add(startHasProperty, ch-1);
739 startHasProperty = -1;
740 }
741 }
742 }
// A run still open after the last tested code point extends to U+10FFFF.
743 if (startHasProperty >= 0) {
744 add(startHasProperty, static_cast<UChar32>(0x10FFFF));
745 }
746 if (isBogus() && U_SUCCESS(status)) {
747 // We likely ran out of memory. AHHH!
748 status = U_MEMORY_ALLOCATION_ERROR;
749 }
750}
751
752namespace {
753
// Copies src to dst while dropping leading spaces, collapsing runs of spaces
// to one, and removing a single trailing space; dst is NUL-terminated on
// success.
// @param dst         output buffer
// @param src         NUL-terminated input
// @param dstCapacity size of dst in chars, including the terminator
// @return false if the result does not fit (dst is then left unterminated),
//         true otherwise
// NOTE(review): with dstCapacity == 0 and an empty src, dst[0] is still
// written; the callers visible in this file pass sizeof(buf) with buf[128].
754UBool mungeCharName(char* dst, const char* src, int32_t dstCapacity) {
755 /* Note: we use ' ' in compiler code page */
756 int32_t j = 0;
757 char ch;
758 --dstCapacity; /* make room for term. zero */
759 while ((ch = *src++) != 0) {
// Skip a space at the start of the output or directly after another space.
760 if (ch == ' ' && (j==0 || (j>0 && dst[j-1]==' '))) {
761 continue;
762 }
763 if (j >= dstCapacity) return false;
764 dst[j++] = ch;
765 }
766 if (j > 0 && dst[j-1] == ' ') --j;
767 dst[j] = 0;
768 return true;
769}
770
771} // namespace
772
773//----------------------------------------------------------------
774// Property set API
775//----------------------------------------------------------------
776
// Helper macro for UnicodeSet member functions that return UnicodeSet&:
// sets `ec` to U_ILLEGAL_ARGUMENT_ERROR and returns *this from the enclosing
// function. The text after the macro name on the first line is the
// analyzer's inline rendering of the expansion.
777#define FAIL(ec)do { ec=U_ILLEGAL_ARGUMENT_ERROR; return *this; } while (false
)
UPRV_BLOCK_MACRO_BEGINdo { \
778 ec=U_ILLEGAL_ARGUMENT_ERROR; \
779 return *this; \
780} UPRV_BLOCK_MACRO_ENDwhile (false)
781
// Replaces this set's contents with the code points whose property `prop`
// has the given `value`.
// @param prop  a binary, integer, general-category-mask, script-extensions
//              or identifier-type property
// @param value the value to match; for binary properties 0 or 1 (any other
//              value produces an empty set)
// @param ec    set to U_ILLEGAL_ARGUMENT_ERROR for an unsupported property
// @return *this. The set is left untouched, and ec unchanged, if ec is
//         already a failure or the set is frozen.
782UnicodeSet&
783UnicodeSet::applyIntPropertyValue(UProperty prop, int32_t value, UErrorCode& ec) {
784 if (U_FAILURE(ec) || isFrozen()) { return *this; }
// The first three properties each use a dedicated filter callback.
785 if (prop == UCHAR_GENERAL_CATEGORY_MASK) {
786 const UnicodeSet* inclusions = CharacterProperties::getInclusionsForProperty(prop, ec);
787 applyFilter(generalCategoryMaskFilter, &value, inclusions, ec);
788 } else if (prop == UCHAR_SCRIPT_EXTENSIONS) {
789 const UnicodeSet* inclusions = CharacterProperties::getInclusionsForProperty(prop, ec);
790 UScriptCode script = static_cast<UScriptCode>(value);
791 applyFilter(scriptExtensionsFilter, &script, inclusions, ec);
792 } else if (prop == UCHAR_IDENTIFIER_TYPE) {
793 const UnicodeSet* inclusions = CharacterProperties::getInclusionsForProperty(prop, ec);
794 UIdentifierType idType = static_cast<UIdentifierType>(value);
795 applyFilter(idTypeFilter, &idType, inclusions, ec);
796 } else if (0 <= prop && prop < UCHAR_BINARY_LIMIT) {
// Binary properties: copy the set returned by u_getBinaryPropertySet() and
// complement it when the requested value is 0.
797 if (value == 0 || value == 1) {
798 const USet *set = u_getBinaryPropertySetu_getBinaryPropertySet_77(prop, &ec);
799 if (U_FAILURE(ec)) { return *this; }
800 copyFrom(*UnicodeSet::fromUSet(set), true);
801 if (value == 0) {
802 complement().removeAllStrings(); // code point complement
803 }
804 } else {
805 clear();
806 }
807 } else if (UCHAR_INT_START <= prop && prop < UCHAR_INT_LIMIT) {
// Integer-valued properties: generic comparison through intPropertyFilter.
808 const UnicodeSet* inclusions = CharacterProperties::getInclusionsForProperty(prop, ec);
809 IntPropertyContext c = {prop, value};
810 applyFilter(intPropertyFilter, &c, inclusions, ec);
811 } else {
812 ec = U_ILLEGAL_ARGUMENT_ERROR;
813 }
814 return *this;
815}
816
817UnicodeSet&
818UnicodeSet::applyPropertyAlias(const UnicodeString& prop,
819 const UnicodeString& value,
820 UErrorCode& ec) {
821 if (U_FAILURE(ec) || isFrozen()) return *this;
822
823 // prop and value used to be converted to char * using the default
824 // converter instead of the invariant conversion.
825 // This should not be necessary because all Unicode property and value
826 // names use only invariant characters.
827 // If there are any variant characters, then we won't find them anyway.
828 // Checking first avoids assertion failures in the conversion.
829 if( !uprv_isInvariantUStringuprv_isInvariantUString_77(prop.getBuffer(), prop.length()) ||
830 !uprv_isInvariantUStringuprv_isInvariantUString_77(value.getBuffer(), value.length())
831 ) {
832 FAIL(ec)do { ec=U_ILLEGAL_ARGUMENT_ERROR; return *this; } while (false
)
;
833 }
834 CharString pname, vname;
835 pname.appendInvariantChars(prop, ec);
836 vname.appendInvariantChars(value, ec);
837 if (U_FAILURE(ec)) return *this;
838
839 UProperty p;
840 int32_t v;
841 UBool invert = false;
842
843 if (value.length() > 0) {
844 p = u_getPropertyEnumu_getPropertyEnum_77(pname.data());
845 if (p == UCHAR_INVALID_CODE) FAIL(ec)do { ec=U_ILLEGAL_ARGUMENT_ERROR; return *this; } while (false
)
;
846
847 // Treat gc as gcm
848 if (p == UCHAR_GENERAL_CATEGORY) {
849 p = UCHAR_GENERAL_CATEGORY_MASK;
850 }
851
852 if ((p >= UCHAR_BINARY_START && p < UCHAR_BINARY_LIMIT) ||
853 (p >= UCHAR_INT_START && p < UCHAR_INT_LIMIT) ||
854 (p >= UCHAR_MASK_START && p < UCHAR_MASK_LIMIT)) {
855 v = u_getPropertyValueEnumu_getPropertyValueEnum_77(p, vname.data());
856 if (v == UCHAR_INVALID_CODE) {
857 // Handle numeric CCC
858 if (p == UCHAR_CANONICAL_COMBINING_CLASS ||
859 p == UCHAR_TRAIL_CANONICAL_COMBINING_CLASS ||
860 p == UCHAR_LEAD_CANONICAL_COMBINING_CLASS) {
861 char* end;
862 double val = uprv_strtod(vname.data(), &end):: strtod(vname.data(), &end);
863 // Anything between 0 and 255 is valid even if unused.
864 // Cast double->int only after range check.
865 // We catch NaN here because comparing it with both 0 and 255 will be false
866 // (as are all comparisons with NaN).
867 if (*end != 0 || !(0 <= val && val <= 255) ||
868 (v = static_cast<int32_t>(val)) != val) {
869 // non-integral value or outside 0..255, or trailing junk
870 FAIL(ec)do { ec=U_ILLEGAL_ARGUMENT_ERROR; return *this; } while (false
)
;
871 }
872 } else {
873 FAIL(ec)do { ec=U_ILLEGAL_ARGUMENT_ERROR; return *this; } while (false
)
;
874 }
875 }
876 }
877
878 else {
879
880 switch (p) {
881 case UCHAR_NUMERIC_VALUE:
882 {
883 char* end;
884 double val = uprv_strtod(vname.data(), &end):: strtod(vname.data(), &end);
885 if (*end != 0) {
886 FAIL(ec)do { ec=U_ILLEGAL_ARGUMENT_ERROR; return *this; } while (false
)
;
887 }
888 applyFilter(numericValueFilter, &val,
889 CharacterProperties::getInclusionsForProperty(p, ec), ec);
890 return *this;
891 }
892 case UCHAR_NAME:
893 {
894 // Must munge name, since u_charFromName() does not do
895 // 'loose' matching.
896 char buf[128]; // it suffices that this be > uprv_getMaxCharNameLength
897 if (!mungeCharName(buf, vname.data(), sizeof(buf))) FAIL(ec)do { ec=U_ILLEGAL_ARGUMENT_ERROR; return *this; } while (false
)
;
898 UChar32 ch = u_charFromNameu_charFromName_77(U_EXTENDED_CHAR_NAME, buf, &ec);
899 if (U_SUCCESS(ec)) {
900 clear();
901 add(ch);
902 return *this;
903 } else {
904 FAIL(ec)do { ec=U_ILLEGAL_ARGUMENT_ERROR; return *this; } while (false
)
;
905 }
906 }
907 case UCHAR_UNICODE_1_NAME:
908 // ICU 49 deprecates the Unicode_1_Name property APIs.
909 FAIL(ec)do { ec=U_ILLEGAL_ARGUMENT_ERROR; return *this; } while (false
)
;
910 case UCHAR_AGE:
911 {
912 // Must munge name, since u_versionFromString() does not do
913 // 'loose' matching.
914 char buf[128];
915 if (!mungeCharName(buf, vname.data(), sizeof(buf))) FAIL(ec)do { ec=U_ILLEGAL_ARGUMENT_ERROR; return *this; } while (false
)
;
916 UVersionInfo version;
917 u_versionFromStringu_versionFromString_77(version, buf);
918 applyFilter(versionFilter, &version,
919 CharacterProperties::getInclusionsForProperty(p, ec), ec);
920 return *this;
921 }
922 case UCHAR_SCRIPT_EXTENSIONS:
923 v = u_getPropertyValueEnumu_getPropertyValueEnum_77(UCHAR_SCRIPT, vname.data());
924 if (v == UCHAR_INVALID_CODE) {
925 FAIL(ec)do { ec=U_ILLEGAL_ARGUMENT_ERROR; return *this; } while (false
)
;
926 }
927 // fall through to calling applyIntPropertyValue()
928 break;
929 case UCHAR_IDENTIFIER_TYPE:
930 v = u_getPropertyValueEnumu_getPropertyValueEnum_77(p, vname.data());
931 if (v == UCHAR_INVALID_CODE) {
932 FAIL(ec)do { ec=U_ILLEGAL_ARGUMENT_ERROR; return *this; } while (false
)
;
933 }
934 // fall through to calling applyIntPropertyValue()
935 break;
936 default:
937 // p is a non-binary, non-enumerated property that we
938 // don't support (yet).
939 FAIL(ec)do { ec=U_ILLEGAL_ARGUMENT_ERROR; return *this; } while (false
)
;
940 }
941 }
942 }
943
944 else {
945 // value is empty. Interpret as General Category, Script, or
946 // Binary property.
947 p = UCHAR_GENERAL_CATEGORY_MASK;
948 v = u_getPropertyValueEnumu_getPropertyValueEnum_77(p, pname.data());
949 if (v == UCHAR_INVALID_CODE) {
950 p = UCHAR_SCRIPT;
951 v = u_getPropertyValueEnumu_getPropertyValueEnum_77(p, pname.data());
952 if (v == UCHAR_INVALID_CODE) {
953 p = u_getPropertyEnumu_getPropertyEnum_77(pname.data());
954 if (p >= UCHAR_BINARY_START && p < UCHAR_BINARY_LIMIT) {
955 v = 1;
956 } else if (0 == uprv_comparePropertyNamesuprv_compareASCIIPropertyNames_77(ANY, pname.data())) {
957 set(MIN_VALUE, MAX_VALUE);
958 return *this;
959 } else if (0 == uprv_comparePropertyNamesuprv_compareASCIIPropertyNames_77(ASCII, pname.data())) {
960 set(0, 0x7F);
961 return *this;
962 } else if (0 == uprv_comparePropertyNamesuprv_compareASCIIPropertyNames_77(ASSIGNED, pname.data())) {
963 // [:Assigned:]=[:^Cn:]
964 p = UCHAR_GENERAL_CATEGORY_MASK;
965 v = U_GC_CN_MASK((uint32_t)1<<(U_GENERAL_OTHER_TYPES));
966 invert = true;
967 } else {
968 FAIL(ec)do { ec=U_ILLEGAL_ARGUMENT_ERROR; return *this; } while (false
)
;
969 }
970 }
971 }
972 }
973
974 applyIntPropertyValue(p, v, ec);
975 if(invert) {
976 complement().removeAllStrings(); // code point complement
977 }
978
979 if (isBogus() && U_SUCCESS(ec)) {
980 // We likely ran out of memory. AHHH!
981 ec = U_MEMORY_ALLOCATION_ERROR;
982 }
983 return *this;
984}
985
986//----------------------------------------------------------------
987// Property set patterns
988//----------------------------------------------------------------
989
990/**
991 * Return true if the given position, in the given pattern, appears
992 * to be the start of a property set pattern.
993 */
994UBool UnicodeSet::resemblesPropertyPattern(const UnicodeString& pattern,
995 int32_t pos) {
996 // Patterns are at least 5 characters long
997 if ((pos+5) > pattern.length()) {
998 return false;
999 }
1000
1001 // Look for an opening [:, [:^, \p, or \P
1002 return isPOSIXOpen(pattern, pos) || isPerlOpen(pattern, pos) || isNameOpen(pattern, pos);
1003}
1004
1005/**
1006 * Return true if the given iterator appears to point at a
1007 * property pattern. Regardless of the result, return with the
1008 * iterator unchanged.
1009 * @param chars iterator over the pattern characters. Upon return
1010 * it will be unchanged.
1011 * @param iterOpts RuleCharacterIterator options
1012 */
1013UBool UnicodeSet::resemblesPropertyPattern(RuleCharacterIterator& chars,
1014 int32_t iterOpts) {
1015 // NOTE: literal will always be false, because we don't parse escapes.
1016 UBool result = false, literal;
1017 UErrorCode ec = U_ZERO_ERROR;
1018 iterOpts &= ~RuleCharacterIterator::PARSE_ESCAPES;
1019 RuleCharacterIterator::Pos pos;
1020 chars.getPos(pos);
1021 UChar32 c = chars.next(iterOpts, literal, ec);
1022 if (c == u'[' || c == u'\\') {
1023 UChar32 d = chars.next(iterOpts & ~RuleCharacterIterator::SKIP_WHITESPACE,
1024 literal, ec);
1025 result = (c == u'[') ? (d == u':') :
1026 (d == u'N' || d == u'p' || d == u'P');
1027 }
1028 chars.setPos(pos);
1029 return result && U_SUCCESS(ec);
1030}
1031
1032/**
1033 * Parse the given property pattern at the given parse position.
1034 */
1035UnicodeSet& UnicodeSet::applyPropertyPattern(const UnicodeString& pattern,
1036 ParsePosition& ppos,
1037 UErrorCode &ec) {
1038 int32_t pos = ppos.getIndex();
1039
1040 UBool posix = false; // true for [:pat:], false for \p{pat} \P{pat} \N{pat}
1041 UBool isName = false; // true for \N{pat}, o/w false
1042 UBool invert = false;
1043
1044 if (U_FAILURE(ec)) return *this;
1045
1046 // Minimum length is 5 characters, e.g. \p{L}
1047 if ((pos+5) > pattern.length()) {
1048 FAIL(ec)do { ec=U_ILLEGAL_ARGUMENT_ERROR; return *this; } while (false
)
;
1049 }
1050
1051 // On entry, ppos should point to one of the following locations:
1052 // Look for an opening [:, [:^, \p, or \P
1053 if (isPOSIXOpen(pattern, pos)) {
1054 posix = true;
1055 pos += 2;
1056 pos = ICU_Utility::skipWhitespace(pattern, pos);
1057 if (pos < pattern.length() && pattern.charAt(pos) == u'^') {
1058 ++pos;
1059 invert = true;
1060 }
1061 } else if (isPerlOpen(pattern, pos) || isNameOpen(pattern, pos)) {
1062 char16_t c = pattern.charAt(pos+1);
1063 invert = (c == u'P');
1064 isName = (c == u'N');
1065 pos += 2;
1066 pos = ICU_Utility::skipWhitespace(pattern, pos);
1067 if (pos == pattern.length() || pattern.charAt(pos++) != u'{') {
1068 // Syntax error; "\p" or "\P" not followed by "{"
1069 FAIL(ec)do { ec=U_ILLEGAL_ARGUMENT_ERROR; return *this; } while (false
)
;
1070 }
1071 } else {
1072 // Open delimiter not seen
1073 FAIL(ec)do { ec=U_ILLEGAL_ARGUMENT_ERROR; return *this; } while (false
)
;
1074 }
1075
1076 // Look for the matching close delimiter, either :] or }
1077 int32_t close;
1078 if (posix) {
1079 close = pattern.indexOf(u":]", 2, pos);
1080 } else {
1081 close = pattern.indexOf(u'}', pos);
1082 }
1083 if (close < 0) {
1084 // Syntax error; close delimiter missing
1085 FAIL(ec)do { ec=U_ILLEGAL_ARGUMENT_ERROR; return *this; } while (false
)
;
1086 }
1087
1088 // Look for an '=' sign. If this is present, we will parse a
1089 // medium \p{gc=Cf} or long \p{GeneralCategory=Format}
1090 // pattern.
1091 int32_t equals = pattern.indexOf(u'=', pos);
1092 UnicodeString propName, valueName;
1093 if (equals >= 0 && equals < close && !isName) {
1094 // Equals seen; parse medium/long pattern
1095 pattern.extractBetween(pos, equals, propName);
1096 pattern.extractBetween(equals+1, close, valueName);
1097 }
1098
1099 else {
1100 // Handle case where no '=' is seen, and \N{}
1101 pattern.extractBetween(pos, close, propName);
1102
1103 // Handle \N{name}
1104 if (isName) {
1105 // This is a little inefficient since it means we have to
1106 // parse NAME_PROP back to UCHAR_NAME even though we already
1107 // know it's UCHAR_NAME. If we refactor the API to
1108 // support args of (UProperty, char*) then we can remove
1109 // NAME_PROP and make this a little more efficient.
1110 valueName = propName;
1111 propName = NAME_PROP;
1112 }
1113 }
1114
1115 applyPropertyAlias(propName, valueName, ec);
1116
1117 if (U_SUCCESS(ec)) {
1118 if (invert) {
1119 complement().removeAllStrings(); // code point complement
1120 }
1121
1122 // Move to the limit position after the close delimiter if the
1123 // parse succeeded.
1124 ppos.setIndex(close + (posix ? 2 : 1));
1125 }
1126
1127 return *this;
1128}
1129
1130/**
1131 * Parse a property pattern.
1132 * @param chars iterator over the pattern characters. Upon return
1133 * it will be advanced to the first character after the parsed
1134 * pattern, or the end of the iteration if all characters are
1135 * parsed.
1136 * @param rebuiltPat the pattern that was parsed, rebuilt or
1137 * copied from the input pattern, as appropriate.
1138 */
1139void UnicodeSet::applyPropertyPattern(RuleCharacterIterator& chars,
1140 UnicodeString& rebuiltPat,
1141 UErrorCode& ec) {
1142 if (U_FAILURE(ec)) return;
1143 UnicodeString pattern;
1144 chars.lookahead(pattern);
1145 ParsePosition pos(0);
1146 applyPropertyPattern(pattern, pos, ec);
1147 if (U_FAILURE(ec)) return;
1148 if (pos.getIndex() == 0) {
1149 // syntaxError(chars, "Invalid property pattern");
1150 ec = U_MALFORMED_SET;
1151 return;
1152 }
1153 chars.jumpahead(pos.getIndex());
1154 rebuiltPat.append(pattern, 0, pos.getIndex());
1155}
1156
1157U_NAMESPACE_END}