File: | root/firefox-clang/intl/icu/source/common/uniset_props.cpp |
Warning: | line 388, column 17 Value stored to 'lastItem' is never read |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html |
3 | /* |
4 | ******************************************************************************* |
5 | * |
6 | * Copyright (C) 1999-2014, International Business Machines |
7 | * Corporation and others. All Rights Reserved. |
8 | * |
9 | ******************************************************************************* |
10 | * file name: uniset_props.cpp |
11 | * encoding: UTF-8 |
12 | * tab size: 8 (not used) |
13 | * indentation:4 |
14 | * |
15 | * created on: 2004aug25 |
16 | * created by: Markus W. Scherer |
17 | * |
18 | * Character property dependent functions moved here from uniset.cpp |
19 | */ |
20 | |
21 | #include "unicode/utypes.h" |
22 | #include "unicode/uniset.h" |
23 | #include "unicode/parsepos.h" |
24 | #include "unicode/uchar.h" |
25 | #include "unicode/uscript.h" |
26 | #include "unicode/symtable.h" |
27 | #include "unicode/uset.h" |
28 | #include "unicode/locid.h" |
29 | #include "unicode/brkiter.h" |
30 | #include "uset_imp.h" |
31 | #include "ruleiter.h" |
32 | #include "cmemory.h" |
33 | #include "ucln_cmn.h" |
34 | #include "util.h" |
35 | #include "uvector.h" |
36 | #include "uprops.h" |
37 | #include "propname.h" |
38 | #include "normalizer2impl.h" |
39 | #include "uinvchar.h" |
40 | #include "uprops.h" |
41 | #include "charstr.h" |
42 | #include "cstring.h" |
43 | #include "mutex.h" |
44 | #include "umutex.h" |
45 | #include "uassert.h" |
46 | #include "hash.h" |
47 | |
48 | U_NAMESPACE_USEusing namespace icu_77; |
49 | |
namespace {

// Special property set IDs (each comment shows the equivalent set pattern).
constexpr char ANY[]      = "ANY";      // [\u0000-\U0010FFFF]
constexpr char ASCII[]    = "ASCII";    // [\u0000-\u007F]
constexpr char ASSIGNED[] = "Assigned"; // [:^Cn:]

// Unicode name property alias
constexpr char16_t NAME_PROP[] = u"na";

}  // namespace
61 | |
62 | // Cached sets ------------------------------------------------------------- *** |
63 | |
64 | U_CDECL_BEGINextern "C" { |
65 | static UBool U_CALLCONV uset_cleanup(); |
66 | |
67 | static UnicodeSet *uni32Singleton; |
68 | static icu::UInitOnce uni32InitOnce {}; |
69 | |
70 | /** |
71 | * Cleanup function for UnicodeSet |
72 | */ |
73 | static UBool U_CALLCONV uset_cleanup() { |
74 | delete uni32Singleton; |
75 | uni32Singleton = nullptr; |
76 | uni32InitOnce.reset(); |
77 | return true; |
78 | } |
79 | |
80 | U_CDECL_END} |
81 | |
82 | U_NAMESPACE_BEGINnamespace icu_77 { |
83 | |
84 | namespace { |
85 | |
86 | // Cache some sets for other services -------------------------------------- *** |
87 | void U_CALLCONV createUni32Set(UErrorCode &errorCode) { |
88 | U_ASSERT(uni32Singleton == nullptr)(static_cast <bool> (uni32Singleton == nullptr) ? void ( 0) : __assert_fail ("uni32Singleton == nullptr", __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__)); |
89 | uni32Singleton = new UnicodeSet(UnicodeString(u"[:age=3.2:]"), errorCode); |
90 | if(uni32Singleton==nullptr) { |
91 | errorCode=U_MEMORY_ALLOCATION_ERROR; |
92 | } else { |
93 | uni32Singleton->freeze(); |
94 | } |
95 | ucln_common_registerCleanupucln_common_registerCleanup_77(UCLN_COMMON_USET, uset_cleanup); |
96 | } |
97 | |
98 | |
99 | U_CFUNCextern "C" UnicodeSet * |
100 | uniset_getUnicode32Instanceuniset_getUnicode32Instance_77(UErrorCode &errorCode) { |
101 | umtx_initOnce(uni32InitOnce, &createUni32Set, errorCode); |
102 | return uni32Singleton; |
103 | } |
104 | |
105 | // helper functions for matching of pattern syntax pieces ------------------ *** |
106 | // these functions are parallel to the PERL_OPEN etc. strings above |
107 | |
108 | // using these functions is not only faster than UnicodeString::compare() and |
109 | // caseCompare(), but they also make UnicodeSet work for simple patterns when |
110 | // no Unicode properties data is available - when caseCompare() fails |
111 | |
112 | inline UBool |
113 | isPerlOpen(const UnicodeString &pattern, int32_t pos) { |
114 | char16_t c; |
115 | return pattern.charAt(pos)==u'\\' && ((c=pattern.charAt(pos+1))==u'p' || c==u'P'); |
116 | } |
117 | |
118 | /*static inline UBool |
119 | isPerlClose(const UnicodeString &pattern, int32_t pos) { |
120 | return pattern.charAt(pos)==u'}'; |
121 | }*/ |
122 | |
123 | inline UBool |
124 | isNameOpen(const UnicodeString &pattern, int32_t pos) { |
125 | return pattern.charAt(pos)==u'\\' && pattern.charAt(pos+1)==u'N'; |
126 | } |
127 | |
128 | inline UBool |
129 | isPOSIXOpen(const UnicodeString &pattern, int32_t pos) { |
130 | return pattern.charAt(pos)==u'[' && pattern.charAt(pos+1)==u':'; |
131 | } |
132 | |
133 | /*static inline UBool |
134 | isPOSIXClose(const UnicodeString &pattern, int32_t pos) { |
135 | return pattern.charAt(pos)==u':' && pattern.charAt(pos+1)==u']'; |
136 | }*/ |
137 | |
138 | // TODO memory debugging provided inside uniset.cpp |
139 | // could be made available here but probably obsolete with use of modern |
140 | // memory leak checker tools |
141 | #define _dbgct(me) |
142 | |
143 | } // namespace |
144 | |
145 | //---------------------------------------------------------------- |
146 | // Constructors &c |
147 | //---------------------------------------------------------------- |
148 | |
149 | /** |
150 | * Constructs a set from the given pattern, optionally ignoring |
151 | * white space. See the class description for the syntax of the |
152 | * pattern language. |
153 | * @param pattern a string specifying what characters are in the set |
154 | */ |
155 | UnicodeSet::UnicodeSet(const UnicodeString& pattern, |
156 | UErrorCode& status) { |
157 | applyPattern(pattern, status); |
158 | _dbgct(this); |
159 | } |
160 | |
161 | //---------------------------------------------------------------- |
162 | // Public API |
163 | //---------------------------------------------------------------- |
164 | |
165 | UnicodeSet& UnicodeSet::applyPattern(const UnicodeString& pattern, |
166 | UErrorCode& status) { |
167 | // Equivalent to |
168 | // return applyPattern(pattern, USET_IGNORE_SPACE, nullptr, status); |
169 | // but without dependency on closeOver(). |
170 | ParsePosition pos(0); |
171 | applyPatternIgnoreSpace(pattern, pos, nullptr, status); |
172 | if (U_FAILURE(status)) return *this; |
173 | |
174 | int32_t i = pos.getIndex(); |
175 | // Skip over trailing whitespace |
176 | ICU_Utility::skipWhitespace(pattern, i, true); |
177 | if (i != pattern.length()) { |
178 | status = U_ILLEGAL_ARGUMENT_ERROR; |
179 | } |
180 | return *this; |
181 | } |
182 | |
183 | void |
184 | UnicodeSet::applyPatternIgnoreSpace(const UnicodeString& pattern, |
185 | ParsePosition& pos, |
186 | const SymbolTable* symbols, |
187 | UErrorCode& status) { |
188 | if (U_FAILURE(status)) { |
189 | return; |
190 | } |
191 | if (isFrozen()) { |
192 | status = U_NO_WRITE_PERMISSION; |
193 | return; |
194 | } |
195 | // Need to build the pattern in a temporary string because |
196 | // _applyPattern calls add() etc., which set pat to empty. |
197 | UnicodeString rebuiltPat; |
198 | RuleCharacterIterator chars(pattern, symbols, pos); |
199 | applyPattern(chars, symbols, rebuiltPat, USET_IGNORE_SPACE, nullptr, 0, status); |
200 | if (U_FAILURE(status)) return; |
201 | if (chars.inVariable()) { |
202 | // syntaxError(chars, "Extra chars in variable value"); |
203 | status = U_MALFORMED_SET; |
204 | return; |
205 | } |
206 | setPattern(rebuiltPat); |
207 | } |
208 | |
209 | /** |
210 | * Return true if the given position, in the given pattern, appears |
211 | * to be the start of a UnicodeSet pattern. |
212 | */ |
213 | UBool UnicodeSet::resemblesPattern(const UnicodeString& pattern, int32_t pos) { |
214 | return ((pos+1) < pattern.length() && |
215 | pattern.charAt(pos) == static_cast<char16_t>(91)/*[*/) || |
216 | resemblesPropertyPattern(pattern, pos); |
217 | } |
218 | |
219 | //---------------------------------------------------------------- |
220 | // Implementation: Pattern parsing |
221 | //---------------------------------------------------------------- |
222 | |
223 | namespace { |
224 | |
225 | /** |
226 | * A small all-inline class to manage a UnicodeSet pointer. Add |
227 | * operator->() etc. as needed. |
228 | */ |
229 | class UnicodeSetPointer { |
230 | UnicodeSet* p; |
231 | public: |
232 | inline UnicodeSetPointer() : p(nullptr) {} |
233 | inline ~UnicodeSetPointer() { delete p; } |
234 | inline UnicodeSet* pointer() { return p; } |
235 | inline UBool allocate() { |
236 | if (p == nullptr) { |
237 | p = new UnicodeSet(); |
238 | } |
239 | return p != nullptr; |
240 | } |
241 | }; |
242 | |
243 | constexpr int32_t MAX_DEPTH = 100; |
244 | |
245 | } // namespace |
246 | |
247 | /** |
248 | * Parse the pattern from the given RuleCharacterIterator. The |
249 | * iterator is advanced over the parsed pattern. |
250 | * @param chars iterator over the pattern characters. Upon return |
251 | * it will be advanced to the first character after the parsed |
252 | * pattern, or the end of the iteration if all characters are |
253 | * parsed. |
254 | * @param symbols symbol table to use to parse and dereference |
255 | * variables, or null if none. |
256 | * @param rebuiltPat the pattern that was parsed, rebuilt or |
257 | * copied from the input pattern, as appropriate. |
258 | * @param options a bit mask of zero or more of the following: |
259 | * IGNORE_SPACE, CASE. |
260 | */ |
261 | void UnicodeSet::applyPattern(RuleCharacterIterator& chars, |
262 | const SymbolTable* symbols, |
263 | UnicodeString& rebuiltPat, |
264 | uint32_t options, |
265 | UnicodeSet& (UnicodeSet::*caseClosure)(int32_t attribute), |
266 | int32_t depth, |
267 | UErrorCode& ec) { |
268 | if (U_FAILURE(ec)) return; |
269 | if (depth > MAX_DEPTH) { |
270 | ec = U_ILLEGAL_ARGUMENT_ERROR; |
271 | return; |
272 | } |
273 | |
274 | // Syntax characters: [ ] ^ - & { } |
275 | |
276 | // Recognized special forms for chars, sets: c-c s-s s&s |
277 | |
278 | int32_t opts = RuleCharacterIterator::PARSE_VARIABLES | |
279 | RuleCharacterIterator::PARSE_ESCAPES; |
280 | if ((options & USET_IGNORE_SPACE) != 0) { |
281 | opts |= RuleCharacterIterator::SKIP_WHITESPACE; |
282 | } |
283 | |
284 | UnicodeString patLocal, buf; |
285 | UBool usePat = false; |
286 | UnicodeSetPointer scratch; |
287 | RuleCharacterIterator::Pos backup; |
288 | |
289 | // mode: 0=before [, 1=between [...], 2=after ] |
290 | // lastItem: 0=none, 1=char, 2=set |
291 | int8_t lastItem = 0, mode = 0; |
292 | UChar32 lastChar = 0; |
293 | char16_t op = 0; |
294 | |
295 | UBool invert = false; |
296 | |
297 | clear(); |
298 | |
299 | while (mode != 2 && !chars.atEnd()) { |
300 | U_ASSERT((lastItem == 0 && op == 0) ||(static_cast <bool> ((lastItem == 0 && op == 0) || (lastItem == 1 && (op == 0 || op == u'-')) || (lastItem == 2 && (op == 0 || op == u'-' || op == u'&'))) ? void (0) : __assert_fail ("(lastItem == 0 && op == 0) || (lastItem == 1 && (op == 0 || op == u'-')) || (lastItem == 2 && (op == 0 || op == u'-' || op == u'&'))" , __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__ )) |
301 | (lastItem == 1 && (op == 0 || op == u'-')) ||(static_cast <bool> ((lastItem == 0 && op == 0) || (lastItem == 1 && (op == 0 || op == u'-')) || (lastItem == 2 && (op == 0 || op == u'-' || op == u'&'))) ? void (0) : __assert_fail ("(lastItem == 0 && op == 0) || (lastItem == 1 && (op == 0 || op == u'-')) || (lastItem == 2 && (op == 0 || op == u'-' || op == u'&'))" , __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__ )) |
302 | (lastItem == 2 && (op == 0 || op == u'-' || op == u'&')))(static_cast <bool> ((lastItem == 0 && op == 0) || (lastItem == 1 && (op == 0 || op == u'-')) || (lastItem == 2 && (op == 0 || op == u'-' || op == u'&'))) ? void (0) : __assert_fail ("(lastItem == 0 && op == 0) || (lastItem == 1 && (op == 0 || op == u'-')) || (lastItem == 2 && (op == 0 || op == u'-' || op == u'&'))" , __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__ )); |
303 | |
304 | UChar32 c = 0; |
305 | UBool literal = false; |
306 | UnicodeSet* nested = nullptr; // alias - do not delete |
307 | |
308 | // -------- Check for property pattern |
309 | |
310 | // setMode: 0=none, 1=unicodeset, 2=propertypat, 3=preparsed |
311 | int8_t setMode = 0; |
312 | if (resemblesPropertyPattern(chars, opts)) { |
313 | setMode = 2; |
314 | } |
315 | |
316 | // -------- Parse '[' of opening delimiter OR nested set. |
317 | // If there is a nested set, use `setMode' to define how |
318 | // the set should be parsed. If the '[' is part of the |
319 | // opening delimiter for this pattern, parse special |
320 | // strings "[", "[^", "[-", and "[^-". Check for stand-in |
321 | // characters representing a nested set in the symbol |
322 | // table. |
323 | |
324 | else { |
325 | // Prepare to backup if necessary |
326 | chars.getPos(backup); |
327 | c = chars.next(opts, literal, ec); |
328 | if (U_FAILURE(ec)) return; |
329 | |
330 | if (c == u'[' && !literal) { |
331 | if (mode == 1) { |
332 | chars.setPos(backup); // backup |
333 | setMode = 1; |
334 | } else { |
335 | // Handle opening '[' delimiter |
336 | mode = 1; |
337 | patLocal.append(u'['); |
338 | chars.getPos(backup); // prepare to backup |
339 | c = chars.next(opts, literal, ec); |
340 | if (U_FAILURE(ec)) return; |
341 | if (c == u'^' && !literal) { |
342 | invert = true; |
343 | patLocal.append(u'^'); |
344 | chars.getPos(backup); // prepare to backup |
345 | c = chars.next(opts, literal, ec); |
346 | if (U_FAILURE(ec)) return; |
347 | } |
348 | // Fall through to handle special leading '-'; |
349 | // otherwise restart loop for nested [], \p{}, etc. |
350 | if (c == u'-') { |
351 | literal = true; |
352 | // Fall through to handle literal '-' below |
353 | } else { |
354 | chars.setPos(backup); // backup |
355 | continue; |
356 | } |
357 | } |
358 | } else if (symbols != nullptr) { |
359 | const UnicodeFunctor *m = symbols->lookupMatcher(c); |
360 | if (m != nullptr) { |
361 | const UnicodeSet *ms = dynamic_cast<const UnicodeSet *>(m); |
362 | if (ms == nullptr) { |
363 | ec = U_MALFORMED_SET; |
364 | return; |
365 | } |
366 | // casting away const, but `nested' won't be modified |
367 | // (important not to modify stored set) |
368 | nested = const_cast<UnicodeSet*>(ms); |
369 | setMode = 3; |
370 | } |
371 | } |
372 | } |
373 | |
374 | // -------- Handle a nested set. This either is inline in |
375 | // the pattern or represented by a stand-in that has |
376 | // previously been parsed and was looked up in the symbol |
377 | // table. |
378 | |
379 | if (setMode != 0) { |
380 | if (lastItem == 1) { |
381 | if (op != 0) { |
382 | // syntaxError(chars, "Char expected after operator"); |
383 | ec = U_MALFORMED_SET; |
384 | return; |
385 | } |
386 | add(lastChar, lastChar); |
387 | _appendToPat(patLocal, lastChar, false); |
388 | lastItem = 0; |
Value stored to 'lastItem' is never read | |
389 | op = 0; |
390 | } |
391 | |
392 | if (op == u'-' || op == u'&') { |
393 | patLocal.append(op); |
394 | } |
395 | |
396 | if (nested == nullptr) { |
397 | // lazy allocation |
398 | if (!scratch.allocate()) { |
399 | ec = U_MEMORY_ALLOCATION_ERROR; |
400 | return; |
401 | } |
402 | nested = scratch.pointer(); |
403 | } |
404 | switch (setMode) { |
405 | case 1: |
406 | nested->applyPattern(chars, symbols, patLocal, options, caseClosure, depth + 1, ec); |
407 | break; |
408 | case 2: |
409 | chars.skipIgnored(opts); |
410 | nested->applyPropertyPattern(chars, patLocal, ec); |
411 | if (U_FAILURE(ec)) return; |
412 | break; |
413 | case 3: // `nested' already parsed |
414 | nested->_toPattern(patLocal, false); |
415 | break; |
416 | } |
417 | |
418 | usePat = true; |
419 | |
420 | if (mode == 0) { |
421 | // Entire pattern is a category; leave parse loop |
422 | *this = *nested; |
423 | mode = 2; |
424 | break; |
425 | } |
426 | |
427 | switch (op) { |
428 | case u'-': |
429 | removeAll(*nested); |
430 | break; |
431 | case u'&': |
432 | retainAll(*nested); |
433 | break; |
434 | case 0: |
435 | addAll(*nested); |
436 | break; |
437 | } |
438 | |
439 | op = 0; |
440 | lastItem = 2; |
441 | |
442 | continue; |
443 | } |
444 | |
445 | if (mode == 0) { |
446 | // syntaxError(chars, "Missing '['"); |
447 | ec = U_MALFORMED_SET; |
448 | return; |
449 | } |
450 | |
451 | // -------- Parse special (syntax) characters. If the |
452 | // current character is not special, or if it is escaped, |
453 | // then fall through and handle it below. |
454 | |
455 | if (!literal) { |
456 | switch (c) { |
457 | case u']': |
458 | if (lastItem == 1) { |
459 | add(lastChar, lastChar); |
460 | _appendToPat(patLocal, lastChar, false); |
461 | } |
462 | // Treat final trailing '-' as a literal |
463 | if (op == u'-') { |
464 | add(op, op); |
465 | patLocal.append(op); |
466 | } else if (op == u'&') { |
467 | // syntaxError(chars, "Trailing '&'"); |
468 | ec = U_MALFORMED_SET; |
469 | return; |
470 | } |
471 | patLocal.append(u']'); |
472 | mode = 2; |
473 | continue; |
474 | case u'-': |
475 | if (op == 0) { |
476 | if (lastItem != 0) { |
477 | op = static_cast<char16_t>(c); |
478 | continue; |
479 | } else { |
480 | // Treat final trailing '-' as a literal |
481 | add(c, c); |
482 | c = chars.next(opts, literal, ec); |
483 | if (U_FAILURE(ec)) return; |
484 | if (c == u']' && !literal) { |
485 | patLocal.append(u"-]", 2); |
486 | mode = 2; |
487 | continue; |
488 | } |
489 | } |
490 | } |
491 | // syntaxError(chars, "'-' not after char or set"); |
492 | ec = U_MALFORMED_SET; |
493 | return; |
494 | case u'&': |
495 | if (lastItem == 2 && op == 0) { |
496 | op = static_cast<char16_t>(c); |
497 | continue; |
498 | } |
499 | // syntaxError(chars, "'&' not after set"); |
500 | ec = U_MALFORMED_SET; |
501 | return; |
502 | case u'^': |
503 | // syntaxError(chars, "'^' not after '['"); |
504 | ec = U_MALFORMED_SET; |
505 | return; |
506 | case u'{': |
507 | if (op != 0) { |
508 | // syntaxError(chars, "Missing operand after operator"); |
509 | ec = U_MALFORMED_SET; |
510 | return; |
511 | } |
512 | if (lastItem == 1) { |
513 | add(lastChar, lastChar); |
514 | _appendToPat(patLocal, lastChar, false); |
515 | } |
516 | lastItem = 0; |
517 | buf.truncate(0); |
518 | { |
519 | UBool ok = false; |
520 | while (!chars.atEnd()) { |
521 | c = chars.next(opts, literal, ec); |
522 | if (U_FAILURE(ec)) return; |
523 | if (c == u'}' && !literal) { |
524 | ok = true; |
525 | break; |
526 | } |
527 | buf.append(c); |
528 | } |
529 | if (!ok) { |
530 | // syntaxError(chars, "Invalid multicharacter string"); |
531 | ec = U_MALFORMED_SET; |
532 | return; |
533 | } |
534 | } |
535 | // We have new string. Add it to set and continue; |
536 | // we don't need to drop through to the further |
537 | // processing |
538 | add(buf); |
539 | patLocal.append(u'{'); |
540 | _appendToPat(patLocal, buf, false); |
541 | patLocal.append(u'}'); |
542 | continue; |
543 | case SymbolTable::SYMBOL_REF: |
544 | // symbols nosymbols |
545 | // [a-$] error error (ambiguous) |
546 | // [a$] anchor anchor |
547 | // [a-$x] var "x"* literal '$' |
548 | // [a-$.] error literal '$' |
549 | // *We won't get here in the case of var "x" |
550 | { |
551 | chars.getPos(backup); |
552 | c = chars.next(opts, literal, ec); |
553 | if (U_FAILURE(ec)) return; |
554 | UBool anchor = (c == u']' && !literal); |
555 | if (symbols == nullptr && !anchor) { |
556 | c = SymbolTable::SYMBOL_REF; |
557 | chars.setPos(backup); |
558 | break; // literal '$' |
559 | } |
560 | if (anchor && op == 0) { |
561 | if (lastItem == 1) { |
562 | add(lastChar, lastChar); |
563 | _appendToPat(patLocal, lastChar, false); |
564 | } |
565 | add(U_ETHER((char16_t)0xFFFF)); |
566 | usePat = true; |
567 | patLocal.append(static_cast<char16_t>(SymbolTable::SYMBOL_REF)); |
568 | patLocal.append(u']'); |
569 | mode = 2; |
570 | continue; |
571 | } |
572 | // syntaxError(chars, "Unquoted '$'"); |
573 | ec = U_MALFORMED_SET; |
574 | return; |
575 | } |
576 | default: |
577 | break; |
578 | } |
579 | } |
580 | |
581 | // -------- Parse literal characters. This includes both |
582 | // escaped chars ("\u4E01") and non-syntax characters |
583 | // ("a"). |
584 | |
585 | switch (lastItem) { |
586 | case 0: |
587 | lastItem = 1; |
588 | lastChar = c; |
589 | break; |
590 | case 1: |
591 | if (op == u'-') { |
592 | if (lastChar >= c) { |
593 | // Don't allow redundant (a-a) or empty (b-a) ranges; |
594 | // these are most likely typos. |
595 | // syntaxError(chars, "Invalid range"); |
596 | ec = U_MALFORMED_SET; |
597 | return; |
598 | } |
599 | add(lastChar, c); |
600 | _appendToPat(patLocal, lastChar, false); |
601 | patLocal.append(op); |
602 | _appendToPat(patLocal, c, false); |
603 | lastItem = 0; |
604 | op = 0; |
605 | } else { |
606 | add(lastChar, lastChar); |
607 | _appendToPat(patLocal, lastChar, false); |
608 | lastChar = c; |
609 | } |
610 | break; |
611 | case 2: |
612 | if (op != 0) { |
613 | // syntaxError(chars, "Set expected after operator"); |
614 | ec = U_MALFORMED_SET; |
615 | return; |
616 | } |
617 | lastChar = c; |
618 | lastItem = 1; |
619 | break; |
620 | } |
621 | } |
622 | |
623 | if (mode != 2) { |
624 | // syntaxError(chars, "Missing ']'"); |
625 | ec = U_MALFORMED_SET; |
626 | return; |
627 | } |
628 | |
629 | chars.skipIgnored(opts); |
630 | |
631 | /** |
632 | * Handle global flags (invert, case insensitivity). If this |
633 | * pattern should be compiled case-insensitive, then we need |
634 | * to close over case BEFORE COMPLEMENTING. This makes |
635 | * patterns like /[^abc]/i work. |
636 | */ |
637 | if ((options & USET_CASE_MASK) != 0) { |
638 | (this->*caseClosure)(options); |
639 | } |
640 | if (invert) { |
641 | complement().removeAllStrings(); // code point complement |
642 | } |
643 | |
644 | // Use the rebuilt pattern (patLocal) only if necessary. Prefer the |
645 | // generated pattern. |
646 | if (usePat) { |
647 | rebuiltPat.append(patLocal); |
648 | } else { |
649 | _generatePattern(rebuiltPat, false); |
650 | } |
651 | if (isBogus() && U_SUCCESS(ec)) { |
652 | // We likely ran out of memory. AHHH! |
653 | ec = U_MEMORY_ALLOCATION_ERROR; |
654 | } |
655 | } |
656 | |
657 | //---------------------------------------------------------------- |
658 | // Property set implementation |
659 | //---------------------------------------------------------------- |
660 | |
661 | namespace { |
662 | |
663 | UBool numericValueFilter(UChar32 ch, void* context) { |
664 | return u_getNumericValueu_getNumericValue_77(ch) == *static_cast<double*>(context); |
665 | } |
666 | |
667 | UBool generalCategoryMaskFilter(UChar32 ch, void* context) { |
668 | int32_t value = *static_cast<int32_t*>(context); |
669 | return (U_GET_GC_MASK((UChar32) ch)((uint32_t)1<<(u_charType_77((UChar32) ch))) & value) != 0; |
670 | } |
671 | |
672 | UBool versionFilter(UChar32 ch, void* context) { |
673 | static const UVersionInfo none = { 0, 0, 0, 0 }; |
674 | UVersionInfo v; |
675 | u_charAgeu_charAge_77(ch, v); |
676 | UVersionInfo* version = static_cast<UVersionInfo*>(context); |
677 | return uprv_memcmp(&v, &none, sizeof(v)):: memcmp(&v, &none,sizeof(v)) > 0 && uprv_memcmp(&v, version, sizeof(v)):: memcmp(&v, version,sizeof(v)) <= 0; |
678 | } |
679 | |
680 | typedef struct { |
681 | UProperty prop; |
682 | int32_t value; |
683 | } IntPropertyContext; |
684 | |
685 | UBool intPropertyFilter(UChar32 ch, void* context) { |
686 | IntPropertyContext* c = static_cast<IntPropertyContext*>(context); |
687 | return u_getIntPropertyValueu_getIntPropertyValue_77(ch, c->prop) == c->value; |
688 | } |
689 | |
690 | UBool scriptExtensionsFilter(UChar32 ch, void* context) { |
691 | return uscript_hasScriptuscript_hasScript_77(ch, *static_cast<UScriptCode*>(context)); |
692 | } |
693 | |
694 | UBool idTypeFilter(UChar32 ch, void* context) { |
695 | return u_hasIDTypeu_hasIDType_77(ch, *static_cast<UIdentifierType*>(context)); |
696 | } |
697 | |
698 | } // namespace |
699 | |
700 | /** |
701 | * Generic filter-based scanning code for UCD property UnicodeSets. |
702 | */ |
703 | void UnicodeSet::applyFilter(UnicodeSet::Filter filter, |
704 | void* context, |
705 | const UnicodeSet* inclusions, |
706 | UErrorCode &status) { |
707 | if (U_FAILURE(status)) return; |
708 | |
709 | // Logically, walk through all Unicode characters, noting the start |
710 | // and end of each range for which filter.contain(c) is |
711 | // true. Add each range to a set. |
712 | // |
713 | // To improve performance, use an inclusions set which |
714 | // encodes information about character ranges that are known |
715 | // to have identical properties. |
716 | // inclusions contains the first characters of |
717 | // same-value ranges for the given property. |
718 | |
719 | clear(); |
720 | |
721 | UChar32 startHasProperty = -1; |
722 | int32_t limitRange = inclusions->getRangeCount(); |
723 | |
724 | for (int j=0; j<limitRange; ++j) { |
725 | // get current range |
726 | UChar32 start = inclusions->getRangeStart(j); |
727 | UChar32 end = inclusions->getRangeEnd(j); |
728 | |
729 | // for all the code points in the range, process |
730 | for (UChar32 ch = start; ch <= end; ++ch) { |
731 | // only add to this UnicodeSet on inflection points -- |
732 | // where the hasProperty value changes to false |
733 | if ((*filter)(ch, context)) { |
734 | if (startHasProperty < 0) { |
735 | startHasProperty = ch; |
736 | } |
737 | } else if (startHasProperty >= 0) { |
738 | add(startHasProperty, ch-1); |
739 | startHasProperty = -1; |
740 | } |
741 | } |
742 | } |
743 | if (startHasProperty >= 0) { |
744 | add(startHasProperty, static_cast<UChar32>(0x10FFFF)); |
745 | } |
746 | if (isBogus() && U_SUCCESS(status)) { |
747 | // We likely ran out of memory. AHHH! |
748 | status = U_MEMORY_ALLOCATION_ERROR; |
749 | } |
750 | } |
751 | |
752 | namespace { |
753 | |
754 | UBool mungeCharName(char* dst, const char* src, int32_t dstCapacity) { |
755 | /* Note: we use ' ' in compiler code page */ |
756 | int32_t j = 0; |
757 | char ch; |
758 | --dstCapacity; /* make room for term. zero */ |
759 | while ((ch = *src++) != 0) { |
760 | if (ch == ' ' && (j==0 || (j>0 && dst[j-1]==' '))) { |
761 | continue; |
762 | } |
763 | if (j >= dstCapacity) return false; |
764 | dst[j++] = ch; |
765 | } |
766 | if (j > 0 && dst[j-1] == ' ') --j; |
767 | dst[j] = 0; |
768 | return true; |
769 | } |
770 | |
771 | } // namespace |
772 | |
773 | //---------------------------------------------------------------- |
774 | // Property set API |
775 | //---------------------------------------------------------------- |
776 | |
// Sets the given error code to U_ILLEGAL_ARGUMENT_ERROR and returns *this
// from the enclosing UnicodeSet member function.
#define FAIL(ec) UPRV_BLOCK_MACRO_BEGIN { \
    ec=U_ILLEGAL_ARGUMENT_ERROR; \
    return *this; \
} UPRV_BLOCK_MACRO_END
781 | |
782 | UnicodeSet& |
783 | UnicodeSet::applyIntPropertyValue(UProperty prop, int32_t value, UErrorCode& ec) { |
784 | if (U_FAILURE(ec) || isFrozen()) { return *this; } |
785 | if (prop == UCHAR_GENERAL_CATEGORY_MASK) { |
786 | const UnicodeSet* inclusions = CharacterProperties::getInclusionsForProperty(prop, ec); |
787 | applyFilter(generalCategoryMaskFilter, &value, inclusions, ec); |
788 | } else if (prop == UCHAR_SCRIPT_EXTENSIONS) { |
789 | const UnicodeSet* inclusions = CharacterProperties::getInclusionsForProperty(prop, ec); |
790 | UScriptCode script = static_cast<UScriptCode>(value); |
791 | applyFilter(scriptExtensionsFilter, &script, inclusions, ec); |
792 | } else if (prop == UCHAR_IDENTIFIER_TYPE) { |
793 | const UnicodeSet* inclusions = CharacterProperties::getInclusionsForProperty(prop, ec); |
794 | UIdentifierType idType = static_cast<UIdentifierType>(value); |
795 | applyFilter(idTypeFilter, &idType, inclusions, ec); |
796 | } else if (0 <= prop && prop < UCHAR_BINARY_LIMIT) { |
797 | if (value == 0 || value == 1) { |
798 | const USet *set = u_getBinaryPropertySetu_getBinaryPropertySet_77(prop, &ec); |
799 | if (U_FAILURE(ec)) { return *this; } |
800 | copyFrom(*UnicodeSet::fromUSet(set), true); |
801 | if (value == 0) { |
802 | complement().removeAllStrings(); // code point complement |
803 | } |
804 | } else { |
805 | clear(); |
806 | } |
807 | } else if (UCHAR_INT_START <= prop && prop < UCHAR_INT_LIMIT) { |
808 | const UnicodeSet* inclusions = CharacterProperties::getInclusionsForProperty(prop, ec); |
809 | IntPropertyContext c = {prop, value}; |
810 | applyFilter(intPropertyFilter, &c, inclusions, ec); |
811 | } else { |
812 | ec = U_ILLEGAL_ARGUMENT_ERROR; |
813 | } |
814 | return *this; |
815 | } |
816 | |
817 | UnicodeSet& |
818 | UnicodeSet::applyPropertyAlias(const UnicodeString& prop, |
819 | const UnicodeString& value, |
820 | UErrorCode& ec) { |
821 | if (U_FAILURE(ec) || isFrozen()) return *this; |
822 | |
823 | // prop and value used to be converted to char * using the default |
824 | // converter instead of the invariant conversion. |
825 | // This should not be necessary because all Unicode property and value |
826 | // names use only invariant characters. |
827 | // If there are any variant characters, then we won't find them anyway. |
828 | // Checking first avoids assertion failures in the conversion. |
829 | if( !uprv_isInvariantUStringuprv_isInvariantUString_77(prop.getBuffer(), prop.length()) || |
830 | !uprv_isInvariantUStringuprv_isInvariantUString_77(value.getBuffer(), value.length()) |
831 | ) { |
832 | FAIL(ec)do { ec=U_ILLEGAL_ARGUMENT_ERROR; return *this; } while (false ); |
833 | } |
834 | CharString pname, vname; |
835 | pname.appendInvariantChars(prop, ec); |
836 | vname.appendInvariantChars(value, ec); |
837 | if (U_FAILURE(ec)) return *this; |
838 | |
839 | UProperty p; |
840 | int32_t v; |
841 | UBool invert = false; |
842 | |
843 | if (value.length() > 0) { |
844 | p = u_getPropertyEnumu_getPropertyEnum_77(pname.data()); |
845 | if (p == UCHAR_INVALID_CODE) FAIL(ec)do { ec=U_ILLEGAL_ARGUMENT_ERROR; return *this; } while (false ); |
846 | |
847 | // Treat gc as gcm |
848 | if (p == UCHAR_GENERAL_CATEGORY) { |
849 | p = UCHAR_GENERAL_CATEGORY_MASK; |
850 | } |
851 | |
852 | if ((p >= UCHAR_BINARY_START && p < UCHAR_BINARY_LIMIT) || |
853 | (p >= UCHAR_INT_START && p < UCHAR_INT_LIMIT) || |
854 | (p >= UCHAR_MASK_START && p < UCHAR_MASK_LIMIT)) { |
855 | v = u_getPropertyValueEnumu_getPropertyValueEnum_77(p, vname.data()); |
856 | if (v == UCHAR_INVALID_CODE) { |
857 | // Handle numeric CCC |
858 | if (p == UCHAR_CANONICAL_COMBINING_CLASS || |
859 | p == UCHAR_TRAIL_CANONICAL_COMBINING_CLASS || |
860 | p == UCHAR_LEAD_CANONICAL_COMBINING_CLASS) { |
861 | char* end; |
862 | double val = uprv_strtod(vname.data(), &end):: strtod(vname.data(), &end); |
863 | // Anything between 0 and 255 is valid even if unused. |
864 | // Cast double->int only after range check. |
865 | // We catch NaN here because comparing it with both 0 and 255 will be false |
866 | // (as are all comparisons with NaN). |
867 | if (*end != 0 || !(0 <= val && val <= 255) || |
868 | (v = static_cast<int32_t>(val)) != val) { |
869 | // non-integral value or outside 0..255, or trailing junk |
870 | FAIL(ec)do { ec=U_ILLEGAL_ARGUMENT_ERROR; return *this; } while (false ); |
871 | } |
872 | } else { |
873 | FAIL(ec)do { ec=U_ILLEGAL_ARGUMENT_ERROR; return *this; } while (false ); |
874 | } |
875 | } |
876 | } |
877 | |
878 | else { |
879 | |
880 | switch (p) { |
881 | case UCHAR_NUMERIC_VALUE: |
882 | { |
883 | char* end; |
884 | double val = uprv_strtod(vname.data(), &end):: strtod(vname.data(), &end); |
885 | if (*end != 0) { |
886 | FAIL(ec)do { ec=U_ILLEGAL_ARGUMENT_ERROR; return *this; } while (false ); |
887 | } |
888 | applyFilter(numericValueFilter, &val, |
889 | CharacterProperties::getInclusionsForProperty(p, ec), ec); |
890 | return *this; |
891 | } |
892 | case UCHAR_NAME: |
893 | { |
894 | // Must munge name, since u_charFromName() does not do |
895 | // 'loose' matching. |
896 | char buf[128]; // it suffices that this be > uprv_getMaxCharNameLength |
897 | if (!mungeCharName(buf, vname.data(), sizeof(buf))) FAIL(ec)do { ec=U_ILLEGAL_ARGUMENT_ERROR; return *this; } while (false ); |
898 | UChar32 ch = u_charFromNameu_charFromName_77(U_EXTENDED_CHAR_NAME, buf, &ec); |
899 | if (U_SUCCESS(ec)) { |
900 | clear(); |
901 | add(ch); |
902 | return *this; |
903 | } else { |
904 | FAIL(ec)do { ec=U_ILLEGAL_ARGUMENT_ERROR; return *this; } while (false ); |
905 | } |
906 | } |
907 | case UCHAR_UNICODE_1_NAME: |
908 | // ICU 49 deprecates the Unicode_1_Name property APIs. |
909 | FAIL(ec)do { ec=U_ILLEGAL_ARGUMENT_ERROR; return *this; } while (false ); |
910 | case UCHAR_AGE: |
911 | { |
912 | // Must munge name, since u_versionFromString() does not do |
913 | // 'loose' matching. |
914 | char buf[128]; |
915 | if (!mungeCharName(buf, vname.data(), sizeof(buf))) FAIL(ec)do { ec=U_ILLEGAL_ARGUMENT_ERROR; return *this; } while (false ); |
916 | UVersionInfo version; |
917 | u_versionFromStringu_versionFromString_77(version, buf); |
918 | applyFilter(versionFilter, &version, |
919 | CharacterProperties::getInclusionsForProperty(p, ec), ec); |
920 | return *this; |
921 | } |
922 | case UCHAR_SCRIPT_EXTENSIONS: |
923 | v = u_getPropertyValueEnumu_getPropertyValueEnum_77(UCHAR_SCRIPT, vname.data()); |
924 | if (v == UCHAR_INVALID_CODE) { |
925 | FAIL(ec)do { ec=U_ILLEGAL_ARGUMENT_ERROR; return *this; } while (false ); |
926 | } |
927 | // fall through to calling applyIntPropertyValue() |
928 | break; |
929 | case UCHAR_IDENTIFIER_TYPE: |
930 | v = u_getPropertyValueEnumu_getPropertyValueEnum_77(p, vname.data()); |
931 | if (v == UCHAR_INVALID_CODE) { |
932 | FAIL(ec)do { ec=U_ILLEGAL_ARGUMENT_ERROR; return *this; } while (false ); |
933 | } |
934 | // fall through to calling applyIntPropertyValue() |
935 | break; |
936 | default: |
937 | // p is a non-binary, non-enumerated property that we |
938 | // don't support (yet). |
939 | FAIL(ec)do { ec=U_ILLEGAL_ARGUMENT_ERROR; return *this; } while (false ); |
940 | } |
941 | } |
942 | } |
943 | |
944 | else { |
945 | // value is empty. Interpret as General Category, Script, or |
946 | // Binary property. |
947 | p = UCHAR_GENERAL_CATEGORY_MASK; |
948 | v = u_getPropertyValueEnumu_getPropertyValueEnum_77(p, pname.data()); |
949 | if (v == UCHAR_INVALID_CODE) { |
950 | p = UCHAR_SCRIPT; |
951 | v = u_getPropertyValueEnumu_getPropertyValueEnum_77(p, pname.data()); |
952 | if (v == UCHAR_INVALID_CODE) { |
953 | p = u_getPropertyEnumu_getPropertyEnum_77(pname.data()); |
954 | if (p >= UCHAR_BINARY_START && p < UCHAR_BINARY_LIMIT) { |
955 | v = 1; |
956 | } else if (0 == uprv_comparePropertyNamesuprv_compareASCIIPropertyNames_77(ANY, pname.data())) { |
957 | set(MIN_VALUE, MAX_VALUE); |
958 | return *this; |
959 | } else if (0 == uprv_comparePropertyNamesuprv_compareASCIIPropertyNames_77(ASCII, pname.data())) { |
960 | set(0, 0x7F); |
961 | return *this; |
962 | } else if (0 == uprv_comparePropertyNamesuprv_compareASCIIPropertyNames_77(ASSIGNED, pname.data())) { |
963 | // [:Assigned:]=[:^Cn:] |
964 | p = UCHAR_GENERAL_CATEGORY_MASK; |
965 | v = U_GC_CN_MASK((uint32_t)1<<(U_GENERAL_OTHER_TYPES)); |
966 | invert = true; |
967 | } else { |
968 | FAIL(ec)do { ec=U_ILLEGAL_ARGUMENT_ERROR; return *this; } while (false ); |
969 | } |
970 | } |
971 | } |
972 | } |
973 | |
974 | applyIntPropertyValue(p, v, ec); |
975 | if(invert) { |
976 | complement().removeAllStrings(); // code point complement |
977 | } |
978 | |
979 | if (isBogus() && U_SUCCESS(ec)) { |
980 | // We likely ran out of memory. AHHH! |
981 | ec = U_MEMORY_ALLOCATION_ERROR; |
982 | } |
983 | return *this; |
984 | } |
985 | |
986 | //---------------------------------------------------------------- |
987 | // Property set patterns |
988 | //---------------------------------------------------------------- |
989 | |
990 | /** |
991 | * Return true if the given position, in the given pattern, appears |
992 | * to be the start of a property set pattern. |
993 | */ |
994 | UBool UnicodeSet::resemblesPropertyPattern(const UnicodeString& pattern, |
995 | int32_t pos) { |
996 | // Patterns are at least 5 characters long |
997 | if ((pos+5) > pattern.length()) { |
998 | return false; |
999 | } |
1000 | |
1001 | // Look for an opening [:, [:^, \p, or \P |
1002 | return isPOSIXOpen(pattern, pos) || isPerlOpen(pattern, pos) || isNameOpen(pattern, pos); |
1003 | } |
1004 | |
1005 | /** |
1006 | * Return true if the given iterator appears to point at a |
1007 | * property pattern. Regardless of the result, return with the |
1008 | * iterator unchanged. |
1009 | * @param chars iterator over the pattern characters. Upon return |
1010 | * it will be unchanged. |
1011 | * @param iterOpts RuleCharacterIterator options |
1012 | */ |
1013 | UBool UnicodeSet::resemblesPropertyPattern(RuleCharacterIterator& chars, |
1014 | int32_t iterOpts) { |
1015 | // NOTE: literal will always be false, because we don't parse escapes. |
1016 | UBool result = false, literal; |
1017 | UErrorCode ec = U_ZERO_ERROR; |
1018 | iterOpts &= ~RuleCharacterIterator::PARSE_ESCAPES; |
1019 | RuleCharacterIterator::Pos pos; |
1020 | chars.getPos(pos); |
1021 | UChar32 c = chars.next(iterOpts, literal, ec); |
1022 | if (c == u'[' || c == u'\\') { |
1023 | UChar32 d = chars.next(iterOpts & ~RuleCharacterIterator::SKIP_WHITESPACE, |
1024 | literal, ec); |
1025 | result = (c == u'[') ? (d == u':') : |
1026 | (d == u'N' || d == u'p' || d == u'P'); |
1027 | } |
1028 | chars.setPos(pos); |
1029 | return result && U_SUCCESS(ec); |
1030 | } |
1031 | |
1032 | /** |
1033 | * Parse the given property pattern at the given parse position. |
1034 | */ |
1035 | UnicodeSet& UnicodeSet::applyPropertyPattern(const UnicodeString& pattern, |
1036 | ParsePosition& ppos, |
1037 | UErrorCode &ec) { |
1038 | int32_t pos = ppos.getIndex(); |
1039 | |
1040 | UBool posix = false; // true for [:pat:], false for \p{pat} \P{pat} \N{pat} |
1041 | UBool isName = false; // true for \N{pat}, o/w false |
1042 | UBool invert = false; |
1043 | |
1044 | if (U_FAILURE(ec)) return *this; |
1045 | |
1046 | // Minimum length is 5 characters, e.g. \p{L} |
1047 | if ((pos+5) > pattern.length()) { |
1048 | FAIL(ec)do { ec=U_ILLEGAL_ARGUMENT_ERROR; return *this; } while (false ); |
1049 | } |
1050 | |
1051 | // On entry, ppos should point to one of the following locations: |
1052 | // Look for an opening [:, [:^, \p, or \P |
1053 | if (isPOSIXOpen(pattern, pos)) { |
1054 | posix = true; |
1055 | pos += 2; |
1056 | pos = ICU_Utility::skipWhitespace(pattern, pos); |
1057 | if (pos < pattern.length() && pattern.charAt(pos) == u'^') { |
1058 | ++pos; |
1059 | invert = true; |
1060 | } |
1061 | } else if (isPerlOpen(pattern, pos) || isNameOpen(pattern, pos)) { |
1062 | char16_t c = pattern.charAt(pos+1); |
1063 | invert = (c == u'P'); |
1064 | isName = (c == u'N'); |
1065 | pos += 2; |
1066 | pos = ICU_Utility::skipWhitespace(pattern, pos); |
1067 | if (pos == pattern.length() || pattern.charAt(pos++) != u'{') { |
1068 | // Syntax error; "\p" or "\P" not followed by "{" |
1069 | FAIL(ec)do { ec=U_ILLEGAL_ARGUMENT_ERROR; return *this; } while (false ); |
1070 | } |
1071 | } else { |
1072 | // Open delimiter not seen |
1073 | FAIL(ec)do { ec=U_ILLEGAL_ARGUMENT_ERROR; return *this; } while (false ); |
1074 | } |
1075 | |
1076 | // Look for the matching close delimiter, either :] or } |
1077 | int32_t close; |
1078 | if (posix) { |
1079 | close = pattern.indexOf(u":]", 2, pos); |
1080 | } else { |
1081 | close = pattern.indexOf(u'}', pos); |
1082 | } |
1083 | if (close < 0) { |
1084 | // Syntax error; close delimiter missing |
1085 | FAIL(ec)do { ec=U_ILLEGAL_ARGUMENT_ERROR; return *this; } while (false ); |
1086 | } |
1087 | |
1088 | // Look for an '=' sign. If this is present, we will parse a |
1089 | // medium \p{gc=Cf} or long \p{GeneralCategory=Format} |
1090 | // pattern. |
1091 | int32_t equals = pattern.indexOf(u'=', pos); |
1092 | UnicodeString propName, valueName; |
1093 | if (equals >= 0 && equals < close && !isName) { |
1094 | // Equals seen; parse medium/long pattern |
1095 | pattern.extractBetween(pos, equals, propName); |
1096 | pattern.extractBetween(equals+1, close, valueName); |
1097 | } |
1098 | |
1099 | else { |
1100 | // Handle case where no '=' is seen, and \N{} |
1101 | pattern.extractBetween(pos, close, propName); |
1102 | |
1103 | // Handle \N{name} |
1104 | if (isName) { |
1105 | // This is a little inefficient since it means we have to |
1106 | // parse NAME_PROP back to UCHAR_NAME even though we already |
1107 | // know it's UCHAR_NAME. If we refactor the API to |
1108 | // support args of (UProperty, char*) then we can remove |
1109 | // NAME_PROP and make this a little more efficient. |
1110 | valueName = propName; |
1111 | propName = NAME_PROP; |
1112 | } |
1113 | } |
1114 | |
1115 | applyPropertyAlias(propName, valueName, ec); |
1116 | |
1117 | if (U_SUCCESS(ec)) { |
1118 | if (invert) { |
1119 | complement().removeAllStrings(); // code point complement |
1120 | } |
1121 | |
1122 | // Move to the limit position after the close delimiter if the |
1123 | // parse succeeded. |
1124 | ppos.setIndex(close + (posix ? 2 : 1)); |
1125 | } |
1126 | |
1127 | return *this; |
1128 | } |
1129 | |
1130 | /** |
1131 | * Parse a property pattern. |
1132 | * @param chars iterator over the pattern characters. Upon return |
1133 | * it will be advanced to the first character after the parsed |
1134 | * pattern, or the end of the iteration if all characters are |
1135 | * parsed. |
1136 | * @param rebuiltPat the pattern that was parsed, rebuilt or |
1137 | * copied from the input pattern, as appropriate. |
1138 | */ |
1139 | void UnicodeSet::applyPropertyPattern(RuleCharacterIterator& chars, |
1140 | UnicodeString& rebuiltPat, |
1141 | UErrorCode& ec) { |
1142 | if (U_FAILURE(ec)) return; |
1143 | UnicodeString pattern; |
1144 | chars.lookahead(pattern); |
1145 | ParsePosition pos(0); |
1146 | applyPropertyPattern(pattern, pos, ec); |
1147 | if (U_FAILURE(ec)) return; |
1148 | if (pos.getIndex() == 0) { |
1149 | // syntaxError(chars, "Invalid property pattern"); |
1150 | ec = U_MALFORMED_SET; |
1151 | return; |
1152 | } |
1153 | chars.jumpahead(pos.getIndex()); |
1154 | rebuiltPat.append(pattern, 0, pos.getIndex()); |
1155 | } |
1156 | |
1157 | U_NAMESPACE_END} |