Bug Summary

File:var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc
Warning:line 1435, column 38
The result of left shift is undefined because the right operand '32' is not smaller than 32, the capacity of 'uint32_t'

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name regexp-compiler.cc -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -fhalf-no-semantic-interposition -mframe-pointer=all -relaxed-aliasing -ffp-contract=off -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fdebug-compilation-dir=/var/lib/jenkins/workspace/firefox-scan-build/obj-x86_64-pc-linux-gnu/js/src/irregexp -fcoverage-compilation-dir=/var/lib/jenkins/workspace/firefox-scan-build/obj-x86_64-pc-linux-gnu/js/src/irregexp -resource-dir /usr/lib/llvm-18/lib/clang/18 -include /var/lib/jenkins/workspace/firefox-scan-build/config/gcc_hidden.h -include /var/lib/jenkins/workspace/firefox-scan-build/obj-x86_64-pc-linux-gnu/js/src/js-confdefs.h -I /var/lib/jenkins/workspace/firefox-scan-build/obj-x86_64-pc-linux-gnu/dist/stl_wrappers -I /var/lib/jenkins/workspace/firefox-scan-build/obj-x86_64-pc-linux-gnu/dist/system_wrappers -U _FORTIFY_SOURCE -D _FORTIFY_SOURCE=2 -D DEBUG=1 -D WASM_SUPPORTS_HUGE_MEMORY -D JS_CACHEIR_SPEW -D JS_STRUCTURED_SPEW -D JS_HAS_CTYPES -D FFI_BUILDING -D EXPORT_JS_API -D MOZ_HAS_MOZGLUE -I /var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp -I /var/lib/jenkins/workspace/firefox-scan-build/obj-x86_64-pc-linux-gnu/js/src/irregexp -I /var/lib/jenkins/workspace/firefox-scan-build/obj-x86_64-pc-linux-gnu/js/src -I /var/lib/jenkins/workspace/firefox-scan-build/js/src -I /var/lib/jenkins/workspace/firefox-scan-build/obj-x86_64-pc-linux-gnu/dist/include -I /var/lib/jenkins/workspace/firefox-scan-build/obj-x86_64-pc-linux-gnu/dist/include/nspr -I /var/lib/jenkins/workspace/firefox-scan-build/obj-x86_64-pc-linux-gnu/dist/include/nss -D MOZILLA_CLIENT -D V8_INTL_SUPPORT -internal-isystem /usr/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13 -internal-isystem /usr/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/x86_64-linux-gnu/c++/13 -internal-isystem /usr/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/backward -internal-isystem /usr/lib/llvm-18/lib/clang/18/include -internal-isystem /usr/local/include -internal-isystem /usr/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O3 -Wno-error=tautological-type-limit-compare -Wno-invalid-offsetof -Wno-range-loop-analysis -Wno-deprecated-anon-enum-enum-conversion -Wno-deprecated-enum-enum-conversion -Wno-deprecated-this-capture -Wno-inline-new-delete -Wno-error=deprecated-declarations -Wno-error=array-bounds -Wno-error=free-nonheap-object -Wno-error=atomic-alignment -Wno-error=deprecated-builtins -Wno-psabi -Wno-error=builtin-macro-redefined -Wno-vla-cxx-extension -Wno-unknown-warning-option -Wno-error=type-limits -Wno-error=return-type -Wno-c++11-narrowing -fdeprecated-macro -ferror-limit 19 -stack-protector 2 -fstack-clash-protection -ftrivial-auto-var-init=pattern -fno-rtti -fgnuc-version=4.2.1 -fno-aligned-allocation -vectorize-loops -vectorize-slp -analyzer-checker optin.performance.Padding -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2024-05-16-034744-15991-1 -x c++ /var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc
1// Copyright 2019 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "irregexp/imported/regexp-compiler.h"
6
7#include "irregexp/imported/regexp-macro-assembler-arch.h"
8
9#ifdef V8_INTL_SUPPORT1
10#include "irregexp/imported/special-case.h"
11#include "unicode/locid.h"
12#include "unicode/uniset.h"
13#include "unicode/utypes.h"
14#endif // V8_INTL_SUPPORT
15
16namespace v8 {
17namespace internal {
18
19using namespace regexp_compiler_constants; // NOLINT(build/namespaces)
20
21// -------------------------------------------------------------------
22// Implementation of the Irregexp regular expression engine.
23//
24// The Irregexp regular expression engine is intended to be a complete
25// implementation of ECMAScript regular expressions. It generates either
26// bytecodes or native code.
27
28// The Irregexp regexp engine is structured in three steps.
29// 1) The parser generates an abstract syntax tree. See ast.cc.
30// 2) From the AST a node network is created. The nodes are all
31// subclasses of RegExpNode. The nodes represent states when
32// executing a regular expression. Several optimizations are
33// performed on the node network.
34// 3) From the nodes we generate either byte codes or native code
35// that can actually execute the regular expression (perform
36// the search). The code generation step is described in more
37// detail below.
38
39// Code generation.
40//
41// The nodes are divided into four main categories.
42// * Choice nodes
43// These represent places where the regular expression can
44// match in more than one way. For example on entry to an
45// alternation (foo|bar) or a repetition (*, +, ? or {}).
46// * Action nodes
47// These represent places where some action should be
48// performed. Examples include recording the current position
49// in the input string to a register (in order to implement
50// captures) or other actions on register for example in order
51// to implement the counters needed for {} repetitions.
52// * Matching nodes
53// These attempt to match some element part of the input string.
54// Examples of elements include character classes, plain strings
55// or back references.
56// * End nodes
57// These are used to implement the actions required on finding
58// a successful match or failing to find a match.
59//
60// The code generated (whether as byte codes or native code) maintains
61// some state as it runs. This consists of the following elements:
62//
63// * The capture registers. Used for string captures.
64// * Other registers. Used for counters etc.
65// * The current position.
66// * The stack of backtracking information. Used when a matching node
67// fails to find a match and needs to try an alternative.
68//
69// Conceptual regular expression execution model:
70//
71// There is a simple conceptual model of regular expression execution
72// which will be presented first. The actual code generated is a more
73// efficient simulation of the simple conceptual model:
74//
75// * Choice nodes are implemented as follows:
76// For each choice except the last {
77// push current position
78// push backtrack code location
79// <generate code to test for choice>
80// backtrack code location:
81// pop current position
82// }
83// <generate code to test for last choice>
84//
85// * Actions nodes are generated as follows
86// <push affected registers on backtrack stack>
87// <generate code to perform action>
88// push backtrack code location
89// <generate code to test for following nodes>
90// backtrack code location:
91// <pop affected registers to restore their state>
92// <pop backtrack location from stack and go to it>
93//
94// * Matching nodes are generated as follows:
95// if input string matches at current position
96// update current position
97// <generate code to test for following nodes>
98// else
99// <pop backtrack location from stack and go to it>
100//
101// Thus it can be seen that the current position is saved and restored
102// by the choice nodes, whereas the registers are saved and restored by
103// by the action nodes that manipulate them.
104//
105// The other interesting aspect of this model is that nodes are generated
106// at the point where they are needed by a recursive call to Emit(). If
107// the node has already been code generated then the Emit() call will
108// generate a jump to the previously generated code instead. In order to
109// limit recursion it is possible for the Emit() function to put the node
110// on a work list for later generation and instead generate a jump. The
111// destination of the jump is resolved later when the code is generated.
112//
113// Actual regular expression code generation.
114//
115// Code generation is actually more complicated than the above. In order to
116// improve the efficiency of the generated code some optimizations are
117// performed
118//
119// * Choice nodes have 1-character lookahead.
120// A choice node looks at the following character and eliminates some of
121// the choices immediately based on that character. This is not yet
122// implemented.
123// * Simple greedy loops store reduced backtracking information.
124// A quantifier like /.*foo/m will greedily match the whole input. It will
125// then need to backtrack to a point where it can match "foo". The naive
126// implementation of this would push each character position onto the
127// backtracking stack, then pop them off one by one. This would use space
128// proportional to the length of the input string. However since the "."
129// can only match in one way and always has a constant length (in this case
130// of 1) it suffices to store the current position on the top of the stack
131// once. Matching now becomes merely incrementing the current position and
132// backtracking becomes decrementing the current position and checking the
133// result against the stored current position. This is faster and saves
134// space.
135// * The current state is virtualized.
136// This is used to defer expensive operations until it is clear that they
137// are needed and to generate code for a node more than once, allowing
138// specialized an efficient versions of the code to be created. This is
139// explained in the section below.
140//
141// Execution state virtualization.
142//
143// Instead of emitting code, nodes that manipulate the state can record their
144// manipulation in an object called the Trace. The Trace object can record a
145// current position offset, an optional backtrack code location on the top of
146// the virtualized backtrack stack and some register changes. When a node is
147// to be emitted it can flush the Trace or update it. Flushing the Trace
148// will emit code to bring the actual state into line with the virtual state.
149// Avoiding flushing the state can postpone some work (e.g. updates of capture
150// registers). Postponing work can save time when executing the regular
151// expression since it may be found that the work never has to be done as a
152// failure to match can occur. In addition it is much faster to jump to a
153// known backtrack code location than it is to pop an unknown backtrack
154// location from the stack and jump there.
155//
156// The virtual state found in the Trace affects code generation. For example
157// the virtual state contains the difference between the actual current
158// position and the virtual current position, and matching code needs to use
159// this offset to attempt a match in the correct location of the input
160// string. Therefore code generated for a non-trivial trace is specialized
161// to that trace. The code generator therefore has the ability to generate
162// code for each node several times. In order to limit the size of the
163// generated code there is an arbitrary limit on how many specialized sets of
164// code may be generated for a given node. If the limit is reached, the
165// trace is flushed and a generic version of the code for a node is emitted.
166// This is subsequently used for that node. The code emitted for non-generic
167// trace is not recorded in the node and so it cannot currently be reused in
168// the event that code generation is requested for an identical trace.
169
170namespace {
171
172constexpr base::uc32 MaxCodeUnit(const bool one_byte) {
173 static_assert(String::kMaxOneByteCharCodeU <=
174 std::numeric_limits<uint16_t>::max());
175 static_assert(String::kMaxUtf16CodeUnitU <=
176 std::numeric_limits<uint16_t>::max());
177 return one_byte ? String::kMaxOneByteCharCodeU : String::kMaxUtf16CodeUnitU;
178}
179
180constexpr uint32_t CharMask(const bool one_byte) {
181 static_assert(base::bits::IsPowerOfTwo(String::kMaxOneByteCharCodeU + 1));
182 static_assert(base::bits::IsPowerOfTwo(String::kMaxUtf16CodeUnitU + 1));
183 return MaxCodeUnit(one_byte);
184}
185
186} // namespace
187
188void RegExpTree::AppendToText(RegExpText* text, Zone* zone) { UNREACHABLE()do { do { } while (false); MOZ_ReportCrash("" "unreachable code"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 188); AnnotateMozCrashReason("MOZ_CRASH(" "unreachable code"
")"); do { *((volatile int*)__null) = 188; __attribute__((nomerge
)) ::abort(); } while (false); } while (false)
; }
189
190void RegExpAtom::AppendToText(RegExpText* text, Zone* zone) {
191 text->AddElement(TextElement::Atom(this), zone);
192}
193
194void RegExpClassRanges::AppendToText(RegExpText* text, Zone* zone) {
195 text->AddElement(TextElement::ClassRanges(this), zone);
196}
197
198void RegExpText::AppendToText(RegExpText* text, Zone* zone) {
199 for (int i = 0; i < elements()->length(); i++)
200 text->AddElement(elements()->at(i), zone);
201}
202
203TextElement TextElement::Atom(RegExpAtom* atom) {
204 return TextElement(ATOM, atom);
205}
206
207TextElement TextElement::ClassRanges(RegExpClassRanges* class_ranges) {
208 return TextElement(CLASS_RANGES, class_ranges);
209}
210
211int TextElement::length() const {
212 switch (text_type()) {
213 case ATOM:
214 return atom()->length();
215
216 case CLASS_RANGES:
217 return 1;
218 }
219 UNREACHABLE()do { do { } while (false); MOZ_ReportCrash("" "unreachable code"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 219); AnnotateMozCrashReason("MOZ_CRASH(" "unreachable code"
")"); do { *((volatile int*)__null) = 219; __attribute__((nomerge
)) ::abort(); } while (false); } while (false)
;
220}
221
222class RecursionCheck {
223 public:
224 explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) {
225 compiler->IncrementRecursionDepth();
226 }
227 ~RecursionCheck() { compiler_->DecrementRecursionDepth(); }
228
229 private:
230 RegExpCompiler* compiler_;
231};
232
233// Attempts to compile the regexp using an Irregexp code generator. Returns
234// a fixed array or a null handle depending on whether it succeeded.
235RegExpCompiler::RegExpCompiler(Isolate* isolate, Zone* zone, int capture_count,
236 RegExpFlags flags, bool one_byte)
237 : next_register_(JSRegExp::RegistersForCaptureCount(capture_count)),
238 unicode_lookaround_stack_register_(kNoRegister),
239 unicode_lookaround_position_register_(kNoRegister),
240 work_list_(nullptr),
241 recursion_depth_(0),
242 flags_(flags),
243 one_byte_(one_byte),
244 reg_exp_too_big_(false),
245 limiting_recursion_(false),
246 optimize_(v8_flagsjs::jit::JitOptions.regexp_optimization),
247 read_backward_(false),
248 current_expansion_factor_(1),
249 frequency_collator_(),
250 isolate_(isolate),
251 zone_(zone) {
252 accept_ = zone->New<EndNode>(EndNode::ACCEPT, zone);
253 DCHECK_GE(RegExpMacroAssembler::kMaxRegister, next_register_ - 1)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((RegExpMacroAssembler::kMaxRegister) >= (next_register_
- 1))>::isValid, "invalid assertion condition"); if ((__builtin_expect
(!!(!(!!((RegExpMacroAssembler::kMaxRegister) >= (next_register_
- 1)))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("(RegExpMacroAssembler::kMaxRegister) >= (next_register_ - 1)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 253); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(RegExpMacroAssembler::kMaxRegister) >= (next_register_ - 1)"
")"); do { *((volatile int*)__null) = 253; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
254}
255
256RegExpCompiler::CompilationResult RegExpCompiler::Assemble(
257 Isolate* isolate, RegExpMacroAssembler* macro_assembler, RegExpNode* start,
258 int capture_count, Handle<String> pattern) {
259 macro_assembler_ = macro_assembler;
260
261 ZoneVector<RegExpNode*> work_list(zone());
262 work_list_ = &work_list;
263 Label fail;
264 macro_assembler_->PushBacktrack(&fail);
265 Trace new_trace;
266 start->Emit(this, &new_trace);
267 macro_assembler_->BindJumpTarget(&fail);
268 macro_assembler_->Fail();
269 while (!work_list.empty()) {
270 RegExpNode* node = work_list.back();
271 work_list.pop_back();
272 node->set_on_work_list(false);
273 if (!node->label()->is_bound()) node->Emit(this, &new_trace);
274 }
275 if (reg_exp_too_big_) {
276 if (v8_flagsjs::jit::JitOptions.correctness_fuzzer_suppressions) {
277 FATAL("Aborting on excess zone allocation")do { do { } while (false); MOZ_ReportCrash("" "Aborting on excess zone allocation"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 277); AnnotateMozCrashReason("MOZ_CRASH(" "Aborting on excess zone allocation"
")"); do { *((volatile int*)__null) = 277; __attribute__((nomerge
)) ::abort(); } while (false); } while (false)
;
278 }
279 macro_assembler_->AbortedCodeGeneration();
280 return CompilationResult::RegExpTooBig();
281 }
282
283 Handle<HeapObject> code = macro_assembler_->GetCode(pattern);
284 isolate->IncreaseTotalRegexpCodeGenerated(code);
285 work_list_ = nullptr;
286
287 return {code, next_register_};
288}
289
290bool Trace::DeferredAction::Mentions(int that) {
291 if (action_type() == ActionNode::CLEAR_CAPTURES) {
292 Interval range = static_cast<DeferredClearCaptures*>(this)->range();
293 return range.Contains(that);
294 } else {
295 return reg() == that;
296 }
297}
298
299bool Trace::mentions_reg(int reg) {
300 for (DeferredAction* action = actions_; action != nullptr;
301 action = action->next()) {
302 if (action->Mentions(reg)) return true;
303 }
304 return false;
305}
306
307bool Trace::GetStoredPosition(int reg, int* cp_offset) {
308 DCHECK_EQ(0, *cp_offset)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((0) == (*cp_offset))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((0) == (*cp_offset)))), 0)))
{ do { } while (false); MOZ_ReportAssertionFailure("(0) == (*cp_offset)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 308); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(0) == (*cp_offset)"
")"); do { *((volatile int*)__null) = 308; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
309 for (DeferredAction* action = actions_; action != nullptr;
310 action = action->next()) {
311 if (action->Mentions(reg)) {
312 if (action->action_type() == ActionNode::STORE_POSITION) {
313 *cp_offset = static_cast<DeferredCapture*>(action)->cp_offset();
314 return true;
315 } else {
316 return false;
317 }
318 }
319 }
320 return false;
321}
322
323// A (dynamically-sized) set of unsigned integers that behaves especially well
324// on small integers (< kFirstLimit). May do zone-allocation.
325class DynamicBitSet : public ZoneObject {
326 public:
327 V8_EXPORT_PRIVATE bool Get(unsigned value) const {
328 if (value < kFirstLimit) {
329 return (first_ & (1 << value)) != 0;
330 } else if (remaining_ == nullptr) {
331 return false;
332 } else {
333 return remaining_->Contains(value);
334 }
335 }
336
337 // Destructively set a value in this set.
338 void Set(unsigned value, Zone* zone) {
339 if (value < kFirstLimit) {
340 first_ |= (1 << value);
341 } else {
342 if (remaining_ == nullptr)
343 remaining_ = zone->New<ZoneList<unsigned>>(1, zone);
344 if (remaining_->is_empty() || !remaining_->Contains(value))
345 remaining_->Add(value, zone);
346 }
347 }
348
349 private:
350 static constexpr unsigned kFirstLimit = 32;
351
352 uint32_t first_ = 0;
353 ZoneList<unsigned>* remaining_ = nullptr;
354};
355
356int Trace::FindAffectedRegisters(DynamicBitSet* affected_registers,
357 Zone* zone) {
358 int max_register = RegExpCompiler::kNoRegister;
359 for (DeferredAction* action = actions_; action != nullptr;
360 action = action->next()) {
361 if (action->action_type() == ActionNode::CLEAR_CAPTURES) {
362 Interval range = static_cast<DeferredClearCaptures*>(action)->range();
363 for (int i = range.from(); i <= range.to(); i++)
364 affected_registers->Set(i, zone);
365 if (range.to() > max_register) max_register = range.to();
366 } else {
367 affected_registers->Set(action->reg(), zone);
368 if (action->reg() > max_register) max_register = action->reg();
369 }
370 }
371 return max_register;
372}
373
374void Trace::RestoreAffectedRegisters(RegExpMacroAssembler* assembler,
375 int max_register,
376 const DynamicBitSet& registers_to_pop,
377 const DynamicBitSet& registers_to_clear) {
378 for (int reg = max_register; reg >= 0; reg--) {
379 if (registers_to_pop.Get(reg)) {
380 assembler->PopRegister(reg);
381 } else if (registers_to_clear.Get(reg)) {
382 int clear_to = reg;
383 while (reg > 0 && registers_to_clear.Get(reg - 1)) {
384 reg--;
385 }
386 assembler->ClearRegisters(reg, clear_to);
387 }
388 }
389}
390
391void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler,
392 int max_register,
393 const DynamicBitSet& affected_registers,
394 DynamicBitSet* registers_to_pop,
395 DynamicBitSet* registers_to_clear,
396 Zone* zone) {
397 // The "+1" is to avoid a push_limit of zero if stack_limit_slack() is 1.
398 const int push_limit = (assembler->stack_limit_slack() + 1) / 2;
399
400 // Count pushes performed to force a stack limit check occasionally.
401 int pushes = 0;
402
403 for (int reg = 0; reg <= max_register; reg++) {
404 if (!affected_registers.Get(reg)) continue;
405
406 // The chronologically first deferred action in the trace
407 // is used to infer the action needed to restore a register
408 // to its previous state (or not, if it's safe to ignore it).
409 enum DeferredActionUndoType { IGNORE, RESTORE, CLEAR };
410 DeferredActionUndoType undo_action = IGNORE;
411
412 int value = 0;
413 bool absolute = false;
414 bool clear = false;
415 static const int kNoStore = kMinInt((int32_t)0x80000000);
416 int store_position = kNoStore;
417 // This is a little tricky because we are scanning the actions in reverse
418 // historical order (newest first).
419 for (DeferredAction* action = actions_; action != nullptr;
420 action = action->next()) {
421 if (action->Mentions(reg)) {
422 switch (action->action_type()) {
423 case ActionNode::SET_REGISTER_FOR_LOOP: {
424 Trace::DeferredSetRegisterForLoop* psr =
425 static_cast<Trace::DeferredSetRegisterForLoop*>(action);
426 if (!absolute) {
427 value += psr->value();
428 absolute = true;
429 }
430 // SET_REGISTER_FOR_LOOP is only used for newly introduced loop
431 // counters. They can have a significant previous value if they
432 // occur in a loop. TODO(lrn): Propagate this information, so
433 // we can set undo_action to IGNORE if we know there is no value to
434 // restore.
435 undo_action = RESTORE;
436 DCHECK_EQ(store_position, kNoStore)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((store_position) == (kNoStore))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((store_position) == (kNoStore
)))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("(store_position) == (kNoStore)", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 436); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(store_position) == (kNoStore)"
")"); do { *((volatile int*)__null) = 436; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
437 DCHECK(!clear)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(!clear)>::isValid, "invalid assertion condition")
; if ((__builtin_expect(!!(!(!!(!clear))), 0))) { do { } while
(false); MOZ_ReportAssertionFailure("!clear", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 437); AnnotateMozCrashReason("MOZ_ASSERT" "(" "!clear" ")")
; do { *((volatile int*)__null) = 437; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
438 break;
439 }
440 case ActionNode::INCREMENT_REGISTER:
441 if (!absolute) {
442 value++;
443 }
444 DCHECK_EQ(store_position, kNoStore)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((store_position) == (kNoStore))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((store_position) == (kNoStore
)))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("(store_position) == (kNoStore)", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 444); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(store_position) == (kNoStore)"
")"); do { *((volatile int*)__null) = 444; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
445 DCHECK(!clear)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(!clear)>::isValid, "invalid assertion condition")
; if ((__builtin_expect(!!(!(!!(!clear))), 0))) { do { } while
(false); MOZ_ReportAssertionFailure("!clear", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 445); AnnotateMozCrashReason("MOZ_ASSERT" "(" "!clear" ")")
; do { *((volatile int*)__null) = 445; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
446 undo_action = RESTORE;
447 break;
448 case ActionNode::STORE_POSITION: {
449 Trace::DeferredCapture* pc =
450 static_cast<Trace::DeferredCapture*>(action);
451 if (!clear && store_position == kNoStore) {
452 store_position = pc->cp_offset();
453 }
454
455 // For captures we know that stores and clears alternate.
456 // Other register, are never cleared, and if the occur
457 // inside a loop, they might be assigned more than once.
458 if (reg <= 1) {
459 // Registers zero and one, aka "capture zero", is
460 // always set correctly if we succeed. There is no
461 // need to undo a setting on backtrack, because we
462 // will set it again or fail.
463 undo_action = IGNORE;
464 } else {
465 undo_action = pc->is_capture() ? CLEAR : RESTORE;
466 }
467 DCHECK(!absolute)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(!absolute)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(!absolute))), 0))) { do { } while
(false); MOZ_ReportAssertionFailure("!absolute", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 467); AnnotateMozCrashReason("MOZ_ASSERT" "(" "!absolute" ")"
); do { *((volatile int*)__null) = 467; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
468 DCHECK_EQ(value, 0)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((value) == (0))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((value) == (0)))), 0))) { do
{ } while (false); MOZ_ReportAssertionFailure("(value) == (0)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 468); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(value) == (0)"
")"); do { *((volatile int*)__null) = 468; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
469 break;
470 }
471 case ActionNode::CLEAR_CAPTURES: {
472 // Since we're scanning in reverse order, if we've already
473 // set the position we have to ignore historically earlier
474 // clearing operations.
475 if (store_position == kNoStore) {
476 clear = true;
477 }
478 undo_action = RESTORE;
479 DCHECK(!absolute)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(!absolute)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(!absolute))), 0))) { do { } while
(false); MOZ_ReportAssertionFailure("!absolute", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 479); AnnotateMozCrashReason("MOZ_ASSERT" "(" "!absolute" ")"
); do { *((volatile int*)__null) = 479; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
480 DCHECK_EQ(value, 0)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((value) == (0))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((value) == (0)))), 0))) { do
{ } while (false); MOZ_ReportAssertionFailure("(value) == (0)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 480); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(value) == (0)"
")"); do { *((volatile int*)__null) = 480; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
481 break;
482 }
483 default:
484 UNREACHABLE()do { do { } while (false); MOZ_ReportCrash("" "unreachable code"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 484); AnnotateMozCrashReason("MOZ_CRASH(" "unreachable code"
")"); do { *((volatile int*)__null) = 484; __attribute__((nomerge
)) ::abort(); } while (false); } while (false)
;
485 }
486 }
487 }
488 // Prepare for the undo-action (e.g., push if it's going to be popped).
489 if (undo_action == RESTORE) {
490 pushes++;
491 RegExpMacroAssembler::StackCheckFlag stack_check =
492 RegExpMacroAssembler::kNoStackLimitCheck;
493 if (pushes == push_limit) {
494 stack_check = RegExpMacroAssembler::kCheckStackLimit;
495 pushes = 0;
496 }
497
498 assembler->PushRegister(reg, stack_check);
499 registers_to_pop->Set(reg, zone);
500 } else if (undo_action == CLEAR) {
501 registers_to_clear->Set(reg, zone);
502 }
503 // Perform the chronologically last action (or accumulated increment)
504 // for the register.
505 if (store_position != kNoStore) {
506 assembler->WriteCurrentPositionToRegister(reg, store_position);
507 } else if (clear) {
508 assembler->ClearRegisters(reg, reg);
509 } else if (absolute) {
510 assembler->SetRegister(reg, value);
511 } else if (value != 0) {
512 assembler->AdvanceRegister(reg, value);
513 }
514 }
515}
516
517// This is called as we come into a loop choice node and some other tricky
518// nodes. It normalizes the state of the code generator to ensure we can
519// generate generic code.
520void Trace::Flush(RegExpCompiler* compiler, RegExpNode* successor) {
521 RegExpMacroAssembler* assembler = compiler->macro_assembler();
522
523 DCHECK(!is_trivial())do { static_assert( mozilla::detail::AssertionConditionType<
decltype(!is_trivial())>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(!is_trivial()))), 0))) { do {
} while (false); MOZ_ReportAssertionFailure("!is_trivial()",
"/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 523); AnnotateMozCrashReason("MOZ_ASSERT" "(" "!is_trivial()"
")"); do { *((volatile int*)__null) = 523; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
524
525 if (actions_ == nullptr && backtrack() == nullptr) {
526 // Here we just have some deferred cp advances to fix and we are back to
527 // a normal situation. We may also have to forget some information gained
528 // through a quick check that was already performed.
529 if (cp_offset_ != 0) assembler->AdvanceCurrentPosition(cp_offset_);
530 // Create a new trivial state and generate the node with that.
531 Trace new_state;
532 successor->Emit(compiler, &new_state);
533 return;
534 }
535
536 // Generate deferred actions here along with code to undo them again.
537 DynamicBitSet affected_registers;
538
539 if (backtrack() != nullptr) {
540 // Here we have a concrete backtrack location. These are set up by choice
541 // nodes and so they indicate that we have a deferred save of the current
542 // position which we may need to emit here.
543 assembler->PushCurrentPosition();
544 }
545
546 int max_register =
547 FindAffectedRegisters(&affected_registers, compiler->zone());
548 DynamicBitSet registers_to_pop;
549 DynamicBitSet registers_to_clear;
550 PerformDeferredActions(assembler, max_register, affected_registers,
551 &registers_to_pop, &registers_to_clear,
552 compiler->zone());
553 if (cp_offset_ != 0) {
554 assembler->AdvanceCurrentPosition(cp_offset_);
555 }
556
557 // Create a new trivial state and generate the node with that.
558 Label undo;
559 assembler->PushBacktrack(&undo);
560 if (successor->KeepRecursing(compiler)) {
561 Trace new_state;
562 successor->Emit(compiler, &new_state);
563 } else {
564 compiler->AddWork(successor);
565 assembler->GoTo(successor->label());
566 }
567
568 // On backtrack we need to restore state.
569 assembler->BindJumpTarget(&undo);
570 RestoreAffectedRegisters(assembler, max_register, registers_to_pop,
571 registers_to_clear);
572 if (backtrack() == nullptr) {
573 assembler->Backtrack();
574 } else {
575 assembler->PopCurrentPosition();
576 assembler->GoTo(backtrack());
577 }
578}
579
580void NegativeSubmatchSuccess::Emit(RegExpCompiler* compiler, Trace* trace) {
581 RegExpMacroAssembler* assembler = compiler->macro_assembler();
582
583 // Omit flushing the trace. We discard the entire stack frame anyway.
584
585 if (!label()->is_bound()) {
586 // We are completely independent of the trace, since we ignore it,
587 // so this code can be used as the generic version.
588 assembler->Bind(label());
589 }
590
591 // Throw away everything on the backtrack stack since the start
592 // of the negative submatch and restore the character position.
593 assembler->ReadCurrentPositionFromRegister(current_position_register_);
594 assembler->ReadStackPointerFromRegister(stack_pointer_register_);
595 if (clear_capture_count_ > 0) {
596 // Clear any captures that might have been performed during the success
597 // of the body of the negative look-ahead.
598 int clear_capture_end = clear_capture_start_ + clear_capture_count_ - 1;
599 assembler->ClearRegisters(clear_capture_start_, clear_capture_end);
600 }
601 // Now that we have unwound the stack we find at the top of the stack the
602 // backtrack that the BeginNegativeSubmatch node got.
603 assembler->Backtrack();
604}
605
606void EndNode::Emit(RegExpCompiler* compiler, Trace* trace) {
607 if (!trace->is_trivial()) {
608 trace->Flush(compiler, this);
609 return;
610 }
611 RegExpMacroAssembler* assembler = compiler->macro_assembler();
612 if (!label()->is_bound()) {
613 assembler->Bind(label());
614 }
615 switch (action_) {
616 case ACCEPT:
617 assembler->Succeed();
618 return;
619 case BACKTRACK:
620 assembler->GoTo(trace->backtrack());
621 return;
622 case NEGATIVE_SUBMATCH_SUCCESS:
623 // This case is handled in a different virtual method.
624 UNREACHABLE()do { do { } while (false); MOZ_ReportCrash("" "unreachable code"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 624); AnnotateMozCrashReason("MOZ_CRASH(" "unreachable code"
")"); do { *((volatile int*)__null) = 624; __attribute__((nomerge
)) ::abort(); } while (false); } while (false)
;
625 }
626 UNIMPLEMENTED()do { do { } while (false); MOZ_ReportCrash("" "unimplemented code"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 626); AnnotateMozCrashReason("MOZ_CRASH(" "unimplemented code"
")"); do { *((volatile int*)__null) = 626; __attribute__((nomerge
)) ::abort(); } while (false); } while (false)
;
627}
628
629void GuardedAlternative::AddGuard(Guard* guard, Zone* zone) {
630 if (guards_ == nullptr) guards_ = zone->New<ZoneList<Guard*>>(1, zone);
631 guards_->Add(guard, zone);
632}
633
634ActionNode* ActionNode::SetRegisterForLoop(int reg, int val,
635 RegExpNode* on_success) {
636 ActionNode* result =
637 on_success->zone()->New<ActionNode>(SET_REGISTER_FOR_LOOP, on_success);
638 result->data_.u_store_register.reg = reg;
639 result->data_.u_store_register.value = val;
640 return result;
641}
642
643ActionNode* ActionNode::IncrementRegister(int reg, RegExpNode* on_success) {
644 ActionNode* result =
645 on_success->zone()->New<ActionNode>(INCREMENT_REGISTER, on_success);
646 result->data_.u_increment_register.reg = reg;
647 return result;
648}
649
650ActionNode* ActionNode::StorePosition(int reg, bool is_capture,
651 RegExpNode* on_success) {
652 ActionNode* result =
653 on_success->zone()->New<ActionNode>(STORE_POSITION, on_success);
654 result->data_.u_position_register.reg = reg;
655 result->data_.u_position_register.is_capture = is_capture;
656 return result;
657}
658
659ActionNode* ActionNode::ClearCaptures(Interval range, RegExpNode* on_success) {
660 ActionNode* result =
661 on_success->zone()->New<ActionNode>(CLEAR_CAPTURES, on_success);
662 result->data_.u_clear_captures.range_from = range.from();
663 result->data_.u_clear_captures.range_to = range.to();
664 return result;
665}
666
667ActionNode* ActionNode::BeginPositiveSubmatch(int stack_reg, int position_reg,
668 RegExpNode* on_success) {
669 ActionNode* result =
670 on_success->zone()->New<ActionNode>(BEGIN_POSITIVE_SUBMATCH, on_success);
671 result->data_.u_submatch.stack_pointer_register = stack_reg;
672 result->data_.u_submatch.current_position_register = position_reg;
673 return result;
674}
675
676ActionNode* ActionNode::BeginNegativeSubmatch(int stack_reg, int position_reg,
677 RegExpNode* on_success) {
678 ActionNode* result =
679 on_success->zone()->New<ActionNode>(BEGIN_NEGATIVE_SUBMATCH, on_success);
680 result->data_.u_submatch.stack_pointer_register = stack_reg;
681 result->data_.u_submatch.current_position_register = position_reg;
682 return result;
683}
684
685ActionNode* ActionNode::PositiveSubmatchSuccess(int stack_reg, int position_reg,
686 int clear_register_count,
687 int clear_register_from,
688 RegExpNode* on_success) {
689 ActionNode* result = on_success->zone()->New<ActionNode>(
690 POSITIVE_SUBMATCH_SUCCESS, on_success);
691 result->data_.u_submatch.stack_pointer_register = stack_reg;
692 result->data_.u_submatch.current_position_register = position_reg;
693 result->data_.u_submatch.clear_register_count = clear_register_count;
694 result->data_.u_submatch.clear_register_from = clear_register_from;
695 return result;
696}
697
698ActionNode* ActionNode::EmptyMatchCheck(int start_register,
699 int repetition_register,
700 int repetition_limit,
701 RegExpNode* on_success) {
702 ActionNode* result =
703 on_success->zone()->New<ActionNode>(EMPTY_MATCH_CHECK, on_success);
704 result->data_.u_empty_match_check.start_register = start_register;
705 result->data_.u_empty_match_check.repetition_register = repetition_register;
706 result->data_.u_empty_match_check.repetition_limit = repetition_limit;
707 return result;
708}
709
710ActionNode* ActionNode::ModifyFlags(RegExpFlags flags, RegExpNode* on_success) {
711 ActionNode* result =
712 on_success->zone()->New<ActionNode>(MODIFY_FLAGS, on_success);
713 result->data_.u_modify_flags.flags = flags;
714 return result;
715}
716
717#define DEFINE_ACCEPT(Type) \
718 void Type##Node::Accept(NodeVisitor* visitor) { visitor->Visit##Type(this); }
719FOR_EACH_NODE_TYPE(DEFINE_ACCEPT)DEFINE_ACCEPT(End) DEFINE_ACCEPT(Action) DEFINE_ACCEPT(Choice
) DEFINE_ACCEPT(LoopChoice) DEFINE_ACCEPT(NegativeLookaroundChoice
) DEFINE_ACCEPT(BackReference) DEFINE_ACCEPT(Assertion) DEFINE_ACCEPT
(Text)
720#undef DEFINE_ACCEPT
721
722// -------------------------------------------------------------------
723// Emit code.
724
725void ChoiceNode::GenerateGuard(RegExpMacroAssembler* macro_assembler,
726 Guard* guard, Trace* trace) {
727 switch (guard->op()) {
728 case Guard::LT:
729 DCHECK(!trace->mentions_reg(guard->reg()))do { static_assert( mozilla::detail::AssertionConditionType<
decltype(!trace->mentions_reg(guard->reg()))>::isValid
, "invalid assertion condition"); if ((__builtin_expect(!!(!(
!!(!trace->mentions_reg(guard->reg())))), 0))) { do { }
while (false); MOZ_ReportAssertionFailure("!trace->mentions_reg(guard->reg())"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 729); AnnotateMozCrashReason("MOZ_ASSERT" "(" "!trace->mentions_reg(guard->reg())"
")"); do { *((volatile int*)__null) = 729; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
730 macro_assembler->IfRegisterGE(guard->reg(), guard->value(),
731 trace->backtrack());
732 break;
733 case Guard::GEQ:
734 DCHECK(!trace->mentions_reg(guard->reg()))do { static_assert( mozilla::detail::AssertionConditionType<
decltype(!trace->mentions_reg(guard->reg()))>::isValid
, "invalid assertion condition"); if ((__builtin_expect(!!(!(
!!(!trace->mentions_reg(guard->reg())))), 0))) { do { }
while (false); MOZ_ReportAssertionFailure("!trace->mentions_reg(guard->reg())"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 734); AnnotateMozCrashReason("MOZ_ASSERT" "(" "!trace->mentions_reg(guard->reg())"
")"); do { *((volatile int*)__null) = 734; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
735 macro_assembler->IfRegisterLT(guard->reg(), guard->value(),
736 trace->backtrack());
737 break;
738 }
739}
740
741namespace {
742
743#ifdef DEBUG1
744bool ContainsOnlyUtf16CodeUnits(unibrow::uchar* chars, int length) {
745 static_assert(sizeof(unibrow::uchar) == 4);
746 for (int i = 0; i < length; i++) {
747 if (chars[i] > String::kMaxUtf16CodeUnit) return false;
748 }
749 return true;
750}
751#endif // DEBUG
752
753// Returns the number of characters in the equivalence class, omitting those
754// that cannot occur in the source string because it is Latin1.
755int GetCaseIndependentLetters(Isolate* isolate, base::uc16 character,
756 bool one_byte_subject, unibrow::uchar* letters,
757 int letter_length) {
758#ifdef V8_INTL_SUPPORT1
759 if (RegExpCaseFolding::IgnoreSet().contains(character)) {
760 letters[0] = character;
761 DCHECK(ContainsOnlyUtf16CodeUnits(letters, 1))do { static_assert( mozilla::detail::AssertionConditionType<
decltype(ContainsOnlyUtf16CodeUnits(letters, 1))>::isValid
, "invalid assertion condition"); if ((__builtin_expect(!!(!(
!!(ContainsOnlyUtf16CodeUnits(letters, 1)))), 0))) { do { } while
(false); MOZ_ReportAssertionFailure("ContainsOnlyUtf16CodeUnits(letters, 1)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 761); AnnotateMozCrashReason("MOZ_ASSERT" "(" "ContainsOnlyUtf16CodeUnits(letters, 1)"
")"); do { *((volatile int*)__null) = 761; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
762 return 1;
763 }
764 bool in_special_add_set =
765 RegExpCaseFolding::SpecialAddSet().contains(character);
766
767 icu::UnicodeSet set;
768 set.add(character);
769 set = set.closeOver(USET_CASE_INSENSITIVE);
770
771 UChar32 canon = 0;
772 if (in_special_add_set) {
773 canon = RegExpCaseFolding::Canonicalize(character);
774 }
775
776 int32_t range_count = set.getRangeCount();
777 int items = 0;
778 for (int32_t i = 0; i < range_count; i++) {
779 UChar32 start = set.getRangeStart(i);
780 UChar32 end = set.getRangeEnd(i);
781 CHECK(end - start + items <= letter_length)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(end - start + items <= letter_length)>::isValid
, "invalid assertion condition"); if ((__builtin_expect(!!(!(
!!(end - start + items <= letter_length))), 0))) { do { } while
(false); MOZ_ReportAssertionFailure("end - start + items <= letter_length"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 781); AnnotateMozCrashReason("MOZ_RELEASE_ASSERT" "(" "end - start + items <= letter_length"
")"); do { *((volatile int*)__null) = 781; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
782 for (UChar32 cu = start; cu <= end; cu++) {
783 if (one_byte_subject && cu > String::kMaxOneByteCharCode) break;
784 if (in_special_add_set && RegExpCaseFolding::Canonicalize(cu) != canon) {
785 continue;
786 }
787 letters[items++] = static_cast<unibrow::uchar>(cu);
788 }
789 }
790 DCHECK(ContainsOnlyUtf16CodeUnits(letters, items))do { static_assert( mozilla::detail::AssertionConditionType<
decltype(ContainsOnlyUtf16CodeUnits(letters, items))>::isValid
, "invalid assertion condition"); if ((__builtin_expect(!!(!(
!!(ContainsOnlyUtf16CodeUnits(letters, items)))), 0))) { do {
} while (false); MOZ_ReportAssertionFailure("ContainsOnlyUtf16CodeUnits(letters, items)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 790); AnnotateMozCrashReason("MOZ_ASSERT" "(" "ContainsOnlyUtf16CodeUnits(letters, items)"
")"); do { *((volatile int*)__null) = 790; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
791 return items;
792#else
793 int length =
794 isolate->jsregexp_uncanonicalize()->get(character, '\0', letters);
795 // Unibrow returns 0 or 1 for characters where case independence is
796 // trivial.
797 if (length == 0) {
798 letters[0] = character;
799 length = 1;
800 }
801
802 if (one_byte_subject) {
803 int new_length = 0;
804 for (int i = 0; i < length; i++) {
805 if (letters[i] <= String::kMaxOneByteCharCode) {
806 letters[new_length++] = letters[i];
807 }
808 }
809 length = new_length;
810 }
811
812 DCHECK(ContainsOnlyUtf16CodeUnits(letters, length))do { static_assert( mozilla::detail::AssertionConditionType<
decltype(ContainsOnlyUtf16CodeUnits(letters, length))>::isValid
, "invalid assertion condition"); if ((__builtin_expect(!!(!(
!!(ContainsOnlyUtf16CodeUnits(letters, length)))), 0))) { do {
} while (false); MOZ_ReportAssertionFailure("ContainsOnlyUtf16CodeUnits(letters, length)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 812); AnnotateMozCrashReason("MOZ_ASSERT" "(" "ContainsOnlyUtf16CodeUnits(letters, length)"
")"); do { *((volatile int*)__null) = 812; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
813 return length;
814#endif // V8_INTL_SUPPORT
815}
816
817inline bool EmitSimpleCharacter(Isolate* isolate, RegExpCompiler* compiler,
818 base::uc16 c, Label* on_failure, int cp_offset,
819 bool check, bool preloaded) {
820 RegExpMacroAssembler* assembler = compiler->macro_assembler();
821 bool bound_checked = false;
822 if (!preloaded) {
823 assembler->LoadCurrentCharacter(cp_offset, on_failure, check);
824 bound_checked = true;
825 }
826 assembler->CheckNotCharacter(c, on_failure);
827 return bound_checked;
828}
829
830// Only emits non-letters (things that don't have case). Only used for case
831// independent matches.
832inline bool EmitAtomNonLetter(Isolate* isolate, RegExpCompiler* compiler,
833 base::uc16 c, Label* on_failure, int cp_offset,
834 bool check, bool preloaded) {
835 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
836 bool one_byte = compiler->one_byte();
837 unibrow::uchar chars[4];
838 int length = GetCaseIndependentLetters(isolate, c, one_byte, chars, 4);
839 if (length < 1) {
840 // This can't match. Must be an one-byte subject and a non-one-byte
841 // character. We do not need to do anything since the one-byte pass
842 // already handled this.
843 return false; // Bounds not checked.
844 }
845 bool checked = false;
846 // We handle the length > 1 case in a later pass.
847 if (length == 1) {
848 if (one_byte && c > String::kMaxOneByteCharCodeU) {
849 // Can't match - see above.
850 return false; // Bounds not checked.
851 }
852 if (!preloaded) {
853 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check);
854 checked = check;
855 }
856 macro_assembler->CheckNotCharacter(c, on_failure);
857 }
858 return checked;
859}
860
861bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler,
862 bool one_byte, base::uc16 c1, base::uc16 c2,
863 Label* on_failure) {
864 const uint32_t char_mask = CharMask(one_byte);
865 base::uc16 exor = c1 ^ c2;
866 // Check whether exor has only one bit set.
867 if (((exor - 1) & exor) == 0) {
868 // If c1 and c2 differ only by one bit.
869 // Ecma262UnCanonicalize always gives the highest number last.
870 DCHECK(c2 > c1)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(c2 > c1)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(c2 > c1))), 0))) { do { }
while (false); MOZ_ReportAssertionFailure("c2 > c1", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 870); AnnotateMozCrashReason("MOZ_ASSERT" "(" "c2 > c1" ")"
); do { *((volatile int*)__null) = 870; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
871 base::uc16 mask = char_mask ^ exor;
872 macro_assembler->CheckNotCharacterAfterAnd(c1, mask, on_failure);
873 return true;
874 }
875 DCHECK(c2 > c1)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(c2 > c1)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(c2 > c1))), 0))) { do { }
while (false); MOZ_ReportAssertionFailure("c2 > c1", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 875); AnnotateMozCrashReason("MOZ_ASSERT" "(" "c2 > c1" ")"
); do { *((volatile int*)__null) = 875; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
876 base::uc16 diff = c2 - c1;
877 if (((diff - 1) & diff) == 0 && c1 >= diff) {
878 // If the characters differ by 2^n but don't differ by one bit then
879 // subtract the difference from the found character, then do the or
880 // trick. We avoid the theoretical case where negative numbers are
881 // involved in order to simplify code generation.
882 base::uc16 mask = char_mask ^ diff;
883 macro_assembler->CheckNotCharacterAfterMinusAnd(c1 - diff, diff, mask,
884 on_failure);
885 return true;
886 }
887 return false;
888}
889
890// Only emits letters (things that have case). Only used for case independent
891// matches.
892inline bool EmitAtomLetter(Isolate* isolate, RegExpCompiler* compiler,
893 base::uc16 c, Label* on_failure, int cp_offset,
894 bool check, bool preloaded) {
895 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
896 bool one_byte = compiler->one_byte();
897 unibrow::uchar chars[4];
898 int length = GetCaseIndependentLetters(isolate, c, one_byte, chars, 4);
899 if (length <= 1) return false;
900 // We may not need to check against the end of the input string
901 // if this character lies before a character that matched.
902 if (!preloaded) {
903 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check);
904 }
905 Label ok;
906 switch (length) {
907 case 2: {
908 if (ShortCutEmitCharacterPair(macro_assembler, one_byte, chars[0],
909 chars[1], on_failure)) {
910 } else {
911 macro_assembler->CheckCharacter(chars[0], &ok);
912 macro_assembler->CheckNotCharacter(chars[1], on_failure);
913 macro_assembler->Bind(&ok);
914 }
915 break;
916 }
917 case 4:
918 macro_assembler->CheckCharacter(chars[3], &ok);
919 V8_FALLTHROUGH[[fallthrough]];
920 case 3:
921 macro_assembler->CheckCharacter(chars[0], &ok);
922 macro_assembler->CheckCharacter(chars[1], &ok);
923 macro_assembler->CheckNotCharacter(chars[2], on_failure);
924 macro_assembler->Bind(&ok);
925 break;
926 default:
927 UNREACHABLE()do { do { } while (false); MOZ_ReportCrash("" "unreachable code"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 927); AnnotateMozCrashReason("MOZ_CRASH(" "unreachable code"
")"); do { *((volatile int*)__null) = 927; __attribute__((nomerge
)) ::abort(); } while (false); } while (false)
;
928 }
929 return true;
930}
931
932void EmitBoundaryTest(RegExpMacroAssembler* masm, int border,
933 Label* fall_through, Label* above_or_equal,
934 Label* below) {
935 if (below != fall_through) {
936 masm->CheckCharacterLT(border, below);
937 if (above_or_equal != fall_through) masm->GoTo(above_or_equal);
938 } else {
939 masm->CheckCharacterGT(border - 1, above_or_equal);
940 }
941}
942
943void EmitDoubleBoundaryTest(RegExpMacroAssembler* masm, int first, int last,
944 Label* fall_through, Label* in_range,
945 Label* out_of_range) {
946 if (in_range == fall_through) {
947 if (first == last) {
948 masm->CheckNotCharacter(first, out_of_range);
949 } else {
950 masm->CheckCharacterNotInRange(first, last, out_of_range);
951 }
952 } else {
953 if (first == last) {
954 masm->CheckCharacter(first, in_range);
955 } else {
956 masm->CheckCharacterInRange(first, last, in_range);
957 }
958 if (out_of_range != fall_through) masm->GoTo(out_of_range);
959 }
960}
961
962// even_label is for ranges[i] to ranges[i + 1] where i - start_index is even.
963// odd_label is for ranges[i] to ranges[i + 1] where i - start_index is odd.
964void EmitUseLookupTable(RegExpMacroAssembler* masm,
965 ZoneList<base::uc32>* ranges, uint32_t start_index,
966 uint32_t end_index, base::uc32 min_char,
967 Label* fall_through, Label* even_label,
968 Label* odd_label) {
969 static const uint32_t kSize = RegExpMacroAssembler::kTableSize;
970 static const uint32_t kMask = RegExpMacroAssembler::kTableMask;
971
972 base::uc32 base = (min_char & ~kMask);
973 USE(base)do { ::v8::base::Use unused_tmp_array_for_use_macro[]{base}; (
void)unused_tmp_array_for_use_macro; } while (false)
;
974
975 // Assert that everything is on one kTableSize page.
976 for (uint32_t i = start_index; i <= end_index; i++) {
977 DCHECK_EQ(ranges->at(i) & ~kMask, base)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((ranges->at(i) & ~kMask) == (base))>::isValid
, "invalid assertion condition"); if ((__builtin_expect(!!(!(
!!((ranges->at(i) & ~kMask) == (base)))), 0))) { do { }
while (false); MOZ_ReportAssertionFailure("(ranges->at(i) & ~kMask) == (base)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 977); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(ranges->at(i) & ~kMask) == (base)"
")"); do { *((volatile int*)__null) = 977; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
978 }
979 DCHECK(start_index == 0 || (ranges->at(start_index - 1) & ~kMask) <= base)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(start_index == 0 || (ranges->at(start_index - 1) &
~kMask) <= base)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(start_index == 0 || (ranges->
at(start_index - 1) & ~kMask) <= base))), 0))) { do { }
while (false); MOZ_ReportAssertionFailure("start_index == 0 || (ranges->at(start_index - 1) & ~kMask) <= base"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 979); AnnotateMozCrashReason("MOZ_ASSERT" "(" "start_index == 0 || (ranges->at(start_index - 1) & ~kMask) <= base"
")"); do { *((volatile int*)__null) = 979; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
980
981 char templ[kSize];
982 Label* on_bit_set;
983 Label* on_bit_clear;
984 int bit;
985 if (even_label == fall_through) {
986 on_bit_set = odd_label;
987 on_bit_clear = even_label;
988 bit = 1;
989 } else {
990 on_bit_set = even_label;
991 on_bit_clear = odd_label;
992 bit = 0;
993 }
994 for (uint32_t i = 0; i < (ranges->at(start_index) & kMask) && i < kSize;
995 i++) {
996 templ[i] = bit;
997 }
998 uint32_t j = 0;
999 bit ^= 1;
1000 for (uint32_t i = start_index; i < end_index; i++) {
1001 for (j = (ranges->at(i) & kMask); j < (ranges->at(i + 1) & kMask); j++) {
1002 templ[j] = bit;
1003 }
1004 bit ^= 1;
1005 }
1006 for (uint32_t i = j; i < kSize; i++) {
1007 templ[i] = bit;
1008 }
1009 Factory* factory = masm->isolate()->factory();
1010 // TODO(erikcorry): Cache these.
1011 Handle<ByteArray> ba = factory->NewByteArray(kSize, AllocationType::kOld);
1012 for (uint32_t i = 0; i < kSize; i++) {
1013 ba->set(i, templ[i]);
1014 }
1015 masm->CheckBitInTable(ba, on_bit_set);
1016 if (on_bit_clear != fall_through) masm->GoTo(on_bit_clear);
1017}
1018
1019void CutOutRange(RegExpMacroAssembler* masm, ZoneList<base::uc32>* ranges,
1020 uint32_t start_index, uint32_t end_index, uint32_t cut_index,
1021 Label* even_label, Label* odd_label) {
1022 bool odd = (((cut_index - start_index) & 1) == 1);
1023 Label* in_range_label = odd ? odd_label : even_label;
1024 Label dummy;
1025 EmitDoubleBoundaryTest(masm, ranges->at(cut_index),
1026 ranges->at(cut_index + 1) - 1, &dummy, in_range_label,
1027 &dummy);
1028 DCHECK(!dummy.is_linked())do { static_assert( mozilla::detail::AssertionConditionType<
decltype(!dummy.is_linked())>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(!dummy.is_linked()))), 0))) {
do { } while (false); MOZ_ReportAssertionFailure("!dummy.is_linked()"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1028); AnnotateMozCrashReason("MOZ_ASSERT" "(" "!dummy.is_linked()"
")"); do { *((volatile int*)__null) = 1028; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1029 // Cut out the single range by rewriting the array. This creates a new
1030 // range that is a merger of the two ranges on either side of the one we
1031 // are cutting out. The oddity of the labels is preserved.
1032 for (uint32_t j = cut_index; j > start_index; j--) {
1033 ranges->at(j) = ranges->at(j - 1);
1034 }
1035 for (uint32_t j = cut_index + 1; j < end_index; j++) {
1036 ranges->at(j) = ranges->at(j + 1);
1037 }
1038}
1039
1040// Unicode case. Split the search space into kSize spaces that are handled
1041// with recursion.
1042void SplitSearchSpace(ZoneList<base::uc32>* ranges, uint32_t start_index,
1043 uint32_t end_index, uint32_t* new_start_index,
1044 uint32_t* new_end_index, base::uc32* border) {
1045 static const uint32_t kSize = RegExpMacroAssembler::kTableSize;
1046 static const uint32_t kMask = RegExpMacroAssembler::kTableMask;
1047
1048 base::uc32 first = ranges->at(start_index);
1049 base::uc32 last = ranges->at(end_index) - 1;
1050
1051 *new_start_index = start_index;
1052 *border = (ranges->at(start_index) & ~kMask) + kSize;
1053 while (*new_start_index < end_index) {
1054 if (ranges->at(*new_start_index) > *border) break;
1055 (*new_start_index)++;
1056 }
1057 // new_start_index is the index of the first edge that is beyond the
1058 // current kSize space.
1059
1060 // For very large search spaces we do a binary chop search of the non-Latin1
1061 // space instead of just going to the end of the current kSize space. The
1062 // heuristics are complicated a little by the fact that any 128-character
1063 // encoding space can be quickly tested with a table lookup, so we don't
1064 // wish to do binary chop search at a smaller granularity than that. A
1065 // 128-character space can take up a lot of space in the ranges array if,
1066 // for example, we only want to match every second character (eg. the lower
1067 // case characters on some Unicode pages).
1068 uint32_t binary_chop_index = (end_index + start_index) / 2;
1069 // The first test ensures that we get to the code that handles the Latin1
1070 // range with a single not-taken branch, speeding up this important
1071 // character range (even non-Latin1 charset-based text has spaces and
1072 // punctuation).
1073 if (*border - 1 > String::kMaxOneByteCharCode && // Latin1 case.
1074 end_index - start_index > (*new_start_index - start_index) * 2 &&
1075 last - first > kSize * 2 && binary_chop_index > *new_start_index &&
1076 ranges->at(binary_chop_index) >= first + 2 * kSize) {
1077 uint32_t scan_forward_for_section_border = binary_chop_index;
1078 uint32_t new_border = (ranges->at(binary_chop_index) | kMask) + 1;
1079
1080 while (scan_forward_for_section_border < end_index) {
1081 if (ranges->at(scan_forward_for_section_border) > new_border) {
1082 *new_start_index = scan_forward_for_section_border;
1083 *border = new_border;
1084 break;
1085 }
1086 scan_forward_for_section_border++;
1087 }
1088 }
1089
1090 DCHECK(*new_start_index > start_index)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(*new_start_index > start_index)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(*new_start_index > start_index
))), 0))) { do { } while (false); MOZ_ReportAssertionFailure(
"*new_start_index > start_index", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1090); AnnotateMozCrashReason("MOZ_ASSERT" "(" "*new_start_index > start_index"
")"); do { *((volatile int*)__null) = 1090; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1091 *new_end_index = *new_start_index - 1;
1092 if (ranges->at(*new_end_index) == *border) {
1093 (*new_end_index)--;
1094 }
1095 if (*border >= ranges->at(end_index)) {
1096 *border = ranges->at(end_index);
1097 *new_start_index = end_index; // Won't be used.
1098 *new_end_index = end_index - 1;
1099 }
1100}
1101
1102// Gets a series of segment boundaries representing a character class. If the
1103// character is in the range between an even and an odd boundary (counting from
1104// start_index) then go to even_label, otherwise go to odd_label. We already
1105// know that the character is in the range of min_char to max_char inclusive.
1106// Either label can be nullptr indicating backtracking. Either label can also
1107// be equal to the fall_through label.
1108void GenerateBranches(RegExpMacroAssembler* masm, ZoneList<base::uc32>* ranges,
1109 uint32_t start_index, uint32_t end_index,
1110 base::uc32 min_char, base::uc32 max_char,
1111 Label* fall_through, Label* even_label,
1112 Label* odd_label) {
1113 DCHECK_LE(min_char, String::kMaxUtf16CodeUnit)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((min_char) <= (String::kMaxUtf16CodeUnit))>::isValid
, "invalid assertion condition"); if ((__builtin_expect(!!(!(
!!((min_char) <= (String::kMaxUtf16CodeUnit)))), 0))) { do
{ } while (false); MOZ_ReportAssertionFailure("(min_char) <= (String::kMaxUtf16CodeUnit)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1113); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(min_char) <= (String::kMaxUtf16CodeUnit)"
")"); do { *((volatile int*)__null) = 1113; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1114 DCHECK_LE(max_char, String::kMaxUtf16CodeUnit)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((max_char) <= (String::kMaxUtf16CodeUnit))>::isValid
, "invalid assertion condition"); if ((__builtin_expect(!!(!(
!!((max_char) <= (String::kMaxUtf16CodeUnit)))), 0))) { do
{ } while (false); MOZ_ReportAssertionFailure("(max_char) <= (String::kMaxUtf16CodeUnit)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1114); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(max_char) <= (String::kMaxUtf16CodeUnit)"
")"); do { *((volatile int*)__null) = 1114; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1115
1116 base::uc32 first = ranges->at(start_index);
1117 base::uc32 last = ranges->at(end_index) - 1;
1118
1119 DCHECK_LT(min_char, first)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((min_char) < (first))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((min_char) < (first)))), 0
))) { do { } while (false); MOZ_ReportAssertionFailure("(min_char) < (first)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1119); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(min_char) < (first)"
")"); do { *((volatile int*)__null) = 1119; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1120
1121 // Just need to test if the character is before or on-or-after
1122 // a particular character.
1123 if (start_index == end_index) {
1124 EmitBoundaryTest(masm, first, fall_through, even_label, odd_label);
1125 return;
1126 }
1127
1128 // Another almost trivial case: There is one interval in the middle that is
1129 // different from the end intervals.
1130 if (start_index + 1 == end_index) {
1131 EmitDoubleBoundaryTest(masm, first, last, fall_through, even_label,
1132 odd_label);
1133 return;
1134 }
1135
1136 // It's not worth using table lookup if there are very few intervals in the
1137 // character class.
1138 if (end_index - start_index <= 6) {
1139 // It is faster to test for individual characters, so we look for those
1140 // first, then try arbitrary ranges in the second round.
1141 static uint32_t kNoCutIndex = -1;
1142 uint32_t cut = kNoCutIndex;
1143 for (uint32_t i = start_index; i < end_index; i++) {
1144 if (ranges->at(i) == ranges->at(i + 1) - 1) {
1145 cut = i;
1146 break;
1147 }
1148 }
1149 if (cut == kNoCutIndex) cut = start_index;
1150 CutOutRange(masm, ranges, start_index, end_index, cut, even_label,
1151 odd_label);
1152 DCHECK_GE(end_index - start_index, 2)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((end_index - start_index) >= (2))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((end_index - start_index) >=
(2)))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("(end_index - start_index) >= (2)", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1152); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(end_index - start_index) >= (2)"
")"); do { *((volatile int*)__null) = 1152; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1153 GenerateBranches(masm, ranges, start_index + 1, end_index - 1, min_char,
1154 max_char, fall_through, even_label, odd_label);
1155 return;
1156 }
1157
1158 // If there are a lot of intervals in the regexp, then we will use tables to
1159 // determine whether the character is inside or outside the character class.
1160 static const int kBits = RegExpMacroAssembler::kTableSizeBits;
1161
1162 if ((max_char >> kBits) == (min_char >> kBits)) {
1163 EmitUseLookupTable(masm, ranges, start_index, end_index, min_char,
1164 fall_through, even_label, odd_label);
1165 return;
1166 }
1167
1168 if ((min_char >> kBits) != first >> kBits) {
1169 masm->CheckCharacterLT(first, odd_label);
1170 GenerateBranches(masm, ranges, start_index + 1, end_index, first, max_char,
1171 fall_through, odd_label, even_label);
1172 return;
1173 }
1174
1175 uint32_t new_start_index = 0;
1176 uint32_t new_end_index = 0;
1177 base::uc32 border = 0;
1178
1179 SplitSearchSpace(ranges, start_index, end_index, &new_start_index,
1180 &new_end_index, &border);
1181
1182 Label handle_rest;
1183 Label* above = &handle_rest;
1184 if (border == last + 1) {
1185 // We didn't find any section that started after the limit, so everything
1186 // above the border is one of the terminal labels.
1187 above = (end_index & 1) != (start_index & 1) ? odd_label : even_label;
1188 DCHECK(new_end_index == end_index - 1)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(new_end_index == end_index - 1)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(new_end_index == end_index -
1))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("new_end_index == end_index - 1", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1188); AnnotateMozCrashReason("MOZ_ASSERT" "(" "new_end_index == end_index - 1"
")"); do { *((volatile int*)__null) = 1188; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1189 }
1190
1191 DCHECK_LE(start_index, new_end_index)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((start_index) <= (new_end_index))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((start_index) <= (new_end_index
)))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("(start_index) <= (new_end_index)", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1191); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(start_index) <= (new_end_index)"
")"); do { *((volatile int*)__null) = 1191; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1192 DCHECK_LE(new_start_index, end_index)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((new_start_index) <= (end_index))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((new_start_index) <= (end_index
)))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("(new_start_index) <= (end_index)", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1192); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(new_start_index) <= (end_index)"
")"); do { *((volatile int*)__null) = 1192; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1193 DCHECK_LT(start_index, new_start_index)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((start_index) < (new_start_index))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((start_index) < (new_start_index
)))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("(start_index) < (new_start_index)", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1193); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(start_index) < (new_start_index)"
")"); do { *((volatile int*)__null) = 1193; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1194 DCHECK_LT(new_end_index, end_index)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((new_end_index) < (end_index))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((new_end_index) < (end_index
)))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("(new_end_index) < (end_index)", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1194); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(new_end_index) < (end_index)"
")"); do { *((volatile int*)__null) = 1194; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1195 DCHECK(new_end_index + 1 == new_start_index ||do { static_assert( mozilla::detail::AssertionConditionType<
decltype(new_end_index + 1 == new_start_index || (new_end_index
+ 2 == new_start_index && border == ranges->at(new_end_index
+ 1)))>::isValid, "invalid assertion condition"); if ((__builtin_expect
(!!(!(!!(new_end_index + 1 == new_start_index || (new_end_index
+ 2 == new_start_index && border == ranges->at(new_end_index
+ 1))))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("new_end_index + 1 == new_start_index || (new_end_index + 2 == new_start_index && border == ranges->at(new_end_index + 1))"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1197); AnnotateMozCrashReason("MOZ_ASSERT" "(" "new_end_index + 1 == new_start_index || (new_end_index + 2 == new_start_index && border == ranges->at(new_end_index + 1))"
")"); do { *((volatile int*)__null) = 1197; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
1196 (new_end_index + 2 == new_start_index &&do { static_assert( mozilla::detail::AssertionConditionType<
decltype(new_end_index + 1 == new_start_index || (new_end_index
+ 2 == new_start_index && border == ranges->at(new_end_index
+ 1)))>::isValid, "invalid assertion condition"); if ((__builtin_expect
(!!(!(!!(new_end_index + 1 == new_start_index || (new_end_index
+ 2 == new_start_index && border == ranges->at(new_end_index
+ 1))))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("new_end_index + 1 == new_start_index || (new_end_index + 2 == new_start_index && border == ranges->at(new_end_index + 1))"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1197); AnnotateMozCrashReason("MOZ_ASSERT" "(" "new_end_index + 1 == new_start_index || (new_end_index + 2 == new_start_index && border == ranges->at(new_end_index + 1))"
")"); do { *((volatile int*)__null) = 1197; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
1197 border == ranges->at(new_end_index + 1)))do { static_assert( mozilla::detail::AssertionConditionType<
decltype(new_end_index + 1 == new_start_index || (new_end_index
+ 2 == new_start_index && border == ranges->at(new_end_index
+ 1)))>::isValid, "invalid assertion condition"); if ((__builtin_expect
(!!(!(!!(new_end_index + 1 == new_start_index || (new_end_index
+ 2 == new_start_index && border == ranges->at(new_end_index
+ 1))))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("new_end_index + 1 == new_start_index || (new_end_index + 2 == new_start_index && border == ranges->at(new_end_index + 1))"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1197); AnnotateMozCrashReason("MOZ_ASSERT" "(" "new_end_index + 1 == new_start_index || (new_end_index + 2 == new_start_index && border == ranges->at(new_end_index + 1))"
")"); do { *((volatile int*)__null) = 1197; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1198 DCHECK_LT(min_char, border - 1)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((min_char) < (border - 1))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((min_char) < (border - 1)
))), 0))) { do { } while (false); MOZ_ReportAssertionFailure(
"(min_char) < (border - 1)", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1198); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(min_char) < (border - 1)"
")"); do { *((volatile int*)__null) = 1198; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1199 DCHECK_LT(border, max_char)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((border) < (max_char))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((border) < (max_char)))),
0))) { do { } while (false); MOZ_ReportAssertionFailure("(border) < (max_char)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1199); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(border) < (max_char)"
")"); do { *((volatile int*)__null) = 1199; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1200 DCHECK_LT(ranges->at(new_end_index), border)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((ranges->at(new_end_index)) < (border))>::isValid
, "invalid assertion condition"); if ((__builtin_expect(!!(!(
!!((ranges->at(new_end_index)) < (border)))), 0))) { do
{ } while (false); MOZ_ReportAssertionFailure("(ranges->at(new_end_index)) < (border)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1200); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(ranges->at(new_end_index)) < (border)"
")"); do { *((volatile int*)__null) = 1200; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1201 DCHECK(border < ranges->at(new_start_index) ||do { static_assert( mozilla::detail::AssertionConditionType<
decltype(border < ranges->at(new_start_index) || (border
== ranges->at(new_start_index) && new_start_index
== end_index && new_end_index == end_index - 1 &&
border == last + 1))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(border < ranges->at(new_start_index
) || (border == ranges->at(new_start_index) && new_start_index
== end_index && new_end_index == end_index - 1 &&
border == last + 1)))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("border < ranges->at(new_start_index) || (border == ranges->at(new_start_index) && new_start_index == end_index && new_end_index == end_index - 1 && border == last + 1)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1204); AnnotateMozCrashReason("MOZ_ASSERT" "(" "border < ranges->at(new_start_index) || (border == ranges->at(new_start_index) && new_start_index == end_index && new_end_index == end_index - 1 && border == last + 1)"
")"); do { *((volatile int*)__null) = 1204; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
1202 (border == ranges->at(new_start_index) &&do { static_assert( mozilla::detail::AssertionConditionType<
decltype(border < ranges->at(new_start_index) || (border
== ranges->at(new_start_index) && new_start_index
== end_index && new_end_index == end_index - 1 &&
border == last + 1))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(border < ranges->at(new_start_index
) || (border == ranges->at(new_start_index) && new_start_index
== end_index && new_end_index == end_index - 1 &&
border == last + 1)))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("border < ranges->at(new_start_index) || (border == ranges->at(new_start_index) && new_start_index == end_index && new_end_index == end_index - 1 && border == last + 1)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1204); AnnotateMozCrashReason("MOZ_ASSERT" "(" "border < ranges->at(new_start_index) || (border == ranges->at(new_start_index) && new_start_index == end_index && new_end_index == end_index - 1 && border == last + 1)"
")"); do { *((volatile int*)__null) = 1204; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
1203 new_start_index == end_index && new_end_index == end_index - 1 &&do { static_assert( mozilla::detail::AssertionConditionType<
decltype(border < ranges->at(new_start_index) || (border
== ranges->at(new_start_index) && new_start_index
== end_index && new_end_index == end_index - 1 &&
border == last + 1))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(border < ranges->at(new_start_index
) || (border == ranges->at(new_start_index) && new_start_index
== end_index && new_end_index == end_index - 1 &&
border == last + 1)))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("border < ranges->at(new_start_index) || (border == ranges->at(new_start_index) && new_start_index == end_index && new_end_index == end_index - 1 && border == last + 1)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1204); AnnotateMozCrashReason("MOZ_ASSERT" "(" "border < ranges->at(new_start_index) || (border == ranges->at(new_start_index) && new_start_index == end_index && new_end_index == end_index - 1 && border == last + 1)"
")"); do { *((volatile int*)__null) = 1204; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
1204 border == last + 1))do { static_assert( mozilla::detail::AssertionConditionType<
decltype(border < ranges->at(new_start_index) || (border
== ranges->at(new_start_index) && new_start_index
== end_index && new_end_index == end_index - 1 &&
border == last + 1))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(border < ranges->at(new_start_index
) || (border == ranges->at(new_start_index) && new_start_index
== end_index && new_end_index == end_index - 1 &&
border == last + 1)))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("border < ranges->at(new_start_index) || (border == ranges->at(new_start_index) && new_start_index == end_index && new_end_index == end_index - 1 && border == last + 1)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1204); AnnotateMozCrashReason("MOZ_ASSERT" "(" "border < ranges->at(new_start_index) || (border == ranges->at(new_start_index) && new_start_index == end_index && new_end_index == end_index - 1 && border == last + 1)"
")"); do { *((volatile int*)__null) = 1204; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1205 DCHECK(new_start_index == 0 || border >= ranges->at(new_start_index - 1))do { static_assert( mozilla::detail::AssertionConditionType<
decltype(new_start_index == 0 || border >= ranges->at(new_start_index
- 1))>::isValid, "invalid assertion condition"); if ((__builtin_expect
(!!(!(!!(new_start_index == 0 || border >= ranges->at(new_start_index
- 1)))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("new_start_index == 0 || border >= ranges->at(new_start_index - 1)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1205); AnnotateMozCrashReason("MOZ_ASSERT" "(" "new_start_index == 0 || border >= ranges->at(new_start_index - 1)"
")"); do { *((volatile int*)__null) = 1205; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1206
1207 masm->CheckCharacterGT(border - 1, above);
1208 Label dummy;
1209 GenerateBranches(masm, ranges, start_index, new_end_index, min_char,
1210 border - 1, &dummy, even_label, odd_label);
1211 if (handle_rest.is_linked()) {
1212 masm->Bind(&handle_rest);
1213 bool flip = (new_start_index & 1) != (start_index & 1);
1214 GenerateBranches(masm, ranges, new_start_index, end_index, border, max_char,
1215 &dummy, flip ? odd_label : even_label,
1216 flip ? even_label : odd_label);
1217 }
1218}
1219
1220void EmitClassRanges(RegExpMacroAssembler* macro_assembler,
1221 RegExpClassRanges* cr, bool one_byte, Label* on_failure,
1222 int cp_offset, bool check_offset, bool preloaded,
1223 Zone* zone) {
1224 ZoneList<CharacterRange>* ranges = cr->ranges(zone);
1225 CharacterRange::Canonicalize(ranges);
1226
1227 // Now that all processing (like case-insensitivity) is done, clamp the
1228 // ranges to the set of ranges that may actually occur in the subject string.
1229 if (one_byte) CharacterRange::ClampToOneByte(ranges);
1230
1231 const int ranges_length = ranges->length();
1232 if (ranges_length == 0) {
1233 if (!cr->is_negated()) {
1234 macro_assembler->GoTo(on_failure);
1235 }
1236 if (check_offset) {
1237 macro_assembler->CheckPosition(cp_offset, on_failure);
1238 }
1239 return;
1240 }
1241
1242 const base::uc32 max_char = MaxCodeUnit(one_byte);
1243 if (ranges_length == 1 && ranges->at(0).IsEverything(max_char)) {
1244 if (cr->is_negated()) {
1245 macro_assembler->GoTo(on_failure);
1246 } else {
1247 // This is a common case hit by non-anchored expressions.
1248 if (check_offset) {
1249 macro_assembler->CheckPosition(cp_offset, on_failure);
1250 }
1251 }
1252 return;
1253 }
1254
1255 if (!preloaded) {
1256 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check_offset);
1257 }
1258
1259 if (cr->is_standard(zone) && macro_assembler->CheckSpecialClassRanges(
1260 cr->standard_type(), on_failure)) {
1261 return;
1262 }
1263
1264 static constexpr int kMaxRangesForInlineBranchGeneration = 16;
1265 if (ranges_length > kMaxRangesForInlineBranchGeneration) {
1266 // For large range sets, emit a more compact instruction sequence to avoid
1267 // a potentially problematic increase in code size.
1268 // Note the flipped logic below (we check InRange if negated, NotInRange if
1269 // not negated); this is necessary since the method falls through on
1270 // failure whereas we want to fall through on success.
1271 if (cr->is_negated()) {
1272 if (macro_assembler->CheckCharacterInRangeArray(ranges, on_failure)) {
1273 return;
1274 }
1275 } else {
1276 if (macro_assembler->CheckCharacterNotInRangeArray(ranges, on_failure)) {
1277 return;
1278 }
1279 }
1280 }
1281
1282 // Generate a flat list of range boundaries for consumption by
1283 // GenerateBranches. See the comment on that function for how the list should
1284 // be structured
1285 ZoneList<base::uc32>* range_boundaries =
1286 zone->New<ZoneList<base::uc32>>(ranges_length * 2, zone);
1287
1288 bool zeroth_entry_is_failure = !cr->is_negated();
1289
1290 for (int i = 0; i < ranges_length; i++) {
1291 CharacterRange& range = ranges->at(i);
1292 if (range.from() == 0) {
1293 DCHECK_EQ(i, 0)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((i) == (0))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((i) == (0)))), 0))) { do { }
while (false); MOZ_ReportAssertionFailure("(i) == (0)", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1293); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(i) == (0)"
")"); do { *((volatile int*)__null) = 1293; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1294 zeroth_entry_is_failure = !zeroth_entry_is_failure;
1295 } else {
1296 range_boundaries->Add(range.from(), zone);
1297 }
1298 // `+ 1` to convert from inclusive to exclusive `to`.
1299 // [from, to] == [from, to+1[.
1300 range_boundaries->Add(range.to() + 1, zone);
1301 }
1302 int end_index = range_boundaries->length() - 1;
1303 if (range_boundaries->at(end_index) > max_char) {
1304 end_index--;
1305 }
1306
1307 Label fall_through;
1308 GenerateBranches(macro_assembler, range_boundaries,
1309 0, // start_index.
1310 end_index,
1311 0, // min_char.
1312 max_char, &fall_through,
1313 zeroth_entry_is_failure ? &fall_through : on_failure,
1314 zeroth_entry_is_failure ? on_failure : &fall_through);
1315 macro_assembler->Bind(&fall_through);
1316}
1317
1318} // namespace
1319
1320RegExpNode::~RegExpNode() = default;
1321
1322RegExpNode::LimitResult RegExpNode::LimitVersions(RegExpCompiler* compiler,
1323 Trace* trace) {
1324 // If we are generating a greedy loop then don't stop and don't reuse code.
1325 if (trace->stop_node() != nullptr) {
1326 return CONTINUE;
1327 }
1328
1329 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
1330 if (trace->is_trivial()) {
1331 if (label_.is_bound() || on_work_list() || !KeepRecursing(compiler)) {
1332 // If a generic version is already scheduled to be generated or we have
1333 // recursed too deeply then just generate a jump to that code.
1334 macro_assembler->GoTo(&label_);
1335 // This will queue it up for generation of a generic version if it hasn't
1336 // already been queued.
1337 compiler->AddWork(this);
1338 return DONE;
1339 }
1340 // Generate generic version of the node and bind the label for later use.
1341 macro_assembler->Bind(&label_);
1342 return CONTINUE;
1343 }
1344
1345 // We are being asked to make a non-generic version. Keep track of how many
1346 // non-generic versions we generate so as not to overdo it.
1347 trace_count_++;
1348 if (KeepRecursing(compiler) && compiler->optimize() &&
1349 trace_count_ < kMaxCopiesCodeGenerated) {
1350 return CONTINUE;
1351 }
1352
1353 // If we get here code has been generated for this node too many times or
1354 // recursion is too deep. Time to switch to a generic version. The code for
1355 // generic versions above can handle deep recursion properly.
1356 bool was_limiting = compiler->limiting_recursion();
1357 compiler->set_limiting_recursion(true);
1358 trace->Flush(compiler, this);
1359 compiler->set_limiting_recursion(was_limiting);
1360 return DONE;
1361}
1362
1363bool RegExpNode::KeepRecursing(RegExpCompiler* compiler) {
1364 return !compiler->limiting_recursion() &&
1365 compiler->recursion_depth() <= RegExpCompiler::kMaxRecursion;
1366}
1367
1368void ActionNode::FillInBMInfo(Isolate* isolate, int offset, int budget,
1369 BoyerMooreLookahead* bm, bool not_at_start) {
1370 if (action_type_ == POSITIVE_SUBMATCH_SUCCESS) {
1371 // Anything may follow a positive submatch success, thus we need to accept
1372 // all characters from this position onwards.
1373 bm->SetRest(offset);
1374 } else {
1375 on_success()->FillInBMInfo(isolate, offset, budget - 1, bm, not_at_start);
1376 }
1377 SaveBMInfo(bm, not_at_start, offset);
1378}
1379
1380void ActionNode::GetQuickCheckDetails(QuickCheckDetails* details,
1381 RegExpCompiler* compiler, int filled_in,
1382 bool not_at_start) {
1383 if (action_type_ == SET_REGISTER_FOR_LOOP) {
1384 on_success()->GetQuickCheckDetailsFromLoopEntry(details, compiler,
1385 filled_in, not_at_start);
1386 } else {
1387 if (action_type() == MODIFY_FLAGS) {
1388 compiler->set_flags(flags());
1389 }
1390 on_success()->GetQuickCheckDetails(details, compiler, filled_in,
1391 not_at_start);
1392 }
1393}
1394
1395void AssertionNode::FillInBMInfo(Isolate* isolate, int offset, int budget,
1396 BoyerMooreLookahead* bm, bool not_at_start) {
1397 // Match the behaviour of EatsAtLeast on this node.
1398 if (assertion_type() == AT_START && not_at_start) return;
1399 on_success()->FillInBMInfo(isolate, offset, budget - 1, bm, not_at_start);
1400 SaveBMInfo(bm, not_at_start, offset);
1401}
1402
1403void NegativeLookaroundChoiceNode::GetQuickCheckDetails(
1404 QuickCheckDetails* details, RegExpCompiler* compiler, int filled_in,
1405 bool not_at_start) {
1406 RegExpNode* node = continue_node();
1407 return node->GetQuickCheckDetails(details, compiler, filled_in, not_at_start);
1408}
1409
1410namespace {
1411
1412// Takes the left-most 1-bit and smears it out, setting all bits to its right.
1413inline uint32_t SmearBitsRight(uint32_t v) {
1414 v |= v >> 1;
1415 v |= v >> 2;
1416 v |= v >> 4;
1417 v |= v >> 8;
1418 v |= v >> 16;
1419 return v;
1420}
1421
1422} // namespace
1423
1424bool QuickCheckDetails::Rationalize(bool asc) {
1425 bool found_useful_op = false;
1426 const uint32_t char_mask = CharMask(asc);
1427 mask_ = 0;
1428 value_ = 0;
1429 int char_shift = 0;
1430 for (int i = 0; i < characters_; i++) {
36
Assuming 'i' is < field 'characters_'
37
Loop condition is true. Entering loop body
42
Assuming 'i' is < field 'characters_'
43
Loop condition is true. Entering loop body
48
Assuming 'i' is < field 'characters_'
49
Loop condition is true. Entering loop body
1431 Position* pos = &positions_[i];
1432 if ((pos->mask & String::kMaxOneByteCharCode) != 0) {
38
Assuming the condition is false
39
Taking false branch
44
Assuming the condition is false
45
Taking false branch
50
Assuming the condition is false
51
Taking false branch
1433 found_useful_op = true;
1434 }
1435 mask_ |= (pos->mask & char_mask) << char_shift;
52
The result of left shift is undefined because the right operand '32' is not smaller than 32, the capacity of 'uint32_t'
1436 value_ |= (pos->value & char_mask) << char_shift;
1437 char_shift += asc
45.1
'asc' is false
? 8 : 16
;
40
Assuming 'asc' is false
41
'?' condition is false
46
'?' condition is false
47
The value 32 is assigned to 'char_shift'
1438 }
1439 return found_useful_op;
1440}
1441
1442int RegExpNode::EatsAtLeast(bool not_at_start) {
1443 return not_at_start ? eats_at_least_.eats_at_least_from_not_start
1444 : eats_at_least_.eats_at_least_from_possibly_start;
1445}
1446
1447EatsAtLeastInfo RegExpNode::EatsAtLeastFromLoopEntry() {
1448 // SET_REGISTER_FOR_LOOP is only used to initialize loop counters, and it
1449 // implies that the following node must be a LoopChoiceNode. If we need to
1450 // set registers to constant values for other reasons, we could introduce a
1451 // new action type SET_REGISTER that doesn't imply anything about its
1452 // successor.
1453 UNREACHABLE()do { do { } while (false); MOZ_ReportCrash("" "unreachable code"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1453); AnnotateMozCrashReason("MOZ_CRASH(" "unreachable code"
")"); do { *((volatile int*)__null) = 1453; __attribute__((nomerge
)) ::abort(); } while (false); } while (false)
;
1454}
1455
1456void RegExpNode::GetQuickCheckDetailsFromLoopEntry(QuickCheckDetails* details,
1457 RegExpCompiler* compiler,
1458 int characters_filled_in,
1459 bool not_at_start) {
1460 // See comment in RegExpNode::EatsAtLeastFromLoopEntry.
1461 UNREACHABLE()do { do { } while (false); MOZ_ReportCrash("" "unreachable code"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1461); AnnotateMozCrashReason("MOZ_CRASH(" "unreachable code"
")"); do { *((volatile int*)__null) = 1461; __attribute__((nomerge
)) ::abort(); } while (false); } while (false)
;
1462}
1463
1464EatsAtLeastInfo LoopChoiceNode::EatsAtLeastFromLoopEntry() {
1465 DCHECK_EQ(alternatives_->length(), 2)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((alternatives_->length()) == (2))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((alternatives_->length())
== (2)))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("(alternatives_->length()) == (2)", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1465); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(alternatives_->length()) == (2)"
")"); do { *((volatile int*)__null) = 1465; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
; // There's just loop and continue.
1466
1467 if (read_backward()) {
1468 // The eats_at_least value is not used if reading backward. The
1469 // EatsAtLeastPropagator should've zeroed it as well.
1470 DCHECK_EQ(eats_at_least_info()->eats_at_least_from_possibly_start, 0)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((eats_at_least_info()->eats_at_least_from_possibly_start
) == (0))>::isValid, "invalid assertion condition"); if ((
__builtin_expect(!!(!(!!((eats_at_least_info()->eats_at_least_from_possibly_start
) == (0)))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("(eats_at_least_info()->eats_at_least_from_possibly_start) == (0)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1470); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(eats_at_least_info()->eats_at_least_from_possibly_start) == (0)"
")"); do { *((volatile int*)__null) = 1470; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1471 DCHECK_EQ(eats_at_least_info()->eats_at_least_from_not_start, 0)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((eats_at_least_info()->eats_at_least_from_not_start
) == (0))>::isValid, "invalid assertion condition"); if ((
__builtin_expect(!!(!(!!((eats_at_least_info()->eats_at_least_from_not_start
) == (0)))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("(eats_at_least_info()->eats_at_least_from_not_start) == (0)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1471); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(eats_at_least_info()->eats_at_least_from_not_start) == (0)"
")"); do { *((volatile int*)__null) = 1471; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1472 return {};
1473 }
1474
1475 // Figure out how much the loop body itself eats, not including anything in
1476 // the continuation case. In general, the nodes in the loop body should report
1477 // that they eat at least the number eaten by the continuation node, since any
1478 // successful match in the loop body must also include the continuation node.
1479 // However, in some cases involving positive lookaround, the loop body under-
1480 // reports its appetite, so use saturated math here to avoid negative numbers.
1481 uint8_t loop_body_from_not_start = base::saturated_cast<uint8_t>(
1482 loop_node_->EatsAtLeast(true) - continue_node_->EatsAtLeast(true));
1483 uint8_t loop_body_from_possibly_start = base::saturated_cast<uint8_t>(
1484 loop_node_->EatsAtLeast(false) - continue_node_->EatsAtLeast(true));
1485
1486 // Limit the number of loop iterations to avoid overflow in subsequent steps.
1487 int loop_iterations = base::saturated_cast<uint8_t>(min_loop_iterations());
1488
1489 EatsAtLeastInfo result;
1490 result.eats_at_least_from_not_start =
1491 base::saturated_cast<uint8_t>(loop_iterations * loop_body_from_not_start +
1492 continue_node_->EatsAtLeast(true));
1493 if (loop_iterations > 0 && loop_body_from_possibly_start > 0) {
1494 // First loop iteration eats at least one, so all subsequent iterations
1495 // and the after-loop chunk are guaranteed to not be at the start.
1496 result.eats_at_least_from_possibly_start = base::saturated_cast<uint8_t>(
1497 loop_body_from_possibly_start +
1498 (loop_iterations - 1) * loop_body_from_not_start +
1499 continue_node_->EatsAtLeast(true));
1500 } else {
1501 // Loop body might eat nothing, so only continue node contributes.
1502 result.eats_at_least_from_possibly_start =
1503 continue_node_->EatsAtLeast(false);
1504 }
1505 return result;
1506}
1507
1508bool RegExpNode::EmitQuickCheck(RegExpCompiler* compiler,
1509 Trace* bounds_check_trace, Trace* trace,
1510 bool preload_has_checked_bounds,
1511 Label* on_possible_success,
1512 QuickCheckDetails* details,
1513 bool fall_through_on_failure,
1514 ChoiceNode* predecessor) {
1515 DCHECK_NOT_NULL(predecessor)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((predecessor) != nullptr)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((predecessor) != nullptr))),
0))) { do { } while (false); MOZ_ReportAssertionFailure("(predecessor) != nullptr"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1515); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(predecessor) != nullptr"
")"); do { *((volatile int*)__null) = 1515; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
29
Taking false branch
30
Loop condition is false. Exiting loop
1516 if (details->characters() == 0) return false;
31
Assuming the condition is false
32
Taking false branch
1517 GetQuickCheckDetails(details, compiler, 0,
1518 trace->at_start() == Trace::FALSE_VALUE);
1519 if (details->cannot_match()) return false;
33
Assuming the condition is false
34
Taking false branch
1520 if (!details->Rationalize(compiler->one_byte())) return false;
35
Calling 'QuickCheckDetails::Rationalize'
1521 DCHECK(details->characters() == 1 ||do { static_assert( mozilla::detail::AssertionConditionType<
decltype(details->characters() == 1 || compiler->macro_assembler
()->CanReadUnaligned())>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(details->characters() == 1
|| compiler->macro_assembler()->CanReadUnaligned()))),
0))) { do { } while (false); MOZ_ReportAssertionFailure("details->characters() == 1 || compiler->macro_assembler()->CanReadUnaligned()"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1522); AnnotateMozCrashReason("MOZ_ASSERT" "(" "details->characters() == 1 || compiler->macro_assembler()->CanReadUnaligned()"
")"); do { *((volatile int*)__null) = 1522; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
1522 compiler->macro_assembler()->CanReadUnaligned())do { static_assert( mozilla::detail::AssertionConditionType<
decltype(details->characters() == 1 || compiler->macro_assembler
()->CanReadUnaligned())>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(details->characters() == 1
|| compiler->macro_assembler()->CanReadUnaligned()))),
0))) { do { } while (false); MOZ_ReportAssertionFailure("details->characters() == 1 || compiler->macro_assembler()->CanReadUnaligned()"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1522); AnnotateMozCrashReason("MOZ_ASSERT" "(" "details->characters() == 1 || compiler->macro_assembler()->CanReadUnaligned()"
")"); do { *((volatile int*)__null) = 1522; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1523 uint32_t mask = details->mask();
1524 uint32_t value = details->value();
1525
1526 RegExpMacroAssembler* assembler = compiler->macro_assembler();
1527
1528 if (trace->characters_preloaded() != details->characters()) {
1529 DCHECK(trace->cp_offset() == bounds_check_trace->cp_offset())do { static_assert( mozilla::detail::AssertionConditionType<
decltype(trace->cp_offset() == bounds_check_trace->cp_offset
())>::isValid, "invalid assertion condition"); if ((__builtin_expect
(!!(!(!!(trace->cp_offset() == bounds_check_trace->cp_offset
()))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("trace->cp_offset() == bounds_check_trace->cp_offset()"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1529); AnnotateMozCrashReason("MOZ_ASSERT" "(" "trace->cp_offset() == bounds_check_trace->cp_offset()"
")"); do { *((volatile int*)__null) = 1529; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1530 // The bounds check is performed using the minimum number of characters
1531 // any choice would eat, so if the bounds check fails, then none of the
1532 // choices can succeed, so we can just immediately backtrack, rather
1533 // than go to the next choice. The number of characters preloaded may be
1534 // less than the number used for the bounds check.
1535 int eats_at_least = predecessor->EatsAtLeast(
1536 bounds_check_trace->at_start() == Trace::FALSE_VALUE);
1537 DCHECK_GE(eats_at_least, details->characters())do { static_assert( mozilla::detail::AssertionConditionType<
decltype((eats_at_least) >= (details->characters()))>
::isValid, "invalid assertion condition"); if ((__builtin_expect
(!!(!(!!((eats_at_least) >= (details->characters())))),
0))) { do { } while (false); MOZ_ReportAssertionFailure("(eats_at_least) >= (details->characters())"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1537); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(eats_at_least) >= (details->characters())"
")"); do { *((volatile int*)__null) = 1537; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1538 assembler->LoadCurrentCharacter(
1539 trace->cp_offset(), bounds_check_trace->backtrack(),
1540 !preload_has_checked_bounds, details->characters(), eats_at_least);
1541 }
1542
1543 bool need_mask = true;
1544
1545 if (details->characters() == 1) {
1546 // If number of characters preloaded is 1 then we used a byte or 16 bit
1547 // load so the value is already masked down.
1548 const uint32_t char_mask = CharMask(compiler->one_byte());
1549 if ((mask & char_mask) == char_mask) need_mask = false;
1550 mask &= char_mask;
1551 } else {
1552 // For 2-character preloads in one-byte mode or 1-character preloads in
1553 // two-byte mode we also use a 16 bit load with zero extend.
1554 static const uint32_t kTwoByteMask = 0xFFFF;
1555 static const uint32_t kFourByteMask = 0xFFFFFFFF;
1556 if (details->characters() == 2 && compiler->one_byte()) {
1557 if ((mask & kTwoByteMask) == kTwoByteMask) need_mask = false;
1558 } else if (details->characters() == 1 && !compiler->one_byte()) {
1559 if ((mask & kTwoByteMask) == kTwoByteMask) need_mask = false;
1560 } else {
1561 if (mask == kFourByteMask) need_mask = false;
1562 }
1563 }
1564
1565 if (fall_through_on_failure) {
1566 if (need_mask) {
1567 assembler->CheckCharacterAfterAnd(value, mask, on_possible_success);
1568 } else {
1569 assembler->CheckCharacter(value, on_possible_success);
1570 }
1571 } else {
1572 if (need_mask) {
1573 assembler->CheckNotCharacterAfterAnd(value, mask, trace->backtrack());
1574 } else {
1575 assembler->CheckNotCharacter(value, trace->backtrack());
1576 }
1577 }
1578 return true;
1579}
1580
1581// Here is the meat of GetQuickCheckDetails (see also the comment on the
1582// super-class in the .h file).
1583//
1584// We iterate along the text object, building up for each character a
1585// mask and value that can be used to test for a quick failure to match.
1586// The masks and values for the positions will be combined into a single
1587// machine word for the current character width in order to be used in
1588// generating a quick check.
1589void TextNode::GetQuickCheckDetails(QuickCheckDetails* details,
1590 RegExpCompiler* compiler,
1591 int characters_filled_in,
1592 bool not_at_start) {
1593 // Do not collect any quick check details if the text node reads backward,
1594 // since it reads in the opposite direction than we use for quick checks.
1595 if (read_backward()) return;
1596 Isolate* isolate = compiler->macro_assembler()->isolate();
1597 DCHECK(characters_filled_in < details->characters())do { static_assert( mozilla::detail::AssertionConditionType<
decltype(characters_filled_in < details->characters())>
::isValid, "invalid assertion condition"); if ((__builtin_expect
(!!(!(!!(characters_filled_in < details->characters()))
), 0))) { do { } while (false); MOZ_ReportAssertionFailure("characters_filled_in < details->characters()"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1597); AnnotateMozCrashReason("MOZ_ASSERT" "(" "characters_filled_in < details->characters()"
")"); do { *((volatile int*)__null) = 1597; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1598 int characters = details->characters();
1599 const uint32_t char_mask = CharMask(compiler->one_byte());
1600 for (int k = 0; k < elements()->length(); k++) {
1601 TextElement elm = elements()->at(k);
1602 if (elm.text_type() == TextElement::ATOM) {
1603 base::Vector<const base::uc16> quarks = elm.atom()->data();
1604 for (int i = 0; i < characters && i < quarks.length(); i++) {
1605 QuickCheckDetails::Position* pos =
1606 details->positions(characters_filled_in);
1607 base::uc16 c = quarks[i];
1608 if (IsIgnoreCase(compiler->flags())) {
1609 unibrow::uchar chars[4];
1610 int length = GetCaseIndependentLetters(
1611 isolate, c, compiler->one_byte(), chars, 4);
1612 if (length == 0) {
1613 // This can happen because all case variants are non-Latin1, but we
1614 // know the input is Latin1.
1615 details->set_cannot_match();
1616 pos->determines_perfectly = false;
1617 return;
1618 }
1619 if (length == 1) {
1620 // This letter has no case equivalents, so it's nice and simple
1621 // and the mask-compare will determine definitely whether we have
1622 // a match at this character position.
1623 pos->mask = char_mask;
1624 pos->value = chars[0];
1625 pos->determines_perfectly = true;
1626 } else {
1627 uint32_t common_bits = char_mask;
1628 uint32_t bits = chars[0];
1629 for (int j = 1; j < length; j++) {
1630 uint32_t differing_bits = ((chars[j] & common_bits) ^ bits);
1631 common_bits ^= differing_bits;
1632 bits &= common_bits;
1633 }
1634 // If length is 2 and common bits has only one zero in it then
1635 // our mask and compare instruction will determine definitely
1636 // whether we have a match at this character position. Otherwise
1637 // it can only be an approximate check.
1638 uint32_t one_zero = (common_bits | ~char_mask);
1639 if (length == 2 && ((~one_zero) & ((~one_zero) - 1)) == 0) {
1640 pos->determines_perfectly = true;
1641 }
1642 pos->mask = common_bits;
1643 pos->value = bits;
1644 }
1645 } else {
1646 // Don't ignore case. Nice simple case where the mask-compare will
1647 // determine definitely whether we have a match at this character
1648 // position.
1649 if (c > char_mask) {
1650 details->set_cannot_match();
1651 pos->determines_perfectly = false;
1652 return;
1653 }
1654 pos->mask = char_mask;
1655 pos->value = c;
1656 pos->determines_perfectly = true;
1657 }
1658 characters_filled_in++;
1659 DCHECK(characters_filled_in <= details->characters())do { static_assert( mozilla::detail::AssertionConditionType<
decltype(characters_filled_in <= details->characters())
>::isValid, "invalid assertion condition"); if ((__builtin_expect
(!!(!(!!(characters_filled_in <= details->characters())
)), 0))) { do { } while (false); MOZ_ReportAssertionFailure("characters_filled_in <= details->characters()"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1659); AnnotateMozCrashReason("MOZ_ASSERT" "(" "characters_filled_in <= details->characters()"
")"); do { *((volatile int*)__null) = 1659; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1660 if (characters_filled_in == details->characters()) {
1661 return;
1662 }
1663 }
1664 } else {
1665 QuickCheckDetails::Position* pos =
1666 details->positions(characters_filled_in);
1667 RegExpClassRanges* tree = elm.class_ranges();
1668 ZoneList<CharacterRange>* ranges = tree->ranges(zone());
1669 if (tree->is_negated() || ranges->is_empty()) {
1670 // A quick check uses multi-character mask and compare. There is no
1671 // useful way to incorporate a negative char class into this scheme
1672 // so we just conservatively create a mask and value that will always
1673 // succeed.
1674 // Likewise for empty ranges (empty ranges can occur e.g. when
1675 // compiling for one-byte subjects and impossible (non-one-byte) ranges
1676 // have been removed).
1677 pos->mask = 0;
1678 pos->value = 0;
1679 } else {
1680 int first_range = 0;
1681 while (ranges->at(first_range).from() > char_mask) {
1682 first_range++;
1683 if (first_range == ranges->length()) {
1684 details->set_cannot_match();
1685 pos->determines_perfectly = false;
1686 return;
1687 }
1688 }
1689 CharacterRange range = ranges->at(first_range);
1690 const base::uc32 first_from = range.from();
1691 const base::uc32 first_to =
1692 (range.to() > char_mask) ? char_mask : range.to();
1693 const uint32_t differing_bits = (first_from ^ first_to);
1694 // A mask and compare is only perfect if the differing bits form a
1695 // number like 00011111 with one single block of trailing 1s.
1696 if ((differing_bits & (differing_bits + 1)) == 0 &&
1697 first_from + differing_bits == first_to) {
1698 pos->determines_perfectly = true;
1699 }
1700 uint32_t common_bits = ~SmearBitsRight(differing_bits);
1701 uint32_t bits = (first_from & common_bits);
1702 for (int i = first_range + 1; i < ranges->length(); i++) {
1703 range = ranges->at(i);
1704 const base::uc32 from = range.from();
1705 if (from > char_mask) continue;
1706 const base::uc32 to =
1707 (range.to() > char_mask) ? char_mask : range.to();
1708 // Here we are combining more ranges into the mask and compare
1709 // value. With each new range the mask becomes more sparse and
1710 // so the chances of a false positive rise. A character class
1711 // with multiple ranges is assumed never to be equivalent to a
1712 // mask and compare operation.
1713 pos->determines_perfectly = false;
1714 uint32_t new_common_bits = (from ^ to);
1715 new_common_bits = ~SmearBitsRight(new_common_bits);
1716 common_bits &= new_common_bits;
1717 bits &= new_common_bits;
1718 uint32_t new_differing_bits = (from & common_bits) ^ bits;
1719 common_bits ^= new_differing_bits;
1720 bits &= common_bits;
1721 }
1722 pos->mask = common_bits;
1723 pos->value = bits;
1724 }
1725 characters_filled_in++;
1726 DCHECK(characters_filled_in <= details->characters())do { static_assert( mozilla::detail::AssertionConditionType<
decltype(characters_filled_in <= details->characters())
>::isValid, "invalid assertion condition"); if ((__builtin_expect
(!!(!(!!(characters_filled_in <= details->characters())
)), 0))) { do { } while (false); MOZ_ReportAssertionFailure("characters_filled_in <= details->characters()"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1726); AnnotateMozCrashReason("MOZ_ASSERT" "(" "characters_filled_in <= details->characters()"
")"); do { *((volatile int*)__null) = 1726; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1727 if (characters_filled_in == details->characters()) return;
1728 }
1729 }
1730 DCHECK(characters_filled_in != details->characters())do { static_assert( mozilla::detail::AssertionConditionType<
decltype(characters_filled_in != details->characters())>
::isValid, "invalid assertion condition"); if ((__builtin_expect
(!!(!(!!(characters_filled_in != details->characters()))),
0))) { do { } while (false); MOZ_ReportAssertionFailure("characters_filled_in != details->characters()"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1730); AnnotateMozCrashReason("MOZ_ASSERT" "(" "characters_filled_in != details->characters()"
")"); do { *((volatile int*)__null) = 1730; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1731 if (!details->cannot_match()) {
1732 on_success()->GetQuickCheckDetails(details, compiler, characters_filled_in,
1733 true);
1734 }
1735}
1736
1737void QuickCheckDetails::Clear() {
1738 for (int i = 0; i < characters_; i++) {
1739 positions_[i].mask = 0;
1740 positions_[i].value = 0;
1741 positions_[i].determines_perfectly = false;
1742 }
1743 characters_ = 0;
1744}
1745
1746void QuickCheckDetails::Advance(int by, bool one_byte) {
1747 if (by >= characters_ || by < 0) {
1748 DCHECK_IMPLIES(by < 0, characters_ == 0)do { if (by < 0) { do { static_assert( mozilla::detail::AssertionConditionType
<decltype(characters_ == 0)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(characters_ == 0))), 0))) { do
{ } while (false); MOZ_ReportAssertionFailure("characters_ == 0"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1748); AnnotateMozCrashReason("MOZ_ASSERT" "(" "characters_ == 0"
")"); do { *((volatile int*)__null) = 1748; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false); } } while (
false)
;
1749 Clear();
1750 return;
1751 }
1752 DCHECK_LE(characters_ - by, 4)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((characters_ - by) <= (4))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((characters_ - by) <= (4)
))), 0))) { do { } while (false); MOZ_ReportAssertionFailure(
"(characters_ - by) <= (4)", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1752); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(characters_ - by) <= (4)"
")"); do { *((volatile int*)__null) = 1752; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1753 DCHECK_LE(characters_, 4)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((characters_) <= (4))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((characters_) <= (4)))), 0
))) { do { } while (false); MOZ_ReportAssertionFailure("(characters_) <= (4)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1753); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(characters_) <= (4)"
")"); do { *((volatile int*)__null) = 1753; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1754 for (int i = 0; i < characters_ - by; i++) {
1755 positions_[i] = positions_[by + i];
1756 }
1757 for (int i = characters_ - by; i < characters_; i++) {
1758 positions_[i].mask = 0;
1759 positions_[i].value = 0;
1760 positions_[i].determines_perfectly = false;
1761 }
1762 characters_ -= by;
1763 // We could change mask_ and value_ here but we would never advance unless
1764 // they had already been used in a check and they won't be used again because
1765 // it would gain us nothing. So there's no point.
1766}
1767
1768void QuickCheckDetails::Merge(QuickCheckDetails* other, int from_index) {
1769 DCHECK(characters_ == other->characters_)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(characters_ == other->characters_)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(characters_ == other->characters_
))), 0))) { do { } while (false); MOZ_ReportAssertionFailure(
"characters_ == other->characters_", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1769); AnnotateMozCrashReason("MOZ_ASSERT" "(" "characters_ == other->characters_"
")"); do { *((volatile int*)__null) = 1769; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1770 if (other->cannot_match_) {
1771 return;
1772 }
1773 if (cannot_match_) {
1774 *this = *other;
1775 return;
1776 }
1777 for (int i = from_index; i < characters_; i++) {
1778 QuickCheckDetails::Position* pos = positions(i);
1779 QuickCheckDetails::Position* other_pos = other->positions(i);
1780 if (pos->mask != other_pos->mask || pos->value != other_pos->value ||
1781 !other_pos->determines_perfectly) {
1782 // Our mask-compare operation will be approximate unless we have the
1783 // exact same operation on both sides of the alternation.
1784 pos->determines_perfectly = false;
1785 }
1786 pos->mask &= other_pos->mask;
1787 pos->value &= pos->mask;
1788 other_pos->value &= pos->mask;
1789 uint32_t differing_bits = (pos->value ^ other_pos->value);
1790 pos->mask &= ~differing_bits;
1791 pos->value &= pos->mask;
1792 }
1793}
1794
1795class VisitMarker {
1796 public:
1797 explicit VisitMarker(NodeInfo* info) : info_(info) {
1798 DCHECK(!info->visited)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(!info->visited)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(!info->visited))), 0))) {
do { } while (false); MOZ_ReportAssertionFailure("!info->visited"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1798); AnnotateMozCrashReason("MOZ_ASSERT" "(" "!info->visited"
")"); do { *((volatile int*)__null) = 1798; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1799 info->visited = true;
1800 }
1801 ~VisitMarker() { info_->visited = false; }
1802
1803 private:
1804 NodeInfo* info_;
1805};
1806
1807// Temporarily sets traversed_loop_initialization_node_.
1808class LoopInitializationMarker {
1809 public:
1810 explicit LoopInitializationMarker(LoopChoiceNode* node) : node_(node) {
1811 DCHECK(!node_->traversed_loop_initialization_node_)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(!node_->traversed_loop_initialization_node_)>::
isValid, "invalid assertion condition"); if ((__builtin_expect
(!!(!(!!(!node_->traversed_loop_initialization_node_))), 0
))) { do { } while (false); MOZ_ReportAssertionFailure("!node_->traversed_loop_initialization_node_"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1811); AnnotateMozCrashReason("MOZ_ASSERT" "(" "!node_->traversed_loop_initialization_node_"
")"); do { *((volatile int*)__null) = 1811; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1812 node_->traversed_loop_initialization_node_ = true;
1813 }
1814 ~LoopInitializationMarker() {
1815 DCHECK(node_->traversed_loop_initialization_node_)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(node_->traversed_loop_initialization_node_)>::
isValid, "invalid assertion condition"); if ((__builtin_expect
(!!(!(!!(node_->traversed_loop_initialization_node_))), 0)
)) { do { } while (false); MOZ_ReportAssertionFailure("node_->traversed_loop_initialization_node_"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1815); AnnotateMozCrashReason("MOZ_ASSERT" "(" "node_->traversed_loop_initialization_node_"
")"); do { *((volatile int*)__null) = 1815; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1816 node_->traversed_loop_initialization_node_ = false;
1817 }
1818 LoopInitializationMarker(const LoopInitializationMarker&) = delete;
1819 LoopInitializationMarker& operator=(const LoopInitializationMarker&) = delete;
1820
1821 private:
1822 LoopChoiceNode* node_;
1823};
1824
1825// Temporarily decrements min_loop_iterations_.
1826class IterationDecrementer {
1827 public:
1828 explicit IterationDecrementer(LoopChoiceNode* node) : node_(node) {
1829 DCHECK_GT(node_->min_loop_iterations_, 0)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((node_->min_loop_iterations_) > (0))>::isValid
, "invalid assertion condition"); if ((__builtin_expect(!!(!(
!!((node_->min_loop_iterations_) > (0)))), 0))) { do { }
while (false); MOZ_ReportAssertionFailure("(node_->min_loop_iterations_) > (0)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1829); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(node_->min_loop_iterations_) > (0)"
")"); do { *((volatile int*)__null) = 1829; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1830 --node_->min_loop_iterations_;
1831 }
1832 ~IterationDecrementer() { ++node_->min_loop_iterations_; }
1833 IterationDecrementer(const IterationDecrementer&) = delete;
1834 IterationDecrementer& operator=(const IterationDecrementer&) = delete;
1835
1836 private:
1837 LoopChoiceNode* node_;
1838};
1839
1840RegExpNode* SeqRegExpNode::FilterOneByte(int depth, RegExpFlags flags) {
1841 if (info()->replacement_calculated) return replacement();
1842 if (depth < 0) return this;
1843 DCHECK(!info()->visited)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(!info()->visited)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(!info()->visited))), 0)))
{ do { } while (false); MOZ_ReportAssertionFailure("!info()->visited"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1843); AnnotateMozCrashReason("MOZ_ASSERT" "(" "!info()->visited"
")"); do { *((volatile int*)__null) = 1843; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1844 VisitMarker marker(info());
1845 return FilterSuccessor(depth - 1, flags);
1846}
1847
1848RegExpNode* SeqRegExpNode::FilterSuccessor(int depth, RegExpFlags flags) {
1849 RegExpNode* next = on_success_->FilterOneByte(depth - 1, flags);
1850 if (next == nullptr) return set_replacement(nullptr);
1851 on_success_ = next;
1852 return set_replacement(this);
1853}
1854
1855// We need to check for the following characters: 0x39C 0x3BC 0x178.
1856bool RangeContainsLatin1Equivalents(CharacterRange range) {
1857 // TODO(dcarney): this could be a lot more efficient.
1858 return range.Contains(0x039C) || range.Contains(0x03BC) ||
1859 range.Contains(0x0178);
1860}
1861
1862namespace {
1863
1864bool RangesContainLatin1Equivalents(ZoneList<CharacterRange>* ranges) {
1865 for (int i = 0; i < ranges->length(); i++) {
1866 // TODO(dcarney): this could be a lot more efficient.
1867 if (RangeContainsLatin1Equivalents(ranges->at(i))) return true;
1868 }
1869 return false;
1870}
1871
1872} // namespace
1873
1874RegExpNode* TextNode::FilterOneByte(int depth, RegExpFlags flags) {
1875 if (info()->replacement_calculated) return replacement();
1876 if (depth < 0) return this;
1877 DCHECK(!info()->visited)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(!info()->visited)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(!info()->visited))), 0)))
{ do { } while (false); MOZ_ReportAssertionFailure("!info()->visited"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1877); AnnotateMozCrashReason("MOZ_ASSERT" "(" "!info()->visited"
")"); do { *((volatile int*)__null) = 1877; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1878 VisitMarker marker(info());
1879 int element_count = elements()->length();
1880 for (int i = 0; i < element_count; i++) {
1881 TextElement elm = elements()->at(i);
1882 if (elm.text_type() == TextElement::ATOM) {
1883 base::Vector<const base::uc16> quarks = elm.atom()->data();
1884 for (int j = 0; j < quarks.length(); j++) {
1885 base::uc16 c = quarks[j];
1886 if (IsIgnoreCase(flags)) {
1887 c = unibrow::Latin1::TryConvertToLatin1(c);
1888 }
1889 if (c > unibrow::Latin1::kMaxChar) return set_replacement(nullptr);
1890 // Replace quark in case we converted to Latin-1.
1891 base::uc16* writable_quarks = const_cast<base::uc16*>(quarks.begin());
1892 writable_quarks[j] = c;
1893 }
1894 } else {
1895 DCHECK(elm.text_type() == TextElement::CLASS_RANGES)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(elm.text_type() == TextElement::CLASS_RANGES)>::isValid
, "invalid assertion condition"); if ((__builtin_expect(!!(!(
!!(elm.text_type() == TextElement::CLASS_RANGES))), 0))) { do
{ } while (false); MOZ_ReportAssertionFailure("elm.text_type() == TextElement::CLASS_RANGES"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1895); AnnotateMozCrashReason("MOZ_ASSERT" "(" "elm.text_type() == TextElement::CLASS_RANGES"
")"); do { *((volatile int*)__null) = 1895; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1896 RegExpClassRanges* cr = elm.class_ranges();
1897 ZoneList<CharacterRange>* ranges = cr->ranges(zone());
1898 CharacterRange::Canonicalize(ranges);
1899 // Now they are in order so we only need to look at the first.
1900 int range_count = ranges->length();
1901 if (cr->is_negated()) {
1902 if (range_count != 0 && ranges->at(0).from() == 0 &&
1903 ranges->at(0).to() >= String::kMaxOneByteCharCode) {
1904 // This will be handled in a later filter.
1905 if (IsIgnoreCase(flags) && RangesContainLatin1Equivalents(ranges)) {
1906 continue;
1907 }
1908 return set_replacement(nullptr);
1909 }
1910 } else {
1911 if (range_count == 0 ||
1912 ranges->at(0).from() > String::kMaxOneByteCharCode) {
1913 // This will be handled in a later filter.
1914 if (IsIgnoreCase(flags) && RangesContainLatin1Equivalents(ranges)) {
1915 continue;
1916 }
1917 return set_replacement(nullptr);
1918 }
1919 }
1920 }
1921 }
1922 return FilterSuccessor(depth - 1, flags);
1923}
1924
1925RegExpNode* LoopChoiceNode::FilterOneByte(int depth, RegExpFlags flags) {
1926 if (info()->replacement_calculated) return replacement();
1927 if (depth < 0) return this;
1928 if (info()->visited) return this;
1929 {
1930 VisitMarker marker(info());
1931
1932 RegExpNode* continue_replacement =
1933 continue_node_->FilterOneByte(depth - 1, flags);
1934 // If we can't continue after the loop then there is no sense in doing the
1935 // loop.
1936 if (continue_replacement == nullptr) return set_replacement(nullptr);
1937 }
1938
1939 return ChoiceNode::FilterOneByte(depth - 1, flags);
1940}
1941
1942RegExpNode* ChoiceNode::FilterOneByte(int depth, RegExpFlags flags) {
1943 if (info()->replacement_calculated) return replacement();
1944 if (depth < 0) return this;
1945 if (info()->visited) return this;
1946 VisitMarker marker(info());
1947 int choice_count = alternatives_->length();
1948
1949 for (int i = 0; i < choice_count; i++) {
1950 GuardedAlternative alternative = alternatives_->at(i);
1951 if (alternative.guards() != nullptr &&
1952 alternative.guards()->length() != 0) {
1953 set_replacement(this);
1954 return this;
1955 }
1956 }
1957
1958 int surviving = 0;
1959 RegExpNode* survivor = nullptr;
1960 for (int i = 0; i < choice_count; i++) {
1961 GuardedAlternative alternative = alternatives_->at(i);
1962 RegExpNode* replacement =
1963 alternative.node()->FilterOneByte(depth - 1, flags);
1964 DCHECK(replacement != this)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(replacement != this)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(replacement != this))), 0)))
{ do { } while (false); MOZ_ReportAssertionFailure("replacement != this"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1964); AnnotateMozCrashReason("MOZ_ASSERT" "(" "replacement != this"
")"); do { *((volatile int*)__null) = 1964; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
; // No missing EMPTY_MATCH_CHECK.
1965 if (replacement != nullptr) {
1966 alternatives_->at(i).set_node(replacement);
1967 surviving++;
1968 survivor = replacement;
1969 }
1970 }
1971 if (surviving < 2) return set_replacement(survivor);
1972
1973 set_replacement(this);
1974 if (surviving == choice_count) {
1975 return this;
1976 }
1977 // Only some of the nodes survived the filtering. We need to rebuild the
1978 // alternatives list.
1979 ZoneList<GuardedAlternative>* new_alternatives =
1980 zone()->New<ZoneList<GuardedAlternative>>(surviving, zone());
1981 for (int i = 0; i < choice_count; i++) {
1982 RegExpNode* replacement =
1983 alternatives_->at(i).node()->FilterOneByte(depth - 1, flags);
1984 if (replacement != nullptr) {
1985 alternatives_->at(i).set_node(replacement);
1986 new_alternatives->Add(alternatives_->at(i), zone());
1987 }
1988 }
1989 alternatives_ = new_alternatives;
1990 return this;
1991}
1992
1993RegExpNode* NegativeLookaroundChoiceNode::FilterOneByte(int depth,
1994 RegExpFlags flags) {
1995 if (info()->replacement_calculated) return replacement();
1996 if (depth < 0) return this;
1997 if (info()->visited) return this;
1998 VisitMarker marker(info());
1999 // Alternative 0 is the negative lookahead, alternative 1 is what comes
2000 // afterwards.
2001 RegExpNode* node = continue_node();
2002 RegExpNode* replacement = node->FilterOneByte(depth - 1, flags);
2003 if (replacement == nullptr) return set_replacement(nullptr);
2004 alternatives_->at(kContinueIndex).set_node(replacement);
2005
2006 RegExpNode* neg_node = lookaround_node();
2007 RegExpNode* neg_replacement = neg_node->FilterOneByte(depth - 1, flags);
2008 // If the negative lookahead is always going to fail then
2009 // we don't need to check it.
2010 if (neg_replacement == nullptr) return set_replacement(replacement);
2011 alternatives_->at(kLookaroundIndex).set_node(neg_replacement);
2012 return set_replacement(this);
2013}
2014
2015void LoopChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details,
2016 RegExpCompiler* compiler,
2017 int characters_filled_in,
2018 bool not_at_start) {
2019 if (body_can_be_zero_length_ || info()->visited) return;
2020 not_at_start = not_at_start || this->not_at_start();
2021 DCHECK_EQ(alternatives_->length(), 2)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((alternatives_->length()) == (2))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((alternatives_->length())
== (2)))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("(alternatives_->length()) == (2)", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 2021); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(alternatives_->length()) == (2)"
")"); do { *((volatile int*)__null) = 2021; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
; // There's just loop and continue.
2022 if (traversed_loop_initialization_node_ && min_loop_iterations_ > 0 &&
2023 loop_node_->EatsAtLeast(not_at_start) >
2024 continue_node_->EatsAtLeast(true)) {
2025 // Loop body is guaranteed to execute at least once, and consume characters
2026 // when it does, meaning the only possible quick checks from this point
2027 // begin with the loop body. We may recursively visit this LoopChoiceNode,
2028 // but we temporarily decrease its minimum iteration counter so we know when
2029 // to check the continue case.
2030 IterationDecrementer next_iteration(this);
2031 loop_node_->GetQuickCheckDetails(details, compiler, characters_filled_in,
2032 not_at_start);
2033 } else {
2034 // Might not consume anything in the loop body, so treat it like a normal
2035 // ChoiceNode (and don't recursively visit this node again).
2036 VisitMarker marker(info());
2037 ChoiceNode::GetQuickCheckDetails(details, compiler, characters_filled_in,
2038 not_at_start);
2039 }
2040}
2041
2042void LoopChoiceNode::GetQuickCheckDetailsFromLoopEntry(
2043 QuickCheckDetails* details, RegExpCompiler* compiler,
2044 int characters_filled_in, bool not_at_start) {
2045 if (traversed_loop_initialization_node_) {
2046 // We already entered this loop once, exited via its continuation node, and
2047 // followed an outer loop's back-edge to before the loop entry point. We
2048 // could try to reset the minimum iteration count to its starting value at
2049 // this point, but that seems like more trouble than it's worth. It's safe
2050 // to keep going with the current (possibly reduced) minimum iteration
2051 // count.
2052 GetQuickCheckDetails(details, compiler, characters_filled_in, not_at_start);
2053 } else {
2054 // We are entering a loop via its counter initialization action, meaning we
2055 // are guaranteed to run the loop body at least some minimum number of times
2056 // before running the continuation node. Set a flag so that this node knows
2057 // (now and any times we visit it again recursively) that it was entered
2058 // from the top.
2059 LoopInitializationMarker marker(this);
2060 GetQuickCheckDetails(details, compiler, characters_filled_in, not_at_start);
2061 }
2062}
2063
2064void LoopChoiceNode::FillInBMInfo(Isolate* isolate, int offset, int budget,
2065 BoyerMooreLookahead* bm, bool not_at_start) {
2066 if (body_can_be_zero_length_ || budget <= 0) {
2067 bm->SetRest(offset);
2068 SaveBMInfo(bm, not_at_start, offset);
2069 return;
2070 }
2071 ChoiceNode::FillInBMInfo(isolate, offset, budget - 1, bm, not_at_start);
2072 SaveBMInfo(bm, not_at_start, offset);
2073}
2074
2075void ChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details,
2076 RegExpCompiler* compiler,
2077 int characters_filled_in,
2078 bool not_at_start) {
2079 not_at_start = (not_at_start || not_at_start_);
2080 int choice_count = alternatives_->length();
2081 DCHECK_LT(0, choice_count)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((0) < (choice_count))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((0) < (choice_count)))), 0
))) { do { } while (false); MOZ_ReportAssertionFailure("(0) < (choice_count)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 2081); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(0) < (choice_count)"
")"); do { *((volatile int*)__null) = 2081; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
2082 alternatives_->at(0).node()->GetQuickCheckDetails(
2083 details, compiler, characters_filled_in, not_at_start);
2084 for (int i = 1; i < choice_count; i++) {
2085 QuickCheckDetails new_details(details->characters());
2086 RegExpNode* node = alternatives_->at(i).node();
2087 node->GetQuickCheckDetails(&new_details, compiler, characters_filled_in,
2088 not_at_start);
2089 // Here we merge the quick match details of the two branches.
2090 details->Merge(&new_details, characters_filled_in);
2091 }
2092}
2093
2094namespace {
2095
2096// Check for [0-9A-Z_a-z].
2097void EmitWordCheck(RegExpMacroAssembler* assembler, Label* word,
2098 Label* non_word, bool fall_through_on_word) {
2099 if (assembler->CheckSpecialClassRanges(
2100 fall_through_on_word ? StandardCharacterSet::kWord
2101 : StandardCharacterSet::kNotWord,
2102 fall_through_on_word ? non_word : word)) {
2103 // Optimized implementation available.
2104 return;
2105 }
2106 assembler->CheckCharacterGT('z', non_word);
2107 assembler->CheckCharacterLT('0', non_word);
2108 assembler->CheckCharacterGT('a' - 1, word);
2109 assembler->CheckCharacterLT('9' + 1, word);
2110 assembler->CheckCharacterLT('A', non_word);
2111 assembler->CheckCharacterLT('Z' + 1, word);
2112 if (fall_through_on_word) {
2113 assembler->CheckNotCharacter('_', non_word);
2114 } else {
2115 assembler->CheckCharacter('_', word);
2116 }
2117}
2118
2119// Emit the code to check for a ^ in multiline mode (1-character lookbehind
2120// that matches newline or the start of input).
2121void EmitHat(RegExpCompiler* compiler, RegExpNode* on_success, Trace* trace) {
2122 RegExpMacroAssembler* assembler = compiler->macro_assembler();
2123
2124 // We will load the previous character into the current character register.
2125 Trace new_trace(*trace);
2126 new_trace.InvalidateCurrentCharacter();
2127
2128 // A positive (> 0) cp_offset means we've already successfully matched a
2129 // non-empty-width part of the pattern, and thus cannot be at or before the
2130 // start of the subject string. We can thus skip both at-start and
2131 // bounds-checks when loading the one-character lookbehind.
2132 const bool may_be_at_or_before_subject_string_start =
2133 new_trace.cp_offset() <= 0;
2134
2135 Label ok;
2136 if (may_be_at_or_before_subject_string_start) {
2137 // The start of input counts as a newline in this context, so skip to ok if
2138 // we are at the start.
2139 assembler->CheckAtStart(new_trace.cp_offset(), &ok);
2140 }
2141
2142 // If we've already checked that we are not at the start of input, it's okay
2143 // to load the previous character without bounds checks.
2144 const bool can_skip_bounds_check = !may_be_at_or_before_subject_string_start;
2145 assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1,
2146 new_trace.backtrack(), can_skip_bounds_check);
2147 if (!assembler->CheckSpecialClassRanges(StandardCharacterSet::kLineTerminator,
2148 new_trace.backtrack())) {
2149 // Newline means \n, \r, 0x2028 or 0x2029.
2150 if (!compiler->one_byte()) {
2151 assembler->CheckCharacterAfterAnd(0x2028, 0xFFFE, &ok);
2152 }
2153 assembler->CheckCharacter('\n', &ok);
2154 assembler->CheckNotCharacter('\r', new_trace.backtrack());
2155 }
2156 assembler->Bind(&ok);
2157 on_success->Emit(compiler, &new_trace);
2158}
2159
2160} // namespace
2161
2162// Emit the code to handle \b and \B (word-boundary or non-word-boundary).
2163void AssertionNode::EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace) {
2164 RegExpMacroAssembler* assembler = compiler->macro_assembler();
2165 Isolate* isolate = assembler->isolate();
2166 Trace::TriBool next_is_word_character = Trace::UNKNOWN;
2167 bool not_at_start = (trace->at_start() == Trace::FALSE_VALUE);
2168 BoyerMooreLookahead* lookahead = bm_info(not_at_start);
2169 if (lookahead == nullptr) {
2170 int eats_at_least =
2171 std::min(kMaxLookaheadForBoyerMoore, EatsAtLeast(not_at_start));
2172 if (eats_at_least >= 1) {
2173 BoyerMooreLookahead* bm =
2174 zone()->New<BoyerMooreLookahead>(eats_at_least, compiler, zone());
2175 FillInBMInfo(isolate, 0, kRecursionBudget, bm, not_at_start);
2176 if (bm->at(0)->is_non_word()) next_is_word_character = Trace::FALSE_VALUE;
2177 if (bm->at(0)->is_word()) next_is_word_character = Trace::TRUE_VALUE;
2178 }
2179 } else {
2180 if (lookahead->at(0)->is_non_word())
2181 next_is_word_character = Trace::FALSE_VALUE;
2182 if (lookahead->at(0)->is_word()) next_is_word_character = Trace::TRUE_VALUE;
2183 }
2184 bool at_boundary = (assertion_type_ == AssertionNode::AT_BOUNDARY);
2185 if (next_is_word_character == Trace::UNKNOWN) {
2186 Label before_non_word;
2187 Label before_word;
2188 if (trace->characters_preloaded() != 1) {
2189 assembler->LoadCurrentCharacter(trace->cp_offset(), &before_non_word);
2190 }
2191 // Fall through on non-word.
2192 EmitWordCheck(assembler, &before_word, &before_non_word, false);
2193 // Next character is not a word character.
2194 assembler->Bind(&before_non_word);
2195 Label ok;
2196 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsNonWord : kIsWord);
2197 assembler->GoTo(&ok);
2198
2199 assembler->Bind(&before_word);
2200 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsWord : kIsNonWord);
2201 assembler->Bind(&ok);
2202 } else if (next_is_word_character == Trace::TRUE_VALUE) {
2203 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsWord : kIsNonWord);
2204 } else {
2205 DCHECK(next_is_word_character == Trace::FALSE_VALUE)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(next_is_word_character == Trace::FALSE_VALUE)>::isValid
, "invalid assertion condition"); if ((__builtin_expect(!!(!(
!!(next_is_word_character == Trace::FALSE_VALUE))), 0))) { do
{ } while (false); MOZ_ReportAssertionFailure("next_is_word_character == Trace::FALSE_VALUE"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 2205); AnnotateMozCrashReason("MOZ_ASSERT" "(" "next_is_word_character == Trace::FALSE_VALUE"
")"); do { *((volatile int*)__null) = 2205; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
2206 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsNonWord : kIsWord);
2207 }
2208}
2209
2210void AssertionNode::BacktrackIfPrevious(
2211 RegExpCompiler* compiler, Trace* trace,
2212 AssertionNode::IfPrevious backtrack_if_previous) {
2213 RegExpMacroAssembler* assembler = compiler->macro_assembler();
2214 Trace new_trace(*trace);
2215 new_trace.InvalidateCurrentCharacter();
2216
2217 Label fall_through;
2218 Label* non_word = backtrack_if_previous == kIsNonWord ? new_trace.backtrack()
2219 : &fall_through;
2220 Label* word = backtrack_if_previous == kIsNonWord ? &fall_through
2221 : new_trace.backtrack();
2222
2223 // A positive (> 0) cp_offset means we've already successfully matched a
2224 // non-empty-width part of the pattern, and thus cannot be at or before the
2225 // start of the subject string. We can thus skip both at-start and
2226 // bounds-checks when loading the one-character lookbehind.
2227 const bool may_be_at_or_before_subject_string_start =
2228 new_trace.cp_offset() <= 0;
2229
2230 if (may_be_at_or_before_subject_string_start) {
2231 // The start of input counts as a non-word character, so the question is
2232 // decided if we are at the start.
2233 assembler->CheckAtStart(new_trace.cp_offset(), non_word);
2234 }
2235
2236 // If we've already checked that we are not at the start of input, it's okay
2237 // to load the previous character without bounds checks.
2238 const bool can_skip_bounds_check = !may_be_at_or_before_subject_string_start;
2239 assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1, non_word,
2240 can_skip_bounds_check);
2241 EmitWordCheck(assembler, word, non_word, backtrack_if_previous == kIsNonWord);
2242
2243 assembler->Bind(&fall_through);
2244 on_success()->Emit(compiler, &new_trace);
2245}
2246
2247void AssertionNode::GetQuickCheckDetails(QuickCheckDetails* details,
2248 RegExpCompiler* compiler,
2249 int filled_in, bool not_at_start) {
2250 if (assertion_type_ == AT_START && not_at_start) {
2251 details->set_cannot_match();
2252 return;
2253 }
2254 return on_success()->GetQuickCheckDetails(details, compiler, filled_in,
2255 not_at_start);
2256}
2257
2258void AssertionNode::Emit(RegExpCompiler* compiler, Trace* trace) {
2259 RegExpMacroAssembler* assembler = compiler->macro_assembler();
2260 switch (assertion_type_) {
2261 case AT_END: {
2262 Label ok;
2263 assembler->CheckPosition(trace->cp_offset(), &ok);
2264 assembler->GoTo(trace->backtrack());
2265 assembler->Bind(&ok);
2266 break;
2267 }
2268 case AT_START: {
2269 if (trace->at_start() == Trace::FALSE_VALUE) {
2270 assembler->GoTo(trace->backtrack());
2271 return;
2272 }
2273 if (trace->at_start() == Trace::UNKNOWN) {
2274 assembler->CheckNotAtStart(trace->cp_offset(), trace->backtrack());
2275 Trace at_start_trace = *trace;
2276 at_start_trace.set_at_start(Trace::TRUE_VALUE);
2277 on_success()->Emit(compiler, &at_start_trace);
2278 return;
2279 }
2280 } break;
2281 case AFTER_NEWLINE:
2282 EmitHat(compiler, on_success(), trace);
2283 return;
2284 case AT_BOUNDARY:
2285 case AT_NON_BOUNDARY: {
2286 EmitBoundaryCheck(compiler, trace);
2287 return;
2288 }
2289 }
2290 on_success()->Emit(compiler, trace);
2291}
2292
2293namespace {
2294
2295bool DeterminedAlready(QuickCheckDetails* quick_check, int offset) {
2296 if (quick_check == nullptr) return false;
2297 if (offset >= quick_check->characters()) return false;
2298 return quick_check->positions(offset)->determines_perfectly;
2299}
2300
2301void UpdateBoundsCheck(int index, int* checked_up_to) {
2302 if (index > *checked_up_to) {
2303 *checked_up_to = index;
2304 }
2305}
2306
2307} // namespace
2308
2309// We call this repeatedly to generate code for each pass over the text node.
2310// The passes are in increasing order of difficulty because we hope one
2311// of the first passes will fail in which case we are saved the work of the
2312// later passes. for example for the case independent regexp /%[asdfghjkl]a/
2313// we will check the '%' in the first pass, the case independent 'a' in the
2314// second pass and the character class in the last pass.
2315//
2316// The passes are done from right to left, so for example to test for /bar/
2317// we will first test for an 'r' with offset 2, then an 'a' with offset 1
2318// and then a 'b' with offset 0. This means we can avoid the end-of-input
2319// bounds check most of the time. In the example we only need to check for
2320// end-of-input when loading the putative 'r'.
2321//
2322// A slight complication involves the fact that the first character may already
2323// be fetched into a register by the previous node. In this case we want to
2324// do the test for that character first. We do this in separate passes. The
2325// 'preloaded' argument indicates that we are doing such a 'pass'. If such a
2326// pass has been performed then subsequent passes will have true in
2327// first_element_checked to indicate that that character does not need to be
2328// checked again.
2329//
2330// In addition to all this we are passed a Trace, which can
2331// contain an AlternativeGeneration object. In this AlternativeGeneration
2332// object we can see details of any quick check that was already passed in
2333// order to get to the code we are now generating. The quick check can involve
2334// loading characters, which means we do not need to recheck the bounds
2335// up to the limit the quick check already checked. In addition the quick
2336// check can have involved a mask and compare operation which may simplify
2337// or obviate the need for further checks at some character positions.
2338void TextNode::TextEmitPass(RegExpCompiler* compiler, TextEmitPassType pass,
2339 bool preloaded, Trace* trace,
2340 bool first_element_checked, int* checked_up_to) {
2341 RegExpMacroAssembler* assembler = compiler->macro_assembler();
2342 Isolate* isolate = assembler->isolate();
2343 bool one_byte = compiler->one_byte();
2344 Label* backtrack = trace->backtrack();
2345 QuickCheckDetails* quick_check = trace->quick_check_performed();
2346 int element_count = elements()->length();
2347 int backward_offset = read_backward() ? -Length() : 0;
2348 for (int i = preloaded ? 0 : element_count - 1; i >= 0; i--) {
2349 TextElement elm = elements()->at(i);
2350 int cp_offset = trace->cp_offset() + elm.cp_offset() + backward_offset;
2351 if (elm.text_type() == TextElement::ATOM) {
2352 if (SkipPass(pass, IsIgnoreCase(compiler->flags()))) continue;
2353 base::Vector<const base::uc16> quarks = elm.atom()->data();
2354 for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) {
2355 if (first_element_checked && i == 0 && j == 0) continue;
2356 if (DeterminedAlready(quick_check, elm.cp_offset() + j)) continue;
2357 base::uc16 quark = quarks[j];
2358 if (IsIgnoreCase(compiler->flags())) {
2359 // Everywhere else we assume that a non-Latin-1 character cannot match
2360 // a Latin-1 character. Avoid the cases where this is assumption is
2361 // invalid by using the Latin1 equivalent instead.
2362 quark = unibrow::Latin1::TryConvertToLatin1(quark);
2363 }
2364 bool needs_bounds_check =
2365 *checked_up_to < cp_offset + j || read_backward();
2366 bool bounds_checked = false;
2367 switch (pass) {
2368 case NON_LATIN1_MATCH:
2369 DCHECK(one_byte)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(one_byte)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(one_byte))), 0))) { do { } while
(false); MOZ_ReportAssertionFailure("one_byte", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 2369); AnnotateMozCrashReason("MOZ_ASSERT" "(" "one_byte" ")"
); do { *((volatile int*)__null) = 2369; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
2370 if (quark > String::kMaxOneByteCharCode) {
2371 assembler->GoTo(backtrack);
2372 return;
2373 }
2374 break;
2375 case NON_LETTER_CHARACTER_MATCH:
2376 bounds_checked =
2377 EmitAtomNonLetter(isolate, compiler, quark, backtrack,
2378 cp_offset + j, needs_bounds_check, preloaded);
2379 break;
2380 case SIMPLE_CHARACTER_MATCH:
2381 bounds_checked = EmitSimpleCharacter(isolate, compiler, quark,
2382 backtrack, cp_offset + j,
2383 needs_bounds_check, preloaded);
2384 break;
2385 case CASE_CHARACTER_MATCH:
2386 bounds_checked =
2387 EmitAtomLetter(isolate, compiler, quark, backtrack,
2388 cp_offset + j, needs_bounds_check, preloaded);
2389 break;
2390 default:
2391 break;
2392 }
2393 if (bounds_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to);
2394 }
2395 } else {
2396 DCHECK_EQ(TextElement::CLASS_RANGES, elm.text_type())do { static_assert( mozilla::detail::AssertionConditionType<
decltype((TextElement::CLASS_RANGES) == (elm.text_type()))>
::isValid, "invalid assertion condition"); if ((__builtin_expect
(!!(!(!!((TextElement::CLASS_RANGES) == (elm.text_type())))),
0))) { do { } while (false); MOZ_ReportAssertionFailure("(TextElement::CLASS_RANGES) == (elm.text_type())"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 2396); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(TextElement::CLASS_RANGES) == (elm.text_type())"
")"); do { *((volatile int*)__null) = 2396; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
2397 if (pass == CHARACTER_CLASS_MATCH) {
2398 if (first_element_checked && i == 0) continue;
2399 if (DeterminedAlready(quick_check, elm.cp_offset())) continue;
2400 RegExpClassRanges* cr = elm.class_ranges();
2401 bool bounds_check = *checked_up_to < cp_offset || read_backward();
2402 EmitClassRanges(assembler, cr, one_byte, backtrack, cp_offset,
2403 bounds_check, preloaded, zone());
2404 UpdateBoundsCheck(cp_offset, checked_up_to);
2405 }
2406 }
2407 }
2408}
2409
2410int TextNode::Length() {
2411 TextElement elm = elements()->last();
2412 DCHECK_LE(0, elm.cp_offset())do { static_assert( mozilla::detail::AssertionConditionType<
decltype((0) <= (elm.cp_offset()))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((0) <= (elm.cp_offset()))
)), 0))) { do { } while (false); MOZ_ReportAssertionFailure("(0) <= (elm.cp_offset())"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 2412); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(0) <= (elm.cp_offset())"
")"); do { *((volatile int*)__null) = 2412; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
2413 return elm.cp_offset() + elm.length();
2414}
2415
2416bool TextNode::SkipPass(TextEmitPassType pass, bool ignore_case) {
2417 if (ignore_case) {
2418 return pass == SIMPLE_CHARACTER_MATCH;
2419 } else {
2420 return pass == NON_LETTER_CHARACTER_MATCH || pass == CASE_CHARACTER_MATCH;
2421 }
2422}
2423
2424TextNode* TextNode::CreateForCharacterRanges(Zone* zone,
2425 ZoneList<CharacterRange>* ranges,
2426 bool read_backward,
2427 RegExpNode* on_success) {
2428 DCHECK_NOT_NULL(ranges)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((ranges) != nullptr)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((ranges) != nullptr))), 0)))
{ do { } while (false); MOZ_ReportAssertionFailure("(ranges) != nullptr"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 2428); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(ranges) != nullptr"
")"); do { *((volatile int*)__null) = 2428; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
2429 // TODO(jgruber): There's no fundamental need to create this
2430 // RegExpClassRanges; we could refactor to avoid the allocation.
2431 return zone->New<TextNode>(zone->New<RegExpClassRanges>(zone, ranges),
2432 read_backward, on_success);
2433}
2434
2435TextNode* TextNode::CreateForSurrogatePair(
2436 Zone* zone, CharacterRange lead, ZoneList<CharacterRange>* trail_ranges,
2437 bool read_backward, RegExpNode* on_success) {
2438 ZoneList<CharacterRange>* lead_ranges = CharacterRange::List(zone, lead);
2439 ZoneList<TextElement>* elms = zone->New<ZoneList<TextElement>>(2, zone);
2440 elms->Add(
2441 TextElement::ClassRanges(zone->New<RegExpClassRanges>(zone, lead_ranges)),
2442 zone);
2443 elms->Add(TextElement::ClassRanges(
2444 zone->New<RegExpClassRanges>(zone, trail_ranges)),
2445 zone);
2446 return zone->New<TextNode>(elms, read_backward, on_success);
2447}
2448
2449TextNode* TextNode::CreateForSurrogatePair(
2450 Zone* zone, ZoneList<CharacterRange>* lead_ranges, CharacterRange trail,
2451 bool read_backward, RegExpNode* on_success) {
2452 ZoneList<CharacterRange>* trail_ranges = CharacterRange::List(zone, trail);
2453 ZoneList<TextElement>* elms = zone->New<ZoneList<TextElement>>(2, zone);
2454 elms->Add(
2455 TextElement::ClassRanges(zone->New<RegExpClassRanges>(zone, lead_ranges)),
2456 zone);
2457 elms->Add(TextElement::ClassRanges(
2458 zone->New<RegExpClassRanges>(zone, trail_ranges)),
2459 zone);
2460 return zone->New<TextNode>(elms, read_backward, on_success);
2461}
2462
2463// This generates the code to match a text node. A text node can contain
2464// straight character sequences (possibly to be matched in a case-independent
2465// way) and character classes. For efficiency we do not do this in a single
2466// pass from left to right. Instead we pass over the text node several times,
2467// emitting code for some character positions every time. See the comment on
2468// TextEmitPass for details.
2469void TextNode::Emit(RegExpCompiler* compiler, Trace* trace) {
2470 LimitResult limit_result = LimitVersions(compiler, trace);
2471 if (limit_result == DONE) return;
2472 DCHECK(limit_result == CONTINUE)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(limit_result == CONTINUE)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(limit_result == CONTINUE))),
0))) { do { } while (false); MOZ_ReportAssertionFailure("limit_result == CONTINUE"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 2472); AnnotateMozCrashReason("MOZ_ASSERT" "(" "limit_result == CONTINUE"
")"); do { *((volatile int*)__null) = 2472; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
2473
2474 if (trace->cp_offset() + Length() > RegExpMacroAssembler::kMaxCPOffset) {
2475 compiler->SetRegExpTooBig();
2476 return;
2477 }
2478
2479 if (compiler->one_byte()) {
2480 int dummy = 0;
2481 TextEmitPass(compiler, NON_LATIN1_MATCH, false, trace, false, &dummy);
2482 }
2483
2484 bool first_elt_done = false;
2485 int bound_checked_to = trace->cp_offset() - 1;
2486 bound_checked_to += trace->bound_checked_up_to();
2487
2488 // If a character is preloaded into the current character register then
2489 // check that now.
2490 if (trace->characters_preloaded() == 1) {
2491 for (int pass = kFirstRealPass; pass <= kLastPass; pass++) {
2492 TextEmitPass(compiler, static_cast<TextEmitPassType>(pass), true, trace,
2493 false, &bound_checked_to);
2494 }
2495 first_elt_done = true;
2496 }
2497
2498 for (int pass = kFirstRealPass; pass <= kLastPass; pass++) {
2499 TextEmitPass(compiler, static_cast<TextEmitPassType>(pass), false, trace,
2500 first_elt_done, &bound_checked_to);
2501 }
2502
2503 Trace successor_trace(*trace);
2504 // If we advance backward, we may end up at the start.
2505 successor_trace.AdvanceCurrentPositionInTrace(
2506 read_backward() ? -Length() : Length(), compiler);
2507 successor_trace.set_at_start(read_backward() ? Trace::UNKNOWN
2508 : Trace::FALSE_VALUE);
2509 RecursionCheck rc(compiler);
2510 on_success()->Emit(compiler, &successor_trace);
2511}
2512
2513void Trace::InvalidateCurrentCharacter() { characters_preloaded_ = 0; }
2514
2515void Trace::AdvanceCurrentPositionInTrace(int by, RegExpCompiler* compiler) {
2516 // We don't have an instruction for shifting the current character register
2517 // down or for using a shifted value for anything so lets just forget that
2518 // we preloaded any characters into it.
2519 characters_preloaded_ = 0;
2520 // Adjust the offsets of the quick check performed information. This
2521 // information is used to find out what we already determined about the
2522 // characters by means of mask and compare.
2523 quick_check_performed_.Advance(by, compiler->one_byte());
2524 cp_offset_ += by;
2525 if (cp_offset_ > RegExpMacroAssembler::kMaxCPOffset) {
2526 compiler->SetRegExpTooBig();
2527 cp_offset_ = 0;
2528 }
2529 bound_checked_up_to_ = std::max(0, bound_checked_up_to_ - by);
2530}
2531
2532void TextNode::MakeCaseIndependent(Isolate* isolate, bool is_one_byte,
2533 RegExpFlags flags) {
2534 if (!IsIgnoreCase(flags)) return;
2535#ifdef V8_INTL_SUPPORT1
2536 if (NeedsUnicodeCaseEquivalents(flags)) return;
2537#endif
2538
2539 int element_count = elements()->length();
2540 for (int i = 0; i < element_count; i++) {
2541 TextElement elm = elements()->at(i);
2542 if (elm.text_type() == TextElement::CLASS_RANGES) {
2543 RegExpClassRanges* cr = elm.class_ranges();
2544 // None of the standard character classes is different in the case
2545 // independent case and it slows us down if we don't know that.
2546 if (cr->is_standard(zone())) continue;
2547 ZoneList<CharacterRange>* ranges = cr->ranges(zone());
2548 CharacterRange::AddCaseEquivalents(isolate, zone(), ranges, is_one_byte);
2549 }
2550 }
2551}
2552
2553int TextNode::GreedyLoopTextLength() { return Length(); }
2554
2555RegExpNode* TextNode::GetSuccessorOfOmnivorousTextNode(
2556 RegExpCompiler* compiler) {
2557 if (read_backward()) return nullptr;
2558 if (elements()->length() != 1) return nullptr;
2559 TextElement elm = elements()->at(0);
2560 if (elm.text_type() != TextElement::CLASS_RANGES) return nullptr;
2561 RegExpClassRanges* node = elm.class_ranges();
2562 ZoneList<CharacterRange>* ranges = node->ranges(zone());
2563 CharacterRange::Canonicalize(ranges);
2564 if (node->is_negated()) {
2565 return ranges->length() == 0 ? on_success() : nullptr;
2566 }
2567 if (ranges->length() != 1) return nullptr;
2568 const base::uc32 max_char = MaxCodeUnit(compiler->one_byte());
2569 return ranges->at(0).IsEverything(max_char) ? on_success() : nullptr;
2570}
2571
2572// Finds the fixed match length of a sequence of nodes that goes from
2573// this alternative and back to this choice node. If there are variable
2574// length nodes or other complications in the way then return a sentinel
2575// value indicating that a greedy loop cannot be constructed.
2576int ChoiceNode::GreedyLoopTextLengthForAlternative(
2577 GuardedAlternative* alternative) {
2578 int length = 0;
2579 RegExpNode* node = alternative->node();
2580 // Later we will generate code for all these text nodes using recursion
2581 // so we have to limit the max number.
2582 int recursion_depth = 0;
2583 while (node != this) {
2584 if (recursion_depth++ > RegExpCompiler::kMaxRecursion) {
2585 return kNodeIsTooComplexForGreedyLoops;
2586 }
2587 int node_length = node->GreedyLoopTextLength();
2588 if (node_length == kNodeIsTooComplexForGreedyLoops) {
2589 return kNodeIsTooComplexForGreedyLoops;
2590 }
2591 length += node_length;
2592 SeqRegExpNode* seq_node = static_cast<SeqRegExpNode*>(node);
2593 node = seq_node->on_success();
2594 }
2595 if (read_backward()) {
2596 length = -length;
2597 }
2598 // Check that we can jump by the whole text length. If not, return sentinel
2599 // to indicate the we can't construct a greedy loop.
2600 if (length < RegExpMacroAssembler::kMinCPOffset ||
2601 length > RegExpMacroAssembler::kMaxCPOffset) {
2602 return kNodeIsTooComplexForGreedyLoops;
2603 }
2604 return length;
2605}
2606
2607void LoopChoiceNode::AddLoopAlternative(GuardedAlternative alt) {
2608 DCHECK_NULL(loop_node_)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((loop_node_) == nullptr)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((loop_node_) == nullptr))), 0
))) { do { } while (false); MOZ_ReportAssertionFailure("(loop_node_) == nullptr"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 2608); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(loop_node_) == nullptr"
")"); do { *((volatile int*)__null) = 2608; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
2609 AddAlternative(alt);
2610 loop_node_ = alt.node();
2611}
2612
2613void LoopChoiceNode::AddContinueAlternative(GuardedAlternative alt) {
2614 DCHECK_NULL(continue_node_)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((continue_node_) == nullptr)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((continue_node_) == nullptr)
)), 0))) { do { } while (false); MOZ_ReportAssertionFailure("(continue_node_) == nullptr"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 2614); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(continue_node_) == nullptr"
")"); do { *((volatile int*)__null) = 2614; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
2615 AddAlternative(alt);
2616 continue_node_ = alt.node();
2617}
2618
2619void LoopChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
2620 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
2621 if (trace->stop_node() == this) {
1
Assuming the condition is false
2
Taking false branch
2622 // Back edge of greedy optimized loop node graph.
2623 int text_length =
2624 GreedyLoopTextLengthForAlternative(&(alternatives_->at(0)));
2625 DCHECK_NE(kNodeIsTooComplexForGreedyLoops, text_length)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((kNodeIsTooComplexForGreedyLoops) != (text_length))>
::isValid, "invalid assertion condition"); if ((__builtin_expect
(!!(!(!!((kNodeIsTooComplexForGreedyLoops) != (text_length)))
), 0))) { do { } while (false); MOZ_ReportAssertionFailure("(kNodeIsTooComplexForGreedyLoops) != (text_length)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 2625); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(kNodeIsTooComplexForGreedyLoops) != (text_length)"
")"); do { *((volatile int*)__null) = 2625; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
2626 // Update the counter-based backtracking info on the stack. This is an
2627 // optimization for greedy loops (see below).
2628 DCHECK(trace->cp_offset() == text_length)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(trace->cp_offset() == text_length)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(trace->cp_offset() == text_length
))), 0))) { do { } while (false); MOZ_ReportAssertionFailure(
"trace->cp_offset() == text_length", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 2628); AnnotateMozCrashReason("MOZ_ASSERT" "(" "trace->cp_offset() == text_length"
")"); do { *((volatile int*)__null) = 2628; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
2629 macro_assembler->AdvanceCurrentPosition(text_length);
2630 macro_assembler->GoTo(trace->loop_label());
2631 return;
2632 }
2633 DCHECK_NULL(trace->stop_node())do { static_assert( mozilla::detail::AssertionConditionType<
decltype((trace->stop_node()) == nullptr)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((trace->stop_node()) == nullptr
))), 0))) { do { } while (false); MOZ_ReportAssertionFailure(
"(trace->stop_node()) == nullptr", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 2633); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(trace->stop_node()) == nullptr"
")"); do { *((volatile int*)__null) = 2633; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
3
Assuming the condition is true
4
Taking false branch
5
Loop condition is false. Exiting loop
2634 if (!trace->is_trivial()) {
6
Taking false branch
2635 trace->Flush(compiler, this);
2636 return;
2637 }
2638 ChoiceNode::Emit(compiler, trace);
7
Calling 'ChoiceNode::Emit'
2639}
2640
2641int ChoiceNode::CalculatePreloadCharacters(RegExpCompiler* compiler,
2642 int eats_at_least) {
2643 int preload_characters = std::min(4, eats_at_least);
2644 DCHECK_LE(preload_characters, 4)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((preload_characters) <= (4))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((preload_characters) <= (
4)))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("(preload_characters) <= (4)", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 2644); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(preload_characters) <= (4)"
")"); do { *((volatile int*)__null) = 2644; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
2645 if (compiler->macro_assembler()->CanReadUnaligned()) {
2646 bool one_byte = compiler->one_byte();
2647 if (one_byte) {
2648 // We can't preload 3 characters because there is no machine instruction
2649 // to do that. We can't just load 4 because we could be reading
2650 // beyond the end of the string, which could cause a memory fault.
2651 if (preload_characters == 3) preload_characters = 2;
2652 } else {
2653 if (preload_characters > 2) preload_characters = 2;
2654 }
2655 } else {
2656 if (preload_characters > 1) preload_characters = 1;
2657 }
2658 return preload_characters;
2659}
2660
2661// This class is used when generating the alternatives in a choice node. It
2662// records the way the alternative is being code generated.
2663class AlternativeGeneration : public Malloced {
2664 public:
2665 AlternativeGeneration()
2666 : possible_success(),
2667 expects_preload(false),
2668 after(),
2669 quick_check_details() {}
2670 Label possible_success;
2671 bool expects_preload;
2672 Label after;
2673 QuickCheckDetails quick_check_details;
2674};
2675
2676// Creates a list of AlternativeGenerations. If the list has a reasonable
2677// size then it is on the stack, otherwise the excess is on the heap.
2678class AlternativeGenerationList {
2679 public:
2680 AlternativeGenerationList(int count, Zone* zone) : alt_gens_(count, zone) {
2681 for (int i = 0; i < count && i < kAFew; i++) {
2682 alt_gens_.Add(a_few_alt_gens_ + i, zone);
2683 }
2684 for (int i = kAFew; i < count; i++) {
2685 alt_gens_.Add(new AlternativeGeneration(), zone);
2686 }
2687 }
2688 ~AlternativeGenerationList() {
2689 for (int i = kAFew; i < alt_gens_.length(); i++) {
2690 delete alt_gens_[i];
2691 alt_gens_[i] = nullptr;
2692 }
2693 }
2694
2695 AlternativeGeneration* at(int i) { return alt_gens_[i]; }
2696
2697 private:
2698 static const int kAFew = 10;
2699 ZoneList<AlternativeGeneration*> alt_gens_;
2700 AlternativeGeneration a_few_alt_gens_[kAFew];
2701};
2702
2703void BoyerMoorePositionInfo::Set(int character) {
2704 SetInterval(Interval(character, character));
2705}
2706
2707namespace {
2708
2709ContainedInLattice AddRange(ContainedInLattice containment, const int* ranges,
2710 int ranges_length, Interval new_range) {
2711 DCHECK_EQ(1, ranges_length & 1)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((1) == (ranges_length & 1))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((1) == (ranges_length & 1
)))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("(1) == (ranges_length & 1)", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 2711); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(1) == (ranges_length & 1)"
")"); do { *((volatile int*)__null) = 2711; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
2712 DCHECK_EQ(String::kMaxCodePoint + 1, ranges[ranges_length - 1])do { static_assert( mozilla::detail::AssertionConditionType<
decltype((String::kMaxCodePoint + 1) == (ranges[ranges_length
- 1]))>::isValid, "invalid assertion condition"); if ((__builtin_expect
(!!(!(!!((String::kMaxCodePoint + 1) == (ranges[ranges_length
- 1])))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("(String::kMaxCodePoint + 1) == (ranges[ranges_length - 1])"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 2712); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(String::kMaxCodePoint + 1) == (ranges[ranges_length - 1])"
")"); do { *((volatile int*)__null) = 2712; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
2713 if (containment == kLatticeUnknown) return containment;
2714 bool inside = false;
2715 int last = 0;
2716 for (int i = 0; i < ranges_length; inside = !inside, last = ranges[i], i++) {
2717 // Consider the range from last to ranges[i].
2718 // We haven't got to the new range yet.
2719 if (ranges[i] <= new_range.from()) continue;
2720 // New range is wholly inside last-ranges[i]. Note that new_range.to() is
2721 // inclusive, but the values in ranges are not.
2722 if (last <= new_range.from() && new_range.to() < ranges[i]) {
2723 return Combine(containment, inside ? kLatticeIn : kLatticeOut);
2724 }
2725 return kLatticeUnknown;
2726 }
2727 return containment;
2728}
2729
2730int BitsetFirstSetBit(BoyerMoorePositionInfo::Bitset bitset) {
2731 static_assert(BoyerMoorePositionInfo::kMapSize ==
2732 2 * kInt64Size * kBitsPerByte);
2733
2734 // Slight fiddling is needed here, since the bitset is of length 128 while
2735 // CountTrailingZeros requires an integral type and std::bitset can only
2736 // convert to unsigned long long. So we handle the most- and least-significant
2737 // bits separately.
2738
2739 {
2740 static constexpr BoyerMoorePositionInfo::Bitset mask(~uint64_t{0});
2741 BoyerMoorePositionInfo::Bitset masked_bitset = bitset & mask;
2742 static_assert(kInt64Size >= sizeof(decltype(masked_bitset.to_ullong())));
2743 uint64_t lsb = masked_bitset.to_ullong();
2744 if (lsb != 0) return base::bits::CountTrailingZeros(lsb);
2745 }
2746
2747 {
2748 BoyerMoorePositionInfo::Bitset masked_bitset = bitset >> 64;
2749 uint64_t msb = masked_bitset.to_ullong();
2750 if (msb != 0) return 64 + base::bits::CountTrailingZeros(msb);
2751 }
2752
2753 return -1;
2754}
2755
2756} // namespace
2757
2758void BoyerMoorePositionInfo::SetInterval(const Interval& interval) {
2759 w_ = AddRange(w_, kWordRanges, kWordRangeCount, interval);
2760
2761 if (interval.size() >= kMapSize) {
2762 map_count_ = kMapSize;
2763 map_.set();
2764 return;
2765 }
2766
2767 for (int i = interval.from(); i <= interval.to(); i++) {
2768 int mod_character = (i & kMask);
2769 if (!map_[mod_character]) {
2770 map_count_++;
2771 map_.set(mod_character);
2772 }
2773 if (map_count_ == kMapSize) return;
2774 }
2775}
2776
2777void BoyerMoorePositionInfo::SetAll() {
2778 w_ = kLatticeUnknown;
2779 if (map_count_ != kMapSize) {
2780 map_count_ = kMapSize;
2781 map_.set();
2782 }
2783}
2784
2785BoyerMooreLookahead::BoyerMooreLookahead(int length, RegExpCompiler* compiler,
2786 Zone* zone)
2787 : length_(length),
2788 compiler_(compiler),
2789 max_char_(MaxCodeUnit(compiler->one_byte())) {
2790 bitmaps_ = zone->New<ZoneList<BoyerMoorePositionInfo*>>(length, zone);
2791 for (int i = 0; i < length; i++) {
2792 bitmaps_->Add(zone->New<BoyerMoorePositionInfo>(), zone);
2793 }
2794}
2795
2796// Find the longest range of lookahead that has the fewest number of different
2797// characters that can occur at a given position. Since we are optimizing two
2798// different parameters at once this is a tradeoff.
2799bool BoyerMooreLookahead::FindWorthwhileInterval(int* from, int* to) {
2800 int biggest_points = 0;
2801 // If more than 32 characters out of 128 can occur it is unlikely that we can
2802 // be lucky enough to step forwards much of the time.
2803 const int kMaxMax = 32;
2804 for (int max_number_of_chars = 4; max_number_of_chars < kMaxMax;
2805 max_number_of_chars *= 2) {
2806 biggest_points =
2807 FindBestInterval(max_number_of_chars, biggest_points, from, to);
2808 }
2809 if (biggest_points == 0) return false;
2810 return true;
2811}
2812
2813// Find the highest-points range between 0 and length_ where the character
2814// information is not too vague. 'Too vague' means that there are more than
2815// max_number_of_chars that can occur at this position. Calculates the number
2816// of points as the product of width-of-the-range and
2817// probability-of-finding-one-of-the-characters, where the probability is
2818// calculated using the frequency distribution of the sample subject string.
2819int BoyerMooreLookahead::FindBestInterval(int max_number_of_chars,
2820 int old_biggest_points, int* from,
2821 int* to) {
2822 int biggest_points = old_biggest_points;
2823 static const int kSize = RegExpMacroAssembler::kTableSize;
2824 for (int i = 0; i < length_;) {
2825 while (i < length_ && Count(i) > max_number_of_chars) i++;
2826 if (i == length_) break;
2827 int remembered_from = i;
2828
2829 BoyerMoorePositionInfo::Bitset union_bitset;
2830 for (; i < length_ && Count(i) <= max_number_of_chars; i++) {
2831 union_bitset |= bitmaps_->at(i)->raw_bitset();
2832 }
2833
2834 int frequency = 0;
2835
2836 // Iterate only over set bits.
2837 int j;
2838 while ((j = BitsetFirstSetBit(union_bitset)) != -1) {
2839 DCHECK(union_bitset[j])do { static_assert( mozilla::detail::AssertionConditionType<
decltype(union_bitset[j])>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(union_bitset[j]))), 0))) { do
{ } while (false); MOZ_ReportAssertionFailure("union_bitset[j]"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 2839); AnnotateMozCrashReason("MOZ_ASSERT" "(" "union_bitset[j]"
")"); do { *((volatile int*)__null) = 2839; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
; // Sanity check.
2840 // Add 1 to the frequency to give a small per-character boost for
2841 // the cases where our sampling is not good enough and many
2842 // characters have a frequency of zero. This means the frequency
2843 // can theoretically be up to 2*kSize though we treat it mostly as
2844 // a fraction of kSize.
2845 frequency += compiler_->frequency_collator()->Frequency(j) + 1;
2846 union_bitset.reset(j);
2847 }
2848
2849 // We use the probability of skipping times the distance we are skipping to
2850 // judge the effectiveness of this. Actually we have a cut-off: By
2851 // dividing by 2 we switch off the skipping if the probability of skipping
2852 // is less than 50%. This is because the multibyte mask-and-compare
2853 // skipping in quickcheck is more likely to do well on this case.
2854 bool in_quickcheck_range =
2855 ((i - remembered_from < 4) ||
2856 (compiler_->one_byte() ? remembered_from <= 4 : remembered_from <= 2));
2857 // Called 'probability' but it is only a rough estimate and can actually
2858 // be outside the 0-kSize range.
2859 int probability = (in_quickcheck_range ? kSize / 2 : kSize) - frequency;
2860 int points = (i - remembered_from) * probability;
2861 if (points > biggest_points) {
2862 *from = remembered_from;
2863 *to = i - 1;
2864 biggest_points = points;
2865 }
2866 }
2867 return biggest_points;
2868}
2869
2870// Take all the characters that will not prevent a successful match if they
2871// occur in the subject string in the range between min_lookahead and
2872// max_lookahead (inclusive) measured from the current position. If the
2873// character at max_lookahead offset is not one of these characters, then we
2874// can safely skip forwards by the number of characters in the range.
2875int BoyerMooreLookahead::GetSkipTable(int min_lookahead, int max_lookahead,
2876 Handle<ByteArray> boolean_skip_table) {
2877 const int kSkipArrayEntry = 0;
2878 const int kDontSkipArrayEntry = 1;
2879
2880 std::memset(boolean_skip_table->begin(), kSkipArrayEntry,
2881 boolean_skip_table->length());
2882
2883 for (int i = max_lookahead; i >= min_lookahead; i--) {
2884 BoyerMoorePositionInfo::Bitset bitset = bitmaps_->at(i)->raw_bitset();
2885
2886 // Iterate only over set bits.
2887 int j;
2888 while ((j = BitsetFirstSetBit(bitset)) != -1) {
2889 DCHECK(bitset[j])do { static_assert( mozilla::detail::AssertionConditionType<
decltype(bitset[j])>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(bitset[j]))), 0))) { do { } while
(false); MOZ_ReportAssertionFailure("bitset[j]", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 2889); AnnotateMozCrashReason("MOZ_ASSERT" "(" "bitset[j]" ")"
); do { *((volatile int*)__null) = 2889; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
; // Sanity check.
2890 boolean_skip_table->set(j, kDontSkipArrayEntry);
2891 bitset.reset(j);
2892 }
2893 }
2894
2895 const int skip = max_lookahead + 1 - min_lookahead;
2896 return skip;
2897}
2898
2899// See comment above on the implementation of GetSkipTable.
2900void BoyerMooreLookahead::EmitSkipInstructions(RegExpMacroAssembler* masm) {
2901 const int kSize = RegExpMacroAssembler::kTableSize;
2902
2903 int min_lookahead = 0;
2904 int max_lookahead = 0;
2905
2906 if (!FindWorthwhileInterval(&min_lookahead, &max_lookahead)) return;
2907
2908 // Check if we only have a single non-empty position info, and that info
2909 // contains precisely one character.
2910 bool found_single_character = false;
2911 int single_character = 0;
2912 for (int i = max_lookahead; i >= min_lookahead; i--) {
2913 BoyerMoorePositionInfo* map = bitmaps_->at(i);
2914 if (map->map_count() == 0) continue;
2915
2916 if (found_single_character || map->map_count() > 1) {
2917 found_single_character = false;
2918 break;
2919 }
2920
2921 DCHECK(!found_single_character)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(!found_single_character)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(!found_single_character))), 0
))) { do { } while (false); MOZ_ReportAssertionFailure("!found_single_character"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 2921); AnnotateMozCrashReason("MOZ_ASSERT" "(" "!found_single_character"
")"); do { *((volatile int*)__null) = 2921; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
2922 DCHECK_EQ(map->map_count(), 1)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((map->map_count()) == (1))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((map->map_count()) == (1)
))), 0))) { do { } while (false); MOZ_ReportAssertionFailure(
"(map->map_count()) == (1)", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 2922); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(map->map_count()) == (1)"
")"); do { *((volatile int*)__null) = 2922; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
2923
2924 found_single_character = true;
2925 single_character = BitsetFirstSetBit(map->raw_bitset());
2926
2927 DCHECK_NE(single_character, -1)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((single_character) != (-1))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((single_character) != (-1)))
), 0))) { do { } while (false); MOZ_ReportAssertionFailure("(single_character) != (-1)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 2927); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(single_character) != (-1)"
")"); do { *((volatile int*)__null) = 2927; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
2928 }
2929
2930 int lookahead_width = max_lookahead + 1 - min_lookahead;
2931
2932 if (found_single_character && lookahead_width == 1 && max_lookahead < 3) {
2933 // The mask-compare can probably handle this better.
2934 return;
2935 }
2936
2937 if (found_single_character) {
2938 Label cont, again;
2939 masm->Bind(&again);
2940 masm->LoadCurrentCharacter(max_lookahead, &cont, true);
2941 if (max_char_ > kSize) {
2942 masm->CheckCharacterAfterAnd(single_character,
2943 RegExpMacroAssembler::kTableMask, &cont);
2944 } else {
2945 masm->CheckCharacter(single_character, &cont);
2946 }
2947 masm->AdvanceCurrentPosition(lookahead_width);
2948 masm->GoTo(&again);
2949 masm->Bind(&cont);
2950 return;
2951 }
2952
2953 Factory* factory = masm->isolate()->factory();
2954 Handle<ByteArray> boolean_skip_table =
2955 factory->NewByteArray(kSize, AllocationType::kOld);
2956 int skip_distance =
2957 GetSkipTable(min_lookahead, max_lookahead, boolean_skip_table);
2958 DCHECK_NE(0, skip_distance)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((0) != (skip_distance))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((0) != (skip_distance)))), 0
))) { do { } while (false); MOZ_ReportAssertionFailure("(0) != (skip_distance)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 2958); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(0) != (skip_distance)"
")"); do { *((volatile int*)__null) = 2958; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
2959
2960 Label cont, again;
2961 masm->Bind(&again);
2962 masm->LoadCurrentCharacter(max_lookahead, &cont, true);
2963 masm->CheckBitInTable(boolean_skip_table, &cont);
2964 masm->AdvanceCurrentPosition(skip_distance);
2965 masm->GoTo(&again);
2966 masm->Bind(&cont);
2967}
2968
2969/* Code generation for choice nodes.
2970 *
2971 * We generate quick checks that do a mask and compare to eliminate a
2972 * choice. If the quick check succeeds then it jumps to the continuation to
2973 * do slow checks and check subsequent nodes. If it fails (the common case)
2974 * it falls through to the next choice.
2975 *
2976 * Here is the desired flow graph. Nodes directly below each other imply
2977 * fallthrough. Alternatives 1 and 2 have quick checks. Alternative
2978 * 3 doesn't have a quick check so we have to call the slow check.
2979 * Nodes are marked Qn for quick checks and Sn for slow checks. The entire
2980 * regexp continuation is generated directly after the Sn node, up to the
2981 * next GoTo if we decide to reuse some already generated code. Some
2982 * nodes expect preload_characters to be preloaded into the current
2983 * character register. R nodes do this preloading. Vertices are marked
2984 * F for failures and S for success (possible success in the case of quick
2985 * nodes). L, V, < and > are used as arrow heads.
2986 *
2987 * ----------> R
2988 * |
2989 * V
2990 * Q1 -----> S1
2991 * | S /
2992 * F| /
2993 * | F/
2994 * | /
2995 * | R
2996 * | /
2997 * V L
2998 * Q2 -----> S2
2999 * | S /
3000 * F| /
3001 * | F/
3002 * | /
3003 * | R
3004 * | /
3005 * V L
3006 * S3
3007 * |
3008 * F|
3009 * |
3010 * R
3011 * |
3012 * backtrack V
3013 * <----------Q4
3014 * \ F |
3015 * \ |S
3016 * \ F V
3017 * \-----S4
3018 *
3019 * For greedy loops we push the current position, then generate the code that
3020 * eats the input specially in EmitGreedyLoop. The other choice (the
3021 * continuation) is generated by the normal code in EmitChoices, and steps back
3022 * in the input to the starting position when it fails to match. The loop code
3023 * looks like this (U is the unwind code that steps back in the greedy loop).
3024 *
3025 * _____
3026 * / \
3027 * V |
3028 * ----------> S1 |
3029 * /| |
3030 * / |S |
3031 * F/ \_____/
3032 * /
3033 * |<-----
3034 * | \
3035 * V |S
3036 * Q2 ---> U----->backtrack
3037 * | F /
3038 * S| /
3039 * V F /
3040 * S2--/
3041 */
3042
3043GreedyLoopState::GreedyLoopState(bool not_at_start) {
3044 counter_backtrack_trace_.set_backtrack(&label_);
3045 if (not_at_start) counter_backtrack_trace_.set_at_start(Trace::FALSE_VALUE);
3046}
3047
3048void ChoiceNode::AssertGuardsMentionRegisters(Trace* trace) {
3049#ifdef DEBUG1
3050 int choice_count = alternatives_->length();
3051 for (int i = 0; i < choice_count - 1; i++) {
3052 GuardedAlternative alternative = alternatives_->at(i);
3053 ZoneList<Guard*>* guards = alternative.guards();
3054 int guard_count = (guards == nullptr) ? 0 : guards->length();
3055 for (int j = 0; j < guard_count; j++) {
3056 DCHECK(!trace->mentions_reg(guards->at(j)->reg()))do { static_assert( mozilla::detail::AssertionConditionType<
decltype(!trace->mentions_reg(guards->at(j)->reg()))
>::isValid, "invalid assertion condition"); if ((__builtin_expect
(!!(!(!!(!trace->mentions_reg(guards->at(j)->reg()))
)), 0))) { do { } while (false); MOZ_ReportAssertionFailure("!trace->mentions_reg(guards->at(j)->reg())"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 3056); AnnotateMozCrashReason("MOZ_ASSERT" "(" "!trace->mentions_reg(guards->at(j)->reg())"
")"); do { *((volatile int*)__null) = 3056; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
3057 }
3058 }
3059#endif
3060}
3061
3062void ChoiceNode::SetUpPreLoad(RegExpCompiler* compiler, Trace* current_trace,
3063 PreloadState* state) {
3064 if (state->eats_at_least_ == PreloadState::kEatsAtLeastNotYetInitialized) {
3065 // Save some time by looking at most one machine word ahead.
3066 state->eats_at_least_ =
3067 EatsAtLeast(current_trace->at_start() == Trace::FALSE_VALUE);
3068 }
3069 state->preload_characters_ =
3070 CalculatePreloadCharacters(compiler, state->eats_at_least_);
3071
3072 state->preload_is_current_ =
3073 (current_trace->characters_preloaded() == state->preload_characters_);
3074 state->preload_has_checked_bounds_ = state->preload_is_current_;
3075}
3076
3077void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
3078 int choice_count = alternatives_->length();
3079
3080 if (choice_count == 1 && alternatives_->at(0).guards() == nullptr) {
8
Assuming 'choice_count' is not equal to 1
3081 alternatives_->at(0).node()->Emit(compiler, trace);
3082 return;
3083 }
3084
3085 AssertGuardsMentionRegisters(trace);
3086
3087 LimitResult limit_result = LimitVersions(compiler, trace);
3088 if (limit_result == DONE) return;
9
Assuming 'limit_result' is not equal to DONE
10
Taking false branch
3089 DCHECK(limit_result == CONTINUE)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(limit_result == CONTINUE)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(limit_result == CONTINUE))),
0))) { do { } while (false); MOZ_ReportAssertionFailure("limit_result == CONTINUE"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 3089); AnnotateMozCrashReason("MOZ_ASSERT" "(" "limit_result == CONTINUE"
")"); do { *((volatile int*)__null) = 3089; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
11
Assuming 'limit_result' is equal to CONTINUE
12
Taking false branch
3090
3091 // For loop nodes we already flushed (see LoopChoiceNode::Emit), but for
3092 // other choice nodes we only flush if we are out of code size budget.
3093 if (trace->flush_budget() == 0 && trace->actions() != nullptr) {
13
Loop condition is false. Exiting loop
14
Assuming the condition is false
3094 trace->Flush(compiler, this);
3095 return;
3096 }
3097
3098 RecursionCheck rc(compiler);
3099
3100 PreloadState preload;
3101 preload.init();
3102 GreedyLoopState greedy_loop_state(not_at_start());
3103
3104 int text_length = GreedyLoopTextLengthForAlternative(&alternatives_->at(0));
3105 AlternativeGenerationList alt_gens(choice_count, zone());
3106
3107 if (choice_count
14.1
'choice_count' is <= 1
> 1 && text_length != kNodeIsTooComplexForGreedyLoops) {
3108 trace = EmitGreedyLoop(compiler, trace, &alt_gens, &preload,
3109 &greedy_loop_state, text_length);
3110 } else {
3111 // TODO(erikcorry): Delete this. We don't need this label, but it makes us
3112 // match the traces produced pre-cleanup.
3113 Label second_choice;
3114 compiler->macro_assembler()->Bind(&second_choice);
3115
3116 preload.eats_at_least_ = EmitOptimizedUnanchoredSearch(compiler, trace);
3117
3118 EmitChoices(compiler, &alt_gens, 0, trace, &preload);
15
Calling 'ChoiceNode::EmitChoices'
3119 }
3120
3121 // At this point we need to generate slow checks for the alternatives where
3122 // the quick check was inlined. We can recognize these because the associated
3123 // label was bound.
3124 int new_flush_budget = trace->flush_budget() / choice_count;
3125 for (int i = 0; i < choice_count; i++) {
3126 AlternativeGeneration* alt_gen = alt_gens.at(i);
3127 Trace new_trace(*trace);
3128 // If there are actions to be flushed we have to limit how many times
3129 // they are flushed. Take the budget of the parent trace and distribute
3130 // it fairly amongst the children.
3131 if (new_trace.actions() != nullptr) {
3132 new_trace.set_flush_budget(new_flush_budget);
3133 }
3134 bool next_expects_preload =
3135 i == choice_count - 1 ? false : alt_gens.at(i + 1)->expects_preload;
3136 EmitOutOfLineContinuation(compiler, &new_trace, alternatives_->at(i),
3137 alt_gen, preload.preload_characters_,
3138 next_expects_preload);
3139 }
3140}
3141
3142Trace* ChoiceNode::EmitGreedyLoop(RegExpCompiler* compiler, Trace* trace,
3143 AlternativeGenerationList* alt_gens,
3144 PreloadState* preload,
3145 GreedyLoopState* greedy_loop_state,
3146 int text_length) {
3147 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
3148 // Here we have special handling for greedy loops containing only text nodes
3149 // and other simple nodes. These are handled by pushing the current
3150 // position on the stack and then incrementing the current position each
3151 // time around the switch. On backtrack we decrement the current position
3152 // and check it against the pushed value. This avoids pushing backtrack
3153 // information for each iteration of the loop, which could take up a lot of
3154 // space.
3155 DCHECK(trace->stop_node() == nullptr)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(trace->stop_node() == nullptr)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(trace->stop_node() == nullptr
))), 0))) { do { } while (false); MOZ_ReportAssertionFailure(
"trace->stop_node() == nullptr", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 3155); AnnotateMozCrashReason("MOZ_ASSERT" "(" "trace->stop_node() == nullptr"
")"); do { *((volatile int*)__null) = 3155; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
3156 macro_assembler->PushCurrentPosition();
3157 Label greedy_match_failed;
3158 Trace greedy_match_trace;
3159 if (not_at_start()) greedy_match_trace.set_at_start(Trace::FALSE_VALUE);
3160 greedy_match_trace.set_backtrack(&greedy_match_failed);
3161 Label loop_label;
3162 macro_assembler->Bind(&loop_label);
3163 greedy_match_trace.set_stop_node(this);
3164 greedy_match_trace.set_loop_label(&loop_label);
3165 alternatives_->at(0).node()->Emit(compiler, &greedy_match_trace);
3166 macro_assembler->Bind(&greedy_match_failed);
3167
3168 Label second_choice; // For use in greedy matches.
3169 macro_assembler->Bind(&second_choice);
3170
3171 Trace* new_trace = greedy_loop_state->counter_backtrack_trace();
3172
3173 EmitChoices(compiler, alt_gens, 1, new_trace, preload);
3174
3175 macro_assembler->Bind(greedy_loop_state->label());
3176 // If we have unwound to the bottom then backtrack.
3177 macro_assembler->CheckGreedyLoop(trace->backtrack());
3178 // Otherwise try the second priority at an earlier position.
3179 macro_assembler->AdvanceCurrentPosition(-text_length);
3180 macro_assembler->GoTo(&second_choice);
3181 return new_trace;
3182}
3183
3184int ChoiceNode::EmitOptimizedUnanchoredSearch(RegExpCompiler* compiler,
3185 Trace* trace) {
3186 int eats_at_least = PreloadState::kEatsAtLeastNotYetInitialized;
3187 if (alternatives_->length() != 2) return eats_at_least;
3188
3189 GuardedAlternative alt1 = alternatives_->at(1);
3190 if (alt1.guards() != nullptr && alt1.guards()->length() != 0) {
3191 return eats_at_least;
3192 }
3193 RegExpNode* eats_anything_node = alt1.node();
3194 if (eats_anything_node->GetSuccessorOfOmnivorousTextNode(compiler) != this) {
3195 return eats_at_least;
3196 }
3197
3198 // Really we should be creating a new trace when we execute this function,
3199 // but there is no need, because the code it generates cannot backtrack, and
3200 // we always arrive here with a trivial trace (since it's the entry to a
3201 // loop. That also implies that there are no preloaded characters, which is
3202 // good, because it means we won't be violating any assumptions by
3203 // overwriting those characters with new load instructions.
3204 DCHECK(trace->is_trivial())do { static_assert( mozilla::detail::AssertionConditionType<
decltype(trace->is_trivial())>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(trace->is_trivial()))), 0
))) { do { } while (false); MOZ_ReportAssertionFailure("trace->is_trivial()"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 3204); AnnotateMozCrashReason("MOZ_ASSERT" "(" "trace->is_trivial()"
")"); do { *((volatile int*)__null) = 3204; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
3205
3206 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
3207 Isolate* isolate = macro_assembler->isolate();
3208 // At this point we know that we are at a non-greedy loop that will eat
3209 // any character one at a time. Any non-anchored regexp has such a
3210 // loop prepended to it in order to find where it starts. We look for
3211 // a pattern of the form ...abc... where we can look 6 characters ahead
3212 // and step forwards 3 if the character is not one of abc. Abc need
3213 // not be atoms, they can be any reasonably limited character class or
3214 // small alternation.
3215 BoyerMooreLookahead* bm = bm_info(false);
3216 if (bm == nullptr) {
3217 eats_at_least = std::min(kMaxLookaheadForBoyerMoore, EatsAtLeast(false));
3218 if (eats_at_least >= 1) {
3219 bm = zone()->New<BoyerMooreLookahead>(eats_at_least, compiler, zone());
3220 GuardedAlternative alt0 = alternatives_->at(0);
3221 alt0.node()->FillInBMInfo(isolate, 0, kRecursionBudget, bm, false);
3222 }
3223 }
3224 if (bm != nullptr) {
3225 bm->EmitSkipInstructions(macro_assembler);
3226 }
3227 return eats_at_least;
3228}
3229
3230void ChoiceNode::EmitChoices(RegExpCompiler* compiler,
3231 AlternativeGenerationList* alt_gens,
3232 int first_choice, Trace* trace,
3233 PreloadState* preload) {
3234 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
3235 SetUpPreLoad(compiler, trace, preload);
3236
3237 // For now we just call all choices one after the other. The idea ultimately
3238 // is to use the Dispatch table to try only the relevant ones.
3239 int choice_count = alternatives_->length();
3240
3241 int new_flush_budget = trace->flush_budget() / choice_count;
3242
3243 for (int i = first_choice; i < choice_count; i++) {
16
Assuming 'i' is < 'choice_count'
17
Loop condition is true. Entering loop body
3244 bool is_last = i == choice_count - 1;
18
Assuming the condition is false
3245 bool fall_through_on_failure = !is_last;
3246 GuardedAlternative alternative = alternatives_->at(i);
3247 AlternativeGeneration* alt_gen = alt_gens->at(i);
3248 alt_gen->quick_check_details.set_characters(preload->preload_characters_);
3249 ZoneList<Guard*>* guards = alternative.guards();
3250 int guard_count = (guards == nullptr) ? 0 : guards->length();
19
Assuming the condition is false
20
'?' condition is false
3251 Trace new_trace(*trace);
3252 new_trace.set_characters_preloaded(
3253 preload->preload_is_current_
20.1
Field 'preload_is_current_' is false
? preload->preload_characters_ : 0);
21
'?' condition is false
3254 if (preload->preload_has_checked_bounds_
21.1
Field 'preload_has_checked_bounds_' is false
) {
22
Taking false branch
3255 new_trace.set_bound_checked_up_to(preload->preload_characters_);
3256 }
3257 new_trace.quick_check_performed()->Clear();
3258 if (not_at_start_) new_trace.set_at_start(Trace::FALSE_VALUE);
23
Assuming field 'not_at_start_' is false
24
Taking false branch
3259 if (!is_last
24.1
'is_last' is false
) {
25
Taking true branch
3260 new_trace.set_backtrack(&alt_gen->after);
3261 }
3262 alt_gen->expects_preload = preload->preload_is_current_;
3263 bool generate_full_check_inline = false;
3264 if (compiler->optimize() &&
26
Assuming the condition is true
3265 try_to_emit_quick_check_for_alternative(i == 0) &&
27
Assuming the condition is true
3266 alternative.node()->EmitQuickCheck(
28
Calling 'RegExpNode::EmitQuickCheck'
3267 compiler, trace, &new_trace, preload->preload_has_checked_bounds_,
3268 &alt_gen->possible_success, &alt_gen->quick_check_details,
3269 fall_through_on_failure, this)) {
3270 // Quick check was generated for this choice.
3271 preload->preload_is_current_ = true;
3272 preload->preload_has_checked_bounds_ = true;
3273 // If we generated the quick check to fall through on possible success,
3274 // we now need to generate the full check inline.
3275 if (!fall_through_on_failure) {
3276 macro_assembler->Bind(&alt_gen->possible_success);
3277 new_trace.set_quick_check_performed(&alt_gen->quick_check_details);
3278 new_trace.set_characters_preloaded(preload->preload_characters_);
3279 new_trace.set_bound_checked_up_to(preload->preload_characters_);
3280 generate_full_check_inline = true;
3281 }
3282 } else if (alt_gen->quick_check_details.cannot_match()) {
3283 if (!fall_through_on_failure) {
3284 macro_assembler->GoTo(trace->backtrack());
3285 }
3286 continue;
3287 } else {
3288 // No quick check was generated. Put the full code here.
3289 // If this is not the first choice then there could be slow checks from
3290 // previous cases that go here when they fail. There's no reason to
3291 // insist that they preload characters since the slow check we are about
3292 // to generate probably can't use it.
3293 if (i != first_choice) {
3294 alt_gen->expects_preload = false;
3295 new_trace.InvalidateCurrentCharacter();
3296 }
3297 generate_full_check_inline = true;
3298 }
3299 if (generate_full_check_inline) {
3300 if (new_trace.actions() != nullptr) {
3301 new_trace.set_flush_budget(new_flush_budget);
3302 }
3303 for (int j = 0; j < guard_count; j++) {
3304 GenerateGuard(macro_assembler, guards->at(j), &new_trace);
3305 }
3306 alternative.node()->Emit(compiler, &new_trace);
3307 preload->preload_is_current_ = false;
3308 }
3309 macro_assembler->Bind(&alt_gen->after);
3310 }
3311}
3312
3313void ChoiceNode::EmitOutOfLineContinuation(RegExpCompiler* compiler,
3314 Trace* trace,
3315 GuardedAlternative alternative,
3316 AlternativeGeneration* alt_gen,
3317 int preload_characters,
3318 bool next_expects_preload) {
3319 if (!alt_gen->possible_success.is_linked()) return;
3320
3321 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
3322 macro_assembler->Bind(&alt_gen->possible_success);
3323 Trace out_of_line_trace(*trace);
3324 out_of_line_trace.set_characters_preloaded(preload_characters);
3325 out_of_line_trace.set_quick_check_performed(&alt_gen->quick_check_details);
3326 if (not_at_start_) out_of_line_trace.set_at_start(Trace::FALSE_VALUE);
3327 ZoneList<Guard*>* guards = alternative.guards();
3328 int guard_count = (guards == nullptr) ? 0 : guards->length();
3329 if (next_expects_preload) {
3330 Label reload_current_char;
3331 out_of_line_trace.set_backtrack(&reload_current_char);
3332 for (int j = 0; j < guard_count; j++) {
3333 GenerateGuard(macro_assembler, guards->at(j), &out_of_line_trace);
3334 }
3335 alternative.node()->Emit(compiler, &out_of_line_trace);
3336 macro_assembler->Bind(&reload_current_char);
3337 // Reload the current character, since the next quick check expects that.
3338 // We don't need to check bounds here because we only get into this
3339 // code through a quick check which already did the checked load.
3340 macro_assembler->LoadCurrentCharacter(trace->cp_offset(), nullptr, false,
3341 preload_characters);
3342 macro_assembler->GoTo(&(alt_gen->after));
3343 } else {
3344 out_of_line_trace.set_backtrack(&(alt_gen->after));
3345 for (int j = 0; j < guard_count; j++) {
3346 GenerateGuard(macro_assembler, guards->at(j), &out_of_line_trace);
3347 }
3348 alternative.node()->Emit(compiler, &out_of_line_trace);
3349 }
3350}
3351
3352void ActionNode::Emit(RegExpCompiler* compiler, Trace* trace) {
3353 RegExpMacroAssembler* assembler = compiler->macro_assembler();
3354 LimitResult limit_result = LimitVersions(compiler, trace);
3355 if (limit_result == DONE) return;
3356 DCHECK(limit_result == CONTINUE)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(limit_result == CONTINUE)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(limit_result == CONTINUE))),
0))) { do { } while (false); MOZ_ReportAssertionFailure("limit_result == CONTINUE"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 3356); AnnotateMozCrashReason("MOZ_ASSERT" "(" "limit_result == CONTINUE"
")"); do { *((volatile int*)__null) = 3356; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
3357
3358 RecursionCheck rc(compiler);
3359
3360 switch (action_type_) {
3361 case STORE_POSITION: {
3362 Trace::DeferredCapture new_capture(data_.u_position_register.reg,
3363 data_.u_position_register.is_capture,
3364 trace);
3365 Trace new_trace = *trace;
3366 new_trace.add_action(&new_capture);
3367 on_success()->Emit(compiler, &new_trace);
3368 break;
3369 }
3370 case INCREMENT_REGISTER: {
3371 Trace::DeferredIncrementRegister new_increment(
3372 data_.u_increment_register.reg);
3373 Trace new_trace = *trace;
3374 new_trace.add_action(&new_increment);
3375 on_success()->Emit(compiler, &new_trace);
3376 break;
3377 }
3378 case SET_REGISTER_FOR_LOOP: {
3379 Trace::DeferredSetRegisterForLoop new_set(data_.u_store_register.reg,
3380 data_.u_store_register.value);
3381 Trace new_trace = *trace;
3382 new_trace.add_action(&new_set);
3383 on_success()->Emit(compiler, &new_trace);
3384 break;
3385 }
3386 case CLEAR_CAPTURES: {
3387 Trace::DeferredClearCaptures new_capture(Interval(
3388 data_.u_clear_captures.range_from, data_.u_clear_captures.range_to));
3389 Trace new_trace = *trace;
3390 new_trace.add_action(&new_capture);
3391 on_success()->Emit(compiler, &new_trace);
3392 break;
3393 }
3394 case BEGIN_POSITIVE_SUBMATCH:
3395 case BEGIN_NEGATIVE_SUBMATCH:
3396 if (!trace->is_trivial()) {
3397 trace->Flush(compiler, this);
3398 } else {
3399 assembler->WriteCurrentPositionToRegister(
3400 data_.u_submatch.current_position_register, 0);
3401 assembler->WriteStackPointerToRegister(
3402 data_.u_submatch.stack_pointer_register);
3403 on_success()->Emit(compiler, trace);
3404 }
3405 break;
3406 case EMPTY_MATCH_CHECK: {
3407 int start_pos_reg = data_.u_empty_match_check.start_register;
3408 int stored_pos = 0;
3409 int rep_reg = data_.u_empty_match_check.repetition_register;
3410 bool has_minimum = (rep_reg != RegExpCompiler::kNoRegister);
3411 bool know_dist = trace->GetStoredPosition(start_pos_reg, &stored_pos);
3412 if (know_dist && !has_minimum && stored_pos == trace->cp_offset()) {
3413 // If we know we haven't advanced and there is no minimum we
3414 // can just backtrack immediately.
3415 assembler->GoTo(trace->backtrack());
3416 } else if (know_dist && stored_pos < trace->cp_offset()) {
3417 // If we know we've advanced we can generate the continuation
3418 // immediately.
3419 on_success()->Emit(compiler, trace);
3420 } else if (!trace->is_trivial()) {
3421 trace->Flush(compiler, this);
3422 } else {
3423 Label skip_empty_check;
3424 // If we have a minimum number of repetitions we check the current
3425 // number first and skip the empty check if it's not enough.
3426 if (has_minimum) {
3427 int limit = data_.u_empty_match_check.repetition_limit;
3428 assembler->IfRegisterLT(rep_reg, limit, &skip_empty_check);
3429 }
3430 // If the match is empty we bail out, otherwise we fall through
3431 // to the on-success continuation.
3432 assembler->IfRegisterEqPos(data_.u_empty_match_check.start_register,
3433 trace->backtrack());
3434 assembler->Bind(&skip_empty_check);
3435 on_success()->Emit(compiler, trace);
3436 }
3437 break;
3438 }
3439 case POSITIVE_SUBMATCH_SUCCESS: {
3440 if (!trace->is_trivial()) {
3441 trace->Flush(compiler, this);
3442 return;
3443 }
3444 assembler->ReadCurrentPositionFromRegister(
3445 data_.u_submatch.current_position_register);
3446 assembler->ReadStackPointerFromRegister(
3447 data_.u_submatch.stack_pointer_register);
3448 int clear_register_count = data_.u_submatch.clear_register_count;
3449 if (clear_register_count == 0) {
3450 on_success()->Emit(compiler, trace);
3451 return;
3452 }
3453 int clear_registers_from = data_.u_submatch.clear_register_from;
3454 Label clear_registers_backtrack;
3455 Trace new_trace = *trace;
3456 new_trace.set_backtrack(&clear_registers_backtrack);
3457 on_success()->Emit(compiler, &new_trace);
3458
3459 assembler->Bind(&clear_registers_backtrack);
3460 int clear_registers_to = clear_registers_from + clear_register_count - 1;
3461 assembler->ClearRegisters(clear_registers_from, clear_registers_to);
3462
3463 DCHECK(trace->backtrack() == nullptr)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(trace->backtrack() == nullptr)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(trace->backtrack() == nullptr
))), 0))) { do { } while (false); MOZ_ReportAssertionFailure(
"trace->backtrack() == nullptr", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 3463); AnnotateMozCrashReason("MOZ_ASSERT" "(" "trace->backtrack() == nullptr"
")"); do { *((volatile int*)__null) = 3463; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
3464 assembler->Backtrack();
3465 return;
3466 }
3467 case MODIFY_FLAGS: {
3468 compiler->set_flags(flags());
3469 on_success()->Emit(compiler, trace);
3470 break;
3471 }
3472 default:
3473 UNREACHABLE()do { do { } while (false); MOZ_ReportCrash("" "unreachable code"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 3473); AnnotateMozCrashReason("MOZ_CRASH(" "unreachable code"
")"); do { *((volatile int*)__null) = 3473; __attribute__((nomerge
)) ::abort(); } while (false); } while (false)
;
3474 }
3475}
3476
3477void BackReferenceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
3478 RegExpMacroAssembler* assembler = compiler->macro_assembler();
3479 if (!trace->is_trivial()) {
3480 trace->Flush(compiler, this);
3481 return;
3482 }
3483
3484 LimitResult limit_result = LimitVersions(compiler, trace);
3485 if (limit_result == DONE) return;
3486 DCHECK(limit_result == CONTINUE)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(limit_result == CONTINUE)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(limit_result == CONTINUE))),
0))) { do { } while (false); MOZ_ReportAssertionFailure("limit_result == CONTINUE"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 3486); AnnotateMozCrashReason("MOZ_ASSERT" "(" "limit_result == CONTINUE"
")"); do { *((volatile int*)__null) = 3486; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
3487
3488 RecursionCheck rc(compiler);
3489
3490 DCHECK_EQ(start_reg_ + 1, end_reg_)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((start_reg_ + 1) == (end_reg_))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((start_reg_ + 1) == (end_reg_
)))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("(start_reg_ + 1) == (end_reg_)", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 3490); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(start_reg_ + 1) == (end_reg_)"
")"); do { *((volatile int*)__null) = 3490; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
3491 if (IsIgnoreCase(compiler->flags())) {
3492 bool unicode = IsEitherUnicode(compiler->flags());
3493 assembler->CheckNotBackReferenceIgnoreCase(start_reg_, read_backward(),
3494 unicode, trace->backtrack());
3495 } else {
3496 assembler->CheckNotBackReference(start_reg_, read_backward(),
3497 trace->backtrack());
3498 }
3499 // We are going to advance backward, so we may end up at the start.
3500 if (read_backward()) trace->set_at_start(Trace::UNKNOWN);
3501
3502 // Check that the back reference does not end inside a surrogate pair.
3503 if (IsEitherUnicode(compiler->flags()) && !compiler->one_byte()) {
3504 assembler->CheckNotInSurrogatePair(trace->cp_offset(), trace->backtrack());
3505 }
3506 on_success()->Emit(compiler, trace);
3507}
3508
3509void TextNode::CalculateOffsets() {
3510 int element_count = elements()->length();
3511 // Set up the offsets of the elements relative to the start. This is a fixed
3512 // quantity since a TextNode can only contain fixed-width things.
3513 int cp_offset = 0;
3514 for (int i = 0; i < element_count; i++) {
3515 TextElement& elm = elements()->at(i);
3516 elm.set_cp_offset(cp_offset);
3517 cp_offset += elm.length();
3518 }
3519}
3520
3521namespace {
3522
3523// Assertion propagation moves information about assertions such as
3524// \b to the affected nodes. For instance, in /.\b./ information must
3525// be propagated to the first '.' that whatever follows needs to know
3526// if it matched a word or a non-word, and to the second '.' that it
3527// has to check if it succeeds a word or non-word. In this case the
3528// result will be something like:
3529//
3530// +-------+ +------------+
3531// | . | | . |
3532// +-------+ ---> +------------+
3533// | word? | | check word |
3534// +-------+ +------------+
3535class AssertionPropagator : public AllStatic {
3536 public:
3537 static void VisitText(TextNode* that) {}
3538
3539 static void VisitAction(ActionNode* that) {
3540 // If the next node is interested in what it follows then this node
3541 // has to be interested too so it can pass the information on.
3542 that->info()->AddFromFollowing(that->on_success()->info());
3543 }
3544
3545 static void VisitChoice(ChoiceNode* that, int i) {
3546 // Anything the following nodes need to know has to be known by
3547 // this node also, so it can pass it on.
3548 that->info()->AddFromFollowing(that->alternatives()->at(i).node()->info());
3549 }
3550
3551 static void VisitLoopChoiceContinueNode(LoopChoiceNode* that) {
3552 that->info()->AddFromFollowing(that->continue_node()->info());
3553 }
3554
3555 static void VisitLoopChoiceLoopNode(LoopChoiceNode* that) {
3556 that->info()->AddFromFollowing(that->loop_node()->info());
3557 }
3558
3559 static void VisitNegativeLookaroundChoiceLookaroundNode(
3560 NegativeLookaroundChoiceNode* that) {
3561 VisitChoice(that, NegativeLookaroundChoiceNode::kLookaroundIndex);
3562 }
3563
3564 static void VisitNegativeLookaroundChoiceContinueNode(
3565 NegativeLookaroundChoiceNode* that) {
3566 VisitChoice(that, NegativeLookaroundChoiceNode::kContinueIndex);
3567 }
3568
3569 static void VisitBackReference(BackReferenceNode* that) {}
3570
3571 static void VisitAssertion(AssertionNode* that) {}
3572};
3573
3574// Propagates information about the minimum size of successful matches from
3575// successor nodes to their predecessors. Note that all eats_at_least values
3576// are initialized to zero before analysis.
3577class EatsAtLeastPropagator : public AllStatic {
3578 public:
3579 static void VisitText(TextNode* that) {
3580 // The eats_at_least value is not used if reading backward.
3581 if (!that->read_backward()) {
3582 // We are not at the start after this node, and thus we can use the
3583 // successor's eats_at_least_from_not_start value.
3584 uint8_t eats_at_least = base::saturated_cast<uint8_t>(
3585 that->Length() + that->on_success()
3586 ->eats_at_least_info()
3587 ->eats_at_least_from_not_start);
3588 that->set_eats_at_least_info(EatsAtLeastInfo(eats_at_least));
3589 }
3590 }
3591
3592 static void VisitAction(ActionNode* that) {
3593 switch (that->action_type()) {
3594 case ActionNode::BEGIN_POSITIVE_SUBMATCH:
3595 case ActionNode::POSITIVE_SUBMATCH_SUCCESS:
3596 // We do not propagate eats_at_least data through positive lookarounds,
3597 // because they rewind input.
3598 // TODO(v8:11859) Potential approaches for fixing this include:
3599 // 1. Add a dedicated choice node for positive lookaround, similar to
3600 // NegativeLookaroundChoiceNode.
3601 // 2. Add an eats_at_least_inside_loop field to EatsAtLeastInfo, which
3602 // is <= eats_at_least_from_possibly_start, and use that value in
3603 // EatsAtLeastFromLoopEntry.
3604 DCHECK(that->eats_at_least_info()->IsZero())do { static_assert( mozilla::detail::AssertionConditionType<
decltype(that->eats_at_least_info()->IsZero())>::isValid
, "invalid assertion condition"); if ((__builtin_expect(!!(!(
!!(that->eats_at_least_info()->IsZero()))), 0))) { do {
} while (false); MOZ_ReportAssertionFailure("that->eats_at_least_info()->IsZero()"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 3604); AnnotateMozCrashReason("MOZ_ASSERT" "(" "that->eats_at_least_info()->IsZero()"
")"); do { *((volatile int*)__null) = 3604; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
3605 break;
3606 case ActionNode::SET_REGISTER_FOR_LOOP:
3607 // SET_REGISTER_FOR_LOOP indicates a loop entry point, which means the
3608 // loop body will run at least the minimum number of times before the
3609 // continuation case can run.
3610 that->set_eats_at_least_info(
3611 that->on_success()->EatsAtLeastFromLoopEntry());
3612 break;
3613 case ActionNode::BEGIN_NEGATIVE_SUBMATCH:
3614 default:
3615 // Otherwise, the current node eats at least as much as its successor.
3616 // Note: we can propagate eats_at_least data for BEGIN_NEGATIVE_SUBMATCH
3617 // because NegativeLookaroundChoiceNode ignores its lookaround successor
3618 // when computing eats-at-least and quick check information.
3619 that->set_eats_at_least_info(*that->on_success()->eats_at_least_info());
3620 break;
3621 }
3622 }
3623
3624 static void VisitChoice(ChoiceNode* that, int i) {
3625 // The minimum possible match from a choice node is the minimum of its
3626 // successors.
3627 EatsAtLeastInfo eats_at_least =
3628 i == 0 ? EatsAtLeastInfo(UINT8_MAX(255)) : *that->eats_at_least_info();
3629 eats_at_least.SetMin(
3630 *that->alternatives()->at(i).node()->eats_at_least_info());
3631 that->set_eats_at_least_info(eats_at_least);
3632 }
3633
3634 static void VisitLoopChoiceContinueNode(LoopChoiceNode* that) {
3635 if (!that->read_backward()) {
3636 that->set_eats_at_least_info(
3637 *that->continue_node()->eats_at_least_info());
3638 }
3639 }
3640
3641 static void VisitLoopChoiceLoopNode(LoopChoiceNode* that) {}
3642
3643 static void VisitNegativeLookaroundChoiceLookaroundNode(
3644 NegativeLookaroundChoiceNode* that) {}
3645
3646 static void VisitNegativeLookaroundChoiceContinueNode(
3647 NegativeLookaroundChoiceNode* that) {
3648 that->set_eats_at_least_info(*that->continue_node()->eats_at_least_info());
3649 }
3650
3651 static void VisitBackReference(BackReferenceNode* that) {
3652 if (!that->read_backward()) {
3653 that->set_eats_at_least_info(*that->on_success()->eats_at_least_info());
3654 }
3655 }
3656
3657 static void VisitAssertion(AssertionNode* that) {
3658 EatsAtLeastInfo eats_at_least = *that->on_success()->eats_at_least_info();
3659 if (that->assertion_type() == AssertionNode::AT_START) {
3660 // If we know we are not at the start and we are asked "how many
3661 // characters will you match if you succeed?" then we can answer anything
3662 // since false implies false. So let's just set the max answer
3663 // (UINT8_MAX) since that won't prevent us from preloading a lot of
3664 // characters for the other branches in the node graph.
3665 eats_at_least.eats_at_least_from_not_start = UINT8_MAX(255);
3666 }
3667 that->set_eats_at_least_info(eats_at_least);
3668 }
3669};
3670
3671} // namespace
3672
3673// -------------------------------------------------------------------
3674// Analysis
3675
3676// Iterates the node graph and provides the opportunity for propagators to set
3677// values that depend on successor nodes.
3678template <typename... Propagators>
3679class Analysis : public NodeVisitor {
3680 public:
3681 Analysis(Isolate* isolate, bool is_one_byte, RegExpFlags flags)
3682 : isolate_(isolate),
3683 is_one_byte_(is_one_byte),
3684 flags_(flags),
3685 error_(RegExpError::kNone) {}
3686
3687 void EnsureAnalyzed(RegExpNode* that) {
3688 StackLimitCheck check(isolate());
3689 if (check.HasOverflowed()) {
3690 if (v8_flagsjs::jit::JitOptions.correctness_fuzzer_suppressions) {
3691 FATAL("Analysis: Aborting on stack overflow")do { do { } while (false); MOZ_ReportCrash("" "Analysis: Aborting on stack overflow"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 3691); AnnotateMozCrashReason("MOZ_CRASH(" "Analysis: Aborting on stack overflow"
")"); do { *((volatile int*)__null) = 3691; __attribute__((nomerge
)) ::abort(); } while (false); } while (false)
;
3692 }
3693 fail(RegExpError::kAnalysisStackOverflow);
3694 return;
3695 }
3696 if (that->info()->been_analyzed || that->info()->being_analyzed) return;
3697 that->info()->being_analyzed = true;
3698 that->Accept(this);
3699 that->info()->being_analyzed = false;
3700 that->info()->been_analyzed = true;
3701 }
3702
3703 bool has_failed() { return error_ != RegExpError::kNone; }
3704 RegExpError error() {
3705 DCHECK(error_ != RegExpError::kNone)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(error_ != RegExpError::kNone)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(error_ != RegExpError::kNone
))), 0))) { do { } while (false); MOZ_ReportAssertionFailure(
"error_ != RegExpError::kNone", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 3705); AnnotateMozCrashReason("MOZ_ASSERT" "(" "error_ != RegExpError::kNone"
")"); do { *((volatile int*)__null) = 3705; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
3706 return error_;
3707 }
3708 void fail(RegExpError error) { error_ = error; }
3709
3710 Isolate* isolate() const { return isolate_; }
3711
3712 void VisitEnd(EndNode* that) override {
3713 // nothing to do
3714 }
3715
3716// Used to call the given static function on each propagator / variadic template
3717// argument.
3718#define STATIC_FOR_EACH(expr) \
3719 do { \
3720 int dummy[] = {((expr), 0)...}; \
3721 USE(dummy)do { ::v8::base::Use unused_tmp_array_for_use_macro[]{dummy};
(void)unused_tmp_array_for_use_macro; } while (false)
; \
3722 } while (false)
3723
3724 void VisitText(TextNode* that) override {
3725 that->MakeCaseIndependent(isolate(), is_one_byte_, flags());
3726 EnsureAnalyzed(that->on_success());
3727 if (has_failed()) return;
3728 that->CalculateOffsets();
3729 STATIC_FOR_EACH(Propagators::VisitText(that));
3730 }
3731
3732 void VisitAction(ActionNode* that) override {
3733 if (that->action_type() == ActionNode::MODIFY_FLAGS) {
3734 set_flags(that->flags());
3735 }
3736 EnsureAnalyzed(that->on_success());
3737 if (has_failed()) return;
3738 STATIC_FOR_EACH(Propagators::VisitAction(that));
3739 }
3740
3741 void VisitChoice(ChoiceNode* that) override {
3742 for (int i = 0; i < that->alternatives()->length(); i++) {
3743 EnsureAnalyzed(that->alternatives()->at(i).node());
3744 if (has_failed()) return;
3745 STATIC_FOR_EACH(Propagators::VisitChoice(that, i));
3746 }
3747 }
3748
3749 void VisitLoopChoice(LoopChoiceNode* that) override {
3750 DCHECK_EQ(that->alternatives()->length(), 2)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((that->alternatives()->length()) == (2))>::
isValid, "invalid assertion condition"); if ((__builtin_expect
(!!(!(!!((that->alternatives()->length()) == (2)))), 0)
)) { do { } while (false); MOZ_ReportAssertionFailure("(that->alternatives()->length()) == (2)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 3750); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(that->alternatives()->length()) == (2)"
")"); do { *((volatile int*)__null) = 3750; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
; // Just loop and continue.
3751
3752 // First propagate all information from the continuation node.
3753 EnsureAnalyzed(that->continue_node());
3754 if (has_failed()) return;
3755 STATIC_FOR_EACH(Propagators::VisitLoopChoiceContinueNode(that));
3756
3757 // Check the loop last since it may need the value of this node
3758 // to get a correct result.
3759 EnsureAnalyzed(that->loop_node());
3760 if (has_failed()) return;
3761 STATIC_FOR_EACH(Propagators::VisitLoopChoiceLoopNode(that));
3762 }
3763
3764 void VisitNegativeLookaroundChoice(
3765 NegativeLookaroundChoiceNode* that) override {
3766 DCHECK_EQ(that->alternatives()->length(), 2)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((that->alternatives()->length()) == (2))>::
isValid, "invalid assertion condition"); if ((__builtin_expect
(!!(!(!!((that->alternatives()->length()) == (2)))), 0)
)) { do { } while (false); MOZ_ReportAssertionFailure("(that->alternatives()->length()) == (2)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 3766); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(that->alternatives()->length()) == (2)"
")"); do { *((volatile int*)__null) = 3766; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
; // Lookaround and continue.
3767
3768 EnsureAnalyzed(that->lookaround_node());
3769 if (has_failed()) return;
3770 STATIC_FOR_EACH(
3771 Propagators::VisitNegativeLookaroundChoiceLookaroundNode(that));
3772
3773 EnsureAnalyzed(that->continue_node());
3774 if (has_failed()) return;
3775 STATIC_FOR_EACH(
3776 Propagators::VisitNegativeLookaroundChoiceContinueNode(that));
3777 }
3778
3779 void VisitBackReference(BackReferenceNode* that) override {
3780 EnsureAnalyzed(that->on_success());
3781 if (has_failed()) return;
3782 STATIC_FOR_EACH(Propagators::VisitBackReference(that));
3783 }
3784
3785 void VisitAssertion(AssertionNode* that) override {
3786 EnsureAnalyzed(that->on_success());
3787 if (has_failed()) return;
3788 STATIC_FOR_EACH(Propagators::VisitAssertion(that));
3789 }
3790
3791#undef STATIC_FOR_EACH
3792
3793 private:
3794 RegExpFlags flags() const { return flags_; }
3795 void set_flags(RegExpFlags flags) { flags_ = flags; }
3796
3797 Isolate* isolate_;
3798 const bool is_one_byte_;
3799 RegExpFlags flags_;
3800 RegExpError error_;
3801
3802 DISALLOW_IMPLICIT_CONSTRUCTORS(Analysis)Analysis() = delete; Analysis(const Analysis&) = delete; Analysis
& operator=(const Analysis&) = delete
;
3803};
3804
3805RegExpError AnalyzeRegExp(Isolate* isolate, bool is_one_byte, RegExpFlags flags,
3806 RegExpNode* node) {
3807 Analysis<AssertionPropagator, EatsAtLeastPropagator> analysis(
3808 isolate, is_one_byte, flags);
3809 DCHECK_EQ(node->info()->been_analyzed, false)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((node->info()->been_analyzed) == (false))>::
isValid, "invalid assertion condition"); if ((__builtin_expect
(!!(!(!!((node->info()->been_analyzed) == (false)))), 0
))) { do { } while (false); MOZ_ReportAssertionFailure("(node->info()->been_analyzed) == (false)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 3809); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(node->info()->been_analyzed) == (false)"
")"); do { *((volatile int*)__null) = 3809; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
3810 analysis.EnsureAnalyzed(node);
3811 DCHECK_IMPLIES(analysis.has_failed(), analysis.error() != RegExpError::kNone)do { if (analysis.has_failed()) { do { static_assert( mozilla
::detail::AssertionConditionType<decltype(analysis.error()
!= RegExpError::kNone)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(analysis.error() != RegExpError
::kNone))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("analysis.error() != RegExpError::kNone", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 3811); AnnotateMozCrashReason("MOZ_ASSERT" "(" "analysis.error() != RegExpError::kNone"
")"); do { *((volatile int*)__null) = 3811; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false); } } while (
false)
;
3812 return analysis.has_failed() ? analysis.error() : RegExpError::kNone;
3813}
3814
3815void BackReferenceNode::FillInBMInfo(Isolate* isolate, int offset, int budget,
3816 BoyerMooreLookahead* bm,
3817 bool not_at_start) {
3818 // Working out the set of characters that a backreference can match is too
3819 // hard, so we just say that any character can match.
3820 bm->SetRest(offset);
3821 SaveBMInfo(bm, not_at_start, offset);
3822}
3823
3824static_assert(BoyerMoorePositionInfo::kMapSize ==
3825 RegExpMacroAssembler::kTableSize);
3826
3827void ChoiceNode::FillInBMInfo(Isolate* isolate, int offset, int budget,
3828 BoyerMooreLookahead* bm, bool not_at_start) {
3829 ZoneList<GuardedAlternative>* alts = alternatives();
3830 budget = (budget - 1) / alts->length();
3831 for (int i = 0; i < alts->length(); i++) {
3832 GuardedAlternative& alt = alts->at(i);
3833 if (alt.guards() != nullptr && alt.guards()->length() != 0) {
3834 bm->SetRest(offset); // Give up trying to fill in info.
3835 SaveBMInfo(bm, not_at_start, offset);
3836 return;
3837 }
3838 alt.node()->FillInBMInfo(isolate, offset, budget, bm, not_at_start);
3839 }
3840 SaveBMInfo(bm, not_at_start, offset);
3841}
3842
3843void TextNode::FillInBMInfo(Isolate* isolate, int initial_offset, int budget,
3844 BoyerMooreLookahead* bm, bool not_at_start) {
3845 if (initial_offset >= bm->length()) return;
3846 int offset = initial_offset;
3847 int max_char = bm->max_char();
3848 for (int i = 0; i < elements()->length(); i++) {
3849 if (offset >= bm->length()) {
3850 if (initial_offset == 0) set_bm_info(not_at_start, bm);
3851 return;
3852 }
3853 TextElement text = elements()->at(i);
3854 if (text.text_type() == TextElement::ATOM) {
3855 RegExpAtom* atom = text.atom();
3856 for (int j = 0; j < atom->length(); j++, offset++) {
3857 if (offset >= bm->length()) {
3858 if (initial_offset == 0) set_bm_info(not_at_start, bm);
3859 return;
3860 }
3861 base::uc16 character = atom->data()[j];
3862 if (IsIgnoreCase(bm->compiler()->flags())) {
3863 unibrow::uchar chars[4];
3864 int length = GetCaseIndependentLetters(
3865 isolate, character, bm->max_char() == String::kMaxOneByteCharCode,
3866 chars, 4);
3867 for (int k = 0; k < length; k++) {
3868 bm->Set(offset, chars[k]);
3869 }
3870 } else {
3871 if (character <= max_char) bm->Set(offset, character);
3872 }
3873 }
3874 } else {
3875 DCHECK_EQ(TextElement::CLASS_RANGES, text.text_type())do { static_assert( mozilla::detail::AssertionConditionType<
decltype((TextElement::CLASS_RANGES) == (text.text_type()))>
::isValid, "invalid assertion condition"); if ((__builtin_expect
(!!(!(!!((TextElement::CLASS_RANGES) == (text.text_type()))))
, 0))) { do { } while (false); MOZ_ReportAssertionFailure("(TextElement::CLASS_RANGES) == (text.text_type())"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 3875); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(TextElement::CLASS_RANGES) == (text.text_type())"
")"); do { *((volatile int*)__null) = 3875; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
3876 RegExpClassRanges* class_ranges = text.class_ranges();
3877 ZoneList<CharacterRange>* ranges = class_ranges->ranges(zone());
3878 if (class_ranges->is_negated()) {
3879 bm->SetAll(offset);
3880 } else {
3881 for (int k = 0; k < ranges->length(); k++) {
3882 CharacterRange& range = ranges->at(k);
3883 if (static_cast<int>(range.from()) > max_char) continue;
3884 int to = std::min(max_char, static_cast<int>(range.to()));
3885 bm->SetInterval(offset, Interval(range.from(), to));
3886 }
3887 }
3888 offset++;
3889 }
3890 }
3891 if (offset >= bm->length()) {
3892 if (initial_offset == 0) set_bm_info(not_at_start, bm);
3893 return;
3894 }
3895 on_success()->FillInBMInfo(isolate, offset, budget - 1, bm,
3896 true); // Not at start after a text node.
3897 if (initial_offset == 0) set_bm_info(not_at_start, bm);
3898}
3899
3900RegExpNode* RegExpCompiler::OptionallyStepBackToLeadSurrogate(
3901 RegExpNode* on_success) {
3902 DCHECK(!read_backward())do { static_assert( mozilla::detail::AssertionConditionType<
decltype(!read_backward())>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(!read_backward()))), 0))) { do
{ } while (false); MOZ_ReportAssertionFailure("!read_backward()"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 3902); AnnotateMozCrashReason("MOZ_ASSERT" "(" "!read_backward()"
")"); do { *((volatile int*)__null) = 3902; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
3903 ZoneList<CharacterRange>* lead_surrogates = CharacterRange::List(
3904 zone(), CharacterRange::Range(kLeadSurrogateStart, kLeadSurrogateEnd));
3905 ZoneList<CharacterRange>* trail_surrogates = CharacterRange::List(
3906 zone(), CharacterRange::Range(kTrailSurrogateStart, kTrailSurrogateEnd));
3907
3908 ChoiceNode* optional_step_back = zone()->New<ChoiceNode>(2, zone());
3909
3910 int stack_register = UnicodeLookaroundStackRegister();
3911 int position_register = UnicodeLookaroundPositionRegister();
3912 RegExpNode* step_back = TextNode::CreateForCharacterRanges(
3913 zone(), lead_surrogates, true, on_success);
3914 RegExpLookaround::Builder builder(true, step_back, stack_register,
3915 position_register);
3916 RegExpNode* match_trail = TextNode::CreateForCharacterRanges(
3917 zone(), trail_surrogates, false, builder.on_match_success());
3918
3919 optional_step_back->AddAlternative(
3920 GuardedAlternative(builder.ForMatch(match_trail)));
3921 optional_step_back->AddAlternative(GuardedAlternative(on_success));
3922
3923 return optional_step_back;
3924}
3925
3926RegExpNode* RegExpCompiler::PreprocessRegExp(RegExpCompileData* data,
3927 bool is_one_byte) {
3928 // Wrap the body of the regexp in capture #0.
3929 RegExpNode* captured_body =
3930 RegExpCapture::ToNode(data->tree, 0, this, accept());
3931 RegExpNode* node = captured_body;
3932 if (!data->tree->IsAnchoredAtStart() && !IsSticky(flags())) {
3933 // Add a .*? at the beginning, outside the body capture, unless
3934 // this expression is anchored at the beginning or sticky.
3935 RegExpNode* loop_node = RegExpQuantifier::ToNode(
3936 0, RegExpTree::kInfinity, false,
3937 zone()->New<RegExpClassRanges>(StandardCharacterSet::kEverything), this,
3938 captured_body, data->contains_anchor);
3939
3940 if (data->contains_anchor) {
3941 // Unroll loop once, to take care of the case that might start
3942 // at the start of input.
3943 ChoiceNode* first_step_node = zone()->New<ChoiceNode>(2, zone());
3944 first_step_node->AddAlternative(GuardedAlternative(captured_body));
3945 first_step_node->AddAlternative(GuardedAlternative(zone()->New<TextNode>(
3946 zone()->New<RegExpClassRanges>(StandardCharacterSet::kEverything),
3947 false, loop_node)));
3948 node = first_step_node;
3949 } else {
3950 node = loop_node;
3951 }
3952 }
3953 if (is_one_byte) {
3954 node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, flags());
3955 // Do it again to propagate the new nodes to places where they were not
3956 // put because they had not been calculated yet.
3957 if (node != nullptr) {
3958 node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, flags());
3959 }
3960 } else if (IsEitherUnicode(flags()) &&
3961 (IsGlobal(flags()) || IsSticky(flags()))) {
3962 node = OptionallyStepBackToLeadSurrogate(node);
3963 }
3964
3965 if (node == nullptr) node = zone()->New<EndNode>(EndNode::BACKTRACK, zone());
3966 return node;
3967}
3968
3969void RegExpCompiler::ToNodeCheckForStackOverflow() {
3970 if (StackLimitCheck{isolate()}.HasOverflowed()) {
3971 V8::FatalProcessOutOfMemory(isolate(), "RegExpCompiler");
3972 }
3973}
3974
3975} // namespace internal
3976} // namespace v8