Bug Summary

File:var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc
Warning:line 1446, column 38
The result of left shift is undefined because the right operand '32' is not smaller than 32, the capacity of 'uint32_t'

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name regexp-compiler.cc -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -fhalf-no-semantic-interposition -mframe-pointer=all -relaxed-aliasing -ffp-contract=off -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fdebug-compilation-dir=/var/lib/jenkins/workspace/firefox-scan-build/obj-x86_64-pc-linux-gnu/js/src/irregexp -fcoverage-compilation-dir=/var/lib/jenkins/workspace/firefox-scan-build/obj-x86_64-pc-linux-gnu/js/src/irregexp -resource-dir /usr/lib/llvm-18/lib/clang/18 -include /var/lib/jenkins/workspace/firefox-scan-build/config/gcc_hidden.h -include /var/lib/jenkins/workspace/firefox-scan-build/obj-x86_64-pc-linux-gnu/mozilla-config.h -I /var/lib/jenkins/workspace/firefox-scan-build/obj-x86_64-pc-linux-gnu/dist/stl_wrappers -I /var/lib/jenkins/workspace/firefox-scan-build/obj-x86_64-pc-linux-gnu/dist/system_wrappers -U _FORTIFY_SOURCE -D _FORTIFY_SOURCE=2 -D DEBUG=1 -D WASM_SUPPORTS_HUGE_MEMORY -D JS_CACHEIR_SPEW -D JS_STRUCTURED_SPEW -D JS_HAS_CTYPES -D FFI_BUILDING -D EXPORT_JS_API -D MOZ_HAS_MOZGLUE -D MOZ_SUPPORT_LEAKCHECKING -I /var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp -I /var/lib/jenkins/workspace/firefox-scan-build/obj-x86_64-pc-linux-gnu/js/src/irregexp -I /var/lib/jenkins/workspace/firefox-scan-build/obj-x86_64-pc-linux-gnu/js/src -I /var/lib/jenkins/workspace/firefox-scan-build/js/src -I /var/lib/jenkins/workspace/firefox-scan-build/obj-x86_64-pc-linux-gnu/dist/include -I /var/lib/jenkins/workspace/firefox-scan-build/obj-x86_64-pc-linux-gnu/dist/include/nspr -I /var/lib/jenkins/workspace/firefox-scan-build/obj-x86_64-pc-linux-gnu/dist/include/nss -D MOZILLA_CLIENT -D V8_INTL_SUPPORT -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/14/../../../../include/c++/14 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/14/../../../../include/x86_64-linux-gnu/c++/14 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/14/../../../../include/c++/14/backward -internal-isystem /usr/lib/llvm-18/lib/clang/18/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/14/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O3 -Wno-error=tautological-type-limit-compare -Wno-invalid-offsetof -Wno-range-loop-analysis -Wno-deprecated-anon-enum-enum-conversion -Wno-deprecated-enum-enum-conversion -Wno-deprecated-this-capture -Wno-inline-new-delete -Wno-error=deprecated-declarations -Wno-error=array-bounds -Wno-error=free-nonheap-object -Wno-error=atomic-alignment -Wno-error=deprecated-builtins -Wno-psabi -Wno-error=builtin-macro-redefined -Wno-vla-cxx-extension -Wno-unknown-warning-option -Wno-error=type-limits -Wno-error=return-type -Wno-sign-compare -Wno-c++11-narrowing -fdeprecated-macro -ferror-limit 19 -stack-protector 2 -fstack-clash-protection -ftrivial-auto-var-init=pattern -fno-rtti -fgnuc-version=4.2.1 -fno-aligned-allocation -vectorize-loops -vectorize-slp -analyzer-checker optin.performance.Padding -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2024-07-21-021012-413605-1 -x c++ /var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc
1// Copyright 2019 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "irregexp/imported/regexp-compiler.h"
6
7#include "irregexp/imported/regexp-macro-assembler-arch.h"
8
9#ifdef V8_INTL_SUPPORT1
10#include "irregexp/imported/special-case.h"
11#include "unicode/locid.h"
12#include "unicode/uniset.h"
13#include "unicode/utypes.h"
14#endif // V8_INTL_SUPPORT
15
16namespace v8 {
17namespace internal {
18
19using namespace regexp_compiler_constants; // NOLINT(build/namespaces)
20
21// -------------------------------------------------------------------
22// Implementation of the Irregexp regular expression engine.
23//
24// The Irregexp regular expression engine is intended to be a complete
25// implementation of ECMAScript regular expressions. It generates either
26// bytecodes or native code.
27
28// The Irregexp regexp engine is structured in three steps.
29// 1) The parser generates an abstract syntax tree. See ast.cc.
30// 2) From the AST a node network is created. The nodes are all
31// subclasses of RegExpNode. The nodes represent states when
32// executing a regular expression. Several optimizations are
33// performed on the node network.
34// 3) From the nodes we generate either byte codes or native code
35// that can actually execute the regular expression (perform
36// the search). The code generation step is described in more
37// detail below.
38
39// Code generation.
40//
41// The nodes are divided into four main categories.
42// * Choice nodes
43// These represent places where the regular expression can
44// match in more than one way. For example on entry to an
45// alternation (foo|bar) or a repetition (*, +, ? or {}).
46// * Action nodes
47// These represent places where some action should be
48// performed. Examples include recording the current position
49// in the input string to a register (in order to implement
50// captures) or other actions on register for example in order
51// to implement the counters needed for {} repetitions.
52// * Matching nodes
53// These attempt to match some element part of the input string.
54// Examples of elements include character classes, plain strings
55// or back references.
56// * End nodes
57// These are used to implement the actions required on finding
58// a successful match or failing to find a match.
59//
60// The code generated (whether as byte codes or native code) maintains
61// some state as it runs. This consists of the following elements:
62//
63// * The capture registers. Used for string captures.
64// * Other registers. Used for counters etc.
65// * The current position.
66// * The stack of backtracking information. Used when a matching node
67// fails to find a match and needs to try an alternative.
68//
69// Conceptual regular expression execution model:
70//
71// There is a simple conceptual model of regular expression execution
72// which will be presented first. The actual code generated is a more
73// efficient simulation of the simple conceptual model:
74//
75// * Choice nodes are implemented as follows:
76// For each choice except the last {
77// push current position
78// push backtrack code location
79// <generate code to test for choice>
80// backtrack code location:
81// pop current position
82// }
83// <generate code to test for last choice>
84//
85// * Actions nodes are generated as follows
86// <push affected registers on backtrack stack>
87// <generate code to perform action>
88// push backtrack code location
89// <generate code to test for following nodes>
90// backtrack code location:
91// <pop affected registers to restore their state>
92// <pop backtrack location from stack and go to it>
93//
94// * Matching nodes are generated as follows:
95// if input string matches at current position
96// update current position
97// <generate code to test for following nodes>
98// else
99// <pop backtrack location from stack and go to it>
100//
101// Thus it can be seen that the current position is saved and restored
102// by the choice nodes, whereas the registers are saved and restored by
103// by the action nodes that manipulate them.
104//
105// The other interesting aspect of this model is that nodes are generated
106// at the point where they are needed by a recursive call to Emit(). If
107// the node has already been code generated then the Emit() call will
108// generate a jump to the previously generated code instead. In order to
109// limit recursion it is possible for the Emit() function to put the node
110// on a work list for later generation and instead generate a jump. The
111// destination of the jump is resolved later when the code is generated.
112//
113// Actual regular expression code generation.
114//
115// Code generation is actually more complicated than the above. In order to
116// improve the efficiency of the generated code some optimizations are
117// performed
118//
119// * Choice nodes have 1-character lookahead.
120// A choice node looks at the following character and eliminates some of
121// the choices immediately based on that character. This is not yet
122// implemented.
123// * Simple greedy loops store reduced backtracking information.
124// A quantifier like /.*foo/m will greedily match the whole input. It will
125// then need to backtrack to a point where it can match "foo". The naive
126// implementation of this would push each character position onto the
127// backtracking stack, then pop them off one by one. This would use space
128// proportional to the length of the input string. However since the "."
129// can only match in one way and always has a constant length (in this case
130// of 1) it suffices to store the current position on the top of the stack
131// once. Matching now becomes merely incrementing the current position and
132// backtracking becomes decrementing the current position and checking the
133// result against the stored current position. This is faster and saves
134// space.
135// * The current state is virtualized.
136// This is used to defer expensive operations until it is clear that they
137// are needed and to generate code for a node more than once, allowing
138// specialized an efficient versions of the code to be created. This is
139// explained in the section below.
140//
141// Execution state virtualization.
142//
143// Instead of emitting code, nodes that manipulate the state can record their
144// manipulation in an object called the Trace. The Trace object can record a
145// current position offset, an optional backtrack code location on the top of
146// the virtualized backtrack stack and some register changes. When a node is
147// to be emitted it can flush the Trace or update it. Flushing the Trace
148// will emit code to bring the actual state into line with the virtual state.
149// Avoiding flushing the state can postpone some work (e.g. updates of capture
150// registers). Postponing work can save time when executing the regular
151// expression since it may be found that the work never has to be done as a
152// failure to match can occur. In addition it is much faster to jump to a
153// known backtrack code location than it is to pop an unknown backtrack
154// location from the stack and jump there.
155//
156// The virtual state found in the Trace affects code generation. For example
157// the virtual state contains the difference between the actual current
158// position and the virtual current position, and matching code needs to use
159// this offset to attempt a match in the correct location of the input
160// string. Therefore code generated for a non-trivial trace is specialized
161// to that trace. The code generator therefore has the ability to generate
162// code for each node several times. In order to limit the size of the
163// generated code there is an arbitrary limit on how many specialized sets of
164// code may be generated for a given node. If the limit is reached, the
165// trace is flushed and a generic version of the code for a node is emitted.
166// This is subsequently used for that node. The code emitted for non-generic
167// trace is not recorded in the node and so it cannot currently be reused in
168// the event that code generation is requested for an identical trace.
169
170namespace {
171
172constexpr base::uc32 MaxCodeUnit(const bool one_byte) {
173 static_assert(String::kMaxOneByteCharCodeU <=
174 std::numeric_limits<uint16_t>::max());
175 static_assert(String::kMaxUtf16CodeUnitU <=
176 std::numeric_limits<uint16_t>::max());
177 return one_byte ? String::kMaxOneByteCharCodeU : String::kMaxUtf16CodeUnitU;
178}
179
180constexpr uint32_t CharMask(const bool one_byte) {
181 static_assert(base::bits::IsPowerOfTwo(String::kMaxOneByteCharCodeU + 1));
182 static_assert(base::bits::IsPowerOfTwo(String::kMaxUtf16CodeUnitU + 1));
183 return MaxCodeUnit(one_byte);
184}
185
186} // namespace
187
188void RegExpTree::AppendToText(RegExpText* text, Zone* zone) { UNREACHABLE()do { do { } while (false); MOZ_ReportCrash("" "unreachable code"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 188); AnnotateMozCrashReason("MOZ_CRASH(" "unreachable code"
")"); do { *((volatile int*)__null) = 188; __attribute__((nomerge
)) ::abort(); } while (false); } while (false)
; }
189
190void RegExpAtom::AppendToText(RegExpText* text, Zone* zone) {
191 text->AddElement(TextElement::Atom(this), zone);
192}
193
194void RegExpClassRanges::AppendToText(RegExpText* text, Zone* zone) {
195 text->AddElement(TextElement::ClassRanges(this), zone);
196}
197
198void RegExpText::AppendToText(RegExpText* text, Zone* zone) {
199 for (int i = 0; i < elements()->length(); i++)
200 text->AddElement(elements()->at(i), zone);
201}
202
203TextElement TextElement::Atom(RegExpAtom* atom) {
204 return TextElement(ATOM, atom);
205}
206
207TextElement TextElement::ClassRanges(RegExpClassRanges* class_ranges) {
208 return TextElement(CLASS_RANGES, class_ranges);
209}
210
211int TextElement::length() const {
212 switch (text_type()) {
213 case ATOM:
214 return atom()->length();
215
216 case CLASS_RANGES:
217 return 1;
218 }
219 UNREACHABLE()do { do { } while (false); MOZ_ReportCrash("" "unreachable code"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 219); AnnotateMozCrashReason("MOZ_CRASH(" "unreachable code"
")"); do { *((volatile int*)__null) = 219; __attribute__((nomerge
)) ::abort(); } while (false); } while (false)
;
220}
221
222class RecursionCheck {
223 public:
224 explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) {
225 compiler->IncrementRecursionDepth();
226 }
227 ~RecursionCheck() { compiler_->DecrementRecursionDepth(); }
228
229 private:
230 RegExpCompiler* compiler_;
231};
232
233// Attempts to compile the regexp using an Irregexp code generator. Returns
234// a fixed array or a null handle depending on whether it succeeded.
235RegExpCompiler::RegExpCompiler(Isolate* isolate, Zone* zone, int capture_count,
236 RegExpFlags flags, bool one_byte)
237 : next_register_(JSRegExp::RegistersForCaptureCount(capture_count)),
238 unicode_lookaround_stack_register_(kNoRegister),
239 unicode_lookaround_position_register_(kNoRegister),
240 work_list_(nullptr),
241 recursion_depth_(0),
242 flags_(flags),
243 one_byte_(one_byte),
244 reg_exp_too_big_(false),
245 limiting_recursion_(false),
246 optimize_(v8_flagsjs::jit::JitOptions.regexp_optimization),
247 read_backward_(false),
248 current_expansion_factor_(1),
249 frequency_collator_(),
250 isolate_(isolate),
251 zone_(zone) {
252 accept_ = zone->New<EndNode>(EndNode::ACCEPT, zone);
253 DCHECK_GE(RegExpMacroAssembler::kMaxRegister, next_register_ - 1)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((RegExpMacroAssembler::kMaxRegister) >= (next_register_
- 1))>::isValid, "invalid assertion condition"); if ((__builtin_expect
(!!(!(!!((RegExpMacroAssembler::kMaxRegister) >= (next_register_
- 1)))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("(RegExpMacroAssembler::kMaxRegister) >= (next_register_ - 1)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 253); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(RegExpMacroAssembler::kMaxRegister) >= (next_register_ - 1)"
")"); do { *((volatile int*)__null) = 253; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
254}
255
256RegExpCompiler::CompilationResult RegExpCompiler::Assemble(
257 Isolate* isolate, RegExpMacroAssembler* macro_assembler, RegExpNode* start,
258 int capture_count, Handle<String> pattern) {
259 macro_assembler_ = macro_assembler;
260
261 ZoneVector<RegExpNode*> work_list(zone());
262 work_list_ = &work_list;
263 Label fail;
264 macro_assembler_->PushBacktrack(&fail);
265 Trace new_trace;
266 start->Emit(this, &new_trace);
267 macro_assembler_->BindJumpTarget(&fail);
268 macro_assembler_->Fail();
269 while (!work_list.empty()) {
270 RegExpNode* node = work_list.back();
271 work_list.pop_back();
272 node->set_on_work_list(false);
273 if (!node->label()->is_bound()) node->Emit(this, &new_trace);
274 }
275 if (reg_exp_too_big_) {
276 if (v8_flagsjs::jit::JitOptions.correctness_fuzzer_suppressions) {
277 FATAL("Aborting on excess zone allocation")do { do { } while (false); MOZ_ReportCrash("" "Aborting on excess zone allocation"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 277); AnnotateMozCrashReason("MOZ_CRASH(" "Aborting on excess zone allocation"
")"); do { *((volatile int*)__null) = 277; __attribute__((nomerge
)) ::abort(); } while (false); } while (false)
;
278 }
279 macro_assembler_->AbortedCodeGeneration();
280 return CompilationResult::RegExpTooBig();
281 }
282
283 Handle<HeapObject> code = macro_assembler_->GetCode(pattern);
284 isolate->IncreaseTotalRegexpCodeGenerated(code);
285 work_list_ = nullptr;
286
287 return {code, next_register_};
288}
289
290bool Trace::DeferredAction::Mentions(int that) {
291 if (action_type() == ActionNode::CLEAR_CAPTURES) {
292 Interval range = static_cast<DeferredClearCaptures*>(this)->range();
293 return range.Contains(that);
294 } else {
295 return reg() == that;
296 }
297}
298
299bool Trace::mentions_reg(int reg) {
300 for (DeferredAction* action = actions_; action != nullptr;
301 action = action->next()) {
302 if (action->Mentions(reg)) return true;
303 }
304 return false;
305}
306
307bool Trace::GetStoredPosition(int reg, int* cp_offset) {
308 DCHECK_EQ(0, *cp_offset)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((0) == (*cp_offset))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((0) == (*cp_offset)))), 0)))
{ do { } while (false); MOZ_ReportAssertionFailure("(0) == (*cp_offset)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 308); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(0) == (*cp_offset)"
")"); do { *((volatile int*)__null) = 308; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
309 for (DeferredAction* action = actions_; action != nullptr;
310 action = action->next()) {
311 if (action->Mentions(reg)) {
312 if (action->action_type() == ActionNode::STORE_POSITION) {
313 *cp_offset = static_cast<DeferredCapture*>(action)->cp_offset();
314 return true;
315 } else {
316 return false;
317 }
318 }
319 }
320 return false;
321}
322
323// A (dynamically-sized) set of unsigned integers that behaves especially well
324// on small integers (< kFirstLimit). May do zone-allocation.
325class DynamicBitSet : public ZoneObject {
326 public:
327 V8_EXPORT_PRIVATE bool Get(unsigned value) const {
328 if (value < kFirstLimit) {
329 return (first_ & (1 << value)) != 0;
330 } else if (remaining_ == nullptr) {
331 return false;
332 } else {
333 return remaining_->Contains(value);
334 }
335 }
336
337 // Destructively set a value in this set.
338 void Set(unsigned value, Zone* zone) {
339 if (value < kFirstLimit) {
340 first_ |= (1 << value);
341 } else {
342 if (remaining_ == nullptr)
343 remaining_ = zone->New<ZoneList<unsigned>>(1, zone);
344 if (remaining_->is_empty() || !remaining_->Contains(value))
345 remaining_->Add(value, zone);
346 }
347 }
348
349 private:
350 static constexpr unsigned kFirstLimit = 32;
351
352 uint32_t first_ = 0;
353 ZoneList<unsigned>* remaining_ = nullptr;
354};
355
356int Trace::FindAffectedRegisters(DynamicBitSet* affected_registers,
357 Zone* zone) {
358 int max_register = RegExpCompiler::kNoRegister;
359 for (DeferredAction* action = actions_; action != nullptr;
360 action = action->next()) {
361 if (action->action_type() == ActionNode::CLEAR_CAPTURES) {
362 Interval range = static_cast<DeferredClearCaptures*>(action)->range();
363 for (int i = range.from(); i <= range.to(); i++)
364 affected_registers->Set(i, zone);
365 if (range.to() > max_register) max_register = range.to();
366 } else {
367 affected_registers->Set(action->reg(), zone);
368 if (action->reg() > max_register) max_register = action->reg();
369 }
370 }
371 return max_register;
372}
373
374void Trace::RestoreAffectedRegisters(RegExpMacroAssembler* assembler,
375 int max_register,
376 const DynamicBitSet& registers_to_pop,
377 const DynamicBitSet& registers_to_clear) {
378 for (int reg = max_register; reg >= 0; reg--) {
379 if (registers_to_pop.Get(reg)) {
380 assembler->PopRegister(reg);
381 } else if (registers_to_clear.Get(reg)) {
382 int clear_to = reg;
383 while (reg > 0 && registers_to_clear.Get(reg - 1)) {
384 reg--;
385 }
386 assembler->ClearRegisters(reg, clear_to);
387 }
388 }
389}
390
391void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler,
392 int max_register,
393 const DynamicBitSet& affected_registers,
394 DynamicBitSet* registers_to_pop,
395 DynamicBitSet* registers_to_clear,
396 Zone* zone) {
397 // The "+1" is to avoid a push_limit of zero if stack_limit_slack() is 1.
398 const int push_limit = (assembler->stack_limit_slack() + 1) / 2;
399
400 // Count pushes performed to force a stack limit check occasionally.
401 int pushes = 0;
402
403 for (int reg = 0; reg <= max_register; reg++) {
404 if (!affected_registers.Get(reg)) continue;
405
406 // The chronologically first deferred action in the trace
407 // is used to infer the action needed to restore a register
408 // to its previous state (or not, if it's safe to ignore it).
409 enum DeferredActionUndoType { IGNORE, RESTORE, CLEAR };
410 DeferredActionUndoType undo_action = IGNORE;
411
412 int value = 0;
413 bool absolute = false;
414 bool clear = false;
415 static const int kNoStore = kMinInt((int32_t)0x80000000);
416 int store_position = kNoStore;
417 // This is a little tricky because we are scanning the actions in reverse
418 // historical order (newest first).
419 for (DeferredAction* action = actions_; action != nullptr;
420 action = action->next()) {
421 if (action->Mentions(reg)) {
422 switch (action->action_type()) {
423 case ActionNode::SET_REGISTER_FOR_LOOP: {
424 Trace::DeferredSetRegisterForLoop* psr =
425 static_cast<Trace::DeferredSetRegisterForLoop*>(action);
426 if (!absolute) {
427 value += psr->value();
428 absolute = true;
429 }
430 // SET_REGISTER_FOR_LOOP is only used for newly introduced loop
431 // counters. They can have a significant previous value if they
432 // occur in a loop. TODO(lrn): Propagate this information, so
433 // we can set undo_action to IGNORE if we know there is no value to
434 // restore.
435 undo_action = RESTORE;
436 DCHECK_EQ(store_position, kNoStore)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((store_position) == (kNoStore))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((store_position) == (kNoStore
)))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("(store_position) == (kNoStore)", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 436); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(store_position) == (kNoStore)"
")"); do { *((volatile int*)__null) = 436; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
437 DCHECK(!clear)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(!clear)>::isValid, "invalid assertion condition")
; if ((__builtin_expect(!!(!(!!(!clear))), 0))) { do { } while
(false); MOZ_ReportAssertionFailure("!clear", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 437); AnnotateMozCrashReason("MOZ_ASSERT" "(" "!clear" ")")
; do { *((volatile int*)__null) = 437; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
438 break;
439 }
440 case ActionNode::INCREMENT_REGISTER:
441 if (!absolute) {
442 value++;
443 }
444 DCHECK_EQ(store_position, kNoStore)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((store_position) == (kNoStore))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((store_position) == (kNoStore
)))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("(store_position) == (kNoStore)", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 444); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(store_position) == (kNoStore)"
")"); do { *((volatile int*)__null) = 444; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
445 DCHECK(!clear)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(!clear)>::isValid, "invalid assertion condition")
; if ((__builtin_expect(!!(!(!!(!clear))), 0))) { do { } while
(false); MOZ_ReportAssertionFailure("!clear", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 445); AnnotateMozCrashReason("MOZ_ASSERT" "(" "!clear" ")")
; do { *((volatile int*)__null) = 445; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
446 undo_action = RESTORE;
447 break;
448 case ActionNode::STORE_POSITION: {
449 Trace::DeferredCapture* pc =
450 static_cast<Trace::DeferredCapture*>(action);
451 if (!clear && store_position == kNoStore) {
452 store_position = pc->cp_offset();
453 }
454
455 // For captures we know that stores and clears alternate.
456 // Other register, are never cleared, and if the occur
457 // inside a loop, they might be assigned more than once.
458 if (reg <= 1) {
459 // Registers zero and one, aka "capture zero", is
460 // always set correctly if we succeed. There is no
461 // need to undo a setting on backtrack, because we
462 // will set it again or fail.
463 undo_action = IGNORE;
464 } else {
465 undo_action = pc->is_capture() ? CLEAR : RESTORE;
466 }
467 DCHECK(!absolute)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(!absolute)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(!absolute))), 0))) { do { } while
(false); MOZ_ReportAssertionFailure("!absolute", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 467); AnnotateMozCrashReason("MOZ_ASSERT" "(" "!absolute" ")"
); do { *((volatile int*)__null) = 467; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
468 DCHECK_EQ(value, 0)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((value) == (0))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((value) == (0)))), 0))) { do
{ } while (false); MOZ_ReportAssertionFailure("(value) == (0)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 468); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(value) == (0)"
")"); do { *((volatile int*)__null) = 468; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
469 break;
470 }
471 case ActionNode::CLEAR_CAPTURES: {
472 // Since we're scanning in reverse order, if we've already
473 // set the position we have to ignore historically earlier
474 // clearing operations.
475 if (store_position == kNoStore) {
476 clear = true;
477 }
478 undo_action = RESTORE;
479 DCHECK(!absolute)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(!absolute)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(!absolute))), 0))) { do { } while
(false); MOZ_ReportAssertionFailure("!absolute", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 479); AnnotateMozCrashReason("MOZ_ASSERT" "(" "!absolute" ")"
); do { *((volatile int*)__null) = 479; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
480 DCHECK_EQ(value, 0)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((value) == (0))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((value) == (0)))), 0))) { do
{ } while (false); MOZ_ReportAssertionFailure("(value) == (0)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 480); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(value) == (0)"
")"); do { *((volatile int*)__null) = 480; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
481 break;
482 }
483 default:
484 UNREACHABLE()do { do { } while (false); MOZ_ReportCrash("" "unreachable code"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 484); AnnotateMozCrashReason("MOZ_CRASH(" "unreachable code"
")"); do { *((volatile int*)__null) = 484; __attribute__((nomerge
)) ::abort(); } while (false); } while (false)
;
485 }
486 }
487 }
488 // Prepare for the undo-action (e.g., push if it's going to be popped).
489 if (undo_action == RESTORE) {
490 pushes++;
491 RegExpMacroAssembler::StackCheckFlag stack_check =
492 RegExpMacroAssembler::kNoStackLimitCheck;
493 if (pushes == push_limit) {
494 stack_check = RegExpMacroAssembler::kCheckStackLimit;
495 pushes = 0;
496 }
497
498 assembler->PushRegister(reg, stack_check);
499 registers_to_pop->Set(reg, zone);
500 } else if (undo_action == CLEAR) {
501 registers_to_clear->Set(reg, zone);
502 }
503 // Perform the chronologically last action (or accumulated increment)
504 // for the register.
505 if (store_position != kNoStore) {
506 assembler->WriteCurrentPositionToRegister(reg, store_position);
507 } else if (clear) {
508 assembler->ClearRegisters(reg, reg);
509 } else if (absolute) {
510 assembler->SetRegister(reg, value);
511 } else if (value != 0) {
512 assembler->AdvanceRegister(reg, value);
513 }
514 }
515}
516
517// This is called as we come into a loop choice node and some other tricky
518// nodes. It normalizes the state of the code generator to ensure we can
519// generate generic code.
520void Trace::Flush(RegExpCompiler* compiler, RegExpNode* successor) {
521 RegExpMacroAssembler* assembler = compiler->macro_assembler();
522
523 DCHECK(!is_trivial())do { static_assert( mozilla::detail::AssertionConditionType<
decltype(!is_trivial())>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(!is_trivial()))), 0))) { do {
} while (false); MOZ_ReportAssertionFailure("!is_trivial()",
"/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 523); AnnotateMozCrashReason("MOZ_ASSERT" "(" "!is_trivial()"
")"); do { *((volatile int*)__null) = 523; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
524
525 if (actions_ == nullptr && backtrack() == nullptr) {
526 // Here we just have some deferred cp advances to fix and we are back to
527 // a normal situation. We may also have to forget some information gained
528 // through a quick check that was already performed.
529 if (cp_offset_ != 0) assembler->AdvanceCurrentPosition(cp_offset_);
530 // Create a new trivial state and generate the node with that.
531 Trace new_state;
532 successor->Emit(compiler, &new_state);
533 return;
534 }
535
536 // Generate deferred actions here along with code to undo them again.
537 DynamicBitSet affected_registers;
538
539 if (backtrack() != nullptr) {
540 // Here we have a concrete backtrack location. These are set up by choice
541 // nodes and so they indicate that we have a deferred save of the current
542 // position which we may need to emit here.
543 assembler->PushCurrentPosition();
544 }
545
546 int max_register =
547 FindAffectedRegisters(&affected_registers, compiler->zone());
548 DynamicBitSet registers_to_pop;
549 DynamicBitSet registers_to_clear;
550 PerformDeferredActions(assembler, max_register, affected_registers,
551 &registers_to_pop, &registers_to_clear,
552 compiler->zone());
553 if (cp_offset_ != 0) {
554 assembler->AdvanceCurrentPosition(cp_offset_);
555 }
556
557 // Create a new trivial state and generate the node with that.
558 Label undo;
559 assembler->PushBacktrack(&undo);
560 if (successor->KeepRecursing(compiler)) {
561 Trace new_state;
562 successor->Emit(compiler, &new_state);
563 } else {
564 compiler->AddWork(successor);
565 assembler->GoTo(successor->label());
566 }
567
568 // On backtrack we need to restore state.
569 assembler->BindJumpTarget(&undo);
570 RestoreAffectedRegisters(assembler, max_register, registers_to_pop,
571 registers_to_clear);
572 if (backtrack() == nullptr) {
573 assembler->Backtrack();
574 } else {
575 assembler->PopCurrentPosition();
576 assembler->GoTo(backtrack());
577 }
578}
579
580void NegativeSubmatchSuccess::Emit(RegExpCompiler* compiler, Trace* trace) {
581 RegExpMacroAssembler* assembler = compiler->macro_assembler();
582
583 // Omit flushing the trace. We discard the entire stack frame anyway.
584
585 if (!label()->is_bound()) {
586 // We are completely independent of the trace, since we ignore it,
587 // so this code can be used as the generic version.
588 assembler->Bind(label());
589 }
590
591 // Throw away everything on the backtrack stack since the start
592 // of the negative submatch and restore the character position.
593 assembler->ReadCurrentPositionFromRegister(current_position_register_);
594 assembler->ReadStackPointerFromRegister(stack_pointer_register_);
595 if (clear_capture_count_ > 0) {
596 // Clear any captures that might have been performed during the success
597 // of the body of the negative look-ahead.
598 int clear_capture_end = clear_capture_start_ + clear_capture_count_ - 1;
599 assembler->ClearRegisters(clear_capture_start_, clear_capture_end);
600 }
601 // Now that we have unwound the stack we find at the top of the stack the
602 // backtrack that the BeginNegativeSubmatch node got.
603 assembler->Backtrack();
604}
605
606void EndNode::Emit(RegExpCompiler* compiler, Trace* trace) {
607 if (!trace->is_trivial()) {
608 trace->Flush(compiler, this);
609 return;
610 }
611 RegExpMacroAssembler* assembler = compiler->macro_assembler();
612 if (!label()->is_bound()) {
613 assembler->Bind(label());
614 }
615 switch (action_) {
616 case ACCEPT:
617 assembler->Succeed();
618 return;
619 case BACKTRACK:
620 assembler->GoTo(trace->backtrack());
621 return;
622 case NEGATIVE_SUBMATCH_SUCCESS:
623 // This case is handled in a different virtual method.
624 UNREACHABLE()do { do { } while (false); MOZ_ReportCrash("" "unreachable code"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 624); AnnotateMozCrashReason("MOZ_CRASH(" "unreachable code"
")"); do { *((volatile int*)__null) = 624; __attribute__((nomerge
)) ::abort(); } while (false); } while (false)
;
625 }
626 UNIMPLEMENTED()do { do { } while (false); MOZ_ReportCrash("" "unimplemented code"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 626); AnnotateMozCrashReason("MOZ_CRASH(" "unimplemented code"
")"); do { *((volatile int*)__null) = 626; __attribute__((nomerge
)) ::abort(); } while (false); } while (false)
;
627}
628
629void GuardedAlternative::AddGuard(Guard* guard, Zone* zone) {
630 if (guards_ == nullptr) guards_ = zone->New<ZoneList<Guard*>>(1, zone);
631 guards_->Add(guard, zone);
632}
633
634ActionNode* ActionNode::SetRegisterForLoop(int reg, int val,
635 RegExpNode* on_success) {
636 ActionNode* result =
637 on_success->zone()->New<ActionNode>(SET_REGISTER_FOR_LOOP, on_success);
638 result->data_.u_store_register.reg = reg;
639 result->data_.u_store_register.value = val;
640 return result;
641}
642
643ActionNode* ActionNode::IncrementRegister(int reg, RegExpNode* on_success) {
644 ActionNode* result =
645 on_success->zone()->New<ActionNode>(INCREMENT_REGISTER, on_success);
646 result->data_.u_increment_register.reg = reg;
647 return result;
648}
649
650ActionNode* ActionNode::StorePosition(int reg, bool is_capture,
651 RegExpNode* on_success) {
652 ActionNode* result =
653 on_success->zone()->New<ActionNode>(STORE_POSITION, on_success);
654 result->data_.u_position_register.reg = reg;
655 result->data_.u_position_register.is_capture = is_capture;
656 return result;
657}
658
659ActionNode* ActionNode::ClearCaptures(Interval range, RegExpNode* on_success) {
660 ActionNode* result =
661 on_success->zone()->New<ActionNode>(CLEAR_CAPTURES, on_success);
662 result->data_.u_clear_captures.range_from = range.from();
663 result->data_.u_clear_captures.range_to = range.to();
664 return result;
665}
666
667ActionNode* ActionNode::BeginPositiveSubmatch(int stack_reg, int position_reg,
668 RegExpNode* on_success) {
669 ActionNode* result =
670 on_success->zone()->New<ActionNode>(BEGIN_POSITIVE_SUBMATCH, on_success);
671 result->data_.u_submatch.stack_pointer_register = stack_reg;
672 result->data_.u_submatch.current_position_register = position_reg;
673 return result;
674}
675
676ActionNode* ActionNode::BeginNegativeSubmatch(int stack_reg, int position_reg,
677 RegExpNode* on_success) {
678 ActionNode* result =
679 on_success->zone()->New<ActionNode>(BEGIN_NEGATIVE_SUBMATCH, on_success);
680 result->data_.u_submatch.stack_pointer_register = stack_reg;
681 result->data_.u_submatch.current_position_register = position_reg;
682 return result;
683}
684
685ActionNode* ActionNode::PositiveSubmatchSuccess(int stack_reg, int position_reg,
686 int clear_register_count,
687 int clear_register_from,
688 RegExpNode* on_success) {
689 ActionNode* result = on_success->zone()->New<ActionNode>(
690 POSITIVE_SUBMATCH_SUCCESS, on_success);
691 result->data_.u_submatch.stack_pointer_register = stack_reg;
692 result->data_.u_submatch.current_position_register = position_reg;
693 result->data_.u_submatch.clear_register_count = clear_register_count;
694 result->data_.u_submatch.clear_register_from = clear_register_from;
695 return result;
696}
697
698ActionNode* ActionNode::EmptyMatchCheck(int start_register,
699 int repetition_register,
700 int repetition_limit,
701 RegExpNode* on_success) {
702 ActionNode* result =
703 on_success->zone()->New<ActionNode>(EMPTY_MATCH_CHECK, on_success);
704 result->data_.u_empty_match_check.start_register = start_register;
705 result->data_.u_empty_match_check.repetition_register = repetition_register;
706 result->data_.u_empty_match_check.repetition_limit = repetition_limit;
707 return result;
708}
709
710ActionNode* ActionNode::ModifyFlags(RegExpFlags flags, RegExpNode* on_success) {
711 ActionNode* result =
712 on_success->zone()->New<ActionNode>(MODIFY_FLAGS, on_success);
713 result->data_.u_modify_flags.flags = flags;
714 return result;
715}
716
717#define DEFINE_ACCEPT(Type) \
718 void Type##Node::Accept(NodeVisitor* visitor) { visitor->Visit##Type(this); }
719FOR_EACH_NODE_TYPE(DEFINE_ACCEPT)DEFINE_ACCEPT(End) DEFINE_ACCEPT(Action) DEFINE_ACCEPT(Choice
) DEFINE_ACCEPT(LoopChoice) DEFINE_ACCEPT(NegativeLookaroundChoice
) DEFINE_ACCEPT(BackReference) DEFINE_ACCEPT(Assertion) DEFINE_ACCEPT
(Text)
720#undef DEFINE_ACCEPT
721
722// -------------------------------------------------------------------
723// Emit code.
724
725void ChoiceNode::GenerateGuard(RegExpMacroAssembler* macro_assembler,
726 Guard* guard, Trace* trace) {
727 switch (guard->op()) {
728 case Guard::LT:
729 DCHECK(!trace->mentions_reg(guard->reg()))do { static_assert( mozilla::detail::AssertionConditionType<
decltype(!trace->mentions_reg(guard->reg()))>::isValid
, "invalid assertion condition"); if ((__builtin_expect(!!(!(
!!(!trace->mentions_reg(guard->reg())))), 0))) { do { }
while (false); MOZ_ReportAssertionFailure("!trace->mentions_reg(guard->reg())"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 729); AnnotateMozCrashReason("MOZ_ASSERT" "(" "!trace->mentions_reg(guard->reg())"
")"); do { *((volatile int*)__null) = 729; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
730 macro_assembler->IfRegisterGE(guard->reg(), guard->value(),
731 trace->backtrack());
732 break;
733 case Guard::GEQ:
734 DCHECK(!trace->mentions_reg(guard->reg()))do { static_assert( mozilla::detail::AssertionConditionType<
decltype(!trace->mentions_reg(guard->reg()))>::isValid
, "invalid assertion condition"); if ((__builtin_expect(!!(!(
!!(!trace->mentions_reg(guard->reg())))), 0))) { do { }
while (false); MOZ_ReportAssertionFailure("!trace->mentions_reg(guard->reg())"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 734); AnnotateMozCrashReason("MOZ_ASSERT" "(" "!trace->mentions_reg(guard->reg())"
")"); do { *((volatile int*)__null) = 734; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
735 macro_assembler->IfRegisterLT(guard->reg(), guard->value(),
736 trace->backtrack());
737 break;
738 }
739}
740
741namespace {
742
743#ifdef DEBUG1
744bool ContainsOnlyUtf16CodeUnits(unibrow::uchar* chars, int length) {
745 static_assert(sizeof(unibrow::uchar) == 4);
746 for (int i = 0; i < length; i++) {
747 if (chars[i] > String::kMaxUtf16CodeUnit) return false;
748 }
749 return true;
750}
751#endif // DEBUG
752
753// Returns the number of characters in the equivalence class, omitting those
754// that cannot occur in the source string because it is Latin1.
755int GetCaseIndependentLetters(Isolate* isolate, base::uc16 character,
756 bool one_byte_subject, unibrow::uchar* letters,
757 int letter_length) {
758#ifdef V8_INTL_SUPPORT1
759 if (RegExpCaseFolding::IgnoreSet().contains(character)) {
760 letters[0] = character;
761 DCHECK(ContainsOnlyUtf16CodeUnits(letters, 1))do { static_assert( mozilla::detail::AssertionConditionType<
decltype(ContainsOnlyUtf16CodeUnits(letters, 1))>::isValid
, "invalid assertion condition"); if ((__builtin_expect(!!(!(
!!(ContainsOnlyUtf16CodeUnits(letters, 1)))), 0))) { do { } while
(false); MOZ_ReportAssertionFailure("ContainsOnlyUtf16CodeUnits(letters, 1)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 761); AnnotateMozCrashReason("MOZ_ASSERT" "(" "ContainsOnlyUtf16CodeUnits(letters, 1)"
")"); do { *((volatile int*)__null) = 761; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
762 return 1;
763 }
764 bool in_special_add_set =
765 RegExpCaseFolding::SpecialAddSet().contains(character);
766
767 icu::UnicodeSet set;
768 set.add(character);
769 set = set.closeOver(USET_CASE_INSENSITIVE);
770
771 UChar32 canon = 0;
772 if (in_special_add_set) {
773 canon = RegExpCaseFolding::Canonicalize(character);
774 }
775
776 int32_t range_count = set.getRangeCount();
777 int items = 0;
778 for (int32_t i = 0; i < range_count; i++) {
779 UChar32 start = set.getRangeStart(i);
780 UChar32 end = set.getRangeEnd(i);
781 CHECK(end - start + items <= letter_length)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(end - start + items <= letter_length)>::isValid
, "invalid assertion condition"); if ((__builtin_expect(!!(!(
!!(end - start + items <= letter_length))), 0))) { do { } while
(false); MOZ_ReportAssertionFailure("end - start + items <= letter_length"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 781); AnnotateMozCrashReason("MOZ_RELEASE_ASSERT" "(" "end - start + items <= letter_length"
")"); do { *((volatile int*)__null) = 781; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
782 for (UChar32 cu = start; cu <= end; cu++) {
783 if (one_byte_subject && cu > String::kMaxOneByteCharCode) break;
784 if (in_special_add_set && RegExpCaseFolding::Canonicalize(cu) != canon) {
785 continue;
786 }
787 letters[items++] = static_cast<unibrow::uchar>(cu);
788 }
789 }
790 DCHECK(ContainsOnlyUtf16CodeUnits(letters, items))do { static_assert( mozilla::detail::AssertionConditionType<
decltype(ContainsOnlyUtf16CodeUnits(letters, items))>::isValid
, "invalid assertion condition"); if ((__builtin_expect(!!(!(
!!(ContainsOnlyUtf16CodeUnits(letters, items)))), 0))) { do {
} while (false); MOZ_ReportAssertionFailure("ContainsOnlyUtf16CodeUnits(letters, items)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 790); AnnotateMozCrashReason("MOZ_ASSERT" "(" "ContainsOnlyUtf16CodeUnits(letters, items)"
")"); do { *((volatile int*)__null) = 790; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
791 return items;
792#else
793 int length =
794 isolate->jsregexp_uncanonicalize()->get(character, '\0', letters);
795 // Unibrow returns 0 or 1 for characters where case independence is
796 // trivial.
797 if (length == 0) {
798 letters[0] = character;
799 length = 1;
800 }
801
802 if (one_byte_subject) {
803 int new_length = 0;
804 for (int i = 0; i < length; i++) {
805 if (letters[i] <= String::kMaxOneByteCharCode) {
806 letters[new_length++] = letters[i];
807 }
808 }
809 length = new_length;
810 }
811
812 DCHECK(ContainsOnlyUtf16CodeUnits(letters, length))do { static_assert( mozilla::detail::AssertionConditionType<
decltype(ContainsOnlyUtf16CodeUnits(letters, length))>::isValid
, "invalid assertion condition"); if ((__builtin_expect(!!(!(
!!(ContainsOnlyUtf16CodeUnits(letters, length)))), 0))) { do {
} while (false); MOZ_ReportAssertionFailure("ContainsOnlyUtf16CodeUnits(letters, length)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 812); AnnotateMozCrashReason("MOZ_ASSERT" "(" "ContainsOnlyUtf16CodeUnits(letters, length)"
")"); do { *((volatile int*)__null) = 812; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
813 return length;
814#endif // V8_INTL_SUPPORT
815}
816
817inline bool EmitSimpleCharacter(Isolate* isolate, RegExpCompiler* compiler,
818 base::uc16 c, Label* on_failure, int cp_offset,
819 bool check, bool preloaded) {
820 RegExpMacroAssembler* assembler = compiler->macro_assembler();
821 bool bound_checked = false;
822 if (!preloaded) {
823 assembler->LoadCurrentCharacter(cp_offset, on_failure, check);
824 bound_checked = true;
825 }
826 assembler->CheckNotCharacter(c, on_failure);
827 return bound_checked;
828}
829
830// Only emits non-letters (things that don't have case). Only used for case
831// independent matches.
832inline bool EmitAtomNonLetter(Isolate* isolate, RegExpCompiler* compiler,
833 base::uc16 c, Label* on_failure, int cp_offset,
834 bool check, bool preloaded) {
835 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
836 bool one_byte = compiler->one_byte();
837 unibrow::uchar chars[4];
838 int length = GetCaseIndependentLetters(isolate, c, one_byte, chars, 4);
839 if (length < 1) {
840 // This can't match. Must be an one-byte subject and a non-one-byte
841 // character. We do not need to do anything since the one-byte pass
842 // already handled this.
843 return false; // Bounds not checked.
844 }
845 bool checked = false;
846 // We handle the length > 1 case in a later pass.
847 if (length == 1) {
848 if (one_byte && c > String::kMaxOneByteCharCodeU) {
849 // Can't match - see above.
850 return false; // Bounds not checked.
851 }
852 if (!preloaded) {
853 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check);
854 checked = check;
855 }
856 macro_assembler->CheckNotCharacter(c, on_failure);
857 }
858 return checked;
859}
860
861bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler,
862 bool one_byte, base::uc16 c1, base::uc16 c2,
863 Label* on_failure) {
864 const uint32_t char_mask = CharMask(one_byte);
865 base::uc16 exor = c1 ^ c2;
866 // Check whether exor has only one bit set.
867 if (((exor - 1) & exor) == 0) {
868 // If c1 and c2 differ only by one bit.
869 // Ecma262UnCanonicalize always gives the highest number last.
870 DCHECK(c2 > c1)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(c2 > c1)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(c2 > c1))), 0))) { do { }
while (false); MOZ_ReportAssertionFailure("c2 > c1", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 870); AnnotateMozCrashReason("MOZ_ASSERT" "(" "c2 > c1" ")"
); do { *((volatile int*)__null) = 870; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
871 base::uc16 mask = char_mask ^ exor;
872 macro_assembler->CheckNotCharacterAfterAnd(c1, mask, on_failure);
873 return true;
874 }
875 DCHECK(c2 > c1)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(c2 > c1)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(c2 > c1))), 0))) { do { }
while (false); MOZ_ReportAssertionFailure("c2 > c1", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 875); AnnotateMozCrashReason("MOZ_ASSERT" "(" "c2 > c1" ")"
); do { *((volatile int*)__null) = 875; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
876 base::uc16 diff = c2 - c1;
877 if (((diff - 1) & diff) == 0 && c1 >= diff) {
878 // If the characters differ by 2^n but don't differ by one bit then
879 // subtract the difference from the found character, then do the or
880 // trick. We avoid the theoretical case where negative numbers are
881 // involved in order to simplify code generation.
882 base::uc16 mask = char_mask ^ diff;
883 macro_assembler->CheckNotCharacterAfterMinusAnd(c1 - diff, diff, mask,
884 on_failure);
885 return true;
886 }
887 return false;
888}
889
890// Only emits letters (things that have case). Only used for case independent
891// matches.
892inline bool EmitAtomLetter(Isolate* isolate, RegExpCompiler* compiler,
893 base::uc16 c, Label* on_failure, int cp_offset,
894 bool check, bool preloaded) {
895 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
896 bool one_byte = compiler->one_byte();
897 unibrow::uchar chars[4];
898 int length = GetCaseIndependentLetters(isolate, c, one_byte, chars, 4);
899 if (length <= 1) return false;
900 // We may not need to check against the end of the input string
901 // if this character lies before a character that matched.
902 if (!preloaded) {
903 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check);
904 }
905 Label ok;
906 switch (length) {
907 case 2: {
908 if (ShortCutEmitCharacterPair(macro_assembler, one_byte, chars[0],
909 chars[1], on_failure)) {
910 } else {
911 macro_assembler->CheckCharacter(chars[0], &ok);
912 macro_assembler->CheckNotCharacter(chars[1], on_failure);
913 macro_assembler->Bind(&ok);
914 }
915 break;
916 }
917 case 4:
918 macro_assembler->CheckCharacter(chars[3], &ok);
919 [[fallthrough]];
920 case 3:
921 macro_assembler->CheckCharacter(chars[0], &ok);
922 macro_assembler->CheckCharacter(chars[1], &ok);
923 macro_assembler->CheckNotCharacter(chars[2], on_failure);
924 macro_assembler->Bind(&ok);
925 break;
926 default:
927 UNREACHABLE()do { do { } while (false); MOZ_ReportCrash("" "unreachable code"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 927); AnnotateMozCrashReason("MOZ_CRASH(" "unreachable code"
")"); do { *((volatile int*)__null) = 927; __attribute__((nomerge
)) ::abort(); } while (false); } while (false)
;
928 }
929 return true;
930}
931
932void EmitBoundaryTest(RegExpMacroAssembler* masm, int border,
933 Label* fall_through, Label* above_or_equal,
934 Label* below) {
935 if (below != fall_through) {
936 masm->CheckCharacterLT(border, below);
937 if (above_or_equal != fall_through) masm->GoTo(above_or_equal);
938 } else {
939 masm->CheckCharacterGT(border - 1, above_or_equal);
940 }
941}
942
943void EmitDoubleBoundaryTest(RegExpMacroAssembler* masm, int first, int last,
944 Label* fall_through, Label* in_range,
945 Label* out_of_range) {
946 if (in_range == fall_through) {
947 if (first == last) {
948 masm->CheckNotCharacter(first, out_of_range);
949 } else {
950 masm->CheckCharacterNotInRange(first, last, out_of_range);
951 }
952 } else {
953 if (first == last) {
954 masm->CheckCharacter(first, in_range);
955 } else {
956 masm->CheckCharacterInRange(first, last, in_range);
957 }
958 if (out_of_range != fall_through) masm->GoTo(out_of_range);
959 }
960}
961
962// even_label is for ranges[i] to ranges[i + 1] where i - start_index is even.
963// odd_label is for ranges[i] to ranges[i + 1] where i - start_index is odd.
964void EmitUseLookupTable(RegExpMacroAssembler* masm,
965 ZoneList<base::uc32>* ranges, uint32_t start_index,
966 uint32_t end_index, base::uc32 min_char,
967 Label* fall_through, Label* even_label,
968 Label* odd_label) {
969 static const uint32_t kSize = RegExpMacroAssembler::kTableSize;
970 static const uint32_t kMask = RegExpMacroAssembler::kTableMask;
971
972 base::uc32 base = (min_char & ~kMask);
973 USE(base)do { ::v8::base::Use unused_tmp_array_for_use_macro[]{base}; (
void)unused_tmp_array_for_use_macro; } while (false)
;
974
975 // Assert that everything is on one kTableSize page.
976 for (uint32_t i = start_index; i <= end_index; i++) {
977 DCHECK_EQ(ranges->at(i) & ~kMask, base)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((ranges->at(i) & ~kMask) == (base))>::isValid
, "invalid assertion condition"); if ((__builtin_expect(!!(!(
!!((ranges->at(i) & ~kMask) == (base)))), 0))) { do { }
while (false); MOZ_ReportAssertionFailure("(ranges->at(i) & ~kMask) == (base)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 977); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(ranges->at(i) & ~kMask) == (base)"
")"); do { *((volatile int*)__null) = 977; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
978 }
979 DCHECK(start_index == 0 || (ranges->at(start_index - 1) & ~kMask) <= base)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(start_index == 0 || (ranges->at(start_index - 1) &
~kMask) <= base)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(start_index == 0 || (ranges->
at(start_index - 1) & ~kMask) <= base))), 0))) { do { }
while (false); MOZ_ReportAssertionFailure("start_index == 0 || (ranges->at(start_index - 1) & ~kMask) <= base"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 979); AnnotateMozCrashReason("MOZ_ASSERT" "(" "start_index == 0 || (ranges->at(start_index - 1) & ~kMask) <= base"
")"); do { *((volatile int*)__null) = 979; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
980
981 char templ[kSize];
982 Label* on_bit_set;
983 Label* on_bit_clear;
984 int bit;
985 if (even_label == fall_through) {
986 on_bit_set = odd_label;
987 on_bit_clear = even_label;
988 bit = 1;
989 } else {
990 on_bit_set = even_label;
991 on_bit_clear = odd_label;
992 bit = 0;
993 }
994 for (uint32_t i = 0; i < (ranges->at(start_index) & kMask) && i < kSize;
995 i++) {
996 templ[i] = bit;
997 }
998 uint32_t j = 0;
999 bit ^= 1;
1000 for (uint32_t i = start_index; i < end_index; i++) {
1001 for (j = (ranges->at(i) & kMask); j < (ranges->at(i + 1) & kMask); j++) {
1002 templ[j] = bit;
1003 }
1004 bit ^= 1;
1005 }
1006 for (uint32_t i = j; i < kSize; i++) {
1007 templ[i] = bit;
1008 }
1009 Factory* factory = masm->isolate()->factory();
1010 // TODO(erikcorry): Cache these.
1011 Handle<ByteArray> ba = factory->NewByteArray(kSize, AllocationType::kOld);
1012 for (uint32_t i = 0; i < kSize; i++) {
1013 ba->set(i, templ[i]);
1014 }
1015 masm->CheckBitInTable(ba, on_bit_set);
1016 if (on_bit_clear != fall_through) masm->GoTo(on_bit_clear);
1017}
1018
1019void CutOutRange(RegExpMacroAssembler* masm, ZoneList<base::uc32>* ranges,
1020 uint32_t start_index, uint32_t end_index, uint32_t cut_index,
1021 Label* even_label, Label* odd_label) {
1022 bool odd = (((cut_index - start_index) & 1) == 1);
1023 Label* in_range_label = odd ? odd_label : even_label;
1024 Label dummy;
1025 EmitDoubleBoundaryTest(masm, ranges->at(cut_index),
1026 ranges->at(cut_index + 1) - 1, &dummy, in_range_label,
1027 &dummy);
1028 DCHECK(!dummy.is_linked())do { static_assert( mozilla::detail::AssertionConditionType<
decltype(!dummy.is_linked())>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(!dummy.is_linked()))), 0))) {
do { } while (false); MOZ_ReportAssertionFailure("!dummy.is_linked()"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1028); AnnotateMozCrashReason("MOZ_ASSERT" "(" "!dummy.is_linked()"
")"); do { *((volatile int*)__null) = 1028; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1029 // Cut out the single range by rewriting the array. This creates a new
1030 // range that is a merger of the two ranges on either side of the one we
1031 // are cutting out. The oddity of the labels is preserved.
1032 for (uint32_t j = cut_index; j > start_index; j--) {
1033 ranges->at(j) = ranges->at(j - 1);
1034 }
1035 for (uint32_t j = cut_index + 1; j < end_index; j++) {
1036 ranges->at(j) = ranges->at(j + 1);
1037 }
1038}
1039
1040// Unicode case. Split the search space into kSize spaces that are handled
1041// with recursion.
1042void SplitSearchSpace(ZoneList<base::uc32>* ranges, uint32_t start_index,
1043 uint32_t end_index, uint32_t* new_start_index,
1044 uint32_t* new_end_index, base::uc32* border) {
1045 static const uint32_t kSize = RegExpMacroAssembler::kTableSize;
1046 static const uint32_t kMask = RegExpMacroAssembler::kTableMask;
1047
1048 base::uc32 first = ranges->at(start_index);
1049 base::uc32 last = ranges->at(end_index) - 1;
1050
1051 *new_start_index = start_index;
1052 *border = (ranges->at(start_index) & ~kMask) + kSize;
1053 while (*new_start_index < end_index) {
1054 if (ranges->at(*new_start_index) > *border) break;
1055 (*new_start_index)++;
1056 }
1057 // new_start_index is the index of the first edge that is beyond the
1058 // current kSize space.
1059
1060 // For very large search spaces we do a binary chop search of the non-Latin1
1061 // space instead of just going to the end of the current kSize space. The
1062 // heuristics are complicated a little by the fact that any 128-character
1063 // encoding space can be quickly tested with a table lookup, so we don't
1064 // wish to do binary chop search at a smaller granularity than that. A
1065 // 128-character space can take up a lot of space in the ranges array if,
1066 // for example, we only want to match every second character (eg. the lower
1067 // case characters on some Unicode pages).
1068 uint32_t binary_chop_index = (end_index + start_index) / 2;
1069 // The first test ensures that we get to the code that handles the Latin1
1070 // range with a single not-taken branch, speeding up this important
1071 // character range (even non-Latin1 charset-based text has spaces and
1072 // punctuation).
1073 if (*border - 1 > String::kMaxOneByteCharCode && // Latin1 case.
1074 end_index - start_index > (*new_start_index - start_index) * 2 &&
1075 last - first > kSize * 2 && binary_chop_index > *new_start_index &&
1076 ranges->at(binary_chop_index) >= first + 2 * kSize) {
1077 uint32_t scan_forward_for_section_border = binary_chop_index;
1078 uint32_t new_border = (ranges->at(binary_chop_index) | kMask) + 1;
1079
1080 while (scan_forward_for_section_border < end_index) {
1081 if (ranges->at(scan_forward_for_section_border) > new_border) {
1082 *new_start_index = scan_forward_for_section_border;
1083 *border = new_border;
1084 break;
1085 }
1086 scan_forward_for_section_border++;
1087 }
1088 }
1089
1090 DCHECK(*new_start_index > start_index)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(*new_start_index > start_index)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(*new_start_index > start_index
))), 0))) { do { } while (false); MOZ_ReportAssertionFailure(
"*new_start_index > start_index", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1090); AnnotateMozCrashReason("MOZ_ASSERT" "(" "*new_start_index > start_index"
")"); do { *((volatile int*)__null) = 1090; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1091 *new_end_index = *new_start_index - 1;
1092 if (ranges->at(*new_end_index) == *border) {
1093 (*new_end_index)--;
1094 }
1095 if (*border >= ranges->at(end_index)) {
1096 *border = ranges->at(end_index);
1097 *new_start_index = end_index; // Won't be used.
1098 *new_end_index = end_index - 1;
1099 }
1100}
1101
1102// Gets a series of segment boundaries representing a character class. If the
1103// character is in the range between an even and an odd boundary (counting from
1104// start_index) then go to even_label, otherwise go to odd_label. We already
1105// know that the character is in the range of min_char to max_char inclusive.
1106// Either label can be nullptr indicating backtracking. Either label can also
1107// be equal to the fall_through label.
1108void GenerateBranches(RegExpMacroAssembler* masm, ZoneList<base::uc32>* ranges,
1109 uint32_t start_index, uint32_t end_index,
1110 base::uc32 min_char, base::uc32 max_char,
1111 Label* fall_through, Label* even_label,
1112 Label* odd_label) {
1113 DCHECK_LE(min_char, String::kMaxUtf16CodeUnit)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((min_char) <= (String::kMaxUtf16CodeUnit))>::isValid
, "invalid assertion condition"); if ((__builtin_expect(!!(!(
!!((min_char) <= (String::kMaxUtf16CodeUnit)))), 0))) { do
{ } while (false); MOZ_ReportAssertionFailure("(min_char) <= (String::kMaxUtf16CodeUnit)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1113); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(min_char) <= (String::kMaxUtf16CodeUnit)"
")"); do { *((volatile int*)__null) = 1113; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1114 DCHECK_LE(max_char, String::kMaxUtf16CodeUnit)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((max_char) <= (String::kMaxUtf16CodeUnit))>::isValid
, "invalid assertion condition"); if ((__builtin_expect(!!(!(
!!((max_char) <= (String::kMaxUtf16CodeUnit)))), 0))) { do
{ } while (false); MOZ_ReportAssertionFailure("(max_char) <= (String::kMaxUtf16CodeUnit)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1114); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(max_char) <= (String::kMaxUtf16CodeUnit)"
")"); do { *((volatile int*)__null) = 1114; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1115
1116 base::uc32 first = ranges->at(start_index);
1117 base::uc32 last = ranges->at(end_index) - 1;
1118
1119 DCHECK_LT(min_char, first)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((min_char) < (first))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((min_char) < (first)))), 0
))) { do { } while (false); MOZ_ReportAssertionFailure("(min_char) < (first)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1119); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(min_char) < (first)"
")"); do { *((volatile int*)__null) = 1119; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1120
1121 // Just need to test if the character is before or on-or-after
1122 // a particular character.
1123 if (start_index == end_index) {
1124 EmitBoundaryTest(masm, first, fall_through, even_label, odd_label);
1125 return;
1126 }
1127
1128 // Another almost trivial case: There is one interval in the middle that is
1129 // different from the end intervals.
1130 if (start_index + 1 == end_index) {
1131 EmitDoubleBoundaryTest(masm, first, last, fall_through, even_label,
1132 odd_label);
1133 return;
1134 }
1135
1136 // It's not worth using table lookup if there are very few intervals in the
1137 // character class.
1138 if (end_index - start_index <= 6) {
1139 // It is faster to test for individual characters, so we look for those
1140 // first, then try arbitrary ranges in the second round.
1141 static uint32_t kNoCutIndex = -1;
1142 uint32_t cut = kNoCutIndex;
1143 for (uint32_t i = start_index; i < end_index; i++) {
1144 if (ranges->at(i) == ranges->at(i + 1) - 1) {
1145 cut = i;
1146 break;
1147 }
1148 }
1149 if (cut == kNoCutIndex) cut = start_index;
1150 CutOutRange(masm, ranges, start_index, end_index, cut, even_label,
1151 odd_label);
1152 DCHECK_GE(end_index - start_index, 2)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((end_index - start_index) >= (2))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((end_index - start_index) >=
(2)))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("(end_index - start_index) >= (2)", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1152); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(end_index - start_index) >= (2)"
")"); do { *((volatile int*)__null) = 1152; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1153 GenerateBranches(masm, ranges, start_index + 1, end_index - 1, min_char,
1154 max_char, fall_through, even_label, odd_label);
1155 return;
1156 }
1157
1158 // If there are a lot of intervals in the regexp, then we will use tables to
1159 // determine whether the character is inside or outside the character class.
1160 static const int kBits = RegExpMacroAssembler::kTableSizeBits;
1161
1162 if ((max_char >> kBits) == (min_char >> kBits)) {
1163 EmitUseLookupTable(masm, ranges, start_index, end_index, min_char,
1164 fall_through, even_label, odd_label);
1165 return;
1166 }
1167
1168 if ((min_char >> kBits) != first >> kBits) {
1169 masm->CheckCharacterLT(first, odd_label);
1170 GenerateBranches(masm, ranges, start_index + 1, end_index, first, max_char,
1171 fall_through, odd_label, even_label);
1172 return;
1173 }
1174
1175 uint32_t new_start_index = 0;
1176 uint32_t new_end_index = 0;
1177 base::uc32 border = 0;
1178
1179 SplitSearchSpace(ranges, start_index, end_index, &new_start_index,
1180 &new_end_index, &border);
1181
1182 Label handle_rest;
1183 Label* above = &handle_rest;
1184 if (border == last + 1) {
1185 // We didn't find any section that started after the limit, so everything
1186 // above the border is one of the terminal labels.
1187 above = (end_index & 1) != (start_index & 1) ? odd_label : even_label;
1188 DCHECK(new_end_index == end_index - 1)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(new_end_index == end_index - 1)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(new_end_index == end_index -
1))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("new_end_index == end_index - 1", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1188); AnnotateMozCrashReason("MOZ_ASSERT" "(" "new_end_index == end_index - 1"
")"); do { *((volatile int*)__null) = 1188; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1189 }
1190
1191 DCHECK_LE(start_index, new_end_index)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((start_index) <= (new_end_index))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((start_index) <= (new_end_index
)))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("(start_index) <= (new_end_index)", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1191); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(start_index) <= (new_end_index)"
")"); do { *((volatile int*)__null) = 1191; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1192 DCHECK_LE(new_start_index, end_index)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((new_start_index) <= (end_index))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((new_start_index) <= (end_index
)))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("(new_start_index) <= (end_index)", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1192); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(new_start_index) <= (end_index)"
")"); do { *((volatile int*)__null) = 1192; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1193 DCHECK_LT(start_index, new_start_index)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((start_index) < (new_start_index))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((start_index) < (new_start_index
)))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("(start_index) < (new_start_index)", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1193); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(start_index) < (new_start_index)"
")"); do { *((volatile int*)__null) = 1193; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1194 DCHECK_LT(new_end_index, end_index)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((new_end_index) < (end_index))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((new_end_index) < (end_index
)))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("(new_end_index) < (end_index)", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1194); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(new_end_index) < (end_index)"
")"); do { *((volatile int*)__null) = 1194; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1195 DCHECK(new_end_index + 1 == new_start_index ||do { static_assert( mozilla::detail::AssertionConditionType<
decltype(new_end_index + 1 == new_start_index || (new_end_index
+ 2 == new_start_index && border == ranges->at(new_end_index
+ 1)))>::isValid, "invalid assertion condition"); if ((__builtin_expect
(!!(!(!!(new_end_index + 1 == new_start_index || (new_end_index
+ 2 == new_start_index && border == ranges->at(new_end_index
+ 1))))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("new_end_index + 1 == new_start_index || (new_end_index + 2 == new_start_index && border == ranges->at(new_end_index + 1))"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1197); AnnotateMozCrashReason("MOZ_ASSERT" "(" "new_end_index + 1 == new_start_index || (new_end_index + 2 == new_start_index && border == ranges->at(new_end_index + 1))"
")"); do { *((volatile int*)__null) = 1197; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
1196 (new_end_index + 2 == new_start_index &&do { static_assert( mozilla::detail::AssertionConditionType<
decltype(new_end_index + 1 == new_start_index || (new_end_index
+ 2 == new_start_index && border == ranges->at(new_end_index
+ 1)))>::isValid, "invalid assertion condition"); if ((__builtin_expect
(!!(!(!!(new_end_index + 1 == new_start_index || (new_end_index
+ 2 == new_start_index && border == ranges->at(new_end_index
+ 1))))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("new_end_index + 1 == new_start_index || (new_end_index + 2 == new_start_index && border == ranges->at(new_end_index + 1))"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1197); AnnotateMozCrashReason("MOZ_ASSERT" "(" "new_end_index + 1 == new_start_index || (new_end_index + 2 == new_start_index && border == ranges->at(new_end_index + 1))"
")"); do { *((volatile int*)__null) = 1197; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
1197 border == ranges->at(new_end_index + 1)))do { static_assert( mozilla::detail::AssertionConditionType<
decltype(new_end_index + 1 == new_start_index || (new_end_index
+ 2 == new_start_index && border == ranges->at(new_end_index
+ 1)))>::isValid, "invalid assertion condition"); if ((__builtin_expect
(!!(!(!!(new_end_index + 1 == new_start_index || (new_end_index
+ 2 == new_start_index && border == ranges->at(new_end_index
+ 1))))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("new_end_index + 1 == new_start_index || (new_end_index + 2 == new_start_index && border == ranges->at(new_end_index + 1))"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1197); AnnotateMozCrashReason("MOZ_ASSERT" "(" "new_end_index + 1 == new_start_index || (new_end_index + 2 == new_start_index && border == ranges->at(new_end_index + 1))"
")"); do { *((volatile int*)__null) = 1197; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1198 DCHECK_LT(min_char, border - 1)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((min_char) < (border - 1))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((min_char) < (border - 1)
))), 0))) { do { } while (false); MOZ_ReportAssertionFailure(
"(min_char) < (border - 1)", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1198); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(min_char) < (border - 1)"
")"); do { *((volatile int*)__null) = 1198; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1199 DCHECK_LT(border, max_char)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((border) < (max_char))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((border) < (max_char)))),
0))) { do { } while (false); MOZ_ReportAssertionFailure("(border) < (max_char)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1199); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(border) < (max_char)"
")"); do { *((volatile int*)__null) = 1199; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1200 DCHECK_LT(ranges->at(new_end_index), border)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((ranges->at(new_end_index)) < (border))>::isValid
, "invalid assertion condition"); if ((__builtin_expect(!!(!(
!!((ranges->at(new_end_index)) < (border)))), 0))) { do
{ } while (false); MOZ_ReportAssertionFailure("(ranges->at(new_end_index)) < (border)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1200); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(ranges->at(new_end_index)) < (border)"
")"); do { *((volatile int*)__null) = 1200; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1201 DCHECK(border < ranges->at(new_start_index) ||do { static_assert( mozilla::detail::AssertionConditionType<
decltype(border < ranges->at(new_start_index) || (border
== ranges->at(new_start_index) && new_start_index
== end_index && new_end_index == end_index - 1 &&
border == last + 1))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(border < ranges->at(new_start_index
) || (border == ranges->at(new_start_index) && new_start_index
== end_index && new_end_index == end_index - 1 &&
border == last + 1)))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("border < ranges->at(new_start_index) || (border == ranges->at(new_start_index) && new_start_index == end_index && new_end_index == end_index - 1 && border == last + 1)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1204); AnnotateMozCrashReason("MOZ_ASSERT" "(" "border < ranges->at(new_start_index) || (border == ranges->at(new_start_index) && new_start_index == end_index && new_end_index == end_index - 1 && border == last + 1)"
")"); do { *((volatile int*)__null) = 1204; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
1202 (border == ranges->at(new_start_index) &&do { static_assert( mozilla::detail::AssertionConditionType<
decltype(border < ranges->at(new_start_index) || (border
== ranges->at(new_start_index) && new_start_index
== end_index && new_end_index == end_index - 1 &&
border == last + 1))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(border < ranges->at(new_start_index
) || (border == ranges->at(new_start_index) && new_start_index
== end_index && new_end_index == end_index - 1 &&
border == last + 1)))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("border < ranges->at(new_start_index) || (border == ranges->at(new_start_index) && new_start_index == end_index && new_end_index == end_index - 1 && border == last + 1)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1204); AnnotateMozCrashReason("MOZ_ASSERT" "(" "border < ranges->at(new_start_index) || (border == ranges->at(new_start_index) && new_start_index == end_index && new_end_index == end_index - 1 && border == last + 1)"
")"); do { *((volatile int*)__null) = 1204; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
1203 new_start_index == end_index && new_end_index == end_index - 1 &&do { static_assert( mozilla::detail::AssertionConditionType<
decltype(border < ranges->at(new_start_index) || (border
== ranges->at(new_start_index) && new_start_index
== end_index && new_end_index == end_index - 1 &&
border == last + 1))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(border < ranges->at(new_start_index
) || (border == ranges->at(new_start_index) && new_start_index
== end_index && new_end_index == end_index - 1 &&
border == last + 1)))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("border < ranges->at(new_start_index) || (border == ranges->at(new_start_index) && new_start_index == end_index && new_end_index == end_index - 1 && border == last + 1)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1204); AnnotateMozCrashReason("MOZ_ASSERT" "(" "border < ranges->at(new_start_index) || (border == ranges->at(new_start_index) && new_start_index == end_index && new_end_index == end_index - 1 && border == last + 1)"
")"); do { *((volatile int*)__null) = 1204; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
1204 border == last + 1))do { static_assert( mozilla::detail::AssertionConditionType<
decltype(border < ranges->at(new_start_index) || (border
== ranges->at(new_start_index) && new_start_index
== end_index && new_end_index == end_index - 1 &&
border == last + 1))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(border < ranges->at(new_start_index
) || (border == ranges->at(new_start_index) && new_start_index
== end_index && new_end_index == end_index - 1 &&
border == last + 1)))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("border < ranges->at(new_start_index) || (border == ranges->at(new_start_index) && new_start_index == end_index && new_end_index == end_index - 1 && border == last + 1)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1204); AnnotateMozCrashReason("MOZ_ASSERT" "(" "border < ranges->at(new_start_index) || (border == ranges->at(new_start_index) && new_start_index == end_index && new_end_index == end_index - 1 && border == last + 1)"
")"); do { *((volatile int*)__null) = 1204; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1205 DCHECK(new_start_index == 0 || border >= ranges->at(new_start_index - 1))do { static_assert( mozilla::detail::AssertionConditionType<
decltype(new_start_index == 0 || border >= ranges->at(new_start_index
- 1))>::isValid, "invalid assertion condition"); if ((__builtin_expect
(!!(!(!!(new_start_index == 0 || border >= ranges->at(new_start_index
- 1)))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("new_start_index == 0 || border >= ranges->at(new_start_index - 1)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1205); AnnotateMozCrashReason("MOZ_ASSERT" "(" "new_start_index == 0 || border >= ranges->at(new_start_index - 1)"
")"); do { *((volatile int*)__null) = 1205; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1206
1207 masm->CheckCharacterGT(border - 1, above);
1208 Label dummy;
1209 GenerateBranches(masm, ranges, start_index, new_end_index, min_char,
1210 border - 1, &dummy, even_label, odd_label);
1211 if (handle_rest.is_linked()) {
1212 masm->Bind(&handle_rest);
1213 bool flip = (new_start_index & 1) != (start_index & 1);
1214 GenerateBranches(masm, ranges, new_start_index, end_index, border, max_char,
1215 &dummy, flip ? odd_label : even_label,
1216 flip ? even_label : odd_label);
1217 }
1218}
1219
1220void EmitClassRanges(RegExpMacroAssembler* macro_assembler,
1221 RegExpClassRanges* cr, bool one_byte, Label* on_failure,
1222 int cp_offset, bool check_offset, bool preloaded,
1223 Zone* zone) {
1224 ZoneList<CharacterRange>* ranges = cr->ranges(zone);
1225 CharacterRange::Canonicalize(ranges);
1226
1227 // Now that all processing (like case-insensitivity) is done, clamp the
1228 // ranges to the set of ranges that may actually occur in the subject string.
1229 if (one_byte) CharacterRange::ClampToOneByte(ranges);
1230
1231 const int ranges_length = ranges->length();
1232 if (ranges_length == 0) {
1233 if (!cr->is_negated()) {
1234 macro_assembler->GoTo(on_failure);
1235 }
1236 if (check_offset) {
1237 macro_assembler->CheckPosition(cp_offset, on_failure);
1238 }
1239 return;
1240 }
1241
1242 const base::uc32 max_char = MaxCodeUnit(one_byte);
1243 if (ranges_length == 1 && ranges->at(0).IsEverything(max_char)) {
1244 if (cr->is_negated()) {
1245 macro_assembler->GoTo(on_failure);
1246 } else {
1247 // This is a common case hit by non-anchored expressions.
1248 if (check_offset) {
1249 macro_assembler->CheckPosition(cp_offset, on_failure);
1250 }
1251 }
1252 return;
1253 }
1254
1255 if (!preloaded) {
1256 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check_offset);
1257 }
1258
1259 if (cr->is_standard(zone) && macro_assembler->CheckSpecialClassRanges(
1260 cr->standard_type(), on_failure)) {
1261 return;
1262 }
1263
1264 static constexpr int kMaxRangesForInlineBranchGeneration = 16;
1265 if (ranges_length > kMaxRangesForInlineBranchGeneration) {
1266 // For large range sets, emit a more compact instruction sequence to avoid
1267 // a potentially problematic increase in code size.
1268 // Note the flipped logic below (we check InRange if negated, NotInRange if
1269 // not negated); this is necessary since the method falls through on
1270 // failure whereas we want to fall through on success.
1271 if (cr->is_negated()) {
1272 if (macro_assembler->CheckCharacterInRangeArray(ranges, on_failure)) {
1273 return;
1274 }
1275 } else {
1276 if (macro_assembler->CheckCharacterNotInRangeArray(ranges, on_failure)) {
1277 return;
1278 }
1279 }
1280 }
1281
1282 // Generate a flat list of range boundaries for consumption by
1283 // GenerateBranches. See the comment on that function for how the list should
1284 // be structured
1285 ZoneList<base::uc32>* range_boundaries =
1286 zone->New<ZoneList<base::uc32>>(ranges_length * 2, zone);
1287
1288 bool zeroth_entry_is_failure = !cr->is_negated();
1289
1290 for (int i = 0; i < ranges_length; i++) {
1291 CharacterRange& range = ranges->at(i);
1292 if (range.from() == 0) {
1293 DCHECK_EQ(i, 0)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((i) == (0))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((i) == (0)))), 0))) { do { }
while (false); MOZ_ReportAssertionFailure("(i) == (0)", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1293); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(i) == (0)"
")"); do { *((volatile int*)__null) = 1293; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1294 zeroth_entry_is_failure = !zeroth_entry_is_failure;
1295 } else {
1296 range_boundaries->Add(range.from(), zone);
1297 }
1298 // `+ 1` to convert from inclusive to exclusive `to`.
1299 // [from, to] == [from, to+1[.
1300 range_boundaries->Add(range.to() + 1, zone);
1301 }
1302 int end_index = range_boundaries->length() - 1;
1303 if (range_boundaries->at(end_index) > max_char) {
1304 end_index--;
1305 }
1306
1307 Label fall_through;
1308 GenerateBranches(macro_assembler, range_boundaries,
1309 0, // start_index.
1310 end_index,
1311 0, // min_char.
1312 max_char, &fall_through,
1313 zeroth_entry_is_failure ? &fall_through : on_failure,
1314 zeroth_entry_is_failure ? on_failure : &fall_through);
1315 macro_assembler->Bind(&fall_through);
1316}
1317
1318} // namespace
1319
1320RegExpNode::~RegExpNode() = default;
1321
1322RegExpNode::LimitResult RegExpNode::LimitVersions(RegExpCompiler* compiler,
1323 Trace* trace) {
1324 // If we are generating a greedy loop then don't stop and don't reuse code.
1325 if (trace->stop_node() != nullptr) {
1326 return CONTINUE;
1327 }
1328
1329 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
1330 if (trace->is_trivial()) {
1331 if (label_.is_bound() || on_work_list() || !KeepRecursing(compiler)) {
1332 // If a generic version is already scheduled to be generated or we have
1333 // recursed too deeply then just generate a jump to that code.
1334 macro_assembler->GoTo(&label_);
1335 // This will queue it up for generation of a generic version if it hasn't
1336 // already been queued.
1337 compiler->AddWork(this);
1338 return DONE;
1339 }
1340 // Generate generic version of the node and bind the label for later use.
1341 macro_assembler->Bind(&label_);
1342 return CONTINUE;
1343 }
1344
1345 // We are being asked to make a non-generic version. Keep track of how many
1346 // non-generic versions we generate so as not to overdo it.
1347 trace_count_++;
1348 if (KeepRecursing(compiler) && compiler->optimize() &&
1349 trace_count_ < kMaxCopiesCodeGenerated) {
1350 return CONTINUE;
1351 }
1352
1353 // If we get here code has been generated for this node too many times or
1354 // recursion is too deep. Time to switch to a generic version. The code for
1355 // generic versions above can handle deep recursion properly.
1356 bool was_limiting = compiler->limiting_recursion();
1357 compiler->set_limiting_recursion(true);
1358 trace->Flush(compiler, this);
1359 compiler->set_limiting_recursion(was_limiting);
1360 return DONE;
1361}
1362
1363bool RegExpNode::KeepRecursing(RegExpCompiler* compiler) {
1364 return !compiler->limiting_recursion() &&
1365 compiler->recursion_depth() <= RegExpCompiler::kMaxRecursion;
1366}
1367
1368void ActionNode::FillInBMInfo(Isolate* isolate, int offset, int budget,
1369 BoyerMooreLookahead* bm, bool not_at_start) {
1370 base::Optional<RegExpFlags> old_flags;
1371 if (action_type_ == MODIFY_FLAGS) {
1372 // It is not guaranteed that we hit the resetting modify flags node, due to
1373 // recursion budget limitation for filling in BMInfo. Therefore we reset the
1374 // flags manually to the previous state after recursing.
1375 old_flags = bm->compiler()->flags();
1376 bm->compiler()->set_flags(flags());
1377 }
1378 if (action_type_ == POSITIVE_SUBMATCH_SUCCESS) {
1379 // Anything may follow a positive submatch success, thus we need to accept
1380 // all characters from this position onwards.
1381 bm->SetRest(offset);
1382 } else {
1383 on_success()->FillInBMInfo(isolate, offset, budget - 1, bm, not_at_start);
1384 }
1385 SaveBMInfo(bm, not_at_start, offset);
1386 if (old_flags.has_value()) {
1387 bm->compiler()->set_flags(*old_flags);
1388 }
1389}
1390
1391void ActionNode::GetQuickCheckDetails(QuickCheckDetails* details,
1392 RegExpCompiler* compiler, int filled_in,
1393 bool not_at_start) {
1394 if (action_type_ == SET_REGISTER_FOR_LOOP) {
1395 on_success()->GetQuickCheckDetailsFromLoopEntry(details, compiler,
1396 filled_in, not_at_start);
1397 } else {
1398 if (action_type() == MODIFY_FLAGS) {
1399 compiler->set_flags(flags());
1400 }
1401 on_success()->GetQuickCheckDetails(details, compiler, filled_in,
1402 not_at_start);
1403 }
1404}
1405
1406void AssertionNode::FillInBMInfo(Isolate* isolate, int offset, int budget,
1407 BoyerMooreLookahead* bm, bool not_at_start) {
1408 // Match the behaviour of EatsAtLeast on this node.
1409 if (assertion_type() == AT_START && not_at_start) return;
1410 on_success()->FillInBMInfo(isolate, offset, budget - 1, bm, not_at_start);
1411 SaveBMInfo(bm, not_at_start, offset);
1412}
1413
1414void NegativeLookaroundChoiceNode::GetQuickCheckDetails(
1415 QuickCheckDetails* details, RegExpCompiler* compiler, int filled_in,
1416 bool not_at_start) {
1417 RegExpNode* node = continue_node();
1418 return node->GetQuickCheckDetails(details, compiler, filled_in, not_at_start);
1419}
1420
1421namespace {
1422
1423// Takes the left-most 1-bit and smears it out, setting all bits to its right.
1424inline uint32_t SmearBitsRight(uint32_t v) {
1425 v |= v >> 1;
1426 v |= v >> 2;
1427 v |= v >> 4;
1428 v |= v >> 8;
1429 v |= v >> 16;
1430 return v;
1431}
1432
1433} // namespace
1434
1435bool QuickCheckDetails::Rationalize(bool asc) {
1436 bool found_useful_op = false;
1437 const uint32_t char_mask = CharMask(asc);
1438 mask_ = 0;
1439 value_ = 0;
1440 int char_shift = 0;
1441 for (int i = 0; i < characters_; i++) {
36
Assuming 'i' is < field 'characters_'
37
Loop condition is true. Entering loop body
42
Assuming 'i' is < field 'characters_'
43
Loop condition is true. Entering loop body
48
Assuming 'i' is < field 'characters_'
49
Loop condition is true. Entering loop body
1442 Position* pos = &positions_[i];
1443 if ((pos->mask & String::kMaxOneByteCharCode) != 0) {
38
Assuming the condition is false
39
Taking false branch
44
Assuming the condition is false
45
Taking false branch
50
Assuming the condition is false
51
Taking false branch
1444 found_useful_op = true;
1445 }
1446 mask_ |= (pos->mask & char_mask) << char_shift;
52
The result of left shift is undefined because the right operand '32' is not smaller than 32, the capacity of 'uint32_t'
1447 value_ |= (pos->value & char_mask) << char_shift;
1448 char_shift += asc
45.1
'asc' is false
? 8 : 16
;
40
Assuming 'asc' is false
41
'?' condition is false
46
'?' condition is false
47
The value 32 is assigned to 'char_shift'
1449 }
1450 return found_useful_op;
1451}
1452
1453int RegExpNode::EatsAtLeast(bool not_at_start) {
1454 return not_at_start ? eats_at_least_.eats_at_least_from_not_start
1455 : eats_at_least_.eats_at_least_from_possibly_start;
1456}
1457
1458EatsAtLeastInfo RegExpNode::EatsAtLeastFromLoopEntry() {
1459 // SET_REGISTER_FOR_LOOP is only used to initialize loop counters, and it
1460 // implies that the following node must be a LoopChoiceNode. If we need to
1461 // set registers to constant values for other reasons, we could introduce a
1462 // new action type SET_REGISTER that doesn't imply anything about its
1463 // successor.
1464 UNREACHABLE()do { do { } while (false); MOZ_ReportCrash("" "unreachable code"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1464); AnnotateMozCrashReason("MOZ_CRASH(" "unreachable code"
")"); do { *((volatile int*)__null) = 1464; __attribute__((nomerge
)) ::abort(); } while (false); } while (false)
;
1465}
1466
1467void RegExpNode::GetQuickCheckDetailsFromLoopEntry(QuickCheckDetails* details,
1468 RegExpCompiler* compiler,
1469 int characters_filled_in,
1470 bool not_at_start) {
1471 // See comment in RegExpNode::EatsAtLeastFromLoopEntry.
1472 UNREACHABLE()do { do { } while (false); MOZ_ReportCrash("" "unreachable code"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1472); AnnotateMozCrashReason("MOZ_CRASH(" "unreachable code"
")"); do { *((volatile int*)__null) = 1472; __attribute__((nomerge
)) ::abort(); } while (false); } while (false)
;
1473}
1474
1475EatsAtLeastInfo LoopChoiceNode::EatsAtLeastFromLoopEntry() {
1476 DCHECK_EQ(alternatives_->length(), 2)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((alternatives_->length()) == (2))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((alternatives_->length())
== (2)))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("(alternatives_->length()) == (2)", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1476); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(alternatives_->length()) == (2)"
")"); do { *((volatile int*)__null) = 1476; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
; // There's just loop and continue.
1477
1478 if (read_backward()) {
1479 // The eats_at_least value is not used if reading backward. The
1480 // EatsAtLeastPropagator should've zeroed it as well.
1481 DCHECK_EQ(eats_at_least_info()->eats_at_least_from_possibly_start, 0)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((eats_at_least_info()->eats_at_least_from_possibly_start
) == (0))>::isValid, "invalid assertion condition"); if ((
__builtin_expect(!!(!(!!((eats_at_least_info()->eats_at_least_from_possibly_start
) == (0)))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("(eats_at_least_info()->eats_at_least_from_possibly_start) == (0)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1481); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(eats_at_least_info()->eats_at_least_from_possibly_start) == (0)"
")"); do { *((volatile int*)__null) = 1481; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1482 DCHECK_EQ(eats_at_least_info()->eats_at_least_from_not_start, 0)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((eats_at_least_info()->eats_at_least_from_not_start
) == (0))>::isValid, "invalid assertion condition"); if ((
__builtin_expect(!!(!(!!((eats_at_least_info()->eats_at_least_from_not_start
) == (0)))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("(eats_at_least_info()->eats_at_least_from_not_start) == (0)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1482); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(eats_at_least_info()->eats_at_least_from_not_start) == (0)"
")"); do { *((volatile int*)__null) = 1482; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1483 return {};
1484 }
1485
1486 // Figure out how much the loop body itself eats, not including anything in
1487 // the continuation case. In general, the nodes in the loop body should report
1488 // that they eat at least the number eaten by the continuation node, since any
1489 // successful match in the loop body must also include the continuation node.
1490 // However, in some cases involving positive lookaround, the loop body under-
1491 // reports its appetite, so use saturated math here to avoid negative numbers.
1492 uint8_t loop_body_from_not_start = base::saturated_cast<uint8_t>(
1493 loop_node_->EatsAtLeast(true) - continue_node_->EatsAtLeast(true));
1494 uint8_t loop_body_from_possibly_start = base::saturated_cast<uint8_t>(
1495 loop_node_->EatsAtLeast(false) - continue_node_->EatsAtLeast(true));
1496
1497 // Limit the number of loop iterations to avoid overflow in subsequent steps.
1498 int loop_iterations = base::saturated_cast<uint8_t>(min_loop_iterations());
1499
1500 EatsAtLeastInfo result;
1501 result.eats_at_least_from_not_start =
1502 base::saturated_cast<uint8_t>(loop_iterations * loop_body_from_not_start +
1503 continue_node_->EatsAtLeast(true));
1504 if (loop_iterations > 0 && loop_body_from_possibly_start > 0) {
1505 // First loop iteration eats at least one, so all subsequent iterations
1506 // and the after-loop chunk are guaranteed to not be at the start.
1507 result.eats_at_least_from_possibly_start = base::saturated_cast<uint8_t>(
1508 loop_body_from_possibly_start +
1509 (loop_iterations - 1) * loop_body_from_not_start +
1510 continue_node_->EatsAtLeast(true));
1511 } else {
1512 // Loop body might eat nothing, so only continue node contributes.
1513 result.eats_at_least_from_possibly_start =
1514 continue_node_->EatsAtLeast(false);
1515 }
1516 return result;
1517}
1518
1519bool RegExpNode::EmitQuickCheck(RegExpCompiler* compiler,
1520 Trace* bounds_check_trace, Trace* trace,
1521 bool preload_has_checked_bounds,
1522 Label* on_possible_success,
1523 QuickCheckDetails* details,
1524 bool fall_through_on_failure,
1525 ChoiceNode* predecessor) {
1526 DCHECK_NOT_NULL(predecessor)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((predecessor) != nullptr)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((predecessor) != nullptr))),
0))) { do { } while (false); MOZ_ReportAssertionFailure("(predecessor) != nullptr"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1526); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(predecessor) != nullptr"
")"); do { *((volatile int*)__null) = 1526; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
29
Taking false branch
30
Loop condition is false. Exiting loop
1527 if (details->characters() == 0) return false;
31
Assuming the condition is false
32
Taking false branch
1528 GetQuickCheckDetails(details, compiler, 0,
1529 trace->at_start() == Trace::FALSE_VALUE);
1530 if (details->cannot_match()) return false;
33
Assuming the condition is false
34
Taking false branch
1531 if (!details->Rationalize(compiler->one_byte())) return false;
35
Calling 'QuickCheckDetails::Rationalize'
1532 DCHECK(details->characters() == 1 ||do { static_assert( mozilla::detail::AssertionConditionType<
decltype(details->characters() == 1 || compiler->macro_assembler
()->CanReadUnaligned())>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(details->characters() == 1
|| compiler->macro_assembler()->CanReadUnaligned()))),
0))) { do { } while (false); MOZ_ReportAssertionFailure("details->characters() == 1 || compiler->macro_assembler()->CanReadUnaligned()"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1533); AnnotateMozCrashReason("MOZ_ASSERT" "(" "details->characters() == 1 || compiler->macro_assembler()->CanReadUnaligned()"
")"); do { *((volatile int*)__null) = 1533; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
1533 compiler->macro_assembler()->CanReadUnaligned())do { static_assert( mozilla::detail::AssertionConditionType<
decltype(details->characters() == 1 || compiler->macro_assembler
()->CanReadUnaligned())>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(details->characters() == 1
|| compiler->macro_assembler()->CanReadUnaligned()))),
0))) { do { } while (false); MOZ_ReportAssertionFailure("details->characters() == 1 || compiler->macro_assembler()->CanReadUnaligned()"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1533); AnnotateMozCrashReason("MOZ_ASSERT" "(" "details->characters() == 1 || compiler->macro_assembler()->CanReadUnaligned()"
")"); do { *((volatile int*)__null) = 1533; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1534 uint32_t mask = details->mask();
1535 uint32_t value = details->value();
1536
1537 RegExpMacroAssembler* assembler = compiler->macro_assembler();
1538
1539 if (trace->characters_preloaded() != details->characters()) {
1540 DCHECK(trace->cp_offset() == bounds_check_trace->cp_offset())do { static_assert( mozilla::detail::AssertionConditionType<
decltype(trace->cp_offset() == bounds_check_trace->cp_offset
())>::isValid, "invalid assertion condition"); if ((__builtin_expect
(!!(!(!!(trace->cp_offset() == bounds_check_trace->cp_offset
()))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("trace->cp_offset() == bounds_check_trace->cp_offset()"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1540); AnnotateMozCrashReason("MOZ_ASSERT" "(" "trace->cp_offset() == bounds_check_trace->cp_offset()"
")"); do { *((volatile int*)__null) = 1540; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1541 // The bounds check is performed using the minimum number of characters
1542 // any choice would eat, so if the bounds check fails, then none of the
1543 // choices can succeed, so we can just immediately backtrack, rather
1544 // than go to the next choice. The number of characters preloaded may be
1545 // less than the number used for the bounds check.
1546 int eats_at_least = predecessor->EatsAtLeast(
1547 bounds_check_trace->at_start() == Trace::FALSE_VALUE);
1548 DCHECK_GE(eats_at_least, details->characters())do { static_assert( mozilla::detail::AssertionConditionType<
decltype((eats_at_least) >= (details->characters()))>
::isValid, "invalid assertion condition"); if ((__builtin_expect
(!!(!(!!((eats_at_least) >= (details->characters())))),
0))) { do { } while (false); MOZ_ReportAssertionFailure("(eats_at_least) >= (details->characters())"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1548); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(eats_at_least) >= (details->characters())"
")"); do { *((volatile int*)__null) = 1548; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1549 assembler->LoadCurrentCharacter(
1550 trace->cp_offset(), bounds_check_trace->backtrack(),
1551 !preload_has_checked_bounds, details->characters(), eats_at_least);
1552 }
1553
1554 bool need_mask = true;
1555
1556 if (details->characters() == 1) {
1557 // If number of characters preloaded is 1 then we used a byte or 16 bit
1558 // load so the value is already masked down.
1559 const uint32_t char_mask = CharMask(compiler->one_byte());
1560 if ((mask & char_mask) == char_mask) need_mask = false;
1561 mask &= char_mask;
1562 } else {
1563 // For 2-character preloads in one-byte mode or 1-character preloads in
1564 // two-byte mode we also use a 16 bit load with zero extend.
1565 static const uint32_t kTwoByteMask = 0xFFFF;
1566 static const uint32_t kFourByteMask = 0xFFFFFFFF;
1567 if (details->characters() == 2 && compiler->one_byte()) {
1568 if ((mask & kTwoByteMask) == kTwoByteMask) need_mask = false;
1569 } else if (details->characters() == 1 && !compiler->one_byte()) {
1570 if ((mask & kTwoByteMask) == kTwoByteMask) need_mask = false;
1571 } else {
1572 if (mask == kFourByteMask) need_mask = false;
1573 }
1574 }
1575
1576 if (fall_through_on_failure) {
1577 if (need_mask) {
1578 assembler->CheckCharacterAfterAnd(value, mask, on_possible_success);
1579 } else {
1580 assembler->CheckCharacter(value, on_possible_success);
1581 }
1582 } else {
1583 if (need_mask) {
1584 assembler->CheckNotCharacterAfterAnd(value, mask, trace->backtrack());
1585 } else {
1586 assembler->CheckNotCharacter(value, trace->backtrack());
1587 }
1588 }
1589 return true;
1590}
1591
1592// Here is the meat of GetQuickCheckDetails (see also the comment on the
1593// super-class in the .h file).
1594//
1595// We iterate along the text object, building up for each character a
1596// mask and value that can be used to test for a quick failure to match.
1597// The masks and values for the positions will be combined into a single
1598// machine word for the current character width in order to be used in
1599// generating a quick check.
1600void TextNode::GetQuickCheckDetails(QuickCheckDetails* details,
1601 RegExpCompiler* compiler,
1602 int characters_filled_in,
1603 bool not_at_start) {
1604 // Do not collect any quick check details if the text node reads backward,
1605 // since it reads in the opposite direction than we use for quick checks.
1606 if (read_backward()) return;
1607 Isolate* isolate = compiler->macro_assembler()->isolate();
1608 DCHECK(characters_filled_in < details->characters())do { static_assert( mozilla::detail::AssertionConditionType<
decltype(characters_filled_in < details->characters())>
::isValid, "invalid assertion condition"); if ((__builtin_expect
(!!(!(!!(characters_filled_in < details->characters()))
), 0))) { do { } while (false); MOZ_ReportAssertionFailure("characters_filled_in < details->characters()"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1608); AnnotateMozCrashReason("MOZ_ASSERT" "(" "characters_filled_in < details->characters()"
")"); do { *((volatile int*)__null) = 1608; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1609 int characters = details->characters();
1610 const uint32_t char_mask = CharMask(compiler->one_byte());
1611 for (int k = 0; k < elements()->length(); k++) {
1612 TextElement elm = elements()->at(k);
1613 if (elm.text_type() == TextElement::ATOM) {
1614 base::Vector<const base::uc16> quarks = elm.atom()->data();
1615 for (int i = 0; i < characters && i < quarks.length(); i++) {
1616 QuickCheckDetails::Position* pos =
1617 details->positions(characters_filled_in);
1618 base::uc16 c = quarks[i];
1619 if (IsIgnoreCase(compiler->flags())) {
1620 unibrow::uchar chars[4];
1621 int length = GetCaseIndependentLetters(
1622 isolate, c, compiler->one_byte(), chars, 4);
1623 if (length == 0) {
1624 // This can happen because all case variants are non-Latin1, but we
1625 // know the input is Latin1.
1626 details->set_cannot_match();
1627 pos->determines_perfectly = false;
1628 return;
1629 }
1630 if (length == 1) {
1631 // This letter has no case equivalents, so it's nice and simple
1632 // and the mask-compare will determine definitely whether we have
1633 // a match at this character position.
1634 pos->mask = char_mask;
1635 pos->value = chars[0];
1636 pos->determines_perfectly = true;
1637 } else {
1638 uint32_t common_bits = char_mask;
1639 uint32_t bits = chars[0];
1640 for (int j = 1; j < length; j++) {
1641 uint32_t differing_bits = ((chars[j] & common_bits) ^ bits);
1642 common_bits ^= differing_bits;
1643 bits &= common_bits;
1644 }
1645 // If length is 2 and common bits has only one zero in it then
1646 // our mask and compare instruction will determine definitely
1647 // whether we have a match at this character position. Otherwise
1648 // it can only be an approximate check.
1649 uint32_t one_zero = (common_bits | ~char_mask);
1650 if (length == 2 && ((~one_zero) & ((~one_zero) - 1)) == 0) {
1651 pos->determines_perfectly = true;
1652 }
1653 pos->mask = common_bits;
1654 pos->value = bits;
1655 }
1656 } else {
1657 // Don't ignore case. Nice simple case where the mask-compare will
1658 // determine definitely whether we have a match at this character
1659 // position.
1660 if (c > char_mask) {
1661 details->set_cannot_match();
1662 pos->determines_perfectly = false;
1663 return;
1664 }
1665 pos->mask = char_mask;
1666 pos->value = c;
1667 pos->determines_perfectly = true;
1668 }
1669 characters_filled_in++;
1670 DCHECK(characters_filled_in <= details->characters())do { static_assert( mozilla::detail::AssertionConditionType<
decltype(characters_filled_in <= details->characters())
>::isValid, "invalid assertion condition"); if ((__builtin_expect
(!!(!(!!(characters_filled_in <= details->characters())
)), 0))) { do { } while (false); MOZ_ReportAssertionFailure("characters_filled_in <= details->characters()"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1670); AnnotateMozCrashReason("MOZ_ASSERT" "(" "characters_filled_in <= details->characters()"
")"); do { *((volatile int*)__null) = 1670; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1671 if (characters_filled_in == details->characters()) {
1672 return;
1673 }
1674 }
1675 } else {
1676 QuickCheckDetails::Position* pos =
1677 details->positions(characters_filled_in);
1678 RegExpClassRanges* tree = elm.class_ranges();
1679 ZoneList<CharacterRange>* ranges = tree->ranges(zone());
1680 if (tree->is_negated() || ranges->is_empty()) {
1681 // A quick check uses multi-character mask and compare. There is no
1682 // useful way to incorporate a negative char class into this scheme
1683 // so we just conservatively create a mask and value that will always
1684 // succeed.
1685 // Likewise for empty ranges (empty ranges can occur e.g. when
1686 // compiling for one-byte subjects and impossible (non-one-byte) ranges
1687 // have been removed).
1688 pos->mask = 0;
1689 pos->value = 0;
1690 } else {
1691 int first_range = 0;
1692 while (ranges->at(first_range).from() > char_mask) {
1693 first_range++;
1694 if (first_range == ranges->length()) {
1695 details->set_cannot_match();
1696 pos->determines_perfectly = false;
1697 return;
1698 }
1699 }
1700 CharacterRange range = ranges->at(first_range);
1701 const base::uc32 first_from = range.from();
1702 const base::uc32 first_to =
1703 (range.to() > char_mask) ? char_mask : range.to();
1704 const uint32_t differing_bits = (first_from ^ first_to);
1705 // A mask and compare is only perfect if the differing bits form a
1706 // number like 00011111 with one single block of trailing 1s.
1707 if ((differing_bits & (differing_bits + 1)) == 0 &&
1708 first_from + differing_bits == first_to) {
1709 pos->determines_perfectly = true;
1710 }
1711 uint32_t common_bits = ~SmearBitsRight(differing_bits);
1712 uint32_t bits = (first_from & common_bits);
1713 for (int i = first_range + 1; i < ranges->length(); i++) {
1714 range = ranges->at(i);
1715 const base::uc32 from = range.from();
1716 if (from > char_mask) continue;
1717 const base::uc32 to =
1718 (range.to() > char_mask) ? char_mask : range.to();
1719 // Here we are combining more ranges into the mask and compare
1720 // value. With each new range the mask becomes more sparse and
1721 // so the chances of a false positive rise. A character class
1722 // with multiple ranges is assumed never to be equivalent to a
1723 // mask and compare operation.
1724 pos->determines_perfectly = false;
1725 uint32_t new_common_bits = (from ^ to);
1726 new_common_bits = ~SmearBitsRight(new_common_bits);
1727 common_bits &= new_common_bits;
1728 bits &= new_common_bits;
1729 uint32_t new_differing_bits = (from & common_bits) ^ bits;
1730 common_bits ^= new_differing_bits;
1731 bits &= common_bits;
1732 }
1733 pos->mask = common_bits;
1734 pos->value = bits;
1735 }
1736 characters_filled_in++;
1737 DCHECK(characters_filled_in <= details->characters())do { static_assert( mozilla::detail::AssertionConditionType<
decltype(characters_filled_in <= details->characters())
>::isValid, "invalid assertion condition"); if ((__builtin_expect
(!!(!(!!(characters_filled_in <= details->characters())
)), 0))) { do { } while (false); MOZ_ReportAssertionFailure("characters_filled_in <= details->characters()"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1737); AnnotateMozCrashReason("MOZ_ASSERT" "(" "characters_filled_in <= details->characters()"
")"); do { *((volatile int*)__null) = 1737; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1738 if (characters_filled_in == details->characters()) return;
1739 }
1740 }
1741 DCHECK(characters_filled_in != details->characters())do { static_assert( mozilla::detail::AssertionConditionType<
decltype(characters_filled_in != details->characters())>
::isValid, "invalid assertion condition"); if ((__builtin_expect
(!!(!(!!(characters_filled_in != details->characters()))),
0))) { do { } while (false); MOZ_ReportAssertionFailure("characters_filled_in != details->characters()"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1741); AnnotateMozCrashReason("MOZ_ASSERT" "(" "characters_filled_in != details->characters()"
")"); do { *((volatile int*)__null) = 1741; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1742 if (!details->cannot_match()) {
1743 on_success()->GetQuickCheckDetails(details, compiler, characters_filled_in,
1744 true);
1745 }
1746}
1747
1748void QuickCheckDetails::Clear() {
1749 for (int i = 0; i < characters_; i++) {
1750 positions_[i].mask = 0;
1751 positions_[i].value = 0;
1752 positions_[i].determines_perfectly = false;
1753 }
1754 characters_ = 0;
1755}
1756
1757void QuickCheckDetails::Advance(int by, bool one_byte) {
1758 if (by >= characters_ || by < 0) {
1759 DCHECK_IMPLIES(by < 0, characters_ == 0)do { if (by < 0) { do { static_assert( mozilla::detail::AssertionConditionType
<decltype(characters_ == 0)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(characters_ == 0))), 0))) { do
{ } while (false); MOZ_ReportAssertionFailure("characters_ == 0"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1759); AnnotateMozCrashReason("MOZ_ASSERT" "(" "characters_ == 0"
")"); do { *((volatile int*)__null) = 1759; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false); } } while (
false)
;
1760 Clear();
1761 return;
1762 }
1763 DCHECK_LE(characters_ - by, 4)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((characters_ - by) <= (4))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((characters_ - by) <= (4)
))), 0))) { do { } while (false); MOZ_ReportAssertionFailure(
"(characters_ - by) <= (4)", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1763); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(characters_ - by) <= (4)"
")"); do { *((volatile int*)__null) = 1763; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1764 DCHECK_LE(characters_, 4)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((characters_) <= (4))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((characters_) <= (4)))), 0
))) { do { } while (false); MOZ_ReportAssertionFailure("(characters_) <= (4)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1764); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(characters_) <= (4)"
")"); do { *((volatile int*)__null) = 1764; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1765 for (int i = 0; i < characters_ - by; i++) {
1766 positions_[i] = positions_[by + i];
1767 }
1768 for (int i = characters_ - by; i < characters_; i++) {
1769 positions_[i].mask = 0;
1770 positions_[i].value = 0;
1771 positions_[i].determines_perfectly = false;
1772 }
1773 characters_ -= by;
1774 // We could change mask_ and value_ here but we would never advance unless
1775 // they had already been used in a check and they won't be used again because
1776 // it would gain us nothing. So there's no point.
1777}
1778
1779void QuickCheckDetails::Merge(QuickCheckDetails* other, int from_index) {
1780 DCHECK(characters_ == other->characters_)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(characters_ == other->characters_)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(characters_ == other->characters_
))), 0))) { do { } while (false); MOZ_ReportAssertionFailure(
"characters_ == other->characters_", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1780); AnnotateMozCrashReason("MOZ_ASSERT" "(" "characters_ == other->characters_"
")"); do { *((volatile int*)__null) = 1780; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1781 if (other->cannot_match_) {
1782 return;
1783 }
1784 if (cannot_match_) {
1785 *this = *other;
1786 return;
1787 }
1788 for (int i = from_index; i < characters_; i++) {
1789 QuickCheckDetails::Position* pos = positions(i);
1790 QuickCheckDetails::Position* other_pos = other->positions(i);
1791 if (pos->mask != other_pos->mask || pos->value != other_pos->value ||
1792 !other_pos->determines_perfectly) {
1793 // Our mask-compare operation will be approximate unless we have the
1794 // exact same operation on both sides of the alternation.
1795 pos->determines_perfectly = false;
1796 }
1797 pos->mask &= other_pos->mask;
1798 pos->value &= pos->mask;
1799 other_pos->value &= pos->mask;
1800 uint32_t differing_bits = (pos->value ^ other_pos->value);
1801 pos->mask &= ~differing_bits;
1802 pos->value &= pos->mask;
1803 }
1804}
1805
1806class VisitMarker {
1807 public:
1808 explicit VisitMarker(NodeInfo* info) : info_(info) {
1809 DCHECK(!info->visited)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(!info->visited)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(!info->visited))), 0))) {
do { } while (false); MOZ_ReportAssertionFailure("!info->visited"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1809); AnnotateMozCrashReason("MOZ_ASSERT" "(" "!info->visited"
")"); do { *((volatile int*)__null) = 1809; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1810 info->visited = true;
1811 }
1812 ~VisitMarker() { info_->visited = false; }
1813
1814 private:
1815 NodeInfo* info_;
1816};
1817
1818// Temporarily sets traversed_loop_initialization_node_.
1819class LoopInitializationMarker {
1820 public:
1821 explicit LoopInitializationMarker(LoopChoiceNode* node) : node_(node) {
1822 DCHECK(!node_->traversed_loop_initialization_node_)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(!node_->traversed_loop_initialization_node_)>::
isValid, "invalid assertion condition"); if ((__builtin_expect
(!!(!(!!(!node_->traversed_loop_initialization_node_))), 0
))) { do { } while (false); MOZ_ReportAssertionFailure("!node_->traversed_loop_initialization_node_"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1822); AnnotateMozCrashReason("MOZ_ASSERT" "(" "!node_->traversed_loop_initialization_node_"
")"); do { *((volatile int*)__null) = 1822; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1823 node_->traversed_loop_initialization_node_ = true;
1824 }
1825 ~LoopInitializationMarker() {
1826 DCHECK(node_->traversed_loop_initialization_node_)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(node_->traversed_loop_initialization_node_)>::
isValid, "invalid assertion condition"); if ((__builtin_expect
(!!(!(!!(node_->traversed_loop_initialization_node_))), 0)
)) { do { } while (false); MOZ_ReportAssertionFailure("node_->traversed_loop_initialization_node_"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1826); AnnotateMozCrashReason("MOZ_ASSERT" "(" "node_->traversed_loop_initialization_node_"
")"); do { *((volatile int*)__null) = 1826; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1827 node_->traversed_loop_initialization_node_ = false;
1828 }
1829 LoopInitializationMarker(const LoopInitializationMarker&) = delete;
1830 LoopInitializationMarker& operator=(const LoopInitializationMarker&) = delete;
1831
1832 private:
1833 LoopChoiceNode* node_;
1834};
1835
1836// Temporarily decrements min_loop_iterations_.
1837class IterationDecrementer {
1838 public:
1839 explicit IterationDecrementer(LoopChoiceNode* node) : node_(node) {
1840 DCHECK_GT(node_->min_loop_iterations_, 0)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((node_->min_loop_iterations_) > (0))>::isValid
, "invalid assertion condition"); if ((__builtin_expect(!!(!(
!!((node_->min_loop_iterations_) > (0)))), 0))) { do { }
while (false); MOZ_ReportAssertionFailure("(node_->min_loop_iterations_) > (0)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1840); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(node_->min_loop_iterations_) > (0)"
")"); do { *((volatile int*)__null) = 1840; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1841 --node_->min_loop_iterations_;
1842 }
1843 ~IterationDecrementer() { ++node_->min_loop_iterations_; }
1844 IterationDecrementer(const IterationDecrementer&) = delete;
1845 IterationDecrementer& operator=(const IterationDecrementer&) = delete;
1846
1847 private:
1848 LoopChoiceNode* node_;
1849};
1850
1851RegExpNode* SeqRegExpNode::FilterOneByte(int depth, RegExpFlags flags) {
1852 if (info()->replacement_calculated) return replacement();
1853 if (depth < 0) return this;
1854 DCHECK(!info()->visited)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(!info()->visited)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(!info()->visited))), 0)))
{ do { } while (false); MOZ_ReportAssertionFailure("!info()->visited"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1854); AnnotateMozCrashReason("MOZ_ASSERT" "(" "!info()->visited"
")"); do { *((volatile int*)__null) = 1854; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1855 VisitMarker marker(info());
1856 return FilterSuccessor(depth - 1, flags);
1857}
1858
1859RegExpNode* SeqRegExpNode::FilterSuccessor(int depth, RegExpFlags flags) {
1860 RegExpNode* next = on_success_->FilterOneByte(depth - 1, flags);
1861 if (next == nullptr) return set_replacement(nullptr);
1862 on_success_ = next;
1863 return set_replacement(this);
1864}
1865
1866// We need to check for the following characters: 0x39C 0x3BC 0x178.
1867bool RangeContainsLatin1Equivalents(CharacterRange range) {
1868 // TODO(dcarney): this could be a lot more efficient.
1869 return range.Contains(0x039C) || range.Contains(0x03BC) ||
1870 range.Contains(0x0178);
1871}
1872
1873namespace {
1874
1875bool RangesContainLatin1Equivalents(ZoneList<CharacterRange>* ranges) {
1876 for (int i = 0; i < ranges->length(); i++) {
1877 // TODO(dcarney): this could be a lot more efficient.
1878 if (RangeContainsLatin1Equivalents(ranges->at(i))) return true;
1879 }
1880 return false;
1881}
1882
1883} // namespace
1884
1885RegExpNode* TextNode::FilterOneByte(int depth, RegExpFlags flags) {
1886 if (info()->replacement_calculated) return replacement();
1887 if (depth < 0) return this;
1888 DCHECK(!info()->visited)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(!info()->visited)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(!info()->visited))), 0)))
{ do { } while (false); MOZ_ReportAssertionFailure("!info()->visited"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1888); AnnotateMozCrashReason("MOZ_ASSERT" "(" "!info()->visited"
")"); do { *((volatile int*)__null) = 1888; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1889 VisitMarker marker(info());
1890 int element_count = elements()->length();
1891 for (int i = 0; i < element_count; i++) {
1892 TextElement elm = elements()->at(i);
1893 if (elm.text_type() == TextElement::ATOM) {
1894 base::Vector<const base::uc16> quarks = elm.atom()->data();
1895 for (int j = 0; j < quarks.length(); j++) {
1896 base::uc16 c = quarks[j];
1897 if (IsIgnoreCase(flags)) {
1898 c = unibrow::Latin1::TryConvertToLatin1(c);
1899 }
1900 if (c > unibrow::Latin1::kMaxChar) return set_replacement(nullptr);
1901 // Replace quark in case we converted to Latin-1.
1902 base::uc16* writable_quarks = const_cast<base::uc16*>(quarks.begin());
1903 writable_quarks[j] = c;
1904 }
1905 } else {
1906 DCHECK(elm.text_type() == TextElement::CLASS_RANGES)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(elm.text_type() == TextElement::CLASS_RANGES)>::isValid
, "invalid assertion condition"); if ((__builtin_expect(!!(!(
!!(elm.text_type() == TextElement::CLASS_RANGES))), 0))) { do
{ } while (false); MOZ_ReportAssertionFailure("elm.text_type() == TextElement::CLASS_RANGES"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1906); AnnotateMozCrashReason("MOZ_ASSERT" "(" "elm.text_type() == TextElement::CLASS_RANGES"
")"); do { *((volatile int*)__null) = 1906; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1907 RegExpClassRanges* cr = elm.class_ranges();
1908 ZoneList<CharacterRange>* ranges = cr->ranges(zone());
1909 CharacterRange::Canonicalize(ranges);
1910 // Now they are in order so we only need to look at the first.
1911 int range_count = ranges->length();
1912 if (cr->is_negated()) {
1913 if (range_count != 0 && ranges->at(0).from() == 0 &&
1914 ranges->at(0).to() >= String::kMaxOneByteCharCode) {
1915 // This will be handled in a later filter.
1916 if (IsIgnoreCase(flags) && RangesContainLatin1Equivalents(ranges)) {
1917 continue;
1918 }
1919 return set_replacement(nullptr);
1920 }
1921 } else {
1922 if (range_count == 0 ||
1923 ranges->at(0).from() > String::kMaxOneByteCharCode) {
1924 // This will be handled in a later filter.
1925 if (IsIgnoreCase(flags) && RangesContainLatin1Equivalents(ranges)) {
1926 continue;
1927 }
1928 return set_replacement(nullptr);
1929 }
1930 }
1931 }
1932 }
1933 return FilterSuccessor(depth - 1, flags);
1934}
1935
1936RegExpNode* LoopChoiceNode::FilterOneByte(int depth, RegExpFlags flags) {
1937 if (info()->replacement_calculated) return replacement();
1938 if (depth < 0) return this;
1939 if (info()->visited) return this;
1940 {
1941 VisitMarker marker(info());
1942
1943 RegExpNode* continue_replacement =
1944 continue_node_->FilterOneByte(depth - 1, flags);
1945 // If we can't continue after the loop then there is no sense in doing the
1946 // loop.
1947 if (continue_replacement == nullptr) return set_replacement(nullptr);
1948 }
1949
1950 return ChoiceNode::FilterOneByte(depth - 1, flags);
1951}
1952
1953RegExpNode* ChoiceNode::FilterOneByte(int depth, RegExpFlags flags) {
1954 if (info()->replacement_calculated) return replacement();
1955 if (depth < 0) return this;
1956 if (info()->visited) return this;
1957 VisitMarker marker(info());
1958 int choice_count = alternatives_->length();
1959
1960 for (int i = 0; i < choice_count; i++) {
1961 GuardedAlternative alternative = alternatives_->at(i);
1962 if (alternative.guards() != nullptr &&
1963 alternative.guards()->length() != 0) {
1964 set_replacement(this);
1965 return this;
1966 }
1967 }
1968
1969 int surviving = 0;
1970 RegExpNode* survivor = nullptr;
1971 for (int i = 0; i < choice_count; i++) {
1972 GuardedAlternative alternative = alternatives_->at(i);
1973 RegExpNode* replacement =
1974 alternative.node()->FilterOneByte(depth - 1, flags);
1975 DCHECK(replacement != this)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(replacement != this)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(replacement != this))), 0)))
{ do { } while (false); MOZ_ReportAssertionFailure("replacement != this"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1975); AnnotateMozCrashReason("MOZ_ASSERT" "(" "replacement != this"
")"); do { *((volatile int*)__null) = 1975; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
; // No missing EMPTY_MATCH_CHECK.
1976 if (replacement != nullptr) {
1977 alternatives_->at(i).set_node(replacement);
1978 surviving++;
1979 survivor = replacement;
1980 }
1981 }
1982 if (surviving < 2) return set_replacement(survivor);
1983
1984 set_replacement(this);
1985 if (surviving == choice_count) {
1986 return this;
1987 }
1988 // Only some of the nodes survived the filtering. We need to rebuild the
1989 // alternatives list.
1990 ZoneList<GuardedAlternative>* new_alternatives =
1991 zone()->New<ZoneList<GuardedAlternative>>(surviving, zone());
1992 for (int i = 0; i < choice_count; i++) {
1993 RegExpNode* replacement =
1994 alternatives_->at(i).node()->FilterOneByte(depth - 1, flags);
1995 if (replacement != nullptr) {
1996 alternatives_->at(i).set_node(replacement);
1997 new_alternatives->Add(alternatives_->at(i), zone());
1998 }
1999 }
2000 alternatives_ = new_alternatives;
2001 return this;
2002}
2003
2004RegExpNode* NegativeLookaroundChoiceNode::FilterOneByte(int depth,
2005 RegExpFlags flags) {
2006 if (info()->replacement_calculated) return replacement();
2007 if (depth < 0) return this;
2008 if (info()->visited) return this;
2009 VisitMarker marker(info());
2010 // Alternative 0 is the negative lookahead, alternative 1 is what comes
2011 // afterwards.
2012 RegExpNode* node = continue_node();
2013 RegExpNode* replacement = node->FilterOneByte(depth - 1, flags);
2014 if (replacement == nullptr) return set_replacement(nullptr);
2015 alternatives_->at(kContinueIndex).set_node(replacement);
2016
2017 RegExpNode* neg_node = lookaround_node();
2018 RegExpNode* neg_replacement = neg_node->FilterOneByte(depth - 1, flags);
2019 // If the negative lookahead is always going to fail then
2020 // we don't need to check it.
2021 if (neg_replacement == nullptr) return set_replacement(replacement);
2022 alternatives_->at(kLookaroundIndex).set_node(neg_replacement);
2023 return set_replacement(this);
2024}
2025
2026void LoopChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details,
2027 RegExpCompiler* compiler,
2028 int characters_filled_in,
2029 bool not_at_start) {
2030 if (body_can_be_zero_length_ || info()->visited) return;
2031 not_at_start = not_at_start || this->not_at_start();
2032 DCHECK_EQ(alternatives_->length(), 2)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((alternatives_->length()) == (2))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((alternatives_->length())
== (2)))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("(alternatives_->length()) == (2)", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 2032); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(alternatives_->length()) == (2)"
")"); do { *((volatile int*)__null) = 2032; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
; // There's just loop and continue.
2033 if (traversed_loop_initialization_node_ && min_loop_iterations_ > 0 &&
2034 loop_node_->EatsAtLeast(not_at_start) >
2035 continue_node_->EatsAtLeast(true)) {
2036 // Loop body is guaranteed to execute at least once, and consume characters
2037 // when it does, meaning the only possible quick checks from this point
2038 // begin with the loop body. We may recursively visit this LoopChoiceNode,
2039 // but we temporarily decrease its minimum iteration counter so we know when
2040 // to check the continue case.
2041 IterationDecrementer next_iteration(this);
2042 loop_node_->GetQuickCheckDetails(details, compiler, characters_filled_in,
2043 not_at_start);
2044 } else {
2045 // Might not consume anything in the loop body, so treat it like a normal
2046 // ChoiceNode (and don't recursively visit this node again).
2047 VisitMarker marker(info());
2048 ChoiceNode::GetQuickCheckDetails(details, compiler, characters_filled_in,
2049 not_at_start);
2050 }
2051}
2052
2053void LoopChoiceNode::GetQuickCheckDetailsFromLoopEntry(
2054 QuickCheckDetails* details, RegExpCompiler* compiler,
2055 int characters_filled_in, bool not_at_start) {
2056 if (traversed_loop_initialization_node_) {
2057 // We already entered this loop once, exited via its continuation node, and
2058 // followed an outer loop's back-edge to before the loop entry point. We
2059 // could try to reset the minimum iteration count to its starting value at
2060 // this point, but that seems like more trouble than it's worth. It's safe
2061 // to keep going with the current (possibly reduced) minimum iteration
2062 // count.
2063 GetQuickCheckDetails(details, compiler, characters_filled_in, not_at_start);
2064 } else {
2065 // We are entering a loop via its counter initialization action, meaning we
2066 // are guaranteed to run the loop body at least some minimum number of times
2067 // before running the continuation node. Set a flag so that this node knows
2068 // (now and any times we visit it again recursively) that it was entered
2069 // from the top.
2070 LoopInitializationMarker marker(this);
2071 GetQuickCheckDetails(details, compiler, characters_filled_in, not_at_start);
2072 }
2073}
2074
2075void LoopChoiceNode::FillInBMInfo(Isolate* isolate, int offset, int budget,
2076 BoyerMooreLookahead* bm, bool not_at_start) {
2077 if (body_can_be_zero_length_ || budget <= 0) {
2078 bm->SetRest(offset);
2079 SaveBMInfo(bm, not_at_start, offset);
2080 return;
2081 }
2082 ChoiceNode::FillInBMInfo(isolate, offset, budget - 1, bm, not_at_start);
2083 SaveBMInfo(bm, not_at_start, offset);
2084}
2085
2086void ChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details,
2087 RegExpCompiler* compiler,
2088 int characters_filled_in,
2089 bool not_at_start) {
2090 not_at_start = (not_at_start || not_at_start_);
2091 int choice_count = alternatives_->length();
2092 DCHECK_LT(0, choice_count)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((0) < (choice_count))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((0) < (choice_count)))), 0
))) { do { } while (false); MOZ_ReportAssertionFailure("(0) < (choice_count)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 2092); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(0) < (choice_count)"
")"); do { *((volatile int*)__null) = 2092; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
2093 alternatives_->at(0).node()->GetQuickCheckDetails(
2094 details, compiler, characters_filled_in, not_at_start);
2095 for (int i = 1; i < choice_count; i++) {
2096 QuickCheckDetails new_details(details->characters());
2097 RegExpNode* node = alternatives_->at(i).node();
2098 node->GetQuickCheckDetails(&new_details, compiler, characters_filled_in,
2099 not_at_start);
2100 // Here we merge the quick match details of the two branches.
2101 details->Merge(&new_details, characters_filled_in);
2102 }
2103}
2104
2105namespace {
2106
2107// Check for [0-9A-Z_a-z].
2108void EmitWordCheck(RegExpMacroAssembler* assembler, Label* word,
2109 Label* non_word, bool fall_through_on_word) {
2110 if (assembler->CheckSpecialClassRanges(
2111 fall_through_on_word ? StandardCharacterSet::kWord
2112 : StandardCharacterSet::kNotWord,
2113 fall_through_on_word ? non_word : word)) {
2114 // Optimized implementation available.
2115 return;
2116 }
2117 assembler->CheckCharacterGT('z', non_word);
2118 assembler->CheckCharacterLT('0', non_word);
2119 assembler->CheckCharacterGT('a' - 1, word);
2120 assembler->CheckCharacterLT('9' + 1, word);
2121 assembler->CheckCharacterLT('A', non_word);
2122 assembler->CheckCharacterLT('Z' + 1, word);
2123 if (fall_through_on_word) {
2124 assembler->CheckNotCharacter('_', non_word);
2125 } else {
2126 assembler->CheckCharacter('_', word);
2127 }
2128}
2129
2130// Emit the code to check for a ^ in multiline mode (1-character lookbehind
2131// that matches newline or the start of input).
2132void EmitHat(RegExpCompiler* compiler, RegExpNode* on_success, Trace* trace) {
2133 RegExpMacroAssembler* assembler = compiler->macro_assembler();
2134
2135 // We will load the previous character into the current character register.
2136 Trace new_trace(*trace);
2137 new_trace.InvalidateCurrentCharacter();
2138
2139 // A positive (> 0) cp_offset means we've already successfully matched a
2140 // non-empty-width part of the pattern, and thus cannot be at or before the
2141 // start of the subject string. We can thus skip both at-start and
2142 // bounds-checks when loading the one-character lookbehind.
2143 const bool may_be_at_or_before_subject_string_start =
2144 new_trace.cp_offset() <= 0;
2145
2146 Label ok;
2147 if (may_be_at_or_before_subject_string_start) {
2148 // The start of input counts as a newline in this context, so skip to ok if
2149 // we are at the start.
2150 assembler->CheckAtStart(new_trace.cp_offset(), &ok);
2151 }
2152
2153 // If we've already checked that we are not at the start of input, it's okay
2154 // to load the previous character without bounds checks.
2155 const bool can_skip_bounds_check = !may_be_at_or_before_subject_string_start;
2156 assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1,
2157 new_trace.backtrack(), can_skip_bounds_check);
2158 if (!assembler->CheckSpecialClassRanges(StandardCharacterSet::kLineTerminator,
2159 new_trace.backtrack())) {
2160 // Newline means \n, \r, 0x2028 or 0x2029.
2161 if (!compiler->one_byte()) {
2162 assembler->CheckCharacterAfterAnd(0x2028, 0xFFFE, &ok);
2163 }
2164 assembler->CheckCharacter('\n', &ok);
2165 assembler->CheckNotCharacter('\r', new_trace.backtrack());
2166 }
2167 assembler->Bind(&ok);
2168 on_success->Emit(compiler, &new_trace);
2169}
2170
2171} // namespace
2172
2173// Emit the code to handle \b and \B (word-boundary or non-word-boundary).
2174void AssertionNode::EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace) {
2175 RegExpMacroAssembler* assembler = compiler->macro_assembler();
2176 Isolate* isolate = assembler->isolate();
2177 Trace::TriBool next_is_word_character = Trace::UNKNOWN;
2178 bool not_at_start = (trace->at_start() == Trace::FALSE_VALUE);
2179 BoyerMooreLookahead* lookahead = bm_info(not_at_start);
2180 if (lookahead == nullptr) {
2181 int eats_at_least =
2182 std::min(kMaxLookaheadForBoyerMoore, EatsAtLeast(not_at_start));
2183 if (eats_at_least >= 1) {
2184 BoyerMooreLookahead* bm =
2185 zone()->New<BoyerMooreLookahead>(eats_at_least, compiler, zone());
2186 FillInBMInfo(isolate, 0, kRecursionBudget, bm, not_at_start);
2187 if (bm->at(0)->is_non_word()) next_is_word_character = Trace::FALSE_VALUE;
2188 if (bm->at(0)->is_word()) next_is_word_character = Trace::TRUE_VALUE;
2189 }
2190 } else {
2191 if (lookahead->at(0)->is_non_word())
2192 next_is_word_character = Trace::FALSE_VALUE;
2193 if (lookahead->at(0)->is_word()) next_is_word_character = Trace::TRUE_VALUE;
2194 }
2195 bool at_boundary = (assertion_type_ == AssertionNode::AT_BOUNDARY);
2196 if (next_is_word_character == Trace::UNKNOWN) {
2197 Label before_non_word;
2198 Label before_word;
2199 if (trace->characters_preloaded() != 1) {
2200 assembler->LoadCurrentCharacter(trace->cp_offset(), &before_non_word);
2201 }
2202 // Fall through on non-word.
2203 EmitWordCheck(assembler, &before_word, &before_non_word, false);
2204 // Next character is not a word character.
2205 assembler->Bind(&before_non_word);
2206 Label ok;
2207 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsNonWord : kIsWord);
2208 assembler->GoTo(&ok);
2209
2210 assembler->Bind(&before_word);
2211 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsWord : kIsNonWord);
2212 assembler->Bind(&ok);
2213 } else if (next_is_word_character == Trace::TRUE_VALUE) {
2214 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsWord : kIsNonWord);
2215 } else {
2216 DCHECK(next_is_word_character == Trace::FALSE_VALUE)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(next_is_word_character == Trace::FALSE_VALUE)>::isValid
, "invalid assertion condition"); if ((__builtin_expect(!!(!(
!!(next_is_word_character == Trace::FALSE_VALUE))), 0))) { do
{ } while (false); MOZ_ReportAssertionFailure("next_is_word_character == Trace::FALSE_VALUE"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 2216); AnnotateMozCrashReason("MOZ_ASSERT" "(" "next_is_word_character == Trace::FALSE_VALUE"
")"); do { *((volatile int*)__null) = 2216; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
2217 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsNonWord : kIsWord);
2218 }
2219}
2220
2221void AssertionNode::BacktrackIfPrevious(
2222 RegExpCompiler* compiler, Trace* trace,
2223 AssertionNode::IfPrevious backtrack_if_previous) {
2224 RegExpMacroAssembler* assembler = compiler->macro_assembler();
2225 Trace new_trace(*trace);
2226 new_trace.InvalidateCurrentCharacter();
2227
2228 Label fall_through;
2229 Label* non_word = backtrack_if_previous == kIsNonWord ? new_trace.backtrack()
2230 : &fall_through;
2231 Label* word = backtrack_if_previous == kIsNonWord ? &fall_through
2232 : new_trace.backtrack();
2233
2234 // A positive (> 0) cp_offset means we've already successfully matched a
2235 // non-empty-width part of the pattern, and thus cannot be at or before the
2236 // start of the subject string. We can thus skip both at-start and
2237 // bounds-checks when loading the one-character lookbehind.
2238 const bool may_be_at_or_before_subject_string_start =
2239 new_trace.cp_offset() <= 0;
2240
2241 if (may_be_at_or_before_subject_string_start) {
2242 // The start of input counts as a non-word character, so the question is
2243 // decided if we are at the start.
2244 assembler->CheckAtStart(new_trace.cp_offset(), non_word);
2245 }
2246
2247 // If we've already checked that we are not at the start of input, it's okay
2248 // to load the previous character without bounds checks.
2249 const bool can_skip_bounds_check = !may_be_at_or_before_subject_string_start;
2250 assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1, non_word,
2251 can_skip_bounds_check);
2252 EmitWordCheck(assembler, word, non_word, backtrack_if_previous == kIsNonWord);
2253
2254 assembler->Bind(&fall_through);
2255 on_success()->Emit(compiler, &new_trace);
2256}
2257
2258void AssertionNode::GetQuickCheckDetails(QuickCheckDetails* details,
2259 RegExpCompiler* compiler,
2260 int filled_in, bool not_at_start) {
2261 if (assertion_type_ == AT_START && not_at_start) {
2262 details->set_cannot_match();
2263 return;
2264 }
2265 return on_success()->GetQuickCheckDetails(details, compiler, filled_in,
2266 not_at_start);
2267}
2268
2269void AssertionNode::Emit(RegExpCompiler* compiler, Trace* trace) {
2270 RegExpMacroAssembler* assembler = compiler->macro_assembler();
2271 switch (assertion_type_) {
2272 case AT_END: {
2273 Label ok;
2274 assembler->CheckPosition(trace->cp_offset(), &ok);
2275 assembler->GoTo(trace->backtrack());
2276 assembler->Bind(&ok);
2277 break;
2278 }
2279 case AT_START: {
2280 if (trace->at_start() == Trace::FALSE_VALUE) {
2281 assembler->GoTo(trace->backtrack());
2282 return;
2283 }
2284 if (trace->at_start() == Trace::UNKNOWN) {
2285 assembler->CheckNotAtStart(trace->cp_offset(), trace->backtrack());
2286 Trace at_start_trace = *trace;
2287 at_start_trace.set_at_start(Trace::TRUE_VALUE);
2288 on_success()->Emit(compiler, &at_start_trace);
2289 return;
2290 }
2291 } break;
2292 case AFTER_NEWLINE:
2293 EmitHat(compiler, on_success(), trace);
2294 return;
2295 case AT_BOUNDARY:
2296 case AT_NON_BOUNDARY: {
2297 EmitBoundaryCheck(compiler, trace);
2298 return;
2299 }
2300 }
2301 on_success()->Emit(compiler, trace);
2302}
2303
2304namespace {
2305
2306bool DeterminedAlready(QuickCheckDetails* quick_check, int offset) {
2307 if (quick_check == nullptr) return false;
2308 if (offset >= quick_check->characters()) return false;
2309 return quick_check->positions(offset)->determines_perfectly;
2310}
2311
2312void UpdateBoundsCheck(int index, int* checked_up_to) {
2313 if (index > *checked_up_to) {
2314 *checked_up_to = index;
2315 }
2316}
2317
2318} // namespace
2319
2320// We call this repeatedly to generate code for each pass over the text node.
2321// The passes are in increasing order of difficulty because we hope one
2322// of the first passes will fail in which case we are saved the work of the
2323// later passes. for example for the case independent regexp /%[asdfghjkl]a/
2324// we will check the '%' in the first pass, the case independent 'a' in the
2325// second pass and the character class in the last pass.
2326//
2327// The passes are done from right to left, so for example to test for /bar/
2328// we will first test for an 'r' with offset 2, then an 'a' with offset 1
2329// and then a 'b' with offset 0. This means we can avoid the end-of-input
2330// bounds check most of the time. In the example we only need to check for
2331// end-of-input when loading the putative 'r'.
2332//
2333// A slight complication involves the fact that the first character may already
2334// be fetched into a register by the previous node. In this case we want to
2335// do the test for that character first. We do this in separate passes. The
2336// 'preloaded' argument indicates that we are doing such a 'pass'. If such a
2337// pass has been performed then subsequent passes will have true in
2338// first_element_checked to indicate that that character does not need to be
2339// checked again.
2340//
2341// In addition to all this we are passed a Trace, which can
2342// contain an AlternativeGeneration object. In this AlternativeGeneration
2343// object we can see details of any quick check that was already passed in
2344// order to get to the code we are now generating. The quick check can involve
2345// loading characters, which means we do not need to recheck the bounds
2346// up to the limit the quick check already checked. In addition the quick
2347// check can have involved a mask and compare operation which may simplify
2348// or obviate the need for further checks at some character positions.
2349void TextNode::TextEmitPass(RegExpCompiler* compiler, TextEmitPassType pass,
2350 bool preloaded, Trace* trace,
2351 bool first_element_checked, int* checked_up_to) {
2352 RegExpMacroAssembler* assembler = compiler->macro_assembler();
2353 Isolate* isolate = assembler->isolate();
2354 bool one_byte = compiler->one_byte();
2355 Label* backtrack = trace->backtrack();
2356 QuickCheckDetails* quick_check = trace->quick_check_performed();
2357 int element_count = elements()->length();
2358 int backward_offset = read_backward() ? -Length() : 0;
2359 for (int i = preloaded ? 0 : element_count - 1; i >= 0; i--) {
2360 TextElement elm = elements()->at(i);
2361 int cp_offset = trace->cp_offset() + elm.cp_offset() + backward_offset;
2362 if (elm.text_type() == TextElement::ATOM) {
2363 if (SkipPass(pass, IsIgnoreCase(compiler->flags()))) continue;
2364 base::Vector<const base::uc16> quarks = elm.atom()->data();
2365 for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) {
2366 if (first_element_checked && i == 0 && j == 0) continue;
2367 if (DeterminedAlready(quick_check, elm.cp_offset() + j)) continue;
2368 base::uc16 quark = quarks[j];
2369 if (IsIgnoreCase(compiler->flags())) {
2370 // Everywhere else we assume that a non-Latin-1 character cannot match
2371 // a Latin-1 character. Avoid the cases where this is assumption is
2372 // invalid by using the Latin1 equivalent instead.
2373 quark = unibrow::Latin1::TryConvertToLatin1(quark);
2374 }
2375 bool needs_bounds_check =
2376 *checked_up_to < cp_offset + j || read_backward();
2377 bool bounds_checked = false;
2378 switch (pass) {
2379 case NON_LATIN1_MATCH:
2380 DCHECK(one_byte)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(one_byte)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(one_byte))), 0))) { do { } while
(false); MOZ_ReportAssertionFailure("one_byte", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 2380); AnnotateMozCrashReason("MOZ_ASSERT" "(" "one_byte" ")"
); do { *((volatile int*)__null) = 2380; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
2381 if (quark > String::kMaxOneByteCharCode) {
2382 assembler->GoTo(backtrack);
2383 return;
2384 }
2385 break;
2386 case NON_LETTER_CHARACTER_MATCH:
2387 bounds_checked =
2388 EmitAtomNonLetter(isolate, compiler, quark, backtrack,
2389 cp_offset + j, needs_bounds_check, preloaded);
2390 break;
2391 case SIMPLE_CHARACTER_MATCH:
2392 bounds_checked = EmitSimpleCharacter(isolate, compiler, quark,
2393 backtrack, cp_offset + j,
2394 needs_bounds_check, preloaded);
2395 break;
2396 case CASE_CHARACTER_MATCH:
2397 bounds_checked =
2398 EmitAtomLetter(isolate, compiler, quark, backtrack,
2399 cp_offset + j, needs_bounds_check, preloaded);
2400 break;
2401 default:
2402 break;
2403 }
2404 if (bounds_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to);
2405 }
2406 } else {
2407 DCHECK_EQ(TextElement::CLASS_RANGES, elm.text_type())do { static_assert( mozilla::detail::AssertionConditionType<
decltype((TextElement::CLASS_RANGES) == (elm.text_type()))>
::isValid, "invalid assertion condition"); if ((__builtin_expect
(!!(!(!!((TextElement::CLASS_RANGES) == (elm.text_type())))),
0))) { do { } while (false); MOZ_ReportAssertionFailure("(TextElement::CLASS_RANGES) == (elm.text_type())"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 2407); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(TextElement::CLASS_RANGES) == (elm.text_type())"
")"); do { *((volatile int*)__null) = 2407; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
2408 if (pass == CHARACTER_CLASS_MATCH) {
2409 if (first_element_checked && i == 0) continue;
2410 if (DeterminedAlready(quick_check, elm.cp_offset())) continue;
2411 RegExpClassRanges* cr = elm.class_ranges();
2412 bool bounds_check = *checked_up_to < cp_offset || read_backward();
2413 EmitClassRanges(assembler, cr, one_byte, backtrack, cp_offset,
2414 bounds_check, preloaded, zone());
2415 UpdateBoundsCheck(cp_offset, checked_up_to);
2416 }
2417 }
2418 }
2419}
2420
2421int TextNode::Length() {
2422 TextElement elm = elements()->last();
2423 DCHECK_LE(0, elm.cp_offset())do { static_assert( mozilla::detail::AssertionConditionType<
decltype((0) <= (elm.cp_offset()))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((0) <= (elm.cp_offset()))
)), 0))) { do { } while (false); MOZ_ReportAssertionFailure("(0) <= (elm.cp_offset())"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 2423); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(0) <= (elm.cp_offset())"
")"); do { *((volatile int*)__null) = 2423; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
2424 return elm.cp_offset() + elm.length();
2425}
2426
2427bool TextNode::SkipPass(TextEmitPassType pass, bool ignore_case) {
2428 if (ignore_case) {
2429 return pass == SIMPLE_CHARACTER_MATCH;
2430 } else {
2431 return pass == NON_LETTER_CHARACTER_MATCH || pass == CASE_CHARACTER_MATCH;
2432 }
2433}
2434
2435TextNode* TextNode::CreateForCharacterRanges(Zone* zone,
2436 ZoneList<CharacterRange>* ranges,
2437 bool read_backward,
2438 RegExpNode* on_success) {
2439 DCHECK_NOT_NULL(ranges)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((ranges) != nullptr)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((ranges) != nullptr))), 0)))
{ do { } while (false); MOZ_ReportAssertionFailure("(ranges) != nullptr"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 2439); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(ranges) != nullptr"
")"); do { *((volatile int*)__null) = 2439; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
2440 // TODO(jgruber): There's no fundamental need to create this
2441 // RegExpClassRanges; we could refactor to avoid the allocation.
2442 return zone->New<TextNode>(zone->New<RegExpClassRanges>(zone, ranges),
2443 read_backward, on_success);
2444}
2445
2446TextNode* TextNode::CreateForSurrogatePair(
2447 Zone* zone, CharacterRange lead, ZoneList<CharacterRange>* trail_ranges,
2448 bool read_backward, RegExpNode* on_success) {
2449 ZoneList<CharacterRange>* lead_ranges = CharacterRange::List(zone, lead);
2450 ZoneList<TextElement>* elms = zone->New<ZoneList<TextElement>>(2, zone);
2451 elms->Add(
2452 TextElement::ClassRanges(zone->New<RegExpClassRanges>(zone, lead_ranges)),
2453 zone);
2454 elms->Add(TextElement::ClassRanges(
2455 zone->New<RegExpClassRanges>(zone, trail_ranges)),
2456 zone);
2457 return zone->New<TextNode>(elms, read_backward, on_success);
2458}
2459
2460TextNode* TextNode::CreateForSurrogatePair(
2461 Zone* zone, ZoneList<CharacterRange>* lead_ranges, CharacterRange trail,
2462 bool read_backward, RegExpNode* on_success) {
2463 ZoneList<CharacterRange>* trail_ranges = CharacterRange::List(zone, trail);
2464 ZoneList<TextElement>* elms = zone->New<ZoneList<TextElement>>(2, zone);
2465 elms->Add(
2466 TextElement::ClassRanges(zone->New<RegExpClassRanges>(zone, lead_ranges)),
2467 zone);
2468 elms->Add(TextElement::ClassRanges(
2469 zone->New<RegExpClassRanges>(zone, trail_ranges)),
2470 zone);
2471 return zone->New<TextNode>(elms, read_backward, on_success);
2472}
2473
2474// This generates the code to match a text node. A text node can contain
2475// straight character sequences (possibly to be matched in a case-independent
2476// way) and character classes. For efficiency we do not do this in a single
2477// pass from left to right. Instead we pass over the text node several times,
2478// emitting code for some character positions every time. See the comment on
2479// TextEmitPass for details.
2480void TextNode::Emit(RegExpCompiler* compiler, Trace* trace) {
2481 LimitResult limit_result = LimitVersions(compiler, trace);
2482 if (limit_result == DONE) return;
2483 DCHECK(limit_result == CONTINUE)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(limit_result == CONTINUE)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(limit_result == CONTINUE))),
0))) { do { } while (false); MOZ_ReportAssertionFailure("limit_result == CONTINUE"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 2483); AnnotateMozCrashReason("MOZ_ASSERT" "(" "limit_result == CONTINUE"
")"); do { *((volatile int*)__null) = 2483; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
2484
2485 if (trace->cp_offset() + Length() > RegExpMacroAssembler::kMaxCPOffset) {
2486 compiler->SetRegExpTooBig();
2487 return;
2488 }
2489
2490 if (compiler->one_byte()) {
2491 int dummy = 0;
2492 TextEmitPass(compiler, NON_LATIN1_MATCH, false, trace, false, &dummy);
2493 }
2494
2495 bool first_elt_done = false;
2496 int bound_checked_to = trace->cp_offset() - 1;
2497 bound_checked_to += trace->bound_checked_up_to();
2498
2499 // If a character is preloaded into the current character register then
2500 // check that now.
2501 if (trace->characters_preloaded() == 1) {
2502 for (int pass = kFirstRealPass; pass <= kLastPass; pass++) {
2503 TextEmitPass(compiler, static_cast<TextEmitPassType>(pass), true, trace,
2504 false, &bound_checked_to);
2505 }
2506 first_elt_done = true;
2507 }
2508
2509 for (int pass = kFirstRealPass; pass <= kLastPass; pass++) {
2510 TextEmitPass(compiler, static_cast<TextEmitPassType>(pass), false, trace,
2511 first_elt_done, &bound_checked_to);
2512 }
2513
2514 Trace successor_trace(*trace);
2515 // If we advance backward, we may end up at the start.
2516 successor_trace.AdvanceCurrentPositionInTrace(
2517 read_backward() ? -Length() : Length(), compiler);
2518 successor_trace.set_at_start(read_backward() ? Trace::UNKNOWN
2519 : Trace::FALSE_VALUE);
2520 RecursionCheck rc(compiler);
2521 on_success()->Emit(compiler, &successor_trace);
2522}
2523
2524void Trace::InvalidateCurrentCharacter() { characters_preloaded_ = 0; }
2525
2526void Trace::AdvanceCurrentPositionInTrace(int by, RegExpCompiler* compiler) {
2527 // We don't have an instruction for shifting the current character register
2528 // down or for using a shifted value for anything so lets just forget that
2529 // we preloaded any characters into it.
2530 characters_preloaded_ = 0;
2531 // Adjust the offsets of the quick check performed information. This
2532 // information is used to find out what we already determined about the
2533 // characters by means of mask and compare.
2534 quick_check_performed_.Advance(by, compiler->one_byte());
2535 cp_offset_ += by;
2536 if (cp_offset_ > RegExpMacroAssembler::kMaxCPOffset) {
2537 compiler->SetRegExpTooBig();
2538 cp_offset_ = 0;
2539 }
2540 bound_checked_up_to_ = std::max(0, bound_checked_up_to_ - by);
2541}
2542
2543void TextNode::MakeCaseIndependent(Isolate* isolate, bool is_one_byte,
2544 RegExpFlags flags) {
2545 if (!IsIgnoreCase(flags)) return;
2546#ifdef V8_INTL_SUPPORT1
2547 if (NeedsUnicodeCaseEquivalents(flags)) return;
2548#endif
2549
2550 int element_count = elements()->length();
2551 for (int i = 0; i < element_count; i++) {
2552 TextElement elm = elements()->at(i);
2553 if (elm.text_type() == TextElement::CLASS_RANGES) {
2554 RegExpClassRanges* cr = elm.class_ranges();
2555 // None of the standard character classes is different in the case
2556 // independent case and it slows us down if we don't know that.
2557 if (cr->is_standard(zone())) continue;
2558 ZoneList<CharacterRange>* ranges = cr->ranges(zone());
2559 CharacterRange::AddCaseEquivalents(isolate, zone(), ranges, is_one_byte);
2560 }
2561 }
2562}
2563
2564int TextNode::GreedyLoopTextLength() { return Length(); }
2565
2566RegExpNode* TextNode::GetSuccessorOfOmnivorousTextNode(
2567 RegExpCompiler* compiler) {
2568 if (read_backward()) return nullptr;
2569 if (elements()->length() != 1) return nullptr;
2570 TextElement elm = elements()->at(0);
2571 if (elm.text_type() != TextElement::CLASS_RANGES) return nullptr;
2572 RegExpClassRanges* node = elm.class_ranges();
2573 ZoneList<CharacterRange>* ranges = node->ranges(zone());
2574 CharacterRange::Canonicalize(ranges);
2575 if (node->is_negated()) {
2576 return ranges->length() == 0 ? on_success() : nullptr;
2577 }
2578 if (ranges->length() != 1) return nullptr;
2579 const base::uc32 max_char = MaxCodeUnit(compiler->one_byte());
2580 return ranges->at(0).IsEverything(max_char) ? on_success() : nullptr;
2581}
2582
2583// Finds the fixed match length of a sequence of nodes that goes from
2584// this alternative and back to this choice node. If there are variable
2585// length nodes or other complications in the way then return a sentinel
2586// value indicating that a greedy loop cannot be constructed.
2587int ChoiceNode::GreedyLoopTextLengthForAlternative(
2588 GuardedAlternative* alternative) {
2589 int length = 0;
2590 RegExpNode* node = alternative->node();
2591 // Later we will generate code for all these text nodes using recursion
2592 // so we have to limit the max number.
2593 int recursion_depth = 0;
2594 while (node != this) {
2595 if (recursion_depth++ > RegExpCompiler::kMaxRecursion) {
2596 return kNodeIsTooComplexForGreedyLoops;
2597 }
2598 int node_length = node->GreedyLoopTextLength();
2599 if (node_length == kNodeIsTooComplexForGreedyLoops) {
2600 return kNodeIsTooComplexForGreedyLoops;
2601 }
2602 length += node_length;
2603 SeqRegExpNode* seq_node = static_cast<SeqRegExpNode*>(node);
2604 node = seq_node->on_success();
2605 }
2606 if (read_backward()) {
2607 length = -length;
2608 }
2609 // Check that we can jump by the whole text length. If not, return sentinel
2610 // to indicate the we can't construct a greedy loop.
2611 if (length < RegExpMacroAssembler::kMinCPOffset ||
2612 length > RegExpMacroAssembler::kMaxCPOffset) {
2613 return kNodeIsTooComplexForGreedyLoops;
2614 }
2615 return length;
2616}
2617
2618void LoopChoiceNode::AddLoopAlternative(GuardedAlternative alt) {
2619 DCHECK_NULL(loop_node_)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((loop_node_) == nullptr)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((loop_node_) == nullptr))), 0
))) { do { } while (false); MOZ_ReportAssertionFailure("(loop_node_) == nullptr"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 2619); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(loop_node_) == nullptr"
")"); do { *((volatile int*)__null) = 2619; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
2620 AddAlternative(alt);
2621 loop_node_ = alt.node();
2622}
2623
2624void LoopChoiceNode::AddContinueAlternative(GuardedAlternative alt) {
2625 DCHECK_NULL(continue_node_)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((continue_node_) == nullptr)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((continue_node_) == nullptr)
)), 0))) { do { } while (false); MOZ_ReportAssertionFailure("(continue_node_) == nullptr"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 2625); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(continue_node_) == nullptr"
")"); do { *((volatile int*)__null) = 2625; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
2626 AddAlternative(alt);
2627 continue_node_ = alt.node();
2628}
2629
2630void LoopChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
2631 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
2632 if (trace->stop_node() == this) {
1
Assuming the condition is false
2
Taking false branch
2633 // Back edge of greedy optimized loop node graph.
2634 int text_length =
2635 GreedyLoopTextLengthForAlternative(&(alternatives_->at(0)));
2636 DCHECK_NE(kNodeIsTooComplexForGreedyLoops, text_length)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((kNodeIsTooComplexForGreedyLoops) != (text_length))>
::isValid, "invalid assertion condition"); if ((__builtin_expect
(!!(!(!!((kNodeIsTooComplexForGreedyLoops) != (text_length)))
), 0))) { do { } while (false); MOZ_ReportAssertionFailure("(kNodeIsTooComplexForGreedyLoops) != (text_length)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 2636); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(kNodeIsTooComplexForGreedyLoops) != (text_length)"
")"); do { *((volatile int*)__null) = 2636; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
2637 // Update the counter-based backtracking info on the stack. This is an
2638 // optimization for greedy loops (see below).
2639 DCHECK(trace->cp_offset() == text_length)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(trace->cp_offset() == text_length)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(trace->cp_offset() == text_length
))), 0))) { do { } while (false); MOZ_ReportAssertionFailure(
"trace->cp_offset() == text_length", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 2639); AnnotateMozCrashReason("MOZ_ASSERT" "(" "trace->cp_offset() == text_length"
")"); do { *((volatile int*)__null) = 2639; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
2640 macro_assembler->AdvanceCurrentPosition(text_length);
2641 macro_assembler->GoTo(trace->loop_label());
2642 return;
2643 }
2644 DCHECK_NULL(trace->stop_node())do { static_assert( mozilla::detail::AssertionConditionType<
decltype((trace->stop_node()) == nullptr)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((trace->stop_node()) == nullptr
))), 0))) { do { } while (false); MOZ_ReportAssertionFailure(
"(trace->stop_node()) == nullptr", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 2644); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(trace->stop_node()) == nullptr"
")"); do { *((volatile int*)__null) = 2644; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
3
Assuming the condition is true
4
Taking false branch
5
Loop condition is false. Exiting loop
2645 if (!trace->is_trivial()) {
6
Taking false branch
2646 trace->Flush(compiler, this);
2647 return;
2648 }
2649 ChoiceNode::Emit(compiler, trace);
7
Calling 'ChoiceNode::Emit'
2650}
2651
2652int ChoiceNode::CalculatePreloadCharacters(RegExpCompiler* compiler,
2653 int eats_at_least) {
2654 int preload_characters = std::min(4, eats_at_least);
2655 DCHECK_LE(preload_characters, 4)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((preload_characters) <= (4))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((preload_characters) <= (
4)))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("(preload_characters) <= (4)", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 2655); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(preload_characters) <= (4)"
")"); do { *((volatile int*)__null) = 2655; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
2656 if (compiler->macro_assembler()->CanReadUnaligned()) {
2657 bool one_byte = compiler->one_byte();
2658 if (one_byte) {
2659 // We can't preload 3 characters because there is no machine instruction
2660 // to do that. We can't just load 4 because we could be reading
2661 // beyond the end of the string, which could cause a memory fault.
2662 if (preload_characters == 3) preload_characters = 2;
2663 } else {
2664 if (preload_characters > 2) preload_characters = 2;
2665 }
2666 } else {
2667 if (preload_characters > 1) preload_characters = 1;
2668 }
2669 return preload_characters;
2670}
2671
2672// This class is used when generating the alternatives in a choice node. It
2673// records the way the alternative is being code generated.
2674class AlternativeGeneration : public Malloced {
2675 public:
2676 AlternativeGeneration()
2677 : possible_success(),
2678 expects_preload(false),
2679 after(),
2680 quick_check_details() {}
2681 Label possible_success;
2682 bool expects_preload;
2683 Label after;
2684 QuickCheckDetails quick_check_details;
2685};
2686
2687// Creates a list of AlternativeGenerations. If the list has a reasonable
2688// size then it is on the stack, otherwise the excess is on the heap.
2689class AlternativeGenerationList {
2690 public:
2691 AlternativeGenerationList(int count, Zone* zone) : alt_gens_(count, zone) {
2692 for (int i = 0; i < count && i < kAFew; i++) {
2693 alt_gens_.Add(a_few_alt_gens_ + i, zone);
2694 }
2695 for (int i = kAFew; i < count; i++) {
2696 alt_gens_.Add(new AlternativeGeneration(), zone);
2697 }
2698 }
2699 ~AlternativeGenerationList() {
2700 for (int i = kAFew; i < alt_gens_.length(); i++) {
2701 delete alt_gens_[i];
2702 alt_gens_[i] = nullptr;
2703 }
2704 }
2705
2706 AlternativeGeneration* at(int i) { return alt_gens_[i]; }
2707
2708 private:
2709 static const int kAFew = 10;
2710 ZoneList<AlternativeGeneration*> alt_gens_;
2711 AlternativeGeneration a_few_alt_gens_[kAFew];
2712};
2713
2714void BoyerMoorePositionInfo::Set(int character) {
2715 SetInterval(Interval(character, character));
2716}
2717
2718namespace {
2719
2720ContainedInLattice AddRange(ContainedInLattice containment, const int* ranges,
2721 int ranges_length, Interval new_range) {
2722 DCHECK_EQ(1, ranges_length & 1)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((1) == (ranges_length & 1))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((1) == (ranges_length & 1
)))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("(1) == (ranges_length & 1)", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 2722); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(1) == (ranges_length & 1)"
")"); do { *((volatile int*)__null) = 2722; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
2723 DCHECK_EQ(String::kMaxCodePoint + 1, ranges[ranges_length - 1])do { static_assert( mozilla::detail::AssertionConditionType<
decltype((String::kMaxCodePoint + 1) == (ranges[ranges_length
- 1]))>::isValid, "invalid assertion condition"); if ((__builtin_expect
(!!(!(!!((String::kMaxCodePoint + 1) == (ranges[ranges_length
- 1])))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("(String::kMaxCodePoint + 1) == (ranges[ranges_length - 1])"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 2723); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(String::kMaxCodePoint + 1) == (ranges[ranges_length - 1])"
")"); do { *((volatile int*)__null) = 2723; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
2724 if (containment == kLatticeUnknown) return containment;
2725 bool inside = false;
2726 int last = 0;
2727 for (int i = 0; i < ranges_length; inside = !inside, last = ranges[i], i++) {
2728 // Consider the range from last to ranges[i].
2729 // We haven't got to the new range yet.
2730 if (ranges[i] <= new_range.from()) continue;
2731 // New range is wholly inside last-ranges[i]. Note that new_range.to() is
2732 // inclusive, but the values in ranges are not.
2733 if (last <= new_range.from() && new_range.to() < ranges[i]) {
2734 return Combine(containment, inside ? kLatticeIn : kLatticeOut);
2735 }
2736 return kLatticeUnknown;
2737 }
2738 return containment;
2739}
2740
2741int BitsetFirstSetBit(BoyerMoorePositionInfo::Bitset bitset) {
2742 static_assert(BoyerMoorePositionInfo::kMapSize ==
2743 2 * kInt64Size * kBitsPerByte);
2744
2745 // Slight fiddling is needed here, since the bitset is of length 128 while
2746 // CountTrailingZeros requires an integral type and std::bitset can only
2747 // convert to unsigned long long. So we handle the most- and least-significant
2748 // bits separately.
2749
2750 {
2751 static constexpr BoyerMoorePositionInfo::Bitset mask(~uint64_t{0});
2752 BoyerMoorePositionInfo::Bitset masked_bitset = bitset & mask;
2753 static_assert(kInt64Size >= sizeof(decltype(masked_bitset.to_ullong())));
2754 uint64_t lsb = masked_bitset.to_ullong();
2755 if (lsb != 0) return base::bits::CountTrailingZeros(lsb);
2756 }
2757
2758 {
2759 BoyerMoorePositionInfo::Bitset masked_bitset = bitset >> 64;
2760 uint64_t msb = masked_bitset.to_ullong();
2761 if (msb != 0) return 64 + base::bits::CountTrailingZeros(msb);
2762 }
2763
2764 return -1;
2765}
2766
2767} // namespace
2768
2769void BoyerMoorePositionInfo::SetInterval(const Interval& interval) {
2770 w_ = AddRange(w_, kWordRanges, kWordRangeCount, interval);
2771
2772 if (interval.size() >= kMapSize) {
2773 map_count_ = kMapSize;
2774 map_.set();
2775 return;
2776 }
2777
2778 for (int i = interval.from(); i <= interval.to(); i++) {
2779 int mod_character = (i & kMask);
2780 if (!map_[mod_character]) {
2781 map_count_++;
2782 map_.set(mod_character);
2783 }
2784 if (map_count_ == kMapSize) return;
2785 }
2786}
2787
2788void BoyerMoorePositionInfo::SetAll() {
2789 w_ = kLatticeUnknown;
2790 if (map_count_ != kMapSize) {
2791 map_count_ = kMapSize;
2792 map_.set();
2793 }
2794}
2795
2796BoyerMooreLookahead::BoyerMooreLookahead(int length, RegExpCompiler* compiler,
2797 Zone* zone)
2798 : length_(length),
2799 compiler_(compiler),
2800 max_char_(MaxCodeUnit(compiler->one_byte())) {
2801 bitmaps_ = zone->New<ZoneList<BoyerMoorePositionInfo*>>(length, zone);
2802 for (int i = 0; i < length; i++) {
2803 bitmaps_->Add(zone->New<BoyerMoorePositionInfo>(), zone);
2804 }
2805}
2806
2807// Find the longest range of lookahead that has the fewest number of different
2808// characters that can occur at a given position. Since we are optimizing two
2809// different parameters at once this is a tradeoff.
2810bool BoyerMooreLookahead::FindWorthwhileInterval(int* from, int* to) {
2811 int biggest_points = 0;
2812 // If more than 32 characters out of 128 can occur it is unlikely that we can
2813 // be lucky enough to step forwards much of the time.
2814 const int kMaxMax = 32;
2815 for (int max_number_of_chars = 4; max_number_of_chars < kMaxMax;
2816 max_number_of_chars *= 2) {
2817 biggest_points =
2818 FindBestInterval(max_number_of_chars, biggest_points, from, to);
2819 }
2820 if (biggest_points == 0) return false;
2821 return true;
2822}
2823
2824// Find the highest-points range between 0 and length_ where the character
2825// information is not too vague. 'Too vague' means that there are more than
2826// max_number_of_chars that can occur at this position. Calculates the number
2827// of points as the product of width-of-the-range and
2828// probability-of-finding-one-of-the-characters, where the probability is
2829// calculated using the frequency distribution of the sample subject string.
2830int BoyerMooreLookahead::FindBestInterval(int max_number_of_chars,
2831 int old_biggest_points, int* from,
2832 int* to) {
2833 int biggest_points = old_biggest_points;
2834 static const int kSize = RegExpMacroAssembler::kTableSize;
2835 for (int i = 0; i < length_;) {
2836 while (i < length_ && Count(i) > max_number_of_chars) i++;
2837 if (i == length_) break;
2838 int remembered_from = i;
2839
2840 BoyerMoorePositionInfo::Bitset union_bitset;
2841 for (; i < length_ && Count(i) <= max_number_of_chars; i++) {
2842 union_bitset |= bitmaps_->at(i)->raw_bitset();
2843 }
2844
2845 int frequency = 0;
2846
2847 // Iterate only over set bits.
2848 int j;
2849 while ((j = BitsetFirstSetBit(union_bitset)) != -1) {
2850 DCHECK(union_bitset[j])do { static_assert( mozilla::detail::AssertionConditionType<
decltype(union_bitset[j])>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(union_bitset[j]))), 0))) { do
{ } while (false); MOZ_ReportAssertionFailure("union_bitset[j]"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 2850); AnnotateMozCrashReason("MOZ_ASSERT" "(" "union_bitset[j]"
")"); do { *((volatile int*)__null) = 2850; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
; // Sanity check.
2851 // Add 1 to the frequency to give a small per-character boost for
2852 // the cases where our sampling is not good enough and many
2853 // characters have a frequency of zero. This means the frequency
2854 // can theoretically be up to 2*kSize though we treat it mostly as
2855 // a fraction of kSize.
2856 frequency += compiler_->frequency_collator()->Frequency(j) + 1;
2857 union_bitset.reset(j);
2858 }
2859
2860 // We use the probability of skipping times the distance we are skipping to
2861 // judge the effectiveness of this. Actually we have a cut-off: By
2862 // dividing by 2 we switch off the skipping if the probability of skipping
2863 // is less than 50%. This is because the multibyte mask-and-compare
2864 // skipping in quickcheck is more likely to do well on this case.
2865 bool in_quickcheck_range =
2866 ((i - remembered_from < 4) ||
2867 (compiler_->one_byte() ? remembered_from <= 4 : remembered_from <= 2));
2868 // Called 'probability' but it is only a rough estimate and can actually
2869 // be outside the 0-kSize range.
2870 int probability = (in_quickcheck_range ? kSize / 2 : kSize) - frequency;
2871 int points = (i - remembered_from) * probability;
2872 if (points > biggest_points) {
2873 *from = remembered_from;
2874 *to = i - 1;
2875 biggest_points = points;
2876 }
2877 }
2878 return biggest_points;
2879}
2880
2881// Take all the characters that will not prevent a successful match if they
2882// occur in the subject string in the range between min_lookahead and
2883// max_lookahead (inclusive) measured from the current position. If the
2884// character at max_lookahead offset is not one of these characters, then we
2885// can safely skip forwards by the number of characters in the range.
2886int BoyerMooreLookahead::GetSkipTable(int min_lookahead, int max_lookahead,
2887 Handle<ByteArray> boolean_skip_table) {
2888 const int kSkipArrayEntry = 0;
2889 const int kDontSkipArrayEntry = 1;
2890
2891 std::memset(boolean_skip_table->begin(), kSkipArrayEntry,
2892 boolean_skip_table->length());
2893
2894 for (int i = max_lookahead; i >= min_lookahead; i--) {
2895 BoyerMoorePositionInfo::Bitset bitset = bitmaps_->at(i)->raw_bitset();
2896
2897 // Iterate only over set bits.
2898 int j;
2899 while ((j = BitsetFirstSetBit(bitset)) != -1) {
2900 DCHECK(bitset[j])do { static_assert( mozilla::detail::AssertionConditionType<
decltype(bitset[j])>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(bitset[j]))), 0))) { do { } while
(false); MOZ_ReportAssertionFailure("bitset[j]", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 2900); AnnotateMozCrashReason("MOZ_ASSERT" "(" "bitset[j]" ")"
); do { *((volatile int*)__null) = 2900; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
; // Sanity check.
2901 boolean_skip_table->set(j, kDontSkipArrayEntry);
2902 bitset.reset(j);
2903 }
2904 }
2905
2906 const int skip = max_lookahead + 1 - min_lookahead;
2907 return skip;
2908}
2909
2910// See comment above on the implementation of GetSkipTable.
2911void BoyerMooreLookahead::EmitSkipInstructions(RegExpMacroAssembler* masm) {
2912 const int kSize = RegExpMacroAssembler::kTableSize;
2913
2914 int min_lookahead = 0;
2915 int max_lookahead = 0;
2916
2917 if (!FindWorthwhileInterval(&min_lookahead, &max_lookahead)) return;
2918
2919 // Check if we only have a single non-empty position info, and that info
2920 // contains precisely one character.
2921 bool found_single_character = false;
2922 int single_character = 0;
2923 for (int i = max_lookahead; i >= min_lookahead; i--) {
2924 BoyerMoorePositionInfo* map = bitmaps_->at(i);
2925 if (map->map_count() == 0) continue;
2926
2927 if (found_single_character || map->map_count() > 1) {
2928 found_single_character = false;
2929 break;
2930 }
2931
2932 DCHECK(!found_single_character)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(!found_single_character)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(!found_single_character))), 0
))) { do { } while (false); MOZ_ReportAssertionFailure("!found_single_character"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 2932); AnnotateMozCrashReason("MOZ_ASSERT" "(" "!found_single_character"
")"); do { *((volatile int*)__null) = 2932; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
2933 DCHECK_EQ(map->map_count(), 1)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((map->map_count()) == (1))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((map->map_count()) == (1)
))), 0))) { do { } while (false); MOZ_ReportAssertionFailure(
"(map->map_count()) == (1)", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 2933); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(map->map_count()) == (1)"
")"); do { *((volatile int*)__null) = 2933; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
2934
2935 found_single_character = true;
2936 single_character = BitsetFirstSetBit(map->raw_bitset());
2937
2938 DCHECK_NE(single_character, -1)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((single_character) != (-1))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((single_character) != (-1)))
), 0))) { do { } while (false); MOZ_ReportAssertionFailure("(single_character) != (-1)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 2938); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(single_character) != (-1)"
")"); do { *((volatile int*)__null) = 2938; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
2939 }
2940
2941 int lookahead_width = max_lookahead + 1 - min_lookahead;
2942
2943 if (found_single_character && lookahead_width == 1 && max_lookahead < 3) {
2944 // The mask-compare can probably handle this better.
2945 return;
2946 }
2947
2948 if (found_single_character) {
2949 Label cont, again;
2950 masm->Bind(&again);
2951 masm->LoadCurrentCharacter(max_lookahead, &cont, true);
2952 if (max_char_ > kSize) {
2953 masm->CheckCharacterAfterAnd(single_character,
2954 RegExpMacroAssembler::kTableMask, &cont);
2955 } else {
2956 masm->CheckCharacter(single_character, &cont);
2957 }
2958 masm->AdvanceCurrentPosition(lookahead_width);
2959 masm->GoTo(&again);
2960 masm->Bind(&cont);
2961 return;
2962 }
2963
2964 Factory* factory = masm->isolate()->factory();
2965 Handle<ByteArray> boolean_skip_table =
2966 factory->NewByteArray(kSize, AllocationType::kOld);
2967 int skip_distance =
2968 GetSkipTable(min_lookahead, max_lookahead, boolean_skip_table);
2969 DCHECK_NE(0, skip_distance)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((0) != (skip_distance))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((0) != (skip_distance)))), 0
))) { do { } while (false); MOZ_ReportAssertionFailure("(0) != (skip_distance)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 2969); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(0) != (skip_distance)"
")"); do { *((volatile int*)__null) = 2969; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
2970
2971 Label cont, again;
2972 masm->Bind(&again);
2973 masm->LoadCurrentCharacter(max_lookahead, &cont, true);
2974 masm->CheckBitInTable(boolean_skip_table, &cont);
2975 masm->AdvanceCurrentPosition(skip_distance);
2976 masm->GoTo(&again);
2977 masm->Bind(&cont);
2978}
2979
2980/* Code generation for choice nodes.
2981 *
2982 * We generate quick checks that do a mask and compare to eliminate a
2983 * choice. If the quick check succeeds then it jumps to the continuation to
2984 * do slow checks and check subsequent nodes. If it fails (the common case)
2985 * it falls through to the next choice.
2986 *
2987 * Here is the desired flow graph. Nodes directly below each other imply
2988 * fallthrough. Alternatives 1 and 2 have quick checks. Alternative
2989 * 3 doesn't have a quick check so we have to call the slow check.
2990 * Nodes are marked Qn for quick checks and Sn for slow checks. The entire
2991 * regexp continuation is generated directly after the Sn node, up to the
2992 * next GoTo if we decide to reuse some already generated code. Some
2993 * nodes expect preload_characters to be preloaded into the current
2994 * character register. R nodes do this preloading. Vertices are marked
2995 * F for failures and S for success (possible success in the case of quick
2996 * nodes). L, V, < and > are used as arrow heads.
2997 *
2998 * ----------> R
2999 * |
3000 * V
3001 * Q1 -----> S1
3002 * | S /
3003 * F| /
3004 * | F/
3005 * | /
3006 * | R
3007 * | /
3008 * V L
3009 * Q2 -----> S2
3010 * | S /
3011 * F| /
3012 * | F/
3013 * | /
3014 * | R
3015 * | /
3016 * V L
3017 * S3
3018 * |
3019 * F|
3020 * |
3021 * R
3022 * |
3023 * backtrack V
3024 * <----------Q4
3025 * \ F |
3026 * \ |S
3027 * \ F V
3028 * \-----S4
3029 *
3030 * For greedy loops we push the current position, then generate the code that
3031 * eats the input specially in EmitGreedyLoop. The other choice (the
3032 * continuation) is generated by the normal code in EmitChoices, and steps back
3033 * in the input to the starting position when it fails to match. The loop code
3034 * looks like this (U is the unwind code that steps back in the greedy loop).
3035 *
3036 * _____
3037 * / \
3038 * V |
3039 * ----------> S1 |
3040 * /| |
3041 * / |S |
3042 * F/ \_____/
3043 * /
3044 * |<-----
3045 * | \
3046 * V |S
3047 * Q2 ---> U----->backtrack
3048 * | F /
3049 * S| /
3050 * V F /
3051 * S2--/
3052 */
3053
3054GreedyLoopState::GreedyLoopState(bool not_at_start) {
3055 counter_backtrack_trace_.set_backtrack(&label_);
3056 if (not_at_start) counter_backtrack_trace_.set_at_start(Trace::FALSE_VALUE);
3057}
3058
3059void ChoiceNode::AssertGuardsMentionRegisters(Trace* trace) {
3060#ifdef DEBUG1
3061 int choice_count = alternatives_->length();
3062 for (int i = 0; i < choice_count - 1; i++) {
3063 GuardedAlternative alternative = alternatives_->at(i);
3064 ZoneList<Guard*>* guards = alternative.guards();
3065 int guard_count = (guards == nullptr) ? 0 : guards->length();
3066 for (int j = 0; j < guard_count; j++) {
3067 DCHECK(!trace->mentions_reg(guards->at(j)->reg()))do { static_assert( mozilla::detail::AssertionConditionType<
decltype(!trace->mentions_reg(guards->at(j)->reg()))
>::isValid, "invalid assertion condition"); if ((__builtin_expect
(!!(!(!!(!trace->mentions_reg(guards->at(j)->reg()))
)), 0))) { do { } while (false); MOZ_ReportAssertionFailure("!trace->mentions_reg(guards->at(j)->reg())"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 3067); AnnotateMozCrashReason("MOZ_ASSERT" "(" "!trace->mentions_reg(guards->at(j)->reg())"
")"); do { *((volatile int*)__null) = 3067; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
3068 }
3069 }
3070#endif
3071}
3072
3073void ChoiceNode::SetUpPreLoad(RegExpCompiler* compiler, Trace* current_trace,
3074 PreloadState* state) {
3075 if (state->eats_at_least_ == PreloadState::kEatsAtLeastNotYetInitialized) {
3076 // Save some time by looking at most one machine word ahead.
3077 state->eats_at_least_ =
3078 EatsAtLeast(current_trace->at_start() == Trace::FALSE_VALUE);
3079 }
3080 state->preload_characters_ =
3081 CalculatePreloadCharacters(compiler, state->eats_at_least_);
3082
3083 state->preload_is_current_ =
3084 (current_trace->characters_preloaded() == state->preload_characters_);
3085 state->preload_has_checked_bounds_ = state->preload_is_current_;
3086}
3087
3088void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
3089 int choice_count = alternatives_->length();
3090
3091 if (choice_count == 1 && alternatives_->at(0).guards() == nullptr) {
8
Assuming 'choice_count' is not equal to 1
3092 alternatives_->at(0).node()->Emit(compiler, trace);
3093 return;
3094 }
3095
3096 AssertGuardsMentionRegisters(trace);
3097
3098 LimitResult limit_result = LimitVersions(compiler, trace);
3099 if (limit_result == DONE) return;
9
Assuming 'limit_result' is not equal to DONE
10
Taking false branch
3100 DCHECK(limit_result == CONTINUE)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(limit_result == CONTINUE)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(limit_result == CONTINUE))),
0))) { do { } while (false); MOZ_ReportAssertionFailure("limit_result == CONTINUE"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 3100); AnnotateMozCrashReason("MOZ_ASSERT" "(" "limit_result == CONTINUE"
")"); do { *((volatile int*)__null) = 3100; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
11
Assuming 'limit_result' is equal to CONTINUE
12
Taking false branch
3101
3102 // For loop nodes we already flushed (see LoopChoiceNode::Emit), but for
3103 // other choice nodes we only flush if we are out of code size budget.
3104 if (trace->flush_budget() == 0 && trace->actions() != nullptr) {
13
Loop condition is false. Exiting loop
14
Assuming the condition is false
3105 trace->Flush(compiler, this);
3106 return;
3107 }
3108
3109 RecursionCheck rc(compiler);
3110
3111 PreloadState preload;
3112 preload.init();
3113 GreedyLoopState greedy_loop_state(not_at_start());
3114
3115 int text_length = GreedyLoopTextLengthForAlternative(&alternatives_->at(0));
3116 AlternativeGenerationList alt_gens(choice_count, zone());
3117
3118 if (choice_count
14.1
'choice_count' is <= 1
> 1 && text_length != kNodeIsTooComplexForGreedyLoops) {
3119 trace = EmitGreedyLoop(compiler, trace, &alt_gens, &preload,
3120 &greedy_loop_state, text_length);
3121 } else {
3122 // TODO(erikcorry): Delete this. We don't need this label, but it makes us
3123 // match the traces produced pre-cleanup.
3124 Label second_choice;
3125 compiler->macro_assembler()->Bind(&second_choice);
3126
3127 preload.eats_at_least_ = EmitOptimizedUnanchoredSearch(compiler, trace);
3128
3129 EmitChoices(compiler, &alt_gens, 0, trace, &preload);
15
Calling 'ChoiceNode::EmitChoices'
3130 }
3131
3132 // At this point we need to generate slow checks for the alternatives where
3133 // the quick check was inlined. We can recognize these because the associated
3134 // label was bound.
3135 int new_flush_budget = trace->flush_budget() / choice_count;
3136 for (int i = 0; i < choice_count; i++) {
3137 AlternativeGeneration* alt_gen = alt_gens.at(i);
3138 Trace new_trace(*trace);
3139 // If there are actions to be flushed we have to limit how many times
3140 // they are flushed. Take the budget of the parent trace and distribute
3141 // it fairly amongst the children.
3142 if (new_trace.actions() != nullptr) {
3143 new_trace.set_flush_budget(new_flush_budget);
3144 }
3145 bool next_expects_preload =
3146 i == choice_count - 1 ? false : alt_gens.at(i + 1)->expects_preload;
3147 EmitOutOfLineContinuation(compiler, &new_trace, alternatives_->at(i),
3148 alt_gen, preload.preload_characters_,
3149 next_expects_preload);
3150 }
3151}
3152
3153Trace* ChoiceNode::EmitGreedyLoop(RegExpCompiler* compiler, Trace* trace,
3154 AlternativeGenerationList* alt_gens,
3155 PreloadState* preload,
3156 GreedyLoopState* greedy_loop_state,
3157 int text_length) {
3158 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
3159 // Here we have special handling for greedy loops containing only text nodes
3160 // and other simple nodes. These are handled by pushing the current
3161 // position on the stack and then incrementing the current position each
3162 // time around the switch. On backtrack we decrement the current position
3163 // and check it against the pushed value. This avoids pushing backtrack
3164 // information for each iteration of the loop, which could take up a lot of
3165 // space.
3166 DCHECK(trace->stop_node() == nullptr)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(trace->stop_node() == nullptr)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(trace->stop_node() == nullptr
))), 0))) { do { } while (false); MOZ_ReportAssertionFailure(
"trace->stop_node() == nullptr", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 3166); AnnotateMozCrashReason("MOZ_ASSERT" "(" "trace->stop_node() == nullptr"
")"); do { *((volatile int*)__null) = 3166; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
3167 macro_assembler->PushCurrentPosition();
3168 Label greedy_match_failed;
3169 Trace greedy_match_trace;
3170 if (not_at_start()) greedy_match_trace.set_at_start(Trace::FALSE_VALUE);
3171 greedy_match_trace.set_backtrack(&greedy_match_failed);
3172 Label loop_label;
3173 macro_assembler->Bind(&loop_label);
3174 greedy_match_trace.set_stop_node(this);
3175 greedy_match_trace.set_loop_label(&loop_label);
3176 alternatives_->at(0).node()->Emit(compiler, &greedy_match_trace);
3177 macro_assembler->Bind(&greedy_match_failed);
3178
3179 Label second_choice; // For use in greedy matches.
3180 macro_assembler->Bind(&second_choice);
3181
3182 Trace* new_trace = greedy_loop_state->counter_backtrack_trace();
3183
3184 EmitChoices(compiler, alt_gens, 1, new_trace, preload);
3185
3186 macro_assembler->Bind(greedy_loop_state->label());
3187 // If we have unwound to the bottom then backtrack.
3188 macro_assembler->CheckGreedyLoop(trace->backtrack());
3189 // Otherwise try the second priority at an earlier position.
3190 macro_assembler->AdvanceCurrentPosition(-text_length);
3191 macro_assembler->GoTo(&second_choice);
3192 return new_trace;
3193}
3194
3195int ChoiceNode::EmitOptimizedUnanchoredSearch(RegExpCompiler* compiler,
3196 Trace* trace) {
3197 int eats_at_least = PreloadState::kEatsAtLeastNotYetInitialized;
3198 if (alternatives_->length() != 2) return eats_at_least;
3199
3200 GuardedAlternative alt1 = alternatives_->at(1);
3201 if (alt1.guards() != nullptr && alt1.guards()->length() != 0) {
3202 return eats_at_least;
3203 }
3204 RegExpNode* eats_anything_node = alt1.node();
3205 if (eats_anything_node->GetSuccessorOfOmnivorousTextNode(compiler) != this) {
3206 return eats_at_least;
3207 }
3208
3209 // Really we should be creating a new trace when we execute this function,
3210 // but there is no need, because the code it generates cannot backtrack, and
3211 // we always arrive here with a trivial trace (since it's the entry to a
3212 // loop. That also implies that there are no preloaded characters, which is
3213 // good, because it means we won't be violating any assumptions by
3214 // overwriting those characters with new load instructions.
3215 DCHECK(trace->is_trivial())do { static_assert( mozilla::detail::AssertionConditionType<
decltype(trace->is_trivial())>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(trace->is_trivial()))), 0
))) { do { } while (false); MOZ_ReportAssertionFailure("trace->is_trivial()"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 3215); AnnotateMozCrashReason("MOZ_ASSERT" "(" "trace->is_trivial()"
")"); do { *((volatile int*)__null) = 3215; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
3216
3217 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
3218 Isolate* isolate = macro_assembler->isolate();
3219 // At this point we know that we are at a non-greedy loop that will eat
3220 // any character one at a time. Any non-anchored regexp has such a
3221 // loop prepended to it in order to find where it starts. We look for
3222 // a pattern of the form ...abc... where we can look 6 characters ahead
3223 // and step forwards 3 if the character is not one of abc. Abc need
3224 // not be atoms, they can be any reasonably limited character class or
3225 // small alternation.
3226 BoyerMooreLookahead* bm = bm_info(false);
3227 if (bm == nullptr) {
3228 eats_at_least = std::min(kMaxLookaheadForBoyerMoore, EatsAtLeast(false));
3229 if (eats_at_least >= 1) {
3230 bm = zone()->New<BoyerMooreLookahead>(eats_at_least, compiler, zone());
3231 GuardedAlternative alt0 = alternatives_->at(0);
3232 alt0.node()->FillInBMInfo(isolate, 0, kRecursionBudget, bm, false);
3233 }
3234 }
3235 if (bm != nullptr) {
3236 bm->EmitSkipInstructions(macro_assembler);
3237 }
3238 return eats_at_least;
3239}
3240
3241void ChoiceNode::EmitChoices(RegExpCompiler* compiler,
3242 AlternativeGenerationList* alt_gens,
3243 int first_choice, Trace* trace,
3244 PreloadState* preload) {
3245 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
3246 SetUpPreLoad(compiler, trace, preload);
3247
3248 // For now we just call all choices one after the other. The idea ultimately
3249 // is to use the Dispatch table to try only the relevant ones.
3250 int choice_count = alternatives_->length();
3251
3252 int new_flush_budget = trace->flush_budget() / choice_count;
3253
3254 for (int i = first_choice; i < choice_count; i++) {
16
Assuming 'i' is < 'choice_count'
17
Loop condition is true. Entering loop body
3255 bool is_last = i == choice_count - 1;
18
Assuming the condition is false
3256 bool fall_through_on_failure = !is_last;
3257 GuardedAlternative alternative = alternatives_->at(i);
3258 AlternativeGeneration* alt_gen = alt_gens->at(i);
3259 alt_gen->quick_check_details.set_characters(preload->preload_characters_);
3260 ZoneList<Guard*>* guards = alternative.guards();
3261 int guard_count = (guards == nullptr) ? 0 : guards->length();
19
Assuming the condition is false
20
'?' condition is false
3262 Trace new_trace(*trace);
3263 new_trace.set_characters_preloaded(
3264 preload->preload_is_current_
20.1
Field 'preload_is_current_' is false
? preload->preload_characters_ : 0);
21
'?' condition is false
3265 if (preload->preload_has_checked_bounds_
21.1
Field 'preload_has_checked_bounds_' is false
) {
22
Taking false branch
3266 new_trace.set_bound_checked_up_to(preload->preload_characters_);
3267 }
3268 new_trace.quick_check_performed()->Clear();
3269 if (not_at_start_) new_trace.set_at_start(Trace::FALSE_VALUE);
23
Assuming field 'not_at_start_' is false
24
Taking false branch
3270 if (!is_last
24.1
'is_last' is false
) {
25
Taking true branch
3271 new_trace.set_backtrack(&alt_gen->after);
3272 }
3273 alt_gen->expects_preload = preload->preload_is_current_;
3274 bool generate_full_check_inline = false;
3275 if (compiler->optimize() &&
26
Assuming the condition is true
3276 try_to_emit_quick_check_for_alternative(i == 0) &&
27
Assuming the condition is true
3277 alternative.node()->EmitQuickCheck(
28
Calling 'RegExpNode::EmitQuickCheck'
3278 compiler, trace, &new_trace, preload->preload_has_checked_bounds_,
3279 &alt_gen->possible_success, &alt_gen->quick_check_details,
3280 fall_through_on_failure, this)) {
3281 // Quick check was generated for this choice.
3282 preload->preload_is_current_ = true;
3283 preload->preload_has_checked_bounds_ = true;
3284 // If we generated the quick check to fall through on possible success,
3285 // we now need to generate the full check inline.
3286 if (!fall_through_on_failure) {
3287 macro_assembler->Bind(&alt_gen->possible_success);
3288 new_trace.set_quick_check_performed(&alt_gen->quick_check_details);
3289 new_trace.set_characters_preloaded(preload->preload_characters_);
3290 new_trace.set_bound_checked_up_to(preload->preload_characters_);
3291 generate_full_check_inline = true;
3292 }
3293 } else if (alt_gen->quick_check_details.cannot_match()) {
3294 if (!fall_through_on_failure) {
3295 macro_assembler->GoTo(trace->backtrack());
3296 }
3297 continue;
3298 } else {
3299 // No quick check was generated. Put the full code here.
3300 // If this is not the first choice then there could be slow checks from
3301 // previous cases that go here when they fail. There's no reason to
3302 // insist that they preload characters since the slow check we are about
3303 // to generate probably can't use it.
3304 if (i != first_choice) {
3305 alt_gen->expects_preload = false;
3306 new_trace.InvalidateCurrentCharacter();
3307 }
3308 generate_full_check_inline = true;
3309 }
3310 if (generate_full_check_inline) {
3311 if (new_trace.actions() != nullptr) {
3312 new_trace.set_flush_budget(new_flush_budget);
3313 }
3314 for (int j = 0; j < guard_count; j++) {
3315 GenerateGuard(macro_assembler, guards->at(j), &new_trace);
3316 }
3317 alternative.node()->Emit(compiler, &new_trace);
3318 preload->preload_is_current_ = false;
3319 }
3320 macro_assembler->Bind(&alt_gen->after);
3321 }
3322}
3323
3324void ChoiceNode::EmitOutOfLineContinuation(RegExpCompiler* compiler,
3325 Trace* trace,
3326 GuardedAlternative alternative,
3327 AlternativeGeneration* alt_gen,
3328 int preload_characters,
3329 bool next_expects_preload) {
3330 if (!alt_gen->possible_success.is_linked()) return;
3331
3332 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
3333 macro_assembler->Bind(&alt_gen->possible_success);
3334 Trace out_of_line_trace(*trace);
3335 out_of_line_trace.set_characters_preloaded(preload_characters);
3336 out_of_line_trace.set_quick_check_performed(&alt_gen->quick_check_details);
3337 if (not_at_start_) out_of_line_trace.set_at_start(Trace::FALSE_VALUE);
3338 ZoneList<Guard*>* guards = alternative.guards();
3339 int guard_count = (guards == nullptr) ? 0 : guards->length();
3340 if (next_expects_preload) {
3341 Label reload_current_char;
3342 out_of_line_trace.set_backtrack(&reload_current_char);
3343 for (int j = 0; j < guard_count; j++) {
3344 GenerateGuard(macro_assembler, guards->at(j), &out_of_line_trace);
3345 }
3346 alternative.node()->Emit(compiler, &out_of_line_trace);
3347 macro_assembler->Bind(&reload_current_char);
3348 // Reload the current character, since the next quick check expects that.
3349 // We don't need to check bounds here because we only get into this
3350 // code through a quick check which already did the checked load.
3351 macro_assembler->LoadCurrentCharacter(trace->cp_offset(), nullptr, false,
3352 preload_characters);
3353 macro_assembler->GoTo(&(alt_gen->after));
3354 } else {
3355 out_of_line_trace.set_backtrack(&(alt_gen->after));
3356 for (int j = 0; j < guard_count; j++) {
3357 GenerateGuard(macro_assembler, guards->at(j), &out_of_line_trace);
3358 }
3359 alternative.node()->Emit(compiler, &out_of_line_trace);
3360 }
3361}
3362
3363void ActionNode::Emit(RegExpCompiler* compiler, Trace* trace) {
3364 RegExpMacroAssembler* assembler = compiler->macro_assembler();
3365 LimitResult limit_result = LimitVersions(compiler, trace);
3366 if (limit_result == DONE) return;
3367 DCHECK(limit_result == CONTINUE)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(limit_result == CONTINUE)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(limit_result == CONTINUE))),
0))) { do { } while (false); MOZ_ReportAssertionFailure("limit_result == CONTINUE"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 3367); AnnotateMozCrashReason("MOZ_ASSERT" "(" "limit_result == CONTINUE"
")"); do { *((volatile int*)__null) = 3367; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
3368
3369 RecursionCheck rc(compiler);
3370
3371 switch (action_type_) {
3372 case STORE_POSITION: {
3373 Trace::DeferredCapture new_capture(data_.u_position_register.reg,
3374 data_.u_position_register.is_capture,
3375 trace);
3376 Trace new_trace = *trace;
3377 new_trace.add_action(&new_capture);
3378 on_success()->Emit(compiler, &new_trace);
3379 break;
3380 }
3381 case INCREMENT_REGISTER: {
3382 Trace::DeferredIncrementRegister new_increment(
3383 data_.u_increment_register.reg);
3384 Trace new_trace = *trace;
3385 new_trace.add_action(&new_increment);
3386 on_success()->Emit(compiler, &new_trace);
3387 break;
3388 }
3389 case SET_REGISTER_FOR_LOOP: {
3390 Trace::DeferredSetRegisterForLoop new_set(data_.u_store_register.reg,
3391 data_.u_store_register.value);
3392 Trace new_trace = *trace;
3393 new_trace.add_action(&new_set);
3394 on_success()->Emit(compiler, &new_trace);
3395 break;
3396 }
3397 case CLEAR_CAPTURES: {
3398 Trace::DeferredClearCaptures new_capture(Interval(
3399 data_.u_clear_captures.range_from, data_.u_clear_captures.range_to));
3400 Trace new_trace = *trace;
3401 new_trace.add_action(&new_capture);
3402 on_success()->Emit(compiler, &new_trace);
3403 break;
3404 }
3405 case BEGIN_POSITIVE_SUBMATCH:
3406 case BEGIN_NEGATIVE_SUBMATCH:
3407 if (!trace->is_trivial()) {
3408 trace->Flush(compiler, this);
3409 } else {
3410 assembler->WriteCurrentPositionToRegister(
3411 data_.u_submatch.current_position_register, 0);
3412 assembler->WriteStackPointerToRegister(
3413 data_.u_submatch.stack_pointer_register);
3414 on_success()->Emit(compiler, trace);
3415 }
3416 break;
3417 case EMPTY_MATCH_CHECK: {
3418 int start_pos_reg = data_.u_empty_match_check.start_register;
3419 int stored_pos = 0;
3420 int rep_reg = data_.u_empty_match_check.repetition_register;
3421 bool has_minimum = (rep_reg != RegExpCompiler::kNoRegister);
3422 bool know_dist = trace->GetStoredPosition(start_pos_reg, &stored_pos);
3423 if (know_dist && !has_minimum && stored_pos == trace->cp_offset()) {
3424 // If we know we haven't advanced and there is no minimum we
3425 // can just backtrack immediately.
3426 assembler->GoTo(trace->backtrack());
3427 } else if (know_dist && stored_pos < trace->cp_offset()) {
3428 // If we know we've advanced we can generate the continuation
3429 // immediately.
3430 on_success()->Emit(compiler, trace);
3431 } else if (!trace->is_trivial()) {
3432 trace->Flush(compiler, this);
3433 } else {
3434 Label skip_empty_check;
3435 // If we have a minimum number of repetitions we check the current
3436 // number first and skip the empty check if it's not enough.
3437 if (has_minimum) {
3438 int limit = data_.u_empty_match_check.repetition_limit;
3439 assembler->IfRegisterLT(rep_reg, limit, &skip_empty_check);
3440 }
3441 // If the match is empty we bail out, otherwise we fall through
3442 // to the on-success continuation.
3443 assembler->IfRegisterEqPos(data_.u_empty_match_check.start_register,
3444 trace->backtrack());
3445 assembler->Bind(&skip_empty_check);
3446 on_success()->Emit(compiler, trace);
3447 }
3448 break;
3449 }
3450 case POSITIVE_SUBMATCH_SUCCESS: {
3451 if (!trace->is_trivial()) {
3452 trace->Flush(compiler, this);
3453 return;
3454 }
3455 assembler->ReadCurrentPositionFromRegister(
3456 data_.u_submatch.current_position_register);
3457 assembler->ReadStackPointerFromRegister(
3458 data_.u_submatch.stack_pointer_register);
3459 int clear_register_count = data_.u_submatch.clear_register_count;
3460 if (clear_register_count == 0) {
3461 on_success()->Emit(compiler, trace);
3462 return;
3463 }
3464 int clear_registers_from = data_.u_submatch.clear_register_from;
3465 Label clear_registers_backtrack;
3466 Trace new_trace = *trace;
3467 new_trace.set_backtrack(&clear_registers_backtrack);
3468 on_success()->Emit(compiler, &new_trace);
3469
3470 assembler->Bind(&clear_registers_backtrack);
3471 int clear_registers_to = clear_registers_from + clear_register_count - 1;
3472 assembler->ClearRegisters(clear_registers_from, clear_registers_to);
3473
3474 DCHECK(trace->backtrack() == nullptr)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(trace->backtrack() == nullptr)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(trace->backtrack() == nullptr
))), 0))) { do { } while (false); MOZ_ReportAssertionFailure(
"trace->backtrack() == nullptr", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 3474); AnnotateMozCrashReason("MOZ_ASSERT" "(" "trace->backtrack() == nullptr"
")"); do { *((volatile int*)__null) = 3474; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
3475 assembler->Backtrack();
3476 return;
3477 }
3478 case MODIFY_FLAGS: {
3479 compiler->set_flags(flags());
3480 on_success()->Emit(compiler, trace);
3481 break;
3482 }
3483 default:
3484 UNREACHABLE()do { do { } while (false); MOZ_ReportCrash("" "unreachable code"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 3484); AnnotateMozCrashReason("MOZ_CRASH(" "unreachable code"
")"); do { *((volatile int*)__null) = 3484; __attribute__((nomerge
)) ::abort(); } while (false); } while (false)
;
3485 }
3486}
3487
3488void BackReferenceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
3489 RegExpMacroAssembler* assembler = compiler->macro_assembler();
3490 if (!trace->is_trivial()) {
3491 trace->Flush(compiler, this);
3492 return;
3493 }
3494
3495 LimitResult limit_result = LimitVersions(compiler, trace);
3496 if (limit_result == DONE) return;
3497 DCHECK(limit_result == CONTINUE)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(limit_result == CONTINUE)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(limit_result == CONTINUE))),
0))) { do { } while (false); MOZ_ReportAssertionFailure("limit_result == CONTINUE"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 3497); AnnotateMozCrashReason("MOZ_ASSERT" "(" "limit_result == CONTINUE"
")"); do { *((volatile int*)__null) = 3497; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
3498
3499 RecursionCheck rc(compiler);
3500
3501 DCHECK_EQ(start_reg_ + 1, end_reg_)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((start_reg_ + 1) == (end_reg_))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((start_reg_ + 1) == (end_reg_
)))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("(start_reg_ + 1) == (end_reg_)", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 3501); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(start_reg_ + 1) == (end_reg_)"
")"); do { *((volatile int*)__null) = 3501; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
3502 if (IsIgnoreCase(compiler->flags())) {
3503 bool unicode = IsEitherUnicode(compiler->flags());
3504 assembler->CheckNotBackReferenceIgnoreCase(start_reg_, read_backward(),
3505 unicode, trace->backtrack());
3506 } else {
3507 assembler->CheckNotBackReference(start_reg_, read_backward(),
3508 trace->backtrack());
3509 }
3510 // We are going to advance backward, so we may end up at the start.
3511 if (read_backward()) trace->set_at_start(Trace::UNKNOWN);
3512
3513 // Check that the back reference does not end inside a surrogate pair.
3514 if (IsEitherUnicode(compiler->flags()) && !compiler->one_byte()) {
3515 assembler->CheckNotInSurrogatePair(trace->cp_offset(), trace->backtrack());
3516 }
3517 on_success()->Emit(compiler, trace);
3518}
3519
3520void TextNode::CalculateOffsets() {
3521 int element_count = elements()->length();
3522 // Set up the offsets of the elements relative to the start. This is a fixed
3523 // quantity since a TextNode can only contain fixed-width things.
3524 int cp_offset = 0;
3525 for (int i = 0; i < element_count; i++) {
3526 TextElement& elm = elements()->at(i);
3527 elm.set_cp_offset(cp_offset);
3528 cp_offset += elm.length();
3529 }
3530}
3531
3532namespace {
3533
3534// Assertion propagation moves information about assertions such as
3535// \b to the affected nodes. For instance, in /.\b./ information must
3536// be propagated to the first '.' that whatever follows needs to know
3537// if it matched a word or a non-word, and to the second '.' that it
3538// has to check if it succeeds a word or non-word. In this case the
3539// result will be something like:
3540//
3541// +-------+ +------------+
3542// | . | | . |
3543// +-------+ ---> +------------+
3544// | word? | | check word |
3545// +-------+ +------------+
3546class AssertionPropagator : public AllStatic {
3547 public:
3548 static void VisitText(TextNode* that) {}
3549
3550 static void VisitAction(ActionNode* that) {
3551 // If the next node is interested in what it follows then this node
3552 // has to be interested too so it can pass the information on.
3553 that->info()->AddFromFollowing(that->on_success()->info());
3554 }
3555
3556 static void VisitChoice(ChoiceNode* that, int i) {
3557 // Anything the following nodes need to know has to be known by
3558 // this node also, so it can pass it on.
3559 that->info()->AddFromFollowing(that->alternatives()->at(i).node()->info());
3560 }
3561
3562 static void VisitLoopChoiceContinueNode(LoopChoiceNode* that) {
3563 that->info()->AddFromFollowing(that->continue_node()->info());
3564 }
3565
3566 static void VisitLoopChoiceLoopNode(LoopChoiceNode* that) {
3567 that->info()->AddFromFollowing(that->loop_node()->info());
3568 }
3569
3570 static void VisitNegativeLookaroundChoiceLookaroundNode(
3571 NegativeLookaroundChoiceNode* that) {
3572 VisitChoice(that, NegativeLookaroundChoiceNode::kLookaroundIndex);
3573 }
3574
3575 static void VisitNegativeLookaroundChoiceContinueNode(
3576 NegativeLookaroundChoiceNode* that) {
3577 VisitChoice(that, NegativeLookaroundChoiceNode::kContinueIndex);
3578 }
3579
3580 static void VisitBackReference(BackReferenceNode* that) {}
3581
3582 static void VisitAssertion(AssertionNode* that) {}
3583};
3584
3585// Propagates information about the minimum size of successful matches from
3586// successor nodes to their predecessors. Note that all eats_at_least values
3587// are initialized to zero before analysis.
3588class EatsAtLeastPropagator : public AllStatic {
3589 public:
3590 static void VisitText(TextNode* that) {
3591 // The eats_at_least value is not used if reading backward.
3592 if (!that->read_backward()) {
3593 // We are not at the start after this node, and thus we can use the
3594 // successor's eats_at_least_from_not_start value.
3595 uint8_t eats_at_least = base::saturated_cast<uint8_t>(
3596 that->Length() + that->on_success()
3597 ->eats_at_least_info()
3598 ->eats_at_least_from_not_start);
3599 that->set_eats_at_least_info(EatsAtLeastInfo(eats_at_least));
3600 }
3601 }
3602
3603 static void VisitAction(ActionNode* that) {
3604 switch (that->action_type()) {
3605 case ActionNode::BEGIN_POSITIVE_SUBMATCH:
3606 case ActionNode::POSITIVE_SUBMATCH_SUCCESS:
3607 // We do not propagate eats_at_least data through positive lookarounds,
3608 // because they rewind input.
3609 // TODO(v8:11859) Potential approaches for fixing this include:
3610 // 1. Add a dedicated choice node for positive lookaround, similar to
3611 // NegativeLookaroundChoiceNode.
3612 // 2. Add an eats_at_least_inside_loop field to EatsAtLeastInfo, which
3613 // is <= eats_at_least_from_possibly_start, and use that value in
3614 // EatsAtLeastFromLoopEntry.
3615 DCHECK(that->eats_at_least_info()->IsZero())do { static_assert( mozilla::detail::AssertionConditionType<
decltype(that->eats_at_least_info()->IsZero())>::isValid
, "invalid assertion condition"); if ((__builtin_expect(!!(!(
!!(that->eats_at_least_info()->IsZero()))), 0))) { do {
} while (false); MOZ_ReportAssertionFailure("that->eats_at_least_info()->IsZero()"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 3615); AnnotateMozCrashReason("MOZ_ASSERT" "(" "that->eats_at_least_info()->IsZero()"
")"); do { *((volatile int*)__null) = 3615; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
3616 break;
3617 case ActionNode::SET_REGISTER_FOR_LOOP:
3618 // SET_REGISTER_FOR_LOOP indicates a loop entry point, which means the
3619 // loop body will run at least the minimum number of times before the
3620 // continuation case can run.
3621 that->set_eats_at_least_info(
3622 that->on_success()->EatsAtLeastFromLoopEntry());
3623 break;
3624 case ActionNode::BEGIN_NEGATIVE_SUBMATCH:
3625 default:
3626 // Otherwise, the current node eats at least as much as its successor.
3627 // Note: we can propagate eats_at_least data for BEGIN_NEGATIVE_SUBMATCH
3628 // because NegativeLookaroundChoiceNode ignores its lookaround successor
3629 // when computing eats-at-least and quick check information.
3630 that->set_eats_at_least_info(*that->on_success()->eats_at_least_info());
3631 break;
3632 }
3633 }
3634
3635 static void VisitChoice(ChoiceNode* that, int i) {
3636 // The minimum possible match from a choice node is the minimum of its
3637 // successors.
3638 EatsAtLeastInfo eats_at_least =
3639 i == 0 ? EatsAtLeastInfo(UINT8_MAX(255)) : *that->eats_at_least_info();
3640 eats_at_least.SetMin(
3641 *that->alternatives()->at(i).node()->eats_at_least_info());
3642 that->set_eats_at_least_info(eats_at_least);
3643 }
3644
3645 static void VisitLoopChoiceContinueNode(LoopChoiceNode* that) {
3646 if (!that->read_backward()) {
3647 that->set_eats_at_least_info(
3648 *that->continue_node()->eats_at_least_info());
3649 }
3650 }
3651
3652 static void VisitLoopChoiceLoopNode(LoopChoiceNode* that) {}
3653
3654 static void VisitNegativeLookaroundChoiceLookaroundNode(
3655 NegativeLookaroundChoiceNode* that) {}
3656
3657 static void VisitNegativeLookaroundChoiceContinueNode(
3658 NegativeLookaroundChoiceNode* that) {
3659 that->set_eats_at_least_info(*that->continue_node()->eats_at_least_info());
3660 }
3661
3662 static void VisitBackReference(BackReferenceNode* that) {
3663 if (!that->read_backward()) {
3664 that->set_eats_at_least_info(*that->on_success()->eats_at_least_info());
3665 }
3666 }
3667
3668 static void VisitAssertion(AssertionNode* that) {
3669 EatsAtLeastInfo eats_at_least = *that->on_success()->eats_at_least_info();
3670 if (that->assertion_type() == AssertionNode::AT_START) {
3671 // If we know we are not at the start and we are asked "how many
3672 // characters will you match if you succeed?" then we can answer anything
3673 // since false implies false. So let's just set the max answer
3674 // (UINT8_MAX) since that won't prevent us from preloading a lot of
3675 // characters for the other branches in the node graph.
3676 eats_at_least.eats_at_least_from_not_start = UINT8_MAX(255);
3677 }
3678 that->set_eats_at_least_info(eats_at_least);
3679 }
3680};
3681
3682} // namespace
3683
3684// -------------------------------------------------------------------
3685// Analysis
3686
3687// Iterates the node graph and provides the opportunity for propagators to set
3688// values that depend on successor nodes.
3689template <typename... Propagators>
3690class Analysis : public NodeVisitor {
3691 public:
3692 Analysis(Isolate* isolate, bool is_one_byte, RegExpFlags flags)
3693 : isolate_(isolate),
3694 is_one_byte_(is_one_byte),
3695 flags_(flags),
3696 error_(RegExpError::kNone) {}
3697
3698 void EnsureAnalyzed(RegExpNode* that) {
3699 StackLimitCheck check(isolate());
3700 if (check.HasOverflowed()) {
3701 if (v8_flagsjs::jit::JitOptions.correctness_fuzzer_suppressions) {
3702 FATAL("Analysis: Aborting on stack overflow")do { do { } while (false); MOZ_ReportCrash("" "Analysis: Aborting on stack overflow"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 3702); AnnotateMozCrashReason("MOZ_CRASH(" "Analysis: Aborting on stack overflow"
")"); do { *((volatile int*)__null) = 3702; __attribute__((nomerge
)) ::abort(); } while (false); } while (false)
;
3703 }
3704 fail(RegExpError::kAnalysisStackOverflow);
3705 return;
3706 }
3707 if (that->info()->been_analyzed || that->info()->being_analyzed) return;
3708 that->info()->being_analyzed = true;
3709 that->Accept(this);
3710 that->info()->being_analyzed = false;
3711 that->info()->been_analyzed = true;
3712 }
3713
3714 bool has_failed() { return error_ != RegExpError::kNone; }
3715 RegExpError error() {
3716 DCHECK(error_ != RegExpError::kNone)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(error_ != RegExpError::kNone)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(error_ != RegExpError::kNone
))), 0))) { do { } while (false); MOZ_ReportAssertionFailure(
"error_ != RegExpError::kNone", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 3716); AnnotateMozCrashReason("MOZ_ASSERT" "(" "error_ != RegExpError::kNone"
")"); do { *((volatile int*)__null) = 3716; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
3717 return error_;
3718 }
3719 void fail(RegExpError error) { error_ = error; }
3720
3721 Isolate* isolate() const { return isolate_; }
3722
3723 void VisitEnd(EndNode* that) override {
3724 // nothing to do
3725 }
3726
3727// Used to call the given static function on each propagator / variadic template
3728// argument.
3729#define STATIC_FOR_EACH(expr) \
3730 do { \
3731 int dummy[] = {((expr), 0)...}; \
3732 USE(dummy)do { ::v8::base::Use unused_tmp_array_for_use_macro[]{dummy};
(void)unused_tmp_array_for_use_macro; } while (false)
; \
3733 } while (false)
3734
3735 void VisitText(TextNode* that) override {
3736 that->MakeCaseIndependent(isolate(), is_one_byte_, flags());
3737 EnsureAnalyzed(that->on_success());
3738 if (has_failed()) return;
3739 that->CalculateOffsets();
3740 STATIC_FOR_EACH(Propagators::VisitText(that));
3741 }
3742
3743 void VisitAction(ActionNode* that) override {
3744 if (that->action_type() == ActionNode::MODIFY_FLAGS) {
3745 set_flags(that->flags());
3746 }
3747 EnsureAnalyzed(that->on_success());
3748 if (has_failed()) return;
3749 STATIC_FOR_EACH(Propagators::VisitAction(that));
3750 }
3751
3752 void VisitChoice(ChoiceNode* that) override {
3753 for (int i = 0; i < that->alternatives()->length(); i++) {
3754 EnsureAnalyzed(that->alternatives()->at(i).node());
3755 if (has_failed()) return;
3756 STATIC_FOR_EACH(Propagators::VisitChoice(that, i));
3757 }
3758 }
3759
3760 void VisitLoopChoice(LoopChoiceNode* that) override {
3761 DCHECK_EQ(that->alternatives()->length(), 2)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((that->alternatives()->length()) == (2))>::
isValid, "invalid assertion condition"); if ((__builtin_expect
(!!(!(!!((that->alternatives()->length()) == (2)))), 0)
)) { do { } while (false); MOZ_ReportAssertionFailure("(that->alternatives()->length()) == (2)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 3761); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(that->alternatives()->length()) == (2)"
")"); do { *((volatile int*)__null) = 3761; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
; // Just loop and continue.
3762
3763 // First propagate all information from the continuation node.
3764 EnsureAnalyzed(that->continue_node());
3765 if (has_failed()) return;
3766 STATIC_FOR_EACH(Propagators::VisitLoopChoiceContinueNode(that));
3767
3768 // Check the loop last since it may need the value of this node
3769 // to get a correct result.
3770 EnsureAnalyzed(that->loop_node());
3771 if (has_failed()) return;
3772 STATIC_FOR_EACH(Propagators::VisitLoopChoiceLoopNode(that));
3773 }
3774
3775 void VisitNegativeLookaroundChoice(
3776 NegativeLookaroundChoiceNode* that) override {
3777 DCHECK_EQ(that->alternatives()->length(), 2)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((that->alternatives()->length()) == (2))>::
isValid, "invalid assertion condition"); if ((__builtin_expect
(!!(!(!!((that->alternatives()->length()) == (2)))), 0)
)) { do { } while (false); MOZ_ReportAssertionFailure("(that->alternatives()->length()) == (2)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 3777); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(that->alternatives()->length()) == (2)"
")"); do { *((volatile int*)__null) = 3777; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
; // Lookaround and continue.
3778
3779 EnsureAnalyzed(that->lookaround_node());
3780 if (has_failed()) return;
3781 STATIC_FOR_EACH(
3782 Propagators::VisitNegativeLookaroundChoiceLookaroundNode(that));
3783
3784 EnsureAnalyzed(that->continue_node());
3785 if (has_failed()) return;
3786 STATIC_FOR_EACH(
3787 Propagators::VisitNegativeLookaroundChoiceContinueNode(that));
3788 }
3789
3790 void VisitBackReference(BackReferenceNode* that) override {
3791 EnsureAnalyzed(that->on_success());
3792 if (has_failed()) return;
3793 STATIC_FOR_EACH(Propagators::VisitBackReference(that));
3794 }
3795
3796 void VisitAssertion(AssertionNode* that) override {
3797 EnsureAnalyzed(that->on_success());
3798 if (has_failed()) return;
3799 STATIC_FOR_EACH(Propagators::VisitAssertion(that));
3800 }
3801
3802#undef STATIC_FOR_EACH
3803
3804 private:
3805 RegExpFlags flags() const { return flags_; }
3806 void set_flags(RegExpFlags flags) { flags_ = flags; }
3807
3808 Isolate* isolate_;
3809 const bool is_one_byte_;
3810 RegExpFlags flags_;
3811 RegExpError error_;
3812
3813 DISALLOW_IMPLICIT_CONSTRUCTORS(Analysis)Analysis() = delete; Analysis(const Analysis&) = delete; Analysis
& operator=(const Analysis&) = delete
;
3814};
3815
3816RegExpError AnalyzeRegExp(Isolate* isolate, bool is_one_byte, RegExpFlags flags,
3817 RegExpNode* node) {
3818 Analysis<AssertionPropagator, EatsAtLeastPropagator> analysis(
3819 isolate, is_one_byte, flags);
3820 DCHECK_EQ(node->info()->been_analyzed, false)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((node->info()->been_analyzed) == (false))>::
isValid, "invalid assertion condition"); if ((__builtin_expect
(!!(!(!!((node->info()->been_analyzed) == (false)))), 0
))) { do { } while (false); MOZ_ReportAssertionFailure("(node->info()->been_analyzed) == (false)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 3820); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(node->info()->been_analyzed) == (false)"
")"); do { *((volatile int*)__null) = 3820; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
3821 analysis.EnsureAnalyzed(node);
3822 DCHECK_IMPLIES(analysis.has_failed(), analysis.error() != RegExpError::kNone)do { if (analysis.has_failed()) { do { static_assert( mozilla
::detail::AssertionConditionType<decltype(analysis.error()
!= RegExpError::kNone)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(analysis.error() != RegExpError
::kNone))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("analysis.error() != RegExpError::kNone", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 3822); AnnotateMozCrashReason("MOZ_ASSERT" "(" "analysis.error() != RegExpError::kNone"
")"); do { *((volatile int*)__null) = 3822; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false); } } while (
false)
;
3823 return analysis.has_failed() ? analysis.error() : RegExpError::kNone;
3824}
3825
3826void BackReferenceNode::FillInBMInfo(Isolate* isolate, int offset, int budget,
3827 BoyerMooreLookahead* bm,
3828 bool not_at_start) {
3829 // Working out the set of characters that a backreference can match is too
3830 // hard, so we just say that any character can match.
3831 bm->SetRest(offset);
3832 SaveBMInfo(bm, not_at_start, offset);
3833}
3834
3835static_assert(BoyerMoorePositionInfo::kMapSize ==
3836 RegExpMacroAssembler::kTableSize);
3837
3838void ChoiceNode::FillInBMInfo(Isolate* isolate, int offset, int budget,
3839 BoyerMooreLookahead* bm, bool not_at_start) {
3840 ZoneList<GuardedAlternative>* alts = alternatives();
3841 budget = (budget - 1) / alts->length();
3842 for (int i = 0; i < alts->length(); i++) {
3843 GuardedAlternative& alt = alts->at(i);
3844 if (alt.guards() != nullptr && alt.guards()->length() != 0) {
3845 bm->SetRest(offset); // Give up trying to fill in info.
3846 SaveBMInfo(bm, not_at_start, offset);
3847 return;
3848 }
3849 alt.node()->FillInBMInfo(isolate, offset, budget, bm, not_at_start);
3850 }
3851 SaveBMInfo(bm, not_at_start, offset);
3852}
3853
3854void TextNode::FillInBMInfo(Isolate* isolate, int initial_offset, int budget,
3855 BoyerMooreLookahead* bm, bool not_at_start) {
3856 if (initial_offset >= bm->length()) return;
3857 int offset = initial_offset;
3858 int max_char = bm->max_char();
3859 for (int i = 0; i < elements()->length(); i++) {
3860 if (offset >= bm->length()) {
3861 if (initial_offset == 0) set_bm_info(not_at_start, bm);
3862 return;
3863 }
3864 TextElement text = elements()->at(i);
3865 if (text.text_type() == TextElement::ATOM) {
3866 RegExpAtom* atom = text.atom();
3867 for (int j = 0; j < atom->length(); j++, offset++) {
3868 if (offset >= bm->length()) {
3869 if (initial_offset == 0) set_bm_info(not_at_start, bm);
3870 return;
3871 }
3872 base::uc16 character = atom->data()[j];
3873 if (IsIgnoreCase(bm->compiler()->flags())) {
3874 unibrow::uchar chars[4];
3875 int length = GetCaseIndependentLetters(
3876 isolate, character, bm->max_char() == String::kMaxOneByteCharCode,
3877 chars, 4);
3878 for (int k = 0; k < length; k++) {
3879 bm->Set(offset, chars[k]);
3880 }
3881 } else {
3882 if (character <= max_char) bm->Set(offset, character);
3883 }
3884 }
3885 } else {
3886 DCHECK_EQ(TextElement::CLASS_RANGES, text.text_type())do { static_assert( mozilla::detail::AssertionConditionType<
decltype((TextElement::CLASS_RANGES) == (text.text_type()))>
::isValid, "invalid assertion condition"); if ((__builtin_expect
(!!(!(!!((TextElement::CLASS_RANGES) == (text.text_type()))))
, 0))) { do { } while (false); MOZ_ReportAssertionFailure("(TextElement::CLASS_RANGES) == (text.text_type())"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 3886); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(TextElement::CLASS_RANGES) == (text.text_type())"
")"); do { *((volatile int*)__null) = 3886; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
3887 RegExpClassRanges* class_ranges = text.class_ranges();
3888 ZoneList<CharacterRange>* ranges = class_ranges->ranges(zone());
3889 if (class_ranges->is_negated()) {
3890 bm->SetAll(offset);
3891 } else {
3892 for (int k = 0; k < ranges->length(); k++) {
3893 CharacterRange& range = ranges->at(k);
3894 if (static_cast<int>(range.from()) > max_char) continue;
3895 int to = std::min(max_char, static_cast<int>(range.to()));
3896 bm->SetInterval(offset, Interval(range.from(), to));
3897 }
3898 }
3899 offset++;
3900 }
3901 }
3902 if (offset >= bm->length()) {
3903 if (initial_offset == 0) set_bm_info(not_at_start, bm);
3904 return;
3905 }
3906 on_success()->FillInBMInfo(isolate, offset, budget - 1, bm,
3907 true); // Not at start after a text node.
3908 if (initial_offset == 0) set_bm_info(not_at_start, bm);
3909}
3910
3911RegExpNode* RegExpCompiler::OptionallyStepBackToLeadSurrogate(
3912 RegExpNode* on_success) {
3913 DCHECK(!read_backward())do { static_assert( mozilla::detail::AssertionConditionType<
decltype(!read_backward())>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(!read_backward()))), 0))) { do
{ } while (false); MOZ_ReportAssertionFailure("!read_backward()"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 3913); AnnotateMozCrashReason("MOZ_ASSERT" "(" "!read_backward()"
")"); do { *((volatile int*)__null) = 3913; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
3914 ZoneList<CharacterRange>* lead_surrogates = CharacterRange::List(
3915 zone(), CharacterRange::Range(kLeadSurrogateStart, kLeadSurrogateEnd));
3916 ZoneList<CharacterRange>* trail_surrogates = CharacterRange::List(
3917 zone(), CharacterRange::Range(kTrailSurrogateStart, kTrailSurrogateEnd));
3918
3919 ChoiceNode* optional_step_back = zone()->New<ChoiceNode>(2, zone());
3920
3921 int stack_register = UnicodeLookaroundStackRegister();
3922 int position_register = UnicodeLookaroundPositionRegister();
3923 RegExpNode* step_back = TextNode::CreateForCharacterRanges(
3924 zone(), lead_surrogates, true, on_success);
3925 RegExpLookaround::Builder builder(true, step_back, stack_register,
3926 position_register);
3927 RegExpNode* match_trail = TextNode::CreateForCharacterRanges(
3928 zone(), trail_surrogates, false, builder.on_match_success());
3929
3930 optional_step_back->AddAlternative(
3931 GuardedAlternative(builder.ForMatch(match_trail)));
3932 optional_step_back->AddAlternative(GuardedAlternative(on_success));
3933
3934 return optional_step_back;
3935}
3936
3937RegExpNode* RegExpCompiler::PreprocessRegExp(RegExpCompileData* data,
3938 bool is_one_byte) {
3939 // Wrap the body of the regexp in capture #0.
3940 RegExpNode* captured_body =
3941 RegExpCapture::ToNode(data->tree, 0, this, accept());
3942 RegExpNode* node = captured_body;
3943 if (!data->tree->IsAnchoredAtStart() && !IsSticky(flags())) {
3944 // Add a .*? at the beginning, outside the body capture, unless
3945 // this expression is anchored at the beginning or sticky.
3946 RegExpNode* loop_node = RegExpQuantifier::ToNode(
3947 0, RegExpTree::kInfinity, false,
3948 zone()->New<RegExpClassRanges>(StandardCharacterSet::kEverything), this,
3949 captured_body, data->contains_anchor);
3950
3951 if (data->contains_anchor) {
3952 // Unroll loop once, to take care of the case that might start
3953 // at the start of input.
3954 ChoiceNode* first_step_node = zone()->New<ChoiceNode>(2, zone());
3955 first_step_node->AddAlternative(GuardedAlternative(captured_body));
3956 first_step_node->AddAlternative(GuardedAlternative(zone()->New<TextNode>(
3957 zone()->New<RegExpClassRanges>(StandardCharacterSet::kEverything),
3958 false, loop_node)));
3959 node = first_step_node;
3960 } else {
3961 node = loop_node;
3962 }
3963 }
3964 if (is_one_byte) {
3965 node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, flags());
3966 // Do it again to propagate the new nodes to places where they were not
3967 // put because they had not been calculated yet.
3968 if (node != nullptr) {
3969 node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, flags());
3970 }
3971 } else if (IsEitherUnicode(flags()) &&
3972 (IsGlobal(flags()) || IsSticky(flags()))) {
3973 node = OptionallyStepBackToLeadSurrogate(node);
3974 }
3975
3976 if (node == nullptr) node = zone()->New<EndNode>(EndNode::BACKTRACK, zone());
3977 return node;
3978}
3979
3980void RegExpCompiler::ToNodeCheckForStackOverflow() {
3981 if (StackLimitCheck{isolate()}.HasOverflowed()) {
3982 V8::FatalProcessOutOfMemory(isolate(), "RegExpCompiler");
3983 }
3984}
3985
3986} // namespace internal
3987} // namespace v8