Bug Summary

File:var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc
Warning:line 1466, column 38
The result of left shift is undefined because the right operand '32' is not smaller than 32, the capacity of 'uint32_t'

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name regexp-compiler.cc -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -fhalf-no-semantic-interposition -mframe-pointer=all -relaxed-aliasing -ffp-contract=off -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fdebug-compilation-dir=/var/lib/jenkins/workspace/firefox-scan-build/obj-x86_64-pc-linux-gnu/js/src/irregexp -fcoverage-compilation-dir=/var/lib/jenkins/workspace/firefox-scan-build/obj-x86_64-pc-linux-gnu/js/src/irregexp -resource-dir /usr/lib/llvm-18/lib/clang/18 -include /var/lib/jenkins/workspace/firefox-scan-build/config/gcc_hidden.h -include /var/lib/jenkins/workspace/firefox-scan-build/obj-x86_64-pc-linux-gnu/mozilla-config.h -I /var/lib/jenkins/workspace/firefox-scan-build/obj-x86_64-pc-linux-gnu/dist/stl_wrappers -I /var/lib/jenkins/workspace/firefox-scan-build/obj-x86_64-pc-linux-gnu/dist/system_wrappers -U _FORTIFY_SOURCE -D _FORTIFY_SOURCE=2 -D DEBUG=1 -D WASM_SUPPORTS_HUGE_MEMORY -D JS_CACHEIR_SPEW -D JS_STRUCTURED_SPEW -D JS_HAS_CTYPES -D FFI_BUILDING -D EXPORT_JS_API -D MOZ_HAS_MOZGLUE -D MOZ_SUPPORT_LEAKCHECKING -I /var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp -I /var/lib/jenkins/workspace/firefox-scan-build/obj-x86_64-pc-linux-gnu/js/src/irregexp -I /var/lib/jenkins/workspace/firefox-scan-build/obj-x86_64-pc-linux-gnu/js/src -I /var/lib/jenkins/workspace/firefox-scan-build/js/src -I /var/lib/jenkins/workspace/firefox-scan-build/obj-x86_64-pc-linux-gnu/dist/include -I /var/lib/jenkins/workspace/firefox-scan-build/obj-x86_64-pc-linux-gnu/dist/include/nspr -I /var/lib/jenkins/workspace/firefox-scan-build/obj-x86_64-pc-linux-gnu/dist/include/nss -D MOZILLA_CLIENT -D V8_INTL_SUPPORT -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/14/../../../../include/c++/14 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/14/../../../../include/x86_64-linux-gnu/c++/14 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/14/../../../../include/c++/14/backward -internal-isystem /usr/lib/llvm-18/lib/clang/18/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/14/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O3 -Wno-error=tautological-type-limit-compare -Wno-invalid-offsetof -Wno-range-loop-analysis -Wno-deprecated-anon-enum-enum-conversion -Wno-deprecated-enum-enum-conversion -Wno-deprecated-this-capture -Wno-inline-new-delete -Wno-error=deprecated-declarations -Wno-error=array-bounds -Wno-error=free-nonheap-object -Wno-error=atomic-alignment -Wno-error=deprecated-builtins -Wno-psabi -Wno-error=builtin-macro-redefined -Wno-vla-cxx-extension -Wno-unknown-warning-option -Wno-error=type-limits -Wno-error=return-type -Wno-sign-compare -Wno-c++11-narrowing -fdeprecated-macro -ferror-limit 19 -stack-protector 2 -fstack-clash-protection -ftrivial-auto-var-init=pattern -fno-rtti -fgnuc-version=4.2.1 -fno-aligned-allocation -vectorize-loops -vectorize-slp -analyzer-checker optin.performance.Padding -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2024-07-30-004816-4182763-1 -x c++ /var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc
1// Copyright 2019 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "irregexp/imported/regexp-compiler.h"
6
7#include "irregexp/imported/regexp-macro-assembler-arch.h"
8
9#ifdef V8_INTL_SUPPORT1
10#include "irregexp/imported/special-case.h"
11#include "unicode/locid.h"
12#include "unicode/uniset.h"
13#include "unicode/utypes.h"
14#endif // V8_INTL_SUPPORT
15
16namespace v8 {
17namespace internal {
18
19using namespace regexp_compiler_constants; // NOLINT(build/namespaces)
20
21// -------------------------------------------------------------------
22// Implementation of the Irregexp regular expression engine.
23//
24// The Irregexp regular expression engine is intended to be a complete
25// implementation of ECMAScript regular expressions. It generates either
26// bytecodes or native code.
27
28// The Irregexp regexp engine is structured in three steps.
29// 1) The parser generates an abstract syntax tree. See ast.cc.
30// 2) From the AST a node network is created. The nodes are all
31// subclasses of RegExpNode. The nodes represent states when
32// executing a regular expression. Several optimizations are
33// performed on the node network.
34// 3) From the nodes we generate either byte codes or native code
35// that can actually execute the regular expression (perform
36// the search). The code generation step is described in more
37// detail below.
38
39// Code generation.
40//
41// The nodes are divided into four main categories.
42// * Choice nodes
43// These represent places where the regular expression can
44// match in more than one way. For example on entry to an
45// alternation (foo|bar) or a repetition (*, +, ? or {}).
46// * Action nodes
47// These represent places where some action should be
48// performed. Examples include recording the current position
49// in the input string to a register (in order to implement
50// captures) or other actions on register for example in order
51// to implement the counters needed for {} repetitions.
52// * Matching nodes
53// These attempt to match some element part of the input string.
54// Examples of elements include character classes, plain strings
55// or back references.
56// * End nodes
57// These are used to implement the actions required on finding
58// a successful match or failing to find a match.
59//
60// The code generated (whether as byte codes or native code) maintains
61// some state as it runs. This consists of the following elements:
62//
63// * The capture registers. Used for string captures.
64// * Other registers. Used for counters etc.
65// * The current position.
66// * The stack of backtracking information. Used when a matching node
67// fails to find a match and needs to try an alternative.
68//
69// Conceptual regular expression execution model:
70//
71// There is a simple conceptual model of regular expression execution
72// which will be presented first. The actual code generated is a more
73// efficient simulation of the simple conceptual model:
74//
75// * Choice nodes are implemented as follows:
76// For each choice except the last {
77// push current position
78// push backtrack code location
79// <generate code to test for choice>
80// backtrack code location:
81// pop current position
82// }
83// <generate code to test for last choice>
84//
85// * Actions nodes are generated as follows
86// <push affected registers on backtrack stack>
87// <generate code to perform action>
88// push backtrack code location
89// <generate code to test for following nodes>
90// backtrack code location:
91// <pop affected registers to restore their state>
92// <pop backtrack location from stack and go to it>
93//
94// * Matching nodes are generated as follows:
95// if input string matches at current position
96// update current position
97// <generate code to test for following nodes>
98// else
99// <pop backtrack location from stack and go to it>
100//
101// Thus it can be seen that the current position is saved and restored
102// by the choice nodes, whereas the registers are saved and restored by
103// by the action nodes that manipulate them.
104//
105// The other interesting aspect of this model is that nodes are generated
106// at the point where they are needed by a recursive call to Emit(). If
107// the node has already been code generated then the Emit() call will
108// generate a jump to the previously generated code instead. In order to
109// limit recursion it is possible for the Emit() function to put the node
110// on a work list for later generation and instead generate a jump. The
111// destination of the jump is resolved later when the code is generated.
112//
113// Actual regular expression code generation.
114//
115// Code generation is actually more complicated than the above. In order to
116// improve the efficiency of the generated code some optimizations are
117// performed
118//
119// * Choice nodes have 1-character lookahead.
120// A choice node looks at the following character and eliminates some of
121// the choices immediately based on that character. This is not yet
122// implemented.
123// * Simple greedy loops store reduced backtracking information.
124// A quantifier like /.*foo/m will greedily match the whole input. It will
125// then need to backtrack to a point where it can match "foo". The naive
126// implementation of this would push each character position onto the
127// backtracking stack, then pop them off one by one. This would use space
128// proportional to the length of the input string. However since the "."
129// can only match in one way and always has a constant length (in this case
130// of 1) it suffices to store the current position on the top of the stack
131// once. Matching now becomes merely incrementing the current position and
132// backtracking becomes decrementing the current position and checking the
133// result against the stored current position. This is faster and saves
134// space.
135// * The current state is virtualized.
136// This is used to defer expensive operations until it is clear that they
137// are needed and to generate code for a node more than once, allowing
138// specialized an efficient versions of the code to be created. This is
139// explained in the section below.
140//
141// Execution state virtualization.
142//
143// Instead of emitting code, nodes that manipulate the state can record their
144// manipulation in an object called the Trace. The Trace object can record a
145// current position offset, an optional backtrack code location on the top of
146// the virtualized backtrack stack and some register changes. When a node is
147// to be emitted it can flush the Trace or update it. Flushing the Trace
148// will emit code to bring the actual state into line with the virtual state.
149// Avoiding flushing the state can postpone some work (e.g. updates of capture
150// registers). Postponing work can save time when executing the regular
151// expression since it may be found that the work never has to be done as a
152// failure to match can occur. In addition it is much faster to jump to a
153// known backtrack code location than it is to pop an unknown backtrack
154// location from the stack and jump there.
155//
156// The virtual state found in the Trace affects code generation. For example
157// the virtual state contains the difference between the actual current
158// position and the virtual current position, and matching code needs to use
159// this offset to attempt a match in the correct location of the input
160// string. Therefore code generated for a non-trivial trace is specialized
161// to that trace. The code generator therefore has the ability to generate
162// code for each node several times. In order to limit the size of the
163// generated code there is an arbitrary limit on how many specialized sets of
164// code may be generated for a given node. If the limit is reached, the
165// trace is flushed and a generic version of the code for a node is emitted.
166// This is subsequently used for that node. The code emitted for non-generic
167// trace is not recorded in the node and so it cannot currently be reused in
168// the event that code generation is requested for an identical trace.
169
170namespace {
171
172constexpr base::uc32 MaxCodeUnit(const bool one_byte) {
173 static_assert(String::kMaxOneByteCharCodeU <=
174 std::numeric_limits<uint16_t>::max());
175 static_assert(String::kMaxUtf16CodeUnitU <=
176 std::numeric_limits<uint16_t>::max());
177 return one_byte ? String::kMaxOneByteCharCodeU : String::kMaxUtf16CodeUnitU;
178}
179
180constexpr uint32_t CharMask(const bool one_byte) {
181 static_assert(base::bits::IsPowerOfTwo(String::kMaxOneByteCharCodeU + 1));
182 static_assert(base::bits::IsPowerOfTwo(String::kMaxUtf16CodeUnitU + 1));
183 return MaxCodeUnit(one_byte);
184}
185
186} // namespace
187
188void RegExpTree::AppendToText(RegExpText* text, Zone* zone) { UNREACHABLE()do { do { } while (false); MOZ_ReportCrash("" "unreachable code"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 188); AnnotateMozCrashReason("MOZ_CRASH(" "unreachable code"
")"); do { *((volatile int*)__null) = 188; __attribute__((nomerge
)) ::abort(); } while (false); } while (false)
; }
189
190void RegExpAtom::AppendToText(RegExpText* text, Zone* zone) {
191 text->AddElement(TextElement::Atom(this), zone);
192}
193
194void RegExpClassRanges::AppendToText(RegExpText* text, Zone* zone) {
195 text->AddElement(TextElement::ClassRanges(this), zone);
196}
197
198void RegExpText::AppendToText(RegExpText* text, Zone* zone) {
199 for (int i = 0; i < elements()->length(); i++)
200 text->AddElement(elements()->at(i), zone);
201}
202
203TextElement TextElement::Atom(RegExpAtom* atom) {
204 return TextElement(ATOM, atom);
205}
206
207TextElement TextElement::ClassRanges(RegExpClassRanges* class_ranges) {
208 return TextElement(CLASS_RANGES, class_ranges);
209}
210
211int TextElement::length() const {
212 switch (text_type()) {
213 case ATOM:
214 return atom()->length();
215
216 case CLASS_RANGES:
217 return 1;
218 }
219 UNREACHABLE()do { do { } while (false); MOZ_ReportCrash("" "unreachable code"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 219); AnnotateMozCrashReason("MOZ_CRASH(" "unreachable code"
")"); do { *((volatile int*)__null) = 219; __attribute__((nomerge
)) ::abort(); } while (false); } while (false)
;
220}
221
222class RecursionCheck {
223 public:
224 explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) {
225 compiler->IncrementRecursionDepth();
226 }
227 ~RecursionCheck() { compiler_->DecrementRecursionDepth(); }
228
229 private:
230 RegExpCompiler* compiler_;
231};
232
233// Attempts to compile the regexp using an Irregexp code generator. Returns
234// a fixed array or a null handle depending on whether it succeeded.
235RegExpCompiler::RegExpCompiler(Isolate* isolate, Zone* zone, int capture_count,
236 RegExpFlags flags, bool one_byte)
237 : next_register_(JSRegExp::RegistersForCaptureCount(capture_count)),
238 unicode_lookaround_stack_register_(kNoRegister),
239 unicode_lookaround_position_register_(kNoRegister),
240 work_list_(nullptr),
241 recursion_depth_(0),
242 flags_(flags),
243 one_byte_(one_byte),
244 reg_exp_too_big_(false),
245 limiting_recursion_(false),
246 optimize_(v8_flagsjs::jit::JitOptions.regexp_optimization),
247 read_backward_(false),
248 current_expansion_factor_(1),
249 frequency_collator_(),
250 isolate_(isolate),
251 zone_(zone) {
252 accept_ = zone->New<EndNode>(EndNode::ACCEPT, zone);
253 DCHECK_GE(RegExpMacroAssembler::kMaxRegister, next_register_ - 1)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((RegExpMacroAssembler::kMaxRegister) >= (next_register_
- 1))>::isValid, "invalid assertion condition"); if ((__builtin_expect
(!!(!(!!((RegExpMacroAssembler::kMaxRegister) >= (next_register_
- 1)))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("(RegExpMacroAssembler::kMaxRegister) >= (next_register_ - 1)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 253); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(RegExpMacroAssembler::kMaxRegister) >= (next_register_ - 1)"
")"); do { *((volatile int*)__null) = 253; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
254}
255
256RegExpCompiler::CompilationResult RegExpCompiler::Assemble(
257 Isolate* isolate, RegExpMacroAssembler* macro_assembler, RegExpNode* start,
258 int capture_count, Handle<String> pattern) {
259 macro_assembler_ = macro_assembler;
260
261 ZoneVector<RegExpNode*> work_list(zone());
262 work_list_ = &work_list;
263 Label fail;
264 macro_assembler_->PushBacktrack(&fail);
265 Trace new_trace;
266 start->Emit(this, &new_trace);
267 macro_assembler_->BindJumpTarget(&fail);
268 macro_assembler_->Fail();
269 while (!work_list.empty()) {
270 RegExpNode* node = work_list.back();
271 work_list.pop_back();
272 node->set_on_work_list(false);
273 if (!node->label()->is_bound()) node->Emit(this, &new_trace);
274 }
275 if (reg_exp_too_big_) {
276 if (v8_flagsjs::jit::JitOptions.correctness_fuzzer_suppressions) {
277 FATAL("Aborting on excess zone allocation")do { do { } while (false); MOZ_ReportCrash("" "Aborting on excess zone allocation"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 277); AnnotateMozCrashReason("MOZ_CRASH(" "Aborting on excess zone allocation"
")"); do { *((volatile int*)__null) = 277; __attribute__((nomerge
)) ::abort(); } while (false); } while (false)
;
278 }
279 macro_assembler_->AbortedCodeGeneration();
280 return CompilationResult::RegExpTooBig();
281 }
282
283 Handle<HeapObject> code = macro_assembler_->GetCode(pattern);
284 isolate->IncreaseTotalRegexpCodeGenerated(code);
285 work_list_ = nullptr;
286
287 return {code, next_register_};
288}
289
290bool Trace::DeferredAction::Mentions(int that) {
291 if (action_type() == ActionNode::CLEAR_CAPTURES) {
292 Interval range = static_cast<DeferredClearCaptures*>(this)->range();
293 return range.Contains(that);
294 } else {
295 return reg() == that;
296 }
297}
298
299bool Trace::mentions_reg(int reg) {
300 for (DeferredAction* action = actions_; action != nullptr;
301 action = action->next()) {
302 if (action->Mentions(reg)) return true;
303 }
304 return false;
305}
306
307bool Trace::GetStoredPosition(int reg, int* cp_offset) {
308 DCHECK_EQ(0, *cp_offset)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((0) == (*cp_offset))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((0) == (*cp_offset)))), 0)))
{ do { } while (false); MOZ_ReportAssertionFailure("(0) == (*cp_offset)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 308); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(0) == (*cp_offset)"
")"); do { *((volatile int*)__null) = 308; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
309 for (DeferredAction* action = actions_; action != nullptr;
310 action = action->next()) {
311 if (action->Mentions(reg)) {
312 if (action->action_type() == ActionNode::STORE_POSITION) {
313 *cp_offset = static_cast<DeferredCapture*>(action)->cp_offset();
314 return true;
315 } else {
316 return false;
317 }
318 }
319 }
320 return false;
321}
322
323// A (dynamically-sized) set of unsigned integers that behaves especially well
324// on small integers (< kFirstLimit). May do zone-allocation.
325class DynamicBitSet : public ZoneObject {
326 public:
327 V8_EXPORT_PRIVATE bool Get(unsigned value) const {
328 if (value < kFirstLimit) {
329 return (first_ & (1 << value)) != 0;
330 } else if (remaining_ == nullptr) {
331 return false;
332 } else {
333 return remaining_->Contains(value);
334 }
335 }
336
337 // Destructively set a value in this set.
338 void Set(unsigned value, Zone* zone) {
339 if (value < kFirstLimit) {
340 first_ |= (1 << value);
341 } else {
342 if (remaining_ == nullptr)
343 remaining_ = zone->New<ZoneList<unsigned>>(1, zone);
344 if (remaining_->is_empty() || !remaining_->Contains(value))
345 remaining_->Add(value, zone);
346 }
347 }
348
349 private:
350 static constexpr unsigned kFirstLimit = 32;
351
352 uint32_t first_ = 0;
353 ZoneList<unsigned>* remaining_ = nullptr;
354};
355
356int Trace::FindAffectedRegisters(DynamicBitSet* affected_registers,
357 Zone* zone) {
358 int max_register = RegExpCompiler::kNoRegister;
359 for (DeferredAction* action = actions_; action != nullptr;
360 action = action->next()) {
361 if (action->action_type() == ActionNode::CLEAR_CAPTURES) {
362 Interval range = static_cast<DeferredClearCaptures*>(action)->range();
363 for (int i = range.from(); i <= range.to(); i++)
364 affected_registers->Set(i, zone);
365 if (range.to() > max_register) max_register = range.to();
366 } else {
367 affected_registers->Set(action->reg(), zone);
368 if (action->reg() > max_register) max_register = action->reg();
369 }
370 }
371 return max_register;
372}
373
374void Trace::RestoreAffectedRegisters(RegExpMacroAssembler* assembler,
375 int max_register,
376 const DynamicBitSet& registers_to_pop,
377 const DynamicBitSet& registers_to_clear) {
378 for (int reg = max_register; reg >= 0; reg--) {
379 if (registers_to_pop.Get(reg)) {
380 assembler->PopRegister(reg);
381 } else if (registers_to_clear.Get(reg)) {
382 int clear_to = reg;
383 while (reg > 0 && registers_to_clear.Get(reg - 1)) {
384 reg--;
385 }
386 assembler->ClearRegisters(reg, clear_to);
387 }
388 }
389}
390
391void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler,
392 int max_register,
393 const DynamicBitSet& affected_registers,
394 DynamicBitSet* registers_to_pop,
395 DynamicBitSet* registers_to_clear,
396 Zone* zone) {
397 // The "+1" is to avoid a push_limit of zero if stack_limit_slack() is 1.
398 const int push_limit = (assembler->stack_limit_slack() + 1) / 2;
399
400 // Count pushes performed to force a stack limit check occasionally.
401 int pushes = 0;
402
403 for (int reg = 0; reg <= max_register; reg++) {
404 if (!affected_registers.Get(reg)) continue;
405
406 // The chronologically first deferred action in the trace
407 // is used to infer the action needed to restore a register
408 // to its previous state (or not, if it's safe to ignore it).
409 enum DeferredActionUndoType { IGNORE, RESTORE, CLEAR };
410 DeferredActionUndoType undo_action = IGNORE;
411
412 int value = 0;
413 bool absolute = false;
414 bool clear = false;
415 static const int kNoStore = kMinInt((int32_t)0x80000000);
416 int store_position = kNoStore;
417 // This is a little tricky because we are scanning the actions in reverse
418 // historical order (newest first).
419 for (DeferredAction* action = actions_; action != nullptr;
420 action = action->next()) {
421 if (action->Mentions(reg)) {
422 switch (action->action_type()) {
423 case ActionNode::SET_REGISTER_FOR_LOOP: {
424 Trace::DeferredSetRegisterForLoop* psr =
425 static_cast<Trace::DeferredSetRegisterForLoop*>(action);
426 if (!absolute) {
427 value += psr->value();
428 absolute = true;
429 }
430 // SET_REGISTER_FOR_LOOP is only used for newly introduced loop
431 // counters. They can have a significant previous value if they
432 // occur in a loop. TODO(lrn): Propagate this information, so
433 // we can set undo_action to IGNORE if we know there is no value to
434 // restore.
435 undo_action = RESTORE;
436 DCHECK_EQ(store_position, kNoStore)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((store_position) == (kNoStore))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((store_position) == (kNoStore
)))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("(store_position) == (kNoStore)", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 436); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(store_position) == (kNoStore)"
")"); do { *((volatile int*)__null) = 436; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
437 DCHECK(!clear)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(!clear)>::isValid, "invalid assertion condition")
; if ((__builtin_expect(!!(!(!!(!clear))), 0))) { do { } while
(false); MOZ_ReportAssertionFailure("!clear", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 437); AnnotateMozCrashReason("MOZ_ASSERT" "(" "!clear" ")")
; do { *((volatile int*)__null) = 437; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
438 break;
439 }
440 case ActionNode::INCREMENT_REGISTER:
441 if (!absolute) {
442 value++;
443 }
444 DCHECK_EQ(store_position, kNoStore)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((store_position) == (kNoStore))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((store_position) == (kNoStore
)))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("(store_position) == (kNoStore)", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 444); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(store_position) == (kNoStore)"
")"); do { *((volatile int*)__null) = 444; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
445 DCHECK(!clear)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(!clear)>::isValid, "invalid assertion condition")
; if ((__builtin_expect(!!(!(!!(!clear))), 0))) { do { } while
(false); MOZ_ReportAssertionFailure("!clear", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 445); AnnotateMozCrashReason("MOZ_ASSERT" "(" "!clear" ")")
; do { *((volatile int*)__null) = 445; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
446 undo_action = RESTORE;
447 break;
448 case ActionNode::STORE_POSITION: {
449 Trace::DeferredCapture* pc =
450 static_cast<Trace::DeferredCapture*>(action);
451 if (!clear && store_position == kNoStore) {
452 store_position = pc->cp_offset();
453 }
454
455 // For captures we know that stores and clears alternate.
456 // Other register, are never cleared, and if the occur
457 // inside a loop, they might be assigned more than once.
458 if (reg <= 1) {
459 // Registers zero and one, aka "capture zero", is
460 // always set correctly if we succeed. There is no
461 // need to undo a setting on backtrack, because we
462 // will set it again or fail.
463 undo_action = IGNORE;
464 } else {
465 undo_action = pc->is_capture() ? CLEAR : RESTORE;
466 }
467 DCHECK(!absolute)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(!absolute)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(!absolute))), 0))) { do { } while
(false); MOZ_ReportAssertionFailure("!absolute", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 467); AnnotateMozCrashReason("MOZ_ASSERT" "(" "!absolute" ")"
); do { *((volatile int*)__null) = 467; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
468 DCHECK_EQ(value, 0)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((value) == (0))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((value) == (0)))), 0))) { do
{ } while (false); MOZ_ReportAssertionFailure("(value) == (0)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 468); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(value) == (0)"
")"); do { *((volatile int*)__null) = 468; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
469 break;
470 }
471 case ActionNode::CLEAR_CAPTURES: {
472 // Since we're scanning in reverse order, if we've already
473 // set the position we have to ignore historically earlier
474 // clearing operations.
475 if (store_position == kNoStore) {
476 clear = true;
477 }
478 undo_action = RESTORE;
479 DCHECK(!absolute)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(!absolute)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(!absolute))), 0))) { do { } while
(false); MOZ_ReportAssertionFailure("!absolute", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 479); AnnotateMozCrashReason("MOZ_ASSERT" "(" "!absolute" ")"
); do { *((volatile int*)__null) = 479; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
480 DCHECK_EQ(value, 0)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((value) == (0))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((value) == (0)))), 0))) { do
{ } while (false); MOZ_ReportAssertionFailure("(value) == (0)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 480); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(value) == (0)"
")"); do { *((volatile int*)__null) = 480; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
481 break;
482 }
483 default:
484 UNREACHABLE()do { do { } while (false); MOZ_ReportCrash("" "unreachable code"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 484); AnnotateMozCrashReason("MOZ_CRASH(" "unreachable code"
")"); do { *((volatile int*)__null) = 484; __attribute__((nomerge
)) ::abort(); } while (false); } while (false)
;
485 }
486 }
487 }
488 // Prepare for the undo-action (e.g., push if it's going to be popped).
489 if (undo_action == RESTORE) {
490 pushes++;
491 RegExpMacroAssembler::StackCheckFlag stack_check =
492 RegExpMacroAssembler::kNoStackLimitCheck;
493 if (pushes == push_limit) {
494 stack_check = RegExpMacroAssembler::kCheckStackLimit;
495 pushes = 0;
496 }
497
498 assembler->PushRegister(reg, stack_check);
499 registers_to_pop->Set(reg, zone);
500 } else if (undo_action == CLEAR) {
501 registers_to_clear->Set(reg, zone);
502 }
503 // Perform the chronologically last action (or accumulated increment)
504 // for the register.
505 if (store_position != kNoStore) {
506 assembler->WriteCurrentPositionToRegister(reg, store_position);
507 } else if (clear) {
508 assembler->ClearRegisters(reg, reg);
509 } else if (absolute) {
510 assembler->SetRegister(reg, value);
511 } else if (value != 0) {
512 assembler->AdvanceRegister(reg, value);
513 }
514 }
515}
516
517// This is called as we come into a loop choice node and some other tricky
518// nodes. It normalizes the state of the code generator to ensure we can
519// generate generic code.
520void Trace::Flush(RegExpCompiler* compiler, RegExpNode* successor) {
521 RegExpMacroAssembler* assembler = compiler->macro_assembler();
522
523 DCHECK(!is_trivial())do { static_assert( mozilla::detail::AssertionConditionType<
decltype(!is_trivial())>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(!is_trivial()))), 0))) { do {
} while (false); MOZ_ReportAssertionFailure("!is_trivial()",
"/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 523); AnnotateMozCrashReason("MOZ_ASSERT" "(" "!is_trivial()"
")"); do { *((volatile int*)__null) = 523; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
524
525 if (actions_ == nullptr && backtrack() == nullptr) {
526 // Here we just have some deferred cp advances to fix and we are back to
527 // a normal situation. We may also have to forget some information gained
528 // through a quick check that was already performed.
529 if (cp_offset_ != 0) assembler->AdvanceCurrentPosition(cp_offset_);
530 // Create a new trivial state and generate the node with that.
531 Trace new_state;
532 successor->Emit(compiler, &new_state);
533 return;
534 }
535
536 // Generate deferred actions here along with code to undo them again.
537 DynamicBitSet affected_registers;
538
539 if (backtrack() != nullptr) {
540 // Here we have a concrete backtrack location. These are set up by choice
541 // nodes and so they indicate that we have a deferred save of the current
542 // position which we may need to emit here.
543 assembler->PushCurrentPosition();
544 }
545
546 int max_register =
547 FindAffectedRegisters(&affected_registers, compiler->zone());
548 DynamicBitSet registers_to_pop;
549 DynamicBitSet registers_to_clear;
550 PerformDeferredActions(assembler, max_register, affected_registers,
551 &registers_to_pop, &registers_to_clear,
552 compiler->zone());
553 if (cp_offset_ != 0) {
554 assembler->AdvanceCurrentPosition(cp_offset_);
555 }
556
557 // Create a new trivial state and generate the node with that.
558 Label undo;
559 assembler->PushBacktrack(&undo);
560 if (successor->KeepRecursing(compiler)) {
561 Trace new_state;
562 successor->Emit(compiler, &new_state);
563 } else {
564 compiler->AddWork(successor);
565 assembler->GoTo(successor->label());
566 }
567
568 // On backtrack we need to restore state.
569 assembler->BindJumpTarget(&undo);
570 RestoreAffectedRegisters(assembler, max_register, registers_to_pop,
571 registers_to_clear);
572 if (backtrack() == nullptr) {
573 assembler->Backtrack();
574 } else {
575 assembler->PopCurrentPosition();
576 assembler->GoTo(backtrack());
577 }
578}
579
580void NegativeSubmatchSuccess::Emit(RegExpCompiler* compiler, Trace* trace) {
581 RegExpMacroAssembler* assembler = compiler->macro_assembler();
582
583 // Omit flushing the trace. We discard the entire stack frame anyway.
584
585 if (!label()->is_bound()) {
586 // We are completely independent of the trace, since we ignore it,
587 // so this code can be used as the generic version.
588 assembler->Bind(label());
589 }
590
591 // Throw away everything on the backtrack stack since the start
592 // of the negative submatch and restore the character position.
593 assembler->ReadCurrentPositionFromRegister(current_position_register_);
594 assembler->ReadStackPointerFromRegister(stack_pointer_register_);
595 if (clear_capture_count_ > 0) {
596 // Clear any captures that might have been performed during the success
597 // of the body of the negative look-ahead.
598 int clear_capture_end = clear_capture_start_ + clear_capture_count_ - 1;
599 assembler->ClearRegisters(clear_capture_start_, clear_capture_end);
600 }
601 // Now that we have unwound the stack we find at the top of the stack the
602 // backtrack that the BeginNegativeSubmatch node got.
603 assembler->Backtrack();
604}
605
606void EndNode::Emit(RegExpCompiler* compiler, Trace* trace) {
607 if (!trace->is_trivial()) {
608 trace->Flush(compiler, this);
609 return;
610 }
611 RegExpMacroAssembler* assembler = compiler->macro_assembler();
612 if (!label()->is_bound()) {
613 assembler->Bind(label());
614 }
615 switch (action_) {
616 case ACCEPT:
617 assembler->Succeed();
618 return;
619 case BACKTRACK:
620 assembler->GoTo(trace->backtrack());
621 return;
622 case NEGATIVE_SUBMATCH_SUCCESS:
623 // This case is handled in a different virtual method.
624 UNREACHABLE()do { do { } while (false); MOZ_ReportCrash("" "unreachable code"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 624); AnnotateMozCrashReason("MOZ_CRASH(" "unreachable code"
")"); do { *((volatile int*)__null) = 624; __attribute__((nomerge
)) ::abort(); } while (false); } while (false)
;
625 }
626 UNIMPLEMENTED()do { do { } while (false); MOZ_ReportCrash("" "unimplemented code"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 626); AnnotateMozCrashReason("MOZ_CRASH(" "unimplemented code"
")"); do { *((volatile int*)__null) = 626; __attribute__((nomerge
)) ::abort(); } while (false); } while (false)
;
627}
628
629void GuardedAlternative::AddGuard(Guard* guard, Zone* zone) {
630 if (guards_ == nullptr) guards_ = zone->New<ZoneList<Guard*>>(1, zone);
631 guards_->Add(guard, zone);
632}
633
634ActionNode* ActionNode::SetRegisterForLoop(int reg, int val,
635 RegExpNode* on_success) {
636 ActionNode* result =
637 on_success->zone()->New<ActionNode>(SET_REGISTER_FOR_LOOP, on_success);
638 result->data_.u_store_register.reg = reg;
639 result->data_.u_store_register.value = val;
640 return result;
641}
642
643ActionNode* ActionNode::IncrementRegister(int reg, RegExpNode* on_success) {
644 ActionNode* result =
645 on_success->zone()->New<ActionNode>(INCREMENT_REGISTER, on_success);
646 result->data_.u_increment_register.reg = reg;
647 return result;
648}
649
650ActionNode* ActionNode::StorePosition(int reg, bool is_capture,
651 RegExpNode* on_success) {
652 ActionNode* result =
653 on_success->zone()->New<ActionNode>(STORE_POSITION, on_success);
654 result->data_.u_position_register.reg = reg;
655 result->data_.u_position_register.is_capture = is_capture;
656 return result;
657}
658
659ActionNode* ActionNode::ClearCaptures(Interval range, RegExpNode* on_success) {
660 ActionNode* result =
661 on_success->zone()->New<ActionNode>(CLEAR_CAPTURES, on_success);
662 result->data_.u_clear_captures.range_from = range.from();
663 result->data_.u_clear_captures.range_to = range.to();
664 return result;
665}
666
667ActionNode* ActionNode::BeginPositiveSubmatch(int stack_reg, int position_reg,
668 RegExpNode* on_success) {
669 ActionNode* result =
670 on_success->zone()->New<ActionNode>(BEGIN_POSITIVE_SUBMATCH, on_success);
671 result->data_.u_submatch.stack_pointer_register = stack_reg;
672 result->data_.u_submatch.current_position_register = position_reg;
673 return result;
674}
675
676ActionNode* ActionNode::BeginNegativeSubmatch(int stack_reg, int position_reg,
677 RegExpNode* on_success) {
678 ActionNode* result =
679 on_success->zone()->New<ActionNode>(BEGIN_NEGATIVE_SUBMATCH, on_success);
680 result->data_.u_submatch.stack_pointer_register = stack_reg;
681 result->data_.u_submatch.current_position_register = position_reg;
682 return result;
683}
684
685ActionNode* ActionNode::PositiveSubmatchSuccess(int stack_reg, int position_reg,
686 int clear_register_count,
687 int clear_register_from,
688 RegExpNode* on_success) {
689 ActionNode* result = on_success->zone()->New<ActionNode>(
690 POSITIVE_SUBMATCH_SUCCESS, on_success);
691 result->data_.u_submatch.stack_pointer_register = stack_reg;
692 result->data_.u_submatch.current_position_register = position_reg;
693 result->data_.u_submatch.clear_register_count = clear_register_count;
694 result->data_.u_submatch.clear_register_from = clear_register_from;
695 return result;
696}
697
698ActionNode* ActionNode::EmptyMatchCheck(int start_register,
699 int repetition_register,
700 int repetition_limit,
701 RegExpNode* on_success) {
702 ActionNode* result =
703 on_success->zone()->New<ActionNode>(EMPTY_MATCH_CHECK, on_success);
704 result->data_.u_empty_match_check.start_register = start_register;
705 result->data_.u_empty_match_check.repetition_register = repetition_register;
706 result->data_.u_empty_match_check.repetition_limit = repetition_limit;
707 return result;
708}
709
710ActionNode* ActionNode::ModifyFlags(RegExpFlags flags, RegExpNode* on_success) {
711 ActionNode* result =
712 on_success->zone()->New<ActionNode>(MODIFY_FLAGS, on_success);
713 result->data_.u_modify_flags.flags = flags;
714 return result;
715}
716
717#define DEFINE_ACCEPT(Type) \
718 void Type##Node::Accept(NodeVisitor* visitor) { visitor->Visit##Type(this); }
719FOR_EACH_NODE_TYPE(DEFINE_ACCEPT)DEFINE_ACCEPT(End) DEFINE_ACCEPT(Action) DEFINE_ACCEPT(Choice
) DEFINE_ACCEPT(LoopChoice) DEFINE_ACCEPT(NegativeLookaroundChoice
) DEFINE_ACCEPT(BackReference) DEFINE_ACCEPT(Assertion) DEFINE_ACCEPT
(Text)
720#undef DEFINE_ACCEPT
721
722// -------------------------------------------------------------------
723// Emit code.
724
725void ChoiceNode::GenerateGuard(RegExpMacroAssembler* macro_assembler,
726 Guard* guard, Trace* trace) {
727 switch (guard->op()) {
728 case Guard::LT:
729 DCHECK(!trace->mentions_reg(guard->reg()))do { static_assert( mozilla::detail::AssertionConditionType<
decltype(!trace->mentions_reg(guard->reg()))>::isValid
, "invalid assertion condition"); if ((__builtin_expect(!!(!(
!!(!trace->mentions_reg(guard->reg())))), 0))) { do { }
while (false); MOZ_ReportAssertionFailure("!trace->mentions_reg(guard->reg())"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 729); AnnotateMozCrashReason("MOZ_ASSERT" "(" "!trace->mentions_reg(guard->reg())"
")"); do { *((volatile int*)__null) = 729; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
730 macro_assembler->IfRegisterGE(guard->reg(), guard->value(),
731 trace->backtrack());
732 break;
733 case Guard::GEQ:
734 DCHECK(!trace->mentions_reg(guard->reg()))do { static_assert( mozilla::detail::AssertionConditionType<
decltype(!trace->mentions_reg(guard->reg()))>::isValid
, "invalid assertion condition"); if ((__builtin_expect(!!(!(
!!(!trace->mentions_reg(guard->reg())))), 0))) { do { }
while (false); MOZ_ReportAssertionFailure("!trace->mentions_reg(guard->reg())"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 734); AnnotateMozCrashReason("MOZ_ASSERT" "(" "!trace->mentions_reg(guard->reg())"
")"); do { *((volatile int*)__null) = 734; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
735 macro_assembler->IfRegisterLT(guard->reg(), guard->value(),
736 trace->backtrack());
737 break;
738 }
739}
740
741namespace {
742
743#ifdef DEBUG1
744bool ContainsOnlyUtf16CodeUnits(unibrow::uchar* chars, int length) {
745 static_assert(sizeof(unibrow::uchar) == 4);
746 for (int i = 0; i < length; i++) {
747 if (chars[i] > String::kMaxUtf16CodeUnit) return false;
748 }
749 return true;
750}
751#endif // DEBUG
752
753// Returns the number of characters in the equivalence class, omitting those
754// that cannot occur in the source string because it is Latin1. This is called
755// both for unicode modes /ui and /vi, and also for legacy case independent
756// mode /i. In the case of Unicode modes we handled surrogate pair expansions
757// earlier so at this point it's all about single-code-unit expansions.
758int GetCaseIndependentLetters(Isolate* isolate, base::uc16 character,
759 RegExpCompiler* compiler, unibrow::uchar* letters,
760 int letter_length) {
761 bool one_byte_subject = compiler->one_byte();
762 bool unicode = IsEitherUnicode(compiler->flags());
763 static const base::uc16 kMaxAscii = 0x7f;
764 if (!unicode && character <= kMaxAscii) {
765 // Fast case for common characters.
766 base::uc16 upper = character & ~0x20;
767 if ('A' <= upper && upper <= 'Z') {
768 letters[0] = upper;
769 letters[1] = upper | 0x20;
770 return 2;
771 }
772 letters[0] = character;
773 return 1;
774 }
775#ifdef V8_INTL_SUPPORT1
776
777 if (!unicode && RegExpCaseFolding::IgnoreSet().contains(character)) {
778 letters[0] = character;
779 DCHECK(ContainsOnlyUtf16CodeUnits(letters, 1))do { static_assert( mozilla::detail::AssertionConditionType<
decltype(ContainsOnlyUtf16CodeUnits(letters, 1))>::isValid
, "invalid assertion condition"); if ((__builtin_expect(!!(!(
!!(ContainsOnlyUtf16CodeUnits(letters, 1)))), 0))) { do { } while
(false); MOZ_ReportAssertionFailure("ContainsOnlyUtf16CodeUnits(letters, 1)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 779); AnnotateMozCrashReason("MOZ_ASSERT" "(" "ContainsOnlyUtf16CodeUnits(letters, 1)"
")"); do { *((volatile int*)__null) = 779; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
780 return 1;
781 }
782 bool in_special_add_set =
783 RegExpCaseFolding::SpecialAddSet().contains(character);
784
785 icu::UnicodeSet set;
786 set.add(character);
787 set = set.closeOver(unicode ? USET_SIMPLE_CASE_INSENSITIVE
788 : USET_CASE_INSENSITIVE);
789
790 UChar32 canon = 0;
791 if (in_special_add_set && !unicode) {
792 canon = RegExpCaseFolding::Canonicalize(character);
793 }
794
795 int32_t range_count = set.getRangeCount();
796 int items = 0;
797 for (int32_t i = 0; i < range_count; i++) {
798 UChar32 start = set.getRangeStart(i);
799 UChar32 end = set.getRangeEnd(i);
800 CHECK(end - start + items <= letter_length)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(end - start + items <= letter_length)>::isValid
, "invalid assertion condition"); if ((__builtin_expect(!!(!(
!!(end - start + items <= letter_length))), 0))) { do { } while
(false); MOZ_ReportAssertionFailure("end - start + items <= letter_length"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 800); AnnotateMozCrashReason("MOZ_RELEASE_ASSERT" "(" "end - start + items <= letter_length"
")"); do { *((volatile int*)__null) = 800; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
801 for (UChar32 cu = start; cu <= end; cu++) {
802 if (one_byte_subject && cu > String::kMaxOneByteCharCode) continue;
803 if (!unicode && in_special_add_set &&
804 RegExpCaseFolding::Canonicalize(cu) != canon) {
805 continue;
806 }
807 letters[items++] = static_cast<unibrow::uchar>(cu);
808 }
809 }
810 DCHECK(ContainsOnlyUtf16CodeUnits(letters, items))do { static_assert( mozilla::detail::AssertionConditionType<
decltype(ContainsOnlyUtf16CodeUnits(letters, items))>::isValid
, "invalid assertion condition"); if ((__builtin_expect(!!(!(
!!(ContainsOnlyUtf16CodeUnits(letters, items)))), 0))) { do {
} while (false); MOZ_ReportAssertionFailure("ContainsOnlyUtf16CodeUnits(letters, items)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 810); AnnotateMozCrashReason("MOZ_ASSERT" "(" "ContainsOnlyUtf16CodeUnits(letters, items)"
")"); do { *((volatile int*)__null) = 810; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
811 return items;
812#else
813 int length =
814 isolate->jsregexp_uncanonicalize()->get(character, '\0', letters);
815 // Unibrow returns 0 or 1 for characters where case independence is
816 // trivial.
817 if (length == 0) {
818 letters[0] = character;
819 length = 1;
820 }
821
822 if (one_byte_subject) {
823 int new_length = 0;
824 for (int i = 0; i < length; i++) {
825 if (letters[i] <= String::kMaxOneByteCharCode) {
826 letters[new_length++] = letters[i];
827 }
828 }
829 length = new_length;
830 }
831
832 DCHECK(ContainsOnlyUtf16CodeUnits(letters, length))do { static_assert( mozilla::detail::AssertionConditionType<
decltype(ContainsOnlyUtf16CodeUnits(letters, length))>::isValid
, "invalid assertion condition"); if ((__builtin_expect(!!(!(
!!(ContainsOnlyUtf16CodeUnits(letters, length)))), 0))) { do {
} while (false); MOZ_ReportAssertionFailure("ContainsOnlyUtf16CodeUnits(letters, length)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 832); AnnotateMozCrashReason("MOZ_ASSERT" "(" "ContainsOnlyUtf16CodeUnits(letters, length)"
")"); do { *((volatile int*)__null) = 832; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
833 return length;
834#endif // V8_INTL_SUPPORT
835}
836
837inline bool EmitSimpleCharacter(Isolate* isolate, RegExpCompiler* compiler,
838 base::uc16 c, Label* on_failure, int cp_offset,
839 bool check, bool preloaded) {
840 RegExpMacroAssembler* assembler = compiler->macro_assembler();
841 bool bound_checked = false;
842 if (!preloaded) {
843 assembler->LoadCurrentCharacter(cp_offset, on_failure, check);
844 bound_checked = true;
845 }
846 assembler->CheckNotCharacter(c, on_failure);
847 return bound_checked;
848}
849
850// Only emits non-letters (things that don't have case). Only used for case
851// independent matches.
852inline bool EmitAtomNonLetter(Isolate* isolate, RegExpCompiler* compiler,
853 base::uc16 c, Label* on_failure, int cp_offset,
854 bool check, bool preloaded) {
855 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
856 bool one_byte = compiler->one_byte();
857 unibrow::uchar chars[4];
858 int length = GetCaseIndependentLetters(isolate, c, compiler, chars, 4);
859 if (length < 1) {
860 // This can't match. Must be an one-byte subject and a non-one-byte
861 // character. We do not need to do anything since the one-byte pass
862 // already handled this.
863 return false; // Bounds not checked.
864 }
865 bool checked = false;
866 // We handle the length > 1 case in a later pass.
867 if (length == 1) {
868 if (one_byte && chars[0] > String::kMaxOneByteCharCodeU) {
869 // Can't match - see above.
870 return false; // Bounds not checked.
871 }
872 if (!preloaded) {
873 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check);
874 checked = check;
875 }
876 macro_assembler->CheckNotCharacter(chars[0], on_failure);
877 }
878 return checked;
879}
880
881bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler,
882 bool one_byte, base::uc16 c1, base::uc16 c2,
883 Label* on_failure) {
884 const uint32_t char_mask = CharMask(one_byte);
885 base::uc16 exor = c1 ^ c2;
886 // Check whether exor has only one bit set.
887 if (((exor - 1) & exor) == 0) {
888 // If c1 and c2 differ only by one bit.
889 // Ecma262UnCanonicalize always gives the highest number last.
890 DCHECK(c2 > c1)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(c2 > c1)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(c2 > c1))), 0))) { do { }
while (false); MOZ_ReportAssertionFailure("c2 > c1", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 890); AnnotateMozCrashReason("MOZ_ASSERT" "(" "c2 > c1" ")"
); do { *((volatile int*)__null) = 890; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
891 base::uc16 mask = char_mask ^ exor;
892 macro_assembler->CheckNotCharacterAfterAnd(c1, mask, on_failure);
893 return true;
894 }
895 DCHECK(c2 > c1)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(c2 > c1)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(c2 > c1))), 0))) { do { }
while (false); MOZ_ReportAssertionFailure("c2 > c1", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 895); AnnotateMozCrashReason("MOZ_ASSERT" "(" "c2 > c1" ")"
); do { *((volatile int*)__null) = 895; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
896 base::uc16 diff = c2 - c1;
897 if (((diff - 1) & diff) == 0 && c1 >= diff) {
898 // If the characters differ by 2^n but don't differ by one bit then
899 // subtract the difference from the found character, then do the or
900 // trick. We avoid the theoretical case where negative numbers are
901 // involved in order to simplify code generation.
902 base::uc16 mask = char_mask ^ diff;
903 macro_assembler->CheckNotCharacterAfterMinusAnd(c1 - diff, diff, mask,
904 on_failure);
905 return true;
906 }
907 return false;
908}
909
910// Only emits letters (things that have case). Only used for case independent
911// matches.
912inline bool EmitAtomLetter(Isolate* isolate, RegExpCompiler* compiler,
913 base::uc16 c, Label* on_failure, int cp_offset,
914 bool check, bool preloaded) {
915 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
916 bool one_byte = compiler->one_byte();
917 unibrow::uchar chars[4];
918 int length = GetCaseIndependentLetters(isolate, c, compiler, chars, 4);
919 if (length <= 1) return false;
920 // We may not need to check against the end of the input string
921 // if this character lies before a character that matched.
922 if (!preloaded) {
923 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check);
924 }
925 Label ok;
926 switch (length) {
927 case 2: {
928 if (ShortCutEmitCharacterPair(macro_assembler, one_byte, chars[0],
929 chars[1], on_failure)) {
930 } else {
931 macro_assembler->CheckCharacter(chars[0], &ok);
932 macro_assembler->CheckNotCharacter(chars[1], on_failure);
933 macro_assembler->Bind(&ok);
934 }
935 break;
936 }
937 case 4:
938 macro_assembler->CheckCharacter(chars[3], &ok);
939 [[fallthrough]];
940 case 3:
941 macro_assembler->CheckCharacter(chars[0], &ok);
942 macro_assembler->CheckCharacter(chars[1], &ok);
943 macro_assembler->CheckNotCharacter(chars[2], on_failure);
944 macro_assembler->Bind(&ok);
945 break;
946 default:
947 UNREACHABLE()do { do { } while (false); MOZ_ReportCrash("" "unreachable code"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 947); AnnotateMozCrashReason("MOZ_CRASH(" "unreachable code"
")"); do { *((volatile int*)__null) = 947; __attribute__((nomerge
)) ::abort(); } while (false); } while (false)
;
948 }
949 return true;
950}
951
952void EmitBoundaryTest(RegExpMacroAssembler* masm, int border,
953 Label* fall_through, Label* above_or_equal,
954 Label* below) {
955 if (below != fall_through) {
956 masm->CheckCharacterLT(border, below);
957 if (above_or_equal != fall_through) masm->GoTo(above_or_equal);
958 } else {
959 masm->CheckCharacterGT(border - 1, above_or_equal);
960 }
961}
962
963void EmitDoubleBoundaryTest(RegExpMacroAssembler* masm, int first, int last,
964 Label* fall_through, Label* in_range,
965 Label* out_of_range) {
966 if (in_range == fall_through) {
967 if (first == last) {
968 masm->CheckNotCharacter(first, out_of_range);
969 } else {
970 masm->CheckCharacterNotInRange(first, last, out_of_range);
971 }
972 } else {
973 if (first == last) {
974 masm->CheckCharacter(first, in_range);
975 } else {
976 masm->CheckCharacterInRange(first, last, in_range);
977 }
978 if (out_of_range != fall_through) masm->GoTo(out_of_range);
979 }
980}
981
982// even_label is for ranges[i] to ranges[i + 1] where i - start_index is even.
983// odd_label is for ranges[i] to ranges[i + 1] where i - start_index is odd.
984void EmitUseLookupTable(RegExpMacroAssembler* masm,
985 ZoneList<base::uc32>* ranges, uint32_t start_index,
986 uint32_t end_index, base::uc32 min_char,
987 Label* fall_through, Label* even_label,
988 Label* odd_label) {
989 static const uint32_t kSize = RegExpMacroAssembler::kTableSize;
990 static const uint32_t kMask = RegExpMacroAssembler::kTableMask;
991
992 base::uc32 base = (min_char & ~kMask);
993 USE(base)do { ::v8::base::Use unused_tmp_array_for_use_macro[]{base}; (
void)unused_tmp_array_for_use_macro; } while (false)
;
994
995 // Assert that everything is on one kTableSize page.
996 for (uint32_t i = start_index; i <= end_index; i++) {
997 DCHECK_EQ(ranges->at(i) & ~kMask, base)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((ranges->at(i) & ~kMask) == (base))>::isValid
, "invalid assertion condition"); if ((__builtin_expect(!!(!(
!!((ranges->at(i) & ~kMask) == (base)))), 0))) { do { }
while (false); MOZ_ReportAssertionFailure("(ranges->at(i) & ~kMask) == (base)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 997); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(ranges->at(i) & ~kMask) == (base)"
")"); do { *((volatile int*)__null) = 997; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
998 }
999 DCHECK(start_index == 0 || (ranges->at(start_index - 1) & ~kMask) <= base)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(start_index == 0 || (ranges->at(start_index - 1) &
~kMask) <= base)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(start_index == 0 || (ranges->
at(start_index - 1) & ~kMask) <= base))), 0))) { do { }
while (false); MOZ_ReportAssertionFailure("start_index == 0 || (ranges->at(start_index - 1) & ~kMask) <= base"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 999); AnnotateMozCrashReason("MOZ_ASSERT" "(" "start_index == 0 || (ranges->at(start_index - 1) & ~kMask) <= base"
")"); do { *((volatile int*)__null) = 999; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1000
1001 char templ[kSize];
1002 Label* on_bit_set;
1003 Label* on_bit_clear;
1004 int bit;
1005 if (even_label == fall_through) {
1006 on_bit_set = odd_label;
1007 on_bit_clear = even_label;
1008 bit = 1;
1009 } else {
1010 on_bit_set = even_label;
1011 on_bit_clear = odd_label;
1012 bit = 0;
1013 }
1014 for (uint32_t i = 0; i < (ranges->at(start_index) & kMask) && i < kSize;
1015 i++) {
1016 templ[i] = bit;
1017 }
1018 uint32_t j = 0;
1019 bit ^= 1;
1020 for (uint32_t i = start_index; i < end_index; i++) {
1021 for (j = (ranges->at(i) & kMask); j < (ranges->at(i + 1) & kMask); j++) {
1022 templ[j] = bit;
1023 }
1024 bit ^= 1;
1025 }
1026 for (uint32_t i = j; i < kSize; i++) {
1027 templ[i] = bit;
1028 }
1029 Factory* factory = masm->isolate()->factory();
1030 // TODO(erikcorry): Cache these.
1031 Handle<ByteArray> ba = factory->NewByteArray(kSize, AllocationType::kOld);
1032 for (uint32_t i = 0; i < kSize; i++) {
1033 ba->set(i, templ[i]);
1034 }
1035 masm->CheckBitInTable(ba, on_bit_set);
1036 if (on_bit_clear != fall_through) masm->GoTo(on_bit_clear);
1037}
1038
1039void CutOutRange(RegExpMacroAssembler* masm, ZoneList<base::uc32>* ranges,
1040 uint32_t start_index, uint32_t end_index, uint32_t cut_index,
1041 Label* even_label, Label* odd_label) {
1042 bool odd = (((cut_index - start_index) & 1) == 1);
1043 Label* in_range_label = odd ? odd_label : even_label;
1044 Label dummy;
1045 EmitDoubleBoundaryTest(masm, ranges->at(cut_index),
1046 ranges->at(cut_index + 1) - 1, &dummy, in_range_label,
1047 &dummy);
1048 DCHECK(!dummy.is_linked())do { static_assert( mozilla::detail::AssertionConditionType<
decltype(!dummy.is_linked())>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(!dummy.is_linked()))), 0))) {
do { } while (false); MOZ_ReportAssertionFailure("!dummy.is_linked()"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1048); AnnotateMozCrashReason("MOZ_ASSERT" "(" "!dummy.is_linked()"
")"); do { *((volatile int*)__null) = 1048; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1049 // Cut out the single range by rewriting the array. This creates a new
1050 // range that is a merger of the two ranges on either side of the one we
1051 // are cutting out. The oddity of the labels is preserved.
1052 for (uint32_t j = cut_index; j > start_index; j--) {
1053 ranges->at(j) = ranges->at(j - 1);
1054 }
1055 for (uint32_t j = cut_index + 1; j < end_index; j++) {
1056 ranges->at(j) = ranges->at(j + 1);
1057 }
1058}
1059
1060// Unicode case. Split the search space into kSize spaces that are handled
1061// with recursion.
1062void SplitSearchSpace(ZoneList<base::uc32>* ranges, uint32_t start_index,
1063 uint32_t end_index, uint32_t* new_start_index,
1064 uint32_t* new_end_index, base::uc32* border) {
1065 static const uint32_t kSize = RegExpMacroAssembler::kTableSize;
1066 static const uint32_t kMask = RegExpMacroAssembler::kTableMask;
1067
1068 base::uc32 first = ranges->at(start_index);
1069 base::uc32 last = ranges->at(end_index) - 1;
1070
1071 *new_start_index = start_index;
1072 *border = (ranges->at(start_index) & ~kMask) + kSize;
1073 while (*new_start_index < end_index) {
1074 if (ranges->at(*new_start_index) > *border) break;
1075 (*new_start_index)++;
1076 }
1077 // new_start_index is the index of the first edge that is beyond the
1078 // current kSize space.
1079
1080 // For very large search spaces we do a binary chop search of the non-Latin1
1081 // space instead of just going to the end of the current kSize space. The
1082 // heuristics are complicated a little by the fact that any 128-character
1083 // encoding space can be quickly tested with a table lookup, so we don't
1084 // wish to do binary chop search at a smaller granularity than that. A
1085 // 128-character space can take up a lot of space in the ranges array if,
1086 // for example, we only want to match every second character (eg. the lower
1087 // case characters on some Unicode pages).
1088 uint32_t binary_chop_index = (end_index + start_index) / 2;
1089 // The first test ensures that we get to the code that handles the Latin1
1090 // range with a single not-taken branch, speeding up this important
1091 // character range (even non-Latin1 charset-based text has spaces and
1092 // punctuation).
1093 if (*border - 1 > String::kMaxOneByteCharCode && // Latin1 case.
1094 end_index - start_index > (*new_start_index - start_index) * 2 &&
1095 last - first > kSize * 2 && binary_chop_index > *new_start_index &&
1096 ranges->at(binary_chop_index) >= first + 2 * kSize) {
1097 uint32_t scan_forward_for_section_border = binary_chop_index;
1098 uint32_t new_border = (ranges->at(binary_chop_index) | kMask) + 1;
1099
1100 while (scan_forward_for_section_border < end_index) {
1101 if (ranges->at(scan_forward_for_section_border) > new_border) {
1102 *new_start_index = scan_forward_for_section_border;
1103 *border = new_border;
1104 break;
1105 }
1106 scan_forward_for_section_border++;
1107 }
1108 }
1109
1110 DCHECK(*new_start_index > start_index)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(*new_start_index > start_index)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(*new_start_index > start_index
))), 0))) { do { } while (false); MOZ_ReportAssertionFailure(
"*new_start_index > start_index", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1110); AnnotateMozCrashReason("MOZ_ASSERT" "(" "*new_start_index > start_index"
")"); do { *((volatile int*)__null) = 1110; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1111 *new_end_index = *new_start_index - 1;
1112 if (ranges->at(*new_end_index) == *border) {
1113 (*new_end_index)--;
1114 }
1115 if (*border >= ranges->at(end_index)) {
1116 *border = ranges->at(end_index);
1117 *new_start_index = end_index; // Won't be used.
1118 *new_end_index = end_index - 1;
1119 }
1120}
1121
1122// Gets a series of segment boundaries representing a character class. If the
1123// character is in the range between an even and an odd boundary (counting from
1124// start_index) then go to even_label, otherwise go to odd_label. We already
1125// know that the character is in the range of min_char to max_char inclusive.
1126// Either label can be nullptr indicating backtracking. Either label can also
1127// be equal to the fall_through label.
1128void GenerateBranches(RegExpMacroAssembler* masm, ZoneList<base::uc32>* ranges,
1129 uint32_t start_index, uint32_t end_index,
1130 base::uc32 min_char, base::uc32 max_char,
1131 Label* fall_through, Label* even_label,
1132 Label* odd_label) {
1133 DCHECK_LE(min_char, String::kMaxUtf16CodeUnit)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((min_char) <= (String::kMaxUtf16CodeUnit))>::isValid
, "invalid assertion condition"); if ((__builtin_expect(!!(!(
!!((min_char) <= (String::kMaxUtf16CodeUnit)))), 0))) { do
{ } while (false); MOZ_ReportAssertionFailure("(min_char) <= (String::kMaxUtf16CodeUnit)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1133); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(min_char) <= (String::kMaxUtf16CodeUnit)"
")"); do { *((volatile int*)__null) = 1133; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1134 DCHECK_LE(max_char, String::kMaxUtf16CodeUnit)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((max_char) <= (String::kMaxUtf16CodeUnit))>::isValid
, "invalid assertion condition"); if ((__builtin_expect(!!(!(
!!((max_char) <= (String::kMaxUtf16CodeUnit)))), 0))) { do
{ } while (false); MOZ_ReportAssertionFailure("(max_char) <= (String::kMaxUtf16CodeUnit)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1134); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(max_char) <= (String::kMaxUtf16CodeUnit)"
")"); do { *((volatile int*)__null) = 1134; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1135
1136 base::uc32 first = ranges->at(start_index);
1137 base::uc32 last = ranges->at(end_index) - 1;
1138
1139 DCHECK_LT(min_char, first)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((min_char) < (first))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((min_char) < (first)))), 0
))) { do { } while (false); MOZ_ReportAssertionFailure("(min_char) < (first)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1139); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(min_char) < (first)"
")"); do { *((volatile int*)__null) = 1139; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1140
1141 // Just need to test if the character is before or on-or-after
1142 // a particular character.
1143 if (start_index == end_index) {
1144 EmitBoundaryTest(masm, first, fall_through, even_label, odd_label);
1145 return;
1146 }
1147
1148 // Another almost trivial case: There is one interval in the middle that is
1149 // different from the end intervals.
1150 if (start_index + 1 == end_index) {
1151 EmitDoubleBoundaryTest(masm, first, last, fall_through, even_label,
1152 odd_label);
1153 return;
1154 }
1155
1156 // It's not worth using table lookup if there are very few intervals in the
1157 // character class.
1158 if (end_index - start_index <= 6) {
1159 // It is faster to test for individual characters, so we look for those
1160 // first, then try arbitrary ranges in the second round.
1161 static uint32_t kNoCutIndex = -1;
1162 uint32_t cut = kNoCutIndex;
1163 for (uint32_t i = start_index; i < end_index; i++) {
1164 if (ranges->at(i) == ranges->at(i + 1) - 1) {
1165 cut = i;
1166 break;
1167 }
1168 }
1169 if (cut == kNoCutIndex) cut = start_index;
1170 CutOutRange(masm, ranges, start_index, end_index, cut, even_label,
1171 odd_label);
1172 DCHECK_GE(end_index - start_index, 2)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((end_index - start_index) >= (2))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((end_index - start_index) >=
(2)))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("(end_index - start_index) >= (2)", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1172); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(end_index - start_index) >= (2)"
")"); do { *((volatile int*)__null) = 1172; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1173 GenerateBranches(masm, ranges, start_index + 1, end_index - 1, min_char,
1174 max_char, fall_through, even_label, odd_label);
1175 return;
1176 }
1177
1178 // If there are a lot of intervals in the regexp, then we will use tables to
1179 // determine whether the character is inside or outside the character class.
1180 static const int kBits = RegExpMacroAssembler::kTableSizeBits;
1181
1182 if ((max_char >> kBits) == (min_char >> kBits)) {
1183 EmitUseLookupTable(masm, ranges, start_index, end_index, min_char,
1184 fall_through, even_label, odd_label);
1185 return;
1186 }
1187
1188 if ((min_char >> kBits) != first >> kBits) {
1189 masm->CheckCharacterLT(first, odd_label);
1190 GenerateBranches(masm, ranges, start_index + 1, end_index, first, max_char,
1191 fall_through, odd_label, even_label);
1192 return;
1193 }
1194
1195 uint32_t new_start_index = 0;
1196 uint32_t new_end_index = 0;
1197 base::uc32 border = 0;
1198
1199 SplitSearchSpace(ranges, start_index, end_index, &new_start_index,
1200 &new_end_index, &border);
1201
1202 Label handle_rest;
1203 Label* above = &handle_rest;
1204 if (border == last + 1) {
1205 // We didn't find any section that started after the limit, so everything
1206 // above the border is one of the terminal labels.
1207 above = (end_index & 1) != (start_index & 1) ? odd_label : even_label;
1208 DCHECK(new_end_index == end_index - 1)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(new_end_index == end_index - 1)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(new_end_index == end_index -
1))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("new_end_index == end_index - 1", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1208); AnnotateMozCrashReason("MOZ_ASSERT" "(" "new_end_index == end_index - 1"
")"); do { *((volatile int*)__null) = 1208; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1209 }
1210
1211 DCHECK_LE(start_index, new_end_index)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((start_index) <= (new_end_index))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((start_index) <= (new_end_index
)))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("(start_index) <= (new_end_index)", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1211); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(start_index) <= (new_end_index)"
")"); do { *((volatile int*)__null) = 1211; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1212 DCHECK_LE(new_start_index, end_index)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((new_start_index) <= (end_index))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((new_start_index) <= (end_index
)))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("(new_start_index) <= (end_index)", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1212); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(new_start_index) <= (end_index)"
")"); do { *((volatile int*)__null) = 1212; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1213 DCHECK_LT(start_index, new_start_index)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((start_index) < (new_start_index))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((start_index) < (new_start_index
)))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("(start_index) < (new_start_index)", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1213); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(start_index) < (new_start_index)"
")"); do { *((volatile int*)__null) = 1213; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1214 DCHECK_LT(new_end_index, end_index)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((new_end_index) < (end_index))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((new_end_index) < (end_index
)))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("(new_end_index) < (end_index)", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1214); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(new_end_index) < (end_index)"
")"); do { *((volatile int*)__null) = 1214; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1215 DCHECK(new_end_index + 1 == new_start_index ||do { static_assert( mozilla::detail::AssertionConditionType<
decltype(new_end_index + 1 == new_start_index || (new_end_index
+ 2 == new_start_index && border == ranges->at(new_end_index
+ 1)))>::isValid, "invalid assertion condition"); if ((__builtin_expect
(!!(!(!!(new_end_index + 1 == new_start_index || (new_end_index
+ 2 == new_start_index && border == ranges->at(new_end_index
+ 1))))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("new_end_index + 1 == new_start_index || (new_end_index + 2 == new_start_index && border == ranges->at(new_end_index + 1))"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1217); AnnotateMozCrashReason("MOZ_ASSERT" "(" "new_end_index + 1 == new_start_index || (new_end_index + 2 == new_start_index && border == ranges->at(new_end_index + 1))"
")"); do { *((volatile int*)__null) = 1217; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
1216 (new_end_index + 2 == new_start_index &&do { static_assert( mozilla::detail::AssertionConditionType<
decltype(new_end_index + 1 == new_start_index || (new_end_index
+ 2 == new_start_index && border == ranges->at(new_end_index
+ 1)))>::isValid, "invalid assertion condition"); if ((__builtin_expect
(!!(!(!!(new_end_index + 1 == new_start_index || (new_end_index
+ 2 == new_start_index && border == ranges->at(new_end_index
+ 1))))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("new_end_index + 1 == new_start_index || (new_end_index + 2 == new_start_index && border == ranges->at(new_end_index + 1))"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1217); AnnotateMozCrashReason("MOZ_ASSERT" "(" "new_end_index + 1 == new_start_index || (new_end_index + 2 == new_start_index && border == ranges->at(new_end_index + 1))"
")"); do { *((volatile int*)__null) = 1217; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
1217 border == ranges->at(new_end_index + 1)))do { static_assert( mozilla::detail::AssertionConditionType<
decltype(new_end_index + 1 == new_start_index || (new_end_index
+ 2 == new_start_index && border == ranges->at(new_end_index
+ 1)))>::isValid, "invalid assertion condition"); if ((__builtin_expect
(!!(!(!!(new_end_index + 1 == new_start_index || (new_end_index
+ 2 == new_start_index && border == ranges->at(new_end_index
+ 1))))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("new_end_index + 1 == new_start_index || (new_end_index + 2 == new_start_index && border == ranges->at(new_end_index + 1))"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1217); AnnotateMozCrashReason("MOZ_ASSERT" "(" "new_end_index + 1 == new_start_index || (new_end_index + 2 == new_start_index && border == ranges->at(new_end_index + 1))"
")"); do { *((volatile int*)__null) = 1217; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1218 DCHECK_LT(min_char, border - 1)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((min_char) < (border - 1))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((min_char) < (border - 1)
))), 0))) { do { } while (false); MOZ_ReportAssertionFailure(
"(min_char) < (border - 1)", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1218); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(min_char) < (border - 1)"
")"); do { *((volatile int*)__null) = 1218; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1219 DCHECK_LT(border, max_char)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((border) < (max_char))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((border) < (max_char)))),
0))) { do { } while (false); MOZ_ReportAssertionFailure("(border) < (max_char)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1219); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(border) < (max_char)"
")"); do { *((volatile int*)__null) = 1219; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1220 DCHECK_LT(ranges->at(new_end_index), border)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((ranges->at(new_end_index)) < (border))>::isValid
, "invalid assertion condition"); if ((__builtin_expect(!!(!(
!!((ranges->at(new_end_index)) < (border)))), 0))) { do
{ } while (false); MOZ_ReportAssertionFailure("(ranges->at(new_end_index)) < (border)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1220); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(ranges->at(new_end_index)) < (border)"
")"); do { *((volatile int*)__null) = 1220; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1221 DCHECK(border < ranges->at(new_start_index) ||do { static_assert( mozilla::detail::AssertionConditionType<
decltype(border < ranges->at(new_start_index) || (border
== ranges->at(new_start_index) && new_start_index
== end_index && new_end_index == end_index - 1 &&
border == last + 1))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(border < ranges->at(new_start_index
) || (border == ranges->at(new_start_index) && new_start_index
== end_index && new_end_index == end_index - 1 &&
border == last + 1)))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("border < ranges->at(new_start_index) || (border == ranges->at(new_start_index) && new_start_index == end_index && new_end_index == end_index - 1 && border == last + 1)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1224); AnnotateMozCrashReason("MOZ_ASSERT" "(" "border < ranges->at(new_start_index) || (border == ranges->at(new_start_index) && new_start_index == end_index && new_end_index == end_index - 1 && border == last + 1)"
")"); do { *((volatile int*)__null) = 1224; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
1222 (border == ranges->at(new_start_index) &&do { static_assert( mozilla::detail::AssertionConditionType<
decltype(border < ranges->at(new_start_index) || (border
== ranges->at(new_start_index) && new_start_index
== end_index && new_end_index == end_index - 1 &&
border == last + 1))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(border < ranges->at(new_start_index
) || (border == ranges->at(new_start_index) && new_start_index
== end_index && new_end_index == end_index - 1 &&
border == last + 1)))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("border < ranges->at(new_start_index) || (border == ranges->at(new_start_index) && new_start_index == end_index && new_end_index == end_index - 1 && border == last + 1)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1224); AnnotateMozCrashReason("MOZ_ASSERT" "(" "border < ranges->at(new_start_index) || (border == ranges->at(new_start_index) && new_start_index == end_index && new_end_index == end_index - 1 && border == last + 1)"
")"); do { *((volatile int*)__null) = 1224; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
1223 new_start_index == end_index && new_end_index == end_index - 1 &&do { static_assert( mozilla::detail::AssertionConditionType<
decltype(border < ranges->at(new_start_index) || (border
== ranges->at(new_start_index) && new_start_index
== end_index && new_end_index == end_index - 1 &&
border == last + 1))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(border < ranges->at(new_start_index
) || (border == ranges->at(new_start_index) && new_start_index
== end_index && new_end_index == end_index - 1 &&
border == last + 1)))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("border < ranges->at(new_start_index) || (border == ranges->at(new_start_index) && new_start_index == end_index && new_end_index == end_index - 1 && border == last + 1)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1224); AnnotateMozCrashReason("MOZ_ASSERT" "(" "border < ranges->at(new_start_index) || (border == ranges->at(new_start_index) && new_start_index == end_index && new_end_index == end_index - 1 && border == last + 1)"
")"); do { *((volatile int*)__null) = 1224; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
1224 border == last + 1))do { static_assert( mozilla::detail::AssertionConditionType<
decltype(border < ranges->at(new_start_index) || (border
== ranges->at(new_start_index) && new_start_index
== end_index && new_end_index == end_index - 1 &&
border == last + 1))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(border < ranges->at(new_start_index
) || (border == ranges->at(new_start_index) && new_start_index
== end_index && new_end_index == end_index - 1 &&
border == last + 1)))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("border < ranges->at(new_start_index) || (border == ranges->at(new_start_index) && new_start_index == end_index && new_end_index == end_index - 1 && border == last + 1)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1224); AnnotateMozCrashReason("MOZ_ASSERT" "(" "border < ranges->at(new_start_index) || (border == ranges->at(new_start_index) && new_start_index == end_index && new_end_index == end_index - 1 && border == last + 1)"
")"); do { *((volatile int*)__null) = 1224; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1225 DCHECK(new_start_index == 0 || border >= ranges->at(new_start_index - 1))do { static_assert( mozilla::detail::AssertionConditionType<
decltype(new_start_index == 0 || border >= ranges->at(new_start_index
- 1))>::isValid, "invalid assertion condition"); if ((__builtin_expect
(!!(!(!!(new_start_index == 0 || border >= ranges->at(new_start_index
- 1)))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("new_start_index == 0 || border >= ranges->at(new_start_index - 1)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1225); AnnotateMozCrashReason("MOZ_ASSERT" "(" "new_start_index == 0 || border >= ranges->at(new_start_index - 1)"
")"); do { *((volatile int*)__null) = 1225; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1226
1227 masm->CheckCharacterGT(border - 1, above);
1228 Label dummy;
1229 GenerateBranches(masm, ranges, start_index, new_end_index, min_char,
1230 border - 1, &dummy, even_label, odd_label);
1231 if (handle_rest.is_linked()) {
1232 masm->Bind(&handle_rest);
1233 bool flip = (new_start_index & 1) != (start_index & 1);
1234 GenerateBranches(masm, ranges, new_start_index, end_index, border, max_char,
1235 &dummy, flip ? odd_label : even_label,
1236 flip ? even_label : odd_label);
1237 }
1238}
1239
1240void EmitClassRanges(RegExpMacroAssembler* macro_assembler,
1241 RegExpClassRanges* cr, bool one_byte, Label* on_failure,
1242 int cp_offset, bool check_offset, bool preloaded,
1243 Zone* zone) {
1244 ZoneList<CharacterRange>* ranges = cr->ranges(zone);
1245 CharacterRange::Canonicalize(ranges);
1246
1247 // Now that all processing (like case-insensitivity) is done, clamp the
1248 // ranges to the set of ranges that may actually occur in the subject string.
1249 if (one_byte) CharacterRange::ClampToOneByte(ranges);
1250
1251 const int ranges_length = ranges->length();
1252 if (ranges_length == 0) {
1253 if (!cr->is_negated()) {
1254 macro_assembler->GoTo(on_failure);
1255 }
1256 if (check_offset) {
1257 macro_assembler->CheckPosition(cp_offset, on_failure);
1258 }
1259 return;
1260 }
1261
1262 const base::uc32 max_char = MaxCodeUnit(one_byte);
1263 if (ranges_length == 1 && ranges->at(0).IsEverything(max_char)) {
1264 if (cr->is_negated()) {
1265 macro_assembler->GoTo(on_failure);
1266 } else {
1267 // This is a common case hit by non-anchored expressions.
1268 if (check_offset) {
1269 macro_assembler->CheckPosition(cp_offset, on_failure);
1270 }
1271 }
1272 return;
1273 }
1274
1275 if (!preloaded) {
1276 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check_offset);
1277 }
1278
1279 if (cr->is_standard(zone) && macro_assembler->CheckSpecialClassRanges(
1280 cr->standard_type(), on_failure)) {
1281 return;
1282 }
1283
1284 static constexpr int kMaxRangesForInlineBranchGeneration = 16;
1285 if (ranges_length > kMaxRangesForInlineBranchGeneration) {
1286 // For large range sets, emit a more compact instruction sequence to avoid
1287 // a potentially problematic increase in code size.
1288 // Note the flipped logic below (we check InRange if negated, NotInRange if
1289 // not negated); this is necessary since the method falls through on
1290 // failure whereas we want to fall through on success.
1291 if (cr->is_negated()) {
1292 if (macro_assembler->CheckCharacterInRangeArray(ranges, on_failure)) {
1293 return;
1294 }
1295 } else {
1296 if (macro_assembler->CheckCharacterNotInRangeArray(ranges, on_failure)) {
1297 return;
1298 }
1299 }
1300 }
1301
1302 // Generate a flat list of range boundaries for consumption by
1303 // GenerateBranches. See the comment on that function for how the list should
1304 // be structured
1305 ZoneList<base::uc32>* range_boundaries =
1306 zone->New<ZoneList<base::uc32>>(ranges_length * 2, zone);
1307
1308 bool zeroth_entry_is_failure = !cr->is_negated();
1309
1310 for (int i = 0; i < ranges_length; i++) {
1311 CharacterRange& range = ranges->at(i);
1312 if (range.from() == 0) {
1313 DCHECK_EQ(i, 0)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((i) == (0))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((i) == (0)))), 0))) { do { }
while (false); MOZ_ReportAssertionFailure("(i) == (0)", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1313); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(i) == (0)"
")"); do { *((volatile int*)__null) = 1313; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1314 zeroth_entry_is_failure = !zeroth_entry_is_failure;
1315 } else {
1316 range_boundaries->Add(range.from(), zone);
1317 }
1318 // `+ 1` to convert from inclusive to exclusive `to`.
1319 // [from, to] == [from, to+1[.
1320 range_boundaries->Add(range.to() + 1, zone);
1321 }
1322 int end_index = range_boundaries->length() - 1;
1323 if (range_boundaries->at(end_index) > max_char) {
1324 end_index--;
1325 }
1326
1327 Label fall_through;
1328 GenerateBranches(macro_assembler, range_boundaries,
1329 0, // start_index.
1330 end_index,
1331 0, // min_char.
1332 max_char, &fall_through,
1333 zeroth_entry_is_failure ? &fall_through : on_failure,
1334 zeroth_entry_is_failure ? on_failure : &fall_through);
1335 macro_assembler->Bind(&fall_through);
1336}
1337
1338} // namespace
1339
1340RegExpNode::~RegExpNode() = default;
1341
1342RegExpNode::LimitResult RegExpNode::LimitVersions(RegExpCompiler* compiler,
1343 Trace* trace) {
1344 // If we are generating a greedy loop then don't stop and don't reuse code.
1345 if (trace->stop_node() != nullptr) {
1346 return CONTINUE;
1347 }
1348
1349 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
1350 if (trace->is_trivial()) {
1351 if (label_.is_bound() || on_work_list() || !KeepRecursing(compiler)) {
1352 // If a generic version is already scheduled to be generated or we have
1353 // recursed too deeply then just generate a jump to that code.
1354 macro_assembler->GoTo(&label_);
1355 // This will queue it up for generation of a generic version if it hasn't
1356 // already been queued.
1357 compiler->AddWork(this);
1358 return DONE;
1359 }
1360 // Generate generic version of the node and bind the label for later use.
1361 macro_assembler->Bind(&label_);
1362 return CONTINUE;
1363 }
1364
1365 // We are being asked to make a non-generic version. Keep track of how many
1366 // non-generic versions we generate so as not to overdo it.
1367 trace_count_++;
1368 if (KeepRecursing(compiler) && compiler->optimize() &&
1369 trace_count_ < kMaxCopiesCodeGenerated) {
1370 return CONTINUE;
1371 }
1372
1373 // If we get here code has been generated for this node too many times or
1374 // recursion is too deep. Time to switch to a generic version. The code for
1375 // generic versions above can handle deep recursion properly.
1376 bool was_limiting = compiler->limiting_recursion();
1377 compiler->set_limiting_recursion(true);
1378 trace->Flush(compiler, this);
1379 compiler->set_limiting_recursion(was_limiting);
1380 return DONE;
1381}
1382
1383bool RegExpNode::KeepRecursing(RegExpCompiler* compiler) {
1384 return !compiler->limiting_recursion() &&
1385 compiler->recursion_depth() <= RegExpCompiler::kMaxRecursion;
1386}
1387
1388void ActionNode::FillInBMInfo(Isolate* isolate, int offset, int budget,
1389 BoyerMooreLookahead* bm, bool not_at_start) {
1390 base::Optional<RegExpFlags> old_flags;
1391 if (action_type_ == MODIFY_FLAGS) {
1392 // It is not guaranteed that we hit the resetting modify flags node, due to
1393 // recursion budget limitation for filling in BMInfo. Therefore we reset the
1394 // flags manually to the previous state after recursing.
1395 old_flags = bm->compiler()->flags();
1396 bm->compiler()->set_flags(flags());
1397 }
1398 if (action_type_ == POSITIVE_SUBMATCH_SUCCESS) {
1399 // Anything may follow a positive submatch success, thus we need to accept
1400 // all characters from this position onwards.
1401 bm->SetRest(offset);
1402 } else {
1403 on_success()->FillInBMInfo(isolate, offset, budget - 1, bm, not_at_start);
1404 }
1405 SaveBMInfo(bm, not_at_start, offset);
1406 if (old_flags.has_value()) {
1407 bm->compiler()->set_flags(*old_flags);
1408 }
1409}
1410
1411void ActionNode::GetQuickCheckDetails(QuickCheckDetails* details,
1412 RegExpCompiler* compiler, int filled_in,
1413 bool not_at_start) {
1414 if (action_type_ == SET_REGISTER_FOR_LOOP) {
1415 on_success()->GetQuickCheckDetailsFromLoopEntry(details, compiler,
1416 filled_in, not_at_start);
1417 } else {
1418 if (action_type() == MODIFY_FLAGS) {
1419 compiler->set_flags(flags());
1420 }
1421 on_success()->GetQuickCheckDetails(details, compiler, filled_in,
1422 not_at_start);
1423 }
1424}
1425
1426void AssertionNode::FillInBMInfo(Isolate* isolate, int offset, int budget,
1427 BoyerMooreLookahead* bm, bool not_at_start) {
1428 // Match the behaviour of EatsAtLeast on this node.
1429 if (assertion_type() == AT_START && not_at_start) return;
1430 on_success()->FillInBMInfo(isolate, offset, budget - 1, bm, not_at_start);
1431 SaveBMInfo(bm, not_at_start, offset);
1432}
1433
1434void NegativeLookaroundChoiceNode::GetQuickCheckDetails(
1435 QuickCheckDetails* details, RegExpCompiler* compiler, int filled_in,
1436 bool not_at_start) {
1437 RegExpNode* node = continue_node();
1438 return node->GetQuickCheckDetails(details, compiler, filled_in, not_at_start);
1439}
1440
1441namespace {
1442
1443// Takes the left-most 1-bit and smears it out, setting all bits to its right.
1444inline uint32_t SmearBitsRight(uint32_t v) {
1445 v |= v >> 1;
1446 v |= v >> 2;
1447 v |= v >> 4;
1448 v |= v >> 8;
1449 v |= v >> 16;
1450 return v;
1451}
1452
1453} // namespace
1454
1455bool QuickCheckDetails::Rationalize(bool asc) {
1456 bool found_useful_op = false;
1457 const uint32_t char_mask = CharMask(asc);
1458 mask_ = 0;
1459 value_ = 0;
1460 int char_shift = 0;
1461 for (int i = 0; i < characters_; i++) {
36
Assuming 'i' is < field 'characters_'
37
Loop condition is true. Entering loop body
42
Assuming 'i' is < field 'characters_'
43
Loop condition is true. Entering loop body
48
Assuming 'i' is < field 'characters_'
49
Loop condition is true. Entering loop body
1462 Position* pos = &positions_[i];
1463 if ((pos->mask & String::kMaxOneByteCharCode) != 0) {
38
Assuming the condition is false
39
Taking false branch
44
Assuming the condition is false
45
Taking false branch
50
Assuming the condition is false
51
Taking false branch
1464 found_useful_op = true;
1465 }
1466 mask_ |= (pos->mask & char_mask) << char_shift;
52
The result of left shift is undefined because the right operand '32' is not smaller than 32, the capacity of 'uint32_t'
1467 value_ |= (pos->value & char_mask) << char_shift;
1468 char_shift += asc
45.1
'asc' is false
? 8 : 16
;
40
Assuming 'asc' is false
41
'?' condition is false
46
'?' condition is false
47
The value 32 is assigned to 'char_shift'
1469 }
1470 return found_useful_op;
1471}
1472
1473int RegExpNode::EatsAtLeast(bool not_at_start) {
1474 return not_at_start ? eats_at_least_.eats_at_least_from_not_start
1475 : eats_at_least_.eats_at_least_from_possibly_start;
1476}
1477
1478EatsAtLeastInfo RegExpNode::EatsAtLeastFromLoopEntry() {
1479 // SET_REGISTER_FOR_LOOP is only used to initialize loop counters, and it
1480 // implies that the following node must be a LoopChoiceNode. If we need to
1481 // set registers to constant values for other reasons, we could introduce a
1482 // new action type SET_REGISTER that doesn't imply anything about its
1483 // successor.
1484 UNREACHABLE()do { do { } while (false); MOZ_ReportCrash("" "unreachable code"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1484); AnnotateMozCrashReason("MOZ_CRASH(" "unreachable code"
")"); do { *((volatile int*)__null) = 1484; __attribute__((nomerge
)) ::abort(); } while (false); } while (false)
;
1485}
1486
1487void RegExpNode::GetQuickCheckDetailsFromLoopEntry(QuickCheckDetails* details,
1488 RegExpCompiler* compiler,
1489 int characters_filled_in,
1490 bool not_at_start) {
1491 // See comment in RegExpNode::EatsAtLeastFromLoopEntry.
1492 UNREACHABLE()do { do { } while (false); MOZ_ReportCrash("" "unreachable code"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1492); AnnotateMozCrashReason("MOZ_CRASH(" "unreachable code"
")"); do { *((volatile int*)__null) = 1492; __attribute__((nomerge
)) ::abort(); } while (false); } while (false)
;
1493}
1494
1495EatsAtLeastInfo LoopChoiceNode::EatsAtLeastFromLoopEntry() {
1496 DCHECK_EQ(alternatives_->length(), 2)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((alternatives_->length()) == (2))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((alternatives_->length())
== (2)))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("(alternatives_->length()) == (2)", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1496); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(alternatives_->length()) == (2)"
")"); do { *((volatile int*)__null) = 1496; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
; // There's just loop and continue.
1497
1498 if (read_backward()) {
1499 // The eats_at_least value is not used if reading backward. The
1500 // EatsAtLeastPropagator should've zeroed it as well.
1501 DCHECK_EQ(eats_at_least_info()->eats_at_least_from_possibly_start, 0)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((eats_at_least_info()->eats_at_least_from_possibly_start
) == (0))>::isValid, "invalid assertion condition"); if ((
__builtin_expect(!!(!(!!((eats_at_least_info()->eats_at_least_from_possibly_start
) == (0)))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("(eats_at_least_info()->eats_at_least_from_possibly_start) == (0)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1501); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(eats_at_least_info()->eats_at_least_from_possibly_start) == (0)"
")"); do { *((volatile int*)__null) = 1501; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1502 DCHECK_EQ(eats_at_least_info()->eats_at_least_from_not_start, 0)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((eats_at_least_info()->eats_at_least_from_not_start
) == (0))>::isValid, "invalid assertion condition"); if ((
__builtin_expect(!!(!(!!((eats_at_least_info()->eats_at_least_from_not_start
) == (0)))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("(eats_at_least_info()->eats_at_least_from_not_start) == (0)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1502); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(eats_at_least_info()->eats_at_least_from_not_start) == (0)"
")"); do { *((volatile int*)__null) = 1502; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1503 return {};
1504 }
1505
1506 // Figure out how much the loop body itself eats, not including anything in
1507 // the continuation case. In general, the nodes in the loop body should report
1508 // that they eat at least the number eaten by the continuation node, since any
1509 // successful match in the loop body must also include the continuation node.
1510 // However, in some cases involving positive lookaround, the loop body under-
1511 // reports its appetite, so use saturated math here to avoid negative numbers.
1512 uint8_t loop_body_from_not_start = base::saturated_cast<uint8_t>(
1513 loop_node_->EatsAtLeast(true) - continue_node_->EatsAtLeast(true));
1514 uint8_t loop_body_from_possibly_start = base::saturated_cast<uint8_t>(
1515 loop_node_->EatsAtLeast(false) - continue_node_->EatsAtLeast(true));
1516
1517 // Limit the number of loop iterations to avoid overflow in subsequent steps.
1518 int loop_iterations = base::saturated_cast<uint8_t>(min_loop_iterations());
1519
1520 EatsAtLeastInfo result;
1521 result.eats_at_least_from_not_start =
1522 base::saturated_cast<uint8_t>(loop_iterations * loop_body_from_not_start +
1523 continue_node_->EatsAtLeast(true));
1524 if (loop_iterations > 0 && loop_body_from_possibly_start > 0) {
1525 // First loop iteration eats at least one, so all subsequent iterations
1526 // and the after-loop chunk are guaranteed to not be at the start.
1527 result.eats_at_least_from_possibly_start = base::saturated_cast<uint8_t>(
1528 loop_body_from_possibly_start +
1529 (loop_iterations - 1) * loop_body_from_not_start +
1530 continue_node_->EatsAtLeast(true));
1531 } else {
1532 // Loop body might eat nothing, so only continue node contributes.
1533 result.eats_at_least_from_possibly_start =
1534 continue_node_->EatsAtLeast(false);
1535 }
1536 return result;
1537}
1538
1539bool RegExpNode::EmitQuickCheck(RegExpCompiler* compiler,
1540 Trace* bounds_check_trace, Trace* trace,
1541 bool preload_has_checked_bounds,
1542 Label* on_possible_success,
1543 QuickCheckDetails* details,
1544 bool fall_through_on_failure,
1545 ChoiceNode* predecessor) {
1546 DCHECK_NOT_NULL(predecessor)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((predecessor) != nullptr)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((predecessor) != nullptr))),
0))) { do { } while (false); MOZ_ReportAssertionFailure("(predecessor) != nullptr"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1546); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(predecessor) != nullptr"
")"); do { *((volatile int*)__null) = 1546; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
29
Taking false branch
30
Loop condition is false. Exiting loop
1547 if (details->characters() == 0) return false;
31
Assuming the condition is false
32
Taking false branch
1548 GetQuickCheckDetails(details, compiler, 0,
1549 trace->at_start() == Trace::FALSE_VALUE);
1550 if (details->cannot_match()) return false;
33
Assuming the condition is false
34
Taking false branch
1551 if (!details->Rationalize(compiler->one_byte())) return false;
35
Calling 'QuickCheckDetails::Rationalize'
1552 DCHECK(details->characters() == 1 ||do { static_assert( mozilla::detail::AssertionConditionType<
decltype(details->characters() == 1 || compiler->macro_assembler
()->CanReadUnaligned())>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(details->characters() == 1
|| compiler->macro_assembler()->CanReadUnaligned()))),
0))) { do { } while (false); MOZ_ReportAssertionFailure("details->characters() == 1 || compiler->macro_assembler()->CanReadUnaligned()"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1553); AnnotateMozCrashReason("MOZ_ASSERT" "(" "details->characters() == 1 || compiler->macro_assembler()->CanReadUnaligned()"
")"); do { *((volatile int*)__null) = 1553; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
1553 compiler->macro_assembler()->CanReadUnaligned())do { static_assert( mozilla::detail::AssertionConditionType<
decltype(details->characters() == 1 || compiler->macro_assembler
()->CanReadUnaligned())>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(details->characters() == 1
|| compiler->macro_assembler()->CanReadUnaligned()))),
0))) { do { } while (false); MOZ_ReportAssertionFailure("details->characters() == 1 || compiler->macro_assembler()->CanReadUnaligned()"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1553); AnnotateMozCrashReason("MOZ_ASSERT" "(" "details->characters() == 1 || compiler->macro_assembler()->CanReadUnaligned()"
")"); do { *((volatile int*)__null) = 1553; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1554 uint32_t mask = details->mask();
1555 uint32_t value = details->value();
1556
1557 RegExpMacroAssembler* assembler = compiler->macro_assembler();
1558
1559 if (trace->characters_preloaded() != details->characters()) {
1560 DCHECK(trace->cp_offset() == bounds_check_trace->cp_offset())do { static_assert( mozilla::detail::AssertionConditionType<
decltype(trace->cp_offset() == bounds_check_trace->cp_offset
())>::isValid, "invalid assertion condition"); if ((__builtin_expect
(!!(!(!!(trace->cp_offset() == bounds_check_trace->cp_offset
()))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("trace->cp_offset() == bounds_check_trace->cp_offset()"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1560); AnnotateMozCrashReason("MOZ_ASSERT" "(" "trace->cp_offset() == bounds_check_trace->cp_offset()"
")"); do { *((volatile int*)__null) = 1560; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1561 // The bounds check is performed using the minimum number of characters
1562 // any choice would eat, so if the bounds check fails, then none of the
1563 // choices can succeed, so we can just immediately backtrack, rather
1564 // than go to the next choice. The number of characters preloaded may be
1565 // less than the number used for the bounds check.
1566 int eats_at_least = predecessor->EatsAtLeast(
1567 bounds_check_trace->at_start() == Trace::FALSE_VALUE);
1568 DCHECK_GE(eats_at_least, details->characters())do { static_assert( mozilla::detail::AssertionConditionType<
decltype((eats_at_least) >= (details->characters()))>
::isValid, "invalid assertion condition"); if ((__builtin_expect
(!!(!(!!((eats_at_least) >= (details->characters())))),
0))) { do { } while (false); MOZ_ReportAssertionFailure("(eats_at_least) >= (details->characters())"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1568); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(eats_at_least) >= (details->characters())"
")"); do { *((volatile int*)__null) = 1568; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1569 assembler->LoadCurrentCharacter(
1570 trace->cp_offset(), bounds_check_trace->backtrack(),
1571 !preload_has_checked_bounds, details->characters(), eats_at_least);
1572 }
1573
1574 bool need_mask = true;
1575
1576 if (details->characters() == 1) {
1577 // If number of characters preloaded is 1 then we used a byte or 16 bit
1578 // load so the value is already masked down.
1579 const uint32_t char_mask = CharMask(compiler->one_byte());
1580 if ((mask & char_mask) == char_mask) need_mask = false;
1581 mask &= char_mask;
1582 } else {
1583 // For 2-character preloads in one-byte mode or 1-character preloads in
1584 // two-byte mode we also use a 16 bit load with zero extend.
1585 static const uint32_t kTwoByteMask = 0xFFFF;
1586 static const uint32_t kFourByteMask = 0xFFFFFFFF;
1587 if (details->characters() == 2 && compiler->one_byte()) {
1588 if ((mask & kTwoByteMask) == kTwoByteMask) need_mask = false;
1589 } else if (details->characters() == 1 && !compiler->one_byte()) {
1590 if ((mask & kTwoByteMask) == kTwoByteMask) need_mask = false;
1591 } else {
1592 if (mask == kFourByteMask) need_mask = false;
1593 }
1594 }
1595
1596 if (fall_through_on_failure) {
1597 if (need_mask) {
1598 assembler->CheckCharacterAfterAnd(value, mask, on_possible_success);
1599 } else {
1600 assembler->CheckCharacter(value, on_possible_success);
1601 }
1602 } else {
1603 if (need_mask) {
1604 assembler->CheckNotCharacterAfterAnd(value, mask, trace->backtrack());
1605 } else {
1606 assembler->CheckNotCharacter(value, trace->backtrack());
1607 }
1608 }
1609 return true;
1610}
1611
1612// Here is the meat of GetQuickCheckDetails (see also the comment on the
1613// super-class in the .h file).
1614//
1615// We iterate along the text object, building up for each character a
1616// mask and value that can be used to test for a quick failure to match.
1617// The masks and values for the positions will be combined into a single
1618// machine word for the current character width in order to be used in
1619// generating a quick check.
1620void TextNode::GetQuickCheckDetails(QuickCheckDetails* details,
1621 RegExpCompiler* compiler,
1622 int characters_filled_in,
1623 bool not_at_start) {
1624 // Do not collect any quick check details if the text node reads backward,
1625 // since it reads in the opposite direction than we use for quick checks.
1626 if (read_backward()) return;
1627 Isolate* isolate = compiler->macro_assembler()->isolate();
1628 DCHECK(characters_filled_in < details->characters())do { static_assert( mozilla::detail::AssertionConditionType<
decltype(characters_filled_in < details->characters())>
::isValid, "invalid assertion condition"); if ((__builtin_expect
(!!(!(!!(characters_filled_in < details->characters()))
), 0))) { do { } while (false); MOZ_ReportAssertionFailure("characters_filled_in < details->characters()"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1628); AnnotateMozCrashReason("MOZ_ASSERT" "(" "characters_filled_in < details->characters()"
")"); do { *((volatile int*)__null) = 1628; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1629 int characters = details->characters();
1630 const uint32_t char_mask = CharMask(compiler->one_byte());
1631 for (int k = 0; k < elements()->length(); k++) {
1632 TextElement elm = elements()->at(k);
1633 if (elm.text_type() == TextElement::ATOM) {
1634 base::Vector<const base::uc16> quarks = elm.atom()->data();
1635 for (int i = 0; i < characters && i < quarks.length(); i++) {
1636 QuickCheckDetails::Position* pos =
1637 details->positions(characters_filled_in);
1638 base::uc16 c = quarks[i];
1639 if (IsIgnoreCase(compiler->flags())) {
1640 unibrow::uchar chars[4];
1641 int length =
1642 GetCaseIndependentLetters(isolate, c, compiler, chars, 4);
1643 if (length == 0) {
1644 // This can happen because all case variants are non-Latin1, but we
1645 // know the input is Latin1.
1646 details->set_cannot_match();
1647 pos->determines_perfectly = false;
1648 return;
1649 }
1650 if (length == 1) {
1651 // This letter has no case equivalents, so it's nice and simple
1652 // and the mask-compare will determine definitely whether we have
1653 // a match at this character position.
1654 pos->mask = char_mask;
1655 pos->value = chars[0];
1656 pos->determines_perfectly = true;
1657 } else {
1658 uint32_t common_bits = char_mask;
1659 uint32_t bits = chars[0];
1660 for (int j = 1; j < length; j++) {
1661 uint32_t differing_bits = ((chars[j] & common_bits) ^ bits);
1662 common_bits ^= differing_bits;
1663 bits &= common_bits;
1664 }
1665 // If length is 2 and common bits has only one zero in it then
1666 // our mask and compare instruction will determine definitely
1667 // whether we have a match at this character position. Otherwise
1668 // it can only be an approximate check.
1669 uint32_t one_zero = (common_bits | ~char_mask);
1670 if (length == 2 && ((~one_zero) & ((~one_zero) - 1)) == 0) {
1671 pos->determines_perfectly = true;
1672 }
1673 pos->mask = common_bits;
1674 pos->value = bits;
1675 }
1676 } else {
1677 // Don't ignore case. Nice simple case where the mask-compare will
1678 // determine definitely whether we have a match at this character
1679 // position.
1680 if (c > char_mask) {
1681 details->set_cannot_match();
1682 pos->determines_perfectly = false;
1683 return;
1684 }
1685 pos->mask = char_mask;
1686 pos->value = c;
1687 pos->determines_perfectly = true;
1688 }
1689 characters_filled_in++;
1690 DCHECK(characters_filled_in <= details->characters())do { static_assert( mozilla::detail::AssertionConditionType<
decltype(characters_filled_in <= details->characters())
>::isValid, "invalid assertion condition"); if ((__builtin_expect
(!!(!(!!(characters_filled_in <= details->characters())
)), 0))) { do { } while (false); MOZ_ReportAssertionFailure("characters_filled_in <= details->characters()"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1690); AnnotateMozCrashReason("MOZ_ASSERT" "(" "characters_filled_in <= details->characters()"
")"); do { *((volatile int*)__null) = 1690; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1691 if (characters_filled_in == details->characters()) {
1692 return;
1693 }
1694 }
1695 } else {
1696 QuickCheckDetails::Position* pos =
1697 details->positions(characters_filled_in);
1698 RegExpClassRanges* tree = elm.class_ranges();
1699 ZoneList<CharacterRange>* ranges = tree->ranges(zone());
1700 if (tree->is_negated() || ranges->is_empty()) {
1701 // A quick check uses multi-character mask and compare. There is no
1702 // useful way to incorporate a negative char class into this scheme
1703 // so we just conservatively create a mask and value that will always
1704 // succeed.
1705 // Likewise for empty ranges (empty ranges can occur e.g. when
1706 // compiling for one-byte subjects and impossible (non-one-byte) ranges
1707 // have been removed).
1708 pos->mask = 0;
1709 pos->value = 0;
1710 } else {
1711 int first_range = 0;
1712 while (ranges->at(first_range).from() > char_mask) {
1713 first_range++;
1714 if (first_range == ranges->length()) {
1715 details->set_cannot_match();
1716 pos->determines_perfectly = false;
1717 return;
1718 }
1719 }
1720 CharacterRange range = ranges->at(first_range);
1721 const base::uc32 first_from = range.from();
1722 const base::uc32 first_to =
1723 (range.to() > char_mask) ? char_mask : range.to();
1724 const uint32_t differing_bits = (first_from ^ first_to);
1725 // A mask and compare is only perfect if the differing bits form a
1726 // number like 00011111 with one single block of trailing 1s.
1727 if ((differing_bits & (differing_bits + 1)) == 0 &&
1728 first_from + differing_bits == first_to) {
1729 pos->determines_perfectly = true;
1730 }
1731 uint32_t common_bits = ~SmearBitsRight(differing_bits);
1732 uint32_t bits = (first_from & common_bits);
1733 for (int i = first_range + 1; i < ranges->length(); i++) {
1734 range = ranges->at(i);
1735 const base::uc32 from = range.from();
1736 if (from > char_mask) continue;
1737 const base::uc32 to =
1738 (range.to() > char_mask) ? char_mask : range.to();
1739 // Here we are combining more ranges into the mask and compare
1740 // value. With each new range the mask becomes more sparse and
1741 // so the chances of a false positive rise. A character class
1742 // with multiple ranges is assumed never to be equivalent to a
1743 // mask and compare operation.
1744 pos->determines_perfectly = false;
1745 uint32_t new_common_bits = (from ^ to);
1746 new_common_bits = ~SmearBitsRight(new_common_bits);
1747 common_bits &= new_common_bits;
1748 bits &= new_common_bits;
1749 uint32_t new_differing_bits = (from & common_bits) ^ bits;
1750 common_bits ^= new_differing_bits;
1751 bits &= common_bits;
1752 }
1753 pos->mask = common_bits;
1754 pos->value = bits;
1755 }
1756 characters_filled_in++;
1757 DCHECK(characters_filled_in <= details->characters())do { static_assert( mozilla::detail::AssertionConditionType<
decltype(characters_filled_in <= details->characters())
>::isValid, "invalid assertion condition"); if ((__builtin_expect
(!!(!(!!(characters_filled_in <= details->characters())
)), 0))) { do { } while (false); MOZ_ReportAssertionFailure("characters_filled_in <= details->characters()"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1757); AnnotateMozCrashReason("MOZ_ASSERT" "(" "characters_filled_in <= details->characters()"
")"); do { *((volatile int*)__null) = 1757; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1758 if (characters_filled_in == details->characters()) return;
1759 }
1760 }
1761 DCHECK(characters_filled_in != details->characters())do { static_assert( mozilla::detail::AssertionConditionType<
decltype(characters_filled_in != details->characters())>
::isValid, "invalid assertion condition"); if ((__builtin_expect
(!!(!(!!(characters_filled_in != details->characters()))),
0))) { do { } while (false); MOZ_ReportAssertionFailure("characters_filled_in != details->characters()"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1761); AnnotateMozCrashReason("MOZ_ASSERT" "(" "characters_filled_in != details->characters()"
")"); do { *((volatile int*)__null) = 1761; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1762 if (!details->cannot_match()) {
1763 on_success()->GetQuickCheckDetails(details, compiler, characters_filled_in,
1764 true);
1765 }
1766}
1767
1768void QuickCheckDetails::Clear() {
1769 for (int i = 0; i < characters_; i++) {
1770 positions_[i].mask = 0;
1771 positions_[i].value = 0;
1772 positions_[i].determines_perfectly = false;
1773 }
1774 characters_ = 0;
1775}
1776
1777void QuickCheckDetails::Advance(int by, bool one_byte) {
1778 if (by >= characters_ || by < 0) {
1779 DCHECK_IMPLIES(by < 0, characters_ == 0)do { if (by < 0) { do { static_assert( mozilla::detail::AssertionConditionType
<decltype(characters_ == 0)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(characters_ == 0))), 0))) { do
{ } while (false); MOZ_ReportAssertionFailure("characters_ == 0"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1779); AnnotateMozCrashReason("MOZ_ASSERT" "(" "characters_ == 0"
")"); do { *((volatile int*)__null) = 1779; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false); } } while (
false)
;
1780 Clear();
1781 return;
1782 }
1783 DCHECK_LE(characters_ - by, 4)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((characters_ - by) <= (4))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((characters_ - by) <= (4)
))), 0))) { do { } while (false); MOZ_ReportAssertionFailure(
"(characters_ - by) <= (4)", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1783); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(characters_ - by) <= (4)"
")"); do { *((volatile int*)__null) = 1783; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1784 DCHECK_LE(characters_, 4)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((characters_) <= (4))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((characters_) <= (4)))), 0
))) { do { } while (false); MOZ_ReportAssertionFailure("(characters_) <= (4)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1784); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(characters_) <= (4)"
")"); do { *((volatile int*)__null) = 1784; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1785 for (int i = 0; i < characters_ - by; i++) {
1786 positions_[i] = positions_[by + i];
1787 }
1788 for (int i = characters_ - by; i < characters_; i++) {
1789 positions_[i].mask = 0;
1790 positions_[i].value = 0;
1791 positions_[i].determines_perfectly = false;
1792 }
1793 characters_ -= by;
1794 // We could change mask_ and value_ here but we would never advance unless
1795 // they had already been used in a check and they won't be used again because
1796 // it would gain us nothing. So there's no point.
1797}
1798
1799void QuickCheckDetails::Merge(QuickCheckDetails* other, int from_index) {
1800 DCHECK(characters_ == other->characters_)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(characters_ == other->characters_)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(characters_ == other->characters_
))), 0))) { do { } while (false); MOZ_ReportAssertionFailure(
"characters_ == other->characters_", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1800); AnnotateMozCrashReason("MOZ_ASSERT" "(" "characters_ == other->characters_"
")"); do { *((volatile int*)__null) = 1800; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1801 if (other->cannot_match_) {
1802 return;
1803 }
1804 if (cannot_match_) {
1805 *this = *other;
1806 return;
1807 }
1808 for (int i = from_index; i < characters_; i++) {
1809 QuickCheckDetails::Position* pos = positions(i);
1810 QuickCheckDetails::Position* other_pos = other->positions(i);
1811 if (pos->mask != other_pos->mask || pos->value != other_pos->value ||
1812 !other_pos->determines_perfectly) {
1813 // Our mask-compare operation will be approximate unless we have the
1814 // exact same operation on both sides of the alternation.
1815 pos->determines_perfectly = false;
1816 }
1817 pos->mask &= other_pos->mask;
1818 pos->value &= pos->mask;
1819 other_pos->value &= pos->mask;
1820 uint32_t differing_bits = (pos->value ^ other_pos->value);
1821 pos->mask &= ~differing_bits;
1822 pos->value &= pos->mask;
1823 }
1824}
1825
1826class VisitMarker {
1827 public:
1828 explicit VisitMarker(NodeInfo* info) : info_(info) {
1829 DCHECK(!info->visited)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(!info->visited)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(!info->visited))), 0))) {
do { } while (false); MOZ_ReportAssertionFailure("!info->visited"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1829); AnnotateMozCrashReason("MOZ_ASSERT" "(" "!info->visited"
")"); do { *((volatile int*)__null) = 1829; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1830 info->visited = true;
1831 }
1832 ~VisitMarker() { info_->visited = false; }
1833
1834 private:
1835 NodeInfo* info_;
1836};
1837
1838// Temporarily sets traversed_loop_initialization_node_.
1839class LoopInitializationMarker {
1840 public:
1841 explicit LoopInitializationMarker(LoopChoiceNode* node) : node_(node) {
1842 DCHECK(!node_->traversed_loop_initialization_node_)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(!node_->traversed_loop_initialization_node_)>::
isValid, "invalid assertion condition"); if ((__builtin_expect
(!!(!(!!(!node_->traversed_loop_initialization_node_))), 0
))) { do { } while (false); MOZ_ReportAssertionFailure("!node_->traversed_loop_initialization_node_"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1842); AnnotateMozCrashReason("MOZ_ASSERT" "(" "!node_->traversed_loop_initialization_node_"
")"); do { *((volatile int*)__null) = 1842; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1843 node_->traversed_loop_initialization_node_ = true;
1844 }
1845 ~LoopInitializationMarker() {
1846 DCHECK(node_->traversed_loop_initialization_node_)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(node_->traversed_loop_initialization_node_)>::
isValid, "invalid assertion condition"); if ((__builtin_expect
(!!(!(!!(node_->traversed_loop_initialization_node_))), 0)
)) { do { } while (false); MOZ_ReportAssertionFailure("node_->traversed_loop_initialization_node_"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1846); AnnotateMozCrashReason("MOZ_ASSERT" "(" "node_->traversed_loop_initialization_node_"
")"); do { *((volatile int*)__null) = 1846; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1847 node_->traversed_loop_initialization_node_ = false;
1848 }
1849 LoopInitializationMarker(const LoopInitializationMarker&) = delete;
1850 LoopInitializationMarker& operator=(const LoopInitializationMarker&) = delete;
1851
1852 private:
1853 LoopChoiceNode* node_;
1854};
1855
1856// Temporarily decrements min_loop_iterations_.
1857class IterationDecrementer {
1858 public:
1859 explicit IterationDecrementer(LoopChoiceNode* node) : node_(node) {
1860 DCHECK_GT(node_->min_loop_iterations_, 0)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((node_->min_loop_iterations_) > (0))>::isValid
, "invalid assertion condition"); if ((__builtin_expect(!!(!(
!!((node_->min_loop_iterations_) > (0)))), 0))) { do { }
while (false); MOZ_ReportAssertionFailure("(node_->min_loop_iterations_) > (0)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1860); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(node_->min_loop_iterations_) > (0)"
")"); do { *((volatile int*)__null) = 1860; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1861 --node_->min_loop_iterations_;
1862 }
1863 ~IterationDecrementer() { ++node_->min_loop_iterations_; }
1864 IterationDecrementer(const IterationDecrementer&) = delete;
1865 IterationDecrementer& operator=(const IterationDecrementer&) = delete;
1866
1867 private:
1868 LoopChoiceNode* node_;
1869};
1870
1871RegExpNode* SeqRegExpNode::FilterOneByte(int depth, RegExpCompiler* compiler) {
1872 if (info()->replacement_calculated) return replacement();
1873 if (depth < 0) return this;
1874 DCHECK(!info()->visited)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(!info()->visited)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(!info()->visited))), 0)))
{ do { } while (false); MOZ_ReportAssertionFailure("!info()->visited"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1874); AnnotateMozCrashReason("MOZ_ASSERT" "(" "!info()->visited"
")"); do { *((volatile int*)__null) = 1874; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1875 VisitMarker marker(info());
1876 return FilterSuccessor(depth - 1, compiler);
1877}
1878
1879RegExpNode* SeqRegExpNode::FilterSuccessor(int depth,
1880 RegExpCompiler* compiler) {
1881 RegExpNode* next = on_success_->FilterOneByte(depth - 1, compiler);
1882 if (next == nullptr) return set_replacement(nullptr);
1883 on_success_ = next;
1884 return set_replacement(this);
1885}
1886
1887// We need to check for the following characters: 0x39C 0x3BC 0x178.
1888bool RangeContainsLatin1Equivalents(CharacterRange range) {
1889 // TODO(dcarney): this could be a lot more efficient.
1890 return range.Contains(0x039C) || range.Contains(0x03BC) ||
1891 range.Contains(0x0178);
1892}
1893
1894namespace {
1895
1896bool RangesContainLatin1Equivalents(ZoneList<CharacterRange>* ranges) {
1897 for (int i = 0; i < ranges->length(); i++) {
1898 // TODO(dcarney): this could be a lot more efficient.
1899 if (RangeContainsLatin1Equivalents(ranges->at(i))) return true;
1900 }
1901 return false;
1902}
1903
1904} // namespace
1905
1906RegExpNode* TextNode::FilterOneByte(int depth, RegExpCompiler* compiler) {
1907 RegExpFlags flags = compiler->flags();
1908 if (info()->replacement_calculated) return replacement();
1909 if (depth < 0) return this;
1910 DCHECK(!info()->visited)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(!info()->visited)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(!info()->visited))), 0)))
{ do { } while (false); MOZ_ReportAssertionFailure("!info()->visited"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1910); AnnotateMozCrashReason("MOZ_ASSERT" "(" "!info()->visited"
")"); do { *((volatile int*)__null) = 1910; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1911 VisitMarker marker(info());
1912 int element_count = elements()->length();
1913 for (int i = 0; i < element_count; i++) {
1914 TextElement elm = elements()->at(i);
1915 if (elm.text_type() == TextElement::ATOM) {
1916 base::Vector<const base::uc16> quarks = elm.atom()->data();
1917 for (int j = 0; j < quarks.length(); j++) {
1918 base::uc16 c = quarks[j];
1919 if (!IsIgnoreCase(flags)) {
1920 if (c > String::kMaxOneByteCharCode) return set_replacement(nullptr);
1921 } else {
1922 unibrow::uchar chars[4];
1923 int length = GetCaseIndependentLetters(compiler->isolate(), c,
1924 compiler, chars, 4);
1925 if (length == 0 || chars[0] > String::kMaxOneByteCharCode) {
1926 return set_replacement(nullptr);
1927 }
1928 }
1929 }
1930 } else {
1931 // A character class can also be impossible to match in one-byte mode.
1932 DCHECK(elm.text_type() == TextElement::CLASS_RANGES)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(elm.text_type() == TextElement::CLASS_RANGES)>::isValid
, "invalid assertion condition"); if ((__builtin_expect(!!(!(
!!(elm.text_type() == TextElement::CLASS_RANGES))), 0))) { do
{ } while (false); MOZ_ReportAssertionFailure("elm.text_type() == TextElement::CLASS_RANGES"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1932); AnnotateMozCrashReason("MOZ_ASSERT" "(" "elm.text_type() == TextElement::CLASS_RANGES"
")"); do { *((volatile int*)__null) = 1932; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1933 RegExpClassRanges* cr = elm.class_ranges();
1934 ZoneList<CharacterRange>* ranges = cr->ranges(zone());
1935 CharacterRange::Canonicalize(ranges);
1936 // Now they are in order so we only need to look at the first.
1937 // If we are in non-Unicode case independent mode then we need
1938 // to be a bit careful here, because the character classes have
1939 // not been case-desugared yet, but there are characters and ranges
1940 // that can become Latin-1 when case is considered.
1941 int range_count = ranges->length();
1942 if (cr->is_negated()) {
1943 if (range_count != 0 && ranges->at(0).from() == 0 &&
1944 ranges->at(0).to() >= String::kMaxOneByteCharCode) {
1945 bool case_complications = !IsEitherUnicode(flags) &&
1946 IsIgnoreCase(flags) &&
1947 RangesContainLatin1Equivalents(ranges);
1948 if (!case_complications) {
1949 return set_replacement(nullptr);
1950 }
1951 }
1952 } else {
1953 if (range_count == 0 ||
1954 ranges->at(0).from() > String::kMaxOneByteCharCode) {
1955 bool case_complications = !IsEitherUnicode(flags) &&
1956 IsIgnoreCase(flags) &&
1957 RangesContainLatin1Equivalents(ranges);
1958 if (!case_complications) {
1959 return set_replacement(nullptr);
1960 }
1961 }
1962 }
1963 }
1964 }
1965 return FilterSuccessor(depth - 1, compiler);
1966}
1967
1968RegExpNode* LoopChoiceNode::FilterOneByte(int depth, RegExpCompiler* compiler) {
1969 if (info()->replacement_calculated) return replacement();
1970 if (depth < 0) return this;
1971 if (info()->visited) return this;
1972 {
1973 VisitMarker marker(info());
1974
1975 RegExpNode* continue_replacement =
1976 continue_node_->FilterOneByte(depth - 1, compiler);
1977 // If we can't continue after the loop then there is no sense in doing the
1978 // loop.
1979 if (continue_replacement == nullptr) return set_replacement(nullptr);
1980 }
1981
1982 return ChoiceNode::FilterOneByte(depth - 1, compiler);
1983}
1984
1985RegExpNode* ChoiceNode::FilterOneByte(int depth, RegExpCompiler* compiler) {
1986 if (info()->replacement_calculated) return replacement();
1987 if (depth < 0) return this;
1988 if (info()->visited) return this;
1989 VisitMarker marker(info());
1990 int choice_count = alternatives_->length();
1991
1992 for (int i = 0; i < choice_count; i++) {
1993 GuardedAlternative alternative = alternatives_->at(i);
1994 if (alternative.guards() != nullptr &&
1995 alternative.guards()->length() != 0) {
1996 set_replacement(this);
1997 return this;
1998 }
1999 }
2000
2001 int surviving = 0;
2002 RegExpNode* survivor = nullptr;
2003 for (int i = 0; i < choice_count; i++) {
2004 GuardedAlternative alternative = alternatives_->at(i);
2005 RegExpNode* replacement =
2006 alternative.node()->FilterOneByte(depth - 1, compiler);
2007 DCHECK(replacement != this)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(replacement != this)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(replacement != this))), 0)))
{ do { } while (false); MOZ_ReportAssertionFailure("replacement != this"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 2007); AnnotateMozCrashReason("MOZ_ASSERT" "(" "replacement != this"
")"); do { *((volatile int*)__null) = 2007; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
; // No missing EMPTY_MATCH_CHECK.
2008 if (replacement != nullptr) {
2009 alternatives_->at(i).set_node(replacement);
2010 surviving++;
2011 survivor = replacement;
2012 }
2013 }
2014 if (surviving < 2) return set_replacement(survivor);
2015
2016 set_replacement(this);
2017 if (surviving == choice_count) {
2018 return this;
2019 }
2020 // Only some of the nodes survived the filtering. We need to rebuild the
2021 // alternatives list.
2022 ZoneList<GuardedAlternative>* new_alternatives =
2023 zone()->New<ZoneList<GuardedAlternative>>(surviving, zone());
2024 for (int i = 0; i < choice_count; i++) {
2025 RegExpNode* replacement =
2026 alternatives_->at(i).node()->FilterOneByte(depth - 1, compiler);
2027 if (replacement != nullptr) {
2028 alternatives_->at(i).set_node(replacement);
2029 new_alternatives->Add(alternatives_->at(i), zone());
2030 }
2031 }
2032 alternatives_ = new_alternatives;
2033 return this;
2034}
2035
2036RegExpNode* NegativeLookaroundChoiceNode::FilterOneByte(
2037 int depth, RegExpCompiler* compiler) {
2038 if (info()->replacement_calculated) return replacement();
2039 if (depth < 0) return this;
2040 if (info()->visited) return this;
2041 VisitMarker marker(info());
2042 // Alternative 0 is the negative lookahead, alternative 1 is what comes
2043 // afterwards.
2044 RegExpNode* node = continue_node();
2045 RegExpNode* replacement = node->FilterOneByte(depth - 1, compiler);
2046 if (replacement == nullptr) return set_replacement(nullptr);
2047 alternatives_->at(kContinueIndex).set_node(replacement);
2048
2049 RegExpNode* neg_node = lookaround_node();
2050 RegExpNode* neg_replacement = neg_node->FilterOneByte(depth - 1, compiler);
2051 // If the negative lookahead is always going to fail then
2052 // we don't need to check it.
2053 if (neg_replacement == nullptr) return set_replacement(replacement);
2054 alternatives_->at(kLookaroundIndex).set_node(neg_replacement);
2055 return set_replacement(this);
2056}
2057
2058void LoopChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details,
2059 RegExpCompiler* compiler,
2060 int characters_filled_in,
2061 bool not_at_start) {
2062 if (body_can_be_zero_length_ || info()->visited) return;
2063 not_at_start = not_at_start || this->not_at_start();
2064 DCHECK_EQ(alternatives_->length(), 2)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((alternatives_->length()) == (2))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((alternatives_->length())
== (2)))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("(alternatives_->length()) == (2)", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 2064); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(alternatives_->length()) == (2)"
")"); do { *((volatile int*)__null) = 2064; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
; // There's just loop and continue.
2065 if (traversed_loop_initialization_node_ && min_loop_iterations_ > 0 &&
2066 loop_node_->EatsAtLeast(not_at_start) >
2067 continue_node_->EatsAtLeast(true)) {
2068 // Loop body is guaranteed to execute at least once, and consume characters
2069 // when it does, meaning the only possible quick checks from this point
2070 // begin with the loop body. We may recursively visit this LoopChoiceNode,
2071 // but we temporarily decrease its minimum iteration counter so we know when
2072 // to check the continue case.
2073 IterationDecrementer next_iteration(this);
2074 loop_node_->GetQuickCheckDetails(details, compiler, characters_filled_in,
2075 not_at_start);
2076 } else {
2077 // Might not consume anything in the loop body, so treat it like a normal
2078 // ChoiceNode (and don't recursively visit this node again).
2079 VisitMarker marker(info());
2080 ChoiceNode::GetQuickCheckDetails(details, compiler, characters_filled_in,
2081 not_at_start);
2082 }
2083}
2084
2085void LoopChoiceNode::GetQuickCheckDetailsFromLoopEntry(
2086 QuickCheckDetails* details, RegExpCompiler* compiler,
2087 int characters_filled_in, bool not_at_start) {
2088 if (traversed_loop_initialization_node_) {
2089 // We already entered this loop once, exited via its continuation node, and
2090 // followed an outer loop's back-edge to before the loop entry point. We
2091 // could try to reset the minimum iteration count to its starting value at
2092 // this point, but that seems like more trouble than it's worth. It's safe
2093 // to keep going with the current (possibly reduced) minimum iteration
2094 // count.
2095 GetQuickCheckDetails(details, compiler, characters_filled_in, not_at_start);
2096 } else {
2097 // We are entering a loop via its counter initialization action, meaning we
2098 // are guaranteed to run the loop body at least some minimum number of times
2099 // before running the continuation node. Set a flag so that this node knows
2100 // (now and any times we visit it again recursively) that it was entered
2101 // from the top.
2102 LoopInitializationMarker marker(this);
2103 GetQuickCheckDetails(details, compiler, characters_filled_in, not_at_start);
2104 }
2105}
2106
2107void LoopChoiceNode::FillInBMInfo(Isolate* isolate, int offset, int budget,
2108 BoyerMooreLookahead* bm, bool not_at_start) {
2109 if (body_can_be_zero_length_ || budget <= 0) {
2110 bm->SetRest(offset);
2111 SaveBMInfo(bm, not_at_start, offset);
2112 return;
2113 }
2114 ChoiceNode::FillInBMInfo(isolate, offset, budget - 1, bm, not_at_start);
2115 SaveBMInfo(bm, not_at_start, offset);
2116}
2117
2118void ChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details,
2119 RegExpCompiler* compiler,
2120 int characters_filled_in,
2121 bool not_at_start) {
2122 not_at_start = (not_at_start || not_at_start_);
2123 int choice_count = alternatives_->length();
2124 DCHECK_LT(0, choice_count)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((0) < (choice_count))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((0) < (choice_count)))), 0
))) { do { } while (false); MOZ_ReportAssertionFailure("(0) < (choice_count)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 2124); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(0) < (choice_count)"
")"); do { *((volatile int*)__null) = 2124; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
2125 alternatives_->at(0).node()->GetQuickCheckDetails(
2126 details, compiler, characters_filled_in, not_at_start);
2127 for (int i = 1; i < choice_count; i++) {
2128 QuickCheckDetails new_details(details->characters());
2129 RegExpNode* node = alternatives_->at(i).node();
2130 node->GetQuickCheckDetails(&new_details, compiler, characters_filled_in,
2131 not_at_start);
2132 // Here we merge the quick match details of the two branches.
2133 details->Merge(&new_details, characters_filled_in);
2134 }
2135}
2136
2137namespace {
2138
2139// Check for [0-9A-Z_a-z].
2140void EmitWordCheck(RegExpMacroAssembler* assembler, Label* word,
2141 Label* non_word, bool fall_through_on_word) {
2142 if (assembler->CheckSpecialClassRanges(
2143 fall_through_on_word ? StandardCharacterSet::kWord
2144 : StandardCharacterSet::kNotWord,
2145 fall_through_on_word ? non_word : word)) {
2146 // Optimized implementation available.
2147 return;
2148 }
2149 assembler->CheckCharacterGT('z', non_word);
2150 assembler->CheckCharacterLT('0', non_word);
2151 assembler->CheckCharacterGT('a' - 1, word);
2152 assembler->CheckCharacterLT('9' + 1, word);
2153 assembler->CheckCharacterLT('A', non_word);
2154 assembler->CheckCharacterLT('Z' + 1, word);
2155 if (fall_through_on_word) {
2156 assembler->CheckNotCharacter('_', non_word);
2157 } else {
2158 assembler->CheckCharacter('_', word);
2159 }
2160}
2161
2162// Emit the code to check for a ^ in multiline mode (1-character lookbehind
2163// that matches newline or the start of input).
2164void EmitHat(RegExpCompiler* compiler, RegExpNode* on_success, Trace* trace) {
2165 RegExpMacroAssembler* assembler = compiler->macro_assembler();
2166
2167 // We will load the previous character into the current character register.
2168 Trace new_trace(*trace);
2169 new_trace.InvalidateCurrentCharacter();
2170
2171 // A positive (> 0) cp_offset means we've already successfully matched a
2172 // non-empty-width part of the pattern, and thus cannot be at or before the
2173 // start of the subject string. We can thus skip both at-start and
2174 // bounds-checks when loading the one-character lookbehind.
2175 const bool may_be_at_or_before_subject_string_start =
2176 new_trace.cp_offset() <= 0;
2177
2178 Label ok;
2179 if (may_be_at_or_before_subject_string_start) {
2180 // The start of input counts as a newline in this context, so skip to ok if
2181 // we are at the start.
2182 assembler->CheckAtStart(new_trace.cp_offset(), &ok);
2183 }
2184
2185 // If we've already checked that we are not at the start of input, it's okay
2186 // to load the previous character without bounds checks.
2187 const bool can_skip_bounds_check = !may_be_at_or_before_subject_string_start;
2188 assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1,
2189 new_trace.backtrack(), can_skip_bounds_check);
2190 if (!assembler->CheckSpecialClassRanges(StandardCharacterSet::kLineTerminator,
2191 new_trace.backtrack())) {
2192 // Newline means \n, \r, 0x2028 or 0x2029.
2193 if (!compiler->one_byte()) {
2194 assembler->CheckCharacterAfterAnd(0x2028, 0xFFFE, &ok);
2195 }
2196 assembler->CheckCharacter('\n', &ok);
2197 assembler->CheckNotCharacter('\r', new_trace.backtrack());
2198 }
2199 assembler->Bind(&ok);
2200 on_success->Emit(compiler, &new_trace);
2201}
2202
2203} // namespace
2204
2205// Emit the code to handle \b and \B (word-boundary or non-word-boundary).
2206void AssertionNode::EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace) {
2207 RegExpMacroAssembler* assembler = compiler->macro_assembler();
2208 Isolate* isolate = assembler->isolate();
2209 Trace::TriBool next_is_word_character = Trace::UNKNOWN;
2210 bool not_at_start = (trace->at_start() == Trace::FALSE_VALUE);
2211 BoyerMooreLookahead* lookahead = bm_info(not_at_start);
2212 if (lookahead == nullptr) {
2213 int eats_at_least =
2214 std::min(kMaxLookaheadForBoyerMoore, EatsAtLeast(not_at_start));
2215 if (eats_at_least >= 1) {
2216 BoyerMooreLookahead* bm =
2217 zone()->New<BoyerMooreLookahead>(eats_at_least, compiler, zone());
2218 FillInBMInfo(isolate, 0, kRecursionBudget, bm, not_at_start);
2219 if (bm->at(0)->is_non_word()) next_is_word_character = Trace::FALSE_VALUE;
2220 if (bm->at(0)->is_word()) next_is_word_character = Trace::TRUE_VALUE;
2221 }
2222 } else {
2223 if (lookahead->at(0)->is_non_word())
2224 next_is_word_character = Trace::FALSE_VALUE;
2225 if (lookahead->at(0)->is_word()) next_is_word_character = Trace::TRUE_VALUE;
2226 }
2227 bool at_boundary = (assertion_type_ == AssertionNode::AT_BOUNDARY);
2228 if (next_is_word_character == Trace::UNKNOWN) {
2229 Label before_non_word;
2230 Label before_word;
2231 if (trace->characters_preloaded() != 1) {
2232 assembler->LoadCurrentCharacter(trace->cp_offset(), &before_non_word);
2233 }
2234 // Fall through on non-word.
2235 EmitWordCheck(assembler, &before_word, &before_non_word, false);
2236 // Next character is not a word character.
2237 assembler->Bind(&before_non_word);
2238 Label ok;
2239 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsNonWord : kIsWord);
2240 assembler->GoTo(&ok);
2241
2242 assembler->Bind(&before_word);
2243 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsWord : kIsNonWord);
2244 assembler->Bind(&ok);
2245 } else if (next_is_word_character == Trace::TRUE_VALUE) {
2246 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsWord : kIsNonWord);
2247 } else {
2248 DCHECK(next_is_word_character == Trace::FALSE_VALUE)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(next_is_word_character == Trace::FALSE_VALUE)>::isValid
, "invalid assertion condition"); if ((__builtin_expect(!!(!(
!!(next_is_word_character == Trace::FALSE_VALUE))), 0))) { do
{ } while (false); MOZ_ReportAssertionFailure("next_is_word_character == Trace::FALSE_VALUE"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 2248); AnnotateMozCrashReason("MOZ_ASSERT" "(" "next_is_word_character == Trace::FALSE_VALUE"
")"); do { *((volatile int*)__null) = 2248; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
2249 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsNonWord : kIsWord);
2250 }
2251}
2252
2253void AssertionNode::BacktrackIfPrevious(
2254 RegExpCompiler* compiler, Trace* trace,
2255 AssertionNode::IfPrevious backtrack_if_previous) {
2256 RegExpMacroAssembler* assembler = compiler->macro_assembler();
2257 Trace new_trace(*trace);
2258 new_trace.InvalidateCurrentCharacter();
2259
2260 Label fall_through;
2261 Label* non_word = backtrack_if_previous == kIsNonWord ? new_trace.backtrack()
2262 : &fall_through;
2263 Label* word = backtrack_if_previous == kIsNonWord ? &fall_through
2264 : new_trace.backtrack();
2265
2266 // A positive (> 0) cp_offset means we've already successfully matched a
2267 // non-empty-width part of the pattern, and thus cannot be at or before the
2268 // start of the subject string. We can thus skip both at-start and
2269 // bounds-checks when loading the one-character lookbehind.
2270 const bool may_be_at_or_before_subject_string_start =
2271 new_trace.cp_offset() <= 0;
2272
2273 if (may_be_at_or_before_subject_string_start) {
2274 // The start of input counts as a non-word character, so the question is
2275 // decided if we are at the start.
2276 assembler->CheckAtStart(new_trace.cp_offset(), non_word);
2277 }
2278
2279 // If we've already checked that we are not at the start of input, it's okay
2280 // to load the previous character without bounds checks.
2281 const bool can_skip_bounds_check = !may_be_at_or_before_subject_string_start;
2282 assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1, non_word,
2283 can_skip_bounds_check);
2284 EmitWordCheck(assembler, word, non_word, backtrack_if_previous == kIsNonWord);
2285
2286 assembler->Bind(&fall_through);
2287 on_success()->Emit(compiler, &new_trace);
2288}
2289
2290void AssertionNode::GetQuickCheckDetails(QuickCheckDetails* details,
2291 RegExpCompiler* compiler,
2292 int filled_in, bool not_at_start) {
2293 if (assertion_type_ == AT_START && not_at_start) {
2294 details->set_cannot_match();
2295 return;
2296 }
2297 return on_success()->GetQuickCheckDetails(details, compiler, filled_in,
2298 not_at_start);
2299}
2300
2301void AssertionNode::Emit(RegExpCompiler* compiler, Trace* trace) {
2302 RegExpMacroAssembler* assembler = compiler->macro_assembler();
2303 switch (assertion_type_) {
2304 case AT_END: {
2305 Label ok;
2306 assembler->CheckPosition(trace->cp_offset(), &ok);
2307 assembler->GoTo(trace->backtrack());
2308 assembler->Bind(&ok);
2309 break;
2310 }
2311 case AT_START: {
2312 if (trace->at_start() == Trace::FALSE_VALUE) {
2313 assembler->GoTo(trace->backtrack());
2314 return;
2315 }
2316 if (trace->at_start() == Trace::UNKNOWN) {
2317 assembler->CheckNotAtStart(trace->cp_offset(), trace->backtrack());
2318 Trace at_start_trace = *trace;
2319 at_start_trace.set_at_start(Trace::TRUE_VALUE);
2320 on_success()->Emit(compiler, &at_start_trace);
2321 return;
2322 }
2323 } break;
2324 case AFTER_NEWLINE:
2325 EmitHat(compiler, on_success(), trace);
2326 return;
2327 case AT_BOUNDARY:
2328 case AT_NON_BOUNDARY: {
2329 EmitBoundaryCheck(compiler, trace);
2330 return;
2331 }
2332 }
2333 on_success()->Emit(compiler, trace);
2334}
2335
2336namespace {
2337
2338bool DeterminedAlready(QuickCheckDetails* quick_check, int offset) {
2339 if (quick_check == nullptr) return false;
2340 if (offset >= quick_check->characters()) return false;
2341 return quick_check->positions(offset)->determines_perfectly;
2342}
2343
2344void UpdateBoundsCheck(int index, int* checked_up_to) {
2345 if (index > *checked_up_to) {
2346 *checked_up_to = index;
2347 }
2348}
2349
2350} // namespace
2351
2352// We call this repeatedly to generate code for each pass over the text node.
2353// The passes are in increasing order of difficulty because we hope one
2354// of the first passes will fail in which case we are saved the work of the
2355// later passes. for example for the case independent regexp /%[asdfghjkl]a/
2356// we will check the '%' in the first pass, the case independent 'a' in the
2357// second pass and the character class in the last pass.
2358//
2359// The passes are done from right to left, so for example to test for /bar/
2360// we will first test for an 'r' with offset 2, then an 'a' with offset 1
2361// and then a 'b' with offset 0. This means we can avoid the end-of-input
2362// bounds check most of the time. In the example we only need to check for
2363// end-of-input when loading the putative 'r'.
2364//
2365// A slight complication involves the fact that the first character may already
2366// be fetched into a register by the previous node. In this case we want to
2367// do the test for that character first. We do this in separate passes. The
2368// 'preloaded' argument indicates that we are doing such a 'pass'. If such a
2369// pass has been performed then subsequent passes will have true in
2370// first_element_checked to indicate that that character does not need to be
2371// checked again.
2372//
2373// In addition to all this we are passed a Trace, which can
2374// contain an AlternativeGeneration object. In this AlternativeGeneration
2375// object we can see details of any quick check that was already passed in
2376// order to get to the code we are now generating. The quick check can involve
2377// loading characters, which means we do not need to recheck the bounds
2378// up to the limit the quick check already checked. In addition the quick
2379// check can have involved a mask and compare operation which may simplify
2380// or obviate the need for further checks at some character positions.
2381void TextNode::TextEmitPass(RegExpCompiler* compiler, TextEmitPassType pass,
2382 bool preloaded, Trace* trace,
2383 bool first_element_checked, int* checked_up_to) {
2384 RegExpMacroAssembler* assembler = compiler->macro_assembler();
2385 Isolate* isolate = assembler->isolate();
2386 bool one_byte = compiler->one_byte();
2387 Label* backtrack = trace->backtrack();
2388 QuickCheckDetails* quick_check = trace->quick_check_performed();
2389 int element_count = elements()->length();
2390 int backward_offset = read_backward() ? -Length() : 0;
2391 for (int i = preloaded ? 0 : element_count - 1; i >= 0; i--) {
2392 TextElement elm = elements()->at(i);
2393 int cp_offset = trace->cp_offset() + elm.cp_offset() + backward_offset;
2394 if (elm.text_type() == TextElement::ATOM) {
2395 base::Vector<const base::uc16> quarks = elm.atom()->data();
2396 for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) {
2397 if (first_element_checked && i == 0 && j == 0) continue;
2398 if (DeterminedAlready(quick_check, elm.cp_offset() + j)) continue;
2399 base::uc16 quark = quarks[j];
2400 bool needs_bounds_check =
2401 *checked_up_to < cp_offset + j || read_backward();
2402 bool bounds_checked = false;
2403 switch (pass) {
2404 case NON_LATIN1_MATCH: {
2405 DCHECK(one_byte)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(one_byte)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(one_byte))), 0))) { do { } while
(false); MOZ_ReportAssertionFailure("one_byte", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 2405); AnnotateMozCrashReason("MOZ_ASSERT" "(" "one_byte" ")"
); do { *((volatile int*)__null) = 2405; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
; // This pass is only done in one-byte mode.
2406 if (IsIgnoreCase(compiler->flags())) {
2407 // We are compiling for a one-byte subject, case independent mode.
2408 // We have to check whether any of the case alternatives are in
2409 // the one-byte range.
2410 unibrow::uchar chars[4];
2411 // Only returns characters that are in the one-byte range.
2412 int length =
2413 GetCaseIndependentLetters(isolate, quark, compiler, chars, 4);
2414 if (length == 0) {
2415 assembler->GoTo(backtrack);
2416 return;
2417 }
2418 } else {
2419 // Case-dependent mode.
2420 if (quark > String::kMaxOneByteCharCode) {
2421 assembler->GoTo(backtrack);
2422 return;
2423 }
2424 }
2425 break;
2426 }
2427 case NON_LETTER_CHARACTER_MATCH:
2428 bounds_checked =
2429 EmitAtomNonLetter(isolate, compiler, quark, backtrack,
2430 cp_offset + j, needs_bounds_check, preloaded);
2431 break;
2432 case SIMPLE_CHARACTER_MATCH:
2433 bounds_checked = EmitSimpleCharacter(isolate, compiler, quark,
2434 backtrack, cp_offset + j,
2435 needs_bounds_check, preloaded);
2436 break;
2437 case CASE_CHARACTER_MATCH:
2438 bounds_checked =
2439 EmitAtomLetter(isolate, compiler, quark, backtrack,
2440 cp_offset + j, needs_bounds_check, preloaded);
2441 break;
2442 default:
2443 break;
2444 }
2445 if (bounds_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to);
2446 }
2447 } else {
2448 DCHECK_EQ(TextElement::CLASS_RANGES, elm.text_type())do { static_assert( mozilla::detail::AssertionConditionType<
decltype((TextElement::CLASS_RANGES) == (elm.text_type()))>
::isValid, "invalid assertion condition"); if ((__builtin_expect
(!!(!(!!((TextElement::CLASS_RANGES) == (elm.text_type())))),
0))) { do { } while (false); MOZ_ReportAssertionFailure("(TextElement::CLASS_RANGES) == (elm.text_type())"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 2448); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(TextElement::CLASS_RANGES) == (elm.text_type())"
")"); do { *((volatile int*)__null) = 2448; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
2449 if (pass == CHARACTER_CLASS_MATCH) {
2450 if (first_element_checked && i == 0) continue;
2451 if (DeterminedAlready(quick_check, elm.cp_offset())) continue;
2452 RegExpClassRanges* cr = elm.class_ranges();
2453 bool bounds_check = *checked_up_to < cp_offset || read_backward();
2454 EmitClassRanges(assembler, cr, one_byte, backtrack, cp_offset,
2455 bounds_check, preloaded, zone());
2456 UpdateBoundsCheck(cp_offset, checked_up_to);
2457 }
2458 }
2459 }
2460}
2461
2462int TextNode::Length() {
2463 TextElement elm = elements()->last();
2464 DCHECK_LE(0, elm.cp_offset())do { static_assert( mozilla::detail::AssertionConditionType<
decltype((0) <= (elm.cp_offset()))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((0) <= (elm.cp_offset()))
)), 0))) { do { } while (false); MOZ_ReportAssertionFailure("(0) <= (elm.cp_offset())"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 2464); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(0) <= (elm.cp_offset())"
")"); do { *((volatile int*)__null) = 2464; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
2465 return elm.cp_offset() + elm.length();
2466}
2467
2468TextNode* TextNode::CreateForCharacterRanges(Zone* zone,
2469 ZoneList<CharacterRange>* ranges,
2470 bool read_backward,
2471 RegExpNode* on_success) {
2472 DCHECK_NOT_NULL(ranges)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((ranges) != nullptr)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((ranges) != nullptr))), 0)))
{ do { } while (false); MOZ_ReportAssertionFailure("(ranges) != nullptr"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 2472); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(ranges) != nullptr"
")"); do { *((volatile int*)__null) = 2472; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
2473 // TODO(jgruber): There's no fundamental need to create this
2474 // RegExpClassRanges; we could refactor to avoid the allocation.
2475 return zone->New<TextNode>(zone->New<RegExpClassRanges>(zone, ranges),
2476 read_backward, on_success);
2477}
2478
2479TextNode* TextNode::CreateForSurrogatePair(
2480 Zone* zone, CharacterRange lead, ZoneList<CharacterRange>* trail_ranges,
2481 bool read_backward, RegExpNode* on_success) {
2482 ZoneList<TextElement>* elms = zone->New<ZoneList<TextElement>>(2, zone);
2483 if (lead.from() == lead.to()) {
2484 ZoneList<base::uc16> lead_surrogate(1, zone);
2485 lead_surrogate.Add(lead.from(), zone);
2486 RegExpAtom* atom = zone->New<RegExpAtom>(lead_surrogate.ToConstVector());
2487 elms->Add(TextElement::Atom(atom), zone);
2488 } else {
2489 ZoneList<CharacterRange>* lead_ranges = CharacterRange::List(zone, lead);
2490 elms->Add(TextElement::ClassRanges(
2491 zone->New<RegExpClassRanges>(zone, lead_ranges)),
2492 zone);
2493 }
2494 elms->Add(TextElement::ClassRanges(
2495 zone->New<RegExpClassRanges>(zone, trail_ranges)),
2496 zone);
2497 return zone->New<TextNode>(elms, read_backward, on_success);
2498}
2499
2500TextNode* TextNode::CreateForSurrogatePair(
2501 Zone* zone, ZoneList<CharacterRange>* lead_ranges, CharacterRange trail,
2502 bool read_backward, RegExpNode* on_success) {
2503 ZoneList<CharacterRange>* trail_ranges = CharacterRange::List(zone, trail);
2504 ZoneList<TextElement>* elms = zone->New<ZoneList<TextElement>>(2, zone);
2505 elms->Add(
2506 TextElement::ClassRanges(zone->New<RegExpClassRanges>(zone, lead_ranges)),
2507 zone);
2508 elms->Add(TextElement::ClassRanges(
2509 zone->New<RegExpClassRanges>(zone, trail_ranges)),
2510 zone);
2511 return zone->New<TextNode>(elms, read_backward, on_success);
2512}
2513
2514// This generates the code to match a text node. A text node can contain
2515// straight character sequences (possibly to be matched in a case-independent
2516// way) and character classes. For efficiency we do not do this in a single
2517// pass from left to right. Instead we pass over the text node several times,
2518// emitting code for some character positions every time. See the comment on
2519// TextEmitPass for details.
2520void TextNode::Emit(RegExpCompiler* compiler, Trace* trace) {
2521 LimitResult limit_result = LimitVersions(compiler, trace);
2522 if (limit_result == DONE) return;
2523 DCHECK(limit_result == CONTINUE)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(limit_result == CONTINUE)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(limit_result == CONTINUE))),
0))) { do { } while (false); MOZ_ReportAssertionFailure("limit_result == CONTINUE"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 2523); AnnotateMozCrashReason("MOZ_ASSERT" "(" "limit_result == CONTINUE"
")"); do { *((volatile int*)__null) = 2523; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
2524
2525 if (trace->cp_offset() + Length() > RegExpMacroAssembler::kMaxCPOffset) {
2526 compiler->SetRegExpTooBig();
2527 return;
2528 }
2529
2530 if (compiler->one_byte()) {
2531 int dummy = 0;
2532 TextEmitPass(compiler, NON_LATIN1_MATCH, false, trace, false, &dummy);
2533 }
2534
2535 bool first_elt_done = false;
2536 int bound_checked_to = trace->cp_offset() - 1;
2537 bound_checked_to += trace->bound_checked_up_to();
2538
2539 // If a character is preloaded into the current character register then
2540 // check that first to save reloading it.
2541 for (int twice = 0; twice < 2; twice++) {
2542 bool is_preloaded_pass = twice == 0;
2543 if (is_preloaded_pass && trace->characters_preloaded() != 1) continue;
2544 if (IsIgnoreCase(compiler->flags())) {
2545 TextEmitPass(compiler, NON_LETTER_CHARACTER_MATCH, is_preloaded_pass,
2546 trace, first_elt_done, &bound_checked_to);
2547 TextEmitPass(compiler, CASE_CHARACTER_MATCH, is_preloaded_pass, trace,
2548 first_elt_done, &bound_checked_to);
2549 } else {
2550 TextEmitPass(compiler, SIMPLE_CHARACTER_MATCH, is_preloaded_pass, trace,
2551 first_elt_done, &bound_checked_to);
2552 }
2553 TextEmitPass(compiler, CHARACTER_CLASS_MATCH, is_preloaded_pass, trace,
2554 first_elt_done, &bound_checked_to);
2555 first_elt_done = true;
2556 }
2557
2558 Trace successor_trace(*trace);
2559 // If we advance backward, we may end up at the start.
2560 successor_trace.AdvanceCurrentPositionInTrace(
2561 read_backward() ? -Length() : Length(), compiler);
2562 successor_trace.set_at_start(read_backward() ? Trace::UNKNOWN
2563 : Trace::FALSE_VALUE);
2564 RecursionCheck rc(compiler);
2565 on_success()->Emit(compiler, &successor_trace);
2566}
2567
2568void Trace::InvalidateCurrentCharacter() { characters_preloaded_ = 0; }
2569
2570void Trace::AdvanceCurrentPositionInTrace(int by, RegExpCompiler* compiler) {
2571 // We don't have an instruction for shifting the current character register
2572 // down or for using a shifted value for anything so lets just forget that
2573 // we preloaded any characters into it.
2574 characters_preloaded_ = 0;
2575 // Adjust the offsets of the quick check performed information. This
2576 // information is used to find out what we already determined about the
2577 // characters by means of mask and compare.
2578 quick_check_performed_.Advance(by, compiler->one_byte());
2579 cp_offset_ += by;
2580 if (cp_offset_ > RegExpMacroAssembler::kMaxCPOffset) {
2581 compiler->SetRegExpTooBig();
2582 cp_offset_ = 0;
2583 }
2584 bound_checked_up_to_ = std::max(0, bound_checked_up_to_ - by);
2585}
2586
2587void TextNode::MakeCaseIndependent(Isolate* isolate, bool is_one_byte,
2588 RegExpFlags flags) {
2589 if (!IsIgnoreCase(flags)) return;
2590#ifdef V8_INTL_SUPPORT1
2591 // This is done in an earlier step when generating the nodes from the AST
2592 // because we may have to split up into separate nodes.
2593 if (NeedsUnicodeCaseEquivalents(flags)) return;
2594#endif
2595
2596 int element_count = elements()->length();
2597 for (int i = 0; i < element_count; i++) {
2598 TextElement elm = elements()->at(i);
2599 if (elm.text_type() == TextElement::CLASS_RANGES) {
2600 RegExpClassRanges* cr = elm.class_ranges();
2601 // None of the standard character classes is different in the case
2602 // independent case and it slows us down if we don't know that.
2603 if (cr->is_standard(zone())) continue;
2604 ZoneList<CharacterRange>* ranges = cr->ranges(zone());
2605 CharacterRange::AddCaseEquivalents(isolate, zone(), ranges, is_one_byte);
2606 }
2607 }
2608}
2609
2610int TextNode::GreedyLoopTextLength() { return Length(); }
2611
2612RegExpNode* TextNode::GetSuccessorOfOmnivorousTextNode(
2613 RegExpCompiler* compiler) {
2614 if (read_backward()) return nullptr;
2615 if (elements()->length() != 1) return nullptr;
2616 TextElement elm = elements()->at(0);
2617 if (elm.text_type() != TextElement::CLASS_RANGES) return nullptr;
2618 RegExpClassRanges* node = elm.class_ranges();
2619 ZoneList<CharacterRange>* ranges = node->ranges(zone());
2620 CharacterRange::Canonicalize(ranges);
2621 if (node->is_negated()) {
2622 return ranges->length() == 0 ? on_success() : nullptr;
2623 }
2624 if (ranges->length() != 1) return nullptr;
2625 const base::uc32 max_char = MaxCodeUnit(compiler->one_byte());
2626 return ranges->at(0).IsEverything(max_char) ? on_success() : nullptr;
2627}
2628
2629// Finds the fixed match length of a sequence of nodes that goes from
2630// this alternative and back to this choice node. If there are variable
2631// length nodes or other complications in the way then return a sentinel
2632// value indicating that a greedy loop cannot be constructed.
2633int ChoiceNode::GreedyLoopTextLengthForAlternative(
2634 GuardedAlternative* alternative) {
2635 int length = 0;
2636 RegExpNode* node = alternative->node();
2637 // Later we will generate code for all these text nodes using recursion
2638 // so we have to limit the max number.
2639 int recursion_depth = 0;
2640 while (node != this) {
2641 if (recursion_depth++ > RegExpCompiler::kMaxRecursion) {
2642 return kNodeIsTooComplexForGreedyLoops;
2643 }
2644 int node_length = node->GreedyLoopTextLength();
2645 if (node_length == kNodeIsTooComplexForGreedyLoops) {
2646 return kNodeIsTooComplexForGreedyLoops;
2647 }
2648 length += node_length;
2649 SeqRegExpNode* seq_node = static_cast<SeqRegExpNode*>(node);
2650 node = seq_node->on_success();
2651 }
2652 if (read_backward()) {
2653 length = -length;
2654 }
2655 // Check that we can jump by the whole text length. If not, return sentinel
2656 // to indicate the we can't construct a greedy loop.
2657 if (length < RegExpMacroAssembler::kMinCPOffset ||
2658 length > RegExpMacroAssembler::kMaxCPOffset) {
2659 return kNodeIsTooComplexForGreedyLoops;
2660 }
2661 return length;
2662}
2663
2664void LoopChoiceNode::AddLoopAlternative(GuardedAlternative alt) {
2665 DCHECK_NULL(loop_node_)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((loop_node_) == nullptr)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((loop_node_) == nullptr))), 0
))) { do { } while (false); MOZ_ReportAssertionFailure("(loop_node_) == nullptr"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 2665); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(loop_node_) == nullptr"
")"); do { *((volatile int*)__null) = 2665; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
2666 AddAlternative(alt);
2667 loop_node_ = alt.node();
2668}
2669
2670void LoopChoiceNode::AddContinueAlternative(GuardedAlternative alt) {
2671 DCHECK_NULL(continue_node_)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((continue_node_) == nullptr)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((continue_node_) == nullptr)
)), 0))) { do { } while (false); MOZ_ReportAssertionFailure("(continue_node_) == nullptr"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 2671); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(continue_node_) == nullptr"
")"); do { *((volatile int*)__null) = 2671; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
2672 AddAlternative(alt);
2673 continue_node_ = alt.node();
2674}
2675
2676void LoopChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
2677 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
2678 if (trace->stop_node() == this) {
1
Assuming the condition is false
2
Taking false branch
2679 // Back edge of greedy optimized loop node graph.
2680 int text_length =
2681 GreedyLoopTextLengthForAlternative(&(alternatives_->at(0)));
2682 DCHECK_NE(kNodeIsTooComplexForGreedyLoops, text_length)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((kNodeIsTooComplexForGreedyLoops) != (text_length))>
::isValid, "invalid assertion condition"); if ((__builtin_expect
(!!(!(!!((kNodeIsTooComplexForGreedyLoops) != (text_length)))
), 0))) { do { } while (false); MOZ_ReportAssertionFailure("(kNodeIsTooComplexForGreedyLoops) != (text_length)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 2682); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(kNodeIsTooComplexForGreedyLoops) != (text_length)"
")"); do { *((volatile int*)__null) = 2682; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
2683 // Update the counter-based backtracking info on the stack. This is an
2684 // optimization for greedy loops (see below).
2685 DCHECK(trace->cp_offset() == text_length)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(trace->cp_offset() == text_length)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(trace->cp_offset() == text_length
))), 0))) { do { } while (false); MOZ_ReportAssertionFailure(
"trace->cp_offset() == text_length", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 2685); AnnotateMozCrashReason("MOZ_ASSERT" "(" "trace->cp_offset() == text_length"
")"); do { *((volatile int*)__null) = 2685; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
2686 macro_assembler->AdvanceCurrentPosition(text_length);
2687 macro_assembler->GoTo(trace->loop_label());
2688 return;
2689 }
2690 DCHECK_NULL(trace->stop_node())do { static_assert( mozilla::detail::AssertionConditionType<
decltype((trace->stop_node()) == nullptr)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((trace->stop_node()) == nullptr
))), 0))) { do { } while (false); MOZ_ReportAssertionFailure(
"(trace->stop_node()) == nullptr", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 2690); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(trace->stop_node()) == nullptr"
")"); do { *((volatile int*)__null) = 2690; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
3
Assuming the condition is true
4
Taking false branch
5
Loop condition is false. Exiting loop
2691 if (!trace->is_trivial()) {
6
Taking false branch
2692 trace->Flush(compiler, this);
2693 return;
2694 }
2695 ChoiceNode::Emit(compiler, trace);
7
Calling 'ChoiceNode::Emit'
2696}
2697
2698int ChoiceNode::CalculatePreloadCharacters(RegExpCompiler* compiler,
2699 int eats_at_least) {
2700 int preload_characters = std::min(4, eats_at_least);
2701 DCHECK_LE(preload_characters, 4)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((preload_characters) <= (4))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((preload_characters) <= (
4)))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("(preload_characters) <= (4)", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 2701); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(preload_characters) <= (4)"
")"); do { *((volatile int*)__null) = 2701; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
2702 if (compiler->macro_assembler()->CanReadUnaligned()) {
2703 bool one_byte = compiler->one_byte();
2704 if (one_byte) {
2705 // We can't preload 3 characters because there is no machine instruction
2706 // to do that. We can't just load 4 because we could be reading
2707 // beyond the end of the string, which could cause a memory fault.
2708 if (preload_characters == 3) preload_characters = 2;
2709 } else {
2710 if (preload_characters > 2) preload_characters = 2;
2711 }
2712 } else {
2713 if (preload_characters > 1) preload_characters = 1;
2714 }
2715 return preload_characters;
2716}
2717
2718// This class is used when generating the alternatives in a choice node. It
2719// records the way the alternative is being code generated.
2720class AlternativeGeneration : public Malloced {
2721 public:
2722 AlternativeGeneration()
2723 : possible_success(),
2724 expects_preload(false),
2725 after(),
2726 quick_check_details() {}
2727 Label possible_success;
2728 bool expects_preload;
2729 Label after;
2730 QuickCheckDetails quick_check_details;
2731};
2732
2733// Creates a list of AlternativeGenerations. If the list has a reasonable
2734// size then it is on the stack, otherwise the excess is on the heap.
2735class AlternativeGenerationList {
2736 public:
2737 AlternativeGenerationList(int count, Zone* zone) : alt_gens_(count, zone) {
2738 for (int i = 0; i < count && i < kAFew; i++) {
2739 alt_gens_.Add(a_few_alt_gens_ + i, zone);
2740 }
2741 for (int i = kAFew; i < count; i++) {
2742 alt_gens_.Add(new AlternativeGeneration(), zone);
2743 }
2744 }
2745 ~AlternativeGenerationList() {
2746 for (int i = kAFew; i < alt_gens_.length(); i++) {
2747 delete alt_gens_[i];
2748 alt_gens_[i] = nullptr;
2749 }
2750 }
2751
2752 AlternativeGeneration* at(int i) { return alt_gens_[i]; }
2753
2754 private:
2755 static const int kAFew = 10;
2756 ZoneList<AlternativeGeneration*> alt_gens_;
2757 AlternativeGeneration a_few_alt_gens_[kAFew];
2758};
2759
2760void BoyerMoorePositionInfo::Set(int character) {
2761 SetInterval(Interval(character, character));
2762}
2763
2764namespace {
2765
2766ContainedInLattice AddRange(ContainedInLattice containment, const int* ranges,
2767 int ranges_length, Interval new_range) {
2768 DCHECK_EQ(1, ranges_length & 1)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((1) == (ranges_length & 1))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((1) == (ranges_length & 1
)))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("(1) == (ranges_length & 1)", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 2768); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(1) == (ranges_length & 1)"
")"); do { *((volatile int*)__null) = 2768; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
2769 DCHECK_EQ(String::kMaxCodePoint + 1, ranges[ranges_length - 1])do { static_assert( mozilla::detail::AssertionConditionType<
decltype((String::kMaxCodePoint + 1) == (ranges[ranges_length
- 1]))>::isValid, "invalid assertion condition"); if ((__builtin_expect
(!!(!(!!((String::kMaxCodePoint + 1) == (ranges[ranges_length
- 1])))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("(String::kMaxCodePoint + 1) == (ranges[ranges_length - 1])"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 2769); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(String::kMaxCodePoint + 1) == (ranges[ranges_length - 1])"
")"); do { *((volatile int*)__null) = 2769; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
2770 if (containment == kLatticeUnknown) return containment;
2771 bool inside = false;
2772 int last = 0;
2773 for (int i = 0; i < ranges_length; inside = !inside, last = ranges[i], i++) {
2774 // Consider the range from last to ranges[i].
2775 // We haven't got to the new range yet.
2776 if (ranges[i] <= new_range.from()) continue;
2777 // New range is wholly inside last-ranges[i]. Note that new_range.to() is
2778 // inclusive, but the values in ranges are not.
2779 if (last <= new_range.from() && new_range.to() < ranges[i]) {
2780 return Combine(containment, inside ? kLatticeIn : kLatticeOut);
2781 }
2782 return kLatticeUnknown;
2783 }
2784 return containment;
2785}
2786
2787int BitsetFirstSetBit(BoyerMoorePositionInfo::Bitset bitset) {
2788 static_assert(BoyerMoorePositionInfo::kMapSize ==
2789 2 * kInt64Size * kBitsPerByte);
2790
2791 // Slight fiddling is needed here, since the bitset is of length 128 while
2792 // CountTrailingZeros requires an integral type and std::bitset can only
2793 // convert to unsigned long long. So we handle the most- and least-significant
2794 // bits separately.
2795
2796 {
2797 static constexpr BoyerMoorePositionInfo::Bitset mask(~uint64_t{0});
2798 BoyerMoorePositionInfo::Bitset masked_bitset = bitset & mask;
2799 static_assert(kInt64Size >= sizeof(decltype(masked_bitset.to_ullong())));
2800 uint64_t lsb = masked_bitset.to_ullong();
2801 if (lsb != 0) return base::bits::CountTrailingZeros(lsb);
2802 }
2803
2804 {
2805 BoyerMoorePositionInfo::Bitset masked_bitset = bitset >> 64;
2806 uint64_t msb = masked_bitset.to_ullong();
2807 if (msb != 0) return 64 + base::bits::CountTrailingZeros(msb);
2808 }
2809
2810 return -1;
2811}
2812
2813} // namespace
2814
2815void BoyerMoorePositionInfo::SetInterval(const Interval& interval) {
2816 w_ = AddRange(w_, kWordRanges, kWordRangeCount, interval);
2817
2818 if (interval.size() >= kMapSize) {
2819 map_count_ = kMapSize;
2820 map_.set();
2821 return;
2822 }
2823
2824 for (int i = interval.from(); i <= interval.to(); i++) {
2825 int mod_character = (i & kMask);
2826 if (!map_[mod_character]) {
2827 map_count_++;
2828 map_.set(mod_character);
2829 }
2830 if (map_count_ == kMapSize) return;
2831 }
2832}
2833
2834void BoyerMoorePositionInfo::SetAll() {
2835 w_ = kLatticeUnknown;
2836 if (map_count_ != kMapSize) {
2837 map_count_ = kMapSize;
2838 map_.set();
2839 }
2840}
2841
2842BoyerMooreLookahead::BoyerMooreLookahead(int length, RegExpCompiler* compiler,
2843 Zone* zone)
2844 : length_(length),
2845 compiler_(compiler),
2846 max_char_(MaxCodeUnit(compiler->one_byte())) {
2847 bitmaps_ = zone->New<ZoneList<BoyerMoorePositionInfo*>>(length, zone);
2848 for (int i = 0; i < length; i++) {
2849 bitmaps_->Add(zone->New<BoyerMoorePositionInfo>(), zone);
2850 }
2851}
2852
2853// Find the longest range of lookahead that has the fewest number of different
2854// characters that can occur at a given position. Since we are optimizing two
2855// different parameters at once this is a tradeoff.
2856bool BoyerMooreLookahead::FindWorthwhileInterval(int* from, int* to) {
2857 int biggest_points = 0;
2858 // If more than 32 characters out of 128 can occur it is unlikely that we can
2859 // be lucky enough to step forwards much of the time.
2860 const int kMaxMax = 32;
2861 for (int max_number_of_chars = 4; max_number_of_chars < kMaxMax;
2862 max_number_of_chars *= 2) {
2863 biggest_points =
2864 FindBestInterval(max_number_of_chars, biggest_points, from, to);
2865 }
2866 if (biggest_points == 0) return false;
2867 return true;
2868}
2869
2870// Find the highest-points range between 0 and length_ where the character
2871// information is not too vague. 'Too vague' means that there are more than
2872// max_number_of_chars that can occur at this position. Calculates the number
2873// of points as the product of width-of-the-range and
2874// probability-of-finding-one-of-the-characters, where the probability is
2875// calculated using the frequency distribution of the sample subject string.
2876int BoyerMooreLookahead::FindBestInterval(int max_number_of_chars,
2877 int old_biggest_points, int* from,
2878 int* to) {
2879 int biggest_points = old_biggest_points;
2880 static const int kSize = RegExpMacroAssembler::kTableSize;
2881 for (int i = 0; i < length_;) {
2882 while (i < length_ && Count(i) > max_number_of_chars) i++;
2883 if (i == length_) break;
2884 int remembered_from = i;
2885
2886 BoyerMoorePositionInfo::Bitset union_bitset;
2887 for (; i < length_ && Count(i) <= max_number_of_chars; i++) {
2888 union_bitset |= bitmaps_->at(i)->raw_bitset();
2889 }
2890
2891 int frequency = 0;
2892
2893 // Iterate only over set bits.
2894 int j;
2895 while ((j = BitsetFirstSetBit(union_bitset)) != -1) {
2896 DCHECK(union_bitset[j])do { static_assert( mozilla::detail::AssertionConditionType<
decltype(union_bitset[j])>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(union_bitset[j]))), 0))) { do
{ } while (false); MOZ_ReportAssertionFailure("union_bitset[j]"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 2896); AnnotateMozCrashReason("MOZ_ASSERT" "(" "union_bitset[j]"
")"); do { *((volatile int*)__null) = 2896; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
; // Sanity check.
2897 // Add 1 to the frequency to give a small per-character boost for
2898 // the cases where our sampling is not good enough and many
2899 // characters have a frequency of zero. This means the frequency
2900 // can theoretically be up to 2*kSize though we treat it mostly as
2901 // a fraction of kSize.
2902 frequency += compiler_->frequency_collator()->Frequency(j) + 1;
2903 union_bitset.reset(j);
2904 }
2905
2906 // We use the probability of skipping times the distance we are skipping to
2907 // judge the effectiveness of this. Actually we have a cut-off: By
2908 // dividing by 2 we switch off the skipping if the probability of skipping
2909 // is less than 50%. This is because the multibyte mask-and-compare
2910 // skipping in quickcheck is more likely to do well on this case.
2911 bool in_quickcheck_range =
2912 ((i - remembered_from < 4) ||
2913 (compiler_->one_byte() ? remembered_from <= 4 : remembered_from <= 2));
2914 // Called 'probability' but it is only a rough estimate and can actually
2915 // be outside the 0-kSize range.
2916 int probability = (in_quickcheck_range ? kSize / 2 : kSize) - frequency;
2917 int points = (i - remembered_from) * probability;
2918 if (points > biggest_points) {
2919 *from = remembered_from;
2920 *to = i - 1;
2921 biggest_points = points;
2922 }
2923 }
2924 return biggest_points;
2925}
2926
2927// Take all the characters that will not prevent a successful match if they
2928// occur in the subject string in the range between min_lookahead and
2929// max_lookahead (inclusive) measured from the current position. If the
2930// character at max_lookahead offset is not one of these characters, then we
2931// can safely skip forwards by the number of characters in the range.
2932int BoyerMooreLookahead::GetSkipTable(
2933 int min_lookahead, int max_lookahead,
2934 DirectHandle<ByteArray> boolean_skip_table) {
2935 const int kSkipArrayEntry = 0;
2936 const int kDontSkipArrayEntry = 1;
2937
2938 std::memset(boolean_skip_table->begin(), kSkipArrayEntry,
2939 boolean_skip_table->length());
2940
2941 for (int i = max_lookahead; i >= min_lookahead; i--) {
2942 BoyerMoorePositionInfo::Bitset bitset = bitmaps_->at(i)->raw_bitset();
2943
2944 // Iterate only over set bits.
2945 int j;
2946 while ((j = BitsetFirstSetBit(bitset)) != -1) {
2947 DCHECK(bitset[j])do { static_assert( mozilla::detail::AssertionConditionType<
decltype(bitset[j])>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(bitset[j]))), 0))) { do { } while
(false); MOZ_ReportAssertionFailure("bitset[j]", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 2947); AnnotateMozCrashReason("MOZ_ASSERT" "(" "bitset[j]" ")"
); do { *((volatile int*)__null) = 2947; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
; // Sanity check.
2948 boolean_skip_table->set(j, kDontSkipArrayEntry);
2949 bitset.reset(j);
2950 }
2951 }
2952
2953 const int skip = max_lookahead + 1 - min_lookahead;
2954 return skip;
2955}
2956
2957// See comment above on the implementation of GetSkipTable.
2958void BoyerMooreLookahead::EmitSkipInstructions(RegExpMacroAssembler* masm) {
2959 const int kSize = RegExpMacroAssembler::kTableSize;
2960
2961 int min_lookahead = 0;
2962 int max_lookahead = 0;
2963
2964 if (!FindWorthwhileInterval(&min_lookahead, &max_lookahead)) return;
2965
2966 // Check if we only have a single non-empty position info, and that info
2967 // contains precisely one character.
2968 bool found_single_character = false;
2969 int single_character = 0;
2970 for (int i = max_lookahead; i >= min_lookahead; i--) {
2971 BoyerMoorePositionInfo* map = bitmaps_->at(i);
2972 if (map->map_count() == 0) continue;
2973
2974 if (found_single_character || map->map_count() > 1) {
2975 found_single_character = false;
2976 break;
2977 }
2978
2979 DCHECK(!found_single_character)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(!found_single_character)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(!found_single_character))), 0
))) { do { } while (false); MOZ_ReportAssertionFailure("!found_single_character"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 2979); AnnotateMozCrashReason("MOZ_ASSERT" "(" "!found_single_character"
")"); do { *((volatile int*)__null) = 2979; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
2980 DCHECK_EQ(map->map_count(), 1)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((map->map_count()) == (1))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((map->map_count()) == (1)
))), 0))) { do { } while (false); MOZ_ReportAssertionFailure(
"(map->map_count()) == (1)", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 2980); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(map->map_count()) == (1)"
")"); do { *((volatile int*)__null) = 2980; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
2981
2982 found_single_character = true;
2983 single_character = BitsetFirstSetBit(map->raw_bitset());
2984
2985 DCHECK_NE(single_character, -1)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((single_character) != (-1))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((single_character) != (-1)))
), 0))) { do { } while (false); MOZ_ReportAssertionFailure("(single_character) != (-1)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 2985); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(single_character) != (-1)"
")"); do { *((volatile int*)__null) = 2985; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
2986 }
2987
2988 int lookahead_width = max_lookahead + 1 - min_lookahead;
2989
2990 if (found_single_character && lookahead_width == 1 && max_lookahead < 3) {
2991 // The mask-compare can probably handle this better.
2992 return;
2993 }
2994
2995 if (found_single_character) {
2996 Label cont, again;
2997 masm->Bind(&again);
2998 masm->LoadCurrentCharacter(max_lookahead, &cont, true);
2999 if (max_char_ > kSize) {
3000 masm->CheckCharacterAfterAnd(single_character,
3001 RegExpMacroAssembler::kTableMask, &cont);
3002 } else {
3003 masm->CheckCharacter(single_character, &cont);
3004 }
3005 masm->AdvanceCurrentPosition(lookahead_width);
3006 masm->GoTo(&again);
3007 masm->Bind(&cont);
3008 return;
3009 }
3010
3011 Factory* factory = masm->isolate()->factory();
3012 Handle<ByteArray> boolean_skip_table =
3013 factory->NewByteArray(kSize, AllocationType::kOld);
3014 int skip_distance =
3015 GetSkipTable(min_lookahead, max_lookahead, boolean_skip_table);
3016 DCHECK_NE(0, skip_distance)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((0) != (skip_distance))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((0) != (skip_distance)))), 0
))) { do { } while (false); MOZ_ReportAssertionFailure("(0) != (skip_distance)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 3016); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(0) != (skip_distance)"
")"); do { *((volatile int*)__null) = 3016; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
3017
3018 Label cont, again;
3019 masm->Bind(&again);
3020 masm->LoadCurrentCharacter(max_lookahead, &cont, true);
3021 masm->CheckBitInTable(boolean_skip_table, &cont);
3022 masm->AdvanceCurrentPosition(skip_distance);
3023 masm->GoTo(&again);
3024 masm->Bind(&cont);
3025}
3026
3027/* Code generation for choice nodes.
3028 *
3029 * We generate quick checks that do a mask and compare to eliminate a
3030 * choice. If the quick check succeeds then it jumps to the continuation to
3031 * do slow checks and check subsequent nodes. If it fails (the common case)
3032 * it falls through to the next choice.
3033 *
3034 * Here is the desired flow graph. Nodes directly below each other imply
3035 * fallthrough. Alternatives 1 and 2 have quick checks. Alternative
3036 * 3 doesn't have a quick check so we have to call the slow check.
3037 * Nodes are marked Qn for quick checks and Sn for slow checks. The entire
3038 * regexp continuation is generated directly after the Sn node, up to the
3039 * next GoTo if we decide to reuse some already generated code. Some
3040 * nodes expect preload_characters to be preloaded into the current
3041 * character register. R nodes do this preloading. Vertices are marked
3042 * F for failures and S for success (possible success in the case of quick
3043 * nodes). L, V, < and > are used as arrow heads.
3044 *
3045 * ----------> R
3046 * |
3047 * V
3048 * Q1 -----> S1
3049 * | S /
3050 * F| /
3051 * | F/
3052 * | /
3053 * | R
3054 * | /
3055 * V L
3056 * Q2 -----> S2
3057 * | S /
3058 * F| /
3059 * | F/
3060 * | /
3061 * | R
3062 * | /
3063 * V L
3064 * S3
3065 * |
3066 * F|
3067 * |
3068 * R
3069 * |
3070 * backtrack V
3071 * <----------Q4
3072 * \ F |
3073 * \ |S
3074 * \ F V
3075 * \-----S4
3076 *
3077 * For greedy loops we push the current position, then generate the code that
3078 * eats the input specially in EmitGreedyLoop. The other choice (the
3079 * continuation) is generated by the normal code in EmitChoices, and steps back
3080 * in the input to the starting position when it fails to match. The loop code
3081 * looks like this (U is the unwind code that steps back in the greedy loop).
3082 *
3083 * _____
3084 * / \
3085 * V |
3086 * ----------> S1 |
3087 * /| |
3088 * / |S |
3089 * F/ \_____/
3090 * /
3091 * |<-----
3092 * | \
3093 * V |S
3094 * Q2 ---> U----->backtrack
3095 * | F /
3096 * S| /
3097 * V F /
3098 * S2--/
3099 */
3100
3101GreedyLoopState::GreedyLoopState(bool not_at_start) {
3102 counter_backtrack_trace_.set_backtrack(&label_);
3103 if (not_at_start) counter_backtrack_trace_.set_at_start(Trace::FALSE_VALUE);
3104}
3105
3106void ChoiceNode::AssertGuardsMentionRegisters(Trace* trace) {
3107#ifdef DEBUG1
3108 int choice_count = alternatives_->length();
3109 for (int i = 0; i < choice_count - 1; i++) {
3110 GuardedAlternative alternative = alternatives_->at(i);
3111 ZoneList<Guard*>* guards = alternative.guards();
3112 int guard_count = (guards == nullptr) ? 0 : guards->length();
3113 for (int j = 0; j < guard_count; j++) {
3114 DCHECK(!trace->mentions_reg(guards->at(j)->reg()))do { static_assert( mozilla::detail::AssertionConditionType<
decltype(!trace->mentions_reg(guards->at(j)->reg()))
>::isValid, "invalid assertion condition"); if ((__builtin_expect
(!!(!(!!(!trace->mentions_reg(guards->at(j)->reg()))
)), 0))) { do { } while (false); MOZ_ReportAssertionFailure("!trace->mentions_reg(guards->at(j)->reg())"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 3114); AnnotateMozCrashReason("MOZ_ASSERT" "(" "!trace->mentions_reg(guards->at(j)->reg())"
")"); do { *((volatile int*)__null) = 3114; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
3115 }
3116 }
3117#endif
3118}
3119
3120void ChoiceNode::SetUpPreLoad(RegExpCompiler* compiler, Trace* current_trace,
3121 PreloadState* state) {
3122 if (state->eats_at_least_ == PreloadState::kEatsAtLeastNotYetInitialized) {
3123 // Save some time by looking at most one machine word ahead.
3124 state->eats_at_least_ =
3125 EatsAtLeast(current_trace->at_start() == Trace::FALSE_VALUE);
3126 }
3127 state->preload_characters_ =
3128 CalculatePreloadCharacters(compiler, state->eats_at_least_);
3129
3130 state->preload_is_current_ =
3131 (current_trace->characters_preloaded() == state->preload_characters_);
3132 state->preload_has_checked_bounds_ = state->preload_is_current_;
3133}
3134
3135void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
3136 int choice_count = alternatives_->length();
3137
3138 if (choice_count == 1 && alternatives_->at(0).guards() == nullptr) {
8
Assuming 'choice_count' is not equal to 1
3139 alternatives_->at(0).node()->Emit(compiler, trace);
3140 return;
3141 }
3142
3143 AssertGuardsMentionRegisters(trace);
3144
3145 LimitResult limit_result = LimitVersions(compiler, trace);
3146 if (limit_result == DONE) return;
9
Assuming 'limit_result' is not equal to DONE
10
Taking false branch
3147 DCHECK(limit_result == CONTINUE)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(limit_result == CONTINUE)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(limit_result == CONTINUE))),
0))) { do { } while (false); MOZ_ReportAssertionFailure("limit_result == CONTINUE"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 3147); AnnotateMozCrashReason("MOZ_ASSERT" "(" "limit_result == CONTINUE"
")"); do { *((volatile int*)__null) = 3147; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
11
Assuming 'limit_result' is equal to CONTINUE
12
Taking false branch
3148
3149 // For loop nodes we already flushed (see LoopChoiceNode::Emit), but for
3150 // other choice nodes we only flush if we are out of code size budget.
3151 if (trace->flush_budget() == 0 && trace->actions() != nullptr) {
13
Loop condition is false. Exiting loop
14
Assuming the condition is false
3152 trace->Flush(compiler, this);
3153 return;
3154 }
3155
3156 RecursionCheck rc(compiler);
3157
3158 PreloadState preload;
3159 preload.init();
3160 GreedyLoopState greedy_loop_state(not_at_start());
3161
3162 int text_length = GreedyLoopTextLengthForAlternative(&alternatives_->at(0));
3163 AlternativeGenerationList alt_gens(choice_count, zone());
3164
3165 if (choice_count
14.1
'choice_count' is <= 1
> 1 && text_length != kNodeIsTooComplexForGreedyLoops) {
3166 trace = EmitGreedyLoop(compiler, trace, &alt_gens, &preload,
3167 &greedy_loop_state, text_length);
3168 } else {
3169 preload.eats_at_least_ = EmitOptimizedUnanchoredSearch(compiler, trace);
3170
3171 EmitChoices(compiler, &alt_gens, 0, trace, &preload);
15
Calling 'ChoiceNode::EmitChoices'
3172 }
3173
3174 // At this point we need to generate slow checks for the alternatives where
3175 // the quick check was inlined. We can recognize these because the associated
3176 // label was bound.
3177 int new_flush_budget = trace->flush_budget() / choice_count;
3178 for (int i = 0; i < choice_count; i++) {
3179 AlternativeGeneration* alt_gen = alt_gens.at(i);
3180 Trace new_trace(*trace);
3181 // If there are actions to be flushed we have to limit how many times
3182 // they are flushed. Take the budget of the parent trace and distribute
3183 // it fairly amongst the children.
3184 if (new_trace.actions() != nullptr) {
3185 new_trace.set_flush_budget(new_flush_budget);
3186 }
3187 bool next_expects_preload =
3188 i == choice_count - 1 ? false : alt_gens.at(i + 1)->expects_preload;
3189 EmitOutOfLineContinuation(compiler, &new_trace, alternatives_->at(i),
3190 alt_gen, preload.preload_characters_,
3191 next_expects_preload);
3192 }
3193}
3194
3195Trace* ChoiceNode::EmitGreedyLoop(RegExpCompiler* compiler, Trace* trace,
3196 AlternativeGenerationList* alt_gens,
3197 PreloadState* preload,
3198 GreedyLoopState* greedy_loop_state,
3199 int text_length) {
3200 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
3201 // Here we have special handling for greedy loops containing only text nodes
3202 // and other simple nodes. These are handled by pushing the current
3203 // position on the stack and then incrementing the current position each
3204 // time around the switch. On backtrack we decrement the current position
3205 // and check it against the pushed value. This avoids pushing backtrack
3206 // information for each iteration of the loop, which could take up a lot of
3207 // space.
3208 DCHECK(trace->stop_node() == nullptr)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(trace->stop_node() == nullptr)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(trace->stop_node() == nullptr
))), 0))) { do { } while (false); MOZ_ReportAssertionFailure(
"trace->stop_node() == nullptr", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 3208); AnnotateMozCrashReason("MOZ_ASSERT" "(" "trace->stop_node() == nullptr"
")"); do { *((volatile int*)__null) = 3208; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
3209 macro_assembler->PushCurrentPosition();
3210 Label greedy_match_failed;
3211 Trace greedy_match_trace;
3212 if (not_at_start()) greedy_match_trace.set_at_start(Trace::FALSE_VALUE);
3213 greedy_match_trace.set_backtrack(&greedy_match_failed);
3214 Label loop_label;
3215 macro_assembler->Bind(&loop_label);
3216 greedy_match_trace.set_stop_node(this);
3217 greedy_match_trace.set_loop_label(&loop_label);
3218 alternatives_->at(0).node()->Emit(compiler, &greedy_match_trace);
3219 macro_assembler->Bind(&greedy_match_failed);
3220
3221 Label second_choice; // For use in greedy matches.
3222 macro_assembler->Bind(&second_choice);
3223
3224 Trace* new_trace = greedy_loop_state->counter_backtrack_trace();
3225
3226 EmitChoices(compiler, alt_gens, 1, new_trace, preload);
3227
3228 macro_assembler->Bind(greedy_loop_state->label());
3229 // If we have unwound to the bottom then backtrack.
3230 macro_assembler->CheckGreedyLoop(trace->backtrack());
3231 // Otherwise try the second priority at an earlier position.
3232 macro_assembler->AdvanceCurrentPosition(-text_length);
3233 macro_assembler->GoTo(&second_choice);
3234 return new_trace;
3235}
3236
3237int ChoiceNode::EmitOptimizedUnanchoredSearch(RegExpCompiler* compiler,
3238 Trace* trace) {
3239 int eats_at_least = PreloadState::kEatsAtLeastNotYetInitialized;
3240 if (alternatives_->length() != 2) return eats_at_least;
3241
3242 GuardedAlternative alt1 = alternatives_->at(1);
3243 if (alt1.guards() != nullptr && alt1.guards()->length() != 0) {
3244 return eats_at_least;
3245 }
3246 RegExpNode* eats_anything_node = alt1.node();
3247 if (eats_anything_node->GetSuccessorOfOmnivorousTextNode(compiler) != this) {
3248 return eats_at_least;
3249 }
3250
3251 // Really we should be creating a new trace when we execute this function,
3252 // but there is no need, because the code it generates cannot backtrack, and
3253 // we always arrive here with a trivial trace (since it's the entry to a
3254 // loop. That also implies that there are no preloaded characters, which is
3255 // good, because it means we won't be violating any assumptions by
3256 // overwriting those characters with new load instructions.
3257 DCHECK(trace->is_trivial())do { static_assert( mozilla::detail::AssertionConditionType<
decltype(trace->is_trivial())>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(trace->is_trivial()))), 0
))) { do { } while (false); MOZ_ReportAssertionFailure("trace->is_trivial()"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 3257); AnnotateMozCrashReason("MOZ_ASSERT" "(" "trace->is_trivial()"
")"); do { *((volatile int*)__null) = 3257; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
3258
3259 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
3260 Isolate* isolate = macro_assembler->isolate();
3261 // At this point we know that we are at a non-greedy loop that will eat
3262 // any character one at a time. Any non-anchored regexp has such a
3263 // loop prepended to it in order to find where it starts. We look for
3264 // a pattern of the form ...abc... where we can look 6 characters ahead
3265 // and step forwards 3 if the character is not one of abc. Abc need
3266 // not be atoms, they can be any reasonably limited character class or
3267 // small alternation.
3268 BoyerMooreLookahead* bm = bm_info(false);
3269 if (bm == nullptr) {
3270 eats_at_least = std::min(kMaxLookaheadForBoyerMoore, EatsAtLeast(false));
3271 if (eats_at_least >= 1) {
3272 bm = zone()->New<BoyerMooreLookahead>(eats_at_least, compiler, zone());
3273 GuardedAlternative alt0 = alternatives_->at(0);
3274 alt0.node()->FillInBMInfo(isolate, 0, kRecursionBudget, bm, false);
3275 }
3276 }
3277 if (bm != nullptr) {
3278 bm->EmitSkipInstructions(macro_assembler);
3279 }
3280 return eats_at_least;
3281}
3282
3283void ChoiceNode::EmitChoices(RegExpCompiler* compiler,
3284 AlternativeGenerationList* alt_gens,
3285 int first_choice, Trace* trace,
3286 PreloadState* preload) {
3287 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
3288 SetUpPreLoad(compiler, trace, preload);
3289
3290 // For now we just call all choices one after the other. The idea ultimately
3291 // is to use the Dispatch table to try only the relevant ones.
3292 int choice_count = alternatives_->length();
3293
3294 int new_flush_budget = trace->flush_budget() / choice_count;
3295
3296 for (int i = first_choice; i < choice_count; i++) {
16
Assuming 'i' is < 'choice_count'
17
Loop condition is true. Entering loop body
3297 bool is_last = i == choice_count - 1;
18
Assuming the condition is false
3298 bool fall_through_on_failure = !is_last;
3299 GuardedAlternative alternative = alternatives_->at(i);
3300 AlternativeGeneration* alt_gen = alt_gens->at(i);
3301 alt_gen->quick_check_details.set_characters(preload->preload_characters_);
3302 ZoneList<Guard*>* guards = alternative.guards();
3303 int guard_count = (guards == nullptr) ? 0 : guards->length();
19
Assuming the condition is false
20
'?' condition is false
3304 Trace new_trace(*trace);
3305 new_trace.set_characters_preloaded(
3306 preload->preload_is_current_
20.1
Field 'preload_is_current_' is false
? preload->preload_characters_ : 0);
21
'?' condition is false
3307 if (preload->preload_has_checked_bounds_
21.1
Field 'preload_has_checked_bounds_' is false
) {
22
Taking false branch
3308 new_trace.set_bound_checked_up_to(preload->preload_characters_);
3309 }
3310 new_trace.quick_check_performed()->Clear();
3311 if (not_at_start_) new_trace.set_at_start(Trace::FALSE_VALUE);
23
Assuming field 'not_at_start_' is false
24
Taking false branch
3312 if (!is_last
24.1
'is_last' is false
) {
25
Taking true branch
3313 new_trace.set_backtrack(&alt_gen->after);
3314 }
3315 alt_gen->expects_preload = preload->preload_is_current_;
3316 bool generate_full_check_inline = false;
3317 if (compiler->optimize() &&
26
Assuming the condition is true
3318 try_to_emit_quick_check_for_alternative(i == 0) &&
27
Assuming the condition is true
3319 alternative.node()->EmitQuickCheck(
28
Calling 'RegExpNode::EmitQuickCheck'
3320 compiler, trace, &new_trace, preload->preload_has_checked_bounds_,
3321 &alt_gen->possible_success, &alt_gen->quick_check_details,
3322 fall_through_on_failure, this)) {
3323 // Quick check was generated for this choice.
3324 preload->preload_is_current_ = true;
3325 preload->preload_has_checked_bounds_ = true;
3326 // If we generated the quick check to fall through on possible success,
3327 // we now need to generate the full check inline.
3328 if (!fall_through_on_failure) {
3329 macro_assembler->Bind(&alt_gen->possible_success);
3330 new_trace.set_quick_check_performed(&alt_gen->quick_check_details);
3331 new_trace.set_characters_preloaded(preload->preload_characters_);
3332 new_trace.set_bound_checked_up_to(preload->preload_characters_);
3333 generate_full_check_inline = true;
3334 }
3335 } else if (alt_gen->quick_check_details.cannot_match()) {
3336 if (!fall_through_on_failure) {
3337 macro_assembler->GoTo(trace->backtrack());
3338 }
3339 continue;
3340 } else {
3341 // No quick check was generated. Put the full code here.
3342 // If this is not the first choice then there could be slow checks from
3343 // previous cases that go here when they fail. There's no reason to
3344 // insist that they preload characters since the slow check we are about
3345 // to generate probably can't use it.
3346 if (i != first_choice) {
3347 alt_gen->expects_preload = false;
3348 new_trace.InvalidateCurrentCharacter();
3349 }
3350 generate_full_check_inline = true;
3351 }
3352 if (generate_full_check_inline) {
3353 if (new_trace.actions() != nullptr) {
3354 new_trace.set_flush_budget(new_flush_budget);
3355 }
3356 for (int j = 0; j < guard_count; j++) {
3357 GenerateGuard(macro_assembler, guards->at(j), &new_trace);
3358 }
3359 alternative.node()->Emit(compiler, &new_trace);
3360 preload->preload_is_current_ = false;
3361 }
3362 macro_assembler->Bind(&alt_gen->after);
3363 }
3364}
3365
3366void ChoiceNode::EmitOutOfLineContinuation(RegExpCompiler* compiler,
3367 Trace* trace,
3368 GuardedAlternative alternative,
3369 AlternativeGeneration* alt_gen,
3370 int preload_characters,
3371 bool next_expects_preload) {
3372 if (!alt_gen->possible_success.is_linked()) return;
3373
3374 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
3375 macro_assembler->Bind(&alt_gen->possible_success);
3376 Trace out_of_line_trace(*trace);
3377 out_of_line_trace.set_characters_preloaded(preload_characters);
3378 out_of_line_trace.set_quick_check_performed(&alt_gen->quick_check_details);
3379 if (not_at_start_) out_of_line_trace.set_at_start(Trace::FALSE_VALUE);
3380 ZoneList<Guard*>* guards = alternative.guards();
3381 int guard_count = (guards == nullptr) ? 0 : guards->length();
3382 if (next_expects_preload) {
3383 Label reload_current_char;
3384 out_of_line_trace.set_backtrack(&reload_current_char);
3385 for (int j = 0; j < guard_count; j++) {
3386 GenerateGuard(macro_assembler, guards->at(j), &out_of_line_trace);
3387 }
3388 alternative.node()->Emit(compiler, &out_of_line_trace);
3389 macro_assembler->Bind(&reload_current_char);
3390 // Reload the current character, since the next quick check expects that.
3391 // We don't need to check bounds here because we only get into this
3392 // code through a quick check which already did the checked load.
3393 macro_assembler->LoadCurrentCharacter(trace->cp_offset(), nullptr, false,
3394 preload_characters);
3395 macro_assembler->GoTo(&(alt_gen->after));
3396 } else {
3397 out_of_line_trace.set_backtrack(&(alt_gen->after));
3398 for (int j = 0; j < guard_count; j++) {
3399 GenerateGuard(macro_assembler, guards->at(j), &out_of_line_trace);
3400 }
3401 alternative.node()->Emit(compiler, &out_of_line_trace);
3402 }
3403}
3404
3405void ActionNode::Emit(RegExpCompiler* compiler, Trace* trace) {
3406 RegExpMacroAssembler* assembler = compiler->macro_assembler();
3407 LimitResult limit_result = LimitVersions(compiler, trace);
3408 if (limit_result == DONE) return;
3409 DCHECK(limit_result == CONTINUE)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(limit_result == CONTINUE)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(limit_result == CONTINUE))),
0))) { do { } while (false); MOZ_ReportAssertionFailure("limit_result == CONTINUE"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 3409); AnnotateMozCrashReason("MOZ_ASSERT" "(" "limit_result == CONTINUE"
")"); do { *((volatile int*)__null) = 3409; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
3410
3411 RecursionCheck rc(compiler);
3412
3413 switch (action_type_) {
3414 case STORE_POSITION: {
3415 Trace::DeferredCapture new_capture(data_.u_position_register.reg,
3416 data_.u_position_register.is_capture,
3417 trace);
3418 Trace new_trace = *trace;
3419 new_trace.add_action(&new_capture);
3420 on_success()->Emit(compiler, &new_trace);
3421 break;
3422 }
3423 case INCREMENT_REGISTER: {
3424 Trace::DeferredIncrementRegister new_increment(
3425 data_.u_increment_register.reg);
3426 Trace new_trace = *trace;
3427 new_trace.add_action(&new_increment);
3428 on_success()->Emit(compiler, &new_trace);
3429 break;
3430 }
3431 case SET_REGISTER_FOR_LOOP: {
3432 Trace::DeferredSetRegisterForLoop new_set(data_.u_store_register.reg,
3433 data_.u_store_register.value);
3434 Trace new_trace = *trace;
3435 new_trace.add_action(&new_set);
3436 on_success()->Emit(compiler, &new_trace);
3437 break;
3438 }
3439 case CLEAR_CAPTURES: {
3440 Trace::DeferredClearCaptures new_capture(Interval(
3441 data_.u_clear_captures.range_from, data_.u_clear_captures.range_to));
3442 Trace new_trace = *trace;
3443 new_trace.add_action(&new_capture);
3444 on_success()->Emit(compiler, &new_trace);
3445 break;
3446 }
3447 case BEGIN_POSITIVE_SUBMATCH:
3448 case BEGIN_NEGATIVE_SUBMATCH:
3449 if (!trace->is_trivial()) {
3450 trace->Flush(compiler, this);
3451 } else {
3452 assembler->WriteCurrentPositionToRegister(
3453 data_.u_submatch.current_position_register, 0);
3454 assembler->WriteStackPointerToRegister(
3455 data_.u_submatch.stack_pointer_register);
3456 on_success()->Emit(compiler, trace);
3457 }
3458 break;
3459 case EMPTY_MATCH_CHECK: {
3460 int start_pos_reg = data_.u_empty_match_check.start_register;
3461 int stored_pos = 0;
3462 int rep_reg = data_.u_empty_match_check.repetition_register;
3463 bool has_minimum = (rep_reg != RegExpCompiler::kNoRegister);
3464 bool know_dist = trace->GetStoredPosition(start_pos_reg, &stored_pos);
3465 if (know_dist && !has_minimum && stored_pos == trace->cp_offset()) {
3466 // If we know we haven't advanced and there is no minimum we
3467 // can just backtrack immediately.
3468 assembler->GoTo(trace->backtrack());
3469 } else if (know_dist && stored_pos < trace->cp_offset()) {
3470 // If we know we've advanced we can generate the continuation
3471 // immediately.
3472 on_success()->Emit(compiler, trace);
3473 } else if (!trace->is_trivial()) {
3474 trace->Flush(compiler, this);
3475 } else {
3476 Label skip_empty_check;
3477 // If we have a minimum number of repetitions we check the current
3478 // number first and skip the empty check if it's not enough.
3479 if (has_minimum) {
3480 int limit = data_.u_empty_match_check.repetition_limit;
3481 assembler->IfRegisterLT(rep_reg, limit, &skip_empty_check);
3482 }
3483 // If the match is empty we bail out, otherwise we fall through
3484 // to the on-success continuation.
3485 assembler->IfRegisterEqPos(data_.u_empty_match_check.start_register,
3486 trace->backtrack());
3487 assembler->Bind(&skip_empty_check);
3488 on_success()->Emit(compiler, trace);
3489 }
3490 break;
3491 }
3492 case POSITIVE_SUBMATCH_SUCCESS: {
3493 if (!trace->is_trivial()) {
3494 trace->Flush(compiler, this);
3495 return;
3496 }
3497 assembler->ReadCurrentPositionFromRegister(
3498 data_.u_submatch.current_position_register);
3499 assembler->ReadStackPointerFromRegister(
3500 data_.u_submatch.stack_pointer_register);
3501 int clear_register_count = data_.u_submatch.clear_register_count;
3502 if (clear_register_count == 0) {
3503 on_success()->Emit(compiler, trace);
3504 return;
3505 }
3506 int clear_registers_from = data_.u_submatch.clear_register_from;
3507 Label clear_registers_backtrack;
3508 Trace new_trace = *trace;
3509 new_trace.set_backtrack(&clear_registers_backtrack);
3510 on_success()->Emit(compiler, &new_trace);
3511
3512 assembler->Bind(&clear_registers_backtrack);
3513 int clear_registers_to = clear_registers_from + clear_register_count - 1;
3514 assembler->ClearRegisters(clear_registers_from, clear_registers_to);
3515
3516 DCHECK(trace->backtrack() == nullptr)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(trace->backtrack() == nullptr)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(trace->backtrack() == nullptr
))), 0))) { do { } while (false); MOZ_ReportAssertionFailure(
"trace->backtrack() == nullptr", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 3516); AnnotateMozCrashReason("MOZ_ASSERT" "(" "trace->backtrack() == nullptr"
")"); do { *((volatile int*)__null) = 3516; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
3517 assembler->Backtrack();
3518 return;
3519 }
3520 case MODIFY_FLAGS: {
3521 compiler->set_flags(flags());
3522 on_success()->Emit(compiler, trace);
3523 break;
3524 }
3525 default:
3526 UNREACHABLE()do { do { } while (false); MOZ_ReportCrash("" "unreachable code"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 3526); AnnotateMozCrashReason("MOZ_CRASH(" "unreachable code"
")"); do { *((volatile int*)__null) = 3526; __attribute__((nomerge
)) ::abort(); } while (false); } while (false)
;
3527 }
3528}
3529
3530void BackReferenceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
3531 RegExpMacroAssembler* assembler = compiler->macro_assembler();
3532 if (!trace->is_trivial()) {
3533 trace->Flush(compiler, this);
3534 return;
3535 }
3536
3537 LimitResult limit_result = LimitVersions(compiler, trace);
3538 if (limit_result == DONE) return;
3539 DCHECK(limit_result == CONTINUE)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(limit_result == CONTINUE)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(limit_result == CONTINUE))),
0))) { do { } while (false); MOZ_ReportAssertionFailure("limit_result == CONTINUE"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 3539); AnnotateMozCrashReason("MOZ_ASSERT" "(" "limit_result == CONTINUE"
")"); do { *((volatile int*)__null) = 3539; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
3540
3541 RecursionCheck rc(compiler);
3542
3543 DCHECK_EQ(start_reg_ + 1, end_reg_)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((start_reg_ + 1) == (end_reg_))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((start_reg_ + 1) == (end_reg_
)))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("(start_reg_ + 1) == (end_reg_)", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 3543); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(start_reg_ + 1) == (end_reg_)"
")"); do { *((volatile int*)__null) = 3543; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
3544 if (IsIgnoreCase(compiler->flags())) {
3545 bool unicode = IsEitherUnicode(compiler->flags());
3546 assembler->CheckNotBackReferenceIgnoreCase(start_reg_, read_backward(),
3547 unicode, trace->backtrack());
3548 } else {
3549 assembler->CheckNotBackReference(start_reg_, read_backward(),
3550 trace->backtrack());
3551 }
3552 // We are going to advance backward, so we may end up at the start.
3553 if (read_backward()) trace->set_at_start(Trace::UNKNOWN);
3554
3555 // Check that the back reference does not end inside a surrogate pair.
3556 if (IsEitherUnicode(compiler->flags()) && !compiler->one_byte()) {
3557 assembler->CheckNotInSurrogatePair(trace->cp_offset(), trace->backtrack());
3558 }
3559 on_success()->Emit(compiler, trace);
3560}
3561
3562void TextNode::CalculateOffsets() {
3563 int element_count = elements()->length();
3564 // Set up the offsets of the elements relative to the start. This is a fixed
3565 // quantity since a TextNode can only contain fixed-width things.
3566 int cp_offset = 0;
3567 for (int i = 0; i < element_count; i++) {
3568 TextElement& elm = elements()->at(i);
3569 elm.set_cp_offset(cp_offset);
3570 cp_offset += elm.length();
3571 }
3572}
3573
3574namespace {
3575
3576// Assertion propagation moves information about assertions such as
3577// \b to the affected nodes. For instance, in /.\b./ information must
3578// be propagated to the first '.' that whatever follows needs to know
3579// if it matched a word or a non-word, and to the second '.' that it
3580// has to check if it succeeds a word or non-word. In this case the
3581// result will be something like:
3582//
3583// +-------+ +------------+
3584// | . | | . |
3585// +-------+ ---> +------------+
3586// | word? | | check word |
3587// +-------+ +------------+
3588class AssertionPropagator : public AllStatic {
3589 public:
3590 static void VisitText(TextNode* that) {}
3591
3592 static void VisitAction(ActionNode* that) {
3593 // If the next node is interested in what it follows then this node
3594 // has to be interested too so it can pass the information on.
3595 that->info()->AddFromFollowing(that->on_success()->info());
3596 }
3597
3598 static void VisitChoice(ChoiceNode* that, int i) {
3599 // Anything the following nodes need to know has to be known by
3600 // this node also, so it can pass it on.
3601 that->info()->AddFromFollowing(that->alternatives()->at(i).node()->info());
3602 }
3603
3604 static void VisitLoopChoiceContinueNode(LoopChoiceNode* that) {
3605 that->info()->AddFromFollowing(that->continue_node()->info());
3606 }
3607
3608 static void VisitLoopChoiceLoopNode(LoopChoiceNode* that) {
3609 that->info()->AddFromFollowing(that->loop_node()->info());
3610 }
3611
3612 static void VisitNegativeLookaroundChoiceLookaroundNode(
3613 NegativeLookaroundChoiceNode* that) {
3614 VisitChoice(that, NegativeLookaroundChoiceNode::kLookaroundIndex);
3615 }
3616
3617 static void VisitNegativeLookaroundChoiceContinueNode(
3618 NegativeLookaroundChoiceNode* that) {
3619 VisitChoice(that, NegativeLookaroundChoiceNode::kContinueIndex);
3620 }
3621
3622 static void VisitBackReference(BackReferenceNode* that) {}
3623
3624 static void VisitAssertion(AssertionNode* that) {}
3625};
3626
3627// Propagates information about the minimum size of successful matches from
3628// successor nodes to their predecessors. Note that all eats_at_least values
3629// are initialized to zero before analysis.
3630class EatsAtLeastPropagator : public AllStatic {
3631 public:
3632 static void VisitText(TextNode* that) {
3633 // The eats_at_least value is not used if reading backward.
3634 if (!that->read_backward()) {
3635 // We are not at the start after this node, and thus we can use the
3636 // successor's eats_at_least_from_not_start value.
3637 uint8_t eats_at_least = base::saturated_cast<uint8_t>(
3638 that->Length() + that->on_success()
3639 ->eats_at_least_info()
3640 ->eats_at_least_from_not_start);
3641 that->set_eats_at_least_info(EatsAtLeastInfo(eats_at_least));
3642 }
3643 }
3644
3645 static void VisitAction(ActionNode* that) {
3646 switch (that->action_type()) {
3647 case ActionNode::BEGIN_POSITIVE_SUBMATCH:
3648 case ActionNode::POSITIVE_SUBMATCH_SUCCESS:
3649 // We do not propagate eats_at_least data through positive lookarounds,
3650 // because they rewind input.
3651 // TODO(v8:11859) Potential approaches for fixing this include:
3652 // 1. Add a dedicated choice node for positive lookaround, similar to
3653 // NegativeLookaroundChoiceNode.
3654 // 2. Add an eats_at_least_inside_loop field to EatsAtLeastInfo, which
3655 // is <= eats_at_least_from_possibly_start, and use that value in
3656 // EatsAtLeastFromLoopEntry.
3657 DCHECK(that->eats_at_least_info()->IsZero())do { static_assert( mozilla::detail::AssertionConditionType<
decltype(that->eats_at_least_info()->IsZero())>::isValid
, "invalid assertion condition"); if ((__builtin_expect(!!(!(
!!(that->eats_at_least_info()->IsZero()))), 0))) { do {
} while (false); MOZ_ReportAssertionFailure("that->eats_at_least_info()->IsZero()"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 3657); AnnotateMozCrashReason("MOZ_ASSERT" "(" "that->eats_at_least_info()->IsZero()"
")"); do { *((volatile int*)__null) = 3657; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
3658 break;
3659 case ActionNode::SET_REGISTER_FOR_LOOP:
3660 // SET_REGISTER_FOR_LOOP indicates a loop entry point, which means the
3661 // loop body will run at least the minimum number of times before the
3662 // continuation case can run.
3663 that->set_eats_at_least_info(
3664 that->on_success()->EatsAtLeastFromLoopEntry());
3665 break;
3666 case ActionNode::BEGIN_NEGATIVE_SUBMATCH:
3667 default:
3668 // Otherwise, the current node eats at least as much as its successor.
3669 // Note: we can propagate eats_at_least data for BEGIN_NEGATIVE_SUBMATCH
3670 // because NegativeLookaroundChoiceNode ignores its lookaround successor
3671 // when computing eats-at-least and quick check information.
3672 that->set_eats_at_least_info(*that->on_success()->eats_at_least_info());
3673 break;
3674 }
3675 }
3676
3677 static void VisitChoice(ChoiceNode* that, int i) {
3678 // The minimum possible match from a choice node is the minimum of its
3679 // successors.
3680 EatsAtLeastInfo eats_at_least =
3681 i == 0 ? EatsAtLeastInfo(UINT8_MAX(255)) : *that->eats_at_least_info();
3682 eats_at_least.SetMin(
3683 *that->alternatives()->at(i).node()->eats_at_least_info());
3684 that->set_eats_at_least_info(eats_at_least);
3685 }
3686
3687 static void VisitLoopChoiceContinueNode(LoopChoiceNode* that) {
3688 if (!that->read_backward()) {
3689 that->set_eats_at_least_info(
3690 *that->continue_node()->eats_at_least_info());
3691 }
3692 }
3693
3694 static void VisitLoopChoiceLoopNode(LoopChoiceNode* that) {}
3695
3696 static void VisitNegativeLookaroundChoiceLookaroundNode(
3697 NegativeLookaroundChoiceNode* that) {}
3698
3699 static void VisitNegativeLookaroundChoiceContinueNode(
3700 NegativeLookaroundChoiceNode* that) {
3701 that->set_eats_at_least_info(*that->continue_node()->eats_at_least_info());
3702 }
3703
3704 static void VisitBackReference(BackReferenceNode* that) {
3705 if (!that->read_backward()) {
3706 that->set_eats_at_least_info(*that->on_success()->eats_at_least_info());
3707 }
3708 }
3709
3710 static void VisitAssertion(AssertionNode* that) {
3711 EatsAtLeastInfo eats_at_least = *that->on_success()->eats_at_least_info();
3712 if (that->assertion_type() == AssertionNode::AT_START) {
3713 // If we know we are not at the start and we are asked "how many
3714 // characters will you match if you succeed?" then we can answer anything
3715 // since false implies false. So let's just set the max answer
3716 // (UINT8_MAX) since that won't prevent us from preloading a lot of
3717 // characters for the other branches in the node graph.
3718 eats_at_least.eats_at_least_from_not_start = UINT8_MAX(255);
3719 }
3720 that->set_eats_at_least_info(eats_at_least);
3721 }
3722};
3723
3724} // namespace
3725
3726// -------------------------------------------------------------------
3727// Analysis
3728
3729// Iterates the node graph and provides the opportunity for propagators to set
3730// values that depend on successor nodes.
3731template <typename... Propagators>
3732class Analysis : public NodeVisitor {
3733 public:
3734 Analysis(Isolate* isolate, bool is_one_byte, RegExpFlags flags)
3735 : isolate_(isolate),
3736 is_one_byte_(is_one_byte),
3737 flags_(flags),
3738 error_(RegExpError::kNone) {}
3739
3740 void EnsureAnalyzed(RegExpNode* that) {
3741 StackLimitCheck check(isolate());
3742 if (check.HasOverflowed()) {
3743 if (v8_flagsjs::jit::JitOptions.correctness_fuzzer_suppressions) {
3744 FATAL("Analysis: Aborting on stack overflow")do { do { } while (false); MOZ_ReportCrash("" "Analysis: Aborting on stack overflow"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 3744); AnnotateMozCrashReason("MOZ_CRASH(" "Analysis: Aborting on stack overflow"
")"); do { *((volatile int*)__null) = 3744; __attribute__((nomerge
)) ::abort(); } while (false); } while (false)
;
3745 }
3746 fail(RegExpError::kAnalysisStackOverflow);
3747 return;
3748 }
3749 if (that->info()->been_analyzed || that->info()->being_analyzed) return;
3750 that->info()->being_analyzed = true;
3751 that->Accept(this);
3752 that->info()->being_analyzed = false;
3753 that->info()->been_analyzed = true;
3754 }
3755
3756 bool has_failed() { return error_ != RegExpError::kNone; }
3757 RegExpError error() {
3758 DCHECK(error_ != RegExpError::kNone)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(error_ != RegExpError::kNone)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(error_ != RegExpError::kNone
))), 0))) { do { } while (false); MOZ_ReportAssertionFailure(
"error_ != RegExpError::kNone", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 3758); AnnotateMozCrashReason("MOZ_ASSERT" "(" "error_ != RegExpError::kNone"
")"); do { *((volatile int*)__null) = 3758; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
3759 return error_;
3760 }
3761 void fail(RegExpError error) { error_ = error; }
3762
3763 Isolate* isolate() const { return isolate_; }
3764
3765 void VisitEnd(EndNode* that) override {
3766 // nothing to do
3767 }
3768
3769// Used to call the given static function on each propagator / variadic template
3770// argument.
3771#define STATIC_FOR_EACH(expr) \
3772 do { \
3773 int dummy[] = {((expr), 0)...}; \
3774 USE(dummy)do { ::v8::base::Use unused_tmp_array_for_use_macro[]{dummy};
(void)unused_tmp_array_for_use_macro; } while (false)
; \
3775 } while (false)
3776
3777 void VisitText(TextNode* that) override {
3778 that->MakeCaseIndependent(isolate(), is_one_byte_, flags());
3779 EnsureAnalyzed(that->on_success());
3780 if (has_failed()) return;
3781 that->CalculateOffsets();
3782 STATIC_FOR_EACH(Propagators::VisitText(that));
3783 }
3784
3785 void VisitAction(ActionNode* that) override {
3786 if (that->action_type() == ActionNode::MODIFY_FLAGS) {
3787 set_flags(that->flags());
3788 }
3789 EnsureAnalyzed(that->on_success());
3790 if (has_failed()) return;
3791 STATIC_FOR_EACH(Propagators::VisitAction(that));
3792 }
3793
3794 void VisitChoice(ChoiceNode* that) override {
3795 for (int i = 0; i < that->alternatives()->length(); i++) {
3796 EnsureAnalyzed(that->alternatives()->at(i).node());
3797 if (has_failed()) return;
3798 STATIC_FOR_EACH(Propagators::VisitChoice(that, i));
3799 }
3800 }
3801
3802 void VisitLoopChoice(LoopChoiceNode* that) override {
3803 DCHECK_EQ(that->alternatives()->length(), 2)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((that->alternatives()->length()) == (2))>::
isValid, "invalid assertion condition"); if ((__builtin_expect
(!!(!(!!((that->alternatives()->length()) == (2)))), 0)
)) { do { } while (false); MOZ_ReportAssertionFailure("(that->alternatives()->length()) == (2)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 3803); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(that->alternatives()->length()) == (2)"
")"); do { *((volatile int*)__null) = 3803; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
; // Just loop and continue.
3804
3805 // First propagate all information from the continuation node.
3806 EnsureAnalyzed(that->continue_node());
3807 if (has_failed()) return;
3808 STATIC_FOR_EACH(Propagators::VisitLoopChoiceContinueNode(that));
3809
3810 // Check the loop last since it may need the value of this node
3811 // to get a correct result.
3812 EnsureAnalyzed(that->loop_node());
3813 if (has_failed()) return;
3814 STATIC_FOR_EACH(Propagators::VisitLoopChoiceLoopNode(that));
3815 }
3816
3817 void VisitNegativeLookaroundChoice(
3818 NegativeLookaroundChoiceNode* that) override {
3819 DCHECK_EQ(that->alternatives()->length(), 2)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((that->alternatives()->length()) == (2))>::
isValid, "invalid assertion condition"); if ((__builtin_expect
(!!(!(!!((that->alternatives()->length()) == (2)))), 0)
)) { do { } while (false); MOZ_ReportAssertionFailure("(that->alternatives()->length()) == (2)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 3819); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(that->alternatives()->length()) == (2)"
")"); do { *((volatile int*)__null) = 3819; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
; // Lookaround and continue.
3820
3821 EnsureAnalyzed(that->lookaround_node());
3822 if (has_failed()) return;
3823 STATIC_FOR_EACH(
3824 Propagators::VisitNegativeLookaroundChoiceLookaroundNode(that));
3825
3826 EnsureAnalyzed(that->continue_node());
3827 if (has_failed()) return;
3828 STATIC_FOR_EACH(
3829 Propagators::VisitNegativeLookaroundChoiceContinueNode(that));
3830 }
3831
3832 void VisitBackReference(BackReferenceNode* that) override {
3833 EnsureAnalyzed(that->on_success());
3834 if (has_failed()) return;
3835 STATIC_FOR_EACH(Propagators::VisitBackReference(that));
3836 }
3837
3838 void VisitAssertion(AssertionNode* that) override {
3839 EnsureAnalyzed(that->on_success());
3840 if (has_failed()) return;
3841 STATIC_FOR_EACH(Propagators::VisitAssertion(that));
3842 }
3843
3844#undef STATIC_FOR_EACH
3845
3846 private:
3847 RegExpFlags flags() const { return flags_; }
3848 void set_flags(RegExpFlags flags) { flags_ = flags; }
3849
3850 Isolate* isolate_;
3851 const bool is_one_byte_;
3852 RegExpFlags flags_;
3853 RegExpError error_;
3854
3855 DISALLOW_IMPLICIT_CONSTRUCTORS(Analysis)Analysis() = delete; Analysis(const Analysis&) = delete; Analysis
& operator=(const Analysis&) = delete
;
3856};
3857
3858RegExpError AnalyzeRegExp(Isolate* isolate, bool is_one_byte, RegExpFlags flags,
3859 RegExpNode* node) {
3860 Analysis<AssertionPropagator, EatsAtLeastPropagator> analysis(
3861 isolate, is_one_byte, flags);
3862 DCHECK_EQ(node->info()->been_analyzed, false)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((node->info()->been_analyzed) == (false))>::
isValid, "invalid assertion condition"); if ((__builtin_expect
(!!(!(!!((node->info()->been_analyzed) == (false)))), 0
))) { do { } while (false); MOZ_ReportAssertionFailure("(node->info()->been_analyzed) == (false)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 3862); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(node->info()->been_analyzed) == (false)"
")"); do { *((volatile int*)__null) = 3862; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
3863 analysis.EnsureAnalyzed(node);
3864 DCHECK_IMPLIES(analysis.has_failed(), analysis.error() != RegExpError::kNone)do { if (analysis.has_failed()) { do { static_assert( mozilla
::detail::AssertionConditionType<decltype(analysis.error()
!= RegExpError::kNone)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(analysis.error() != RegExpError
::kNone))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("analysis.error() != RegExpError::kNone", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 3864); AnnotateMozCrashReason("MOZ_ASSERT" "(" "analysis.error() != RegExpError::kNone"
")"); do { *((volatile int*)__null) = 3864; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false); } } while (
false)
;
3865 return analysis.has_failed() ? analysis.error() : RegExpError::kNone;
3866}
3867
3868void BackReferenceNode::FillInBMInfo(Isolate* isolate, int offset, int budget,
3869 BoyerMooreLookahead* bm,
3870 bool not_at_start) {
3871 // Working out the set of characters that a backreference can match is too
3872 // hard, so we just say that any character can match.
3873 bm->SetRest(offset);
3874 SaveBMInfo(bm, not_at_start, offset);
3875}
3876
3877static_assert(BoyerMoorePositionInfo::kMapSize ==
3878 RegExpMacroAssembler::kTableSize);
3879
3880void ChoiceNode::FillInBMInfo(Isolate* isolate, int offset, int budget,
3881 BoyerMooreLookahead* bm, bool not_at_start) {
3882 ZoneList<GuardedAlternative>* alts = alternatives();
3883 budget = (budget - 1) / alts->length();
3884 for (int i = 0; i < alts->length(); i++) {
3885 GuardedAlternative& alt = alts->at(i);
3886 if (alt.guards() != nullptr && alt.guards()->length() != 0) {
3887 bm->SetRest(offset); // Give up trying to fill in info.
3888 SaveBMInfo(bm, not_at_start, offset);
3889 return;
3890 }
3891 alt.node()->FillInBMInfo(isolate, offset, budget, bm, not_at_start);
3892 }
3893 SaveBMInfo(bm, not_at_start, offset);
3894}
3895
3896void TextNode::FillInBMInfo(Isolate* isolate, int initial_offset, int budget,
3897 BoyerMooreLookahead* bm, bool not_at_start) {
3898 if (initial_offset >= bm->length()) return;
3899 int offset = initial_offset;
3900 int max_char = bm->max_char();
3901 for (int i = 0; i < elements()->length(); i++) {
3902 if (offset >= bm->length()) {
3903 if (initial_offset == 0) set_bm_info(not_at_start, bm);
3904 return;
3905 }
3906 TextElement text = elements()->at(i);
3907 if (text.text_type() == TextElement::ATOM) {
3908 RegExpAtom* atom = text.atom();
3909 for (int j = 0; j < atom->length(); j++, offset++) {
3910 if (offset >= bm->length()) {
3911 if (initial_offset == 0) set_bm_info(not_at_start, bm);
3912 return;
3913 }
3914 base::uc16 character = atom->data()[j];
3915 if (IsIgnoreCase(bm->compiler()->flags())) {
3916 unibrow::uchar chars[4];
3917 int length = GetCaseIndependentLetters(isolate, character,
3918 bm->compiler(), chars, 4);
3919 for (int k = 0; k < length; k++) {
3920 bm->Set(offset, chars[k]);
3921 }
3922 } else {
3923 if (character <= max_char) bm->Set(offset, character);
3924 }
3925 }
3926 } else {
3927 DCHECK_EQ(TextElement::CLASS_RANGES, text.text_type())do { static_assert( mozilla::detail::AssertionConditionType<
decltype((TextElement::CLASS_RANGES) == (text.text_type()))>
::isValid, "invalid assertion condition"); if ((__builtin_expect
(!!(!(!!((TextElement::CLASS_RANGES) == (text.text_type()))))
, 0))) { do { } while (false); MOZ_ReportAssertionFailure("(TextElement::CLASS_RANGES) == (text.text_type())"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 3927); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(TextElement::CLASS_RANGES) == (text.text_type())"
")"); do { *((volatile int*)__null) = 3927; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
3928 RegExpClassRanges* class_ranges = text.class_ranges();
3929 ZoneList<CharacterRange>* ranges = class_ranges->ranges(zone());
3930 if (class_ranges->is_negated()) {
3931 bm->SetAll(offset);
3932 } else {
3933 for (int k = 0; k < ranges->length(); k++) {
3934 CharacterRange& range = ranges->at(k);
3935 if (static_cast<int>(range.from()) > max_char) continue;
3936 int to = std::min(max_char, static_cast<int>(range.to()));
3937 bm->SetInterval(offset, Interval(range.from(), to));
3938 }
3939 }
3940 offset++;
3941 }
3942 }
3943 if (offset >= bm->length()) {
3944 if (initial_offset == 0) set_bm_info(not_at_start, bm);
3945 return;
3946 }
3947 on_success()->FillInBMInfo(isolate, offset, budget - 1, bm,
3948 true); // Not at start after a text node.
3949 if (initial_offset == 0) set_bm_info(not_at_start, bm);
3950}
3951
3952RegExpNode* RegExpCompiler::OptionallyStepBackToLeadSurrogate(
3953 RegExpNode* on_success) {
3954 DCHECK(!read_backward())do { static_assert( mozilla::detail::AssertionConditionType<
decltype(!read_backward())>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(!read_backward()))), 0))) { do
{ } while (false); MOZ_ReportAssertionFailure("!read_backward()"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 3954); AnnotateMozCrashReason("MOZ_ASSERT" "(" "!read_backward()"
")"); do { *((volatile int*)__null) = 3954; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
3955 ZoneList<CharacterRange>* lead_surrogates = CharacterRange::List(
3956 zone(), CharacterRange::Range(kLeadSurrogateStart, kLeadSurrogateEnd));
3957 ZoneList<CharacterRange>* trail_surrogates = CharacterRange::List(
3958 zone(), CharacterRange::Range(kTrailSurrogateStart, kTrailSurrogateEnd));
3959
3960 ChoiceNode* optional_step_back = zone()->New<ChoiceNode>(2, zone());
3961
3962 int stack_register = UnicodeLookaroundStackRegister();
3963 int position_register = UnicodeLookaroundPositionRegister();
3964 RegExpNode* step_back = TextNode::CreateForCharacterRanges(
3965 zone(), lead_surrogates, true, on_success);
3966 RegExpLookaround::Builder builder(true, step_back, stack_register,
3967 position_register);
3968 RegExpNode* match_trail = TextNode::CreateForCharacterRanges(
3969 zone(), trail_surrogates, false, builder.on_match_success());
3970
3971 optional_step_back->AddAlternative(
3972 GuardedAlternative(builder.ForMatch(match_trail)));
3973 optional_step_back->AddAlternative(GuardedAlternative(on_success));
3974
3975 return optional_step_back;
3976}
3977
3978RegExpNode* RegExpCompiler::PreprocessRegExp(RegExpCompileData* data,
3979 bool is_one_byte) {
3980 // Wrap the body of the regexp in capture #0.
3981 RegExpNode* captured_body =
3982 RegExpCapture::ToNode(data->tree, 0, this, accept());
3983 RegExpNode* node = captured_body;
3984 if (!data->tree->IsAnchoredAtStart() && !IsSticky(flags())) {
3985 // Add a .*? at the beginning, outside the body capture, unless
3986 // this expression is anchored at the beginning or sticky.
3987 RegExpNode* loop_node = RegExpQuantifier::ToNode(
3988 0, RegExpTree::kInfinity, false,
3989 zone()->New<RegExpClassRanges>(StandardCharacterSet::kEverything), this,
3990 captured_body, data->contains_anchor);
3991
3992 if (data->contains_anchor) {
3993 // Unroll loop once, to take care of the case that might start
3994 // at the start of input.
3995 ChoiceNode* first_step_node = zone()->New<ChoiceNode>(2, zone());
3996 first_step_node->AddAlternative(GuardedAlternative(captured_body));
3997 first_step_node->AddAlternative(GuardedAlternative(zone()->New<TextNode>(
3998 zone()->New<RegExpClassRanges>(StandardCharacterSet::kEverything),
3999 false, loop_node)));
4000 node = first_step_node;
4001 } else {
4002 node = loop_node;
4003 }
4004 }
4005 if (is_one_byte) {
4006 node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, this);
4007 // Do it again to propagate the new nodes to places where they were not
4008 // put because they had not been calculated yet.
4009 if (node != nullptr) {
4010 node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, this);
4011 }
4012 } else if (IsEitherUnicode(flags()) &&
4013 (IsGlobal(flags()) || IsSticky(flags()))) {
4014 node = OptionallyStepBackToLeadSurrogate(node);
4015 }
4016
4017 if (node == nullptr) node = zone()->New<EndNode>(EndNode::BACKTRACK, zone());
4018 return node;
4019}
4020
4021void RegExpCompiler::ToNodeCheckForStackOverflow() {
4022 if (StackLimitCheck{isolate()}.HasOverflowed()) {
4023 V8::FatalProcessOutOfMemory(isolate(), "RegExpCompiler");
4024 }
4025}
4026
4027} // namespace internal
4028} // namespace v8