Bug Summary

File:var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc
Warning:line 1447, column 38
The result of left shift is undefined because the right operand '32' is not smaller than 32, the capacity of 'uint32_t'

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name regexp-compiler.cc -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -fhalf-no-semantic-interposition -mframe-pointer=all -relaxed-aliasing -ffp-contract=off -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fdebug-compilation-dir=/var/lib/jenkins/workspace/firefox-scan-build/obj-x86_64-pc-linux-gnu/js/src/irregexp -fcoverage-compilation-dir=/var/lib/jenkins/workspace/firefox-scan-build/obj-x86_64-pc-linux-gnu/js/src/irregexp -resource-dir /usr/lib/llvm-19/lib/clang/19 -include /var/lib/jenkins/workspace/firefox-scan-build/config/gcc_hidden.h -include /var/lib/jenkins/workspace/firefox-scan-build/obj-x86_64-pc-linux-gnu/mozilla-config.h -I /var/lib/jenkins/workspace/firefox-scan-build/obj-x86_64-pc-linux-gnu/dist/stl_wrappers -I /var/lib/jenkins/workspace/firefox-scan-build/obj-x86_64-pc-linux-gnu/dist/system_wrappers -U _FORTIFY_SOURCE -D _FORTIFY_SOURCE=2 -D DEBUG=1 -D WASM_SUPPORTS_HUGE_MEMORY -D JS_CACHEIR_SPEW -D JS_STRUCTURED_SPEW -D JS_HAS_CTYPES -D FFI_BUILDING -D EXPORT_JS_API -D MOZ_HAS_MOZGLUE -D MOZ_SUPPORT_LEAKCHECKING -I /var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp -I /var/lib/jenkins/workspace/firefox-scan-build/obj-x86_64-pc-linux-gnu/js/src/irregexp -I /var/lib/jenkins/workspace/firefox-scan-build/obj-x86_64-pc-linux-gnu/js/src -I /var/lib/jenkins/workspace/firefox-scan-build/js/src -I /var/lib/jenkins/workspace/firefox-scan-build/obj-x86_64-pc-linux-gnu/dist/include -I /var/lib/jenkins/workspace/firefox-scan-build/obj-x86_64-pc-linux-gnu/dist/include/nspr -I /var/lib/jenkins/workspace/firefox-scan-build/obj-x86_64-pc-linux-gnu/dist/include/nss -D MOZILLA_CLIENT -D V8_INTL_SUPPORT -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/14/../../../../include/c++/14 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/14/../../../../include/x86_64-linux-gnu/c++/14 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/14/../../../../include/c++/14/backward -internal-isystem /usr/lib/llvm-19/lib/clang/19/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/14/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O3 -Wno-error=tautological-type-limit-compare -Wno-invalid-offsetof -Wno-range-loop-analysis -Wno-deprecated-anon-enum-enum-conversion -Wno-deprecated-enum-enum-conversion -Wno-deprecated-this-capture -Wno-inline-new-delete -Wno-error=deprecated-declarations -Wno-error=array-bounds -Wno-error=free-nonheap-object -Wno-error=atomic-alignment -Wno-error=deprecated-builtins -Wno-psabi -Wno-error=builtin-macro-redefined -Wno-vla-cxx-extension -Wno-unknown-warning-option -Wno-error=type-limits -Wno-error=return-type -Wno-sign-compare -Wno-c++11-narrowing -fdeprecated-macro -ferror-limit 19 -fstrict-flex-arrays=1 -stack-protector 2 -fstack-clash-protection -ftrivial-auto-var-init=pattern -fno-rtti -fgnuc-version=4.2.1 -fskip-odr-check-in-gmf -fno-sized-deallocation -fno-aligned-allocation -vectorize-loops -vectorize-slp -analyzer-checker optin.performance.Padding -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2024-09-22-115206-3586786-1 -x c++ /var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc
1// Copyright 2019 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "irregexp/imported/regexp-compiler.h"
6
7#include <optional>
8
9#include "irregexp/imported/regexp-macro-assembler-arch.h"
10
11#ifdef V8_INTL_SUPPORT1
12#include "irregexp/imported/special-case.h"
13#include "unicode/locid.h"
14#include "unicode/uniset.h"
15#include "unicode/utypes.h"
16#endif // V8_INTL_SUPPORT
17
18namespace v8::internal {
19
20using namespace regexp_compiler_constants; // NOLINT(build/namespaces)
21
22// -------------------------------------------------------------------
23// Implementation of the Irregexp regular expression engine.
24//
25// The Irregexp regular expression engine is intended to be a complete
26// implementation of ECMAScript regular expressions. It generates either
27// bytecodes or native code.
28
29// The Irregexp regexp engine is structured in three steps.
30// 1) The parser generates an abstract syntax tree. See ast.cc.
31// 2) From the AST a node network is created. The nodes are all
32// subclasses of RegExpNode. The nodes represent states when
33// executing a regular expression. Several optimizations are
34// performed on the node network.
35// 3) From the nodes we generate either byte codes or native code
36// that can actually execute the regular expression (perform
37// the search). The code generation step is described in more
38// detail below.
39
40// Code generation.
41//
42// The nodes are divided into four main categories.
43// * Choice nodes
44// These represent places where the regular expression can
45// match in more than one way. For example on entry to an
46// alternation (foo|bar) or a repetition (*, +, ? or {}).
47// * Action nodes
48// These represent places where some action should be
49// performed. Examples include recording the current position
50// in the input string to a register (in order to implement
51// captures) or other actions on register for example in order
52// to implement the counters needed for {} repetitions.
53// * Matching nodes
54// These attempt to match some element part of the input string.
55// Examples of elements include character classes, plain strings
56// or back references.
57// * End nodes
58// These are used to implement the actions required on finding
59// a successful match or failing to find a match.
60//
61// The code generated (whether as byte codes or native code) maintains
62// some state as it runs. This consists of the following elements:
63//
64// * The capture registers. Used for string captures.
65// * Other registers. Used for counters etc.
66// * The current position.
67// * The stack of backtracking information. Used when a matching node
68// fails to find a match and needs to try an alternative.
69//
70// Conceptual regular expression execution model:
71//
72// There is a simple conceptual model of regular expression execution
73// which will be presented first. The actual code generated is a more
74// efficient simulation of the simple conceptual model:
75//
76// * Choice nodes are implemented as follows:
77// For each choice except the last {
78// push current position
79// push backtrack code location
80// <generate code to test for choice>
81// backtrack code location:
82// pop current position
83// }
84// <generate code to test for last choice>
85//
86// * Actions nodes are generated as follows
87// <push affected registers on backtrack stack>
88// <generate code to perform action>
89// push backtrack code location
90// <generate code to test for following nodes>
91// backtrack code location:
92// <pop affected registers to restore their state>
93// <pop backtrack location from stack and go to it>
94//
95// * Matching nodes are generated as follows:
96// if input string matches at current position
97// update current position
98// <generate code to test for following nodes>
99// else
100// <pop backtrack location from stack and go to it>
101//
102// Thus it can be seen that the current position is saved and restored
103// by the choice nodes, whereas the registers are saved and restored by
104// by the action nodes that manipulate them.
105//
106// The other interesting aspect of this model is that nodes are generated
107// at the point where they are needed by a recursive call to Emit(). If
108// the node has already been code generated then the Emit() call will
109// generate a jump to the previously generated code instead. In order to
110// limit recursion it is possible for the Emit() function to put the node
111// on a work list for later generation and instead generate a jump. The
112// destination of the jump is resolved later when the code is generated.
113//
114// Actual regular expression code generation.
115//
116// Code generation is actually more complicated than the above. In order to
117// improve the efficiency of the generated code some optimizations are
118// performed
119//
120// * Choice nodes have 1-character lookahead.
121// A choice node looks at the following character and eliminates some of
122// the choices immediately based on that character. This is not yet
123// implemented.
124// * Simple greedy loops store reduced backtracking information.
125// A quantifier like /.*foo/m will greedily match the whole input. It will
126// then need to backtrack to a point where it can match "foo". The naive
127// implementation of this would push each character position onto the
128// backtracking stack, then pop them off one by one. This would use space
129// proportional to the length of the input string. However since the "."
130// can only match in one way and always has a constant length (in this case
131// of 1) it suffices to store the current position on the top of the stack
132// once. Matching now becomes merely incrementing the current position and
133// backtracking becomes decrementing the current position and checking the
134// result against the stored current position. This is faster and saves
135// space.
136// * The current state is virtualized.
137// This is used to defer expensive operations until it is clear that they
138// are needed and to generate code for a node more than once, allowing
139// specialized an efficient versions of the code to be created. This is
140// explained in the section below.
141//
142// Execution state virtualization.
143//
144// Instead of emitting code, nodes that manipulate the state can record their
145// manipulation in an object called the Trace. The Trace object can record a
146// current position offset, an optional backtrack code location on the top of
147// the virtualized backtrack stack and some register changes. When a node is
148// to be emitted it can flush the Trace or update it. Flushing the Trace
149// will emit code to bring the actual state into line with the virtual state.
150// Avoiding flushing the state can postpone some work (e.g. updates of capture
151// registers). Postponing work can save time when executing the regular
152// expression since it may be found that the work never has to be done as a
153// failure to match can occur. In addition it is much faster to jump to a
154// known backtrack code location than it is to pop an unknown backtrack
155// location from the stack and jump there.
156//
157// The virtual state found in the Trace affects code generation. For example
158// the virtual state contains the difference between the actual current
159// position and the virtual current position, and matching code needs to use
160// this offset to attempt a match in the correct location of the input
161// string. Therefore code generated for a non-trivial trace is specialized
162// to that trace. The code generator therefore has the ability to generate
163// code for each node several times. In order to limit the size of the
164// generated code there is an arbitrary limit on how many specialized sets of
165// code may be generated for a given node. If the limit is reached, the
166// trace is flushed and a generic version of the code for a node is emitted.
167// This is subsequently used for that node. The code emitted for non-generic
168// trace is not recorded in the node and so it cannot currently be reused in
169// the event that code generation is requested for an identical trace.
170
171namespace {
172
173constexpr base::uc32 MaxCodeUnit(const bool one_byte) {
174 static_assert(String::kMaxOneByteCharCodeU <=
175 std::numeric_limits<uint16_t>::max());
176 static_assert(String::kMaxUtf16CodeUnitU <=
177 std::numeric_limits<uint16_t>::max());
178 return one_byte ? String::kMaxOneByteCharCodeU : String::kMaxUtf16CodeUnitU;
179}
180
181constexpr uint32_t CharMask(const bool one_byte) {
182 static_assert(base::bits::IsPowerOfTwo(String::kMaxOneByteCharCodeU + 1));
183 static_assert(base::bits::IsPowerOfTwo(String::kMaxUtf16CodeUnitU + 1));
184 return MaxCodeUnit(one_byte);
185}
186
187} // namespace
188
189void RegExpTree::AppendToText(RegExpText* text, Zone* zone) { UNREACHABLE()do { do { } while (false); MOZ_ReportCrash("" "unreachable code"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 189); AnnotateMozCrashReason("MOZ_CRASH(" "unreachable code"
")"); do { *((volatile int*)__null) = 189; __attribute__((nomerge
)) ::abort(); } while (false); } while (false)
; }
190
191void RegExpAtom::AppendToText(RegExpText* text, Zone* zone) {
192 text->AddElement(TextElement::Atom(this), zone);
193}
194
195void RegExpClassRanges::AppendToText(RegExpText* text, Zone* zone) {
196 text->AddElement(TextElement::ClassRanges(this), zone);
197}
198
199void RegExpText::AppendToText(RegExpText* text, Zone* zone) {
200 for (int i = 0; i < elements()->length(); i++)
201 text->AddElement(elements()->at(i), zone);
202}
203
204TextElement TextElement::Atom(RegExpAtom* atom) {
205 return TextElement(ATOM, atom);
206}
207
208TextElement TextElement::ClassRanges(RegExpClassRanges* class_ranges) {
209 return TextElement(CLASS_RANGES, class_ranges);
210}
211
212int TextElement::length() const {
213 switch (text_type()) {
214 case ATOM:
215 return atom()->length();
216
217 case CLASS_RANGES:
218 return 1;
219 }
220 UNREACHABLE()do { do { } while (false); MOZ_ReportCrash("" "unreachable code"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 220); AnnotateMozCrashReason("MOZ_CRASH(" "unreachable code"
")"); do { *((volatile int*)__null) = 220; __attribute__((nomerge
)) ::abort(); } while (false); } while (false)
;
221}
222
223class RecursionCheck {
224 public:
225 explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) {
226 compiler->IncrementRecursionDepth();
227 }
228 ~RecursionCheck() { compiler_->DecrementRecursionDepth(); }
229
230 private:
231 RegExpCompiler* compiler_;
232};
233
234// Attempts to compile the regexp using an Irregexp code generator. Returns
235// a fixed array or a null handle depending on whether it succeeded.
236RegExpCompiler::RegExpCompiler(Isolate* isolate, Zone* zone, int capture_count,
237 RegExpFlags flags, bool one_byte)
238 : next_register_(JSRegExp::RegistersForCaptureCount(capture_count)),
239 unicode_lookaround_stack_register_(kNoRegister),
240 unicode_lookaround_position_register_(kNoRegister),
241 work_list_(nullptr),
242 recursion_depth_(0),
243 flags_(flags),
244 one_byte_(one_byte),
245 reg_exp_too_big_(false),
246 limiting_recursion_(false),
247 optimize_(v8_flagsjs::jit::JitOptions.regexp_optimization),
248 read_backward_(false),
249 current_expansion_factor_(1),
250 frequency_collator_(),
251 isolate_(isolate),
252 zone_(zone) {
253 accept_ = zone->New<EndNode>(EndNode::ACCEPT, zone);
254 DCHECK_GE(RegExpMacroAssembler::kMaxRegister, next_register_ - 1)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((RegExpMacroAssembler::kMaxRegister) >= (next_register_
- 1))>::isValid, "invalid assertion condition"); if ((__builtin_expect
(!!(!(!!((RegExpMacroAssembler::kMaxRegister) >= (next_register_
- 1)))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("(RegExpMacroAssembler::kMaxRegister) >= (next_register_ - 1)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 254); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(RegExpMacroAssembler::kMaxRegister) >= (next_register_ - 1)"
")"); do { *((volatile int*)__null) = 254; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
255}
256
257RegExpCompiler::CompilationResult RegExpCompiler::Assemble(
258 Isolate* isolate, RegExpMacroAssembler* macro_assembler, RegExpNode* start,
259 int capture_count, Handle<String> pattern) {
260 macro_assembler_ = macro_assembler;
261
262 ZoneVector<RegExpNode*> work_list(zone());
263 work_list_ = &work_list;
264 Label fail;
265 macro_assembler_->PushBacktrack(&fail);
266 Trace new_trace;
267 start->Emit(this, &new_trace);
268 macro_assembler_->BindJumpTarget(&fail);
269 macro_assembler_->Fail();
270 while (!work_list.empty()) {
271 RegExpNode* node = work_list.back();
272 work_list.pop_back();
273 node->set_on_work_list(false);
274 if (!node->label()->is_bound()) node->Emit(this, &new_trace);
275 }
276 if (reg_exp_too_big_) {
277 if (v8_flagsjs::jit::JitOptions.correctness_fuzzer_suppressions) {
278 FATAL("Aborting on excess zone allocation")do { do { } while (false); MOZ_ReportCrash("" "Aborting on excess zone allocation"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 278); AnnotateMozCrashReason("MOZ_CRASH(" "Aborting on excess zone allocation"
")"); do { *((volatile int*)__null) = 278; __attribute__((nomerge
)) ::abort(); } while (false); } while (false)
;
279 }
280 macro_assembler_->AbortedCodeGeneration();
281 return CompilationResult::RegExpTooBig();
282 }
283
284 Handle<HeapObject> code = macro_assembler_->GetCode(pattern);
285 isolate->IncreaseTotalRegexpCodeGenerated(code);
286 work_list_ = nullptr;
287
288 return {code, next_register_};
289}
290
291bool Trace::DeferredAction::Mentions(int that) {
292 if (action_type() == ActionNode::CLEAR_CAPTURES) {
293 Interval range = static_cast<DeferredClearCaptures*>(this)->range();
294 return range.Contains(that);
295 } else {
296 return reg() == that;
297 }
298}
299
300bool Trace::mentions_reg(int reg) {
301 for (DeferredAction* action = actions_; action != nullptr;
302 action = action->next()) {
303 if (action->Mentions(reg)) return true;
304 }
305 return false;
306}
307
308bool Trace::GetStoredPosition(int reg, int* cp_offset) {
309 DCHECK_EQ(0, *cp_offset)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((0) == (*cp_offset))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((0) == (*cp_offset)))), 0)))
{ do { } while (false); MOZ_ReportAssertionFailure("(0) == (*cp_offset)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 309); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(0) == (*cp_offset)"
")"); do { *((volatile int*)__null) = 309; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
310 for (DeferredAction* action = actions_; action != nullptr;
311 action = action->next()) {
312 if (action->Mentions(reg)) {
313 if (action->action_type() == ActionNode::STORE_POSITION) {
314 *cp_offset = static_cast<DeferredCapture*>(action)->cp_offset();
315 return true;
316 } else {
317 return false;
318 }
319 }
320 }
321 return false;
322}
323
324// A (dynamically-sized) set of unsigned integers that behaves especially well
325// on small integers (< kFirstLimit). May do zone-allocation.
326class DynamicBitSet : public ZoneObject {
327 public:
328 V8_EXPORT_PRIVATE bool Get(unsigned value) const {
329 if (value < kFirstLimit) {
330 return (first_ & (1 << value)) != 0;
331 } else if (remaining_ == nullptr) {
332 return false;
333 } else {
334 return remaining_->Contains(value);
335 }
336 }
337
338 // Destructively set a value in this set.
339 void Set(unsigned value, Zone* zone) {
340 if (value < kFirstLimit) {
341 first_ |= (1 << value);
342 } else {
343 if (remaining_ == nullptr)
344 remaining_ = zone->New<ZoneList<unsigned>>(1, zone);
345 if (remaining_->is_empty() || !remaining_->Contains(value))
346 remaining_->Add(value, zone);
347 }
348 }
349
350 private:
351 static constexpr unsigned kFirstLimit = 32;
352
353 uint32_t first_ = 0;
354 ZoneList<unsigned>* remaining_ = nullptr;
355};
356
357int Trace::FindAffectedRegisters(DynamicBitSet* affected_registers,
358 Zone* zone) {
359 int max_register = RegExpCompiler::kNoRegister;
360 for (DeferredAction* action = actions_; action != nullptr;
361 action = action->next()) {
362 if (action->action_type() == ActionNode::CLEAR_CAPTURES) {
363 Interval range = static_cast<DeferredClearCaptures*>(action)->range();
364 for (int i = range.from(); i <= range.to(); i++)
365 affected_registers->Set(i, zone);
366 if (range.to() > max_register) max_register = range.to();
367 } else {
368 affected_registers->Set(action->reg(), zone);
369 if (action->reg() > max_register) max_register = action->reg();
370 }
371 }
372 return max_register;
373}
374
375void Trace::RestoreAffectedRegisters(RegExpMacroAssembler* assembler,
376 int max_register,
377 const DynamicBitSet& registers_to_pop,
378 const DynamicBitSet& registers_to_clear) {
379 for (int reg = max_register; reg >= 0; reg--) {
380 if (registers_to_pop.Get(reg)) {
381 assembler->PopRegister(reg);
382 } else if (registers_to_clear.Get(reg)) {
383 int clear_to = reg;
384 while (reg > 0 && registers_to_clear.Get(reg - 1)) {
385 reg--;
386 }
387 assembler->ClearRegisters(reg, clear_to);
388 }
389 }
390}
391
392void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler,
393 int max_register,
394 const DynamicBitSet& affected_registers,
395 DynamicBitSet* registers_to_pop,
396 DynamicBitSet* registers_to_clear,
397 Zone* zone) {
398 // The "+1" is to avoid a push_limit of zero if stack_limit_slack() is 1.
399 const int push_limit = (assembler->stack_limit_slack() + 1) / 2;
400
401 // Count pushes performed to force a stack limit check occasionally.
402 int pushes = 0;
403
404 for (int reg = 0; reg <= max_register; reg++) {
405 if (!affected_registers.Get(reg)) continue;
406
407 // The chronologically first deferred action in the trace
408 // is used to infer the action needed to restore a register
409 // to its previous state (or not, if it's safe to ignore it).
410 enum DeferredActionUndoType { IGNORE, RESTORE, CLEAR };
411 DeferredActionUndoType undo_action = IGNORE;
412
413 int value = 0;
414 bool absolute = false;
415 bool clear = false;
416 static const int kNoStore = kMinInt((int32_t)0x80000000);
417 int store_position = kNoStore;
418 // This is a little tricky because we are scanning the actions in reverse
419 // historical order (newest first).
420 for (DeferredAction* action = actions_; action != nullptr;
421 action = action->next()) {
422 if (action->Mentions(reg)) {
423 switch (action->action_type()) {
424 case ActionNode::SET_REGISTER_FOR_LOOP: {
425 Trace::DeferredSetRegisterForLoop* psr =
426 static_cast<Trace::DeferredSetRegisterForLoop*>(action);
427 if (!absolute) {
428 value += psr->value();
429 absolute = true;
430 }
431 // SET_REGISTER_FOR_LOOP is only used for newly introduced loop
432 // counters. They can have a significant previous value if they
433 // occur in a loop. TODO(lrn): Propagate this information, so
434 // we can set undo_action to IGNORE if we know there is no value to
435 // restore.
436 undo_action = RESTORE;
437 DCHECK_EQ(store_position, kNoStore)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((store_position) == (kNoStore))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((store_position) == (kNoStore
)))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("(store_position) == (kNoStore)", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 437); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(store_position) == (kNoStore)"
")"); do { *((volatile int*)__null) = 437; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
438 DCHECK(!clear)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(!clear)>::isValid, "invalid assertion condition")
; if ((__builtin_expect(!!(!(!!(!clear))), 0))) { do { } while
(false); MOZ_ReportAssertionFailure("!clear", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 438); AnnotateMozCrashReason("MOZ_ASSERT" "(" "!clear" ")")
; do { *((volatile int*)__null) = 438; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
439 break;
440 }
441 case ActionNode::INCREMENT_REGISTER:
442 if (!absolute) {
443 value++;
444 }
445 DCHECK_EQ(store_position, kNoStore)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((store_position) == (kNoStore))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((store_position) == (kNoStore
)))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("(store_position) == (kNoStore)", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 445); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(store_position) == (kNoStore)"
")"); do { *((volatile int*)__null) = 445; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
446 DCHECK(!clear)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(!clear)>::isValid, "invalid assertion condition")
; if ((__builtin_expect(!!(!(!!(!clear))), 0))) { do { } while
(false); MOZ_ReportAssertionFailure("!clear", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 446); AnnotateMozCrashReason("MOZ_ASSERT" "(" "!clear" ")")
; do { *((volatile int*)__null) = 446; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
447 undo_action = RESTORE;
448 break;
449 case ActionNode::STORE_POSITION: {
450 Trace::DeferredCapture* pc =
451 static_cast<Trace::DeferredCapture*>(action);
452 if (!clear && store_position == kNoStore) {
453 store_position = pc->cp_offset();
454 }
455
456 // For captures we know that stores and clears alternate.
457 // Other register, are never cleared, and if the occur
458 // inside a loop, they might be assigned more than once.
459 if (reg <= 1) {
460 // Registers zero and one, aka "capture zero", is
461 // always set correctly if we succeed. There is no
462 // need to undo a setting on backtrack, because we
463 // will set it again or fail.
464 undo_action = IGNORE;
465 } else {
466 undo_action = pc->is_capture() ? CLEAR : RESTORE;
467 }
468 DCHECK(!absolute)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(!absolute)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(!absolute))), 0))) { do { } while
(false); MOZ_ReportAssertionFailure("!absolute", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 468); AnnotateMozCrashReason("MOZ_ASSERT" "(" "!absolute" ")"
); do { *((volatile int*)__null) = 468; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
469 DCHECK_EQ(value, 0)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((value) == (0))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((value) == (0)))), 0))) { do
{ } while (false); MOZ_ReportAssertionFailure("(value) == (0)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 469); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(value) == (0)"
")"); do { *((volatile int*)__null) = 469; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
470 break;
471 }
472 case ActionNode::CLEAR_CAPTURES: {
473 // Since we're scanning in reverse order, if we've already
474 // set the position we have to ignore historically earlier
475 // clearing operations.
476 if (store_position == kNoStore) {
477 clear = true;
478 }
479 undo_action = RESTORE;
480 DCHECK(!absolute)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(!absolute)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(!absolute))), 0))) { do { } while
(false); MOZ_ReportAssertionFailure("!absolute", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 480); AnnotateMozCrashReason("MOZ_ASSERT" "(" "!absolute" ")"
); do { *((volatile int*)__null) = 480; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
481 DCHECK_EQ(value, 0)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((value) == (0))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((value) == (0)))), 0))) { do
{ } while (false); MOZ_ReportAssertionFailure("(value) == (0)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 481); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(value) == (0)"
")"); do { *((volatile int*)__null) = 481; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
482 break;
483 }
484 default:
485 UNREACHABLE()do { do { } while (false); MOZ_ReportCrash("" "unreachable code"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 485); AnnotateMozCrashReason("MOZ_CRASH(" "unreachable code"
")"); do { *((volatile int*)__null) = 485; __attribute__((nomerge
)) ::abort(); } while (false); } while (false)
;
486 }
487 }
488 }
489 // Prepare for the undo-action (e.g., push if it's going to be popped).
490 if (undo_action == RESTORE) {
491 pushes++;
492 RegExpMacroAssembler::StackCheckFlag stack_check =
493 RegExpMacroAssembler::kNoStackLimitCheck;
494 if (pushes == push_limit) {
495 stack_check = RegExpMacroAssembler::kCheckStackLimit;
496 pushes = 0;
497 }
498
499 assembler->PushRegister(reg, stack_check);
500 registers_to_pop->Set(reg, zone);
501 } else if (undo_action == CLEAR) {
502 registers_to_clear->Set(reg, zone);
503 }
504 // Perform the chronologically last action (or accumulated increment)
505 // for the register.
506 if (store_position != kNoStore) {
507 assembler->WriteCurrentPositionToRegister(reg, store_position);
508 } else if (clear) {
509 assembler->ClearRegisters(reg, reg);
510 } else if (absolute) {
511 assembler->SetRegister(reg, value);
512 } else if (value != 0) {
513 assembler->AdvanceRegister(reg, value);
514 }
515 }
516}
517
518// This is called as we come into a loop choice node and some other tricky
519// nodes. It normalizes the state of the code generator to ensure we can
520// generate generic code.
521void Trace::Flush(RegExpCompiler* compiler, RegExpNode* successor) {
522 RegExpMacroAssembler* assembler = compiler->macro_assembler();
523
524 DCHECK(!is_trivial())do { static_assert( mozilla::detail::AssertionConditionType<
decltype(!is_trivial())>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(!is_trivial()))), 0))) { do {
} while (false); MOZ_ReportAssertionFailure("!is_trivial()",
"/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 524); AnnotateMozCrashReason("MOZ_ASSERT" "(" "!is_trivial()"
")"); do { *((volatile int*)__null) = 524; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
525
526 if (actions_ == nullptr && backtrack() == nullptr) {
527 // Here we just have some deferred cp advances to fix and we are back to
528 // a normal situation. We may also have to forget some information gained
529 // through a quick check that was already performed.
530 if (cp_offset_ != 0) assembler->AdvanceCurrentPosition(cp_offset_);
531 // Create a new trivial state and generate the node with that.
532 Trace new_state;
533 successor->Emit(compiler, &new_state);
534 return;
535 }
536
537 // Generate deferred actions here along with code to undo them again.
538 DynamicBitSet affected_registers;
539
540 if (backtrack() != nullptr) {
541 // Here we have a concrete backtrack location. These are set up by choice
542 // nodes and so they indicate that we have a deferred save of the current
543 // position which we may need to emit here.
544 assembler->PushCurrentPosition();
545 }
546
547 int max_register =
548 FindAffectedRegisters(&affected_registers, compiler->zone());
549 DynamicBitSet registers_to_pop;
550 DynamicBitSet registers_to_clear;
551 PerformDeferredActions(assembler, max_register, affected_registers,
552 &registers_to_pop, &registers_to_clear,
553 compiler->zone());
554 if (cp_offset_ != 0) {
555 assembler->AdvanceCurrentPosition(cp_offset_);
556 }
557
558 // Create a new trivial state and generate the node with that.
559 Label undo;
560 assembler->PushBacktrack(&undo);
561 if (successor->KeepRecursing(compiler)) {
562 Trace new_state;
563 successor->Emit(compiler, &new_state);
564 } else {
565 compiler->AddWork(successor);
566 assembler->GoTo(successor->label());
567 }
568
569 // On backtrack we need to restore state.
570 assembler->BindJumpTarget(&undo);
571 RestoreAffectedRegisters(assembler, max_register, registers_to_pop,
572 registers_to_clear);
573 if (backtrack() == nullptr) {
574 assembler->Backtrack();
575 } else {
576 assembler->PopCurrentPosition();
577 assembler->GoTo(backtrack());
578 }
579}
580
581void NegativeSubmatchSuccess::Emit(RegExpCompiler* compiler, Trace* trace) {
582 RegExpMacroAssembler* assembler = compiler->macro_assembler();
583
584 // Omit flushing the trace. We discard the entire stack frame anyway.
585
586 if (!label()->is_bound()) {
587 // We are completely independent of the trace, since we ignore it,
588 // so this code can be used as the generic version.
589 assembler->Bind(label());
590 }
591
592 // Throw away everything on the backtrack stack since the start
593 // of the negative submatch and restore the character position.
594 assembler->ReadCurrentPositionFromRegister(current_position_register_);
595 assembler->ReadStackPointerFromRegister(stack_pointer_register_);
596 if (clear_capture_count_ > 0) {
597 // Clear any captures that might have been performed during the success
598 // of the body of the negative look-ahead.
599 int clear_capture_end = clear_capture_start_ + clear_capture_count_ - 1;
600 assembler->ClearRegisters(clear_capture_start_, clear_capture_end);
601 }
602 // Now that we have unwound the stack we find at the top of the stack the
603 // backtrack that the BeginNegativeSubmatch node got.
604 assembler->Backtrack();
605}
606
607void EndNode::Emit(RegExpCompiler* compiler, Trace* trace) {
608 if (!trace->is_trivial()) {
609 trace->Flush(compiler, this);
610 return;
611 }
612 RegExpMacroAssembler* assembler = compiler->macro_assembler();
613 if (!label()->is_bound()) {
614 assembler->Bind(label());
615 }
616 switch (action_) {
617 case ACCEPT:
618 assembler->Succeed();
619 return;
620 case BACKTRACK:
621 assembler->GoTo(trace->backtrack());
622 return;
623 case NEGATIVE_SUBMATCH_SUCCESS:
624 // This case is handled in a different virtual method.
625 UNREACHABLE()do { do { } while (false); MOZ_ReportCrash("" "unreachable code"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 625); AnnotateMozCrashReason("MOZ_CRASH(" "unreachable code"
")"); do { *((volatile int*)__null) = 625; __attribute__((nomerge
)) ::abort(); } while (false); } while (false)
;
626 }
627 UNIMPLEMENTED()do { do { } while (false); MOZ_ReportCrash("" "unimplemented code"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 627); AnnotateMozCrashReason("MOZ_CRASH(" "unimplemented code"
")"); do { *((volatile int*)__null) = 627; __attribute__((nomerge
)) ::abort(); } while (false); } while (false)
;
628}
629
630void GuardedAlternative::AddGuard(Guard* guard, Zone* zone) {
631 if (guards_ == nullptr) guards_ = zone->New<ZoneList<Guard*>>(1, zone);
632 guards_->Add(guard, zone);
633}
634
635ActionNode* ActionNode::SetRegisterForLoop(int reg, int val,
636 RegExpNode* on_success) {
637 ActionNode* result =
638 on_success->zone()->New<ActionNode>(SET_REGISTER_FOR_LOOP, on_success);
639 result->data_.u_store_register.reg = reg;
640 result->data_.u_store_register.value = val;
641 return result;
642}
643
644ActionNode* ActionNode::IncrementRegister(int reg, RegExpNode* on_success) {
645 ActionNode* result =
646 on_success->zone()->New<ActionNode>(INCREMENT_REGISTER, on_success);
647 result->data_.u_increment_register.reg = reg;
648 return result;
649}
650
651ActionNode* ActionNode::StorePosition(int reg, bool is_capture,
652 RegExpNode* on_success) {
653 ActionNode* result =
654 on_success->zone()->New<ActionNode>(STORE_POSITION, on_success);
655 result->data_.u_position_register.reg = reg;
656 result->data_.u_position_register.is_capture = is_capture;
657 return result;
658}
659
660ActionNode* ActionNode::ClearCaptures(Interval range, RegExpNode* on_success) {
661 ActionNode* result =
662 on_success->zone()->New<ActionNode>(CLEAR_CAPTURES, on_success);
663 result->data_.u_clear_captures.range_from = range.from();
664 result->data_.u_clear_captures.range_to = range.to();
665 return result;
666}
667
668ActionNode* ActionNode::BeginPositiveSubmatch(int stack_reg, int position_reg,
669 RegExpNode* on_success) {
670 ActionNode* result =
671 on_success->zone()->New<ActionNode>(BEGIN_POSITIVE_SUBMATCH, on_success);
672 result->data_.u_submatch.stack_pointer_register = stack_reg;
673 result->data_.u_submatch.current_position_register = position_reg;
674 return result;
675}
676
677ActionNode* ActionNode::BeginNegativeSubmatch(int stack_reg, int position_reg,
678 RegExpNode* on_success) {
679 ActionNode* result =
680 on_success->zone()->New<ActionNode>(BEGIN_NEGATIVE_SUBMATCH, on_success);
681 result->data_.u_submatch.stack_pointer_register = stack_reg;
682 result->data_.u_submatch.current_position_register = position_reg;
683 return result;
684}
685
686ActionNode* ActionNode::PositiveSubmatchSuccess(int stack_reg, int position_reg,
687 int clear_register_count,
688 int clear_register_from,
689 RegExpNode* on_success) {
690 ActionNode* result = on_success->zone()->New<ActionNode>(
691 POSITIVE_SUBMATCH_SUCCESS, on_success);
692 result->data_.u_submatch.stack_pointer_register = stack_reg;
693 result->data_.u_submatch.current_position_register = position_reg;
694 result->data_.u_submatch.clear_register_count = clear_register_count;
695 result->data_.u_submatch.clear_register_from = clear_register_from;
696 return result;
697}
698
699ActionNode* ActionNode::EmptyMatchCheck(int start_register,
700 int repetition_register,
701 int repetition_limit,
702 RegExpNode* on_success) {
703 ActionNode* result =
704 on_success->zone()->New<ActionNode>(EMPTY_MATCH_CHECK, on_success);
705 result->data_.u_empty_match_check.start_register = start_register;
706 result->data_.u_empty_match_check.repetition_register = repetition_register;
707 result->data_.u_empty_match_check.repetition_limit = repetition_limit;
708 return result;
709}
710
711ActionNode* ActionNode::ModifyFlags(RegExpFlags flags, RegExpNode* on_success) {
712 ActionNode* result =
713 on_success->zone()->New<ActionNode>(MODIFY_FLAGS, on_success);
714 result->data_.u_modify_flags.flags = flags;
715 return result;
716}
717
718#define DEFINE_ACCEPT(Type) \
719 void Type##Node::Accept(NodeVisitor* visitor) { visitor->Visit##Type(this); }
720FOR_EACH_NODE_TYPE(DEFINE_ACCEPT)DEFINE_ACCEPT(End) DEFINE_ACCEPT(Action) DEFINE_ACCEPT(Choice
) DEFINE_ACCEPT(LoopChoice) DEFINE_ACCEPT(NegativeLookaroundChoice
) DEFINE_ACCEPT(BackReference) DEFINE_ACCEPT(Assertion) DEFINE_ACCEPT
(Text)
721#undef DEFINE_ACCEPT
722
723// -------------------------------------------------------------------
724// Emit code.
725
726void ChoiceNode::GenerateGuard(RegExpMacroAssembler* macro_assembler,
727 Guard* guard, Trace* trace) {
728 switch (guard->op()) {
729 case Guard::LT:
730 DCHECK(!trace->mentions_reg(guard->reg()))do { static_assert( mozilla::detail::AssertionConditionType<
decltype(!trace->mentions_reg(guard->reg()))>::isValid
, "invalid assertion condition"); if ((__builtin_expect(!!(!(
!!(!trace->mentions_reg(guard->reg())))), 0))) { do { }
while (false); MOZ_ReportAssertionFailure("!trace->mentions_reg(guard->reg())"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 730); AnnotateMozCrashReason("MOZ_ASSERT" "(" "!trace->mentions_reg(guard->reg())"
")"); do { *((volatile int*)__null) = 730; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
731 macro_assembler->IfRegisterGE(guard->reg(), guard->value(),
732 trace->backtrack());
733 break;
734 case Guard::GEQ:
735 DCHECK(!trace->mentions_reg(guard->reg()))do { static_assert( mozilla::detail::AssertionConditionType<
decltype(!trace->mentions_reg(guard->reg()))>::isValid
, "invalid assertion condition"); if ((__builtin_expect(!!(!(
!!(!trace->mentions_reg(guard->reg())))), 0))) { do { }
while (false); MOZ_ReportAssertionFailure("!trace->mentions_reg(guard->reg())"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 735); AnnotateMozCrashReason("MOZ_ASSERT" "(" "!trace->mentions_reg(guard->reg())"
")"); do { *((volatile int*)__null) = 735; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
736 macro_assembler->IfRegisterLT(guard->reg(), guard->value(),
737 trace->backtrack());
738 break;
739 }
740}
741
742namespace {
743
744#ifdef DEBUG1
745bool ContainsOnlyUtf16CodeUnits(unibrow::uchar* chars, int length) {
746 static_assert(sizeof(unibrow::uchar) == 4);
747 for (int i = 0; i < length; i++) {
748 if (chars[i] > String::kMaxUtf16CodeUnit) return false;
749 }
750 return true;
751}
752#endif // DEBUG
753
754// Returns the number of characters in the equivalence class, omitting those
755// that cannot occur in the source string because it is Latin1.
756int GetCaseIndependentLetters(Isolate* isolate, base::uc16 character,
757 bool one_byte_subject, unibrow::uchar* letters,
758 int letter_length) {
759#ifdef V8_INTL_SUPPORT1
760 if (RegExpCaseFolding::IgnoreSet().contains(character)) {
761 letters[0] = character;
762 DCHECK(ContainsOnlyUtf16CodeUnits(letters, 1))do { static_assert( mozilla::detail::AssertionConditionType<
decltype(ContainsOnlyUtf16CodeUnits(letters, 1))>::isValid
, "invalid assertion condition"); if ((__builtin_expect(!!(!(
!!(ContainsOnlyUtf16CodeUnits(letters, 1)))), 0))) { do { } while
(false); MOZ_ReportAssertionFailure("ContainsOnlyUtf16CodeUnits(letters, 1)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 762); AnnotateMozCrashReason("MOZ_ASSERT" "(" "ContainsOnlyUtf16CodeUnits(letters, 1)"
")"); do { *((volatile int*)__null) = 762; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
763 return 1;
764 }
765 bool in_special_add_set =
766 RegExpCaseFolding::SpecialAddSet().contains(character);
767
768 icu::UnicodeSet set;
769 set.add(character);
770 set = set.closeOver(USET_CASE_INSENSITIVE);
771
772 UChar32 canon = 0;
773 if (in_special_add_set) {
774 canon = RegExpCaseFolding::Canonicalize(character);
775 }
776
777 int32_t range_count = set.getRangeCount();
778 int items = 0;
779 for (int32_t i = 0; i < range_count; i++) {
780 UChar32 start = set.getRangeStart(i);
781 UChar32 end = set.getRangeEnd(i);
782 CHECK(end - start + items <= letter_length)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(end - start + items <= letter_length)>::isValid
, "invalid assertion condition"); if ((__builtin_expect(!!(!(
!!(end - start + items <= letter_length))), 0))) { do { } while
(false); MOZ_ReportAssertionFailure("end - start + items <= letter_length"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 782); AnnotateMozCrashReason("MOZ_RELEASE_ASSERT" "(" "end - start + items <= letter_length"
")"); do { *((volatile int*)__null) = 782; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
783 for (UChar32 cu = start; cu <= end; cu++) {
784 if (one_byte_subject && cu > String::kMaxOneByteCharCode) break;
785 if (in_special_add_set && RegExpCaseFolding::Canonicalize(cu) != canon) {
786 continue;
787 }
788 letters[items++] = static_cast<unibrow::uchar>(cu);
789 }
790 }
791 DCHECK(ContainsOnlyUtf16CodeUnits(letters, items))do { static_assert( mozilla::detail::AssertionConditionType<
decltype(ContainsOnlyUtf16CodeUnits(letters, items))>::isValid
, "invalid assertion condition"); if ((__builtin_expect(!!(!(
!!(ContainsOnlyUtf16CodeUnits(letters, items)))), 0))) { do {
} while (false); MOZ_ReportAssertionFailure("ContainsOnlyUtf16CodeUnits(letters, items)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 791); AnnotateMozCrashReason("MOZ_ASSERT" "(" "ContainsOnlyUtf16CodeUnits(letters, items)"
")"); do { *((volatile int*)__null) = 791; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
792 return items;
793#else
794 int length =
795 isolate->jsregexp_uncanonicalize()->get(character, '\0', letters);
796 // Unibrow returns 0 or 1 for characters where case independence is
797 // trivial.
798 if (length == 0) {
799 letters[0] = character;
800 length = 1;
801 }
802
803 if (one_byte_subject) {
804 int new_length = 0;
805 for (int i = 0; i < length; i++) {
806 if (letters[i] <= String::kMaxOneByteCharCode) {
807 letters[new_length++] = letters[i];
808 }
809 }
810 length = new_length;
811 }
812
813 DCHECK(ContainsOnlyUtf16CodeUnits(letters, length))do { static_assert( mozilla::detail::AssertionConditionType<
decltype(ContainsOnlyUtf16CodeUnits(letters, length))>::isValid
, "invalid assertion condition"); if ((__builtin_expect(!!(!(
!!(ContainsOnlyUtf16CodeUnits(letters, length)))), 0))) { do {
} while (false); MOZ_ReportAssertionFailure("ContainsOnlyUtf16CodeUnits(letters, length)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 813); AnnotateMozCrashReason("MOZ_ASSERT" "(" "ContainsOnlyUtf16CodeUnits(letters, length)"
")"); do { *((volatile int*)__null) = 813; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
814 return length;
815#endif // V8_INTL_SUPPORT
816}
817
818inline bool EmitSimpleCharacter(Isolate* isolate, RegExpCompiler* compiler,
819 base::uc16 c, Label* on_failure, int cp_offset,
820 bool check, bool preloaded) {
821 RegExpMacroAssembler* assembler = compiler->macro_assembler();
822 bool bound_checked = false;
823 if (!preloaded) {
824 assembler->LoadCurrentCharacter(cp_offset, on_failure, check);
825 bound_checked = true;
826 }
827 assembler->CheckNotCharacter(c, on_failure);
828 return bound_checked;
829}
830
831// Only emits non-letters (things that don't have case). Only used for case
832// independent matches.
833inline bool EmitAtomNonLetter(Isolate* isolate, RegExpCompiler* compiler,
834 base::uc16 c, Label* on_failure, int cp_offset,
835 bool check, bool preloaded) {
836 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
837 bool one_byte = compiler->one_byte();
838 unibrow::uchar chars[4];
839 int length = GetCaseIndependentLetters(isolate, c, one_byte, chars, 4);
840 if (length < 1) {
841 // This can't match. Must be an one-byte subject and a non-one-byte
842 // character. We do not need to do anything since the one-byte pass
843 // already handled this.
844 return false; // Bounds not checked.
845 }
846 bool checked = false;
847 // We handle the length > 1 case in a later pass.
848 if (length == 1) {
849 if (one_byte && c > String::kMaxOneByteCharCodeU) {
850 // Can't match - see above.
851 return false; // Bounds not checked.
852 }
853 if (!preloaded) {
854 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check);
855 checked = check;
856 }
857 macro_assembler->CheckNotCharacter(c, on_failure);
858 }
859 return checked;
860}
861
862bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler,
863 bool one_byte, base::uc16 c1, base::uc16 c2,
864 Label* on_failure) {
865 const uint32_t char_mask = CharMask(one_byte);
866 base::uc16 exor = c1 ^ c2;
867 // Check whether exor has only one bit set.
868 if (((exor - 1) & exor) == 0) {
869 // If c1 and c2 differ only by one bit.
870 // Ecma262UnCanonicalize always gives the highest number last.
871 DCHECK(c2 > c1)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(c2 > c1)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(c2 > c1))), 0))) { do { }
while (false); MOZ_ReportAssertionFailure("c2 > c1", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 871); AnnotateMozCrashReason("MOZ_ASSERT" "(" "c2 > c1" ")"
); do { *((volatile int*)__null) = 871; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
872 base::uc16 mask = char_mask ^ exor;
873 macro_assembler->CheckNotCharacterAfterAnd(c1, mask, on_failure);
874 return true;
875 }
876 DCHECK(c2 > c1)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(c2 > c1)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(c2 > c1))), 0))) { do { }
while (false); MOZ_ReportAssertionFailure("c2 > c1", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 876); AnnotateMozCrashReason("MOZ_ASSERT" "(" "c2 > c1" ")"
); do { *((volatile int*)__null) = 876; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
877 base::uc16 diff = c2 - c1;
878 if (((diff - 1) & diff) == 0 && c1 >= diff) {
879 // If the characters differ by 2^n but don't differ by one bit then
880 // subtract the difference from the found character, then do the or
881 // trick. We avoid the theoretical case where negative numbers are
882 // involved in order to simplify code generation.
883 base::uc16 mask = char_mask ^ diff;
884 macro_assembler->CheckNotCharacterAfterMinusAnd(c1 - diff, diff, mask,
885 on_failure);
886 return true;
887 }
888 return false;
889}
890
891// Only emits letters (things that have case). Only used for case independent
892// matches.
893inline bool EmitAtomLetter(Isolate* isolate, RegExpCompiler* compiler,
894 base::uc16 c, Label* on_failure, int cp_offset,
895 bool check, bool preloaded) {
896 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
897 bool one_byte = compiler->one_byte();
898 unibrow::uchar chars[4];
899 int length = GetCaseIndependentLetters(isolate, c, one_byte, chars, 4);
900 if (length <= 1) return false;
901 // We may not need to check against the end of the input string
902 // if this character lies before a character that matched.
903 if (!preloaded) {
904 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check);
905 }
906 Label ok;
907 switch (length) {
908 case 2: {
909 if (ShortCutEmitCharacterPair(macro_assembler, one_byte, chars[0],
910 chars[1], on_failure)) {
911 } else {
912 macro_assembler->CheckCharacter(chars[0], &ok);
913 macro_assembler->CheckNotCharacter(chars[1], on_failure);
914 macro_assembler->Bind(&ok);
915 }
916 break;
917 }
918 case 4:
919 macro_assembler->CheckCharacter(chars[3], &ok);
920 [[fallthrough]];
921 case 3:
922 macro_assembler->CheckCharacter(chars[0], &ok);
923 macro_assembler->CheckCharacter(chars[1], &ok);
924 macro_assembler->CheckNotCharacter(chars[2], on_failure);
925 macro_assembler->Bind(&ok);
926 break;
927 default:
928 UNREACHABLE()do { do { } while (false); MOZ_ReportCrash("" "unreachable code"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 928); AnnotateMozCrashReason("MOZ_CRASH(" "unreachable code"
")"); do { *((volatile int*)__null) = 928; __attribute__((nomerge
)) ::abort(); } while (false); } while (false)
;
929 }
930 return true;
931}
932
933void EmitBoundaryTest(RegExpMacroAssembler* masm, int border,
934 Label* fall_through, Label* above_or_equal,
935 Label* below) {
936 if (below != fall_through) {
937 masm->CheckCharacterLT(border, below);
938 if (above_or_equal != fall_through) masm->GoTo(above_or_equal);
939 } else {
940 masm->CheckCharacterGT(border - 1, above_or_equal);
941 }
942}
943
944void EmitDoubleBoundaryTest(RegExpMacroAssembler* masm, int first, int last,
945 Label* fall_through, Label* in_range,
946 Label* out_of_range) {
947 if (in_range == fall_through) {
948 if (first == last) {
949 masm->CheckNotCharacter(first, out_of_range);
950 } else {
951 masm->CheckCharacterNotInRange(first, last, out_of_range);
952 }
953 } else {
954 if (first == last) {
955 masm->CheckCharacter(first, in_range);
956 } else {
957 masm->CheckCharacterInRange(first, last, in_range);
958 }
959 if (out_of_range != fall_through) masm->GoTo(out_of_range);
960 }
961}
962
963// even_label is for ranges[i] to ranges[i + 1] where i - start_index is even.
964// odd_label is for ranges[i] to ranges[i + 1] where i - start_index is odd.
965void EmitUseLookupTable(RegExpMacroAssembler* masm,
966 ZoneList<base::uc32>* ranges, uint32_t start_index,
967 uint32_t end_index, base::uc32 min_char,
968 Label* fall_through, Label* even_label,
969 Label* odd_label) {
970 static const uint32_t kSize = RegExpMacroAssembler::kTableSize;
971 static const uint32_t kMask = RegExpMacroAssembler::kTableMask;
972
973 base::uc32 base = (min_char & ~kMask);
974 USE(base)do { ::v8::base::Use unused_tmp_array_for_use_macro[]{base}; (
void)unused_tmp_array_for_use_macro; } while (false)
;
975
976 // Assert that everything is on one kTableSize page.
977 for (uint32_t i = start_index; i <= end_index; i++) {
978 DCHECK_EQ(ranges->at(i) & ~kMask, base)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((ranges->at(i) & ~kMask) == (base))>::isValid
, "invalid assertion condition"); if ((__builtin_expect(!!(!(
!!((ranges->at(i) & ~kMask) == (base)))), 0))) { do { }
while (false); MOZ_ReportAssertionFailure("(ranges->at(i) & ~kMask) == (base)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 978); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(ranges->at(i) & ~kMask) == (base)"
")"); do { *((volatile int*)__null) = 978; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
979 }
980 DCHECK(start_index == 0 || (ranges->at(start_index - 1) & ~kMask) <= base)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(start_index == 0 || (ranges->at(start_index - 1) &
~kMask) <= base)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(start_index == 0 || (ranges->
at(start_index - 1) & ~kMask) <= base))), 0))) { do { }
while (false); MOZ_ReportAssertionFailure("start_index == 0 || (ranges->at(start_index - 1) & ~kMask) <= base"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 980); AnnotateMozCrashReason("MOZ_ASSERT" "(" "start_index == 0 || (ranges->at(start_index - 1) & ~kMask) <= base"
")"); do { *((volatile int*)__null) = 980; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
981
982 char templ[kSize];
983 Label* on_bit_set;
984 Label* on_bit_clear;
985 int bit;
986 if (even_label == fall_through) {
987 on_bit_set = odd_label;
988 on_bit_clear = even_label;
989 bit = 1;
990 } else {
991 on_bit_set = even_label;
992 on_bit_clear = odd_label;
993 bit = 0;
994 }
995 for (uint32_t i = 0; i < (ranges->at(start_index) & kMask) && i < kSize;
996 i++) {
997 templ[i] = bit;
998 }
999 uint32_t j = 0;
1000 bit ^= 1;
1001 for (uint32_t i = start_index; i < end_index; i++) {
1002 for (j = (ranges->at(i) & kMask); j < (ranges->at(i + 1) & kMask); j++) {
1003 templ[j] = bit;
1004 }
1005 bit ^= 1;
1006 }
1007 for (uint32_t i = j; i < kSize; i++) {
1008 templ[i] = bit;
1009 }
1010 Factory* factory = masm->isolate()->factory();
1011 // TODO(erikcorry): Cache these.
1012 Handle<ByteArray> ba = factory->NewByteArray(kSize, AllocationType::kOld);
1013 for (uint32_t i = 0; i < kSize; i++) {
1014 ba->set(i, templ[i]);
1015 }
1016 masm->CheckBitInTable(ba, on_bit_set);
1017 if (on_bit_clear != fall_through) masm->GoTo(on_bit_clear);
1018}
1019
1020void CutOutRange(RegExpMacroAssembler* masm, ZoneList<base::uc32>* ranges,
1021 uint32_t start_index, uint32_t end_index, uint32_t cut_index,
1022 Label* even_label, Label* odd_label) {
1023 bool odd = (((cut_index - start_index) & 1) == 1);
1024 Label* in_range_label = odd ? odd_label : even_label;
1025 Label dummy;
1026 EmitDoubleBoundaryTest(masm, ranges->at(cut_index),
1027 ranges->at(cut_index + 1) - 1, &dummy, in_range_label,
1028 &dummy);
1029 DCHECK(!dummy.is_linked())do { static_assert( mozilla::detail::AssertionConditionType<
decltype(!dummy.is_linked())>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(!dummy.is_linked()))), 0))) {
do { } while (false); MOZ_ReportAssertionFailure("!dummy.is_linked()"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1029); AnnotateMozCrashReason("MOZ_ASSERT" "(" "!dummy.is_linked()"
")"); do { *((volatile int*)__null) = 1029; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1030 // Cut out the single range by rewriting the array. This creates a new
1031 // range that is a merger of the two ranges on either side of the one we
1032 // are cutting out. The oddity of the labels is preserved.
1033 for (uint32_t j = cut_index; j > start_index; j--) {
1034 ranges->at(j) = ranges->at(j - 1);
1035 }
1036 for (uint32_t j = cut_index + 1; j < end_index; j++) {
1037 ranges->at(j) = ranges->at(j + 1);
1038 }
1039}
1040
1041// Unicode case. Split the search space into kSize spaces that are handled
1042// with recursion.
1043void SplitSearchSpace(ZoneList<base::uc32>* ranges, uint32_t start_index,
1044 uint32_t end_index, uint32_t* new_start_index,
1045 uint32_t* new_end_index, base::uc32* border) {
1046 static const uint32_t kSize = RegExpMacroAssembler::kTableSize;
1047 static const uint32_t kMask = RegExpMacroAssembler::kTableMask;
1048
1049 base::uc32 first = ranges->at(start_index);
1050 base::uc32 last = ranges->at(end_index) - 1;
1051
1052 *new_start_index = start_index;
1053 *border = (ranges->at(start_index) & ~kMask) + kSize;
1054 while (*new_start_index < end_index) {
1055 if (ranges->at(*new_start_index) > *border) break;
1056 (*new_start_index)++;
1057 }
1058 // new_start_index is the index of the first edge that is beyond the
1059 // current kSize space.
1060
1061 // For very large search spaces we do a binary chop search of the non-Latin1
1062 // space instead of just going to the end of the current kSize space. The
1063 // heuristics are complicated a little by the fact that any 128-character
1064 // encoding space can be quickly tested with a table lookup, so we don't
1065 // wish to do binary chop search at a smaller granularity than that. A
1066 // 128-character space can take up a lot of space in the ranges array if,
1067 // for example, we only want to match every second character (eg. the lower
1068 // case characters on some Unicode pages).
1069 uint32_t binary_chop_index = (end_index + start_index) / 2;
1070 // The first test ensures that we get to the code that handles the Latin1
1071 // range with a single not-taken branch, speeding up this important
1072 // character range (even non-Latin1 charset-based text has spaces and
1073 // punctuation).
1074 if (*border - 1 > String::kMaxOneByteCharCode && // Latin1 case.
1075 end_index - start_index > (*new_start_index - start_index) * 2 &&
1076 last - first > kSize * 2 && binary_chop_index > *new_start_index &&
1077 ranges->at(binary_chop_index) >= first + 2 * kSize) {
1078 uint32_t scan_forward_for_section_border = binary_chop_index;
1079 uint32_t new_border = (ranges->at(binary_chop_index) | kMask) + 1;
1080
1081 while (scan_forward_for_section_border < end_index) {
1082 if (ranges->at(scan_forward_for_section_border) > new_border) {
1083 *new_start_index = scan_forward_for_section_border;
1084 *border = new_border;
1085 break;
1086 }
1087 scan_forward_for_section_border++;
1088 }
1089 }
1090
1091 DCHECK(*new_start_index > start_index)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(*new_start_index > start_index)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(*new_start_index > start_index
))), 0))) { do { } while (false); MOZ_ReportAssertionFailure(
"*new_start_index > start_index", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1091); AnnotateMozCrashReason("MOZ_ASSERT" "(" "*new_start_index > start_index"
")"); do { *((volatile int*)__null) = 1091; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1092 *new_end_index = *new_start_index - 1;
1093 if (ranges->at(*new_end_index) == *border) {
1094 (*new_end_index)--;
1095 }
1096 if (*border >= ranges->at(end_index)) {
1097 *border = ranges->at(end_index);
1098 *new_start_index = end_index; // Won't be used.
1099 *new_end_index = end_index - 1;
1100 }
1101}
1102
1103// Gets a series of segment boundaries representing a character class. If the
1104// character is in the range between an even and an odd boundary (counting from
1105// start_index) then go to even_label, otherwise go to odd_label. We already
1106// know that the character is in the range of min_char to max_char inclusive.
1107// Either label can be nullptr indicating backtracking. Either label can also
1108// be equal to the fall_through label.
1109void GenerateBranches(RegExpMacroAssembler* masm, ZoneList<base::uc32>* ranges,
1110 uint32_t start_index, uint32_t end_index,
1111 base::uc32 min_char, base::uc32 max_char,
1112 Label* fall_through, Label* even_label,
1113 Label* odd_label) {
1114 DCHECK_LE(min_char, String::kMaxUtf16CodeUnit)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((min_char) <= (String::kMaxUtf16CodeUnit))>::isValid
, "invalid assertion condition"); if ((__builtin_expect(!!(!(
!!((min_char) <= (String::kMaxUtf16CodeUnit)))), 0))) { do
{ } while (false); MOZ_ReportAssertionFailure("(min_char) <= (String::kMaxUtf16CodeUnit)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1114); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(min_char) <= (String::kMaxUtf16CodeUnit)"
")"); do { *((volatile int*)__null) = 1114; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1115 DCHECK_LE(max_char, String::kMaxUtf16CodeUnit)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((max_char) <= (String::kMaxUtf16CodeUnit))>::isValid
, "invalid assertion condition"); if ((__builtin_expect(!!(!(
!!((max_char) <= (String::kMaxUtf16CodeUnit)))), 0))) { do
{ } while (false); MOZ_ReportAssertionFailure("(max_char) <= (String::kMaxUtf16CodeUnit)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1115); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(max_char) <= (String::kMaxUtf16CodeUnit)"
")"); do { *((volatile int*)__null) = 1115; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1116
1117 base::uc32 first = ranges->at(start_index);
1118 base::uc32 last = ranges->at(end_index) - 1;
1119
1120 DCHECK_LT(min_char, first)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((min_char) < (first))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((min_char) < (first)))), 0
))) { do { } while (false); MOZ_ReportAssertionFailure("(min_char) < (first)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1120); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(min_char) < (first)"
")"); do { *((volatile int*)__null) = 1120; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1121
1122 // Just need to test if the character is before or on-or-after
1123 // a particular character.
1124 if (start_index == end_index) {
1125 EmitBoundaryTest(masm, first, fall_through, even_label, odd_label);
1126 return;
1127 }
1128
1129 // Another almost trivial case: There is one interval in the middle that is
1130 // different from the end intervals.
1131 if (start_index + 1 == end_index) {
1132 EmitDoubleBoundaryTest(masm, first, last, fall_through, even_label,
1133 odd_label);
1134 return;
1135 }
1136
1137 // It's not worth using table lookup if there are very few intervals in the
1138 // character class.
1139 if (end_index - start_index <= 6) {
1140 // It is faster to test for individual characters, so we look for those
1141 // first, then try arbitrary ranges in the second round.
1142 static uint32_t kNoCutIndex = -1;
1143 uint32_t cut = kNoCutIndex;
1144 for (uint32_t i = start_index; i < end_index; i++) {
1145 if (ranges->at(i) == ranges->at(i + 1) - 1) {
1146 cut = i;
1147 break;
1148 }
1149 }
1150 if (cut == kNoCutIndex) cut = start_index;
1151 CutOutRange(masm, ranges, start_index, end_index, cut, even_label,
1152 odd_label);
1153 DCHECK_GE(end_index - start_index, 2)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((end_index - start_index) >= (2))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((end_index - start_index) >=
(2)))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("(end_index - start_index) >= (2)", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1153); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(end_index - start_index) >= (2)"
")"); do { *((volatile int*)__null) = 1153; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1154 GenerateBranches(masm, ranges, start_index + 1, end_index - 1, min_char,
1155 max_char, fall_through, even_label, odd_label);
1156 return;
1157 }
1158
1159 // If there are a lot of intervals in the regexp, then we will use tables to
1160 // determine whether the character is inside or outside the character class.
1161 static const int kBits = RegExpMacroAssembler::kTableSizeBits;
1162
1163 if ((max_char >> kBits) == (min_char >> kBits)) {
1164 EmitUseLookupTable(masm, ranges, start_index, end_index, min_char,
1165 fall_through, even_label, odd_label);
1166 return;
1167 }
1168
1169 if ((min_char >> kBits) != first >> kBits) {
1170 masm->CheckCharacterLT(first, odd_label);
1171 GenerateBranches(masm, ranges, start_index + 1, end_index, first, max_char,
1172 fall_through, odd_label, even_label);
1173 return;
1174 }
1175
1176 uint32_t new_start_index = 0;
1177 uint32_t new_end_index = 0;
1178 base::uc32 border = 0;
1179
1180 SplitSearchSpace(ranges, start_index, end_index, &new_start_index,
1181 &new_end_index, &border);
1182
1183 Label handle_rest;
1184 Label* above = &handle_rest;
1185 if (border == last + 1) {
1186 // We didn't find any section that started after the limit, so everything
1187 // above the border is one of the terminal labels.
1188 above = (end_index & 1) != (start_index & 1) ? odd_label : even_label;
1189 DCHECK(new_end_index == end_index - 1)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(new_end_index == end_index - 1)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(new_end_index == end_index -
1))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("new_end_index == end_index - 1", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1189); AnnotateMozCrashReason("MOZ_ASSERT" "(" "new_end_index == end_index - 1"
")"); do { *((volatile int*)__null) = 1189; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1190 }
1191
1192 DCHECK_LE(start_index, new_end_index)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((start_index) <= (new_end_index))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((start_index) <= (new_end_index
)))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("(start_index) <= (new_end_index)", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1192); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(start_index) <= (new_end_index)"
")"); do { *((volatile int*)__null) = 1192; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1193 DCHECK_LE(new_start_index, end_index)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((new_start_index) <= (end_index))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((new_start_index) <= (end_index
)))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("(new_start_index) <= (end_index)", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1193); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(new_start_index) <= (end_index)"
")"); do { *((volatile int*)__null) = 1193; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1194 DCHECK_LT(start_index, new_start_index)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((start_index) < (new_start_index))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((start_index) < (new_start_index
)))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("(start_index) < (new_start_index)", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1194); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(start_index) < (new_start_index)"
")"); do { *((volatile int*)__null) = 1194; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1195 DCHECK_LT(new_end_index, end_index)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((new_end_index) < (end_index))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((new_end_index) < (end_index
)))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("(new_end_index) < (end_index)", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1195); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(new_end_index) < (end_index)"
")"); do { *((volatile int*)__null) = 1195; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1196 DCHECK(new_end_index + 1 == new_start_index ||do { static_assert( mozilla::detail::AssertionConditionType<
decltype(new_end_index + 1 == new_start_index || (new_end_index
+ 2 == new_start_index && border == ranges->at(new_end_index
+ 1)))>::isValid, "invalid assertion condition"); if ((__builtin_expect
(!!(!(!!(new_end_index + 1 == new_start_index || (new_end_index
+ 2 == new_start_index && border == ranges->at(new_end_index
+ 1))))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("new_end_index + 1 == new_start_index || (new_end_index + 2 == new_start_index && border == ranges->at(new_end_index + 1))"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1198); AnnotateMozCrashReason("MOZ_ASSERT" "(" "new_end_index + 1 == new_start_index || (new_end_index + 2 == new_start_index && border == ranges->at(new_end_index + 1))"
")"); do { *((volatile int*)__null) = 1198; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
1197 (new_end_index + 2 == new_start_index &&do { static_assert( mozilla::detail::AssertionConditionType<
decltype(new_end_index + 1 == new_start_index || (new_end_index
+ 2 == new_start_index && border == ranges->at(new_end_index
+ 1)))>::isValid, "invalid assertion condition"); if ((__builtin_expect
(!!(!(!!(new_end_index + 1 == new_start_index || (new_end_index
+ 2 == new_start_index && border == ranges->at(new_end_index
+ 1))))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("new_end_index + 1 == new_start_index || (new_end_index + 2 == new_start_index && border == ranges->at(new_end_index + 1))"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1198); AnnotateMozCrashReason("MOZ_ASSERT" "(" "new_end_index + 1 == new_start_index || (new_end_index + 2 == new_start_index && border == ranges->at(new_end_index + 1))"
")"); do { *((volatile int*)__null) = 1198; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
1198 border == ranges->at(new_end_index + 1)))do { static_assert( mozilla::detail::AssertionConditionType<
decltype(new_end_index + 1 == new_start_index || (new_end_index
+ 2 == new_start_index && border == ranges->at(new_end_index
+ 1)))>::isValid, "invalid assertion condition"); if ((__builtin_expect
(!!(!(!!(new_end_index + 1 == new_start_index || (new_end_index
+ 2 == new_start_index && border == ranges->at(new_end_index
+ 1))))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("new_end_index + 1 == new_start_index || (new_end_index + 2 == new_start_index && border == ranges->at(new_end_index + 1))"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1198); AnnotateMozCrashReason("MOZ_ASSERT" "(" "new_end_index + 1 == new_start_index || (new_end_index + 2 == new_start_index && border == ranges->at(new_end_index + 1))"
")"); do { *((volatile int*)__null) = 1198; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1199 DCHECK_LT(min_char, border - 1)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((min_char) < (border - 1))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((min_char) < (border - 1)
))), 0))) { do { } while (false); MOZ_ReportAssertionFailure(
"(min_char) < (border - 1)", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1199); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(min_char) < (border - 1)"
")"); do { *((volatile int*)__null) = 1199; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1200 DCHECK_LT(border, max_char)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((border) < (max_char))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((border) < (max_char)))),
0))) { do { } while (false); MOZ_ReportAssertionFailure("(border) < (max_char)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1200); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(border) < (max_char)"
")"); do { *((volatile int*)__null) = 1200; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1201 DCHECK_LT(ranges->at(new_end_index), border)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((ranges->at(new_end_index)) < (border))>::isValid
, "invalid assertion condition"); if ((__builtin_expect(!!(!(
!!((ranges->at(new_end_index)) < (border)))), 0))) { do
{ } while (false); MOZ_ReportAssertionFailure("(ranges->at(new_end_index)) < (border)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1201); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(ranges->at(new_end_index)) < (border)"
")"); do { *((volatile int*)__null) = 1201; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1202 DCHECK(border < ranges->at(new_start_index) ||do { static_assert( mozilla::detail::AssertionConditionType<
decltype(border < ranges->at(new_start_index) || (border
== ranges->at(new_start_index) && new_start_index
== end_index && new_end_index == end_index - 1 &&
border == last + 1))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(border < ranges->at(new_start_index
) || (border == ranges->at(new_start_index) && new_start_index
== end_index && new_end_index == end_index - 1 &&
border == last + 1)))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("border < ranges->at(new_start_index) || (border == ranges->at(new_start_index) && new_start_index == end_index && new_end_index == end_index - 1 && border == last + 1)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1205); AnnotateMozCrashReason("MOZ_ASSERT" "(" "border < ranges->at(new_start_index) || (border == ranges->at(new_start_index) && new_start_index == end_index && new_end_index == end_index - 1 && border == last + 1)"
")"); do { *((volatile int*)__null) = 1205; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
1203 (border == ranges->at(new_start_index) &&do { static_assert( mozilla::detail::AssertionConditionType<
decltype(border < ranges->at(new_start_index) || (border
== ranges->at(new_start_index) && new_start_index
== end_index && new_end_index == end_index - 1 &&
border == last + 1))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(border < ranges->at(new_start_index
) || (border == ranges->at(new_start_index) && new_start_index
== end_index && new_end_index == end_index - 1 &&
border == last + 1)))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("border < ranges->at(new_start_index) || (border == ranges->at(new_start_index) && new_start_index == end_index && new_end_index == end_index - 1 && border == last + 1)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1205); AnnotateMozCrashReason("MOZ_ASSERT" "(" "border < ranges->at(new_start_index) || (border == ranges->at(new_start_index) && new_start_index == end_index && new_end_index == end_index - 1 && border == last + 1)"
")"); do { *((volatile int*)__null) = 1205; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
1204 new_start_index == end_index && new_end_index == end_index - 1 &&do { static_assert( mozilla::detail::AssertionConditionType<
decltype(border < ranges->at(new_start_index) || (border
== ranges->at(new_start_index) && new_start_index
== end_index && new_end_index == end_index - 1 &&
border == last + 1))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(border < ranges->at(new_start_index
) || (border == ranges->at(new_start_index) && new_start_index
== end_index && new_end_index == end_index - 1 &&
border == last + 1)))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("border < ranges->at(new_start_index) || (border == ranges->at(new_start_index) && new_start_index == end_index && new_end_index == end_index - 1 && border == last + 1)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1205); AnnotateMozCrashReason("MOZ_ASSERT" "(" "border < ranges->at(new_start_index) || (border == ranges->at(new_start_index) && new_start_index == end_index && new_end_index == end_index - 1 && border == last + 1)"
")"); do { *((volatile int*)__null) = 1205; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
1205 border == last + 1))do { static_assert( mozilla::detail::AssertionConditionType<
decltype(border < ranges->at(new_start_index) || (border
== ranges->at(new_start_index) && new_start_index
== end_index && new_end_index == end_index - 1 &&
border == last + 1))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(border < ranges->at(new_start_index
) || (border == ranges->at(new_start_index) && new_start_index
== end_index && new_end_index == end_index - 1 &&
border == last + 1)))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("border < ranges->at(new_start_index) || (border == ranges->at(new_start_index) && new_start_index == end_index && new_end_index == end_index - 1 && border == last + 1)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1205); AnnotateMozCrashReason("MOZ_ASSERT" "(" "border < ranges->at(new_start_index) || (border == ranges->at(new_start_index) && new_start_index == end_index && new_end_index == end_index - 1 && border == last + 1)"
")"); do { *((volatile int*)__null) = 1205; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1206 DCHECK(new_start_index == 0 || border >= ranges->at(new_start_index - 1))do { static_assert( mozilla::detail::AssertionConditionType<
decltype(new_start_index == 0 || border >= ranges->at(new_start_index
- 1))>::isValid, "invalid assertion condition"); if ((__builtin_expect
(!!(!(!!(new_start_index == 0 || border >= ranges->at(new_start_index
- 1)))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("new_start_index == 0 || border >= ranges->at(new_start_index - 1)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1206); AnnotateMozCrashReason("MOZ_ASSERT" "(" "new_start_index == 0 || border >= ranges->at(new_start_index - 1)"
")"); do { *((volatile int*)__null) = 1206; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1207
1208 masm->CheckCharacterGT(border - 1, above);
1209 Label dummy;
1210 GenerateBranches(masm, ranges, start_index, new_end_index, min_char,
1211 border - 1, &dummy, even_label, odd_label);
1212 if (handle_rest.is_linked()) {
1213 masm->Bind(&handle_rest);
1214 bool flip = (new_start_index & 1) != (start_index & 1);
1215 GenerateBranches(masm, ranges, new_start_index, end_index, border, max_char,
1216 &dummy, flip ? odd_label : even_label,
1217 flip ? even_label : odd_label);
1218 }
1219}
1220
1221void EmitClassRanges(RegExpMacroAssembler* macro_assembler,
1222 RegExpClassRanges* cr, bool one_byte, Label* on_failure,
1223 int cp_offset, bool check_offset, bool preloaded,
1224 Zone* zone) {
1225 ZoneList<CharacterRange>* ranges = cr->ranges(zone);
1226 CharacterRange::Canonicalize(ranges);
1227
1228 // Now that all processing (like case-insensitivity) is done, clamp the
1229 // ranges to the set of ranges that may actually occur in the subject string.
1230 if (one_byte) CharacterRange::ClampToOneByte(ranges);
1231
1232 const int ranges_length = ranges->length();
1233 if (ranges_length == 0) {
1234 if (!cr->is_negated()) {
1235 macro_assembler->GoTo(on_failure);
1236 }
1237 if (check_offset) {
1238 macro_assembler->CheckPosition(cp_offset, on_failure);
1239 }
1240 return;
1241 }
1242
1243 const base::uc32 max_char = MaxCodeUnit(one_byte);
1244 if (ranges_length == 1 && ranges->at(0).IsEverything(max_char)) {
1245 if (cr->is_negated()) {
1246 macro_assembler->GoTo(on_failure);
1247 } else {
1248 // This is a common case hit by non-anchored expressions.
1249 if (check_offset) {
1250 macro_assembler->CheckPosition(cp_offset, on_failure);
1251 }
1252 }
1253 return;
1254 }
1255
1256 if (!preloaded) {
1257 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check_offset);
1258 }
1259
1260 if (cr->is_standard(zone) && macro_assembler->CheckSpecialClassRanges(
1261 cr->standard_type(), on_failure)) {
1262 return;
1263 }
1264
1265 static constexpr int kMaxRangesForInlineBranchGeneration = 16;
1266 if (ranges_length > kMaxRangesForInlineBranchGeneration) {
1267 // For large range sets, emit a more compact instruction sequence to avoid
1268 // a potentially problematic increase in code size.
1269 // Note the flipped logic below (we check InRange if negated, NotInRange if
1270 // not negated); this is necessary since the method falls through on
1271 // failure whereas we want to fall through on success.
1272 if (cr->is_negated()) {
1273 if (macro_assembler->CheckCharacterInRangeArray(ranges, on_failure)) {
1274 return;
1275 }
1276 } else {
1277 if (macro_assembler->CheckCharacterNotInRangeArray(ranges, on_failure)) {
1278 return;
1279 }
1280 }
1281 }
1282
1283 // Generate a flat list of range boundaries for consumption by
1284 // GenerateBranches. See the comment on that function for how the list should
1285 // be structured
1286 ZoneList<base::uc32>* range_boundaries =
1287 zone->New<ZoneList<base::uc32>>(ranges_length * 2, zone);
1288
1289 bool zeroth_entry_is_failure = !cr->is_negated();
1290
1291 for (int i = 0; i < ranges_length; i++) {
1292 CharacterRange& range = ranges->at(i);
1293 if (range.from() == 0) {
1294 DCHECK_EQ(i, 0)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((i) == (0))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((i) == (0)))), 0))) { do { }
while (false); MOZ_ReportAssertionFailure("(i) == (0)", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1294); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(i) == (0)"
")"); do { *((volatile int*)__null) = 1294; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1295 zeroth_entry_is_failure = !zeroth_entry_is_failure;
1296 } else {
1297 range_boundaries->Add(range.from(), zone);
1298 }
1299 // `+ 1` to convert from inclusive to exclusive `to`.
1300 // [from, to] == [from, to+1[.
1301 range_boundaries->Add(range.to() + 1, zone);
1302 }
1303 int end_index = range_boundaries->length() - 1;
1304 if (range_boundaries->at(end_index) > max_char) {
1305 end_index--;
1306 }
1307
1308 Label fall_through;
1309 GenerateBranches(macro_assembler, range_boundaries,
1310 0, // start_index.
1311 end_index,
1312 0, // min_char.
1313 max_char, &fall_through,
1314 zeroth_entry_is_failure ? &fall_through : on_failure,
1315 zeroth_entry_is_failure ? on_failure : &fall_through);
1316 macro_assembler->Bind(&fall_through);
1317}
1318
1319} // namespace
1320
1321RegExpNode::~RegExpNode() = default;
1322
1323RegExpNode::LimitResult RegExpNode::LimitVersions(RegExpCompiler* compiler,
1324 Trace* trace) {
1325 // If we are generating a greedy loop then don't stop and don't reuse code.
1326 if (trace->stop_node() != nullptr) {
1327 return CONTINUE;
1328 }
1329
1330 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
1331 if (trace->is_trivial()) {
1332 if (label_.is_bound() || on_work_list() || !KeepRecursing(compiler)) {
1333 // If a generic version is already scheduled to be generated or we have
1334 // recursed too deeply then just generate a jump to that code.
1335 macro_assembler->GoTo(&label_);
1336 // This will queue it up for generation of a generic version if it hasn't
1337 // already been queued.
1338 compiler->AddWork(this);
1339 return DONE;
1340 }
1341 // Generate generic version of the node and bind the label for later use.
1342 macro_assembler->Bind(&label_);
1343 return CONTINUE;
1344 }
1345
1346 // We are being asked to make a non-generic version. Keep track of how many
1347 // non-generic versions we generate so as not to overdo it.
1348 trace_count_++;
1349 if (KeepRecursing(compiler) && compiler->optimize() &&
1350 trace_count_ < kMaxCopiesCodeGenerated) {
1351 return CONTINUE;
1352 }
1353
1354 // If we get here code has been generated for this node too many times or
1355 // recursion is too deep. Time to switch to a generic version. The code for
1356 // generic versions above can handle deep recursion properly.
1357 bool was_limiting = compiler->limiting_recursion();
1358 compiler->set_limiting_recursion(true);
1359 trace->Flush(compiler, this);
1360 compiler->set_limiting_recursion(was_limiting);
1361 return DONE;
1362}
1363
1364bool RegExpNode::KeepRecursing(RegExpCompiler* compiler) {
1365 return !compiler->limiting_recursion() &&
1366 compiler->recursion_depth() <= RegExpCompiler::kMaxRecursion;
1367}
1368
1369void ActionNode::FillInBMInfo(Isolate* isolate, int offset, int budget,
1370 BoyerMooreLookahead* bm, bool not_at_start) {
1371 std::optional<RegExpFlags> old_flags;
1372 if (action_type_ == MODIFY_FLAGS) {
1373 // It is not guaranteed that we hit the resetting modify flags node, due to
1374 // recursion budget limitation for filling in BMInfo. Therefore we reset the
1375 // flags manually to the previous state after recursing.
1376 old_flags = bm->compiler()->flags();
1377 bm->compiler()->set_flags(flags());
1378 }
1379 if (action_type_ == POSITIVE_SUBMATCH_SUCCESS) {
1380 // Anything may follow a positive submatch success, thus we need to accept
1381 // all characters from this position onwards.
1382 bm->SetRest(offset);
1383 } else {
1384 on_success()->FillInBMInfo(isolate, offset, budget - 1, bm, not_at_start);
1385 }
1386 SaveBMInfo(bm, not_at_start, offset);
1387 if (old_flags.has_value()) {
1388 bm->compiler()->set_flags(*old_flags);
1389 }
1390}
1391
1392void ActionNode::GetQuickCheckDetails(QuickCheckDetails* details,
1393 RegExpCompiler* compiler, int filled_in,
1394 bool not_at_start) {
1395 if (action_type_ == SET_REGISTER_FOR_LOOP) {
1396 on_success()->GetQuickCheckDetailsFromLoopEntry(details, compiler,
1397 filled_in, not_at_start);
1398 } else {
1399 if (action_type() == MODIFY_FLAGS) {
1400 compiler->set_flags(flags());
1401 }
1402 on_success()->GetQuickCheckDetails(details, compiler, filled_in,
1403 not_at_start);
1404 }
1405}
1406
1407void AssertionNode::FillInBMInfo(Isolate* isolate, int offset, int budget,
1408 BoyerMooreLookahead* bm, bool not_at_start) {
1409 // Match the behaviour of EatsAtLeast on this node.
1410 if (assertion_type() == AT_START && not_at_start) return;
1411 on_success()->FillInBMInfo(isolate, offset, budget - 1, bm, not_at_start);
1412 SaveBMInfo(bm, not_at_start, offset);
1413}
1414
1415void NegativeLookaroundChoiceNode::GetQuickCheckDetails(
1416 QuickCheckDetails* details, RegExpCompiler* compiler, int filled_in,
1417 bool not_at_start) {
1418 RegExpNode* node = continue_node();
1419 return node->GetQuickCheckDetails(details, compiler, filled_in, not_at_start);
1420}
1421
1422namespace {
1423
1424// Takes the left-most 1-bit and smears it out, setting all bits to its right.
1425inline uint32_t SmearBitsRight(uint32_t v) {
1426 v |= v >> 1;
1427 v |= v >> 2;
1428 v |= v >> 4;
1429 v |= v >> 8;
1430 v |= v >> 16;
1431 return v;
1432}
1433
1434} // namespace
1435
1436bool QuickCheckDetails::Rationalize(bool asc) {
1437 bool found_useful_op = false;
1438 const uint32_t char_mask = CharMask(asc);
1439 mask_ = 0;
1440 value_ = 0;
1441 int char_shift = 0;
1442 for (int i = 0; i < characters_; i++) {
36
Assuming 'i' is < field 'characters_'
37
Loop condition is true. Entering loop body
42
Assuming 'i' is < field 'characters_'
43
Loop condition is true. Entering loop body
48
Assuming 'i' is < field 'characters_'
49
Loop condition is true. Entering loop body
1443 Position* pos = &positions_[i];
1444 if ((pos->mask & String::kMaxOneByteCharCode) != 0) {
38
Assuming the condition is false
39
Taking false branch
44
Assuming the condition is false
45
Taking false branch
50
Assuming the condition is false
51
Taking false branch
1445 found_useful_op = true;
1446 }
1447 mask_ |= (pos->mask & char_mask) << char_shift;
52
The result of left shift is undefined because the right operand '32' is not smaller than 32, the capacity of 'uint32_t'
1448 value_ |= (pos->value & char_mask) << char_shift;
1449 char_shift += asc
45.1
'asc' is false
? 8 : 16
;
40
Assuming 'asc' is false
41
'?' condition is false
46
'?' condition is false
47
The value 32 is assigned to 'char_shift'
1450 }
1451 return found_useful_op;
1452}
1453
1454int RegExpNode::EatsAtLeast(bool not_at_start) {
1455 return not_at_start ? eats_at_least_.eats_at_least_from_not_start
1456 : eats_at_least_.eats_at_least_from_possibly_start;
1457}
1458
1459EatsAtLeastInfo RegExpNode::EatsAtLeastFromLoopEntry() {
1460 // SET_REGISTER_FOR_LOOP is only used to initialize loop counters, and it
1461 // implies that the following node must be a LoopChoiceNode. If we need to
1462 // set registers to constant values for other reasons, we could introduce a
1463 // new action type SET_REGISTER that doesn't imply anything about its
1464 // successor.
1465 UNREACHABLE()do { do { } while (false); MOZ_ReportCrash("" "unreachable code"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1465); AnnotateMozCrashReason("MOZ_CRASH(" "unreachable code"
")"); do { *((volatile int*)__null) = 1465; __attribute__((nomerge
)) ::abort(); } while (false); } while (false)
;
1466}
1467
1468void RegExpNode::GetQuickCheckDetailsFromLoopEntry(QuickCheckDetails* details,
1469 RegExpCompiler* compiler,
1470 int characters_filled_in,
1471 bool not_at_start) {
1472 // See comment in RegExpNode::EatsAtLeastFromLoopEntry.
1473 UNREACHABLE()do { do { } while (false); MOZ_ReportCrash("" "unreachable code"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1473); AnnotateMozCrashReason("MOZ_CRASH(" "unreachable code"
")"); do { *((volatile int*)__null) = 1473; __attribute__((nomerge
)) ::abort(); } while (false); } while (false)
;
1474}
1475
1476EatsAtLeastInfo LoopChoiceNode::EatsAtLeastFromLoopEntry() {
1477 DCHECK_EQ(alternatives_->length(), 2)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((alternatives_->length()) == (2))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((alternatives_->length())
== (2)))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("(alternatives_->length()) == (2)", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1477); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(alternatives_->length()) == (2)"
")"); do { *((volatile int*)__null) = 1477; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
; // There's just loop and continue.
1478
1479 if (read_backward()) {
1480 // The eats_at_least value is not used if reading backward. The
1481 // EatsAtLeastPropagator should've zeroed it as well.
1482 DCHECK_EQ(eats_at_least_info()->eats_at_least_from_possibly_start, 0)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((eats_at_least_info()->eats_at_least_from_possibly_start
) == (0))>::isValid, "invalid assertion condition"); if ((
__builtin_expect(!!(!(!!((eats_at_least_info()->eats_at_least_from_possibly_start
) == (0)))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("(eats_at_least_info()->eats_at_least_from_possibly_start) == (0)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1482); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(eats_at_least_info()->eats_at_least_from_possibly_start) == (0)"
")"); do { *((volatile int*)__null) = 1482; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1483 DCHECK_EQ(eats_at_least_info()->eats_at_least_from_not_start, 0)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((eats_at_least_info()->eats_at_least_from_not_start
) == (0))>::isValid, "invalid assertion condition"); if ((
__builtin_expect(!!(!(!!((eats_at_least_info()->eats_at_least_from_not_start
) == (0)))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("(eats_at_least_info()->eats_at_least_from_not_start) == (0)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1483); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(eats_at_least_info()->eats_at_least_from_not_start) == (0)"
")"); do { *((volatile int*)__null) = 1483; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1484 return {};
1485 }
1486
1487 // Figure out how much the loop body itself eats, not including anything in
1488 // the continuation case. In general, the nodes in the loop body should report
1489 // that they eat at least the number eaten by the continuation node, since any
1490 // successful match in the loop body must also include the continuation node.
1491 // However, in some cases involving positive lookaround, the loop body under-
1492 // reports its appetite, so use saturated math here to avoid negative numbers.
1493 uint8_t loop_body_from_not_start = base::saturated_cast<uint8_t>(
1494 loop_node_->EatsAtLeast(true) - continue_node_->EatsAtLeast(true));
1495 uint8_t loop_body_from_possibly_start = base::saturated_cast<uint8_t>(
1496 loop_node_->EatsAtLeast(false) - continue_node_->EatsAtLeast(true));
1497
1498 // Limit the number of loop iterations to avoid overflow in subsequent steps.
1499 int loop_iterations = base::saturated_cast<uint8_t>(min_loop_iterations());
1500
1501 EatsAtLeastInfo result;
1502 result.eats_at_least_from_not_start =
1503 base::saturated_cast<uint8_t>(loop_iterations * loop_body_from_not_start +
1504 continue_node_->EatsAtLeast(true));
1505 if (loop_iterations > 0 && loop_body_from_possibly_start > 0) {
1506 // First loop iteration eats at least one, so all subsequent iterations
1507 // and the after-loop chunk are guaranteed to not be at the start.
1508 result.eats_at_least_from_possibly_start = base::saturated_cast<uint8_t>(
1509 loop_body_from_possibly_start +
1510 (loop_iterations - 1) * loop_body_from_not_start +
1511 continue_node_->EatsAtLeast(true));
1512 } else {
1513 // Loop body might eat nothing, so only continue node contributes.
1514 result.eats_at_least_from_possibly_start =
1515 continue_node_->EatsAtLeast(false);
1516 }
1517 return result;
1518}
1519
1520bool RegExpNode::EmitQuickCheck(RegExpCompiler* compiler,
1521 Trace* bounds_check_trace, Trace* trace,
1522 bool preload_has_checked_bounds,
1523 Label* on_possible_success,
1524 QuickCheckDetails* details,
1525 bool fall_through_on_failure,
1526 ChoiceNode* predecessor) {
1527 DCHECK_NOT_NULL(predecessor)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((predecessor) != nullptr)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((predecessor) != nullptr))),
0))) { do { } while (false); MOZ_ReportAssertionFailure("(predecessor) != nullptr"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1527); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(predecessor) != nullptr"
")"); do { *((volatile int*)__null) = 1527; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
29
Taking false branch
30
Loop condition is false. Exiting loop
1528 if (details->characters() == 0) return false;
31
Assuming the condition is false
32
Taking false branch
1529 GetQuickCheckDetails(details, compiler, 0,
1530 trace->at_start() == Trace::FALSE_VALUE);
1531 if (details->cannot_match()) return false;
33
Assuming the condition is false
34
Taking false branch
1532 if (!details->Rationalize(compiler->one_byte())) return false;
35
Calling 'QuickCheckDetails::Rationalize'
1533 DCHECK(details->characters() == 1 ||do { static_assert( mozilla::detail::AssertionConditionType<
decltype(details->characters() == 1 || compiler->macro_assembler
()->CanReadUnaligned())>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(details->characters() == 1
|| compiler->macro_assembler()->CanReadUnaligned()))),
0))) { do { } while (false); MOZ_ReportAssertionFailure("details->characters() == 1 || compiler->macro_assembler()->CanReadUnaligned()"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1534); AnnotateMozCrashReason("MOZ_ASSERT" "(" "details->characters() == 1 || compiler->macro_assembler()->CanReadUnaligned()"
")"); do { *((volatile int*)__null) = 1534; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
1534 compiler->macro_assembler()->CanReadUnaligned())do { static_assert( mozilla::detail::AssertionConditionType<
decltype(details->characters() == 1 || compiler->macro_assembler
()->CanReadUnaligned())>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(details->characters() == 1
|| compiler->macro_assembler()->CanReadUnaligned()))),
0))) { do { } while (false); MOZ_ReportAssertionFailure("details->characters() == 1 || compiler->macro_assembler()->CanReadUnaligned()"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1534); AnnotateMozCrashReason("MOZ_ASSERT" "(" "details->characters() == 1 || compiler->macro_assembler()->CanReadUnaligned()"
")"); do { *((volatile int*)__null) = 1534; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1535 uint32_t mask = details->mask();
1536 uint32_t value = details->value();
1537
1538 RegExpMacroAssembler* assembler = compiler->macro_assembler();
1539
1540 if (trace->characters_preloaded() != details->characters()) {
1541 DCHECK(trace->cp_offset() == bounds_check_trace->cp_offset())do { static_assert( mozilla::detail::AssertionConditionType<
decltype(trace->cp_offset() == bounds_check_trace->cp_offset
())>::isValid, "invalid assertion condition"); if ((__builtin_expect
(!!(!(!!(trace->cp_offset() == bounds_check_trace->cp_offset
()))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("trace->cp_offset() == bounds_check_trace->cp_offset()"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1541); AnnotateMozCrashReason("MOZ_ASSERT" "(" "trace->cp_offset() == bounds_check_trace->cp_offset()"
")"); do { *((volatile int*)__null) = 1541; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1542 // The bounds check is performed using the minimum number of characters
1543 // any choice would eat, so if the bounds check fails, then none of the
1544 // choices can succeed, so we can just immediately backtrack, rather
1545 // than go to the next choice. The number of characters preloaded may be
1546 // less than the number used for the bounds check.
1547 int eats_at_least = predecessor->EatsAtLeast(
1548 bounds_check_trace->at_start() == Trace::FALSE_VALUE);
1549 DCHECK_GE(eats_at_least, details->characters())do { static_assert( mozilla::detail::AssertionConditionType<
decltype((eats_at_least) >= (details->characters()))>
::isValid, "invalid assertion condition"); if ((__builtin_expect
(!!(!(!!((eats_at_least) >= (details->characters())))),
0))) { do { } while (false); MOZ_ReportAssertionFailure("(eats_at_least) >= (details->characters())"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1549); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(eats_at_least) >= (details->characters())"
")"); do { *((volatile int*)__null) = 1549; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1550 assembler->LoadCurrentCharacter(
1551 trace->cp_offset(), bounds_check_trace->backtrack(),
1552 !preload_has_checked_bounds, details->characters(), eats_at_least);
1553 }
1554
1555 bool need_mask = true;
1556
1557 if (details->characters() == 1) {
1558 // If number of characters preloaded is 1 then we used a byte or 16 bit
1559 // load so the value is already masked down.
1560 const uint32_t char_mask = CharMask(compiler->one_byte());
1561 if ((mask & char_mask) == char_mask) need_mask = false;
1562 mask &= char_mask;
1563 } else {
1564 // For 2-character preloads in one-byte mode or 1-character preloads in
1565 // two-byte mode we also use a 16 bit load with zero extend.
1566 static const uint32_t kTwoByteMask = 0xFFFF;
1567 static const uint32_t kFourByteMask = 0xFFFFFFFF;
1568 if (details->characters() == 2 && compiler->one_byte()) {
1569 if ((mask & kTwoByteMask) == kTwoByteMask) need_mask = false;
1570 } else if (details->characters() == 1 && !compiler->one_byte()) {
1571 if ((mask & kTwoByteMask) == kTwoByteMask) need_mask = false;
1572 } else {
1573 if (mask == kFourByteMask) need_mask = false;
1574 }
1575 }
1576
1577 if (fall_through_on_failure) {
1578 if (need_mask) {
1579 assembler->CheckCharacterAfterAnd(value, mask, on_possible_success);
1580 } else {
1581 assembler->CheckCharacter(value, on_possible_success);
1582 }
1583 } else {
1584 if (need_mask) {
1585 assembler->CheckNotCharacterAfterAnd(value, mask, trace->backtrack());
1586 } else {
1587 assembler->CheckNotCharacter(value, trace->backtrack());
1588 }
1589 }
1590 return true;
1591}
1592
1593// Here is the meat of GetQuickCheckDetails (see also the comment on the
1594// super-class in the .h file).
1595//
1596// We iterate along the text object, building up for each character a
1597// mask and value that can be used to test for a quick failure to match.
1598// The masks and values for the positions will be combined into a single
1599// machine word for the current character width in order to be used in
1600// generating a quick check.
1601void TextNode::GetQuickCheckDetails(QuickCheckDetails* details,
1602 RegExpCompiler* compiler,
1603 int characters_filled_in,
1604 bool not_at_start) {
1605 // Do not collect any quick check details if the text node reads backward,
1606 // since it reads in the opposite direction than we use for quick checks.
1607 if (read_backward()) return;
1608 Isolate* isolate = compiler->macro_assembler()->isolate();
1609 DCHECK(characters_filled_in < details->characters())do { static_assert( mozilla::detail::AssertionConditionType<
decltype(characters_filled_in < details->characters())>
::isValid, "invalid assertion condition"); if ((__builtin_expect
(!!(!(!!(characters_filled_in < details->characters()))
), 0))) { do { } while (false); MOZ_ReportAssertionFailure("characters_filled_in < details->characters()"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1609); AnnotateMozCrashReason("MOZ_ASSERT" "(" "characters_filled_in < details->characters()"
")"); do { *((volatile int*)__null) = 1609; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1610 int characters = details->characters();
1611 const uint32_t char_mask = CharMask(compiler->one_byte());
1612 for (int k = 0; k < elements()->length(); k++) {
1613 TextElement elm = elements()->at(k);
1614 if (elm.text_type() == TextElement::ATOM) {
1615 base::Vector<const base::uc16> quarks = elm.atom()->data();
1616 for (int i = 0; i < characters && i < quarks.length(); i++) {
1617 QuickCheckDetails::Position* pos =
1618 details->positions(characters_filled_in);
1619 base::uc16 c = quarks[i];
1620 if (IsIgnoreCase(compiler->flags())) {
1621 unibrow::uchar chars[4];
1622 int length = GetCaseIndependentLetters(
1623 isolate, c, compiler->one_byte(), chars, 4);
1624 if (length == 0) {
1625 // This can happen because all case variants are non-Latin1, but we
1626 // know the input is Latin1.
1627 details->set_cannot_match();
1628 pos->determines_perfectly = false;
1629 return;
1630 }
1631 if (length == 1) {
1632 // This letter has no case equivalents, so it's nice and simple
1633 // and the mask-compare will determine definitely whether we have
1634 // a match at this character position.
1635 pos->mask = char_mask;
1636 pos->value = chars[0];
1637 pos->determines_perfectly = true;
1638 } else {
1639 uint32_t common_bits = char_mask;
1640 uint32_t bits = chars[0];
1641 for (int j = 1; j < length; j++) {
1642 uint32_t differing_bits = ((chars[j] & common_bits) ^ bits);
1643 common_bits ^= differing_bits;
1644 bits &= common_bits;
1645 }
1646 // If length is 2 and common bits has only one zero in it then
1647 // our mask and compare instruction will determine definitely
1648 // whether we have a match at this character position. Otherwise
1649 // it can only be an approximate check.
1650 uint32_t one_zero = (common_bits | ~char_mask);
1651 if (length == 2 && ((~one_zero) & ((~one_zero) - 1)) == 0) {
1652 pos->determines_perfectly = true;
1653 }
1654 pos->mask = common_bits;
1655 pos->value = bits;
1656 }
1657 } else {
1658 // Don't ignore case. Nice simple case where the mask-compare will
1659 // determine definitely whether we have a match at this character
1660 // position.
1661 if (c > char_mask) {
1662 details->set_cannot_match();
1663 pos->determines_perfectly = false;
1664 return;
1665 }
1666 pos->mask = char_mask;
1667 pos->value = c;
1668 pos->determines_perfectly = true;
1669 }
1670 characters_filled_in++;
1671 DCHECK(characters_filled_in <= details->characters())do { static_assert( mozilla::detail::AssertionConditionType<
decltype(characters_filled_in <= details->characters())
>::isValid, "invalid assertion condition"); if ((__builtin_expect
(!!(!(!!(characters_filled_in <= details->characters())
)), 0))) { do { } while (false); MOZ_ReportAssertionFailure("characters_filled_in <= details->characters()"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1671); AnnotateMozCrashReason("MOZ_ASSERT" "(" "characters_filled_in <= details->characters()"
")"); do { *((volatile int*)__null) = 1671; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1672 if (characters_filled_in == details->characters()) {
1673 return;
1674 }
1675 }
1676 } else {
1677 QuickCheckDetails::Position* pos =
1678 details->positions(characters_filled_in);
1679 RegExpClassRanges* tree = elm.class_ranges();
1680 ZoneList<CharacterRange>* ranges = tree->ranges(zone());
1681 if (tree->is_negated() || ranges->is_empty()) {
1682 // A quick check uses multi-character mask and compare. There is no
1683 // useful way to incorporate a negative char class into this scheme
1684 // so we just conservatively create a mask and value that will always
1685 // succeed.
1686 // Likewise for empty ranges (empty ranges can occur e.g. when
1687 // compiling for one-byte subjects and impossible (non-one-byte) ranges
1688 // have been removed).
1689 pos->mask = 0;
1690 pos->value = 0;
1691 } else {
1692 int first_range = 0;
1693 while (ranges->at(first_range).from() > char_mask) {
1694 first_range++;
1695 if (first_range == ranges->length()) {
1696 details->set_cannot_match();
1697 pos->determines_perfectly = false;
1698 return;
1699 }
1700 }
1701 CharacterRange range = ranges->at(first_range);
1702 const base::uc32 first_from = range.from();
1703 const base::uc32 first_to =
1704 (range.to() > char_mask) ? char_mask : range.to();
1705 const uint32_t differing_bits = (first_from ^ first_to);
1706 // A mask and compare is only perfect if the differing bits form a
1707 // number like 00011111 with one single block of trailing 1s.
1708 if ((differing_bits & (differing_bits + 1)) == 0 &&
1709 first_from + differing_bits == first_to) {
1710 pos->determines_perfectly = true;
1711 }
1712 uint32_t common_bits = ~SmearBitsRight(differing_bits);
1713 uint32_t bits = (first_from & common_bits);
1714 for (int i = first_range + 1; i < ranges->length(); i++) {
1715 range = ranges->at(i);
1716 const base::uc32 from = range.from();
1717 if (from > char_mask) continue;
1718 const base::uc32 to =
1719 (range.to() > char_mask) ? char_mask : range.to();
1720 // Here we are combining more ranges into the mask and compare
1721 // value. With each new range the mask becomes more sparse and
1722 // so the chances of a false positive rise. A character class
1723 // with multiple ranges is assumed never to be equivalent to a
1724 // mask and compare operation.
1725 pos->determines_perfectly = false;
1726 uint32_t new_common_bits = (from ^ to);
1727 new_common_bits = ~SmearBitsRight(new_common_bits);
1728 common_bits &= new_common_bits;
1729 bits &= new_common_bits;
1730 uint32_t new_differing_bits = (from & common_bits) ^ bits;
1731 common_bits ^= new_differing_bits;
1732 bits &= common_bits;
1733 }
1734 pos->mask = common_bits;
1735 pos->value = bits;
1736 }
1737 characters_filled_in++;
1738 DCHECK(characters_filled_in <= details->characters())do { static_assert( mozilla::detail::AssertionConditionType<
decltype(characters_filled_in <= details->characters())
>::isValid, "invalid assertion condition"); if ((__builtin_expect
(!!(!(!!(characters_filled_in <= details->characters())
)), 0))) { do { } while (false); MOZ_ReportAssertionFailure("characters_filled_in <= details->characters()"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1738); AnnotateMozCrashReason("MOZ_ASSERT" "(" "characters_filled_in <= details->characters()"
")"); do { *((volatile int*)__null) = 1738; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1739 if (characters_filled_in == details->characters()) return;
1740 }
1741 }
1742 DCHECK(characters_filled_in != details->characters())do { static_assert( mozilla::detail::AssertionConditionType<
decltype(characters_filled_in != details->characters())>
::isValid, "invalid assertion condition"); if ((__builtin_expect
(!!(!(!!(characters_filled_in != details->characters()))),
0))) { do { } while (false); MOZ_ReportAssertionFailure("characters_filled_in != details->characters()"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1742); AnnotateMozCrashReason("MOZ_ASSERT" "(" "characters_filled_in != details->characters()"
")"); do { *((volatile int*)__null) = 1742; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1743 if (!details->cannot_match()) {
1744 on_success()->GetQuickCheckDetails(details, compiler, characters_filled_in,
1745 true);
1746 }
1747}
1748
1749void QuickCheckDetails::Clear() {
1750 for (int i = 0; i < characters_; i++) {
1751 positions_[i].mask = 0;
1752 positions_[i].value = 0;
1753 positions_[i].determines_perfectly = false;
1754 }
1755 characters_ = 0;
1756}
1757
1758void QuickCheckDetails::Advance(int by, bool one_byte) {
1759 if (by >= characters_ || by < 0) {
1760 DCHECK_IMPLIES(by < 0, characters_ == 0)do { if (by < 0) { do { static_assert( mozilla::detail::AssertionConditionType
<decltype(characters_ == 0)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(characters_ == 0))), 0))) { do
{ } while (false); MOZ_ReportAssertionFailure("characters_ == 0"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1760); AnnotateMozCrashReason("MOZ_ASSERT" "(" "characters_ == 0"
")"); do { *((volatile int*)__null) = 1760; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false); } } while (
false)
;
1761 Clear();
1762 return;
1763 }
1764 DCHECK_LE(characters_ - by, 4)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((characters_ - by) <= (4))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((characters_ - by) <= (4)
))), 0))) { do { } while (false); MOZ_ReportAssertionFailure(
"(characters_ - by) <= (4)", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1764); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(characters_ - by) <= (4)"
")"); do { *((volatile int*)__null) = 1764; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1765 DCHECK_LE(characters_, 4)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((characters_) <= (4))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((characters_) <= (4)))), 0
))) { do { } while (false); MOZ_ReportAssertionFailure("(characters_) <= (4)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1765); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(characters_) <= (4)"
")"); do { *((volatile int*)__null) = 1765; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1766 for (int i = 0; i < characters_ - by; i++) {
1767 positions_[i] = positions_[by + i];
1768 }
1769 for (int i = characters_ - by; i < characters_; i++) {
1770 positions_[i].mask = 0;
1771 positions_[i].value = 0;
1772 positions_[i].determines_perfectly = false;
1773 }
1774 characters_ -= by;
1775 // We could change mask_ and value_ here but we would never advance unless
1776 // they had already been used in a check and they won't be used again because
1777 // it would gain us nothing. So there's no point.
1778}
1779
1780void QuickCheckDetails::Merge(QuickCheckDetails* other, int from_index) {
1781 DCHECK(characters_ == other->characters_)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(characters_ == other->characters_)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(characters_ == other->characters_
))), 0))) { do { } while (false); MOZ_ReportAssertionFailure(
"characters_ == other->characters_", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1781); AnnotateMozCrashReason("MOZ_ASSERT" "(" "characters_ == other->characters_"
")"); do { *((volatile int*)__null) = 1781; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1782 if (other->cannot_match_) {
1783 return;
1784 }
1785 if (cannot_match_) {
1786 *this = *other;
1787 return;
1788 }
1789 for (int i = from_index; i < characters_; i++) {
1790 QuickCheckDetails::Position* pos = positions(i);
1791 QuickCheckDetails::Position* other_pos = other->positions(i);
1792 if (pos->mask != other_pos->mask || pos->value != other_pos->value ||
1793 !other_pos->determines_perfectly) {
1794 // Our mask-compare operation will be approximate unless we have the
1795 // exact same operation on both sides of the alternation.
1796 pos->determines_perfectly = false;
1797 }
1798 pos->mask &= other_pos->mask;
1799 pos->value &= pos->mask;
1800 other_pos->value &= pos->mask;
1801 uint32_t differing_bits = (pos->value ^ other_pos->value);
1802 pos->mask &= ~differing_bits;
1803 pos->value &= pos->mask;
1804 }
1805}
1806
1807class VisitMarker {
1808 public:
1809 explicit VisitMarker(NodeInfo* info) : info_(info) {
1810 DCHECK(!info->visited)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(!info->visited)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(!info->visited))), 0))) {
do { } while (false); MOZ_ReportAssertionFailure("!info->visited"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1810); AnnotateMozCrashReason("MOZ_ASSERT" "(" "!info->visited"
")"); do { *((volatile int*)__null) = 1810; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1811 info->visited = true;
1812 }
1813 ~VisitMarker() { info_->visited = false; }
1814
1815 private:
1816 NodeInfo* info_;
1817};
1818
1819// Temporarily sets traversed_loop_initialization_node_.
1820class LoopInitializationMarker {
1821 public:
1822 explicit LoopInitializationMarker(LoopChoiceNode* node) : node_(node) {
1823 DCHECK(!node_->traversed_loop_initialization_node_)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(!node_->traversed_loop_initialization_node_)>::
isValid, "invalid assertion condition"); if ((__builtin_expect
(!!(!(!!(!node_->traversed_loop_initialization_node_))), 0
))) { do { } while (false); MOZ_ReportAssertionFailure("!node_->traversed_loop_initialization_node_"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1823); AnnotateMozCrashReason("MOZ_ASSERT" "(" "!node_->traversed_loop_initialization_node_"
")"); do { *((volatile int*)__null) = 1823; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1824 node_->traversed_loop_initialization_node_ = true;
1825 }
1826 ~LoopInitializationMarker() {
1827 DCHECK(node_->traversed_loop_initialization_node_)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(node_->traversed_loop_initialization_node_)>::
isValid, "invalid assertion condition"); if ((__builtin_expect
(!!(!(!!(node_->traversed_loop_initialization_node_))), 0)
)) { do { } while (false); MOZ_ReportAssertionFailure("node_->traversed_loop_initialization_node_"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1827); AnnotateMozCrashReason("MOZ_ASSERT" "(" "node_->traversed_loop_initialization_node_"
")"); do { *((volatile int*)__null) = 1827; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1828 node_->traversed_loop_initialization_node_ = false;
1829 }
1830 LoopInitializationMarker(const LoopInitializationMarker&) = delete;
1831 LoopInitializationMarker& operator=(const LoopInitializationMarker&) = delete;
1832
1833 private:
1834 LoopChoiceNode* node_;
1835};
1836
1837// Temporarily decrements min_loop_iterations_.
1838class IterationDecrementer {
1839 public:
1840 explicit IterationDecrementer(LoopChoiceNode* node) : node_(node) {
1841 DCHECK_GT(node_->min_loop_iterations_, 0)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((node_->min_loop_iterations_) > (0))>::isValid
, "invalid assertion condition"); if ((__builtin_expect(!!(!(
!!((node_->min_loop_iterations_) > (0)))), 0))) { do { }
while (false); MOZ_ReportAssertionFailure("(node_->min_loop_iterations_) > (0)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1841); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(node_->min_loop_iterations_) > (0)"
")"); do { *((volatile int*)__null) = 1841; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1842 --node_->min_loop_iterations_;
1843 }
1844 ~IterationDecrementer() { ++node_->min_loop_iterations_; }
1845 IterationDecrementer(const IterationDecrementer&) = delete;
1846 IterationDecrementer& operator=(const IterationDecrementer&) = delete;
1847
1848 private:
1849 LoopChoiceNode* node_;
1850};
1851
1852RegExpNode* SeqRegExpNode::FilterOneByte(int depth, RegExpFlags flags) {
1853 if (info()->replacement_calculated) return replacement();
1854 if (depth < 0) return this;
1855 DCHECK(!info()->visited)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(!info()->visited)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(!info()->visited))), 0)))
{ do { } while (false); MOZ_ReportAssertionFailure("!info()->visited"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1855); AnnotateMozCrashReason("MOZ_ASSERT" "(" "!info()->visited"
")"); do { *((volatile int*)__null) = 1855; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1856 VisitMarker marker(info());
1857 return FilterSuccessor(depth - 1, flags);
1858}
1859
1860RegExpNode* SeqRegExpNode::FilterSuccessor(int depth, RegExpFlags flags) {
1861 RegExpNode* next = on_success_->FilterOneByte(depth - 1, flags);
1862 if (next == nullptr) return set_replacement(nullptr);
1863 on_success_ = next;
1864 return set_replacement(this);
1865}
1866
1867// We need to check for the following characters: 0x39C 0x3BC 0x178.
1868bool RangeContainsLatin1Equivalents(CharacterRange range) {
1869 // TODO(dcarney): this could be a lot more efficient.
1870 return range.Contains(0x039C) || range.Contains(0x03BC) ||
1871 range.Contains(0x0178);
1872}
1873
1874namespace {
1875
1876bool RangesContainLatin1Equivalents(ZoneList<CharacterRange>* ranges) {
1877 for (int i = 0; i < ranges->length(); i++) {
1878 // TODO(dcarney): this could be a lot more efficient.
1879 if (RangeContainsLatin1Equivalents(ranges->at(i))) return true;
1880 }
1881 return false;
1882}
1883
1884} // namespace
1885
1886RegExpNode* TextNode::FilterOneByte(int depth, RegExpFlags flags) {
1887 if (info()->replacement_calculated) return replacement();
1888 if (depth < 0) return this;
1889 DCHECK(!info()->visited)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(!info()->visited)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(!info()->visited))), 0)))
{ do { } while (false); MOZ_ReportAssertionFailure("!info()->visited"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1889); AnnotateMozCrashReason("MOZ_ASSERT" "(" "!info()->visited"
")"); do { *((volatile int*)__null) = 1889; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1890 VisitMarker marker(info());
1891 int element_count = elements()->length();
1892 for (int i = 0; i < element_count; i++) {
1893 TextElement elm = elements()->at(i);
1894 if (elm.text_type() == TextElement::ATOM) {
1895 base::Vector<const base::uc16> quarks = elm.atom()->data();
1896 for (int j = 0; j < quarks.length(); j++) {
1897 base::uc16 c = quarks[j];
1898 if (IsIgnoreCase(flags)) {
1899 c = unibrow::Latin1::TryConvertToLatin1(c);
1900 }
1901 if (c > unibrow::Latin1::kMaxChar) return set_replacement(nullptr);
1902 // Replace quark in case we converted to Latin-1.
1903 base::uc16* writable_quarks = const_cast<base::uc16*>(quarks.begin());
1904 writable_quarks[j] = c;
1905 }
1906 } else {
1907 DCHECK(elm.text_type() == TextElement::CLASS_RANGES)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(elm.text_type() == TextElement::CLASS_RANGES)>::isValid
, "invalid assertion condition"); if ((__builtin_expect(!!(!(
!!(elm.text_type() == TextElement::CLASS_RANGES))), 0))) { do
{ } while (false); MOZ_ReportAssertionFailure("elm.text_type() == TextElement::CLASS_RANGES"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1907); AnnotateMozCrashReason("MOZ_ASSERT" "(" "elm.text_type() == TextElement::CLASS_RANGES"
")"); do { *((volatile int*)__null) = 1907; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
1908 RegExpClassRanges* cr = elm.class_ranges();
1909 ZoneList<CharacterRange>* ranges = cr->ranges(zone());
1910 CharacterRange::Canonicalize(ranges);
1911 // Now they are in order so we only need to look at the first.
1912 int range_count = ranges->length();
1913 if (cr->is_negated()) {
1914 if (range_count != 0 && ranges->at(0).from() == 0 &&
1915 ranges->at(0).to() >= String::kMaxOneByteCharCode) {
1916 // This will be handled in a later filter.
1917 if (IsIgnoreCase(flags) && RangesContainLatin1Equivalents(ranges)) {
1918 continue;
1919 }
1920 return set_replacement(nullptr);
1921 }
1922 } else {
1923 if (range_count == 0 ||
1924 ranges->at(0).from() > String::kMaxOneByteCharCode) {
1925 // This will be handled in a later filter.
1926 if (IsIgnoreCase(flags) && RangesContainLatin1Equivalents(ranges)) {
1927 continue;
1928 }
1929 return set_replacement(nullptr);
1930 }
1931 }
1932 }
1933 }
1934 return FilterSuccessor(depth - 1, flags);
1935}
1936
1937RegExpNode* LoopChoiceNode::FilterOneByte(int depth, RegExpFlags flags) {
1938 if (info()->replacement_calculated) return replacement();
1939 if (depth < 0) return this;
1940 if (info()->visited) return this;
1941 {
1942 VisitMarker marker(info());
1943
1944 RegExpNode* continue_replacement =
1945 continue_node_->FilterOneByte(depth - 1, flags);
1946 // If we can't continue after the loop then there is no sense in doing the
1947 // loop.
1948 if (continue_replacement == nullptr) return set_replacement(nullptr);
1949 }
1950
1951 return ChoiceNode::FilterOneByte(depth - 1, flags);
1952}
1953
1954RegExpNode* ChoiceNode::FilterOneByte(int depth, RegExpFlags flags) {
1955 if (info()->replacement_calculated) return replacement();
1956 if (depth < 0) return this;
1957 if (info()->visited) return this;
1958 VisitMarker marker(info());
1959 int choice_count = alternatives_->length();
1960
1961 for (int i = 0; i < choice_count; i++) {
1962 GuardedAlternative alternative = alternatives_->at(i);
1963 if (alternative.guards() != nullptr &&
1964 alternative.guards()->length() != 0) {
1965 set_replacement(this);
1966 return this;
1967 }
1968 }
1969
1970 int surviving = 0;
1971 RegExpNode* survivor = nullptr;
1972 for (int i = 0; i < choice_count; i++) {
1973 GuardedAlternative alternative = alternatives_->at(i);
1974 RegExpNode* replacement =
1975 alternative.node()->FilterOneByte(depth - 1, flags);
1976 DCHECK(replacement != this)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(replacement != this)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(replacement != this))), 0)))
{ do { } while (false); MOZ_ReportAssertionFailure("replacement != this"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 1976); AnnotateMozCrashReason("MOZ_ASSERT" "(" "replacement != this"
")"); do { *((volatile int*)__null) = 1976; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
; // No missing EMPTY_MATCH_CHECK.
1977 if (replacement != nullptr) {
1978 alternatives_->at(i).set_node(replacement);
1979 surviving++;
1980 survivor = replacement;
1981 }
1982 }
1983 if (surviving < 2) return set_replacement(survivor);
1984
1985 set_replacement(this);
1986 if (surviving == choice_count) {
1987 return this;
1988 }
1989 // Only some of the nodes survived the filtering. We need to rebuild the
1990 // alternatives list.
1991 ZoneList<GuardedAlternative>* new_alternatives =
1992 zone()->New<ZoneList<GuardedAlternative>>(surviving, zone());
1993 for (int i = 0; i < choice_count; i++) {
1994 RegExpNode* replacement =
1995 alternatives_->at(i).node()->FilterOneByte(depth - 1, flags);
1996 if (replacement != nullptr) {
1997 alternatives_->at(i).set_node(replacement);
1998 new_alternatives->Add(alternatives_->at(i), zone());
1999 }
2000 }
2001 alternatives_ = new_alternatives;
2002 return this;
2003}
2004
2005RegExpNode* NegativeLookaroundChoiceNode::FilterOneByte(int depth,
2006 RegExpFlags flags) {
2007 if (info()->replacement_calculated) return replacement();
2008 if (depth < 0) return this;
2009 if (info()->visited) return this;
2010 VisitMarker marker(info());
2011 // Alternative 0 is the negative lookahead, alternative 1 is what comes
2012 // afterwards.
2013 RegExpNode* node = continue_node();
2014 RegExpNode* replacement = node->FilterOneByte(depth - 1, flags);
2015 if (replacement == nullptr) return set_replacement(nullptr);
2016 alternatives_->at(kContinueIndex).set_node(replacement);
2017
2018 RegExpNode* neg_node = lookaround_node();
2019 RegExpNode* neg_replacement = neg_node->FilterOneByte(depth - 1, flags);
2020 // If the negative lookahead is always going to fail then
2021 // we don't need to check it.
2022 if (neg_replacement == nullptr) return set_replacement(replacement);
2023 alternatives_->at(kLookaroundIndex).set_node(neg_replacement);
2024 return set_replacement(this);
2025}
2026
2027void LoopChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details,
2028 RegExpCompiler* compiler,
2029 int characters_filled_in,
2030 bool not_at_start) {
2031 if (body_can_be_zero_length_ || info()->visited) return;
2032 not_at_start = not_at_start || this->not_at_start();
2033 DCHECK_EQ(alternatives_->length(), 2)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((alternatives_->length()) == (2))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((alternatives_->length())
== (2)))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("(alternatives_->length()) == (2)", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 2033); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(alternatives_->length()) == (2)"
")"); do { *((volatile int*)__null) = 2033; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
; // There's just loop and continue.
2034 if (traversed_loop_initialization_node_ && min_loop_iterations_ > 0 &&
2035 loop_node_->EatsAtLeast(not_at_start) >
2036 continue_node_->EatsAtLeast(true)) {
2037 // Loop body is guaranteed to execute at least once, and consume characters
2038 // when it does, meaning the only possible quick checks from this point
2039 // begin with the loop body. We may recursively visit this LoopChoiceNode,
2040 // but we temporarily decrease its minimum iteration counter so we know when
2041 // to check the continue case.
2042 IterationDecrementer next_iteration(this);
2043 loop_node_->GetQuickCheckDetails(details, compiler, characters_filled_in,
2044 not_at_start);
2045 } else {
2046 // Might not consume anything in the loop body, so treat it like a normal
2047 // ChoiceNode (and don't recursively visit this node again).
2048 VisitMarker marker(info());
2049 ChoiceNode::GetQuickCheckDetails(details, compiler, characters_filled_in,
2050 not_at_start);
2051 }
2052}
2053
2054void LoopChoiceNode::GetQuickCheckDetailsFromLoopEntry(
2055 QuickCheckDetails* details, RegExpCompiler* compiler,
2056 int characters_filled_in, bool not_at_start) {
2057 if (traversed_loop_initialization_node_) {
2058 // We already entered this loop once, exited via its continuation node, and
2059 // followed an outer loop's back-edge to before the loop entry point. We
2060 // could try to reset the minimum iteration count to its starting value at
2061 // this point, but that seems like more trouble than it's worth. It's safe
2062 // to keep going with the current (possibly reduced) minimum iteration
2063 // count.
2064 GetQuickCheckDetails(details, compiler, characters_filled_in, not_at_start);
2065 } else {
2066 // We are entering a loop via its counter initialization action, meaning we
2067 // are guaranteed to run the loop body at least some minimum number of times
2068 // before running the continuation node. Set a flag so that this node knows
2069 // (now and any times we visit it again recursively) that it was entered
2070 // from the top.
2071 LoopInitializationMarker marker(this);
2072 GetQuickCheckDetails(details, compiler, characters_filled_in, not_at_start);
2073 }
2074}
2075
2076void LoopChoiceNode::FillInBMInfo(Isolate* isolate, int offset, int budget,
2077 BoyerMooreLookahead* bm, bool not_at_start) {
2078 if (body_can_be_zero_length_ || budget <= 0) {
2079 bm->SetRest(offset);
2080 SaveBMInfo(bm, not_at_start, offset);
2081 return;
2082 }
2083 ChoiceNode::FillInBMInfo(isolate, offset, budget - 1, bm, not_at_start);
2084 SaveBMInfo(bm, not_at_start, offset);
2085}
2086
2087void ChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details,
2088 RegExpCompiler* compiler,
2089 int characters_filled_in,
2090 bool not_at_start) {
2091 not_at_start = (not_at_start || not_at_start_);
2092 int choice_count = alternatives_->length();
2093 DCHECK_LT(0, choice_count)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((0) < (choice_count))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((0) < (choice_count)))), 0
))) { do { } while (false); MOZ_ReportAssertionFailure("(0) < (choice_count)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 2093); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(0) < (choice_count)"
")"); do { *((volatile int*)__null) = 2093; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
2094 alternatives_->at(0).node()->GetQuickCheckDetails(
2095 details, compiler, characters_filled_in, not_at_start);
2096 for (int i = 1; i < choice_count; i++) {
2097 QuickCheckDetails new_details(details->characters());
2098 RegExpNode* node = alternatives_->at(i).node();
2099 node->GetQuickCheckDetails(&new_details, compiler, characters_filled_in,
2100 not_at_start);
2101 // Here we merge the quick match details of the two branches.
2102 details->Merge(&new_details, characters_filled_in);
2103 }
2104}
2105
2106namespace {
2107
2108// Check for [0-9A-Z_a-z].
2109void EmitWordCheck(RegExpMacroAssembler* assembler, Label* word,
2110 Label* non_word, bool fall_through_on_word) {
2111 if (assembler->CheckSpecialClassRanges(
2112 fall_through_on_word ? StandardCharacterSet::kWord
2113 : StandardCharacterSet::kNotWord,
2114 fall_through_on_word ? non_word : word)) {
2115 // Optimized implementation available.
2116 return;
2117 }
2118 assembler->CheckCharacterGT('z', non_word);
2119 assembler->CheckCharacterLT('0', non_word);
2120 assembler->CheckCharacterGT('a' - 1, word);
2121 assembler->CheckCharacterLT('9' + 1, word);
2122 assembler->CheckCharacterLT('A', non_word);
2123 assembler->CheckCharacterLT('Z' + 1, word);
2124 if (fall_through_on_word) {
2125 assembler->CheckNotCharacter('_', non_word);
2126 } else {
2127 assembler->CheckCharacter('_', word);
2128 }
2129}
2130
2131// Emit the code to check for a ^ in multiline mode (1-character lookbehind
2132// that matches newline or the start of input).
2133void EmitHat(RegExpCompiler* compiler, RegExpNode* on_success, Trace* trace) {
2134 RegExpMacroAssembler* assembler = compiler->macro_assembler();
2135
2136 // We will load the previous character into the current character register.
2137 Trace new_trace(*trace);
2138 new_trace.InvalidateCurrentCharacter();
2139
2140 // A positive (> 0) cp_offset means we've already successfully matched a
2141 // non-empty-width part of the pattern, and thus cannot be at or before the
2142 // start of the subject string. We can thus skip both at-start and
2143 // bounds-checks when loading the one-character lookbehind.
2144 const bool may_be_at_or_before_subject_string_start =
2145 new_trace.cp_offset() <= 0;
2146
2147 Label ok;
2148 if (may_be_at_or_before_subject_string_start) {
2149 // The start of input counts as a newline in this context, so skip to ok if
2150 // we are at the start.
2151 assembler->CheckAtStart(new_trace.cp_offset(), &ok);
2152 }
2153
2154 // If we've already checked that we are not at the start of input, it's okay
2155 // to load the previous character without bounds checks.
2156 const bool can_skip_bounds_check = !may_be_at_or_before_subject_string_start;
2157 assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1,
2158 new_trace.backtrack(), can_skip_bounds_check);
2159 if (!assembler->CheckSpecialClassRanges(StandardCharacterSet::kLineTerminator,
2160 new_trace.backtrack())) {
2161 // Newline means \n, \r, 0x2028 or 0x2029.
2162 if (!compiler->one_byte()) {
2163 assembler->CheckCharacterAfterAnd(0x2028, 0xFFFE, &ok);
2164 }
2165 assembler->CheckCharacter('\n', &ok);
2166 assembler->CheckNotCharacter('\r', new_trace.backtrack());
2167 }
2168 assembler->Bind(&ok);
2169 on_success->Emit(compiler, &new_trace);
2170}
2171
2172} // namespace
2173
2174// Emit the code to handle \b and \B (word-boundary or non-word-boundary).
2175void AssertionNode::EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace) {
2176 RegExpMacroAssembler* assembler = compiler->macro_assembler();
2177 Isolate* isolate = assembler->isolate();
2178 Trace::TriBool next_is_word_character = Trace::UNKNOWN;
2179 bool not_at_start = (trace->at_start() == Trace::FALSE_VALUE);
2180 BoyerMooreLookahead* lookahead = bm_info(not_at_start);
2181 if (lookahead == nullptr) {
2182 int eats_at_least =
2183 std::min(kMaxLookaheadForBoyerMoore, EatsAtLeast(not_at_start));
2184 if (eats_at_least >= 1) {
2185 BoyerMooreLookahead* bm =
2186 zone()->New<BoyerMooreLookahead>(eats_at_least, compiler, zone());
2187 FillInBMInfo(isolate, 0, kRecursionBudget, bm, not_at_start);
2188 if (bm->at(0)->is_non_word()) next_is_word_character = Trace::FALSE_VALUE;
2189 if (bm->at(0)->is_word()) next_is_word_character = Trace::TRUE_VALUE;
2190 }
2191 } else {
2192 if (lookahead->at(0)->is_non_word())
2193 next_is_word_character = Trace::FALSE_VALUE;
2194 if (lookahead->at(0)->is_word()) next_is_word_character = Trace::TRUE_VALUE;
2195 }
2196 bool at_boundary = (assertion_type_ == AssertionNode::AT_BOUNDARY);
2197 if (next_is_word_character == Trace::UNKNOWN) {
2198 Label before_non_word;
2199 Label before_word;
2200 if (trace->characters_preloaded() != 1) {
2201 assembler->LoadCurrentCharacter(trace->cp_offset(), &before_non_word);
2202 }
2203 // Fall through on non-word.
2204 EmitWordCheck(assembler, &before_word, &before_non_word, false);
2205 // Next character is not a word character.
2206 assembler->Bind(&before_non_word);
2207 Label ok;
2208 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsNonWord : kIsWord);
2209 assembler->GoTo(&ok);
2210
2211 assembler->Bind(&before_word);
2212 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsWord : kIsNonWord);
2213 assembler->Bind(&ok);
2214 } else if (next_is_word_character == Trace::TRUE_VALUE) {
2215 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsWord : kIsNonWord);
2216 } else {
2217 DCHECK(next_is_word_character == Trace::FALSE_VALUE)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(next_is_word_character == Trace::FALSE_VALUE)>::isValid
, "invalid assertion condition"); if ((__builtin_expect(!!(!(
!!(next_is_word_character == Trace::FALSE_VALUE))), 0))) { do
{ } while (false); MOZ_ReportAssertionFailure("next_is_word_character == Trace::FALSE_VALUE"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 2217); AnnotateMozCrashReason("MOZ_ASSERT" "(" "next_is_word_character == Trace::FALSE_VALUE"
")"); do { *((volatile int*)__null) = 2217; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
2218 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsNonWord : kIsWord);
2219 }
2220}
2221
2222void AssertionNode::BacktrackIfPrevious(
2223 RegExpCompiler* compiler, Trace* trace,
2224 AssertionNode::IfPrevious backtrack_if_previous) {
2225 RegExpMacroAssembler* assembler = compiler->macro_assembler();
2226 Trace new_trace(*trace);
2227 new_trace.InvalidateCurrentCharacter();
2228
2229 Label fall_through;
2230 Label* non_word = backtrack_if_previous == kIsNonWord ? new_trace.backtrack()
2231 : &fall_through;
2232 Label* word = backtrack_if_previous == kIsNonWord ? &fall_through
2233 : new_trace.backtrack();
2234
2235 // A positive (> 0) cp_offset means we've already successfully matched a
2236 // non-empty-width part of the pattern, and thus cannot be at or before the
2237 // start of the subject string. We can thus skip both at-start and
2238 // bounds-checks when loading the one-character lookbehind.
2239 const bool may_be_at_or_before_subject_string_start =
2240 new_trace.cp_offset() <= 0;
2241
2242 if (may_be_at_or_before_subject_string_start) {
2243 // The start of input counts as a non-word character, so the question is
2244 // decided if we are at the start.
2245 assembler->CheckAtStart(new_trace.cp_offset(), non_word);
2246 }
2247
2248 // If we've already checked that we are not at the start of input, it's okay
2249 // to load the previous character without bounds checks.
2250 const bool can_skip_bounds_check = !may_be_at_or_before_subject_string_start;
2251 assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1, non_word,
2252 can_skip_bounds_check);
2253 EmitWordCheck(assembler, word, non_word, backtrack_if_previous == kIsNonWord);
2254
2255 assembler->Bind(&fall_through);
2256 on_success()->Emit(compiler, &new_trace);
2257}
2258
2259void AssertionNode::GetQuickCheckDetails(QuickCheckDetails* details,
2260 RegExpCompiler* compiler,
2261 int filled_in, bool not_at_start) {
2262 if (assertion_type_ == AT_START && not_at_start) {
2263 details->set_cannot_match();
2264 return;
2265 }
2266 return on_success()->GetQuickCheckDetails(details, compiler, filled_in,
2267 not_at_start);
2268}
2269
2270void AssertionNode::Emit(RegExpCompiler* compiler, Trace* trace) {
2271 RegExpMacroAssembler* assembler = compiler->macro_assembler();
2272 switch (assertion_type_) {
2273 case AT_END: {
2274 Label ok;
2275 assembler->CheckPosition(trace->cp_offset(), &ok);
2276 assembler->GoTo(trace->backtrack());
2277 assembler->Bind(&ok);
2278 break;
2279 }
2280 case AT_START: {
2281 if (trace->at_start() == Trace::FALSE_VALUE) {
2282 assembler->GoTo(trace->backtrack());
2283 return;
2284 }
2285 if (trace->at_start() == Trace::UNKNOWN) {
2286 assembler->CheckNotAtStart(trace->cp_offset(), trace->backtrack());
2287 Trace at_start_trace = *trace;
2288 at_start_trace.set_at_start(Trace::TRUE_VALUE);
2289 on_success()->Emit(compiler, &at_start_trace);
2290 return;
2291 }
2292 } break;
2293 case AFTER_NEWLINE:
2294 EmitHat(compiler, on_success(), trace);
2295 return;
2296 case AT_BOUNDARY:
2297 case AT_NON_BOUNDARY: {
2298 EmitBoundaryCheck(compiler, trace);
2299 return;
2300 }
2301 }
2302 on_success()->Emit(compiler, trace);
2303}
2304
2305namespace {
2306
2307bool DeterminedAlready(QuickCheckDetails* quick_check, int offset) {
2308 if (quick_check == nullptr) return false;
2309 if (offset >= quick_check->characters()) return false;
2310 return quick_check->positions(offset)->determines_perfectly;
2311}
2312
2313void UpdateBoundsCheck(int index, int* checked_up_to) {
2314 if (index > *checked_up_to) {
2315 *checked_up_to = index;
2316 }
2317}
2318
2319} // namespace
2320
2321// We call this repeatedly to generate code for each pass over the text node.
2322// The passes are in increasing order of difficulty because we hope one
2323// of the first passes will fail in which case we are saved the work of the
2324// later passes. for example for the case independent regexp /%[asdfghjkl]a/
2325// we will check the '%' in the first pass, the case independent 'a' in the
2326// second pass and the character class in the last pass.
2327//
2328// The passes are done from right to left, so for example to test for /bar/
2329// we will first test for an 'r' with offset 2, then an 'a' with offset 1
2330// and then a 'b' with offset 0. This means we can avoid the end-of-input
2331// bounds check most of the time. In the example we only need to check for
2332// end-of-input when loading the putative 'r'.
2333//
2334// A slight complication involves the fact that the first character may already
2335// be fetched into a register by the previous node. In this case we want to
2336// do the test for that character first. We do this in separate passes. The
2337// 'preloaded' argument indicates that we are doing such a 'pass'. If such a
2338// pass has been performed then subsequent passes will have true in
2339// first_element_checked to indicate that that character does not need to be
2340// checked again.
2341//
2342// In addition to all this we are passed a Trace, which can
2343// contain an AlternativeGeneration object. In this AlternativeGeneration
2344// object we can see details of any quick check that was already passed in
2345// order to get to the code we are now generating. The quick check can involve
2346// loading characters, which means we do not need to recheck the bounds
2347// up to the limit the quick check already checked. In addition the quick
2348// check can have involved a mask and compare operation which may simplify
2349// or obviate the need for further checks at some character positions.
2350void TextNode::TextEmitPass(RegExpCompiler* compiler, TextEmitPassType pass,
2351 bool preloaded, Trace* trace,
2352 bool first_element_checked, int* checked_up_to) {
2353 RegExpMacroAssembler* assembler = compiler->macro_assembler();
2354 Isolate* isolate = assembler->isolate();
2355 bool one_byte = compiler->one_byte();
2356 Label* backtrack = trace->backtrack();
2357 QuickCheckDetails* quick_check = trace->quick_check_performed();
2358 int element_count = elements()->length();
2359 int backward_offset = read_backward() ? -Length() : 0;
2360 for (int i = preloaded ? 0 : element_count - 1; i >= 0; i--) {
2361 TextElement elm = elements()->at(i);
2362 int cp_offset = trace->cp_offset() + elm.cp_offset() + backward_offset;
2363 if (elm.text_type() == TextElement::ATOM) {
2364 base::Vector<const base::uc16> quarks = elm.atom()->data();
2365 for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) {
2366 if (first_element_checked && i == 0 && j == 0) continue;
2367 if (DeterminedAlready(quick_check, elm.cp_offset() + j)) continue;
2368 base::uc16 quark = quarks[j];
2369 if (IsIgnoreCase(compiler->flags())) {
2370 // Everywhere else we assume that a non-Latin-1 character cannot match
2371 // a Latin-1 character. Avoid the cases where this is assumption is
2372 // invalid by using the Latin1 equivalent instead.
2373 quark = unibrow::Latin1::TryConvertToLatin1(quark);
2374 }
2375 bool needs_bounds_check =
2376 *checked_up_to < cp_offset + j || read_backward();
2377 bool bounds_checked = false;
2378 switch (pass) {
2379 case NON_LATIN1_MATCH:
2380 DCHECK(one_byte)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(one_byte)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(one_byte))), 0))) { do { } while
(false); MOZ_ReportAssertionFailure("one_byte", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 2380); AnnotateMozCrashReason("MOZ_ASSERT" "(" "one_byte" ")"
); do { *((volatile int*)__null) = 2380; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
2381 if (quark > String::kMaxOneByteCharCode) {
2382 assembler->GoTo(backtrack);
2383 return;
2384 }
2385 break;
2386 case NON_LETTER_CHARACTER_MATCH:
2387 bounds_checked =
2388 EmitAtomNonLetter(isolate, compiler, quark, backtrack,
2389 cp_offset + j, needs_bounds_check, preloaded);
2390 break;
2391 case SIMPLE_CHARACTER_MATCH:
2392 bounds_checked = EmitSimpleCharacter(isolate, compiler, quark,
2393 backtrack, cp_offset + j,
2394 needs_bounds_check, preloaded);
2395 break;
2396 case CASE_CHARACTER_MATCH:
2397 bounds_checked =
2398 EmitAtomLetter(isolate, compiler, quark, backtrack,
2399 cp_offset + j, needs_bounds_check, preloaded);
2400 break;
2401 default:
2402 break;
2403 }
2404 if (bounds_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to);
2405 }
2406 } else {
2407 DCHECK_EQ(TextElement::CLASS_RANGES, elm.text_type())do { static_assert( mozilla::detail::AssertionConditionType<
decltype((TextElement::CLASS_RANGES) == (elm.text_type()))>
::isValid, "invalid assertion condition"); if ((__builtin_expect
(!!(!(!!((TextElement::CLASS_RANGES) == (elm.text_type())))),
0))) { do { } while (false); MOZ_ReportAssertionFailure("(TextElement::CLASS_RANGES) == (elm.text_type())"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 2407); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(TextElement::CLASS_RANGES) == (elm.text_type())"
")"); do { *((volatile int*)__null) = 2407; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
2408 if (pass == CHARACTER_CLASS_MATCH) {
2409 if (first_element_checked && i == 0) continue;
2410 if (DeterminedAlready(quick_check, elm.cp_offset())) continue;
2411 RegExpClassRanges* cr = elm.class_ranges();
2412 bool bounds_check = *checked_up_to < cp_offset || read_backward();
2413 EmitClassRanges(assembler, cr, one_byte, backtrack, cp_offset,
2414 bounds_check, preloaded, zone());
2415 UpdateBoundsCheck(cp_offset, checked_up_to);
2416 }
2417 }
2418 }
2419}
2420
2421int TextNode::Length() {
2422 TextElement elm = elements()->last();
2423 DCHECK_LE(0, elm.cp_offset())do { static_assert( mozilla::detail::AssertionConditionType<
decltype((0) <= (elm.cp_offset()))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((0) <= (elm.cp_offset()))
)), 0))) { do { } while (false); MOZ_ReportAssertionFailure("(0) <= (elm.cp_offset())"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 2423); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(0) <= (elm.cp_offset())"
")"); do { *((volatile int*)__null) = 2423; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
2424 return elm.cp_offset() + elm.length();
2425}
2426
2427TextNode* TextNode::CreateForCharacterRanges(Zone* zone,
2428 ZoneList<CharacterRange>* ranges,
2429 bool read_backward,
2430 RegExpNode* on_success) {
2431 DCHECK_NOT_NULL(ranges)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((ranges) != nullptr)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((ranges) != nullptr))), 0)))
{ do { } while (false); MOZ_ReportAssertionFailure("(ranges) != nullptr"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 2431); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(ranges) != nullptr"
")"); do { *((volatile int*)__null) = 2431; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
2432 // TODO(jgruber): There's no fundamental need to create this
2433 // RegExpClassRanges; we could refactor to avoid the allocation.
2434 return zone->New<TextNode>(zone->New<RegExpClassRanges>(zone, ranges),
2435 read_backward, on_success);
2436}
2437
2438TextNode* TextNode::CreateForSurrogatePair(
2439 Zone* zone, CharacterRange lead, ZoneList<CharacterRange>* trail_ranges,
2440 bool read_backward, RegExpNode* on_success) {
2441 ZoneList<TextElement>* elms = zone->New<ZoneList<TextElement>>(2, zone);
2442 if (lead.from() == lead.to()) {
2443 ZoneList<base::uc16> lead_surrogate(1, zone);
2444 lead_surrogate.Add(lead.from(), zone);
2445 RegExpAtom* atom = zone->New<RegExpAtom>(lead_surrogate.ToConstVector());
2446 elms->Add(TextElement::Atom(atom), zone);
2447 } else {
2448 ZoneList<CharacterRange>* lead_ranges = CharacterRange::List(zone, lead);
2449 elms->Add(TextElement::ClassRanges(
2450 zone->New<RegExpClassRanges>(zone, lead_ranges)),
2451 zone);
2452 }
2453 elms->Add(TextElement::ClassRanges(
2454 zone->New<RegExpClassRanges>(zone, trail_ranges)),
2455 zone);
2456 return zone->New<TextNode>(elms, read_backward, on_success);
2457}
2458
2459TextNode* TextNode::CreateForSurrogatePair(
2460 Zone* zone, ZoneList<CharacterRange>* lead_ranges, CharacterRange trail,
2461 bool read_backward, RegExpNode* on_success) {
2462 ZoneList<CharacterRange>* trail_ranges = CharacterRange::List(zone, trail);
2463 ZoneList<TextElement>* elms = zone->New<ZoneList<TextElement>>(2, zone);
2464 elms->Add(
2465 TextElement::ClassRanges(zone->New<RegExpClassRanges>(zone, lead_ranges)),
2466 zone);
2467 elms->Add(TextElement::ClassRanges(
2468 zone->New<RegExpClassRanges>(zone, trail_ranges)),
2469 zone);
2470 return zone->New<TextNode>(elms, read_backward, on_success);
2471}
2472
2473// This generates the code to match a text node. A text node can contain
2474// straight character sequences (possibly to be matched in a case-independent
2475// way) and character classes. For efficiency we do not do this in a single
2476// pass from left to right. Instead we pass over the text node several times,
2477// emitting code for some character positions every time. See the comment on
2478// TextEmitPass for details.
2479void TextNode::Emit(RegExpCompiler* compiler, Trace* trace) {
2480 LimitResult limit_result = LimitVersions(compiler, trace);
2481 if (limit_result == DONE) return;
2482 DCHECK(limit_result == CONTINUE)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(limit_result == CONTINUE)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(limit_result == CONTINUE))),
0))) { do { } while (false); MOZ_ReportAssertionFailure("limit_result == CONTINUE"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 2482); AnnotateMozCrashReason("MOZ_ASSERT" "(" "limit_result == CONTINUE"
")"); do { *((volatile int*)__null) = 2482; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
2483
2484 if (trace->cp_offset() + Length() > RegExpMacroAssembler::kMaxCPOffset) {
2485 compiler->SetRegExpTooBig();
2486 return;
2487 }
2488
2489 if (compiler->one_byte()) {
2490 int dummy = 0;
2491 TextEmitPass(compiler, NON_LATIN1_MATCH, false, trace, false, &dummy);
2492 }
2493
2494 bool first_elt_done = false;
2495 int bound_checked_to = trace->cp_offset() - 1;
2496 bound_checked_to += trace->bound_checked_up_to();
2497
2498 // If a character is preloaded into the current character register then
2499 // check that first to save reloading it.
2500 for (int twice = 0; twice < 2; twice++) {
2501 bool is_preloaded_pass = twice == 0;
2502 if (is_preloaded_pass && trace->characters_preloaded() != 1) continue;
2503 if (IsIgnoreCase(compiler->flags())) {
2504 TextEmitPass(compiler, NON_LETTER_CHARACTER_MATCH, is_preloaded_pass,
2505 trace, first_elt_done, &bound_checked_to);
2506 TextEmitPass(compiler, CASE_CHARACTER_MATCH, is_preloaded_pass, trace,
2507 first_elt_done, &bound_checked_to);
2508 } else {
2509 TextEmitPass(compiler, SIMPLE_CHARACTER_MATCH, is_preloaded_pass, trace,
2510 first_elt_done, &bound_checked_to);
2511 }
2512 TextEmitPass(compiler, CHARACTER_CLASS_MATCH, is_preloaded_pass, trace,
2513 first_elt_done, &bound_checked_to);
2514 first_elt_done = true;
2515 }
2516
2517 Trace successor_trace(*trace);
2518 // If we advance backward, we may end up at the start.
2519 successor_trace.AdvanceCurrentPositionInTrace(
2520 read_backward() ? -Length() : Length(), compiler);
2521 successor_trace.set_at_start(read_backward() ? Trace::UNKNOWN
2522 : Trace::FALSE_VALUE);
2523 RecursionCheck rc(compiler);
2524 on_success()->Emit(compiler, &successor_trace);
2525}
2526
2527void Trace::InvalidateCurrentCharacter() { characters_preloaded_ = 0; }
2528
2529void Trace::AdvanceCurrentPositionInTrace(int by, RegExpCompiler* compiler) {
2530 // We don't have an instruction for shifting the current character register
2531 // down or for using a shifted value for anything so lets just forget that
2532 // we preloaded any characters into it.
2533 characters_preloaded_ = 0;
2534 // Adjust the offsets of the quick check performed information. This
2535 // information is used to find out what we already determined about the
2536 // characters by means of mask and compare.
2537 quick_check_performed_.Advance(by, compiler->one_byte());
2538 cp_offset_ += by;
2539 if (cp_offset_ > RegExpMacroAssembler::kMaxCPOffset) {
2540 compiler->SetRegExpTooBig();
2541 cp_offset_ = 0;
2542 }
2543 bound_checked_up_to_ = std::max(0, bound_checked_up_to_ - by);
2544}
2545
2546void TextNode::MakeCaseIndependent(Isolate* isolate, bool is_one_byte,
2547 RegExpFlags flags) {
2548 if (!IsIgnoreCase(flags)) return;
2549#ifdef V8_INTL_SUPPORT1
2550 if (NeedsUnicodeCaseEquivalents(flags)) return;
2551#endif
2552
2553 int element_count = elements()->length();
2554 for (int i = 0; i < element_count; i++) {
2555 TextElement elm = elements()->at(i);
2556 if (elm.text_type() == TextElement::CLASS_RANGES) {
2557 RegExpClassRanges* cr = elm.class_ranges();
2558 // None of the standard character classes is different in the case
2559 // independent case and it slows us down if we don't know that.
2560 if (cr->is_standard(zone())) continue;
2561 ZoneList<CharacterRange>* ranges = cr->ranges(zone());
2562 CharacterRange::AddCaseEquivalents(isolate, zone(), ranges, is_one_byte);
2563 }
2564 }
2565}
2566
2567int TextNode::GreedyLoopTextLength() { return Length(); }
2568
2569RegExpNode* TextNode::GetSuccessorOfOmnivorousTextNode(
2570 RegExpCompiler* compiler) {
2571 if (read_backward()) return nullptr;
2572 if (elements()->length() != 1) return nullptr;
2573 TextElement elm = elements()->at(0);
2574 if (elm.text_type() != TextElement::CLASS_RANGES) return nullptr;
2575 RegExpClassRanges* node = elm.class_ranges();
2576 ZoneList<CharacterRange>* ranges = node->ranges(zone());
2577 CharacterRange::Canonicalize(ranges);
2578 if (node->is_negated()) {
2579 return ranges->length() == 0 ? on_success() : nullptr;
2580 }
2581 if (ranges->length() != 1) return nullptr;
2582 const base::uc32 max_char = MaxCodeUnit(compiler->one_byte());
2583 return ranges->at(0).IsEverything(max_char) ? on_success() : nullptr;
2584}
2585
2586// Finds the fixed match length of a sequence of nodes that goes from
2587// this alternative and back to this choice node. If there are variable
2588// length nodes or other complications in the way then return a sentinel
2589// value indicating that a greedy loop cannot be constructed.
2590int ChoiceNode::GreedyLoopTextLengthForAlternative(
2591 GuardedAlternative* alternative) {
2592 int length = 0;
2593 RegExpNode* node = alternative->node();
2594 // Later we will generate code for all these text nodes using recursion
2595 // so we have to limit the max number.
2596 int recursion_depth = 0;
2597 while (node != this) {
2598 if (recursion_depth++ > RegExpCompiler::kMaxRecursion) {
2599 return kNodeIsTooComplexForGreedyLoops;
2600 }
2601 int node_length = node->GreedyLoopTextLength();
2602 if (node_length == kNodeIsTooComplexForGreedyLoops) {
2603 return kNodeIsTooComplexForGreedyLoops;
2604 }
2605 length += node_length;
2606 SeqRegExpNode* seq_node = static_cast<SeqRegExpNode*>(node);
2607 node = seq_node->on_success();
2608 }
2609 if (read_backward()) {
2610 length = -length;
2611 }
2612 // Check that we can jump by the whole text length. If not, return sentinel
2613 // to indicate the we can't construct a greedy loop.
2614 if (length < RegExpMacroAssembler::kMinCPOffset ||
2615 length > RegExpMacroAssembler::kMaxCPOffset) {
2616 return kNodeIsTooComplexForGreedyLoops;
2617 }
2618 return length;
2619}
2620
2621void LoopChoiceNode::AddLoopAlternative(GuardedAlternative alt) {
2622 DCHECK_NULL(loop_node_)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((loop_node_) == nullptr)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((loop_node_) == nullptr))), 0
))) { do { } while (false); MOZ_ReportAssertionFailure("(loop_node_) == nullptr"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 2622); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(loop_node_) == nullptr"
")"); do { *((volatile int*)__null) = 2622; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
2623 AddAlternative(alt);
2624 loop_node_ = alt.node();
2625}
2626
2627void LoopChoiceNode::AddContinueAlternative(GuardedAlternative alt) {
2628 DCHECK_NULL(continue_node_)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((continue_node_) == nullptr)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((continue_node_) == nullptr)
)), 0))) { do { } while (false); MOZ_ReportAssertionFailure("(continue_node_) == nullptr"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 2628); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(continue_node_) == nullptr"
")"); do { *((volatile int*)__null) = 2628; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
2629 AddAlternative(alt);
2630 continue_node_ = alt.node();
2631}
2632
2633void LoopChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
2634 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
2635 if (trace->stop_node() == this) {
1
Assuming the condition is false
2
Taking false branch
2636 // Back edge of greedy optimized loop node graph.
2637 int text_length =
2638 GreedyLoopTextLengthForAlternative(&(alternatives_->at(0)));
2639 DCHECK_NE(kNodeIsTooComplexForGreedyLoops, text_length)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((kNodeIsTooComplexForGreedyLoops) != (text_length))>
::isValid, "invalid assertion condition"); if ((__builtin_expect
(!!(!(!!((kNodeIsTooComplexForGreedyLoops) != (text_length)))
), 0))) { do { } while (false); MOZ_ReportAssertionFailure("(kNodeIsTooComplexForGreedyLoops) != (text_length)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 2639); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(kNodeIsTooComplexForGreedyLoops) != (text_length)"
")"); do { *((volatile int*)__null) = 2639; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
2640 // Update the counter-based backtracking info on the stack. This is an
2641 // optimization for greedy loops (see below).
2642 DCHECK(trace->cp_offset() == text_length)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(trace->cp_offset() == text_length)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(trace->cp_offset() == text_length
))), 0))) { do { } while (false); MOZ_ReportAssertionFailure(
"trace->cp_offset() == text_length", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 2642); AnnotateMozCrashReason("MOZ_ASSERT" "(" "trace->cp_offset() == text_length"
")"); do { *((volatile int*)__null) = 2642; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
2643 macro_assembler->AdvanceCurrentPosition(text_length);
2644 macro_assembler->GoTo(trace->loop_label());
2645 return;
2646 }
2647 DCHECK_NULL(trace->stop_node())do { static_assert( mozilla::detail::AssertionConditionType<
decltype((trace->stop_node()) == nullptr)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((trace->stop_node()) == nullptr
))), 0))) { do { } while (false); MOZ_ReportAssertionFailure(
"(trace->stop_node()) == nullptr", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 2647); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(trace->stop_node()) == nullptr"
")"); do { *((volatile int*)__null) = 2647; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
3
Assuming the condition is true
4
Taking false branch
5
Loop condition is false. Exiting loop
2648 if (!trace->is_trivial()) {
6
Taking false branch
2649 trace->Flush(compiler, this);
2650 return;
2651 }
2652 ChoiceNode::Emit(compiler, trace);
7
Calling 'ChoiceNode::Emit'
2653}
2654
2655int ChoiceNode::CalculatePreloadCharacters(RegExpCompiler* compiler,
2656 int eats_at_least) {
2657 int preload_characters = std::min(4, eats_at_least);
2658 DCHECK_LE(preload_characters, 4)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((preload_characters) <= (4))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((preload_characters) <= (
4)))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("(preload_characters) <= (4)", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 2658); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(preload_characters) <= (4)"
")"); do { *((volatile int*)__null) = 2658; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
2659 if (compiler->macro_assembler()->CanReadUnaligned()) {
2660 bool one_byte = compiler->one_byte();
2661 if (one_byte) {
2662 // We can't preload 3 characters because there is no machine instruction
2663 // to do that. We can't just load 4 because we could be reading
2664 // beyond the end of the string, which could cause a memory fault.
2665 if (preload_characters == 3) preload_characters = 2;
2666 } else {
2667 if (preload_characters > 2) preload_characters = 2;
2668 }
2669 } else {
2670 if (preload_characters > 1) preload_characters = 1;
2671 }
2672 return preload_characters;
2673}
2674
2675// This class is used when generating the alternatives in a choice node. It
2676// records the way the alternative is being code generated.
2677class AlternativeGeneration : public Malloced {
2678 public:
2679 AlternativeGeneration()
2680 : possible_success(),
2681 expects_preload(false),
2682 after(),
2683 quick_check_details() {}
2684 Label possible_success;
2685 bool expects_preload;
2686 Label after;
2687 QuickCheckDetails quick_check_details;
2688};
2689
2690// Creates a list of AlternativeGenerations. If the list has a reasonable
2691// size then it is on the stack, otherwise the excess is on the heap.
2692class AlternativeGenerationList {
2693 public:
2694 AlternativeGenerationList(int count, Zone* zone) : alt_gens_(count, zone) {
2695 for (int i = 0; i < count && i < kAFew; i++) {
2696 alt_gens_.Add(a_few_alt_gens_ + i, zone);
2697 }
2698 for (int i = kAFew; i < count; i++) {
2699 alt_gens_.Add(new AlternativeGeneration(), zone);
2700 }
2701 }
2702 ~AlternativeGenerationList() {
2703 for (int i = kAFew; i < alt_gens_.length(); i++) {
2704 delete alt_gens_[i];
2705 alt_gens_[i] = nullptr;
2706 }
2707 }
2708
2709 AlternativeGeneration* at(int i) { return alt_gens_[i]; }
2710
2711 private:
2712 static const int kAFew = 10;
2713 ZoneList<AlternativeGeneration*> alt_gens_;
2714 AlternativeGeneration a_few_alt_gens_[kAFew];
2715};
2716
2717void BoyerMoorePositionInfo::Set(int character) {
2718 SetInterval(Interval(character, character));
2719}
2720
2721namespace {
2722
2723ContainedInLattice AddRange(ContainedInLattice containment, const int* ranges,
2724 int ranges_length, Interval new_range) {
2725 DCHECK_EQ(1, ranges_length & 1)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((1) == (ranges_length & 1))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((1) == (ranges_length & 1
)))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("(1) == (ranges_length & 1)", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 2725); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(1) == (ranges_length & 1)"
")"); do { *((volatile int*)__null) = 2725; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
2726 DCHECK_EQ(String::kMaxCodePoint + 1, ranges[ranges_length - 1])do { static_assert( mozilla::detail::AssertionConditionType<
decltype((String::kMaxCodePoint + 1) == (ranges[ranges_length
- 1]))>::isValid, "invalid assertion condition"); if ((__builtin_expect
(!!(!(!!((String::kMaxCodePoint + 1) == (ranges[ranges_length
- 1])))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("(String::kMaxCodePoint + 1) == (ranges[ranges_length - 1])"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 2726); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(String::kMaxCodePoint + 1) == (ranges[ranges_length - 1])"
")"); do { *((volatile int*)__null) = 2726; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
2727 if (containment == kLatticeUnknown) return containment;
2728 bool inside = false;
2729 int last = 0;
2730 for (int i = 0; i < ranges_length; inside = !inside, last = ranges[i], i++) {
2731 // Consider the range from last to ranges[i].
2732 // We haven't got to the new range yet.
2733 if (ranges[i] <= new_range.from()) continue;
2734 // New range is wholly inside last-ranges[i]. Note that new_range.to() is
2735 // inclusive, but the values in ranges are not.
2736 if (last <= new_range.from() && new_range.to() < ranges[i]) {
2737 return Combine(containment, inside ? kLatticeIn : kLatticeOut);
2738 }
2739 return kLatticeUnknown;
2740 }
2741 return containment;
2742}
2743
2744int BitsetFirstSetBit(BoyerMoorePositionInfo::Bitset bitset) {
2745 static_assert(BoyerMoorePositionInfo::kMapSize ==
2746 2 * kInt64Size * kBitsPerByte);
2747
2748 // Slight fiddling is needed here, since the bitset is of length 128 while
2749 // CountTrailingZeros requires an integral type and std::bitset can only
2750 // convert to unsigned long long. So we handle the most- and least-significant
2751 // bits separately.
2752
2753 {
2754 static constexpr BoyerMoorePositionInfo::Bitset mask(~uint64_t{0});
2755 BoyerMoorePositionInfo::Bitset masked_bitset = bitset & mask;
2756 static_assert(kInt64Size >= sizeof(decltype(masked_bitset.to_ullong())));
2757 uint64_t lsb = masked_bitset.to_ullong();
2758 if (lsb != 0) return base::bits::CountTrailingZeros(lsb);
2759 }
2760
2761 {
2762 BoyerMoorePositionInfo::Bitset masked_bitset = bitset >> 64;
2763 uint64_t msb = masked_bitset.to_ullong();
2764 if (msb != 0) return 64 + base::bits::CountTrailingZeros(msb);
2765 }
2766
2767 return -1;
2768}
2769
2770} // namespace
2771
2772void BoyerMoorePositionInfo::SetInterval(const Interval& interval) {
2773 w_ = AddRange(w_, kWordRanges, kWordRangeCount, interval);
2774
2775 if (interval.size() >= kMapSize) {
2776 map_count_ = kMapSize;
2777 map_.set();
2778 return;
2779 }
2780
2781 for (int i = interval.from(); i <= interval.to(); i++) {
2782 int mod_character = (i & kMask);
2783 if (!map_[mod_character]) {
2784 map_count_++;
2785 map_.set(mod_character);
2786 }
2787 if (map_count_ == kMapSize) return;
2788 }
2789}
2790
2791void BoyerMoorePositionInfo::SetAll() {
2792 w_ = kLatticeUnknown;
2793 if (map_count_ != kMapSize) {
2794 map_count_ = kMapSize;
2795 map_.set();
2796 }
2797}
2798
2799BoyerMooreLookahead::BoyerMooreLookahead(int length, RegExpCompiler* compiler,
2800 Zone* zone)
2801 : length_(length),
2802 compiler_(compiler),
2803 max_char_(MaxCodeUnit(compiler->one_byte())) {
2804 bitmaps_ = zone->New<ZoneList<BoyerMoorePositionInfo*>>(length, zone);
2805 for (int i = 0; i < length; i++) {
2806 bitmaps_->Add(zone->New<BoyerMoorePositionInfo>(), zone);
2807 }
2808}
2809
2810// Find the longest range of lookahead that has the fewest number of different
2811// characters that can occur at a given position. Since we are optimizing two
2812// different parameters at once this is a tradeoff.
2813bool BoyerMooreLookahead::FindWorthwhileInterval(int* from, int* to) {
2814 int biggest_points = 0;
2815 // If more than 32 characters out of 128 can occur it is unlikely that we can
2816 // be lucky enough to step forwards much of the time.
2817 const int kMaxMax = 32;
2818 for (int max_number_of_chars = 4; max_number_of_chars < kMaxMax;
2819 max_number_of_chars *= 2) {
2820 biggest_points =
2821 FindBestInterval(max_number_of_chars, biggest_points, from, to);
2822 }
2823 if (biggest_points == 0) return false;
2824 return true;
2825}
2826
2827// Find the highest-points range between 0 and length_ where the character
2828// information is not too vague. 'Too vague' means that there are more than
2829// max_number_of_chars that can occur at this position. Calculates the number
2830// of points as the product of width-of-the-range and
2831// probability-of-finding-one-of-the-characters, where the probability is
2832// calculated using the frequency distribution of the sample subject string.
2833int BoyerMooreLookahead::FindBestInterval(int max_number_of_chars,
2834 int old_biggest_points, int* from,
2835 int* to) {
2836 int biggest_points = old_biggest_points;
2837 static const int kSize = RegExpMacroAssembler::kTableSize;
2838 for (int i = 0; i < length_;) {
2839 while (i < length_ && Count(i) > max_number_of_chars) i++;
2840 if (i == length_) break;
2841 int remembered_from = i;
2842
2843 BoyerMoorePositionInfo::Bitset union_bitset;
2844 for (; i < length_ && Count(i) <= max_number_of_chars; i++) {
2845 union_bitset |= bitmaps_->at(i)->raw_bitset();
2846 }
2847
2848 int frequency = 0;
2849
2850 // Iterate only over set bits.
2851 int j;
2852 while ((j = BitsetFirstSetBit(union_bitset)) != -1) {
2853 DCHECK(union_bitset[j])do { static_assert( mozilla::detail::AssertionConditionType<
decltype(union_bitset[j])>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(union_bitset[j]))), 0))) { do
{ } while (false); MOZ_ReportAssertionFailure("union_bitset[j]"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 2853); AnnotateMozCrashReason("MOZ_ASSERT" "(" "union_bitset[j]"
")"); do { *((volatile int*)__null) = 2853; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
; // Sanity check.
2854 // Add 1 to the frequency to give a small per-character boost for
2855 // the cases where our sampling is not good enough and many
2856 // characters have a frequency of zero. This means the frequency
2857 // can theoretically be up to 2*kSize though we treat it mostly as
2858 // a fraction of kSize.
2859 frequency += compiler_->frequency_collator()->Frequency(j) + 1;
2860 union_bitset.reset(j);
2861 }
2862
2863 // We use the probability of skipping times the distance we are skipping to
2864 // judge the effectiveness of this. Actually we have a cut-off: By
2865 // dividing by 2 we switch off the skipping if the probability of skipping
2866 // is less than 50%. This is because the multibyte mask-and-compare
2867 // skipping in quickcheck is more likely to do well on this case.
2868 bool in_quickcheck_range =
2869 ((i - remembered_from < 4) ||
2870 (compiler_->one_byte() ? remembered_from <= 4 : remembered_from <= 2));
2871 // Called 'probability' but it is only a rough estimate and can actually
2872 // be outside the 0-kSize range.
2873 int probability = (in_quickcheck_range ? kSize / 2 : kSize) - frequency;
2874 int points = (i - remembered_from) * probability;
2875 if (points > biggest_points) {
2876 *from = remembered_from;
2877 *to = i - 1;
2878 biggest_points = points;
2879 }
2880 }
2881 return biggest_points;
2882}
2883
2884// Take all the characters that will not prevent a successful match if they
2885// occur in the subject string in the range between min_lookahead and
2886// max_lookahead (inclusive) measured from the current position. If the
2887// character at max_lookahead offset is not one of these characters, then we
2888// can safely skip forwards by the number of characters in the range.
2889int BoyerMooreLookahead::GetSkipTable(
2890 int min_lookahead, int max_lookahead,
2891 DirectHandle<ByteArray> boolean_skip_table) {
2892 const int kSkipArrayEntry = 0;
2893 const int kDontSkipArrayEntry = 1;
2894
2895 std::memset(boolean_skip_table->begin(), kSkipArrayEntry,
2896 boolean_skip_table->length());
2897
2898 for (int i = max_lookahead; i >= min_lookahead; i--) {
2899 BoyerMoorePositionInfo::Bitset bitset = bitmaps_->at(i)->raw_bitset();
2900
2901 // Iterate only over set bits.
2902 int j;
2903 while ((j = BitsetFirstSetBit(bitset)) != -1) {
2904 DCHECK(bitset[j])do { static_assert( mozilla::detail::AssertionConditionType<
decltype(bitset[j])>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(bitset[j]))), 0))) { do { } while
(false); MOZ_ReportAssertionFailure("bitset[j]", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 2904); AnnotateMozCrashReason("MOZ_ASSERT" "(" "bitset[j]" ")"
); do { *((volatile int*)__null) = 2904; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
; // Sanity check.
2905 boolean_skip_table->set(j, kDontSkipArrayEntry);
2906 bitset.reset(j);
2907 }
2908 }
2909
2910 const int skip = max_lookahead + 1 - min_lookahead;
2911 return skip;
2912}
2913
2914// See comment above on the implementation of GetSkipTable.
2915void BoyerMooreLookahead::EmitSkipInstructions(RegExpMacroAssembler* masm) {
2916 const int kSize = RegExpMacroAssembler::kTableSize;
2917
2918 int min_lookahead = 0;
2919 int max_lookahead = 0;
2920
2921 if (!FindWorthwhileInterval(&min_lookahead, &max_lookahead)) return;
2922
2923 // Check if we only have a single non-empty position info, and that info
2924 // contains precisely one character.
2925 bool found_single_character = false;
2926 int single_character = 0;
2927 for (int i = max_lookahead; i >= min_lookahead; i--) {
2928 BoyerMoorePositionInfo* map = bitmaps_->at(i);
2929 if (map->map_count() == 0) continue;
2930
2931 if (found_single_character || map->map_count() > 1) {
2932 found_single_character = false;
2933 break;
2934 }
2935
2936 DCHECK(!found_single_character)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(!found_single_character)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(!found_single_character))), 0
))) { do { } while (false); MOZ_ReportAssertionFailure("!found_single_character"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 2936); AnnotateMozCrashReason("MOZ_ASSERT" "(" "!found_single_character"
")"); do { *((volatile int*)__null) = 2936; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
2937 DCHECK_EQ(map->map_count(), 1)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((map->map_count()) == (1))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((map->map_count()) == (1)
))), 0))) { do { } while (false); MOZ_ReportAssertionFailure(
"(map->map_count()) == (1)", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 2937); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(map->map_count()) == (1)"
")"); do { *((volatile int*)__null) = 2937; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
2938
2939 found_single_character = true;
2940 single_character = BitsetFirstSetBit(map->raw_bitset());
2941
2942 DCHECK_NE(single_character, -1)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((single_character) != (-1))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((single_character) != (-1)))
), 0))) { do { } while (false); MOZ_ReportAssertionFailure("(single_character) != (-1)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 2942); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(single_character) != (-1)"
")"); do { *((volatile int*)__null) = 2942; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
2943 }
2944
2945 int lookahead_width = max_lookahead + 1 - min_lookahead;
2946
2947 if (found_single_character && lookahead_width == 1 && max_lookahead < 3) {
2948 // The mask-compare can probably handle this better.
2949 return;
2950 }
2951
2952 if (found_single_character) {
2953 Label cont, again;
2954 masm->Bind(&again);
2955 masm->LoadCurrentCharacter(max_lookahead, &cont, true);
2956 if (max_char_ > kSize) {
2957 masm->CheckCharacterAfterAnd(single_character,
2958 RegExpMacroAssembler::kTableMask, &cont);
2959 } else {
2960 masm->CheckCharacter(single_character, &cont);
2961 }
2962 masm->AdvanceCurrentPosition(lookahead_width);
2963 masm->GoTo(&again);
2964 masm->Bind(&cont);
2965 return;
2966 }
2967
2968 Factory* factory = masm->isolate()->factory();
2969 Handle<ByteArray> boolean_skip_table =
2970 factory->NewByteArray(kSize, AllocationType::kOld);
2971 int skip_distance =
2972 GetSkipTable(min_lookahead, max_lookahead, boolean_skip_table);
2973 DCHECK_NE(0, skip_distance)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((0) != (skip_distance))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((0) != (skip_distance)))), 0
))) { do { } while (false); MOZ_ReportAssertionFailure("(0) != (skip_distance)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 2973); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(0) != (skip_distance)"
")"); do { *((volatile int*)__null) = 2973; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
2974
2975 Label cont, again;
2976 masm->Bind(&again);
2977 masm->LoadCurrentCharacter(max_lookahead, &cont, true);
2978 masm->CheckBitInTable(boolean_skip_table, &cont);
2979 masm->AdvanceCurrentPosition(skip_distance);
2980 masm->GoTo(&again);
2981 masm->Bind(&cont);
2982}
2983
2984/* Code generation for choice nodes.
2985 *
2986 * We generate quick checks that do a mask and compare to eliminate a
2987 * choice. If the quick check succeeds then it jumps to the continuation to
2988 * do slow checks and check subsequent nodes. If it fails (the common case)
2989 * it falls through to the next choice.
2990 *
2991 * Here is the desired flow graph. Nodes directly below each other imply
2992 * fallthrough. Alternatives 1 and 2 have quick checks. Alternative
2993 * 3 doesn't have a quick check so we have to call the slow check.
2994 * Nodes are marked Qn for quick checks and Sn for slow checks. The entire
2995 * regexp continuation is generated directly after the Sn node, up to the
2996 * next GoTo if we decide to reuse some already generated code. Some
2997 * nodes expect preload_characters to be preloaded into the current
2998 * character register. R nodes do this preloading. Vertices are marked
2999 * F for failures and S for success (possible success in the case of quick
3000 * nodes). L, V, < and > are used as arrow heads.
3001 *
3002 * ----------> R
3003 * |
3004 * V
3005 * Q1 -----> S1
3006 * | S /
3007 * F| /
3008 * | F/
3009 * | /
3010 * | R
3011 * | /
3012 * V L
3013 * Q2 -----> S2
3014 * | S /
3015 * F| /
3016 * | F/
3017 * | /
3018 * | R
3019 * | /
3020 * V L
3021 * S3
3022 * |
3023 * F|
3024 * |
3025 * R
3026 * |
3027 * backtrack V
3028 * <----------Q4
3029 * \ F |
3030 * \ |S
3031 * \ F V
3032 * \-----S4
3033 *
3034 * For greedy loops we push the current position, then generate the code that
3035 * eats the input specially in EmitGreedyLoop. The other choice (the
3036 * continuation) is generated by the normal code in EmitChoices, and steps back
3037 * in the input to the starting position when it fails to match. The loop code
3038 * looks like this (U is the unwind code that steps back in the greedy loop).
3039 *
3040 * _____
3041 * / \
3042 * V |
3043 * ----------> S1 |
3044 * /| |
3045 * / |S |
3046 * F/ \_____/
3047 * /
3048 * |<-----
3049 * | \
3050 * V |S
3051 * Q2 ---> U----->backtrack
3052 * | F /
3053 * S| /
3054 * V F /
3055 * S2--/
3056 */
3057
3058GreedyLoopState::GreedyLoopState(bool not_at_start) {
3059 counter_backtrack_trace_.set_backtrack(&label_);
3060 if (not_at_start) counter_backtrack_trace_.set_at_start(Trace::FALSE_VALUE);
3061}
3062
3063void ChoiceNode::AssertGuardsMentionRegisters(Trace* trace) {
3064#ifdef DEBUG1
3065 int choice_count = alternatives_->length();
3066 for (int i = 0; i < choice_count - 1; i++) {
3067 GuardedAlternative alternative = alternatives_->at(i);
3068 ZoneList<Guard*>* guards = alternative.guards();
3069 int guard_count = (guards == nullptr) ? 0 : guards->length();
3070 for (int j = 0; j < guard_count; j++) {
3071 DCHECK(!trace->mentions_reg(guards->at(j)->reg()))do { static_assert( mozilla::detail::AssertionConditionType<
decltype(!trace->mentions_reg(guards->at(j)->reg()))
>::isValid, "invalid assertion condition"); if ((__builtin_expect
(!!(!(!!(!trace->mentions_reg(guards->at(j)->reg()))
)), 0))) { do { } while (false); MOZ_ReportAssertionFailure("!trace->mentions_reg(guards->at(j)->reg())"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 3071); AnnotateMozCrashReason("MOZ_ASSERT" "(" "!trace->mentions_reg(guards->at(j)->reg())"
")"); do { *((volatile int*)__null) = 3071; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
3072 }
3073 }
3074#endif
3075}
3076
3077void ChoiceNode::SetUpPreLoad(RegExpCompiler* compiler, Trace* current_trace,
3078 PreloadState* state) {
3079 if (state->eats_at_least_ == PreloadState::kEatsAtLeastNotYetInitialized) {
3080 // Save some time by looking at most one machine word ahead.
3081 state->eats_at_least_ =
3082 EatsAtLeast(current_trace->at_start() == Trace::FALSE_VALUE);
3083 }
3084 state->preload_characters_ =
3085 CalculatePreloadCharacters(compiler, state->eats_at_least_);
3086
3087 state->preload_is_current_ =
3088 (current_trace->characters_preloaded() == state->preload_characters_);
3089 state->preload_has_checked_bounds_ = state->preload_is_current_;
3090}
3091
3092void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
3093 int choice_count = alternatives_->length();
3094
3095 if (choice_count == 1 && alternatives_->at(0).guards() == nullptr) {
8
Assuming 'choice_count' is not equal to 1
3096 alternatives_->at(0).node()->Emit(compiler, trace);
3097 return;
3098 }
3099
3100 AssertGuardsMentionRegisters(trace);
3101
3102 LimitResult limit_result = LimitVersions(compiler, trace);
3103 if (limit_result == DONE) return;
9
Assuming 'limit_result' is not equal to DONE
10
Taking false branch
3104 DCHECK(limit_result == CONTINUE)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(limit_result == CONTINUE)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(limit_result == CONTINUE))),
0))) { do { } while (false); MOZ_ReportAssertionFailure("limit_result == CONTINUE"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 3104); AnnotateMozCrashReason("MOZ_ASSERT" "(" "limit_result == CONTINUE"
")"); do { *((volatile int*)__null) = 3104; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
11
Assuming 'limit_result' is equal to CONTINUE
12
Taking false branch
3105
3106 // For loop nodes we already flushed (see LoopChoiceNode::Emit), but for
3107 // other choice nodes we only flush if we are out of code size budget.
3108 if (trace->flush_budget() == 0 && trace->actions() != nullptr) {
13
Loop condition is false. Exiting loop
14
Assuming the condition is false
3109 trace->Flush(compiler, this);
3110 return;
3111 }
3112
3113 RecursionCheck rc(compiler);
3114
3115 PreloadState preload;
3116 preload.init();
3117 GreedyLoopState greedy_loop_state(not_at_start());
3118
3119 int text_length = GreedyLoopTextLengthForAlternative(&alternatives_->at(0));
3120 AlternativeGenerationList alt_gens(choice_count, zone());
3121
3122 if (choice_count
14.1
'choice_count' is <= 1
> 1 && text_length != kNodeIsTooComplexForGreedyLoops) {
3123 trace = EmitGreedyLoop(compiler, trace, &alt_gens, &preload,
3124 &greedy_loop_state, text_length);
3125 } else {
3126 preload.eats_at_least_ = EmitOptimizedUnanchoredSearch(compiler, trace);
3127
3128 EmitChoices(compiler, &alt_gens, 0, trace, &preload);
15
Calling 'ChoiceNode::EmitChoices'
3129 }
3130
3131 // At this point we need to generate slow checks for the alternatives where
3132 // the quick check was inlined. We can recognize these because the associated
3133 // label was bound.
3134 int new_flush_budget = trace->flush_budget() / choice_count;
3135 for (int i = 0; i < choice_count; i++) {
3136 AlternativeGeneration* alt_gen = alt_gens.at(i);
3137 Trace new_trace(*trace);
3138 // If there are actions to be flushed we have to limit how many times
3139 // they are flushed. Take the budget of the parent trace and distribute
3140 // it fairly amongst the children.
3141 if (new_trace.actions() != nullptr) {
3142 new_trace.set_flush_budget(new_flush_budget);
3143 }
3144 bool next_expects_preload =
3145 i == choice_count - 1 ? false : alt_gens.at(i + 1)->expects_preload;
3146 EmitOutOfLineContinuation(compiler, &new_trace, alternatives_->at(i),
3147 alt_gen, preload.preload_characters_,
3148 next_expects_preload);
3149 }
3150}
3151
3152Trace* ChoiceNode::EmitGreedyLoop(RegExpCompiler* compiler, Trace* trace,
3153 AlternativeGenerationList* alt_gens,
3154 PreloadState* preload,
3155 GreedyLoopState* greedy_loop_state,
3156 int text_length) {
3157 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
3158 // Here we have special handling for greedy loops containing only text nodes
3159 // and other simple nodes. These are handled by pushing the current
3160 // position on the stack and then incrementing the current position each
3161 // time around the switch. On backtrack we decrement the current position
3162 // and check it against the pushed value. This avoids pushing backtrack
3163 // information for each iteration of the loop, which could take up a lot of
3164 // space.
3165 DCHECK(trace->stop_node() == nullptr)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(trace->stop_node() == nullptr)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(trace->stop_node() == nullptr
))), 0))) { do { } while (false); MOZ_ReportAssertionFailure(
"trace->stop_node() == nullptr", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 3165); AnnotateMozCrashReason("MOZ_ASSERT" "(" "trace->stop_node() == nullptr"
")"); do { *((volatile int*)__null) = 3165; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
3166 macro_assembler->PushCurrentPosition();
3167 Label greedy_match_failed;
3168 Trace greedy_match_trace;
3169 if (not_at_start()) greedy_match_trace.set_at_start(Trace::FALSE_VALUE);
3170 greedy_match_trace.set_backtrack(&greedy_match_failed);
3171 Label loop_label;
3172 macro_assembler->Bind(&loop_label);
3173 greedy_match_trace.set_stop_node(this);
3174 greedy_match_trace.set_loop_label(&loop_label);
3175 alternatives_->at(0).node()->Emit(compiler, &greedy_match_trace);
3176 macro_assembler->Bind(&greedy_match_failed);
3177
3178 Label second_choice; // For use in greedy matches.
3179 macro_assembler->Bind(&second_choice);
3180
3181 Trace* new_trace = greedy_loop_state->counter_backtrack_trace();
3182
3183 EmitChoices(compiler, alt_gens, 1, new_trace, preload);
3184
3185 macro_assembler->Bind(greedy_loop_state->label());
3186 // If we have unwound to the bottom then backtrack.
3187 macro_assembler->CheckGreedyLoop(trace->backtrack());
3188 // Otherwise try the second priority at an earlier position.
3189 macro_assembler->AdvanceCurrentPosition(-text_length);
3190 macro_assembler->GoTo(&second_choice);
3191 return new_trace;
3192}
3193
3194int ChoiceNode::EmitOptimizedUnanchoredSearch(RegExpCompiler* compiler,
3195 Trace* trace) {
3196 int eats_at_least = PreloadState::kEatsAtLeastNotYetInitialized;
3197 if (alternatives_->length() != 2) return eats_at_least;
3198
3199 GuardedAlternative alt1 = alternatives_->at(1);
3200 if (alt1.guards() != nullptr && alt1.guards()->length() != 0) {
3201 return eats_at_least;
3202 }
3203 RegExpNode* eats_anything_node = alt1.node();
3204 if (eats_anything_node->GetSuccessorOfOmnivorousTextNode(compiler) != this) {
3205 return eats_at_least;
3206 }
3207
3208 // Really we should be creating a new trace when we execute this function,
3209 // but there is no need, because the code it generates cannot backtrack, and
3210 // we always arrive here with a trivial trace (since it's the entry to a
3211 // loop. That also implies that there are no preloaded characters, which is
3212 // good, because it means we won't be violating any assumptions by
3213 // overwriting those characters with new load instructions.
3214 DCHECK(trace->is_trivial())do { static_assert( mozilla::detail::AssertionConditionType<
decltype(trace->is_trivial())>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(trace->is_trivial()))), 0
))) { do { } while (false); MOZ_ReportAssertionFailure("trace->is_trivial()"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 3214); AnnotateMozCrashReason("MOZ_ASSERT" "(" "trace->is_trivial()"
")"); do { *((volatile int*)__null) = 3214; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
3215
3216 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
3217 Isolate* isolate = macro_assembler->isolate();
3218 // At this point we know that we are at a non-greedy loop that will eat
3219 // any character one at a time. Any non-anchored regexp has such a
3220 // loop prepended to it in order to find where it starts. We look for
3221 // a pattern of the form ...abc... where we can look 6 characters ahead
3222 // and step forwards 3 if the character is not one of abc. Abc need
3223 // not be atoms, they can be any reasonably limited character class or
3224 // small alternation.
3225 BoyerMooreLookahead* bm = bm_info(false);
3226 if (bm == nullptr) {
3227 eats_at_least = std::min(kMaxLookaheadForBoyerMoore, EatsAtLeast(false));
3228 if (eats_at_least >= 1) {
3229 bm = zone()->New<BoyerMooreLookahead>(eats_at_least, compiler, zone());
3230 GuardedAlternative alt0 = alternatives_->at(0);
3231 alt0.node()->FillInBMInfo(isolate, 0, kRecursionBudget, bm, false);
3232 }
3233 }
3234 if (bm != nullptr) {
3235 bm->EmitSkipInstructions(macro_assembler);
3236 }
3237 return eats_at_least;
3238}
3239
3240void ChoiceNode::EmitChoices(RegExpCompiler* compiler,
3241 AlternativeGenerationList* alt_gens,
3242 int first_choice, Trace* trace,
3243 PreloadState* preload) {
3244 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
3245 SetUpPreLoad(compiler, trace, preload);
3246
3247 // For now we just call all choices one after the other. The idea ultimately
3248 // is to use the Dispatch table to try only the relevant ones.
3249 int choice_count = alternatives_->length();
3250
3251 int new_flush_budget = trace->flush_budget() / choice_count;
3252
3253 for (int i = first_choice; i < choice_count; i++) {
16
Assuming 'i' is < 'choice_count'
17
Loop condition is true. Entering loop body
3254 bool is_last = i == choice_count - 1;
18
Assuming the condition is false
3255 bool fall_through_on_failure = !is_last;
3256 GuardedAlternative alternative = alternatives_->at(i);
3257 AlternativeGeneration* alt_gen = alt_gens->at(i);
3258 alt_gen->quick_check_details.set_characters(preload->preload_characters_);
3259 ZoneList<Guard*>* guards = alternative.guards();
3260 int guard_count = (guards == nullptr) ? 0 : guards->length();
19
Assuming the condition is false
20
'?' condition is false
3261 Trace new_trace(*trace);
3262 new_trace.set_characters_preloaded(
3263 preload->preload_is_current_
20.1
Field 'preload_is_current_' is false
? preload->preload_characters_ : 0);
21
'?' condition is false
3264 if (preload->preload_has_checked_bounds_
21.1
Field 'preload_has_checked_bounds_' is false
) {
22
Taking false branch
3265 new_trace.set_bound_checked_up_to(preload->preload_characters_);
3266 }
3267 new_trace.quick_check_performed()->Clear();
3268 if (not_at_start_) new_trace.set_at_start(Trace::FALSE_VALUE);
23
Assuming field 'not_at_start_' is false
24
Taking false branch
3269 if (!is_last
24.1
'is_last' is false
) {
25
Taking true branch
3270 new_trace.set_backtrack(&alt_gen->after);
3271 }
3272 alt_gen->expects_preload = preload->preload_is_current_;
3273 bool generate_full_check_inline = false;
3274 if (v8_flagsjs::jit::JitOptions.regexp_optimization &&
26
Assuming field 'regexp_optimization' is true
3275 try_to_emit_quick_check_for_alternative(i == 0) &&
27
Assuming the condition is true
3276 alternative.node()->EmitQuickCheck(
28
Calling 'RegExpNode::EmitQuickCheck'
3277 compiler, trace, &new_trace, preload->preload_has_checked_bounds_,
3278 &alt_gen->possible_success, &alt_gen->quick_check_details,
3279 fall_through_on_failure, this)) {
3280 // Quick check was generated for this choice.
3281 preload->preload_is_current_ = true;
3282 preload->preload_has_checked_bounds_ = true;
3283 // If we generated the quick check to fall through on possible success,
3284 // we now need to generate the full check inline.
3285 if (!fall_through_on_failure) {
3286 macro_assembler->Bind(&alt_gen->possible_success);
3287 new_trace.set_quick_check_performed(&alt_gen->quick_check_details);
3288 new_trace.set_characters_preloaded(preload->preload_characters_);
3289 new_trace.set_bound_checked_up_to(preload->preload_characters_);
3290 generate_full_check_inline = true;
3291 }
3292 } else if (alt_gen->quick_check_details.cannot_match()) {
3293 if (!fall_through_on_failure) {
3294 macro_assembler->GoTo(trace->backtrack());
3295 }
3296 continue;
3297 } else {
3298 // No quick check was generated. Put the full code here.
3299 // If this is not the first choice then there could be slow checks from
3300 // previous cases that go here when they fail. There's no reason to
3301 // insist that they preload characters since the slow check we are about
3302 // to generate probably can't use it.
3303 if (i != first_choice) {
3304 alt_gen->expects_preload = false;
3305 new_trace.InvalidateCurrentCharacter();
3306 }
3307 generate_full_check_inline = true;
3308 }
3309 if (generate_full_check_inline) {
3310 if (new_trace.actions() != nullptr) {
3311 new_trace.set_flush_budget(new_flush_budget);
3312 }
3313 for (int j = 0; j < guard_count; j++) {
3314 GenerateGuard(macro_assembler, guards->at(j), &new_trace);
3315 }
3316 alternative.node()->Emit(compiler, &new_trace);
3317 preload->preload_is_current_ = false;
3318 }
3319 macro_assembler->Bind(&alt_gen->after);
3320 }
3321}
3322
3323void ChoiceNode::EmitOutOfLineContinuation(RegExpCompiler* compiler,
3324 Trace* trace,
3325 GuardedAlternative alternative,
3326 AlternativeGeneration* alt_gen,
3327 int preload_characters,
3328 bool next_expects_preload) {
3329 if (!alt_gen->possible_success.is_linked()) return;
3330
3331 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
3332 macro_assembler->Bind(&alt_gen->possible_success);
3333 Trace out_of_line_trace(*trace);
3334 out_of_line_trace.set_characters_preloaded(preload_characters);
3335 out_of_line_trace.set_quick_check_performed(&alt_gen->quick_check_details);
3336 if (not_at_start_) out_of_line_trace.set_at_start(Trace::FALSE_VALUE);
3337 ZoneList<Guard*>* guards = alternative.guards();
3338 int guard_count = (guards == nullptr) ? 0 : guards->length();
3339 if (next_expects_preload) {
3340 Label reload_current_char;
3341 out_of_line_trace.set_backtrack(&reload_current_char);
3342 for (int j = 0; j < guard_count; j++) {
3343 GenerateGuard(macro_assembler, guards->at(j), &out_of_line_trace);
3344 }
3345 alternative.node()->Emit(compiler, &out_of_line_trace);
3346 macro_assembler->Bind(&reload_current_char);
3347 // Reload the current character, since the next quick check expects that.
3348 // We don't need to check bounds here because we only get into this
3349 // code through a quick check which already did the checked load.
3350 macro_assembler->LoadCurrentCharacter(trace->cp_offset(), nullptr, false,
3351 preload_characters);
3352 macro_assembler->GoTo(&(alt_gen->after));
3353 } else {
3354 out_of_line_trace.set_backtrack(&(alt_gen->after));
3355 for (int j = 0; j < guard_count; j++) {
3356 GenerateGuard(macro_assembler, guards->at(j), &out_of_line_trace);
3357 }
3358 alternative.node()->Emit(compiler, &out_of_line_trace);
3359 }
3360}
3361
3362void ActionNode::Emit(RegExpCompiler* compiler, Trace* trace) {
3363 RegExpMacroAssembler* assembler = compiler->macro_assembler();
3364 LimitResult limit_result = LimitVersions(compiler, trace);
3365 if (limit_result == DONE) return;
3366 DCHECK(limit_result == CONTINUE)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(limit_result == CONTINUE)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(limit_result == CONTINUE))),
0))) { do { } while (false); MOZ_ReportAssertionFailure("limit_result == CONTINUE"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 3366); AnnotateMozCrashReason("MOZ_ASSERT" "(" "limit_result == CONTINUE"
")"); do { *((volatile int*)__null) = 3366; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
3367
3368 RecursionCheck rc(compiler);
3369
3370 switch (action_type_) {
3371 case STORE_POSITION: {
3372 Trace::DeferredCapture new_capture(data_.u_position_register.reg,
3373 data_.u_position_register.is_capture,
3374 trace);
3375 Trace new_trace = *trace;
3376 new_trace.add_action(&new_capture);
3377 on_success()->Emit(compiler, &new_trace);
3378 break;
3379 }
3380 case INCREMENT_REGISTER: {
3381 Trace::DeferredIncrementRegister new_increment(
3382 data_.u_increment_register.reg);
3383 Trace new_trace = *trace;
3384 new_trace.add_action(&new_increment);
3385 on_success()->Emit(compiler, &new_trace);
3386 break;
3387 }
3388 case SET_REGISTER_FOR_LOOP: {
3389 Trace::DeferredSetRegisterForLoop new_set(data_.u_store_register.reg,
3390 data_.u_store_register.value);
3391 Trace new_trace = *trace;
3392 new_trace.add_action(&new_set);
3393 on_success()->Emit(compiler, &new_trace);
3394 break;
3395 }
3396 case CLEAR_CAPTURES: {
3397 Trace::DeferredClearCaptures new_capture(Interval(
3398 data_.u_clear_captures.range_from, data_.u_clear_captures.range_to));
3399 Trace new_trace = *trace;
3400 new_trace.add_action(&new_capture);
3401 on_success()->Emit(compiler, &new_trace);
3402 break;
3403 }
3404 case BEGIN_POSITIVE_SUBMATCH:
3405 case BEGIN_NEGATIVE_SUBMATCH:
3406 if (!trace->is_trivial()) {
3407 trace->Flush(compiler, this);
3408 } else {
3409 assembler->WriteCurrentPositionToRegister(
3410 data_.u_submatch.current_position_register, 0);
3411 assembler->WriteStackPointerToRegister(
3412 data_.u_submatch.stack_pointer_register);
3413 on_success()->Emit(compiler, trace);
3414 }
3415 break;
3416 case EMPTY_MATCH_CHECK: {
3417 int start_pos_reg = data_.u_empty_match_check.start_register;
3418 int stored_pos = 0;
3419 int rep_reg = data_.u_empty_match_check.repetition_register;
3420 bool has_minimum = (rep_reg != RegExpCompiler::kNoRegister);
3421 bool know_dist = trace->GetStoredPosition(start_pos_reg, &stored_pos);
3422 if (know_dist && !has_minimum && stored_pos == trace->cp_offset()) {
3423 // If we know we haven't advanced and there is no minimum we
3424 // can just backtrack immediately.
3425 assembler->GoTo(trace->backtrack());
3426 } else if (know_dist && stored_pos < trace->cp_offset()) {
3427 // If we know we've advanced we can generate the continuation
3428 // immediately.
3429 on_success()->Emit(compiler, trace);
3430 } else if (!trace->is_trivial()) {
3431 trace->Flush(compiler, this);
3432 } else {
3433 Label skip_empty_check;
3434 // If we have a minimum number of repetitions we check the current
3435 // number first and skip the empty check if it's not enough.
3436 if (has_minimum) {
3437 int limit = data_.u_empty_match_check.repetition_limit;
3438 assembler->IfRegisterLT(rep_reg, limit, &skip_empty_check);
3439 }
3440 // If the match is empty we bail out, otherwise we fall through
3441 // to the on-success continuation.
3442 assembler->IfRegisterEqPos(data_.u_empty_match_check.start_register,
3443 trace->backtrack());
3444 assembler->Bind(&skip_empty_check);
3445 on_success()->Emit(compiler, trace);
3446 }
3447 break;
3448 }
3449 case POSITIVE_SUBMATCH_SUCCESS: {
3450 if (!trace->is_trivial()) {
3451 trace->Flush(compiler, this);
3452 return;
3453 }
3454 assembler->ReadCurrentPositionFromRegister(
3455 data_.u_submatch.current_position_register);
3456 assembler->ReadStackPointerFromRegister(
3457 data_.u_submatch.stack_pointer_register);
3458 int clear_register_count = data_.u_submatch.clear_register_count;
3459 if (clear_register_count == 0) {
3460 on_success()->Emit(compiler, trace);
3461 return;
3462 }
3463 int clear_registers_from = data_.u_submatch.clear_register_from;
3464 Label clear_registers_backtrack;
3465 Trace new_trace = *trace;
3466 new_trace.set_backtrack(&clear_registers_backtrack);
3467 on_success()->Emit(compiler, &new_trace);
3468
3469 assembler->Bind(&clear_registers_backtrack);
3470 int clear_registers_to = clear_registers_from + clear_register_count - 1;
3471 assembler->ClearRegisters(clear_registers_from, clear_registers_to);
3472
3473 DCHECK(trace->backtrack() == nullptr)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(trace->backtrack() == nullptr)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(trace->backtrack() == nullptr
))), 0))) { do { } while (false); MOZ_ReportAssertionFailure(
"trace->backtrack() == nullptr", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 3473); AnnotateMozCrashReason("MOZ_ASSERT" "(" "trace->backtrack() == nullptr"
")"); do { *((volatile int*)__null) = 3473; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
3474 assembler->Backtrack();
3475 return;
3476 }
3477 case MODIFY_FLAGS: {
3478 compiler->set_flags(flags());
3479 on_success()->Emit(compiler, trace);
3480 break;
3481 }
3482 default:
3483 UNREACHABLE()do { do { } while (false); MOZ_ReportCrash("" "unreachable code"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 3483); AnnotateMozCrashReason("MOZ_CRASH(" "unreachable code"
")"); do { *((volatile int*)__null) = 3483; __attribute__((nomerge
)) ::abort(); } while (false); } while (false)
;
3484 }
3485}
3486
3487void BackReferenceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
3488 RegExpMacroAssembler* assembler = compiler->macro_assembler();
3489 if (!trace->is_trivial()) {
3490 trace->Flush(compiler, this);
3491 return;
3492 }
3493
3494 LimitResult limit_result = LimitVersions(compiler, trace);
3495 if (limit_result == DONE) return;
3496 DCHECK(limit_result == CONTINUE)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(limit_result == CONTINUE)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(limit_result == CONTINUE))),
0))) { do { } while (false); MOZ_ReportAssertionFailure("limit_result == CONTINUE"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 3496); AnnotateMozCrashReason("MOZ_ASSERT" "(" "limit_result == CONTINUE"
")"); do { *((volatile int*)__null) = 3496; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
3497
3498 RecursionCheck rc(compiler);
3499
3500 DCHECK_EQ(start_reg_ + 1, end_reg_)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((start_reg_ + 1) == (end_reg_))>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!((start_reg_ + 1) == (end_reg_
)))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("(start_reg_ + 1) == (end_reg_)", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 3500); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(start_reg_ + 1) == (end_reg_)"
")"); do { *((volatile int*)__null) = 3500; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
3501 if (IsIgnoreCase(compiler->flags())) {
3502 bool unicode = IsEitherUnicode(compiler->flags());
3503 assembler->CheckNotBackReferenceIgnoreCase(start_reg_, read_backward(),
3504 unicode, trace->backtrack());
3505 } else {
3506 assembler->CheckNotBackReference(start_reg_, read_backward(),
3507 trace->backtrack());
3508 }
3509 // We are going to advance backward, so we may end up at the start.
3510 if (read_backward()) trace->set_at_start(Trace::UNKNOWN);
3511
3512 // Check that the back reference does not end inside a surrogate pair.
3513 if (IsEitherUnicode(compiler->flags()) && !compiler->one_byte()) {
3514 assembler->CheckNotInSurrogatePair(trace->cp_offset(), trace->backtrack());
3515 }
3516 on_success()->Emit(compiler, trace);
3517}
3518
3519void TextNode::CalculateOffsets() {
3520 int element_count = elements()->length();
3521 // Set up the offsets of the elements relative to the start. This is a fixed
3522 // quantity since a TextNode can only contain fixed-width things.
3523 int cp_offset = 0;
3524 for (int i = 0; i < element_count; i++) {
3525 TextElement& elm = elements()->at(i);
3526 elm.set_cp_offset(cp_offset);
3527 cp_offset += elm.length();
3528 }
3529}
3530
3531namespace {
3532
3533// Assertion propagation moves information about assertions such as
3534// \b to the affected nodes. For instance, in /.\b./ information must
3535// be propagated to the first '.' that whatever follows needs to know
3536// if it matched a word or a non-word, and to the second '.' that it
3537// has to check if it succeeds a word or non-word. In this case the
3538// result will be something like:
3539//
3540// +-------+ +------------+
3541// | . | | . |
3542// +-------+ ---> +------------+
3543// | word? | | check word |
3544// +-------+ +------------+
3545class AssertionPropagator : public AllStatic {
3546 public:
3547 static void VisitText(TextNode* that) {}
3548
3549 static void VisitAction(ActionNode* that) {
3550 // If the next node is interested in what it follows then this node
3551 // has to be interested too so it can pass the information on.
3552 that->info()->AddFromFollowing(that->on_success()->info());
3553 }
3554
3555 static void VisitChoice(ChoiceNode* that, int i) {
3556 // Anything the following nodes need to know has to be known by
3557 // this node also, so it can pass it on.
3558 that->info()->AddFromFollowing(that->alternatives()->at(i).node()->info());
3559 }
3560
3561 static void VisitLoopChoiceContinueNode(LoopChoiceNode* that) {
3562 that->info()->AddFromFollowing(that->continue_node()->info());
3563 }
3564
3565 static void VisitLoopChoiceLoopNode(LoopChoiceNode* that) {
3566 that->info()->AddFromFollowing(that->loop_node()->info());
3567 }
3568
3569 static void VisitNegativeLookaroundChoiceLookaroundNode(
3570 NegativeLookaroundChoiceNode* that) {
3571 VisitChoice(that, NegativeLookaroundChoiceNode::kLookaroundIndex);
3572 }
3573
3574 static void VisitNegativeLookaroundChoiceContinueNode(
3575 NegativeLookaroundChoiceNode* that) {
3576 VisitChoice(that, NegativeLookaroundChoiceNode::kContinueIndex);
3577 }
3578
3579 static void VisitBackReference(BackReferenceNode* that) {}
3580
3581 static void VisitAssertion(AssertionNode* that) {}
3582};
3583
3584// Propagates information about the minimum size of successful matches from
3585// successor nodes to their predecessors. Note that all eats_at_least values
3586// are initialized to zero before analysis.
3587class EatsAtLeastPropagator : public AllStatic {
3588 public:
3589 static void VisitText(TextNode* that) {
3590 // The eats_at_least value is not used if reading backward.
3591 if (!that->read_backward()) {
3592 // We are not at the start after this node, and thus we can use the
3593 // successor's eats_at_least_from_not_start value.
3594 uint8_t eats_at_least = base::saturated_cast<uint8_t>(
3595 that->Length() + that->on_success()
3596 ->eats_at_least_info()
3597 ->eats_at_least_from_not_start);
3598 that->set_eats_at_least_info(EatsAtLeastInfo(eats_at_least));
3599 }
3600 }
3601
3602 static void VisitAction(ActionNode* that) {
3603 switch (that->action_type()) {
3604 case ActionNode::BEGIN_POSITIVE_SUBMATCH:
3605 case ActionNode::POSITIVE_SUBMATCH_SUCCESS:
3606 // We do not propagate eats_at_least data through positive lookarounds,
3607 // because they rewind input.
3608 // TODO(v8:11859) Potential approaches for fixing this include:
3609 // 1. Add a dedicated choice node for positive lookaround, similar to
3610 // NegativeLookaroundChoiceNode.
3611 // 2. Add an eats_at_least_inside_loop field to EatsAtLeastInfo, which
3612 // is <= eats_at_least_from_possibly_start, and use that value in
3613 // EatsAtLeastFromLoopEntry.
3614 DCHECK(that->eats_at_least_info()->IsZero())do { static_assert( mozilla::detail::AssertionConditionType<
decltype(that->eats_at_least_info()->IsZero())>::isValid
, "invalid assertion condition"); if ((__builtin_expect(!!(!(
!!(that->eats_at_least_info()->IsZero()))), 0))) { do {
} while (false); MOZ_ReportAssertionFailure("that->eats_at_least_info()->IsZero()"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 3614); AnnotateMozCrashReason("MOZ_ASSERT" "(" "that->eats_at_least_info()->IsZero()"
")"); do { *((volatile int*)__null) = 3614; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
3615 break;
3616 case ActionNode::SET_REGISTER_FOR_LOOP:
3617 // SET_REGISTER_FOR_LOOP indicates a loop entry point, which means the
3618 // loop body will run at least the minimum number of times before the
3619 // continuation case can run.
3620 that->set_eats_at_least_info(
3621 that->on_success()->EatsAtLeastFromLoopEntry());
3622 break;
3623 case ActionNode::BEGIN_NEGATIVE_SUBMATCH:
3624 default:
3625 // Otherwise, the current node eats at least as much as its successor.
3626 // Note: we can propagate eats_at_least data for BEGIN_NEGATIVE_SUBMATCH
3627 // because NegativeLookaroundChoiceNode ignores its lookaround successor
3628 // when computing eats-at-least and quick check information.
3629 that->set_eats_at_least_info(*that->on_success()->eats_at_least_info());
3630 break;
3631 }
3632 }
3633
3634 static void VisitChoice(ChoiceNode* that, int i) {
3635 // The minimum possible match from a choice node is the minimum of its
3636 // successors.
3637 EatsAtLeastInfo eats_at_least =
3638 i == 0 ? EatsAtLeastInfo(UINT8_MAX(255)) : *that->eats_at_least_info();
3639 eats_at_least.SetMin(
3640 *that->alternatives()->at(i).node()->eats_at_least_info());
3641 that->set_eats_at_least_info(eats_at_least);
3642 }
3643
3644 static void VisitLoopChoiceContinueNode(LoopChoiceNode* that) {
3645 if (!that->read_backward()) {
3646 that->set_eats_at_least_info(
3647 *that->continue_node()->eats_at_least_info());
3648 }
3649 }
3650
3651 static void VisitLoopChoiceLoopNode(LoopChoiceNode* that) {}
3652
3653 static void VisitNegativeLookaroundChoiceLookaroundNode(
3654 NegativeLookaroundChoiceNode* that) {}
3655
3656 static void VisitNegativeLookaroundChoiceContinueNode(
3657 NegativeLookaroundChoiceNode* that) {
3658 that->set_eats_at_least_info(*that->continue_node()->eats_at_least_info());
3659 }
3660
3661 static void VisitBackReference(BackReferenceNode* that) {
3662 if (!that->read_backward()) {
3663 that->set_eats_at_least_info(*that->on_success()->eats_at_least_info());
3664 }
3665 }
3666
3667 static void VisitAssertion(AssertionNode* that) {
3668 EatsAtLeastInfo eats_at_least = *that->on_success()->eats_at_least_info();
3669 if (that->assertion_type() == AssertionNode::AT_START) {
3670 // If we know we are not at the start and we are asked "how many
3671 // characters will you match if you succeed?" then we can answer anything
3672 // since false implies false. So let's just set the max answer
3673 // (UINT8_MAX) since that won't prevent us from preloading a lot of
3674 // characters for the other branches in the node graph.
3675 eats_at_least.eats_at_least_from_not_start = UINT8_MAX(255);
3676 }
3677 that->set_eats_at_least_info(eats_at_least);
3678 }
3679};
3680
3681} // namespace
3682
3683// -------------------------------------------------------------------
3684// Analysis
3685
3686// Iterates the node graph and provides the opportunity for propagators to set
3687// values that depend on successor nodes.
3688template <typename... Propagators>
3689class Analysis : public NodeVisitor {
3690 public:
3691 Analysis(Isolate* isolate, bool is_one_byte, RegExpFlags flags)
3692 : isolate_(isolate),
3693 is_one_byte_(is_one_byte),
3694 flags_(flags),
3695 error_(RegExpError::kNone) {}
3696
3697 void EnsureAnalyzed(RegExpNode* that) {
3698 StackLimitCheck check(isolate());
3699 if (check.HasOverflowed()) {
3700 if (v8_flagsjs::jit::JitOptions.correctness_fuzzer_suppressions) {
3701 FATAL("Analysis: Aborting on stack overflow")do { do { } while (false); MOZ_ReportCrash("" "Analysis: Aborting on stack overflow"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 3701); AnnotateMozCrashReason("MOZ_CRASH(" "Analysis: Aborting on stack overflow"
")"); do { *((volatile int*)__null) = 3701; __attribute__((nomerge
)) ::abort(); } while (false); } while (false)
;
3702 }
3703 fail(RegExpError::kAnalysisStackOverflow);
3704 return;
3705 }
3706 if (that->info()->been_analyzed || that->info()->being_analyzed) return;
3707 that->info()->being_analyzed = true;
3708 that->Accept(this);
3709 that->info()->being_analyzed = false;
3710 that->info()->been_analyzed = true;
3711 }
3712
3713 bool has_failed() { return error_ != RegExpError::kNone; }
3714 RegExpError error() {
3715 DCHECK(error_ != RegExpError::kNone)do { static_assert( mozilla::detail::AssertionConditionType<
decltype(error_ != RegExpError::kNone)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(error_ != RegExpError::kNone
))), 0))) { do { } while (false); MOZ_ReportAssertionFailure(
"error_ != RegExpError::kNone", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 3715); AnnotateMozCrashReason("MOZ_ASSERT" "(" "error_ != RegExpError::kNone"
")"); do { *((volatile int*)__null) = 3715; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
3716 return error_;
3717 }
3718 void fail(RegExpError error) { error_ = error; }
3719
3720 Isolate* isolate() const { return isolate_; }
3721
3722 void VisitEnd(EndNode* that) override {
3723 // nothing to do
3724 }
3725
3726// Used to call the given static function on each propagator / variadic template
3727// argument.
3728#define STATIC_FOR_EACH(expr) \
3729 do { \
3730 int dummy[] = {((expr), 0)...}; \
3731 USE(dummy)do { ::v8::base::Use unused_tmp_array_for_use_macro[]{dummy};
(void)unused_tmp_array_for_use_macro; } while (false)
; \
3732 } while (false)
3733
3734 void VisitText(TextNode* that) override {
3735 that->MakeCaseIndependent(isolate(), is_one_byte_, flags());
3736 EnsureAnalyzed(that->on_success());
3737 if (has_failed()) return;
3738 that->CalculateOffsets();
3739 STATIC_FOR_EACH(Propagators::VisitText(that));
3740 }
3741
3742 void VisitAction(ActionNode* that) override {
3743 if (that->action_type() == ActionNode::MODIFY_FLAGS) {
3744 set_flags(that->flags());
3745 }
3746 EnsureAnalyzed(that->on_success());
3747 if (has_failed()) return;
3748 STATIC_FOR_EACH(Propagators::VisitAction(that));
3749 }
3750
3751 void VisitChoice(ChoiceNode* that) override {
3752 for (int i = 0; i < that->alternatives()->length(); i++) {
3753 EnsureAnalyzed(that->alternatives()->at(i).node());
3754 if (has_failed()) return;
3755 STATIC_FOR_EACH(Propagators::VisitChoice(that, i));
3756 }
3757 }
3758
3759 void VisitLoopChoice(LoopChoiceNode* that) override {
3760 DCHECK_EQ(that->alternatives()->length(), 2)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((that->alternatives()->length()) == (2))>::
isValid, "invalid assertion condition"); if ((__builtin_expect
(!!(!(!!((that->alternatives()->length()) == (2)))), 0)
)) { do { } while (false); MOZ_ReportAssertionFailure("(that->alternatives()->length()) == (2)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 3760); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(that->alternatives()->length()) == (2)"
")"); do { *((volatile int*)__null) = 3760; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
; // Just loop and continue.
3761
3762 // First propagate all information from the continuation node.
3763 EnsureAnalyzed(that->continue_node());
3764 if (has_failed()) return;
3765 STATIC_FOR_EACH(Propagators::VisitLoopChoiceContinueNode(that));
3766
3767 // Check the loop last since it may need the value of this node
3768 // to get a correct result.
3769 EnsureAnalyzed(that->loop_node());
3770 if (has_failed()) return;
3771 STATIC_FOR_EACH(Propagators::VisitLoopChoiceLoopNode(that));
3772 }
3773
3774 void VisitNegativeLookaroundChoice(
3775 NegativeLookaroundChoiceNode* that) override {
3776 DCHECK_EQ(that->alternatives()->length(), 2)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((that->alternatives()->length()) == (2))>::
isValid, "invalid assertion condition"); if ((__builtin_expect
(!!(!(!!((that->alternatives()->length()) == (2)))), 0)
)) { do { } while (false); MOZ_ReportAssertionFailure("(that->alternatives()->length()) == (2)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 3776); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(that->alternatives()->length()) == (2)"
")"); do { *((volatile int*)__null) = 3776; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
; // Lookaround and continue.
3777
3778 EnsureAnalyzed(that->lookaround_node());
3779 if (has_failed()) return;
3780 STATIC_FOR_EACH(
3781 Propagators::VisitNegativeLookaroundChoiceLookaroundNode(that));
3782
3783 EnsureAnalyzed(that->continue_node());
3784 if (has_failed()) return;
3785 STATIC_FOR_EACH(
3786 Propagators::VisitNegativeLookaroundChoiceContinueNode(that));
3787 }
3788
3789 void VisitBackReference(BackReferenceNode* that) override {
3790 EnsureAnalyzed(that->on_success());
3791 if (has_failed()) return;
3792 STATIC_FOR_EACH(Propagators::VisitBackReference(that));
3793 }
3794
3795 void VisitAssertion(AssertionNode* that) override {
3796 EnsureAnalyzed(that->on_success());
3797 if (has_failed()) return;
3798 STATIC_FOR_EACH(Propagators::VisitAssertion(that));
3799 }
3800
3801#undef STATIC_FOR_EACH
3802
3803 private:
3804 RegExpFlags flags() const { return flags_; }
3805 void set_flags(RegExpFlags flags) { flags_ = flags; }
3806
3807 Isolate* isolate_;
3808 const bool is_one_byte_;
3809 RegExpFlags flags_;
3810 RegExpError error_;
3811
3812 DISALLOW_IMPLICIT_CONSTRUCTORS(Analysis)Analysis() = delete; Analysis(const Analysis&) = delete; Analysis
& operator=(const Analysis&) = delete
;
3813};
3814
3815RegExpError AnalyzeRegExp(Isolate* isolate, bool is_one_byte, RegExpFlags flags,
3816 RegExpNode* node) {
3817 Analysis<AssertionPropagator, EatsAtLeastPropagator> analysis(
3818 isolate, is_one_byte, flags);
3819 DCHECK_EQ(node->info()->been_analyzed, false)do { static_assert( mozilla::detail::AssertionConditionType<
decltype((node->info()->been_analyzed) == (false))>::
isValid, "invalid assertion condition"); if ((__builtin_expect
(!!(!(!!((node->info()->been_analyzed) == (false)))), 0
))) { do { } while (false); MOZ_ReportAssertionFailure("(node->info()->been_analyzed) == (false)"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 3819); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(node->info()->been_analyzed) == (false)"
")"); do { *((volatile int*)__null) = 3819; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
3820 analysis.EnsureAnalyzed(node);
3821 DCHECK_IMPLIES(analysis.has_failed(), analysis.error() != RegExpError::kNone)do { if (analysis.has_failed()) { do { static_assert( mozilla
::detail::AssertionConditionType<decltype(analysis.error()
!= RegExpError::kNone)>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(analysis.error() != RegExpError
::kNone))), 0))) { do { } while (false); MOZ_ReportAssertionFailure
("analysis.error() != RegExpError::kNone", "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 3821); AnnotateMozCrashReason("MOZ_ASSERT" "(" "analysis.error() != RegExpError::kNone"
")"); do { *((volatile int*)__null) = 3821; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false); } } while (
false)
;
3822 return analysis.has_failed() ? analysis.error() : RegExpError::kNone;
3823}
3824
3825void BackReferenceNode::FillInBMInfo(Isolate* isolate, int offset, int budget,
3826 BoyerMooreLookahead* bm,
3827 bool not_at_start) {
3828 // Working out the set of characters that a backreference can match is too
3829 // hard, so we just say that any character can match.
3830 bm->SetRest(offset);
3831 SaveBMInfo(bm, not_at_start, offset);
3832}
3833
3834static_assert(BoyerMoorePositionInfo::kMapSize ==
3835 RegExpMacroAssembler::kTableSize);
3836
3837void ChoiceNode::FillInBMInfo(Isolate* isolate, int offset, int budget,
3838 BoyerMooreLookahead* bm, bool not_at_start) {
3839 ZoneList<GuardedAlternative>* alts = alternatives();
3840 budget = (budget - 1) / alts->length();
3841 for (int i = 0; i < alts->length(); i++) {
3842 GuardedAlternative& alt = alts->at(i);
3843 if (alt.guards() != nullptr && alt.guards()->length() != 0) {
3844 bm->SetRest(offset); // Give up trying to fill in info.
3845 SaveBMInfo(bm, not_at_start, offset);
3846 return;
3847 }
3848 alt.node()->FillInBMInfo(isolate, offset, budget, bm, not_at_start);
3849 }
3850 SaveBMInfo(bm, not_at_start, offset);
3851}
3852
3853void TextNode::FillInBMInfo(Isolate* isolate, int initial_offset, int budget,
3854 BoyerMooreLookahead* bm, bool not_at_start) {
3855 if (initial_offset >= bm->length()) return;
3856 int offset = initial_offset;
3857 int max_char = bm->max_char();
3858 for (int i = 0; i < elements()->length(); i++) {
3859 if (offset >= bm->length()) {
3860 if (initial_offset == 0) set_bm_info(not_at_start, bm);
3861 return;
3862 }
3863 TextElement text = elements()->at(i);
3864 if (text.text_type() == TextElement::ATOM) {
3865 RegExpAtom* atom = text.atom();
3866 for (int j = 0; j < atom->length(); j++, offset++) {
3867 if (offset >= bm->length()) {
3868 if (initial_offset == 0) set_bm_info(not_at_start, bm);
3869 return;
3870 }
3871 base::uc16 character = atom->data()[j];
3872 if (IsIgnoreCase(bm->compiler()->flags())) {
3873 unibrow::uchar chars[4];
3874 int length = GetCaseIndependentLetters(
3875 isolate, character, bm->max_char() == String::kMaxOneByteCharCode,
3876 chars, 4);
3877 for (int k = 0; k < length; k++) {
3878 bm->Set(offset, chars[k]);
3879 }
3880 } else {
3881 if (character <= max_char) bm->Set(offset, character);
3882 }
3883 }
3884 } else {
3885 DCHECK_EQ(TextElement::CLASS_RANGES, text.text_type())do { static_assert( mozilla::detail::AssertionConditionType<
decltype((TextElement::CLASS_RANGES) == (text.text_type()))>
::isValid, "invalid assertion condition"); if ((__builtin_expect
(!!(!(!!((TextElement::CLASS_RANGES) == (text.text_type()))))
, 0))) { do { } while (false); MOZ_ReportAssertionFailure("(TextElement::CLASS_RANGES) == (text.text_type())"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 3885); AnnotateMozCrashReason("MOZ_ASSERT" "(" "(TextElement::CLASS_RANGES) == (text.text_type())"
")"); do { *((volatile int*)__null) = 3885; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
3886 RegExpClassRanges* class_ranges = text.class_ranges();
3887 ZoneList<CharacterRange>* ranges = class_ranges->ranges(zone());
3888 if (class_ranges->is_negated()) {
3889 bm->SetAll(offset);
3890 } else {
3891 for (int k = 0; k < ranges->length(); k++) {
3892 CharacterRange& range = ranges->at(k);
3893 if (static_cast<int>(range.from()) > max_char) continue;
3894 int to = std::min(max_char, static_cast<int>(range.to()));
3895 bm->SetInterval(offset, Interval(range.from(), to));
3896 }
3897 }
3898 offset++;
3899 }
3900 }
3901 if (offset >= bm->length()) {
3902 if (initial_offset == 0) set_bm_info(not_at_start, bm);
3903 return;
3904 }
3905 on_success()->FillInBMInfo(isolate, offset, budget - 1, bm,
3906 true); // Not at start after a text node.
3907 if (initial_offset == 0) set_bm_info(not_at_start, bm);
3908}
3909
3910RegExpNode* RegExpCompiler::OptionallyStepBackToLeadSurrogate(
3911 RegExpNode* on_success) {
3912 DCHECK(!read_backward())do { static_assert( mozilla::detail::AssertionConditionType<
decltype(!read_backward())>::isValid, "invalid assertion condition"
); if ((__builtin_expect(!!(!(!!(!read_backward()))), 0))) { do
{ } while (false); MOZ_ReportAssertionFailure("!read_backward()"
, "/var/lib/jenkins/workspace/firefox-scan-build/js/src/irregexp/imported/regexp-compiler.cc"
, 3912); AnnotateMozCrashReason("MOZ_ASSERT" "(" "!read_backward()"
")"); do { *((volatile int*)__null) = 3912; __attribute__((nomerge
)) ::abort(); } while (false); } } while (false)
;
3913 ZoneList<CharacterRange>* lead_surrogates = CharacterRange::List(
3914 zone(), CharacterRange::Range(kLeadSurrogateStart, kLeadSurrogateEnd));
3915 ZoneList<CharacterRange>* trail_surrogates = CharacterRange::List(
3916 zone(), CharacterRange::Range(kTrailSurrogateStart, kTrailSurrogateEnd));
3917
3918 ChoiceNode* optional_step_back = zone()->New<ChoiceNode>(2, zone());
3919
3920 int stack_register = UnicodeLookaroundStackRegister();
3921 int position_register = UnicodeLookaroundPositionRegister();
3922 RegExpNode* step_back = TextNode::CreateForCharacterRanges(
3923 zone(), lead_surrogates, true, on_success);
3924 RegExpLookaround::Builder builder(true, step_back, stack_register,
3925 position_register);
3926 RegExpNode* match_trail = TextNode::CreateForCharacterRanges(
3927 zone(), trail_surrogates, false, builder.on_match_success());
3928
3929 optional_step_back->AddAlternative(
3930 GuardedAlternative(builder.ForMatch(match_trail)));
3931 optional_step_back->AddAlternative(GuardedAlternative(on_success));
3932
3933 return optional_step_back;
3934}
3935
3936RegExpNode* RegExpCompiler::PreprocessRegExp(RegExpCompileData* data,
3937 bool is_one_byte) {
3938 // Wrap the body of the regexp in capture #0.
3939 RegExpNode* captured_body =
3940 RegExpCapture::ToNode(data->tree, 0, this, accept());
3941 RegExpNode* node = captured_body;
3942 if (!data->tree->IsAnchoredAtStart() && !IsSticky(flags())) {
3943 // Add a .*? at the beginning, outside the body capture, unless
3944 // this expression is anchored at the beginning or sticky.
3945 RegExpNode* loop_node = RegExpQuantifier::ToNode(
3946 0, RegExpTree::kInfinity, false,
3947 zone()->New<RegExpClassRanges>(StandardCharacterSet::kEverything), this,
3948 captured_body, data->contains_anchor);
3949
3950 if (data->contains_anchor) {
3951 // Unroll loop once, to take care of the case that might start
3952 // at the start of input.
3953 ChoiceNode* first_step_node = zone()->New<ChoiceNode>(2, zone());
3954 first_step_node->AddAlternative(GuardedAlternative(captured_body));
3955 first_step_node->AddAlternative(GuardedAlternative(zone()->New<TextNode>(
3956 zone()->New<RegExpClassRanges>(StandardCharacterSet::kEverything),
3957 false, loop_node)));
3958 node = first_step_node;
3959 } else {
3960 node = loop_node;
3961 }
3962 }
3963 if (is_one_byte) {
3964 node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, flags());
3965 // Do it again to propagate the new nodes to places where they were not
3966 // put because they had not been calculated yet.
3967 if (node != nullptr) {
3968 node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, flags());
3969 }
3970 } else if (IsEitherUnicode(flags()) &&
3971 (IsGlobal(flags()) || IsSticky(flags()))) {
3972 node = OptionallyStepBackToLeadSurrogate(node);
3973 }
3974
3975 if (node == nullptr) node = zone()->New<EndNode>(EndNode::BACKTRACK, zone());
3976 return node;
3977}
3978
3979void RegExpCompiler::ToNodeCheckForStackOverflow() {
3980 if (StackLimitCheck{isolate()}.HasOverflowed()) {
3981 V8::FatalProcessOutOfMemory(isolate(), "RegExpCompiler");
3982 }
3983}
3984
3985} // namespace v8::internal