Bug Summary

File:var/lib/jenkins/workspace/firefox-scan-build/tools/power/rapl.cpp
Warning:line 516, column 17
Assigned value is garbage or undefined

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name rapl.cpp -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -fhalf-no-semantic-interposition -mframe-pointer=all -relaxed-aliasing -ffp-contract=off -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fdebug-compilation-dir=/var/lib/jenkins/workspace/firefox-scan-build/obj-x86_64-pc-linux-gnu/tools/power -fcoverage-compilation-dir=/var/lib/jenkins/workspace/firefox-scan-build/obj-x86_64-pc-linux-gnu/tools/power -resource-dir /usr/lib/llvm-20/lib/clang/20 -include /var/lib/jenkins/workspace/firefox-scan-build/config/gcc_hidden.h -include /var/lib/jenkins/workspace/firefox-scan-build/obj-x86_64-pc-linux-gnu/mozilla-config.h -I /var/lib/jenkins/workspace/firefox-scan-build/obj-x86_64-pc-linux-gnu/dist/system_wrappers -U _FORTIFY_SOURCE -D _FORTIFY_SOURCE=2 -D _GLIBCXX_ASSERTIONS -D DEBUG=1 -I /var/lib/jenkins/workspace/firefox-scan-build/tools/power -I /var/lib/jenkins/workspace/firefox-scan-build/obj-x86_64-pc-linux-gnu/tools/power -I /var/lib/jenkins/workspace/firefox-scan-build/obj-x86_64-pc-linux-gnu/dist/include -I /var/lib/jenkins/workspace/firefox-scan-build/obj-x86_64-pc-linux-gnu/dist/include/nspr -I 
/var/lib/jenkins/workspace/firefox-scan-build/obj-x86_64-pc-linux-gnu/dist/include/nss -D MOZILLA_CLIENT -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/14/../../../../include/c++/14 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/14/../../../../include/x86_64-linux-gnu/c++/14 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/14/../../../../include/c++/14/backward -internal-isystem /usr/lib/llvm-20/lib/clang/20/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/14/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-error=tautological-type-limit-compare -Wno-invalid-offsetof -Wno-range-loop-analysis -Wno-deprecated-anon-enum-enum-conversion -Wno-deprecated-enum-enum-conversion -Wno-deprecated-this-capture -Wno-inline-new-delete -Wno-error=deprecated-declarations -Wno-error=array-bounds -Wno-error=free-nonheap-object -Wno-error=atomic-alignment -Wno-error=deprecated-builtins -Wno-psabi -Wno-error=builtin-macro-redefined -Wno-vla-cxx-extension -Wno-unknown-warning-option -fdeprecated-macro -ferror-limit 19 -fstrict-flex-arrays=1 -stack-protector 2 -fstack-clash-protection -ftrivial-auto-var-init=pattern -fno-rtti -fgnuc-version=4.2.1 -fskip-odr-check-in-gmf -fno-sized-deallocation -fno-aligned-allocation -vectorize-loops -vectorize-slp -analyzer-checker optin.performance.Padding -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2025-01-20-090804-167946-1 -x c++ /var/lib/jenkins/workspace/firefox-scan-build/tools/power/rapl.cpp
1/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2/* vim: set ts=8 sts=2 et sw=2 tw=80: */
3/* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6
7// This program provides processor power estimates. It does this by reading
8// model-specific registers (MSRs) that are part of Intel's Running Average
9// Limit (RAPL) interface. These MSRs provide good quality estimates of the
10// energy consumption of up to four system components:
11// - PKG: the entire processor package;
12// - PP0: the cores (a subset of the package);
13// - PP1: the GPU (a subset of the package);
14// - DRAM: main memory.
15//
16// For more details about RAPL, see section 14.9 of Volume 3 of the "Intel 64
17// and IA-32 Architectures Software Developer's Manual", Order Number 325384.
18//
19// This program exists because there are no existing tools on Mac that can
20// obtain all four RAPL estimates. (|powermetrics| can obtain the package
21// estimate, but not the others. Intel Power Gadget can obtain the package and
22// cores estimates.)
23//
24// On Linux |perf| can obtain all four estimates (as Joules, which are easily
25// converted to Watts), but this program is implemented for Linux because it's
26// not too hard to do, and that gives us multi-platform consistency.
27//
28// This program does not support Windows, unfortunately. It's not obvious how
29// to access the RAPL MSRs on Windows.
30//
31// This program deliberately uses only standard libraries and avoids
32// Mozilla-specific code, to make it easy to compile and test on different
33// machines.
34
35#include <assert.h>
36#include <getopt.h>
37#include <math.h>
38#include <signal.h>
39#include <stdarg.h>
40#include <stdint.h>
41#include <stdio.h>
42#include <stdlib.h>
43#include <string.h>
44#include <sys/time.h>
45#include <unistd.h>
46
47#include <algorithm>
48#include <numeric>
49#include <vector>
50
51#ifdef MOZ_CLANG_PLUGIN
52# define MOZ_RUNINIT __attribute__((annotate("moz_global_var")))
53#else
54# define MOZ_RUNINIT
55#endif
56
57//---------------------------------------------------------------------------
58// Utilities
59//---------------------------------------------------------------------------
60
61// The value of argv[0] passed to main(). Used in error messages.
62static const char* gArgv0;
63
64static void Abort(const char* aFormat, ...) {
65 va_list vargs;
66 va_start(vargs, aFormat)__builtin_va_start(vargs, aFormat);
67 fprintf(stderrstderr, "%s: ", gArgv0);
68 vfprintf(stderrstderr, aFormat, vargs);
69 fprintf(stderrstderr, "\n");
70 va_end(vargs)__builtin_va_end(vargs);
71
72 exit(1);
73}
74
75static void CmdLineAbort(const char* aMsg) {
76 if (aMsg) {
77 fprintf(stderrstderr, "%s: %s\n", gArgv0, aMsg);
78 }
79 fprintf(stderrstderr, "Use --help for more information.\n");
80 exit(1);
81}
82
83// A special value that represents an estimate from an unsupported RAPL domain.
84static const double kUnsupported_j = -1.0;
85
// printf()-style output to stdout that is flushed straight away, so the text
// shows up immediately even when stdout is redirected through |tee| or
// something similar.
static void PrintAndFlush(const char* aFormat, ...) {
  va_list args;

  va_start(args, aFormat);
  vfprintf(stdout, aFormat, args);
  va_end(args);

  fflush(stdout);
}
96
97//---------------------------------------------------------------------------
98// Mac-specific code
99//---------------------------------------------------------------------------
100
101#if defined(__APPLE__)
102
103// Because of the pkg_energy_statistics_t::pkes_version check below, the
104// earliest OS X version this code will work with is 10.9.0 (xnu-2422.1.72).
105
106# include <sys/types.h>
107# include <sys/sysctl.h>
108
109// OS X has four kinds of system calls:
110//
111// 1. Mach traps;
112// 2. UNIX system calls;
113// 3. machine-dependent calls;
114// 4. diagnostic calls.
115//
116// (See "Mac OS X and iOS Internals" by Jonathan Levin for more details.)
117//
118// The last category has a single call named diagCall() or diagCall64(). Its
119// mode is controlled by its first argument, and one of the modes allows access
120// to the Intel RAPL MSRs.
121//
122// The interface to diagCall64() is not exported, so we have to import some
123// definitions from the XNU kernel. All imported definitions are annotated with
124// the XNU source file they come from, and information about what XNU versions
125// they were introduced in and (if relevant) modified.
126
127// The diagCall64() mode.
128// From osfmk/i386/Diagnostics.h
129// - In 10.8.4 (xnu-2050.24.15) this value was introduced. (In 10.8.3 the value
130// 17 was used for dgGzallocTest.)
131# define dgPowerStat 17
132
133// From osfmk/i386/cpu_data.h
134// - In 10.8.5 these values were introduced, along with core_energy_stat_t.
135# define CPU_RTIME_BINS (12)
136# define CPU_ITIME_BINS (CPU_RTIME_BINS)
137
138// core_energy_stat_t and pkg_energy_statistics_t are both from
139// osfmk/i386/Diagnostics.c.
140// - In 10.8.4 (xnu-2050.24.15) both structs were introduced, but with many
141// fewer fields.
142// - In 10.8.5 (xnu-2050.48.11) both structs were substantially expanded, with
143// numerous new fields.
144// - In 10.9.0 (xnu-2422.1.72) pkg_energy_statistics_t::pkes_version was added.
145// diagCall64(dgPowerStat) fills it with '1' in all versions since (up to
146// 10.10.2 at time of writing).
147// - in 10.10.2 (xnu-2782.10.72) core_energy_stat_t::gpmcs was conditionally
148// added, if DIAG_ALL_PMCS is true. (DIAG_ALL_PMCS is not even defined in the
149// source code, but it could be defined at compile-time via compiler flags.)
150// pkg_energy_statistics_t::pkes_version did not change, though.
151
152typedef struct {
153 uint64_t caperf;
154 uint64_t cmperf;
155 uint64_t ccres[6];
156 uint64_t crtimes[CPU_RTIME_BINS];
157 uint64_t citimes[CPU_ITIME_BINS];
158 uint64_t crtime_total;
159 uint64_t citime_total;
160 uint64_t cpu_idle_exits;
161 uint64_t cpu_insns;
162 uint64_t cpu_ucc;
163 uint64_t cpu_urc;
164# if DIAG_ALL_PMCS // Added in 10.10.2 (xnu-2782.10.72).
165 uint64_t gpmcs[4]; // Added in 10.10.2 (xnu-2782.10.72).
166# endif /* DIAG_ALL_PMCS */ // Added in 10.10.2 (xnu-2782.10.72).
167} core_energy_stat_t;
168
169typedef struct {
170 uint64_t pkes_version; // Added in 10.9.0 (xnu-2422.1.72).
171 uint64_t pkg_cres[2][7];
172
173 // This is read from MSR 0x606, which Intel calls MSR_RAPL_POWER_UNIT
174 // and XNU calls MSR_IA32_PKG_POWER_SKU_UNIT.
175 uint64_t pkg_power_unit;
176
177 // These are the four fields for the four RAPL domains. For each field
178 // we list:
179 //
180 // - the corresponding MSR number;
181 // - Intel's name for that MSR;
182 // - XNU's name for that MSR;
183 // - which Intel processors the MSR is supported on.
184 //
185 // The last of these is determined from chapter 35 of Volume 3 of the
186 // "Intel 64 and IA-32 Architecture's Software Developer's Manual",
187 // Order Number 325384. (Note that chapter 35 contradicts section 14.9
188 // to some degree.)
189
190 // 0x611 == MSR_PKG_ENERGY_STATUS == MSR_IA32_PKG_ENERGY_STATUS
191 // Atom (various), Sandy Bridge, Next Gen Xeon Phi (model 0x57).
192 uint64_t pkg_energy;
193
194 // 0x639 == MSR_PP0_ENERGY_STATUS == MSR_IA32_PP0_ENERGY_STATUS
195 // Atom (various), Sandy Bridge, Next Gen Xeon Phi (model 0x57).
196 uint64_t pp0_energy;
197
198 // 0x641 == MSR_PP1_ENERGY_STATUS == MSR_PP1_ENERGY_STATUS
199 // Sandy Bridge, Haswell.
200 uint64_t pp1_energy;
201
202 // 0x619 == MSR_DRAM_ENERGY_STATUS == MSR_IA32_DDR_ENERGY_STATUS
203 // Xeon E5, Xeon E5 v2, Haswell/Haswell-E, Next Gen Xeon Phi (model
204 // 0x57)
205 uint64_t ddr_energy;
206
207 uint64_t llc_flushed_cycles;
208 uint64_t ring_ratio_instantaneous;
209 uint64_t IA_frequency_clipping_cause;
210 uint64_t GT_frequency_clipping_cause;
211 uint64_t pkg_idle_exits;
212 uint64_t pkg_rtimes[CPU_RTIME_BINS];
213 uint64_t pkg_itimes[CPU_ITIME_BINS];
214 uint64_t mbus_delay_time;
215 uint64_t mint_delay_time;
216 uint32_t ncpus;
217 core_energy_stat_t cest[];
218} pkg_energy_statistics_t;
219
// Issues the XNU diagnostic system call in mode |aMode|, handing the kernel
// |aBuf| to fill in, and returns the value the kernel left in %rax (narrowed
// to int).
//
// syscall() cannot be used here because it doesn't work with diagnostic system
// calls -- it raises SIGSYS if you try -- so the call is made with inline asm.
static int diagCall64(uint64_t aMode, void* aBuf) {
#ifndef __x86_64__
#  error Sorry, only x86-64 is supported
#else
  // The 0x40000 prefix marks a diagnostic system call and the 0x01 suffix is
  // the syscall number, 1, which is also the only diagnostic system call. See
  // osfmk/mach/i386/syscall_sw.h for more details.
  static const uint64_t kDiagCallNumber = 0x4000001;
  uint64_t result;

  __asm__ __volatile__(
      "syscall"

      // Output: the return value comes back in "a" (%rax).
      : "=a"(result)

      // Inputs: the syscall number shares %rax with the output (constraint
      // "0"); the two arguments travel in "D" (%rdi) and "S" (%rsi).
      : "0"(kDiagCallNumber), "D"(aMode), "S"(aBuf)

      // Clobbers: |syscall| overwrites %rcx, %r11 and %rflags ("cc"), and
      // this particular call also writes memory (aBuf).
      : "rcx", "r11", "cc", "memory");

  return result;
#endif
}
250
251static void diagCall64_dgPowerStat(pkg_energy_statistics_t* aPkes) {
252 static const uint64_t supported_version = 1;
253
254 // Write an unsupported version number into pkes_version so that the check
255 // below cannot succeed by dumb luck.
256 aPkes->pkes_version = supported_version - 1;
257
258 // diagCall64() returns 1 on success, and 0 on failure (which can only happen
259 // if the mode is unrecognized, e.g. in 10.7.x or earlier versions).
260 if (diagCall64(dgPowerStat, aPkes) != 1) {
261 Abort("diagCall64() failed");
262 }
263
264 if (aPkes->pkes_version != 1) {
265 Abort("unexpected pkes_version: %llu", aPkes->pkes_version);
266 }
267}
268
269class RAPL {
270 bool mIsGpuSupported; // Is the GPU domain supported by the processor?
271 bool mIsRamSupported; // Is the RAM domain supported by the processor?
272
273 // The DRAM domain on Haswell servers has a fixed energy unit (1/65536 J ==
274 // 15.3 microJoules) which is different to the power unit MSR. (See the
275 // "Intel Xeon Processor E5-1600 and E5-2600 v3 Product Families, Volume 2 of
276 // 2, Registers" datasheet, September 2014, Reference Number: 330784-001.)
277 // This field records whether the quirk is present.
278 bool mHasRamUnitsQuirk;
279
280 // The abovementioned 15.3 microJoules value.
281 static const double kQuirkyRamJoulesPerTick;
282
283 // The previous sample's MSR values.
284 uint64_t mPrevPkgTicks;
285 uint64_t mPrevPp0Ticks;
286 uint64_t mPrevPp1Ticks;
287 uint64_t mPrevDdrTicks;
288
289 // The struct passed to diagCall64().
290 pkg_energy_statistics_t* mPkes;
291
292 public:
293 RAPL() : mHasRamUnitsQuirk(false) {
294 // Work out which RAPL MSRs this CPU model supports.
295 int cpuModel;
296 size_t size = sizeof(cpuModel);
297 if (sysctlbyname("machdep.cpu.model", &cpuModel, &size, NULL__null, 0) != 0) {
298 Abort("sysctlbyname(\"machdep.cpu.model\") failed");
299 }
300
301 // This is similar to arch/x86/kernel/cpu/perf_event_intel_rapl.c in
302 // linux-4.1.5/.
303 //
304 // By linux-5.6.14/, this stuff had moved into
305 // arch/x86/events/intel/rapl.c, which references processor families in
306 // arch/x86/include/asm/intel-family.h.
307 switch (cpuModel) {
308 case 0x2a: // Sandy Bridge
309 case 0x3a: // Ivy Bridge
310 // Supports package, cores, GPU.
311 mIsGpuSupported = true;
312 mIsRamSupported = false;
313 break;
314
315 case 0x3f: // Haswell X
316 case 0x4f: // Broadwell X
317 case 0x55: // Skylake X
318 case 0x56: // Broadwell D
319 // Supports package, cores, RAM. Has the units quirk.
320 mIsGpuSupported = false;
321 mIsRamSupported = true;
322 mHasRamUnitsQuirk = true;
323 break;
324
325 case 0x2d: // Sandy Bridge X
326 case 0x3e: // Ivy Bridge X
327 // Supports package, cores, RAM.
328 mIsGpuSupported = false;
329 mIsRamSupported = true;
330 break;
331
332 case 0x3c: // Haswell
333 case 0x3d: // Broadwell
334 case 0x45: // Haswell L
335 case 0x46: // Haswell G
336 case 0x47: // Broadwell G
337 // Supports package, cores, GPU, RAM.
338 mIsGpuSupported = true;
339 mIsRamSupported = true;
340 break;
341
342 case 0x4e: // Skylake L
343 case 0x5e: // Skylake
344 case 0x8e: // Kaby Lake L
345 case 0x9e: // Kaby Lake
346 case 0x66: // Cannon Lake L
347 case 0x7d: // Ice Lake
348 case 0x7e: // Ice Lake L
349 case 0xa5: // Comet Lake
350 case 0xa6: // Comet Lake L
351 // Supports package, cores, GPU, RAM, PSYS.
352 // XXX: this tool currently doesn't measure PSYS.
353 mIsGpuSupported = true;
354 mIsRamSupported = true;
355 break;
356
357 default:
358 Abort("unknown CPU model: %d", cpuModel);
359 break;
360 }
361
362 // Get the maximum number of logical CPUs so that we know how big to make
363 // |mPkes|.
364 int logicalcpu_max;
365 size = sizeof(logicalcpu_max);
366 if (sysctlbyname("hw.logicalcpu_max", &logicalcpu_max, &size, NULL__null, 0) !=
367 0) {
368 Abort("sysctlbyname(\"hw.logicalcpu_max\") failed");
369 }
370
371 // Over-allocate by 1024 bytes per CPU to allow for the uncertainty around
372 // core_energy_stat_t::gpmcs and for any other future extensions to that
373 // struct. (The fields we read all come before the core_energy_stat_t
374 // array, so it won't matter to us whether gpmcs is present or not.)
375 size_t pkesSize = sizeof(pkg_energy_statistics_t) +
376 logicalcpu_max * sizeof(core_energy_stat_t) +
377 logicalcpu_max * 1024;
378 mPkes = (pkg_energy_statistics_t*)malloc(pkesSize);
379 if (!mPkes) {
380 Abort("malloc() failed");
381 }
382
383 // Do an initial measurement so that the first sample's diffs are sensible.
384 double dummy1, dummy2, dummy3, dummy4;
385 EnergyEstimates(dummy1, dummy2, dummy3, dummy4);
386 }
387
388 ~RAPL() { free(mPkes); }
389
390 static double Joules(uint64_t aTicks, double aJoulesPerTick) {
391 return double(aTicks) * aJoulesPerTick;
392 }
393
394 void EnergyEstimates(double& aPkg_J, double& aCores_J, double& aGpu_J,
395 double& aRam_J) {
396 diagCall64_dgPowerStat(mPkes);
397
398 // Bits 12:8 are the ESU.
399 // Energy measurements come in multiples of 1/(2^ESU).
400 uint32_t energyStatusUnits = (mPkes->pkg_power_unit >> 8) & 0x1f;
401 double joulesPerTick = ((double)1 / (1 << energyStatusUnits));
402
403 aPkg_J = Joules(mPkes->pkg_energy - mPrevPkgTicks, joulesPerTick);
404 aCores_J = Joules(mPkes->pp0_energy - mPrevPp0Ticks, joulesPerTick);
405 aGpu_J = mIsGpuSupported
406 ? Joules(mPkes->pp1_energy - mPrevPp1Ticks, joulesPerTick)
407 : kUnsupported_j;
408 aRam_J = mIsRamSupported
409 ? Joules(mPkes->ddr_energy - mPrevDdrTicks,
410 mHasRamUnitsQuirk ? kQuirkyRamJoulesPerTick
411 : joulesPerTick)
412 : kUnsupported_j;
413
414 mPrevPkgTicks = mPkes->pkg_energy;
415 mPrevPp0Ticks = mPkes->pp0_energy;
416 if (mIsGpuSupported) {
417 mPrevPp1Ticks = mPkes->pp1_energy;
418 }
419 if (mIsRamSupported) {
420 mPrevDdrTicks = mPkes->ddr_energy;
421 }
422 }
423};
424
425/* static */ const double RAPL::kQuirkyRamJoulesPerTick = (double)1 / 65536;
426
427//---------------------------------------------------------------------------
428// Linux-specific code
429//---------------------------------------------------------------------------
430
431#elif defined(__linux__1)
432
433# include <linux1/perf_event.h>
434# include <sys/syscall.h>
435
// glibc provides no wrapper for the perf_event_open system call, so this one
// forwards its arguments to syscall() unchanged and returns the result as an
// int.
static int perf_event_open(struct perf_event_attr* aAttr, pid_t aPid, int aCpu,
                           int aGroupFd, unsigned long aFlags) {
  const long rv =
      syscall(__NR_perf_event_open, aAttr, aPid, aCpu, aGroupFd, aFlags);
  return static_cast<int>(rv);
}
441
442// Returns false if the file cannot be opened.
443template <typename T>
444static bool ReadValueFromPowerFile(const char* aStr1, const char* aStr2,
445 const char* aStr3, const char* aScanfString,
446 T* aOut) {
447 // The filenames going into this buffer are under our control and the longest
448 // one is "/sys/bus/event_source/devices/power/events/energy-cores.scale".
449 // So 256 chars is plenty.
450 char filename[256];
451
452 sprintf(filename, "/sys/bus/event_source/devices/power/%s%s%s", aStr1, aStr2,
453 aStr3);
454 FILE* fp = fopen(filename, "r");
455 if (!fp
8.1
'fp' is non-null
) {
9
Taking false branch
456 return false;
457 }
458 if (fscanf(fp, aScanfString, aOut) != 1) {
10
Taking true branch
459 Abort("fscanf() failed");
460 }
461 fclose(fp);
462
463 return true;
11
Returning without writing to '*aOut'
464}
465
466// This class encapsulates the reading of a single RAPL domain.
467class Domain {
468 bool mIsSupported; // Is the domain supported by the processor?
469
470 // These three are only set if |mIsSupported| is true.
471 double mJoulesPerTick; // How many Joules each tick of the MSR represents.
472 int mFd; // The fd through which the MSR is read.
473 double mPrevTicks; // The previous sample's MSR value.
474
475 public:
476 enum IsOptional { Optional, NonOptional };
477
478 Domain(const char* aName, uint32_t aType,
479 IsOptional aOptional = NonOptional) {
480 uint64_t config;
7
'config' declared without an initial value
481 if (!ReadValueFromPowerFile("events/energy-", aName, "", "event=%llx",
8
Calling 'ReadValueFromPowerFile<unsigned long>'
12
Returning from 'ReadValueFromPowerFile<unsigned long>'
13
Taking false branch
482 &config)) {
483 // Failure is allowed for optional domains.
484 if (aOptional == NonOptional) {
485 Abort(
486 "failed to open file for non-optional domain '%s'\n"
487 "- Is your kernel version 3.14 or later, as required? "
488 "Run |uname -r| to see.",
489 aName);
490 }
491 mIsSupported = false;
492 return;
493 }
494
495 mIsSupported = true;
496
497 if (!ReadValueFromPowerFile("events/energy-", aName, ".scale", "%lf",
14
Taking true branch
498 &mJoulesPerTick)) {
499 Abort("failed to read from .scale file");
500 }
501
502 // The unit should be "Joules", so 128 chars should be plenty.
503 char unit[128];
504 if (!ReadValueFromPowerFile("events/energy-", aName, ".unit", "%127s",
15
Taking true branch
505 unit)) {
506 Abort("failed to read from .unit file");
507 }
508 if (strcmp(unit, "Joules") != 0) {
16
Assuming the condition is false
17
Taking false branch
509 Abort("unexpected unit '%s' in .unit file", unit);
510 }
511
512 struct perf_event_attr attr;
513 memset(&attr, 0, sizeof(attr));
514 attr.type = aType;
515 attr.size = uint32_t(sizeof(attr));
516 attr.config = config;
18
Assigned value is garbage or undefined
517
518 // Measure all processes/threads. The specified CPU doesn't matter.
519 mFd = perf_event_open(&attr, /* aPid = */ -1, /* aCpu = */ 0,
520 /* aGroupFd = */ -1, /* aFlags = */ 0);
521 if (mFd < 0) {
522 Abort(
523 "perf_event_open() failed\n"
524 "- Did you run as root (e.g. with |sudo|) or set\n"
525 " /proc/sys/kernel/perf_event_paranoid to 0, as required?");
526 }
527
528 mPrevTicks = 0;
529 }
530
531 ~Domain() {
532 if (mIsSupported) {
533 close(mFd);
534 }
535 }
536
537 double EnergyEstimate() {
538 if (!mIsSupported) {
539 return kUnsupported_j;
540 }
541
542 uint64_t thisTicks;
543 if (read(mFd, &thisTicks, sizeof(uint64_t)) != sizeof(uint64_t)) {
544 Abort("read() failed");
545 }
546
547 uint64_t ticks = thisTicks - mPrevTicks;
548 mPrevTicks = thisTicks;
549 double joules = ticks * mJoulesPerTick;
550 return joules;
551 }
552};
553
554class RAPL {
555 Domain* mPkg;
556 Domain* mCores;
557 Domain* mGpu;
558 Domain* mRam;
559
560 public:
561 RAPL() {
562 uint32_t type;
563 if (!ReadValueFromPowerFile("type", "", "", "%u", &type)) {
5
Taking false branch
564 Abort("failed to read from type file");
565 }
566
567 mPkg = new Domain("pkg", type);
6
Calling constructor for 'Domain'
568 mCores = new Domain("cores", type);
569 mGpu = new Domain("gpu", type, Domain::Optional);
570 mRam = new Domain("ram", type, Domain::Optional);
571 if (!mPkg || !mCores || !mGpu || !mRam) {
572 Abort("new Domain() failed");
573 }
574 }
575
576 ~RAPL() {
577 delete mPkg;
578 delete mCores;
579 delete mGpu;
580 delete mRam;
581 }
582
583 void EnergyEstimates(double& aPkg_J, double& aCores_J, double& aGpu_J,
584 double& aRam_J) {
585 aPkg_J = mPkg->EnergyEstimate();
586 aCores_J = mCores->EnergyEstimate();
587 aGpu_J = mGpu->EnergyEstimate();
588 aRam_J = mRam->EnergyEstimate();
589 }
590};
591
592#else
593
594//---------------------------------------------------------------------------
595// Unsupported platforms
596//---------------------------------------------------------------------------
597
598# error Sorry, this platform is not supported
599
600#endif // platform
601
602//---------------------------------------------------------------------------
603// The main loop
604//---------------------------------------------------------------------------
605
606// The sample interval, measured in seconds.
607static double gSampleInterval_sec;
608
609// The platform-specific RAPL-reading machinery.
610static RAPL* gRapl;
611
612// All the sampled "total" values, in Watts.
613MOZ_RUNINIT static std::vector<double> gTotals_W;
614
615// Power = Energy / Time, where power is measured in Watts, Energy is measured
616// in Joules, and Time is measured in seconds.
617static double JoulesToWatts(double aJoules) {
618 return aJoules / gSampleInterval_sec;
619}
620
621// "Normalize" here means convert kUnsupported_j to zero so it can be used in
622// additive expressions. All printed values are 5 or maybe 6 chars (though 6
623// chars would require a value > 100 W, which is unlikely). Values above 1000 W
624// are normalized to " n/a ", so 6 chars is the longest that may be printed.
625static void NormalizeAndPrintAsWatts(char* aBuf, double& aValue_J) {
626 if (aValue_J == kUnsupported_j || aValue_J >= 1000) {
627 aValue_J = 0;
628 sprintf(aBuf, "%s", " n/a ");
629 } else {
630 sprintf(aBuf, "%5.2f", JoulesToWatts(aValue_J));
631 }
632}
633
634static void SigAlrmHandler(int aSigNum, siginfo_t* aInfo, void* aContext) {
635 static int sampleNumber = 1;
636
637 double pkg_J, cores_J, gpu_J, ram_J;
638 gRapl->EnergyEstimates(pkg_J, cores_J, gpu_J, ram_J);
639
640 // We should have pkg and cores estimates, but might not have gpu and ram
641 // estimates.
642 assert(pkg_J != kUnsupported_j)(static_cast <bool> (pkg_J != kUnsupported_j) ? void (0
) : __assert_fail ("pkg_J != kUnsupported_j", __builtin_FILE (
), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__))
;
643 assert(cores_J != kUnsupported_j)(static_cast <bool> (cores_J != kUnsupported_j) ? void (
0) : __assert_fail ("cores_J != kUnsupported_j", __builtin_FILE
(), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__))
;
644
645 // This needs to be big enough to print watt values to two decimal places. 16
646 // should be plenty.
647 static const size_t kNumStrLen = 16;
648
649 static char pkgStr[kNumStrLen], coresStr[kNumStrLen], gpuStr[kNumStrLen],
650 ramStr[kNumStrLen];
651 NormalizeAndPrintAsWatts(pkgStr, pkg_J);
652 NormalizeAndPrintAsWatts(coresStr, cores_J);
653 NormalizeAndPrintAsWatts(gpuStr, gpu_J);
654 NormalizeAndPrintAsWatts(ramStr, ram_J);
655
656 // Core and GPU power are a subset of the package power.
657 assert(pkg_J >= cores_J + gpu_J)(static_cast <bool> (pkg_J >= cores_J + gpu_J) ? void
(0) : __assert_fail ("pkg_J >= cores_J + gpu_J", __builtin_FILE
(), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__))
;
658
659 // Compute "other" (i.e. rest of the package) and "total" only after the
660 // other values have been normalized.
661
662 char otherStr[kNumStrLen];
663 double other_J = pkg_J - cores_J - gpu_J;
664 NormalizeAndPrintAsWatts(otherStr, other_J);
665
666 char totalStr[kNumStrLen];
667 double total_J = pkg_J + ram_J;
668 NormalizeAndPrintAsWatts(totalStr, total_J);
669
670 gTotals_W.push_back(JoulesToWatts(total_J));
671
672 // Print and flush so that the output appears immediately even if being
673 // redirected through |tee| or anything like that.
674 PrintAndFlush("#%02d %s W = %s (%s + %s + %s) + %s W\n", sampleNumber++,
675 totalStr, pkgStr, coresStr, gpuStr, otherStr, ramStr);
676}
677
678static void Finish() {
679 size_t n = gTotals_W.size();
680
681 // This time calculation assumes that the timers are perfectly accurate which
682 // is not true but the inaccuracy should be small in practice.
683 double time = n * gSampleInterval_sec;
684
685 printf("\n");
686 printf("%d sample%s taken over a period of %.3f second%s\n", int(n),
687 n == 1 ? "" : "s", n * gSampleInterval_sec, time == 1.0 ? "" : "s");
688
689 if (n == 0 || n == 1) {
690 exit(0);
691 }
692
693 // Compute the mean.
694 double sum = std::accumulate(gTotals_W.begin(), gTotals_W.end(), 0.0);
695 double mean = sum / n;
696
697 // Compute the *population* standard deviation:
698 //
699 // popStdDev = sqrt(Sigma(x - m)^2 / n)
700 //
701 // where |x| is the sum variable, |m| is the mean, and |n| is the
702 // population size.
703 //
704 // This is different from the *sample* standard deviation, which divides by
705 // |n - 1|, and would be appropriate if we were using a random sample of a
706 // larger population.
707 double sumOfSquaredDeviations = 0;
708 for (double& iter : gTotals_W) {
709 double deviation = (iter - mean);
710 sumOfSquaredDeviations += deviation * deviation;
711 }
712 double popStdDev = sqrt(sumOfSquaredDeviations / n);
713
714 // Sort so that percentiles can be determined. We use the "Nearest Rank"
715 // method of determining percentiles, which is simplest to compute and which
716 // chooses values from those that appear in the input set.
717 std::sort(gTotals_W.begin(), gTotals_W.end());
718
719 printf("\n");
720 printf("Distribution of 'total' values:\n");
721 printf(" mean = %5.2f W\n", mean);
722 printf(" std dev = %5.2f W\n", popStdDev);
723 printf(" 0th percentile = %5.2f W (min)\n", gTotals_W[0]);
724 printf(" 5th percentile = %5.2f W\n", gTotals_W[ceil(0.05 * n) - 1]);
725 printf(" 25th percentile = %5.2f W\n", gTotals_W[ceil(0.25 * n) - 1]);
726 printf(" 50th percentile = %5.2f W\n", gTotals_W[ceil(0.50 * n) - 1]);
727 printf(" 75th percentile = %5.2f W\n", gTotals_W[ceil(0.75 * n) - 1]);
728 printf(" 95th percentile = %5.2f W\n", gTotals_W[ceil(0.95 * n) - 1]);
729 printf("100th percentile = %5.2f W (max)\n", gTotals_W[n - 1]);
730
731 exit(0);
732}
733
734static void SigIntHandler(int aSigNum, siginfo_t* aInfo, void* aContext) {
735 Finish();
736}
737
// Writes the --help text to stdout. The paragraph about required privileges
// depends on the platform the program was built for.
static void PrintUsage() {
  static const char kUsageText[] =
      "usage: rapl [options]\n"
      "\n"
      "Options:\n"
      "\n"
      " -h --help show this message\n"
      " -i --sample-interval <N> sample every N ms [default=1000]\n"
      " -n --sample-count <N> get N samples (0 means unlimited) "
      "[default=0]\n"
      "\n"
#if defined(__APPLE__)
      "On Mac this program can be run by any user.\n"
#elif defined(__linux__)
      "On Linux this program can only be run by the super-user unless the "
      "contents\n"
      "of /proc/sys/kernel/perf_event_paranoid is set to 0 or lower.\n"
#else
#  error Sorry, this platform is not supported
#endif
      "\n";

  // The text contains no conversion specifications, so it can be written
  // verbatim.
  fputs(kUsageText, stdout);
}
760
761int main(int argc, char** argv) {
762 // Process command line options.
763
764 gArgv0 = argv[0];
765
766 // Default values.
767 int sampleInterval_msec = 1000;
768 int sampleCount = 0;
769
770 struct option longOptions[] = {
771 {"help", no_argument0, NULL__null, 'h'},
772 {"sample-interval", required_argument1, NULL__null, 'i'},
773 {"sample-count", required_argument1, NULL__null, 'n'},
774 {NULL__null, 0, NULL__null, 0}};
775 const char* shortOptions = "hi:n:";
776
777 int c;
778 char* endPtr;
779 while ((c = getopt_long(argc, argv, shortOptions, longOptions, NULL__null)) != -1) {
1
Assuming the condition is false
2
Loop condition is false. Execution continues on line 822
780 switch (c) {
781 case 'h':
782 PrintUsage();
783 exit(0);
784
785 case 'i':
786 sampleInterval_msec = strtol(optarg, &endPtr, /* base = */ 10);
787 if (*endPtr) {
788 CmdLineAbort("sample interval is not an integer");
789 }
790 if (sampleInterval_msec < 1 || sampleInterval_msec > 3600000) {
791 CmdLineAbort("sample interval must be in the range 1..3600000 ms");
792 }
793 break;
794
795 case 'n':
796 sampleCount = strtol(optarg, &endPtr, /* base = */ 10);
797 if (*endPtr) {
798 CmdLineAbort("sample count is not an integer");
799 }
800 if (sampleCount < 0 || sampleCount > 1000000) {
801 CmdLineAbort("sample count must be in the range 0..1000000");
802 }
803 break;
804
805 default:
806 CmdLineAbort(NULL__null);
807 }
808 }
809
810 // The RAPL MSRs update every ~1 ms, but the measurement period isn't exactly
811 // 1 ms, which means the sample periods are not exact. "Power Measurement
812 // Techniques on Standard Compute Nodes: A Quantitative Comparison" by
813 // Hackenberg et al. suggests the following.
814 //
815 // "RAPL provides energy (and not power) consumption data without
816 // timestamps associated to each counter update. This makes sampling rates
817 // above 20 Samples/s unfeasible if the systematic error should be below
818 // 5%... Constantly polling the RAPL registers will both occupy a processor
819 // core and distort the measurement itself."
820 //
821 // So warn about this case.
822 if (sampleInterval_msec
2.1
'sampleInterval_msec' is >= 50
< 50) {
3
Taking false branch
823 fprintf(stderrstderr,
824 "\nWARNING: sample intervals < 50 ms are likely to produce "
825 "inaccurate estimates\n\n");
826 }
827 gSampleInterval_sec = double(sampleInterval_msec) / 1000;
828
829 // Initialize the platform-specific RAPL reading machinery.
830 gRapl = new RAPL();
4
Calling default constructor for 'RAPL'
831 if (!gRapl) {
832 Abort("new RAPL() failed");
833 }
834
835 // Install the signal handlers.
836
837 struct sigaction sa;
838 memset(&sa, 0, sizeof(sa));
839 sa.sa_flags = SA_RESTART0x10000000 | SA_SIGINFO4;
840 // The extra parens around (0) suppress a -Wunreachable-code warning on OS X
841 // where sigemptyset() is a macro that can never fail and always returns 0.
842 if (sigemptyset(&sa.sa_mask) < (0)) {
843 Abort("sigemptyset() failed");
844 }
845 sa.sa_sigaction__sigaction_handler.sa_sigaction = SigAlrmHandler;
846 if (sigaction(SIGALRM14, &sa, NULL__null) < 0) {
847 Abort("sigaction(SIGALRM) failed");
848 }
849 sa.sa_sigaction__sigaction_handler.sa_sigaction = SigIntHandler;
850 if (sigaction(SIGINT2, &sa, NULL__null) < 0) {
851 Abort("sigaction(SIGINT) failed");
852 }
853
854 // Set up the timer.
855 struct itimerval timer;
856 timer.it_interval.tv_sec = sampleInterval_msec / 1000;
857 timer.it_interval.tv_usec = (sampleInterval_msec % 1000) * 1000;
858 timer.it_value = timer.it_interval;
859 if (setitimer(ITIMER_REALITIMER_REAL, &timer, NULL__null) < 0) {
860 Abort("setitimer() failed");
861 }
862
863 // Print header.
864 PrintAndFlush(" total W = _pkg_ (cores + _gpu_ + other) + _ram_ W\n");
865
866 // Take samples.
867 if (sampleCount == 0) {
868 while (true) {
869 pause();
870 }
871 } else {
872 for (int i = 0; i < sampleCount; i++) {
873 pause();
874 }
875 }
876
877 Finish();
878
879 return 0;
880}