File: | var/lib/jenkins/workspace/firefox-scan-build/tools/power/rapl.cpp |
Warning: | line 567, column 16: 2nd function call argument is an uninitialized value |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ | |||
2 | /* vim: set ts=8 sts=2 et sw=2 tw=80: */ | |||
3 | /* This Source Code Form is subject to the terms of the Mozilla Public | |||
4 | * License, v. 2.0. If a copy of the MPL was not distributed with this | |||
5 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ | |||
6 | ||||
7 | // This program provides processor power estimates. It does this by reading | |||
8 | // model-specific registers (MSRs) that are part of Intel's Running Average Power | |||
9 | // Limit (RAPL) interface. These MSRs provide good quality estimates of the | |||
10 | // energy consumption of up to four system components: | |||
11 | // - PKG: the entire processor package; | |||
12 | // - PP0: the cores (a subset of the package); | |||
13 | // - PP1: the GPU (a subset of the package); | |||
14 | // - DRAM: main memory. | |||
15 | // | |||
16 | // For more details about RAPL, see section 14.9 of Volume 3 of the "Intel 64 | |||
17 | // and IA-32 Architectures Software Developer's Manual", Order Number 325384. | |||
18 | // | |||
19 | // This program exists because there are no existing tools on Mac that can | |||
20 | // obtain all four RAPL estimates. (|powermetrics| can obtain the package | |||
21 | // estimate, but not the others. Intel Power Gadget can obtain the package and | |||
22 | // cores estimates.) | |||
23 | // | |||
24 | // On Linux |perf| can obtain all four estimates (as Joules, which are easily | |||
25 | // converted to Watts), but this program is implemented for Linux because it's | |||
26 | // not too hard to do, and that gives us multi-platform consistency. | |||
27 | // | |||
28 | // This program does not support Windows, unfortunately. It's not obvious how | |||
29 | // to access the RAPL MSRs on Windows. | |||
30 | // | |||
31 | // This program deliberately uses only standard libraries and avoids | |||
32 | // Mozilla-specific code, to make it easy to compile and test on different | |||
33 | // machines. | |||
34 | ||||
35 | #include <assert.h> | |||
36 | #include <getopt.h> | |||
37 | #include <math.h> | |||
38 | #include <signal.h> | |||
39 | #include <stdarg.h> | |||
40 | #include <stdint.h> | |||
41 | #include <stdio.h> | |||
42 | #include <stdlib.h> | |||
43 | #include <string.h> | |||
44 | #include <sys/time.h> | |||
45 | #include <unistd.h> | |||
46 | ||||
47 | #include <algorithm> | |||
48 | #include <numeric> | |||
49 | #include <vector> | |||
50 | ||||
51 | #ifdef MOZ_CLANG_PLUGIN | |||
52 | # define MOZ_RUNINIT __attribute__((annotate("moz_global_var"))) | |||
53 | #else | |||
54 | # define MOZ_RUNINIT | |||
55 | #endif | |||
56 | ||||
57 | //--------------------------------------------------------------------------- | |||
58 | // Utilities | |||
59 | //--------------------------------------------------------------------------- | |||
60 | ||||
61 | // The value of argv[0] passed to main(). Used in error messages. | |||
62 | static const char* gArgv0; | |||
63 | ||||
64 | static void Abort(const char* aFormat, ...) { | |||
65 | va_list vargs; | |||
66 | va_start(vargs, aFormat)__builtin_va_start(vargs, aFormat); | |||
67 | fprintf(stderrstderr, "%s: ", gArgv0); | |||
68 | vfprintf(stderrstderr, aFormat, vargs); | |||
69 | fprintf(stderrstderr, "\n"); | |||
70 | va_end(vargs)__builtin_va_end(vargs); | |||
71 | ||||
72 | exit(1); | |||
73 | } | |||
74 | ||||
75 | static void CmdLineAbort(const char* aMsg) { | |||
76 | if (aMsg) { | |||
77 | fprintf(stderrstderr, "%s: %s\n", gArgv0, aMsg); | |||
78 | } | |||
79 | fprintf(stderrstderr, "Use --help for more information.\n"); | |||
80 | exit(1); | |||
81 | } | |||
82 | ||||
83 | // A special value that represents an estimate from an unsupported RAPL domain. | |||
84 | static const double kUnsupported_j = -1.0; | |||
85 | ||||
// printf()-style output to stdout, followed by an explicit flush. The flush
// makes each message show up straight away, even when stdout is not a terminal
// (e.g. when it is piped through |tee|).
static void PrintAndFlush(const char* aFormat, ...) {
  va_list args;

  va_start(args, aFormat);
  vfprintf(stdout, aFormat, args);
  va_end(args);

  fflush(stdout);
}
96 | ||||
97 | //--------------------------------------------------------------------------- | |||
98 | // Mac-specific code | |||
99 | //--------------------------------------------------------------------------- | |||
100 | ||||
101 | #if defined(__APPLE__) | |||
102 | ||||
103 | // Because of the pkg_energy_statistics_t::pkes_version check below, the | |||
104 | // earliest OS X version this code will work with is 10.9.0 (xnu-2422.1.72). | |||
105 | ||||
106 | # include <sys/types.h> | |||
107 | # include <sys/sysctl.h> | |||
108 | ||||
109 | // OS X has four kinds of system calls: | |||
110 | // | |||
111 | // 1. Mach traps; | |||
112 | // 2. UNIX system calls; | |||
113 | // 3. machine-dependent calls; | |||
114 | // 4. diagnostic calls. | |||
115 | // | |||
116 | // (See "Mac OS X and iOS Internals" by Jonathan Levin for more details.) | |||
117 | // | |||
118 | // The last category has a single call named diagCall() or diagCall64(). Its | |||
119 | // mode is controlled by its first argument, and one of the modes allows access | |||
120 | // to the Intel RAPL MSRs. | |||
121 | // | |||
122 | // The interface to diagCall64() is not exported, so we have to import some | |||
123 | // definitions from the XNU kernel. All imported definitions are annotated with | |||
124 | // the XNU source file they come from, and information about what XNU versions | |||
125 | // they were introduced in and (if relevant) modified. | |||
126 | ||||
127 | // The diagCall64() mode. | |||
128 | // From osfmk/i386/Diagnostics.h | |||
129 | // - In 10.8.4 (xnu-2050.24.15) this value was introduced. (In 10.8.3 the value | |||
130 | // 17 was used for dgGzallocTest.) | |||
131 | # define dgPowerStat 17 | |||
132 | ||||
133 | // From osfmk/i386/cpu_data.h | |||
134 | // - In 10.8.5 these values were introduced, along with core_energy_stat_t. | |||
135 | # define CPU_RTIME_BINS (12) | |||
136 | # define CPU_ITIME_BINS (CPU_RTIME_BINS) | |||
137 | ||||
138 | // core_energy_stat_t and pkg_energy_statistics_t are both from | |||
139 | // osfmk/i386/Diagnostics.c. | |||
140 | // - In 10.8.4 (xnu-2050.24.15) both structs were introduced, but with many | |||
141 | // fewer fields. | |||
142 | // - In 10.8.5 (xnu-2050.48.11) both structs were substantially expanded, with | |||
143 | // numerous new fields. | |||
144 | // - In 10.9.0 (xnu-2422.1.72) pkg_energy_statistics_t::pkes_version was added. | |||
145 | // diagCall64(dgPowerStat) fills it with '1' in all versions since (up to | |||
146 | // 10.10.2 at time of writing). | |||
147 | // - In 10.10.2 (xnu-2782.10.72) core_energy_stat_t::gpmcs was conditionally | |||
148 | // added, if DIAG_ALL_PMCS is true. (DIAG_ALL_PMCS is not even defined in the | |||
149 | // XNU source code, but it could be defined at compile-time via compiler flags.) | |||
150 | // pkg_energy_statistics_t::pkes_version did not change, though. | |||
151 | ||||
152 | typedef struct { | |||
153 | uint64_t caperf; | |||
154 | uint64_t cmperf; | |||
155 | uint64_t ccres[6]; | |||
156 | uint64_t crtimes[CPU_RTIME_BINS]; | |||
157 | uint64_t citimes[CPU_ITIME_BINS]; | |||
158 | uint64_t crtime_total; | |||
159 | uint64_t citime_total; | |||
160 | uint64_t cpu_idle_exits; | |||
161 | uint64_t cpu_insns; | |||
162 | uint64_t cpu_ucc; | |||
163 | uint64_t cpu_urc; | |||
164 | # if DIAG_ALL_PMCS // Added in 10.10.2 (xnu-2782.10.72). | |||
165 | uint64_t gpmcs[4]; // Added in 10.10.2 (xnu-2782.10.72). | |||
166 | # endif /* DIAG_ALL_PMCS */ // Added in 10.10.2 (xnu-2782.10.72). | |||
167 | } core_energy_stat_t; | |||
168 | ||||
169 | typedef struct { | |||
170 | uint64_t pkes_version; // Added in 10.9.0 (xnu-2422.1.72). | |||
171 | uint64_t pkg_cres[2][7]; | |||
172 | ||||
173 | // This is read from MSR 0x606, which Intel calls MSR_RAPL_POWER_UNIT | |||
174 | // and XNU calls MSR_IA32_PKG_POWER_SKU_UNIT. | |||
175 | uint64_t pkg_power_unit; | |||
176 | ||||
177 | // These are the four fields for the four RAPL domains. For each field | |||
178 | // we list: | |||
179 | // | |||
180 | // - the corresponding MSR number; | |||
181 | // - Intel's name for that MSR; | |||
182 | // - XNU's name for that MSR; | |||
183 | // - which Intel processors the MSR is supported on. | |||
184 | // | |||
185 | // The last of these is determined from chapter 35 of Volume 3 of the | |||
186 | // "Intel 64 and IA-32 Architectures Software Developer's Manual", | |||
187 | // Order Number 325384. (Note that chapter 35 contradicts section 14.9 | |||
188 | // to some degree.) | |||
189 | ||||
190 | // 0x611 == MSR_PKG_ENERGY_STATUS == MSR_IA32_PKG_ENERGY_STATUS | |||
191 | // Atom (various), Sandy Bridge, Next Gen Xeon Phi (model 0x57). | |||
192 | uint64_t pkg_energy; | |||
193 | ||||
194 | // 0x639 == MSR_PP0_ENERGY_STATUS == MSR_IA32_PP0_ENERGY_STATUS | |||
195 | // Atom (various), Sandy Bridge, Next Gen Xeon Phi (model 0x57). | |||
196 | uint64_t pp0_energy; | |||
197 | ||||
198 | // 0x641 == MSR_PP1_ENERGY_STATUS == MSR_PP1_ENERGY_STATUS | |||
199 | // Sandy Bridge, Haswell. | |||
200 | uint64_t pp1_energy; | |||
201 | ||||
202 | // 0x619 == MSR_DRAM_ENERGY_STATUS == MSR_IA32_DDR_ENERGY_STATUS | |||
203 | // Xeon E5, Xeon E5 v2, Haswell/Haswell-E, Next Gen Xeon Phi (model | |||
204 | // 0x57) | |||
205 | uint64_t ddr_energy; | |||
206 | ||||
207 | uint64_t llc_flushed_cycles; | |||
208 | uint64_t ring_ratio_instantaneous; | |||
209 | uint64_t IA_frequency_clipping_cause; | |||
210 | uint64_t GT_frequency_clipping_cause; | |||
211 | uint64_t pkg_idle_exits; | |||
212 | uint64_t pkg_rtimes[CPU_RTIME_BINS]; | |||
213 | uint64_t pkg_itimes[CPU_ITIME_BINS]; | |||
214 | uint64_t mbus_delay_time; | |||
215 | uint64_t mint_delay_time; | |||
216 | uint32_t ncpus; | |||
217 | core_energy_stat_t cest[]; | |||
218 | } pkg_energy_statistics_t; | |||
219 | ||||
// Issues the XNU diagnostic system call with mode |aMode| and buffer |aBuf|,
// and returns the value the kernel leaves in %rax (truncated to int).
//
// Only x86-64 is supported; compilation fails on any other architecture.
static int diagCall64(uint64_t aMode, void* aBuf) {
  // We cannot use syscall() here because it doesn't work with diagnostic
  // system calls -- it raises SIGSYS if you try. So we have to use asm.

#  ifdef __x86_64__
  // The 0x40000 prefix indicates it's a diagnostic system call. The 0x01
  // suffix indicates the syscall number is 1, which also happens to be the
  // only diagnostic system call. See osfmk/mach/i386/syscall_sw.h for more
  // details.
  static const uint64_t diagCallNum = 0x4000001;
  uint64_t rv;

  __asm__ __volatile__(
      "syscall"

      // Return value goes in "a" (%rax).
      : /* outputs */ "=a"(rv)

      // The syscall number goes in "0", a synonym (from outputs) for "a"
      // (%rax). The syscall arguments go in "D" (%rdi) and "S" (%rsi).
      : /* inputs */ "0"(diagCallNum), "D"(aMode), "S"(aBuf)

      // The |syscall| instruction clobbers %rcx, %r11, and %rflags ("cc"). And
      // this particular syscall also writes memory (aBuf).
      : /* clobbers */ "rcx", "r11", "cc", "memory");

  // The narrowing from the 64-bit register value to int is deliberate; it is
  // spelled out here so it is not mistaken for an accident.
  return static_cast<int>(rv);
#  else
#    error Sorry, only x86-64 is supported
#  endif
}
250 | ||||
251 | static void diagCall64_dgPowerStat(pkg_energy_statistics_t* aPkes) { | |||
252 | static const uint64_t supported_version = 1; | |||
253 | ||||
254 | // Write an unsupported version number into pkes_version so that the check | |||
255 | // below cannot succeed by dumb luck. | |||
256 | aPkes->pkes_version = supported_version - 1; | |||
257 | ||||
258 | // diagCall64() returns 1 on success, and 0 on failure (which can only happen | |||
259 | // if the mode is unrecognized, e.g. in 10.7.x or earlier versions). | |||
260 | if (diagCall64(dgPowerStat, aPkes) != 1) { | |||
261 | Abort("diagCall64() failed"); | |||
262 | } | |||
263 | ||||
264 | if (aPkes->pkes_version != 1) { | |||
265 | Abort("unexpected pkes_version: %llu", aPkes->pkes_version); | |||
266 | } | |||
267 | } | |||
268 | ||||
269 | class RAPL { | |||
270 | bool mIsGpuSupported; // Is the GPU domain supported by the processor? | |||
271 | bool mIsRamSupported; // Is the RAM domain supported by the processor? | |||
272 | ||||
273 | // The DRAM domain on Haswell servers has a fixed energy unit (1/65536 J == | |||
274 | // 15.3 microJoules) which is different to the power unit MSR. (See the | |||
275 | // "Intel Xeon Processor E5-1600 and E5-2600 v3 Product Families, Volume 2 of | |||
276 | // 2, Registers" datasheet, September 2014, Reference Number: 330784-001.) | |||
277 | // This field records whether the quirk is present. | |||
278 | bool mHasRamUnitsQuirk; | |||
279 | ||||
280 | // The abovementioned 15.3 microJoules value. | |||
281 | static const double kQuirkyRamJoulesPerTick; | |||
282 | ||||
283 | // The previous sample's MSR values. | |||
284 | uint64_t mPrevPkgTicks; | |||
285 | uint64_t mPrevPp0Ticks; | |||
286 | uint64_t mPrevPp1Ticks; | |||
287 | uint64_t mPrevDdrTicks; | |||
288 | ||||
289 | // The struct passed to diagCall64(). | |||
290 | pkg_energy_statistics_t* mPkes; | |||
291 | ||||
292 | public: | |||
293 | RAPL() : mHasRamUnitsQuirk(false) { | |||
294 | // Work out which RAPL MSRs this CPU model supports. | |||
295 | int cpuModel; | |||
296 | size_t size = sizeof(cpuModel); | |||
297 | if (sysctlbyname("machdep.cpu.model", &cpuModel, &size, NULL__null, 0) != 0) { | |||
298 | Abort("sysctlbyname(\"machdep.cpu.model\") failed"); | |||
299 | } | |||
300 | ||||
301 | // This is similar to arch/x86/kernel/cpu/perf_event_intel_rapl.c in | |||
302 | // linux-4.1.5/. | |||
303 | // | |||
304 | // By linux-5.6.14/, this stuff had moved into | |||
305 | // arch/x86/events/intel/rapl.c, which references processor families in | |||
306 | // arch/x86/include/asm/intel-family.h. | |||
307 | switch (cpuModel) { | |||
308 | case 0x2a: // Sandy Bridge | |||
309 | case 0x3a: // Ivy Bridge | |||
310 | // Supports package, cores, GPU. | |||
311 | mIsGpuSupported = true; | |||
312 | mIsRamSupported = false; | |||
313 | break; | |||
314 | ||||
315 | case 0x3f: // Haswell X | |||
316 | case 0x4f: // Broadwell X | |||
317 | case 0x55: // Skylake X | |||
318 | case 0x56: // Broadwell D | |||
319 | // Supports package, cores, RAM. Has the units quirk. | |||
320 | mIsGpuSupported = false; | |||
321 | mIsRamSupported = true; | |||
322 | mHasRamUnitsQuirk = true; | |||
323 | break; | |||
324 | ||||
325 | case 0x2d: // Sandy Bridge X | |||
326 | case 0x3e: // Ivy Bridge X | |||
327 | // Supports package, cores, RAM. | |||
328 | mIsGpuSupported = false; | |||
329 | mIsRamSupported = true; | |||
330 | break; | |||
331 | ||||
332 | case 0x3c: // Haswell | |||
333 | case 0x3d: // Broadwell | |||
334 | case 0x45: // Haswell L | |||
335 | case 0x46: // Haswell G | |||
336 | case 0x47: // Broadwell G | |||
337 | // Supports package, cores, GPU, RAM. | |||
338 | mIsGpuSupported = true; | |||
339 | mIsRamSupported = true; | |||
340 | break; | |||
341 | ||||
342 | case 0x4e: // Skylake L | |||
343 | case 0x5e: // Skylake | |||
344 | case 0x8e: // Kaby Lake L | |||
345 | case 0x9e: // Kaby Lake | |||
346 | case 0x66: // Cannon Lake L | |||
347 | case 0x7d: // Ice Lake | |||
348 | case 0x7e: // Ice Lake L | |||
349 | case 0xa5: // Comet Lake | |||
350 | case 0xa6: // Comet Lake L | |||
351 | // Supports package, cores, GPU, RAM, PSYS. | |||
352 | // XXX: this tool currently doesn't measure PSYS. | |||
353 | mIsGpuSupported = true; | |||
354 | mIsRamSupported = true; | |||
355 | break; | |||
356 | ||||
357 | default: | |||
358 | Abort("unknown CPU model: %d", cpuModel); | |||
359 | break; | |||
360 | } | |||
361 | ||||
362 | // Get the maximum number of logical CPUs so that we know how big to make | |||
363 | // |mPkes|. | |||
364 | int logicalcpu_max; | |||
365 | size = sizeof(logicalcpu_max); | |||
366 | if (sysctlbyname("hw.logicalcpu_max", &logicalcpu_max, &size, NULL__null, 0) != | |||
367 | 0) { | |||
368 | Abort("sysctlbyname(\"hw.logicalcpu_max\") failed"); | |||
369 | } | |||
370 | ||||
371 | // Over-allocate by 1024 bytes per CPU to allow for the uncertainty around | |||
372 | // core_energy_stat_t::gpmcs and for any other future extensions to that | |||
373 | // struct. (The fields we read all come before the core_energy_stat_t | |||
374 | // array, so it won't matter to us whether gpmcs is present or not.) | |||
375 | size_t pkesSize = sizeof(pkg_energy_statistics_t) + | |||
376 | logicalcpu_max * sizeof(core_energy_stat_t) + | |||
377 | logicalcpu_max * 1024; | |||
378 | mPkes = (pkg_energy_statistics_t*)malloc(pkesSize); | |||
379 | if (!mPkes) { | |||
380 | Abort("malloc() failed"); | |||
381 | } | |||
382 | ||||
383 | // Do an initial measurement so that the first sample's diffs are sensible. | |||
384 | double dummy1, dummy2, dummy3, dummy4; | |||
385 | EnergyEstimates(dummy1, dummy2, dummy3, dummy4); | |||
386 | } | |||
387 | ||||
388 | ~RAPL() { free(mPkes); } | |||
389 | ||||
390 | static double Joules(uint64_t aTicks, double aJoulesPerTick) { | |||
391 | return double(aTicks) * aJoulesPerTick; | |||
392 | } | |||
393 | ||||
394 | void EnergyEstimates(double& aPkg_J, double& aCores_J, double& aGpu_J, | |||
395 | double& aRam_J) { | |||
396 | diagCall64_dgPowerStat(mPkes); | |||
397 | ||||
398 | // Bits 12:8 are the ESU. | |||
399 | // Energy measurements come in multiples of 1/(2^ESU). | |||
400 | uint32_t energyStatusUnits = (mPkes->pkg_power_unit >> 8) & 0x1f; | |||
401 | double joulesPerTick = ((double)1 / (1 << energyStatusUnits)); | |||
402 | ||||
403 | aPkg_J = Joules(mPkes->pkg_energy - mPrevPkgTicks, joulesPerTick); | |||
404 | aCores_J = Joules(mPkes->pp0_energy - mPrevPp0Ticks, joulesPerTick); | |||
405 | aGpu_J = mIsGpuSupported | |||
406 | ? Joules(mPkes->pp1_energy - mPrevPp1Ticks, joulesPerTick) | |||
407 | : kUnsupported_j; | |||
408 | aRam_J = mIsRamSupported | |||
409 | ? Joules(mPkes->ddr_energy - mPrevDdrTicks, | |||
410 | mHasRamUnitsQuirk ? kQuirkyRamJoulesPerTick | |||
411 | : joulesPerTick) | |||
412 | : kUnsupported_j; | |||
413 | ||||
414 | mPrevPkgTicks = mPkes->pkg_energy; | |||
415 | mPrevPp0Ticks = mPkes->pp0_energy; | |||
416 | if (mIsGpuSupported) { | |||
417 | mPrevPp1Ticks = mPkes->pp1_energy; | |||
418 | } | |||
419 | if (mIsRamSupported) { | |||
420 | mPrevDdrTicks = mPkes->ddr_energy; | |||
421 | } | |||
422 | } | |||
423 | }; | |||
424 | ||||
425 | /* static */ const double RAPL::kQuirkyRamJoulesPerTick = (double)1 / 65536; | |||
426 | ||||
427 | //--------------------------------------------------------------------------- | |||
428 | // Linux-specific code | |||
429 | //--------------------------------------------------------------------------- | |||
430 | ||||
431 | #elif defined(__linux__1) | |||
432 | ||||
433 | # include <linux1/perf_event.h> | |||
434 | # include <sys/syscall.h> | |||
435 | ||||
// There is no glibc wrapper for this system call so we provide our own.
//
// The arguments are forwarded unchanged to the perf_event_open system call.
// Returns whatever syscall() returns (a file descriptor on success, -1 on
// failure -- see perf_event_open(2)), narrowed to int.
static int perf_event_open(struct perf_event_attr* aAttr, pid_t aPid, int aCpu,
                           int aGroupFd, unsigned long aFlags) {
  // syscall() returns long; the result here always fits in an int, so make the
  // narrowing explicit.
  return static_cast<int>(
      syscall(__NR_perf_event_open, aAttr, aPid, aCpu, aGroupFd, aFlags));
}
441 | ||||
442 | // Returns false if the file cannot be opened. | |||
443 | template <typename T> | |||
444 | static bool ReadValueFromPowerFile(const char* aStr1, const char* aStr2, | |||
445 | const char* aStr3, const char* aScanfString, | |||
446 | T* aOut) { | |||
447 | // The filenames going into this buffer are under our control and the longest | |||
448 | // one is "/sys/bus/event_source/devices/power/events/energy-cores.scale". | |||
449 | // So 256 chars is plenty. | |||
450 | char filename[256]; | |||
451 | ||||
452 | sprintf(filename, "/sys/bus/event_source/devices/power/%s%s%s", aStr1, aStr2, | |||
453 | aStr3); | |||
454 | FILE* fp = fopen(filename, "r"); | |||
455 | if (!fp
| |||
456 | return false; | |||
457 | } | |||
458 | if (fscanf(fp, aScanfString, aOut) != 1) { | |||
459 | Abort("fscanf() failed"); | |||
460 | } | |||
461 | fclose(fp); | |||
462 | ||||
463 | return true; | |||
464 | } | |||
465 | ||||
466 | // This class encapsulates the reading of a single RAPL domain. | |||
467 | class Domain { | |||
468 | bool mIsSupported; // Is the domain supported by the processor? | |||
469 | ||||
470 | // These three are only set if |mIsSupported| is true. | |||
471 | double mJoulesPerTick; // How many Joules each tick of the MSR represents. | |||
472 | int mFd; // The fd through which the MSR is read. | |||
473 | double mPrevTicks; // The previous sample's MSR value. | |||
474 | ||||
475 | public: | |||
476 | enum IsOptional { Optional, NonOptional }; | |||
477 | ||||
478 | Domain(const char* aName, uint32_t aType, | |||
479 | IsOptional aOptional = NonOptional) { | |||
480 | uint64_t config; | |||
481 | if (!ReadValueFromPowerFile("events/energy-", aName, "", "event=%llx", | |||
482 | &config)) { | |||
483 | // Failure is allowed for optional domains. | |||
484 | if (aOptional == NonOptional) { | |||
485 | Abort( | |||
486 | "failed to open file for non-optional domain '%s'\n" | |||
487 | "- Is your kernel version 3.14 or later, as required? " | |||
488 | "Run |uname -r| to see.", | |||
489 | aName); | |||
490 | } | |||
491 | mIsSupported = false; | |||
492 | return; | |||
493 | } | |||
494 | ||||
495 | mIsSupported = true; | |||
496 | ||||
497 | if (!ReadValueFromPowerFile("events/energy-", aName, ".scale", "%lf", | |||
498 | &mJoulesPerTick)) { | |||
499 | Abort("failed to read from .scale file"); | |||
500 | } | |||
501 | ||||
502 | // The unit should be "Joules", so 128 chars should be plenty. | |||
503 | char unit[128]; | |||
504 | if (!ReadValueFromPowerFile("events/energy-", aName, ".unit", "%127s", | |||
505 | unit)) { | |||
506 | Abort("failed to read from .unit file"); | |||
507 | } | |||
508 | if (strcmp(unit, "Joules") != 0) { | |||
509 | Abort("unexpected unit '%s' in .unit file", unit); | |||
510 | } | |||
511 | ||||
512 | struct perf_event_attr attr; | |||
513 | memset(&attr, 0, sizeof(attr)); | |||
514 | attr.type = aType; | |||
515 | attr.size = uint32_t(sizeof(attr)); | |||
516 | attr.config = config; | |||
517 | ||||
518 | // Measure all processes/threads. The specified CPU doesn't matter. | |||
519 | mFd = perf_event_open(&attr, /* aPid = */ -1, /* aCpu = */ 0, | |||
520 | /* aGroupFd = */ -1, /* aFlags = */ 0); | |||
521 | if (mFd < 0) { | |||
522 | Abort( | |||
523 | "perf_event_open() failed\n" | |||
524 | "- Did you run as root (e.g. with |sudo|) or set\n" | |||
525 | " /proc/sys/kernel/perf_event_paranoid to 0, as required?"); | |||
526 | } | |||
527 | ||||
528 | mPrevTicks = 0; | |||
529 | } | |||
530 | ||||
531 | ~Domain() { | |||
532 | if (mIsSupported) { | |||
533 | close(mFd); | |||
534 | } | |||
535 | } | |||
536 | ||||
537 | double EnergyEstimate() { | |||
538 | if (!mIsSupported) { | |||
539 | return kUnsupported_j; | |||
540 | } | |||
541 | ||||
542 | uint64_t thisTicks; | |||
543 | if (read(mFd, &thisTicks, sizeof(uint64_t)) != sizeof(uint64_t)) { | |||
544 | Abort("read() failed"); | |||
545 | } | |||
546 | ||||
547 | uint64_t ticks = thisTicks - mPrevTicks; | |||
548 | mPrevTicks = thisTicks; | |||
549 | double joules = ticks * mJoulesPerTick; | |||
550 | return joules; | |||
551 | } | |||
552 | }; | |||
553 | ||||
554 | class RAPL { | |||
555 | Domain* mPkg; | |||
556 | Domain* mCores; | |||
557 | Domain* mGpu; | |||
558 | Domain* mRam; | |||
559 | ||||
560 | public: | |||
561 | RAPL() { | |||
562 | uint32_t type; | |||
563 | if (!ReadValueFromPowerFile("type", "", "", "%u", &type)) { | |||
564 | Abort("failed to read from type file"); | |||
565 | } | |||
566 | ||||
567 | mPkg = new Domain("pkg", type); | |||
| ||||
568 | mCores = new Domain("cores", type); | |||
569 | mGpu = new Domain("gpu", type, Domain::Optional); | |||
570 | mRam = new Domain("ram", type, Domain::Optional); | |||
571 | if (!mPkg || !mCores || !mGpu || !mRam) { | |||
572 | Abort("new Domain() failed"); | |||
573 | } | |||
574 | } | |||
575 | ||||
576 | ~RAPL() { | |||
577 | delete mPkg; | |||
578 | delete mCores; | |||
579 | delete mGpu; | |||
580 | delete mRam; | |||
581 | } | |||
582 | ||||
583 | void EnergyEstimates(double& aPkg_J, double& aCores_J, double& aGpu_J, | |||
584 | double& aRam_J) { | |||
585 | aPkg_J = mPkg->EnergyEstimate(); | |||
586 | aCores_J = mCores->EnergyEstimate(); | |||
587 | aGpu_J = mGpu->EnergyEstimate(); | |||
588 | aRam_J = mRam->EnergyEstimate(); | |||
589 | } | |||
590 | }; | |||
591 | ||||
592 | #else | |||
593 | ||||
594 | //--------------------------------------------------------------------------- | |||
595 | // Unsupported platforms | |||
596 | //--------------------------------------------------------------------------- | |||
597 | ||||
598 | # error Sorry, this platform is not supported | |||
599 | ||||
600 | #endif // platform | |||
601 | ||||
602 | //--------------------------------------------------------------------------- | |||
603 | // The main loop | |||
604 | //--------------------------------------------------------------------------- | |||
605 | ||||
606 | // The sample interval, measured in seconds. | |||
607 | static double gSampleInterval_sec; | |||
608 | ||||
609 | // The platform-specific RAPL-reading machinery. | |||
610 | static RAPL* gRapl; | |||
611 | ||||
612 | // All the sampled "total" values, in Watts. | |||
613 | MOZ_RUNINIT static std::vector<double> gTotals_W; | |||
614 | ||||
615 | // Power = Energy / Time, where power is measured in Watts, Energy is measured | |||
616 | // in Joules, and Time is measured in seconds. | |||
617 | static double JoulesToWatts(double aJoules) { | |||
618 | return aJoules / gSampleInterval_sec; | |||
619 | } | |||
620 | ||||
621 | // "Normalize" here means convert kUnsupported_j to zero so it can be used in | |||
622 | // additive expressions. All printed values are 5 or maybe 6 chars (though 6 | |||
623 | // chars would require a value > 100 W, which is unlikely). Values above 1000 W | |||
624 | // are normalized to " n/a ", so 6 chars is the longest that may be printed. | |||
625 | static void NormalizeAndPrintAsWatts(char* aBuf, double& aValue_J) { | |||
626 | if (aValue_J == kUnsupported_j || aValue_J >= 1000) { | |||
627 | aValue_J = 0; | |||
628 | sprintf(aBuf, "%s", " n/a "); | |||
629 | } else { | |||
630 | sprintf(aBuf, "%5.2f", JoulesToWatts(aValue_J)); | |||
631 | } | |||
632 | } | |||
633 | ||||
634 | static void SigAlrmHandler(int aSigNum, siginfo_t* aInfo, void* aContext) { | |||
635 | static int sampleNumber = 1; | |||
636 | ||||
637 | double pkg_J, cores_J, gpu_J, ram_J; | |||
638 | gRapl->EnergyEstimates(pkg_J, cores_J, gpu_J, ram_J); | |||
639 | ||||
640 | // We should have pkg and cores estimates, but might not have gpu and ram | |||
641 | // estimates. | |||
642 | assert(pkg_J != kUnsupported_j)(static_cast <bool> (pkg_J != kUnsupported_j) ? void (0 ) : __assert_fail ("pkg_J != kUnsupported_j", __builtin_FILE ( ), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__)); | |||
643 | assert(cores_J != kUnsupported_j)(static_cast <bool> (cores_J != kUnsupported_j) ? void ( 0) : __assert_fail ("cores_J != kUnsupported_j", __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__)); | |||
644 | ||||
645 | // This needs to be big enough to print watt values to two decimal places. 16 | |||
646 | // should be plenty. | |||
647 | static const size_t kNumStrLen = 16; | |||
648 | ||||
649 | static char pkgStr[kNumStrLen], coresStr[kNumStrLen], gpuStr[kNumStrLen], | |||
650 | ramStr[kNumStrLen]; | |||
651 | NormalizeAndPrintAsWatts(pkgStr, pkg_J); | |||
652 | NormalizeAndPrintAsWatts(coresStr, cores_J); | |||
653 | NormalizeAndPrintAsWatts(gpuStr, gpu_J); | |||
654 | NormalizeAndPrintAsWatts(ramStr, ram_J); | |||
655 | ||||
656 | // Core and GPU power are a subset of the package power. | |||
657 | assert(pkg_J >= cores_J + gpu_J)(static_cast <bool> (pkg_J >= cores_J + gpu_J) ? void (0) : __assert_fail ("pkg_J >= cores_J + gpu_J", __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__)); | |||
658 | ||||
659 | // Compute "other" (i.e. rest of the package) and "total" only after the | |||
660 | // other values have been normalized. | |||
661 | ||||
662 | char otherStr[kNumStrLen]; | |||
663 | double other_J = pkg_J - cores_J - gpu_J; | |||
664 | NormalizeAndPrintAsWatts(otherStr, other_J); | |||
665 | ||||
666 | char totalStr[kNumStrLen]; | |||
667 | double total_J = pkg_J + ram_J; | |||
668 | NormalizeAndPrintAsWatts(totalStr, total_J); | |||
669 | ||||
670 | gTotals_W.push_back(JoulesToWatts(total_J)); | |||
671 | ||||
672 | // Print and flush so that the output appears immediately even if being | |||
673 | // redirected through |tee| or anything like that. | |||
674 | PrintAndFlush("#%02d %s W = %s (%s + %s + %s) + %s W\n", sampleNumber++, | |||
675 | totalStr, pkgStr, coresStr, gpuStr, otherStr, ramStr); | |||
676 | } | |||
677 | ||||
678 | static void Finish() { | |||
679 | size_t n = gTotals_W.size(); | |||
680 | ||||
681 | // This time calculation assumes that the timers are perfectly accurate which | |||
682 | // is not true but the inaccuracy should be small in practice. | |||
683 | double time = n * gSampleInterval_sec; | |||
684 | ||||
685 | printf("\n"); | |||
686 | printf("%d sample%s taken over a period of %.3f second%s\n", int(n), | |||
687 | n == 1 ? "" : "s", n * gSampleInterval_sec, time == 1.0 ? "" : "s"); | |||
688 | ||||
689 | if (n == 0 || n == 1) { | |||
690 | exit(0); | |||
691 | } | |||
692 | ||||
693 | // Compute the mean. | |||
694 | double sum = std::accumulate(gTotals_W.begin(), gTotals_W.end(), 0.0); | |||
695 | double mean = sum / n; | |||
696 | ||||
697 | // Compute the *population* standard deviation: | |||
698 | // | |||
699 | // popStdDev = sqrt(Sigma(x - m)^2 / n) | |||
700 | // | |||
701 | // where |x| is the sum variable, |m| is the mean, and |n| is the | |||
702 | // population size. | |||
703 | // | |||
704 | // This is different from the *sample* standard deviation, which divides by | |||
705 | // |n - 1|, and would be appropriate if we were using a random sample of a | |||
706 | // larger population. | |||
707 | double sumOfSquaredDeviations = 0; | |||
708 | for (double& iter : gTotals_W) { | |||
709 | double deviation = (iter - mean); | |||
710 | sumOfSquaredDeviations += deviation * deviation; | |||
711 | } | |||
712 | double popStdDev = sqrt(sumOfSquaredDeviations / n); | |||
713 | ||||
714 | // Sort so that percentiles can be determined. We use the "Nearest Rank" | |||
715 | // method of determining percentiles, which is simplest to compute and which | |||
716 | // chooses values from those that appear in the input set. | |||
717 | std::sort(gTotals_W.begin(), gTotals_W.end()); | |||
718 | ||||
719 | printf("\n"); | |||
720 | printf("Distribution of 'total' values:\n"); | |||
721 | printf(" mean = %5.2f W\n", mean); | |||
722 | printf(" std dev = %5.2f W\n", popStdDev); | |||
723 | printf(" 0th percentile = %5.2f W (min)\n", gTotals_W[0]); | |||
724 | printf(" 5th percentile = %5.2f W\n", gTotals_W[ceil(0.05 * n) - 1]); | |||
725 | printf(" 25th percentile = %5.2f W\n", gTotals_W[ceil(0.25 * n) - 1]); | |||
726 | printf(" 50th percentile = %5.2f W\n", gTotals_W[ceil(0.50 * n) - 1]); | |||
727 | printf(" 75th percentile = %5.2f W\n", gTotals_W[ceil(0.75 * n) - 1]); | |||
728 | printf(" 95th percentile = %5.2f W\n", gTotals_W[ceil(0.95 * n) - 1]); | |||
729 | printf("100th percentile = %5.2f W (max)\n", gTotals_W[n - 1]); | |||
730 | ||||
731 | exit(0); | |||
732 | } | |||
733 | ||||
734 | static void SigIntHandler(int aSigNum, siginfo_t* aInfo, void* aContext) { | |||
735 | Finish(); | |||
736 | } | |||
737 | ||||
// Prints the command-line usage summary to stdout, followed by a
// platform-specific note about who is allowed to run the program.
//
// The platform note is selected at compile time; building on anything other
// than Mac or Linux is rejected with an #error.
static void PrintUsage() {
  printf(
      "usage: rapl [options]\n"
      "\n"
      "Options:\n"
      "\n"
      " -h --help show this message\n"
      " -i --sample-interval <N> sample every N ms [default=1000]\n"
      " -n --sample-count <N> get N samples (0 means unlimited) "
      "[default=0]\n"
      "\n"
#if defined(__APPLE__)
      "On Mac this program can be run by any user.\n"
#elif defined(__linux__)
      // The predefined macro is |__linux__|; testing any other spelling would
      // send Linux builds into the #error branch below.
      "On Linux this program can only be run by the super-user unless the "
      "contents\n"
      "of /proc/sys/kernel/perf_event_paranoid is set to 0 or lower.\n"
#else
#  error Sorry, this platform is not supported
#endif
      "\n");
}
760 | ||||
761 | int main(int argc, char** argv) { | |||
762 | // Process command line options. | |||
763 | ||||
764 | gArgv0 = argv[0]; | |||
765 | ||||
766 | // Default values. | |||
767 | int sampleInterval_msec = 1000; | |||
768 | int sampleCount = 0; | |||
769 | ||||
770 | struct option longOptions[] = { | |||
771 | {"help", no_argument0, NULL__null, 'h'}, | |||
772 | {"sample-interval", required_argument1, NULL__null, 'i'}, | |||
773 | {"sample-count", required_argument1, NULL__null, 'n'}, | |||
774 | {NULL__null, 0, NULL__null, 0}}; | |||
775 | const char* shortOptions = "hi:n:"; | |||
776 | ||||
777 | int c; | |||
778 | char* endPtr; | |||
779 | while ((c = getopt_long(argc, argv, shortOptions, longOptions, NULL__null)) != -1) { | |||
| ||||
780 | switch (c) { | |||
781 | case 'h': | |||
782 | PrintUsage(); | |||
783 | exit(0); | |||
784 | ||||
785 | case 'i': | |||
786 | sampleInterval_msec = strtol(optarg, &endPtr, /* base = */ 10); | |||
787 | if (*endPtr) { | |||
788 | CmdLineAbort("sample interval is not an integer"); | |||
789 | } | |||
790 | if (sampleInterval_msec < 1 || sampleInterval_msec > 3600000) { | |||
791 | CmdLineAbort("sample interval must be in the range 1..3600000 ms"); | |||
792 | } | |||
793 | break; | |||
794 | ||||
795 | case 'n': | |||
796 | sampleCount = strtol(optarg, &endPtr, /* base = */ 10); | |||
797 | if (*endPtr) { | |||
798 | CmdLineAbort("sample count is not an integer"); | |||
799 | } | |||
800 | if (sampleCount < 0 || sampleCount > 1000000) { | |||
801 | CmdLineAbort("sample count must be in the range 0..1000000"); | |||
802 | } | |||
803 | break; | |||
804 | ||||
805 | default: | |||
806 | CmdLineAbort(NULL__null); | |||
807 | } | |||
808 | } | |||
809 | ||||
810 | // The RAPL MSRs update every ~1 ms, but the measurement period isn't exactly | |||
811 | // 1 ms, which means the sample periods are not exact. "Power Measurement | |||
812 | // Techniques on Standard Compute Nodes: A Quantitative Comparison" by | |||
813 | // Hackenberg et al. suggests the following. | |||
814 | // | |||
815 | // "RAPL provides energy (and not power) consumption data without | |||
816 | // timestamps associated to each counter update. This makes sampling rates | |||
817 | // above 20 Samples/s unfeasible if the systematic error should be below | |||
818 | // 5%... Constantly polling the RAPL registers will both occupy a processor | |||
819 | // core and distort the measurement itself." | |||
820 | // | |||
821 | // So warn about this case. | |||
822 | if (sampleInterval_msec
| |||
823 | fprintf(stderrstderr, | |||
824 | "\nWARNING: sample intervals < 50 ms are likely to produce " | |||
825 | "inaccurate estimates\n\n"); | |||
826 | } | |||
827 | gSampleInterval_sec = double(sampleInterval_msec) / 1000; | |||
828 | ||||
829 | // Initialize the platform-specific RAPL reading machinery. | |||
830 | gRapl = new RAPL(); | |||
831 | if (!gRapl) { | |||
832 | Abort("new RAPL() failed"); | |||
833 | } | |||
834 | ||||
835 | // Install the signal handlers. | |||
836 | ||||
837 | struct sigaction sa; | |||
838 | memset(&sa, 0, sizeof(sa)); | |||
839 | sa.sa_flags = SA_RESTART0x10000000 | SA_SIGINFO4; | |||
840 | // The extra parens around (0) suppress a -Wunreachable-code warning on OS X | |||
841 | // where sigemptyset() is a macro that can never fail and always returns 0. | |||
842 | if (sigemptyset(&sa.sa_mask) < (0)) { | |||
843 | Abort("sigemptyset() failed"); | |||
844 | } | |||
845 | sa.sa_sigaction__sigaction_handler.sa_sigaction = SigAlrmHandler; | |||
846 | if (sigaction(SIGALRM14, &sa, NULL__null) < 0) { | |||
847 | Abort("sigaction(SIGALRM) failed"); | |||
848 | } | |||
849 | sa.sa_sigaction__sigaction_handler.sa_sigaction = SigIntHandler; | |||
850 | if (sigaction(SIGINT2, &sa, NULL__null) < 0) { | |||
851 | Abort("sigaction(SIGINT) failed"); | |||
852 | } | |||
853 | ||||
854 | // Set up the timer. | |||
855 | struct itimerval timer; | |||
856 | timer.it_interval.tv_sec = sampleInterval_msec / 1000; | |||
857 | timer.it_interval.tv_usec = (sampleInterval_msec % 1000) * 1000; | |||
858 | timer.it_value = timer.it_interval; | |||
859 | if (setitimer(ITIMER_REALITIMER_REAL, &timer, NULL__null) < 0) { | |||
860 | Abort("setitimer() failed"); | |||
861 | } | |||
862 | ||||
863 | // Print header. | |||
864 | PrintAndFlush(" total W = _pkg_ (cores + _gpu_ + other) + _ram_ W\n"); | |||
865 | ||||
866 | // Take samples. | |||
867 | if (sampleCount == 0) { | |||
868 | while (true) { | |||
869 | pause(); | |||
870 | } | |||
871 | } else { | |||
872 | for (int i = 0; i < sampleCount; i++) { | |||
873 | pause(); | |||
874 | } | |||
875 | } | |||
876 | ||||
877 | Finish(); | |||
878 | ||||
879 | return 0; | |||
880 | } |