File: | var/lib/jenkins/workspace/firefox-scan-build/tools/power/rapl.cpp |
Warning: | line 561, column 16 2nd function call argument is an uninitialized value |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ | |||
2 | /* vim: set ts=8 sts=2 et sw=2 tw=80: */ | |||
3 | /* This Source Code Form is subject to the terms of the Mozilla Public | |||
4 | * License, v. 2.0. If a copy of the MPL was not distributed with this | |||
5 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ | |||
6 | ||||
7 | // This program provides processor power estimates. It does this by reading | |||
8 | // model-specific registers (MSRs) that are part of Intel's Running Average Power | |||
9 | // Limit (RAPL) interface. These MSRs provide good quality estimates of the | |||
10 | // energy consumption of up to four system components: | |||
11 | // - PKG: the entire processor package; | |||
12 | // - PP0: the cores (a subset of the package); | |||
13 | // - PP1: the GPU (a subset of the package); | |||
14 | // - DRAM: main memory. | |||
15 | // | |||
16 | // For more details about RAPL, see section 14.9 of Volume 3 of the "Intel 64 | |||
17 | // and IA-32 Architectures Software Developer's Manual", Order Number 325384. | |||
18 | // | |||
19 | // This program exists because there are no existing tools on Mac that can | |||
20 | // obtain all four RAPL estimates. (|powermetrics| can obtain the package | |||
21 | // estimate, but not the others. Intel Power Gadget can obtain the package and | |||
22 | // cores estimates.) | |||
23 | // | |||
24 | // On Linux |perf| can obtain all four estimates (as Joules, which are easily | |||
25 | // converted to Watts), but this program is implemented for Linux because it's | |||
26 | // not too hard to do, and that gives us multi-platform consistency. | |||
27 | // | |||
28 | // This program does not support Windows, unfortunately. It's not obvious how | |||
29 | // to access the RAPL MSRs on Windows. | |||
30 | // | |||
31 | // This program deliberately uses only standard libraries and avoids | |||
32 | // Mozilla-specific code, to make it easy to compile and test on different | |||
33 | // machines. | |||
34 | ||||
35 | #include <assert.h> | |||
36 | #include <getopt.h> | |||
37 | #include <math.h> | |||
38 | #include <signal.h> | |||
39 | #include <stdarg.h> | |||
40 | #include <stdint.h> | |||
41 | #include <stdio.h> | |||
42 | #include <stdlib.h> | |||
43 | #include <string.h> | |||
44 | #include <sys/time.h> | |||
45 | #include <unistd.h> | |||
46 | ||||
47 | #include <algorithm> | |||
48 | #include <numeric> | |||
49 | #include <vector> | |||
50 | ||||
51 | //--------------------------------------------------------------------------- | |||
52 | // Utilities | |||
53 | //--------------------------------------------------------------------------- | |||
54 | ||||
55 | // The value of argv[0] passed to main(). Used in error messages. | |||
56 | static const char* gArgv0; | |||
57 | ||||
58 | static void Abort(const char* aFormat, ...) { | |||
59 | va_list vargs; | |||
60 | va_start(vargs, aFormat)__builtin_va_start(vargs, aFormat); | |||
61 | fprintf(stderrstderr, "%s: ", gArgv0); | |||
62 | vfprintf(stderrstderr, aFormat, vargs); | |||
63 | fprintf(stderrstderr, "\n"); | |||
64 | va_end(vargs)__builtin_va_end(vargs); | |||
65 | ||||
66 | exit(1); | |||
67 | } | |||
68 | ||||
69 | static void CmdLineAbort(const char* aMsg) { | |||
70 | if (aMsg) { | |||
71 | fprintf(stderrstderr, "%s: %s\n", gArgv0, aMsg); | |||
72 | } | |||
73 | fprintf(stderrstderr, "Use --help for more information.\n"); | |||
74 | exit(1); | |||
75 | } | |||
76 | ||||
77 | // A special value that represents an estimate from an unsupported RAPL domain. | |||
78 | static const double kUnsupported_j = -1.0; | |||
79 | ||||
// printf()-style output to stdout followed by an explicit flush, so that each
// line shows up straight away even when stdout is not a terminal (e.g. when it
// is piped through |tee|).
static void PrintAndFlush(const char* aFormat, ...) {
  va_list args;

  va_start(args, aFormat);
  vfprintf(stdout, aFormat, args);
  va_end(args);

  fflush(stdout);
}
90 | ||||
91 | //--------------------------------------------------------------------------- | |||
92 | // Mac-specific code | |||
93 | //--------------------------------------------------------------------------- | |||
94 | ||||
95 | #if defined(__APPLE__) | |||
96 | ||||
97 | // Because of the pkg_energy_statistics_t::pkes_version check below, the | |||
98 | // earliest OS X version this code will work with is 10.9.0 (xnu-2422.1.72). | |||
99 | ||||
100 | # include <sys/types.h> | |||
101 | # include <sys/sysctl.h> | |||
102 | ||||
103 | // OS X has four kinds of system calls: | |||
104 | // | |||
105 | // 1. Mach traps; | |||
106 | // 2. UNIX system calls; | |||
107 | // 3. machine-dependent calls; | |||
108 | // 4. diagnostic calls. | |||
109 | // | |||
110 | // (See "Mac OS X and iOS Internals" by Jonathan Levin for more details.) | |||
111 | // | |||
112 | // The last category has a single call named diagCall() or diagCall64(). Its | |||
113 | // mode is controlled by its first argument, and one of the modes allows access | |||
114 | // to the Intel RAPL MSRs. | |||
115 | // | |||
116 | // The interface to diagCall64() is not exported, so we have to import some | |||
117 | // definitions from the XNU kernel. All imported definitions are annotated with | |||
118 | // the XNU source file they come from, and information about what XNU versions | |||
119 | // they were introduced in and (if relevant) modified. | |||
120 | ||||
121 | // The diagCall64() mode. | |||
122 | // From osfmk/i386/Diagnostics.h | |||
123 | // - In 10.8.4 (xnu-2050.24.15) this value was introduced. (In 10.8.3 the value | |||
124 | // 17 was used for dgGzallocTest.) | |||
125 | # define dgPowerStat 17 | |||
126 | ||||
127 | // From osfmk/i386/cpu_data.h | |||
128 | // - In 10.8.5 these values were introduced, along with core_energy_stat_t. | |||
129 | # define CPU_RTIME_BINS (12) | |||
130 | # define CPU_ITIME_BINS (CPU_RTIME_BINS) | |||
131 | ||||
132 | // core_energy_stat_t and pkg_energy_statistics_t are both from | |||
133 | // osfmk/i386/Diagnostics.c. | |||
134 | // - In 10.8.4 (xnu-2050.24.15) both structs were introduced, but with many | |||
135 | // fewer fields. | |||
136 | // - In 10.8.5 (xnu-2050.48.11) both structs were substantially expanded, with | |||
137 | // numerous new fields. | |||
138 | // - In 10.9.0 (xnu-2422.1.72) pkg_energy_statistics_t::pkes_version was added. | |||
139 | // diagCall64(dgPowerStat) fills it with '1' in all versions since (up to | |||
140 | // 10.10.2 at time of writing). | |||
141 | // - in 10.10.2 (xnu-2782.10.72) core_energy_stat_t::gpmcs was conditionally | |||
142 | // added, if DIAG_ALL_PMCS is true. (DIAG_ALL_PMCS is not even defined in the | |||
143 | // source code, but it could be defined at compile-time via compiler flags.) | |||
144 | // pkg_energy_statistics_t::pkes_version did not change, though. | |||
145 | ||||
// Per-CPU statistics record. An array of these trails
// pkg_energy_statistics_t (see its |cest| member). This program never reads
// any of these fields; the type is only needed so that the buffer handed to
// diagCall64() can be sized.
typedef struct {
  uint64_t caperf;
  uint64_t cmperf;
  uint64_t ccres[6];
  uint64_t crtimes[CPU_RTIME_BINS];
  uint64_t citimes[CPU_ITIME_BINS];
  uint64_t crtime_total;
  uint64_t citime_total;
  uint64_t cpu_idle_exits;
  uint64_t cpu_insns;
  uint64_t cpu_ucc;
  uint64_t cpu_urc;
#  if DIAG_ALL_PMCS           // Added in 10.10.2 (xnu-2782.10.72).
  uint64_t gpmcs[4];          // Added in 10.10.2 (xnu-2782.10.72).
#  endif /* DIAG_ALL_PMCS */  // Added in 10.10.2 (xnu-2782.10.72).
} core_energy_stat_t;
162 | ||||
// The buffer layout filled in by diagCall64(dgPowerStat). Of all the fields,
// this program only reads |pkes_version|, |pkg_power_unit| and the four
// *_energy counters; everything else is present purely to keep the layout
// in step with the kernel's definition.
typedef struct {
  uint64_t pkes_version;  // Added in 10.9.0 (xnu-2422.1.72).
  uint64_t pkg_cres[2][7];

  // This is read from MSR 0x606, which Intel calls MSR_RAPL_POWER_UNIT
  // and XNU calls MSR_IA32_PKG_POWER_SKU_UNIT.
  uint64_t pkg_power_unit;

  // These are the four fields for the four RAPL domains. For each field
  // we list:
  //
  // - the corresponding MSR number;
  // - Intel's name for that MSR;
  // - XNU's name for that MSR;
  // - which Intel processors the MSR is supported on.
  //
  // The last of these is determined from chapter 35 of Volume 3 of the
  // "Intel 64 and IA-32 Architectures Software Developer's Manual",
  // Order Number 325384. (Note that chapter 35 contradicts section 14.9
  // to some degree.)

  // 0x611 == MSR_PKG_ENERGY_STATUS == MSR_IA32_PKG_ENERGY_STATUS
  // Atom (various), Sandy Bridge, Next Gen Xeon Phi (model 0x57).
  uint64_t pkg_energy;

  // 0x639 == MSR_PP0_ENERGY_STATUS == MSR_IA32_PP0_ENERGY_STATUS
  // Atom (various), Sandy Bridge, Next Gen Xeon Phi (model 0x57).
  uint64_t pp0_energy;

  // 0x641 == MSR_PP1_ENERGY_STATUS == MSR_PP1_ENERGY_STATUS
  // Sandy Bridge, Haswell.
  uint64_t pp1_energy;

  // 0x619 == MSR_DRAM_ENERGY_STATUS == MSR_IA32_DDR_ENERGY_STATUS
  // Xeon E5, Xeon E5 v2, Haswell/Haswell-E, Next Gen Xeon Phi (model
  // 0x57)
  uint64_t ddr_energy;

  uint64_t llc_flushed_cycles;
  uint64_t ring_ratio_instantaneous;
  uint64_t IA_frequency_clipping_cause;
  uint64_t GT_frequency_clipping_cause;
  uint64_t pkg_idle_exits;
  uint64_t pkg_rtimes[CPU_RTIME_BINS];
  uint64_t pkg_itimes[CPU_ITIME_BINS];
  uint64_t mbus_delay_time;
  uint64_t mint_delay_time;
  uint32_t ncpus;
  // Flexible array member: the per-CPU records follow the fixed-size header.
  // NOTE(review): presumably |ncpus| entries are written by the kernel --
  // confirm against the XNU source.
  core_energy_stat_t cest[];
} pkg_energy_statistics_t;
213 | ||||
// Issues the diagnostic system call with mode |aMode|, passing |aBuf| as the
// buffer argument, and returns the value the kernel leaves in %rax (narrowed
// from uint64_t to int). Only x86-64 is supported; any other architecture is
// a compile-time error.
static int diagCall64(uint64_t aMode, void* aBuf) {
  // We cannot use syscall() here because it doesn't work with diagnostic
  // system calls -- it raises SIGSYS if you try. So we have to use asm.

#  ifdef __x86_64__
  // The 0x40000 prefix indicates it's a diagnostic system call. The 0x01
  // suffix indicates the syscall number is 1, which also happens to be the
  // only diagnostic system call. See osfmk/mach/i386/syscall_sw.h for more
  // details.
  static const uint64_t diagCallNum = 0x4000001;
  uint64_t rv;

  __asm__ __volatile__(
      "syscall"

      // Return value goes in "a" (%rax).
      : /* outputs */ "=a"(rv)

      // The syscall number goes in "0", a synonym (from outputs) for "a"
      // (%rax). The syscall arguments go in "D" (%rdi) and "S" (%rsi).
      : /* inputs */ "0"(diagCallNum), "D"(aMode), "S"(aBuf)

      // The |syscall| instruction clobbers %rcx, %r11, and %rflags ("cc"). And
      // this particular syscall also writes memory (aBuf).
      : /* clobbers */ "rcx", "r11", "cc", "memory");
  return rv;
#  else
#    error Sorry, only x86-64 is supported
#  endif
}
244 | ||||
245 | static void diagCall64_dgPowerStat(pkg_energy_statistics_t* aPkes) { | |||
246 | static const uint64_t supported_version = 1; | |||
247 | ||||
248 | // Write an unsupported version number into pkes_version so that the check | |||
249 | // below cannot succeed by dumb luck. | |||
250 | aPkes->pkes_version = supported_version - 1; | |||
251 | ||||
252 | // diagCall64() returns 1 on success, and 0 on failure (which can only happen | |||
253 | // if the mode is unrecognized, e.g. in 10.7.x or earlier versions). | |||
254 | if (diagCall64(dgPowerStat, aPkes) != 1) { | |||
255 | Abort("diagCall64() failed"); | |||
256 | } | |||
257 | ||||
258 | if (aPkes->pkes_version != 1) { | |||
259 | Abort("unexpected pkes_version: %llu", aPkes->pkes_version); | |||
260 | } | |||
261 | } | |||
262 | ||||
263 | class RAPL { | |||
264 | bool mIsGpuSupported; // Is the GPU domain supported by the processor? | |||
265 | bool mIsRamSupported; // Is the RAM domain supported by the processor? | |||
266 | ||||
267 | // The DRAM domain on Haswell servers has a fixed energy unit (1/65536 J == | |||
268 | // 15.3 microJoules) which is different to the power unit MSR. (See the | |||
269 | // "Intel Xeon Processor E5-1600 and E5-2600 v3 Product Families, Volume 2 of | |||
270 | // 2, Registers" datasheet, September 2014, Reference Number: 330784-001.) | |||
271 | // This field records whether the quirk is present. | |||
272 | bool mHasRamUnitsQuirk; | |||
273 | ||||
274 | // The abovementioned 15.3 microJoules value. | |||
275 | static const double kQuirkyRamJoulesPerTick; | |||
276 | ||||
277 | // The previous sample's MSR values. | |||
278 | uint64_t mPrevPkgTicks; | |||
279 | uint64_t mPrevPp0Ticks; | |||
280 | uint64_t mPrevPp1Ticks; | |||
281 | uint64_t mPrevDdrTicks; | |||
282 | ||||
283 | // The struct passed to diagCall64(). | |||
284 | pkg_energy_statistics_t* mPkes; | |||
285 | ||||
286 | public: | |||
287 | RAPL() : mHasRamUnitsQuirk(false) { | |||
288 | // Work out which RAPL MSRs this CPU model supports. | |||
289 | int cpuModel; | |||
290 | size_t size = sizeof(cpuModel); | |||
291 | if (sysctlbyname("machdep.cpu.model", &cpuModel, &size, NULL__null, 0) != 0) { | |||
292 | Abort("sysctlbyname(\"machdep.cpu.model\") failed"); | |||
293 | } | |||
294 | ||||
295 | // This is similar to arch/x86/kernel/cpu/perf_event_intel_rapl.c in | |||
296 | // linux-4.1.5/. | |||
297 | // | |||
298 | // By linux-5.6.14/, this stuff had moved into | |||
299 | // arch/x86/events/intel/rapl.c, which references processor families in | |||
300 | // arch/x86/include/asm/intel-family.h. | |||
301 | switch (cpuModel) { | |||
302 | case 0x2a: // Sandy Bridge | |||
303 | case 0x3a: // Ivy Bridge | |||
304 | // Supports package, cores, GPU. | |||
305 | mIsGpuSupported = true; | |||
306 | mIsRamSupported = false; | |||
307 | break; | |||
308 | ||||
309 | case 0x3f: // Haswell X | |||
310 | case 0x4f: // Broadwell X | |||
311 | case 0x55: // Skylake X | |||
312 | case 0x56: // Broadwell D | |||
313 | // Supports package, cores, RAM. Has the units quirk. | |||
314 | mIsGpuSupported = false; | |||
315 | mIsRamSupported = true; | |||
316 | mHasRamUnitsQuirk = true; | |||
317 | break; | |||
318 | ||||
319 | case 0x2d: // Sandy Bridge X | |||
320 | case 0x3e: // Ivy Bridge X | |||
321 | // Supports package, cores, RAM. | |||
322 | mIsGpuSupported = false; | |||
323 | mIsRamSupported = true; | |||
324 | break; | |||
325 | ||||
326 | case 0x3c: // Haswell | |||
327 | case 0x3d: // Broadwell | |||
328 | case 0x45: // Haswell L | |||
329 | case 0x46: // Haswell G | |||
330 | case 0x47: // Broadwell G | |||
331 | // Supports package, cores, GPU, RAM. | |||
332 | mIsGpuSupported = true; | |||
333 | mIsRamSupported = true; | |||
334 | break; | |||
335 | ||||
336 | case 0x4e: // Skylake L | |||
337 | case 0x5e: // Skylake | |||
338 | case 0x8e: // Kaby Lake L | |||
339 | case 0x9e: // Kaby Lake | |||
340 | case 0x66: // Cannon Lake L | |||
341 | case 0x7d: // Ice Lake | |||
342 | case 0x7e: // Ice Lake L | |||
343 | case 0xa5: // Comet Lake | |||
344 | case 0xa6: // Comet Lake L | |||
345 | // Supports package, cores, GPU, RAM, PSYS. | |||
346 | // XXX: this tool currently doesn't measure PSYS. | |||
347 | mIsGpuSupported = true; | |||
348 | mIsRamSupported = true; | |||
349 | break; | |||
350 | ||||
351 | default: | |||
352 | Abort("unknown CPU model: %d", cpuModel); | |||
353 | break; | |||
354 | } | |||
355 | ||||
356 | // Get the maximum number of logical CPUs so that we know how big to make | |||
357 | // |mPkes|. | |||
358 | int logicalcpu_max; | |||
359 | size = sizeof(logicalcpu_max); | |||
360 | if (sysctlbyname("hw.logicalcpu_max", &logicalcpu_max, &size, NULL__null, 0) != | |||
361 | 0) { | |||
362 | Abort("sysctlbyname(\"hw.logicalcpu_max\") failed"); | |||
363 | } | |||
364 | ||||
365 | // Over-allocate by 1024 bytes per CPU to allow for the uncertainty around | |||
366 | // core_energy_stat_t::gpmcs and for any other future extensions to that | |||
367 | // struct. (The fields we read all come before the core_energy_stat_t | |||
368 | // array, so it won't matter to us whether gpmcs is present or not.) | |||
369 | size_t pkesSize = sizeof(pkg_energy_statistics_t) + | |||
370 | logicalcpu_max * sizeof(core_energy_stat_t) + | |||
371 | logicalcpu_max * 1024; | |||
372 | mPkes = (pkg_energy_statistics_t*)malloc(pkesSize); | |||
373 | if (!mPkes) { | |||
374 | Abort("malloc() failed"); | |||
375 | } | |||
376 | ||||
377 | // Do an initial measurement so that the first sample's diffs are sensible. | |||
378 | double dummy1, dummy2, dummy3, dummy4; | |||
379 | EnergyEstimates(dummy1, dummy2, dummy3, dummy4); | |||
380 | } | |||
381 | ||||
382 | ~RAPL() { free(mPkes); } | |||
383 | ||||
384 | static double Joules(uint64_t aTicks, double aJoulesPerTick) { | |||
385 | return double(aTicks) * aJoulesPerTick; | |||
386 | } | |||
387 | ||||
388 | void EnergyEstimates(double& aPkg_J, double& aCores_J, double& aGpu_J, | |||
389 | double& aRam_J) { | |||
390 | diagCall64_dgPowerStat(mPkes); | |||
391 | ||||
392 | // Bits 12:8 are the ESU. | |||
393 | // Energy measurements come in multiples of 1/(2^ESU). | |||
394 | uint32_t energyStatusUnits = (mPkes->pkg_power_unit >> 8) & 0x1f; | |||
395 | double joulesPerTick = ((double)1 / (1 << energyStatusUnits)); | |||
396 | ||||
397 | aPkg_J = Joules(mPkes->pkg_energy - mPrevPkgTicks, joulesPerTick); | |||
398 | aCores_J = Joules(mPkes->pp0_energy - mPrevPp0Ticks, joulesPerTick); | |||
399 | aGpu_J = mIsGpuSupported | |||
400 | ? Joules(mPkes->pp1_energy - mPrevPp1Ticks, joulesPerTick) | |||
401 | : kUnsupported_j; | |||
402 | aRam_J = mIsRamSupported | |||
403 | ? Joules(mPkes->ddr_energy - mPrevDdrTicks, | |||
404 | mHasRamUnitsQuirk ? kQuirkyRamJoulesPerTick | |||
405 | : joulesPerTick) | |||
406 | : kUnsupported_j; | |||
407 | ||||
408 | mPrevPkgTicks = mPkes->pkg_energy; | |||
409 | mPrevPp0Ticks = mPkes->pp0_energy; | |||
410 | if (mIsGpuSupported) { | |||
411 | mPrevPp1Ticks = mPkes->pp1_energy; | |||
412 | } | |||
413 | if (mIsRamSupported) { | |||
414 | mPrevDdrTicks = mPkes->ddr_energy; | |||
415 | } | |||
416 | } | |||
417 | }; | |||
418 | ||||
419 | /* static */ const double RAPL::kQuirkyRamJoulesPerTick = (double)1 / 65536; | |||
420 | ||||
421 | //--------------------------------------------------------------------------- | |||
422 | // Linux-specific code | |||
423 | //--------------------------------------------------------------------------- | |||
424 | ||||
425 | #elif defined(__linux__1) | |||
426 | ||||
427 | # include <linux1/perf_event.h> | |||
428 | # include <sys/syscall.h> | |||
429 | ||||
// glibc does not wrap the perf_event_open system call, so this thin shim
// forwards its arguments to syscall() unchanged and narrows the result to int.
static int perf_event_open(struct perf_event_attr* aAttr, pid_t aPid, int aCpu,
                           int aGroupFd, unsigned long aFlags) {
  const long result =
      syscall(__NR_perf_event_open, aAttr, aPid, aCpu, aGroupFd, aFlags);
  return int(result);
}
435 | ||||
436 | // Returns false if the file cannot be opened. | |||
437 | template <typename T> | |||
438 | static bool ReadValueFromPowerFile(const char* aStr1, const char* aStr2, | |||
439 | const char* aStr3, const char* aScanfString, | |||
440 | T* aOut) { | |||
441 | // The filenames going into this buffer are under our control and the longest | |||
442 | // one is "/sys/bus/event_source/devices/power/events/energy-cores.scale". | |||
443 | // So 256 chars is plenty. | |||
444 | char filename[256]; | |||
445 | ||||
446 | sprintf(filename, "/sys/bus/event_source/devices/power/%s%s%s", aStr1, aStr2, | |||
447 | aStr3); | |||
448 | FILE* fp = fopen(filename, "r"); | |||
449 | if (!fp
| |||
450 | return false; | |||
451 | } | |||
452 | if (fscanf(fp, aScanfString, aOut) != 1) { | |||
453 | Abort("fscanf() failed"); | |||
454 | } | |||
455 | fclose(fp); | |||
456 | ||||
457 | return true; | |||
458 | } | |||
459 | ||||
460 | // This class encapsulates the reading of a single RAPL domain. | |||
461 | class Domain { | |||
462 | bool mIsSupported; // Is the domain supported by the processor? | |||
463 | ||||
464 | // These three are only set if |mIsSupported| is true. | |||
465 | double mJoulesPerTick; // How many Joules each tick of the MSR represents. | |||
466 | int mFd; // The fd through which the MSR is read. | |||
467 | double mPrevTicks; // The previous sample's MSR value. | |||
468 | ||||
469 | public: | |||
470 | enum IsOptional { Optional, NonOptional }; | |||
471 | ||||
472 | Domain(const char* aName, uint32_t aType, | |||
473 | IsOptional aOptional = NonOptional) { | |||
474 | uint64_t config; | |||
475 | if (!ReadValueFromPowerFile("events/energy-", aName, "", "event=%llx", | |||
476 | &config)) { | |||
477 | // Failure is allowed for optional domains. | |||
478 | if (aOptional == NonOptional) { | |||
479 | Abort( | |||
480 | "failed to open file for non-optional domain '%s'\n" | |||
481 | "- Is your kernel version 3.14 or later, as required? " | |||
482 | "Run |uname -r| to see.", | |||
483 | aName); | |||
484 | } | |||
485 | mIsSupported = false; | |||
486 | return; | |||
487 | } | |||
488 | ||||
489 | mIsSupported = true; | |||
490 | ||||
491 | if (!ReadValueFromPowerFile("events/energy-", aName, ".scale", "%lf", | |||
492 | &mJoulesPerTick)) { | |||
493 | Abort("failed to read from .scale file"); | |||
494 | } | |||
495 | ||||
496 | // The unit should be "Joules", so 128 chars should be plenty. | |||
497 | char unit[128]; | |||
498 | if (!ReadValueFromPowerFile("events/energy-", aName, ".unit", "%127s", | |||
499 | unit)) { | |||
500 | Abort("failed to read from .unit file"); | |||
501 | } | |||
502 | if (strcmp(unit, "Joules") != 0) { | |||
503 | Abort("unexpected unit '%s' in .unit file", unit); | |||
504 | } | |||
505 | ||||
506 | struct perf_event_attr attr; | |||
507 | memset(&attr, 0, sizeof(attr)); | |||
508 | attr.type = aType; | |||
509 | attr.size = uint32_t(sizeof(attr)); | |||
510 | attr.config = config; | |||
511 | ||||
512 | // Measure all processes/threads. The specified CPU doesn't matter. | |||
513 | mFd = perf_event_open(&attr, /* aPid = */ -1, /* aCpu = */ 0, | |||
514 | /* aGroupFd = */ -1, /* aFlags = */ 0); | |||
515 | if (mFd < 0) { | |||
516 | Abort( | |||
517 | "perf_event_open() failed\n" | |||
518 | "- Did you run as root (e.g. with |sudo|) or set\n" | |||
519 | " /proc/sys/kernel/perf_event_paranoid to 0, as required?"); | |||
520 | } | |||
521 | ||||
522 | mPrevTicks = 0; | |||
523 | } | |||
524 | ||||
525 | ~Domain() { | |||
526 | if (mIsSupported) { | |||
527 | close(mFd); | |||
528 | } | |||
529 | } | |||
530 | ||||
531 | double EnergyEstimate() { | |||
532 | if (!mIsSupported) { | |||
533 | return kUnsupported_j; | |||
534 | } | |||
535 | ||||
536 | uint64_t thisTicks; | |||
537 | if (read(mFd, &thisTicks, sizeof(uint64_t)) != sizeof(uint64_t)) { | |||
538 | Abort("read() failed"); | |||
539 | } | |||
540 | ||||
541 | uint64_t ticks = thisTicks - mPrevTicks; | |||
542 | mPrevTicks = thisTicks; | |||
543 | double joules = ticks * mJoulesPerTick; | |||
544 | return joules; | |||
545 | } | |||
546 | }; | |||
547 | ||||
548 | class RAPL { | |||
549 | Domain* mPkg; | |||
550 | Domain* mCores; | |||
551 | Domain* mGpu; | |||
552 | Domain* mRam; | |||
553 | ||||
554 | public: | |||
555 | RAPL() { | |||
556 | uint32_t type; | |||
557 | if (!ReadValueFromPowerFile("type", "", "", "%u", &type)) { | |||
558 | Abort("failed to read from type file"); | |||
559 | } | |||
560 | ||||
561 | mPkg = new Domain("pkg", type); | |||
| ||||
562 | mCores = new Domain("cores", type); | |||
563 | mGpu = new Domain("gpu", type, Domain::Optional); | |||
564 | mRam = new Domain("ram", type, Domain::Optional); | |||
565 | if (!mPkg || !mCores || !mGpu || !mRam) { | |||
566 | Abort("new Domain() failed"); | |||
567 | } | |||
568 | } | |||
569 | ||||
570 | ~RAPL() { | |||
571 | delete mPkg; | |||
572 | delete mCores; | |||
573 | delete mGpu; | |||
574 | delete mRam; | |||
575 | } | |||
576 | ||||
577 | void EnergyEstimates(double& aPkg_J, double& aCores_J, double& aGpu_J, | |||
578 | double& aRam_J) { | |||
579 | aPkg_J = mPkg->EnergyEstimate(); | |||
580 | aCores_J = mCores->EnergyEstimate(); | |||
581 | aGpu_J = mGpu->EnergyEstimate(); | |||
582 | aRam_J = mRam->EnergyEstimate(); | |||
583 | } | |||
584 | }; | |||
585 | ||||
586 | #else | |||
587 | ||||
588 | //--------------------------------------------------------------------------- | |||
589 | // Unsupported platforms | |||
590 | //--------------------------------------------------------------------------- | |||
591 | ||||
592 | # error Sorry, this platform is not supported | |||
593 | ||||
594 | #endif // platform | |||
595 | ||||
596 | //--------------------------------------------------------------------------- | |||
597 | // The main loop | |||
598 | //--------------------------------------------------------------------------- | |||
599 | ||||
600 | // The sample interval, measured in seconds. | |||
601 | static double gSampleInterval_sec; | |||
602 | ||||
603 | // The platform-specific RAPL-reading machinery. | |||
604 | static RAPL* gRapl; | |||
605 | ||||
606 | // All the sampled "total" values, in Watts. | |||
607 | static std::vector<double> gTotals_W; | |||
608 | ||||
609 | // Power = Energy / Time, where power is measured in Watts, Energy is measured | |||
610 | // in Joules, and Time is measured in seconds. | |||
611 | static double JoulesToWatts(double aJoules) { | |||
612 | return aJoules / gSampleInterval_sec; | |||
613 | } | |||
614 | ||||
615 | // "Normalize" here means convert kUnsupported_j to zero so it can be used in | |||
616 | // additive expressions. All printed values are 5 or maybe 6 chars (though 6 | |||
617 | // chars would require a value > 100 W, which is unlikely). Values above 1000 W | |||
618 | // are normalized to " n/a ", so 6 chars is the longest that may be printed. | |||
619 | static void NormalizeAndPrintAsWatts(char* aBuf, double& aValue_J) { | |||
620 | if (aValue_J == kUnsupported_j || aValue_J >= 1000) { | |||
621 | aValue_J = 0; | |||
622 | sprintf(aBuf, "%s", " n/a "); | |||
623 | } else { | |||
624 | sprintf(aBuf, "%5.2f", JoulesToWatts(aValue_J)); | |||
625 | } | |||
626 | } | |||
627 | ||||
// Takes one sample: asks |gRapl| for the per-domain energy estimates, formats
// them as Watts, appends the sample's total to |gTotals_W| and prints one
// output line. The signal arguments are not used.
// NOTE(review): presumably installed as the SIGALRM handler by main() -- the
// registration is outside the visible part of the file.
static void SigAlrmHandler(int aSigNum, siginfo_t* aInfo, void* aContext) {
  // Numbers the output lines, starting from 1.
  static int sampleNumber = 1;

  double pkg_J, cores_J, gpu_J, ram_J;
  gRapl->EnergyEstimates(pkg_J, cores_J, gpu_J, ram_J);

  // We should have pkg and cores estimates, but might not have gpu and ram
  // estimates.
  assert(pkg_J != kUnsupported_j);
  assert(cores_J != kUnsupported_j);

  // This needs to be big enough to print watt values to two decimal places. 16
  // should be plenty.
  static const size_t kNumStrLen = 16;

  static char pkgStr[kNumStrLen], coresStr[kNumStrLen], gpuStr[kNumStrLen],
      ramStr[kNumStrLen];
  // These calls also rewrite unsupported values to zero in place, which the
  // sums computed below rely on.
  NormalizeAndPrintAsWatts(pkgStr, pkg_J);
  NormalizeAndPrintAsWatts(coresStr, cores_J);
  NormalizeAndPrintAsWatts(gpuStr, gpu_J);
  NormalizeAndPrintAsWatts(ramStr, ram_J);

  // Core and GPU power are a subset of the package power.
  assert(pkg_J >= cores_J + gpu_J);

  // Compute "other" (i.e. rest of the package) and "total" only after the
  // other values have been normalized.

  char otherStr[kNumStrLen];
  double other_J = pkg_J - cores_J - gpu_J;
  NormalizeAndPrintAsWatts(otherStr, other_J);

  char totalStr[kNumStrLen];
  double total_J = pkg_J + ram_J;
  NormalizeAndPrintAsWatts(totalStr, total_J);

  // Recorded so that Finish() can report summary statistics.
  gTotals_W.push_back(JoulesToWatts(total_J));

  // Print and flush so that the output appears immediately even if being
  // redirected through |tee| or anything like that.
  PrintAndFlush("#%02d %s W = %s (%s + %s + %s) + %s W\n", sampleNumber++,
                totalStr, pkgStr, coresStr, gpuStr, otherStr, ramStr);
}
671 | ||||
672 | static void Finish() { | |||
673 | size_t n = gTotals_W.size(); | |||
674 | ||||
675 | // This time calculation assumes that the timers are perfectly accurate which | |||
676 | // is not true but the inaccuracy should be small in practice. | |||
677 | double time = n * gSampleInterval_sec; | |||
678 | ||||
679 | printf("\n"); | |||
680 | printf("%d sample%s taken over a period of %.3f second%s\n", int(n), | |||
681 | n == 1 ? "" : "s", n * gSampleInterval_sec, time == 1.0 ? "" : "s"); | |||
682 | ||||
683 | if (n == 0 || n == 1) { | |||
684 | exit(0); | |||
685 | } | |||
686 | ||||
687 | // Compute the mean. | |||
688 | double sum = std::accumulate(gTotals_W.begin(), gTotals_W.end(), 0.0); | |||
689 | double mean = sum / n; | |||
690 | ||||
691 | // Compute the *population* standard deviation: | |||
692 | // | |||
693 | // popStdDev = sqrt(Sigma(x - m)^2 / n) | |||
694 | // | |||
695 | // where |x| is the sum variable, |m| is the mean, and |n| is the | |||
696 | // population size. | |||
697 | // | |||
698 | // This is different from the *sample* standard deviation, which divides by | |||
699 | // |n - 1|, and would be appropriate if we were using a random sample of a | |||
700 | // larger population. | |||
701 | double sumOfSquaredDeviations = 0; | |||
702 | for (double& iter : gTotals_W) { | |||
703 | double deviation = (iter - mean); | |||
704 | sumOfSquaredDeviations += deviation * deviation; | |||
705 | } | |||
706 | double popStdDev = sqrt(sumOfSquaredDeviations / n); | |||
707 | ||||
708 | // Sort so that percentiles can be determined. We use the "Nearest Rank" | |||
709 | // method of determining percentiles, which is simplest to compute and which | |||
710 | // chooses values from those that appear in the input set. | |||
711 | std::sort(gTotals_W.begin(), gTotals_W.end()); | |||
712 | ||||
713 | printf("\n"); | |||
714 | printf("Distribution of 'total' values:\n"); | |||
715 | printf(" mean = %5.2f W\n", mean); | |||
716 | printf(" std dev = %5.2f W\n", popStdDev); | |||
717 | printf(" 0th percentile = %5.2f W (min)\n", gTotals_W[0]); | |||
718 | printf(" 5th percentile = %5.2f W\n", gTotals_W[ceil(0.05 * n) - 1]); | |||
719 | printf(" 25th percentile = %5.2f W\n", gTotals_W[ceil(0.25 * n) - 1]); | |||
720 | printf(" 50th percentile = %5.2f W\n", gTotals_W[ceil(0.50 * n) - 1]); | |||
721 | printf(" 75th percentile = %5.2f W\n", gTotals_W[ceil(0.75 * n) - 1]); | |||
722 | printf(" 95th percentile = %5.2f W\n", gTotals_W[ceil(0.95 * n) - 1]); | |||
723 | printf("100th percentile = %5.2f W (max)\n", gTotals_W[n - 1]); | |||
724 | ||||
725 | exit(0); | |||
726 | } | |||
727 | ||||
728 | static void SigIntHandler(int aSigNum, siginfo_t* aInfo, void* aContext) { | |||
729 | Finish(); | |||
730 | } | |||
731 | ||||
// Prints the command-line usage summary to stdout: the recognised options
// followed by a platform-specific note about the privileges needed to run
// the program. Compilation fails on platforms other than Mac and Linux.
static void PrintUsage() {
  printf(
      "usage: rapl [options]\n"
      "\n"
      "Options:\n"
      "\n"
      " -h --help show this message\n"
      " -i --sample-interval <N> sample every N ms [default=1000]\n"
      " -n --sample-count <N> get N samples (0 means unlimited) "
      "[default=0]\n"
      "\n"
#if defined(__APPLE__)
      "On Mac this program can be run by any user.\n"
#elif defined(__linux__)
      "On Linux this program can only be run by the super-user unless the "
      "contents\n"
      "of /proc/sys/kernel/perf_event_paranoid is set to 0 or lower.\n"
#else
#  error Sorry, this platform is not supported
#endif
      "\n");
}
754 | ||||
755 | int main(int argc, char** argv) { | |||
756 | // Process command line options. | |||
757 | ||||
758 | gArgv0 = argv[0]; | |||
759 | ||||
760 | // Default values. | |||
761 | int sampleInterval_msec = 1000; | |||
762 | int sampleCount = 0; | |||
763 | ||||
764 | struct option longOptions[] = { | |||
765 | {"help", no_argument0, NULL__null, 'h'}, | |||
766 | {"sample-interval", required_argument1, NULL__null, 'i'}, | |||
767 | {"sample-count", required_argument1, NULL__null, 'n'}, | |||
768 | {NULL__null, 0, NULL__null, 0}}; | |||
769 | const char* shortOptions = "hi:n:"; | |||
770 | ||||
771 | int c; | |||
772 | char* endPtr; | |||
773 | while ((c = getopt_long(argc, argv, shortOptions, longOptions, NULL__null)) != -1) { | |||
| ||||
774 | switch (c) { | |||
775 | case 'h': | |||
776 | PrintUsage(); | |||
777 | exit(0); | |||
778 | ||||
779 | case 'i': | |||
780 | sampleInterval_msec = strtol(optarg, &endPtr, /* base = */ 10); | |||
781 | if (*endPtr) { | |||
782 | CmdLineAbort("sample interval is not an integer"); | |||
783 | } | |||
784 | if (sampleInterval_msec < 1 || sampleInterval_msec > 3600000) { | |||
785 | CmdLineAbort("sample interval must be in the range 1..3600000 ms"); | |||
786 | } | |||
787 | break; | |||
788 | ||||
789 | case 'n': | |||
790 | sampleCount = strtol(optarg, &endPtr, /* base = */ 10); | |||
791 | if (*endPtr) { | |||
792 | CmdLineAbort("sample count is not an integer"); | |||
793 | } | |||
794 | if (sampleCount < 0 || sampleCount > 1000000) { | |||
795 | CmdLineAbort("sample count must be in the range 0..1000000"); | |||
796 | } | |||
797 | break; | |||
798 | ||||
799 | default: | |||
800 | CmdLineAbort(NULL__null); | |||
801 | } | |||
802 | } | |||
803 | ||||
804 | // The RAPL MSRs update every ~1 ms, but the measurement period isn't exactly | |||
805 | // 1 ms, which means the sample periods are not exact. "Power Measurement | |||
806 | // Techniques on Standard Compute Nodes: A Quantitative Comparison" by | |||
807 | // Hackenberg et al. suggests the following. | |||
808 | // | |||
809 | // "RAPL provides energy (and not power) consumption data without | |||
810 | // timestamps associated to each counter update. This makes sampling rates | |||
811 | // above 20 Samples/s unfeasible if the systematic error should be below | |||
812 | // 5%... Constantly polling the RAPL registers will both occupy a processor | |||
813 | // core and distort the measurement itself." | |||
814 | // | |||
815 | // So warn about this case. | |||
816 | if (sampleInterval_msec
| |||
817 | fprintf(stderrstderr, | |||
818 | "\nWARNING: sample intervals < 50 ms are likely to produce " | |||
819 | "inaccurate estimates\n\n"); | |||
820 | } | |||
821 | gSampleInterval_sec = double(sampleInterval_msec) / 1000; | |||
822 | ||||
823 | // Initialize the platform-specific RAPL reading machinery. | |||
824 | gRapl = new RAPL(); | |||
825 | if (!gRapl) { | |||
826 | Abort("new RAPL() failed"); | |||
827 | } | |||
828 | ||||
829 | // Install the signal handlers. | |||
830 | ||||
831 | struct sigaction sa; | |||
832 | memset(&sa, 0, sizeof(sa)); | |||
833 | sa.sa_flags = SA_RESTART0x10000000 | SA_SIGINFO4; | |||
834 | // The extra parens around (0) suppress a -Wunreachable-code warning on OS X | |||
835 | // where sigemptyset() is a macro that can never fail and always returns 0. | |||
836 | if (sigemptyset(&sa.sa_mask) < (0)) { | |||
837 | Abort("sigemptyset() failed"); | |||
838 | } | |||
839 | sa.sa_sigaction__sigaction_handler.sa_sigaction = SigAlrmHandler; | |||
840 | if (sigaction(SIGALRM14, &sa, NULL__null) < 0) { | |||
841 | Abort("sigaction(SIGALRM) failed"); | |||
842 | } | |||
843 | sa.sa_sigaction__sigaction_handler.sa_sigaction = SigIntHandler; | |||
844 | if (sigaction(SIGINT2, &sa, NULL__null) < 0) { | |||
845 | Abort("sigaction(SIGINT) failed"); | |||
846 | } | |||
847 | ||||
848 | // Set up the timer. | |||
849 | struct itimerval timer; | |||
850 | timer.it_interval.tv_sec = sampleInterval_msec / 1000; | |||
851 | timer.it_interval.tv_usec = (sampleInterval_msec % 1000) * 1000; | |||
852 | timer.it_value = timer.it_interval; | |||
853 | if (setitimer(ITIMER_REALITIMER_REAL, &timer, NULL__null) < 0) { | |||
854 | Abort("setitimer() failed"); | |||
855 | } | |||
856 | ||||
857 | // Print header. | |||
858 | PrintAndFlush(" total W = _pkg_ (cores + _gpu_ + other) + _ram_ W\n"); | |||
859 | ||||
860 | // Take samples. | |||
861 | if (sampleCount == 0) { | |||
862 | while (true) { | |||
863 | pause(); | |||
864 | } | |||
865 | } else { | |||
866 | for (int i = 0; i < sampleCount; i++) { | |||
867 | pause(); | |||
868 | } | |||
869 | } | |||
870 | ||||
871 | Finish(); | |||
872 | ||||
873 | return 0; | |||
874 | } |