| File: | var/lib/jenkins/workspace/firefox-scan-build/tools/power/rapl.cpp |
| Warning: | line 567, column 16 2nd function call argument is an uninitialized value |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
| 1 | /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ | |||
| 2 | /* vim: set ts=8 sts=2 et sw=2 tw=80: */ | |||
| 3 | /* This Source Code Form is subject to the terms of the Mozilla Public | |||
| 4 | * License, v. 2.0. If a copy of the MPL was not distributed with this | |||
| 5 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ | |||
| 6 | ||||
| 7 | // This program provides processor power estimates. It does this by reading | |||
| 8 | // model-specific registers (MSRs) that are part of Intel's Running Average Power | |||
| 9 | // Limit (RAPL) interface. These MSRs provide good quality estimates of the | |||
| 10 | // energy consumption of up to four system components: | |||
| 11 | // - PKG: the entire processor package; | |||
| 12 | // - PP0: the cores (a subset of the package); | |||
| 13 | // - PP1: the GPU (a subset of the package); | |||
| 14 | // - DRAM: main memory. | |||
| 15 | // | |||
| 16 | // For more details about RAPL, see section 14.9 of Volume 3 of the "Intel 64 | |||
| 17 | // and IA-32 Architecture's Software Developer's Manual", Order Number 325384. | |||
| 18 | // | |||
| 19 | // This program exists because there are no existing tools on Mac that can | |||
| 20 | // obtain all four RAPL estimates. (|powermetrics| can obtain the package | |||
| 21 | // estimate, but not the others. Intel Power Gadget can obtain the package and | |||
| 22 | // cores estimates.) | |||
| 23 | // | |||
| 24 | // On Linux |perf| can obtain all four estimates (as Joules, which are easily | |||
| 25 | // converted to Watts), but this program is implemented for Linux because it's | |||
| 26 | // not too hard to do, and that gives us multi-platform consistency. | |||
| 27 | // | |||
| 28 | // This program does not support Windows, unfortunately. It's not obvious how | |||
| 29 | // to access the RAPL MSRs on Windows. | |||
| 30 | // | |||
| 31 | // This program deliberately uses only standard libraries and avoids | |||
| 32 | // Mozilla-specific code, to make it easy to compile and test on different | |||
| 33 | // machines. | |||
| 34 | ||||
| 35 | #include <assert.h> | |||
| 36 | #include <getopt.h> | |||
| 37 | #include <math.h> | |||
| 38 | #include <signal.h> | |||
| 39 | #include <stdarg.h> | |||
| 40 | #include <stdint.h> | |||
| 41 | #include <stdio.h> | |||
| 42 | #include <stdlib.h> | |||
| 43 | #include <string.h> | |||
| 44 | #include <sys/time.h> | |||
| 45 | #include <unistd.h> | |||
| 46 | ||||
| 47 | #include <algorithm> | |||
| 48 | #include <numeric> | |||
| 49 | #include <vector> | |||
| 50 | ||||
// MOZ_RUNINIT marks a global variable. When MOZ_CLANG_PLUGIN is defined it
// expands to an annotate("moz_global_var") attribute; otherwise it expands to
// nothing.
#ifdef MOZ_CLANG_PLUGIN
#  define MOZ_RUNINIT __attribute__((annotate("moz_global_var")))
#else
#  define MOZ_RUNINIT
#endif
| 56 | ||||
| 57 | //--------------------------------------------------------------------------- | |||
| 58 | // Utilities | |||
| 59 | //--------------------------------------------------------------------------- | |||
| 60 | ||||
// The value of argv[0] passed to main(). Used in error messages.
static const char* gArgv0;
| 63 | ||||
| 64 | static void Abort(const char* aFormat, ...) { | |||
| 65 | va_list vargs; | |||
| 66 | va_start(vargs, aFormat)__builtin_va_start(vargs, aFormat); | |||
| 67 | fprintf(stderrstderr, "%s: ", gArgv0); | |||
| 68 | vfprintf(stderrstderr, aFormat, vargs); | |||
| 69 | fprintf(stderrstderr, "\n"); | |||
| 70 | va_end(vargs)__builtin_va_end(vargs); | |||
| 71 | ||||
| 72 | exit(1); | |||
| 73 | } | |||
| 74 | ||||
| 75 | static void CmdLineAbort(const char* aMsg) { | |||
| 76 | if (aMsg) { | |||
| 77 | fprintf(stderrstderr, "%s: %s\n", gArgv0, aMsg); | |||
| 78 | } | |||
| 79 | fprintf(stderrstderr, "Use --help for more information.\n"); | |||
| 80 | exit(1); | |||
| 81 | } | |||
| 82 | ||||
// A special value that represents an estimate from an unsupported RAPL domain.
static const double kUnsupported_j = -1.0;
| 85 | ||||
// Print to stdout and flush it, so that the output appears immediately even if
// being redirected through |tee| or anything like that.
//
// |aFormat| and the trailing arguments are interpreted printf-style.
static void PrintAndFlush(const char* aFormat, ...) {
  va_list vargs;
  va_start(vargs, aFormat);
  vfprintf(stdout, aFormat, vargs);
  va_end(vargs);

  fflush(stdout);
}
| 96 | ||||
| 97 | //--------------------------------------------------------------------------- | |||
| 98 | // Mac-specific code | |||
| 99 | //--------------------------------------------------------------------------- | |||
| 100 | ||||
| 101 | #if defined(__APPLE__) | |||
| 102 | ||||
| 103 | // Because of the pkg_energy_statistics_t::pkes_version check below, the | |||
| 104 | // earliest OS X version this code will work with is 10.9.0 (xnu-2422.1.72). | |||
| 105 | ||||
| 106 | # include <sys/types.h> | |||
| 107 | # include <sys/sysctl.h> | |||
| 108 | ||||
| 109 | // OS X has four kinds of system calls: | |||
| 110 | // | |||
| 111 | // 1. Mach traps; | |||
| 112 | // 2. UNIX system calls; | |||
| 113 | // 3. machine-dependent calls; | |||
| 114 | // 4. diagnostic calls. | |||
| 115 | // | |||
| 116 | // (See "Mac OS X and iOS Internals" by Jonathan Levin for more details.) | |||
| 117 | // | |||
| 118 | // The last category has a single call named diagCall() or diagCall64(). Its | |||
| 119 | // mode is controlled by its first argument, and one of the modes allows access | |||
| 120 | // to the Intel RAPL MSRs. | |||
| 121 | // | |||
| 122 | // The interface to diagCall64() is not exported, so we have to import some | |||
| 123 | // definitions from the XNU kernel. All imported definitions are annotated with | |||
| 124 | // the XNU source file they come from, and information about what XNU versions | |||
| 125 | // they were introduced in and (if relevant) modified. | |||
| 126 | ||||
// The diagCall64() mode.
// From osfmk/i386/Diagnostics.h
// - In 10.8.4 (xnu-2050.24.15) this value was introduced. (In 10.8.3 the value
//   17 was used for dgGzallocTest.)
#  define dgPowerStat 17

// From osfmk/i386/cpu_data.h
// - In 10.8.5 these values were introduced, along with core_energy_stat_t.
#  define CPU_RTIME_BINS (12)
#  define CPU_ITIME_BINS (CPU_RTIME_BINS)
| 137 | ||||
| 138 | // core_energy_stat_t and pkg_energy_statistics_t are both from | |||
| 139 | // osfmk/i386/Diagnostics.c. | |||
| 140 | // - In 10.8.4 (xnu-2050.24.15) both structs were introduced, but with many | |||
| 141 | // fewer fields. | |||
| 142 | // - In 10.8.5 (xnu-2050.48.11) both structs were substantially expanded, with | |||
| 143 | // numerous new fields. | |||
| 144 | // - In 10.9.0 (xnu-2422.1.72) pkg_energy_statistics_t::pkes_version was added. | |||
| 145 | // diagCall64(dgPowerStat) fills it with '1' in all versions since (up to | |||
| 146 | // 10.10.2 at time of writing). | |||
| 147 | // - in 10.10.2 (xnu-2782.10.72) core_energy_stat_t::gpmcs was conditionally | |||
| 148 | // added, if DIAG_ALL_PMCS is true. (DIAG_ALL_PMCS is not even defined in the | |||
| 149 | // source code, but it could be defined at compile-time via compiler flags.) | |||
| 150 | // pkg_energy_statistics_t::pkes_version did not change, though. | |||
| 151 | ||||
| 152 | typedef struct { | |||
| 153 | uint64_t caperf; | |||
| 154 | uint64_t cmperf; | |||
| 155 | uint64_t ccres[6]; | |||
| 156 | uint64_t crtimes[CPU_RTIME_BINS]; | |||
| 157 | uint64_t citimes[CPU_ITIME_BINS]; | |||
| 158 | uint64_t crtime_total; | |||
| 159 | uint64_t citime_total; | |||
| 160 | uint64_t cpu_idle_exits; | |||
| 161 | uint64_t cpu_insns; | |||
| 162 | uint64_t cpu_ucc; | |||
| 163 | uint64_t cpu_urc; | |||
| 164 | # if DIAG_ALL_PMCS // Added in 10.10.2 (xnu-2782.10.72). | |||
| 165 | uint64_t gpmcs[4]; // Added in 10.10.2 (xnu-2782.10.72). | |||
| 166 | # endif /* DIAG_ALL_PMCS */ // Added in 10.10.2 (xnu-2782.10.72). | |||
| 167 | } core_energy_stat_t; | |||
| 168 | ||||
| 169 | typedef struct { | |||
| 170 | uint64_t pkes_version; // Added in 10.9.0 (xnu-2422.1.72). | |||
| 171 | uint64_t pkg_cres[2][7]; | |||
| 172 | ||||
| 173 | // This is read from MSR 0x606, which Intel calls MSR_RAPL_POWER_UNIT | |||
| 174 | // and XNU calls MSR_IA32_PKG_POWER_SKU_UNIT. | |||
| 175 | uint64_t pkg_power_unit; | |||
| 176 | ||||
| 177 | // These are the four fields for the four RAPL domains. For each field | |||
| 178 | // we list: | |||
| 179 | // | |||
| 180 | // - the corresponding MSR number; | |||
| 181 | // - Intel's name for that MSR; | |||
| 182 | // - XNU's name for that MSR; | |||
| 183 | // - which Intel processors the MSR is supported on. | |||
| 184 | // | |||
| 185 | // The last of these is determined from chapter 35 of Volume 3 of the | |||
| 186 | // "Intel 64 and IA-32 Architecture's Software Developer's Manual", | |||
| 187 | // Order Number 325384. (Note that chapter 35 contradicts section 14.9 | |||
| 188 | // to some degree.) | |||
| 189 | ||||
| 190 | // 0x611 == MSR_PKG_ENERGY_STATUS == MSR_IA32_PKG_ENERGY_STATUS | |||
| 191 | // Atom (various), Sandy Bridge, Next Gen Xeon Phi (model 0x57). | |||
| 192 | uint64_t pkg_energy; | |||
| 193 | ||||
| 194 | // 0x639 == MSR_PP0_ENERGY_STATUS == MSR_IA32_PP0_ENERGY_STATUS | |||
| 195 | // Atom (various), Sandy Bridge, Next Gen Xeon Phi (model 0x57). | |||
| 196 | uint64_t pp0_energy; | |||
| 197 | ||||
| 198 | // 0x641 == MSR_PP1_ENERGY_STATUS == MSR_PP1_ENERGY_STATUS | |||
| 199 | // Sandy Bridge, Haswell. | |||
| 200 | uint64_t pp1_energy; | |||
| 201 | ||||
| 202 | // 0x619 == MSR_DRAM_ENERGY_STATUS == MSR_IA32_DDR_ENERGY_STATUS | |||
| 203 | // Xeon E5, Xeon E5 v2, Haswell/Haswell-E, Next Gen Xeon Phi (model | |||
| 204 | // 0x57) | |||
| 205 | uint64_t ddr_energy; | |||
| 206 | ||||
| 207 | uint64_t llc_flushed_cycles; | |||
| 208 | uint64_t ring_ratio_instantaneous; | |||
| 209 | uint64_t IA_frequency_clipping_cause; | |||
| 210 | uint64_t GT_frequency_clipping_cause; | |||
| 211 | uint64_t pkg_idle_exits; | |||
| 212 | uint64_t pkg_rtimes[CPU_RTIME_BINS]; | |||
| 213 | uint64_t pkg_itimes[CPU_ITIME_BINS]; | |||
| 214 | uint64_t mbus_delay_time; | |||
| 215 | uint64_t mint_delay_time; | |||
| 216 | uint32_t ncpus; | |||
| 217 | core_energy_stat_t cest[]; | |||
| 218 | } pkg_energy_statistics_t; | |||
| 219 | ||||
// Issues the diagnostic system call with mode |aMode| and buffer |aBuf|, and
// returns the value the kernel leaves in %rax (narrowed to int).
static int diagCall64(uint64_t aMode, void* aBuf) {
  // We cannot use syscall() here because it doesn't work with diagnostic
  // system calls -- it raises SIGSYS if you try. So we have to use asm.

#  ifdef __x86_64__
  // The 0x40000 prefix indicates it's a diagnostic system call. The 0x01
  // suffix indicates the syscall number is 1, which also happens to be the
  // only diagnostic system call. See osfmk/mach/i386/syscall_sw.h for more
  // details.
  static const uint64_t diagCallNum = 0x4000001;
  uint64_t rv;

  __asm__ __volatile__(
      "syscall"

      // Return value goes in "a" (%rax).
      : /* outputs */ "=a"(rv)

      // The syscall number goes in "0", a synonym (from outputs) for "a"
      // (%rax). The syscall arguments go in "D" (%rdi) and "S" (%rsi).
      : /* inputs */ "0"(diagCallNum), "D"(aMode), "S"(aBuf)

      // The |syscall| instruction clobbers %rcx, %r11, and %rflags ("cc"). And
      // this particular syscall also writes memory (aBuf).
      : /* clobbers */ "rcx", "r11", "cc", "memory");
  return rv;
#  else
#    error Sorry, only x86-64 is supported
#  endif
}
| 250 | ||||
| 251 | static void diagCall64_dgPowerStat(pkg_energy_statistics_t* aPkes) { | |||
| 252 | static const uint64_t supported_version = 1; | |||
| 253 | ||||
| 254 | // Write an unsupported version number into pkes_version so that the check | |||
| 255 | // below cannot succeed by dumb luck. | |||
| 256 | aPkes->pkes_version = supported_version - 1; | |||
| 257 | ||||
| 258 | // diagCall64() returns 1 on success, and 0 on failure (which can only happen | |||
| 259 | // if the mode is unrecognized, e.g. in 10.7.x or earlier versions). | |||
| 260 | if (diagCall64(dgPowerStat, aPkes) != 1) { | |||
| 261 | Abort("diagCall64() failed"); | |||
| 262 | } | |||
| 263 | ||||
| 264 | if (aPkes->pkes_version != 1) { | |||
| 265 | Abort("unexpected pkes_version: %llu", aPkes->pkes_version); | |||
| 266 | } | |||
| 267 | } | |||
| 268 | ||||
| 269 | class RAPL { | |||
| 270 | bool mIsGpuSupported; // Is the GPU domain supported by the processor? | |||
| 271 | bool mIsRamSupported; // Is the RAM domain supported by the processor? | |||
| 272 | ||||
| 273 | // The DRAM domain on Haswell servers has a fixed energy unit (1/65536 J == | |||
| 274 | // 15.3 microJoules) which is different to the power unit MSR. (See the | |||
| 275 | // "Intel Xeon Processor E5-1600 and E5-2600 v3 Product Families, Volume 2 of | |||
| 276 | // 2, Registers" datasheet, September 2014, Reference Number: 330784-001.) | |||
| 277 | // This field records whether the quirk is present. | |||
| 278 | bool mHasRamUnitsQuirk; | |||
| 279 | ||||
| 280 | // The abovementioned 15.3 microJoules value. | |||
| 281 | static const double kQuirkyRamJoulesPerTick; | |||
| 282 | ||||
| 283 | // The previous sample's MSR values. | |||
| 284 | uint64_t mPrevPkgTicks; | |||
| 285 | uint64_t mPrevPp0Ticks; | |||
| 286 | uint64_t mPrevPp1Ticks; | |||
| 287 | uint64_t mPrevDdrTicks; | |||
| 288 | ||||
| 289 | // The struct passed to diagCall64(). | |||
| 290 | pkg_energy_statistics_t* mPkes; | |||
| 291 | ||||
| 292 | public: | |||
| 293 | RAPL() : mHasRamUnitsQuirk(false) { | |||
| 294 | // Work out which RAPL MSRs this CPU model supports. | |||
| 295 | int cpuModel; | |||
| 296 | size_t size = sizeof(cpuModel); | |||
| 297 | if (sysctlbyname("machdep.cpu.model", &cpuModel, &size, NULL__null, 0) != 0) { | |||
| 298 | Abort("sysctlbyname(\"machdep.cpu.model\") failed"); | |||
| 299 | } | |||
| 300 | ||||
| 301 | // This is similar to arch/x86/kernel/cpu/perf_event_intel_rapl.c in | |||
| 302 | // linux-4.1.5/. | |||
| 303 | // | |||
| 304 | // By linux-5.6.14/, this stuff had moved into | |||
| 305 | // arch/x86/events/intel/rapl.c, which references processor families in | |||
| 306 | // arch/x86/include/asm/intel-family.h. | |||
| 307 | switch (cpuModel) { | |||
| 308 | case 0x2a: // Sandy Bridge | |||
| 309 | case 0x3a: // Ivy Bridge | |||
| 310 | // Supports package, cores, GPU. | |||
| 311 | mIsGpuSupported = true; | |||
| 312 | mIsRamSupported = false; | |||
| 313 | break; | |||
| 314 | ||||
| 315 | case 0x3f: // Haswell X | |||
| 316 | case 0x4f: // Broadwell X | |||
| 317 | case 0x55: // Skylake X | |||
| 318 | case 0x56: // Broadwell D | |||
| 319 | // Supports package, cores, RAM. Has the units quirk. | |||
| 320 | mIsGpuSupported = false; | |||
| 321 | mIsRamSupported = true; | |||
| 322 | mHasRamUnitsQuirk = true; | |||
| 323 | break; | |||
| 324 | ||||
| 325 | case 0x2d: // Sandy Bridge X | |||
| 326 | case 0x3e: // Ivy Bridge X | |||
| 327 | // Supports package, cores, RAM. | |||
| 328 | mIsGpuSupported = false; | |||
| 329 | mIsRamSupported = true; | |||
| 330 | break; | |||
| 331 | ||||
| 332 | case 0x3c: // Haswell | |||
| 333 | case 0x3d: // Broadwell | |||
| 334 | case 0x45: // Haswell L | |||
| 335 | case 0x46: // Haswell G | |||
| 336 | case 0x47: // Broadwell G | |||
| 337 | // Supports package, cores, GPU, RAM. | |||
| 338 | mIsGpuSupported = true; | |||
| 339 | mIsRamSupported = true; | |||
| 340 | break; | |||
| 341 | ||||
| 342 | case 0x4e: // Skylake L | |||
| 343 | case 0x5e: // Skylake | |||
| 344 | case 0x8e: // Kaby Lake L | |||
| 345 | case 0x9e: // Kaby Lake | |||
| 346 | case 0x66: // Cannon Lake L | |||
| 347 | case 0x7d: // Ice Lake | |||
| 348 | case 0x7e: // Ice Lake L | |||
| 349 | case 0xa5: // Comet Lake | |||
| 350 | case 0xa6: // Comet Lake L | |||
| 351 | // Supports package, cores, GPU, RAM, PSYS. | |||
| 352 | // XXX: this tool currently doesn't measure PSYS. | |||
| 353 | mIsGpuSupported = true; | |||
| 354 | mIsRamSupported = true; | |||
| 355 | break; | |||
| 356 | ||||
| 357 | default: | |||
| 358 | Abort("unknown CPU model: %d", cpuModel); | |||
| 359 | break; | |||
| 360 | } | |||
| 361 | ||||
| 362 | // Get the maximum number of logical CPUs so that we know how big to make | |||
| 363 | // |mPkes|. | |||
| 364 | int logicalcpu_max; | |||
| 365 | size = sizeof(logicalcpu_max); | |||
| 366 | if (sysctlbyname("hw.logicalcpu_max", &logicalcpu_max, &size, NULL__null, 0) != | |||
| 367 | 0) { | |||
| 368 | Abort("sysctlbyname(\"hw.logicalcpu_max\") failed"); | |||
| 369 | } | |||
| 370 | ||||
| 371 | // Over-allocate by 1024 bytes per CPU to allow for the uncertainty around | |||
| 372 | // core_energy_stat_t::gpmcs and for any other future extensions to that | |||
| 373 | // struct. (The fields we read all come before the core_energy_stat_t | |||
| 374 | // array, so it won't matter to us whether gpmcs is present or not.) | |||
| 375 | size_t pkesSize = sizeof(pkg_energy_statistics_t) + | |||
| 376 | logicalcpu_max * sizeof(core_energy_stat_t) + | |||
| 377 | logicalcpu_max * 1024; | |||
| 378 | mPkes = (pkg_energy_statistics_t*)malloc(pkesSize); | |||
| 379 | if (!mPkes) { | |||
| 380 | Abort("malloc() failed"); | |||
| 381 | } | |||
| 382 | ||||
| 383 | // Do an initial measurement so that the first sample's diffs are sensible. | |||
| 384 | double dummy1, dummy2, dummy3, dummy4; | |||
| 385 | EnergyEstimates(dummy1, dummy2, dummy3, dummy4); | |||
| 386 | } | |||
| 387 | ||||
| 388 | ~RAPL() { free(mPkes); } | |||
| 389 | ||||
| 390 | static double Joules(uint64_t aTicks, double aJoulesPerTick) { | |||
| 391 | return double(aTicks) * aJoulesPerTick; | |||
| 392 | } | |||
| 393 | ||||
| 394 | void EnergyEstimates(double& aPkg_J, double& aCores_J, double& aGpu_J, | |||
| 395 | double& aRam_J) { | |||
| 396 | diagCall64_dgPowerStat(mPkes); | |||
| 397 | ||||
| 398 | // Bits 12:8 are the ESU. | |||
| 399 | // Energy measurements come in multiples of 1/(2^ESU). | |||
| 400 | uint32_t energyStatusUnits = (mPkes->pkg_power_unit >> 8) & 0x1f; | |||
| 401 | double joulesPerTick = ((double)1 / (1 << energyStatusUnits)); | |||
| 402 | ||||
| 403 | aPkg_J = Joules(mPkes->pkg_energy - mPrevPkgTicks, joulesPerTick); | |||
| 404 | aCores_J = Joules(mPkes->pp0_energy - mPrevPp0Ticks, joulesPerTick); | |||
| 405 | aGpu_J = mIsGpuSupported | |||
| 406 | ? Joules(mPkes->pp1_energy - mPrevPp1Ticks, joulesPerTick) | |||
| 407 | : kUnsupported_j; | |||
| 408 | aRam_J = mIsRamSupported | |||
| 409 | ? Joules(mPkes->ddr_energy - mPrevDdrTicks, | |||
| 410 | mHasRamUnitsQuirk ? kQuirkyRamJoulesPerTick | |||
| 411 | : joulesPerTick) | |||
| 412 | : kUnsupported_j; | |||
| 413 | ||||
| 414 | mPrevPkgTicks = mPkes->pkg_energy; | |||
| 415 | mPrevPp0Ticks = mPkes->pp0_energy; | |||
| 416 | if (mIsGpuSupported) { | |||
| 417 | mPrevPp1Ticks = mPkes->pp1_energy; | |||
| 418 | } | |||
| 419 | if (mIsRamSupported) { | |||
| 420 | mPrevDdrTicks = mPkes->ddr_energy; | |||
| 421 | } | |||
| 422 | } | |||
| 423 | }; | |||
| 424 | ||||
| 425 | /* static */ const double RAPL::kQuirkyRamJoulesPerTick = (double)1 / 65536; | |||
| 426 | ||||
| 427 | //--------------------------------------------------------------------------- | |||
| 428 | // Linux-specific code | |||
| 429 | //--------------------------------------------------------------------------- | |||
| 430 | ||||
| 431 | #elif defined(__linux__1) | |||
| 432 | ||||
| 433 | # include <linux1/perf_event.h> | |||
| 434 | # include <sys/syscall.h> | |||
| 435 | ||||
| 436 | // There is no glibc wrapper for this system call so we provide our own. | |||
| 437 | static int perf_event_open(struct perf_event_attr* aAttr, pid_t aPid, int aCpu, | |||
| 438 | int aGroupFd, unsigned long aFlags) { | |||
| 439 | return syscall(__NR_perf_event_open298, aAttr, aPid, aCpu, aGroupFd, aFlags); | |||
| 440 | } | |||
| 441 | ||||
| 442 | // Returns false if the file cannot be opened. | |||
| 443 | template <typename T> | |||
| 444 | static bool ReadValueFromPowerFile(const char* aStr1, const char* aStr2, | |||
| 445 | const char* aStr3, const char* aScanfString, | |||
| 446 | T* aOut) { | |||
| 447 | // The filenames going into this buffer are under our control and the longest | |||
| 448 | // one is "/sys/bus/event_source/devices/power/events/energy-cores.scale". | |||
| 449 | // So 256 chars is plenty. | |||
| 450 | char filename[256]; | |||
| 451 | ||||
| 452 | sprintf(filename, "/sys/bus/event_source/devices/power/%s%s%s", aStr1, aStr2, | |||
| 453 | aStr3); | |||
| 454 | FILE* fp = fopen(filename, "r"); | |||
| 455 | if (!fp
| |||
| 456 | return false; | |||
| 457 | } | |||
| 458 | if (fscanf(fp, aScanfString, aOut) != 1) { | |||
| 459 | Abort("fscanf() failed"); | |||
| 460 | } | |||
| 461 | fclose(fp); | |||
| 462 | ||||
| 463 | return true; | |||
| 464 | } | |||
| 465 | ||||
| 466 | // This class encapsulates the reading of a single RAPL domain. | |||
| 467 | class Domain { | |||
| 468 | bool mIsSupported; // Is the domain supported by the processor? | |||
| 469 | ||||
| 470 | // These three are only set if |mIsSupported| is true. | |||
| 471 | double mJoulesPerTick; // How many Joules each tick of the MSR represents. | |||
| 472 | int mFd; // The fd through which the MSR is read. | |||
| 473 | double mPrevTicks; // The previous sample's MSR value. | |||
| 474 | ||||
| 475 | public: | |||
| 476 | enum IsOptional { Optional, NonOptional }; | |||
| 477 | ||||
| 478 | Domain(const char* aName, uint32_t aType, | |||
| 479 | IsOptional aOptional = NonOptional) { | |||
| 480 | uint64_t config; | |||
| 481 | if (!ReadValueFromPowerFile("events/energy-", aName, "", "event=%llx", | |||
| 482 | &config)) { | |||
| 483 | // Failure is allowed for optional domains. | |||
| 484 | if (aOptional == NonOptional) { | |||
| 485 | Abort( | |||
| 486 | "failed to open file for non-optional domain '%s'\n" | |||
| 487 | "- Is your kernel version 3.14 or later, as required? " | |||
| 488 | "Run |uname -r| to see.", | |||
| 489 | aName); | |||
| 490 | } | |||
| 491 | mIsSupported = false; | |||
| 492 | return; | |||
| 493 | } | |||
| 494 | ||||
| 495 | mIsSupported = true; | |||
| 496 | ||||
| 497 | if (!ReadValueFromPowerFile("events/energy-", aName, ".scale", "%lf", | |||
| 498 | &mJoulesPerTick)) { | |||
| 499 | Abort("failed to read from .scale file"); | |||
| 500 | } | |||
| 501 | ||||
| 502 | // The unit should be "Joules", so 128 chars should be plenty. | |||
| 503 | char unit[128]; | |||
| 504 | if (!ReadValueFromPowerFile("events/energy-", aName, ".unit", "%127s", | |||
| 505 | unit)) { | |||
| 506 | Abort("failed to read from .unit file"); | |||
| 507 | } | |||
| 508 | if (strcmp(unit, "Joules") != 0) { | |||
| 509 | Abort("unexpected unit '%s' in .unit file", unit); | |||
| 510 | } | |||
| 511 | ||||
| 512 | struct perf_event_attr attr; | |||
| 513 | memset(&attr, 0, sizeof(attr)); | |||
| 514 | attr.type = aType; | |||
| 515 | attr.size = uint32_t(sizeof(attr)); | |||
| 516 | attr.config = config; | |||
| 517 | ||||
| 518 | // Measure all processes/threads. The specified CPU doesn't matter. | |||
| 519 | mFd = perf_event_open(&attr, /* aPid = */ -1, /* aCpu = */ 0, | |||
| 520 | /* aGroupFd = */ -1, /* aFlags = */ 0); | |||
| 521 | if (mFd < 0) { | |||
| 522 | Abort( | |||
| 523 | "perf_event_open() failed\n" | |||
| 524 | "- Did you run as root (e.g. with |sudo|) or set\n" | |||
| 525 | " /proc/sys/kernel/perf_event_paranoid to 0, as required?"); | |||
| 526 | } | |||
| 527 | ||||
| 528 | mPrevTicks = 0; | |||
| 529 | } | |||
| 530 | ||||
| 531 | ~Domain() { | |||
| 532 | if (mIsSupported) { | |||
| 533 | close(mFd); | |||
| 534 | } | |||
| 535 | } | |||
| 536 | ||||
| 537 | double EnergyEstimate() { | |||
| 538 | if (!mIsSupported) { | |||
| 539 | return kUnsupported_j; | |||
| 540 | } | |||
| 541 | ||||
| 542 | uint64_t thisTicks; | |||
| 543 | if (read(mFd, &thisTicks, sizeof(uint64_t)) != sizeof(uint64_t)) { | |||
| 544 | Abort("read() failed"); | |||
| 545 | } | |||
| 546 | ||||
| 547 | uint64_t ticks = thisTicks - mPrevTicks; | |||
| 548 | mPrevTicks = thisTicks; | |||
| 549 | double joules = ticks * mJoulesPerTick; | |||
| 550 | return joules; | |||
| 551 | } | |||
| 552 | }; | |||
| 553 | ||||
| 554 | class RAPL { | |||
| 555 | Domain* mPkg; | |||
| 556 | Domain* mCores; | |||
| 557 | Domain* mGpu; | |||
| 558 | Domain* mRam; | |||
| 559 | ||||
| 560 | public: | |||
| 561 | RAPL() { | |||
| 562 | uint32_t type; | |||
| 563 | if (!ReadValueFromPowerFile("type", "", "", "%u", &type)) { | |||
| 564 | Abort("failed to read from type file"); | |||
| 565 | } | |||
| 566 | ||||
| 567 | mPkg = new Domain("pkg", type); | |||
| ||||
| 568 | mCores = new Domain("cores", type); | |||
| 569 | mGpu = new Domain("gpu", type, Domain::Optional); | |||
| 570 | mRam = new Domain("ram", type, Domain::Optional); | |||
| 571 | if (!mPkg || !mCores || !mGpu || !mRam) { | |||
| 572 | Abort("new Domain() failed"); | |||
| 573 | } | |||
| 574 | } | |||
| 575 | ||||
| 576 | ~RAPL() { | |||
| 577 | delete mPkg; | |||
| 578 | delete mCores; | |||
| 579 | delete mGpu; | |||
| 580 | delete mRam; | |||
| 581 | } | |||
| 582 | ||||
| 583 | void EnergyEstimates(double& aPkg_J, double& aCores_J, double& aGpu_J, | |||
| 584 | double& aRam_J) { | |||
| 585 | aPkg_J = mPkg->EnergyEstimate(); | |||
| 586 | aCores_J = mCores->EnergyEstimate(); | |||
| 587 | aGpu_J = mGpu->EnergyEstimate(); | |||
| 588 | aRam_J = mRam->EnergyEstimate(); | |||
| 589 | } | |||
| 590 | }; | |||
| 591 | ||||
| 592 | #else | |||
| 593 | ||||
| 594 | //--------------------------------------------------------------------------- | |||
| 595 | // Unsupported platforms | |||
| 596 | //--------------------------------------------------------------------------- | |||
| 597 | ||||
| 598 | # error Sorry, this platform is not supported | |||
| 599 | ||||
| 600 | #endif // platform | |||
| 601 | ||||
| 602 | //--------------------------------------------------------------------------- | |||
| 603 | // The main loop | |||
| 604 | //--------------------------------------------------------------------------- | |||
| 605 | ||||
| 606 | // The sample interval, measured in seconds. | |||
| 607 | static double gSampleInterval_sec; | |||
| 608 | ||||
| 609 | // The platform-specific RAPL-reading machinery. | |||
| 610 | static RAPL* gRapl; | |||
| 611 | ||||
| 612 | // All the sampled "total" values, in Watts. | |||
| 613 | MOZ_RUNINIT static std::vector<double> gTotals_W; | |||
| 614 | ||||
| 615 | // Power = Energy / Time, where power is measured in Watts, Energy is measured | |||
| 616 | // in Joules, and Time is measured in seconds. | |||
| 617 | static double JoulesToWatts(double aJoules) { | |||
| 618 | return aJoules / gSampleInterval_sec; | |||
| 619 | } | |||
| 620 | ||||
| 621 | // "Normalize" here means convert kUnsupported_j to zero so it can be used in | |||
| 622 | // additive expressions. All printed values are 5 or maybe 6 chars (though 6 | |||
| 623 | // chars would require a value > 100 W, which is unlikely). Values above 1000 W | |||
| 624 | // are normalized to " n/a ", so 6 chars is the longest that may be printed. | |||
| 625 | static void NormalizeAndPrintAsWatts(char* aBuf, double& aValue_J) { | |||
| 626 | if (aValue_J == kUnsupported_j || aValue_J >= 1000) { | |||
| 627 | aValue_J = 0; | |||
| 628 | sprintf(aBuf, "%s", " n/a "); | |||
| 629 | } else { | |||
| 630 | sprintf(aBuf, "%5.2f", JoulesToWatts(aValue_J)); | |||
| 631 | } | |||
| 632 | } | |||
| 633 | ||||
| 634 | static void SigAlrmHandler(int aSigNum, siginfo_t* aInfo, void* aContext) { | |||
| 635 | static int sampleNumber = 1; | |||
| 636 | ||||
| 637 | double pkg_J, cores_J, gpu_J, ram_J; | |||
| 638 | gRapl->EnergyEstimates(pkg_J, cores_J, gpu_J, ram_J); | |||
| 639 | ||||
| 640 | // We should have pkg and cores estimates, but might not have gpu and ram | |||
| 641 | // estimates. | |||
| 642 | assert(pkg_J != kUnsupported_j)(static_cast <bool> (pkg_J != kUnsupported_j) ? void (0 ) : __assert_fail ("pkg_J != kUnsupported_j", __builtin_FILE ( ), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__)); | |||
| 643 | assert(cores_J != kUnsupported_j)(static_cast <bool> (cores_J != kUnsupported_j) ? void ( 0) : __assert_fail ("cores_J != kUnsupported_j", __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__)); | |||
| 644 | ||||
| 645 | // This needs to be big enough to print watt values to two decimal places. 16 | |||
| 646 | // should be plenty. | |||
| 647 | static const size_t kNumStrLen = 16; | |||
| 648 | ||||
| 649 | static char pkgStr[kNumStrLen], coresStr[kNumStrLen], gpuStr[kNumStrLen], | |||
| 650 | ramStr[kNumStrLen]; | |||
| 651 | NormalizeAndPrintAsWatts(pkgStr, pkg_J); | |||
| 652 | NormalizeAndPrintAsWatts(coresStr, cores_J); | |||
| 653 | NormalizeAndPrintAsWatts(gpuStr, gpu_J); | |||
| 654 | NormalizeAndPrintAsWatts(ramStr, ram_J); | |||
| 655 | ||||
| 656 | // Core and GPU power are a subset of the package power. | |||
| 657 | assert(pkg_J >= cores_J + gpu_J)(static_cast <bool> (pkg_J >= cores_J + gpu_J) ? void (0) : __assert_fail ("pkg_J >= cores_J + gpu_J", __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__)); | |||
| 658 | ||||
| 659 | // Compute "other" (i.e. rest of the package) and "total" only after the | |||
| 660 | // other values have been normalized. | |||
| 661 | ||||
| 662 | char otherStr[kNumStrLen]; | |||
| 663 | double other_J = pkg_J - cores_J - gpu_J; | |||
| 664 | NormalizeAndPrintAsWatts(otherStr, other_J); | |||
| 665 | ||||
| 666 | char totalStr[kNumStrLen]; | |||
| 667 | double total_J = pkg_J + ram_J; | |||
| 668 | NormalizeAndPrintAsWatts(totalStr, total_J); | |||
| 669 | ||||
| 670 | gTotals_W.push_back(JoulesToWatts(total_J)); | |||
| 671 | ||||
| 672 | // Print and flush so that the output appears immediately even if being | |||
| 673 | // redirected through |tee| or anything like that. | |||
| 674 | PrintAndFlush("#%02d %s W = %s (%s + %s + %s) + %s W\n", sampleNumber++, | |||
| 675 | totalStr, pkgStr, coresStr, gpuStr, otherStr, ramStr); | |||
| 676 | } | |||
| 677 | ||||
| 678 | static void Finish() { | |||
| 679 | size_t n = gTotals_W.size(); | |||
| 680 | ||||
| 681 | // This time calculation assumes that the timers are perfectly accurate which | |||
| 682 | // is not true but the inaccuracy should be small in practice. | |||
| 683 | double time = n * gSampleInterval_sec; | |||
| 684 | ||||
| 685 | printf("\n"); | |||
| 686 | printf("%d sample%s taken over a period of %.3f second%s\n", int(n), | |||
| 687 | n == 1 ? "" : "s", n * gSampleInterval_sec, time == 1.0 ? "" : "s"); | |||
| 688 | ||||
| 689 | if (n == 0 || n == 1) { | |||
| 690 | exit(0); | |||
| 691 | } | |||
| 692 | ||||
| 693 | // Compute the mean. | |||
| 694 | double sum = std::accumulate(gTotals_W.begin(), gTotals_W.end(), 0.0); | |||
| 695 | double mean = sum / n; | |||
| 696 | ||||
| 697 | // Compute the *population* standard deviation: | |||
| 698 | // | |||
| 699 | // popStdDev = sqrt(Sigma(x - m)^2 / n) | |||
| 700 | // | |||
| 701 | // where |x| is the sum variable, |m| is the mean, and |n| is the | |||
| 702 | // population size. | |||
| 703 | // | |||
| 704 | // This is different from the *sample* standard deviation, which divides by | |||
| 705 | // |n - 1|, and would be appropriate if we were using a random sample of a | |||
| 706 | // larger population. | |||
| 707 | double sumOfSquaredDeviations = 0; | |||
| 708 | for (double& iter : gTotals_W) { | |||
| 709 | double deviation = (iter - mean); | |||
| 710 | sumOfSquaredDeviations += deviation * deviation; | |||
| 711 | } | |||
| 712 | double popStdDev = sqrt(sumOfSquaredDeviations / n); | |||
| 713 | ||||
| 714 | // Sort so that percentiles can be determined. We use the "Nearest Rank" | |||
| 715 | // method of determining percentiles, which is simplest to compute and which | |||
| 716 | // chooses values from those that appear in the input set. | |||
| 717 | std::sort(gTotals_W.begin(), gTotals_W.end()); | |||
| 718 | ||||
| 719 | printf("\n"); | |||
| 720 | printf("Distribution of 'total' values:\n"); | |||
| 721 | printf(" mean = %5.2f W\n", mean); | |||
| 722 | printf(" std dev = %5.2f W\n", popStdDev); | |||
| 723 | printf(" 0th percentile = %5.2f W (min)\n", gTotals_W[0]); | |||
| 724 | printf(" 5th percentile = %5.2f W\n", gTotals_W[ceil(0.05 * n) - 1]); | |||
| 725 | printf(" 25th percentile = %5.2f W\n", gTotals_W[ceil(0.25 * n) - 1]); | |||
| 726 | printf(" 50th percentile = %5.2f W\n", gTotals_W[ceil(0.50 * n) - 1]); | |||
| 727 | printf(" 75th percentile = %5.2f W\n", gTotals_W[ceil(0.75 * n) - 1]); | |||
| 728 | printf(" 95th percentile = %5.2f W\n", gTotals_W[ceil(0.95 * n) - 1]); | |||
| 729 | printf("100th percentile = %5.2f W (max)\n", gTotals_W[n - 1]); | |||
| 730 | ||||
| 731 | exit(0); | |||
| 732 | } | |||
| 733 | ||||
| 734 | static void SigIntHandler(int aSigNum, siginfo_t* aInfo, void* aContext) { | |||
| 735 | Finish(); | |||
| 736 | } | |||
| 737 | ||||
| 738 | static void PrintUsage() { | |||
| 739 | printf( | |||
| 740 | "usage: rapl [options]\n" | |||
| 741 | "\n" | |||
| 742 | "Options:\n" | |||
| 743 | "\n" | |||
| 744 | " -h --help show this message\n" | |||
| 745 | " -i --sample-interval <N> sample every N ms [default=1000]\n" | |||
| 746 | " -n --sample-count <N> get N samples (0 means unlimited) " | |||
| 747 | "[default=0]\n" | |||
| 748 | "\n" | |||
| 749 | #if defined(__APPLE__) | |||
| 750 | "On Mac this program can be run by any user.\n" | |||
| 751 | #elif defined(__linux__) | |||
| 752 | "On Linux this program can only be run by the super-user unless the " | |||
| 753 | "contents\n" | |||
| 754 | "of /proc/sys/kernel/perf_event_paranoid is set to 0 or lower.\n" | |||
| 755 | #else | |||
| 756 | # error Sorry, this platform is not supported | |||
| 757 | #endif | |||
| 758 | "\n"); | |||
| 759 | } | |||
| 760 | ||||
| 761 | int main(int argc, char** argv) { | |||
| 762 | // Process command line options. | |||
| 763 | ||||
| 764 | gArgv0 = argv[0]; | |||
| 765 | ||||
| 766 | // Default values. | |||
| 767 | int sampleInterval_msec = 1000; | |||
| 768 | int sampleCount = 0; | |||
| 769 | ||||
| 770 | struct option longOptions[] = { | |||
| 771 | {"help", no_argument, NULL, 'h'}, | |||
| 772 | {"sample-interval", required_argument, NULL, 'i'}, | |||
| 773 | {"sample-count", required_argument, NULL, 'n'}, | |||
| 774 | {NULL, 0, NULL, 0}}; | |||
| 775 | const char* shortOptions = "hi:n:"; | |||
| 776 | ||||
| 777 | int c; | |||
| 778 | char* endPtr; | |||
| 779 | while ((c = getopt_long(argc, argv, shortOptions, longOptions, NULL)) != -1) { | |||
| ||||
| 780 | switch (c) { | |||
| 781 | case 'h': | |||
| 782 | PrintUsage(); | |||
| 783 | exit(0); | |||
| 784 | ||||
| 785 | case 'i': | |||
| 786 | sampleInterval_msec = strtol(optarg, &endPtr, /* base = */ 10); | |||
| 787 | if (*endPtr) { | |||
| 788 | CmdLineAbort("sample interval is not an integer"); | |||
| 789 | } | |||
| 790 | if (sampleInterval_msec < 1 || sampleInterval_msec > 3600000) { | |||
| 791 | CmdLineAbort("sample interval must be in the range 1..3600000 ms"); | |||
| 792 | } | |||
| 793 | break; | |||
| 794 | ||||
| 795 | case 'n': | |||
| 796 | sampleCount = strtol(optarg, &endPtr, /* base = */ 10); | |||
| 797 | if (*endPtr) { | |||
| 798 | CmdLineAbort("sample count is not an integer"); | |||
| 799 | } | |||
| 800 | if (sampleCount < 0 || sampleCount > 1000000) { | |||
| 801 | CmdLineAbort("sample count must be in the range 0..1000000"); | |||
| 802 | } | |||
| 803 | break; | |||
| 804 | ||||
| 805 | default: | |||
| 806 | CmdLineAbort(NULL); | |||
| 807 | } | |||
| 808 | } | |||
| 809 | ||||
| 810 | // The RAPL MSRs update every ~1 ms, but the measurement period isn't exactly | |||
| 811 | // 1 ms, which means the sample periods are not exact. "Power Measurement | |||
| 812 | // Techniques on Standard Compute Nodes: A Quantitative Comparison" by | |||
| 813 | // Hackenberg et al. suggests the following. | |||
| 814 | // | |||
| 815 | // "RAPL provides energy (and not power) consumption data without | |||
| 816 | // timestamps associated to each counter update. This makes sampling rates | |||
| 817 | // above 20 Samples/s unfeasible if the systematic error should be below | |||
| 818 | // 5%... Constantly polling the RAPL registers will both occupy a processor | |||
| 819 | // core and distort the measurement itself." | |||
| 820 | // | |||
| 821 | // So warn about this case. | |||
| 822 | if (sampleInterval_msec < 50) { | |||
| 823 | fprintf(stderr, | |||
| 824 | "\nWARNING: sample intervals < 50 ms are likely to produce " | |||
| 825 | "inaccurate estimates\n\n"); | |||
| 826 | } | |||
| 827 | gSampleInterval_sec = double(sampleInterval_msec) / 1000; | |||
| 828 | ||||
| 829 | // Initialize the platform-specific RAPL reading machinery. | |||
| 830 | gRapl = new RAPL(); | |||
| 831 | if (!gRapl) { | |||
| 832 | Abort("new RAPL() failed"); | |||
| 833 | } | |||
| 834 | ||||
| 835 | // Install the signal handlers. | |||
| 836 | ||||
| 837 | struct sigaction sa; | |||
| 838 | memset(&sa, 0, sizeof(sa)); | |||
| 839 | sa.sa_flags = SA_RESTART | SA_SIGINFO; | |||
| 840 | // The extra parens around (0) suppress a -Wunreachable-code warning on OS X | |||
| 841 | // where sigemptyset() is a macro that can never fail and always returns 0. | |||
| 842 | if (sigemptyset(&sa.sa_mask) < (0)) { | |||
| 843 | Abort("sigemptyset() failed"); | |||
| 844 | } | |||
| 845 | sa.sa_sigaction = SigAlrmHandler; | |||
| 846 | if (sigaction(SIGALRM, &sa, NULL) < 0) { | |||
| 847 | Abort("sigaction(SIGALRM) failed"); | |||
| 848 | } | |||
| 849 | sa.sa_sigaction = SigIntHandler; | |||
| 850 | if (sigaction(SIGINT, &sa, NULL) < 0) { | |||
| 851 | Abort("sigaction(SIGINT) failed"); | |||
| 852 | } | |||
| 853 | ||||
| 854 | // Set up the timer. | |||
| 855 | struct itimerval timer; | |||
| 856 | timer.it_interval.tv_sec = sampleInterval_msec / 1000; | |||
| 857 | timer.it_interval.tv_usec = (sampleInterval_msec % 1000) * 1000; | |||
| 858 | timer.it_value = timer.it_interval; | |||
| 859 | if (setitimer(ITIMER_REAL, &timer, NULL) < 0) { | |||
| 860 | Abort("setitimer() failed"); | |||
| 861 | } | |||
| 862 | ||||
| 863 | // Print header. | |||
| 864 | PrintAndFlush(" total W = _pkg_ (cores + _gpu_ + other) + _ram_ W\n"); | |||
| 865 | ||||
| 866 | // Take samples. | |||
| 867 | if (sampleCount == 0) { | |||
| 868 | while (true) { | |||
| 869 | pause(); | |||
| 870 | } | |||
| 871 | } else { | |||
| 872 | for (int i = 0; i < sampleCount; i++) { | |||
| 873 | pause(); | |||
| 874 | } | |||
| 875 | } | |||
| 876 | ||||
| 877 | Finish(); | |||
| 878 | ||||
| 879 | return 0; | |||
| 880 | } |