blob: 7a334377f92b978fa642a0071b19f33d7e6fe74e [file] [log] [blame]
// SPDX-License-Identifier: GPL-2.0-only
/*
* turbostat -- show CPU frequency and C-state residency
* on modern Intel and AMD processors.
*
* Copyright (c) 2023 Intel Corporation.
* Len Brown <len.brown@intel.com>
*/
#define _GNU_SOURCE
#include MSRHEADER
#include INTEL_FAMILY_HEADER
#include <stdarg.h>
#include <stdio.h>
#include <err.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/stat.h>
#include <sys/select.h>
#include <sys/resource.h>
#include <fcntl.h>
#include <signal.h>
#include <sys/time.h>
#include <stdlib.h>
#include <getopt.h>
#include <dirent.h>
#include <string.h>
#include <ctype.h>
#include <sched.h>
#include <time.h>
#include <cpuid.h>
#include <sys/capability.h>
#include <errno.h>
#include <math.h>
#include <linux/perf_event.h>
#include <asm/unistd.h>
#include <stdbool.h>
/* Evaluate x and discard the result; marks a value as deliberately unused. */
#define UNUSED(x) ((void)(x))
/*
* The built-in counter list bic[] (defined further down) matches the column headers, except
* 1. built-in only, the sysfs counters are not here -- we learn of those at run-time
* 2. Core and CPU are moved to the end, we can't have strings that contain them
* matching on them for --show and --hide.
*/
/*
* buffer size used by sscanf() for added column names
* Usually truncated to 7 characters, but also handles 18 columns for raw 64-bit counters
*/
#define NAME_BYTES 20
/* size of the path[] member of struct msr_counter */
#define PATH_BYTES 128
/* Counter scope: per CPU, per core, or per package. */
enum counter_scope {
	SCOPE_CPU,
	SCOPE_CORE,
	SCOPE_PACKAGE,
};

/* What a counter's value counts: items, cycles, seconds or microseconds. */
enum counter_type {
	COUNTER_ITEMS,
	COUNTER_CYCLES,
	COUNTER_SECONDS,
	COUNTER_USEC,
};

/* Presentation of a counter: raw value, delta, or percentage. */
enum counter_format {
	FORMAT_RAW,
	FORMAT_DELTA,
	FORMAT_PERCENT,
};
/*
* Description of one counter column.
* The built-in table bic[] fills this structure with positional
* initializers, so the member order here must not change.
*/
struct msr_counter {
unsigned int msr_num;
char name[NAME_BYTES]; /* column name */
char path[PATH_BYTES];
unsigned int width;
enum counter_type type;
enum counter_format format;
struct msr_counter *next; /* list link; NULL in every bic[] entry */
unsigned int flags; /* flag bits, values defined below */
#define FLAGS_HIDE (1 << 0)
#define FLAGS_SHOW (1 << 1)
/* NOTE(review): SYSFS_PERCPU has the same value as FLAGS_SHOW -- confirm the overlap is intended */
#define SYSFS_PERCPU (1 << 1)
};
/*
* Built-in counters.
* Only the name is filled in (plus width and format for "SMI"); every
* other member is zero, empty or NULL.
* Entry i carries the name for bit i of the BIC_* masks (1ULL << i)
* defined after this table, so the two must be kept in the same order.
*/
struct msr_counter bic[] = {
{ 0x0, "usec", "", 0, 0, 0, NULL, 0 },
{ 0x0, "Time_Of_Day_Seconds", "", 0, 0, 0, NULL, 0 },
{ 0x0, "Package", "", 0, 0, 0, NULL, 0 },
{ 0x0, "Node", "", 0, 0, 0, NULL, 0 },
{ 0x0, "Avg_MHz", "", 0, 0, 0, NULL, 0 },
{ 0x0, "Busy%", "", 0, 0, 0, NULL, 0 },
{ 0x0, "Bzy_MHz", "", 0, 0, 0, NULL, 0 },
{ 0x0, "TSC_MHz", "", 0, 0, 0, NULL, 0 },
{ 0x0, "IRQ", "", 0, 0, 0, NULL, 0 },
{ 0x0, "SMI", "", 32, 0, FORMAT_DELTA, NULL, 0 },
{ 0x0, "sysfs", "", 0, 0, 0, NULL, 0 },
{ 0x0, "CPU%c1", "", 0, 0, 0, NULL, 0 },
{ 0x0, "CPU%c3", "", 0, 0, 0, NULL, 0 },
{ 0x0, "CPU%c6", "", 0, 0, 0, NULL, 0 },
{ 0x0, "CPU%c7", "", 0, 0, 0, NULL, 0 },
{ 0x0, "ThreadC", "", 0, 0, 0, NULL, 0 },
{ 0x0, "CoreTmp", "", 0, 0, 0, NULL, 0 },
{ 0x0, "CoreCnt", "", 0, 0, 0, NULL, 0 },
{ 0x0, "PkgTmp", "", 0, 0, 0, NULL, 0 },
{ 0x0, "GFX%rc6", "", 0, 0, 0, NULL, 0 },
{ 0x0, "GFXMHz", "", 0, 0, 0, NULL, 0 },
{ 0x0, "Pkg%pc2", "", 0, 0, 0, NULL, 0 },
{ 0x0, "Pkg%pc3", "", 0, 0, 0, NULL, 0 },
{ 0x0, "Pkg%pc6", "", 0, 0, 0, NULL, 0 },
{ 0x0, "Pkg%pc7", "", 0, 0, 0, NULL, 0 },
{ 0x0, "Pkg%pc8", "", 0, 0, 0, NULL, 0 },
{ 0x0, "Pkg%pc9", "", 0, 0, 0, NULL, 0 },
{ 0x0, "Pk%pc10", "", 0, 0, 0, NULL, 0 },
{ 0x0, "CPU%LPI", "", 0, 0, 0, NULL, 0 },
{ 0x0, "SYS%LPI", "", 0, 0, 0, NULL, 0 },
{ 0x0, "PkgWatt", "", 0, 0, 0, NULL, 0 },
{ 0x0, "CorWatt", "", 0, 0, 0, NULL, 0 },
{ 0x0, "GFXWatt", "", 0, 0, 0, NULL, 0 },
{ 0x0, "PkgCnt", "", 0, 0, 0, NULL, 0 },
{ 0x0, "RAMWatt", "", 0, 0, 0, NULL, 0 },
{ 0x0, "PKG_%", "", 0, 0, 0, NULL, 0 },
{ 0x0, "RAM_%", "", 0, 0, 0, NULL, 0 },
{ 0x0, "Pkg_J", "", 0, 0, 0, NULL, 0 },
{ 0x0, "Cor_J", "", 0, 0, 0, NULL, 0 },
{ 0x0, "GFX_J", "", 0, 0, 0, NULL, 0 },
{ 0x0, "RAM_J", "", 0, 0, 0, NULL, 0 },
{ 0x0, "Mod%c6", "", 0, 0, 0, NULL, 0 },
{ 0x0, "Totl%C0", "", 0, 0, 0, NULL, 0 },
{ 0x0, "Any%C0", "", 0, 0, 0, NULL, 0 },
{ 0x0, "GFX%C0", "", 0, 0, 0, NULL, 0 },
{ 0x0, "CPUGFX%", "", 0, 0, 0, NULL, 0 },
{ 0x0, "Core", "", 0, 0, 0, NULL, 0 },
{ 0x0, "CPU", "", 0, 0, 0, NULL, 0 },
{ 0x0, "APIC", "", 0, 0, 0, NULL, 0 },
{ 0x0, "X2APIC", "", 0, 0, 0, NULL, 0 },
{ 0x0, "Die", "", 0, 0, 0, NULL, 0 },
{ 0x0, "GFXAMHz", "", 0, 0, 0, NULL, 0 },
{ 0x0, "IPC", "", 0, 0, 0, NULL, 0 },
{ 0x0, "CoreThr", "", 0, 0, 0, NULL, 0 },
{ 0x0, "UncMHz", "", 0, 0, 0, NULL, 0 },
};
/* number of entries in bic[] */
#define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter))
/*
* One bit per built-in counter. Bit i corresponds to bic[i], so these
* definitions and the bic[] table must be kept in the same order.
*/
#define BIC_USEC (1ULL << 0)
#define BIC_TOD (1ULL << 1)
#define BIC_Package (1ULL << 2)
#define BIC_Node (1ULL << 3)
#define BIC_Avg_MHz (1ULL << 4)
#define BIC_Busy (1ULL << 5)
#define BIC_Bzy_MHz (1ULL << 6)
#define BIC_TSC_MHz (1ULL << 7)
#define BIC_IRQ (1ULL << 8)
#define BIC_SMI (1ULL << 9)
#define BIC_sysfs (1ULL << 10)
#define BIC_CPU_c1 (1ULL << 11)
#define BIC_CPU_c3 (1ULL << 12)
#define BIC_CPU_c6 (1ULL << 13)
#define BIC_CPU_c7 (1ULL << 14)
#define BIC_ThreadC (1ULL << 15)
#define BIC_CoreTmp (1ULL << 16)
#define BIC_CoreCnt (1ULL << 17)
#define BIC_PkgTmp (1ULL << 18)
#define BIC_GFX_rc6 (1ULL << 19)
#define BIC_GFXMHz (1ULL << 20)
#define BIC_Pkgpc2 (1ULL << 21)
#define BIC_Pkgpc3 (1ULL << 22)
#define BIC_Pkgpc6 (1ULL << 23)
#define BIC_Pkgpc7 (1ULL << 24)
#define BIC_Pkgpc8 (1ULL << 25)
#define BIC_Pkgpc9 (1ULL << 26)
#define BIC_Pkgpc10 (1ULL << 27)
#define BIC_CPU_LPI (1ULL << 28)
#define BIC_SYS_LPI (1ULL << 29)
#define BIC_PkgWatt (1ULL << 30)
#define BIC_CorWatt (1ULL << 31)
#define BIC_GFXWatt (1ULL << 32)
#define BIC_PkgCnt (1ULL << 33)
#define BIC_RAMWatt (1ULL << 34)
#define BIC_PKG__ (1ULL << 35)
#define BIC_RAM__ (1ULL << 36)
#define BIC_Pkg_J (1ULL << 37)
#define BIC_Cor_J (1ULL << 38)
#define BIC_GFX_J (1ULL << 39)
#define BIC_RAM_J (1ULL << 40)
#define BIC_Mod_c6 (1ULL << 41)
#define BIC_Totl_c0 (1ULL << 42)
#define BIC_Any_c0 (1ULL << 43)
#define BIC_GFX_c0 (1ULL << 44)
#define BIC_CPUGFX (1ULL << 45)
#define BIC_Core (1ULL << 46)
#define BIC_CPU (1ULL << 47)
#define BIC_APIC (1ULL << 48)
#define BIC_X2APIC (1ULL << 49)
#define BIC_Die (1ULL << 50)
#define BIC_GFXACTMHz (1ULL << 51)
#define BIC_IPC (1ULL << 52)
#define BIC_CORE_THROT_CNT (1ULL << 53)
#define BIC_UNCORE_MHZ (1ULL << 54)
/*
* Groups of related counters, built by OR-ing the single-bit masks above.
* NOTE(review): BIC_CoreTmp appears in both BIC_THERMAL_PWR and BIC_OTHER -- confirm this is intended.
*/
#define BIC_TOPOLOGY (BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | BIC_Core | BIC_CPU | BIC_Die )
#define BIC_THERMAL_PWR ( BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__)
#define BIC_FREQUENCY ( BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz | BIC_UNCORE_MHZ)
#define BIC_IDLE ( BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX)
#define BIC_OTHER ( BIC_IRQ | BIC_SMI | BIC_ThreadC | BIC_CoreTmp | BIC_IPC)
/* counters cleared from the initial value of bic_enabled */
#define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC)
/* enabled mask: starts with all 64 bits set except the BIC_DISABLED_BY_DEFAULT ones */
unsigned long long bic_enabled = (0xFFFFFFFFFFFFFFFFULL & ~BIC_DISABLED_BY_DEFAULT);
/* present mask: starts with only these five counters marked present */
unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_sysfs | BIC_APIC | BIC_X2APIC;
/*
* Accessors for the bic_enabled / bic_present bitmasks.
* Every macro parameter is parenthesized so that a compound argument
* such as (BIC_A | BIC_B) is treated as a single operand; without the
* parentheses, '&' and '~' would bind to the first term only.
*/
/* non-zero when any of the given counters is both enabled and present */
#define DO_BIC(COUNTER_NAME) (bic_enabled & bic_present & (COUNTER_NAME))
/* non-zero when any of the given counters is present */
#define DO_BIC_READ(COUNTER_NAME) (bic_present & (COUNTER_NAME))
/* set the given bits in bic_enabled */
#define ENABLE_BIC(COUNTER_NAME) (bic_enabled |= (COUNTER_NAME))
/* set the given bits in bic_present */
#define BIC_PRESENT(COUNTER_BIT) (bic_present |= (COUNTER_BIT))
/* clear the given bits in bic_present */
#define BIC_NOT_PRESENT(COUNTER_BIT) (bic_present &= ~(COUNTER_BIT))
/* non-zero when any of the given counters is enabled */
#define BIC_IS_ENABLED(COUNTER_BIT) (bic_enabled & (COUNTER_BIT))
/*
* File-scope state shared across the program.
* Objects without an initializer start out as zero / NULL.
*/
char *proc_stat = "/proc/stat";
FILE *outf; /* stream that slm_bclk() writes its messages to */
int *fd_percpu;
int *fd_instr_count_percpu;
/* both interval variables start at 5 in the seconds member and 0 in the sub-second member */
struct timeval interval_tv = { 5, 0 };
struct timespec interval_ts = { 5, 0 };
unsigned int num_iterations;
unsigned int header_iterations;
unsigned int debug;
unsigned int quiet; /* when non-zero, slm_bclk() does not print the BCLK it found */
unsigned int shown;
unsigned int sums_need_wide_columns;
unsigned int rapl_joules;
unsigned int summary_only;
unsigned int list_header_only;
unsigned int dump_only;
unsigned int has_aperf;
unsigned int has_epb;
unsigned int has_turbo;
unsigned int is_hybrid;
unsigned int units = 1000000; /* MHz etc */
/* vendor flags, tested by probe_platform_features() */
unsigned int genuine_intel;
unsigned int authentic_amd;
unsigned int hygon_genuine;
/* max_extended_level is compared against CPUID leaf 0x80000007 in probe_platform_features() */
unsigned int max_level, max_extended_level;
unsigned int has_invariant_tsc;
unsigned int aperf_mperf_multiplier = 1;
double bclk;
double base_hz;
unsigned int has_base_hz;
double tsc_tweak = 1.0;
unsigned int show_pkg_only;
unsigned int show_core_only;
char *output_buffer, *outp;
unsigned int do_dts;
unsigned int do_ptm;
unsigned int do_ipc;
unsigned long long gfx_cur_rc6_ms;
unsigned long long cpuidle_cur_cpu_lpi_us;
unsigned long long cpuidle_cur_sys_lpi_us;
unsigned int gfx_cur_mhz;
unsigned int gfx_act_mhz;
unsigned int tj_max;
unsigned int tj_max_override;
double rapl_power_units, rapl_time_units;
double rapl_dram_energy_units, rapl_energy_units;
double rapl_joule_counter_range;
unsigned int crystal_hz;
unsigned long long tsc_hz;
int base_cpu; /* CPU number slm_bclk() passes to get_msr() */
unsigned int has_hwp; /* IA32_PM_ENABLE, IA32_HWP_CAPABILITIES */
/* IA32_HWP_REQUEST, IA32_HWP_STATUS */
unsigned int has_hwp_notify; /* IA32_HWP_INTERRUPT */
unsigned int has_hwp_activity_window; /* IA32_HWP_REQUEST[bits 41:32] */
unsigned int has_hwp_epp; /* IA32_HWP_REQUEST[bits 31:24] */
unsigned int has_hwp_pkg; /* IA32_HWP_REQUEST_PKG */
unsigned int first_counter_read = 1;
int ignore_stdin;
/* Defined outside this part of the file; slm_bclk() treats a non-zero return as a failed read. */
int get_msr(int cpu, off_t offset, unsigned long long *msr);
/* Model specific support Start */
/* List of features that may diverge among different platforms */
/*
* One instance of this structure exists per supported platform group
* (the *_features objects below). They use designated initializers, so
* any member not named in an instance is zero / false.
* The spelling "has_cst_auto_convension" is the member's actual name and
* is referenced by the feature tables.
*/
struct platform_features {
bool has_msr_misc_feature_control; /* MSR_MISC_FEATURE_CONTROL */
bool has_msr_misc_pwr_mgmt; /* MSR_MISC_PWR_MGMT */
bool has_nhm_msrs; /* MSR_PLATFORM_INFO, MSR_IA32_TEMPERATURE_TARGET, MSR_SMI_COUNT, MSR_PKG_CST_CONFIG_CONTROL, MSR_IA32_POWER_CTL, TRL MSRs */
bool has_config_tdp; /* MSR_CONFIG_TDP_NOMINAL/LEVEL_1/LEVEL_2/CONTROL, MSR_TURBO_ACTIVATION_RATIO */
int bclk_freq; /* CPU base clock */
int crystal_freq; /* Crystal clock to use when not available from CPUID.15 */
int supported_cstates; /* Core cstates and Package cstates supported */
int cst_limit; /* MSR_PKG_CST_CONFIG_CONTROL */
bool has_cst_auto_convension; /* AUTOMATIC_CSTATE_CONVERSION bit in MSR_PKG_CST_CONFIG_CONTROL */
bool has_irtl_msrs; /* MSR_PKGC3/PKGC6/PKGC7/PKGC8/PKGC9/PKGC10_IRTL */
bool has_msr_core_c1_res; /* MSR_CORE_C1_RES */
bool has_msr_module_c6_res_ms; /* MSR_MODULE_C6_RES_MS */
bool has_msr_c6_demotion_policy_config; /* MSR_CC6_DEMOTION_POLICY_CONFIG/MSR_MC6_DEMOTION_POLICY_CONFIG */
bool has_msr_atom_pkg_c6_residency; /* MSR_ATOM_PKG_C6_RESIDENCY */
bool has_msr_knl_core_c6_residency; /* MSR_KNL_CORE_C6_RESIDENCY */
bool has_ext_cst_msrs; /* MSR_PKG_WEIGHTED_CORE_C0_RES/MSR_PKG_ANY_CORE_C0_RES/MSR_PKG_ANY_GFXE_C0_RES/MSR_PKG_BOTH_CORE_GFXE_C0_RES */
bool has_cst_prewake_bit; /* Cstate prewake bit in MSR_IA32_POWER_CTL */
int trl_msrs; /* MSR_TURBO_RATIO_LIMIT/LIMIT1/LIMIT2/SECONDARY, Atom TRL MSRs */
int plr_msrs; /* MSR_CORE/GFX/RING_PERF_LIMIT_REASONS */
int rapl_msrs; /* RAPL PKG/DRAM/CORE/GFX MSRs, AMD RAPL MSRs */
bool has_per_core_rapl; /* Indicates cores energy collection is per-core, not per-package. AMD specific for now */
bool has_rapl_divisor; /* Divisor for Energy unit raw value from MSR_RAPL_POWER_UNIT */
bool has_fixed_rapl_unit; /* Fixed Energy Unit used for DRAM RAPL Domain */
int rapl_quirk_tdp; /* Hardcoded TDP value when cannot be retrieved from hardware */
int tcc_offset_bits; /* TCC Offset bits in MSR_IA32_TEMPERATURE_TARGET */
bool enable_tsc_tweak; /* Use CPU Base freq instead of TSC freq for aperf/mperf counter */
bool need_perf_multiplier; /* mperf/aperf multiplier */
};
/*
* One row of the model lookup table turbostat_pdata[].
* That table uses positional initializers, so the member order matters.
*/
struct platform_data {
unsigned int model; /* compared against the model passed to probe_platform_features() */
const struct platform_features *features; /* feature set for that model; NULL marks the end of the table */
};
/*
 * For BCLK
 * Values stored in platform_features.bclk_freq. Numbering starts at 1,
 * so a feature set that leaves bclk_freq unset (0) matches none of these.
 */
enum bclk_freq {
	BCLK_100MHZ = 1,
	BCLK_133MHZ = 2,
	BCLK_SLV = 3,
};
/* Number of entries in slm_freq_table[], indexed by the low 4 bits of MSR_FSB_FREQ. */
#define SLM_BCLK_FREQS 5
double slm_freq_table[SLM_BCLK_FREQS] = { 83.3, 100.0, 133.3, 116.7, 80.0 };

/*
 * slm_bclk() - look up the base clock from MSR_FSB_FREQ on base_cpu.
 *
 * The low 4 bits of the MSR select an entry of slm_freq_table[].
 * A failed MSR read is reported on outf and the lookup continues with
 * whatever value is then in the local (preset to 3).  An index outside
 * the table is reported and replaced by index 3.  Unless 'quiet' is set,
 * the chosen frequency is printed.
 *
 * Return: the selected table entry (printed with the unit "Mhz").
 */
double slm_bclk(void)
{
	unsigned long long fsb_msr = 3;
	unsigned int idx;
	double mhz;

	if (get_msr(base_cpu, MSR_FSB_FREQ, &fsb_msr) != 0)
		fprintf(outf, "SLM BCLK: unknown\n");

	idx = fsb_msr & 0xf;
	if (!(idx < SLM_BCLK_FREQS)) {
		fprintf(outf, "SLM BCLK[%d] invalid\n", idx);
		idx = 3;
	}

	mhz = slm_freq_table[idx];
	if (quiet == 0)
		fprintf(outf, "SLM BCLK: %.1f Mhz\n", mhz);

	return mhz;
}
/*
 * For Package cstate limit
 * Values stored in platform_features.cst_limit. Numbering starts at 1,
 * so an unset cst_limit (0) matches none of these.
 */
enum package_cstate_limit {
	CST_LIMIT_NHM = 1,
	CST_LIMIT_SNB = 2,
	CST_LIMIT_HSW = 3,
	CST_LIMIT_SKX = 4,
	CST_LIMIT_ICX = 5,
	CST_LIMIT_SLV = 6,
	CST_LIMIT_AMT = 7,
	CST_LIMIT_KNL = 8,
	CST_LIMIT_GMT = 9,
};
/* For Turbo Ratio Limit MSRs */
/* Flag bits; the feature tables OR them together in platform_features.trl_msrs. */
enum turbo_ratio_limit_msrs {
TRL_BASE = BIT(0),
TRL_LIMIT1 = BIT(1),
TRL_LIMIT2 = BIT(2),
TRL_ATOM = BIT(3),
TRL_KNL = BIT(4),
TRL_CORECOUNT = BIT(5),
};
/* For Perf Limit Reason MSRs */
/* Flag bits; the feature tables OR them together in platform_features.plr_msrs. */
enum perf_limit_reason_msrs {
PLR_CORE = BIT(0),
PLR_GFX = BIT(1),
PLR_RING = BIT(2),
};
/* For RAPL MSRs */
/*
* One flag bit per RAPL MSR; the trailing comment on each line gives the
* MSR address and name. The feature tables OR these bits (usually through
* the RAPL_* group macros) into platform_features.rapl_msrs.
*/
enum rapl_msrs {
RAPL_PKG_POWER_LIMIT = BIT(0), /* 0x610 MSR_PKG_POWER_LIMIT */
RAPL_PKG_ENERGY_STATUS = BIT(1), /* 0x611 MSR_PKG_ENERGY_STATUS */
RAPL_PKG_PERF_STATUS = BIT(2), /* 0x613 MSR_PKG_PERF_STATUS */
RAPL_PKG_POWER_INFO = BIT(3), /* 0x614 MSR_PKG_POWER_INFO */
RAPL_DRAM_POWER_LIMIT = BIT(4), /* 0x618 MSR_DRAM_POWER_LIMIT */
RAPL_DRAM_ENERGY_STATUS = BIT(5), /* 0x619 MSR_DRAM_ENERGY_STATUS */
RAPL_DRAM_PERF_STATUS = BIT(6), /* 0x61b MSR_DRAM_PERF_STATUS */
RAPL_DRAM_POWER_INFO = BIT(7), /* 0x61c MSR_DRAM_POWER_INFO */
RAPL_CORE_POWER_LIMIT = BIT(8), /* 0x638 MSR_PP0_POWER_LIMIT */
RAPL_CORE_ENERGY_STATUS = BIT(9), /* 0x639 MSR_PP0_ENERGY_STATUS */
RAPL_CORE_POLICY = BIT(10), /* 0x63a MSR_PP0_POLICY */
RAPL_GFX_POWER_LIMIT = BIT(11), /* 0x640 MSR_PP1_POWER_LIMIT */
RAPL_GFX_ENERGY_STATUS = BIT(12), /* 0x641 MSR_PP1_ENERGY_STATUS */
RAPL_GFX_POLICY = BIT(13), /* 0x642 MSR_PP1_POLICY */
RAPL_AMD_PWR_UNIT = BIT(14), /* 0xc0010299 MSR_AMD_RAPL_POWER_UNIT */
RAPL_AMD_CORE_ENERGY_STAT = BIT(15), /* 0xc001029a MSR_AMD_CORE_ENERGY_STATUS */
RAPL_AMD_PKG_ENERGY_STAT = BIT(16), /* 0xc001029b MSR_AMD_PKG_ENERGY_STATUS */
};
/*
* Groups of enum rapl_msrs bits.
* The short names cover the energy-status and power-limit MSRs of a
* domain; the *_ALL names add the remaining MSRs of that domain.
*/
#define RAPL_PKG (RAPL_PKG_ENERGY_STATUS | RAPL_PKG_POWER_LIMIT)
#define RAPL_DRAM (RAPL_DRAM_ENERGY_STATUS | RAPL_DRAM_POWER_LIMIT)
#define RAPL_CORE (RAPL_CORE_ENERGY_STATUS | RAPL_CORE_POWER_LIMIT)
#define RAPL_GFX (RAPL_GFX_POWER_LIMIT | RAPL_GFX_ENERGY_STATUS)
#define RAPL_PKG_ALL (RAPL_PKG | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO)
#define RAPL_DRAM_ALL (RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_DRAM_POWER_INFO)
#define RAPL_CORE_ALL (RAPL_CORE | RAPL_CORE_POLICY)
/* was misspelled RAPL_GFX_POLIGY, an undefined name that would fail to compile on first use */
#define RAPL_GFX_ALL (RAPL_GFX | RAPL_GFX_POLICY)
#define RAPL_AMD_F17H (RAPL_AMD_PWR_UNIT | RAPL_AMD_CORE_ENERGY_STAT | RAPL_AMD_PKG_ENERGY_STAT)
/* For Cstates */
/*
* One flag bit per C-state: CC* for core states, PC* for package states.
* The feature tables OR them together in platform_features.supported_cstates.
*/
enum cstates {
CC1 = BIT(0),
CC3 = BIT(1),
CC6 = BIT(2),
CC7 = BIT(3),
PC2 = BIT(4),
PC3 = BIT(5),
PC6 = BIT(6),
PC7 = BIT(7),
PC8 = BIT(8),
PC9 = BIT(9),
PC10 = BIT(10),
};
/* turbostat_pdata[] maps NEHALEM, NEHALEM_G, NEHALEM_EP, WESTMERE and WESTMERE_EP to this set. */
static const struct platform_features nhm_features = {
.has_msr_misc_pwr_mgmt = 1,
.has_nhm_msrs = 1,
.bclk_freq = BCLK_133MHZ,
.supported_cstates = CC1 | CC3 | CC6 | PC3 | PC6,
.cst_limit = CST_LIMIT_NHM,
.trl_msrs = TRL_BASE,
};
/* turbostat_pdata[] maps NEHALEM_EX and WESTMERE_EX to this set; same as nhm_features but with no trl_msrs. */
static const struct platform_features nhx_features = {
.has_msr_misc_pwr_mgmt = 1,
.has_nhm_msrs = 1,
.bclk_freq = BCLK_133MHZ,
.supported_cstates = CC1 | CC3 | CC6 | PC3 | PC6,
.cst_limit = CST_LIMIT_NHM,
};
/* turbostat_pdata[] maps SANDYBRIDGE to this set. */
static const struct platform_features snb_features = {
.has_msr_misc_feature_control = 1,
.has_msr_misc_pwr_mgmt = 1,
.has_nhm_msrs = 1,
.bclk_freq = BCLK_100MHZ,
.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
.cst_limit = CST_LIMIT_SNB,
.has_irtl_msrs = 1,
.trl_msrs = TRL_BASE,
.rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
};
/* turbostat_pdata[] maps SANDYBRIDGE_X to this set. */
static const struct platform_features snx_features = {
.has_msr_misc_feature_control = 1,
.has_msr_misc_pwr_mgmt = 1,
.has_nhm_msrs = 1,
.bclk_freq = BCLK_100MHZ,
.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
.cst_limit = CST_LIMIT_SNB,
.has_irtl_msrs = 1,
.trl_msrs = TRL_BASE,
.rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM_ALL,
};
/* turbostat_pdata[] maps IVYBRIDGE to this set. */
static const struct platform_features ivb_features = {
.has_msr_misc_feature_control = 1,
.has_msr_misc_pwr_mgmt = 1,
.has_nhm_msrs = 1,
.has_config_tdp = 1,
.bclk_freq = BCLK_100MHZ,
.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
.cst_limit = CST_LIMIT_SNB,
.has_irtl_msrs = 1,
.trl_msrs = TRL_BASE,
.rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
};
/* turbostat_pdata[] maps IVYBRIDGE_X to this set. */
static const struct platform_features ivx_features = {
.has_msr_misc_feature_control = 1,
.has_msr_misc_pwr_mgmt = 1,
.has_nhm_msrs = 1,
.bclk_freq = BCLK_100MHZ,
.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
.cst_limit = CST_LIMIT_SNB,
.has_irtl_msrs = 1,
.trl_msrs = TRL_BASE | TRL_LIMIT1,
.rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM_ALL,
};
/* turbostat_pdata[] maps HASWELL to this set. */
static const struct platform_features hsw_features = {
.has_msr_misc_feature_control = 1,
.has_msr_misc_pwr_mgmt = 1,
.has_nhm_msrs = 1,
.has_config_tdp = 1,
.bclk_freq = BCLK_100MHZ,
.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
.cst_limit = CST_LIMIT_HSW,
.has_irtl_msrs = 1,
.trl_msrs = TRL_BASE,
.plr_msrs = PLR_CORE | PLR_GFX | PLR_RING,
.rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
};
/* turbostat_pdata[] maps HASWELL_X to this set. */
static const struct platform_features hsx_features = {
.has_msr_misc_feature_control = 1,
.has_msr_misc_pwr_mgmt = 1,
.has_nhm_msrs = 1,
.has_config_tdp = 1,
.bclk_freq = BCLK_100MHZ,
.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
.cst_limit = CST_LIMIT_HSW,
.has_irtl_msrs = 1,
.trl_msrs = TRL_BASE | TRL_LIMIT1 | TRL_LIMIT2,
.plr_msrs = PLR_CORE | PLR_RING,
.rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
.has_fixed_rapl_unit = 1,
};
/* turbostat_pdata[] maps HASWELL_L to this set. */
static const struct platform_features hswl_features = {
.has_msr_misc_feature_control = 1,
.has_msr_misc_pwr_mgmt = 1,
.has_nhm_msrs = 1,
.has_config_tdp = 1,
.bclk_freq = BCLK_100MHZ,
.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10,
.cst_limit = CST_LIMIT_HSW,
.has_irtl_msrs = 1,
.trl_msrs = TRL_BASE,
.plr_msrs = PLR_CORE | PLR_GFX | PLR_RING,
.rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
};
/* turbostat_pdata[] maps HASWELL_G to this set. */
static const struct platform_features hswg_features = {
.has_msr_misc_feature_control = 1,
.has_msr_misc_pwr_mgmt = 1,
.has_nhm_msrs = 1,
.has_config_tdp = 1,
.bclk_freq = BCLK_100MHZ,
.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
.cst_limit = CST_LIMIT_HSW,
.has_irtl_msrs = 1,
.trl_msrs = TRL_BASE,
.plr_msrs = PLR_CORE | PLR_GFX | PLR_RING,
.rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
};
/* turbostat_pdata[] maps BROADWELL to this set. */
static const struct platform_features bdw_features = {
.has_msr_misc_feature_control = 1,
.has_msr_misc_pwr_mgmt = 1,
.has_nhm_msrs = 1,
.has_config_tdp = 1,
.bclk_freq = BCLK_100MHZ,
.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10,
.cst_limit = CST_LIMIT_HSW,
.has_irtl_msrs = 1,
.trl_msrs = TRL_BASE,
.rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
};
/* turbostat_pdata[] maps BROADWELL_G to this set. */
static const struct platform_features bdwg_features = {
.has_msr_misc_feature_control = 1,
.has_msr_misc_pwr_mgmt = 1,
.has_nhm_msrs = 1,
.has_config_tdp = 1,
.bclk_freq = BCLK_100MHZ,
.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
.cst_limit = CST_LIMIT_HSW,
.has_irtl_msrs = 1,
.trl_msrs = TRL_BASE,
.rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
};
/* turbostat_pdata[] maps BROADWELL_X and BROADWELL_D to this set. */
static const struct platform_features bdx_features = {
.has_msr_misc_feature_control = 1,
.has_msr_misc_pwr_mgmt = 1,
.has_nhm_msrs = 1,
.has_config_tdp = 1,
.bclk_freq = BCLK_100MHZ,
.supported_cstates = CC1 | CC3 | CC6 | PC2 | PC3 | PC6,
.cst_limit = CST_LIMIT_HSW,
.has_irtl_msrs = 1,
.has_cst_auto_convension = 1,
.trl_msrs = TRL_BASE,
.rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
.has_fixed_rapl_unit = 1,
};
/* turbostat_pdata[] maps SKYLAKE_L, SKYLAKE, KABYLAKE_L, KABYLAKE, COMETLAKE and COMETLAKE_L to this set. */
static const struct platform_features skl_features = {
.has_msr_misc_feature_control = 1,
.has_msr_misc_pwr_mgmt = 1,
.has_nhm_msrs = 1,
.has_config_tdp = 1,
.bclk_freq = BCLK_100MHZ,
.crystal_freq = 24000000,
.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10,
.cst_limit = CST_LIMIT_HSW,
.has_irtl_msrs = 1,
.has_ext_cst_msrs = 1,
.trl_msrs = TRL_BASE,
.tcc_offset_bits = 6,
.rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX,
.enable_tsc_tweak = 1,
};
/*
* turbostat_pdata[] maps CANNONLAKE_L, ICELAKE_L, ICELAKE_NNPI, ROCKETLAKE,
* TIGERLAKE_L, TIGERLAKE, LAKEFIELD, METEORLAKE, METEORLAKE_L, ARROWLAKE
* and LUNARLAKE_M to this set.
*/
static const struct platform_features cnl_features = {
.has_msr_misc_feature_control = 1,
.has_msr_misc_pwr_mgmt = 1,
.has_nhm_msrs = 1,
.has_config_tdp = 1,
.bclk_freq = BCLK_100MHZ,
.supported_cstates = CC1 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10,
.cst_limit = CST_LIMIT_HSW,
.has_irtl_msrs = 1,
.has_msr_core_c1_res = 1,
.has_ext_cst_msrs = 1,
.trl_msrs = TRL_BASE,
.tcc_offset_bits = 6,
.rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX,
.enable_tsc_tweak = 1,
};
/*
* turbostat_pdata[] maps ALDERLAKE, ALDERLAKE_L, RAPTORLAKE, RAPTORLAKE_P,
* RAPTORLAKE_S and ATOM_GRACEMONT to this set.
*/
static const struct platform_features adl_features = {
.has_msr_misc_feature_control = 1,
.has_msr_misc_pwr_mgmt = 1,
.has_nhm_msrs = 1,
.has_config_tdp = 1,
.bclk_freq = BCLK_100MHZ,
.supported_cstates = CC1 | CC6 | CC7 | PC2 | PC3 | PC6 | PC8 | PC10,
.cst_limit = CST_LIMIT_HSW,
.has_irtl_msrs = 1,
.has_msr_core_c1_res = 1,
.has_ext_cst_msrs = 1,
.trl_msrs = TRL_BASE,
.tcc_offset_bits = 6,
.rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX,
.enable_tsc_tweak = 1,
};
/* turbostat_pdata[] maps SKYLAKE_X to this set. */
static const struct platform_features skx_features = {
.has_msr_misc_feature_control = 1,
.has_msr_misc_pwr_mgmt = 1,
.has_nhm_msrs = 1,
.has_config_tdp = 1,
.bclk_freq = BCLK_100MHZ,
.supported_cstates = CC1 | CC6 | PC2 | PC6,
.cst_limit = CST_LIMIT_SKX,
.has_irtl_msrs = 1,
.has_cst_auto_convension = 1,
.trl_msrs = TRL_BASE | TRL_CORECOUNT,
.rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
.has_fixed_rapl_unit = 1,
};
/* turbostat_pdata[] maps ICELAKE_X and ICELAKE_D to this set. */
static const struct platform_features icx_features = {
.has_msr_misc_feature_control = 1,
.has_msr_misc_pwr_mgmt = 1,
.has_nhm_msrs = 1,
.has_config_tdp = 1,
.bclk_freq = BCLK_100MHZ,
.supported_cstates = CC1 | CC6 | PC2 | PC6,
.cst_limit = CST_LIMIT_ICX,
.has_irtl_msrs = 1,
.has_cst_prewake_bit = 1,
.trl_msrs = TRL_BASE | TRL_CORECOUNT,
.rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
.has_fixed_rapl_unit = 1,
};
/* turbostat_pdata[] maps SAPPHIRERAPIDS_X, EMERALDRAPIDS_X and GRANITERAPIDS_X to this set. */
static const struct platform_features spr_features = {
.has_msr_misc_feature_control = 1,
.has_msr_misc_pwr_mgmt = 1,
.has_nhm_msrs = 1,
.has_config_tdp = 1,
.bclk_freq = BCLK_100MHZ,
.supported_cstates = CC1 | CC6 | PC2 | PC6,
.cst_limit = CST_LIMIT_SKX,
.has_msr_core_c1_res = 1,
.has_irtl_msrs = 1,
.has_cst_prewake_bit = 1,
.trl_msrs = TRL_BASE | TRL_CORECOUNT,
.rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
};
/* turbostat_pdata[] maps ATOM_CRESTMONT_X to this set. */
static const struct platform_features srf_features = {
.has_msr_misc_feature_control = 1,
.has_msr_misc_pwr_mgmt = 1,
.has_nhm_msrs = 1,
.has_config_tdp = 1,
.bclk_freq = BCLK_100MHZ,
.supported_cstates = CC1 | CC6 | PC2 | PC6,
.cst_limit = CST_LIMIT_SKX,
.has_msr_core_c1_res = 1,
.has_msr_module_c6_res_ms = 1,
.has_irtl_msrs = 1,
.has_cst_prewake_bit = 1,
.trl_msrs = TRL_BASE | TRL_CORECOUNT,
.rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
};
/* turbostat_pdata[] maps ATOM_CRESTMONT to this set; unlike srf_features it lists no package C-states. */
static const struct platform_features grr_features = {
.has_msr_misc_feature_control = 1,
.has_msr_misc_pwr_mgmt = 1,
.has_nhm_msrs = 1,
.has_config_tdp = 1,
.bclk_freq = BCLK_100MHZ,
.supported_cstates = CC1 | CC6,
.cst_limit = CST_LIMIT_SKX,
.has_msr_core_c1_res = 1,
.has_msr_module_c6_res_ms = 1,
.has_irtl_msrs = 1,
.has_cst_prewake_bit = 1,
.trl_msrs = TRL_BASE | TRL_CORECOUNT,
.rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
};
/* turbostat_pdata[] maps ATOM_SILVERMONT to this set. */
static const struct platform_features slv_features = {
.has_nhm_msrs = 1,
.bclk_freq = BCLK_SLV,
.supported_cstates = CC1 | CC6 | PC6,
.cst_limit = CST_LIMIT_SLV,
.has_msr_core_c1_res = 1,
.has_msr_module_c6_res_ms = 1,
.has_msr_c6_demotion_policy_config = 1,
.has_msr_atom_pkg_c6_residency = 1,
.trl_msrs = TRL_ATOM,
.rapl_msrs = RAPL_PKG | RAPL_CORE,
.has_rapl_divisor = 1,
.rapl_quirk_tdp = 30,
};
/* turbostat_pdata[] maps ATOM_SILVERMONT_D to this set. */
static const struct platform_features slvd_features = {
.has_msr_misc_pwr_mgmt = 1,
.has_nhm_msrs = 1,
.bclk_freq = BCLK_SLV,
.supported_cstates = CC1 | CC6 | PC3 | PC6,
.cst_limit = CST_LIMIT_SLV,
.has_msr_atom_pkg_c6_residency = 1,
.trl_msrs = TRL_BASE,
.rapl_msrs = RAPL_PKG | RAPL_CORE,
.rapl_quirk_tdp = 30,
};
/* turbostat_pdata[] maps ATOM_AIRMONT to this set. */
static const struct platform_features amt_features = {
.has_nhm_msrs = 1,
.bclk_freq = BCLK_133MHZ,
.supported_cstates = CC1 | CC3 | CC6 | PC3 | PC6,
.cst_limit = CST_LIMIT_AMT,
.trl_msrs = TRL_BASE,
};
/* turbostat_pdata[] maps ATOM_GOLDMONT to this set. */
static const struct platform_features gmt_features = {
.has_msr_misc_pwr_mgmt = 1,
.has_nhm_msrs = 1,
.bclk_freq = BCLK_100MHZ,
.crystal_freq = 19200000,
.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10,
.cst_limit = CST_LIMIT_GMT,
.has_irtl_msrs = 1,
.trl_msrs = TRL_BASE | TRL_CORECOUNT,
.rapl_msrs = RAPL_PKG | RAPL_PKG_POWER_INFO,
};
/* turbostat_pdata[] maps ATOM_GOLDMONT_D to this set. */
static const struct platform_features gmtd_features = {
.has_msr_misc_pwr_mgmt = 1,
.has_nhm_msrs = 1,
.bclk_freq = BCLK_100MHZ,
.crystal_freq = 25000000,
.supported_cstates = CC1 | CC6 | PC2 | PC6,
.cst_limit = CST_LIMIT_GMT,
.has_irtl_msrs = 1,
.has_msr_core_c1_res = 1,
.trl_msrs = TRL_BASE | TRL_CORECOUNT,
.rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_CORE_ENERGY_STATUS,
};
/* turbostat_pdata[] maps ATOM_GOLDMONT_PLUS to this set. */
static const struct platform_features gmtp_features = {
.has_msr_misc_pwr_mgmt = 1,
.has_nhm_msrs = 1,
.bclk_freq = BCLK_100MHZ,
.crystal_freq = 19200000,
.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10,
.cst_limit = CST_LIMIT_GMT,
.has_irtl_msrs = 1,
.trl_msrs = TRL_BASE,
.rapl_msrs = RAPL_PKG | RAPL_PKG_POWER_INFO,
};
/* turbostat_pdata[] maps ATOM_TREMONT and ATOM_TREMONT_L to this set. */
static const struct platform_features tmt_features = {
.has_msr_misc_pwr_mgmt = 1,
.has_nhm_msrs = 1,
.bclk_freq = BCLK_100MHZ,
.supported_cstates = CC1 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10,
.cst_limit = CST_LIMIT_GMT,
.has_irtl_msrs = 1,
.trl_msrs = TRL_BASE,
.rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX,
.enable_tsc_tweak = 1,
};
/* turbostat_pdata[] maps ATOM_TREMONT_D to this set. */
static const struct platform_features tmtd_features = {
.has_msr_misc_pwr_mgmt = 1,
.has_nhm_msrs = 1,
.bclk_freq = BCLK_100MHZ,
.supported_cstates = CC1 | CC6,
.cst_limit = CST_LIMIT_GMT,
.has_irtl_msrs = 1,
.trl_msrs = TRL_BASE | TRL_CORECOUNT,
.rapl_msrs = RAPL_PKG_ALL,
};
/* turbostat_pdata[] maps XEON_PHI_KNL and XEON_PHI_KNM to this set. */
static const struct platform_features knl_features = {
.has_msr_misc_pwr_mgmt = 1,
.has_nhm_msrs = 1,
.has_config_tdp = 1,
.bclk_freq = BCLK_100MHZ,
.supported_cstates = CC1 | CC6 | PC3 | PC6,
.cst_limit = CST_LIMIT_KNL,
.has_msr_knl_core_c6_residency = 1,
.trl_msrs = TRL_KNL,
.rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
.has_fixed_rapl_unit = 1,
.need_perf_multiplier = 1,
};
/*
* All-zero feature set. probe_platform_features() starts from this and
* keeps it whenever no more specific set is selected.
*/
static const struct platform_features default_features = {
};
/*
* Selected by probe_platform_features() on AMD/Hygon CPUs of family 0x17
* or later when CPUID leaf 0x80000007 reports EDX bit 14.
*/
static const struct platform_features amd_features_with_rapl = {
.rapl_msrs = RAPL_AMD_F17H,
.has_per_core_rapl = 1,
.rapl_quirk_tdp = 280, /* This is the max stock TDP of HEDT/Server Fam17h+ chips */
};
/*
* Model number -> feature set lookup table.
* probe_platform_features() scans it from the top for Intel family 6
* CPUs and stops at the first matching model, or at the terminating
* entry whose features pointer is NULL.
*/
static const struct platform_data turbostat_pdata[] = {
{ INTEL_FAM6_NEHALEM, &nhm_features },
{ INTEL_FAM6_NEHALEM_G, &nhm_features },
{ INTEL_FAM6_NEHALEM_EP, &nhm_features },
{ INTEL_FAM6_NEHALEM_EX, &nhx_features },
{ INTEL_FAM6_WESTMERE, &nhm_features },
{ INTEL_FAM6_WESTMERE_EP, &nhm_features },
{ INTEL_FAM6_WESTMERE_EX, &nhx_features },
{ INTEL_FAM6_SANDYBRIDGE, &snb_features },
{ INTEL_FAM6_SANDYBRIDGE_X, &snx_features },
{ INTEL_FAM6_IVYBRIDGE, &ivb_features },
{ INTEL_FAM6_IVYBRIDGE_X, &ivx_features },
{ INTEL_FAM6_HASWELL, &hsw_features },
{ INTEL_FAM6_HASWELL_X, &hsx_features },
{ INTEL_FAM6_HASWELL_L, &hswl_features },
{ INTEL_FAM6_HASWELL_G, &hswg_features },
{ INTEL_FAM6_BROADWELL, &bdw_features },
{ INTEL_FAM6_BROADWELL_G, &bdwg_features },
{ INTEL_FAM6_BROADWELL_X, &bdx_features },
{ INTEL_FAM6_BROADWELL_D, &bdx_features },
{ INTEL_FAM6_SKYLAKE_L, &skl_features },
{ INTEL_FAM6_SKYLAKE, &skl_features },
{ INTEL_FAM6_SKYLAKE_X, &skx_features },
{ INTEL_FAM6_KABYLAKE_L, &skl_features },
{ INTEL_FAM6_KABYLAKE, &skl_features },
{ INTEL_FAM6_COMETLAKE, &skl_features },
{ INTEL_FAM6_COMETLAKE_L, &skl_features },
{ INTEL_FAM6_CANNONLAKE_L, &cnl_features },
{ INTEL_FAM6_ICELAKE_X, &icx_features },
{ INTEL_FAM6_ICELAKE_D, &icx_features },
{ INTEL_FAM6_ICELAKE_L, &cnl_features },
{ INTEL_FAM6_ICELAKE_NNPI, &cnl_features },
{ INTEL_FAM6_ROCKETLAKE, &cnl_features },
{ INTEL_FAM6_TIGERLAKE_L, &cnl_features },
{ INTEL_FAM6_TIGERLAKE, &cnl_features },
{ INTEL_FAM6_SAPPHIRERAPIDS_X, &spr_features },
{ INTEL_FAM6_EMERALDRAPIDS_X, &spr_features },
{ INTEL_FAM6_GRANITERAPIDS_X, &spr_features },
{ INTEL_FAM6_LAKEFIELD, &cnl_features },
{ INTEL_FAM6_ALDERLAKE, &adl_features },
{ INTEL_FAM6_ALDERLAKE_L, &adl_features },
{ INTEL_FAM6_RAPTORLAKE, &adl_features },
{ INTEL_FAM6_RAPTORLAKE_P, &adl_features },
{ INTEL_FAM6_RAPTORLAKE_S, &adl_features },
{ INTEL_FAM6_METEORLAKE, &cnl_features },
{ INTEL_FAM6_METEORLAKE_L, &cnl_features },
{ INTEL_FAM6_ARROWLAKE, &cnl_features },
{ INTEL_FAM6_LUNARLAKE_M, &cnl_features },
{ INTEL_FAM6_ATOM_SILVERMONT, &slv_features },
{ INTEL_FAM6_ATOM_SILVERMONT_D, &slvd_features },
{ INTEL_FAM6_ATOM_AIRMONT, &amt_features },
{ INTEL_FAM6_ATOM_GOLDMONT, &gmt_features },
{ INTEL_FAM6_ATOM_GOLDMONT_D, &gmtd_features },
{ INTEL_FAM6_ATOM_GOLDMONT_PLUS, &gmtp_features },
{ INTEL_FAM6_ATOM_TREMONT_D, &tmtd_features },
{ INTEL_FAM6_ATOM_TREMONT, &tmt_features },
{ INTEL_FAM6_ATOM_TREMONT_L, &tmt_features },
{ INTEL_FAM6_ATOM_GRACEMONT, &adl_features },
{ INTEL_FAM6_ATOM_CRESTMONT_X, &srf_features },
{ INTEL_FAM6_ATOM_CRESTMONT, &grr_features },
{ INTEL_FAM6_XEON_PHI_KNL, &knl_features },
{ INTEL_FAM6_XEON_PHI_KNM, &knl_features },
/*
* Missing support for
* INTEL_FAM6_ICELAKE
* INTEL_FAM6_ATOM_SILVERMONT_MID
* INTEL_FAM6_ATOM_AIRMONT_MID
* INTEL_FAM6_ATOM_AIRMONT_NP
*/
/* end-of-table marker: the NULL features pointer stops the lookup loop */
{ 0, NULL },
};
static const struct platform_features *platform;
void probe_platform_features(unsigned int family, unsigned int model)
{
int i;
platform = &default_features;
if (authentic_amd || hygon_genuine) {
if (max_extended_level >= 0x80000007) {
unsigned int eax, ebx, ecx, edx;
__cpuid(0x80000007, eax, ebx, ecx, edx);
/* RAPL (Fam 17h+) */
if ((edx & (1 << 14)) && family >= 0x17)
platform = &amd_features_with_rapl;
}
return;
}
if (!genuine_intel || family != 6)
return;
for (i = 0; turbostat_pdata[i].features; i++) {
if (turbostat_pdata[i].model == model) {
platform = turbostat_pdata[i].features;
return;
}
}
}
/* Model specific support End */
#define TJMAX_DEFAULT 100
/* MSRs that are not yet in the kernel-provided header. */
/* These three addresses match the AMD RAPL MSR addresses noted in enum rapl_msrs. */
#define MSR_RAPL_PWR_UNIT 0xc0010299
#define MSR_CORE_ENERGY_STAT 0xc001029a
#define MSR_PKG_ENERGY_STAT 0xc001029b
/* Larger of a and b. Each argument may be evaluated twice, so avoid side effects in them. */
#define MAX(a, b) ((a) > (b) ? (a) : (b))
int backwards_count;
char *progname;
#define CPU_SUBSET_MAXCPUS 1024 /* need to use before probe... */
/* CPU sets; each has a size variable declared on the following line */
cpu_set_t *cpu_present_set, *cpu_effective_set, *cpu_allowed_set, *cpu_affinity_set, *cpu_subset;
size_t cpu_present_setsize, cpu_effective_setsize, cpu_allowed_setsize, cpu_affinity_setsize, cpu_subset_size;
/* length of counter[] in struct core_data and struct pkg_data */
#define MAX_ADDED_COUNTERS 8
/* length of counter[] in struct thread_data */
#define MAX_ADDED_THREAD_COUNTERS 24
#define BITMASK_SIZE 32
/*
* Per-thread counter values.
* Two global pointers of this type are declared with it: thread_even and thread_odd.
*/
struct thread_data {
struct timeval tv_begin;
struct timeval tv_end;
struct timeval tv_delta;
unsigned long long tsc;
unsigned long long aperf;
unsigned long long mperf;
unsigned long long c1;
unsigned long long instr_count;
unsigned long long irq_count;
unsigned int smi_count;
unsigned int cpu_id;
unsigned int apic_id;
unsigned int x2apic_id;
unsigned int flags;
bool is_atom;
unsigned long long counter[MAX_ADDED_THREAD_COUNTERS]; /* one slot per added counter */
} *thread_even, *thread_odd;
/*
* Per-core counter values.
* Two global pointers of this type are declared with it: core_even and core_odd.
*/
struct core_data {
int base_cpu;
unsigned long long c3;
unsigned long long c6;
unsigned long long c7;
unsigned long long mc6_us; /* duplicate as per-core for now, even though per module */
unsigned int core_temp_c;
unsigned int core_energy; /* MSR_CORE_ENERGY_STAT */
unsigned int core_id;
unsigned long long core_throt_cnt;
unsigned long long counter[MAX_ADDED_COUNTERS]; /* one slot per added counter */
} *core_even, *core_odd;
/*
* Per-package counter values.
* Two global pointers of this type are declared with it: package_even and package_odd.
*/
struct pkg_data {
int base_cpu;
unsigned long long pc2;
unsigned long long pc3;
unsigned long long pc6;
unsigned long long pc7;
unsigned long long pc8;
unsigned long long pc9;
unsigned long long pc10;
unsigned long long cpu_lpi;
unsigned long long sys_lpi;
unsigned long long pkg_wtd_core_c0;
unsigned long long pkg_any_core_c0;
unsigned long long pkg_any_gfxe_c0;
unsigned long long pkg_both_core_gfxe_c0;
long long gfx_rc6_ms; /* signed, unlike the neighbouring counters */
unsigned int gfx_mhz;
unsigned int gfx_act_mhz;
unsigned int package_id;
unsigned long long energy_pkg; /* MSR_PKG_ENERGY_STATUS */
unsigned long long energy_dram; /* MSR_DRAM_ENERGY_STATUS */
unsigned long long energy_cores; /* MSR_PP0_ENERGY_STATUS */
unsigned long long energy_gfx; /* MSR_PP1_ENERGY_STATUS */
unsigned long long rapl_pkg_perf_status; /* MSR_PKG_PERF_STATUS */
unsigned long long rapl_dram_perf_status; /* MSR_DRAM_PERF_STATUS */
unsigned int pkg_temp_c;
unsigned int uncore_mhz;
unsigned long long counter[MAX_ADDED_COUNTERS]; /* one slot per added counter */
} *package_even, *package_odd;
/* Argument triples (thread, core, package) for the odd and even snapshots. */
#define ODD_COUNTERS thread_odd, core_odd, package_odd
#define EVEN_COUNTERS thread_even, core_even, package_even

/*
 * Address of one thread's slot in a flat thread_data array that is laid
 * out package-major, then node, then core, then thread, using the
 * dimensions stored in topo.
 */
#define GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no) \
	((thread_base) + \
	 ((pkg_no) * \
	  topo.nodes_per_pkg * topo.cores_per_node * topo.threads_per_core) + \
	 ((node_no) * topo.cores_per_node * topo.threads_per_core) + \
	 ((core_no) * topo.threads_per_core) + \
	 (thread_no))

/* Address of one core's slot in a flat core_data array (package, node, core order). */
#define GET_CORE(core_base, core_no, node_no, pkg_no) \
	((core_base) + \
	 ((pkg_no) * topo.nodes_per_pkg * topo.cores_per_node) + \
	 ((node_no) * topo.cores_per_node) + \
	 (core_no))

/*
 * Address of one package's slot in a flat pkg_data array.
 * Both arguments are parenthesized so that expression arguments
 * (e.g. a conditional) expand with the intended precedence.
 */
#define GET_PKG(pkg_base, pkg_no) ((pkg_base) + (pkg_no))
/*
 * An accumulated MSR sum tracks an MSR that increases monotonically.
 * The sum is updated periodically, so the running total is not limited
 * by the bit width of the register itself.
 *
 * These are the slots of msr_sum_array.entries[]; idx_to_offset() and
 * offset_to_idx() translate between a slot and its MSR address.
 */
enum {
IDX_PKG_ENERGY,
IDX_DRAM_ENERGY,
IDX_PP0_ENERGY,
IDX_PP1_ENERGY,
IDX_PKG_PERF,
IDX_DRAM_PERF,
IDX_COUNT, /* number of slots; not a valid index */
};
/* Forward declaration; the definition is outside this part of the file. */
int get_msr_sum(int cpu, off_t offset, unsigned long long *msr);
/* Accumulator state for every IDX_* slot. */
struct msr_sum_array {
/* get_msr_sum() = sum + (get_msr() - last) */
struct {
/*The accumulated MSR value is updated by the timer */
unsigned long long sum;
/*The MSR footprint recorded in last timer */
unsigned long long last;
} entries[IDX_COUNT];
};
/* The percpu MSR sum array.*/
struct msr_sum_array *per_cpu_msr_sum;
/*
 * idx_to_offset - translate an IDX_* accumulator slot into the MSR
 * address it tracks.
 *
 * Returns the MSR address, or -1 when idx is not one of the IDX_* slots.
 */
off_t idx_to_offset(int idx)
{
	switch (idx) {
	case IDX_PKG_ENERGY:
		/* platforms flagged RAPL_AMD_F17H use MSR_PKG_ENERGY_STAT instead */
		if (platform->rapl_msrs & RAPL_AMD_F17H)
			return MSR_PKG_ENERGY_STAT;
		return MSR_PKG_ENERGY_STATUS;
	case IDX_DRAM_ENERGY:
		return MSR_DRAM_ENERGY_STATUS;
	case IDX_PP0_ENERGY:
		return MSR_PP0_ENERGY_STATUS;
	case IDX_PP1_ENERGY:
		return MSR_PP1_ENERGY_STATUS;
	case IDX_PKG_PERF:
		return MSR_PKG_PERF_STATUS;
	case IDX_DRAM_PERF:
		return MSR_DRAM_PERF_STATUS;
	default:
		return -1;
	}
}
/*
 * offset_to_idx - translate an MSR address into its IDX_* accumulator slot.
 *
 * Both package-energy addresses (MSR_PKG_ENERGY_STATUS and
 * MSR_PKG_ENERGY_STAT) map to IDX_PKG_ENERGY.
 * Returns -1 for an address that has no slot.
 */
int offset_to_idx(off_t offset)
{
	switch (offset) {
	case MSR_PKG_ENERGY_STATUS:
	case MSR_PKG_ENERGY_STAT:
		return IDX_PKG_ENERGY;
	case MSR_DRAM_ENERGY_STATUS:
		return IDX_DRAM_ENERGY;
	case MSR_PP0_ENERGY_STATUS:
		return IDX_PP0_ENERGY;
	case MSR_PP1_ENERGY_STATUS:
		return IDX_PP1_ENERGY;
	case MSR_PKG_PERF_STATUS:
		return IDX_PKG_PERF;
	case MSR_DRAM_PERF_STATUS:
		return IDX_DRAM_PERF;
	default:
		return -1;
	}
}
/*
 * idx_valid - report whether the platform's rapl_msrs mask has the
 * feature bit(s) that correspond to an IDX_* slot.
 *
 * Returns the masked bits (non-zero when set), or 0 when the bits are
 * clear or idx is not an IDX_* slot.
 */
int idx_valid(int idx)
{
	if (idx == IDX_PKG_ENERGY)
		return platform->rapl_msrs & (RAPL_PKG | RAPL_AMD_F17H);
	if (idx == IDX_DRAM_ENERGY)
		return platform->rapl_msrs & RAPL_DRAM;
	if (idx == IDX_PP0_ENERGY)
		return platform->rapl_msrs & RAPL_CORE_ENERGY_STATUS;
	if (idx == IDX_PP1_ENERGY)
		return platform->rapl_msrs & RAPL_GFX;
	if (idx == IDX_PKG_PERF)
		return platform->rapl_msrs & RAPL_PKG_PERF_STATUS;
	if (idx == IDX_DRAM_PERF)
		return platform->rapl_msrs & RAPL_DRAM_PERF_STATUS;

	return 0;
}
/*
 * Added (non built-in) counters.
 * tp, cp and pp are heads of singly linked msr_counter lists (walked via
 * ->next) for the thread, core and package scopes respectively.
 */
struct sys_counters {
/* NOTE(review): presumably the lengths of the tp/cp/pp lists -- confirm where they are updated */
unsigned int added_thread_counters;
unsigned int added_core_counters;
unsigned int added_package_counters;
struct msr_counter *tp;
struct msr_counter *cp;
struct msr_counter *pp;
} sys;
/*
 * One thread/core/package record for the system summary row.
 * format_all_counters() passes &average.threads, &average.cores and
 * &average.packages to format_counters() before the per-CPU rows.
 */
struct system_summary {
struct thread_data threads;
struct core_data cores;
struct pkg_data packages;
} average;
/*
 * Topology record per CPU; format_counters() indexes cpus[] by
 * thread_data.cpu_id to print the Die and Node columns.
 */
struct cpu_topology {
int physical_package_id;
int die_id;
int logical_cpu_id;
int physical_node_id;
int logical_node_id; /* 0-based count within the package */
int physical_core_id;
int thread_id;
cpu_set_t *put_ids; /* Processing Unit/Thread IDs */
} *cpus;
/*
 * System-wide topology dimensions.
 * num_packages, nodes_per_pkg, cores_per_node and threads_per_core bound
 * the loops in for_all_cpus() and are the strides used by GET_THREAD()
 * and GET_CORE().
 */
struct topo_params {
int num_packages;
int num_die;
int num_cpus;
int num_cores;
int allowed_packages;
int allowed_cpus;
int allowed_cores;
int max_cpu_num;
int max_node_num;
int nodes_per_pkg;
int cores_per_node;
int threads_per_core;
} topo;
struct timeval tv_even, tv_odd, tv_delta;
int *irq_column_2_cpu; /* /proc/interrupts column numbers */
int *irqs_per_cpu; /* indexed by cpu_num */
/* Forward declaration; the definition is outside this part of the file. */
void setup_all_buffers(bool startup);
/* NOTE(review): presumably set to one of the two candidate paths below -- confirm at the assignment */
char *sys_lpi_file;
char *sys_lpi_file_sysfs = "/sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us";
char *sys_lpi_file_debugfs = "/sys/kernel/debug/pmc_core/slp_s0_residency_usec";
/* Return 1 when cpu is absent from cpu_present_set, 0 when it is a member. */
int cpu_is_not_present(int cpu)
{
	if (CPU_ISSET_S(cpu, cpu_present_setsize, cpu_present_set))
		return 0;

	return 1;
}
/* Return 1 when cpu is absent from cpu_allowed_set, 0 when it is a member. */
int cpu_is_not_allowed(int cpu)
{
	if (CPU_ISSET_S(cpu, cpu_allowed_setsize, cpu_allowed_set))
		return 0;

	return 1;
}
/*
 * Call func(thread, core, package) for every CPU, iterating in topology
 * order: package, then node, then core, then thread.
 *
 * CPUs that are not in cpu_allowed_set are skipped.
 * Iteration stops at the first non-zero return from func, and that value
 * is returned; otherwise the result is 0.
 */
int for_all_cpus(int (func) (struct thread_data *, struct core_data *, struct pkg_data *),
		 struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base)
{
	int pkg, node, core, thr;

	for (pkg = 0; pkg < topo.num_packages; pkg++)
		for (node = 0; node < topo.nodes_per_pkg; node++)
			for (core = 0; core < topo.cores_per_node; core++)
				for (thr = 0; thr < topo.threads_per_core; thr++) {
					struct thread_data *t = GET_THREAD(thread_base, thr, core, node, pkg);
					int rc;

					if (cpu_is_not_allowed(t->cpu_id))
						continue;

					rc = func(t, GET_CORE(core_base, core, node, pkg), GET_PKG(pkg_base, pkg));
					if (rc)
						return rc;
				}

	return 0;
}
/*
 * Non-zero when thread t is the base CPU of core c.
 * A negative c->base_cpu matches every thread.
 */
int is_cpu_first_thread_in_core(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
	UNUSED(p);

	if (c->base_cpu < 0)
		return 1;

	return c->base_cpu == (int)t->cpu_id;
}
/*
 * Non-zero when thread t is the base CPU of package p.
 * A negative p->base_cpu matches every thread.
 */
int is_cpu_first_core_in_package(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
	UNUSED(c);

	if (p->base_cpu < 0)
		return 1;

	return p->base_cpu == (int)t->cpu_id;
}
/* Non-zero when t passes both the first-thread-in-core and first-core-in-package checks. */
int is_cpu_first_thread_in_package(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
	if (!is_cpu_first_thread_in_core(t, c, p))
		return 0;

	return is_cpu_first_core_in_package(t, c, p) ? 1 : 0;
}
/*
 * Restrict the calling thread's affinity to the single CPU given.
 * cpu_affinity_set is overwritten in the process.
 *
 * Returns 0 on success, -1 when sched_setaffinity() fails.
 */
int cpu_migrate(int cpu)
{
	int rc;

	CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set);
	CPU_SET_S(cpu, cpu_affinity_setsize, cpu_affinity_set);

	rc = sched_setaffinity(0, cpu_affinity_setsize, cpu_affinity_set);

	return (rc == -1) ? -1 : 0;
}
/*
 * Return a read-only descriptor for /dev/cpu/<cpu>/msr.
 *
 * Descriptors are cached in fd_percpu[]; a zero entry means "not opened
 * yet". A failed open terminates the program through err().
 */
int get_msr_fd(int cpu)
{
	int fd = fd_percpu[cpu];

	if (!fd) {
		char pathname[32];

		sprintf(pathname, "/dev/cpu/%d/msr", cpu);
		fd = open(pathname, O_RDONLY);
		if (fd < 0)
			err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, or run as root", pathname);

		fd_percpu[cpu] = fd;
	}

	return fd;
}
/* Thin wrapper that issues the perf_event_open system call through syscall(). */
static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags)
{
	long ret;

	ret = syscall(__NR_perf_event_open, hw_event, pid, cpu, group_fd, flags);

	return ret;
}
/*
 * Open a hardware instruction-count perf event on cpu_num.
 *
 * Returns the event descriptor. On failure a warning is printed, the
 * IPC column is marked not present, and -1 is returned.
 */
static int perf_instr_count_open(int cpu_num)
{
	struct perf_event_attr attr;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_INSTRUCTIONS;

	/* counter for cpu_num, including user + kernel and all processes */
	fd = perf_event_open(&attr, -1, cpu_num, -1, 0);
	if (fd != -1)
		return fd;

	warnx("capget(CAP_PERFMON) failed, try \"# setcap cap_sys_admin=ep %s\"", progname);
	BIC_NOT_PRESENT(BIC_IPC);

	return fd;
}
/*
 * Return the instruction-count perf descriptor for cpu, opening it on
 * first use. The result of perf_instr_count_open() is cached in
 * fd_instr_count_percpu[]; a zero entry means "not opened yet".
 */
int get_instr_count_fd(int cpu)
{
	if (!fd_instr_count_percpu[cpu])
		fd_instr_count_percpu[cpu] = perf_instr_count_open(cpu);

	return fd_instr_count_percpu[cpu];
}
/*
 * Read the 64-bit MSR at the given offset on cpu into *msr.
 *
 * Returns 0. A short or failed read terminates the program through err().
 */
int get_msr(int cpu, off_t offset, unsigned long long *msr)
{
	int fd = get_msr_fd(cpu);
	ssize_t nread = pread(fd, msr, sizeof(*msr), offset);

	if (nread != sizeof(*msr))
		err(-1, "cpu%d: msr offset 0x%llx read failed", cpu, (unsigned long long)offset);

	return 0;
}
/* Capacity of the deferred name tables filled by bic_lookup(). */
#define MAX_DEFERRED 16
/* Names bic_lookup() did not recognize in SHOW_LIST mode. */
char *deferred_add_names[MAX_DEFERRED];
/* Names bic_lookup() did not recognize in HIDE_LIST mode. */
char *deferred_skip_names[MAX_DEFERRED];
/* Next free slot in each table above. */
int deferred_add_index;
int deferred_skip_index;
/*
 * HIDE_LIST - hide this list of counters, show the rest [default]
 * SHOW_LIST - show this list of counters, hide the rest
 */
enum show_hide_mode { SHOW_LIST, HIDE_LIST } global_show_hide_mode = HIDE_LIST;
/*
 * Print the usage summary and option list to outf.
 */
void help(void)
{
fprintf(outf,
"Usage: turbostat [OPTIONS][(--interval seconds) | COMMAND ...]\n"
"\n"
"Turbostat forks the specified COMMAND and prints statistics\n"
"when COMMAND completes.\n"
"If no COMMAND is specified, turbostat wakes every 5-seconds\n"
"to print statistics, until interrupted.\n"
" -a, --add add a counter\n"
" eg. --add msr0x10,u64,cpu,delta,MY_TSC\n"
" -c, --cpu cpu-set limit output to summary plus cpu-set:\n"
" {core | package | j,k,l..m,n-p }\n"
" -d, --debug displays usec, Time_Of_Day_Seconds and more debugging\n"
" -D, --Dump displays the raw counter values\n"
" -e, --enable [all | column]\n"
" shows all or the specified disabled column\n"
" -H, --hide [column|column,column,...]\n"
" hide the specified column(s)\n"
" -i, --interval sec.subsec\n"
" Override default 5-second measurement interval\n"
" -J, --Joules displays energy in Joules instead of Watts\n"
" -l, --list list column headers only\n"
" -n, --num_iterations num\n"
" number of the measurement iterations\n"
" -N, --header_iterations num\n"
" print header every num iterations\n"
" -o, --out file\n"
" create or truncate \"file\" for all output\n"
" -q, --quiet skip decoding system configuration header\n"
" -s, --show [column|column,column,...]\n"
" show only the specified column(s)\n"
" -S, --Summary\n"
" limits output to 1-line system summary per interval\n"
" -T, --TCC temperature\n"
" sets the Thermal Control Circuit temperature in\n"
" degrees Celsius\n"
" -h, --help print this help message\n"
" -v, --version print version information\n" "\n" "For more help, run \"man turbostat\"\n");
}
/*
 * bic_lookup
 * For each name in the comma-separated name_list, set the appropriate
 * bit(s) in the return value.
 *
 * A name is either a group keyword ("all", "topology", "power", "idle",
 * "frequency", "other") or the name of one built-in column in bic[].
 * A name that matches neither is remembered in deferred_add_names[]
 * (mode == SHOW_LIST) or deferred_skip_names[] (any other mode); the
 * program prints help and exits once a table reaches MAX_DEFERRED.
 *
 * name_list is modified in place: every ',' is overwritten with '\0',
 * and the deferred tables keep pointers into it, so the buffer must
 * outlive those tables.
 */
unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode)
{
	unsigned int i;
	unsigned long long retval = 0;

	while (name_list) {
		char *comma;
		int recognized = 1;

		comma = strchr(name_list, ',');
		if (comma)
			*comma = '\0';

		/* group keywords do not depend on the column index: test them once */
		if (!strcmp(name_list, "all")) {
			retval |= ~0ULL;
		} else if (!strcmp(name_list, "topology")) {
			retval |= BIC_TOPOLOGY;
		} else if (!strcmp(name_list, "power")) {
			retval |= BIC_THERMAL_PWR;
		} else if (!strcmp(name_list, "idle")) {
			retval |= BIC_IDLE;
		} else if (!strcmp(name_list, "frequency")) {
			retval |= BIC_FREQUENCY;
		} else if (!strcmp(name_list, "other")) {
			retval |= BIC_OTHER;
		} else {
			recognized = 0;
			for (i = 0; i < MAX_BIC; ++i) {
				if (!strcmp(name_list, bic[i].name)) {
					retval |= (1ULL << i);
					recognized = 1;
					break;
				}
			}
		}

		if (!recognized) {
			if (mode == SHOW_LIST) {
				deferred_add_names[deferred_add_index++] = name_list;
				if (deferred_add_index >= MAX_DEFERRED) {
					fprintf(stderr, "More than max %d un-recognized --add options '%s'\n",
						MAX_DEFERRED, name_list);
					help();
					exit(1);
				}
			} else {
				deferred_skip_names[deferred_skip_index++] = name_list;
				if (debug)
					fprintf(stderr, "deferred \"%s\"\n", name_list);
				if (deferred_skip_index >= MAX_DEFERRED) {
					fprintf(stderr, "More than max %d un-recognized --skip options '%s'\n",
						MAX_DEFERRED, name_list);
					help();
					exit(1);
				}
			}
		}

		/* advance past the separator, or stop after the last name */
		name_list = comma;
		if (name_list)
			name_list++;
	}

	return retval;
}
/*
 * Append one header cell per added counter in the list starting at mp.
 * *printed is the shared "has any column been emitted yet" counter, so
 * the very first column of the line gets no leading delimiter.
 */
static void print_added_counter_headers(struct msr_counter *mp, char *delim, int *printed)
{
	for (; mp; mp = mp->next) {
		char *sep = ((*printed)++ ? delim : "");

		if (mp->format == FORMAT_RAW) {
			if (mp->width == 64)
				outp += sprintf(outp, "%s%18.18s", sep, mp->name);
			else
				outp += sprintf(outp, "%s%10.10s", sep, mp->name);
		} else {
			if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
				outp += sprintf(outp, "%s%8s", sep, mp->name);
			else
				outp += sprintf(outp, "%s%s", sep, mp->name);
		}
	}
}

/*
 * Append the column-header line to the output buffer at outp.
 *
 * Columns appear in the same order in which format_counters() emits
 * values: thread-scope columns, core-scope columns, then package-scope
 * columns. Cells are separated by delim, and the first emitted cell has
 * no leading delimiter. This also holds for added core and package
 * counters, which previously always got a leading delimiter and could
 * therefore shift the header relative to the data rows.
 */
void print_header(char *delim)
{
	int printed = 0;

	if (DO_BIC(BIC_USEC))
		outp += sprintf(outp, "%susec", (printed++ ? delim : ""));
	if (DO_BIC(BIC_TOD))
		outp += sprintf(outp, "%sTime_Of_Day_Seconds", (printed++ ? delim : ""));
	if (DO_BIC(BIC_Package))
		outp += sprintf(outp, "%sPackage", (printed++ ? delim : ""));
	if (DO_BIC(BIC_Die))
		outp += sprintf(outp, "%sDie", (printed++ ? delim : ""));
	if (DO_BIC(BIC_Node))
		outp += sprintf(outp, "%sNode", (printed++ ? delim : ""));
	if (DO_BIC(BIC_Core))
		outp += sprintf(outp, "%sCore", (printed++ ? delim : ""));
	if (DO_BIC(BIC_CPU))
		outp += sprintf(outp, "%sCPU", (printed++ ? delim : ""));
	if (DO_BIC(BIC_APIC))
		outp += sprintf(outp, "%sAPIC", (printed++ ? delim : ""));
	if (DO_BIC(BIC_X2APIC))
		outp += sprintf(outp, "%sX2APIC", (printed++ ? delim : ""));
	if (DO_BIC(BIC_Avg_MHz))
		outp += sprintf(outp, "%sAvg_MHz", (printed++ ? delim : ""));
	if (DO_BIC(BIC_Busy))
		outp += sprintf(outp, "%sBusy%%", (printed++ ? delim : ""));
	if (DO_BIC(BIC_Bzy_MHz))
		outp += sprintf(outp, "%sBzy_MHz", (printed++ ? delim : ""));
	if (DO_BIC(BIC_TSC_MHz))
		outp += sprintf(outp, "%sTSC_MHz", (printed++ ? delim : ""));
	if (DO_BIC(BIC_IPC))
		outp += sprintf(outp, "%sIPC", (printed++ ? delim : ""));
	if (DO_BIC(BIC_IRQ)) {
		if (sums_need_wide_columns)
			outp += sprintf(outp, "%s IRQ", (printed++ ? delim : ""));
		else
			outp += sprintf(outp, "%sIRQ", (printed++ ? delim : ""));
	}
	if (DO_BIC(BIC_SMI))
		outp += sprintf(outp, "%sSMI", (printed++ ? delim : ""));

	/* added thread-scope counters */
	print_added_counter_headers(sys.tp, delim, &printed);

	if (DO_BIC(BIC_CPU_c1))
		outp += sprintf(outp, "%sCPU%%c1", (printed++ ? delim : ""));
	if (DO_BIC(BIC_CPU_c3))
		outp += sprintf(outp, "%sCPU%%c3", (printed++ ? delim : ""));
	if (DO_BIC(BIC_CPU_c6))
		outp += sprintf(outp, "%sCPU%%c6", (printed++ ? delim : ""));
	if (DO_BIC(BIC_CPU_c7))
		outp += sprintf(outp, "%sCPU%%c7", (printed++ ? delim : ""));
	if (DO_BIC(BIC_Mod_c6))
		outp += sprintf(outp, "%sMod%%c6", (printed++ ? delim : ""));
	if (DO_BIC(BIC_CoreTmp))
		outp += sprintf(outp, "%sCoreTmp", (printed++ ? delim : ""));
	if (DO_BIC(BIC_CORE_THROT_CNT))
		outp += sprintf(outp, "%sCoreThr", (printed++ ? delim : ""));

	/* per-core energy column, only on platforms with per-core RAPL */
	if (platform->rapl_msrs && !rapl_joules) {
		if (DO_BIC(BIC_CorWatt) && platform->has_per_core_rapl)
			outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : ""));
	} else if (platform->rapl_msrs && rapl_joules) {
		if (DO_BIC(BIC_Cor_J) && platform->has_per_core_rapl)
			outp += sprintf(outp, "%sCor_J", (printed++ ? delim : ""));
	}

	/* added core-scope counters */
	print_added_counter_headers(sys.cp, delim, &printed);

	if (DO_BIC(BIC_PkgTmp))
		outp += sprintf(outp, "%sPkgTmp", (printed++ ? delim : ""));
	if (DO_BIC(BIC_GFX_rc6))
		outp += sprintf(outp, "%sGFX%%rc6", (printed++ ? delim : ""));
	if (DO_BIC(BIC_GFXMHz))
		outp += sprintf(outp, "%sGFXMHz", (printed++ ? delim : ""));
	if (DO_BIC(BIC_GFXACTMHz))
		outp += sprintf(outp, "%sGFXAMHz", (printed++ ? delim : ""));
	if (DO_BIC(BIC_Totl_c0))
		outp += sprintf(outp, "%sTotl%%C0", (printed++ ? delim : ""));
	if (DO_BIC(BIC_Any_c0))
		outp += sprintf(outp, "%sAny%%C0", (printed++ ? delim : ""));
	if (DO_BIC(BIC_GFX_c0))
		outp += sprintf(outp, "%sGFX%%C0", (printed++ ? delim : ""));
	if (DO_BIC(BIC_CPUGFX))
		outp += sprintf(outp, "%sCPUGFX%%", (printed++ ? delim : ""));
	if (DO_BIC(BIC_Pkgpc2))
		outp += sprintf(outp, "%sPkg%%pc2", (printed++ ? delim : ""));
	if (DO_BIC(BIC_Pkgpc3))
		outp += sprintf(outp, "%sPkg%%pc3", (printed++ ? delim : ""));
	if (DO_BIC(BIC_Pkgpc6))
		outp += sprintf(outp, "%sPkg%%pc6", (printed++ ? delim : ""));
	if (DO_BIC(BIC_Pkgpc7))
		outp += sprintf(outp, "%sPkg%%pc7", (printed++ ? delim : ""));
	if (DO_BIC(BIC_Pkgpc8))
		outp += sprintf(outp, "%sPkg%%pc8", (printed++ ? delim : ""));
	if (DO_BIC(BIC_Pkgpc9))
		outp += sprintf(outp, "%sPkg%%pc9", (printed++ ? delim : ""));
	if (DO_BIC(BIC_Pkgpc10))
		outp += sprintf(outp, "%sPk%%pc10", (printed++ ? delim : ""));
	if (DO_BIC(BIC_CPU_LPI))
		outp += sprintf(outp, "%sCPU%%LPI", (printed++ ? delim : ""));
	if (DO_BIC(BIC_SYS_LPI))
		outp += sprintf(outp, "%sSYS%%LPI", (printed++ ? delim : ""));

	/* package energy columns: Watts or Joules depending on rapl_joules */
	if (platform->rapl_msrs && !rapl_joules) {
		if (DO_BIC(BIC_PkgWatt))
			outp += sprintf(outp, "%sPkgWatt", (printed++ ? delim : ""));
		if (DO_BIC(BIC_CorWatt) && !platform->has_per_core_rapl)
			outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : ""));
		if (DO_BIC(BIC_GFXWatt))
			outp += sprintf(outp, "%sGFXWatt", (printed++ ? delim : ""));
		if (DO_BIC(BIC_RAMWatt))
			outp += sprintf(outp, "%sRAMWatt", (printed++ ? delim : ""));
		if (DO_BIC(BIC_PKG__))
			outp += sprintf(outp, "%sPKG_%%", (printed++ ? delim : ""));
		if (DO_BIC(BIC_RAM__))
			outp += sprintf(outp, "%sRAM_%%", (printed++ ? delim : ""));
	} else if (platform->rapl_msrs && rapl_joules) {
		if (DO_BIC(BIC_Pkg_J))
			outp += sprintf(outp, "%sPkg_J", (printed++ ? delim : ""));
		if (DO_BIC(BIC_Cor_J) && !platform->has_per_core_rapl)
			outp += sprintf(outp, "%sCor_J", (printed++ ? delim : ""));
		if (DO_BIC(BIC_GFX_J))
			outp += sprintf(outp, "%sGFX_J", (printed++ ? delim : ""));
		if (DO_BIC(BIC_RAM_J))
			outp += sprintf(outp, "%sRAM_J", (printed++ ? delim : ""));
		if (DO_BIC(BIC_PKG__))
			outp += sprintf(outp, "%sPKG_%%", (printed++ ? delim : ""));
		if (DO_BIC(BIC_RAM__))
			outp += sprintf(outp, "%sRAM_%%", (printed++ ? delim : ""));
	}
	if (DO_BIC(BIC_UNCORE_MHZ))
		outp += sprintf(outp, "%sUncMHz", (printed++ ? delim : ""));

	/* added package-scope counters */
	print_added_counter_headers(sys.pp, delim, &printed);

	outp += sprintf(outp, "\n");
}
int dump_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
int i;
struct msr_counter *mp;
outp += sprintf(outp, "t %p, c %p, p %p\n", t, c, p);
if (t) {
outp += sprintf(outp, "CPU: %d flags 0x%x\n", t->cpu_id, t->flags);
outp += sprintf(outp, "TSC: %016llX\n", t->tsc);
outp += sprintf(outp, "aperf: %016llX\n", t->aperf);
outp += sprintf(outp, "mperf: %016llX\n", t->mperf);
outp += sprintf(outp, "c1: %016llX\n", t->c1);
if (DO_BIC(BIC_IPC))
outp += sprintf(outp, "IPC: %lld\n", t->instr_count);
if (DO_BIC(BIC_IRQ))
outp += sprintf(outp, "IRQ: %lld\n", t->irq_count);
if (DO_BIC(BIC_SMI))
outp += sprintf(outp, "SMI: %d\n", t->smi_count);
for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
outp += sprintf(outp, "tADDED [%d] msr0x%x: %08llX\n", i, mp->msr_num, t->counter[i]);
}
}
if (c) {
outp += sprintf(outp, "core: %d\n", c->core_id);
outp += sprintf(outp, "c3: %016llX\n", c->c3);
outp += sprintf(outp, "c6: %016llX\n", c->c6);
outp += sprintf(outp, "c7: %016llX\n", c->c7);
outp += sprintf(outp, "DTS: %dC\n", c->core_temp_c);
outp += sprintf(outp, "cpu_throt_count: %016llX\n", c->core_throt_cnt);
outp += sprintf(outp, "Joules: %0X\n", c->core_energy);
for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
outp += sprintf(outp, "cADDED [%d] msr0x%x: %08llX\n", i, mp->msr_num, c->counter[i]);
}
outp += sprintf(outp, "mc6_us: %016llX\n", c->mc6_us);
}
if (p) {
outp += sprintf(outp, "package: %d\n", p->package_id);
outp += sprintf(outp, "Weighted cores: %016llX\n", p->pkg_wtd_core_c0);
outp += sprintf(outp, "Any cores: %016llX\n", p->pkg_any_core_c0);
outp += sprintf(outp, "Any GFX: %016llX\n", p->pkg_any_gfxe_c0);
outp += sprintf(outp, "CPU + GFX: %016llX\n", p->pkg_both_core_gfxe_c0);
outp += sprintf(outp, "pc2: %016llX\n", p->pc2);
if (DO_BIC(BIC_Pkgpc3))
outp += sprintf(outp, "pc3: %016llX\n", p->pc3);
if (DO_BIC(BIC_Pkgpc6))
outp += sprintf(outp, "pc6: %016llX\n", p->pc6);
if (DO_BIC(BIC_Pkgpc7))
outp += sprintf(outp, "pc7: %016llX\n", p->pc7);
outp += sprintf(outp, "pc8: %016llX\n", p->pc8);
outp += sprintf(outp, "pc9: %016llX\n", p->pc9);
outp += sprintf(outp, "pc10: %016llX\n", p->pc10);
outp += sprintf(outp, "cpu_lpi: %016llX\n", p->cpu_lpi);
outp += sprintf(outp, "sys_lpi: %016llX\n", p->sys_lpi);
outp += sprintf(outp, "Joules PKG: %0llX\n", p->energy_pkg);
outp += sprintf(outp, "Joules COR: %0llX\n", p->energy_cores);
outp += sprintf(outp, "Joules GFX: %0llX\n", p->energy_gfx);
outp += sprintf(outp, "Joules RAM: %0llX\n", p->energy_dram);
outp += sprintf(outp, "Throttle PKG: %0llX\n", p->rapl_pkg_perf_status);
outp += sprintf(outp, "Throttle RAM: %0llX\n", p->rapl_dram_perf_status);
outp += sprintf(outp, "PTM: %dC\n", p->pkg_temp_c);
for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
outp += sprintf(outp, "pADDED [%d] msr0x%x: %08llX\n", i, mp->msr_num, p->counter[i]);
}
}
outp += sprintf(outp, "\n");
return 0;
}
/*
 * column formatting convention & formats
 *
 * format_counters() appends one row of values for the (t, c, p) triple
 * to the output buffer at outp and always returns 0.
 *
 * Values are emitted in the column order of print_header(): thread-scope
 * values for every row, core-scope values only when t is the first
 * thread in its core, and package-scope values only when t is also the
 * first core in its package. "printed" counts emitted cells so that
 * the first cell gets no leading delimiter.
 */
int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
double interval_float, tsc;
char *fmt8;
int i;
struct msr_counter *mp;
char *delim = "\t";
int printed = 0;
/* if showing only 1st thread in core and this isn't one, bail out */
if (show_core_only && !is_cpu_first_thread_in_core(t, c, p))
return 0;
/* if showing only 1st thread in pkg and this isn't one, bail out */
if (show_pkg_only && !is_cpu_first_core_in_package(t, c, p))
return 0;
/*if not summary line and --cpu is used */
if ((t != &average.threads) && (cpu_subset && !CPU_ISSET_S(t->cpu_id, cpu_subset_size, cpu_subset)))
return 0;
if (DO_BIC(BIC_USEC)) {
/* on each row, print how many usec each timestamp took to gather */
struct timeval tv;
timersub(&t->tv_end, &t->tv_begin, &tv);
/* this cell carries its own trailing tab and does not bump "printed" */
outp += sprintf(outp, "%5ld\t", tv.tv_sec * 1000000 + tv.tv_usec);
}
/* Time_Of_Day_Seconds: on each row, print sec.usec last timestamp taken */
if (DO_BIC(BIC_TOD))
outp += sprintf(outp, "%10ld.%06ld\t", t->tv_end.tv_sec, t->tv_end.tv_usec);
/* measurement interval in seconds */
interval_float = t->tv_delta.tv_sec + t->tv_delta.tv_usec / 1000000.0;
/* TSC delta scaled by tsc_tweak; the denominator of the residency percentages below */
tsc = t->tsc * tsc_tweak;
/* topo columns, print blanks on 1st (average) line */
if (t == &average.threads) {
if (DO_BIC(BIC_Package))
outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
if (DO_BIC(BIC_Die))
outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
if (DO_BIC(BIC_Node))
outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
if (DO_BIC(BIC_Core))
outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
if (DO_BIC(BIC_CPU))
outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
if (DO_BIC(BIC_APIC))
outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
if (DO_BIC(BIC_X2APIC))
outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
} else {
if (DO_BIC(BIC_Package)) {
if (p)
outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->package_id);
else
outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
}
if (DO_BIC(BIC_Die)) {
if (c)
outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), cpus[t->cpu_id].die_id);
else
outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
}
if (DO_BIC(BIC_Node)) {
if (t)
outp += sprintf(outp, "%s%d",
(printed++ ? delim : ""), cpus[t->cpu_id].physical_node_id);
else
outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
}
if (DO_BIC(BIC_Core)) {
if (c)
outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_id);
else
outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
}
if (DO_BIC(BIC_CPU))
outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->cpu_id);
if (DO_BIC(BIC_APIC))
outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->apic_id);
if (DO_BIC(BIC_X2APIC))
outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->x2apic_id);
}
/* frequency and utilization columns derived from aperf, mperf and tsc */
if (DO_BIC(BIC_Avg_MHz))
outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), 1.0 / units * t->aperf / interval_float);
if (DO_BIC(BIC_Busy))
outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->mperf / tsc);
if (DO_BIC(BIC_Bzy_MHz)) {
if (has_base_hz)
outp +=
sprintf(outp, "%s%.0f", (printed++ ? delim : ""), base_hz / units * t->aperf / t->mperf);
else
outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""),
tsc / units * t->aperf / t->mperf / interval_float);
}
if (DO_BIC(BIC_TSC_MHz))
outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), 1.0 * t->tsc / units / interval_float);
if (DO_BIC(BIC_IPC))
outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 1.0 * t->instr_count / t->aperf);
/* IRQ */
if (DO_BIC(BIC_IRQ)) {
if (sums_need_wide_columns)
outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->irq_count);
else
outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->irq_count);
}
/* SMI */
if (DO_BIC(BIC_SMI))
outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->smi_count);
/* Added counters */
for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
if (mp->format == FORMAT_RAW) {
if (mp->width == 32)
outp +=
sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int)t->counter[i]);
else
outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), t->counter[i]);
} else if (mp->format == FORMAT_DELTA) {
if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->counter[i]);
else
outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->counter[i]);
} else if (mp->format == FORMAT_PERCENT) {
/* COUNTER_USEC values are scaled by the interval; all others by tsc */
if (mp->type == COUNTER_USEC)
outp +=
sprintf(outp, "%s%.2f", (printed++ ? delim : ""),
t->counter[i] / interval_float / 10000);
else
outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->counter[i] / tsc);
}
}
/* C1 */
if (DO_BIC(BIC_CPU_c1))
outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->c1 / tsc);
/* print per-core data only for 1st thread in core */
if (!is_cpu_first_thread_in_core(t, c, p))
goto done;
if (DO_BIC(BIC_CPU_c3))
outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c3 / tsc);
if (DO_BIC(BIC_CPU_c6))
outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c6 / tsc);
if (DO_BIC(BIC_CPU_c7))
outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c7 / tsc);
/* Mod%c6 */
if (DO_BIC(BIC_Mod_c6))
outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->mc6_us / tsc);
if (DO_BIC(BIC_CoreTmp))
outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_temp_c);
/* Core throttle count */
if (DO_BIC(BIC_CORE_THROT_CNT))
outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), c->core_throt_cnt);
/* added core-scope counters */
for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
if (mp->format == FORMAT_RAW) {
if (mp->width == 32)
outp +=
sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int)c->counter[i]);
else
outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), c->counter[i]);
} else if (mp->format == FORMAT_DELTA) {
if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), c->counter[i]);
else
outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), c->counter[i]);
} else if (mp->format == FORMAT_PERCENT) {
outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->counter[i] / tsc);
}
}
/* shared format for the energy and RAPL percentage cells below */
fmt8 = "%s%.2f";
if (DO_BIC(BIC_CorWatt) && platform->has_per_core_rapl)
outp +=
sprintf(outp, fmt8, (printed++ ? delim : ""), c->core_energy * rapl_energy_units / interval_float);
if (DO_BIC(BIC_Cor_J) && platform->has_per_core_rapl)
outp += sprintf(outp, fmt8, (printed++ ? delim : ""), c->core_energy * rapl_energy_units);
/* print per-package data only for 1st core in package */
if (!is_cpu_first_core_in_package(t, c, p))
goto done;
/* PkgTmp */
if (DO_BIC(BIC_PkgTmp))
outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->pkg_temp_c);
/* GFXrc6 */
if (DO_BIC(BIC_GFX_rc6)) {
if (p->gfx_rc6_ms == -1) { /* detect GFX counter reset */
outp += sprintf(outp, "%s**.**", (printed++ ? delim : ""));
} else {
outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""),
p->gfx_rc6_ms / 10.0 / interval_float);
}
}
/* GFXMHz */
if (DO_BIC(BIC_GFXMHz))
outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->gfx_mhz);
/* GFXACTMHz */
if (DO_BIC(BIC_GFXACTMHz))
outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->gfx_act_mhz);
/* Totl%C0, Any%C0 GFX%C0 CPUGFX% */
if (DO_BIC(BIC_Totl_c0))
outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_wtd_core_c0 / tsc);
if (DO_BIC(BIC_Any_c0))
outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_core_c0 / tsc);
if (DO_BIC(BIC_GFX_c0))
outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_gfxe_c0 / tsc);
if (DO_BIC(BIC_CPUGFX))
outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_both_core_gfxe_c0 / tsc);
/* package C-state residency, as a percentage of tsc */
if (DO_BIC(BIC_Pkgpc2))
outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc2 / tsc);
if (DO_BIC(BIC_Pkgpc3))
outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc3 / tsc);
if (DO_BIC(BIC_Pkgpc6))
outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc6 / tsc);
if (DO_BIC(BIC_Pkgpc7))
outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc7 / tsc);
if (DO_BIC(BIC_Pkgpc8))
outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc8 / tsc);
if (DO_BIC(BIC_Pkgpc9))
outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc9 / tsc);
if (DO_BIC(BIC_Pkgpc10))
outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc10 / tsc);
/* LPI values are divided by 1000000.0 and the interval in seconds */
if (DO_BIC(BIC_CPU_LPI))
outp +=
sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->cpu_lpi / 1000000.0 / interval_float);
if (DO_BIC(BIC_SYS_LPI))
outp +=
sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->sys_lpi / 1000000.0 / interval_float);
/* energy as a rate: scaled count divided by the interval */
if (DO_BIC(BIC_PkgWatt))
outp +=
sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units / interval_float);
if (DO_BIC(BIC_CorWatt) && !platform->has_per_core_rapl)
outp +=
sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units / interval_float);
if (DO_BIC(BIC_GFXWatt))
outp +=
sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_gfx * rapl_energy_units / interval_float);
if (DO_BIC(BIC_RAMWatt))
outp +=
sprintf(outp, fmt8, (printed++ ? delim : ""),
p->energy_dram * rapl_dram_energy_units / interval_float);
/* energy as a total: scaled count, not divided by the interval */
if (DO_BIC(BIC_Pkg_J))
outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units);
if (DO_BIC(BIC_Cor_J) && !platform->has_per_core_rapl)
outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units);
if (DO_BIC(BIC_GFX_J))
outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_gfx * rapl_energy_units);
if (DO_BIC(BIC_RAM_J))
outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_dram * rapl_dram_energy_units);
if (DO_BIC(BIC_PKG__))
outp +=
sprintf(outp, fmt8, (printed++ ? delim : ""),
100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float);
if (DO_BIC(BIC_RAM__))
outp +=
sprintf(outp, fmt8, (printed++ ? delim : ""),
100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float);
/* UncMHz */
if (DO_BIC(BIC_UNCORE_MHZ))
outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->uncore_mhz);
/* added package-scope counters */
for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
if (mp->format == FORMAT_RAW) {
if (mp->width == 32)
outp +=
sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int)p->counter[i]);
else
outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), p->counter[i]);
} else if (mp->format == FORMAT_DELTA) {
if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), p->counter[i]);
else
outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), p->counter[i]);
} else if (mp->format == FORMAT_PERCENT) {
outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->counter[i] / tsc);
}
}
done:
/* terminate the row unless the previous character is already a newline */
if (*(outp - 1) != '\n')
outp += sprintf(outp, "\n");
return 0;
}
/*
 * Write the accumulated output buffer and rewind outp to its start.
 * The destination is outf, except that stdout is used when outf is stderr.
 */
void flush_output_stdout(void)
{
	FILE *filep = (outf == stderr) ? stdout : outf;

	fputs(output_buffer, filep);
	fflush(filep);

	outp = output_buffer;
}
/*
 * Write the accumulated output buffer to outf, flush it, and rewind
 * outp to the start of the buffer.
 */
void flush_output_stderr(void)
{
	FILE *dest = outf;

	fputs(output_buffer, dest);
	fflush(dest);

	outp = output_buffer;
}
/*
 * Format one full report into the output buffer: an optional header,
 * the system summary row, and (unless summary_only) one row per CPU.
 *
 * The header is emitted on every call when summary_only is off.
 * In summary-only mode it is emitted on the first call and then on
 * every header_iterations-th call, when header_iterations is non-zero.
 */
void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
	static int count;
	int want_header;

	want_header = !summary_only || !count || (header_iterations && !(count % header_iterations));
	if (want_header)
		print_header("\t");

	format_counters(&average.threads, &average.cores, &average.packages);
	count++;

	if (!summary_only)
		for_all_cpus(format_counters, t, c, p);
}
/*
 * DELTA_WRAP32(new, old): old = new - old, computed modulo 2^32.
 *
 * Both operands are widened to 64 bits and shifted into the upper half
 * before subtracting, so the low 32 bits of the difference are produced
 * even when "new" is numerically smaller than "old" (a 32-bit wrap).
 *
 * "old" must be an lvalue and appears twice in the expansion.
 * The arguments are parenthesized so that arbitrary expressions bind
 * correctly, and the body is wrapped in do/while (0) so the macro acts as
 * exactly one statement: terminate each use with a semicolon.
 */
#define DELTA_WRAP32(new, old) \
	do { \
		(old) = ((((unsigned long long)(new) << 32) - \
			  ((unsigned long long)(old) << 32)) >> 32); \
	} while (0)
int delta_package(struct pkg_data *new, struct pkg_data *old)
{
int i;
struct msr_counter *mp;
if (DO_BIC(BIC_Totl_c0))
old->pkg_wtd_core_c0 = new->pkg_wtd_core_c0 - old->pkg_wtd_core_c0;
if (DO_BIC(BIC_Any_c0))
old->pkg_any_core_c0 = new->pkg_any_core_c0 - old->pkg_any_core_c0;
if (DO_BIC(BIC_GFX_c0))
old->pkg_any_gfxe_c0 = new->pkg_any_gfxe_c0 - old->pkg_any_gfxe_c0;
if (DO_BIC(BIC_CPUGFX))
old->pkg_both_core_gfxe_c0 = new->pkg_both_core_gfxe_c0 - old->pkg_both_core_gfxe_c0;
old->pc2 = new->pc2 - old->pc2;
if (DO_BIC(BIC_Pkgpc3))
old->pc3 = new->pc3 - old->pc3;
if (DO_BIC(BIC_Pkgpc6))
old->pc6 = new->pc6 - old->pc6;
if (DO_BIC(BIC_Pkgpc7))
old->pc7 = new->pc7 - old->pc7;
old->pc8 = new->pc8 - old->pc8;
old->pc9 = new->pc9 - old->pc9;
old->pc10 = new->pc10 - old->pc10;
old->cpu_lpi = new->cpu_lpi - old->cpu_lpi;
old->sys_lpi = new->sys_lpi - old->sys_lpi;
old->pkg_temp_c = new->pkg_temp_c;
/* flag an error when rc6 counter resets/wraps */
if (old->gfx_rc6_ms > new->gfx_rc6_ms)
old->gfx_rc6_ms = -1;
else
old->gfx_rc6_ms = new->gfx_rc6_ms - old->gfx_rc6_ms;
old->uncore_mhz = new->uncore_mhz;
old->gfx_mhz = new->gfx_mhz;
old->gfx_act_mhz = new->gfx_act_mhz;
old->energy_pkg = new->energy_pkg - old->energy_pkg;
old->energy_cores = new->energy_cores - old->energy_cores;
old->energy_gfx = new->energy_gfx - old->energy_gfx;
old->energy_dram = new->energy_dram - old->energy_dram;
old->rapl_pkg_perf_status = new->rapl_pkg_perf_status - old->rapl_pkg_perf_status;
old->rapl_dram_perf_status = new->rapl_dram_perf_status - old->rapl_dram_perf_status;
for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
if (mp->format == FORMAT_RAW)
old->counter[i] = new->counter[i];
else
old->counter[i] = new->counter[i] - old->counter[i];
}
return 0;
}
/*
 * Turn "old" into the core-level delta for one interval, in place:
 * old = new - old.
 *
 * core_temp_c and core_throt_cnt take the value from "new" unchanged.
 * core_energy is differenced with DELTA_WRAP32().  Added counters whose
 * format is FORMAT_RAW are copied rather than subtracted.
 */
void delta_core(struct core_data *new, struct core_data *old)
{
	struct msr_counter *ctr;
	int idx;

	/* fields that take the end-of-interval sample as-is */
	old->core_temp_c = new->core_temp_c;
	old->core_throt_cnt = new->core_throt_cnt;

	/* fields that are differenced */
	old->c3 = new->c3 - old->c3;
	old->c6 = new->c6 - old->c6;
	old->c7 = new->c7 - old->c7;
	old->mc6_us = new->mc6_us - old->mc6_us;

	DELTA_WRAP32(new->core_energy, old->core_energy);

	/* added core counters: raw ones report the latest value */
	for (idx = 0, ctr = sys.cp; ctr; ctr = ctr->next, idx++) {
		if (ctr->format != FORMAT_RAW)
			old->counter[idx] = new->counter[idx] - old->counter[idx];
		else
			old->counter[idx] = new->counter[idx];
	}
}
/*
 * Returns 0 when the CPU_c1 column is not enabled, or when the platform
 * has a dedicated core C1 residency MSR (has_msr_core_c1_res).
 * Otherwise returns the result of DO_BIC_READ(bic).
 */
int soft_c1_residency_display(int bic)
{
	if (DO_BIC(BIC_CPU_c1) && !platform->has_msr_core_c1_res)
		return DO_BIC_READ(bic);

	return 0;
}
/*
 * delta_thread() - turn "old" into the per-thread delta for one interval
 *
 * new:        sample taken at the end of the interval
 * old:        sample taken at the start of the interval; overwritten in
 *             place so that, on success, old = new - old
 * core_delta: core-level data whose c3/c6/c7 fields are read as deltas
 *             (delta_cpu() passes its core delta here)
 *
 * Returns 0 on success.  Returns -1 if APERF or MPERF did not strictly
 * increase between the two samples while a column that needs them is
 * enabled; the fields assigned before that check (timestamps, tsc, c1)
 * have already been overwritten at that point.
 * Exits via errx() if fewer than 1,000,000 TSC cycles elapsed.
 */
int delta_thread(struct thread_data *new, struct thread_data *old, struct core_data *core_delta)
{
int i;
struct msr_counter *mp;
/* we run cpuid just the 1st time, copy the results */
if (DO_BIC(BIC_APIC))
new->apic_id = old->apic_id;
if (DO_BIC(BIC_X2APIC))
new->x2apic_id = old->x2apic_id;
/*
* the timestamps from start of measurement interval are in "old"
* the timestamp from end of measurement interval are in "new"
* over-write old w/ new so we can print end of interval values
*/
timersub(&new->tv_begin, &old->tv_begin, &old->tv_delta);
old->tv_begin = new->tv_begin;
old->tv_end = new->tv_end;
old->tsc = new->tsc - old->tsc;
/* check for TSC < 1 Mcycles over interval */
if (old->tsc < (1000 * 1000))
errx(-3, "Insanely slow TSC rate, TSC stops in idle?\n"
"You can disable all c-states by booting with \"idle=poll\"\n"
"or just the deep ones with \"processor.max_cstate=1\"");
old->c1 = new->c1 - old->c1;
/* APERF/MPERF deltas are only computed when some enabled column needs them */
if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) || DO_BIC(BIC_IPC)
|| soft_c1_residency_display(BIC_Avg_MHz)) {
if ((new->aperf > old->aperf) && (new->mperf > old->mperf)) {
old->aperf = new->aperf - old->aperf;
old->mperf = new->mperf - old->mperf;
} else {
/* either counter failed to advance: report failure to the caller */
return -1;
}
}
if (platform->has_msr_core_c1_res) {
/*
* Some models have a dedicated C1 residency MSR,
* which should be more accurate than the derivation below.
*/
} else {
/*
* As counter collection is not atomic,
* it is possible for mperf's non-halted cycles + idle states
* to exceed TSC's all cycles: show c1 = 0% in that case.
*
* NOTE(review): old->mperf is a delta here only if the APERF/MPERF
* branch above ran; otherwise it still holds the start-of-interval
* value -- confirm c1 is never displayed in that case.
*/
if ((old->mperf + core_delta->c3 + core_delta->c6 + core_delta->c7) > (old->tsc * tsc_tweak))
old->c1 = 0;
else {
/* normal case, derive c1 */
old->c1 = (old->tsc * tsc_tweak) - old->mperf - core_delta->c3
- core_delta->c6 - core_delta->c7;
}
}
if (old->mperf == 0) {
if (debug > 1)
fprintf(outf, "cpu%d MPERF 0!\n", old->cpu_id);
old->mperf = 1; /* divide by 0 protection */
}
/* these counts are differenced only when their column is enabled */
if (DO_BIC(BIC_IPC))
old->instr_count = new->instr_count - old->instr_count;
if (DO_BIC(BIC_IRQ))
old->irq_count = new->irq_count - old->irq_count;
if (DO_BIC(BIC_SMI))
old->smi_count = new->smi_count - old->smi_count;
/* added thread counters: FORMAT_RAW ones take the new value, the rest are differenced */
for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
if (mp->format == FORMAT_RAW)
old->counter[i] = new->counter[i];
else
old->counter[i] = new->counter[i] - old->counter[i];
}
return 0;
}
/*
 * Compute the deltas for one CPU: (t2, c2, p2) are overwritten with the
 * difference between the samples in (t, c, p) and their previous contents.
 *
 * The core delta is computed only for the first thread in a core and the
 * package delta only for the first core in a package; the thread delta is
 * always computed.
 *
 * Returns the non-zero result of delta_thread() if it fails (the package
 * delta is then skipped), otherwise the result of delta_package(), or 0
 * when no package delta was due.
 */
int delta_cpu(struct thread_data *t, struct core_data *c,
	      struct pkg_data *p, struct thread_data *t2, struct core_data *c2, struct pkg_data *p2)
{
	int status;

	/* once per core, on its first thread */
	if (is_cpu_first_thread_in_core(t, c, p))
		delta_core(c, c2);

	/* every thread; c2 is passed along as the core delta */
	status = delta_thread(t, t2, c2);
	if (status != 0)
		return status;

	/* once per package, on its first core */
	if (!is_cpu_first_core_in_package(t, c, p))
		return 0;

	return delta_package(p, p2);
}
void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
int i;
struct msr_counter *mp;
t->tv_begin.tv_sec = 0;
t->tv_begin.tv_usec = 0;
t->tv_end.tv_sec = 0;
t->tv_end.tv_usec = 0;
t->tv_delta.tv_sec = 0;
t->tv_delta.tv_usec = 0;
t->tsc = 0;
t->aperf = 0;
t->mperf = 0;
t->c1 = 0;
t->instr_count = 0;
t->irq_count = 0;
t->smi_count = 0;
c->c3 = 0;
c->c6 = 0;
c->c7 = 0;
c->mc6_us = 0;
c->core_temp_c = 0;
c->core_energy = 0;
c->core_throt_cnt = 0;
p->pkg_wtd_core_c0 = 0;
p->pkg_any_core_c0 = 0;
p->pkg_any_gfxe_c0 = 0;
p->pkg_both_core_gfxe_c0 = 0;
p->pc2 = 0;
if (DO_BIC(BIC_Pkgpc3))
p->pc3 = 0;
if (DO_BIC(BIC_Pkgpc6))
p->pc6 = 0;
if (DO_BIC(BIC_Pkgpc7))
p->pc7 = 0;
p->pc8 = 0;
p->pc9 = 0;
p->pc10 = 0;
p->cpu_lpi = 0;
p->sys_lpi = 0;
p->energy_pkg = 0;
p->energy_dram = 0;
p->energy_cores = 0;
p->energy_gfx = 0;
p->rapl_pkg_perf_status = 0;
p->rapl_dram_perf_status = 0;
p->pkg_temp_c = 0;
p->gfx_rc6_ms = 0;
p->uncore_mhz = 0;
p->gfx_mhz = 0;
p->gfx_act_mhz = 0;
for (i = 0, mp = sys.tp; mp; i++, mp = mp->next)
t->counter[i] = 0;
for (i = 0, mp = sys.cp; mp; i++, mp = mp->next)
c->counter[i] = 0;
for (i = 0, mp = sys.pp; mp; i++, mp = mp->next)
p->counter[i] = 0;
}
/*
 * Add one CPU's values into the global "average" accumulators.
 *
 * Thread fields are added on every call.  Core fields are added only for
 * the first thread in a core, and package fields only for the first core
 * in a package.  Added counters in FORMAT_RAW are never summed.
 *
 * Some fields are not summed: the apic ids, tv_end, cpu_lpi, sys_lpi,
 * gfx_rc6_ms and the *_mhz values take the most recent CPU's value, the
 * first non-zero-second tv_begin is kept, and the temperature and
 * throttle-count fields keep the maximum seen.
 *
 * Always returns 0.
 */
int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
	struct thread_data *sum_t = &average.threads;
	struct core_data *sum_c = &average.cores;
	struct pkg_data *sum_p = &average.packages;
	struct msr_counter *ctr;
	int idx;

	/* ---- per-thread ---- */

	/* copy un-changing apic_id's */
	if (DO_BIC(BIC_APIC))
		sum_t->apic_id = t->apic_id;
	if (DO_BIC(BIC_X2APIC))
		sum_t->x2apic_id = t->x2apic_id;

	/* keep the first tv_begin seen, and always the latest tv_end */
	if (sum_t->tv_begin.tv_sec == 0)
		sum_t->tv_begin = t->tv_begin;
	sum_t->tv_end = t->tv_end;

	sum_t->tsc += t->tsc;
	sum_t->aperf += t->aperf;
	sum_t->mperf += t->mperf;
	sum_t->c1 += t->c1;
	sum_t->instr_count += t->instr_count;
	sum_t->irq_count += t->irq_count;
	sum_t->smi_count += t->smi_count;

	for (idx = 0, ctr = sys.tp; ctr; ctr = ctr->next, idx++) {
		if (ctr->format != FORMAT_RAW)
			sum_t->counter[idx] += t->counter[idx];
	}

	/* ---- per-core: only the first thread in a core contributes ---- */
	if (!is_cpu_first_thread_in_core(t, c, p))
		return 0;

	sum_c->c3 += c->c3;
	sum_c->c6 += c->c6;
	sum_c->c7 += c->c7;
	sum_c->mc6_us += c->mc6_us;
	sum_c->core_energy += c->core_energy;

	sum_c->core_temp_c = MAX(sum_c->core_temp_c, c->core_temp_c);
	sum_c->core_throt_cnt = MAX(sum_c->core_throt_cnt, c->core_throt_cnt);

	for (idx = 0, ctr = sys.cp; ctr; ctr = ctr->next, idx++) {
		if (ctr->format != FORMAT_RAW)
			sum_c->counter[idx] += c->counter[idx];
	}

	/* ---- per-package: only the first core in a package contributes ---- */
	if (!is_cpu_first_core_in_package(t, c, p))
		return 0;

	if (DO_BIC(BIC_Totl_c0))
		sum_p->pkg_wtd_core_c0 += p->pkg_wtd_core_c0;
	if (DO_BIC(BIC_Any_c0))
		sum_p->pkg_any_core_c0 += p->pkg_any_core_c0;
	if (DO_BIC(BIC_GFX_c0))
		sum_p->pkg_any_gfxe_c0 += p->pkg_any_gfxe_c0;
	if (DO_BIC(BIC_CPUGFX))
		sum_p->pkg_both_core_gfxe_c0 += p->pkg_both_core_gfxe_c0;

	sum_p->pc2 += p->pc2;
	if (DO_BIC(BIC_Pkgpc3))
		sum_p->pc3 += p->pc3;
	if (DO_BIC(BIC_Pkgpc6))
		sum_p->pc6 += p->pc6;
	if (DO_BIC(BIC_Pkgpc7))
		sum_p->pc7 += p->pc7;
	sum_p->pc8 += p->pc8;
	sum_p->pc9 += p->pc9;
	sum_p->pc10 += p->pc10;

	sum_p->energy_pkg += p->energy_pkg;
	sum_p->energy_dram += p->energy_dram;
	sum_p->energy_cores += p->energy_cores;
	sum_p->energy_gfx += p->energy_gfx;
	sum_p->rapl_pkg_perf_status += p->rapl_pkg_perf_status;
	sum_p->rapl_dram_perf_status += p->rapl_dram_perf_status;

	/* not summed: the value from the latest package simply replaces the old one */
	sum_p->cpu_lpi = p->cpu_lpi;
	sum_p->sys_lpi = p->sys_lpi;
	sum_p->gfx_rc6_ms = p->gfx_rc6_ms;
	sum_p->uncore_mhz = p->uncore_mhz;
	sum_p->gfx_mhz = p->gfx_mhz;
	sum_p->gfx_act_mhz = p->gfx_act_mhz;

	sum_p->pkg_temp_c = MAX(sum_p->pkg_temp_c, p->pkg_temp_c);

	for (idx = 0, ctr = sys.pp; ctr; ctr = ctr->next, idx++) {
		if (ctr->format != FORMAT_RAW)
			sum_p->counter[idx] += p->counter[idx];
	}

	return 0;
}
/*
* sum the counters for all cpus in the system
* compute the weighted average
*/
void compute_average(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
int i;
struct msr_counter *mp;
clear_counters(&average.threads, &average.cores, &average.packages);
for_all_cpus(sum_counters, t, c, p);
/* Use the global time delta for the average. */
average.threads.tv_delta = tv_delta;
average.threads.tsc /= topo.allowed_cpus;
average.threads.aperf /= topo.allowed_cpus;
average.threads.mperf /= topo.allowed_cpus;
average.threads.instr_count /= topo.allowed_cpus;
average.threads.c1 /= topo.allowed_cpus;
if (average.threads.irq_count > 9999999)
sums_need_wide_columns = 1;
average.cores.c3 /= topo.allowed_cores;
average.cores.c6 /= topo.allowed_cores;
average.cores.c7 /= topo.allowed_cores;
average.cores.mc6_us /= topo.allowed_cores;
if (DO_BIC(BIC_Totl_c0))
average.packages.pkg_wtd_core_c0 /= topo.allowed_packages;
if (DO_BIC(BIC_Any_c0))
average.packages.pkg_any_core_c0 /= topo.allowed_packages;
if (DO_BIC(BIC_GFX_c0))
average.packages.pkg_any_gfxe_c0 /= topo.allowed_packages;
if (DO_BIC(BIC_CPUGFX))
average.packages.pkg_both_core_gfxe_c0 /= topo.allowed_packages;
average.packages.pc2 /= topo.allowed_packages;
if (DO_BIC(BIC_Pkgpc3))
average.packages.pc3 /= topo.allowed_packages;
if (DO_BIC(BIC_Pkgpc6))
average.packages.pc6 /= topo.allowed_packages;
if (DO_BIC(BIC_Pkgpc7))
average.packages.pc7 /= topo.allowed_packages;
average.packages.pc8 /= topo.allowed_packages;
average.packages.pc9 /= topo.allowed_packages;
average.packages.pc10 /= topo.allowed_packages;
for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
if (mp->format == FORMAT_RAW)
continue;
if (mp->type == COUNTER_ITEMS) {
if (average.threads.counter[i] > 9999999)
sums_need_wide_columns = 1;
continue;
}
average.threads.counter[i] /= topo.allowed_cpus;
}
for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
if (mp->format == FORMAT_RAW)
continue;
if (mp->type == COUNTER_ITEMS) {
if (average.cores.counter[i] > 9999999)
sums_need_wide_columns = 1;
}
average.cores.counter[i] /= topo.allowed_cores;
}
for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
if (