perf/x86/intel: Fix broken fixed event constraints extension
Unnecessary multiplexing is triggered when running an "instructions"
event on an MTL.

perf stat -e cpu_core/instructions/,cpu_core/instructions/ -a sleep 1

 Performance counter stats for 'system wide':

       115,489,000      cpu_core/instructions/                (50.02%)
       127,433,777      cpu_core/instructions/                (49.98%)

       1.002294504 seconds time elapsed

Linux architectural perf events, e.g., cycles and instructions, usually
have dedicated fixed counters. These events also have equivalent event
encodings that can be scheduled on the general-purpose counters. Since
counters are a scarce resource, intel_pmu_check_event_constraints()
checks and extends the event constraints of these events, so they can
utilize both the fixed counters and the general-purpose counters.
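A minimal, self-contained sketch of that extension step (struct constraint,
extend_fixed_constraint() and the bit layout here are simplified stand-ins,
not the kernel's struct event_constraint or intel_pmu_check_event_constraints()):

#include <stdint.h>
#include <stdio.h>

/* Simplified stand-in for an event constraint: just a counter bitmask. */
struct constraint {
        uint64_t idxmsk;                /* bit i set => counter i may be used */
};

#define GP_COUNTER_MASK(n)      ((1ULL << (n)) - 1)     /* general-purpose counters 0..n-1 */
#define FIXED_COUNTER_BASE      32                      /* fixed counters live above bit 32 in this sketch */

/* Widen a fixed-counter-only constraint so general-purpose counters are allowed too. */
static void extend_fixed_constraint(struct constraint *c, int num_gp)
{
        if (c->idxmsk >> FIXED_COUNTER_BASE)            /* targets a fixed counter? */
                c->idxmsk |= GP_COUNTER_MASK(num_gp);
}

int main(void)
{
        /* "instructions" pinned to fixed counter 0 in this toy layout */
        struct constraint instr = { .idxmsk = 1ULL << FIXED_COUNTER_BASE };

        extend_fixed_constraint(&instr, 8);
        printf("idxmsk = 0x%llx\n", (unsigned long long)instr.idxmsk);
        return 0;
}

With the widened mask an "instructions" event can land on either the fixed
counter or any free general-purpose counter, which is what the missing call
described below broke for hybrid PMUs.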

The following cleanup commit:

  97588df ("perf/x86/intel: Add common intel_pmu_init_hybrid()")

forgot to add the intel_pmu_check_event_constraints() call to
update_pmu_cap(), so the architectural perf events can no longer utilize
the general-purpose counters.
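The visible consequence is the multiplexing shown in the perf stat output
above: two identical events whose constraint only permits the single fixed
counter cannot be scheduled simultaneously, whereas the extended constraint
lets the second event fall back to a general-purpose counter. A toy
scheduling sketch (purely illustrative; schedule_events() is not the
kernel's event scheduler):

#include <stdint.h>
#include <stdio.h>

/* Greedily assign events to counters permitted by a shared constraint mask. */
static int schedule_events(uint64_t allowed, int nr_events)
{
        uint64_t used = 0;
        int scheduled = 0;

        for (int i = 0; i < nr_events; i++) {
                uint64_t avail = allowed & ~used;

                if (!avail)
                        break;                  /* no counter left: multiplexing kicks in */
                used |= avail & -avail;         /* claim the lowest free counter */
                scheduled++;
        }
        return scheduled;
}

int main(void)
{
        uint64_t fixed_only = 1ULL << 32;               /* only fixed counter 0 allowed */
        uint64_t extended   = fixed_only | 0xff;        /* plus 8 general-purpose counters */

        printf("fixed-only constraint: %d of 2 events fit\n", schedule_events(fixed_only, 2));
        printf("extended constraint:   %d of 2 events fit\n", schedule_events(extended, 2));
        return 0;
}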

The code that checks and updates the counters, event constraints and
extra_regs is the same across hybrid systems. Move
intel_pmu_check_hybrid_pmus() into init_hybrid_pmu(), and
remove the duplicate checks from update_pmu_cap().

Fixes: 97588df ("perf/x86/intel: Add common intel_pmu_init_hybrid()")
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20230911135128.2322833-1-kan.liang@linux.intel.com
Kan Liang authored and Ingo Molnar committed Sep 12, 2023
1 parent 97588df commit 950ecdc
Showing 1 changed file with 26 additions and 39 deletions.

--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -4598,6 +4598,13 @@ static void intel_pmu_check_num_counters(int *num_counters,
                                          int *num_counters_fixed,
                                          u64 *intel_ctrl, u64 fixed_mask);
 
+static void intel_pmu_check_event_constraints(struct event_constraint *event_constraints,
+                                              int num_counters,
+                                              int num_counters_fixed,
+                                              u64 intel_ctrl);
+
+static void intel_pmu_check_extra_regs(struct extra_reg *extra_regs);
+
 static inline bool intel_pmu_broken_perf_cap(void)
 {
         /* The Perf Metric (Bit 15) is always cleared */
@@ -4618,19 +4625,23 @@ static void update_pmu_cap(struct x86_hybrid_pmu *pmu)
                             &eax, &ebx, &ecx, &edx);
                 pmu->num_counters = fls(eax);
                 pmu->num_counters_fixed = fls(ebx);
-                intel_pmu_check_num_counters(&pmu->num_counters, &pmu->num_counters_fixed,
-                                             &pmu->intel_ctrl, ebx);
-                pmu->max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, pmu->num_counters);
-                pmu->unconstrained = (struct event_constraint)
-                                     __EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1,
-                                                        0, pmu->num_counters, 0, 0);
         }
 
 
         if (!intel_pmu_broken_perf_cap()) {
                 /* Perf Metric (Bit 15) and PEBS via PT (Bit 16) are hybrid enumeration */
                 rdmsrl(MSR_IA32_PERF_CAPABILITIES, pmu->intel_cap.capabilities);
         }
+}
+
+static void intel_pmu_check_hybrid_pmus(struct x86_hybrid_pmu *pmu)
+{
+        intel_pmu_check_num_counters(&pmu->num_counters, &pmu->num_counters_fixed,
+                                     &pmu->intel_ctrl, (1ULL << pmu->num_counters_fixed) - 1);
+        pmu->max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, pmu->num_counters);
+        pmu->unconstrained = (struct event_constraint)
+                             __EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1,
+                                                0, pmu->num_counters, 0, 0);
 
         if (pmu->intel_cap.perf_metrics)
                 pmu->intel_ctrl |= 1ULL << GLOBAL_CTRL_EN_PERF_METRICS;
@@ -4641,6 +4652,13 @@ static void update_pmu_cap(struct x86_hybrid_pmu *pmu)
                 pmu->pmu.capabilities |= PERF_PMU_CAP_AUX_OUTPUT;
         else
                 pmu->pmu.capabilities |= ~PERF_PMU_CAP_AUX_OUTPUT;
+
+        intel_pmu_check_event_constraints(pmu->event_constraints,
+                                          pmu->num_counters,
+                                          pmu->num_counters_fixed,
+                                          pmu->intel_ctrl);
+
+        intel_pmu_check_extra_regs(pmu->extra_regs);
 }
 
 static struct x86_hybrid_pmu *find_hybrid_pmu_for_cpu(void)
@@ -4696,6 +4714,8 @@ static bool init_hybrid_pmu(int cpu)
         if (this_cpu_has(X86_FEATURE_ARCH_PERFMON_EXT))
                 update_pmu_cap(pmu);
 
+        intel_pmu_check_hybrid_pmus(pmu);
+
         if (!check_hw_exists(&pmu->pmu, pmu->num_counters, pmu->num_counters_fixed))
                 return false;
 
@@ -5915,36 +5935,6 @@ static void intel_pmu_check_extra_regs(struct extra_reg *extra_regs)
         }
 }
 
-static void intel_pmu_check_hybrid_pmus(u64 fixed_mask)
-{
-        struct x86_hybrid_pmu *pmu;
-        int i;
-
-        for (i = 0; i < x86_pmu.num_hybrid_pmus; i++) {
-                pmu = &x86_pmu.hybrid_pmu[i];
-
-                intel_pmu_check_num_counters(&pmu->num_counters,
-                                             &pmu->num_counters_fixed,
-                                             &pmu->intel_ctrl,
-                                             fixed_mask);
-
-                if (pmu->intel_cap.perf_metrics) {
-                        pmu->intel_ctrl |= 1ULL << GLOBAL_CTRL_EN_PERF_METRICS;
-                        pmu->intel_ctrl |= INTEL_PMC_MSK_FIXED_SLOTS;
-                }
-
-                if (pmu->intel_cap.pebs_output_pt_available)
-                        pmu->pmu.capabilities |= PERF_PMU_CAP_AUX_OUTPUT;
-
-                intel_pmu_check_event_constraints(pmu->event_constraints,
-                                                  pmu->num_counters,
-                                                  pmu->num_counters_fixed,
-                                                  pmu->intel_ctrl);
-
-                intel_pmu_check_extra_regs(pmu->extra_regs);
-        }
-}
-
 static const struct { enum hybrid_pmu_type id; char *name; } intel_hybrid_pmu_type_map[] __initconst = {
         { hybrid_small, "cpu_atom" },
         { hybrid_big, "cpu_core" },
@@ -6869,9 +6859,6 @@ __init int intel_pmu_init(void)
         if (!is_hybrid() && x86_pmu.intel_cap.perf_metrics)
                 x86_pmu.intel_ctrl |= 1ULL << GLOBAL_CTRL_EN_PERF_METRICS;
 
-        if (is_hybrid() && !boot_cpu_has(X86_FEATURE_ARCH_PERFMON_EXT))
-                intel_pmu_check_hybrid_pmus((u64)fixed_mask);
-
         if (x86_pmu.intel_cap.pebs_timing_info)
                 x86_pmu.flags |= PMU_FL_RETIRE_LATENCY;
 