diff mbox series

[V2] powerpc/perf: Fix cycles/instructions as PM_CYC/PM_INST_CMPL in power10

Message ID 20211007075121.28497-1-atrajeev@linux.vnet.ibm.com (mailing list archive)
State Accepted
Headers show
Series [V2] powerpc/perf: Fix cycles/instructions as PM_CYC/PM_INST_CMPL in power10 | expand
Related show

Checks

Context Check Description
snowpatch_ozlabs/github-powerpc_selftests success Successfully ran 8 jobs.
snowpatch_ozlabs/github-powerpc_ppctests success Successfully ran 8 jobs.
snowpatch_ozlabs/github-powerpc_sparse success Successfully ran 4 jobs.
snowpatch_ozlabs/github-powerpc_clang success Successfully ran 7 jobs.
snowpatch_ozlabs/github-powerpc_kernel_qemu success Successfully ran 24 jobs.

Commit Message

Athira Rajeev Oct. 7, 2021, 7:51 a.m. UTC
From: Athira Rajeev <atrajeev@linux.vnet.ibm.cm>

In power9 and before platforms, the default event used for cyles and
instructions is PM_CYC (0x0001e) and PM_INST_CMPL (0x00002) respectively.
These events uses two programmable PMCs and by default will count
irrespective of the run latch state. But since it is using programmable
PMCs, these events will cause multiplexing with basic event set supported
by perf stat. Hence in power10, performance monitoring unit (PMU) driver
uses performance monitor counter 5 (PMC5) and performance monitor counter6
(PMC6) for counting instructions and cycles.

In power10, event used for cycles is PM_RUN_CYC (0x600F4) and instructions
is PM_RUN_INST_CMPL (0x500fa). But counting of these events in idle state
is controlled by the CC56RUN bit setting in Monitor Mode Control Register0
(MMCR0). If the CC56RUN bit is not set, PMC5/6 will not count when
CTRL[RUN] is zero. This could lead to miss some counts if a thread
is in idle state during system wide profiling.

Patch sets the CC56RUN bit in MMCR0 for power10 which makes PMC5 and
PMC6 count instructions and cycles regardless of the run bit. Since
this change make PMC5/6 count as PM_INST_CMPL/PM_CYC,  renamed event
code 0x600f4 as PM_CYC instead of PM_RUN_CYC and event code 0x500fa
as PM_INST_CMPL instead of PM_RUN_INST_CMPL. The changes are only for
PMC5/6 event codes and will not affect the  behaviour of
PM_RUN_CYC/PM_RUN_INST_CMPL if progammed in other PMC's.

Fixes: a64e697cef23 ("powerpc/perf: power10 Performance Monitoring support")
Signed-off-by: Athira Rajeev <atrajeev@linux.vnet.ibm.cm>
Reviewed-by: Madhavan Srinivasan <maddy@linux.ibm.com>
---
Changelog:
 Updated commit message to explain in detail on why and
 how it affects counting.

Notes on testing done for this change:
 Tested this patch change with a kernel module that
 turns off and turns on the runlatch. kernel module also
 reads the counter values for PMC5 and PMC6 during the
 period when runlatch is off.
 - Started PMU counters via "perf stat" and loaded the
   test module.
 - Checked the counter values captured from module during
   the runlatch off period.
 - Verified that counters were frozen without the patch and
   with the patch, observed counters were incrementing.

 arch/powerpc/perf/power10-events-list.h |  8 ++---
 arch/powerpc/perf/power10-pmu.c         | 44 +++++++++++++++++--------
 2 files changed, 35 insertions(+), 17 deletions(-)

Comments

Michael Ellerman Oct. 17, 2021, 12:32 p.m. UTC | #1
On Thu, 7 Oct 2021 13:21:21 +0530, Athira Rajeev wrote:
> From: Athira Rajeev <atrajeev@linux.vnet.ibm.cm>
> 
> In power9 and before platforms, the default event used for cyles and
> instructions is PM_CYC (0x0001e) and PM_INST_CMPL (0x00002) respectively.
> These events uses two programmable PMCs and by default will count
> irrespective of the run latch state. But since it is using programmable
> PMCs, these events will cause multiplexing with basic event set supported
> by perf stat. Hence in power10, performance monitoring unit (PMU) driver
> uses performance monitor counter 5 (PMC5) and performance monitor counter6
> (PMC6) for counting instructions and cycles.
> 
> [...]

Applied to powerpc/next.

[1/1] powerpc/perf: Fix cycles/instructions as PM_CYC/PM_INST_CMPL in power10
      https://git.kernel.org/powerpc/c/8f6aca0e0f26eaaee670cd27896993a45cdc8f9e

cheers
diff mbox series

Patch

diff --git a/arch/powerpc/perf/power10-events-list.h b/arch/powerpc/perf/power10-events-list.h
index 93be7197d250..564f14097f07 100644
--- a/arch/powerpc/perf/power10-events-list.h
+++ b/arch/powerpc/perf/power10-events-list.h
@@ -9,10 +9,10 @@ 
 /*
  * Power10 event codes.
  */
-EVENT(PM_RUN_CYC,				0x600f4);
+EVENT(PM_CYC,				0x600f4);
 EVENT(PM_DISP_STALL_CYC,			0x100f8);
 EVENT(PM_EXEC_STALL,				0x30008);
-EVENT(PM_RUN_INST_CMPL,				0x500fa);
+EVENT(PM_INST_CMPL,				0x500fa);
 EVENT(PM_BR_CMPL,                               0x4d05e);
 EVENT(PM_BR_MPRED_CMPL,                         0x400f6);
 EVENT(PM_BR_FIN,				0x2f04a);
@@ -50,8 +50,8 @@  EVENT(PM_DTLB_MISS,				0x300fc);
 /* ITLB Reloaded */
 EVENT(PM_ITLB_MISS,				0x400fc);
 
-EVENT(PM_RUN_CYC_ALT,				0x0001e);
-EVENT(PM_RUN_INST_CMPL_ALT,			0x00002);
+EVENT(PM_CYC_ALT,				0x0001e);
+EVENT(PM_INST_CMPL_ALT,				0x00002);
 
 /*
  * Memory Access Events
diff --git a/arch/powerpc/perf/power10-pmu.c b/arch/powerpc/perf/power10-pmu.c
index f9d64c63bb4a..9dd75f385837 100644
--- a/arch/powerpc/perf/power10-pmu.c
+++ b/arch/powerpc/perf/power10-pmu.c
@@ -91,8 +91,8 @@  extern u64 PERF_REG_EXTENDED_MASK;
 
 /* Table of alternatives, sorted by column 0 */
 static const unsigned int power10_event_alternatives[][MAX_ALT] = {
-	{ PM_RUN_CYC_ALT,		PM_RUN_CYC },
-	{ PM_RUN_INST_CMPL_ALT,		PM_RUN_INST_CMPL },
+	{ PM_CYC_ALT,			PM_CYC },
+	{ PM_INST_CMPL_ALT,		PM_INST_CMPL },
 };
 
 static int power10_get_alternatives(u64 event, unsigned int flags, u64 alt[])
@@ -118,8 +118,8 @@  static int power10_check_attr_config(struct perf_event *ev)
 	return 0;
 }
 
-GENERIC_EVENT_ATTR(cpu-cycles,			PM_RUN_CYC);
-GENERIC_EVENT_ATTR(instructions,		PM_RUN_INST_CMPL);
+GENERIC_EVENT_ATTR(cpu-cycles,			PM_CYC);
+GENERIC_EVENT_ATTR(instructions,		PM_INST_CMPL);
 GENERIC_EVENT_ATTR(branch-instructions,		PM_BR_CMPL);
 GENERIC_EVENT_ATTR(branch-misses,		PM_BR_MPRED_CMPL);
 GENERIC_EVENT_ATTR(cache-references,		PM_LD_REF_L1);
@@ -148,8 +148,8 @@  CACHE_EVENT_ATTR(dTLB-load-misses,		PM_DTLB_MISS);
 CACHE_EVENT_ATTR(iTLB-load-misses,		PM_ITLB_MISS);
 
 static struct attribute *power10_events_attr_dd1[] = {
-	GENERIC_EVENT_PTR(PM_RUN_CYC),
-	GENERIC_EVENT_PTR(PM_RUN_INST_CMPL),
+	GENERIC_EVENT_PTR(PM_CYC),
+	GENERIC_EVENT_PTR(PM_INST_CMPL),
 	GENERIC_EVENT_PTR(PM_BR_CMPL),
 	GENERIC_EVENT_PTR(PM_BR_MPRED_CMPL),
 	GENERIC_EVENT_PTR(PM_LD_REF_L1),
@@ -173,8 +173,8 @@  static struct attribute *power10_events_attr_dd1[] = {
 };
 
 static struct attribute *power10_events_attr[] = {
-	GENERIC_EVENT_PTR(PM_RUN_CYC),
-	GENERIC_EVENT_PTR(PM_RUN_INST_CMPL),
+	GENERIC_EVENT_PTR(PM_CYC),
+	GENERIC_EVENT_PTR(PM_INST_CMPL),
 	GENERIC_EVENT_PTR(PM_BR_FIN),
 	GENERIC_EVENT_PTR(PM_MPRED_BR_FIN),
 	GENERIC_EVENT_PTR(PM_LD_REF_L1),
@@ -271,8 +271,8 @@  static const struct attribute_group *power10_pmu_attr_groups[] = {
 };
 
 static int power10_generic_events_dd1[] = {
-	[PERF_COUNT_HW_CPU_CYCLES] =			PM_RUN_CYC,
-	[PERF_COUNT_HW_INSTRUCTIONS] =			PM_RUN_INST_CMPL,
+	[PERF_COUNT_HW_CPU_CYCLES] =			PM_CYC,
+	[PERF_COUNT_HW_INSTRUCTIONS] =			PM_INST_CMPL,
 	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] =		PM_BR_CMPL,
 	[PERF_COUNT_HW_BRANCH_MISSES] =			PM_BR_MPRED_CMPL,
 	[PERF_COUNT_HW_CACHE_REFERENCES] =		PM_LD_REF_L1,
@@ -280,8 +280,8 @@  static int power10_generic_events_dd1[] = {
 };
 
 static int power10_generic_events[] = {
-	[PERF_COUNT_HW_CPU_CYCLES] =			PM_RUN_CYC,
-	[PERF_COUNT_HW_INSTRUCTIONS] =			PM_RUN_INST_CMPL,
+	[PERF_COUNT_HW_CPU_CYCLES] =			PM_CYC,
+	[PERF_COUNT_HW_INSTRUCTIONS] =			PM_INST_CMPL,
 	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] =		PM_BR_FIN,
 	[PERF_COUNT_HW_BRANCH_MISSES] =			PM_MPRED_BR_FIN,
 	[PERF_COUNT_HW_CACHE_REFERENCES] =		PM_LD_REF_L1,
@@ -548,6 +548,24 @@  static u64 power10_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
 
 #undef C
 
+/*
+ * Set the MMCR0[CC56RUN] bit to enable counting for
+ * PMC5 and PMC6 regardless of the state of CTRL[RUN],
+ * so that we can use counters 5 and 6 as PM_INST_CMPL and
+ * PM_CYC.
+ */
+static int power10_compute_mmcr(u64 event[], int n_ev,
+				unsigned int hwc[], struct mmcr_regs *mmcr,
+				struct perf_event *pevents[], u32 flags)
+{
+	int ret;
+
+	ret = isa207_compute_mmcr(event, n_ev, hwc, mmcr, pevents, flags);
+	if (!ret)
+		mmcr->mmcr0 |= MMCR0_C56RUN;
+	return ret;
+}
+
 static struct power_pmu power10_pmu = {
 	.name			= "POWER10",
 	.n_counter		= MAX_PMU_COUNTERS,
@@ -555,7 +573,7 @@  static struct power_pmu power10_pmu = {
 	.test_adder		= ISA207_TEST_ADDER,
 	.group_constraint_mask	= CNST_CACHE_PMC4_MASK,
 	.group_constraint_val	= CNST_CACHE_PMC4_VAL,
-	.compute_mmcr		= isa207_compute_mmcr,
+	.compute_mmcr		= power10_compute_mmcr,
 	.config_bhrb		= power10_config_bhrb,
 	.bhrb_filter_map	= power10_bhrb_filter_map,
 	.get_constraint		= isa207_get_constraint,