
[v3,2/5] perf/x86/intel: Record branch type

Message ID 1491908193-25418-3-git-send-email-yao.jin@linux.intel.com (mailing list archive)
State Not Applicable

Commit Message

Jin, Yao April 11, 2017, 10:56 a.m. UTC
Perf already has support for disassembling the branch instruction
and using the branch type for filtering. The patch just records
the branch type in perf_branch_entry.

Before recording, the patch converts the x86 branch classification
to common branch classification.

Signed-off-by: Jin Yao <yao.jin@linux.intel.com>
---
 arch/x86/events/intel/lbr.c | 53 ++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 52 insertions(+), 1 deletion(-)
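
As context for the conversion the changelog alludes to: branch_type() returns an X86_BR_* value whose two low bits are the USER/KERNEL privilege flags, and the new common_branch_type() helper shifts those off and lets the first remaining set bit select a generic PERF_BR_* value from a lookup table. Below is a minimal, self-contained sketch of that technique; the enum values and names are local placeholders for illustration only, not the real kernel definitions.

/*
 * Stand-alone sketch of the bit-scan mapping used by common_branch_type()
 * in the patch below.  BR_* and BR_X86_* are local stand-ins for the
 * kernel's PERF_BR_* and X86_BR_* definitions.
 */
#include <stdio.h>

enum { BR_NONE, BR_CALL, BR_RET, BR_JCC };	/* stand-in for PERF_BR_* */

enum {						/* stand-in for X86_BR_* */
	BR_X86_USER	= 1 << 0,
	BR_X86_KERNEL	= 1 << 1,
	BR_X86_CALL	= 1 << 2,
	BR_X86_RET	= 1 << 3,
	BR_X86_JCC	= 1 << 4,
};

static int to_common_type(int type)
{
	static const int map[] = { BR_CALL, BR_RET, BR_JCC };
	int i;

	type >>= 2;			/* drop the USER/KERNEL privilege bits */
	for (i = 0; i < 3; i++) {	/* first set bit indexes the table */
		if (type & 0x1)
			return map[i];
		type >>= 1;
	}
	return BR_NONE;
}

int main(void)
{
	/* a kernel-mode conditional jump maps to the generic BR_JCC value */
	printf("%d\n", to_common_type(BR_X86_KERNEL | BR_X86_JCC));
	return 0;
}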

Comments

Peter Zijlstra April 11, 2017, 7:52 a.m. UTC | #1
On Tue, Apr 11, 2017 at 06:56:30PM +0800, Jin Yao wrote:
> Perf already has support for disassembling the branch instruction
> and using the branch type for filtering. The patch just records
> the branch type in perf_branch_entry.
> 
> Before recording, the patch converts the x86 branch classification
> to common branch classification.

This is still a completely inadequate changelog. I really will not
accept patches like this.

> 
> Signed-off-by: Jin Yao <yao.jin@linux.intel.com>
> ---
>  arch/x86/events/intel/lbr.c | 53 ++++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 52 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
> index 81b321a..6968c63 100644
> --- a/arch/x86/events/intel/lbr.c
> +++ b/arch/x86/events/intel/lbr.c
> @@ -109,6 +109,9 @@ enum {
>  	X86_BR_ZERO_CALL	= 1 << 15,/* zero length call */
>  	X86_BR_CALL_STACK	= 1 << 16,/* call stack */
>  	X86_BR_IND_JMP		= 1 << 17,/* indirect jump */
> +
> +	X86_BR_TYPE_SAVE	= 1 << 18,/* indicate to save branch type */
> +
>  };
>  
>  #define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL)
> @@ -670,6 +673,10 @@ static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
>  
>  	if (br_type & PERF_SAMPLE_BRANCH_CALL)
>  		mask |= X86_BR_CALL | X86_BR_ZERO_CALL;
> +
> +	if (br_type & PERF_SAMPLE_BRANCH_TYPE_SAVE)
> +		mask |= X86_BR_TYPE_SAVE;
> +
>  	/*
>  	 * stash actual user request into reg, it may
>  	 * be used by fixup code for some CPU
> @@ -923,6 +930,44 @@ static int branch_type(unsigned long from, unsigned long to, int abort)
>  	return ret;
>  }
>  
> +#define X86_BR_TYPE_MAP_MAX	16
> +
> +static int
> +common_branch_type(int type)
> +{
> +	int i, mask;
> +	const int branch_map[X86_BR_TYPE_MAP_MAX] = {
> +		PERF_BR_CALL,		/* X86_BR_CALL */
> +		PERF_BR_RET,		/* X86_BR_RET */
> +		PERF_BR_SYSCALL,	/* X86_BR_SYSCALL */
> +		PERF_BR_SYSRET,		/* X86_BR_SYSRET */
> +		PERF_BR_INT,		/* X86_BR_INT */
> +		PERF_BR_IRET,		/* X86_BR_IRET */
> +		PERF_BR_JCC,		/* X86_BR_JCC */
> +		PERF_BR_JMP,		/* X86_BR_JMP */
> +		PERF_BR_IRQ,		/* X86_BR_IRQ */
> +		PERF_BR_IND_CALL,	/* X86_BR_IND_CALL */
> +		PERF_BR_NONE,		/* X86_BR_ABORT */
> +		PERF_BR_NONE,		/* X86_BR_IN_TX */
> +		PERF_BR_NONE,		/* X86_BR_NO_TX */
> +		PERF_BR_CALL,		/* X86_BR_ZERO_CALL */
> +		PERF_BR_NONE,		/* X86_BR_CALL_STACK */
> +		PERF_BR_IND_JMP,	/* X86_BR_IND_JMP */
> +	};
> +
> +	type >>= 2; /* skip X86_BR_USER and X86_BR_KERNEL */
> +	mask = ~(~0 << 1);
> +
> +	for (i = 0; i < X86_BR_TYPE_MAP_MAX; i++) {
> +		if (type & mask)
> +			return branch_map[i];
> +
> +		type >>= 1;
> +	}
> +
> +	return PERF_BR_NONE;
> +}
> +
>  /*
>   * implement actual branch filter based on user demand.
>   * Hardware may not exactly satisfy that request, thus
> @@ -939,7 +984,8 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
>  	bool compress = false;
>  
>  	/* if sampling all branches, then nothing to filter */
> -	if ((br_sel & X86_BR_ALL) == X86_BR_ALL)
> +	if (((br_sel & X86_BR_ALL) == X86_BR_ALL) &&
> +	    ((br_sel & X86_BR_TYPE_SAVE) != X86_BR_TYPE_SAVE))
>  		return;
>  
>  	for (i = 0; i < cpuc->lbr_stack.nr; i++) {
> @@ -960,6 +1006,11 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
>  			cpuc->lbr_entries[i].from = 0;
>  			compress = true;
>  		}
> +
> +		if ((br_sel & X86_BR_TYPE_SAVE) == X86_BR_TYPE_SAVE)
> +			cpuc->lbr_entries[i].type = common_branch_type(type);
> +		else
> +			cpuc->lbr_entries[i].type = PERF_BR_NONE;
>  	}
>  
>  	if (!compress)
> -- 
> 2.7.4
>
Jin, Yao April 11, 2017, 8:11 a.m. UTC | #2
On 4/11/2017 3:52 PM, Peter Zijlstra wrote:
> This is still a completely inadequate changelog. I really will not
> accept patches like this.
>
Hi,

The changelog is added in the cover-letter ("[PATCH v3 0/5] perf report: Show branch type").

Does the changelog need to be added in each patch's description?

That's fine, I can add and resend this patch.

Thanks
Jin Yao
Peter Zijlstra April 11, 2017, 8:35 a.m. UTC | #3
On Tue, Apr 11, 2017 at 04:11:21PM +0800, Jin, Yao wrote:
> 
> 
> On 4/11/2017 3:52 PM, Peter Zijlstra wrote:
> > This is still a completely inadequate changelog. I really will not
> > accept patches like this.
> > 
> Hi,
> 
> The changelog is added in the cover-letter ("[PATCH v3 0/5] perf report: Show branch type").
> 
> Does the changelog need to be added in each patch's description?
> 
> That's fine, I can add and resend this patch.

The cover letter is not retained; it is throwaway information.

Each patch should have a coherent changelog that explains why the patch
was done and explains non-trivial things in the implementation.

Simply copy/pasting the same story into multiple patches is not right
either, for the simple fact that the patches are not the same. You did
different things, so you need different stories.
Jin, Yao April 11, 2017, 11:37 a.m. UTC | #4
On 4/11/2017 4:35 PM, Peter Zijlstra wrote:
> On Tue, Apr 11, 2017 at 04:11:21PM +0800, Jin, Yao wrote:
>>
>> On 4/11/2017 3:52 PM, Peter Zijlstra wrote:
>>> This is still a completely inadequate changelog. I really will not
>>> accept patches like this.
>>>
>> Hi,
>>
>> The changelog is added in the cover-letter ("[PATCH v3 0/5] perf report: Show branch type").
>>
>> Does the changelog need to be added in each patch's description?
>>
>> That's fine, I can add and resend this patch.
> The cover letter is not retained; it is throwaway information.
>
> Each patch should have a coherent changelog that explains why the patch
> was done and explains non-trivial things in the implementation.
>
> Simply copy/pasting the same story into multiple patches is not right
> either, for the simple fact that the patches are not the same. You did
> different things, so you need different stories.
>
>
>
Thanks so much for the suggestion!

I accept this and will make changes to my patch descriptions. Rather than
adding a full changelog, I will add a section to each patch description
describing the major changes from the previous version.

Thanks
Jin Yao
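
Before the full patch below, a hedged sketch of the consumer side: a userspace tool could ask for this information by setting the PERF_SAMPLE_BRANCH_TYPE_SAVE flag introduced earlier in this series when opening a branch-stack sampling event. The event type, sample period, and helper name here are arbitrary illustration choices.

/*
 * Sketch: open a cycles event that samples the branch stack and asks the
 * kernel to record each entry's branch type.  Assumes the
 * PERF_SAMPLE_BRANCH_TYPE_SAVE flag added earlier in this series is
 * present in <linux/perf_event.h>.
 */
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <string.h>
#include <unistd.h>

static int open_branch_type_event(void)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_CPU_CYCLES;
	attr.sample_period = 100000;
	attr.sample_type = PERF_SAMPLE_BRANCH_STACK;
	attr.branch_sample_type = PERF_SAMPLE_BRANCH_ANY |
				  PERF_SAMPLE_BRANCH_TYPE_SAVE;

	/* current process, any CPU, no group leader, no flags */
	return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
}

Each perf_branch_entry in the resulting PERF_SAMPLE_BRANCH_STACK records would then carry the recorded type, which the later perf report patches in this series display.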

Patch

diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index 81b321a..6968c63 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -109,6 +109,9 @@  enum {
 	X86_BR_ZERO_CALL	= 1 << 15,/* zero length call */
 	X86_BR_CALL_STACK	= 1 << 16,/* call stack */
 	X86_BR_IND_JMP		= 1 << 17,/* indirect jump */
+
+	X86_BR_TYPE_SAVE	= 1 << 18,/* indicate to save branch type */
+
 };
 
 #define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL)
@@ -670,6 +673,10 @@  static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
 
 	if (br_type & PERF_SAMPLE_BRANCH_CALL)
 		mask |= X86_BR_CALL | X86_BR_ZERO_CALL;
+
+	if (br_type & PERF_SAMPLE_BRANCH_TYPE_SAVE)
+		mask |= X86_BR_TYPE_SAVE;
+
 	/*
 	 * stash actual user request into reg, it may
 	 * be used by fixup code for some CPU
@@ -923,6 +930,44 @@  static int branch_type(unsigned long from, unsigned long to, int abort)
 	return ret;
 }
 
+#define X86_BR_TYPE_MAP_MAX	16
+
+static int
+common_branch_type(int type)
+{
+	int i, mask;
+	const int branch_map[X86_BR_TYPE_MAP_MAX] = {
+		PERF_BR_CALL,		/* X86_BR_CALL */
+		PERF_BR_RET,		/* X86_BR_RET */
+		PERF_BR_SYSCALL,	/* X86_BR_SYSCALL */
+		PERF_BR_SYSRET,		/* X86_BR_SYSRET */
+		PERF_BR_INT,		/* X86_BR_INT */
+		PERF_BR_IRET,		/* X86_BR_IRET */
+		PERF_BR_JCC,		/* X86_BR_JCC */
+		PERF_BR_JMP,		/* X86_BR_JMP */
+		PERF_BR_IRQ,		/* X86_BR_IRQ */
+		PERF_BR_IND_CALL,	/* X86_BR_IND_CALL */
+		PERF_BR_NONE,		/* X86_BR_ABORT */
+		PERF_BR_NONE,		/* X86_BR_IN_TX */
+		PERF_BR_NONE,		/* X86_BR_NO_TX */
+		PERF_BR_CALL,		/* X86_BR_ZERO_CALL */
+		PERF_BR_NONE,		/* X86_BR_CALL_STACK */
+		PERF_BR_IND_JMP,	/* X86_BR_IND_JMP */
+	};
+
+	type >>= 2; /* skip X86_BR_USER and X86_BR_KERNEL */
+	mask = ~(~0 << 1);
+
+	for (i = 0; i < X86_BR_TYPE_MAP_MAX; i++) {
+		if (type & mask)
+			return branch_map[i];
+
+		type >>= 1;
+	}
+
+	return PERF_BR_NONE;
+}
+
 /*
  * implement actual branch filter based on user demand.
  * Hardware may not exactly satisfy that request, thus
@@ -939,7 +984,8 @@  intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
 	bool compress = false;
 
 	/* if sampling all branches, then nothing to filter */
-	if ((br_sel & X86_BR_ALL) == X86_BR_ALL)
+	if (((br_sel & X86_BR_ALL) == X86_BR_ALL) &&
+	    ((br_sel & X86_BR_TYPE_SAVE) != X86_BR_TYPE_SAVE))
 		return;
 
 	for (i = 0; i < cpuc->lbr_stack.nr; i++) {
@@ -960,6 +1006,11 @@  intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
 			cpuc->lbr_entries[i].from = 0;
 			compress = true;
 		}
+
+		if ((br_sel & X86_BR_TYPE_SAVE) == X86_BR_TYPE_SAVE)
+			cpuc->lbr_entries[i].type = common_branch_type(type);
+		else
+			cpuc->lbr_entries[i].type = PERF_BR_NONE;
 	}
 
 	if (!compress)