Message ID | 20200309085806.155823-14-ravi.bangoria@linux.ibm.com (mailing list archive) |
---|---|
State | Changes Requested |
Headers | show |
Series | powerpc/watchpoint: Preparation for more than one watchpoint | expand |
Context | Check | Description |
---|---|---|
snowpatch_ozlabs/apply_patch | success | Successfully applied on branch powerpc/merge (ab326587bb5fb91cc97df9b9f48e9e1469f04621) |
snowpatch_ozlabs/checkpatch | warning | total: 0 errors, 2 warnings, 0 checks, 274 lines checked |
snowpatch_ozlabs/needsstable | success | Patch has no Fixes tags |
Le 09/03/2020 à 09:58, Ravi Bangoria a écrit : > ptrace and perf watchpoints on powerpc behaves differently. Ptrace On the 8xx, ptrace generates signal after executing the instruction. > watchpoint works in one-shot mode and generates signal before executing > instruction. It's ptrace user's job to single-step the instruction and > re-enable the watchpoint. OTOH, in case of perf watchpoint, kernel > emulates/single-steps the instruction and then generates event. If perf > and ptrace creates two events with same or overlapping address ranges, > it's ambiguous to decide who should single-step the instruction. Because > of this issue ptrace and perf event can't coexist when the address range > overlaps. Ok, and then ? What's the purpose of this (big) patch ? > > Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com> > --- > arch/powerpc/include/asm/hw_breakpoint.h | 2 + > arch/powerpc/kernel/hw_breakpoint.c | 220 +++++++++++++++++++++++ > kernel/events/hw_breakpoint.c | 16 ++ > 3 files changed, 238 insertions(+) > > diff --git a/arch/powerpc/include/asm/hw_breakpoint.h b/arch/powerpc/include/asm/hw_breakpoint.h > index ec61e2b7195c..6e1a19af5177 100644 > --- a/arch/powerpc/include/asm/hw_breakpoint.h > +++ b/arch/powerpc/include/asm/hw_breakpoint.h > @@ -66,6 +66,8 @@ extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused, > unsigned long val, void *data); > int arch_install_hw_breakpoint(struct perf_event *bp); > void arch_uninstall_hw_breakpoint(struct perf_event *bp); > +int arch_reserve_bp_slot(struct perf_event *bp); > +void arch_release_bp_slot(struct perf_event *bp); > void arch_unregister_hw_breakpoint(struct perf_event *bp); > void hw_breakpoint_pmu_read(struct perf_event *bp); > extern void flush_ptrace_hw_breakpoint(struct task_struct *tsk); > diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c > index 2ac89b92590f..d8529d9151e8 100644 > --- a/arch/powerpc/kernel/hw_breakpoint.c > +++ 
b/arch/powerpc/kernel/hw_breakpoint.c > @@ -123,6 +123,226 @@ static bool is_ptrace_bp(struct perf_event *bp) > return (bp->overflow_handler == ptrace_triggered); > } > > +struct breakpoint { > + struct list_head list; > + struct perf_event *bp; > + bool ptrace_bp; > +}; Don't we have an equivalent struct already ? > + > +static DEFINE_PER_CPU(struct breakpoint *, cpu_bps[HBP_NUM_MAX]); > +static LIST_HEAD(task_bps); > + > +static struct breakpoint *alloc_breakpoint(struct perf_event *bp) > +{ > + struct breakpoint *tmp; > + > + tmp = kzalloc(sizeof(*tmp), GFP_KERNEL); > + if (!tmp) > + return ERR_PTR(-ENOMEM); > + tmp->bp = bp; > + tmp->ptrace_bp = is_ptrace_bp(bp); > + return tmp; > +} > + > +static bool bp_addr_range_overlap(struct perf_event *bp1, struct perf_event *bp2) > +{ > + __u64 bp1_saddr, bp1_eaddr, bp2_saddr, bp2_eaddr; > + > + bp1_saddr = bp1->attr.bp_addr & ~HW_BREAKPOINT_ALIGN; > + bp1_eaddr = (bp1->attr.bp_addr + bp1->attr.bp_len - 1) | HW_BREAKPOINT_ALIGN; > + bp2_saddr = bp2->attr.bp_addr & ~HW_BREAKPOINT_ALIGN; > + bp2_eaddr = (bp2->attr.bp_addr + bp2->attr.bp_len - 1) | HW_BREAKPOINT_ALIGN; > + > + return (bp1_saddr <= bp2_eaddr && bp1_eaddr >= bp2_saddr); Would be better with something like (HW_BREAKPOINT_SIZE needs to be defined). bp1_saddr = ALIGN_DOWN(bp1->attr.bp_addr, HW_BREAKPOINT_SIZE); bp1_eaddr = ALIGN(bp1->attr.bp_addr, HW_BREAKPOINT_SIZE); bp2_saddr = ALIGN_DOWN(bp2->attr.bp_addr, HW_BREAKPOINT_SIZE); bp2_eaddr = ALIGN(bp2->attr.bp_addr, HW_BREAKPOINT_SIZE); return (bp1_saddr < bp2_eaddr && bp1_eaddr > bp2_saddr); > +} > + > +static bool alternate_infra_bp(struct breakpoint *b, struct perf_event *bp) > +{ > + return is_ptrace_bp(bp) ? 
!b->ptrace_bp : b->ptrace_bp; > +} > + > +static bool can_co_exist(struct breakpoint *b, struct perf_event *bp) > +{ > + return !(alternate_infra_bp(b, bp) && bp_addr_range_overlap(b->bp, bp)); > +} > + > +static int task_bps_add(struct perf_event *bp) > +{ > + struct breakpoint *tmp; > + > + tmp = alloc_breakpoint(bp); > + if (IS_ERR(tmp)) > + return PTR_ERR(tmp); > + > + list_add(&tmp->list, &task_bps); > + return 0; > +} > + > +static void task_bps_remove(struct perf_event *bp) > +{ > + struct list_head *pos, *q; > + struct breakpoint *tmp; > + > + list_for_each_safe(pos, q, &task_bps) { > + tmp = list_entry(pos, struct breakpoint, list); > + > + if (tmp->bp == bp) { > + list_del(&tmp->list); > + kfree(tmp); > + break; > + } > + } > +} > + > +/* > + * If any task has breakpoint from alternate infrastructure, > + * return true. Otherwise return false. > + */ > +static bool all_task_bps_check(struct perf_event *bp) > +{ > + struct breakpoint *tmp; > + > + list_for_each_entry(tmp, &task_bps, list) { > + if (!can_co_exist(tmp, bp)) > + return true; > + } > + return false; > +} > + > +/* > + * If same task has breakpoint from alternate infrastructure, > + * return true. Otherwise return false. 
> + */ > +static bool same_task_bps_check(struct perf_event *bp) > +{ > + struct breakpoint *tmp; > + > + list_for_each_entry(tmp, &task_bps, list) { > + if (tmp->bp->hw.target == bp->hw.target && > + !can_co_exist(tmp, bp)) > + return true; > + } > + return false; > +} > + > +static int cpu_bps_add(struct perf_event *bp) > +{ > + struct breakpoint **cpu_bp; > + struct breakpoint *tmp; > + int i = 0; > + > + tmp = alloc_breakpoint(bp); > + if (IS_ERR(tmp)) > + return PTR_ERR(tmp); > + > + cpu_bp = per_cpu_ptr(cpu_bps, bp->cpu); > + for (i = 0; i < nr_wp_slots(); i++) { > + if (!cpu_bp[i]) { > + cpu_bp[i] = tmp; > + break; > + } > + } > + return 0; > +} > + > +static void cpu_bps_remove(struct perf_event *bp) > +{ > + struct breakpoint **cpu_bp; > + int i = 0; > + > + cpu_bp = per_cpu_ptr(cpu_bps, bp->cpu); > + for (i = 0; i < nr_wp_slots(); i++) { > + if (!cpu_bp[i]) > + continue; > + > + if (cpu_bp[i]->bp == bp) { > + kfree(cpu_bp[i]); > + cpu_bp[i] = NULL; > + break; > + } > + } > +} > + > +static bool cpu_bps_check(int cpu, struct perf_event *bp) > +{ > + struct breakpoint **cpu_bp; > + int i; > + > + cpu_bp = per_cpu_ptr(cpu_bps, cpu); > + for (i = 0; i < nr_wp_slots(); i++) { > + if (cpu_bp[i] && !can_co_exist(cpu_bp[i], bp)) > + return true; > + } > + return false; > +} > + > +static bool all_cpu_bps_check(struct perf_event *bp) > +{ > + int cpu; > + > + for_each_online_cpu(cpu) { > + if (cpu_bps_check(cpu, bp)) > + return true; > + } > + return false; > +} > + > +/* > + * We don't use any locks to serialize accesses to cpu_bps or task_bps > + * because are already inside nr_bp_mutex. 
> + */ > +int arch_reserve_bp_slot(struct perf_event *bp) > +{ > + int ret; > + > + if (is_ptrace_bp(bp)) { > + if (all_cpu_bps_check(bp)) > + return -ENOSPC; > + > + if (same_task_bps_check(bp)) > + return -ENOSPC; > + > + return task_bps_add(bp); > + } else { > + if (is_kernel_addr(bp->attr.bp_addr)) > + return 0; > + > + if (bp->hw.target && bp->cpu == -1) { > + if (same_task_bps_check(bp)) > + return -ENOSPC; > + > + return task_bps_add(bp); > + } else if (!bp->hw.target && bp->cpu != -1) { > + if (all_task_bps_check(bp)) > + return -ENOSPC; > + > + return cpu_bps_add(bp); > + } else { > + if (same_task_bps_check(bp)) > + return -ENOSPC; > + > + ret = cpu_bps_add(bp); > + if (ret) > + return ret; > + ret = task_bps_add(bp); > + if (ret) > + cpu_bps_remove(bp); > + > + return ret; > + } > + } > +} > + > +void arch_release_bp_slot(struct perf_event *bp) > +{ > + if (!is_kernel_addr(bp->attr.bp_addr)) { > + if (bp->hw.target) > + task_bps_remove(bp); > + if (bp->cpu != -1) > + cpu_bps_remove(bp); > + } > +} > + > /* > * Perform cleanup of arch-specific counters during unregistration > * of the perf-event > diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c > index 3cc8416ec844..b48d7039a015 100644 > --- a/kernel/events/hw_breakpoint.c > +++ b/kernel/events/hw_breakpoint.c > @@ -213,6 +213,15 @@ toggle_bp_slot(struct perf_event *bp, bool enable, enum bp_type_idx type, > list_del(&bp->hw.bp_list); > } > > +__weak int arch_reserve_bp_slot(struct perf_event *bp) > +{ > + return 0; > +} > + > +__weak void arch_release_bp_slot(struct perf_event *bp) > +{ > +} > + > /* > * Function to perform processor-specific cleanup during unregistration > */ > @@ -270,6 +279,7 @@ static int __reserve_bp_slot(struct perf_event *bp, u64 bp_type) > struct bp_busy_slots slots = {0}; > enum bp_type_idx type; > int weight; > + int ret; > > /* We couldn't initialize breakpoint constraints on boot */ > if (!constraints_initialized) > @@ -294,6 +304,10 @@ static int 
__reserve_bp_slot(struct perf_event *bp, u64 bp_type) > if (slots.pinned + (!!slots.flexible) > nr_slots[type]) > return -ENOSPC; > > + ret = arch_reserve_bp_slot(bp); > + if (ret) > + return ret; > + > toggle_bp_slot(bp, true, type, weight); > > return 0; > @@ -317,6 +331,8 @@ static void __release_bp_slot(struct perf_event *bp, u64 bp_type) > enum bp_type_idx type; > int weight; > > + arch_release_bp_slot(bp); > + > type = find_slot_idx(bp_type); > weight = hw_breakpoint_weight(bp); > toggle_bp_slot(bp, false, type, weight); > Christophe
On 3/17/20 4:38 PM, Christophe Leroy wrote: > > > Le 09/03/2020 à 09:58, Ravi Bangoria a écrit : >> ptrace and perf watchpoints on powerpc behaves differently. Ptrace > > On the 8xx, ptrace generates signal after executing the instruction. 8xx logic is unchanged. I should have mentioned "Book3s DAWR". > >> watchpoint works in one-shot mode and generates signal before executing >> instruction. It's ptrace user's job to single-step the instruction and >> re-enable the watchpoint. OTOH, in case of perf watchpoint, kernel >> emulates/single-steps the instruction and then generates event. If perf >> and ptrace creates two events with same or overlapping address ranges, >> it's ambiguous to decide who should single-step the instruction. Because >> of this issue ptrace and perf event can't coexist when the address range >> overlaps. > > Ok, and then ? What's the purpose of this (big) patch ? Don't allow perf and ptrace watchpoints at the same time if their address ranges overlap. ... >> +struct breakpoint { >> + struct list_head list; >> + struct perf_event *bp; >> + bool ptrace_bp; >> +}; > > Don't we have an equivalent struct already ? No. Using this we track per-CPU and per-thread watchpoints for both perf and ptrace. This problem is powerpc (DAWR) specific, and thus we need to hook arch-specific logic into the watchpoint installation/uninstallation path. ... >> +static bool bp_addr_range_overlap(struct perf_event *bp1, struct perf_event *bp2) >> +{ >> + __u64 bp1_saddr, bp1_eaddr, bp2_saddr, bp2_eaddr; >> + >> + bp1_saddr = bp1->attr.bp_addr & ~HW_BREAKPOINT_ALIGN; >> + bp1_eaddr = (bp1->attr.bp_addr + bp1->attr.bp_len - 1) | HW_BREAKPOINT_ALIGN; >> + bp2_saddr = bp2->attr.bp_addr & ~HW_BREAKPOINT_ALIGN; >> + bp2_eaddr = (bp2->attr.bp_addr + bp2->attr.bp_len - 1) | HW_BREAKPOINT_ALIGN; >> + >> + return (bp1_saddr <= bp2_eaddr && bp1_eaddr >= bp2_saddr); > > Would be better with something like (HW_BREAKPOINT_SIZE needs to be defined). 
> > bp1_saddr = ALIGN_DOWN(bp1->attr.bp_addr, HW_BREAKPOINT_SIZE); > bp1_eaddr = ALIGN(bp1->attr.bp_addr, HW_BREAKPOINT_SIZE); > bp2_saddr = ALIGN_DOWN(bp2->attr.bp_addr, HW_BREAKPOINT_SIZE); > bp2_eaddr = ALIGN(bp2->attr.bp_addr, HW_BREAKPOINT_SIZE); > > return (bp1_saddr < bp2_eaddr && bp1_eaddr > bp2_saddr); Ok. Thanks, Ravi
diff --git a/arch/powerpc/include/asm/hw_breakpoint.h b/arch/powerpc/include/asm/hw_breakpoint.h index ec61e2b7195c..6e1a19af5177 100644 --- a/arch/powerpc/include/asm/hw_breakpoint.h +++ b/arch/powerpc/include/asm/hw_breakpoint.h @@ -66,6 +66,8 @@ extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused, unsigned long val, void *data); int arch_install_hw_breakpoint(struct perf_event *bp); void arch_uninstall_hw_breakpoint(struct perf_event *bp); +int arch_reserve_bp_slot(struct perf_event *bp); +void arch_release_bp_slot(struct perf_event *bp); void arch_unregister_hw_breakpoint(struct perf_event *bp); void hw_breakpoint_pmu_read(struct perf_event *bp); extern void flush_ptrace_hw_breakpoint(struct task_struct *tsk); diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index 2ac89b92590f..d8529d9151e8 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -123,6 +123,226 @@ static bool is_ptrace_bp(struct perf_event *bp) return (bp->overflow_handler == ptrace_triggered); } +struct breakpoint { + struct list_head list; + struct perf_event *bp; + bool ptrace_bp; +}; + +static DEFINE_PER_CPU(struct breakpoint *, cpu_bps[HBP_NUM_MAX]); +static LIST_HEAD(task_bps); + +static struct breakpoint *alloc_breakpoint(struct perf_event *bp) +{ + struct breakpoint *tmp; + + tmp = kzalloc(sizeof(*tmp), GFP_KERNEL); + if (!tmp) + return ERR_PTR(-ENOMEM); + tmp->bp = bp; + tmp->ptrace_bp = is_ptrace_bp(bp); + return tmp; +} + +static bool bp_addr_range_overlap(struct perf_event *bp1, struct perf_event *bp2) +{ + __u64 bp1_saddr, bp1_eaddr, bp2_saddr, bp2_eaddr; + + bp1_saddr = bp1->attr.bp_addr & ~HW_BREAKPOINT_ALIGN; + bp1_eaddr = (bp1->attr.bp_addr + bp1->attr.bp_len - 1) | HW_BREAKPOINT_ALIGN; + bp2_saddr = bp2->attr.bp_addr & ~HW_BREAKPOINT_ALIGN; + bp2_eaddr = (bp2->attr.bp_addr + bp2->attr.bp_len - 1) | HW_BREAKPOINT_ALIGN; + + return (bp1_saddr <= bp2_eaddr && bp1_eaddr >= bp2_saddr); 
+} + +static bool alternate_infra_bp(struct breakpoint *b, struct perf_event *bp) +{ + return is_ptrace_bp(bp) ? !b->ptrace_bp : b->ptrace_bp; +} + +static bool can_co_exist(struct breakpoint *b, struct perf_event *bp) +{ + return !(alternate_infra_bp(b, bp) && bp_addr_range_overlap(b->bp, bp)); +} + +static int task_bps_add(struct perf_event *bp) +{ + struct breakpoint *tmp; + + tmp = alloc_breakpoint(bp); + if (IS_ERR(tmp)) + return PTR_ERR(tmp); + + list_add(&tmp->list, &task_bps); + return 0; +} + +static void task_bps_remove(struct perf_event *bp) +{ + struct list_head *pos, *q; + struct breakpoint *tmp; + + list_for_each_safe(pos, q, &task_bps) { + tmp = list_entry(pos, struct breakpoint, list); + + if (tmp->bp == bp) { + list_del(&tmp->list); + kfree(tmp); + break; + } + } +} + +/* + * If any task has breakpoint from alternate infrastructure, + * return true. Otherwise return false. + */ +static bool all_task_bps_check(struct perf_event *bp) +{ + struct breakpoint *tmp; + + list_for_each_entry(tmp, &task_bps, list) { + if (!can_co_exist(tmp, bp)) + return true; + } + return false; +} + +/* + * If same task has breakpoint from alternate infrastructure, + * return true. Otherwise return false. 
+ */ +static bool same_task_bps_check(struct perf_event *bp) +{ + struct breakpoint *tmp; + + list_for_each_entry(tmp, &task_bps, list) { + if (tmp->bp->hw.target == bp->hw.target && + !can_co_exist(tmp, bp)) + return true; + } + return false; +} + +static int cpu_bps_add(struct perf_event *bp) +{ + struct breakpoint **cpu_bp; + struct breakpoint *tmp; + int i = 0; + + tmp = alloc_breakpoint(bp); + if (IS_ERR(tmp)) + return PTR_ERR(tmp); + + cpu_bp = per_cpu_ptr(cpu_bps, bp->cpu); + for (i = 0; i < nr_wp_slots(); i++) { + if (!cpu_bp[i]) { + cpu_bp[i] = tmp; + break; + } + } + return 0; +} + +static void cpu_bps_remove(struct perf_event *bp) +{ + struct breakpoint **cpu_bp; + int i = 0; + + cpu_bp = per_cpu_ptr(cpu_bps, bp->cpu); + for (i = 0; i < nr_wp_slots(); i++) { + if (!cpu_bp[i]) + continue; + + if (cpu_bp[i]->bp == bp) { + kfree(cpu_bp[i]); + cpu_bp[i] = NULL; + break; + } + } +} + +static bool cpu_bps_check(int cpu, struct perf_event *bp) +{ + struct breakpoint **cpu_bp; + int i; + + cpu_bp = per_cpu_ptr(cpu_bps, cpu); + for (i = 0; i < nr_wp_slots(); i++) { + if (cpu_bp[i] && !can_co_exist(cpu_bp[i], bp)) + return true; + } + return false; +} + +static bool all_cpu_bps_check(struct perf_event *bp) +{ + int cpu; + + for_each_online_cpu(cpu) { + if (cpu_bps_check(cpu, bp)) + return true; + } + return false; +} + +/* + * We don't use any locks to serialize accesses to cpu_bps or task_bps + * because are already inside nr_bp_mutex. 
+ */ +int arch_reserve_bp_slot(struct perf_event *bp) +{ + int ret; + + if (is_ptrace_bp(bp)) { + if (all_cpu_bps_check(bp)) + return -ENOSPC; + + if (same_task_bps_check(bp)) + return -ENOSPC; + + return task_bps_add(bp); + } else { + if (is_kernel_addr(bp->attr.bp_addr)) + return 0; + + if (bp->hw.target && bp->cpu == -1) { + if (same_task_bps_check(bp)) + return -ENOSPC; + + return task_bps_add(bp); + } else if (!bp->hw.target && bp->cpu != -1) { + if (all_task_bps_check(bp)) + return -ENOSPC; + + return cpu_bps_add(bp); + } else { + if (same_task_bps_check(bp)) + return -ENOSPC; + + ret = cpu_bps_add(bp); + if (ret) + return ret; + ret = task_bps_add(bp); + if (ret) + cpu_bps_remove(bp); + + return ret; + } + } +} + +void arch_release_bp_slot(struct perf_event *bp) +{ + if (!is_kernel_addr(bp->attr.bp_addr)) { + if (bp->hw.target) + task_bps_remove(bp); + if (bp->cpu != -1) + cpu_bps_remove(bp); + } +} + /* * Perform cleanup of arch-specific counters during unregistration * of the perf-event diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c index 3cc8416ec844..b48d7039a015 100644 --- a/kernel/events/hw_breakpoint.c +++ b/kernel/events/hw_breakpoint.c @@ -213,6 +213,15 @@ toggle_bp_slot(struct perf_event *bp, bool enable, enum bp_type_idx type, list_del(&bp->hw.bp_list); } +__weak int arch_reserve_bp_slot(struct perf_event *bp) +{ + return 0; +} + +__weak void arch_release_bp_slot(struct perf_event *bp) +{ +} + /* * Function to perform processor-specific cleanup during unregistration */ @@ -270,6 +279,7 @@ static int __reserve_bp_slot(struct perf_event *bp, u64 bp_type) struct bp_busy_slots slots = {0}; enum bp_type_idx type; int weight; + int ret; /* We couldn't initialize breakpoint constraints on boot */ if (!constraints_initialized) @@ -294,6 +304,10 @@ static int __reserve_bp_slot(struct perf_event *bp, u64 bp_type) if (slots.pinned + (!!slots.flexible) > nr_slots[type]) return -ENOSPC; + ret = arch_reserve_bp_slot(bp); + if (ret) + 
return ret; + toggle_bp_slot(bp, true, type, weight); return 0; @@ -317,6 +331,8 @@ static void __release_bp_slot(struct perf_event *bp, u64 bp_type) enum bp_type_idx type; int weight; + arch_release_bp_slot(bp); + type = find_slot_idx(bp_type); weight = hw_breakpoint_weight(bp); toggle_bp_slot(bp, false, type, weight);
ptrace and perf watchpoints on powerpc behave differently. A ptrace watchpoint works in one-shot mode and generates a signal before executing the instruction. It's the ptrace user's job to single-step the instruction and re-enable the watchpoint. OTOH, in the case of a perf watchpoint, the kernel emulates/single-steps the instruction and then generates an event. If perf and ptrace create two events with the same or overlapping address ranges, it's ambiguous who should single-step the instruction. Because of this issue, ptrace and perf events can't coexist when their address ranges overlap. So don't allow a perf and a ptrace watchpoint at the same time if their address ranges overlap. Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com> --- arch/powerpc/include/asm/hw_breakpoint.h | 2 + arch/powerpc/kernel/hw_breakpoint.c | 220 +++++++++++++++++++++++ kernel/events/hw_breakpoint.c | 16 ++ 3 files changed, 238 insertions(+)