
[v1,1/2] powerpc/mmiotrace: Add MMIO Tracing tool for PowerPC

Message ID 2bf90acf7d29641ba6643934ff8dbba897dbd2d9.1718873074.git.jialong.yang@shingroup.cn (mailing list archive)
State Changes Requested
Series [v1,1/2] powerpc/mmiotrace: Add MMIO Tracing tool for PowerPC

Commit Message

Yang Jialong 杨佳龙 June 20, 2024, 8:51 a.m. UTC
mmiotrace is a useful tool for tracing MMIO accesses. At present it is
only supported on the x86 and x86_64 platforms. This patch adds support
for powerpc.
The manual is still Documentation/trace/mmiotrace.rst, i.e. the user API
is unchanged, so existing users will find it easy to pick up.
Almost all files are copied from arch/x86/mm; the only differences stem
from the hardware and from architecture-specific code.

LINK: https://lore.kernel.org/lkml/20080127195536.50809974@daedalus.pq.iki.fi/

Signed-off-by: Jialong Yang <jialong.yang@shingroup.cn>
---
 arch/powerpc/Kconfig.debug       |   3 +
 arch/powerpc/mm/Makefile         |   1 +
 arch/powerpc/mm/kmmio.c          | 649 +++++++++++++++++++++++++++++++
 arch/powerpc/mm/mmio-mod.c       | 414 ++++++++++++++++++++
 arch/powerpc/mm/mmiotrace_arch.c | 149 +++++++
 arch/powerpc/mm/mmiotrace_arch.h |  25 ++
 arch/powerpc/mm/pf_in.c          | 185 +++++++++
 arch/powerpc/mm/pf_in.h          |  33 ++
 8 files changed, 1459 insertions(+)
 create mode 100644 arch/powerpc/mm/kmmio.c
 create mode 100644 arch/powerpc/mm/mmio-mod.c
 create mode 100644 arch/powerpc/mm/mmiotrace_arch.c
 create mode 100644 arch/powerpc/mm/mmiotrace_arch.h
 create mode 100644 arch/powerpc/mm/pf_in.c
 create mode 100644 arch/powerpc/mm/pf_in.h

Comments

kernel test robot June 27, 2024, 12:31 p.m. UTC | #1
Hi Jialong,

kernel test robot noticed the following build errors:

[auto build test ERROR on powerpc/next]
[also build test ERROR on powerpc/fixes linus/master v6.10-rc5 next-20240626]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Jialong-Yang/powerpc-mmiotrace-bind-ioremap-and-page-fault-to-active-mmiotrace/20240624-163027
base:   https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next
patch link:    https://lore.kernel.org/r/2bf90acf7d29641ba6643934ff8dbba897dbd2d9.1718873074.git.jialong.yang%40shingroup.cn
patch subject: [PATCH v1 1/2] powerpc/mmiotrace: Add MMIO Tracing tool for PowerPC
config: powerpc-randconfig-r113-20240627 (https://download.01.org/0day-ci/archive/20240627/202406271946.A6jwFfaY-lkp@intel.com/config)
compiler: powerpc-linux-gcc (GCC) 13.2.0
reproduce: (https://download.01.org/0day-ci/archive/20240627/202406271946.A6jwFfaY-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202406271946.A6jwFfaY-lkp@intel.com/

All error/warnings (new ones prefixed by >>):

   arch/powerpc/mm/kmmio.c: In function 'pmd_mkinvalid':
>> arch/powerpc/mm/kmmio.c:140:16: error: implicit declaration of function '__pmd_raw' [-Werror=implicit-function-declaration]
     140 |         return __pmd_raw(pmd_raw(pmd) & ~cpu_to_be64(_PAGE_PRESENT | _PAGE_INVALID));
         |                ^~~~~~~~~
>> arch/powerpc/mm/kmmio.c:140:26: error: implicit declaration of function 'pmd_raw'; did you mean 'pmd_bad'? [-Werror=implicit-function-declaration]
     140 |         return __pmd_raw(pmd_raw(pmd) & ~cpu_to_be64(_PAGE_PRESENT | _PAGE_INVALID));
         |                          ^~~~~~~
         |                          pmd_bad
   In file included from include/linux/byteorder/big_endian.h:5,
                    from arch/powerpc/include/uapi/asm/byteorder.h:14,
                    from include/asm-generic/bitops/le.h:6,
                    from arch/powerpc/include/asm/bitops.h:325,
                    from include/linux/bitops.h:63,
                    from include/linux/thread_info.h:27,
                    from arch/powerpc/include/asm/ptrace.h:342,
                    from arch/powerpc/include/asm/hw_irq.h:12,
                    from arch/powerpc/include/asm/irqflags.h:12,
                    from include/linux/irqflags.h:18,
                    from include/asm-generic/cmpxchg-local.h:6,
                    from arch/powerpc/include/asm/cmpxchg.h:755,
                    from arch/powerpc/include/asm/atomic.h:11,
                    from include/linux/atomic.h:7,
                    from include/linux/rcupdate.h:25,
                    from include/linux/rculist.h:11,
                    from arch/powerpc/mm/kmmio.c:10:
>> arch/powerpc/mm/kmmio.c:140:70: error: '_PAGE_INVALID' undeclared (first use in this function); did you mean 'RPM_INVALID'?
     140 |         return __pmd_raw(pmd_raw(pmd) & ~cpu_to_be64(_PAGE_PRESENT | _PAGE_INVALID));
         |                                                                      ^~~~~~~~~~~~~
   include/uapi/linux/byteorder/big_endian.h:38:51: note: in definition of macro '__cpu_to_be64'
      38 | #define __cpu_to_be64(x) ((__force __be64)(__u64)(x))
         |                                                   ^
   arch/powerpc/mm/kmmio.c:140:42: note: in expansion of macro 'cpu_to_be64'
     140 |         return __pmd_raw(pmd_raw(pmd) & ~cpu_to_be64(_PAGE_PRESENT | _PAGE_INVALID));
         |                                          ^~~~~~~~~~~
   arch/powerpc/mm/kmmio.c:140:70: note: each undeclared identifier is reported only once for each function it appears in
     140 |         return __pmd_raw(pmd_raw(pmd) & ~cpu_to_be64(_PAGE_PRESENT | _PAGE_INVALID));
         |                                                                      ^~~~~~~~~~~~~
   include/uapi/linux/byteorder/big_endian.h:38:51: note: in definition of macro '__cpu_to_be64'
      38 | #define __cpu_to_be64(x) ((__force __be64)(__u64)(x))
         |                                                   ^
   arch/powerpc/mm/kmmio.c:140:42: note: in expansion of macro 'cpu_to_be64'
     140 |         return __pmd_raw(pmd_raw(pmd) & ~cpu_to_be64(_PAGE_PRESENT | _PAGE_INVALID));
         |                                          ^~~~~~~~~~~
   arch/powerpc/mm/kmmio.c: In function 'kmmio_handler':
>> arch/powerpc/mm/kmmio.c:318:32: error: 'struct pt_regs' has no member named 'softe'
     318 |         ctx->saved_softe = regs->softe;
         |                                ^~
   arch/powerpc/mm/kmmio.c:330:13: error: 'struct pt_regs' has no member named 'softe'
     330 |         regs->softe = IRQS_DISABLED; // soft interrupt
         |             ^~
>> arch/powerpc/mm/kmmio.c:332:9: error: 'local_paca' undeclared (first use in this function); did you mean 'local_lock'?
     332 |         local_paca->srr_valid = 0;
         |         ^~~~~~~~~~
         |         local_lock
   arch/powerpc/mm/kmmio.c: In function 'post_kmmio_handler':
   arch/powerpc/mm/kmmio.c:383:13: error: 'struct pt_regs' has no member named 'softe'
     383 |         regs->softe = ctx->saved_softe;
         |             ^~
   arch/powerpc/mm/kmmio.c: In function 'pmd_mkinvalid':
>> arch/powerpc/mm/kmmio.c:141:1: warning: control reaches end of non-void function [-Wreturn-type]
     141 | }
         | ^
   cc1: some warnings being treated as errors


vim +/__pmd_raw +140 arch/powerpc/mm/kmmio.c

   137	
   138	static inline pmd_t pmd_mkinvalid(pmd_t pmd)
   139	{
 > 140		return __pmd_raw(pmd_raw(pmd) & ~cpu_to_be64(_PAGE_PRESENT | _PAGE_INVALID));
 > 141	}
   142	
   143	static void clear_pmd_presence(pmd_t *pmd, bool clear, pmdval_t *old)
   144	{
   145		pmd_t new_pmd;
   146		pmdval_t v = pmd_val(*pmd);
   147	
   148		if (clear) {
   149			*old = v;
   150			new_pmd = pmd_mkinvalid(*pmd);
   151		} else {
   152			/* Presume this has been called with clear==true previously */
   153			new_pmd = __pmd(*old);
   154		}
   155		*pmd = new_pmd;
   156	}
   157	
   158	static void clear_pte_presence(pte_t *pte, bool clear, pteval_t *old, unsigned long addr)
   159	{
   160		pteval_t v = pte_val(*pte);
   161	
   162		if (clear) {
   163			*old = v;
   164			/* Nothing should care about address */
   165			pte_clear(&init_mm, addr, pte);
   166		} else {
   167			/* Presume this has been called with clear==true previously */
   168			set_pte_at(&init_mm, addr, pte, __pte(*old));
   169		}
   170	}
   171	
   172	static int clear_page_presence(struct kmmio_fault_page *f, bool clear)
   173	{
   174		unsigned int level;
   175		pte_t *pte = lookup_address(f->addr, &level);
   176	
   177		if (!pte) {
   178			pr_err("no pte for addr 0x%08lx\n", f->addr);
   179			return -1;
   180		}
   181	
   182		if (level == PMD_SHIFT)
   183			clear_pmd_presence((pmd_t *)pte, clear, &f->old_presence);
   184		else if (level == PAGE_SHIFT)
   185			clear_pte_presence(pte, clear, &f->old_presence, f->addr);
   186		else {
   187			pr_err("unexpected page level 0x%x.\n", level);
   188			return -1;
   189		}
   190	
   191		mmap_read_lock(&init_mm);
   192		struct vm_area_struct *vma = find_vma(&init_mm, f->addr);
   193	
   194		mmap_read_unlock(&init_mm);
   195	
   196		flush_tlb_page(vma, f->addr);
   197	
   198		return 0;
   199	}
   200	
   201	/*
   202	 * Mark the given page as not present. Access to it will trigger a fault.
   203	 *
   204	 * Struct kmmio_fault_page is protected by RCU and kmmio_lock, but the
   205	 * protection is ignored here. RCU read lock is assumed held, so the struct
   206	 * will not disappear unexpectedly. Furthermore, the caller must guarantee,
   207	 * that double arming the same virtual address (page) cannot occur.
   208	 *
   209	 * Double disarming on the other hand is allowed, and may occur when a fault
   210	 * and mmiotrace shutdown happen simultaneously.
   211	 */
   212	static int arm_kmmio_fault_page(struct kmmio_fault_page *f)
   213	{
   214		int ret;
   215	
   216		WARN_ONCE(f->armed, pr_fmt("kmmio page already armed.\n"));
   217		if (f->armed) {
   218			pr_warn("double-arm: addr 0x%08lx, ref %d, old %d\n",
   219				f->addr, f->count, !!f->old_presence);
   220		}
   221		ret = clear_page_presence(f, true);
   222		WARN_ONCE(ret < 0, pr_fmt("arming at 0x%08lx failed.\n"),
   223			  f->addr);
   224		f->armed = true;
   225		return ret;
   226	}
   227	
   228	/** Restore the given page to saved presence state. */
   229	static void disarm_kmmio_fault_page(struct kmmio_fault_page *f)
   230	{
   231		int ret = clear_page_presence(f, false);
   232	
   233		WARN_ONCE(ret < 0,
   234				KERN_ERR "kmmio disarming at 0x%08lx failed.\n", f->addr);
   235		f->armed = false;
   236	}
   237	
   238	/*
   239	 * This is being called from do_page_fault().
   240	 *
   241	 * We may be in an interrupt or a critical section. Also prefetching may
   242	 * trigger a page fault. We may be in the middle of process switch.
   243	 * We cannot take any locks, because we could be executing especially
   244	 * within a kmmio critical section.
   245	 *
   246	 * Local interrupts are disabled, so preemption cannot happen.
   247	 * Do not enable interrupts, do not sleep, and watch out for other CPUs.
   248	 */
   249	/*
   250	 * Interrupts are disabled on entry as trap3 is an interrupt gate
   251	 * and they remain disabled throughout this function.
   252	 */
   253	int kmmio_handler(struct pt_regs *regs, unsigned long addr)
   254	{
   255		struct kmmio_context *ctx;
   256		struct kmmio_fault_page *faultpage;
   257		int ret = 0; /* default to fault not handled */
   258		unsigned long page_base = addr;
   259		unsigned int l;
   260		pte_t *pte = lookup_address(addr, &l);
   261	
   262		if (!pte)
   263			return -EINVAL;
   264		page_base &= page_level_mask(l);
   265	
   266		/*
   267		 * Hold the RCU read lock over single stepping to avoid looking
   268		 * up the probe and kmmio_fault_page again. The rcu_read_lock_sched()
   269		 * also disables preemption and prevents process switch during
   270		 * the single stepping. We can only handle one active kmmio trace
   271		 * per cpu, so ensure that we finish it before something else
   272		 * gets to run.
   273		 */
   274		rcu_read_lock_sched_notrace();
   275	
   276		faultpage = get_kmmio_fault_page(page_base);
   277		if (!faultpage) {
   278			/*
   279			 * Either this page fault is not caused by kmmio, or
   280			 * another CPU just pulled the kmmio probe from under
   281			 * our feet. The latter case should not be possible.
   282			 */
   283			goto no_kmmio;
   284		}
   285	
   286		ctx = this_cpu_ptr(&kmmio_ctx);
   287		if (ctx->active) {
   288			if (page_base == ctx->addr) {
   289				/*
   290				 * A second fault on the same page means some other
   291				 * condition needs handling by do_page_fault(), the
   292				 * page really not being present is the most common.
   293				 */
   294				pr_debug("secondary hit for 0x%08lx CPU %d.\n",
   295					 addr, smp_processor_id());
   296	
   297				if (!faultpage->old_presence)
   298					pr_info("unexpected secondary hit for address 0x%08lx on CPU %d.\n",
   299						addr, smp_processor_id());
   300			} else {
   301				/*
   302				 * Prevent overwriting already in-flight context.
   303				 * This should not happen, let's hope disarming at
   304				 * least prevents a panic.
   305				 */
   306				pr_emerg("recursive probe hit on CPU %d, for address 0x%08lx. Ignoring.\n",
   307					 smp_processor_id(), addr);
   308				pr_emerg("previous hit was at 0x%08lx.\n", ctx->addr);
   309				disarm_kmmio_fault_page(faultpage);
   310			}
   311			goto no_kmmio;
   312		}
   313		ctx->active++;
   314	
   315		ctx->fpage = faultpage;
   316		ctx->probe = get_kmmio_probe(page_base);
   317		ctx->saved_flags = (regs->msr & (MSR_SE | MSR_EE));
 > 318		ctx->saved_softe = regs->softe;
   319		ctx->addr = page_base;
   320	
   321		if (ctx->probe && ctx->probe->pre_handler)
   322			ctx->probe->pre_handler(ctx->probe, regs, addr);
   323	
   324		/*
   325		 * Enable single-stepping and disable interrupts for the faulting
   326		 * context. Local interrupts must not get enabled during stepping.
   327		 */
   328		regs->msr |= MSR_SE;         // single step
   329		regs->msr &= ~MSR_EE;        // hard interrupt
   330		regs->softe = IRQS_DISABLED; // soft interrupt
   331	
 > 332		local_paca->srr_valid = 0;
   333	
   334		/* Now we set present bit in PTE and single step. */
   335		disarm_kmmio_fault_page(ctx->fpage);
   336	
   337		/*
   338		 * If another cpu accesses the same page while we are stepping,
   339		 * the access will not be caught. It will simply succeed and the
   340		 * only downside is we lose the event. If this becomes a problem,
   341		 * the user should drop to single cpu before tracing.
   342		 */
   343	
   344		return 1; /* fault handled */
   345	
   346	no_kmmio:
   347		rcu_read_unlock_sched_notrace();
   348		return ret;
   349	}
   350
Michael Ellerman June 28, 2024, 7:02 a.m. UTC | #2
Jialong Yang <jialong.yang@shingroup.cn> writes:
> mmiotrace is a useful tool for tracing MMIO accesses. At present it is
> only supported on the x86 and x86_64 platforms.

I've never used mmiotrace, and don't know it well.

I'm not necessarily opposed to merging it, but AFAIK it was mostly used
for reverse engineering proprietary drivers, where the driver itself
couldn't be easily instrumented. Is that what you're using it for?

For drivers where we have the source wouldn't it be easier to just use
tracepoints in the MMIO accessors?

Is it still in-use/maintained on the x86 side?

> This patch adds support for powerpc.
> The manual is still Documentation/trace/mmiotrace.rst, i.e. the user API
> is unchanged, so existing users will find it easy to pick up.
> Almost all files are copied from arch/x86/mm; the only differences stem
> from the hardware and from architecture-specific code.
>
> LINK: https://lore.kernel.org/lkml/20080127195536.50809974@daedalus.pq.iki.fi/
>
> Signed-off-by: Jialong Yang <jialong.yang@shingroup.cn>
> ---
>  arch/powerpc/Kconfig.debug       |   3 +
>  arch/powerpc/mm/Makefile         |   1 +
>  arch/powerpc/mm/kmmio.c          | 649 +++++++++++++++++++++++++++++++
>  arch/powerpc/mm/mmio-mod.c       | 414 ++++++++++++++++++++
>  arch/powerpc/mm/mmiotrace_arch.c | 149 +++++++
>  arch/powerpc/mm/mmiotrace_arch.h |  25 ++
>  arch/powerpc/mm/pf_in.c          | 185 +++++++++
>  arch/powerpc/mm/pf_in.h          |  33 ++
>  8 files changed, 1459 insertions(+)
  
At a glance most of that code could be shared between arches. I don't
think I can merge that as-is, without some attempt to split the generic
parts out.

cheers
Yang Jialong 杨佳龙 June 28, 2024, 8:21 a.m. UTC | #3
On 2024/6/28 15:02, Michael Ellerman wrote:
> Jialong Yang <jialong.yang@shingroup.cn> writes:
>> mmiotrace is a useful tool for tracing MMIO accesses. At present it is
>> only supported on the x86 and x86_64 platforms.
> I've never used mmiotrace, and don't know it well.
>
> I'm not necessarily opposed to merging it, but AFAIK it was mostly used
> for reverse engineering proprietary drivers, where the driver itself
> couldn't be easily instrumented. Is that what you're using it for?

Yes, just as you guessed. We have used it for network stack debugging on
ppc64le.


>
> For drivers where we have the source wouldn't it be easier to just use
> tracepoints in the MMIO accessors?


Tracepoints need to be pre-defined, and in a big driver it is not easy to
cover every point that accesses registers in the I/O area. A tracepoint is
also only a filter at the C function level (see the rough sketch below).
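
For illustration only, here is a hand-written wrapper in that "C function
level" style, with trace_printk() used as a stand-in for a real tracepoint;
my_traced_readl() is an invented name, not an existing kernel API:

#include <linux/io.h>
#include <linux/kernel.h>

/*
 * Hypothetical wrapper: every accessor (or call site) has to be changed by
 * hand like this, which is what becomes painful in a large driver.
 */
static inline u32 my_traced_readl(const volatile void __iomem *addr)
{
	u32 val = readl(addr);

	trace_printk("readl %px -> 0x%x\n", addr, val);
	return val;
}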

mmiotrace is similar to putting tracepoints in writel()/readl(), but it goes
deeper: it is an asm-level filter. It does not care what was done at the C
level; it only sees what the assembly actually did, such as stw (store word)
or lwz (load word), as if standing in the view of the device.
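
To make the "view of the device" point concrete, here is a rough,
self-contained user-space sketch of the D-form decode that pf_in.c performs
on the faulting instruction word (the encoding below was picked by hand for
the example, it is not from a real trace):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* stw r3,0(r4): D-form, primary opcode 36, RS = r3, RA = r4 */
	uint32_t insn = 0x90640000;
	uint32_t primary = insn >> 26;               /* ISA bits 0-5   */
	uint32_t rs_rt   = (insn >> 21) & 0x1f;      /* ISA bits 6-10  */
	uint32_t ra      = (insn >> 16) & 0x1f;      /* ISA bits 11-15 */
	int32_t  offset  = (int16_t)(insn & 0xffff); /* ISA bits 16-31 */

	printf("opcode %u, value in r%u, address in r%u + %d\n",
	       primary, rs_rt, ra, offset);
	return 0;
}

mmiotrace just sees this store hitting the armed page; it does not care
which C helper or driver function generated it.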


>
> Is it still in-use/maintained on the x86 side?


Here are the yearly patch counts for the core files on the x86 side:

|      | mmio_mod.c | kmmio.c | pf_in.* | testmmiotrace.c |
|------+------------+---------+---------+-----------------|
| 2022 |          1 |       3 |         |                 |
| 2021 |          2 |       1 |         |                 |
| 2020 |          4 |       4 |         |               1 |
| 2019 |          2 |       1 |       1 |               4 |
| 2018 |            |       2 |         |                 |
| 2017 |          2 |       2 |         |               1 |
| 2016 |          1 |       2 |       1 |                 |
| 2014 |            |       1 |         |                 |
| 2013 |          1 |         |         |                 |
| 2012 |          1 |         |         |                 |
| 2011 |          3 |         |       1 |                 |
| 2010 |          1 |       3 |       2 |               1 |
| 2009 |          4 |      19 |         |               3 |
| 2008 |         13 |       5 |       2 |               3 |

>
>> This patch adds support for powerpc.
>> The manual is still Documentation/trace/mmiotrace.rst, i.e. the user API
>> is unchanged, so existing users will find it easy to pick up.
>> Almost all files are copied from arch/x86/mm; the only differences stem
>> from the hardware and from architecture-specific code.
>>
>> LINK: https://lore.kernel.org/lkml/20080127195536.50809974@daedalus.pq.iki.fi/
>>
>> Signed-off-by: Jialong Yang <jialong.yang@shingroup.cn>
>> ---
>>   arch/powerpc/Kconfig.debug       |   3 +
>>   arch/powerpc/mm/Makefile         |   1 +
>>   arch/powerpc/mm/kmmio.c          | 649 +++++++++++++++++++++++++++++++
>>   arch/powerpc/mm/mmio-mod.c       | 414 ++++++++++++++++++++
>>   arch/powerpc/mm/mmiotrace_arch.c | 149 +++++++
>>   arch/powerpc/mm/mmiotrace_arch.h |  25 ++
>>   arch/powerpc/mm/pf_in.c          | 185 +++++++++
>>   arch/powerpc/mm/pf_in.h          |  33 ++
>>   8 files changed, 1459 insertions(+)
>    
> At a glance most of that code could be shared between arches. I don't
> think I can merge that as-is, without some attempt to split the generic
> parts out.


Right.

I just copied them from arch/x86/mm. Much of the code is not arch-specific.


> cheers
>

Patch

diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug
index 8c80b154e814..8a69188aa75a 100644
--- a/arch/powerpc/Kconfig.debug
+++ b/arch/powerpc/Kconfig.debug
@@ -1,5 +1,8 @@ 
 # SPDX-License-Identifier: GPL-2.0
 
+config HAVE_MMIOTRACE_SUPPORT
+	def_bool y
+
 config PPC_DISABLE_WERROR
 	bool "Don't build arch/powerpc code with -Werror"
 	help
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index 0fe2f085c05a..cb92049f1239 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -17,3 +17,4 @@  obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o
 obj-$(CONFIG_PPC_COPRO_BASE)	+= copro_fault.o
 obj-$(CONFIG_PTDUMP_CORE)	+= ptdump/
 obj-$(CONFIG_KASAN)		+= kasan/
+obj-$(CONFIG_MMIOTRACE) += kmmio.o mmio-mod.o pf_in.o mmiotrace_arch.o
diff --git a/arch/powerpc/mm/kmmio.c b/arch/powerpc/mm/kmmio.c
new file mode 100644
index 000000000000..f4374e721b37
--- /dev/null
+++ b/arch/powerpc/mm/kmmio.c
@@ -0,0 +1,649 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/* Support for MMIO probes.
+ * Derived from arch/x86/mm/kmmio.c:
+ *   Copyright (C) 2024 Jialong Yang (jialong.yang@shingroup.cn)
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/list.h>
+#include <linux/rculist.h>
+#include <linux/spinlock.h>
+#include <linux/hash.h>
+#include <linux/export.h>
+#include <linux/kernel.h>
+#include <linux/uaccess.h>
+#include <linux/ptrace.h>
+#include <linux/preempt.h>
+#include <linux/percpu.h>
+#include <linux/kdebug.h>
+#include <linux/mutex.h>
+#include <linux/io.h>
+#include <linux/slab.h>
+#include <asm/cacheflush.h>
+#include <asm/tlbflush.h>
+#include <asm/paca.h>
+#include <linux/errno.h>
+#include <linux/mmiotrace.h>
+
+#include "mmiotrace_arch.h"
+
+typedef unsigned long	pteval_t;
+typedef unsigned long	pmdval_t;
+
+#define KMMIO_PAGE_HASH_BITS 4
+#define KMMIO_PAGE_TABLE_SIZE (1 << KMMIO_PAGE_HASH_BITS)
+
+struct kmmio_fault_page {
+	struct list_head list;
+	struct kmmio_fault_page *release_next;
+	unsigned long addr; /* the requested address */
+	pteval_t old_presence; /* page presence prior to arming */
+	bool armed;
+
+	/*
+	 * Number of times this page has been registered as a part
+	 * of a probe. If zero, page is disarmed and this may be freed.
+	 * Used only by writers (RCU) and post_kmmio_handler().
+	 * Protected by kmmio_lock, when linked into kmmio_page_table.
+	 */
+	int count;
+
+	bool scheduled_for_release;
+};
+
+struct kmmio_delayed_release {
+	struct rcu_head rcu;
+	struct kmmio_fault_page *release_list;
+};
+
+struct kmmio_context {
+	struct kmmio_fault_page *fpage;
+	struct kmmio_probe *probe;
+	unsigned long saved_flags;
+	unsigned long saved_softe;
+	unsigned long addr;
+	int active;
+};
+
+/*
+ * The kmmio_lock is taken in int3 context, which is treated as NMI context.
+ * This causes lockdep to complain about it bein in both NMI and normal
+ * context. Hide it from lockdep, as it should not have any other locks
+ * taken under it, and this is only enabled for debugging mmio anyway.
+ */
+static arch_spinlock_t kmmio_lock = __ARCH_SPIN_LOCK_UNLOCKED;
+
+/* Protected by kmmio_lock */
+unsigned int kmmio_count;
+
+/* Read-protected by RCU, write-protected by kmmio_lock. */
+static struct list_head kmmio_page_table[KMMIO_PAGE_TABLE_SIZE];
+static LIST_HEAD(kmmio_probes);
+
+static struct list_head *kmmio_page_list(unsigned long addr)
+{
+	unsigned int l;
+	pte_t *pte = lookup_address(addr, &l);
+
+	if (!pte)
+		return NULL;
+	addr &= page_level_mask(l);
+
+	return &kmmio_page_table[hash_long(addr, KMMIO_PAGE_HASH_BITS)];
+}
+
+/* Accessed per-cpu */
+static DEFINE_PER_CPU(struct kmmio_context, kmmio_ctx);
+
+/*
+ * this is basically a dynamic stabbing problem:
+ * Could use the existing prio tree code or
+ * Possible better implementations:
+ * The Interval Skip List: A Data Structure for Finding All Intervals That
+ * Overlap a Point (might be simple)
+ * Space Efficient Dynamic Stabbing with Fast Queries - Mikkel Thorup
+ */
+/* Get the kmmio at this addr (if any). You must be holding RCU read lock. */
+static struct kmmio_probe *get_kmmio_probe(unsigned long addr)
+{
+	struct kmmio_probe *p;
+
+	list_for_each_entry_rcu(p, &kmmio_probes, list) {
+		if (addr >= p->addr && addr < (p->addr + p->len))
+			return p;
+	}
+	return NULL;
+}
+
+/* You must be holding RCU read lock. */
+static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long addr)
+{
+	struct list_head *head;
+	struct kmmio_fault_page *f;
+	unsigned int l;
+	pte_t *pte = lookup_address(addr, &l);
+
+	if (!pte)
+		return NULL;
+	addr &= page_level_mask(l);
+	head = kmmio_page_list(addr);
+	list_for_each_entry_rcu(f, head, list) {
+		if (f->addr == addr)
+			return f;
+	}
+	return NULL;
+}
+
+static inline pmd_t pmd_mkinvalid(pmd_t pmd)
+{
+	return __pmd_raw(pmd_raw(pmd) & ~cpu_to_be64(_PAGE_PRESENT | _PAGE_INVALID));
+}
+
+static void clear_pmd_presence(pmd_t *pmd, bool clear, pmdval_t *old)
+{
+	pmd_t new_pmd;
+	pmdval_t v = pmd_val(*pmd);
+
+	if (clear) {
+		*old = v;
+		new_pmd = pmd_mkinvalid(*pmd);
+	} else {
+		/* Presume this has been called with clear==true previously */
+		new_pmd = __pmd(*old);
+	}
+	*pmd = new_pmd;
+}
+
+static void clear_pte_presence(pte_t *pte, bool clear, pteval_t *old, unsigned long addr)
+{
+	pteval_t v = pte_val(*pte);
+
+	if (clear) {
+		*old = v;
+		/* Nothing should care about address */
+		pte_clear(&init_mm, addr, pte);
+	} else {
+		/* Presume this has been called with clear==true previously */
+		set_pte_at(&init_mm, addr, pte, __pte(*old));
+	}
+}
+
+static int clear_page_presence(struct kmmio_fault_page *f, bool clear)
+{
+	unsigned int level;
+	pte_t *pte = lookup_address(f->addr, &level);
+
+	if (!pte) {
+		pr_err("no pte for addr 0x%08lx\n", f->addr);
+		return -1;
+	}
+
+	if (level == PMD_SHIFT)
+		clear_pmd_presence((pmd_t *)pte, clear, &f->old_presence);
+	else if (level == PAGE_SHIFT)
+		clear_pte_presence(pte, clear, &f->old_presence, f->addr);
+	else {
+		pr_err("unexpected page level 0x%x.\n", level);
+		return -1;
+	}
+
+	mmap_read_lock(&init_mm);
+	struct vm_area_struct *vma = find_vma(&init_mm, f->addr);
+
+	mmap_read_unlock(&init_mm);
+
+	flush_tlb_page(vma, f->addr);
+
+	return 0;
+}
+
+/*
+ * Mark the given page as not present. Access to it will trigger a fault.
+ *
+ * Struct kmmio_fault_page is protected by RCU and kmmio_lock, but the
+ * protection is ignored here. RCU read lock is assumed held, so the struct
+ * will not disappear unexpectedly. Furthermore, the caller must guarantee,
+ * that double arming the same virtual address (page) cannot occur.
+ *
+ * Double disarming on the other hand is allowed, and may occur when a fault
+ * and mmiotrace shutdown happen simultaneously.
+ */
+static int arm_kmmio_fault_page(struct kmmio_fault_page *f)
+{
+	int ret;
+
+	WARN_ONCE(f->armed, pr_fmt("kmmio page already armed.\n"));
+	if (f->armed) {
+		pr_warn("double-arm: addr 0x%08lx, ref %d, old %d\n",
+			f->addr, f->count, !!f->old_presence);
+	}
+	ret = clear_page_presence(f, true);
+	WARN_ONCE(ret < 0, pr_fmt("arming at 0x%08lx failed.\n"),
+		  f->addr);
+	f->armed = true;
+	return ret;
+}
+
+/** Restore the given page to saved presence state. */
+static void disarm_kmmio_fault_page(struct kmmio_fault_page *f)
+{
+	int ret = clear_page_presence(f, false);
+
+	WARN_ONCE(ret < 0,
+			KERN_ERR "kmmio disarming at 0x%08lx failed.\n", f->addr);
+	f->armed = false;
+}
+
+/*
+ * This is being called from do_page_fault().
+ *
+ * We may be in an interrupt or a critical section. Also prefetching may
+ * trigger a page fault. We may be in the middle of process switch.
+ * We cannot take any locks, because we could be executing especially
+ * within a kmmio critical section.
+ *
+ * Local interrupts are disabled, so preemption cannot happen.
+ * Do not enable interrupts, do not sleep, and watch out for other CPUs.
+ */
+/*
+ * Interrupts are disabled on entry as trap3 is an interrupt gate
+ * and they remain disabled throughout this function.
+ */
+int kmmio_handler(struct pt_regs *regs, unsigned long addr)
+{
+	struct kmmio_context *ctx;
+	struct kmmio_fault_page *faultpage;
+	int ret = 0; /* default to fault not handled */
+	unsigned long page_base = addr;
+	unsigned int l;
+	pte_t *pte = lookup_address(addr, &l);
+
+	if (!pte)
+		return -EINVAL;
+	page_base &= page_level_mask(l);
+
+	/*
+	 * Hold the RCU read lock over single stepping to avoid looking
+	 * up the probe and kmmio_fault_page again. The rcu_read_lock_sched()
+	 * also disables preemption and prevents process switch during
+	 * the single stepping. We can only handle one active kmmio trace
+	 * per cpu, so ensure that we finish it before something else
+	 * gets to run.
+	 */
+	rcu_read_lock_sched_notrace();
+
+	faultpage = get_kmmio_fault_page(page_base);
+	if (!faultpage) {
+		/*
+		 * Either this page fault is not caused by kmmio, or
+		 * another CPU just pulled the kmmio probe from under
+		 * our feet. The latter case should not be possible.
+		 */
+		goto no_kmmio;
+	}
+
+	ctx = this_cpu_ptr(&kmmio_ctx);
+	if (ctx->active) {
+		if (page_base == ctx->addr) {
+			/*
+			 * A second fault on the same page means some other
+			 * condition needs handling by do_page_fault(), the
+			 * page really not being present is the most common.
+			 */
+			pr_debug("secondary hit for 0x%08lx CPU %d.\n",
+				 addr, smp_processor_id());
+
+			if (!faultpage->old_presence)
+				pr_info("unexpected secondary hit for address 0x%08lx on CPU %d.\n",
+					addr, smp_processor_id());
+		} else {
+			/*
+			 * Prevent overwriting already in-flight context.
+			 * This should not happen, let's hope disarming at
+			 * least prevents a panic.
+			 */
+			pr_emerg("recursive probe hit on CPU %d, for address 0x%08lx. Ignoring.\n",
+				 smp_processor_id(), addr);
+			pr_emerg("previous hit was at 0x%08lx.\n", ctx->addr);
+			disarm_kmmio_fault_page(faultpage);
+		}
+		goto no_kmmio;
+	}
+	ctx->active++;
+
+	ctx->fpage = faultpage;
+	ctx->probe = get_kmmio_probe(page_base);
+	ctx->saved_flags = (regs->msr & (MSR_SE | MSR_EE));
+	ctx->saved_softe = regs->softe;
+	ctx->addr = page_base;
+
+	if (ctx->probe && ctx->probe->pre_handler)
+		ctx->probe->pre_handler(ctx->probe, regs, addr);
+
+	/*
+	 * Enable single-stepping and disable interrupts for the faulting
+	 * context. Local interrupts must not get enabled during stepping.
+	 */
+	regs->msr |= MSR_SE;         // single step
+	regs->msr &= ~MSR_EE;        // hard interrupt
+	regs->softe = IRQS_DISABLED; // soft interrupt
+
+	local_paca->srr_valid = 0;
+
+	/* Now we set present bit in PTE and single step. */
+	disarm_kmmio_fault_page(ctx->fpage);
+
+	/*
+	 * If another cpu accesses the same page while we are stepping,
+	 * the access will not be caught. It will simply succeed and the
+	 * only downside is we lose the event. If this becomes a problem,
+	 * the user should drop to single cpu before tracing.
+	 */
+
+	return 1; /* fault handled */
+
+no_kmmio:
+	rcu_read_unlock_sched_notrace();
+	return ret;
+}
+
+/*
+ * Interrupts are disabled on entry as trap1 is an interrupt gate
+ * and they remain disabled throughout this function.
+ * This must always get called as the pair to kmmio_handler().
+ */
+static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs)
+{
+	int ret = 0;
+	struct kmmio_context *ctx = this_cpu_ptr(&kmmio_ctx);
+
+	if (!ctx->active) {
+		/*
+		 * debug traps without an active context are due to either
+		 * something external causing them (f.e. using a debugger while
+		 * mmio tracing enabled), or erroneous behaviour
+		 */
+		pr_warn("unexpected debug trap on CPU %d.\n", smp_processor_id());
+		goto out;
+	}
+
+	if (ctx->probe && ctx->probe->post_handler)
+		ctx->probe->post_handler(ctx->probe, condition, regs);
+
+	/* Prevent racing against release_kmmio_fault_page(). */
+	arch_spin_lock(&kmmio_lock);
+	if (ctx->fpage->count)
+		arm_kmmio_fault_page(ctx->fpage);
+	arch_spin_unlock(&kmmio_lock);
+
+	// disabled single step in entry of single_step_exception.
+	// regs->msr &= ~MSR_SE;
+	regs->msr |= ctx->saved_flags;
+	regs->softe = ctx->saved_softe;
+
+	/* These were acquired in kmmio_handler(). */
+	ctx->active--;
+	BUG_ON(ctx->active);
+	rcu_read_unlock_sched_notrace();
+
+	/*
+	 * if somebody else is singlestepping across a probe point, flags
+	 * will have TF set, in which case, continue the remaining processing
+	 * of do_debug, as if this is not a probe hit.
+	 */
+	if (!(regs->msr & MSR_SE))
+		ret = 1;
+out:
+	return ret;
+}
+
+/* You must be holding kmmio_lock. */
+static int add_kmmio_fault_page(unsigned long addr)
+{
+	struct kmmio_fault_page *f;
+
+	f = get_kmmio_fault_page(addr);
+	if (f) {
+		if (!f->count)
+			arm_kmmio_fault_page(f);
+		f->count++;
+		return 0;
+	}
+
+	f = kzalloc(sizeof(*f), GFP_ATOMIC);
+	if (!f)
+		return -1;
+
+	f->count = 1;
+	f->addr = addr;
+
+	if (arm_kmmio_fault_page(f)) {
+		kfree(f);
+		return -1;
+	}
+
+	list_add_rcu(&f->list, kmmio_page_list(f->addr));
+
+	return 0;
+}
+
+/* You must be holding kmmio_lock. */
+static void release_kmmio_fault_page(unsigned long addr,
+				struct kmmio_fault_page **release_list)
+{
+	struct kmmio_fault_page *f;
+
+	f = get_kmmio_fault_page(addr);
+	if (!f)
+		return;
+
+	f->count--;
+	BUG_ON(f->count < 0);
+	if (!f->count) {
+		disarm_kmmio_fault_page(f);
+		if (!f->scheduled_for_release) {
+			f->release_next = *release_list;
+			*release_list = f;
+			f->scheduled_for_release = true;
+		}
+	}
+}
+
+/*
+ * With page-unaligned ioremaps, one or two armed pages may contain
+ * addresses from outside the intended mapping. Events for these addresses
+ * are currently silently dropped. The events may result only from programming
+ * mistakes by accessing addresses before the beginning or past the end of a
+ * mapping.
+ */
+int register_kmmio_probe(struct kmmio_probe *p)
+{
+	unsigned long flags;
+	int ret = 0;
+	unsigned long size = 0;
+	unsigned long addr = p->addr & PAGE_MASK;
+	const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK);
+	unsigned int l;
+	pte_t *pte;
+
+	local_irq_save(flags);
+	arch_spin_lock(&kmmio_lock);
+	if (get_kmmio_probe(addr)) {
+		ret = -EEXIST;
+		goto out;
+	}
+
+	pte = lookup_address(addr, &l);
+	if (!pte) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	kmmio_count++;
+	list_add_rcu(&p->list, &kmmio_probes);
+	while (size < size_lim) {
+		if (add_kmmio_fault_page(addr + size))
+			pr_err("Unable to set page fault.\n");
+		size += page_level_size(l);
+	}
+out:
+	arch_spin_unlock(&kmmio_lock);
+	local_irq_restore(flags);
+
+	/*
+	 * XXX: What should I do here?
+	 * Here was a call to global_flush_tlb(), but it does not exist
+	 * anymore. It seems it's not needed after all.
+	 */
+	return ret;
+}
+EXPORT_SYMBOL(register_kmmio_probe);
+
+static void rcu_free_kmmio_fault_pages(struct rcu_head *head)
+{
+	struct kmmio_delayed_release *dr = container_of(
+						head,
+						struct kmmio_delayed_release,
+						rcu);
+	struct kmmio_fault_page *f = dr->release_list;
+
+	while (f) {
+		struct kmmio_fault_page *next = f->release_next;
+
+		BUG_ON(f->count);
+		kfree(f);
+		f = next;
+	}
+	kfree(dr);
+}
+
+static void remove_kmmio_fault_pages(struct rcu_head *head)
+{
+	struct kmmio_delayed_release *dr =
+		container_of(head, struct kmmio_delayed_release, rcu);
+	struct kmmio_fault_page *f = dr->release_list;
+	struct kmmio_fault_page **prevp = &dr->release_list;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	arch_spin_lock(&kmmio_lock);
+	while (f) {
+		if (!f->count) {
+			list_del_rcu(&f->list);
+			prevp = &f->release_next;
+		} else {
+			*prevp = f->release_next;
+			f->release_next = NULL;
+			f->scheduled_for_release = false;
+		}
+		f = *prevp;
+	}
+	arch_spin_unlock(&kmmio_lock);
+	local_irq_restore(flags);
+
+	/* This is the real RCU destroy call. */
+	call_rcu(&dr->rcu, rcu_free_kmmio_fault_pages);
+}
+
+/*
+ * Remove a kmmio probe. You have to synchronize_rcu() before you can be
+ * sure that the callbacks will not be called anymore. Only after that
+ * you may actually release your struct kmmio_probe.
+ *
+ * Unregistering a kmmio fault page has three steps:
+ * 1. release_kmmio_fault_page()
+ *    Disarm the page, wait a grace period to let all faults finish.
+ * 2. remove_kmmio_fault_pages()
+ *    Remove the pages from kmmio_page_table.
+ * 3. rcu_free_kmmio_fault_pages()
+ *    Actually free the kmmio_fault_page structs as with RCU.
+ */
+void unregister_kmmio_probe(struct kmmio_probe *p)
+{
+	unsigned long flags;
+	unsigned long size = 0;
+	unsigned long addr = p->addr & PAGE_MASK;
+	const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK);
+	struct kmmio_fault_page *release_list = NULL;
+	struct kmmio_delayed_release *drelease;
+	unsigned int l;
+	pte_t *pte;
+
+	pte = lookup_address(addr, &l);
+	if (!pte)
+		return;
+
+	local_irq_save(flags);
+	arch_spin_lock(&kmmio_lock);
+	while (size < size_lim) {
+		release_kmmio_fault_page(addr + size, &release_list);
+		size += page_level_size(l);
+	}
+	list_del_rcu(&p->list);
+	kmmio_count--;
+	arch_spin_unlock(&kmmio_lock);
+	local_irq_restore(flags);
+
+	if (!release_list)
+		return;
+
+	drelease = kmalloc(sizeof(*drelease), GFP_ATOMIC);
+	if (!drelease)
+		return;
+
+	drelease->release_list = release_list;
+
+	/*
+	 * This is not really RCU here. We have just disarmed a set of
+	 * pages so that they cannot trigger page faults anymore. However,
+	 * we cannot remove the pages from kmmio_page_table,
+	 * because a probe hit might be in flight on another CPU. The
+	 * pages are collected into a list, and they will be removed from
+	 * kmmio_page_table when it is certain that no probe hit related to
+	 * these pages can be in flight. RCU grace period sounds like a
+	 * good choice.
+	 *
+	 * If we removed the pages too early, kmmio page fault handler might
+	 * not find the respective kmmio_fault_page and determine it's not
+	 * a kmmio fault, when it actually is. This would lead to madness.
+	 */
+	call_rcu(&drelease->rcu, remove_kmmio_fault_pages);
+}
+EXPORT_SYMBOL(unregister_kmmio_probe);
+
+static int
+kmmio_die_notifier(struct notifier_block *nb, unsigned long val, void *args)
+{
+	struct die_args *arg = args;
+
+	if (val == DIE_SSTEP && post_kmmio_handler(0, arg->regs) == 1)
+		return NOTIFY_STOP;
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block nb_die = {
+	.notifier_call = kmmio_die_notifier
+};
+
+int kmmio_init(void)
+{
+	int i;
+
+	for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++)
+		INIT_LIST_HEAD(&kmmio_page_table[i]);
+
+	return register_die_notifier(&nb_die);
+}
+
+void kmmio_cleanup(void)
+{
+	int i;
+
+	unregister_die_notifier(&nb_die);
+	for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++) {
+		WARN_ONCE(!list_empty(&kmmio_page_table[i]),
+			  pr_fmt("kmmio_page_table not empty at cleanup, any further tracing will leak memory.\n"));
+	}
+}
diff --git a/arch/powerpc/mm/mmio-mod.c b/arch/powerpc/mm/mmio-mod.c
new file mode 100644
index 000000000000..68ba9f028678
--- /dev/null
+++ b/arch/powerpc/mm/mmio-mod.c
@@ -0,0 +1,414 @@ 
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Derived from arch/x86/mm/mmio-mod.c:
+ *   Copyright (C) 2024 Jialong Yang (jialong.yang@shingroup.cn)
+ */
+
+#define pr_fmt(fmt) "mmiotrace: " fmt
+
+#include <linux/moduleparam.h>
+#include <linux/debugfs.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/io.h>
+#include <linux/mmiotrace.h>
+#include <linux/pgtable.h>
+#include <linux/atomic.h>
+#include <linux/percpu.h>
+#include <linux/cpu.h>
+
+#include "pf_in.h"
+#include "mmiotrace_arch.h"
+
+struct remap_trace {
+	struct list_head list;
+	struct kmmio_probe probe;
+	resource_size_t phys;
+	unsigned long id;
+};
+
+/* Accessed per-cpu. */
+static DEFINE_PER_CPU(struct trap_reason, pf_reason);
+static DEFINE_PER_CPU(struct mmiotrace_rw, cpu_trace);
+
+static DEFINE_MUTEX(mmiotrace_mutex);
+static DEFINE_SPINLOCK(trace_lock);
+static atomic_t mmiotrace_enabled;
+static LIST_HEAD(trace_list);		/* struct remap_trace */
+
+/*
+ * Locking in this file:
+ * - mmiotrace_mutex enforces enable/disable_mmiotrace() critical sections.
+ * - mmiotrace_enabled may be modified only when holding mmiotrace_mutex
+ *   and trace_lock.
+ * - Routines depending on is_enabled() must take trace_lock.
+ * - trace_list users must hold trace_lock.
+ * - is_enabled() guarantees that mmio_trace_{rw,mapping} are allowed.
+ * - pre/post callbacks assume the effect of is_enabled() being true.
+ */
+
+/* module parameters */
+static unsigned long	filter_offset;
+static bool		nommiotrace;
+
+module_param(filter_offset, ulong, 0);
+module_param(nommiotrace, bool, 0);
+
+MODULE_PARM_DESC(filter_offset, "Start address of traced mappings.");
+MODULE_PARM_DESC(nommiotrace, "Disable actual MMIO tracing.");
+
+static bool is_enabled(void)
+{
+	return atomic_read(&mmiotrace_enabled);
+}
+
+static void print_pte(unsigned long address)
+{
+	unsigned int level;
+	pte_t *pte = lookup_address(address, &level);
+
+	if (!pte) {
+		pr_err("Error in %s: no pte for page 0x%08lx\n",
+		       __func__, address);
+		return;
+	}
+
+	if (level == PMD_SHIFT) {
+		pr_emerg("4MB pages are not currently supported: 0x%08lx\n",
+			 address);
+		BUG();
+	}
+	pr_info("pte for 0x%lx: 0x%llx 0x%llx\n",
+		address,
+		(unsigned long long)pte_val(*pte),
+		(unsigned long long)pte_val(*pte) & _PAGE_PRESENT);
+}
+
+/*
+ * For some reason the pre/post pairs have been called in an
+ * unmatched order. Report and die.
+ */
+static void die_kmmio_nesting_error(struct pt_regs *regs, unsigned long addr)
+{
+	const struct trap_reason *my_reason = &get_cpu_var(pf_reason);
+
+	pr_emerg("unexpected fault for address: 0x%08lx, last fault for address: 0x%08lx\n",
+		 addr, my_reason->addr);
+	print_pte(addr);
+	pr_emerg("faulting IP is at %pS\n", (void *)regs->nip);
+	pr_emerg("last faulting IP was at %pS\n", (void *)my_reason->ip);
+	put_cpu_var(pf_reason);
+	BUG();
+}
+
+static void pre(struct kmmio_probe *p, struct pt_regs *regs,
+						unsigned long addr)
+{
+	struct trap_reason *my_reason = &get_cpu_var(pf_reason);
+	struct mmiotrace_rw *my_trace = &get_cpu_var(cpu_trace);
+	const unsigned long instptr = instruction_pointer(regs);
+	struct opcode_t *opcode = get_opcode((unsigned int *)instptr);
+	enum mm_io_opcode type = get_ins_type(opcode);
+	struct remap_trace *trace = p->private;
+
+	/* it doesn't make sense to have more than one active trace per cpu */
+	if (my_reason->active_traces)
+		die_kmmio_nesting_error(regs, addr);
+	else
+		my_reason->active_traces++;
+
+	if (!opcode) {
+		pr_warn("The ins may be not included in src. Tell the dever follow info:");
+		pr_warn("ins_addr: 0x%lx    ins: 0x%lx", instptr, *(unsigned long *)instptr);
+	}
+
+	my_reason->opcode = opcode;
+
+	my_reason->addr = addr;
+	my_reason->ip = instptr;
+
+	my_trace->phys = addr - trace->probe.addr + trace->phys;
+	my_trace->map_id = trace->id;
+
+	my_trace->pc = instptr;
+
+	my_trace->opcode = type;
+	my_trace->width = get_ins_width(opcode);
+
+	if (type == MMIO_WRITE)
+		my_trace->value = get_ins_val(my_reason, regs);
+
+	put_cpu_var(cpu_trace);
+	put_cpu_var(pf_reason);
+}
+
+static void post(struct kmmio_probe *p, unsigned long condition,
+							struct pt_regs *regs)
+{
+	struct trap_reason *my_reason = &get_cpu_var(pf_reason);
+	struct mmiotrace_rw *my_trace = &get_cpu_var(cpu_trace);
+	struct opcode_t *opcode = my_reason->opcode;
+	enum mm_io_opcode type = get_ins_type(opcode);
+
+	/* this should always return the active_trace count to 0 */
+	my_reason->active_traces--;
+	if (my_reason->active_traces) {
+		pr_emerg("unexpected post handler");
+		BUG();
+	}
+
+	if (type == MMIO_READ)
+		my_trace->value = get_ins_val(my_reason, regs);
+
+	mmio_trace_rw(my_trace);
+	put_cpu_var(cpu_trace);
+	put_cpu_var(pf_reason);
+}
+
+static void ioremap_trace_core(resource_size_t offset, unsigned long size,
+							void __iomem *addr)
+{
+	static atomic_t next_id;
+	struct remap_trace *trace = kmalloc(sizeof(*trace), GFP_KERNEL);
+	/* These are page-unaligned. */
+	struct mmiotrace_map map = {
+		.phys = offset,
+		.virt = (unsigned long)addr,
+		.len = size,
+		.opcode = MMIO_PROBE
+	};
+
+	if (!trace) {
+		pr_err("kmalloc failed in ioremap\n");
+		return;
+	}
+
+	*trace = (struct remap_trace) {
+		.probe = {
+			.addr = (unsigned long)addr,
+			.len = size,
+			.pre_handler = pre,
+			.post_handler = post,
+			.private = trace
+		},
+		.phys = offset,
+		.id = atomic_inc_return(&next_id)
+	};
+	map.map_id = trace->id;
+
+	spin_lock_irq(&trace_lock);
+	if (!is_enabled()) {
+		kfree(trace);
+		goto not_enabled;
+	}
+
+	mmio_trace_mapping(&map);
+	list_add_tail(&trace->list, &trace_list);
+	if (!nommiotrace)
+		register_kmmio_probe(&trace->probe);
+
+not_enabled:
+	spin_unlock_irq(&trace_lock);
+}
+
+void mmiotrace_ioremap(resource_size_t offset, unsigned long size,
+						void __iomem *addr)
+{
+	pr_err("ioremap_*(0x%llx, 0x%lx) = %p\n",
+		 (unsigned long long)offset, size, addr);
+	if (!is_enabled()) /* recheck and proper locking in *_core() */
+		return;
+
+	pr_debug("ioremap_*(0x%llx, 0x%lx) = %p\n",
+		 (unsigned long long)offset, size, addr);
+	if ((filter_offset) && (offset != filter_offset))
+		return;
+	ioremap_trace_core(offset, size, addr);
+}
+
+static void iounmap_trace_core(volatile void __iomem *addr)
+{
+	struct mmiotrace_map map = {
+		.phys = 0,
+		.virt = (unsigned long)addr,
+		.len = 0,
+		.opcode = MMIO_UNPROBE
+	};
+	struct remap_trace *trace;
+	struct remap_trace *tmp;
+	struct remap_trace *found_trace = NULL;
+
+	pr_debug("Unmapping %p.\n", addr);
+
+	spin_lock_irq(&trace_lock);
+	if (!is_enabled())
+		goto not_enabled;
+
+	list_for_each_entry_safe(trace, tmp, &trace_list, list) {
+		if ((unsigned long)addr == trace->probe.addr) {
+			if (!nommiotrace)
+				unregister_kmmio_probe(&trace->probe);
+			list_del(&trace->list);
+			found_trace = trace;
+			break;
+		}
+	}
+	map.map_id = (found_trace) ? found_trace->id : -1;
+	mmio_trace_mapping(&map);
+
+not_enabled:
+	spin_unlock_irq(&trace_lock);
+	if (found_trace) {
+		synchronize_rcu(); /* unregister_kmmio_probe() requirement */
+		kfree(found_trace);
+	}
+}
+
+void mmiotrace_iounmap(volatile void __iomem *addr)
+{
+	might_sleep();
+	if (is_enabled()) /* recheck and proper locking in *_core() */
+		iounmap_trace_core(addr);
+}
+
+int mmiotrace_printk(const char *fmt, ...)
+{
+	int ret = 0;
+	va_list args;
+	unsigned long flags;
+
+	va_start(args, fmt);
+
+	spin_lock_irqsave(&trace_lock, flags);
+	if (is_enabled())
+		ret = mmio_trace_printk(fmt, args);
+	spin_unlock_irqrestore(&trace_lock, flags);
+
+	va_end(args);
+	return ret;
+}
+EXPORT_SYMBOL(mmiotrace_printk);
+
+static void clear_trace_list(void)
+{
+	struct remap_trace *trace;
+	struct remap_trace *tmp;
+
+	/*
+	 * No locking required, because the caller ensures we are in a
+	 * critical section via mutex, and is_enabled() is false,
+	 * i.e. nothing can traverse or modify this list.
+	 * Caller also ensures is_enabled() cannot change.
+	 */
+	list_for_each_entry(trace, &trace_list, list) {
+		pr_notice("purging non-iounmapped trace @0x%08lx, size 0x%lx.\n",
+			  trace->probe.addr, trace->probe.len);
+		if (!nommiotrace)
+			unregister_kmmio_probe(&trace->probe);
+	}
+	synchronize_rcu(); /* unregister_kmmio_probe() requirement */
+
+	list_for_each_entry_safe(trace, tmp, &trace_list, list) {
+		list_del(&trace->list);
+		kfree(trace);
+	}
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static cpumask_var_t downed_cpus;
+
+static void enter_uniprocessor(void)
+{
+	int cpu;
+	int err;
+
+	if (!cpumask_available(downed_cpus) &&
+	    !alloc_cpumask_var(&downed_cpus, GFP_KERNEL)) {
+		pr_notice("Failed to allocate mask\n");
+		goto out;
+	}
+
+	cpus_read_lock();
+	cpumask_copy(downed_cpus, cpu_online_mask);
+	cpumask_clear_cpu(cpumask_first(cpu_online_mask), downed_cpus);
+	if (num_online_cpus() > 1)
+		pr_notice("Disabling non-boot CPUs...\n");
+	cpus_read_unlock();
+
+	for_each_cpu(cpu, downed_cpus) {
+		err = remove_cpu(cpu);
+		if (!err)
+			pr_info("CPU%d is down.\n", cpu);
+		else
+			pr_err("Error taking CPU%d down: %d\n", cpu, err);
+	}
+out:
+	if (num_online_cpus() > 1)
+		pr_warn("multiple CPUs still online, may miss events.\n");
+}
+
+static void leave_uniprocessor(void)
+{
+	int cpu;
+	int err;
+
+	if (!cpumask_available(downed_cpus) || cpumask_empty(downed_cpus))
+		return;
+	pr_notice("Re-enabling CPUs...\n");
+	for_each_cpu(cpu, downed_cpus) {
+		err = add_cpu(cpu);
+		if (!err)
+			pr_info("enabled CPU%d.\n", cpu);
+		else
+			pr_err("cannot re-enable CPU%d: %d\n", cpu, err);
+	}
+}
+
+#else /* !CONFIG_HOTPLUG_CPU */
+static void enter_uniprocessor(void)
+{
+	if (num_online_cpus() > 1)
+		pr_warn("multiple CPUs are online, may miss events. Suggest booting with maxcpus=1 kernel argument.\n");
+}
+
+static void leave_uniprocessor(void)
+{
+}
+#endif
+
+void enable_mmiotrace(void)
+{
+	mutex_lock(&mmiotrace_mutex);
+	if (is_enabled())
+		goto out;
+
+	if (nommiotrace)
+		pr_info("MMIO tracing disabled.\n");
+	kmmio_init();
+	enter_uniprocessor();
+	spin_lock_irq(&trace_lock);
+	atomic_inc(&mmiotrace_enabled);
+	spin_unlock_irq(&trace_lock);
+	pr_info("enabled.\n");
+out:
+	mutex_unlock(&mmiotrace_mutex);
+}
+
+void disable_mmiotrace(void)
+{
+	mutex_lock(&mmiotrace_mutex);
+	if (!is_enabled())
+		goto out;
+
+	spin_lock_irq(&trace_lock);
+	atomic_dec(&mmiotrace_enabled);
+	BUG_ON(is_enabled());
+	spin_unlock_irq(&trace_lock);
+
+	clear_trace_list(); /* guarantees: no more kmmio callbacks */
+	leave_uniprocessor();
+	kmmio_cleanup();
+	pr_info("disabled.\n");
+out:
+	mutex_unlock(&mmiotrace_mutex);
+}
diff --git a/arch/powerpc/mm/mmiotrace_arch.c b/arch/powerpc/mm/mmiotrace_arch.c
new file mode 100644
index 000000000000..ccc8032384ef
--- /dev/null
+++ b/arch/powerpc/mm/mmiotrace_arch.c
@@ -0,0 +1,149 @@ 
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Derived from arch/powerpc/mm/pgtable.c:
+ *   Copyright (C) 2024 Jialong Yang (jialong.yang@shingroup.cn)
+ */
+
+#include <linux/kernel.h>
+#include <linux/gfp.h>
+#include <linux/mm.h>
+#include <linux/percpu.h>
+#include <linux/hardirq.h>
+#include <linux/hugetlb.h>
+#include <asm/tlbflush.h>
+#include <asm/tlb.h>
+#include <asm/hugetlb.h>
+
+#include "mmiotrace_arch.h"
+
+static pte_t *mmiotrace_find_linux_pte(pgd_t *pgdp, unsigned long ea,
+			bool *is_thp, unsigned int *hpage_shift)
+{
+	p4d_t p4d, *p4dp;
+	pud_t pud, *pudp;
+	pmd_t pmd, *pmdp;
+	pte_t *ret_pte;
+	hugepd_t *hpdp = NULL;
+	unsigned int pdshift;
+
+	if (hpage_shift)
+		*hpage_shift = 0;
+
+	if (is_thp)
+		*is_thp = false;
+
+	/*
+	 * Always operate on the local stack value. This make sure the
+	 * value don't get updated by a parallel THP split/collapse,
+	 * page fault or a page unmap. The return pte_t * is still not
+	 * stable. So should be checked there for above conditions.
+	 * Top level is an exception because it is folded into p4d.
+	 */
+	p4dp = p4d_offset(pgdp, ea);
+	p4d  = READ_ONCE(*p4dp);
+	pdshift = P4D_SHIFT;
+
+	if (p4d_none(p4d))
+		return NULL;
+
+	if (p4d_leaf(p4d)) {
+		ret_pte = (pte_t *)p4dp;
+		goto out;
+	}
+
+	if (is_hugepd(__hugepd(p4d_val(p4d)))) {
+		hpdp = (hugepd_t *)&p4d;
+		goto out_huge;
+	}
+
+	/*
+	 * Even if we end up with an unmap, the pgtable will not
+	 * be freed, because we do an rcu free and here we are
+	 * irq disabled
+	 */
+	pdshift = PUD_SHIFT;
+	pudp = pud_offset(&p4d, ea);
+	pud  = READ_ONCE(*pudp);
+
+	if (pud_none(pud))
+		return NULL;
+
+	if (pud_leaf(pud)) {
+		ret_pte = (pte_t *)pudp;
+		goto out;
+	}
+
+	if (is_hugepd(__hugepd(pud_val(pud)))) {
+		hpdp = (hugepd_t *)&pud;
+		goto out_huge;
+	}
+
+	pdshift = PMD_SHIFT;
+	pmdp = pmd_offset(&pud, ea);
+	pmd  = READ_ONCE(*pmdp);
+
+	/*
+	 * A hugepage collapse is captured by this condition, see
+	 * pmdp_collapse_flush.
+	 */
+	if (pmd_none(pmd))
+		return NULL;
+
+#ifdef CONFIG_PPC_BOOK3S_64
+	/*
+	 * A hugepage split is captured by this condition, see
+	 * pmdp_invalidate.
+	 *
+	 * Huge page modification can be caught here too.
+	 */
+	if (pmd_is_serializing(pmd))
+		return NULL;
+#endif
+
+	if (pmd_trans_huge(pmd) || pmd_devmap(pmd)) {
+		if (is_thp)
+			*is_thp = true;
+		ret_pte = (pte_t *)pmdp;
+		goto out;
+	}
+
+	if (pmd_leaf(pmd)) {
+		ret_pte = (pte_t *)pmdp;
+		goto out;
+	}
+
+	if (is_hugepd(__hugepd(pmd_val(pmd)))) {
+		hpdp = (hugepd_t *)&pmd;
+		goto out_huge;
+	}
+
+	pdshift = PAGE_SHIFT;
+
+	if (hpage_shift)
+		*hpage_shift = pdshift;
+
+	return pte_offset_kernel(&pmd, ea);
+
+out_huge:
+	if (!hpdp)
+		return NULL;
+
+	ret_pte = hugepte_offset(*hpdp, ea, pdshift);
+	pdshift = hugepd_shift(*hpdp);
+out:
+	if (hpage_shift)
+		*hpage_shift = pdshift;
+	return ret_pte;
+}
+
+pte_t *lookup_address(unsigned long address, unsigned int *shift)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	pte_t *pte = mmiotrace_find_linux_pte(pgd_offset_k(address), address, NULL, shift);
+
+	local_irq_restore(flags);
+
+	return pte;
+}
diff --git a/arch/powerpc/mm/mmiotrace_arch.h b/arch/powerpc/mm/mmiotrace_arch.h
new file mode 100644
index 000000000000..f4a5bff24a07
--- /dev/null
+++ b/arch/powerpc/mm/mmiotrace_arch.h
@@ -0,0 +1,25 @@ 
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Derived from arch/powerpc/mm/pgtable.c:
+ *   Copyright (C) 2024 Jialong Yang (jialong.yang@shingroup.cn)
+ */
+
+#ifndef __MMIOTRACE_ARCH_
+#define __MMIOTRACE_ARCH_
+#include <asm/pgtable.h>
+
+static inline int page_level_shift(unsigned int level)
+{
+	return level;
+}
+static inline unsigned long page_level_size(unsigned int level)
+{
+	return 1UL << page_level_shift(level);
+}
+static inline unsigned long page_level_mask(unsigned int level)
+{
+	return ~(page_level_size(level) - 1);
+}
+
+pte_t *lookup_address(unsigned long address, unsigned int *level);
+#endif // __MMIOTRACE_ARCH_
diff --git a/arch/powerpc/mm/pf_in.c b/arch/powerpc/mm/pf_in.c
new file mode 100644
index 000000000000..e6c90b383e7f
--- /dev/null
+++ b/arch/powerpc/mm/pf_in.c
@@ -0,0 +1,185 @@ 
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Derived from arch/x86/mm/pf_in.c:
+ *   Copyright (C) 2024 Jialong Yang (jialong.yang@shingroup.cn)
+ */
+
+#include <linux/ptrace.h> /* struct pt_regs */
+#include "pf_in.h"
+#include <linux/printk.h>
+#include <linux/mmiotrace.h>
+
+/* D 32 0x80000000 B lwz Load Word and Zero */
+/* D 33 0x84000000 B lwzu Load Word and Zero with Update */
+/* D 34 0x88000000 B lbz Load Byte and Zero */
+/* D 35 0x8C000000 B lbzu Load Byte and Zero with Update */
+/* D 36 0x90000000 B stw Store Word */
+/* D 37 0x94000000 B stwu Store Word with Update */
+/* D 38 0x98000000 B stb Store Byte */
+/* D 39 0x9C000000 B stbu Store Byte with Update */
+/* D 40 0xA0000000 B lhz Load Halfword and Zero */
+/* D 41 0xA4000000 B lhzu Load Halfword and Zero with Update */
+/* D 42 0xA8000000 B lha Load Halfword Algebraic */
+/* D 43 0xAC000000 B lhau Load Halfword Algebraic with Update */
+/* D 44 0xB0000000 B sth Store Halfword */
+/* D 45 0xB4000000 B sthu Store Halfword with Update */
+/* D 46 0xB8000000 B lmw Load Multiple Word */
+/* D 47 0xBC000000 B stmw Store Multiple Word */
+/* D 48 0xC0000000 FP lfs Load Floating-Point Single */
+/* D 49 0xC4000000 FP lfsu Load Floating-Point Single with Update */
+/* D 50 0xC8000000 FP lfd Load Floating-Point Double */
+/* D 51 0xCC000000 FP lfdu Load Floating-Point Double with Update */
+/* D 52 0xD0000000 FP stfs Store Floating-Point Single */
+/* D 53 0xD4000000 FP stfsu Store Floating-Point Single with Update */
+/* D 54 0xD8000000 FP stfd Store Floating-Point Double */
+/* D 55 0xDC000000 FP stfdu Store Floating-Point Double with Update */
+/* DQ 56 0xE0000000 P 58 LSQ lq Load Quadword */
+/* DS 57 0xE4000000 140 FP.out Lfdp Load Floating-Point Double Pair */
+/* DS 58 0xE8000000 53 64 Ld Load Doubleword */
+/* DS 58 0xE8000001 53 64 Ldu Load Doubleword with Update */
+/* DS 58 0xE8000002 52 64 Lwa Load Word Algebraic */
+/* DS 62 0xF8000000 57 64 std Store Doubleword */
+/* DS 62 0xF8000001 57 64 stdu Store Doubleword with Update */
+/* DS 62 0xF8000002 59 LSQ stq Store Quadword */
+
+// D-form:
+// 0-5    6-10    11-15   16-31
+// opcode RT      RA      Offset
+
+// DQ-form:
+// 0-5    6-10  11-15  16-27
+// opcode RT    RA     Offset
+
+// DS-form:
+// 0-5    6-10  11-15  16-29  30-31
+// opcode RT    RA     Offset opcode
+
+#define D_OPCODE_MASK GENMASK(31, 26)
+#define DQ_OPCODE_MASK D_OPCODE_MASK
+#define DS_OPCODE_MASK (D_OPCODE_MASK | GENMASK(1, 0))
+#define RS_RT_OFFSET 21UL
+#define RS_RT_MASK GENMASK(25, 21)
+#define RA_MASK GENMASK(20, 16)
+#define D_OFFSET GENMASK(15, 0)
+#define DQ_OFFSET GENMASK(15, 4)
+#define DS_OFFSET GENMASK(15, 2)
+
+struct opcode_t opcodes[] = {
+	{0x80000000, D_FORMAT, "lwz", },
+	{0x84000000, D_FORMAT, "lwzu", },
+	{0x88000000, D_FORMAT, "lbz", },
+	{0x8C000000, D_FORMAT, "lbzu", },
+	{0x90000000, D_FORMAT, "stw", },
+	{0x94000000, D_FORMAT, "stwu", },
+	{0x98000000, D_FORMAT, "stb", },
+	{0x9C000000, D_FORMAT, "stbu", },
+	{0xA0000000, D_FORMAT, "lhz", },
+	{0xA4000000, D_FORMAT, "lhzu", },
+	{0xA8000000, D_FORMAT, "lha", },
+	{0xAC000000, D_FORMAT, "lhau", },
+	{0xB0000000, D_FORMAT, "sth", },
+	{0xB4000000, D_FORMAT, "sthu", },
+	{0xB8000000, D_FORMAT, "lmw", },
+	{0xBC000000, D_FORMAT, "stmw", },
+	{0xC0000000, D_FORMAT, "lfs", },
+	{0xC4000000, D_FORMAT, "lfsu", },
+	{0xC8000000, D_FORMAT, "lfd", },
+	{0xCC000000, D_FORMAT, "lfdu", },
+	{0xD0000000, D_FORMAT, "stfs", },
+	{0xD4000000, D_FORMAT, "stfsu", },
+	{0xD8000000, D_FORMAT, "stfd", },
+	{0xDC000000, D_FORMAT, "stfdu", },
+	{0xE0000000, DQ_FORMAT, "lq", },
+	{0xE4000000, DS_FORMAT, "lfdp", },
+	{0xE8000000, DS_FORMAT, "ld", },
+	{0xE8000001, DS_FORMAT, "ldu", },
+	{0xE8000002, DS_FORMAT, "lwa", },
+	{0xF8000000, DS_FORMAT, "std", },
+	{0xF8000001, DS_FORMAT, "stdu", },
+	{0xF8000002, DS_FORMAT, "stq", }
+};
+
+struct opcode_t *get_opcode(unsigned int *addr)
+{
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(opcodes); i++) {
+		switch (opcodes[i].form) {
+		case D_FORMAT:
+			if (opcodes[i].opcode == (*addr & D_OPCODE_MASK))
+				return &opcodes[i];
+			break;
+		case DQ_FORMAT:
+			if (opcodes[i].opcode == (*addr & DQ_OPCODE_MASK))
+				return &opcodes[i];
+			break;
+		case DS_FORMAT:
+			if (opcodes[i].opcode == (*addr & DS_OPCODE_MASK))
+				return &opcodes[i];
+			break;
+		}
+	}
+
+	return NULL;
+}
+
+inline enum mm_io_opcode get_ins_type(struct opcode_t *opcode)
+{
+	if (!opcode)
+		return MMIO_UNKNOWN_OP;
+
+	if (opcode->name[0] == 'l')
+		return MMIO_READ;
+
+	if (opcode->name[0] == 's')
+		return MMIO_WRITE;
+
+	return MMIO_UNKNOWN_OP;
+}
+
+unsigned int get_ins_width(struct opcode_t *opcode)
+{
+	char width_ch;
+
+	if (!opcode)
+		return 0;
+
+	if (opcode->name[0] == 'l')
+		width_ch = opcode->name[1];
+
+	if (opcode->name[0] == 's')
+		width_ch = opcode->name[2];
+
+	switch (width_ch) {
+	case 'b': /* byte */
+		return 1;
+	case 'h': /* half word */
+		return sizeof(long) / 2;
+	case 'w': /* word */
+		/* return sizeof(long); */
+	case 'm': /* multi words(can be calculated out by (32-RT) * sizeof(long)) */
+	case 'f': /* float(not too much. So ignore word number) */
+	case 'd': /* double words */
+		/* return 2 * sizeof(long); */
+	case 'q': /* quad words */
+		/* return 4 * sizeof(long); */
+	default:
+		return sizeof(long);
+	}
+}
+
+unsigned long get_ins_val(struct trap_reason *reason, struct pt_regs *regs)
+{
+	struct opcode_t *opcode = reason->opcode;
+	unsigned int ins = *(unsigned int *)(reason->ip);
+	unsigned int reg_no;
+	unsigned long mask = ~0UL;
+
+	if (!opcode)
+		return 0;
+
+	mask >>= 8 * (sizeof(long) - get_ins_width(opcode));
+	reg_no = (ins & RS_RT_MASK) >> RS_RT_OFFSET;
+
+	return regs->gpr[reg_no] & mask;
+}
diff --git a/arch/powerpc/mm/pf_in.h b/arch/powerpc/mm/pf_in.h
new file mode 100644
index 000000000000..905ba4937137
--- /dev/null
+++ b/arch/powerpc/mm/pf_in.h
@@ -0,0 +1,33 @@ 
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Derived from arch/x86/mm/pf_in.h:
+ *   Copyright (C) 2024 Jialong Yang (jialong.yang@shingroup.cn)
+ */
+
+#ifndef __PF_H_
+#define __PF_H_
+
+enum OPCODE_FORMAT {
+	D_FORMAT,
+	DQ_FORMAT,
+	DS_FORMAT,
+};
+
+struct opcode_t {
+	unsigned int opcode;
+	enum OPCODE_FORMAT form;
+	const char *name;
+};
+
+struct trap_reason {
+	unsigned long addr;
+	unsigned long ip;
+	struct opcode_t *opcode;
+	int active_traces;
+};
+
+struct opcode_t *get_opcode(unsigned int *addr);
+enum mm_io_opcode get_ins_type(struct opcode_t *opcode);
+unsigned int get_ins_width(struct opcode_t *opcode);
+unsigned long get_ins_val(struct trap_reason *reason, struct pt_regs *regs);
+#endif /* __PF_H_ */