@@ -102,6 +102,18 @@ config GENERIC_HWEIGHT
bool
default y
+config TOOLCHAIN_SUPPORTS_LOCALENTRY1
+ bool
+ depends on PPC64_ELF_ABI_V2
+ default y if LD_VERSION >= 23200 || LLD_VERSION >= 110000
+ help
+ A section of the ELF symbol st_other field can be given the value 1
+ using the directive '.localentry NAME, 1' to mean the local and global
+ entry points are the same, and r2 should be treated as caller-saved.
+
+ Older versions of Clang and binutils do not recognise this form of the
+ directive and will error if it is used.
+
config PPC
bool
default y
@@ -248,7 +260,7 @@ config PPC
select HAVE_SOFTIRQ_ON_OWN_STACK
select HAVE_STACKPROTECTOR if PPC32 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r2)
select HAVE_STACKPROTECTOR if PPC64 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r13)
- select HAVE_STATIC_CALL if PPC32
+ select HAVE_STATIC_CALL if PPC32 || (PPC64_ELF_ABI_V2 && TOOLCHAIN_SUPPORTS_LOCALENTRY1)
select HAVE_SYSCALL_TRACEPOINTS
select HAVE_VIRT_CPU_ACCOUNTING
select HUGETLB_PAGE_SIZE_VARIABLE if PPC_BOOK3S_64 && HUGETLB_PAGE
@@ -152,6 +152,7 @@ int translate_branch(ppc_inst_t *instr, const u32 *dest, const u32 *src);
bool is_conditional_branch(ppc_inst_t instr);
#define OP_RT_RA_MASK 0xffff0000UL
+#define OP_SI_MASK 0x0000ffffUL /* low 16 bits of an instruction word: the SI immediate field */
#define LIS_R2 (PPC_RAW_LIS(_R2, 0))
#define ADDIS_R2_R12 (PPC_RAW_ADDIS(_R2, _R12, 0))
#define ADDI_R2_R2 (PPC_RAW_ADDI(_R2, _R2, 0))
@@ -2,12 +2,75 @@
#ifndef _ASM_POWERPC_STATIC_CALL_H
#define _ASM_POWERPC_STATIC_CALL_H
+#ifdef CONFIG_PPC64_ELF_ABI_V2
+
+#ifdef MODULE
+
+/*
+ * Static call trampoline emitted when MODULE is defined.
+ *
+ * The symbol is marked '.localentry NAME, 1', so the trampoline sets up r2
+ * itself instead of relying on a separate global entry sequence:
+ *  - LR is saved in r11, 'bcl 20, 31, $+4' places the address of label 0 in
+ *    LR, that address is moved to r12 and LR is restored from r11.
+ *  - r12 is adjusted to the start of the trampoline and r2 is derived from it
+ *    using the (.TOC. - trampoline) offset.
+ *  - 'inst' is the instruction slot at byte offset 28 (PPC_SCT_INST_MODULE).
+ *  - If 'inst' falls through, the 8-byte value stored at label 2 is loaded
+ *    relative to r12 and branched to through CTR.
+ *  - Label 1 sets r3 to 0 and returns.
+ *  - Label 2 is an 8-byte aligned, 8-byte data slot that starts out as zero.
+ */
+#define __PPC_SCT(name, inst) \
+ asm(".pushsection .text, \"ax\" \n" \
+ ".align 6 \n" \
+ ".globl " STATIC_CALL_TRAMP_STR(name) " \n" \
+ ".localentry " STATIC_CALL_TRAMP_STR(name) ", 1 \n" \
+ STATIC_CALL_TRAMP_STR(name) ": \n" \
+ " mflr 11 \n" \
+ " bcl 20, 31, $+4 \n" \
+ "0: mflr 12 \n" \
+ " mtlr 11 \n" \
+ " addi 12, 12, (" STATIC_CALL_TRAMP_STR(name) " - 0b) \n" \
+ " addis 2, 12, (.TOC.-" STATIC_CALL_TRAMP_STR(name) ")@ha \n" \
+ " addi 2, 2, (.TOC.-" STATIC_CALL_TRAMP_STR(name) ")@l \n" \
+ " " inst " \n" \
+ " ld 12, (2f - " STATIC_CALL_TRAMP_STR(name) ")(12) \n" \
+ " mtctr 12 \n" \
+ " bctr \n" \
+ "1: li 3, 0 \n" \
+ " blr \n" \
+ ".balign 8 \n" \
+ "2: .8byte 0 \n" \
+ ".type " STATIC_CALL_TRAMP_STR(name) ", @function \n" \
+ ".size " STATIC_CALL_TRAMP_STR(name) ", . - " STATIC_CALL_TRAMP_STR(name) " \n" \
+ ".popsection \n")
+
+#else /* KERNEL */
+
+/*
+ * Static call trampoline emitted when MODULE is not defined.
+ *
+ *  - r2 is loaded from offset 16 off r13.
+ *    NOTE(review): presumably the kernel TOC pointer kept in the PACA;
+ *    confirm that the hard-coded 16 matches the structure layout.
+ *  - 'inst' is the instruction slot at byte offset 4 (PPC_SCT_INST_KERNEL).
+ *  - If 'inst' falls through, the 8-byte value stored at label 2 is loaded
+ *    through a TOC-relative address and branched to through CTR.
+ *  - Label 1 sets r3 to 0 and returns.
+ *  - Label 2 is an 8-byte aligned, 8-byte data slot that starts out as zero.
+ */
+#define __PPC_SCT(name, inst) \
+ asm(".pushsection .text, \"ax\" \n" \
+ ".align 5 \n" \
+ ".globl " STATIC_CALL_TRAMP_STR(name) " \n" \
+ ".localentry " STATIC_CALL_TRAMP_STR(name) ", 1 \n" \
+ STATIC_CALL_TRAMP_STR(name) ": \n" \
+ " ld 2, 16(13) \n" \
+ " " inst " \n" \
+ " addis 12, 2, 2f@toc@ha \n" \
+ " ld 12, 2f@toc@l(12) \n" \
+ " mtctr 12 \n" \
+ " bctr \n" \
+ "1: li 3, 0 \n" \
+ " blr \n" \
+ ".balign 8 \n" \
+ "2: .8byte 0 \n" \
+ ".type " STATIC_CALL_TRAMP_STR(name) ", @function \n" \
+ ".size " STATIC_CALL_TRAMP_STR(name) ", . - " STATIC_CALL_TRAMP_STR(name) " \n" \
+ ".popsection \n")
+
+#endif /* MODULE */
+
+/*
+ * Byte offsets from the start of a trampoline emitted by __PPC_SCT.
+ * Each instruction is 4 bytes, so these values are tied to the exact
+ * instruction sequences of the MODULE and non-MODULE trampolines and must
+ * be updated together with them. The data slot follows a '.balign 8'.
+ */
+#define PPC_SCT_INST_MODULE 28 /* Offset of instruction to update */
+#define PPC_SCT_RET0_MODULE 44 /* Offset of label 1 */
+#define PPC_SCT_DATA_MODULE 56 /* Offset of label 2 (aligned) */
+
+#define PPC_SCT_INST_KERNEL 4 /* Offset of instruction to update */
+#define PPC_SCT_RET0_KERNEL 24 /* Offset of label 1 */
+#define PPC_SCT_DATA_KERNEL 32 /* Offset of label 2 (aligned) */
+
+#elif defined(CONFIG_PPC32)
+
#define __PPC_SCT(name, inst) \
asm(".pushsection .text, \"ax\" \n" \
".align 5 \n" \
".globl " STATIC_CALL_TRAMP_STR(name) " \n" \
STATIC_CALL_TRAMP_STR(name) ": \n" \
- inst " \n" \
+ " " inst " \n" \
" lis 12,2f@ha \n" \
" lwz 12,2f@l(12) \n" \
" mtctr 12 \n" \
@@ -19,11 +82,20 @@
".size " STATIC_CALL_TRAMP_STR(name) ", . - " STATIC_CALL_TRAMP_STR(name) " \n" \
".popsection \n")
-#define PPC_SCT_RET0 20 /* Offset of label 1 */
-#define PPC_SCT_DATA 28 /* Offset of label 2 */
+/*
+ * Byte offsets from the start of the PPC32 __PPC_SCT trampoline. PPC32 has
+ * a single trampoline definition, so the _KERNEL values simply alias the
+ * _MODULE ones.
+ */
+#define PPC_SCT_INST_MODULE 0 /* Offset of instruction to update */
+#define PPC_SCT_RET0_MODULE 20 /* Offset of label 1 */
+#define PPC_SCT_DATA_MODULE 28 /* Offset of label 2 */
+
+#define PPC_SCT_INST_KERNEL PPC_SCT_INST_MODULE
+#define PPC_SCT_RET0_KERNEL PPC_SCT_RET0_MODULE
+#define PPC_SCT_DATA_KERNEL PPC_SCT_DATA_MODULE
+
+#else /* !CONFIG_PPC64_ELF_ABI_V2 && !CONFIG_PPC32 */
+#error "Unsupported ABI"
+#endif /* CONFIG_PPC64_ELF_ABI_V2 */
#define ARCH_DEFINE_STATIC_CALL_TRAMP(name, func) __PPC_SCT(name, "b " #func)
#define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) __PPC_SCT(name, "blr")
-#define ARCH_DEFINE_STATIC_CALL_RET0_TRAMP(name) __PPC_SCT(name, "b .+20")
+#define ARCH_DEFINE_STATIC_CALL_RET0_TRAMP(name) __PPC_SCT(name, "b 1f")
#endif /* _ASM_POWERPC_STATIC_CALL_H */
@@ -128,8 +128,9 @@ extra-y += vmlinux.lds
obj-$(CONFIG_RELOCATABLE) += reloc_$(BITS).o
-obj-$(CONFIG_PPC32) += entry_32.o setup_32.o early_32.o static_call.o
+obj-$(CONFIG_PPC32) += entry_32.o setup_32.o early_32.o
obj-$(CONFIG_PPC64) += dma-iommu.o iommu.o
+obj-$(CONFIG_HAVE_STATIC_CALL) += static_call.o
obj-$(CONFIG_KGDB) += kgdb.o
obj-$(CONFIG_BOOTX_TEXT) += btext.o
obj-$(CONFIG_SMP) += smp.o
@@ -1,33 +1,151 @@
// SPDX-License-Identifier: GPL-2.0
+#include <linux/bitops.h>
#include <linux/memory.h>
#include <linux/static_call.h>
#include <asm/code-patching.h>
+/*
+ * Sign-extend @value, treating bit @index as the sign bit, using the
+ * helper that matches the native width of long.
+ */
+static long sign_extend_long(unsigned long value, int index)
+{
+	return sizeof(long) == 8 ? sign_extend64(value, index)
+				 : sign_extend32(value, index);
+}
+
+/*
+ * Work out the TOC pointer that the global entry sequence of @func would
+ * leave in r2, by decoding the first two instructions found at @func.
+ *
+ * Two sequences are understood:
+ *
+ *   1. addis r2, r12, SI1
+ *      addi  r2, r2, SI2
+ *
+ *   2. lis   r2, SI1
+ *      addi  r2, r2, SI2
+ *
+ * For the first form r12 is taken to be the address of @func itself, i.e.
+ * the global entry point.
+ *
+ * Returns the simulated result, or NULL if the sequence is not recognised
+ * or the kernel is not built for the 64-bit ELF ABI v2.
+ */
+static void *ppc_function_toc(u32 *func)
+{
+	u32 first, second;
+	unsigned long base, hi, lo;
+
+	if (!IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2))
+		return NULL;
+
+	first = func[0];
+	second = func[1];
+
+	/* SI1 supplies the shifted upper halfword, SI2 the lower; both signed */
+	hi = sign_extend_long((first & OP_SI_MASK) << 16, 31);
+	lo = sign_extend_long(second & OP_SI_MASK, 15);
+
+	/* First instruction decides what the upper part is added to */
+	if ((first & OP_RT_RA_MASK) == ADDIS_R2_R12)
+		base = (unsigned long)func;
+	else if ((first & OP_RT_RA_MASK) == LIS_R2)
+		base = 0;
+	else
+		return NULL;
+
+	/* Second instruction must accumulate the lower part into r2 */
+	if ((second & OP_RT_RA_MASK) != ADDI_R2_R2)
+		return NULL;
+
+	return (void *)(base + hi + lo);
+}
+
+/*
+ * Report whether @func1 and @func2 can be assumed to use the same TOC.
+ *
+ * Without the 64-bit ELF ABI v2 this is always true. Otherwise a NULL
+ * argument yields false, two core kernel text addresses yield true (the
+ * kernel is assumed to use a single TOC), and anything else is decided by
+ * decoding both entry sequences with ppc_function_toc() and requiring two
+ * recognised, identical results.
+ */
+static bool shares_toc(void *func1, void *func2)
+{
+	void *toc1;
+	void *toc2;
+
+	if (!IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2))
+		return true;
+
+	if (!func1 || !func2)
+		return false;
+
+	/* Both inside the core kernel image: single TOC assumed */
+	if (core_kernel_text((unsigned long)func1) &&
+	    core_kernel_text((unsigned long)func2))
+		return true;
+
+	/* A module is involved: compare the TOCs the entry code computes */
+	toc1 = ppc_function_toc(func1);
+	toc2 = ppc_function_toc(func2);
+
+	if (!toc1 || !toc2)
+		return false;
+
+	return toc1 == toc2;
+}
+
+/*
+ * Address of the patchable instruction inside @tramp. The offset depends on
+ * whether @tramp lies in core kernel text or not.
+ */
+static void *get_inst_addr(void *tramp)
+{
+	if (core_kernel_text((unsigned long)tramp))
+		return tramp + PPC_SCT_INST_KERNEL;
+
+	return tramp + PPC_SCT_INST_MODULE;
+}
+
+/*
+ * Address of the "return 0" stub (label 1) inside @tramp. The offset depends
+ * on whether @tramp lies in core kernel text or not.
+ */
+static void *get_ret0_addr(void *tramp)
+{
+	if (core_kernel_text((unsigned long)tramp))
+		return tramp + PPC_SCT_RET0_KERNEL;
+
+	return tramp + PPC_SCT_RET0_MODULE;
+}
+
+/*
+ * Address of the target data slot (label 2) inside @tramp. The offset depends
+ * on whether @tramp lies in core kernel text or not.
+ */
+static void *get_data_addr(void *tramp)
+{
+	if (core_kernel_text((unsigned long)tramp))
+		return tramp + PPC_SCT_DATA_KERNEL;
+
+	return tramp + PPC_SCT_DATA_MODULE;
+}
+
void arch_static_call_transform(void *site, void *tramp, void *func, bool tail)
{
int err;
bool is_ret0 = (func == __static_call_return0);
- unsigned long target = (unsigned long)(is_ret0 ? tramp + PPC_SCT_RET0 : func);
- bool is_short = is_offset_in_branch_range((long)target - (long)tramp);
+ bool is_short;
+ void *target = is_ret0 ? get_ret0_addr(tramp) : func;
+ void *tramp_inst = get_inst_addr(tramp);
if (!tramp)
return;
+ if (is_ret0)
+ is_short = true;
+ else if (shares_toc(tramp, target))
+ is_short = is_offset_in_branch_range(
+ (long)ppc_function_entry(target) - (long)tramp_inst);
+ else
+ /* Combine out-of-range with not sharing a TOC. Though it's possible
+ * an out-of-range target shares a TOC, handling this separately
+ * complicates the trampoline. It's simpler to always use the global
+ * entry point in this case.
+ */
+ is_short = false;
+
mutex_lock(&text_mutex);
if (func && !is_short) {
- err = patch_instruction(tramp + PPC_SCT_DATA, ppc_inst(target));
+ err = patch_ulong(get_data_addr(tramp), (unsigned long)target);
if (err)
goto out;
}
if (!func)
- err = patch_instruction(tramp, ppc_inst(PPC_RAW_BLR()));
+ err = patch_instruction(tramp_inst, ppc_inst(PPC_RAW_BLR()));
else if (is_short)
- err = patch_branch(tramp, target, 0);
+ err = patch_branch(tramp_inst, ppc_function_entry(target), 0);
else
- err = patch_instruction(tramp, ppc_inst(PPC_RAW_NOP()));
+ err = patch_instruction(tramp_inst, ppc_inst(PPC_RAW_NOP()));
+
out:
mutex_unlock(&text_mutex);