@@ -6,6 +6,8 @@
#define _ASM_POWERPC_BARRIER_H
#include <asm/asm-const.h>
+#include <asm/cputable.h>
+#include <asm/feature-fixups.h>
#ifndef __ASSEMBLY__
#include <asm/ppc-opcode.h>
@@ -41,7 +43,12 @@
/* The sub-arch has lwsync */
#if defined(CONFIG_PPC64) || defined(CONFIG_PPC_E500MC)
-# define SMPWMB LWSYNC
+# define SMPWMB /* write-barrier sequence, chosen by CPU feature fixup */ \
+ BEGIN_FTR_SECTION; \
+ LWSYNC; /* NOTE(review): presumably kept when CPU_FTR_ARCH_31 is clear (IFCLR) - confirm in feature-fixups.h */ \
+ FTR_SECTION_ELSE; \
+ .long PPC_RAW_STNCISYNC(); /* alternative: stncisync orders only cacheable store-store */ \
+ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_31)
#elif defined(CONFIG_BOOKE)
# define SMPWMB mbar
#else
stncisync orders less than lwsync (only cacheable store-store, not load-load or load-store), so it should be as cheap or cheaper. Microbenchmarks with no actual loads to order show that the basic execution cost is the same on POWER10. Signed-off-by: Nicholas Piggin <npiggin@gmail.com> --- arch/powerpc/include/asm/barrier.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-)