diff mbox series

[iwl-next,v2,01/14] cache: add __cacheline_group_{begin, end}_aligned() (+ couple more)

Message ID 20240620135347.3006818-2-aleksander.lobakin@intel.com
State Accepted
Delegated to: Anthony Nguyen
Headers show
Series idpf: XDP chapter I: convert Rx to libeth | expand

Commit Message

Alexander Lobakin June 20, 2024, 1:53 p.m. UTC
__cacheline_group_begin(), unfortunately, doesn't align the group
anyhow. If it is wanted, then you need to do something like

__cacheline_group_begin(grp) __aligned(ALIGN)

which isn't really convenient nor compact.
Add the _aligned() counterparts to align the groups automatically to
either the specified alignment (optional) or ``SMP_CACHE_BYTES``.
Note that the actual struct layout will then be (on x64 with 64-byte CL):

struct x {
	u32 y;				// offset 0, size 4, padding 56
	__cacheline_group_begin__grp;	// offset 64, size 0
	u32 z;				// offset 64, size 4, padding 4
	__cacheline_group_end__grp;	// offset 72, size 0
	__cacheline_group_pad__grp;	// offset 72, size 0, padding 56
	u32 w;				// offset 128
};

The end marker is aligned to long, so that you can assert the struct
size more strictly, but the offset of the next field in the structure
will be aligned to the group alignment, so that the next field won't
fall into the group it's not intended to.

Add __LARGEST_ALIGN definition and LARGEST_ALIGN() macro.
__LARGEST_ALIGN is the value to which the compilers align fields when
__aligned_largest is specified. Sometimes, it might be needed to get
this value outside of variable definitions. LARGEST_ALIGN() is macro
which just aligns a value to __LARGEST_ALIGN.
Also add SMP_CACHE_ALIGN(), similar to L1_CACHE_ALIGN(), but using
``SMP_CACHE_BYTES`` instead of ``L1_CACHE_BYTES`` as the former
also accounts L2, needed in some cases.

Signed-off-by: Alexander Lobakin <aleksander.lobakin@intel.com>
---
 include/linux/cache.h | 59 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 59 insertions(+)

Comments

Przemek Kitszel June 20, 2024, 3:15 p.m. UTC | #1
On 6/20/24 15:53, Alexander Lobakin wrote:
> __cacheline_group_begin(), unfortunately, doesn't align the group
> anyhow. If it is wanted, then you need to do something like
> 
> __cacheline_group_begin(grp) __aligned(ALIGN)
> 
> which isn't really convenient nor compact.
> Add the _aligned() counterparts to align the groups automatically to
> either the specified alignment (optional) or ``SMP_CACHE_BYTES``.
> Note that the actual struct layout will then be (on x64 with 64-byte CL):
> 
> struct x {
> 	u32 y;				// offset 0, size 4, padding 56
> 	__cacheline_group_begin__grp;	// offset 64, size 0
> 	u32 z;				// offset 64, size 4, padding 4
> 	__cacheline_group_end__grp;	// offset 72, size 0
> 	__cacheline_group_pad__grp;	// offset 72, size 0, padding 56
> 	u32 w;				// offset 128
> };
> 
> The end marker is aligned to long, so that you can assert the struct
> size more strictly, but the offset of the next field in the structure
> will be aligned to the group alignment, so that the next field won't
> fall into the group it's not intended to.
> 
> Add __LARGEST_ALIGN definition and LARGEST_ALIGN() macro.
> __LARGEST_ALIGN is the value to which the compilers align fields when
> __aligned_largest is specified. Sometimes, it might be needed to get
> this value outside of variable definitions. LARGEST_ALIGN() is macro
> which just aligns a value to __LARGEST_ALIGN.
> Also add SMP_CACHE_ALIGN(), similar to L1_CACHE_ALIGN(), but using
> ``SMP_CACHE_BYTES`` instead of ``L1_CACHE_BYTES`` as the former
> also accounts L2, needed in some cases.
> 
> Signed-off-by: Alexander Lobakin <aleksander.lobakin@intel.com>
> ---
>   include/linux/cache.h | 59 +++++++++++++++++++++++++++++++++++++++++++
>   1 file changed, 59 insertions(+)
> 

[...]

> +/**
> + * __cacheline_group_begin_aligned - declare an aligned group start
> + * @GROUP: name of the group
> + * @...: optional group alignment

didn't know that you could document "..." :)

> + *
> + * The following block inside a struct:
> + *
> + *	__cacheline_group_begin_aligned(grp);
> + *	field a;
> + *	field b;
> + *	__cacheline_group_end_aligned(grp);
> + *
> + * will always be aligned to either the specified alignment or
> + * ``SMP_CACHE_BYTES``.
> + */
> +#define __cacheline_group_begin_aligned(GROUP, ...)		\
> +	__cacheline_group_begin(GROUP)				\
> +	__aligned((__VA_ARGS__ + 0) ? : SMP_CACHE_BYTES)

nice trick :) +0

> +
> +/**
> + * __cacheline_group_end_aligned - declare an aligned group end
> + * @GROUP: name of the group
> + * @...: optional alignment (same as was in __cacheline_group_begin_aligned())
> + *
> + * Note that the end marker is aligned to sizeof(long) to allow more precise
> + * size assertion. It also declares a padding at the end to avoid next field
> + * falling into this cacheline.
> + */
> +#define __cacheline_group_end_aligned(GROUP, ...)		\
> +	__cacheline_group_end(GROUP) __aligned(sizeof(long));	\
> +	struct { } __cacheline_group_pad__##GROUP		\
> +	__aligned((__VA_ARGS__ + 0) ? : SMP_CACHE_BYTES)
> +
>   #ifndef CACHELINE_ASSERT_GROUP_MEMBER
>   #define CACHELINE_ASSERT_GROUP_MEMBER(TYPE, GROUP, MEMBER) \
>   	BUILD_BUG_ON(!(offsetof(TYPE, MEMBER) >= \

Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
Alexander Lobakin June 25, 2024, 10:24 a.m. UTC | #2
From: Przemek Kitszel <przemyslaw.kitszel@intel.com>
Date: Thu, 20 Jun 2024 17:15:53 +0200

> On 6/20/24 15:53, Alexander Lobakin wrote:
>> __cacheline_group_begin(), unfortunately, doesn't align the group
>> anyhow. If it is wanted, then you need to do something like
>>
>> __cacheline_group_begin(grp) __aligned(ALIGN)
>>
>> which isn't really convenient nor compact.
>> Add the _aligned() counterparts to align the groups automatically to
>> either the specified alignment (optional) or ``SMP_CACHE_BYTES``.
>> Note that the actual struct layout will then be (on x64 with 64-byte CL):
>>
>> struct x {
>>     u32 y;                // offset 0, size 4, padding 56
>>     __cacheline_group_begin__grp;    // offset 64, size 0
>>     u32 z;                // offset 64, size 4, padding 4
>>     __cacheline_group_end__grp;    // offset 72, size 0
>>     __cacheline_group_pad__grp;    // offset 72, size 0, padding 56
>>     u32 w;                // offset 128
>> };
>>
>> The end marker is aligned to long, so that you can assert the struct
>> size more strictly, but the offset of the next field in the structure
>> will be aligned to the group alignment, so that the next field won't
>> fall into the group it's not intended to.
>>
>> Add __LARGEST_ALIGN definition and LARGEST_ALIGN() macro.
>> __LARGEST_ALIGN is the value to which the compilers align fields when
>> __aligned_largest is specified. Sometimes, it might be needed to get
>> this value outside of variable definitions. LARGEST_ALIGN() is macro
>> which just aligns a value to __LARGEST_ALIGN.
>> Also add SMP_CACHE_ALIGN(), similar to L1_CACHE_ALIGN(), but using
>> ``SMP_CACHE_BYTES`` instead of ``L1_CACHE_BYTES`` as the former
>> also accounts L2, needed in some cases.
>>
>> Signed-off-by: Alexander Lobakin <aleksander.lobakin@intel.com>
>> ---
>>   include/linux/cache.h | 59 +++++++++++++++++++++++++++++++++++++++++++
>>   1 file changed, 59 insertions(+)
>>
> 
> [...]
> 
>> +/**
>> + * __cacheline_group_begin_aligned - declare an aligned group start
>> + * @GROUP: name of the group
>> + * @...: optional group alignment
> 
> didn't know that you could document "..." :)
> 
>> + *
>> + * The following block inside a struct:
>> + *
>> + *    __cacheline_group_begin_aligned(grp);
>> + *    field a;
>> + *    field b;
>> + *    __cacheline_group_end_aligned(grp);
>> + *
>> + * will always be aligned to either the specified alignment or
>> + * ``SMP_CACHE_BYTES``.
>> + */
>> +#define __cacheline_group_begin_aligned(GROUP, ...)        \
>> +    __cacheline_group_begin(GROUP)                \
>> +    __aligned((__VA_ARGS__ + 0) ? : SMP_CACHE_BYTES)
> 
> nice trick :) +0

The usual way to handle varargs. However, this one:

	__cacheline_group_begin_aligned(grp, 63 & 31);

will trigger a compiler warning as it expands to

	__aligned(63 & 31 + 0)

The compilers don't like bitops and arithmetic ops not separated by
parenthesis even in such simple case =\

Thanks,
Olek
diff mbox series

Patch

diff --git a/include/linux/cache.h b/include/linux/cache.h
index 0ecb17bb6883..ca2a05682a54 100644
--- a/include/linux/cache.h
+++ b/include/linux/cache.h
@@ -13,6 +13,32 @@ 
 #define SMP_CACHE_BYTES L1_CACHE_BYTES
 #endif
 
+/**
+ * SMP_CACHE_ALIGN - align a value to the L2 cacheline size
+ * @x: value to align
+ *
+ * On some architectures, L2 ("SMP") CL size is bigger than L1, and sometimes,
+ * this needs to be accounted.
+ *
+ * Return: aligned value.
+ */
+#ifndef SMP_CACHE_ALIGN
+#define SMP_CACHE_ALIGN(x)	ALIGN(x, SMP_CACHE_BYTES)
+#endif
+
+/*
+ * ``__aligned_largest`` aligns a field to the value most optimal for the
+ * target architecture to perform memory operations. Get the actual value
+ * to be able to use it anywhere else.
+ */
+#ifndef __LARGEST_ALIGN
+#define __LARGEST_ALIGN		sizeof(struct { long x; } __aligned_largest)
+#endif
+
+#ifndef LARGEST_ALIGN
+#define LARGEST_ALIGN(x)	ALIGN(x, __LARGEST_ALIGN)
+#endif
+
 /*
  * __read_mostly is used to keep rarely changing variables out of frequently
  * updated cachelines. Its use should be reserved for data that is used
@@ -95,6 +121,39 @@ 
 	__u8 __cacheline_group_end__##GROUP[0]
 #endif
 
+/**
+ * __cacheline_group_begin_aligned - declare an aligned group start
+ * @GROUP: name of the group
+ * @...: optional group alignment
+ *
+ * The following block inside a struct:
+ *
+ *	__cacheline_group_begin_aligned(grp);
+ *	field a;
+ *	field b;
+ *	__cacheline_group_end_aligned(grp);
+ *
+ * will always be aligned to either the specified alignment or
+ * ``SMP_CACHE_BYTES``.
+ */
+#define __cacheline_group_begin_aligned(GROUP, ...)		\
+	__cacheline_group_begin(GROUP)				\
+	__aligned((__VA_ARGS__ + 0) ? : SMP_CACHE_BYTES)
+
+/**
+ * __cacheline_group_end_aligned - declare an aligned group end
+ * @GROUP: name of the group
+ * @...: optional alignment (same as was in __cacheline_group_begin_aligned())
+ *
+ * Note that the end marker is aligned to sizeof(long) to allow more precise
+ * size assertion. It also declares a padding at the end to avoid next field
+ * falling into this cacheline.
+ */
+#define __cacheline_group_end_aligned(GROUP, ...)		\
+	__cacheline_group_end(GROUP) __aligned(sizeof(long));	\
+	struct { } __cacheline_group_pad__##GROUP		\
+	__aligned((__VA_ARGS__ + 0) ? : SMP_CACHE_BYTES)
+
 #ifndef CACHELINE_ASSERT_GROUP_MEMBER
 #define CACHELINE_ASSERT_GROUP_MEMBER(TYPE, GROUP, MEMBER) \
 	BUILD_BUG_ON(!(offsetof(TYPE, MEMBER) >= \