memcmp() is generally slow. Compare keys long by long where the
alignment and key size allow it. This improves xdp_flow performance.
This patch is included in this series just to demonstrate to what
extent xdp_flow performance can increase.

Signed-off-by: Toshiaki Makita <toshiaki.makita1@gmail.com>
---
 kernel/bpf/hashtab.c | 27 +++++++++++++++++++++++++--
 1 file changed, 25 insertions(+), 2 deletions(-)

@@ -417,6 +417,29 @@ static inline struct hlist_nulls_head *select_bucket(struct bpf_htab *htab, u32
return &__select_bucket(htab, hash)->head;
}
+/* key1 must be aligned to sizeof(long) */
+static bool key_equal(void *key1, void *key2, u32 size)
+{
+	/* Compile-time check that key1 (&htab_elem->key) is long-aligned */
+ BUILD_BUG_ON(!IS_ALIGNED(offsetof(struct htab_elem, key),
+ sizeof(long)));
+
+ if (IS_ALIGNED((unsigned long)key2 | (unsigned long)size,
+ sizeof(long))) {
+ unsigned long *lkey1, *lkey2;
+
+ for (lkey1 = key1, lkey2 = key2; size > 0;
+ lkey1++, lkey2++, size -= sizeof(long)) {
+ if (*lkey1 != *lkey2)
+ return false;
+ }
+
+ return true;
+ }
+
+ return !memcmp(key1, key2, size);
+}
+
/* this lookup function can only be called with bucket lock taken */
static struct htab_elem *lookup_elem_raw(struct hlist_nulls_head *head, u32 hash,
void *key, u32 key_size)
@@ -425,7 +448,7 @@ static struct htab_elem *lookup_elem_raw(struct hlist_nulls_head *head, u32 hash
struct htab_elem *l;
hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
- if (l->hash == hash && !memcmp(&l->key, key, key_size))
+ if (l->hash == hash && key_equal(&l->key, key, key_size))
return l;
return NULL;
@@ -444,7 +467,7 @@ static struct htab_elem *lookup_nulls_elem_raw(struct hlist_nulls_head *head,
again:
hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
- if (l->hash == hash && !memcmp(&l->key, key, key_size))
+ if (l->hash == hash && key_equal(&l->key, key, key_size))
return l;
if (unlikely(get_nulls_value(n) != (hash & (n_buckets - 1))))
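
For reviewers who want to exercise the fast path outside the kernel, here is a
minimal userspace sketch of the same comparison logic (not part of the patch).
The IS_ALIGNED stand-in and the 16-byte test keys are assumptions for
illustration; on the kernel side, key1's alignment is additionally guaranteed
by the BUILD_BUG_ON on struct htab_elem.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Userspace stand-in for the kernel's IS_ALIGNED() helper. */
#define IS_ALIGNED(x, a)	(((x) & ((a) - 1)) == 0)

/* Mirrors the patched function. key1 is assumed long-aligned; in the
 * kernel this is guaranteed by struct htab_elem's layout.
 */
static bool key_equal(void *key1, void *key2, uint32_t size)
{
	if (IS_ALIGNED((unsigned long)key2 | (unsigned long)size,
		       sizeof(long))) {
		/* Fast path: compare one long at a time. */
		unsigned long *lkey1 = key1, *lkey2 = key2;

		for (; size > 0; lkey1++, lkey2++, size -= sizeof(long)) {
			if (*lkey1 != *lkey2)
				return false;
		}
		return true;
	}

	/* Slow path: unaligned pointer or odd size. */
	return !memcmp(key1, key2, size);
}

int main(void)
{
	/* Hypothetical 16-byte keys; long-aligned, so the fast path runs. */
	unsigned long a[2] = { 0x1234, 0x5678 };
	unsigned long b[2] = { 0x1234, 0x5678 };
	unsigned long c[2] = { 0x1234, 0x9abc };

	printf("a == b: %d\n", key_equal(a, b, sizeof(a)));    /* 1 */
	printf("a == c: %d\n", key_equal(a, c, sizeof(a)));    /* 0 */
	/* An odd size forces the memcmp() fallback. */
	printf("a == b, 15 bytes: %d\n", key_equal(a, b, 15)); /* 1 */
	return 0;
}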
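The "memcmp() is generally slow" premise is also easy to sanity-check in
userspace. Below is a rough micro-benchmark sketch, again an illustration
rather than a rigorous measurement: ITERS and KEY_SIZE are arbitrary choices,
numbers vary with libc, compiler flags and CPU, and an optimizing compiler may
fold the loops away, so build with -O0 or add barriers.

#define _POSIX_C_SOURCE 199309L
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <time.h>

#define ITERS	 100000000UL	/* arbitrary iteration count */
#define KEY_SIZE 16		/* hypothetical key size, multiple of sizeof(long) */

static bool cmp_long(const unsigned long *a, const unsigned long *b,
		     uint32_t size)
{
	for (; size > 0; a++, b++, size -= sizeof(long))
		if (*a != *b)
			return false;
	return true;
}

static double elapsed(const struct timespec *t0, const struct timespec *t1)
{
	return (t1->tv_sec - t0->tv_sec) + (t1->tv_nsec - t0->tv_nsec) / 1e9;
}

int main(void)
{
	unsigned long a[KEY_SIZE / sizeof(long)] = { 1, 2 };
	unsigned long b[KEY_SIZE / sizeof(long)] = { 1, 2 };
	struct timespec t0, t1;
	unsigned long i, hits;

	/* Time the byte-wise libc comparison. */
	hits = 0;
	clock_gettime(CLOCK_MONOTONIC, &t0);
	for (i = 0; i < ITERS; i++)
		hits += !memcmp(a, b, KEY_SIZE);
	clock_gettime(CLOCK_MONOTONIC, &t1);
	printf("memcmp : %.3fs (%lu hits)\n", elapsed(&t0, &t1), hits);

	/* Time the long-wise comparison used by the fast path. */
	hits = 0;
	clock_gettime(CLOCK_MONOTONIC, &t0);
	for (i = 0; i < ITERS; i++)
		hits += cmp_long(a, b, KEY_SIZE);
	clock_gettime(CLOCK_MONOTONIC, &t1);
	printf("longcmp: %.3fs (%lu hits)\n", elapsed(&t0, &t1), hits);
	return 0;
}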