Message ID | 1580832945-28331-1-git-send-email-cai@lca.pw |
---|---|
State | Superseded |
Delegated to: | David Miller |
Headers | show |
Series | [v2] skbuff: fix a data race in skb_queue_len() | expand |
On 2/4/20 8:15 AM, Qian Cai wrote: > sk_buff.qlen can be accessed concurrently as noticed by KCSAN, > > > Since only the read is operating as lockless, it could introduce a logic > bug in unix_recvq_full() due to the load tearing. Fix it by adding > a lockless variant of skb_queue_len() and unix_recvq_full() where > READ_ONCE() is on the read while WRITE_ONCE() is on the write similar to > the commit d7d16a89350a ("net: add skb_queue_empty_lockless()"). > > Signed-off-by: Qian Cai <cai@lca.pw> > --- > > v2: add lockless variant helpers and WRITE_ONCE(). > > include/linux/skbuff.h | 14 +++++++++++++- > net/unix/af_unix.c | 9 ++++++++- > 2 files changed, 21 insertions(+), 2 deletions(-) > > diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h > index 3d13a4b717e9..de5eade20e52 100644 > --- a/include/linux/skbuff.h > +++ b/include/linux/skbuff.h > @@ -1822,6 +1822,18 @@ static inline __u32 skb_queue_len(const struct sk_buff_head *list_) > } > > /** > + * skb_queue_len - get queue length Please fix to use the exact name. > + * @list_: list to measure > + * > + * Return the length of an &sk_buff queue. > + * This variant can be used in lockless contexts. 
> + */ > +static inline __u32 skb_queue_len_lockless(const struct sk_buff_head *list_) > +{ > + return READ_ONCE(list_->qlen); > +} > + > +/** > * __skb_queue_head_init - initialize non-spinlock portions of sk_buff_head > * @list: queue to initialize > * > @@ -2026,7 +2038,7 @@ static inline void __skb_unlink(struct sk_buff *skb, struct sk_buff_head *list) > { > struct sk_buff *next, *prev; > > - list->qlen--; > + WRITE_ONCE(list->qlen, list->qlen - 1); > next = skb->next; > prev = skb->prev; > skb->next = skb->prev = NULL; > diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c > index 321af97c7bbe..349e7fbfbc67 100644 > --- a/net/unix/af_unix.c > +++ b/net/unix/af_unix.c > @@ -194,6 +194,12 @@ static inline int unix_recvq_full(struct sock const *sk) > return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog; > } > > +static inline int unix_recvq_full_lockless(struct sock const *sk) The const attribute is misplaced. It should be : static inline bool unix_recvq_full_lockless(const struct sock *sk) > +{ > + return skb_queue_len_lockless(&sk->sk_receive_queue) > > + sk->sk_max_ack_backlog; You probably also need a READ_ONCE() for sk->sk_max_ack_backlog It is a matter of time before syzbot finds how to trigger the race. Since you added a nice unix_recvq_full_lockless() helper, lets make it right. Thanks.
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 3d13a4b717e9..de5eade20e52 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1822,6 +1822,18 @@ static inline __u32 skb_queue_len(const struct sk_buff_head *list_) } /** + * skb_queue_len_lockless - get queue length + * @list_: list to measure + * + * Return the length of an &sk_buff queue. + * This variant can be used in lockless contexts. + */ +static inline __u32 skb_queue_len_lockless(const struct sk_buff_head *list_) +{ + return READ_ONCE(list_->qlen); +} + +/** * __skb_queue_head_init - initialize non-spinlock portions of sk_buff_head * @list: queue to initialize * @@ -2026,7 +2038,7 @@ static inline void __skb_unlink(struct sk_buff *skb, struct sk_buff_head *list) > { struct sk_buff *next, *prev; - list->qlen--; + WRITE_ONCE(list->qlen, list->qlen - 1); next = skb->next; prev = skb->prev; skb->next = skb->prev = NULL; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 321af97c7bbe..349e7fbfbc67 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -194,6 +194,12 @@ static inline int unix_recvq_full(struct sock const *sk) return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog; } +static inline bool unix_recvq_full_lockless(const struct sock *sk) +{ + return skb_queue_len_lockless(&sk->sk_receive_queue) > + READ_ONCE(sk->sk_max_ack_backlog); +} + struct sock *unix_peer_get(struct sock *s) { struct sock *peer; @@ -1758,7 +1764,8 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, * - unix_peer(sk) == sk by time of get but disconnected before lock */ if (other != sk && - unlikely(unix_peer(other) != sk && unix_recvq_full(other))) { + unlikely(unix_peer(other) != sk && + unix_recvq_full_lockless(other))) { if (timeo) { timeo = unix_wait_for_peer(other, timeo);
sk_buff.qlen can be accessed concurrently as noticed by KCSAN, BUG: KCSAN: data-race in __skb_try_recv_from_queue / unix_dgram_sendmsg read to 0xffff8a1b1d8a81c0 of 4 bytes by task 5371 on cpu 96: unix_dgram_sendmsg+0x9a9/0xb70 include/linux/skbuff.h:1821 net/unix/af_unix.c:1761 ____sys_sendmsg+0x33e/0x370 ___sys_sendmsg+0xa6/0xf0 __sys_sendmsg+0x69/0xf0 __x64_sys_sendmsg+0x51/0x70 do_syscall_64+0x91/0xb47 entry_SYSCALL_64_after_hwframe+0x49/0xbe write to 0xffff8a1b1d8a81c0 of 4 bytes by task 1 on cpu 99: __skb_try_recv_from_queue+0x327/0x410 include/linux/skbuff.h:2029 __skb_try_recv_datagram+0xbe/0x220 unix_dgram_recvmsg+0xee/0x850 ____sys_recvmsg+0x1fb/0x210 ___sys_recvmsg+0xa2/0xf0 __sys_recvmsg+0x66/0xf0 __x64_sys_recvmsg+0x51/0x70 do_syscall_64+0x91/0xb47 entry_SYSCALL_64_after_hwframe+0x49/0xbe Since only the read is operating as lockless, it could introduce a logic bug in unix_recvq_full() due to the load tearing. Fix it by adding a lockless variant of skb_queue_len() and unix_recvq_full() where READ_ONCE() is on the read while WRITE_ONCE() is on the write similar to the commit d7d16a89350a ("net: add skb_queue_empty_lockless()"). Signed-off-by: Qian Cai <cai@lca.pw> --- v2: add lockless variant helpers and WRITE_ONCE(). include/linux/skbuff.h | 14 +++++++++++++- net/unix/af_unix.c | 9 ++++++++- 2 files changed, 21 insertions(+), 2 deletions(-)