diff mbox series

ksleftest nfqueue race with dnat

Message ID 20240901220228.4157482-1-aojea@google.com
State Superseded, archived
Headers show
Series ksleftest nfqueue race with dnat | expand

Commit Message

Antonio Ojea Sept. 1, 2024, 10:02 p.m. UTC
The netfilter race happens when two packets with the same tuple are DNATed
and enqueued with nfqueue in the postrouting hook.
Once one of the packet is reinjected it may be DNATed again to a different
destination, but the conntrack entry remains the same and the return packet
is dropped.

Link: https://bugzilla.netfilter.org/show_bug.cgi?id=1766

Signed-off-by: Antonio Ojea <aojea@google.com>

---
 .../selftests/net/netfilter/nft_queue.sh      | 84 ++++++++++++++++++-
 1 file changed, 83 insertions(+), 1 deletion(-)

Comments

Pablo Neira Ayuso Sept. 10, 2024, 9:24 p.m. UTC | #1
Hi Antonio,

On Sun, Sep 01, 2024 at 10:02:28PM +0000, Antonio Ojea wrote:
> The netfilter race happens when two packets with the same tuple are DNATed
> and enqueued with nfqueue in the postrouting hook.
> Once one of the packet is reinjected it may be DNATed again to a different
> destination, but the conntrack entry remains the same and the return packet
> is dropped.

maybe this patch is not your last version?

I need this chunk for ping ns3 to work:

diff --git a/tools/testing/selftests/net/netfilter/nft_queue.sh b/tools/testing/selftests/net/netfilter/nft_queue.sh
index f754c014baa2..1720a49026a3 100755
--- a/tools/testing/selftests/net/netfilter/nft_queue.sh
+++ b/tools/testing/selftests/net/netfilter/nft_queue.sh
@@ -495,6 +495,7 @@ EOF
 ip netns exec "$nsrouter" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
 ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
 ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
+ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth2.forwarding=1 > /dev/null

 load_ruleset "filter" 0

then if I comment out this new test_udp_race (doing so to make sure
test still work), then test_queue 10 fails.

I think maybe you posted an older incomplete version of this patch?

Thanks.
Antonio Ojea Sept. 11, 2024, 7:50 a.m. UTC | #2
On Tue, Sep 10, 2024 at 11:24 PM Pablo Neira Ayuso <pablo@netfilter.org> wrote:
>
> Hi Antonio,
>
> On Sun, Sep 01, 2024 at 10:02:28PM +0000, Antonio Ojea wrote:
> > The netfilter race happens when two packets with the same tuple are DNATed
> > and enqueued with nfqueue in the postrouting hook.
> > Once one of the packet is reinjected it may be DNATed again to a different
> > destination, but the conntrack entry remains the same and the return packet
> > is dropped.
>
> maybe this patch is not your last version?
>

It is indeed not the last version, I just wanted to share a reproducer
of the issue, I've tried to attach it to the bugzilla issue but I
couldn't, so I've decided to share it over the mailing list.
I'm still learning the development workflows of this community so feel
free to guide me and correct me if I'm wrong ... I just replied as
HTML before, sorry :(

> I need this chunk for ping ns3 to work:
>
> diff --git a/tools/testing/selftests/net/netfilter/nft_queue.sh b/tools/testing/selftests/net/netfilter/nft_queue.sh
> index f754c014baa2..1720a49026a3 100755
> --- a/tools/testing/selftests/net/netfilter/nft_queue.sh
> +++ b/tools/testing/selftests/net/netfilter/nft_queue.sh
> @@ -495,6 +495,7 @@ EOF
>  ip netns exec "$nsrouter" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
>  ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
>  ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
> +ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth2.forwarding=1 > /dev/null
>
>  load_ruleset "filter" 0
>
> then if I comment out this new test_udp_race (doing so to make sure
> test still work), then test_queue 10 fails.
>
> I think maybe you posted an older incomplete version of this patch?
>
> Thanks.
diff mbox series

Patch

diff --git a/tools/testing/selftests/net/netfilter/nft_queue.sh b/tools/testing/selftests/net/netfilter/nft_queue.sh
index f3bdeb1271eb..bd7f20cf1ce5 100755
--- a/tools/testing/selftests/net/netfilter/nft_queue.sh
+++ b/tools/testing/selftests/net/netfilter/nft_queue.sh
@@ -14,6 +14,7 @@  cleanup()
 {
 	ip netns pids "$ns1" | xargs kill 2>/dev/null
 	ip netns pids "$ns2" | xargs kill 2>/dev/null
+	ip netns pids "$ns3" | xargs kill 2>/dev/null
 	ip netns pids "$nsrouter" | xargs kill 2>/dev/null
 
 	cleanup_all_ns
@@ -31,7 +32,7 @@  modprobe -q sctp
 
 trap cleanup EXIT
 
-setup_ns ns1 ns2 nsrouter
+setup_ns ns1 ns2 ns3 nsrouter
 
 TMPFILE0=$(mktemp)
 TMPFILE1=$(mktemp)
@@ -46,6 +47,7 @@  if ! ip link add veth0 netns "$nsrouter" type veth peer name eth0 netns "$ns1" >
     exit $ksft_skip
 fi
 ip link add veth1 netns "$nsrouter" type veth peer name eth0 netns "$ns2"
+ip link add veth2 netns "$nsrouter" type veth peer name eth0 netns "$ns3"
 
 ip -net "$nsrouter" link set veth0 up
 ip -net "$nsrouter" addr add 10.0.1.1/24 dev veth0
@@ -55,8 +57,13 @@  ip -net "$nsrouter" link set veth1 up
 ip -net "$nsrouter" addr add 10.0.2.1/24 dev veth1
 ip -net "$nsrouter" addr add dead:2::1/64 dev veth1 nodad
 
+ip -net "$nsrouter" link set veth2 up
+ip -net "$nsrouter" addr add 10.0.3.1/24 dev veth2
+ip -net "$nsrouter" addr add dead:3::1/64 dev veth2 nodad
+
 ip -net "$ns1" link set eth0 up
 ip -net "$ns2" link set eth0 up
+ip -net "$ns3" link set eth0 up
 
 ip -net "$ns1" addr add 10.0.1.99/24 dev eth0
 ip -net "$ns1" addr add dead:1::99/64 dev eth0 nodad
@@ -68,6 +75,11 @@  ip -net "$ns2" addr add dead:2::99/64 dev eth0 nodad
 ip -net "$ns2" route add default via 10.0.2.1
 ip -net "$ns2" route add default via dead:2::1
 
+ip -net "$ns3" addr add 10.0.3.99/24 dev eth0
+ip -net "$ns3" addr add dead:3::99/64 dev eth0 nodad
+ip -net "$ns3" route add default via 10.0.3.1
+ip -net "$ns3" route add default via dead:3::1
+
 load_ruleset() {
 	local name=$1
 	local prio=$2
@@ -143,6 +155,14 @@  test_ping() {
 	return 2
   fi
 
+    if ! ip netns exec "$ns1" ping -c 1 -q 10.0.3.99 > /dev/null; then
+	return 1
+  fi
+
+  if ! ip netns exec "$ns1" ping -c 1 -q dead:3::99 > /dev/null; then
+	return 2
+  fi
+
   return 0
 }
 
@@ -453,6 +473,67 @@  EOF
 	fi
 }
 
+udp_listener_ready()
+{
+	ss -S -N "$1" -uln -o "sport = :12345" | grep -q 12345
+}
+
+test_udp_race()
+{
+        ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF
+flush ruleset
+table inet udpq {
+	chain prerouting {
+	type nat hook prerouting priority dstnat - 5; policy accept;
+		 ip daddr 10.6.6.6 udp dport 12345 counter dnat to numgen inc mod 2 map { 0 : 10.0.2.99, 1 : 10.0.3.99 }
+	}
+        chain postrouting {
+        type filter hook postrouting priority srcnat - 5; policy accept;
+                udp dport 12345 counter queue num 12
+        }
+}
+EOF
+
+	timeout 10 ip netns exec "$ns2" socat UDP-LISTEN:12345,fork EXEC:cat &
+	local rpid1=$!
+
+	busywait "$BUSYWAIT_TIMEOUT" udp_listener_ready "$ns2"
+
+	timeout 10 ip netns exec "$ns3" socat UDP-LISTEN:12345,fork EXEC:cat &
+	local rpid2=$!
+
+	busywait "$BUSYWAIT_TIMEOUT" udp_listener_ready "$ns3"
+
+	ip netns exec "$nsrouter" ./nf_queue -q 12 &
+	local nfqpid=$!
+
+	echo > "$TMPFILE1"
+	echo > "$TMPFILE2"
+	# send UDP packets with the same tuple multiple times to hit the race
+	for i in $(seq 1 10); do
+		echo "dns-packet1dns-packet2" | ip netns exec "$ns1" socat -b10 STDIO UDP:10.6.6.6:12345 >>"$TMPFILE1"
+		# expected to receive two
+		echo "dns-packet1dns-packet2" >> "$TMPFILE2"
+	done
+
+	# must wait before checking completeness of output file.
+	wait "$rpid1"
+	wait "$rpid2"
+	kill "$nfqpid"
+
+	if ! ip netns exec "$nsrouter" nft delete table inet udpq; then
+		echo "FAIL:  Could not delete udpq table"
+		exit 1
+	fi
+
+	if ! diff -u "$TMPFILE1" "$TMPFILE2" ; then
+		echo "FAIL: lost packets?!" 1>&2
+		exit 1
+	fi
+
+	echo "PASS: udp race"
+}
+
 test_queue_removal()
 {
 	read tainted_then < /proc/sys/kernel/tainted
@@ -529,6 +610,7 @@  test_tcp_localhost_connectclose
 test_tcp_localhost_requeue
 test_sctp_forward
 test_sctp_output
+test_udp_race
 
 # should be last, adds vrf device in ns1 and changes routes
 test_icmp_vrf