Change 3360379 by cratiu@cratiu on 2011/09/02 09:44:48 *pending*
TCP/md5: Switch from a stack var to a percpu var to avoid a crash.
tcp_v4_send_ack uses a stack variable to construct the TCP header for
the response packet.
When TCP MD5 signatures are used on the MIPS architecture, a crash
sometimes happens when the current core is the master core and it is
running on the initial stack allocated in vmlinux.
The reason is that the initial stack is mapped in kseg2, so its
addresses cannot be directly translated to physical addresses by
virt_to_phys, as sg_set_buf expects in the following call chain:
> (optimized: sg_set_buf)
> sg_init_one+0x58/0xa4
> tcp_md5_hash_header+0x30/0x64
> tcp_v4_md5_hash_hdr+0xb4/0x134
> tcp_v4_send_ack+0x16c/0x25c
> (optimized: tcp_v4_timewait_ack)
> tcp_v4_rcv+0x1b3c/0x1e58
As a temporary fix that should not affect performance, the stack
variable is converted to a per-CPU variable allocated at boot time.
Affected files ...
... //packages/linux_2.6.32/main/src/include/net/tcp.h#6 edit
... //packages/linux_2.6.32/main/src/net/ipv4/tcp.c#11 edit
... //packages/linux_2.6.32/main/src/net/ipv4/tcp_ipv4.c#15 edit
include/net/tcp.h | 10 +++++++++
net/ipv4/tcp.c | 5 ++++
net/ipv4/tcp_ipv4.c | 53 ++++++++++++++++++++++++----------------------------
3 files changed, 40 insertions(+), 28 deletions(-)
Signed-off-by: Cosmin Ratiu <cratiu@ixiacom.com>
@@ -1570,5 +1570,15 @@
return skc->skc_net_params->tcp.rmem;
}
+struct tcp_reply_hdr { /* reply buffer used by tcp_v4_send_ack: TCP header followed by option words */
+ struct tcphdr th; /* fixed TCP header of the reply */
+ __be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2) /* 32-bit option words: room for the aligned timestamp option */
+#ifdef CONFIG_TCP_MD5SIG
+ + (TCPOLEN_MD5SIG_ALIGNED >> 2) /* plus room for the aligned MD5 signature option when MD5SIG is configured */
+#endif
+ ];
+};
+
+extern struct tcp_reply_hdr *tcp_rep_percpu;
#endif /* _TCP_H */
@@ -3150,6 +3150,11 @@
tcp_hashinfo.lhash_size);
tcp_register_congestion_control(&tcp_reno);
+
+ /* Hack alert: a proper fix should be implemented for the md5 crash */
+ tcp_rep_percpu = alloc_percpu(struct tcp_reply_hdr);
+ if (!tcp_rep_percpu)
+ panic("Cannot allocate per cpu tcp reply hdr\n");
}
EXPORT_SYMBOL(tcp_close);
@@ -680,6 +680,8 @@
SOCK_STAT_INC(groupptr, TcpRstSent, skb_get_portid(skb));
}
+struct tcp_reply_hdr *tcp_rep_percpu;
+
/* The code following below sending ACKs in SYN-RECV and TIME-WAIT states
outside socket context is ugly, certainly. What can I do?
*/
@@ -691,53 +693,48 @@
int reply_flags, u32 vlanprio)
{
struct tcphdr *th = tcp_hdr(skb);
- struct {
- struct tcphdr th;
- __be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
-#ifdef CONFIG_TCP_MD5SIG
- + (TCPOLEN_MD5SIG_ALIGNED >> 2)
-#endif
- ];
- } rep;
+ struct tcp_reply_hdr *rep;
struct ip_reply_arg arg;
- memset(&rep.th, 0, sizeof(struct tcphdr));
+ rep = per_cpu_ptr(tcp_rep_percpu, get_cpu());
+
+ memset(&rep->th, 0, sizeof(struct tcphdr));
memset(&arg, 0, sizeof(arg));
- arg.iov[0].iov_base = (unsigned char *)&rep;
- arg.iov[0].iov_len = sizeof(rep.th);
+ arg.iov[0].iov_base = (unsigned char *)rep;
+ arg.iov[0].iov_len = sizeof(rep->th);
if (ts) {
- rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
+ rep->opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
(TCPOPT_TIMESTAMP << 8) |
TCPOLEN_TIMESTAMP);
- rep.opt[1] = htonl(tcp_time_stamp);
- rep.opt[2] = htonl(ts);
+ rep->opt[1] = htonl(tcp_time_stamp);
+ rep->opt[2] = htonl(ts);
arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
}
/* Swap the send and the receive. */
- rep.th.dest = th->source;
- rep.th.source = th->dest;
- rep.th.doff = arg.iov[0].iov_len / 4;
- rep.th.seq = htonl(seq);
- rep.th.ack_seq = htonl(ack);
- rep.th.ack = 1;
- rep.th.window = htons(win);
+ rep->th.dest = th->source;
+ rep->th.source = th->dest;
+ rep->th.doff = arg.iov[0].iov_len / 4;
+ rep->th.seq = htonl(seq);
+ rep->th.ack_seq = htonl(ack);
+ rep->th.ack = 1;
+ rep->th.window = htons(win);
#ifdef CONFIG_TCP_MD5SIG
if (key) {
int offset = (ts) ? 3 : 0;
- rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
- (TCPOPT_NOP << 16) |
- (TCPOPT_MD5SIG << 8) |
- TCPOLEN_MD5SIG);
+ rep->opt[offset++] = htonl((TCPOPT_NOP << 24) |
+ (TCPOPT_NOP << 16) |
+ (TCPOPT_MD5SIG << 8) |
+ TCPOLEN_MD5SIG);
arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
- rep.th.doff = arg.iov[0].iov_len/4;
+ rep->th.doff = arg.iov[0].iov_len/4;
- tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
+ tcp_v4_md5_hash_hdr((__u8 *) &rep->opt[offset],
key, ip_hdr(skb)->saddr,
- ip_hdr(skb)->daddr, &rep.th);
+ ip_hdr(skb)->daddr, &rep->th);
}
#endif
arg.flags = reply_flags;