@@ -129,6 +129,38 @@ struct nf_conn
struct rcu_head rcu;
};
+#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) || \
+ defined(CONFIG_PROVE_LOCKING)
+
+/*
+ * We reserve 16 locks per cpu: a plain spinlock_t is 4 bytes, so 16
+ * locks fill one typical 64-byte cache line.  The array is capped at
+ * 512 locks, so ct_hlock[] uses at most 2048 bytes of memory.
+ * (With lockdep enabled spinlock_t is much bigger, so keep the array
+ * smaller there.)
+ */
+#ifdef CONFIG_LOCKDEP
+#define CT_HASH_LOCK_SZ 64
+#elif NR_CPUS >= 32
+#define CT_HASH_LOCK_SZ 512
+#else
+#define CT_HASH_LOCK_SZ (roundup_pow_of_two(NR_CPUS) * 16)
+#endif
+
+extern spinlock_t ct_hlock[CT_HASH_LOCK_SZ];
+
+/*
+ * Map a conntrack to its hashed lock.  Slab-allocated objects share
+ * their low-order address bits, so fold higher bits into the index.
+ */
+static inline spinlock_t *ct_lock_addr(const struct nf_conn *ct)
+{
+	unsigned long hash = (unsigned long)ct;
+
+	hash ^= hash >> 16;
+	hash ^= hash >> 8;
+	return &ct_hlock[hash % CT_HASH_LOCK_SZ];
+}
+
+#else
+#define CT_HASH_LOCK_SZ 0
+
+/*
+ * UP without spinlock debugging: nothing to lock.  spin_lock_bh(NULL)
+ * is safe because the UP lock ops never dereference their argument.
+ */
+static inline spinlock_t *ct_lock_addr(const struct nf_conn *ct)
+{
+	return NULL;
+}
+#endif
+
static inline struct nf_conn *
nf_ct_tuplehash_to_ctrack(const struct nf_conntrack_tuple_hash *hash)
{
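
A quick sanity check of the sizing and the hash: with NR_CPUS=8 the
header above yields roundup_pow_of_two(8) * 16 = 128 locks (512 bytes
of plain spinlocks); with NR_CPUS >= 32 the array is pinned at 512.
The mix function can be exercised in a standalone userspace sketch
(not part of the patch; the base address, the 256-byte object spacing
and the bucket count are assumptions standing in for slab layout):

	/* Userspace sketch: how ct_lock_addr()'s mix function spreads
	 * slab-like object addresses over the lock buckets. */
	#include <stdio.h>

	#define CT_HASH_LOCK_SZ	512	/* assumed NR_CPUS >= 32 sizing */
	#define NOBJS		4096

	static unsigned int ct_hash(unsigned long addr)
	{
		addr ^= addr >> 16;
		addr ^= addr >> 8;
		return addr % CT_HASH_LOCK_SZ;
	}

	int main(void)
	{
		unsigned int buckets[CT_HASH_LOCK_SZ] = { 0 };
		unsigned int i, used = 0, max = 0;

		for (i = 0; i < NOBJS; i++) {
			/* 16 objects of 256 bytes per 4KB slab page */
			unsigned long addr = 0xd0000000UL +
					     (i / 16) * 4096 + (i % 16) * 256;
			buckets[ct_hash(addr)]++;
		}
		for (i = 0; i < CT_HASH_LOCK_SZ; i++) {
			if (buckets[i]) {
				used++;
				if (buckets[i] > max)
					max = buckets[i];
			}
		}
		printf("%u/%u buckets used, worst bucket: %u objects\n",
		       used, CT_HASH_LOCK_SZ, max);
		return 0;
	}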
@@ -61,9 +61,9 @@ struct nf_conn nf_conntrack_untracked __read_mostly;
EXPORT_SYMBOL_GPL(nf_conntrack_untracked);
static struct kmem_cache *nf_conntrack_cachep __read_mostly;
-
-static int nf_conntrack_hash_rnd_initted;
-static unsigned int nf_conntrack_hash_rnd;
+spinlock_t ct_hlock[CT_HASH_LOCK_SZ];
+static int nf_conntrack_hash_rnd_initted __read_mostly;
+static unsigned int nf_conntrack_hash_rnd __read_mostly;
static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple,
unsigned int size, unsigned int rnd)
@@ -1141,7 +1141,7 @@ module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint,
static int nf_conntrack_init_init_net(void)
{
int max_factor = 8;
- int ret;
+ int i, ret;
/* Idea from tcp.c: use 1/16384 of memory. On i386: 32MB
* machine has 512 buckets. >= 1GB machines have 16384 buckets. */
@@ -1174,6 +1174,8 @@ static int nf_conntrack_init_init_net(void)
ret = -ENOMEM;
goto err_cache;
}
+ for (i = 0; i < CT_HASH_LOCK_SZ; i++)
+ spin_lock_init(&ct_hlock[i]);
ret = nf_conntrack_proto_init();
if (ret < 0)
@@ -26,9 +26,6 @@
#include <net/netfilter/nf_conntrack_ecache.h>
#include <net/netfilter/nf_log.h>
-/* Protects ct->proto.tcp */
-static DEFINE_RWLOCK(tcp_lock);
-
/* "Be conservative in what you do,
be liberal in what you accept from others."
If it's non-zero, we mark only out of window RST segments as INVALID. */
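
From here on the hunks are mechanical: every
write_lock_bh(&tcp_lock)/write_unlock_bh(&tcp_lock) pair becomes a
critical section on the conntrack's own hashed lock.  Sketched once
with a hypothetical helper (assuming the usual nf_conntrack headers),
so the repeated hunks below are easier to skim:

	/* Hypothetical helper, not in the patch: the shape every
	 * converted critical section takes. */
	static void example_td_end_update(struct nf_conn *ct,
					  u_int32_t end, int dir)
	{
		spin_lock_bh(ct_lock_addr(ct));	  /* was write_lock_bh(&tcp_lock) */
		if (after(end, ct->proto.tcp.seen[dir].td_end))
			ct->proto.tcp.seen[dir].td_end = end;
		spin_unlock_bh(ct_lock_addr(ct)); /* was write_unlock_bh(&tcp_lock) */
	}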
@@ -297,9 +294,7 @@ static int tcp_print_conntrack(struct seq_file *s, const struct nf_conn *ct)
{
enum tcp_conntrack state;
- read_lock_bh(&tcp_lock);
state = ct->proto.tcp.state;
- read_unlock_bh(&tcp_lock);
return seq_printf(s, "%s ", tcp_conntrack_names[state]);
}
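
Note that tcp_print_conntrack() now reads the state with no lock at
all: tcp.state is a single byte, so the read cannot tear, and the
/proc output can tolerate being one update stale.  If one wanted to
make the lockless access explicit, a hypothetical annotation (not
part of the patch) would be:

	/* hypothetical: document the intentionally lockless read */
	enum tcp_conntrack state = ACCESS_ONCE(ct->proto.tcp.state);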
@@ -705,14 +700,14 @@ void nf_conntrack_tcp_update(const struct sk_buff *skb,
end = segment_seq_plus_len(ntohl(tcph->seq), skb->len, dataoff, tcph);
- write_lock_bh(&tcp_lock);
+ spin_lock_bh(ct_lock_addr(ct));
/*
* We have to worry for the ack in the reply packet only...
*/
if (after(end, ct->proto.tcp.seen[dir].td_end))
ct->proto.tcp.seen[dir].td_end = end;
ct->proto.tcp.last_end = end;
- write_unlock_bh(&tcp_lock);
+ spin_unlock_bh(ct_lock_addr(ct));
pr_debug("tcp_update: sender end=%u maxend=%u maxwin=%u scale=%i "
"receiver end=%u maxend=%u maxwin=%u scale=%i\n",
sender->td_end, sender->td_maxend, sender->td_maxwin,
@@ -821,7 +816,7 @@ static int tcp_packet(struct nf_conn *ct,
th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
BUG_ON(th == NULL);
- write_lock_bh(&tcp_lock);
+ spin_lock_bh(ct_lock_addr(ct));
old_state = ct->proto.tcp.state;
dir = CTINFO2DIR(ctinfo);
index = get_conntrack_index(th);
@@ -851,7 +846,7 @@ static int tcp_packet(struct nf_conn *ct,
&& ct->proto.tcp.last_index == TCP_RST_SET)) {
/* Attempt to reopen a closed/aborted connection.
* Delete this connection and look up again. */
- write_unlock_bh(&tcp_lock);
+ spin_unlock_bh(ct_lock_addr(ct));
/* Only repeat if we can actually remove the timer.
* Destruction may already be in progress in process
@@ -887,7 +882,7 @@ static int tcp_packet(struct nf_conn *ct,
* that the client cannot but retransmit its SYN and
* thus initiate a clean new session.
*/
- write_unlock_bh(&tcp_lock);
+ spin_unlock_bh(ct_lock_addr(ct));
if (LOG_INVALID(net, IPPROTO_TCP))
nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
"nf_ct_tcp: killing out of sync session ");
@@ -900,7 +895,7 @@ static int tcp_packet(struct nf_conn *ct,
ct->proto.tcp.last_end =
segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th);
- write_unlock_bh(&tcp_lock);
+ spin_unlock_bh(ct_lock_addr(ct));
if (LOG_INVALID(net, IPPROTO_TCP))
nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
"nf_ct_tcp: invalid packet ignored ");
@@ -909,7 +904,7 @@ static int tcp_packet(struct nf_conn *ct,
/* Invalid packet */
pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n",
dir, get_conntrack_index(th), old_state);
- write_unlock_bh(&tcp_lock);
+ spin_unlock_bh(ct_lock_addr(ct));
if (LOG_INVALID(net, IPPROTO_TCP))
nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
"nf_ct_tcp: invalid state ");
@@ -940,7 +935,7 @@ static int tcp_packet(struct nf_conn *ct,
if (!tcp_in_window(ct, &ct->proto.tcp, dir, index,
skb, dataoff, th, pf)) {
- write_unlock_bh(&tcp_lock);
+ spin_unlock_bh(ct_lock_addr(ct));
return -NF_ACCEPT;
}
in_window:
@@ -969,7 +964,7 @@ static int tcp_packet(struct nf_conn *ct,
timeout = nf_ct_tcp_timeout_unacknowledged;
else
timeout = tcp_timeouts[new_state];
- write_unlock_bh(&tcp_lock);
+ spin_unlock_bh(ct_lock_addr(ct));
nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, ct);
if (new_state != old_state)
@@ -1092,7 +1088,7 @@ static int tcp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
struct nlattr *nest_parms;
struct nf_ct_tcp_flags tmp = {};
- read_lock_bh(&tcp_lock);
+ spin_lock_bh(ct_lock_addr(ct));
nest_parms = nla_nest_start(skb, CTA_PROTOINFO_TCP | NLA_F_NESTED);
if (!nest_parms)
goto nla_put_failure;
@@ -1112,14 +1108,14 @@ static int tcp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
tmp.flags = ct->proto.tcp.seen[1].flags;
NLA_PUT(skb, CTA_PROTOINFO_TCP_FLAGS_REPLY,
sizeof(struct nf_ct_tcp_flags), &tmp);
- read_unlock_bh(&tcp_lock);
+ spin_unlock_bh(ct_lock_addr(ct));
nla_nest_end(skb, nest_parms);
return 0;
nla_put_failure:
- read_unlock_bh(&tcp_lock);
+ spin_unlock_bh(ct_lock_addr(ct));
return -1;
}
@@ -1150,7 +1146,7 @@ static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct)
nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]) >= TCP_CONNTRACK_MAX)
return -EINVAL;
- write_lock_bh(&tcp_lock);
+ spin_lock_bh(ct_lock_addr(ct));
if (tb[CTA_PROTOINFO_TCP_STATE])
ct->proto.tcp.state = nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]);
@@ -1177,7 +1173,7 @@ static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct)
ct->proto.tcp.seen[1].td_scale =
nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_REPLY]);
}
- write_unlock_bh(&tcp_lock);
+ spin_unlock_bh(ct_lock_addr(ct));
return 0;
}
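
The whole technique, reduced to a runnable userspace analog (all
names hypothetical): a fixed array of striped locks, an init loop
like the one added to nf_conntrack_init_init_net(), and a lookup
that hashes the object address.  Build with: cc demo.c -pthread

	#include <pthread.h>
	#include <stdio.h>
	#include <stdlib.h>

	#define LOCK_SZ 64

	static pthread_mutex_t locks[LOCK_SZ];

	struct conn { unsigned long pkts; };

	static pthread_mutex_t *lock_addr(const struct conn *c)
	{
		unsigned long h = (unsigned long)c;

		h ^= h >> 16;
		h ^= h >> 8;
		return &locks[h % LOCK_SZ];
	}

	static void conn_update(struct conn *c)
	{
		pthread_mutex_lock(lock_addr(c));   /* per-bucket, not global */
		c->pkts++;
		pthread_mutex_unlock(lock_addr(c));
	}

	int main(void)
	{
		struct conn *conns = calloc(1024, sizeof(*conns));
		int i;

		for (i = 0; i < LOCK_SZ; i++)	    /* mirrors the init loop */
			pthread_mutex_init(&locks[i], NULL);
		for (i = 0; i < 1024; i++)
			conn_update(&conns[i]);
		printf("updated %d connections under %d striped locks\n",
		       1024, LOCK_SZ);
		free(conns);
		return 0;
	}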