@@ -135,8 +135,9 @@ void nf_conntrack_alter_reply(struct nf_conn *ct,
/* Is this tuple taken? (ignoring any belonging to the given
conntrack). */
-int nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
- const struct nf_conn *ignored_conntrack);
+int nf_conntrack_reply_tuple_taken(const struct nf_conntrack_tuple *tuple,
+ const struct nf_conn *ignored_conntrack,
+ bool ignore_same_orig);
#define NFCT_INFOMASK 7UL
#define NFCT_PTRMASK ~(NFCT_INFOMASK)
@@ -49,7 +49,8 @@ struct nf_conn_nat *nf_ct_nat_ext_add(struct nf_conn *ct);
/* Is this tuple already taken? (not by us)*/
int nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple,
- const struct nf_conn *ignored_conntrack);
+ const struct nf_conn *ignored_conntrack,
+ bool ignore_same_orig);
static inline struct nf_conn_nat *nfct_nat(const struct nf_conn *ct)
{
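
Both headers now carry a trailing bool, so every caller has to state explicitly whether same-origin clashes should be ignored. A minimal sketch of the adapted call pattern (the wrapper below is hypothetical and compiles only in kernel context; passing false preserves the pre-patch behavior):

/* Hypothetical helper, shown only to illustrate the new signature;
 * false keeps the old semantics. */
static bool tuple_is_free(const struct nf_conntrack_tuple *tuple,
			  const struct nf_conn *ct)
{
	return !nf_nat_used_tuple(tuple, ct, false);
}
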
@@ -72,7 +72,7 @@ gre_unique_tuple(const struct nf_nat_l3proto *l3proto,
for (i = 0; ; ++key) {
*keyptr = htons(min + key % range_size);
- if (++i == range_size || !nf_nat_used_tuple(tuple, ct))
+ if (++i == range_size || !nf_nat_used_tuple(tuple, ct, false))
return;
}
@@ -47,7 +47,7 @@ icmp_unique_tuple(const struct nf_nat_l3proto *l3proto,
for (i = 0; ; ++id) {
tuple->src.u.icmp.id = htons(ntohs(range->min_proto.icmp.id) +
(id % range_size));
- if (++i == range_size || !nf_nat_used_tuple(tuple, ct))
+ if (++i == range_size || !nf_nat_used_tuple(tuple, ct, false))
return;
}
return;
@@ -49,7 +49,7 @@ icmpv6_unique_tuple(const struct nf_nat_l3proto *l3proto,
for (i = 0; ; ++id) {
tuple->src.u.icmp.id = htons(ntohs(range->min_proto.icmp.id) +
(id % range_size));
- if (++i == range_size || !nf_nat_used_tuple(tuple, ct))
+ if (++i == range_size || !nf_nat_used_tuple(tuple, ct, false))
return;
}
}
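
Note that all three per-protocol searches above (GRE key, ICMPv4 id, ICMPv6 id) pass false for the new argument: in this patch the relaxed same-origin rule is applied only where get_unique_tuple() opts in (see below), so these probe loops keep their previous strict semantics.
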
@@ -847,8 +847,9 @@ EXPORT_SYMBOL_GPL(__nf_conntrack_confirm);
/* Returns true if a connection corresponds to the tuple (required
for NAT). */
int
-nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
- const struct nf_conn *ignored_conntrack)
+nf_conntrack_reply_tuple_taken(const struct nf_conntrack_tuple *tuple,
+ const struct nf_conn *ignored_conntrack,
+ bool ignore_same_orig)
{
struct net *net = nf_ct_net(ignored_conntrack);
const struct nf_conntrack_zone *zone;
@@ -877,6 +878,11 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
}
if (nf_ct_key_equal(h, tuple, zone, net)) {
+ if (ignore_same_orig &&
+ nf_ct_tuple_equal(&ignored_conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
+ &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple)) {
+ continue;
+ }
NF_CT_STAT_INC_ATOMIC(net, found);
rcu_read_unlock();
return 1;
@@ -892,7 +898,7 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
return 0;
}
-EXPORT_SYMBOL_GPL(nf_conntrack_tuple_taken);
+EXPORT_SYMBOL_GPL(nf_conntrack_reply_tuple_taken);
#define NF_CT_EVICTION_RANGE 8
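
The heart of the change is the new skip rule: while scanning the hash for an entry whose reply tuple matches, an entry whose ORIGINAL tuple equals the ignored conntrack's ORIGINAL tuple is no longer counted as a clash. A standalone toy model of that rule, with stand-in types rather than the kernel's (the real code walks the conntrack hash under RCU):

#include <stdbool.h>
#include <stdio.h>

struct tuple { unsigned int src_ip, dst_ip; unsigned short sport, dport; };
struct conn  { struct tuple orig, reply; };

static bool tuple_equal(const struct tuple *a, const struct tuple *b)
{
	return a->src_ip == b->src_ip && a->dst_ip == b->dst_ip &&
	       a->sport == b->sport && a->dport == b->dport;
}

/* Models nf_conntrack_reply_tuple_taken(): a matching entry whose
 * ORIGINAL tuple equals the ignored conn's ORIGINAL tuple is skipped
 * when ignore_same_orig is set, because it came from the same socket. */
static bool reply_tuple_taken(const struct conn *table, size_t n,
			      const struct tuple *reply,
			      const struct conn *ignored,
			      bool ignore_same_orig)
{
	for (size_t i = 0; i < n; i++) {
		const struct conn *c = &table[i];

		if (c == ignored || !tuple_equal(&c->reply, reply))
			continue;
		if (ignore_same_orig && tuple_equal(&c->orig, &ignored->orig))
			continue;	/* same origin: not a real clash */
		return true;
	}
	return false;
}

int main(void)
{
	/* An unconfirmed conn created by the first of two racing UDP
	 * packets; the second packet produces an identical candidate. */
	struct conn table[] = {
		{ .orig = {1, 2, 1000, 53}, .reply = {2, 1, 53, 1000} },
	};
	struct conn racing = { .orig = {1, 2, 1000, 53},
			       .reply = {2, 1, 53, 1000} };

	printf("strict:  %d\n",
	       reply_tuple_taken(table, 1, &racing.reply, &racing, false));
	printf("relaxed: %d\n",
	       reply_tuple_taken(table, 1, &racing.reply, &racing, true));
	return 0;
}

With the strict search the second packet's candidate reply tuple reads as taken (output 1); with the relaxed search the same-origin entry is skipped (output 0), which is exactly what lets both racing packets converge on the same mapping.
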
@@ -154,7 +154,8 @@ hash_by_src(const struct net *n, const struct nf_conntrack_tuple *tuple)
/* Is this tuple already taken? (not by us) */
int
nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple,
- const struct nf_conn *ignored_conntrack)
+ const struct nf_conn *ignored_conntrack,
+ bool ignore_same_orig)
{
/* Conntrack tracking doesn't keep track of outgoing tuples; only
* incoming ones. NAT means they don't have a fixed mapping,
@@ -165,7 +166,15 @@ nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple,
struct nf_conntrack_tuple reply;
nf_ct_invert_tuplepr(&reply, tuple);
- return nf_conntrack_tuple_taken(&reply, ignored_conntrack);
+ /* If ignore_same_orig is set, the function below skips any matching
+ * CT whose IP_CT_DIR_ORIGINAL tuple is the same as ours.
+ *
+ * This is used for CTs of connection-less protocols such as UDP,
+ * where a clashing CT that originated from the same socket is not
+ * a real conflict.
+ */
+ return nf_conntrack_reply_tuple_taken(&reply, ignored_conntrack,
+ ignore_same_orig);
}
EXPORT_SYMBOL(nf_nat_used_tuple);
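
The wrapper's job is just the direction flip: a NAT candidate tuple is "used" if some existing conn would answer it, i.e. if its inversion exists in the reply direction. Reusing the toy types from the sketch above, the inversion is a plain src/dst swap (the real nf_ct_invert_tuplepr() goes through the per-protocol invert handlers):

/* Toy counterpart of nf_ct_invert_tuplepr(): build the reply-direction
 * tuple by swapping source and destination. */
static void invert_tuple(struct tuple *reply, const struct tuple *orig)
{
	reply->src_ip = orig->dst_ip;
	reply->dst_ip = orig->src_ip;
	reply->sport  = orig->dport;
	reply->dport  = orig->sport;
}

/* Toy counterpart of nf_nat_used_tuple(): invert, then ask the
 * reply-direction question, forwarding ignore_same_orig unchanged. */
static bool used_tuple(const struct conn *table, size_t n,
		       const struct tuple *tuple, const struct conn *ignored,
		       bool ignore_same_orig)
{
	struct tuple reply;

	invert_tuple(&reply, tuple);
	return reply_tuple_taken(table, n, &reply, ignored, ignore_same_orig);
}
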
@@ -323,7 +332,9 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_zone *zone;
const struct nf_nat_l3proto *l3proto;
const struct nf_nat_l4proto *l4proto;
+ const struct nf_conntrack_l4proto *ct_l4proto;
struct net *net = nf_ct_net(ct);
+ bool ignore_same_orig = false;
zone = nf_ct_zone(ct);
@@ -331,6 +342,16 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
l3proto = __nf_nat_l3proto_find(orig_tuple->src.l3num);
l4proto = __nf_nat_l4proto_find(orig_tuple->src.l3num,
orig_tuple->dst.protonum);
+ ct_l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
+
+ /* If the protocol allows clash resolution, then when searching
+ * for clashing CTs ignore the ones with the same IP_CT_DIR_ORIGINAL
+ * tuple, as they originate from the same socket. This prevents
+ * generating different reply tuples for two racing packets from
+ * the same connection-less (e.g. UDP) socket, which would otherwise
+ * result in one of the packets being dropped in __nf_conntrack_confirm.
+ */
+ ignore_same_orig = ct_l4proto->allow_clash;
/* 1) If this srcip/proto/src-proto-part is currently mapped,
* and that same mapping gives a unique tuple within the given
@@ -344,14 +365,15 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
!(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) {
/* try the original tuple first */
if (in_range(l3proto, l4proto, orig_tuple, range)) {
- if (!nf_nat_used_tuple(orig_tuple, ct)) {
+ if (!nf_nat_used_tuple(orig_tuple, ct,
+ ignore_same_orig)) {
*tuple = *orig_tuple;
goto out;
}
} else if (find_appropriate_src(net, zone, l3proto, l4proto,
orig_tuple, tuple, range)) {
pr_debug("get_unique_tuple: Found current src map\n");
- if (!nf_nat_used_tuple(tuple, ct))
+ if (!nf_nat_used_tuple(tuple, ct, ignore_same_orig))
goto out;
}
}
@@ -372,9 +394,9 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
&range->min_proto,
&range->max_proto) &&
(range->min_proto.all == range->max_proto.all ||
- !nf_nat_used_tuple(tuple, ct)))
+ !nf_nat_used_tuple(tuple, ct, ignore_same_orig)))
goto out;
- } else if (!nf_nat_used_tuple(tuple, ct)) {
+ } else if (!nf_nat_used_tuple(tuple, ct, ignore_same_orig)) {
goto out;
}
}
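
Only L4 trackers that opted into clash resolution get the relaxed search; in mainline of this era UDP sets allow_clash while TCP does not. A toy model of that opt-in, with a stand-in for struct nf_conntrack_l4proto (the protocol numbers are the real IANA values):

#include <stdbool.h>
#include <stddef.h>

/* Stand-in for the allow_clash flag on struct nf_conntrack_l4proto. */
struct l4proto_model { int protonum; bool allow_clash; };

static const struct l4proto_model protos[] = {
	{ 6,  false },	/* TCP: a same-origin duplicate is still a clash */
	{ 17, true  },	/* UDP: racing packets from one socket may clash */
};

/* Mirrors "ignore_same_orig = ct_l4proto->allow_clash" above. */
static bool should_ignore_same_orig(int protonum)
{
	for (size_t i = 0; i < sizeof(protos) / sizeof(protos[0]); i++)
		if (protos[i].protonum == protonum)
			return protos[i].allow_clash;
	return false;	/* unknown protocol: keep the strict search */
}
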
@@ -91,7 +91,7 @@ void nf_nat_l4proto_unique_tuple(const struct nf_nat_l3proto *l3proto,
for (i = 0; ; ++off) {
*portptr = htons(min + off % range_size);
- if (++i != range_size && nf_nat_used_tuple(tuple, ct))
+ if (++i != range_size && nf_nat_used_tuple(tuple, ct, false))
continue;
if (!(range->flags & (NF_NAT_RANGE_PROTO_RANDOM_ALL|
NF_NAT_RANGE_PROTO_OFFSET)))
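
For completeness, the generic port search this last hunk touches follows the same probe-until-free pattern as the GRE/ICMP id loops earlier, and it likewise passes false. A standalone model of the loop shape, where is_used() stands in for nf_nat_used_tuple(tuple, ct, false):

#include <stdbool.h>
#include <stdio.h>

/* Pretend ports 1024-1026 are already taken. */
static bool is_used(unsigned short port)
{
	return port >= 1024 && port < 1027;
}

/* Models nf_nat_l4proto_unique_tuple()'s loop: try candidates modulo
 * the range size, stopping at the first free port or after one full
 * cycle (the kernel then keeps the last candidate tried). */
static unsigned short pick_port(unsigned short min, unsigned int range_size,
				unsigned int off)
{
	for (unsigned int i = 0; ; ++off) {
		unsigned short port = min + off % range_size;

		if (++i == range_size || !is_used(port))
			return port;
	}
}

int main(void)
{
	printf("picked %u\n", pick_port(1024, 16, 0));	/* prints 1027 */
	return 0;
}
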