Message ID | 1552538743-31392-1-git-send-email-dlu998@gmail.com |
---|---|
State | Superseded |
Headers | show |
Series | [ovs-dev,v4,1/2] conntrack: Fix race for NAT cleanup. | expand |
> Reference lists are not fully protected during cleanup of > NAT connections where the bucket lock is transiently not held during > list traversal. This can lead to referencing freed memory during > cleaning from multiple contexts. Fix this by protecting with > the existing 'cleanup' mutex in the missed cases where 'conn_clean()' > is called. 'conntrack_flush()' is converted to expiry list traversal > to support the proper bucket level protection with the 'cleanup' mutex. > > Fixes: 286de2729955 ("dpdk: Userspace Datapath: Introduce NAT Support.") > Reported-by: solomon <liwei.solomon at gmail.com> > Reported-at: https://mail.openvswitch.org/pipermail/ovs-dev/2019-March/357056.html > Tested-by: solomon <liwei.solomon at gmail.com> > Signed-off-by: Darrell Ball <dlu998 at gmail.com> > --- > > This patch is targeted for earlier releases as new RCU patches > inherently don't have this race. > > v4: Fix exhaustion case cleanup race in conn_not_found() as well. > At the same time, simplify the related code. > > v3: Use cleanup_mutex in conntrack_destroy(). > > v2: Fix typo. 
> > lib/conntrack.c | 147 ++++++++++++++++++++++++++++++++++++++------------------ > 1 file changed, 99 insertions(+), 48 deletions(-) > > diff --git a/lib/conntrack.c b/lib/conntrack.c > index 691782c..96d7db8 100644 > --- a/lib/conntrack.c > +++ b/lib/conntrack.c > @@ -355,7 +355,7 @@ conntrack_destroy(struct conntrack *ct) > struct conntrack_bucket *ctb = &ct->buckets[i]; > struct conn *conn; > > - ovs_mutex_destroy(&ctb->cleanup_mutex); > + ovs_mutex_lock(&ctb->cleanup_mutex); > ct_lock_lock(&ctb->lock); > HMAP_FOR_EACH_POP (conn, node, &ctb->connections) { > if (conn->conn_type == CT_CONN_TYPE_DEFAULT) { > @@ -365,7 +365,9 @@ conntrack_destroy(struct conntrack *ct) > } > hmap_destroy(&ctb->connections); > ct_lock_unlock(&ctb->lock); > + ovs_mutex_unlock(&ctb->cleanup_mutex); > ct_lock_destroy(&ctb->lock); > + ovs_mutex_destroy(&ctb->cleanup_mutex); > } > ct_rwlock_wrlock(&ct->resources_lock); > struct nat_conn_key_node *nat_conn_key_node; > @@ -753,6 +755,24 @@ conn_lookup(struct conntrack *ct, const struct conn_key *key, long long now) > return ctx.conn; > } > > +/* This function is called with the bucket lock held. 
*/ > +static struct conn * > +conn_lookup_any(const struct conn_key *key, > + const struct conntrack_bucket *ctb, uint32_t hash) > +{ > + struct conn *conn = NULL; > + > + HMAP_FOR_EACH_WITH_HASH (conn, node, hash, &ctb->connections) { > + if (!conn_key_cmp(&conn->key, key)) { > + break; > + } > + if (!conn_key_cmp(&conn->rev_key, key)) { > + break; > + } > + } > + return conn; > +} > + > static void > conn_seq_skew_set(struct conntrack *ct, const struct conn_key *key, > long long now, int seq_skew, bool seq_skew_dir) > @@ -823,6 +843,20 @@ conn_clean(struct conntrack *ct, struct conn *conn, > } > } > > +static void > +conn_clean_safe(struct conntrack *ct, struct conn *conn, > + struct conntrack_bucket *ctb, uint32_t hash) > +{ > + ovs_mutex_lock(&ctb->cleanup_mutex); > + ct_lock_lock(&ctb->lock); > + conn = conn_lookup_any(&conn->key, ctb, hash); > + if (conn) { > + conn_clean(ct, conn, ctb); > + } > + ct_lock_unlock(&ctb->lock); > + ovs_mutex_unlock(&ctb->cleanup_mutex); > +} > + > static bool > ct_verify_helper(const char *helper, enum ct_alg_ctl_type ct_alg_ctl) > { > @@ -876,9 +910,11 @@ conn_not_found(struct conntrack *ct, struct dp_packet *pkt, > } > > unsigned bucket = hash_to_bucket(ctx->hash); > - nc = new_conn(&ct->buckets[bucket], pkt, &ctx->key, now); > - ctx->conn = nc; > - nc->rev_key = nc->key; > + struct conn connl; > + nc = &connl; > + memset(nc, 0, sizeof *nc); > + memcpy(&nc->key, &ctx->key, sizeof nc->key); > + memcpy(&nc->rev_key, &nc->key, sizeof nc->rev_key); > conn_key_reverse(&nc->rev_key); > > if (ct_verify_helper(helper, ct_alg_ctl)) { > @@ -921,6 +957,7 @@ conn_not_found(struct conntrack *ct, struct dp_packet *pkt, > ct_rwlock_wrlock(&ct->resources_lock); > bool nat_res = nat_select_range_tuple(ct, nc, > conn_for_un_nat_copy); > + ct_rwlock_unlock(&ct->resources_lock); > > if (!nat_res) { > goto nat_res_exhaustion; > @@ -928,15 +965,25 @@ conn_not_found(struct conntrack *ct, struct dp_packet *pkt, > > /* Update nc with nat adjustments 
made to > * conn_for_un_nat_copy by nat_select_range_tuple(). */ > - *nc = *conn_for_un_nat_copy; > - ct_rwlock_unlock(&ct->resources_lock); > + memcpy(nc, conn_for_un_nat_copy, sizeof *nc); > } > conn_for_un_nat_copy->conn_type = CT_CONN_TYPE_UN_NAT; > conn_for_un_nat_copy->nat_info = NULL; > conn_for_un_nat_copy->alg = NULL; > nat_packet(pkt, nc, ctx->icmp_related); > } > - hmap_insert(&ct->buckets[bucket].connections, &nc->node, ctx->hash); > + struct conn *nconn = new_conn(&ct->buckets[bucket], pkt, &ctx->key, > + now); > + memcpy(&nconn->key, &nc->key, sizeof nconn->key); > + memcpy(&nconn->rev_key, &nc->rev_key, sizeof nconn->rev_key); > + memcpy(&nconn->master_key, &nc->master_key, sizeof nconn->master_key); > + nconn->alg_related = nc->alg_related; > + nconn->alg = nc->alg; > + nconn->mark = nc->mark; > + nconn->label = nc->label; > + nconn->nat_info = nc->nat_info; > + ctx->conn = nc = nconn; > + hmap_insert(&ct->buckets[bucket].connections, &nconn->node, ctx->hash); > atomic_count_inc(&ct->n_conn); > } > > @@ -949,13 +996,8 @@ conn_not_found(struct conntrack *ct, struct dp_packet *pkt, > * against with firewall rules or a separate firewall. > * Also using zone partitioning can limit DoS impact. */ > nat_res_exhaustion: > - ovs_list_remove(&nc->exp_node); > - delete_conn(nc); > - /* conn_for_un_nat_copy is a local variable in process_one; this > - * memset() serves to document that conn_for_un_nat_copy is from > - * this point on unused. */ > - memset(conn_for_un_nat_copy, 0, sizeof *conn_for_un_nat_copy); > - ct_rwlock_unlock(&ct->resources_lock); > + free(nc->alg); Newer GCC complains here: lib/conntrack.c:1016:12: error: 'connl.alg' may be used uninitialized in this function [-Werror=maybe-uninitialized] free(nc->alg); ~~^~~~~ "struct conn connl" is allocated on stack, but you're using pointer to it after jumping out of its frame. Best regards, Ilya Maximets.
On Thu, Mar 14, 2019 at 8:25 AM Ilya Maximets <i.maximets@samsung.com> wrote: > > Reference lists are not fully protected during cleanup of > > NAT connections where the bucket lock is transiently not held during > > list traversal. This can lead to referencing freed memory during > > cleaning from multiple contexts. Fix this by protecting with > > the existing 'cleanup' mutex in the missed cases where 'conn_clean()' > > is called. 'conntrack_flush()' is converted to expiry list traversal > > to support the proper bucket level protection with the 'cleanup' mutex. > > > > Fixes: 286de2729955 ("dpdk: Userspace Datapath: Introduce NAT Support.") > > Reported-by: solomon <liwei.solomon at gmail.com> > > Reported-at: > https://mail.openvswitch.org/pipermail/ovs-dev/2019-March/357056.html > > Tested-by: solomon <liwei.solomon at gmail.com> > > Signed-off-by: Darrell Ball <dlu998 at gmail.com> > > --- > > > > This patch is targeted for earlier releases as new RCU patches > > inherently don't have this race. > > > > v4: Fix exhaustion case cleanup race in conn_not_found() as well. > > At the same time, simplify the related code. > > > > v3: Use cleanup_mutex in conntrack_destroy(). > > > > v2: Fix typo. 
> > > > lib/conntrack.c | 147 > ++++++++++++++++++++++++++++++++++++++------------------ > > 1 file changed, 99 insertions(+), 48 deletions(-) > > > > diff --git a/lib/conntrack.c b/lib/conntrack.c > > index 691782c..96d7db8 100644 > > --- a/lib/conntrack.c > > +++ b/lib/conntrack.c > > @@ -355,7 +355,7 @@ conntrack_destroy(struct conntrack *ct) > > struct conntrack_bucket *ctb = &ct->buckets[i]; > > struct conn *conn; > > > > - ovs_mutex_destroy(&ctb->cleanup_mutex); > > + ovs_mutex_lock(&ctb->cleanup_mutex); > > ct_lock_lock(&ctb->lock); > > HMAP_FOR_EACH_POP (conn, node, &ctb->connections) { > > if (conn->conn_type == CT_CONN_TYPE_DEFAULT) { > > @@ -365,7 +365,9 @@ conntrack_destroy(struct conntrack *ct) > > } > > hmap_destroy(&ctb->connections); > > ct_lock_unlock(&ctb->lock); > > + ovs_mutex_unlock(&ctb->cleanup_mutex); > > ct_lock_destroy(&ctb->lock); > > + ovs_mutex_destroy(&ctb->cleanup_mutex); > > } > > ct_rwlock_wrlock(&ct->resources_lock); > > struct nat_conn_key_node *nat_conn_key_node; > > @@ -753,6 +755,24 @@ conn_lookup(struct conntrack *ct, const struct > conn_key *key, long long now) > > return ctx.conn; > > } > > > > +/* This function is called with the bucket lock held. 
*/ > > +static struct conn * > > +conn_lookup_any(const struct conn_key *key, > > + const struct conntrack_bucket *ctb, uint32_t hash) > > +{ > > + struct conn *conn = NULL; > > + > > + HMAP_FOR_EACH_WITH_HASH (conn, node, hash, &ctb->connections) { > > + if (!conn_key_cmp(&conn->key, key)) { > > + break; > > + } > > + if (!conn_key_cmp(&conn->rev_key, key)) { > > + break; > > + } > > + } > > + return conn; > > +} > > + > > static void > > conn_seq_skew_set(struct conntrack *ct, const struct conn_key *key, > > long long now, int seq_skew, bool seq_skew_dir) > > @@ -823,6 +843,20 @@ conn_clean(struct conntrack *ct, struct conn *conn, > > } > > } > > > > +static void > > +conn_clean_safe(struct conntrack *ct, struct conn *conn, > > + struct conntrack_bucket *ctb, uint32_t hash) > > +{ > > + ovs_mutex_lock(&ctb->cleanup_mutex); > > + ct_lock_lock(&ctb->lock); > > + conn = conn_lookup_any(&conn->key, ctb, hash); > > + if (conn) { > > + conn_clean(ct, conn, ctb); > > + } > > + ct_lock_unlock(&ctb->lock); > > + ovs_mutex_unlock(&ctb->cleanup_mutex); > > +} > > + > > static bool > > ct_verify_helper(const char *helper, enum ct_alg_ctl_type ct_alg_ctl) > > { > > @@ -876,9 +910,11 @@ conn_not_found(struct conntrack *ct, struct > dp_packet *pkt, > > } > > > > unsigned bucket = hash_to_bucket(ctx->hash); > > - nc = new_conn(&ct->buckets[bucket], pkt, &ctx->key, now); > > - ctx->conn = nc; > > - nc->rev_key = nc->key; > > + struct conn connl; > > + nc = &connl; > > + memset(nc, 0, sizeof *nc); > > + memcpy(&nc->key, &ctx->key, sizeof nc->key); > > + memcpy(&nc->rev_key, &nc->key, sizeof nc->rev_key); > > conn_key_reverse(&nc->rev_key); > > > > if (ct_verify_helper(helper, ct_alg_ctl)) { > > @@ -921,6 +957,7 @@ conn_not_found(struct conntrack *ct, struct > dp_packet *pkt, > > ct_rwlock_wrlock(&ct->resources_lock); > > bool nat_res = nat_select_range_tuple(ct, nc, > > > conn_for_un_nat_copy); > > + ct_rwlock_unlock(&ct->resources_lock); > > > > if (!nat_res) { > > goto 
nat_res_exhaustion; > > @@ -928,15 +965,25 @@ conn_not_found(struct conntrack *ct, struct > dp_packet *pkt, > > > > /* Update nc with nat adjustments made to > > * conn_for_un_nat_copy by nat_select_range_tuple(). */ > > - *nc = *conn_for_un_nat_copy; > > - ct_rwlock_unlock(&ct->resources_lock); > > + memcpy(nc, conn_for_un_nat_copy, sizeof *nc); > > } > > conn_for_un_nat_copy->conn_type = CT_CONN_TYPE_UN_NAT; > > conn_for_un_nat_copy->nat_info = NULL; > > conn_for_un_nat_copy->alg = NULL; > > nat_packet(pkt, nc, ctx->icmp_related); > > } > > - hmap_insert(&ct->buckets[bucket].connections, &nc->node, > ctx->hash); > > + struct conn *nconn = new_conn(&ct->buckets[bucket], pkt, > &ctx->key, > > + now); > > + memcpy(&nconn->key, &nc->key, sizeof nconn->key); > > + memcpy(&nconn->rev_key, &nc->rev_key, sizeof nconn->rev_key); > > + memcpy(&nconn->master_key, &nc->master_key, sizeof > nconn->master_key); > > + nconn->alg_related = nc->alg_related; > > + nconn->alg = nc->alg; > > + nconn->mark = nc->mark; > > + nconn->label = nc->label; > > + nconn->nat_info = nc->nat_info; > > + ctx->conn = nc = nconn; > > + hmap_insert(&ct->buckets[bucket].connections, &nconn->node, > ctx->hash); > > atomic_count_inc(&ct->n_conn); > > } > > > > @@ -949,13 +996,8 @@ conn_not_found(struct conntrack *ct, struct > dp_packet *pkt, > > * against with firewall rules or a separate firewall. > > * Also using zone partitioning can limit DoS impact. */ > > nat_res_exhaustion: > > - ovs_list_remove(&nc->exp_node); > > - delete_conn(nc); > > - /* conn_for_un_nat_copy is a local variable in process_one; this > > - * memset() serves to document that conn_for_un_nat_copy is from > > - * this point on unused. 
*/ > > - memset(conn_for_un_nat_copy, 0, sizeof *conn_for_un_nat_copy); > > - ct_rwlock_unlock(&ct->resources_lock); > > + free(nc->alg); > > Newer GCC complains here: > > lib/conntrack.c:1016:12: error: 'connl.alg' may be used uninitialized in > this function [-Werror=maybe-uninitialized] > free(nc->alg); > ~~^~~~~ > "struct conn connl" is allocated on stack, but you're using a pointer > to it after jumping out of its frame. > Thanks. Theoretically, it is possible for a compiler implementation not to allocate enough stack for the deepest level of block nesting for a function. > > Best regards, Ilya Maximets. >
diff --git a/lib/conntrack.c b/lib/conntrack.c index 691782c..96d7db8 100644 --- a/lib/conntrack.c +++ b/lib/conntrack.c @@ -355,7 +355,7 @@ conntrack_destroy(struct conntrack *ct) struct conntrack_bucket *ctb = &ct->buckets[i]; struct conn *conn; - ovs_mutex_destroy(&ctb->cleanup_mutex); + ovs_mutex_lock(&ctb->cleanup_mutex); ct_lock_lock(&ctb->lock); HMAP_FOR_EACH_POP (conn, node, &ctb->connections) { if (conn->conn_type == CT_CONN_TYPE_DEFAULT) { @@ -365,7 +365,9 @@ conntrack_destroy(struct conntrack *ct) } hmap_destroy(&ctb->connections); ct_lock_unlock(&ctb->lock); + ovs_mutex_unlock(&ctb->cleanup_mutex); ct_lock_destroy(&ctb->lock); + ovs_mutex_destroy(&ctb->cleanup_mutex); } ct_rwlock_wrlock(&ct->resources_lock); struct nat_conn_key_node *nat_conn_key_node; @@ -753,6 +755,24 @@ conn_lookup(struct conntrack *ct, const struct conn_key *key, long long now) return ctx.conn; } +/* This function is called with the bucket lock held. */ +static struct conn * +conn_lookup_any(const struct conn_key *key, + const struct conntrack_bucket *ctb, uint32_t hash) +{ + struct conn *conn = NULL; + + HMAP_FOR_EACH_WITH_HASH (conn, node, hash, &ctb->connections) { + if (!conn_key_cmp(&conn->key, key)) { + break; + } + if (!conn_key_cmp(&conn->rev_key, key)) { + break; + } + } + return conn; +} + static void conn_seq_skew_set(struct conntrack *ct, const struct conn_key *key, long long now, int seq_skew, bool seq_skew_dir) @@ -823,6 +843,20 @@ conn_clean(struct conntrack *ct, struct conn *conn, } } +static void +conn_clean_safe(struct conntrack *ct, struct conn *conn, + struct conntrack_bucket *ctb, uint32_t hash) +{ + ovs_mutex_lock(&ctb->cleanup_mutex); + ct_lock_lock(&ctb->lock); + conn = conn_lookup_any(&conn->key, ctb, hash); + if (conn) { + conn_clean(ct, conn, ctb); + } + ct_lock_unlock(&ctb->lock); + ovs_mutex_unlock(&ctb->cleanup_mutex); +} + static bool ct_verify_helper(const char *helper, enum ct_alg_ctl_type ct_alg_ctl) { @@ -876,9 +910,11 @@ conn_not_found(struct 
conntrack *ct, struct dp_packet *pkt, } unsigned bucket = hash_to_bucket(ctx->hash); - nc = new_conn(&ct->buckets[bucket], pkt, &ctx->key, now); - ctx->conn = nc; - nc->rev_key = nc->key; + struct conn connl; + nc = &connl; + memset(nc, 0, sizeof *nc); + memcpy(&nc->key, &ctx->key, sizeof nc->key); + memcpy(&nc->rev_key, &nc->key, sizeof nc->rev_key); conn_key_reverse(&nc->rev_key); if (ct_verify_helper(helper, ct_alg_ctl)) { @@ -921,6 +957,7 @@ conn_not_found(struct conntrack *ct, struct dp_packet *pkt, ct_rwlock_wrlock(&ct->resources_lock); bool nat_res = nat_select_range_tuple(ct, nc, conn_for_un_nat_copy); + ct_rwlock_unlock(&ct->resources_lock); if (!nat_res) { goto nat_res_exhaustion; @@ -928,15 +965,25 @@ conn_not_found(struct conntrack *ct, struct dp_packet *pkt, /* Update nc with nat adjustments made to * conn_for_un_nat_copy by nat_select_range_tuple(). */ - *nc = *conn_for_un_nat_copy; - ct_rwlock_unlock(&ct->resources_lock); + memcpy(nc, conn_for_un_nat_copy, sizeof *nc); } conn_for_un_nat_copy->conn_type = CT_CONN_TYPE_UN_NAT; conn_for_un_nat_copy->nat_info = NULL; conn_for_un_nat_copy->alg = NULL; nat_packet(pkt, nc, ctx->icmp_related); } - hmap_insert(&ct->buckets[bucket].connections, &nc->node, ctx->hash); + struct conn *nconn = new_conn(&ct->buckets[bucket], pkt, &ctx->key, + now); + memcpy(&nconn->key, &nc->key, sizeof nconn->key); + memcpy(&nconn->rev_key, &nc->rev_key, sizeof nconn->rev_key); + memcpy(&nconn->master_key, &nc->master_key, sizeof nconn->master_key); + nconn->alg_related = nc->alg_related; + nconn->alg = nc->alg; + nconn->mark = nc->mark; + nconn->label = nc->label; + nconn->nat_info = nc->nat_info; + ctx->conn = nc = nconn; + hmap_insert(&ct->buckets[bucket].connections, &nconn->node, ctx->hash); atomic_count_inc(&ct->n_conn); } @@ -949,13 +996,8 @@ conn_not_found(struct conntrack *ct, struct dp_packet *pkt, * against with firewall rules or a separate firewall. * Also using zone partitioning can limit DoS impact. 
*/ nat_res_exhaustion: - ovs_list_remove(&nc->exp_node); - delete_conn(nc); - /* conn_for_un_nat_copy is a local variable in process_one; this - * memset() serves to document that conn_for_un_nat_copy is from - * this point on unused. */ - memset(conn_for_un_nat_copy, 0, sizeof *conn_for_un_nat_copy); - ct_rwlock_unlock(&ct->resources_lock); + free(nc->alg); + free(nc->nat_info); static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5); VLOG_WARN_RL(&rl, "Unable to NAT due to tuple space exhaustion - " "if DoS attack, use firewalling and/or zone partitioning."); @@ -969,6 +1011,7 @@ conn_update_state(struct conntrack *ct, struct dp_packet *pkt, OVS_REQUIRES(ct->buckets[bucket].lock) { bool create_new_conn = false; + struct conn lconn; if (ctx->icmp_related) { pkt->md.ct_state |= CS_RELATED; @@ -995,7 +1038,10 @@ conn_update_state(struct conntrack *ct, struct dp_packet *pkt, pkt->md.ct_state = CS_INVALID; break; case CT_UPDATE_NEW: - conn_clean(ct, *conn, &ct->buckets[bucket]); + memcpy(&lconn, *conn, sizeof lconn); + ct_lock_unlock(&ct->buckets[bucket].lock); + conn_clean_safe(ct, &lconn, &ct->buckets[bucket], ctx->hash); + ct_lock_lock(&ct->buckets[bucket].lock); create_new_conn = true; break; default: @@ -1184,8 +1230,12 @@ process_one(struct conntrack *ct, struct dp_packet *pkt, conn = ctx->conn; /* Delete found entry if in wrong direction. 'force' implies commit. 
*/ - if (conn && force && ctx->reply) { - conn_clean(ct, conn, &ct->buckets[bucket]); + if (OVS_UNLIKELY(force && ctx->reply && conn)) { + struct conn lconn; + memcpy(&lconn, conn, sizeof lconn); + ct_lock_unlock(&ct->buckets[bucket].lock); + conn_clean_safe(ct, &lconn, &ct->buckets[bucket], ctx->hash); + ct_lock_lock(&ct->buckets[bucket].lock); conn = NULL; } @@ -1391,19 +1441,17 @@ sweep_bucket(struct conntrack *ct, struct conntrack_bucket *ctb, for (unsigned i = 0; i < N_CT_TM; i++) { LIST_FOR_EACH_SAFE (conn, next, exp_node, &ctb->exp_lists[i]) { - if (conn->conn_type == CT_CONN_TYPE_DEFAULT) { - if (!conn_expired(conn, now) || count >= limit) { - min_expiration = MIN(min_expiration, conn->expiration); - if (count >= limit) { - /* Do not check other lists. */ - COVERAGE_INC(conntrack_long_cleanup); - return min_expiration; - } - break; + if (!conn_expired(conn, now) || count >= limit) { + min_expiration = MIN(min_expiration, conn->expiration); + if (count >= limit) { + /* Do not check other lists. 
*/ + COVERAGE_INC(conntrack_long_cleanup); + return min_expiration; } - conn_clean(ct, conn, ctb); - count++; + break; } + conn_clean(ct, conn, ctb); + count++; } } return min_expiration; @@ -2264,8 +2312,10 @@ nat_conn_keys_insert(struct hmap *nat_conn_keys, const struct conn *nat_conn, if (!nat_conn_key_node) { struct nat_conn_key_node *nat_conn_key = xzalloc(sizeof *nat_conn_key); - nat_conn_key->key = nat_conn->rev_key; - nat_conn_key->value = nat_conn->key; + memcpy(&nat_conn_key->key, &nat_conn->rev_key, + sizeof nat_conn_key->key); + memcpy(&nat_conn_key->value, &nat_conn->key, + sizeof nat_conn_key->value); hmap_insert(nat_conn_keys, &nat_conn_key->node, conn_key_hash(&nat_conn_key->key, basis)); return true; @@ -2344,12 +2394,7 @@ static struct conn * new_conn(struct conntrack_bucket *ctb, struct dp_packet *pkt, struct conn_key *key, long long now) { - struct conn *newconn = l4_protos[key->nw_proto]->new_conn(ctb, pkt, now); - if (newconn) { - newconn->key = *key; - } - - return newconn; + return(l4_protos[key->nw_proto]->new_conn(ctb, pkt, now)); } static void @@ -2547,16 +2592,19 @@ int conntrack_flush(struct conntrack *ct, const uint16_t *zone) { for (unsigned i = 0; i < CONNTRACK_BUCKETS; i++) { - struct conn *conn, *next; - - ct_lock_lock(&ct->buckets[i].lock); - HMAP_FOR_EACH_SAFE (conn, next, node, &ct->buckets[i].connections) { - if ((!zone || *zone == conn->key.zone) && - (conn->conn_type == CT_CONN_TYPE_DEFAULT)) { - conn_clean(ct, conn, &ct->buckets[i]); + struct conntrack_bucket *ctb = &ct->buckets[i]; + ovs_mutex_lock(&ctb->cleanup_mutex); + ct_lock_lock(&ctb->lock); + for (unsigned j = 0; j < N_CT_TM; j++) { + struct conn *conn, *next; + LIST_FOR_EACH_SAFE (conn, next, exp_node, &ctb->exp_lists[j]) { + if (!zone || *zone == conn->key.zone) { + conn_clean(ct, conn, ctb); + } } } - ct_lock_unlock(&ct->buckets[i].lock); + ct_lock_unlock(&ctb->lock); + ovs_mutex_unlock(&ctb->cleanup_mutex); } return 0; @@ -2573,16 +2621,19 @@ 
conntrack_flush_tuple(struct conntrack *ct, const struct ct_dpif_tuple *tuple, tuple_to_conn_key(tuple, zone, &ctx.key); ctx.hash = conn_key_hash(&ctx.key, ct->hash_basis); unsigned bucket = hash_to_bucket(ctx.hash); + struct conntrack_bucket *ctb = &ct->buckets[bucket]; - ct_lock_lock(&ct->buckets[bucket].lock); - conn_key_lookup(&ct->buckets[bucket], &ctx, time_msec()); + ovs_mutex_lock(&ctb->cleanup_mutex); + ct_lock_lock(&ctb->lock); + conn_key_lookup(ctb, &ctx, time_msec()); if (ctx.conn && ctx.conn->conn_type == CT_CONN_TYPE_DEFAULT) { - conn_clean(ct, ctx.conn, &ct->buckets[bucket]); + conn_clean(ct, ctx.conn, ctb); } else { VLOG_WARN("Must flush tuple using the original pre-NATed tuple"); error = ENOENT; } - ct_lock_unlock(&ct->buckets[bucket].lock); + ct_lock_unlock(&ctb->lock); + ovs_mutex_unlock(&ctb->cleanup_mutex); return error; }