diff mbox series

[ovs-dev,v3] ofproto-dpif-upcall: Avoid stale ukeys leaks.

Message ID 3519b107b74a3301783492cc649eec26cdeef087.1723625382.git.echaudro@redhat.com
State Changes Requested
Delegated to: Eelco Chaudron
Headers show
Series [ovs-dev,v3] ofproto-dpif-upcall: Avoid stale ukeys leaks. | expand

Checks

Context Check Description
ovsrobot/apply-robot success apply and check: success
ovsrobot/github-robot-_Build_and_Test success github build: passed
ovsrobot/intel-ovs-compilation fail test: fail

Commit Message

Eelco Chaudron Aug. 14, 2024, 8:49 a.m. UTC
It is observed in some environments that there are much more ukeys than
actual DP flows. For example:

$ ovs-appctl upcall/show
system@ovs-system:
flows : (current 7) (avg 6) (max 117) (limit 2125)
offloaded flows : 525
dump duration : 1063ms
ufid enabled : true

23: (keys 3612)
24: (keys 3625)
25: (keys 3485)

The revalidator threads are busy revalidating the stale ukeys leading to
high CPU and long dump duration.

This patch records the last dump timestamp. If the flow was not dumped
for at least twice the idle time, we can assume the datapath flow now
longer exists and the ukey should be deleted.

Reported-by: Roi Dayan <roid@nvidia.com>
Co-authored-by: Han Zhou <hzhou@ovn.org>
Co-authored-by: Roi Dayan <roid@nvidia.com>
Signed-off-by: Han Zhou <hzhou@ovn.org>
Signed-off-by: Roi Dayan <roid@nvidia.com>
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
---

v3: Rewrote fix to use actual dump state, and added a tests case.
---
 ofproto/ofproto-dpif-upcall.c                | 14 ++++-
 tests/system-offloads-traffic.at             | 64 ++++++++++++++++++++
 utilities/usdt-scripts/flow_reval_monitor.py |  2 +
 3 files changed, 79 insertions(+), 1 deletion(-)

Comments

Eelco Chaudron Aug. 14, 2024, 8:55 a.m. UTC | #1
On 14 Aug 2024, at 10:49, Eelco Chaudron wrote:

> It is observed in some environments that there are much more ukeys than
> actual DP flows. For example:
>
> $ ovs-appctl upcall/show
> system@ovs-system:
> flows : (current 7) (avg 6) (max 117) (limit 2125)
> offloaded flows : 525
> dump duration : 1063ms
> ufid enabled : true
>
> 23: (keys 3612)
> 24: (keys 3625)
> 25: (keys 3485)
>
> The revalidator threads are busy revalidating the stale ukeys leading to
> high CPU and long dump duration.
>
> This patch records the last dump timestamp. If the flow was not dumped
> for at least twice the idle time, we can assume the datapath flow now
> longer exists and the ukey should be deleted.

Roi can you make sure the test passes on your system? I did 1024+ runs on my system and it passes each time. Also on GitHub it works, but your machine might be different, as it failed their in the past.

Thanks,

Eelco

> Reported-by: Roi Dayan <roid@nvidia.com>
> Co-authored-by: Han Zhou <hzhou@ovn.org>
> Co-authored-by: Roi Dayan <roid@nvidia.com>
> Signed-off-by: Han Zhou <hzhou@ovn.org>
> Signed-off-by: Roi Dayan <roid@nvidia.com>
> Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
> ---
>
> v3: Rewrote fix to use actual dump state, and added a tests case.
> ---
>  ofproto/ofproto-dpif-upcall.c                | 14 ++++-
>  tests/system-offloads-traffic.at             | 64 ++++++++++++++++++++
>  utilities/usdt-scripts/flow_reval_monitor.py |  2 +
>  3 files changed, 79 insertions(+), 1 deletion(-)
>
> diff --git a/ofproto/ofproto-dpif-upcall.c b/ofproto/ofproto-dpif-upcall.c
> index 4d39bc5a7..5c130b694 100644
> --- a/ofproto/ofproto-dpif-upcall.c
> +++ b/ofproto/ofproto-dpif-upcall.c
> @@ -57,6 +57,7 @@ COVERAGE_DEFINE(dumped_inconsistent_flow);
>  COVERAGE_DEFINE(dumped_new_flow);
>  COVERAGE_DEFINE(handler_duplicate_upcall);
>  COVERAGE_DEFINE(revalidate_missed_dp_flow);
> +COVERAGE_DEFINE(revalidate_missing_dp_flow);
>  COVERAGE_DEFINE(ukey_dp_change);
>  COVERAGE_DEFINE(ukey_invalid_stat_reset);
>  COVERAGE_DEFINE(ukey_replace_contention);
> @@ -278,6 +279,7 @@ enum flow_del_reason {
>      FDR_BAD_ODP_FIT,        /* Bad ODP flow fit. */
>      FDR_FLOW_IDLE,          /* Flow idle timeout. */
>      FDR_FLOW_LIMIT,         /* Kill all flows condition reached. */
> +    FDR_FLOW_MISSING_DP,    /* Flow is missing from the datapath. */
>      FDR_FLOW_WILDCARDED,    /* Flow needs a narrower wildcard mask. */
>      FDR_NO_OFPROTO,         /* Bridge not found. */
>      FDR_PURGE,              /* User requested flow deletion. */
> @@ -315,6 +317,7 @@ struct udpif_key {
>      struct dpif_flow_stats stats OVS_GUARDED; /* Last known stats.*/
>      const char *dp_layer OVS_GUARDED;         /* Last known dp_layer. */
>      long long int created OVS_GUARDED;        /* Estimate of creation time. */
> +    long long int last_dumped OVS_GUARDED;    /* Flow last dump time. */
>      uint64_t dump_seq OVS_GUARDED;            /* Tracks udpif->dump_seq. */
>      uint64_t reval_seq OVS_GUARDED;           /* Tracks udpif->reval_seq. */
>      enum ukey_state state OVS_GUARDED;        /* Tracks ukey lifetime. */
> @@ -1825,6 +1828,7 @@ ukey_create__(const struct nlattr *key, size_t key_len,
>      ukey->state_thread = ovsthread_id_self();
>      ukey->state_where = OVS_SOURCE_LOCATOR;
>      ukey->created = ukey->flow_time = time_msec();
> +    ukey->last_dumped = 0;
>      memset(&ukey->stats, 0, sizeof ukey->stats);
>      ukey->stats.used = used;
>      ukey->dp_layer = NULL;
> @@ -2456,7 +2460,14 @@ revalidate_ukey(struct udpif *udpif, struct udpif_key *ukey,
>          log_unexpected_stats_jump(ukey, stats);
>      }
>
> -    if (need_revalidate) {
> +    if ((ukey->last_dumped ? ukey->last_dumped : ukey->created)
> +        < udpif->dpif->current_ms - (2 * ofproto_max_idle)) {
> +        /* If the flow was not dumped for at least twice the idle time,
> +         * we can assume the datapath flow now longer exists and the ukey
> +         * should be deleted. */
> +        COVERAGE_INC(revalidate_missing_dp_flow);
> +        *del_reason = FDR_FLOW_MISSING_DP;
> +    } else if (need_revalidate) {
>          if (should_revalidate(udpif, ukey, push.n_packets)) {
>              if (!ukey->xcache) {
>                  ukey->xcache = xlate_cache_new();
> @@ -2890,6 +2901,7 @@ revalidate(struct revalidator *revalidator)
>                  continue;
>              }
>
> +            ukey->last_dumped = now;
>              ukey->offloaded = f->attrs.offloaded;
>              if (!ukey->dp_layer
>                  || (!dpif_synced_dp_layers(udpif->dpif)
> diff --git a/tests/system-offloads-traffic.at b/tests/system-offloads-traffic.at
> index d1da33d96..a9416cbce 100644
> --- a/tests/system-offloads-traffic.at
> +++ b/tests/system-offloads-traffic.at
> @@ -93,6 +93,70 @@ AT_CHECK([ovs-appctl upcall/show | grep -E "offloaded flows : [[1-9]]"], [0], [i
>  OVS_TRAFFIC_VSWITCHD_STOP
>  AT_CLEANUP
>
> +AT_SETUP([offloads - Forced removed datapath entries])
> +OVS_TRAFFIC_VSWITCHD_START([], [], [-- set Open_vSwitch . other_config:hw-offload=true])
> +
> +AT_CHECK([ovs-ofctl add-flow br0 "actions=normal"])
> +
> +ADD_NAMESPACES(at_ns0, at_ns1)
> +
> +ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24")
> +ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24")
> +AT_CHECK([ovs-appctl dpctl/dump-flows], [0], [ignore])
> +
> +AT_CHECK([ovs-appctl vlog/set ofproto_dpif_upcall:dbg])
> +
> +dnl Set idle timeout to 1 second.
> +AT_CHECK([ovs-vsctl set Open_vSwitch . other_config:max-idle=1000])
> +
> +NS_CHECK_EXEC([at_ns0], [ping -q -c 10 -i 0.1 -W 2 10.1.1.2 | FORMAT_PING], [0], [dnl
> +10 packets transmitted, 10 received, 0% packet loss, time 0ms
> +])
> +
> +AT_CHECK([ovs-appctl dpctl/dump-flows type=tc,offloaded | grep "eth_type(0x0800)" | DUMP_CLEAN_SORTED], [0], [dnl
> +in_port(2),eth(macs),eth_type(0x0800),ipv4(frag=no), packets:9, bytes:756, used:0.001s, actions:output
> +in_port(3),eth(macs),eth_type(0x0800),ipv4(frag=no), packets:9, bytes:756, used:0.001s, actions:output
> +])
> +
> +AT_CHECK([ovs-appctl dpctl/dump-flows type=tc,offloaded | grep -q "arp" ], [0])
> +
> +dnl Delete IPv4 entries, but keep the ARP ones.
> +AT_CHECK([tc filter del dev ovs-p0 ingress protocol ip pref 2])
> +AT_CHECK([tc filter del dev ovs-p1 ingress protocol ip pref 2])
> +
> +dnl Bring the remote ports down to avoid traffic.
> +AT_CHECK([ip -n at_ns0 link set p0 down])
> +AT_CHECK([ip -n at_ns1 link set p1 down])
> +
> +dnl Wait until the ARP flow has timed out.
> +OVS_WAIT_UNTIL([test `ovs-appctl dpctl/dump-flows type=tc,offloaded | \
> +                  grep "arp" | wc -l` -eq 0])
> +
> +dnl Set max-idle to 10 ms, so we are sure the max-idle * 2 has been reached.
> +AT_CHECK([ovs-vsctl set Open_vSwitch . other_config:max-idle=10])
> +
> +dnl Add and delete a port to force the revalidation.
> +for i in `seq 3`; do
> +  REV_RECONFIGURE=$(ovs-appctl coverage/read-counter rev_reconfigure)
> +  AT_CHECK([ovs-vsctl add-port br0 ovs-p2 -- \
> +              set interface ovs-p2 type=internal])
> +  AT_CHECK([ovs-vsctl del-port ovs-p2 ])
> +  OVS_WAIT_UNTIL([test `ovs-appctl coverage/read-counter rev_reconfigure` \
> +                    -gt $REV_RECONFIGURE])
> +done
> +
> +dnl Wait for another full round of revalidation.  After this all ukeys should
> +dnl be gone.
> +AT_CHECK([ovs-appctl revalidator/wait], [0])
> +AT_CHECK([ovs-appctl revalidator/wait], [0])
> +
> +dnl Make sure no more ukeys exists.
> +AT_CHECK([ovs-appctl upcall/show | grep '(keys' | awk '{print $3}' | \
> +            grep -qv '0)'], [1])
> +
> +OVS_TRAFFIC_VSWITCHD_STOP(["/failed to flow_del (No such file or directory)/d"])
> +AT_CLEANUP
> +
>  AT_SETUP([offloads - set ingress_policing_rate and ingress_policing_burst - offloads disabled])
>  AT_KEYWORDS([ingress_policing])
>  OVS_CHECK_TC_QDISC()
> diff --git a/utilities/usdt-scripts/flow_reval_monitor.py b/utilities/usdt-scripts/flow_reval_monitor.py
> index 28479a565..1de06c73b 100755
> --- a/utilities/usdt-scripts/flow_reval_monitor.py
> +++ b/utilities/usdt-scripts/flow_reval_monitor.py
> @@ -249,6 +249,7 @@ FdrReasons = IntEnum(
>          "FDR_BAD_ODP_FIT",
>          "FDR_FLOW_IDLE",
>          "FDR_FLOW_LIMIT",
> +        "FDR_FLOW_MISSING_DP",
>          "FDR_FLOW_WILDCARDED",
>          "FDR_NO_OFPROTO",
>          "FDR_PURGE",
> @@ -265,6 +266,7 @@ FdrReasonStrings = {
>      FdrReasons.FDR_BAD_ODP_FIT: "Bad ODP flow fit",
>      FdrReasons.FDR_FLOW_IDLE: "Flow idle timeout",
>      FdrReasons.FDR_FLOW_LIMIT: "Kill all flows condition reached",
> +    FdrReasons.FDR_FLOW_MISSING_DP: "Flow is missing from the datapath",
>      FdrReasons.FDR_FLOW_WILDCARDED: "Flow needs a narrower wildcard mask",
>      FdrReasons.FDR_NO_OFPROTO: "Bridge not found",
>      FdrReasons.FDR_PURGE: "User requested flow deletion",
> -- 
> 2.45.2
>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Simon Horman Aug. 14, 2024, 12:02 p.m. UTC | #2
On Wed, Aug 14, 2024 at 10:49:42AM +0200, Eelco Chaudron wrote:
> It is observed in some environments that there are much more ukeys than
> actual DP flows. For example:
> 
> $ ovs-appctl upcall/show
> system@ovs-system:
> flows : (current 7) (avg 6) (max 117) (limit 2125)
> offloaded flows : 525
> dump duration : 1063ms
> ufid enabled : true
> 
> 23: (keys 3612)
> 24: (keys 3625)
> 25: (keys 3485)
> 
> The revalidator threads are busy revalidating the stale ukeys leading to
> high CPU and long dump duration.
> 
> This patch records the last dump timestamp. If the flow was not dumped
> for at least twice the idle time, we can assume the datapath flow now
> longer exists and the ukey should be deleted.
> 
> Reported-by: Roi Dayan <roid@nvidia.com>
> Co-authored-by: Han Zhou <hzhou@ovn.org>
> Co-authored-by: Roi Dayan <roid@nvidia.com>
> Signed-off-by: Han Zhou <hzhou@ovn.org>
> Signed-off-by: Roi Dayan <roid@nvidia.com>
> Signed-off-by: Eelco Chaudron <echaudro@redhat.com>

Acked-by: Simon Horman <horms@ovn.org>
Roi Dayan Aug. 18, 2024, 8:30 a.m. UTC | #3
On 14/08/2024 11:55, Eelco Chaudron wrote:
> 
> 
> On 14 Aug 2024, at 10:49, Eelco Chaudron wrote:
> 
>> It is observed in some environments that there are much more ukeys than
>> actual DP flows. For example:
>>
>> $ ovs-appctl upcall/show
>> system@ovs-system:
>> flows : (current 7) (avg 6) (max 117) (limit 2125)
>> offloaded flows : 525
>> dump duration : 1063ms
>> ufid enabled : true
>>
>> 23: (keys 3612)
>> 24: (keys 3625)
>> 25: (keys 3485)
>>
>> The revalidator threads are busy revalidating the stale ukeys leading to
>> high CPU and long dump duration.
>>
>> This patch records the last dump timestamp. If the flow was not dumped
>> for at least twice the idle time, we can assume the datapath flow now
>> longer exists and the ukey should be deleted.
> 
> Roi can you make sure the test passes on your system? I did 1024+ runs on my system and it passes each time. Also on GitHub it works, but your machine might be different, as it failed their in the past.
> 
> Thanks,
> 
> Eelco

Hi,

Everything seems to be working fine. the test pass for me.

Thanks,
Roi

> 
>> Reported-by: Roi Dayan <roid@nvidia.com>
>> Co-authored-by: Han Zhou <hzhou@ovn.org>
>> Co-authored-by: Roi Dayan <roid@nvidia.com>
>> Signed-off-by: Han Zhou <hzhou@ovn.org>
>> Signed-off-by: Roi Dayan <roid@nvidia.com>
>> Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
>> ---
>>
>> v3: Rewrote fix to use actual dump state, and added a tests case.
>> ---
>>  ofproto/ofproto-dpif-upcall.c                | 14 ++++-
>>  tests/system-offloads-traffic.at             | 64 ++++++++++++++++++++
>>  utilities/usdt-scripts/flow_reval_monitor.py |  2 +
>>  3 files changed, 79 insertions(+), 1 deletion(-)
>>
>> diff --git a/ofproto/ofproto-dpif-upcall.c b/ofproto/ofproto-dpif-upcall.c
>> index 4d39bc5a7..5c130b694 100644
>> --- a/ofproto/ofproto-dpif-upcall.c
>> +++ b/ofproto/ofproto-dpif-upcall.c
>> @@ -57,6 +57,7 @@ COVERAGE_DEFINE(dumped_inconsistent_flow);
>>  COVERAGE_DEFINE(dumped_new_flow);
>>  COVERAGE_DEFINE(handler_duplicate_upcall);
>>  COVERAGE_DEFINE(revalidate_missed_dp_flow);
>> +COVERAGE_DEFINE(revalidate_missing_dp_flow);
>>  COVERAGE_DEFINE(ukey_dp_change);
>>  COVERAGE_DEFINE(ukey_invalid_stat_reset);
>>  COVERAGE_DEFINE(ukey_replace_contention);
>> @@ -278,6 +279,7 @@ enum flow_del_reason {
>>      FDR_BAD_ODP_FIT,        /* Bad ODP flow fit. */
>>      FDR_FLOW_IDLE,          /* Flow idle timeout. */
>>      FDR_FLOW_LIMIT,         /* Kill all flows condition reached. */
>> +    FDR_FLOW_MISSING_DP,    /* Flow is missing from the datapath. */
>>      FDR_FLOW_WILDCARDED,    /* Flow needs a narrower wildcard mask. */
>>      FDR_NO_OFPROTO,         /* Bridge not found. */
>>      FDR_PURGE,              /* User requested flow deletion. */
>> @@ -315,6 +317,7 @@ struct udpif_key {
>>      struct dpif_flow_stats stats OVS_GUARDED; /* Last known stats.*/
>>      const char *dp_layer OVS_GUARDED;         /* Last known dp_layer. */
>>      long long int created OVS_GUARDED;        /* Estimate of creation time. */
>> +    long long int last_dumped OVS_GUARDED;    /* Flow last dump time. */
>>      uint64_t dump_seq OVS_GUARDED;            /* Tracks udpif->dump_seq. */
>>      uint64_t reval_seq OVS_GUARDED;           /* Tracks udpif->reval_seq. */
>>      enum ukey_state state OVS_GUARDED;        /* Tracks ukey lifetime. */
>> @@ -1825,6 +1828,7 @@ ukey_create__(const struct nlattr *key, size_t key_len,
>>      ukey->state_thread = ovsthread_id_self();
>>      ukey->state_where = OVS_SOURCE_LOCATOR;
>>      ukey->created = ukey->flow_time = time_msec();
>> +    ukey->last_dumped = 0;
>>      memset(&ukey->stats, 0, sizeof ukey->stats);
>>      ukey->stats.used = used;
>>      ukey->dp_layer = NULL;
>> @@ -2456,7 +2460,14 @@ revalidate_ukey(struct udpif *udpif, struct udpif_key *ukey,
>>          log_unexpected_stats_jump(ukey, stats);
>>      }
>>
>> -    if (need_revalidate) {
>> +    if ((ukey->last_dumped ? ukey->last_dumped : ukey->created)
>> +        < udpif->dpif->current_ms - (2 * ofproto_max_idle)) {
>> +        /* If the flow was not dumped for at least twice the idle time,
>> +         * we can assume the datapath flow now longer exists and the ukey
>> +         * should be deleted. */
>> +        COVERAGE_INC(revalidate_missing_dp_flow);
>> +        *del_reason = FDR_FLOW_MISSING_DP;
>> +    } else if (need_revalidate) {
>>          if (should_revalidate(udpif, ukey, push.n_packets)) {
>>              if (!ukey->xcache) {
>>                  ukey->xcache = xlate_cache_new();
>> @@ -2890,6 +2901,7 @@ revalidate(struct revalidator *revalidator)
>>                  continue;
>>              }
>>
>> +            ukey->last_dumped = now;
>>              ukey->offloaded = f->attrs.offloaded;
>>              if (!ukey->dp_layer
>>                  || (!dpif_synced_dp_layers(udpif->dpif)
>> diff --git a/tests/system-offloads-traffic.at b/tests/system-offloads-traffic.at
>> index d1da33d96..a9416cbce 100644
>> --- a/tests/system-offloads-traffic.at
>> +++ b/tests/system-offloads-traffic.at
>> @@ -93,6 +93,70 @@ AT_CHECK([ovs-appctl upcall/show | grep -E "offloaded flows : [[1-9]]"], [0], [i
>>  OVS_TRAFFIC_VSWITCHD_STOP
>>  AT_CLEANUP
>>
>> +AT_SETUP([offloads - Forced removed datapath entries])
>> +OVS_TRAFFIC_VSWITCHD_START([], [], [-- set Open_vSwitch . other_config:hw-offload=true])
>> +
>> +AT_CHECK([ovs-ofctl add-flow br0 "actions=normal"])
>> +
>> +ADD_NAMESPACES(at_ns0, at_ns1)
>> +
>> +ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24")
>> +ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24")
>> +AT_CHECK([ovs-appctl dpctl/dump-flows], [0], [ignore])
>> +
>> +AT_CHECK([ovs-appctl vlog/set ofproto_dpif_upcall:dbg])
>> +
>> +dnl Set idle timeout to 1 second.
>> +AT_CHECK([ovs-vsctl set Open_vSwitch . other_config:max-idle=1000])
>> +
>> +NS_CHECK_EXEC([at_ns0], [ping -q -c 10 -i 0.1 -W 2 10.1.1.2 | FORMAT_PING], [0], [dnl
>> +10 packets transmitted, 10 received, 0% packet loss, time 0ms
>> +])
>> +
>> +AT_CHECK([ovs-appctl dpctl/dump-flows type=tc,offloaded | grep "eth_type(0x0800)" | DUMP_CLEAN_SORTED], [0], [dnl
>> +in_port(2),eth(macs),eth_type(0x0800),ipv4(frag=no), packets:9, bytes:756, used:0.001s, actions:output
>> +in_port(3),eth(macs),eth_type(0x0800),ipv4(frag=no), packets:9, bytes:756, used:0.001s, actions:output
>> +])
>> +
>> +AT_CHECK([ovs-appctl dpctl/dump-flows type=tc,offloaded | grep -q "arp" ], [0])
>> +
>> +dnl Delete IPv4 entries, but keep the ARP ones.
>> +AT_CHECK([tc filter del dev ovs-p0 ingress protocol ip pref 2])
>> +AT_CHECK([tc filter del dev ovs-p1 ingress protocol ip pref 2])
>> +
>> +dnl Bring the remote ports down to avoid traffic.
>> +AT_CHECK([ip -n at_ns0 link set p0 down])
>> +AT_CHECK([ip -n at_ns1 link set p1 down])
>> +
>> +dnl Wait until the ARP flow has timed out.
>> +OVS_WAIT_UNTIL([test `ovs-appctl dpctl/dump-flows type=tc,offloaded | \
>> +                  grep "arp" | wc -l` -eq 0])
>> +
>> +dnl Set max-idle to 10 ms, so we are sure the max-idle * 2 has been reached.
>> +AT_CHECK([ovs-vsctl set Open_vSwitch . other_config:max-idle=10])
>> +
>> +dnl Add and delete a port to force the revalidation.
>> +for i in `seq 3`; do
>> +  REV_RECONFIGURE=$(ovs-appctl coverage/read-counter rev_reconfigure)
>> +  AT_CHECK([ovs-vsctl add-port br0 ovs-p2 -- \
>> +              set interface ovs-p2 type=internal])
>> +  AT_CHECK([ovs-vsctl del-port ovs-p2 ])
>> +  OVS_WAIT_UNTIL([test `ovs-appctl coverage/read-counter rev_reconfigure` \
>> +                    -gt $REV_RECONFIGURE])
>> +done
>> +
>> +dnl Wait for another full round of revalidation.  After this all ukeys should
>> +dnl be gone.
>> +AT_CHECK([ovs-appctl revalidator/wait], [0])
>> +AT_CHECK([ovs-appctl revalidator/wait], [0])
>> +
>> +dnl Make sure no more ukeys exists.
>> +AT_CHECK([ovs-appctl upcall/show | grep '(keys' | awk '{print $3}' | \
>> +            grep -qv '0)'], [1])
>> +
>> +OVS_TRAFFIC_VSWITCHD_STOP(["/failed to flow_del (No such file or directory)/d"])
>> +AT_CLEANUP
>> +
>>  AT_SETUP([offloads - set ingress_policing_rate and ingress_policing_burst - offloads disabled])
>>  AT_KEYWORDS([ingress_policing])
>>  OVS_CHECK_TC_QDISC()
>> diff --git a/utilities/usdt-scripts/flow_reval_monitor.py b/utilities/usdt-scripts/flow_reval_monitor.py
>> index 28479a565..1de06c73b 100755
>> --- a/utilities/usdt-scripts/flow_reval_monitor.py
>> +++ b/utilities/usdt-scripts/flow_reval_monitor.py
>> @@ -249,6 +249,7 @@ FdrReasons = IntEnum(
>>          "FDR_BAD_ODP_FIT",
>>          "FDR_FLOW_IDLE",
>>          "FDR_FLOW_LIMIT",
>> +        "FDR_FLOW_MISSING_DP",
>>          "FDR_FLOW_WILDCARDED",
>>          "FDR_NO_OFPROTO",
>>          "FDR_PURGE",
>> @@ -265,6 +266,7 @@ FdrReasonStrings = {
>>      FdrReasons.FDR_BAD_ODP_FIT: "Bad ODP flow fit",
>>      FdrReasons.FDR_FLOW_IDLE: "Flow idle timeout",
>>      FdrReasons.FDR_FLOW_LIMIT: "Kill all flows condition reached",
>> +    FdrReasons.FDR_FLOW_MISSING_DP: "Flow is missing from the datapath",
>>      FdrReasons.FDR_FLOW_WILDCARDED: "Flow needs a narrower wildcard mask",
>>      FdrReasons.FDR_NO_OFPROTO: "Bridge not found",
>>      FdrReasons.FDR_PURGE: "User requested flow deletion",
>> -- 
>> 2.45.2
>>
>> _______________________________________________
>> dev mailing list
>> dev@openvswitch.org
>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
Eelco Chaudron Aug. 19, 2024, 7:01 a.m. UTC | #4
On 18 Aug 2024, at 10:30, Roi Dayan wrote:

> On 14/08/2024 11:55, Eelco Chaudron wrote:
>>
>>
>> On 14 Aug 2024, at 10:49, Eelco Chaudron wrote:
>>
>>> It is observed in some environments that there are much more ukeys than
>>> actual DP flows. For example:
>>>
>>> $ ovs-appctl upcall/show
>>> system@ovs-system:
>>> flows : (current 7) (avg 6) (max 117) (limit 2125)
>>> offloaded flows : 525
>>> dump duration : 1063ms
>>> ufid enabled : true
>>>
>>> 23: (keys 3612)
>>> 24: (keys 3625)
>>> 25: (keys 3485)
>>>
>>> The revalidator threads are busy revalidating the stale ukeys leading to
>>> high CPU and long dump duration.
>>>
>>> This patch records the last dump timestamp. If the flow was not dumped
>>> for at least twice the idle time, we can assume the datapath flow now
>>> longer exists and the ukey should be deleted.
>>
>> Roi can you make sure the test passes on your system? I did 1024+ runs on my system and it passes each time. Also on GitHub it works, but your machine might be different, as it failed their in the past.
>>
>> Thanks,
>>
>> Eelco
>
> Hi,
>
> Everything seems to be working fine. the test pass for me.

Thanks for testing!

> Thanks,
> Roi
>
>>
>>> Reported-by: Roi Dayan <roid@nvidia.com>
>>> Co-authored-by: Han Zhou <hzhou@ovn.org>
>>> Co-authored-by: Roi Dayan <roid@nvidia.com>
>>> Signed-off-by: Han Zhou <hzhou@ovn.org>
>>> Signed-off-by: Roi Dayan <roid@nvidia.com>
>>> Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
>>> ---
>>>
>>> v3: Rewrote fix to use actual dump state, and added a tests case.
>>> ---
>>>  ofproto/ofproto-dpif-upcall.c                | 14 ++++-
>>>  tests/system-offloads-traffic.at             | 64 ++++++++++++++++++++
>>>  utilities/usdt-scripts/flow_reval_monitor.py |  2 +
>>>  3 files changed, 79 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/ofproto/ofproto-dpif-upcall.c b/ofproto/ofproto-dpif-upcall.c
>>> index 4d39bc5a7..5c130b694 100644
>>> --- a/ofproto/ofproto-dpif-upcall.c
>>> +++ b/ofproto/ofproto-dpif-upcall.c
>>> @@ -57,6 +57,7 @@ COVERAGE_DEFINE(dumped_inconsistent_flow);
>>>  COVERAGE_DEFINE(dumped_new_flow);
>>>  COVERAGE_DEFINE(handler_duplicate_upcall);
>>>  COVERAGE_DEFINE(revalidate_missed_dp_flow);
>>> +COVERAGE_DEFINE(revalidate_missing_dp_flow);
>>>  COVERAGE_DEFINE(ukey_dp_change);
>>>  COVERAGE_DEFINE(ukey_invalid_stat_reset);
>>>  COVERAGE_DEFINE(ukey_replace_contention);
>>> @@ -278,6 +279,7 @@ enum flow_del_reason {
>>>      FDR_BAD_ODP_FIT,        /* Bad ODP flow fit. */
>>>      FDR_FLOW_IDLE,          /* Flow idle timeout. */
>>>      FDR_FLOW_LIMIT,         /* Kill all flows condition reached. */
>>> +    FDR_FLOW_MISSING_DP,    /* Flow is missing from the datapath. */
>>>      FDR_FLOW_WILDCARDED,    /* Flow needs a narrower wildcard mask. */
>>>      FDR_NO_OFPROTO,         /* Bridge not found. */
>>>      FDR_PURGE,              /* User requested flow deletion. */
>>> @@ -315,6 +317,7 @@ struct udpif_key {
>>>      struct dpif_flow_stats stats OVS_GUARDED; /* Last known stats.*/
>>>      const char *dp_layer OVS_GUARDED;         /* Last known dp_layer. */
>>>      long long int created OVS_GUARDED;        /* Estimate of creation time. */
>>> +    long long int last_dumped OVS_GUARDED;    /* Flow last dump time. */
>>>      uint64_t dump_seq OVS_GUARDED;            /* Tracks udpif->dump_seq. */
>>>      uint64_t reval_seq OVS_GUARDED;           /* Tracks udpif->reval_seq. */
>>>      enum ukey_state state OVS_GUARDED;        /* Tracks ukey lifetime. */
>>> @@ -1825,6 +1828,7 @@ ukey_create__(const struct nlattr *key, size_t key_len,
>>>      ukey->state_thread = ovsthread_id_self();
>>>      ukey->state_where = OVS_SOURCE_LOCATOR;
>>>      ukey->created = ukey->flow_time = time_msec();
>>> +    ukey->last_dumped = 0;
>>>      memset(&ukey->stats, 0, sizeof ukey->stats);
>>>      ukey->stats.used = used;
>>>      ukey->dp_layer = NULL;
>>> @@ -2456,7 +2460,14 @@ revalidate_ukey(struct udpif *udpif, struct udpif_key *ukey,
>>>          log_unexpected_stats_jump(ukey, stats);
>>>      }
>>>
>>> -    if (need_revalidate) {
>>> +    if ((ukey->last_dumped ? ukey->last_dumped : ukey->created)
>>> +        < udpif->dpif->current_ms - (2 * ofproto_max_idle)) {
>>> +        /* If the flow was not dumped for at least twice the idle time,
>>> +         * we can assume the datapath flow now longer exists and the ukey
>>> +         * should be deleted. */
>>> +        COVERAGE_INC(revalidate_missing_dp_flow);
>>> +        *del_reason = FDR_FLOW_MISSING_DP;
>>> +    } else if (need_revalidate) {
>>>          if (should_revalidate(udpif, ukey, push.n_packets)) {
>>>              if (!ukey->xcache) {
>>>                  ukey->xcache = xlate_cache_new();
>>> @@ -2890,6 +2901,7 @@ revalidate(struct revalidator *revalidator)
>>>                  continue;
>>>              }
>>>
>>> +            ukey->last_dumped = now;
>>>              ukey->offloaded = f->attrs.offloaded;
>>>              if (!ukey->dp_layer
>>>                  || (!dpif_synced_dp_layers(udpif->dpif)
>>> diff --git a/tests/system-offloads-traffic.at b/tests/system-offloads-traffic.at
>>> index d1da33d96..a9416cbce 100644
>>> --- a/tests/system-offloads-traffic.at
>>> +++ b/tests/system-offloads-traffic.at
>>> @@ -93,6 +93,70 @@ AT_CHECK([ovs-appctl upcall/show | grep -E "offloaded flows : [[1-9]]"], [0], [i
>>>  OVS_TRAFFIC_VSWITCHD_STOP
>>>  AT_CLEANUP
>>>
>>> +AT_SETUP([offloads - Forced removed datapath entries])
>>> +OVS_TRAFFIC_VSWITCHD_START([], [], [-- set Open_vSwitch . other_config:hw-offload=true])
>>> +
>>> +AT_CHECK([ovs-ofctl add-flow br0 "actions=normal"])
>>> +
>>> +ADD_NAMESPACES(at_ns0, at_ns1)
>>> +
>>> +ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24")
>>> +ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24")
>>> +AT_CHECK([ovs-appctl dpctl/dump-flows], [0], [ignore])
>>> +
>>> +AT_CHECK([ovs-appctl vlog/set ofproto_dpif_upcall:dbg])
>>> +
>>> +dnl Set idle timeout to 1 second.
>>> +AT_CHECK([ovs-vsctl set Open_vSwitch . other_config:max-idle=1000])
>>> +
>>> +NS_CHECK_EXEC([at_ns0], [ping -q -c 10 -i 0.1 -W 2 10.1.1.2 | FORMAT_PING], [0], [dnl
>>> +10 packets transmitted, 10 received, 0% packet loss, time 0ms
>>> +])
>>> +
>>> +AT_CHECK([ovs-appctl dpctl/dump-flows type=tc,offloaded | grep "eth_type(0x0800)" | DUMP_CLEAN_SORTED], [0], [dnl
>>> +in_port(2),eth(macs),eth_type(0x0800),ipv4(frag=no), packets:9, bytes:756, used:0.001s, actions:output
>>> +in_port(3),eth(macs),eth_type(0x0800),ipv4(frag=no), packets:9, bytes:756, used:0.001s, actions:output
>>> +])
>>> +
>>> +AT_CHECK([ovs-appctl dpctl/dump-flows type=tc,offloaded | grep -q "arp" ], [0])
>>> +
>>> +dnl Delete IPv4 entries, but keep the ARP ones.
>>> +AT_CHECK([tc filter del dev ovs-p0 ingress protocol ip pref 2])
>>> +AT_CHECK([tc filter del dev ovs-p1 ingress protocol ip pref 2])
>>> +
>>> +dnl Bring the remote ports down to avoid traffic.
>>> +AT_CHECK([ip -n at_ns0 link set p0 down])
>>> +AT_CHECK([ip -n at_ns1 link set p1 down])
>>> +
>>> +dnl Wait until the ARP flow has timed out.
>>> +OVS_WAIT_UNTIL([test `ovs-appctl dpctl/dump-flows type=tc,offloaded | \
>>> +                  grep "arp" | wc -l` -eq 0])
>>> +
>>> +dnl Set max-idle to 10 ms, so we are sure the max-idle * 2 has been reached.
>>> +AT_CHECK([ovs-vsctl set Open_vSwitch . other_config:max-idle=10])
>>> +
>>> +dnl Add and delete a port to force the revalidation.
>>> +for i in `seq 3`; do
>>> +  REV_RECONFIGURE=$(ovs-appctl coverage/read-counter rev_reconfigure)
>>> +  AT_CHECK([ovs-vsctl add-port br0 ovs-p2 -- \
>>> +              set interface ovs-p2 type=internal])
>>> +  AT_CHECK([ovs-vsctl del-port ovs-p2 ])
>>> +  OVS_WAIT_UNTIL([test `ovs-appctl coverage/read-counter rev_reconfigure` \
>>> +                    -gt $REV_RECONFIGURE])
>>> +done
>>> +
>>> +dnl Wait for another full round of revalidation.  After this all ukeys should
>>> +dnl be gone.
>>> +AT_CHECK([ovs-appctl revalidator/wait], [0])
>>> +AT_CHECK([ovs-appctl revalidator/wait], [0])
>>> +
>>> +dnl Make sure no more ukeys exists.
>>> +AT_CHECK([ovs-appctl upcall/show | grep '(keys' | awk '{print $3}' | \
>>> +            grep -qv '0)'], [1])
>>> +
>>> +OVS_TRAFFIC_VSWITCHD_STOP(["/failed to flow_del (No such file or directory)/d"])
>>> +AT_CLEANUP
>>> +
>>>  AT_SETUP([offloads - set ingress_policing_rate and ingress_policing_burst - offloads disabled])
>>>  AT_KEYWORDS([ingress_policing])
>>>  OVS_CHECK_TC_QDISC()
>>> diff --git a/utilities/usdt-scripts/flow_reval_monitor.py b/utilities/usdt-scripts/flow_reval_monitor.py
>>> index 28479a565..1de06c73b 100755
>>> --- a/utilities/usdt-scripts/flow_reval_monitor.py
>>> +++ b/utilities/usdt-scripts/flow_reval_monitor.py
>>> @@ -249,6 +249,7 @@ FdrReasons = IntEnum(
>>>          "FDR_BAD_ODP_FIT",
>>>          "FDR_FLOW_IDLE",
>>>          "FDR_FLOW_LIMIT",
>>> +        "FDR_FLOW_MISSING_DP",
>>>          "FDR_FLOW_WILDCARDED",
>>>          "FDR_NO_OFPROTO",
>>>          "FDR_PURGE",
>>> @@ -265,6 +266,7 @@ FdrReasonStrings = {
>>>      FdrReasons.FDR_BAD_ODP_FIT: "Bad ODP flow fit",
>>>      FdrReasons.FDR_FLOW_IDLE: "Flow idle timeout",
>>>      FdrReasons.FDR_FLOW_LIMIT: "Kill all flows condition reached",
>>> +    FdrReasons.FDR_FLOW_MISSING_DP: "Flow is missing from the datapath",
>>>      FdrReasons.FDR_FLOW_WILDCARDED: "Flow needs a narrower wildcard mask",
>>>      FdrReasons.FDR_NO_OFPROTO: "Bridge not found",
>>>      FdrReasons.FDR_PURGE: "User requested flow deletion",
>>> -- 
>>> 2.45.2
>>>
>>> _______________________________________________
>>> dev mailing list
>>> dev@openvswitch.org
>>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>>
Ilya Maximets Aug. 21, 2024, 6:34 p.m. UTC | #5
On 8/14/24 10:49, Eelco Chaudron wrote:
> It is observed in some environments that there are much more ukeys than
> actual DP flows. For example:
> 
> $ ovs-appctl upcall/show
> system@ovs-system:
> flows : (current 7) (avg 6) (max 117) (limit 2125)
> offloaded flows : 525
> dump duration : 1063ms
> ufid enabled : true
> 
> 23: (keys 3612)
> 24: (keys 3625)
> 25: (keys 3485)
> 
> The revalidator threads are busy revalidating the stale ukeys leading to
> high CPU and long dump duration.
> 
> This patch records the last dump timestamp. If the flow was not dumped
> for at least twice the idle time, we can assume the datapath flow now
> longer exists and the ukey should be deleted.
> 
> Reported-by: Roi Dayan <roid@nvidia.com>
> Co-authored-by: Han Zhou <hzhou@ovn.org>
> Co-authored-by: Roi Dayan <roid@nvidia.com>
> Signed-off-by: Han Zhou <hzhou@ovn.org>
> Signed-off-by: Roi Dayan <roid@nvidia.com>
> Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
> ---
> 
> v3: Rewrote fix to use actual dump state, and added a tests case.

Hi, Eelco.  Thanks for the new version!  See some comments inline.

> ---
>  ofproto/ofproto-dpif-upcall.c                | 14 ++++-
>  tests/system-offloads-traffic.at             | 64 ++++++++++++++++++++

It's OK to have an ofload test, I suppose.  However, it should be possible
to write a unit test for this issue.  We also have a lot more control over
the time (stop/warp) in unit tests, so it should be easier to write a
reliable test.

>  utilities/usdt-scripts/flow_reval_monitor.py |  2 +
>  3 files changed, 79 insertions(+), 1 deletion(-)
> 
> diff --git a/ofproto/ofproto-dpif-upcall.c b/ofproto/ofproto-dpif-upcall.c
> index 4d39bc5a7..5c130b694 100644
> --- a/ofproto/ofproto-dpif-upcall.c
> +++ b/ofproto/ofproto-dpif-upcall.c
> @@ -57,6 +57,7 @@ COVERAGE_DEFINE(dumped_inconsistent_flow);
>  COVERAGE_DEFINE(dumped_new_flow);
>  COVERAGE_DEFINE(handler_duplicate_upcall);
>  COVERAGE_DEFINE(revalidate_missed_dp_flow);
> +COVERAGE_DEFINE(revalidate_missing_dp_flow);
>  COVERAGE_DEFINE(ukey_dp_change);
>  COVERAGE_DEFINE(ukey_invalid_stat_reset);
>  COVERAGE_DEFINE(ukey_replace_contention);
> @@ -278,6 +279,7 @@ enum flow_del_reason {
>      FDR_BAD_ODP_FIT,        /* Bad ODP flow fit. */
>      FDR_FLOW_IDLE,          /* Flow idle timeout. */
>      FDR_FLOW_LIMIT,         /* Kill all flows condition reached. */
> +    FDR_FLOW_MISSING_DP,    /* Flow is missing from the datapath. */

Do we want to have at least some level of compatibility between minor
versions for this enum, or we don't want the script to work with any
version other then the exactly matching one?

i.e. Should we try to define new values at the end of the enum?

>      FDR_FLOW_WILDCARDED,    /* Flow needs a narrower wildcard mask. */
>      FDR_NO_OFPROTO,         /* Bridge not found. */
>      FDR_PURGE,              /* User requested flow deletion. */
> @@ -315,6 +317,7 @@ struct udpif_key {
>      struct dpif_flow_stats stats OVS_GUARDED; /* Last known stats.*/
>      const char *dp_layer OVS_GUARDED;         /* Last known dp_layer. */
>      long long int created OVS_GUARDED;        /* Estimate of creation time. */
> +    long long int last_dumped OVS_GUARDED;    /* Flow last dump time. */
>      uint64_t dump_seq OVS_GUARDED;            /* Tracks udpif->dump_seq. */
>      uint64_t reval_seq OVS_GUARDED;           /* Tracks udpif->reval_seq. */
>      enum ukey_state state OVS_GUARDED;        /* Tracks ukey lifetime. */
> @@ -1825,6 +1828,7 @@ ukey_create__(const struct nlattr *key, size_t key_len,
>      ukey->state_thread = ovsthread_id_self();
>      ukey->state_where = OVS_SOURCE_LOCATOR;
>      ukey->created = ukey->flow_time = time_msec();
> +    ukey->last_dumped = 0;
>      memset(&ukey->stats, 0, sizeof ukey->stats);
>      ukey->stats.used = used;
>      ukey->dp_layer = NULL;
> @@ -2456,7 +2460,14 @@ revalidate_ukey(struct udpif *udpif, struct udpif_key *ukey,
>          log_unexpected_stats_jump(ukey, stats);
>      }
>  
> -    if (need_revalidate) {
> +    if ((ukey->last_dumped ? ukey->last_dumped : ukey->created)
> +        < udpif->dpif->current_ms - (2 * ofproto_max_idle)) {

Should we also check that we actually tried to dump this flow a few times?
e.g. if the process was stuck doing something else or the idle timeout is
too small and we didn't have a chance to dump more than once?

> +        /* If the flow was not dumped for at least twice the idle time,
> +         * we can assume the datapath flow now longer exists and the ukey

s/now/no/

> +         * should be deleted. */
> +        COVERAGE_INC(revalidate_missing_dp_flow);
> +        *del_reason = FDR_FLOW_MISSING_DP;
> +    } else if (need_revalidate) {
>          if (should_revalidate(udpif, ukey, push.n_packets)) {
>              if (!ukey->xcache) {
>                  ukey->xcache = xlate_cache_new();
> @@ -2890,6 +2901,7 @@ revalidate(struct revalidator *revalidator)
>                  continue;
>              }
>  
> +            ukey->last_dumped = now;
>              ukey->offloaded = f->attrs.offloaded;
>              if (!ukey->dp_layer
>                  || (!dpif_synced_dp_layers(udpif->dpif)
> diff --git a/tests/system-offloads-traffic.at b/tests/system-offloads-traffic.at
> index d1da33d96..a9416cbce 100644
> --- a/tests/system-offloads-traffic.at
> +++ b/tests/system-offloads-traffic.at
> @@ -93,6 +93,70 @@ AT_CHECK([ovs-appctl upcall/show | grep -E "offloaded flows : [[1-9]]"], [0], [i
>  OVS_TRAFFIC_VSWITCHD_STOP
>  AT_CLEANUP
>  
> +AT_SETUP([offloads - Forced removed datapath entries])

OVS_CHECK_TC_QDISC ?

> +OVS_TRAFFIC_VSWITCHD_START([], [], [-- set Open_vSwitch . other_config:hw-offload=true])
> +
> +AT_CHECK([ovs-ofctl add-flow br0 "actions=normal"])
> +
> +ADD_NAMESPACES(at_ns0, at_ns1)
> +
> +ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24")
> +ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24")
> +AT_CHECK([ovs-appctl dpctl/dump-flows], [0], [ignore])

Seems unnecessary.

> +
> +AT_CHECK([ovs-appctl vlog/set ofproto_dpif_upcall:dbg])
> +
> +dnl Set idle timeout to 1 second.
> +AT_CHECK([ovs-vsctl set Open_vSwitch . other_config:max-idle=1000])
> +
> +NS_CHECK_EXEC([at_ns0], [ping -q -c 10 -i 0.1 -W 2 10.1.1.2 | FORMAT_PING], [0], [dnl
> +10 packets transmitted, 10 received, 0% packet loss, time 0ms
> +])
> +
> +AT_CHECK([ovs-appctl dpctl/dump-flows type=tc,offloaded | grep "eth_type(0x0800)" | DUMP_CLEAN_SORTED], [0], [dnl
> +in_port(2),eth(macs),eth_type(0x0800),ipv4(frag=no), packets:9, bytes:756, used:0.001s, actions:output
> +in_port(3),eth(macs),eth_type(0x0800),ipv4(frag=no), packets:9, bytes:756, used:0.001s, actions:output
> +])
> +
> +AT_CHECK([ovs-appctl dpctl/dump-flows type=tc,offloaded | grep -q "arp" ], [0])

No need for ", [0]".

> +
> +dnl Delete IPv4 entries, but keep the ARP ones.
> +AT_CHECK([tc filter del dev ovs-p0 ingress protocol ip pref 2])
> +AT_CHECK([tc filter del dev ovs-p1 ingress protocol ip pref 2])
> +
> +dnl Bring the remote ports down to avoid traffic.
> +AT_CHECK([ip -n at_ns0 link set p0 down])
> +AT_CHECK([ip -n at_ns1 link set p1 down])
> +
> +dnl Wait until the ARP flow has timed out.
> +OVS_WAIT_UNTIL([test `ovs-appctl dpctl/dump-flows type=tc,offloaded | \
> +                  grep "arp" | wc -l` -eq 0])

OVS_WAIT_WHILE([... | grep -q "arp"])  should be simpler.

> +
> +dnl Set max-idle to 10 ms, so we are sure the max-idle * 2 has been reached.
> +AT_CHECK([ovs-vsctl set Open_vSwitch . other_config:max-idle=10])
> +
> +dnl Add and delete a port to force the revalidation.
> +for i in `seq 3`; do
> +  REV_RECONFIGURE=$(ovs-appctl coverage/read-counter rev_reconfigure)
> +  AT_CHECK([ovs-vsctl add-port br0 ovs-p2 -- \
> +              set interface ovs-p2 type=internal])
> +  AT_CHECK([ovs-vsctl del-port ovs-p2 ])
> +  OVS_WAIT_UNTIL([test `ovs-appctl coverage/read-counter rev_reconfigure` \
> +                    -gt $REV_RECONFIGURE])
> +done

It seems strange that we need to force revalidation this way.
Why the revalidator/wait with a 1 second sleep is not enough?

> +
> +dnl Wait for another full round of revalidation.  After this all ukeys should
> +dnl be gone.
> +AT_CHECK([ovs-appctl revalidator/wait], [0])
> +AT_CHECK([ovs-appctl revalidator/wait], [0])
> +
> +dnl Make sure no more ukeys exists.
> +AT_CHECK([ovs-appctl upcall/show | grep '(keys' | awk '{print $3}' | \
> +            grep -qv '0)'], [1])

awk is seemingly unnecessary.

OVS_WAIT_WHILE([... | grep '(keys' | grep -qv '0)' ]) ?

> +
> +OVS_TRAFFIC_VSWITCHD_STOP(["/failed to flow_del (No such file or directory)/d"])
> +AT_CLEANUP
> +
>  AT_SETUP([offloads - set ingress_policing_rate and ingress_policing_burst - offloads disabled])
>  AT_KEYWORDS([ingress_policing])
>  OVS_CHECK_TC_QDISC()
> diff --git a/utilities/usdt-scripts/flow_reval_monitor.py b/utilities/usdt-scripts/flow_reval_monitor.py
> index 28479a565..1de06c73b 100755
> --- a/utilities/usdt-scripts/flow_reval_monitor.py
> +++ b/utilities/usdt-scripts/flow_reval_monitor.py
> @@ -249,6 +249,7 @@ FdrReasons = IntEnum(
>          "FDR_BAD_ODP_FIT",
>          "FDR_FLOW_IDLE",
>          "FDR_FLOW_LIMIT",
> +        "FDR_FLOW_MISSING_DP",
>          "FDR_FLOW_WILDCARDED",
>          "FDR_NO_OFPROTO",
>          "FDR_PURGE",
> @@ -265,6 +266,7 @@ FdrReasonStrings = {
>      FdrReasons.FDR_BAD_ODP_FIT: "Bad ODP flow fit",
>      FdrReasons.FDR_FLOW_IDLE: "Flow idle timeout",
>      FdrReasons.FDR_FLOW_LIMIT: "Kill all flows condition reached",
> +    FdrReasons.FDR_FLOW_MISSING_DP: "Flow is missing from the datapath",
>      FdrReasons.FDR_FLOW_WILDCARDED: "Flow needs a narrower wildcard mask",
>      FdrReasons.FDR_NO_OFPROTO: "Bridge not found",
>      FdrReasons.FDR_PURGE: "User requested flow deletion",
Eelco Chaudron Aug. 22, 2024, 7:46 a.m. UTC | #6
On 21 Aug 2024, at 20:34, Ilya Maximets wrote:

> On 8/14/24 10:49, Eelco Chaudron wrote:
>> It is observed in some environments that there are much more ukeys than
>> actual DP flows. For example:
>>
>> $ ovs-appctl upcall/show
>> system@ovs-system:
>> flows : (current 7) (avg 6) (max 117) (limit 2125)
>> offloaded flows : 525
>> dump duration : 1063ms
>> ufid enabled : true
>>
>> 23: (keys 3612)
>> 24: (keys 3625)
>> 25: (keys 3485)
>>
>> The revalidator threads are busy revalidating the stale ukeys leading to
>> high CPU and long dump duration.
>>
>> This patch records the last dump timestamp. If the flow was not dumped
>> for at least twice the idle time, we can assume the datapath flow now
>> longer exists and the ukey should be deleted.
>>
>> Reported-by: Roi Dayan <roid@nvidia.com>
>> Co-authored-by: Han Zhou <hzhou@ovn.org>
>> Co-authored-by: Roi Dayan <roid@nvidia.com>
>> Signed-off-by: Han Zhou <hzhou@ovn.org>
>> Signed-off-by: Roi Dayan <roid@nvidia.com>
>> Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
>> ---
>>
>> v3: Rewrote fix to use actual dump state, and added a tests case.
>
> Hi, Eelco.  Thanks for the new version!  See some comments inline.
>
>> ---
>>  ofproto/ofproto-dpif-upcall.c                | 14 ++++-
>>  tests/system-offloads-traffic.at             | 64 ++++++++++++++++++++
>
> It's OK to have an ofload test, I suppose.  However, it should be possible
> to write a unit test for this issue.  We also have a lot more control over
> the time (stop/warp) in unit tests, so it should be easier to write a
> reliable test.

Well, the problem is we need the DP rules to not exist (disappear) and the only way I could find was through TC. If you have any other idea let me know.x

>>  utilities/usdt-scripts/flow_reval_monitor.py |  2 +
>>  3 files changed, 79 insertions(+), 1 deletion(-)
>>
>> diff --git a/ofproto/ofproto-dpif-upcall.c b/ofproto/ofproto-dpif-upcall.c
>> index 4d39bc5a7..5c130b694 100644
>> --- a/ofproto/ofproto-dpif-upcall.c
>> +++ b/ofproto/ofproto-dpif-upcall.c
>> @@ -57,6 +57,7 @@ COVERAGE_DEFINE(dumped_inconsistent_flow);
>>  COVERAGE_DEFINE(dumped_new_flow);
>>  COVERAGE_DEFINE(handler_duplicate_upcall);
>>  COVERAGE_DEFINE(revalidate_missed_dp_flow);
>> +COVERAGE_DEFINE(revalidate_missing_dp_flow);
>>  COVERAGE_DEFINE(ukey_dp_change);
>>  COVERAGE_DEFINE(ukey_invalid_stat_reset);
>>  COVERAGE_DEFINE(ukey_replace_contention);
>> @@ -278,6 +279,7 @@ enum flow_del_reason {
>>      FDR_BAD_ODP_FIT,        /* Bad ODP flow fit. */
>>      FDR_FLOW_IDLE,          /* Flow idle timeout. */
>>      FDR_FLOW_LIMIT,         /* Kill all flows condition reached. */
>> +    FDR_FLOW_MISSING_DP,    /* Flow is missing from the datapath. */
>
> Do we want to have at least some level of compatibility between minor
> versions for this enum, or we don't want the script to work with any
> version other then the exactly matching one?
>
> i.e. Should we try to define new values at the end of the enum?

Good idea, let me add it at the end.

>>      FDR_FLOW_WILDCARDED,    /* Flow needs a narrower wildcard mask. */
>>      FDR_NO_OFPROTO,         /* Bridge not found. */
>>      FDR_PURGE,              /* User requested flow deletion. */
>> @@ -315,6 +317,7 @@ struct udpif_key {
>>      struct dpif_flow_stats stats OVS_GUARDED; /* Last known stats.*/
>>      const char *dp_layer OVS_GUARDED;         /* Last known dp_layer. */
>>      long long int created OVS_GUARDED;        /* Estimate of creation time. */
>> +    long long int last_dumped OVS_GUARDED;    /* Flow last dump time. */
>>      uint64_t dump_seq OVS_GUARDED;            /* Tracks udpif->dump_seq. */
>>      uint64_t reval_seq OVS_GUARDED;           /* Tracks udpif->reval_seq. */
>>      enum ukey_state state OVS_GUARDED;        /* Tracks ukey lifetime. */
>> @@ -1825,6 +1828,7 @@ ukey_create__(const struct nlattr *key, size_t key_len,
>>      ukey->state_thread = ovsthread_id_self();
>>      ukey->state_where = OVS_SOURCE_LOCATOR;
>>      ukey->created = ukey->flow_time = time_msec();
>> +    ukey->last_dumped = 0;
>>      memset(&ukey->stats, 0, sizeof ukey->stats);
>>      ukey->stats.used = used;
>>      ukey->dp_layer = NULL;
>> @@ -2456,7 +2460,14 @@ revalidate_ukey(struct udpif *udpif, struct udpif_key *ukey,
>>          log_unexpected_stats_jump(ukey, stats);
>>      }
>>
>> -    if (need_revalidate) {
>> +    if ((ukey->last_dumped ? ukey->last_dumped : ukey->created)
>> +        < udpif->dpif->current_ms - (2 * ofproto_max_idle)) {
>
> Should we also check that we actually tried to dump this flow a few times?
> e.g. if the process was stuck doing something else or the idle timeout is
> too small and we didn't have a chance to dump more than once?

Let me think about this a bit more, maybe we can do a miss count instead.

>> +        /* If the flow was not dumped for at least twice the idle time,
>> +         * we can assume the datapath flow now longer exists and the ukey
>
> s/now/no/

ACK

>> +         * should be deleted. */
>> +        COVERAGE_INC(revalidate_missing_dp_flow);
>> +        *del_reason = FDR_FLOW_MISSING_DP;
>> +    } else if (need_revalidate) {
>>          if (should_revalidate(udpif, ukey, push.n_packets)) {
>>              if (!ukey->xcache) {
>>                  ukey->xcache = xlate_cache_new();
>> @@ -2890,6 +2901,7 @@ revalidate(struct revalidator *revalidator)
>>                  continue;
>>              }
>>
>> +            ukey->last_dumped = now;
>>              ukey->offloaded = f->attrs.offloaded;
>>              if (!ukey->dp_layer
>>                  || (!dpif_synced_dp_layers(udpif->dpif)
>> diff --git a/tests/system-offloads-traffic.at b/tests/system-offloads-traffic.at
>> index d1da33d96..a9416cbce 100644
>> --- a/tests/system-offloads-traffic.at
>> +++ b/tests/system-offloads-traffic.at
>> @@ -93,6 +93,70 @@ AT_CHECK([ovs-appctl upcall/show | grep -E "offloaded flows : [[1-9]]"], [0], [i
>>  OVS_TRAFFIC_VSWITCHD_STOP
>>  AT_CLEANUP
>>
>> +AT_SETUP([offloads - Forced removed datapath entries])
>
> OVS_CHECK_TC_QDISC ?

Why would we need this?

>> +OVS_TRAFFIC_VSWITCHD_START([], [], [-- set Open_vSwitch . other_config:hw-offload=true])
>> +
>> +AT_CHECK([ovs-ofctl add-flow br0 "actions=normal"])
>> +
>> +ADD_NAMESPACES(at_ns0, at_ns1)
>> +
>> +ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24")
>> +ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24")
>> +AT_CHECK([ovs-appctl dpctl/dump-flows], [0], [ignore])
>
> Seems unnecessary.

ACK

>> +
>> +AT_CHECK([ovs-appctl vlog/set ofproto_dpif_upcall:dbg])
>> +
>> +dnl Set idle timeout to 1 second.
>> +AT_CHECK([ovs-vsctl set Open_vSwitch . other_config:max-idle=1000])
>> +
>> +NS_CHECK_EXEC([at_ns0], [ping -q -c 10 -i 0.1 -W 2 10.1.1.2 | FORMAT_PING], [0], [dnl
>> +10 packets transmitted, 10 received, 0% packet loss, time 0ms
>> +])
>> +
>> +AT_CHECK([ovs-appctl dpctl/dump-flows type=tc,offloaded | grep "eth_type(0x0800)" | DUMP_CLEAN_SORTED], [0], [dnl
>> +in_port(2),eth(macs),eth_type(0x0800),ipv4(frag=no), packets:9, bytes:756, used:0.001s, actions:output
>> +in_port(3),eth(macs),eth_type(0x0800),ipv4(frag=no), packets:9, bytes:756, used:0.001s, actions:output
>> +])
>> +
>> +AT_CHECK([ovs-appctl dpctl/dump-flows type=tc,offloaded | grep -q "arp" ], [0])
>
> No need for ", [0]".

ACK

>> +
>> +dnl Delete IPv4 entries, but keep the ARP ones.
>> +AT_CHECK([tc filter del dev ovs-p0 ingress protocol ip pref 2])
>> +AT_CHECK([tc filter del dev ovs-p1 ingress protocol ip pref 2])
>> +
>> +dnl Bring the remote ports down to avoid traffic.
>> +AT_CHECK([ip -n at_ns0 link set p0 down])
>> +AT_CHECK([ip -n at_ns1 link set p1 down])
>> +
>> +dnl Wait until the ARP flow has timed out.
>> +OVS_WAIT_UNTIL([test `ovs-appctl dpctl/dump-flows type=tc,offloaded | \
>> +                  grep "arp" | wc -l` -eq 0])
>
> OVS_WAIT_WHILE([... | grep -q "arp"])  should be simpler.

ACK

>> +
>> +dnl Set max-idle to 10 ms, so we are sure the max-idle * 2 has been reached.
>> +AT_CHECK([ovs-vsctl set Open_vSwitch . other_config:max-idle=10])
>> +
>> +dnl Add and delete a port to force the revalidation.
>> +for i in `seq 3`; do
>> +  REV_RECONFIGURE=$(ovs-appctl coverage/read-counter rev_reconfigure)
>> +  AT_CHECK([ovs-vsctl add-port br0 ovs-p2 -- \
>> +              set interface ovs-p2 type=internal])
>> +  AT_CHECK([ovs-vsctl del-port ovs-p2 ])
>> +  OVS_WAIT_UNTIL([test `ovs-appctl coverage/read-counter rev_reconfigure` \
>> +                    -gt $REV_RECONFIGURE])
>> +done
>
> It seems strange that we need to force revalidation this way.
> Why the revalidator/wait with a 1 second sleep is not enough?

Because we need a ‘real’ revalidation run, not just gathering statistics, a configuration change will trigger this.

>
>> +
>> +dnl Wait for another full round of revalidation.  After this all ukeys should
>> +dnl be gone.
>> +AT_CHECK([ovs-appctl revalidator/wait], [0])
>> +AT_CHECK([ovs-appctl revalidator/wait], [0])
>> +
>> +dnl Make sure no more ukeys exists.
>> +AT_CHECK([ovs-appctl upcall/show | grep '(keys' | awk '{print $3}' | \
>> +            grep -qv '0)'], [1])
>
> awk is seemingly unnecessary.
>
> OVS_WAIT_WHILE([... | grep '(keys' | grep -qv '0)' ]) ?

ACK will clean this up.

>> +
>> +OVS_TRAFFIC_VSWITCHD_STOP(["/failed to flow_del (No such file or directory)/d"])
>> +AT_CLEANUP
>> +
>>  AT_SETUP([offloads - set ingress_policing_rate and ingress_policing_burst - offloads disabled])
>>  AT_KEYWORDS([ingress_policing])
>>  OVS_CHECK_TC_QDISC()
>> diff --git a/utilities/usdt-scripts/flow_reval_monitor.py b/utilities/usdt-scripts/flow_reval_monitor.py
>> index 28479a565..1de06c73b 100755
>> --- a/utilities/usdt-scripts/flow_reval_monitor.py
>> +++ b/utilities/usdt-scripts/flow_reval_monitor.py
>> @@ -249,6 +249,7 @@ FdrReasons = IntEnum(
>>          "FDR_BAD_ODP_FIT",
>>          "FDR_FLOW_IDLE",
>>          "FDR_FLOW_LIMIT",
>> +        "FDR_FLOW_MISSING_DP",
>>          "FDR_FLOW_WILDCARDED",
>>          "FDR_NO_OFPROTO",
>>          "FDR_PURGE",
>> @@ -265,6 +266,7 @@ FdrReasonStrings = {
>>      FdrReasons.FDR_BAD_ODP_FIT: "Bad ODP flow fit",
>>      FdrReasons.FDR_FLOW_IDLE: "Flow idle timeout",
>>      FdrReasons.FDR_FLOW_LIMIT: "Kill all flows condition reached",
>> +    FdrReasons.FDR_FLOW_MISSING_DP: "Flow is missing from the datapath",
>>      FdrReasons.FDR_FLOW_WILDCARDED: "Flow needs a narrower wildcard mask",
>>      FdrReasons.FDR_NO_OFPROTO: "Bridge not found",
>>      FdrReasons.FDR_PURGE: "User requested flow deletion",
Ilya Maximets Aug. 22, 2024, 1:18 p.m. UTC | #7
On 8/22/24 09:46, Eelco Chaudron wrote:
> 
> 
> On 21 Aug 2024, at 20:34, Ilya Maximets wrote:
> 
>> On 8/14/24 10:49, Eelco Chaudron wrote:
>>> It is observed in some environments that there are much more ukeys than
>>> actual DP flows. For example:
>>>
>>> $ ovs-appctl upcall/show
>>> system@ovs-system:
>>> flows : (current 7) (avg 6) (max 117) (limit 2125)
>>> offloaded flows : 525
>>> dump duration : 1063ms
>>> ufid enabled : true
>>>
>>> 23: (keys 3612)
>>> 24: (keys 3625)
>>> 25: (keys 3485)
>>>
>>> The revalidator threads are busy revalidating the stale ukeys leading to
>>> high CPU and long dump duration.
>>>
>>> This patch records the last dump timestamp. If the flow was not dumped
>>> for at least twice the idle time, we can assume the datapath flow now
>>> longer exists and the ukey should be deleted.
>>>
>>> Reported-by: Roi Dayan <roid@nvidia.com>
>>> Co-authored-by: Han Zhou <hzhou@ovn.org>
>>> Co-authored-by: Roi Dayan <roid@nvidia.com>
>>> Signed-off-by: Han Zhou <hzhou@ovn.org>
>>> Signed-off-by: Roi Dayan <roid@nvidia.com>
>>> Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
>>> ---
>>>
>>> v3: Rewrote fix to use actual dump state, and added a tests case.
>>
>> Hi, Eelco.  Thanks for the new version!  See some comments inline.
>>
>>> ---
>>>  ofproto/ofproto-dpif-upcall.c                | 14 ++++-
>>>  tests/system-offloads-traffic.at             | 64 ++++++++++++++++++++
>>
>> It's OK to have an ofload test, I suppose.  However, it should be possible
>> to write a unit test for this issue.  We also have a lot more control over
>> the time (stop/warp) in unit tests, so it should be easier to write a
>> reliable test.
> 
> Well, the problem is we need the DP rules to not exist (disappear) and the only
> way I could find was through TC. If you have any other idea let me know.x

'ovs-appctl dpctl/del-flows' should delete flows from the datapath
without notifying upper layers.  Is it not enough?

> 
>>>  utilities/usdt-scripts/flow_reval_monitor.py |  2 +
>>>  3 files changed, 79 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/ofproto/ofproto-dpif-upcall.c b/ofproto/ofproto-dpif-upcall.c
>>> index 4d39bc5a7..5c130b694 100644
>>> --- a/ofproto/ofproto-dpif-upcall.c
>>> +++ b/ofproto/ofproto-dpif-upcall.c
>>> @@ -57,6 +57,7 @@ COVERAGE_DEFINE(dumped_inconsistent_flow);
>>>  COVERAGE_DEFINE(dumped_new_flow);
>>>  COVERAGE_DEFINE(handler_duplicate_upcall);
>>>  COVERAGE_DEFINE(revalidate_missed_dp_flow);
>>> +COVERAGE_DEFINE(revalidate_missing_dp_flow);
>>>  COVERAGE_DEFINE(ukey_dp_change);
>>>  COVERAGE_DEFINE(ukey_invalid_stat_reset);
>>>  COVERAGE_DEFINE(ukey_replace_contention);
>>> @@ -278,6 +279,7 @@ enum flow_del_reason {
>>>      FDR_BAD_ODP_FIT,        /* Bad ODP flow fit. */
>>>      FDR_FLOW_IDLE,          /* Flow idle timeout. */
>>>      FDR_FLOW_LIMIT,         /* Kill all flows condition reached. */
>>> +    FDR_FLOW_MISSING_DP,    /* Flow is missing from the datapath. */
>>
>> Do we want to have at least some level of compatibility between minor
>> versions for this enum, or we don't want the script to work with any
>> version other then the exactly matching one?
>>
>> i.e. Should we try to define new values at the end of the enum?
> 
> Good idea, let me add it at the end.
> 
>>>      FDR_FLOW_WILDCARDED,    /* Flow needs a narrower wildcard mask. */
>>>      FDR_NO_OFPROTO,         /* Bridge not found. */
>>>      FDR_PURGE,              /* User requested flow deletion. */
>>> @@ -315,6 +317,7 @@ struct udpif_key {
>>>      struct dpif_flow_stats stats OVS_GUARDED; /* Last known stats.*/
>>>      const char *dp_layer OVS_GUARDED;         /* Last known dp_layer. */
>>>      long long int created OVS_GUARDED;        /* Estimate of creation time. */
>>> +    long long int last_dumped OVS_GUARDED;    /* Flow last dump time. */
>>>      uint64_t dump_seq OVS_GUARDED;            /* Tracks udpif->dump_seq. */
>>>      uint64_t reval_seq OVS_GUARDED;           /* Tracks udpif->reval_seq. */
>>>      enum ukey_state state OVS_GUARDED;        /* Tracks ukey lifetime. */
>>> @@ -1825,6 +1828,7 @@ ukey_create__(const struct nlattr *key, size_t key_len,
>>>      ukey->state_thread = ovsthread_id_self();
>>>      ukey->state_where = OVS_SOURCE_LOCATOR;
>>>      ukey->created = ukey->flow_time = time_msec();
>>> +    ukey->last_dumped = 0;
>>>      memset(&ukey->stats, 0, sizeof ukey->stats);
>>>      ukey->stats.used = used;
>>>      ukey->dp_layer = NULL;
>>> @@ -2456,7 +2460,14 @@ revalidate_ukey(struct udpif *udpif, struct udpif_key *ukey,
>>>          log_unexpected_stats_jump(ukey, stats);
>>>      }
>>>
>>> -    if (need_revalidate) {
>>> +    if ((ukey->last_dumped ? ukey->last_dumped : ukey->created)
>>> +        < udpif->dpif->current_ms - (2 * ofproto_max_idle)) {
>>
>> Should we also check that we actually tried to dump this flow a few times?
>> e.g. if the process was stuck doing something else or the idle timeout is
>> too small and we didn't have a chance to dump more than once?
> 
> Let me think about this a bit more, maybe we can do a miss count instead.
> 
>>> +        /* If the flow was not dumped for at least twice the idle time,
>>> +         * we can assume the datapath flow now longer exists and the ukey
>>
>> s/now/no/
> 
> ACK
> 
>>> +         * should be deleted. */
>>> +        COVERAGE_INC(revalidate_missing_dp_flow);
>>> +        *del_reason = FDR_FLOW_MISSING_DP;
>>> +    } else if (need_revalidate) {
>>>          if (should_revalidate(udpif, ukey, push.n_packets)) {
>>>              if (!ukey->xcache) {
>>>                  ukey->xcache = xlate_cache_new();
>>> @@ -2890,6 +2901,7 @@ revalidate(struct revalidator *revalidator)
>>>                  continue;
>>>              }
>>>
>>> +            ukey->last_dumped = now;
>>>              ukey->offloaded = f->attrs.offloaded;
>>>              if (!ukey->dp_layer
>>>                  || (!dpif_synced_dp_layers(udpif->dpif)
>>> diff --git a/tests/system-offloads-traffic.at b/tests/system-offloads-traffic.at
>>> index d1da33d96..a9416cbce 100644
>>> --- a/tests/system-offloads-traffic.at
>>> +++ b/tests/system-offloads-traffic.at
>>> @@ -93,6 +93,70 @@ AT_CHECK([ovs-appctl upcall/show | grep -E "offloaded flows : [[1-9]]"], [0], [i
>>>  OVS_TRAFFIC_VSWITCHD_STOP
>>>  AT_CLEANUP
>>>
>>> +AT_SETUP([offloads - Forced removed datapath entries])
>>
>> OVS_CHECK_TC_QDISC ?
> 
> Why would we need this?

We use TC utility and that macro checks that it is available.

> 
>>> +OVS_TRAFFIC_VSWITCHD_START([], [], [-- set Open_vSwitch . other_config:hw-offload=true])
>>> +
>>> +AT_CHECK([ovs-ofctl add-flow br0 "actions=normal"])
>>> +
>>> +ADD_NAMESPACES(at_ns0, at_ns1)
>>> +
>>> +ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24")
>>> +ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24")
>>> +AT_CHECK([ovs-appctl dpctl/dump-flows], [0], [ignore])
>>
>> Seems unnecessary.
> 
> ACK
> 
>>> +
>>> +AT_CHECK([ovs-appctl vlog/set ofproto_dpif_upcall:dbg])
>>> +
>>> +dnl Set idle timeout to 1 second.
>>> +AT_CHECK([ovs-vsctl set Open_vSwitch . other_config:max-idle=1000])
>>> +
>>> +NS_CHECK_EXEC([at_ns0], [ping -q -c 10 -i 0.1 -W 2 10.1.1.2 | FORMAT_PING], [0], [dnl
>>> +10 packets transmitted, 10 received, 0% packet loss, time 0ms
>>> +])
>>> +
>>> +AT_CHECK([ovs-appctl dpctl/dump-flows type=tc,offloaded | grep "eth_type(0x0800)" | DUMP_CLEAN_SORTED], [0], [dnl
>>> +in_port(2),eth(macs),eth_type(0x0800),ipv4(frag=no), packets:9, bytes:756, used:0.001s, actions:output
>>> +in_port(3),eth(macs),eth_type(0x0800),ipv4(frag=no), packets:9, bytes:756, used:0.001s, actions:output
>>> +])
>>> +
>>> +AT_CHECK([ovs-appctl dpctl/dump-flows type=tc,offloaded | grep -q "arp" ], [0])
>>
>> No need for ", [0]".
> 
> ACK
> 
>>> +
>>> +dnl Delete IPv4 entries, but keep the ARP ones.
>>> +AT_CHECK([tc filter del dev ovs-p0 ingress protocol ip pref 2])
>>> +AT_CHECK([tc filter del dev ovs-p1 ingress protocol ip pref 2])
>>> +
>>> +dnl Bring the remote ports down to avoid traffic.
>>> +AT_CHECK([ip -n at_ns0 link set p0 down])
>>> +AT_CHECK([ip -n at_ns1 link set p1 down])
>>> +
>>> +dnl Wait until the ARP flow has timed out.
>>> +OVS_WAIT_UNTIL([test `ovs-appctl dpctl/dump-flows type=tc,offloaded | \
>>> +                  grep "arp" | wc -l` -eq 0])
>>
>> OVS_WAIT_WHILE([... | grep -q "arp"])  should be simpler.
> 
> ACK
> 
>>> +
>>> +dnl Set max-idle to 10 ms, so we are sure the max-idle * 2 has been reached.
>>> +AT_CHECK([ovs-vsctl set Open_vSwitch . other_config:max-idle=10])
>>> +
>>> +dnl Add and delete a port to force the revalidation.
>>> +for i in `seq 3`; do
>>> +  REV_RECONFIGURE=$(ovs-appctl coverage/read-counter rev_reconfigure)
>>> +  AT_CHECK([ovs-vsctl add-port br0 ovs-p2 -- \
>>> +              set interface ovs-p2 type=internal])
>>> +  AT_CHECK([ovs-vsctl del-port ovs-p2 ])
>>> +  OVS_WAIT_UNTIL([test `ovs-appctl coverage/read-counter rev_reconfigure` \
>>> +                    -gt $REV_RECONFIGURE])
>>> +done
>>
>> It seems strange that we need to force revalidation this way.
>> Why the revalidator/wait with a 1 second sleep is not enough?
> 
> Because we need a ‘real’ revalidation run, not just gathering statistics,
> a configuration change will trigger this.

Such flows will have a dump_seq mismatch, so revalidate_ukey() should
be called form the sweep stage.  And reval_seq in this patch is evaluated
after the time check, i.e. 'need_revalidate' condition is checked after
the 'last_dumped' logic.  Why do we need an actual ukey revalidation here?
There is no point in re-translation of these ukeys.  Or what am I missing?

Also, there is no need for the 'last_dumped' check in case revalidate_ukey()
is called from the main revalidate() and not from the sweep.  Should the
logic be moved to revalidator_sweep__() in the 'else if' after 'seq_mismatch'
check?

> 
>>
>>> +
>>> +dnl Wait for another full round of revalidation.  After this all ukeys should
>>> +dnl be gone.
>>> +AT_CHECK([ovs-appctl revalidator/wait], [0])
>>> +AT_CHECK([ovs-appctl revalidator/wait], [0])
>>> +
>>> +dnl Make sure no more ukeys exists.
>>> +AT_CHECK([ovs-appctl upcall/show | grep '(keys' | awk '{print $3}' | \
>>> +            grep -qv '0)'], [1])
>>
>> awk is seemingly unnecessary.
>>
>> OVS_WAIT_WHILE([... | grep '(keys' | grep -qv '0)' ]) ?
> 
> ACK will clean this up.
> 
>>> +
>>> +OVS_TRAFFIC_VSWITCHD_STOP(["/failed to flow_del (No such file or directory)/d"])
>>> +AT_CLEANUP
>>> +
>>>  AT_SETUP([offloads - set ingress_policing_rate and ingress_policing_burst - offloads disabled])
>>>  AT_KEYWORDS([ingress_policing])
>>>  OVS_CHECK_TC_QDISC()
>>> diff --git a/utilities/usdt-scripts/flow_reval_monitor.py b/utilities/usdt-scripts/flow_reval_monitor.py
>>> index 28479a565..1de06c73b 100755
>>> --- a/utilities/usdt-scripts/flow_reval_monitor.py
>>> +++ b/utilities/usdt-scripts/flow_reval_monitor.py
>>> @@ -249,6 +249,7 @@ FdrReasons = IntEnum(
>>>          "FDR_BAD_ODP_FIT",
>>>          "FDR_FLOW_IDLE",
>>>          "FDR_FLOW_LIMIT",
>>> +        "FDR_FLOW_MISSING_DP",
>>>          "FDR_FLOW_WILDCARDED",
>>>          "FDR_NO_OFPROTO",
>>>          "FDR_PURGE",
>>> @@ -265,6 +266,7 @@ FdrReasonStrings = {
>>>      FdrReasons.FDR_BAD_ODP_FIT: "Bad ODP flow fit",
>>>      FdrReasons.FDR_FLOW_IDLE: "Flow idle timeout",
>>>      FdrReasons.FDR_FLOW_LIMIT: "Kill all flows condition reached",
>>> +    FdrReasons.FDR_FLOW_MISSING_DP: "Flow is missing from the datapath",
>>>      FdrReasons.FDR_FLOW_WILDCARDED: "Flow needs a narrower wildcard mask",
>>>      FdrReasons.FDR_NO_OFPROTO: "Bridge not found",
>>>      FdrReasons.FDR_PURGE: "User requested flow deletion",
>
diff mbox series

Patch

diff --git a/ofproto/ofproto-dpif-upcall.c b/ofproto/ofproto-dpif-upcall.c
index 4d39bc5a7..5c130b694 100644
--- a/ofproto/ofproto-dpif-upcall.c
+++ b/ofproto/ofproto-dpif-upcall.c
@@ -57,6 +57,7 @@  COVERAGE_DEFINE(dumped_inconsistent_flow);
 COVERAGE_DEFINE(dumped_new_flow);
 COVERAGE_DEFINE(handler_duplicate_upcall);
 COVERAGE_DEFINE(revalidate_missed_dp_flow);
+COVERAGE_DEFINE(revalidate_missing_dp_flow);
 COVERAGE_DEFINE(ukey_dp_change);
 COVERAGE_DEFINE(ukey_invalid_stat_reset);
 COVERAGE_DEFINE(ukey_replace_contention);
@@ -278,6 +279,7 @@  enum flow_del_reason {
     FDR_BAD_ODP_FIT,        /* Bad ODP flow fit. */
     FDR_FLOW_IDLE,          /* Flow idle timeout. */
     FDR_FLOW_LIMIT,         /* Kill all flows condition reached. */
+    FDR_FLOW_MISSING_DP,    /* Flow is missing from the datapath. */
     FDR_FLOW_WILDCARDED,    /* Flow needs a narrower wildcard mask. */
     FDR_NO_OFPROTO,         /* Bridge not found. */
     FDR_PURGE,              /* User requested flow deletion. */
@@ -315,6 +317,7 @@  struct udpif_key {
     struct dpif_flow_stats stats OVS_GUARDED; /* Last known stats.*/
     const char *dp_layer OVS_GUARDED;         /* Last known dp_layer. */
     long long int created OVS_GUARDED;        /* Estimate of creation time. */
+    long long int last_dumped OVS_GUARDED;    /* Flow last dump time. */
     uint64_t dump_seq OVS_GUARDED;            /* Tracks udpif->dump_seq. */
     uint64_t reval_seq OVS_GUARDED;           /* Tracks udpif->reval_seq. */
     enum ukey_state state OVS_GUARDED;        /* Tracks ukey lifetime. */
@@ -1825,6 +1828,7 @@  ukey_create__(const struct nlattr *key, size_t key_len,
     ukey->state_thread = ovsthread_id_self();
     ukey->state_where = OVS_SOURCE_LOCATOR;
     ukey->created = ukey->flow_time = time_msec();
+    ukey->last_dumped = 0;
     memset(&ukey->stats, 0, sizeof ukey->stats);
     ukey->stats.used = used;
     ukey->dp_layer = NULL;
@@ -2456,7 +2460,14 @@  revalidate_ukey(struct udpif *udpif, struct udpif_key *ukey,
         log_unexpected_stats_jump(ukey, stats);
     }
 
-    if (need_revalidate) {
+    if ((ukey->last_dumped ? ukey->last_dumped : ukey->created)
+        < udpif->dpif->current_ms - (2 * ofproto_max_idle)) {
+        /* If the flow was not dumped for at least twice the idle time,
+         * we can assume the datapath flow now longer exists and the ukey
+         * should be deleted. */
+        COVERAGE_INC(revalidate_missing_dp_flow);
+        *del_reason = FDR_FLOW_MISSING_DP;
+    } else if (need_revalidate) {
         if (should_revalidate(udpif, ukey, push.n_packets)) {
             if (!ukey->xcache) {
                 ukey->xcache = xlate_cache_new();
@@ -2890,6 +2901,7 @@  revalidate(struct revalidator *revalidator)
                 continue;
             }
 
+            ukey->last_dumped = now;
             ukey->offloaded = f->attrs.offloaded;
             if (!ukey->dp_layer
                 || (!dpif_synced_dp_layers(udpif->dpif)
diff --git a/tests/system-offloads-traffic.at b/tests/system-offloads-traffic.at
index d1da33d96..a9416cbce 100644
--- a/tests/system-offloads-traffic.at
+++ b/tests/system-offloads-traffic.at
@@ -93,6 +93,70 @@  AT_CHECK([ovs-appctl upcall/show | grep -E "offloaded flows : [[1-9]]"], [0], [i
 OVS_TRAFFIC_VSWITCHD_STOP
 AT_CLEANUP
 
+AT_SETUP([offloads - Forced removed datapath entries])
+OVS_TRAFFIC_VSWITCHD_START([], [], [-- set Open_vSwitch . other_config:hw-offload=true])
+
+AT_CHECK([ovs-ofctl add-flow br0 "actions=normal"])
+
+ADD_NAMESPACES(at_ns0, at_ns1)
+
+ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24")
+ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24")
+AT_CHECK([ovs-appctl dpctl/dump-flows], [0], [ignore])
+
+AT_CHECK([ovs-appctl vlog/set ofproto_dpif_upcall:dbg])
+
+dnl Set idle timeout to 1 second.
+AT_CHECK([ovs-vsctl set Open_vSwitch . other_config:max-idle=1000])
+
+NS_CHECK_EXEC([at_ns0], [ping -q -c 10 -i 0.1 -W 2 10.1.1.2 | FORMAT_PING], [0], [dnl
+10 packets transmitted, 10 received, 0% packet loss, time 0ms
+])
+
+AT_CHECK([ovs-appctl dpctl/dump-flows type=tc,offloaded | grep "eth_type(0x0800)" | DUMP_CLEAN_SORTED], [0], [dnl
+in_port(2),eth(macs),eth_type(0x0800),ipv4(frag=no), packets:9, bytes:756, used:0.001s, actions:output
+in_port(3),eth(macs),eth_type(0x0800),ipv4(frag=no), packets:9, bytes:756, used:0.001s, actions:output
+])
+
+AT_CHECK([ovs-appctl dpctl/dump-flows type=tc,offloaded | grep -q "arp" ], [0])
+
+dnl Delete IPv4 entries, but keep the ARP ones.
+AT_CHECK([tc filter del dev ovs-p0 ingress protocol ip pref 2])
+AT_CHECK([tc filter del dev ovs-p1 ingress protocol ip pref 2])
+
+dnl Bring the remote ports down to avoid traffic.
+AT_CHECK([ip -n at_ns0 link set p0 down])
+AT_CHECK([ip -n at_ns1 link set p1 down])
+
+dnl Wait until the ARP flow has timed out.
+OVS_WAIT_UNTIL([test `ovs-appctl dpctl/dump-flows type=tc,offloaded | \
+                  grep "arp" | wc -l` -eq 0])
+
+dnl Set max-idle to 10 ms, so we are sure the max-idle * 2 has been reached.
+AT_CHECK([ovs-vsctl set Open_vSwitch . other_config:max-idle=10])
+
+dnl Add and delete a port to force the revalidation.
+for i in `seq 3`; do
+  REV_RECONFIGURE=$(ovs-appctl coverage/read-counter rev_reconfigure)
+  AT_CHECK([ovs-vsctl add-port br0 ovs-p2 -- \
+              set interface ovs-p2 type=internal])
+  AT_CHECK([ovs-vsctl del-port ovs-p2 ])
+  OVS_WAIT_UNTIL([test `ovs-appctl coverage/read-counter rev_reconfigure` \
+                    -gt $REV_RECONFIGURE])
+done
+
+dnl Wait for another full round of revalidation.  After this all ukeys should
+dnl be gone.
+AT_CHECK([ovs-appctl revalidator/wait], [0])
+AT_CHECK([ovs-appctl revalidator/wait], [0])
+
+dnl Make sure no more ukeys exists.
+AT_CHECK([ovs-appctl upcall/show | grep '(keys' | awk '{print $3}' | \
+            grep -qv '0)'], [1])
+
+OVS_TRAFFIC_VSWITCHD_STOP(["/failed to flow_del (No such file or directory)/d"])
+AT_CLEANUP
+
 AT_SETUP([offloads - set ingress_policing_rate and ingress_policing_burst - offloads disabled])
 AT_KEYWORDS([ingress_policing])
 OVS_CHECK_TC_QDISC()
diff --git a/utilities/usdt-scripts/flow_reval_monitor.py b/utilities/usdt-scripts/flow_reval_monitor.py
index 28479a565..1de06c73b 100755
--- a/utilities/usdt-scripts/flow_reval_monitor.py
+++ b/utilities/usdt-scripts/flow_reval_monitor.py
@@ -249,6 +249,7 @@  FdrReasons = IntEnum(
         "FDR_BAD_ODP_FIT",
         "FDR_FLOW_IDLE",
         "FDR_FLOW_LIMIT",
+        "FDR_FLOW_MISSING_DP",
         "FDR_FLOW_WILDCARDED",
         "FDR_NO_OFPROTO",
         "FDR_PURGE",
@@ -265,6 +266,7 @@  FdrReasonStrings = {
     FdrReasons.FDR_BAD_ODP_FIT: "Bad ODP flow fit",
     FdrReasons.FDR_FLOW_IDLE: "Flow idle timeout",
     FdrReasons.FDR_FLOW_LIMIT: "Kill all flows condition reached",
+    FdrReasons.FDR_FLOW_MISSING_DP: "Flow is missing from the datapath",
     FdrReasons.FDR_FLOW_WILDCARDED: "Flow needs a narrower wildcard mask",
     FdrReasons.FDR_NO_OFPROTO: "Bridge not found",
     FdrReasons.FDR_PURGE: "User requested flow deletion",