Message ID | 20170816064748.7972-1-sysugaozhenyu@gmail.com |
---|---|
State | Superseded |
Delegated to: | Russell Bryant |
Headers | show |
Hi Zhenyu Gao Is it possible for you to add a test case for this scenario. This test on the master code( without your patch) should fail, and your patch should make the test pass. Thanks Anil On Wed, Aug 16, 2017 at 12:17 PM, Zhenyu Gao <sysugaozhenyu@gmail.com> wrote: > The bfd_calculate_chassis function calculates gateway's peer > datapaths to figure our which chassis should be add in bfd_chassis. > But in most circumstance, a gateway's peer datapath has NO chassis. > So it may disable BFD on some tunnel ports. Then a port on a remote > ovs cannot send packet out because it believes all remote gateway are > down. > > This patch will go though whole graph and visit all datapath's port > which has connect with gateway. > > Signed-off-by: Zhenyu Gao <sysugaozhenyu@gmail.com> > --- > ovn/controller/bfd.c | 102 ++++++++++++++++++++++++++++++ > ++++++++++++--------- > 1 file changed, 84 insertions(+), 18 deletions(-) > > diff --git a/ovn/controller/bfd.c b/ovn/controller/bfd.c > index d1448b1..dccfc98 100644 > --- a/ovn/controller/bfd.c > +++ b/ovn/controller/bfd.c > @@ -100,6 +100,88 @@ bfd_calculate_active_tunnels(const struct > ovsrec_bridge *br_int, > } > } > > +struct local_datapath_node { > + struct ovs_list node; > + struct local_datapath *dp; > +}; > + > +static void > +bfd_travel_gw_related_chassis(struct local_datapath *dp, > + const struct hmap *local_datapaths, > + struct ovsdb_idl_index_cursor *cursor, > + struct sbrec_port_binding *lpval, > + struct sset *bfd_chassis) > +{ > + struct ovs_list dp_list; > + const struct sbrec_port_binding *pb; > + struct sset visited_dp = SSET_INITIALIZER(&visited_dp); > + const char *dp_key; > + struct local_datapath_node *dp_binding; > + > + if (!(dp_key = smap_get(&dp->datapath->external_ids, > "logical-router")) && > + !(dp_key = smap_get(&dp->datapath->external_ids, > "logical-switch"))) { > + VLOG_INFO("datapath has no uuid, cannot travel graph"); > + return; > + } > + > + sset_add(&visited_dp, dp_key); > + > + ovs_list_init(&dp_list); > + dp_binding = xmalloc(sizeof *dp_binding); > + dp_binding->dp = dp; > + ovs_list_push_back(&dp_list, &dp_binding->node); > + > + /* > + * Go through whole graph to figure out all chassis which may deliver > + * packetsto gateway. */ > + do { > + dp_binding = CONTAINER_OF(ovs_list_pop_front(&dp_list), > + struct local_datapath_node, node); > + dp = dp_binding->dp; > + free(dp_binding); > + for (size_t i = 0; i < dp->n_peer_dps; i++) { > + const struct sbrec_datapath_binding *pdp = dp->peer_dps[i]; > + if (!pdp) { > + continue; > + } > + > + if (!(dp_key = smap_get(&pdp->external_ids, > "logical-router")) && > + !(dp_key = smap_get(&pdp->external_ids, > "logical-switch"))) { > + continue; > + } > + > + if (sset_contains(&visited_dp, dp_key)) { > + continue; > + } > + > + sset_add(&visited_dp, dp_key); > + > + struct hmap_node *node = hmap_first_with_hash(local_ > datapaths, > + > pdp->tunnel_key); > + if (!node) { > + continue; > + } > + > + dp_binding = xmalloc(sizeof *dp_binding); > + dp_binding->dp = CONTAINER_OF(node, struct local_datapath, > + hmap_node); > + ovs_list_push_back(&dp_list, &dp_binding->node); > + > + sbrec_port_binding_index_set_datapath(lpval, pdp); > + SBREC_PORT_BINDING_FOR_EACH_EQUAL (pb, cursor, lpval) { > + if (pb->chassis) { > + const char *chassis_name = pb->chassis->name; > + if (chassis_name) { > + sset_add(bfd_chassis, chassis_name); > + } > + } > + } > + } > + } while (!ovs_list_is_empty(&dp_list)); > + > + sset_destroy(&visited_dp); > +} > + > static void > bfd_calculate_chassis(struct controller_ctx *ctx, > const struct sbrec_chassis *our_chassis, > @@ -155,24 +237,8 @@ bfd_calculate_chassis(struct controller_ctx *ctx, > } > } > if (our_chassis_is_gw_for_dp) { > - for (size_t i = 0; i < dp->n_peer_dps; i++) { > - const struct sbrec_datapath_binding *pdp = > dp->peer_dps[i]; > - if (!pdp) { > - continue; > - } > - > - sbrec_port_binding_index_set_datapath(lpval, pdp); > - SBREC_PORT_BINDING_FOR_EACH_EQUAL (pb, &cursor, lpval) { > - if (pb->chassis) { > - /* Gateway node has to enable bfd to all nodes > hosting > - * connected network ports */ > - const char *chassis_name = pb->chassis->name; > - if (chassis_name) { > - sset_add(bfd_chassis, chassis_name); > - } > - } > - } > - } > + bfd_travel_gw_related_chassis(dp, local_datapaths, &cursor, > + lpval, bfd_chassis); > } > } > sbrec_port_binding_index_destroy_row(lpval); > -- > 1.8.3.1 > >
Thanks for the suggestion. A testcase would be add in ovn testings. But I am not familiar with ovn test and busy on other stuff now. Since this issue affects many consumers who try to use HA gateways. I prefer to push this fix in ovs master first, then we have more time to make a testcase for it. Thanks Zhenyu Gao 2017-08-18 1:21 GMT+08:00 Anil Venkata <anilvenkata@redhat.com>: > Hi Zhenyu Gao > > Is it possible for you to add a test case for this scenario. This test on > the master code( without your patch) should fail, and your patch should > make the test pass. > > Thanks > Anil > > On Wed, Aug 16, 2017 at 12:17 PM, Zhenyu Gao <sysugaozhenyu@gmail.com> > wrote: > >> The bfd_calculate_chassis function calculates gateway's peer >> datapaths to figure our which chassis should be add in bfd_chassis. >> But in most circumstance, a gateway's peer datapath has NO chassis. >> So it may disable BFD on some tunnel ports. Then a port on a remote >> ovs cannot send packet out because it believes all remote gateway are >> down. >> >> This patch will go though whole graph and visit all datapath's port >> which has connect with gateway. >> >> Signed-off-by: Zhenyu Gao <sysugaozhenyu@gmail.com> >> --- >> ovn/controller/bfd.c | 102 ++++++++++++++++++++++++++++++ >> ++++++++++++--------- >> 1 file changed, 84 insertions(+), 18 deletions(-) >> >> diff --git a/ovn/controller/bfd.c b/ovn/controller/bfd.c >> index d1448b1..dccfc98 100644 >> --- a/ovn/controller/bfd.c >> +++ b/ovn/controller/bfd.c >> @@ -100,6 +100,88 @@ bfd_calculate_active_tunnels(const struct >> ovsrec_bridge *br_int, >> } >> } >> >> +struct local_datapath_node { >> + struct ovs_list node; >> + struct local_datapath *dp; >> +}; >> + >> +static void >> +bfd_travel_gw_related_chassis(struct local_datapath *dp, >> + const struct hmap *local_datapaths, >> + struct ovsdb_idl_index_cursor *cursor, >> + struct sbrec_port_binding *lpval, >> + struct sset *bfd_chassis) >> +{ >> + struct ovs_list dp_list; >> + const struct sbrec_port_binding *pb; >> + struct sset visited_dp = SSET_INITIALIZER(&visited_dp); >> + const char *dp_key; >> + struct local_datapath_node *dp_binding; >> + >> + if (!(dp_key = smap_get(&dp->datapath->external_ids, >> "logical-router")) && >> + !(dp_key = smap_get(&dp->datapath->external_ids, >> "logical-switch"))) { >> + VLOG_INFO("datapath has no uuid, cannot travel graph"); >> + return; >> + } >> + >> + sset_add(&visited_dp, dp_key); >> + >> + ovs_list_init(&dp_list); >> + dp_binding = xmalloc(sizeof *dp_binding); >> + dp_binding->dp = dp; >> + ovs_list_push_back(&dp_list, &dp_binding->node); >> + >> + /* >> + * Go through whole graph to figure out all chassis which may deliver >> + * packetsto gateway. */ >> + do { >> + dp_binding = CONTAINER_OF(ovs_list_pop_front(&dp_list), >> + struct local_datapath_node, node); >> + dp = dp_binding->dp; >> + free(dp_binding); >> + for (size_t i = 0; i < dp->n_peer_dps; i++) { >> + const struct sbrec_datapath_binding *pdp = dp->peer_dps[i]; >> + if (!pdp) { >> + continue; >> + } >> + >> + if (!(dp_key = smap_get(&pdp->external_ids, >> "logical-router")) && >> + !(dp_key = smap_get(&pdp->external_ids, >> "logical-switch"))) { >> + continue; >> + } >> + >> + if (sset_contains(&visited_dp, dp_key)) { >> + continue; >> + } >> + >> + sset_add(&visited_dp, dp_key); >> + >> + struct hmap_node *node = hmap_first_with_hash(local_dat >> apaths, >> + >> pdp->tunnel_key); >> + if (!node) { >> + continue; >> + } >> + >> + dp_binding = xmalloc(sizeof *dp_binding); >> + dp_binding->dp = CONTAINER_OF(node, struct local_datapath, >> + hmap_node); >> + ovs_list_push_back(&dp_list, &dp_binding->node); >> + >> + sbrec_port_binding_index_set_datapath(lpval, pdp); >> + SBREC_PORT_BINDING_FOR_EACH_EQUAL (pb, cursor, lpval) { >> + if (pb->chassis) { >> + const char *chassis_name = pb->chassis->name; >> + if (chassis_name) { >> + sset_add(bfd_chassis, chassis_name); >> + } >> + } >> + } >> + } >> + } while (!ovs_list_is_empty(&dp_list)); >> + >> + sset_destroy(&visited_dp); >> +} >> + >> static void >> bfd_calculate_chassis(struct controller_ctx *ctx, >> const struct sbrec_chassis *our_chassis, >> @@ -155,24 +237,8 @@ bfd_calculate_chassis(struct controller_ctx *ctx, >> } >> } >> if (our_chassis_is_gw_for_dp) { >> - for (size_t i = 0; i < dp->n_peer_dps; i++) { >> - const struct sbrec_datapath_binding *pdp = >> dp->peer_dps[i]; >> - if (!pdp) { >> - continue; >> - } >> - >> - sbrec_port_binding_index_set_datapath(lpval, pdp); >> - SBREC_PORT_BINDING_FOR_EACH_EQUAL (pb, &cursor, lpval) { >> - if (pb->chassis) { >> - /* Gateway node has to enable bfd to all nodes >> hosting >> - * connected network ports */ >> - const char *chassis_name = pb->chassis->name; >> - if (chassis_name) { >> - sset_add(bfd_chassis, chassis_name); >> - } >> - } >> - } >> - } >> + bfd_travel_gw_related_chassis(dp, local_datapaths, &cursor, >> + lpval, bfd_chassis); >> } >> } >> sbrec_port_binding_index_destroy_row(lpval); >> -- >> 1.8.3.1 >> >> >
On Fri, Aug 18, 2017 at 3:24 AM, Gao Zhenyu <sysugaozhenyu@gmail.com> wrote: > Thanks for the suggestion. A testcase would be add in ovn testings. But I > am not familiar with ovn test and busy on other stuff now. > Since this issue affects many consumers who try to use HA gateways. I > prefer to push this fix in ovs master first, then we have more time to make > a testcase for it. > I know your feeling Gao, but, IMO it's better if we put the tests and code together, it's has a couple of benefits: 1) Your expected behaviour can more easily be understood from the patch itself. 2) Your behaviour changes are locked in, because if later patches break them the test will fail. Anil, I'll be away for the next two weeks, but could you give Gao a hand to write the tests quickly? Then we need to backport this to 2.8.0 > > Thanks > Zhenyu Gao > > 2017-08-18 1:21 GMT+08:00 Anil Venkata <anilvenkata@redhat.com>: > >> Hi Zhenyu Gao >> >> Is it possible for you to add a test case for this scenario. This test on >> the master code( without your patch) should fail, and your patch should >> make the test pass. >> >> Thanks >> Anil >> >> On Wed, Aug 16, 2017 at 12:17 PM, Zhenyu Gao <sysugaozhenyu@gmail.com> >> wrote: >> >>> The bfd_calculate_chassis function calculates gateway's peer >>> datapaths to figure our which chassis should be add in bfd_chassis. >>> But in most circumstance, a gateway's peer datapath has NO chassis. >>> So it may disable BFD on some tunnel ports. Then a port on a remote >>> ovs cannot send packet out because it believes all remote gateway are >>> down. >>> >>> This patch will go though whole graph and visit all datapath's port >>> which has connect with gateway. >>> >>> Signed-off-by: Zhenyu Gao <sysugaozhenyu@gmail.com> >>> --- >>> ovn/controller/bfd.c | 102 ++++++++++++++++++++++++++++++ >>> ++++++++++++--------- >>> 1 file changed, 84 insertions(+), 18 deletions(-) >>> >>> diff --git a/ovn/controller/bfd.c b/ovn/controller/bfd.c >>> index d1448b1..dccfc98 100644 >>> --- a/ovn/controller/bfd.c >>> +++ b/ovn/controller/bfd.c >>> @@ -100,6 +100,88 @@ bfd_calculate_active_tunnels(const struct >>> ovsrec_bridge *br_int, >>> } >>> } >>> >>> +struct local_datapath_node { >>> + struct ovs_list node; >>> + struct local_datapath *dp; >>> +}; >>> + >>> +static void >>> +bfd_travel_gw_related_chassis(struct local_datapath *dp, >>> + const struct hmap *local_datapaths, >>> + struct ovsdb_idl_index_cursor *cursor, >>> + struct sbrec_port_binding *lpval, >>> + struct sset *bfd_chassis) >>> +{ >>> + struct ovs_list dp_list; >>> + const struct sbrec_port_binding *pb; >>> + struct sset visited_dp = SSET_INITIALIZER(&visited_dp); >>> + const char *dp_key; >>> + struct local_datapath_node *dp_binding; >>> + >>> + if (!(dp_key = smap_get(&dp->datapath->external_ids, >>> "logical-router")) && >>> + !(dp_key = smap_get(&dp->datapath->external_ids, >>> "logical-switch"))) { >>> + VLOG_INFO("datapath has no uuid, cannot travel graph"); >>> + return; >>> + } >>> + >>> + sset_add(&visited_dp, dp_key); >>> + >>> + ovs_list_init(&dp_list); >>> + dp_binding = xmalloc(sizeof *dp_binding); >>> + dp_binding->dp = dp; >>> + ovs_list_push_back(&dp_list, &dp_binding->node); >>> + >>> + /* >>> + * Go through whole graph to figure out all chassis which may >>> deliver >>> + * packetsto gateway. */ >>> + do { >>> + dp_binding = CONTAINER_OF(ovs_list_pop_front(&dp_list), >>> + struct local_datapath_node, node); >>> + dp = dp_binding->dp; >>> + free(dp_binding); >>> + for (size_t i = 0; i < dp->n_peer_dps; i++) { >>> + const struct sbrec_datapath_binding *pdp = dp->peer_dps[i]; >>> + if (!pdp) { >>> + continue; >>> + } >>> + >>> + if (!(dp_key = smap_get(&pdp->external_ids, >>> "logical-router")) && >>> + !(dp_key = smap_get(&pdp->external_ids, >>> "logical-switch"))) { >>> + continue; >>> + } >>> + >>> + if (sset_contains(&visited_dp, dp_key)) { >>> + continue; >>> + } >>> + >>> + sset_add(&visited_dp, dp_key); >>> + >>> + struct hmap_node *node = hmap_first_with_hash(local_dat >>> apaths, >>> + >>> pdp->tunnel_key); >>> + if (!node) { >>> + continue; >>> + } >>> + >>> + dp_binding = xmalloc(sizeof *dp_binding); >>> + dp_binding->dp = CONTAINER_OF(node, struct local_datapath, >>> + hmap_node); >>> + ovs_list_push_back(&dp_list, &dp_binding->node); >>> + >>> + sbrec_port_binding_index_set_datapath(lpval, pdp); >>> + SBREC_PORT_BINDING_FOR_EACH_EQUAL (pb, cursor, lpval) { >>> + if (pb->chassis) { >>> + const char *chassis_name = pb->chassis->name; >>> + if (chassis_name) { >>> + sset_add(bfd_chassis, chassis_name); >>> + } >>> + } >>> + } >>> + } >>> + } while (!ovs_list_is_empty(&dp_list)); >>> + >>> + sset_destroy(&visited_dp); >>> +} >>> + >>> static void >>> bfd_calculate_chassis(struct controller_ctx *ctx, >>> const struct sbrec_chassis *our_chassis, >>> @@ -155,24 +237,8 @@ bfd_calculate_chassis(struct controller_ctx *ctx, >>> } >>> } >>> if (our_chassis_is_gw_for_dp) { >>> - for (size_t i = 0; i < dp->n_peer_dps; i++) { >>> - const struct sbrec_datapath_binding *pdp = >>> dp->peer_dps[i]; >>> - if (!pdp) { >>> - continue; >>> - } >>> - >>> - sbrec_port_binding_index_set_datapath(lpval, pdp); >>> - SBREC_PORT_BINDING_FOR_EACH_EQUAL (pb, &cursor, lpval) >>> { >>> - if (pb->chassis) { >>> - /* Gateway node has to enable bfd to all nodes >>> hosting >>> - * connected network ports */ >>> - const char *chassis_name = pb->chassis->name; >>> - if (chassis_name) { >>> - sset_add(bfd_chassis, chassis_name); >>> - } >>> - } >>> - } >>> - } >>> + bfd_travel_gw_related_chassis(dp, local_datapaths, &cursor, >>> + lpval, bfd_chassis); >>> } >>> } >>> sbrec_port_binding_index_destroy_row(lpval); >>> -- >>> 1.8.3.1 >>> >>> >> >
Thanks for the comments. OK, I see. I will revise it and add testing in ovn tests, but it may be in next week. Thanks Zhenyu Gao 2017-08-18 14:55 GMT+08:00 Miguel Angel Ajo Pelayo <majopela@redhat.com>: > > > On Fri, Aug 18, 2017 at 3:24 AM, Gao Zhenyu <sysugaozhenyu@gmail.com> > wrote: > >> Thanks for the suggestion. A testcase would be add in ovn testings. But >> I am not familiar with ovn test and busy on other stuff now. >> Since this issue affects many consumers who try to use HA gateways. I >> prefer to push this fix in ovs master first, then we have more time to make >> a testcase for it. >> > > > I know your feeling Gao, but, IMO it's better if we put the tests and code > together, it's has a couple of benefits: > > 1) Your expected behaviour can more easily be understood from the patch > itself. > > 2) Your behaviour changes are locked in, because if later patches break > them the test will fail. > > > Anil, I'll be away for the next two weeks, but could you give Gao a hand > to write the tests quickly? > > Then we need to backport this to 2.8.0 > > >> >> Thanks >> Zhenyu Gao >> >> 2017-08-18 1:21 GMT+08:00 Anil Venkata <anilvenkata@redhat.com>: >> >>> Hi Zhenyu Gao >>> >>> Is it possible for you to add a test case for this scenario. This test >>> on the master code( without your patch) should fail, and your patch should >>> make the test pass. >>> >>> Thanks >>> Anil >>> >>> On Wed, Aug 16, 2017 at 12:17 PM, Zhenyu Gao <sysugaozhenyu@gmail.com> >>> wrote: >>> >>>> The bfd_calculate_chassis function calculates gateway's peer >>>> datapaths to figure our which chassis should be add in bfd_chassis. >>>> But in most circumstance, a gateway's peer datapath has NO chassis. >>>> So it may disable BFD on some tunnel ports. Then a port on a remote >>>> ovs cannot send packet out because it believes all remote gateway are >>>> down. >>>> >>>> This patch will go though whole graph and visit all datapath's port >>>> which has connect with gateway. >>>> >>>> Signed-off-by: Zhenyu Gao <sysugaozhenyu@gmail.com> >>>> --- >>>> ovn/controller/bfd.c | 102 ++++++++++++++++++++++++++++++ >>>> ++++++++++++--------- >>>> 1 file changed, 84 insertions(+), 18 deletions(-) >>>> >>>> diff --git a/ovn/controller/bfd.c b/ovn/controller/bfd.c >>>> index d1448b1..dccfc98 100644 >>>> --- a/ovn/controller/bfd.c >>>> +++ b/ovn/controller/bfd.c >>>> @@ -100,6 +100,88 @@ bfd_calculate_active_tunnels(const struct >>>> ovsrec_bridge *br_int, >>>> } >>>> } >>>> >>>> +struct local_datapath_node { >>>> + struct ovs_list node; >>>> + struct local_datapath *dp; >>>> +}; >>>> + >>>> +static void >>>> +bfd_travel_gw_related_chassis(struct local_datapath *dp, >>>> + const struct hmap *local_datapaths, >>>> + struct ovsdb_idl_index_cursor *cursor, >>>> + struct sbrec_port_binding *lpval, >>>> + struct sset *bfd_chassis) >>>> +{ >>>> + struct ovs_list dp_list; >>>> + const struct sbrec_port_binding *pb; >>>> + struct sset visited_dp = SSET_INITIALIZER(&visited_dp); >>>> + const char *dp_key; >>>> + struct local_datapath_node *dp_binding; >>>> + >>>> + if (!(dp_key = smap_get(&dp->datapath->external_ids, >>>> "logical-router")) && >>>> + !(dp_key = smap_get(&dp->datapath->external_ids, >>>> "logical-switch"))) { >>>> + VLOG_INFO("datapath has no uuid, cannot travel graph"); >>>> + return; >>>> + } >>>> + >>>> + sset_add(&visited_dp, dp_key); >>>> + >>>> + ovs_list_init(&dp_list); >>>> + dp_binding = xmalloc(sizeof *dp_binding); >>>> + dp_binding->dp = dp; >>>> + ovs_list_push_back(&dp_list, &dp_binding->node); >>>> + >>>> + /* >>>> + * Go through whole graph to figure out all chassis which may >>>> deliver >>>> + * packetsto gateway. */ >>>> + do { >>>> + dp_binding = CONTAINER_OF(ovs_list_pop_front(&dp_list), >>>> + struct local_datapath_node, node); >>>> + dp = dp_binding->dp; >>>> + free(dp_binding); >>>> + for (size_t i = 0; i < dp->n_peer_dps; i++) { >>>> + const struct sbrec_datapath_binding *pdp = dp->peer_dps[i]; >>>> + if (!pdp) { >>>> + continue; >>>> + } >>>> + >>>> + if (!(dp_key = smap_get(&pdp->external_ids, >>>> "logical-router")) && >>>> + !(dp_key = smap_get(&pdp->external_ids, >>>> "logical-switch"))) { >>>> + continue; >>>> + } >>>> + >>>> + if (sset_contains(&visited_dp, dp_key)) { >>>> + continue; >>>> + } >>>> + >>>> + sset_add(&visited_dp, dp_key); >>>> + >>>> + struct hmap_node *node = hmap_first_with_hash(local_dat >>>> apaths, >>>> + >>>> pdp->tunnel_key); >>>> + if (!node) { >>>> + continue; >>>> + } >>>> + >>>> + dp_binding = xmalloc(sizeof *dp_binding); >>>> + dp_binding->dp = CONTAINER_OF(node, struct local_datapath, >>>> + hmap_node); >>>> + ovs_list_push_back(&dp_list, &dp_binding->node); >>>> + >>>> + sbrec_port_binding_index_set_datapath(lpval, pdp); >>>> + SBREC_PORT_BINDING_FOR_EACH_EQUAL (pb, cursor, lpval) { >>>> + if (pb->chassis) { >>>> + const char *chassis_name = pb->chassis->name; >>>> + if (chassis_name) { >>>> + sset_add(bfd_chassis, chassis_name); >>>> + } >>>> + } >>>> + } >>>> + } >>>> + } while (!ovs_list_is_empty(&dp_list)); >>>> + >>>> + sset_destroy(&visited_dp); >>>> +} >>>> + >>>> static void >>>> bfd_calculate_chassis(struct controller_ctx *ctx, >>>> const struct sbrec_chassis *our_chassis, >>>> @@ -155,24 +237,8 @@ bfd_calculate_chassis(struct controller_ctx *ctx, >>>> } >>>> } >>>> if (our_chassis_is_gw_for_dp) { >>>> - for (size_t i = 0; i < dp->n_peer_dps; i++) { >>>> - const struct sbrec_datapath_binding *pdp = >>>> dp->peer_dps[i]; >>>> - if (!pdp) { >>>> - continue; >>>> - } >>>> - >>>> - sbrec_port_binding_index_set_datapath(lpval, pdp); >>>> - SBREC_PORT_BINDING_FOR_EACH_EQUAL (pb, &cursor, >>>> lpval) { >>>> - if (pb->chassis) { >>>> - /* Gateway node has to enable bfd to all nodes >>>> hosting >>>> - * connected network ports */ >>>> - const char *chassis_name = pb->chassis->name; >>>> - if (chassis_name) { >>>> - sset_add(bfd_chassis, chassis_name); >>>> - } >>>> - } >>>> - } >>>> - } >>>> + bfd_travel_gw_related_chassis(dp, local_datapaths, >>>> &cursor, >>>> + lpval, bfd_chassis); >>>> } >>>> } >>>> sbrec_port_binding_index_destroy_row(lpval); >>>> -- >>>> 1.8.3.1 >>>> >>>> >>> >> >
Please take a look at version 2: https://patchwork.ozlabs.org/patch/803731/ :) Thanks Zhenyu Gao 2017-08-18 15:15 GMT+08:00 Gao Zhenyu <sysugaozhenyu@gmail.com>: > Thanks for the comments. > OK, I see. I will revise it and add testing in ovn tests, but it may be in > next week. > > Thanks > Zhenyu Gao > > 2017-08-18 14:55 GMT+08:00 Miguel Angel Ajo Pelayo <majopela@redhat.com>: > >> >> >> On Fri, Aug 18, 2017 at 3:24 AM, Gao Zhenyu <sysugaozhenyu@gmail.com> >> wrote: >> >>> Thanks for the suggestion. A testcase would be add in ovn testings. But >>> I am not familiar with ovn test and busy on other stuff now. >>> Since this issue affects many consumers who try to use HA gateways. I >>> prefer to push this fix in ovs master first, then we have more time to make >>> a testcase for it. >>> >> >> >> I know your feeling Gao, but, IMO it's better if we put the tests and >> code together, it's has a couple of benefits: >> >> 1) Your expected behaviour can more easily be understood from the patch >> itself. >> >> 2) Your behaviour changes are locked in, because if later patches break >> them the test will fail. >> >> >> Anil, I'll be away for the next two weeks, but could you give Gao a hand >> to write the tests quickly? >> >> Then we need to backport this to 2.8.0 >> >> >>> >>> Thanks >>> Zhenyu Gao >>> >>> 2017-08-18 1:21 GMT+08:00 Anil Venkata <anilvenkata@redhat.com>: >>> >>>> Hi Zhenyu Gao >>>> >>>> Is it possible for you to add a test case for this scenario. This test >>>> on the master code( without your patch) should fail, and your patch should >>>> make the test pass. >>>> >>>> Thanks >>>> Anil >>>> >>>> On Wed, Aug 16, 2017 at 12:17 PM, Zhenyu Gao <sysugaozhenyu@gmail.com> >>>> wrote: >>>> >>>>> The bfd_calculate_chassis function calculates gateway's peer >>>>> datapaths to figure our which chassis should be add in bfd_chassis. >>>>> But in most circumstance, a gateway's peer datapath has NO chassis. >>>>> So it may disable BFD on some tunnel ports. Then a port on a remote >>>>> ovs cannot send packet out because it believes all remote gateway are >>>>> down. >>>>> >>>>> This patch will go though whole graph and visit all datapath's port >>>>> which has connect with gateway. >>>>> >>>>> Signed-off-by: Zhenyu Gao <sysugaozhenyu@gmail.com> >>>>> --- >>>>> ovn/controller/bfd.c | 102 ++++++++++++++++++++++++++++++ >>>>> ++++++++++++--------- >>>>> 1 file changed, 84 insertions(+), 18 deletions(-) >>>>> >>>>> diff --git a/ovn/controller/bfd.c b/ovn/controller/bfd.c >>>>> index d1448b1..dccfc98 100644 >>>>> --- a/ovn/controller/bfd.c >>>>> +++ b/ovn/controller/bfd.c >>>>> @@ -100,6 +100,88 @@ bfd_calculate_active_tunnels(const struct >>>>> ovsrec_bridge *br_int, >>>>> } >>>>> } >>>>> >>>>> +struct local_datapath_node { >>>>> + struct ovs_list node; >>>>> + struct local_datapath *dp; >>>>> +}; >>>>> + >>>>> +static void >>>>> +bfd_travel_gw_related_chassis(struct local_datapath *dp, >>>>> + const struct hmap *local_datapaths, >>>>> + struct ovsdb_idl_index_cursor *cursor, >>>>> + struct sbrec_port_binding *lpval, >>>>> + struct sset *bfd_chassis) >>>>> +{ >>>>> + struct ovs_list dp_list; >>>>> + const struct sbrec_port_binding *pb; >>>>> + struct sset visited_dp = SSET_INITIALIZER(&visited_dp); >>>>> + const char *dp_key; >>>>> + struct local_datapath_node *dp_binding; >>>>> + >>>>> + if (!(dp_key = smap_get(&dp->datapath->external_ids, >>>>> "logical-router")) && >>>>> + !(dp_key = smap_get(&dp->datapath->external_ids, >>>>> "logical-switch"))) { >>>>> + VLOG_INFO("datapath has no uuid, cannot travel graph"); >>>>> + return; >>>>> + } >>>>> + >>>>> + sset_add(&visited_dp, dp_key); >>>>> + >>>>> + ovs_list_init(&dp_list); >>>>> + dp_binding = xmalloc(sizeof *dp_binding); >>>>> + dp_binding->dp = dp; >>>>> + ovs_list_push_back(&dp_list, &dp_binding->node); >>>>> + >>>>> + /* >>>>> + * Go through whole graph to figure out all chassis which may >>>>> deliver >>>>> + * packetsto gateway. */ >>>>> + do { >>>>> + dp_binding = CONTAINER_OF(ovs_list_pop_front(&dp_list), >>>>> + struct local_datapath_node, node); >>>>> + dp = dp_binding->dp; >>>>> + free(dp_binding); >>>>> + for (size_t i = 0; i < dp->n_peer_dps; i++) { >>>>> + const struct sbrec_datapath_binding *pdp = >>>>> dp->peer_dps[i]; >>>>> + if (!pdp) { >>>>> + continue; >>>>> + } >>>>> + >>>>> + if (!(dp_key = smap_get(&pdp->external_ids, >>>>> "logical-router")) && >>>>> + !(dp_key = smap_get(&pdp->external_ids, >>>>> "logical-switch"))) { >>>>> + continue; >>>>> + } >>>>> + >>>>> + if (sset_contains(&visited_dp, dp_key)) { >>>>> + continue; >>>>> + } >>>>> + >>>>> + sset_add(&visited_dp, dp_key); >>>>> + >>>>> + struct hmap_node *node = hmap_first_with_hash(local_dat >>>>> apaths, >>>>> + >>>>> pdp->tunnel_key); >>>>> + if (!node) { >>>>> + continue; >>>>> + } >>>>> + >>>>> + dp_binding = xmalloc(sizeof *dp_binding); >>>>> + dp_binding->dp = CONTAINER_OF(node, struct local_datapath, >>>>> + hmap_node); >>>>> + ovs_list_push_back(&dp_list, &dp_binding->node); >>>>> + >>>>> + sbrec_port_binding_index_set_datapath(lpval, pdp); >>>>> + SBREC_PORT_BINDING_FOR_EACH_EQUAL (pb, cursor, lpval) { >>>>> + if (pb->chassis) { >>>>> + const char *chassis_name = pb->chassis->name; >>>>> + if (chassis_name) { >>>>> + sset_add(bfd_chassis, chassis_name); >>>>> + } >>>>> + } >>>>> + } >>>>> + } >>>>> + } while (!ovs_list_is_empty(&dp_list)); >>>>> + >>>>> + sset_destroy(&visited_dp); >>>>> +} >>>>> + >>>>> static void >>>>> bfd_calculate_chassis(struct controller_ctx *ctx, >>>>> const struct sbrec_chassis *our_chassis, >>>>> @@ -155,24 +237,8 @@ bfd_calculate_chassis(struct controller_ctx *ctx, >>>>> } >>>>> } >>>>> if (our_chassis_is_gw_for_dp) { >>>>> - for (size_t i = 0; i < dp->n_peer_dps; i++) { >>>>> - const struct sbrec_datapath_binding *pdp = >>>>> dp->peer_dps[i]; >>>>> - if (!pdp) { >>>>> - continue; >>>>> - } >>>>> - >>>>> - sbrec_port_binding_index_set_datapath(lpval, pdp); >>>>> - SBREC_PORT_BINDING_FOR_EACH_EQUAL (pb, &cursor, >>>>> lpval) { >>>>> - if (pb->chassis) { >>>>> - /* Gateway node has to enable bfd to all >>>>> nodes hosting >>>>> - * connected network ports */ >>>>> - const char *chassis_name = pb->chassis->name; >>>>> - if (chassis_name) { >>>>> - sset_add(bfd_chassis, chassis_name); >>>>> - } >>>>> - } >>>>> - } >>>>> - } >>>>> + bfd_travel_gw_related_chassis(dp, local_datapaths, >>>>> &cursor, >>>>> + lpval, bfd_chassis); >>>>> } >>>>> } >>>>> sbrec_port_binding_index_destroy_row(lpval); >>>>> -- >>>>> 1.8.3.1 >>>>> >>>>> >>>> >>> >> >
diff --git a/ovn/controller/bfd.c b/ovn/controller/bfd.c index d1448b1..dccfc98 100644 --- a/ovn/controller/bfd.c +++ b/ovn/controller/bfd.c @@ -100,6 +100,88 @@ bfd_calculate_active_tunnels(const struct ovsrec_bridge *br_int, } } +struct local_datapath_node { + struct ovs_list node; + struct local_datapath *dp; +}; + +static void +bfd_travel_gw_related_chassis(struct local_datapath *dp, + const struct hmap *local_datapaths, + struct ovsdb_idl_index_cursor *cursor, + struct sbrec_port_binding *lpval, + struct sset *bfd_chassis) +{ + struct ovs_list dp_list; + const struct sbrec_port_binding *pb; + struct sset visited_dp = SSET_INITIALIZER(&visited_dp); + const char *dp_key; + struct local_datapath_node *dp_binding; + + if (!(dp_key = smap_get(&dp->datapath->external_ids, "logical-router")) && + !(dp_key = smap_get(&dp->datapath->external_ids, "logical-switch"))) { + VLOG_INFO("datapath has no uuid, cannot travel graph"); + return; + } + + sset_add(&visited_dp, dp_key); + + ovs_list_init(&dp_list); + dp_binding = xmalloc(sizeof *dp_binding); + dp_binding->dp = dp; + ovs_list_push_back(&dp_list, &dp_binding->node); + + /* + * Go through whole graph to figure out all chassis which may deliver + * packetsto gateway. */ + do { + dp_binding = CONTAINER_OF(ovs_list_pop_front(&dp_list), + struct local_datapath_node, node); + dp = dp_binding->dp; + free(dp_binding); + for (size_t i = 0; i < dp->n_peer_dps; i++) { + const struct sbrec_datapath_binding *pdp = dp->peer_dps[i]; + if (!pdp) { + continue; + } + + if (!(dp_key = smap_get(&pdp->external_ids, "logical-router")) && + !(dp_key = smap_get(&pdp->external_ids, "logical-switch"))) { + continue; + } + + if (sset_contains(&visited_dp, dp_key)) { + continue; + } + + sset_add(&visited_dp, dp_key); + + struct hmap_node *node = hmap_first_with_hash(local_datapaths, + pdp->tunnel_key); + if (!node) { + continue; + } + + dp_binding = xmalloc(sizeof *dp_binding); + dp_binding->dp = CONTAINER_OF(node, struct local_datapath, + hmap_node); + ovs_list_push_back(&dp_list, &dp_binding->node); + + sbrec_port_binding_index_set_datapath(lpval, pdp); + SBREC_PORT_BINDING_FOR_EACH_EQUAL (pb, cursor, lpval) { + if (pb->chassis) { + const char *chassis_name = pb->chassis->name; + if (chassis_name) { + sset_add(bfd_chassis, chassis_name); + } + } + } + } + } while (!ovs_list_is_empty(&dp_list)); + + sset_destroy(&visited_dp); +} + static void bfd_calculate_chassis(struct controller_ctx *ctx, const struct sbrec_chassis *our_chassis, @@ -155,24 +237,8 @@ bfd_calculate_chassis(struct controller_ctx *ctx, } } if (our_chassis_is_gw_for_dp) { - for (size_t i = 0; i < dp->n_peer_dps; i++) { - const struct sbrec_datapath_binding *pdp = dp->peer_dps[i]; - if (!pdp) { - continue; - } - - sbrec_port_binding_index_set_datapath(lpval, pdp); - SBREC_PORT_BINDING_FOR_EACH_EQUAL (pb, &cursor, lpval) { - if (pb->chassis) { - /* Gateway node has to enable bfd to all nodes hosting - * connected network ports */ - const char *chassis_name = pb->chassis->name; - if (chassis_name) { - sset_add(bfd_chassis, chassis_name); - } - } - } - } + bfd_travel_gw_related_chassis(dp, local_datapaths, &cursor, + lpval, bfd_chassis); } } sbrec_port_binding_index_destroy_row(lpval);
The bfd_calculate_chassis function calculates gateway's peer datapaths to figure our which chassis should be add in bfd_chassis. But in most circumstance, a gateway's peer datapath has NO chassis. So it may disable BFD on some tunnel ports. Then a port on a remote ovs cannot send packet out because it believes all remote gateway are down. This patch will go though whole graph and visit all datapath's port which has connect with gateway. Signed-off-by: Zhenyu Gao <sysugaozhenyu@gmail.com> --- ovn/controller/bfd.c | 102 ++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 84 insertions(+), 18 deletions(-)