diff mbox series

[ovs-dev] controller: Prevent crash when db is empty.

Message ID 20241021181232.51672-1-rosemarie@redhat.com
State Changes Requested
Headers show
Series [ovs-dev] controller: Prevent crash when db is empty. | expand

Checks

Context Check Description
ovsrobot/apply-robot success apply and check: success
ovsrobot/github-robot-_Build_and_Test success github build: passed
ovsrobot/github-robot-_ovn-kubernetes success github build: passed

Commit Message

Rosemarie O'Riorden Oct. 21, 2024, 6:12 p.m. UTC
ovn-controller crashed when connected to an empty local db, forcing the user to
initialize a local db before starting the controller daemon. With this fix, the
controller will no longer crash in this case, but instead keep running and wait
until the database is initialized to do anything else.

Signed-off-by: Rosemarie O'Riorden <rosemarie@redhat.com>
---
 controller/chassis.c        |  3 +++
 controller/ovn-controller.c | 19 ++++++++++---------
 tests/ovn-controller.at     | 23 +++++++++++++++++++++++
 3 files changed, 36 insertions(+), 9 deletions(-)

Comments

Rosemarie O'Riorden Oct. 28, 2024, 4:02 p.m. UTC | #1
Reported-at: https://issues.redhat.com/browse/FDP-715

On 10/21/24 2:12 PM, Rosemarie O'Riorden wrote:
> ovn-controller crashed when connected to an empty local db, forcing the user to
> initialize a local db before starting the controller daemon. With this fix, the
> controller will no longer crash in this case, but instead keep running and wait
> until the database is initialized to do anything else.
> 
> Signed-off-by: Rosemarie O'Riorden <rosemarie@redhat.com>
> ---
>  controller/chassis.c        |  3 +++
>  controller/ovn-controller.c | 19 ++++++++++---------
>  tests/ovn-controller.at     | 23 +++++++++++++++++++++++
>  3 files changed, 36 insertions(+), 9 deletions(-)
> 
> diff --git a/controller/chassis.c b/controller/chassis.c
> index 2991a0af3..b2eefd58a 100644
> --- a/controller/chassis.c
> +++ b/controller/chassis.c
> @@ -1016,6 +1016,9 @@ store_chassis_index_if_needed(
>  {
>      const struct ovsrec_open_vswitch *cfg =
>          ovsrec_open_vswitch_table_first(ovs_table);
> +    if (!cfg) {
> +        return;
> +    }
>      const char *chassis_id = get_ovs_chassis_id(ovs_table);
>  
>      char *idx_key = xasprintf(CHASSIS_IDX_PREFIX "%s", chassis_id);
> diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> index c48667887..4c3c79337 100644
> --- a/controller/ovn-controller.c
> +++ b/controller/ovn-controller.c
> @@ -5434,6 +5434,8 @@ main(int argc, char *argv[])
>              ovsrec_open_vswitch_table_get(ovs_idl_loop.idl);
>          const struct ovsrec_bridge *br_int = NULL;
>          const struct ovsrec_datapath *br_int_dp = NULL;
> +        const struct ovsrec_open_vswitch *cfg =
> +            ovsrec_open_vswitch_table_first(ovs_table);
>          process_br_int(ovs_idl_txn, bridge_table, ovs_table, &br_int,
>                         ovsrec_server_has_datapath_table(ovs_idl_loop.idl)
>                         ? &br_int_dp
> @@ -5446,13 +5448,13 @@ main(int argc, char *argv[])
>  
>          /* Enable ACL matching for double tagged traffic. */
>          if (ovs_idl_txn) {
> -            const struct ovsrec_open_vswitch *cfg =
> -                ovsrec_open_vswitch_table_first(ovs_table);
> -            int vlan_limit = smap_get_int(
> -                &cfg->other_config, "vlan-limit", -1);
> -            if (vlan_limit != 0) {
> -                ovsrec_open_vswitch_update_other_config_setkey(
> -                    cfg, "vlan-limit", "0");
> +            if (cfg) {
> +                int vlan_limit = smap_get_int(
> +                    &cfg->other_config, "vlan-limit", -1);
> +                if (vlan_limit != 0) {
> +                    ovsrec_open_vswitch_update_other_config_setkey(
> +                        cfg, "vlan-limit", "0");
> +                }
>              }
>          }
>  
> @@ -5463,7 +5465,7 @@ main(int argc, char *argv[])
>          }
>  
>          if (ovsdb_idl_has_ever_connected(ovnsb_idl_loop.idl) &&
> -            northd_version_match) {
> +            northd_version_match && cfg) {
>  
>              /* Unconditionally remove all deleted lflows from the lflow
>               * cache.
> @@ -5877,7 +5879,6 @@ loop_done:
>                  = ovsrec_bridge_table_get(ovs_idl_loop.idl);
>              const struct ovsrec_open_vswitch_table *ovs_table
>                  = ovsrec_open_vswitch_table_get(ovs_idl_loop.idl);
> -
>              const struct sbrec_port_binding_table *port_binding_table
>                  = sbrec_port_binding_table_get(ovnsb_idl_loop.idl);
>  
> diff --git a/tests/ovn-controller.at b/tests/ovn-controller.at
> index a2e451880..0ef3d4582 100644
> --- a/tests/ovn-controller.at
> +++ b/tests/ovn-controller.at
> @@ -3419,3 +3419,26 @@ AT_CHECK([test $(grep -c "HANDLER_MESSAGE" hv1/ovn-controller.log) -ge 1])
>  
>  OVN_CLEANUP([hv1])
>  AT_CLEANUP
> +
> +AT_SETUP([ovn-controller - Start controller with empty db])
> +AT_KEYWORDS([ovn])
> +
> +check ovsdb-tool create conf.db $ovs_srcdir/vswitchd/vswitch.ovsschema
> +start_daemon ovsdb-server --remote=punix:db.sock
> +start_daemon ovn-controller unix:db.sock
> +
> +AT_CHECK([ovsdb-client --bare dump unix:db.sock Open_vSwitch Open_vSwitch], [], [Open_vSwitch table
> +])
> +check ovs-vsctl --no-wait init
> +AT_CHECK([ovs-vsctl show| tail -n +2], [], [    Bridge br-int
> +        fail_mode: secure
> +        datapath_type: system
> +        Port br-int
> +            Interface br-int
> +                type: internal
> +])
> +
> +# Gracefully terminate daemons
> +OVS_APP_EXIT_AND_WAIT([ovn-controller])
> +OVS_APP_EXIT_AND_WAIT([ovsdb-server])
> +AT_CLEANUP
Ilya Maximets Nov. 4, 2024, 6:38 p.m. UTC | #2
On 10/21/24 20:12, Rosemarie O'Riorden wrote:
> ovn-controller crashed when connected to an empty local db, forcing the user to
> initialize a local db before starting the controller daemon. With this fix, the
> controller will no longer crash in this case, but instead keep running and wait
> until the database is initialized to do anything else.
> 
> Signed-off-by: Rosemarie O'Riorden <rosemarie@redhat.com>
> ---
>  controller/chassis.c        |  3 +++
>  controller/ovn-controller.c | 19 ++++++++++---------
>  tests/ovn-controller.at     | 23 +++++++++++++++++++++++
>  3 files changed, 36 insertions(+), 9 deletions(-)

Hi, Rosemarie.  Thanks for the patch!

See some comments below.

Best regards, Ilya Maximets.

> 
> diff --git a/controller/chassis.c b/controller/chassis.c
> index 2991a0af3..b2eefd58a 100644
> --- a/controller/chassis.c
> +++ b/controller/chassis.c
> @@ -1016,6 +1016,9 @@ store_chassis_index_if_needed(
>  {
>      const struct ovsrec_open_vswitch *cfg =
>          ovsrec_open_vswitch_table_first(ovs_table);
> +    if (!cfg) {
> +        return;
> +    }
>      const char *chassis_id = get_ovs_chassis_id(ovs_table);
>  
>      char *idx_key = xasprintf(CHASSIS_IDX_PREFIX "%s", chassis_id);
> diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> index c48667887..4c3c79337 100644
> --- a/controller/ovn-controller.c
> +++ b/controller/ovn-controller.c
> @@ -5434,6 +5434,8 @@ main(int argc, char *argv[])
>              ovsrec_open_vswitch_table_get(ovs_idl_loop.idl);
>          const struct ovsrec_bridge *br_int = NULL;
>          const struct ovsrec_datapath *br_int_dp = NULL;
> +        const struct ovsrec_open_vswitch *cfg =
> +            ovsrec_open_vswitch_table_first(ovs_table);
>          process_br_int(ovs_idl_txn, bridge_table, ovs_table, &br_int,
>                         ovsrec_server_has_datapath_table(ovs_idl_loop.idl)
>                         ? &br_int_dp
> @@ -5446,13 +5448,13 @@ main(int argc, char *argv[])
>  
>          /* Enable ACL matching for double tagged traffic. */
>          if (ovs_idl_txn) {
> -            const struct ovsrec_open_vswitch *cfg =
> -                ovsrec_open_vswitch_table_first(ovs_table);
> -            int vlan_limit = smap_get_int(
> -                &cfg->other_config, "vlan-limit", -1);
> -            if (vlan_limit != 0) {
> -                ovsrec_open_vswitch_update_other_config_setkey(
> -                    cfg, "vlan-limit", "0");
> +            if (cfg) {

These are now two nested 'if' statements.  They can be combined into
one to save the extra indentation level.

> +                int vlan_limit = smap_get_int(
> +                    &cfg->other_config, "vlan-limit", -1);
> +                if (vlan_limit != 0) {
> +                    ovsrec_open_vswitch_update_other_config_setkey(
> +                        cfg, "vlan-limit", "0");
> +                }
>              }
>          }
>  
> @@ -5463,7 +5465,7 @@ main(int argc, char *argv[])
>          }
>  
>          if (ovsdb_idl_has_ever_connected(ovnsb_idl_loop.idl) &&
> -            northd_version_match) {
> +            northd_version_match && cfg) {
>  
>              /* Unconditionally remove all deleted lflows from the lflow
>               * cache.
> @@ -5877,7 +5879,6 @@ loop_done:
>                  = ovsrec_bridge_table_get(ovs_idl_loop.idl);
>              const struct ovsrec_open_vswitch_table *ovs_table
>                  = ovsrec_open_vswitch_table_get(ovs_idl_loop.idl);
> -

An unrelated change.

>              const struct sbrec_port_binding_table *port_binding_table
>                  = sbrec_port_binding_table_get(ovnsb_idl_loop.idl);
>  
> diff --git a/tests/ovn-controller.at b/tests/ovn-controller.at
> index a2e451880..0ef3d4582 100644
> --- a/tests/ovn-controller.at
> +++ b/tests/ovn-controller.at
> @@ -3419,3 +3419,26 @@ AT_CHECK([test $(grep -c "HANDLER_MESSAGE" hv1/ovn-controller.log) -ge 1])
>  
>  OVN_CLEANUP([hv1])
>  AT_CLEANUP
> +
> +AT_SETUP([ovn-controller - Start controller with empty db])
> +AT_KEYWORDS([ovn])
> +
> +check ovsdb-tool create conf.db $ovs_srcdir/vswitchd/vswitch.ovsschema
> +start_daemon ovsdb-server --remote=punix:db.sock
> +start_daemon ovn-controller unix:db.sock
> +
> +AT_CHECK([ovsdb-client --bare dump unix:db.sock Open_vSwitch Open_vSwitch], [], [Open_vSwitch table
> +])

You may use 'dnl' to move the output to a separate line, e.g.:

AT_CHECK([ovsdb-client --bare dump unix:db.sock Open_vSwitch Open_vSwitch], [], [dnl
Open_vSwitch table
])

> +check ovs-vsctl --no-wait init
> +AT_CHECK([ovs-vsctl show| tail -n +2], [], [    Bridge br-int

Same here.  The output can be moved to a separate line, so it
looks a bit better.  And there is a missing space before the '|'.

Also, there is no guarantee that ovn-controller already created
the bridge at the time we call ovs-vsctl.  We should wait for it.
For example with OVS_WAIT_FOR_OUTPUT() macro.

> +        fail_mode: secure
> +        datapath_type: system
> +        Port br-int
> +            Interface br-int
> +                type: internal
> +])
> +
> +# Gracefully terminate daemons

Period at the end of the comment.

> +OVS_APP_EXIT_AND_WAIT([ovn-controller])
> +OVS_APP_EXIT_AND_WAIT([ovsdb-server])
> +AT_CLEANUP
diff mbox series

Patch

diff --git a/controller/chassis.c b/controller/chassis.c
index 2991a0af3..b2eefd58a 100644
--- a/controller/chassis.c
+++ b/controller/chassis.c
@@ -1016,6 +1016,9 @@  store_chassis_index_if_needed(
 {
     const struct ovsrec_open_vswitch *cfg =
         ovsrec_open_vswitch_table_first(ovs_table);
+    if (!cfg) {
+        return;
+    }
     const char *chassis_id = get_ovs_chassis_id(ovs_table);
 
     char *idx_key = xasprintf(CHASSIS_IDX_PREFIX "%s", chassis_id);
diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
index c48667887..4c3c79337 100644
--- a/controller/ovn-controller.c
+++ b/controller/ovn-controller.c
@@ -5434,6 +5434,8 @@  main(int argc, char *argv[])
             ovsrec_open_vswitch_table_get(ovs_idl_loop.idl);
         const struct ovsrec_bridge *br_int = NULL;
         const struct ovsrec_datapath *br_int_dp = NULL;
+        const struct ovsrec_open_vswitch *cfg =
+            ovsrec_open_vswitch_table_first(ovs_table);
         process_br_int(ovs_idl_txn, bridge_table, ovs_table, &br_int,
                        ovsrec_server_has_datapath_table(ovs_idl_loop.idl)
                        ? &br_int_dp
@@ -5446,13 +5448,13 @@  main(int argc, char *argv[])
 
         /* Enable ACL matching for double tagged traffic. */
         if (ovs_idl_txn) {
-            const struct ovsrec_open_vswitch *cfg =
-                ovsrec_open_vswitch_table_first(ovs_table);
-            int vlan_limit = smap_get_int(
-                &cfg->other_config, "vlan-limit", -1);
-            if (vlan_limit != 0) {
-                ovsrec_open_vswitch_update_other_config_setkey(
-                    cfg, "vlan-limit", "0");
+            if (cfg) {
+                int vlan_limit = smap_get_int(
+                    &cfg->other_config, "vlan-limit", -1);
+                if (vlan_limit != 0) {
+                    ovsrec_open_vswitch_update_other_config_setkey(
+                        cfg, "vlan-limit", "0");
+                }
             }
         }
 
@@ -5463,7 +5465,7 @@  main(int argc, char *argv[])
         }
 
         if (ovsdb_idl_has_ever_connected(ovnsb_idl_loop.idl) &&
-            northd_version_match) {
+            northd_version_match && cfg) {
 
             /* Unconditionally remove all deleted lflows from the lflow
              * cache.
@@ -5877,7 +5879,6 @@  loop_done:
                 = ovsrec_bridge_table_get(ovs_idl_loop.idl);
             const struct ovsrec_open_vswitch_table *ovs_table
                 = ovsrec_open_vswitch_table_get(ovs_idl_loop.idl);
-
             const struct sbrec_port_binding_table *port_binding_table
                 = sbrec_port_binding_table_get(ovnsb_idl_loop.idl);
 
diff --git a/tests/ovn-controller.at b/tests/ovn-controller.at
index a2e451880..0ef3d4582 100644
--- a/tests/ovn-controller.at
+++ b/tests/ovn-controller.at
@@ -3419,3 +3419,26 @@  AT_CHECK([test $(grep -c "HANDLER_MESSAGE" hv1/ovn-controller.log) -ge 1])
 
 OVN_CLEANUP([hv1])
 AT_CLEANUP
+
+AT_SETUP([ovn-controller - Start controller with empty db])
+AT_KEYWORDS([ovn])
+
+check ovsdb-tool create conf.db $ovs_srcdir/vswitchd/vswitch.ovsschema
+start_daemon ovsdb-server --remote=punix:db.sock
+start_daemon ovn-controller unix:db.sock
+
+AT_CHECK([ovsdb-client --bare dump unix:db.sock Open_vSwitch Open_vSwitch], [], [Open_vSwitch table
+])
+check ovs-vsctl --no-wait init
+AT_CHECK([ovs-vsctl show| tail -n +2], [], [    Bridge br-int
+        fail_mode: secure
+        datapath_type: system
+        Port br-int
+            Interface br-int
+                type: internal
+])
+
+# Gracefully terminate daemons
+OVS_APP_EXIT_AND_WAIT([ovn-controller])
+OVS_APP_EXIT_AND_WAIT([ovsdb-server])
+AT_CLEANUP