diff mbox series

[ovs-dev,3/9] ipsec: libreswan: Try to bring non-active connections up.

Message ID 20241029101608.2991596-4-i.maximets@ovn.org
State Changes Requested
Headers show
Series ipsec: Resiliency to Libreswan failures. | expand

Checks

Context Check Description
ovsrobot/apply-robot success apply and check: success
ovsrobot/github-robot-_Build_and_Test fail github build: failed
ovsrobot/github-robot-_Build_and_Test fail github build: failed

Commit Message

Ilya Maximets Oct. 29, 2024, 10:15 a.m. UTC
Sometimes connections get loaded but, for some reason, do not become
active on the first try.  We can try to bring them up manually.
However, if they are still not active after that, it's better to just
remove such connections and try to add them from scratch, as there must
be some internal issue in libreswan that doesn't allow these connections
to actually become active.

Note: Once the "defunct" connection is removed, the second connection
for the same tunnel will also be removed as "half-loaded".  This ensures
that all the shared SAs will also be cleaned up, so we can truly start
from scratch.

Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
---
 ipsec/ovs-monitor-ipsec.in | 12 ++++++++++++
 1 file changed, 12 insertions(+)
diff mbox series

Patch

diff --git a/ipsec/ovs-monitor-ipsec.in b/ipsec/ovs-monitor-ipsec.in
index 09a29e2ca..d667c08bc 100755
--- a/ipsec/ovs-monitor-ipsec.in
+++ b/ipsec/ovs-monitor-ipsec.in
@@ -514,6 +514,7 @@  conn prevent_unencrypted_vxlan
         self.IPSEC_D = "sql:" + libreswan_root_prefix + ipsec_d
         self.IPSEC_CTL = libreswan_root_prefix + ipsec_ctl
         self.conf_file = None
+        self.conns_not_active = set()
         self.secrets_file = None
         vlog.dbg("Using: " + self.IPSEC)
         vlog.dbg("Configuration file: " + self.IPSEC_CONF)
@@ -638,6 +639,14 @@  conn prevent_unencrypted_vxlan
             loaded = set(loaded_conns.get(name, dict()).keys())
             active = set(active_conns.get(name, dict()).keys())
 
+            # Untrack connections that became active.
+            self.conns_not_active.difference_update(active)
+            # Remove connections that didn't become active after --start
+            # and another explicit --up.
+            for conn in self.conns_not_active & loaded:
+                self._delete_ipsec_connection(conn, "is defunct")
+                loaded.remove(conn)
+
             # Remove all the loaded or active but not desired connections.
             for conn in loaded | active:
                 if conn not in desired:
@@ -668,6 +677,8 @@  conn prevent_unencrypted_vxlan
                 # so loaded >= active
                 for conn in loaded - active:
                     vlog.info("Bringing up ipsec connection %s" % conn)
+                    # On failure to --up it will be removed from the set.
+                    self.conns_not_active.add(conn)
                     self._start_ipsec_connection(conn, "up")
 
         # Update shunt policy if changed
@@ -797,6 +808,7 @@  conn prevent_unencrypted_vxlan
 
     def _delete_ipsec_connection(self, conn, reason):
         vlog.info("%s %s, removing" % (conn, reason))
+        self.conns_not_active.discard(conn)
         run_command(self.IPSEC_AUTO +
                     ["--ctlsocket", self.IPSEC_CTL,
                      "--config", self.IPSEC_CONF,