Message ID | 20240526080818.64373-1-svc.ovs-community@vmware.com |
---|---|
State | Superseded |
Headers | show |
Series | [ovs-dev,v2,1/1] datapath-windows : Add sanity check in OvsInitConntrack. | expand |
Context | Check | Description |
---|---|---|
ovsrobot/apply-robot | success | apply and check: success |
ovsrobot/github-robot-_Build_and_Test | fail | github build: failed |
ovsrobot/intel-ovs-compilation | success | test: success |
On Sun, May 26, 2024 at 04:08:18PM +0800, Wilson Peng via dev wrote: > From: Wilson Peng <pweisong@vmware.com> > > While deploying Tanzu Kubernetes(Antrea based solution) in Broadcom customer, > Sometimes it is found that the kernel thread OvsConntrackEntryCleaner is not started > After the Windows node is rebooted on unexpected condition. It could be also > observed a similar issue in local Antrea setup via Clean-AntreaNetwork.ps1 which will > Remove-VMSwitch and re-create it on Windows node. > > After checking the local conntrack dump, OVS doesn’t remove the connection entries > Even though the time is overdue, we could find the connection entries created several > Hours ago in the dump , within a state (TIME_WAIT) that was supposed to be deleted earlier. > At that time, the count of the existing entries in the OVS conntrack zone is far from the > Up limit, the actual number is 19k. Then we tried to flush the conntrack with CMD > "ovs-dpctl.exe flush-conntrack" and all the conntrack entries could be removed. > > In this patch, it is adding the complete sanity check when starting OvsConntrackEntryCleaner > Thread. If anything abnormal is happening it will rollback the thread creating. > > Antrea team does help do the regression test with build including the patch and it could PASS > The testing. And it is not find the Connectract not timeout issue with same reproducing condition. > > It is good to backport the fix to main and backported until 2.17 > > Signed-off-by: Wilson Peng <pweisong@vmware.com> Hi Wilson, Thanks for your patch. Some mechanical feedback on the patch submission: * Please avoid the term "sanity check", as per the inclusive naming Word List v1.0, which has been adopted by OvS. * Please line wrap the patch description to 75 characters, excluding where that doesn't make sense such as Fixes tags. Some high-level feedback on the patch. This patch seems to take a defensive programming approach, where various state is kept such that OvsCleanupConntrack() is idempotent. But this does lead to extra code, and I suggest it is error prone. In all I would prefer to see some root-cause analysis of the problem pinpointing which resources aren't being released and why. Ideally this would lead to a targeted fix for the problem at hand. > --- > datapath-windows/ovsext/Conntrack.c | 86 ++++++++++++++++++++--------- > 1 file changed, 61 insertions(+), 25 deletions(-) > > diff --git a/datapath-windows/ovsext/Conntrack.c b/datapath-windows/ovsext/Conntrack.c > index 39ba5cc10..fbd036418 100644 > --- a/datapath-windows/ovsext/Conntrack.c > +++ b/datapath-windows/ovsext/Conntrack.c > @@ -36,6 +36,9 @@ static PLIST_ENTRY ovsConntrackTable; > static OVS_CT_THREAD_CTX ctThreadCtx; > static PNDIS_RW_LOCK_EX *ovsCtBucketLock = NULL; > static NDIS_SPIN_LOCK ovsCtZoneLock; > +static BOOLEAN ovsCtZoneLock_release = FALSE; > +static BOOLEAN OvsNatInitDone = FALSE; > +static BOOLEAN OvsConntrackCleanThreadExist = FALSE; > static POVS_CT_ZONE_INFO zoneInfo = NULL; > extern POVS_SWITCH_CONTEXT gOvsSwitchContext; > static ULONG ctTotalEntries; > @@ -95,32 +98,44 @@ OvsInitConntrack(POVS_SWITCH_CONTEXT context) > goto freeBucketLock; > } > > - ObReferenceObjectByHandle(threadHandle, SYNCHRONIZE, NULL, KernelMode, > - &ctThreadCtx.threadObject, NULL); > + OvsConntrackCleanThreadExist = FALSE; > + ctThreadCtx.exit = 0; > + status = ObReferenceObjectByHandle(threadHandle, SYNCHRONIZE, NULL, KernelMode, > + &ctThreadCtx.threadObject, NULL); > ZwClose(threadHandle); > threadHandle = NULL; > + if (!NT_SUCCESS(status)) { > + ctThreadCtx.exit = 1; > + KeSetEvent(&ctThreadCtx.event, 0, FALSE); > + KeWaitForSingleObject(ctThreadCtx.threadObject, Executive, > + KernelMode, FALSE, NULL); > + goto cleanupConntrack; > + } > > zoneInfo = OvsAllocateMemoryWithTag(sizeof(OVS_CT_ZONE_INFO) * > CT_MAX_ZONE, OVS_CT_POOL_TAG); > if (zoneInfo == NULL) { > status = STATUS_INSUFFICIENT_RESOURCES; > - goto freeBucketLock; > + goto cleanupConntrack; > } > > NdisAllocateSpinLock(&ovsCtZoneLock); > + ovsCtZoneLock_release = FALSE; > defaultCtLimit = CT_MAX_ENTRIES; > for (UINT32 i = 0; i < CT_MAX_ZONE; i++) { > zoneInfo[i].entries = 0; > zoneInfo[i].limit = defaultCtLimit; > } > > - status = OvsNatInit(); > - > - if (status != STATUS_SUCCESS) { > - OvsCleanupConntrack(); > + if (OvsNatInitDone == FALSE) { > + status = OvsNatInit(); > + if (status != STATUS_SUCCESS) { > + goto cleanupConntrack; > + } > + OvsNatInitDone = TRUE; > } > + OvsConntrackCleanThreadExist = TRUE; > return STATUS_SUCCESS; > - > freeBucketLock: > for (UINT32 i = 0; i < numBucketLocks; i++) { > if (ovsCtBucketLock[i] != NULL) { > @@ -132,6 +147,9 @@ freeBucketLock: > freeTable: > OvsFreeMemoryWithTag(ovsConntrackTable, OVS_CT_POOL_TAG); > ovsConntrackTable = NULL; > +cleanupConntrack: > + OvsCleanupConntrack(); In some error paths code above the 'cleanupConntrack' line will execute and others it won't. This doesn't seem consistent. I would prefer a situation where the code looked more like it did before this patch: status = alloc_A() if (...) goto unwind_A; status = alloc_B() if (...) goto unwind_B; ... return STATUS_SUCCESS; unwind_B: release_B(); unwind_A: release_A(); return status; And for OvsCleanupConntrack() to only handle the case where initialisation succeeded. > + > return status; > } > ...
diff --git a/datapath-windows/ovsext/Conntrack.c b/datapath-windows/ovsext/Conntrack.c index 39ba5cc10..fbd036418 100644 --- a/datapath-windows/ovsext/Conntrack.c +++ b/datapath-windows/ovsext/Conntrack.c @@ -36,6 +36,9 @@ static PLIST_ENTRY ovsConntrackTable; static OVS_CT_THREAD_CTX ctThreadCtx; static PNDIS_RW_LOCK_EX *ovsCtBucketLock = NULL; static NDIS_SPIN_LOCK ovsCtZoneLock; +static BOOLEAN ovsCtZoneLock_release = FALSE; +static BOOLEAN OvsNatInitDone = FALSE; +static BOOLEAN OvsConntrackCleanThreadExist = FALSE; static POVS_CT_ZONE_INFO zoneInfo = NULL; extern POVS_SWITCH_CONTEXT gOvsSwitchContext; static ULONG ctTotalEntries; @@ -95,32 +98,44 @@ OvsInitConntrack(POVS_SWITCH_CONTEXT context) goto freeBucketLock; } - ObReferenceObjectByHandle(threadHandle, SYNCHRONIZE, NULL, KernelMode, - &ctThreadCtx.threadObject, NULL); + OvsConntrackCleanThreadExist = FALSE; + ctThreadCtx.exit = 0; + status = ObReferenceObjectByHandle(threadHandle, SYNCHRONIZE, NULL, KernelMode, + &ctThreadCtx.threadObject, NULL); ZwClose(threadHandle); threadHandle = NULL; + if (!NT_SUCCESS(status)) { + ctThreadCtx.exit = 1; + KeSetEvent(&ctThreadCtx.event, 0, FALSE); + KeWaitForSingleObject(ctThreadCtx.threadObject, Executive, + KernelMode, FALSE, NULL); + goto cleanupConntrack; + } zoneInfo = OvsAllocateMemoryWithTag(sizeof(OVS_CT_ZONE_INFO) * CT_MAX_ZONE, OVS_CT_POOL_TAG); if (zoneInfo == NULL) { status = STATUS_INSUFFICIENT_RESOURCES; - goto freeBucketLock; + goto cleanupConntrack; } NdisAllocateSpinLock(&ovsCtZoneLock); + ovsCtZoneLock_release = FALSE; defaultCtLimit = CT_MAX_ENTRIES; for (UINT32 i = 0; i < CT_MAX_ZONE; i++) { zoneInfo[i].entries = 0; zoneInfo[i].limit = defaultCtLimit; } - status = OvsNatInit(); - - if (status != STATUS_SUCCESS) { - OvsCleanupConntrack(); + if (OvsNatInitDone == FALSE) { + status = OvsNatInit(); + if (status != STATUS_SUCCESS) { + goto cleanupConntrack; + } + OvsNatInitDone = TRUE; } + OvsConntrackCleanThreadExist = TRUE; return STATUS_SUCCESS; - freeBucketLock: for (UINT32 i = 0; i < numBucketLocks; i++) { if (ovsCtBucketLock[i] != NULL) { @@ -132,6 +147,9 @@ freeBucketLock: freeTable: OvsFreeMemoryWithTag(ovsConntrackTable, OVS_CT_POOL_TAG); ovsConntrackTable = NULL; +cleanupConntrack: + OvsCleanupConntrack(); + return status; } @@ -144,35 +162,51 @@ freeTable: VOID OvsCleanupConntrack(VOID) { - ctThreadCtx.exit = 1; - KeSetEvent(&ctThreadCtx.event, 0, FALSE); - KeWaitForSingleObject(ctThreadCtx.threadObject, Executive, - KernelMode, FALSE, NULL); - ObDereferenceObject(ctThreadCtx.threadObject); - + if (OvsConntrackCleanThreadExist) { + ctThreadCtx.exit = 1; + KeSetEvent(&ctThreadCtx.event, 0, FALSE); + KeWaitForSingleObject(ctThreadCtx.threadObject, Executive, + KernelMode, FALSE, NULL); + ObDereferenceObject(ctThreadCtx.threadObject); + } /* Force flush all entries before removing */ - OvsCtFlush(0, NULL); + if (ovsConntrackTable) { + OvsCtFlush(0, NULL); + } if (ovsConntrackTable) { OvsFreeMemoryWithTag(ovsConntrackTable, OVS_CT_POOL_TAG); ovsConntrackTable = NULL; } - for (UINT32 i = 0; i < CT_HASH_TABLE_SIZE; i++) { - /* Disabling the uninitialized memory warning because it should - * always be initialized during OvsInitConntrack */ -#pragma warning(suppress: 6001) - if (ovsCtBucketLock[i] != NULL) { - NdisFreeRWLock(ovsCtBucketLock[i]); + if (ovsCtBucketLock) { + for (UINT32 i = 0; i < CT_HASH_TABLE_SIZE; i++) { + /* Disabling the uninitialized memory warning because it should + * always be initialized during OvsInitConntrack */ + #pragma warning(suppress: 6001) + if (ovsCtBucketLock[i] != NULL) { + NdisFreeRWLock(ovsCtBucketLock[i]); + } } } - OvsFreeMemoryWithTag(ovsCtBucketLock, OVS_CT_POOL_TAG); - ovsCtBucketLock = NULL; - OvsNatCleanup(); - NdisFreeSpinLock(&ovsCtZoneLock); + + if (ovsCtBucketLock) { + OvsFreeMemoryWithTag(ovsCtBucketLock, OVS_CT_POOL_TAG); + ovsCtBucketLock = NULL; + } + if (OvsNatInitDone) { + OvsNatCleanup(); + OvsNatInitDone = FALSE; + } + if (ovsCtZoneLock_release == FALSE) { + NdisFreeSpinLock(&ovsCtZoneLock); + ovsCtZoneLock_release = TRUE; + } if (zoneInfo) { OvsFreeMemoryWithTag(zoneInfo, OVS_CT_POOL_TAG); + zoneInfo = NULL; } + OvsConntrackCleanThreadExist = FALSE; } VOID @@ -1520,6 +1554,7 @@ OvsConntrackEntryCleaner(PVOID data) LOCK_STATE_EX lockState; BOOLEAN success = TRUE; + OVS_LOG_INFO("Start the ConntrackEntry Cleaner Thread, context: %p", context); while (success) { if (context->exit) { break; @@ -1541,6 +1576,7 @@ OvsConntrackEntryCleaner(PVOID data) KeWaitForSingleObject(&context->event, Executive, KernelMode, FALSE, (LARGE_INTEGER *)&threadSleepTimeout); } + OVS_LOG_INFO("Terminating the OVS ConntrackEntry Cleaner system thread"); PsTerminateSystemThread(STATUS_SUCCESS); }