diff mbox series

[V4,01/15] core/pldm: Handle Watchdog timer.

Message ID 20230620150101.88802-2-clombard@linux.ibm.com
State Superseded
Headers show
Series Complete PLDM responder and enable PLDM support | expand

Commit Message

Christophe Lombard June 20, 2023, 3 p.m. UTC
Encode a PLDM platform event message to send the heartbeat to the BMC.
Watchdog is "armed" when a
PLDM_EVENT_MESSAGE_GLOBAL_ENABLE_ASYNC_KEEP_ALIVE is received.

Signed-off-by: Christophe Lombard <clombard@linux.ibm.com>
---
 core/pldm/Makefile.inc    |   1 +
 core/pldm/pldm-watchdog.c | 142 ++++++++++++++++++++++++++++++++++++++
 include/pldm.h            |   5 ++
 3 files changed, 148 insertions(+)
 create mode 100644 core/pldm/pldm-watchdog.c

Comments

Nicholas Piggin June 21, 2023, 5:25 a.m. UTC | #1
On Wed Jun 21, 2023 at 1:00 AM AEST, Christophe Lombard wrote:
> Encode a PLDM platform event message to send the heartbeat to the BMC.
> Watchdog is "armed" when a
> PLDM_EVENT_MESSAGE_GLOBAL_ENABLE_ASYNC_KEEP_ALIVE is received.

I did a little bit on IPMI watchdog. Does PLDM have an OS boot watchdog
like IPMI (not that we enable it in skiboot)? If yes and we did enable
such a thing, we could hook in the watchdog commands in the OPAL IPMI
calls so the OS does not have to be PLDM aware, right?

Would there be any plans to have OPAL PLDM calls for OS native PLDM, or
would that get handled a different way?

Thanks,
Nick
Christophe Lombard June 21, 2023, 3:51 p.m. UTC | #2
Le 21/06/2023 à 07:25, Nicholas Piggin a écrit :
> On Wed Jun 21, 2023 at 1:00 AM AEST, Christophe Lombard wrote:
>> Encode a PLDM platform event message to send the heartbeat to the BMC.
>> Watchdog is "armed" when a
>> PLDM_EVENT_MESSAGE_GLOBAL_ENABLE_ASYNC_KEEP_ALIVE is received.
> I did a little bit on IPMI watchdog. Does PLDM have an OS boot watchdog
> like IPMI (not that we enable it in skiboot)? If yes and we did enable
> such a thing, we could hook in the watchdog commands in the OPAL IPMI
> calls so the OS does not have to be PLDM aware, right?

No, we haven't implemented an OS boot watchdog.
I'm curious to know where this is managed in Skiboot

> Would there be any plans to have OPAL PLDM calls for OS native PLDM, or
> would that get handled a different way?

For the time being, there is no plan to have OPAL PLDM calls for OS 
native PLDM.
We have set up and replaced the OPAL IPMI calls where we convert IPMI 
requests to PLDM requests.
Today only 2 IPMI requests are supported.
This is done in pldm-opal.c

> Thanks,
> Nick
diff mbox series

Patch

diff --git a/core/pldm/Makefile.inc b/core/pldm/Makefile.inc
index 87952dbf..e3efcc2b 100644
--- a/core/pldm/Makefile.inc
+++ b/core/pldm/Makefile.inc
@@ -11,6 +11,7 @@  PLDM_OBJS = pldm-mctp.o pldm-responder.o pldm-requester.o
 PLDM_OBJS += pldm-base-requests.o pldm-platform-requests.o
 PLDM_OBJS += pldm-bios-requests.o pldm-fru-requests.o
 PLDM_OBJS += pldm-file-io-requests.o pldm-lid-files.o
+PLDM_OBJS += pldm-watchdog.o
 
 PLDM = $(PLDM_DIR)/built-in.a
 $(PLDM): $(PLDM_OBJS:%=$(PLDM_DIR)/%)
diff --git a/core/pldm/pldm-watchdog.c b/core/pldm/pldm-watchdog.c
new file mode 100644
index 00000000..098ee265
--- /dev/null
+++ b/core/pldm/pldm-watchdog.c
@@ -0,0 +1,142 @@ 
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+// Copyright 2022 IBM Corp.
+
+#define pr_fmt(fmt) "PLDM: " fmt
+
+#include <lock.h>
+#include <stdlib.h>
+#include <string.h>
+#include <opal.h>
+#include <timebase.h>
+#include <timer.h>
+#include <pldm/include/libpldm/platform.h>
+#include "pldm.h"
+
+#define DEFAULT_WATCHDOG_TIMEOUT_SEC (10 * 60) /* 10 min */
+
+/* Whether the watchdog timer is armed and Skiboot should be sending
+ * regular heartbeats.
+ *
+ * Only read in this file; it is never set here.
+ * NOTE(review): presumably armed by the code that handles
+ * PLDM_EVENT_MESSAGE_GLOBAL_ENABLE_ASYNC_KEEP_ALIVE - confirm.
+ */
+bool watchdog_armed;
+
+/* The period (in seconds) of the PLDM watchdog, as dictated by BMC.
+ *
+ * Starts at DEFAULT_WATCHDOG_TIMEOUT_SEC; nothing in this file changes
+ * it afterwards. It is used as the delay between two heartbeats.
+ */
+int watchdog_period_sec = DEFAULT_WATCHDOG_TIMEOUT_SEC;
+
+static uint8_t sequence_number; /* sent in each heartbeat, post-incremented; wraps as a uint8_t */
+struct timer watchdog_timer; /* periodic timer; watchdog_poller() re-schedules it on every expiry */
+
+/*
+ * Completion callback for the heartbeat PlatformEventMessage request
+ * queued by pldm_watchdog_reset_timer().
+ *
+ * @rx:   response that was received, or NULL when none came back
+ * @data: unused
+ *
+ * The response is only decoded and checked: any failure is logged and
+ * no other action is taken.
+ */
+static void watchdog_reset_timer_complete(struct pldm_rx_data *rx,
+					  void *data __unused)
+{
+	/*
+	 * Zero-initialised on purpose: the error log below prints both
+	 * fields unconditionally, and the decoder is not guaranteed to
+	 * have written them when it fails or when the completion code
+	 * reports an error.
+	 */
+	struct pldm_platform_event_message_resp response = { 0 };
+	size_t payload_len;
+	int rc;
+
+	if (rx == NULL) {
+		prlog(PR_ERR, "%s: Response not received\n", __func__);
+		return;
+	}
+
+	/* The decoder takes the payload length, i.e. without the PLDM header */
+	payload_len = rx->msg_len - sizeof(struct pldm_msg_hdr);
+
+	rc = decode_platform_event_message_resp(
+			rx->msg,
+			payload_len,
+			&response.completion_code,
+			&response.platform_event_status);
+	if (rc != PLDM_SUCCESS || response.completion_code != PLDM_SUCCESS) {
+		prlog(PR_ERR, "Decode PlatformEventMessage Error, rc: %d, cc: %d, pes: %d\n",
+			       rc, response.completion_code,
+			       response.platform_event_status);
+	}
+}
+
+/*
+ * Build a PlatformEventMessage carrying a "heartbeat timer elapsed"
+ * event and queue it for transmission to the BMC.
+ *
+ * Returns OPAL_SUCCESS when the request was queued, OPAL_NO_MEM when
+ * the transmit buffer could not be allocated, OPAL_PARAMETER when the
+ * request could not be encoded, or the error code reported by
+ * pldm_requester_queue().
+ */
+static int pldm_watchdog_reset_timer(void)
+{
+	uint8_t event_data[2];
+	struct pldm_tx_data *tx;
+	size_t req_payload_len;
+	size_t msg_size;
+	int status = OPAL_SUCCESS;
+	int rc;
+
+	prlog(PR_DEBUG, "%s - send the heartbeat to the BMC, sequence: %d, period: %d\n",
+		       __func__, sequence_number, watchdog_period_sec);
+
+	/*
+	 * Heartbeat event data: the format version followed by a
+	 * sequence number that is bumped for every heartbeat built here
+	 * (even if the message later fails to be allocated or sent).
+	 * NOTE(review): no lock is taken around the increment, so this
+	 * relies on the callers never running concurrently - confirm.
+	 */
+	event_data[0] = PLDM_PLATFORM_EVENT_MESSAGE_FORMAT_VERSION;
+	event_data[1] = sequence_number++;
+
+	req_payload_len = PLDM_PLATFORM_EVENT_MESSAGE_MIN_REQ_BYTES + sizeof(event_data);
+
+	msg_size = sizeof(struct pldm_msg_hdr) +
+		   sizeof(struct pldm_platform_event_message_req) +
+		   sizeof(event_data);
+	tx = zalloc(sizeof(struct pldm_tx_data) + msg_size);
+	if (!tx)
+		return OPAL_NO_MEM;
+
+	/*
+	 * The advertised size is one byte less than what was allocated.
+	 * NOTE(review): presumably this discounts a one-byte event data
+	 * placeholder at the end of
+	 * struct pldm_platform_event_message_req - confirm against
+	 * libpldm.
+	 */
+	tx->data_size = msg_size - 1;
+
+	/* Encode the platform event message request */
+	rc = encode_platform_event_message_req(
+			DEFAULT_INSTANCE_ID,
+			PLDM_PLATFORM_EVENT_MESSAGE_FORMAT_VERSION,
+			HOST_TID,
+			PLDM_HEARTBEAT_TIMER_ELAPSED_EVENT,
+			event_data,
+			sizeof(event_data),
+			(struct pldm_msg *)tx->data,
+			req_payload_len);
+	if (rc != PLDM_SUCCESS) {
+		prlog(PR_ERR, "Encode PlatformEventMessage Error, rc: %d\n", rc);
+		status = OPAL_PARAMETER;
+		goto out;
+	}
+
+	/* Queue the request; the response is checked in the callback */
+	rc = pldm_requester_queue(tx, watchdog_reset_timer_complete, NULL);
+	if (rc) {
+		prlog(PR_ERR, "Communication Error, req: PlatformEventMessage, rc: %d\n", rc);
+		status = rc;
+	}
+
+out:
+	/* The local buffer is released on every path, success included */
+	free(tx);
+	return status;
+}
+
+/*
+ * Expiry handler of watchdog_timer.
+ *
+ * Sends one heartbeat when the watchdog is armed, then always
+ * re-schedules the timer one watchdog period ahead, whether or not a
+ * heartbeat was sent. None of the arguments are used.
+ */
+static void watchdog_poller(struct timer *t __unused,
+			    void *data __unused,
+			    uint64_t now __unused)
+{
+	uint64_t next_beat;
+
+	/* Nothing is sent until the watchdog has been armed */
+	if (watchdog_armed) {
+		/* Failures are reported by the callee; the result is not used */
+		(void)pldm_watchdog_reset_timer();
+	}
+
+	next_beat = secs_to_tb(watchdog_period_sec);
+	schedule_timer(&watchdog_timer, next_beat);
+}
+
+/*
+ * Set up the PLDM watchdog.
+ *
+ * If the watchdog is already armed a heartbeat is sent straight away.
+ * The periodic timer is then initialised and scheduled one watchdog
+ * period ahead. Always returns OPAL_SUCCESS; the result of the initial
+ * heartbeat is not propagated.
+ */
+int pldm_watchdog_init(void)
+{
+	/* Already armed: do not wait a full period for the first heartbeat */
+	if (watchdog_armed) {
+		(void)pldm_watchdog_reset_timer();
+	}
+
+	/* watchdog_poller() keeps the timer running from here on */
+	init_timer(&watchdog_timer, watchdog_poller, NULL);
+	schedule_timer(&watchdog_timer, secs_to_tb(watchdog_period_sec));
+
+	return OPAL_SUCCESS;
+}
diff --git a/include/pldm.h b/include/pldm.h
index 8622453b..80ee85c0 100644
--- a/include/pldm.h
+++ b/include/pldm.h
@@ -48,4 +48,9 @@  int pldm_lid_files_init(struct blocklevel_device **bl);
  */
 bool pldm_lid_files_exit(struct blocklevel_device *bl);
 
+/**
+ * Initialize the PLDM watchdog.
+ *
+ * Sends a heartbeat to the BMC right away if the watchdog is already
+ * armed, then starts the timer that sends the periodic heartbeats.
+ *
+ * Returns OPAL_SUCCESS (the current implementation has no failure
+ * path).
+ */
+int pldm_watchdog_init(void);
+
 #endif /* __PLDM_H__ */