Message ID | 20220429094744.72855-2-clombard@linux.vnet.ibm.com |
---|---|
State | Superseded |
Headers | show |
Series | Complete PLDM responder and enable PLDM support | expand |
On 29/04/2022 11:47, Christophe Lombard wrote: > Encode a PLDM platform event message to send the heartbeat to the BMC. > Watchdog is "armed" when a > PLDM_EVENT_MESSAGE_GLOBAL_ENABLE_ASYNC_KEEP_ALIVE is received. > > Signed-off-by: Christophe Lombard <clombard@linux.vnet.ibm.com> > --- > core/pldm/Makefile.inc | 2 + > core/pldm/pldm-watchdog.c | 127 ++++++++++++++++++++++++++++++++++++++ > core/pldm/pldm.h | 1 + > include/pldm.h | 5 ++ > 4 files changed, 135 insertions(+) > create mode 100644 core/pldm/pldm-watchdog.c > > diff --git a/core/pldm/Makefile.inc b/core/pldm/Makefile.inc > index 506bf5d7..c6f78822 100644 > --- a/core/pldm/Makefile.inc > +++ b/core/pldm/Makefile.inc > @@ -9,11 +9,13 @@ CPPFLAGS += -I$(SRC)/pldm/ibm/libpldm/ > > CFLAGS_$(PLDM_DIR)/pldm-platform-requests.o = -Wno-strict-prototypes > CFLAGS_$(PLDM_DIR)/pldm-bios-requests.o = -Wno-strict-prototypes > +CFLAGS_$(PLDM_DIR)/pldm-watchdog.o = -Wno-strict-prototypes I know you're going to update to latest libpldm, but it seems that -Wno-strict-prototypes is no longer going to be needed (they fixed the prototypes with no arguments). Might also be true for the other file-specific CFLAGS, this is just a reminder that we should check it. > > PLDM_OBJS = pldm-common.o pldm-responder.o pldm-requester.o > PLDM_OBJS += pldm-base-requests.o pldm-platform-requests.o > PLDM_OBJS += pldm-bios-requests.o pldm-fru-requests.o > PLDM_OBJS += pldm-file-io-requests.o pldm-lid-files.o > +PLDM_OBJS += pldm-watchdog.o > > PLDM = $(PLDM_DIR)/built-in.a > $(PLDM): $(PLDM_OBJS:%=$(PLDM_DIR)/%) > diff --git a/core/pldm/pldm-watchdog.c b/core/pldm/pldm-watchdog.c > new file mode 100644 > index 00000000..9d2aabe8 > --- /dev/null > +++ b/core/pldm/pldm-watchdog.c > @@ -0,0 +1,127 @@ > +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later > +// Copyright 2022 IBM Corp. > + > +#define pr_fmt(fmt) "PLDM: " fmt > + > +#include <lock.h> > +#include <stdlib.h> > +#include <string.h> > +#include <opal.h> > +#include <timebase.h> > +#include <timer.h> > +#include <pldm/libpldm/platform.h> > +#include "pldm.h" > + > +#define DEFAULT_WATCHDOG_TIMEOUT_SEC (10 * 60) /* 10 min */ > + > +/* Whether the watchdog timer is armed and Skiboot should be sending > + * regular heartbeats. > + */ > +bool watchdog_armed; > + > +/* The period (in seconds) of the PLDM watchdog, as dictated by BMC */ > +int watchdog_period_sec = DEFAULT_WATCHDOG_TIMEOUT_SEC; > + > +static struct lock sequence_lock; > +static uint8_t sequence_number; > + > +int pldm_watchdog_reset_timer(void) > +{ > + uint8_t heartbeat_elapsed_data[2]; > + size_t response_len, payload_len; > + uint32_t request_length; > + void *response_msg; > + char *request_msg; > + int rc; > + > + struct pldm_platform_event_message_req event_message_req = { > + .format_version = PLDM_PLATFORM_EVENT_MESSAGE_FORMAT_VERSION, > + .tid = HOST_TID, > + .event_class = PLDM_HEARTBEAT_TIMER_ELAPSED_EVENT, > + }; > + > + struct pldm_platform_event_message_resp response; > + > + /* watchdog is not armed, so no need to send the heartbeat */ > + if (!watchdog_armed) { > + prlog(PR_ERR, "%s - PLDM watchdog is not armed, not sending the heartbeat\n", > + __func__); > + return OPAL_PARAMETER; > + } > + > + prlog(PR_INFO, "%s - send the heartbeat to the BMC, sequence: %d, period: %d\n", > + __func__, sequence_number, watchdog_period_sec); > + > + /* Send the event request */ > + heartbeat_elapsed_data[0] = PLDM_PLATFORM_EVENT_MESSAGE_FORMAT_VERSION; > + > + /* We need to make sure that we send the BMC the correct > + * sequence number. To prevent possible race conditions for the > + * sequence number, lock it while we're incrementing and > + * sending it down. > + */ > + lock(&sequence_lock); > + heartbeat_elapsed_data[1] = sequence_number++; > + > + payload_len = PLDM_PLATFORM_EVENT_MESSAGE_MIN_REQ_BYTES + sizeof(heartbeat_elapsed_data); > + > + request_length = sizeof(struct pldm_msg_hdr) + > + sizeof(struct pldm_platform_event_message_req) + > + sizeof(heartbeat_elapsed_data); > + request_msg = malloc(request_length); > + > + /* Encode the platform event message request */ > + rc = encode_platform_event_message_req( > + DEFAULT_INSTANCE_ID, > + event_message_req.format_version, > + event_message_req.tid, > + event_message_req.event_class, > + heartbeat_elapsed_data, > + sizeof(heartbeat_elapsed_data), > + (struct pldm_msg *)request_msg, > + payload_len); > + if (rc != PLDM_SUCCESS) { > + prlog(PR_ERR, "Encode PlatformEventMessage Error, rc: %d\n", rc); > + free(request_msg); > + sequence_number--; We don't release the sequence_lock in this error path. Do we really care about decrementing sequence_number in case of error? The spec says it allows the BMC to know if it misses one hearbeat. Which would be the case in case of error, so decrementing is actually misleading. And it would simplify the code. > + return OPAL_PARAMETER; > + } > + unlock(&sequence_lock); > + > + /* Send and get the response message bytes */ > + rc = pldm_do_request(BMC_EID, request_msg, request_length - 1, > + &response_msg, &response_len); > + if (rc) { > + prlog(PR_ERR, "Communication Error, req: PlatformEventMessage, rc: %d\n", rc); > + free(request_msg); > + sequence_number--; sequence_number is decremented with no lock held. But see previous comment, we may not need to decrement. > + return rc; > + } > + free(request_msg); > + > + /* Decode the message */ > + payload_len = response_len - sizeof(struct pldm_msg_hdr); > + rc = decode_platform_event_message_resp( > + response_msg, > + payload_len, > + &response.completion_code, > + &response.platform_event_status); > + if (rc != PLDM_SUCCESS || response.completion_code != PLDM_SUCCESS) { > + prlog(PR_ERR, "Decode PlatformEventMessage Error, rc: %d, cc: %d, pes: %d\n", > + rc, response.completion_code, > + response.platform_event_status); > + free(response_msg); > + return OPAL_PARAMETER; > + } > + > + free(response_msg); > + So we only send one heartbeat message? Not emitted periodically? From the spec: "heartbeatTimer Amount of time in seconds after *each* elapsing of which the terminus shall emit a heartbeat event (the heartbeatTimerElapsedEvent) to the event receiver. If the terminus cannot produce heartbeat events at the requested rate, it shall return completion code HEARTBEAT_FREQUENCY_TOO_HIGH." So it seems to me that once a SetEventReceiver command is received requesting a heartbeat, we should send one periodically. > + return OPAL_SUCCESS; > +} > + > +int pldm_watchdog_init(void) > +{ > + init_lock(&sequence_lock); pldm_watchdog_reset_timer() is an exported API and a misbehaving caller might call it and use the lock before it's init. We should use a static initializer when declaring the global variable: static struct lock sequence_lock = LOCK_UNLOCKED > + > + return pldm_watchdog_reset_timer(); Does it make sense to call pldm_watchdog_reset_timer() during init? Can watchdog_timer already be set? Fred
diff --git a/core/pldm/Makefile.inc b/core/pldm/Makefile.inc index 506bf5d7..c6f78822 100644 --- a/core/pldm/Makefile.inc +++ b/core/pldm/Makefile.inc @@ -9,11 +9,13 @@ CPPFLAGS += -I$(SRC)/pldm/ibm/libpldm/ CFLAGS_$(PLDM_DIR)/pldm-platform-requests.o = -Wno-strict-prototypes CFLAGS_$(PLDM_DIR)/pldm-bios-requests.o = -Wno-strict-prototypes +CFLAGS_$(PLDM_DIR)/pldm-watchdog.o = -Wno-strict-prototypes PLDM_OBJS = pldm-common.o pldm-responder.o pldm-requester.o PLDM_OBJS += pldm-base-requests.o pldm-platform-requests.o PLDM_OBJS += pldm-bios-requests.o pldm-fru-requests.o PLDM_OBJS += pldm-file-io-requests.o pldm-lid-files.o +PLDM_OBJS += pldm-watchdog.o PLDM = $(PLDM_DIR)/built-in.a $(PLDM): $(PLDM_OBJS:%=$(PLDM_DIR)/%) diff --git a/core/pldm/pldm-watchdog.c b/core/pldm/pldm-watchdog.c new file mode 100644 index 00000000..9d2aabe8 --- /dev/null +++ b/core/pldm/pldm-watchdog.c @@ -0,0 +1,127 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later +// Copyright 2022 IBM Corp. + +#define pr_fmt(fmt) "PLDM: " fmt + +#include <lock.h> +#include <stdlib.h> +#include <string.h> +#include <opal.h> +#include <timebase.h> +#include <timer.h> +#include <pldm/libpldm/platform.h> +#include "pldm.h" + +#define DEFAULT_WATCHDOG_TIMEOUT_SEC (10 * 60) /* 10 min */ + +/* Whether the watchdog timer is armed and Skiboot should be sending + * regular heartbeats. + */ +bool watchdog_armed; + +/* The period (in seconds) of the PLDM watchdog, as dictated by BMC */ +int watchdog_period_sec = DEFAULT_WATCHDOG_TIMEOUT_SEC; + +static struct lock sequence_lock; +static uint8_t sequence_number; + +int pldm_watchdog_reset_timer(void) +{ + uint8_t heartbeat_elapsed_data[2]; + size_t response_len, payload_len; + uint32_t request_length; + void *response_msg; + char *request_msg; + int rc; + + struct pldm_platform_event_message_req event_message_req = { + .format_version = PLDM_PLATFORM_EVENT_MESSAGE_FORMAT_VERSION, + .tid = HOST_TID, + .event_class = PLDM_HEARTBEAT_TIMER_ELAPSED_EVENT, + }; + + struct pldm_platform_event_message_resp response; + + /* watchdog is not armed, so no need to send the heartbeat */ + if (!watchdog_armed) { + prlog(PR_ERR, "%s - PLDM watchdog is not armed, not sending the heartbeat\n", + __func__); + return OPAL_PARAMETER; + } + + prlog(PR_INFO, "%s - send the heartbeat to the BMC, sequence: %d, period: %d\n", + __func__, sequence_number, watchdog_period_sec); + + /* Send the event request */ + heartbeat_elapsed_data[0] = PLDM_PLATFORM_EVENT_MESSAGE_FORMAT_VERSION; + + /* We need to make sure that we send the BMC the correct + * sequence number. To prevent possible race conditions for the + * sequence number, lock it while we're incrementing and + * sending it down. + */ + lock(&sequence_lock); + heartbeat_elapsed_data[1] = sequence_number++; + + payload_len = PLDM_PLATFORM_EVENT_MESSAGE_MIN_REQ_BYTES + sizeof(heartbeat_elapsed_data); + + request_length = sizeof(struct pldm_msg_hdr) + + sizeof(struct pldm_platform_event_message_req) + + sizeof(heartbeat_elapsed_data); + request_msg = malloc(request_length); + + /* Encode the platform event message request */ + rc = encode_platform_event_message_req( + DEFAULT_INSTANCE_ID, + event_message_req.format_version, + event_message_req.tid, + event_message_req.event_class, + heartbeat_elapsed_data, + sizeof(heartbeat_elapsed_data), + (struct pldm_msg *)request_msg, + payload_len); + if (rc != PLDM_SUCCESS) { + prlog(PR_ERR, "Encode PlatformEventMessage Error, rc: %d\n", rc); + free(request_msg); + sequence_number--; + return OPAL_PARAMETER; + } + unlock(&sequence_lock); + + /* Send and get the response message bytes */ + rc = pldm_do_request(BMC_EID, request_msg, request_length - 1, + &response_msg, &response_len); + if (rc) { + prlog(PR_ERR, "Communication Error, req: PlatformEventMessage, rc: %d\n", rc); + free(request_msg); + sequence_number--; + return rc; + } + free(request_msg); + + /* Decode the message */ + payload_len = response_len - sizeof(struct pldm_msg_hdr); + rc = decode_platform_event_message_resp( + response_msg, + payload_len, + &response.completion_code, + &response.platform_event_status); + if (rc != PLDM_SUCCESS || response.completion_code != PLDM_SUCCESS) { + prlog(PR_ERR, "Decode PlatformEventMessage Error, rc: %d, cc: %d, pes: %d\n", + rc, response.completion_code, + response.platform_event_status); + free(response_msg); + return OPAL_PARAMETER; + } + + free(response_msg); + + return OPAL_SUCCESS; +} + +int pldm_watchdog_init(void) +{ + init_lock(&sequence_lock); + + return pldm_watchdog_reset_timer(); +} diff --git a/core/pldm/pldm.h b/core/pldm/pldm.h index 7054ba54..a6de9ae8 100644 --- a/core/pldm/pldm.h +++ b/core/pldm/pldm.h @@ -50,6 +50,7 @@ struct pldm_type { }; int pldm_send(uint8_t dest_id, uint8_t *buf, int len); +int pldm_watchdog_reset_timer(void); /* Responder support */ int pldm_rx_handle_request(struct pldm_rx_data *rx); diff --git a/include/pldm.h b/include/pldm.h index 5ad724af..01af9a33 100644 --- a/include/pldm.h +++ b/include/pldm.h @@ -42,4 +42,9 @@ int pldm_lid_files_init(struct blocklevel_device **bl); */ bool pldm_lid_files_exit(struct blocklevel_device *bl); +/** + * Initialize and reset the watchdog + */ +int pldm_watchdog_init(void); + #endif /* __PLDM_H__ */
Encode a PLDM platform event message to send the heartbeat to the BMC. Watchdog is "armed" when a PLDM_EVENT_MESSAGE_GLOBAL_ENABLE_ASYNC_KEEP_ALIVE is received. Signed-off-by: Christophe Lombard <clombard@linux.vnet.ibm.com> --- core/pldm/Makefile.inc | 2 + core/pldm/pldm-watchdog.c | 127 ++++++++++++++++++++++++++++++++++++++ core/pldm/pldm.h | 1 + include/pldm.h | 5 ++ 4 files changed, 135 insertions(+) create mode 100644 core/pldm/pldm-watchdog.c