diff mbox series

discover: implement a periodic requery for network devices

Message ID 20180703062458.7594-1-jk@ozlabs.org
State Accepted
Headers show
Series discover: implement a periodic requery for network devices | expand

Commit Message

Jeremy Kerr July 3, 2018, 6:24 a.m. UTC
If we boot a machine before external (network) dependencies are properly
configured, it will have tried once to download configuration, and
possibly failed due to that configuration not being present.

This change introduces a periodic requery of network resources. After a
timeout, petitboot will either re-acquire its DHCP lease (causing any
downloads to be re-processed, possibly with different parameters from
the new lease), or re-download a statically defined URL.

This timeout defaults to five minutes (similar to pxelinux), and is
configurable by DHCP option 211, "reboot time".

Signed-off-by: Jeremy Kerr <jk@ozlabs.org>
---
 discover/device-handler.c | 132 ++++++++++++++++++++++++++++++++++++++++++++--
 discover/device-handler.h |   4 ++
 discover/network.c        |  46 ++++++++++++++++
 discover/network.h        |   2 +
 utils/pb-udhcpc           |   4 +-
 5 files changed, 182 insertions(+), 6 deletions(-)

Comments

Jeremy Kerr July 3, 2018, 6:44 a.m. UTC | #1
Hi all,

> If we boot a machine before external (network) dependencies are properly
> configured, it will have tried once to download configuration, and
> possibly failed due to that configuration not being present.
> 
> This change introduces a periodic requery of network resources. After a
> timeout, petitboot will either re-acquire its DHCP lease (causing any
> downloads to be re-processed, possibly with different parameters from
> the new lease), or re-download a statically defined URL.
> 
> This timeout defaults to five minutes (similar to pxelinux), and is
> configurable by DHCP option 211, "reboot time".

Note that this patch depends on a corresponding change to udhcpc:

  http://lists.busybox.net/pipermail/busybox/2018-July/086540.html

Cheers,


Jeremy
Sam Mendoza-Jonas July 9, 2018, 5:24 a.m. UTC | #2
On Tue, 2018-07-03 at 16:24 +1000, Jeremy Kerr wrote:
> If we boot a machine before external (network) dependencies are properly
> configured, it will have tried once to download configuration, and
> possibly failed due to that configuration not being present.
> 
> This change introduces a periodic requery of network resources. After a
> timeout, petitboot will either re-acquire its DHCP lease (causing any
> downloads to be re-processed, possibly with different parameters from
> the new lease), or re-download a statically defined URL.
> 
> This timeout defaults to five minutes (similar to pxelinux), and is
> configurable by DHCP option 211, "reboot time".
> 
> Signed-off-by: Jeremy Kerr <jk@ozlabs.org>

Thanks, merged as 6fa0edf.
This technically broke building the parser tests, since
network_requery_device() couldn't be resolved, but I added a stub for it.

> ---
>  discover/device-handler.c | 132 ++++++++++++++++++++++++++++++++++++++++++++--
>  discover/device-handler.h |   4 ++
>  discover/network.c        |  46 ++++++++++++++++
>  discover/network.h        |   2 +
>  utils/pb-udhcpc           |   4 +-
>  5 files changed, 182 insertions(+), 6 deletions(-)
> 
> diff --git a/discover/device-handler.c b/discover/device-handler.c
> index aa61bd2..42c95bb 100644
> --- a/discover/device-handler.c
> +++ b/discover/device-handler.c
> @@ -49,6 +49,8 @@ enum default_priority {
>  	DEFAULT_PRIORITY_DISABLED	= 0xff,
>  };
>  
> +static int default_rescan_timeout = 5 * 60; /* seconds */
> +
>  struct progress_info {
>  	unsigned int			percentage;
>  	unsigned long			size;		/* size in bytes */
> @@ -418,10 +420,13 @@ void device_handler_reinit(struct device_handler *handler)
>  
>  	/* drop all devices */
>  	for (i = 0; i < handler->n_devices; i++) {
> +		struct discover_device *device = handler->devices[i];
>  		discover_server_notify_device_remove(handler->server,
> -				handler->devices[i]->device);
> -		ramdisk = handler->devices[i]->ramdisk;
> -		talloc_free(handler->devices[i]);
> +				device->device);
> +		ramdisk = device->ramdisk;
> +		if (device->requery_waiter)
> +			waiter_remove(device->requery_waiter);
> +		talloc_free(device);
>  		talloc_free(ramdisk);
>  	}
>  
> @@ -463,6 +468,9 @@ void device_handler_remove(struct device_handler *handler,
>  	struct discover_boot_option *opt, *tmp;
>  	unsigned int i;
>  
> +	if (device->requery_waiter)
> +		waiter_remove(device->requery_waiter);
> +
>  	list_for_each_entry_safe(&device->boot_options, opt, tmp, list) {
>  		if (opt == handler->default_boot_option) {
>  			pb_log("Default option %s cancelled since device removed",
> @@ -702,7 +710,17 @@ void device_handler_status_download_remove(struct device_handler *handler,
>  
>  static void device_handler_boot_status_cb(void *arg, struct status *status)
>  {
> -	device_handler_status(arg, status);
> +	struct device_handler *handler = arg;
> +
> +	/* boot had failed; update handler state to allow a new default if one
> +	 * is found later
> +	 */
> +	if (status->type == STATUS_ERROR) {
> +		handler->pending_boot = NULL;
> +		handler->default_boot_option = NULL;
> +	}
> +
> +	device_handler_status(handler, status);
>  }
>  
>  static void countdown_status(struct device_handler *handler,
> @@ -1165,6 +1183,109 @@ out:
>  	return 0;
>  }
>  
> +struct requery_data {
> +	struct device_handler	*handler;
> +	struct discover_device	*device;
> +};
> +
> +static int device_handler_requery_timeout_fn(void *data)
> +{
> +	struct discover_boot_option *opt, *tmp;
> +	struct requery_data *rqd = data;
> +	struct device_handler *handler;
> +	struct discover_device *device;
> +
> +	handler = rqd->handler;
> +	device = rqd->device;
> +
> +	talloc_free(rqd);
> +
> +	/* network_requery_device may re-add a timeout, so clear the device
> +	 * waiter here, so we can potentially start a new one. */
> +	device->requery_waiter = NULL;
> +
> +	/* We keep the device around, but get rid of the parsed boot
> +	 * options on that device. That involves clearing out the lists,
> +	 * and potentially cancelling a default.
> +	 */
> +	list_for_each_entry_safe(&handler->unresolved_boot_options,
> +			opt, tmp, list) {
> +		if (opt->device != device)
> +			continue;
> +		list_remove(&opt->list);
> +		talloc_free(opt);
> +	}
> +
> +	list_for_each_entry_safe(&device->boot_options, opt, tmp, list) {
> +		if (opt == handler->default_boot_option) {
> +			pb_log("Default option %s cancelled since device is being requeried",
> +					opt->option->name);
> +			device_handler_cancel_default(handler);
> +		}
> +		list_remove(&opt->list);
> +		talloc_free(opt);
> +	}
> +
> +	discover_server_notify_device_remove(handler->server, device->device);
> +	device->notified = false;
> +
> +	network_requery_device(handler->network, device);
> +
> +	return 0;
> +}
> +
> +/* Schedule a requery in timeout (seconds).
> + *
> + * Special values of timeout:
> + *   0: no requery
> + *  -1: use default
> + */
> +void device_handler_start_requery_timeout( struct device_handler *handler,
> +		struct discover_device *dev, int timeout)
> +{
> +	struct requery_data *rqd;
> +
> +	if (dev->requery_waiter)
> +		return;
> +
> +	if (timeout == -1)
> +		timeout = default_rescan_timeout;
> +	else if (timeout == 0)
> +		return;
> +
> +	rqd = talloc(dev, struct requery_data);
> +	rqd->handler = handler;
> +	rqd->device = dev;
> +
> +	pb_debug("starting requery timeout for device %s, in %d sec\n",
> +			dev->device->id, timeout);
> +
> +	dev->requery_waiter = waiter_register_timeout(handler->waitset,
> +			timeout * 1000, device_handler_requery_timeout_fn, rqd);
> +}
> +
> +static int event_requery_timeout(struct event *event)
> +{
> +	int timeout = -1;
> +	unsigned long x;
> +	const char *str;
> +	char *endp;
> +
> +	if (!event)
> +		return timeout;
> +
> +	str = event_get_param(event, "reboottime");
> +	if (!str)
> +		return timeout;
> +
> +	x = strtoul(str, &endp, 0);
> +	if (endp != str)
> +		timeout = x;
> +
> +	return timeout;
> +}
> +
> +
>  /* Incoming dhcp event */
>  int device_handler_dhcp(struct device_handler *handler,
>  		struct discover_device *dev, struct event *event)
> @@ -1182,6 +1303,9 @@ int device_handler_dhcp(struct device_handler *handler,
>  	talloc_steal(ctx, event);
>  	ctx->event = event;
>  
> +	device_handler_start_requery_timeout(handler, dev,
> +			event_requery_timeout(event));
> +
>  	iterate_parsers(ctx);
>  
>  	device_handler_discover_context_commit(handler, ctx);
> diff --git a/discover/device-handler.h b/discover/device-handler.h
> index 771cd06..427a94a 100644
> --- a/discover/device-handler.h
> +++ b/discover/device-handler.h
> @@ -38,6 +38,8 @@ struct discover_device {
>  
>  	struct list		boot_options;
>  	struct list		params;
> +
> +	struct waiter		*requery_waiter;
>  };
>  
>  struct discover_boot_option {
> @@ -102,6 +104,8 @@ int device_handler_dhcp(struct device_handler *handler,
>  		struct discover_device *dev, struct event *event);
>  void device_handler_remove(struct device_handler *handler,
>  		struct discover_device *device);
> +void device_handler_start_requery_timeout( struct device_handler *handler,
> +		struct discover_device *dev, int timeout);
>  
>  void device_handler_status(struct device_handler *handler,
>  		struct status *status);
> diff --git a/discover/network.c b/discover/network.c
> index 9594b2e..5a3b0b4 100644
> --- a/discover/network.c
> +++ b/discover/network.c
> @@ -331,6 +331,7 @@ static void configure_interface_dhcp(struct network *network,
>  		"-f",
>  		"-O", "pxeconffile",
>  		"-O", "pxepathprefix",
> +		"-O", "reboottime",
>  		"-p", pidfile,
>  		"-i", interface->name,
>  		"-x", id, /* [11,12] - dhcp client identifier */
> @@ -417,6 +418,8 @@ static void configure_interface_static(struct network *network,
>  						interface->hwaddr,
>  						sizeof(interface->hwaddr)),
>  				config->static_config.address);
> +		device_handler_start_requery_timeout(network->handler,
> +				interface->dev, -1);
>  	}
>  
>  	return;
> @@ -498,6 +501,49 @@ static void configure_interface(struct network *network,
>  	interface->state = IFSTATE_CONFIGURED;
>  }
>  
> +void network_requery_device(struct network *network,
> +		struct discover_device *dev)
> +{
> +	const struct interface_config *config;
> +	struct interface *interface;
> +
> +	interface = find_interface_by_uuid(network, dev->uuid);
> +	if (!interface)
> +		return;
> +
> +	if (interface->udhcpc_process) {
> +		interface->udhcpc_process->exit_cb = NULL;
> +		interface->udhcpc_process->data = NULL;
> +		process_stop_async(interface->udhcpc_process);
> +		process_release(interface->udhcpc_process);
> +	}
> +
> +	config = find_config_by_hwaddr(interface->hwaddr);
> +
> +	if (config && config->ignore)
> +		return;
> +
> +	if (!config || config->method == CONFIG_METHOD_DHCP) {
> +		/* Restart DHCP. Once we acquire a lease, we'll re-start
> +		 * the requery timeout (based on any reboottime DHCP option)
> +		 */
> +		configure_interface_dhcp(network, interface);
> +
> +	} else if (config->method == CONFIG_METHOD_STATIC &&
> +			config->static_config.url) {
> +		/* Redownload statically-provided URL, and manually restart
> +		 * requery timeout */
> +		device_handler_process_url(network->handler,
> +				config->static_config.url,
> +				mac_bytes_to_string(interface->dev,
> +						interface->hwaddr,
> +						sizeof(interface->hwaddr)),
> +				config->static_config.address);
> +		device_handler_start_requery_timeout(network->handler,
> +				dev, -1);
> +	}
> +}
> +
>  static int network_handle_nlmsg(struct network *network, struct nlmsghdr *nlmsg)
>  {
>  	bool have_ifaddr, have_ifname;
> diff --git a/discover/network.h b/discover/network.h
> index bf1f2de..0cea6f2 100644
> --- a/discover/network.h
> +++ b/discover/network.h
> @@ -14,6 +14,8 @@ void network_register_device(struct network *network,
>  		struct discover_device *dev);
>  void network_unregister_device(struct network *network,
>  		struct discover_device *dev);
> +void network_requery_device(struct network *network,
> +		struct discover_device *dev);
>  
>  uint8_t *find_mac_by_name(void *ctx, struct network *network,
>  		const char *name);
> diff --git a/utils/pb-udhcpc b/utils/pb-udhcpc
> index 4495266..e73495d 100644
> --- a/utils/pb-udhcpc
> +++ b/utils/pb-udhcpc
> @@ -18,8 +18,8 @@ pb_add () {
>  	paramstr=''
>  
>  	# Collect relevant DHCP response parameters into $paramstr
> -	for name in pxeconffile pxepathprefix bootfile mac ip siaddr \
> -		serverid tftp
> +	for name in pxeconffile pxepathprefix reboottime bootfile mac ip \
> +	        siaddr serverid tftp
>  	do
>  		value=$(eval "echo \${$name}")
>  		[ -n "$value" ] || continue;
diff mbox series

Patch

diff --git a/discover/device-handler.c b/discover/device-handler.c
index aa61bd2..42c95bb 100644
--- a/discover/device-handler.c
+++ b/discover/device-handler.c
@@ -49,6 +49,8 @@  enum default_priority {
 	DEFAULT_PRIORITY_DISABLED	= 0xff,
 };
 
+static int default_rescan_timeout = 5 * 60; /* seconds */
+
 struct progress_info {
 	unsigned int			percentage;
 	unsigned long			size;		/* size in bytes */
@@ -418,10 +420,13 @@  void device_handler_reinit(struct device_handler *handler)
 
 	/* drop all devices */
 	for (i = 0; i < handler->n_devices; i++) {
+		struct discover_device *device = handler->devices[i];
 		discover_server_notify_device_remove(handler->server,
-				handler->devices[i]->device);
-		ramdisk = handler->devices[i]->ramdisk;
-		talloc_free(handler->devices[i]);
+				device->device);
+		ramdisk = device->ramdisk;
+		if (device->requery_waiter)
+			waiter_remove(device->requery_waiter);
+		talloc_free(device);
 		talloc_free(ramdisk);
 	}
 
@@ -463,6 +468,9 @@  void device_handler_remove(struct device_handler *handler,
 	struct discover_boot_option *opt, *tmp;
 	unsigned int i;
 
+	if (device->requery_waiter)
+		waiter_remove(device->requery_waiter);
+
 	list_for_each_entry_safe(&device->boot_options, opt, tmp, list) {
 		if (opt == handler->default_boot_option) {
 			pb_log("Default option %s cancelled since device removed",
@@ -702,7 +710,17 @@  void device_handler_status_download_remove(struct device_handler *handler,
 
 static void device_handler_boot_status_cb(void *arg, struct status *status)
 {
-	device_handler_status(arg, status);
+	struct device_handler *handler = arg;
+
+	/* boot had failed; update handler state to allow a new default if one
+	 * is found later
+	 */
+	if (status->type == STATUS_ERROR) {
+		handler->pending_boot = NULL;
+		handler->default_boot_option = NULL;
+	}
+
+	device_handler_status(handler, status);
 }
 
 static void countdown_status(struct device_handler *handler,
@@ -1165,6 +1183,109 @@  out:
 	return 0;
 }
 
+struct requery_data {
+	struct device_handler	*handler;
+	struct discover_device	*device;
+};
+
+static int device_handler_requery_timeout_fn(void *data)
+{
+	struct discover_boot_option *opt, *tmp;
+	struct requery_data *rqd = data;
+	struct device_handler *handler;
+	struct discover_device *device;
+
+	handler = rqd->handler;
+	device = rqd->device;
+
+	talloc_free(rqd);
+
+	/* network_requery_device may re-add a timeout, so clear the device
+	 * waiter here, so we can potentially start a new one. */
+	device->requery_waiter = NULL;
+
+	/* We keep the device around, but get rid of the parsed boot
+	 * options on that device. That involves clearing out the lists,
+	 * and potentially cancelling a default.
+	 */
+	list_for_each_entry_safe(&handler->unresolved_boot_options,
+			opt, tmp, list) {
+		if (opt->device != device)
+			continue;
+		list_remove(&opt->list);
+		talloc_free(opt);
+	}
+
+	list_for_each_entry_safe(&device->boot_options, opt, tmp, list) {
+		if (opt == handler->default_boot_option) {
+			pb_log("Default option %s cancelled since device is being requeried",
+					opt->option->name);
+			device_handler_cancel_default(handler);
+		}
+		list_remove(&opt->list);
+		talloc_free(opt);
+	}
+
+	discover_server_notify_device_remove(handler->server, device->device);
+	device->notified = false;
+
+	network_requery_device(handler->network, device);
+
+	return 0;
+}
+
+/* Schedule a requery in timeout (seconds).
+ *
+ * Special values of timeout:
+ *   0: no requery
+ *  -1: use default
+ */
+void device_handler_start_requery_timeout( struct device_handler *handler,
+		struct discover_device *dev, int timeout)
+{
+	struct requery_data *rqd;
+
+	if (dev->requery_waiter)
+		return;
+
+	if (timeout == -1)
+		timeout = default_rescan_timeout;
+	else if (timeout == 0)
+		return;
+
+	rqd = talloc(dev, struct requery_data);
+	rqd->handler = handler;
+	rqd->device = dev;
+
+	pb_debug("starting requery timeout for device %s, in %d sec\n",
+			dev->device->id, timeout);
+
+	dev->requery_waiter = waiter_register_timeout(handler->waitset,
+			timeout * 1000, device_handler_requery_timeout_fn, rqd);
+}
+
+static int event_requery_timeout(struct event *event)
+{
+	int timeout = -1;
+	unsigned long x;
+	const char *str;
+	char *endp;
+
+	if (!event)
+		return timeout;
+
+	str = event_get_param(event, "reboottime");
+	if (!str)
+		return timeout;
+
+	x = strtoul(str, &endp, 0);
+	if (endp != str)
+		timeout = x;
+
+	return timeout;
+}
+
+
 /* Incoming dhcp event */
 int device_handler_dhcp(struct device_handler *handler,
 		struct discover_device *dev, struct event *event)
@@ -1182,6 +1303,9 @@  int device_handler_dhcp(struct device_handler *handler,
 	talloc_steal(ctx, event);
 	ctx->event = event;
 
+	device_handler_start_requery_timeout(handler, dev,
+			event_requery_timeout(event));
+
 	iterate_parsers(ctx);
 
 	device_handler_discover_context_commit(handler, ctx);
diff --git a/discover/device-handler.h b/discover/device-handler.h
index 771cd06..427a94a 100644
--- a/discover/device-handler.h
+++ b/discover/device-handler.h
@@ -38,6 +38,8 @@  struct discover_device {
 
 	struct list		boot_options;
 	struct list		params;
+
+	struct waiter		*requery_waiter;
 };
 
 struct discover_boot_option {
@@ -102,6 +104,8 @@  int device_handler_dhcp(struct device_handler *handler,
 		struct discover_device *dev, struct event *event);
 void device_handler_remove(struct device_handler *handler,
 		struct discover_device *device);
+void device_handler_start_requery_timeout( struct device_handler *handler,
+		struct discover_device *dev, int timeout);
 
 void device_handler_status(struct device_handler *handler,
 		struct status *status);
diff --git a/discover/network.c b/discover/network.c
index 9594b2e..5a3b0b4 100644
--- a/discover/network.c
+++ b/discover/network.c
@@ -331,6 +331,7 @@  static void configure_interface_dhcp(struct network *network,
 		"-f",
 		"-O", "pxeconffile",
 		"-O", "pxepathprefix",
+		"-O", "reboottime",
 		"-p", pidfile,
 		"-i", interface->name,
 		"-x", id, /* [11,12] - dhcp client identifier */
@@ -417,6 +418,8 @@  static void configure_interface_static(struct network *network,
 						interface->hwaddr,
 						sizeof(interface->hwaddr)),
 				config->static_config.address);
+		device_handler_start_requery_timeout(network->handler,
+				interface->dev, -1);
 	}
 
 	return;
@@ -498,6 +501,49 @@  static void configure_interface(struct network *network,
 	interface->state = IFSTATE_CONFIGURED;
 }
 
+void network_requery_device(struct network *network,
+		struct discover_device *dev)
+{
+	const struct interface_config *config;
+	struct interface *interface;
+
+	interface = find_interface_by_uuid(network, dev->uuid);
+	if (!interface)
+		return;
+
+	if (interface->udhcpc_process) {
+		interface->udhcpc_process->exit_cb = NULL;
+		interface->udhcpc_process->data = NULL;
+		process_stop_async(interface->udhcpc_process);
+		process_release(interface->udhcpc_process);
+	}
+
+	config = find_config_by_hwaddr(interface->hwaddr);
+
+	if (config && config->ignore)
+		return;
+
+	if (!config || config->method == CONFIG_METHOD_DHCP) {
+		/* Restart DHCP. Once we acquire a lease, we'll re-start
+		 * the requery timeout (based on any reboottime DHCP option)
+		 */
+		configure_interface_dhcp(network, interface);
+
+	} else if (config->method == CONFIG_METHOD_STATIC &&
+			config->static_config.url) {
+		/* Redownload statically-provided URL, and manually restart
+		 * requery timeout */
+		device_handler_process_url(network->handler,
+				config->static_config.url,
+				mac_bytes_to_string(interface->dev,
+						interface->hwaddr,
+						sizeof(interface->hwaddr)),
+				config->static_config.address);
+		device_handler_start_requery_timeout(network->handler,
+				dev, -1);
+	}
+}
+
 static int network_handle_nlmsg(struct network *network, struct nlmsghdr *nlmsg)
 {
 	bool have_ifaddr, have_ifname;
diff --git a/discover/network.h b/discover/network.h
index bf1f2de..0cea6f2 100644
--- a/discover/network.h
+++ b/discover/network.h
@@ -14,6 +14,8 @@  void network_register_device(struct network *network,
 		struct discover_device *dev);
 void network_unregister_device(struct network *network,
 		struct discover_device *dev);
+void network_requery_device(struct network *network,
+		struct discover_device *dev);
 
 uint8_t *find_mac_by_name(void *ctx, struct network *network,
 		const char *name);
diff --git a/utils/pb-udhcpc b/utils/pb-udhcpc
index 4495266..e73495d 100644
--- a/utils/pb-udhcpc
+++ b/utils/pb-udhcpc
@@ -18,8 +18,8 @@  pb_add () {
 	paramstr=''
 
 	# Collect relevant DHCP response parameters into $paramstr
-	for name in pxeconffile pxepathprefix bootfile mac ip siaddr \
-		serverid tftp
+	for name in pxeconffile pxepathprefix reboottime bootfile mac ip \
+	        siaddr serverid tftp
 	do
 		value=$(eval "echo \${$name}")
 		[ -n "$value" ] || continue;