diff mbox series

[PULL,07/20] virtio-net: Do not propagate ebpf-rss-fds errors

Message ID 20240604073755.1859-8-jasowang@redhat.com
State New
Headers show
Series [PULL,01/20] tap: Remove tap_probe_vnet_hdr_len() | expand

Commit Message

Jason Wang June 4, 2024, 7:37 a.m. UTC
From: Akihiko Odaki <akihiko.odaki@daynix.com>

Propagating ebpf-rss-fds errors has several problems.

First, it makes device realization fail and disables the fallback to the
conventional eBPF loading.

Second, it leaks memory by making device realization fail without
freeing memory already allocated.

Third, the convention is to set an error when a function returns false,
but virtio_net_load_ebpf_fds() and virtio_net_load_ebpf() return false
without setting an error, which is confusing.

Remove the propagation to fix these problems.

Fixes: 0524ea0510a3 ("ebpf: Added eBPF initialization by fds.")
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 hw/net/virtio-net.c | 23 ++++++++++-------------
 1 file changed, 10 insertions(+), 13 deletions(-)

Comments

Daniel P. Berrangé June 5, 2024, 10:23 a.m. UTC | #1
On Tue, Jun 04, 2024 at 03:37:42PM +0800, Jason Wang wrote:
> From: Akihiko Odaki <akihiko.odaki@daynix.com>
> 
> Propagating ebpf-rss-fds errors has several problems.
> 
> First, it makes device realization fail and disables the fallback to the
> conventional eBPF loading.

AFAICT, this is not a bug - this is desired behaviour.

If the user/mgmt app has told QEMU to use FDs it has passed
in, then any failure to do this *MUST* be treated as a fatal
error. Falling back to other codepaths is ignoring a direct
user request.

> Second, it leaks memory by making device realization fail without
> freeing memory already allocated.
> 
> Third, the convention is to set an error when a function returns false,
> but virtio_net_load_ebpf_fds() and virtio_net_load_ebpf() returns false
> without setting an error, which is confusing.
> 
> Remove the propagation to fix these problems.

It doesn't fix the problems. It ignores the 2nd and 3rd problems
by removing the error reporting and ignoring the users' requested
config.

> 
> Fixes: 0524ea0510a3 ("ebpf: Added eBPF initialization by fds.")
> Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
> Signed-off-by: Jason Wang <jasowang@redhat.com>
> ---
>  hw/net/virtio-net.c | 23 ++++++++++-------------
>  1 file changed, 10 insertions(+), 13 deletions(-)
> 
> diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
> index ff600b3002..3cee2ef3ac 100644
> --- a/hw/net/virtio-net.c
> +++ b/hw/net/virtio-net.c
> @@ -1329,24 +1329,22 @@ static void virtio_net_detach_epbf_rss(VirtIONet *n)
>      virtio_net_attach_ebpf_to_backend(n->nic, -1);
>  }
>  
> -static bool virtio_net_load_ebpf_fds(VirtIONet *n, Error **errp)
> +static bool virtio_net_load_ebpf_fds(VirtIONet *n)
>  {
>      int fds[EBPF_RSS_MAX_FDS] = { [0 ... EBPF_RSS_MAX_FDS - 1] = -1};
>      int ret = true;
>      int i = 0;
>  
> -    ERRP_GUARD();
> -
>      if (n->nr_ebpf_rss_fds != EBPF_RSS_MAX_FDS) {
> -        error_setg(errp,
> -                  "Expected %d file descriptors but got %d",
> -                  EBPF_RSS_MAX_FDS, n->nr_ebpf_rss_fds);
> +        warn_report("Expected %d file descriptors but got %d",
> +                    EBPF_RSS_MAX_FDS, n->nr_ebpf_rss_fds);
>         return false;
>     }
>  
>      for (i = 0; i < n->nr_ebpf_rss_fds; i++) {
> -        fds[i] = monitor_fd_param(monitor_cur(), n->ebpf_rss_fds[i], errp);
> -        if (*errp) {
> +        fds[i] = monitor_fd_param(monitor_cur(), n->ebpf_rss_fds[i],
> +                                  &error_warn);
> +        if (fds[i] < 0) {
>              ret = false;
>              goto exit;
>          }
> @@ -1355,7 +1353,7 @@ static bool virtio_net_load_ebpf_fds(VirtIONet *n, Error **errp)
>      ret = ebpf_rss_load_fds(&n->ebpf_rss, fds[0], fds[1], fds[2], fds[3]);
>  
>  exit:
> -    if (!ret || *errp) {
> +    if (!ret) {
>          for (i = 0; i < n->nr_ebpf_rss_fds && fds[i] != -1; i++) {
>              close(fds[i]);
>          }
> @@ -1364,13 +1362,12 @@ exit:
>      return ret;
>  }
>  
> -static bool virtio_net_load_ebpf(VirtIONet *n, Error **errp)
> +static bool virtio_net_load_ebpf(VirtIONet *n)
>  {
>      bool ret = false;
>  
>      if (virtio_net_attach_ebpf_to_backend(n->nic, -1)) {
> -        if (!(n->ebpf_rss_fds
> -                && virtio_net_load_ebpf_fds(n, errp))) {
> +        if (!(n->ebpf_rss_fds && virtio_net_load_ebpf_fds(n))) {
>              ret = ebpf_rss_load(&n->ebpf_rss);
>          }
>      }
> @@ -3809,7 +3806,7 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp)
>      net_rx_pkt_init(&n->rx_pkt);
>  
>      if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
> -        virtio_net_load_ebpf(n, errp);
> +        virtio_net_load_ebpf(n);
>      }
>  }
>  
> -- 
> 2.42.0
> 
> 

With regards,
Daniel
Akihiko Odaki June 5, 2024, 8:14 p.m. UTC | #2
On 2024/06/05 19:23, Daniel P. Berrangé wrote:
> On Tue, Jun 04, 2024 at 03:37:42PM +0800, Jason Wang wrote:
>> From: Akihiko Odaki <akihiko.odaki@daynix.com>
>>
>> Propagating ebpf-rss-fds errors has several problems.
>>
>> First, it makes device realization fail and disables the fallback to the
>> conventional eBPF loading.
> 
> AFAICT, this is not a bug - this is desired behaviour.
> 
> If the user/mgmt app has told QEMU to use FDs it has passed
> in, then any failure to do this *MUST* be treated as a fatal
> error. Falling back to other codepaths is ignoring a direct
> user request.

The FD options are more of an assistance than a request. When
QEMU does not have permission to load eBPF programs, a user can get
the eBPF programs with the request-ebpf command of QMP, load them, and
pass the FDs to QEMU.

A user must not use the option to pass eBPF programs not included in
QEMU since we don't have a stable ABI. Nobody should want to do that
anyway since the function of the eBPF program is restricted by the
virtio spec.

Regards,
Akihiko Odaki
Daniel P. Berrangé June 6, 2024, 7:14 a.m. UTC | #3
On Thu, Jun 06, 2024 at 05:14:20AM +0900, Akihiko Odaki wrote:
> On 2024/06/05 19:23, Daniel P. Berrangé wrote:
> > On Tue, Jun 04, 2024 at 03:37:42PM +0800, Jason Wang wrote:
> > > From: Akihiko Odaki <akihiko.odaki@daynix.com>
> > > 
> > > Propagating ebpf-rss-fds errors has several problems.
> > > 
> > > First, it makes device realization fail and disables the fallback to the
> > > conventional eBPF loading.
> > 
> > AFAICT, this is not a bug - this is desired behaviour.
> > 
> > If the user/mgmt app has told QEMU to use FDs it has passed
> > in, then any failure to do this *MUST* be treated as a fatal
> > error. Falling back to other codepaths is ignoring a direct
> > user request.
> 
> The FD options are more like an assistance rather than a request. When QEMU
> does not have a permission to load eBPF programs, a user can get the eBPF
> programs with the request-ebpf command of QMP, load it, and pass the FDs to
> QEMU.

That still doesn't alter the fact that if the user has chosen to pass FDs
and QEMU fails to use them, it *MUST* report that error back to the user.

With regards,
Daniel
Akihiko Odaki June 6, 2024, 7:19 a.m. UTC | #4
On 2024/06/06 16:14, Daniel P. Berrangé wrote:
> On Thu, Jun 06, 2024 at 05:14:20AM +0900, Akihiko Odaki wrote:
>> On 2024/06/05 19:23, Daniel P. Berrangé wrote:
>>> On Tue, Jun 04, 2024 at 03:37:42PM +0800, Jason Wang wrote:
>>>> From: Akihiko Odaki <akihiko.odaki@daynix.com>
>>>>
>>>> Propagating ebpf-rss-fds errors has several problems.
>>>>
>>>> First, it makes device realization fail and disables the fallback to the
>>>> conventional eBPF loading.
>>>
>>> AFAICT, this is not a bug - this is desired behaviour.
>>>
>>> If the user/mgmt app has told QEMU to use FDs it has passed
>>> in, then any failure to do this *MUST* be treated as a fatal
>>> error. Falling back to other codepaths is ignoring a direct
>>> user request.
>>
>> The FD options are more like an assistance rather than a request. When QEMU
>> does not have a permission to load eBPF programs, a user can get the eBPF
>> programs with the request-ebpf command of QMP, load it, and pass the FDs to
>> QEMU.
> 
> That still doesn't alter the fact that if the user has chosen to pass FDs
> and QEMU fails to use them, it *MUST* report that error back to the user.

The user should be more interested in whether the eBPF functionality is
successfully enabled or not, and that is independent of whether the
eBPF program is loaded by QEMU or someone else. It is being worked on
in another patch series:
https://patchew.org/QEMU/20240428-auto-v1-0-7b012216a120@daynix.com/

Regards,
Akihiko Odaki
Daniel P. Berrangé June 6, 2024, 7:59 a.m. UTC | #5
On Thu, Jun 06, 2024 at 04:19:11PM +0900, Akihiko Odaki wrote:
> On 2024/06/06 16:14, Daniel P. Berrangé wrote:
> > On Thu, Jun 06, 2024 at 05:14:20AM +0900, Akihiko Odaki wrote:
> > > On 2024/06/05 19:23, Daniel P. Berrangé wrote:
> > > > On Tue, Jun 04, 2024 at 03:37:42PM +0800, Jason Wang wrote:
> > > > > From: Akihiko Odaki <akihiko.odaki@daynix.com>
> > > > > 
> > > > > Propagating ebpf-rss-fds errors has several problems.
> > > > > 
> > > > > First, it makes device realization fail and disables the fallback to the
> > > > > conventional eBPF loading.
> > > > 
> > > > AFAICT, this is not a bug - this is desired behaviour.
> > > > 
> > > > If the user/mgmt app has told QEMU to use FDs it has passed
> > > > in, then any failure to do this *MUST* be treated as a fatal
> > > > error. Falling back to other codepaths is ignoring a direct
> > > > user request.
> > > 
> > > The FD options are more like an assistance rather than a request. When QEMU
> > > does not have a permission to load eBPF programs, a user can get the eBPF
> > > programs with the request-ebpf command of QMP, load it, and pass the FDs to
> > > QEMU.
> > 
> > That still doesn't alter the fact that if the user has chosen to pass FDs
> > and QEMU fails to use them, it *MUST* report that error back to the user.
> 
> The user should be more interested in whether the eBPF functionality is
> successfully enabled or not, and that is irrelevant from whether the eBPF
> program is loaded by QEMU or someone else.

No, this is wrong. A mgmt application or user will have made a decision
about *how* it wants QEMU to configure a particular feature. QEMU must
always honour the mgmt application's request, and not try to do something
different.

If the mgmt app did not want the FDs to be used, it would not have
passed them to QEMU in the first place. Ignoring the FDs is not likely
to work, because QEMU is unlikely to have permission to open the FDs
itself.

Ignoring the errors when creating the FDs makes it much, much harder
to detect and diagnose deployment problems, because the root cause
error is being discarded, and replaced by a later error which misleads
the app managing QEMU.

Always honouring the user requested config, or giving an error back
when it fails, is standard QEMU practice.

With regards,
Daniel
Akihiko Odaki June 7, 2024, 6:04 a.m. UTC | #6
On 2024/06/06 16:59, Daniel P. Berrangé wrote:
> On Thu, Jun 06, 2024 at 04:19:11PM +0900, Akihiko Odaki wrote:
>> On 2024/06/06 16:14, Daniel P. Berrangé wrote:
>>> On Thu, Jun 06, 2024 at 05:14:20AM +0900, Akihiko Odaki wrote:
>>>> On 2024/06/05 19:23, Daniel P. Berrangé wrote:
>>>>> On Tue, Jun 04, 2024 at 03:37:42PM +0800, Jason Wang wrote:
>>>>>> From: Akihiko Odaki <akihiko.odaki@daynix.com>
>>>>>>
>>>>>> Propagating ebpf-rss-fds errors has several problems.
>>>>>>
>>>>>> First, it makes device realization fail and disables the fallback to the
>>>>>> conventional eBPF loading.
>>>>>
>>>>> AFAICT, this is not a bug - this is desired behaviour.
>>>>>
>>>>> If the user/mgmt app has told QEMU to use FDs it has passed
>>>>> in, then any failure to do this *MUST* be treated as a fatal
>>>>> error. Falling back to other codepaths is ignoring a direct
>>>>> user request.
>>>>
>>>> The FD options are more like an assistance rather than a request. When QEMU
>>>> does not have a permission to load eBPF programs, a user can get the eBPF
>>>> programs with the request-ebpf command of QMP, load it, and pass the FDs to
>>>> QEMU.
>>>
>>> That still doesn't alter the fact that if the user has chosen to pass FDs
>>> and QEMU fails to use them, it *MUST* report that error back to the user.
>>
>> The user should be more interested in whether the eBPF functionality is
>> successfully enabled or not, and that is irrelevant from whether the eBPF
>> program is loaded by QEMU or someone else.
> 
> No, this is wrong. A mgmt application or user will have made a decision
> about *how* it wants QEMU to configure a particular feature. QEMU must
> always honour the mgmt application's request, and not try to do something
> different.
> 
> If the mgmt app did not want the FDs to be used, it would not have
> passed them to QEMU in the first place. Ignoring the FDs is not likely
> to work, because QEMU is unlikely to have permission to open the FDs
> itself.
> 
> Ignoring the errors when creating the FDs, makes it much much harder
> to detect and diagnose deployment problems, because the root cause
> error is being discarded, and replaced by a later error which misleads
> the app managing QEMU.
> 
> Always honouring the user requested config, or giving an error back
> when it fails, is standard QEMU practice.

I see.

I'll append a follow-up patch to the series "[PATCH 0/3] virtio-net: 
Convert feature properties to OnOffAuto" to remove the fallback path. We 
can keep this for now to remove the flawed error handling code.

Regards,
Akihiko Odaki
diff mbox series

Patch

diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index ff600b3002..3cee2ef3ac 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -1329,24 +1329,22 @@  static void virtio_net_detach_epbf_rss(VirtIONet *n)
     virtio_net_attach_ebpf_to_backend(n->nic, -1);
 }
 
-static bool virtio_net_load_ebpf_fds(VirtIONet *n, Error **errp)
+static bool virtio_net_load_ebpf_fds(VirtIONet *n)
 {
     int fds[EBPF_RSS_MAX_FDS] = { [0 ... EBPF_RSS_MAX_FDS - 1] = -1};
     int ret = true;
     int i = 0;
 
-    ERRP_GUARD();
-
     if (n->nr_ebpf_rss_fds != EBPF_RSS_MAX_FDS) {
-        error_setg(errp,
-                  "Expected %d file descriptors but got %d",
-                  EBPF_RSS_MAX_FDS, n->nr_ebpf_rss_fds);
+        warn_report("Expected %d file descriptors but got %d",
+                    EBPF_RSS_MAX_FDS, n->nr_ebpf_rss_fds);
        return false;
    }
 
     for (i = 0; i < n->nr_ebpf_rss_fds; i++) {
-        fds[i] = monitor_fd_param(monitor_cur(), n->ebpf_rss_fds[i], errp);
-        if (*errp) {
+        fds[i] = monitor_fd_param(monitor_cur(), n->ebpf_rss_fds[i],
+                                  &error_warn);
+        if (fds[i] < 0) {
             ret = false;
             goto exit;
         }
@@ -1355,7 +1353,7 @@  static bool virtio_net_load_ebpf_fds(VirtIONet *n, Error **errp)
     ret = ebpf_rss_load_fds(&n->ebpf_rss, fds[0], fds[1], fds[2], fds[3]);
 
 exit:
-    if (!ret || *errp) {
+    if (!ret) {
         for (i = 0; i < n->nr_ebpf_rss_fds && fds[i] != -1; i++) {
             close(fds[i]);
         }
@@ -1364,13 +1362,12 @@  exit:
     return ret;
 }
 
-static bool virtio_net_load_ebpf(VirtIONet *n, Error **errp)
+static bool virtio_net_load_ebpf(VirtIONet *n)
 {
     bool ret = false;
 
     if (virtio_net_attach_ebpf_to_backend(n->nic, -1)) {
-        if (!(n->ebpf_rss_fds
-                && virtio_net_load_ebpf_fds(n, errp))) {
+        if (!(n->ebpf_rss_fds && virtio_net_load_ebpf_fds(n))) {
             ret = ebpf_rss_load(&n->ebpf_rss);
         }
     }
@@ -3809,7 +3806,7 @@  static void virtio_net_device_realize(DeviceState *dev, Error **errp)
     net_rx_pkt_init(&n->rx_pkt);
 
     if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
-        virtio_net_load_ebpf(n, errp);
+        virtio_net_load_ebpf(n);
     }
 }