diff mbox series

[ovs-dev,v6] tc: fix crash on malformed reply from kernel.

Message ID 20230606093850.167866-1-frode.nordahl@canonical.com
State Superseded
Headers show
Series [ovs-dev,v6] tc: fix crash on malformed reply from kernel. | expand

Checks

Context Check Description
ovsrobot/apply-robot success apply and check: success
ovsrobot/github-robot-_Build_and_Test success github build: passed
ovsrobot/intel-ovs-compilation success test: success

Commit Message

Frode Nordahl June 6, 2023, 9:38 a.m. UTC
The tc module combines the use of the `tc_transact` helper
function for communication with the in-kernel tc infrastructure
with assertions on the reply data by `ofpbuf_at_assert` on the
received data prior to further processing.

With the presence of bugs on the kernel side, we need to treat
the kernel as an unreliable service provider and replace assertions
on the reply from it with checks to avoid a fatal crash of OVS.

For the record, the symptom of the crash is this in the log:
EMER|../include/openvswitch/ofpbuf.h:194: assertion offset + size <= b->size failed in ofpbuf_at_assert()

And an excerpt of the backtrace looks like this:
0x0000561dac1396d1 in ofpbuf_at_assert (b=0x7fb650005d20, b=0x7fb650005d20, offset=16, size=20) at ../include/openvswitch/ofpbuf.h:194
tc_replace_flower (id=<optimized out>, flower=<optimized out>) at ../lib/tc.c:3223
0x0000561dac128155 in netdev_tc_flow_put (netdev=0x561dacf91840, match=<optimized out>, actions=<optimized out>, actions_len=<optimized out>,
ufid=<optimized out>, info=<optimized out>, stats=<optimized out>) at ../lib/netdev-offload-tc.c:2096
0x0000561dac117541 in netdev_flow_put (stats=<optimized out>, info=0x7fb65b7ba780, ufid=<optimized out>, act_len=<optimized out>, actions=<optimized out>,
match=0x7fb65b7ba980, netdev=0x561dacf91840) at ../lib/netdev-offload.c:257
parse_flow_put (put=0x7fb65b7bcc50, dpif=0x561dad0ad550) at ../lib/dpif-netlink.c:2297
try_send_to_netdev (op=0x7fb65b7bcc48, dpif=0x561dad0ad550) at ../lib/dpif-netlink.c:2384

Reported-At: https://launchpad.net/bugs/2018500
Fixes: 5c039ddc64ff ("netdev-linux: Add functions to manipulate tc police action")
Fixes: e7f6ba220e10 ("lib/tc: add ingress ratelimiting support for tc-offload")
Fixes: f98e418fbdb6 ("tc: Add tc flower functions")
Fixes: c1c9c9c4b636 ("Implement QoS framework.")
Signed-off-by: Frode Nordahl <frode.nordahl@canonical.com>
---
 lib/netdev-linux.c | 32 +++++++++++++++++++++---------
 lib/tc.c           | 49 ++++++++++++++++++++++++++++++++--------------
 2 files changed, 57 insertions(+), 24 deletions(-)

Comments

Ilya Maximets June 6, 2023, 3:47 p.m. UTC | #1
On 6/6/23 11:38, Frode Nordahl wrote:
> The tc module combines the use of the `tc_transact` helper
> function for communication with the in-kernel tc infrastructure
> with assertions on the reply data by `ofpbuf_at_assert` on the
> received data prior to further processing.
> 
> With the presence of bugs on the kernel side, we need to treat
> the kernel as an unreliable service provider and replace assertions
> on the reply from it with checks to avoid a fatal crash of OVS.
> 
> For the record, the symptom of the crash is this in the log:
> EMER|../include/openvswitch/ofpbuf.h:194: assertion offset + size <= b->size failed in ofpbuf_at_assert()
> 
> And an excerpt of the backtrace looks like this:
> 0x0000561dac1396d1 in ofpbuf_at_assert (b=0x7fb650005d20, b=0x7fb650005d20, offset=16, size=20) at ../include/openvswitch/ofpbuf.h:194
> tc_replace_flower (id=<optimized out>, flower=<optimized out>) at ../lib/tc.c:3223
> 0x0000561dac128155 in netdev_tc_flow_put (netdev=0x561dacf91840, match=<optimized out>, actions=<optimized out>, actions_len=<optimized out>,
> ufid=<optimized out>, info=<optimized out>, stats=<optimized out>) at ../lib/netdev-offload-tc.c:2096
> 0x0000561dac117541 in netdev_flow_put (stats=<optimized out>, info=0x7fb65b7ba780, ufid=<optimized out>, act_len=<optimized out>, actions=<optimized out>,
> match=0x7fb65b7ba980, netdev=0x561dacf91840) at ../lib/netdev-offload.c:257
> parse_flow_put (put=0x7fb65b7bcc50, dpif=0x561dad0ad550) at ../lib/dpif-netlink.c:2297
> try_send_to_netdev (op=0x7fb65b7bcc48, dpif=0x561dad0ad550) at ../lib/dpif-netlink.c:2384
> 
> Reported-At: https://launchpad.net/bugs/2018500
> Fixes: 5c039ddc64ff ("netdev-linux: Add functions to manipulate tc police action")
> Fixes: e7f6ba220e10 ("lib/tc: add ingress ratelimiting support for tc-offload")
> Fixes: f98e418fbdb6 ("tc: Add tc flower functions")
> Fixes: c1c9c9c4b636 ("Implement QoS framework.")
> Signed-off-by: Frode Nordahl <frode.nordahl@canonical.com>
> ---
>  lib/netdev-linux.c | 32 +++++++++++++++++++++---------
>  lib/tc.c           | 49 ++++++++++++++++++++++++++++++++--------------
>  2 files changed, 57 insertions(+), 24 deletions(-)
> 
> diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
> index 36620199e..1efc837ac 100644
> --- a/lib/netdev-linux.c
> +++ b/lib/netdev-linux.c
> @@ -2714,8 +2714,15 @@ tc_add_matchall_policer(struct netdev *netdev, uint32_t kbits_rate,
>  
>      err = tc_transact(&request, &reply);
>      if (!err) {
> -        struct tcmsg *tc =
> -            ofpbuf_at_assert(reply, NLMSG_HDRLEN, sizeof *tc);
> +        struct ofpbuf b = ofpbuf_const_initializer(reply->data, reply->size);
> +        struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
> +        struct tcmsg *tc = ofpbuf_try_pull(&b, sizeof *tc);
> +
> +        if (!nlmsg || !tc) {
> +            VLOG_ERR_RL(&rl, "Failed to add match all policer, "
> +                        "malformed reply");

We will leak a 'reply' here if this condition ever happens.
But I'm still not sure what is the point of having and parsing
a reply for this request at all.  If it didn't fail, then we
can assume it succeeded?

We can't assume that cls_flower is working correctly, it has
issues.  Even more on older kernels.  That's why we request
to echo everythig back and comparing that kernel configured
what we asked.  I hope, we can trust at least some of the
interfaces and don't need to re-check everything.

Are there any known issues with the matchall classifier?
If not, I'd suggest we just don't ask for reply.  An error
code should be enough.

> +            return EPROTO;
> +        }
>          ofpbuf_delete(reply);
>      }
>  
> @@ -5744,26 +5751,27 @@ static int
>  tc_update_policer_action_stats(struct ofpbuf *msg,
>                                 struct ofputil_meter_stats *stats)
>  {
> +    struct ofpbuf b = ofpbuf_const_initializer(msg->data, msg->size);
> +    struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
> +    struct tcamsg *tca = ofpbuf_try_pull(&b, sizeof *tca);
>      struct ovs_flow_stats stats_dropped;
>      struct ovs_flow_stats stats_hw;
>      struct ovs_flow_stats stats_sw;
>      const struct nlattr *act;
>      struct nlattr *prio;
> -    struct tcamsg *tca;
>      int error = 0;
>  
>      if (!stats) {
>          goto exit;
>      }
>  
> -    if (NLMSG_HDRLEN + sizeof *tca > msg->size) {
> +    if (!nlmsg || !tca) {
>          VLOG_ERR_RL(&rl, "Failed to get action stats, size error");
>          error = EPROTO;
>          goto exit;
>      }
>  
> -    tca = ofpbuf_at_assert(msg, NLMSG_HDRLEN, sizeof *tca);
> -    act = nl_attr_find(msg, NLMSG_HDRLEN + sizeof *tca, TCA_ACT_TAB);
> +    act = nl_attr_find(&b, 0, TCA_ACT_TAB);
>      if (!act) {
>          VLOG_ERR_RL(&rl, "Failed to get action stats, can't find attribute");
>          error = EPROTO;
> @@ -6028,20 +6036,26 @@ static int
>  tc_parse_class(const struct ofpbuf *msg, unsigned int *handlep,
>                 struct nlattr **options, struct netdev_queue_stats *stats)
>  {
> +    struct ofpbuf b = ofpbuf_const_initializer(msg->data, msg->size);
> +    struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
> +    struct tcmsg *tc = ofpbuf_try_pull(&b, sizeof *tc);
>      static const struct nl_policy tca_policy[] = {
>          [TCA_OPTIONS] = { .type = NL_A_NESTED, .optional = false },
>          [TCA_STATS2] = { .type = NL_A_NESTED, .optional = false },
>      };
>      struct nlattr *ta[ARRAY_SIZE(tca_policy)];
>  
> -    if (!nl_policy_parse(msg, NLMSG_HDRLEN + sizeof(struct tcmsg),
> -                         tca_policy, ta, ARRAY_SIZE(ta))) {
> +    if (!nlmsg || !tc) {
> +        VLOG_ERR_RL(&rl, "failed to parse class message, malformed reply");
> +        goto error;
> +    }
> +
> +    if (!nl_policy_parse(&b, 0, tca_policy, ta, ARRAY_SIZE(ta))) {
>          VLOG_WARN_RL(&rl, "failed to parse class message");
>          goto error;
>      }
>  
>      if (handlep) {
> -        struct tcmsg *tc = ofpbuf_at_assert(msg, NLMSG_HDRLEN, sizeof *tc);
>          *handlep = tc->tcm_handle;
>      }
>  
> diff --git a/lib/tc.c b/lib/tc.c
> index 5c32c6f97..07975f9db 100644
> --- a/lib/tc.c
> +++ b/lib/tc.c
> @@ -36,6 +36,7 @@
>  #include <unistd.h>
>  
>  #include "byte-order.h"
> +#include "coverage.h"
>  #include "netlink-socket.h"
>  #include "netlink.h"
>  #include "openvswitch/ofpbuf.h"
> @@ -67,6 +68,8 @@
>  
>  VLOG_DEFINE_THIS_MODULE(tc);
>  
> +COVERAGE_DEFINE(tc_netlink_malformed_reply);
> +
>  static struct vlog_rate_limit error_rl = VLOG_RATE_LIMIT_INIT(60, 5);
>  
>  static enum tc_offload_policy tc_policy = TC_POLICY_NONE;
> @@ -2190,18 +2193,19 @@ int
>  parse_netlink_to_tc_flower(struct ofpbuf *reply, struct tcf_id *id,
>                             struct tc_flower *flower, bool terse)
>  {
> -    struct tcmsg *tc;
> +    struct ofpbuf b = ofpbuf_const_initializer(reply->data, reply->size);
> +    struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
> +    struct tcmsg *tc = ofpbuf_try_pull(&b, sizeof *tc);
>      struct nlattr *ta[ARRAY_SIZE(tca_policy)];
>      const char *kind;
>  
> -    if (NLMSG_HDRLEN + sizeof *tc > reply->size) {
> +    if (!nlmsg || !tc) {
> +        COVERAGE_INC(tc_netlink_malformed_reply);
>          return EPROTO;
>      }
>  
>      memset(flower, 0, sizeof *flower);
>  
> -    tc = ofpbuf_at_assert(reply, NLMSG_HDRLEN, sizeof *tc);
> -
>      flower->key.eth_type = (OVS_FORCE ovs_be16) tc_get_minor(tc->tcm_info);
>      flower->mask.eth_type = OVS_BE16_MAX;
>      id->prio = tc_get_major(tc->tcm_info);
> @@ -2215,8 +2219,7 @@ parse_netlink_to_tc_flower(struct ofpbuf *reply, struct tcf_id *id,
>          return EAGAIN;
>      }
>  
> -    if (!nl_policy_parse(reply, NLMSG_HDRLEN + sizeof *tc,
> -                         tca_policy, ta, ARRAY_SIZE(ta))) {
> +    if (!nl_policy_parse(&b, 0, tca_policy, ta, ARRAY_SIZE(ta))) {
>          VLOG_ERR_RL(&error_rl, "failed to parse tca policy");
>          return EPROTO;
>      }
> @@ -2237,13 +2240,17 @@ parse_netlink_to_tc_flower(struct ofpbuf *reply, struct tcf_id *id,
>  int
>  parse_netlink_to_tc_chain(struct ofpbuf *reply, uint32_t *chain)
>  {
> +    struct ofpbuf b = ofpbuf_const_initializer(reply->data, reply->size);
> +    struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
> +    struct tcmsg *tc = ofpbuf_try_pull(&b, sizeof *tc);
>      struct nlattr *ta[ARRAY_SIZE(tca_chain_policy)];
> -    struct tcmsg *tc;
>  
> -    tc = ofpbuf_at_assert(reply, NLMSG_HDRLEN, sizeof *tc);
> +    if (!nlmsg || !tc) {
> +        COVERAGE_INC(tc_netlink_malformed_reply);
> +        return EPROTO;
> +    }
>  
> -    if (!nl_policy_parse(reply, NLMSG_HDRLEN + sizeof *tc,
> -                         tca_chain_policy, ta, ARRAY_SIZE(ta))) {
> +    if (!nl_policy_parse(&b, 0, tca_chain_policy, ta, ARRAY_SIZE(ta))) {
>          VLOG_ERR_RL(&error_rl, "failed to parse tca chain policy");
>          return EINVAL;
>      }
> @@ -2307,21 +2314,27 @@ int
>  parse_netlink_to_tc_policer(struct ofpbuf *reply, uint32_t police_idx[])
>  {
>      static struct nl_policy actions_orders_policy[TCA_ACT_MAX_PRIO] = {};
> +    struct ofpbuf b = ofpbuf_const_initializer(reply->data, reply->size);
>      struct nlattr *actions_orders[ARRAY_SIZE(actions_orders_policy)];
> +    struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
>      const int max_size = ARRAY_SIZE(actions_orders_policy);
> +    struct tcamsg *tca = ofpbuf_try_pull(&b, sizeof *tca);
>      const struct nlattr *actions;
>      struct tc_flower flower;
> -    struct tcamsg *tca;
>      int i, cnt = 0;
>      int err;
>  
> +    if (!nlmsg || !tca) {
> +        COVERAGE_INC(tc_netlink_malformed_reply);
> +        return EPROTO;
> +    }
> +
>      for (i = 0; i < max_size; i++) {
>          actions_orders_policy[i].type = NL_A_NESTED;
>          actions_orders_policy[i].optional = true;
>      }
>  
> -    tca = ofpbuf_at_assert(reply, NLMSG_HDRLEN, sizeof *tca);
> -    actions = nl_attr_find(reply, NLMSG_HDRLEN + sizeof *tca, TCA_ACT_TAB);
> +    actions = nl_attr_find(&b, 0, TCA_ACT_TAB);
>      if (!actions || !nl_parse_nested(actions, actions_orders_policy,
>                                       actions_orders, max_size)) {
>          VLOG_ERR_RL(&error_rl,
> @@ -3823,8 +3836,14 @@ tc_replace_flower(struct tcf_id *id, struct tc_flower *flower)
>  
>      error = tc_transact(&request, &reply);
>      if (!error) {
> -        struct tcmsg *tc =
> -            ofpbuf_at_assert(reply, NLMSG_HDRLEN, sizeof *tc);
> +        struct ofpbuf b = ofpbuf_const_initializer(reply->data, reply->size);
> +        struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
> +        struct tcmsg *tc = ofpbuf_try_pull(&b, sizeof *tc);
> +
> +        if (!nlmsg || !tc) {
> +            COVERAGE_INC(tc_netlink_malformed_reply);

We're leaking a reply here.

> +            return EPROTO;
> +        }
>  
>          id->prio = tc_get_major(tc->tcm_info);
>          id->handle = tc->tcm_handle;
Frode Nordahl June 6, 2023, 4:08 p.m. UTC | #2
On Tue, Jun 6, 2023 at 5:46 PM Ilya Maximets <i.maximets@ovn.org> wrote:
>
> On 6/6/23 11:38, Frode Nordahl wrote:
> > The tc module combines the use of the `tc_transact` helper
> > function for communication with the in-kernel tc infrastructure
> > with assertions on the reply data by `ofpbuf_at_assert` on the
> > received data prior to further processing.
> >
> > With the presence of bugs on the kernel side, we need to treat
> > the kernel as an unreliable service provider and replace assertions
> > on the reply from it with checks to avoid a fatal crash of OVS.
> >
> > For the record, the symptom of the crash is this in the log:
> > EMER|../include/openvswitch/ofpbuf.h:194: assertion offset + size <= b->size failed in ofpbuf_at_assert()
> >
> > And an excerpt of the backtrace looks like this:
> > 0x0000561dac1396d1 in ofpbuf_at_assert (b=0x7fb650005d20, b=0x7fb650005d20, offset=16, size=20) at ../include/openvswitch/ofpbuf.h:194
> > tc_replace_flower (id=<optimized out>, flower=<optimized out>) at ../lib/tc.c:3223
> > 0x0000561dac128155 in netdev_tc_flow_put (netdev=0x561dacf91840, match=<optimized out>, actions=<optimized out>, actions_len=<optimized out>,
> > ufid=<optimized out>, info=<optimized out>, stats=<optimized out>) at ../lib/netdev-offload-tc.c:2096
> > 0x0000561dac117541 in netdev_flow_put (stats=<optimized out>, info=0x7fb65b7ba780, ufid=<optimized out>, act_len=<optimized out>, actions=<optimized out>,
> > match=0x7fb65b7ba980, netdev=0x561dacf91840) at ../lib/netdev-offload.c:257
> > parse_flow_put (put=0x7fb65b7bcc50, dpif=0x561dad0ad550) at ../lib/dpif-netlink.c:2297
> > try_send_to_netdev (op=0x7fb65b7bcc48, dpif=0x561dad0ad550) at ../lib/dpif-netlink.c:2384
> >
> > Reported-At: https://launchpad.net/bugs/2018500
> > Fixes: 5c039ddc64ff ("netdev-linux: Add functions to manipulate tc police action")
> > Fixes: e7f6ba220e10 ("lib/tc: add ingress ratelimiting support for tc-offload")
> > Fixes: f98e418fbdb6 ("tc: Add tc flower functions")
> > Fixes: c1c9c9c4b636 ("Implement QoS framework.")
> > Signed-off-by: Frode Nordahl <frode.nordahl@canonical.com>
> > ---
> >  lib/netdev-linux.c | 32 +++++++++++++++++++++---------
> >  lib/tc.c           | 49 ++++++++++++++++++++++++++++++++--------------
> >  2 files changed, 57 insertions(+), 24 deletions(-)
> >
> > diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
> > index 36620199e..1efc837ac 100644
> > --- a/lib/netdev-linux.c
> > +++ b/lib/netdev-linux.c
> > @@ -2714,8 +2714,15 @@ tc_add_matchall_policer(struct netdev *netdev, uint32_t kbits_rate,
> >
> >      err = tc_transact(&request, &reply);
> >      if (!err) {
> > -        struct tcmsg *tc =
> > -            ofpbuf_at_assert(reply, NLMSG_HDRLEN, sizeof *tc);
> > +        struct ofpbuf b = ofpbuf_const_initializer(reply->data, reply->size);
> > +        struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
> > +        struct tcmsg *tc = ofpbuf_try_pull(&b, sizeof *tc);
> > +
> > +        if (!nlmsg || !tc) {
> > +            VLOG_ERR_RL(&rl, "Failed to add match all policer, "
> > +                        "malformed reply");
>
> We will leak a 'reply' here if this condition ever happens.
> But I'm still not sure what is the point of having and parsing
> a reply for this request at all.  If it didn't fail, then we
> can assume it succeeded?

Oh shoot, what can I say, iteration fatigue, all on me of course, will fix.

> We can't assume that cls_flower is working correctly, it has
> issues.  Even more on older kernels.  That's why we request
> to echo everythig back and comparing that kernel configured
> what we asked.  I hope, we can trust at least some of the
> interfaces and don't need to re-check everything.
>
> Are there any known issues with the matchall classifier?
> If not, I'd suggest we just don't ask for reply.  An error
> code should be enough.

As for if we can trust the kernel infrastructure here, I really can't say.
I'd prefer to replicate the existing behavior tbh, if you look at the end
of the function, the existing code even asserts that the reply is long
enough before deleting it:
https://github.com/openvswitch/ovs/blob/64cdc290ef441bc3b4c2cddc230311ba58bc31b3/lib/netdev-linux.c#L2718

> > +            return EPROTO;
> > +        }
> >          ofpbuf_delete(reply);
> >      }
> >
> > @@ -5744,26 +5751,27 @@ static int
> >  tc_update_policer_action_stats(struct ofpbuf *msg,
> >                                 struct ofputil_meter_stats *stats)
> >  {
> > +    struct ofpbuf b = ofpbuf_const_initializer(msg->data, msg->size);
> > +    struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
> > +    struct tcamsg *tca = ofpbuf_try_pull(&b, sizeof *tca);
> >      struct ovs_flow_stats stats_dropped;
> >      struct ovs_flow_stats stats_hw;
> >      struct ovs_flow_stats stats_sw;
> >      const struct nlattr *act;
> >      struct nlattr *prio;
> > -    struct tcamsg *tca;
> >      int error = 0;
> >
> >      if (!stats) {
> >          goto exit;
> >      }
> >
> > -    if (NLMSG_HDRLEN + sizeof *tca > msg->size) {
> > +    if (!nlmsg || !tca) {
> >          VLOG_ERR_RL(&rl, "Failed to get action stats, size error");
> >          error = EPROTO;
> >          goto exit;
> >      }
> >
> > -    tca = ofpbuf_at_assert(msg, NLMSG_HDRLEN, sizeof *tca);
> > -    act = nl_attr_find(msg, NLMSG_HDRLEN + sizeof *tca, TCA_ACT_TAB);
> > +    act = nl_attr_find(&b, 0, TCA_ACT_TAB);
> >      if (!act) {
> >          VLOG_ERR_RL(&rl, "Failed to get action stats, can't find attribute");
> >          error = EPROTO;
> > @@ -6028,20 +6036,26 @@ static int
> >  tc_parse_class(const struct ofpbuf *msg, unsigned int *handlep,
> >                 struct nlattr **options, struct netdev_queue_stats *stats)
> >  {
> > +    struct ofpbuf b = ofpbuf_const_initializer(msg->data, msg->size);
> > +    struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
> > +    struct tcmsg *tc = ofpbuf_try_pull(&b, sizeof *tc);
> >      static const struct nl_policy tca_policy[] = {
> >          [TCA_OPTIONS] = { .type = NL_A_NESTED, .optional = false },
> >          [TCA_STATS2] = { .type = NL_A_NESTED, .optional = false },
> >      };
> >      struct nlattr *ta[ARRAY_SIZE(tca_policy)];
> >
> > -    if (!nl_policy_parse(msg, NLMSG_HDRLEN + sizeof(struct tcmsg),
> > -                         tca_policy, ta, ARRAY_SIZE(ta))) {
> > +    if (!nlmsg || !tc) {
> > +        VLOG_ERR_RL(&rl, "failed to parse class message, malformed reply");
> > +        goto error;
> > +    }
> > +
> > +    if (!nl_policy_parse(&b, 0, tca_policy, ta, ARRAY_SIZE(ta))) {
> >          VLOG_WARN_RL(&rl, "failed to parse class message");
> >          goto error;
> >      }
> >
> >      if (handlep) {
> > -        struct tcmsg *tc = ofpbuf_at_assert(msg, NLMSG_HDRLEN, sizeof *tc);
> >          *handlep = tc->tcm_handle;
> >      }
> >
> > diff --git a/lib/tc.c b/lib/tc.c
> > index 5c32c6f97..07975f9db 100644
> > --- a/lib/tc.c
> > +++ b/lib/tc.c
> > @@ -36,6 +36,7 @@
> >  #include <unistd.h>
> >
> >  #include "byte-order.h"
> > +#include "coverage.h"
> >  #include "netlink-socket.h"
> >  #include "netlink.h"
> >  #include "openvswitch/ofpbuf.h"
> > @@ -67,6 +68,8 @@
> >
> >  VLOG_DEFINE_THIS_MODULE(tc);
> >
> > +COVERAGE_DEFINE(tc_netlink_malformed_reply);
> > +
> >  static struct vlog_rate_limit error_rl = VLOG_RATE_LIMIT_INIT(60, 5);
> >
> >  static enum tc_offload_policy tc_policy = TC_POLICY_NONE;
> > @@ -2190,18 +2193,19 @@ int
> >  parse_netlink_to_tc_flower(struct ofpbuf *reply, struct tcf_id *id,
> >                             struct tc_flower *flower, bool terse)
> >  {
> > -    struct tcmsg *tc;
> > +    struct ofpbuf b = ofpbuf_const_initializer(reply->data, reply->size);
> > +    struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
> > +    struct tcmsg *tc = ofpbuf_try_pull(&b, sizeof *tc);
> >      struct nlattr *ta[ARRAY_SIZE(tca_policy)];
> >      const char *kind;
> >
> > -    if (NLMSG_HDRLEN + sizeof *tc > reply->size) {
> > +    if (!nlmsg || !tc) {
> > +        COVERAGE_INC(tc_netlink_malformed_reply);
> >          return EPROTO;
> >      }
> >
> >      memset(flower, 0, sizeof *flower);
> >
> > -    tc = ofpbuf_at_assert(reply, NLMSG_HDRLEN, sizeof *tc);
> > -
> >      flower->key.eth_type = (OVS_FORCE ovs_be16) tc_get_minor(tc->tcm_info);
> >      flower->mask.eth_type = OVS_BE16_MAX;
> >      id->prio = tc_get_major(tc->tcm_info);
> > @@ -2215,8 +2219,7 @@ parse_netlink_to_tc_flower(struct ofpbuf *reply, struct tcf_id *id,
> >          return EAGAIN;
> >      }
> >
> > -    if (!nl_policy_parse(reply, NLMSG_HDRLEN + sizeof *tc,
> > -                         tca_policy, ta, ARRAY_SIZE(ta))) {
> > +    if (!nl_policy_parse(&b, 0, tca_policy, ta, ARRAY_SIZE(ta))) {
> >          VLOG_ERR_RL(&error_rl, "failed to parse tca policy");
> >          return EPROTO;
> >      }
> > @@ -2237,13 +2240,17 @@ parse_netlink_to_tc_flower(struct ofpbuf *reply, struct tcf_id *id,
> >  int
> >  parse_netlink_to_tc_chain(struct ofpbuf *reply, uint32_t *chain)
> >  {
> > +    struct ofpbuf b = ofpbuf_const_initializer(reply->data, reply->size);
> > +    struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
> > +    struct tcmsg *tc = ofpbuf_try_pull(&b, sizeof *tc);
> >      struct nlattr *ta[ARRAY_SIZE(tca_chain_policy)];
> > -    struct tcmsg *tc;
> >
> > -    tc = ofpbuf_at_assert(reply, NLMSG_HDRLEN, sizeof *tc);
> > +    if (!nlmsg || !tc) {
> > +        COVERAGE_INC(tc_netlink_malformed_reply);
> > +        return EPROTO;
> > +    }
> >
> > -    if (!nl_policy_parse(reply, NLMSG_HDRLEN + sizeof *tc,
> > -                         tca_chain_policy, ta, ARRAY_SIZE(ta))) {
> > +    if (!nl_policy_parse(&b, 0, tca_chain_policy, ta, ARRAY_SIZE(ta))) {
> >          VLOG_ERR_RL(&error_rl, "failed to parse tca chain policy");
> >          return EINVAL;
> >      }
> > @@ -2307,21 +2314,27 @@ int
> >  parse_netlink_to_tc_policer(struct ofpbuf *reply, uint32_t police_idx[])
> >  {
> >      static struct nl_policy actions_orders_policy[TCA_ACT_MAX_PRIO] = {};
> > +    struct ofpbuf b = ofpbuf_const_initializer(reply->data, reply->size);
> >      struct nlattr *actions_orders[ARRAY_SIZE(actions_orders_policy)];
> > +    struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
> >      const int max_size = ARRAY_SIZE(actions_orders_policy);
> > +    struct tcamsg *tca = ofpbuf_try_pull(&b, sizeof *tca);
> >      const struct nlattr *actions;
> >      struct tc_flower flower;
> > -    struct tcamsg *tca;
> >      int i, cnt = 0;
> >      int err;
> >
> > +    if (!nlmsg || !tca) {
> > +        COVERAGE_INC(tc_netlink_malformed_reply);
> > +        return EPROTO;
> > +    }
> > +
> >      for (i = 0; i < max_size; i++) {
> >          actions_orders_policy[i].type = NL_A_NESTED;
> >          actions_orders_policy[i].optional = true;
> >      }
> >
> > -    tca = ofpbuf_at_assert(reply, NLMSG_HDRLEN, sizeof *tca);
> > -    actions = nl_attr_find(reply, NLMSG_HDRLEN + sizeof *tca, TCA_ACT_TAB);
> > +    actions = nl_attr_find(&b, 0, TCA_ACT_TAB);
> >      if (!actions || !nl_parse_nested(actions, actions_orders_policy,
> >                                       actions_orders, max_size)) {
> >          VLOG_ERR_RL(&error_rl,
> > @@ -3823,8 +3836,14 @@ tc_replace_flower(struct tcf_id *id, struct tc_flower *flower)
> >
> >      error = tc_transact(&request, &reply);
> >      if (!error) {
> > -        struct tcmsg *tc =
> > -            ofpbuf_at_assert(reply, NLMSG_HDRLEN, sizeof *tc);
> > +        struct ofpbuf b = ofpbuf_const_initializer(reply->data, reply->size);
> > +        struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
> > +        struct tcmsg *tc = ofpbuf_try_pull(&b, sizeof *tc);
> > +
> > +        if (!nlmsg || !tc) {
> > +            COVERAGE_INC(tc_netlink_malformed_reply);
>
> We're leaking a reply here.

Ack

> > +            return EPROTO;
> > +        }
> >
> >          id->prio = tc_get_major(tc->tcm_info);
> >          id->handle = tc->tcm_handle;
>
Ilya Maximets June 6, 2023, 4:18 p.m. UTC | #3
On 6/6/23 18:08, Frode Nordahl wrote:
> On Tue, Jun 6, 2023 at 5:46 PM Ilya Maximets <i.maximets@ovn.org> wrote:
>>
>> On 6/6/23 11:38, Frode Nordahl wrote:
>>> The tc module combines the use of the `tc_transact` helper
>>> function for communication with the in-kernel tc infrastructure
>>> with assertions on the reply data by `ofpbuf_at_assert` on the
>>> received data prior to further processing.
>>>
>>> With the presence of bugs on the kernel side, we need to treat
>>> the kernel as an unreliable service provider and replace assertions
>>> on the reply from it with checks to avoid a fatal crash of OVS.
>>>
>>> For the record, the symptom of the crash is this in the log:
>>> EMER|../include/openvswitch/ofpbuf.h:194: assertion offset + size <= b->size failed in ofpbuf_at_assert()
>>>
>>> And an excerpt of the backtrace looks like this:
>>> 0x0000561dac1396d1 in ofpbuf_at_assert (b=0x7fb650005d20, b=0x7fb650005d20, offset=16, size=20) at ../include/openvswitch/ofpbuf.h:194
>>> tc_replace_flower (id=<optimized out>, flower=<optimized out>) at ../lib/tc.c:3223
>>> 0x0000561dac128155 in netdev_tc_flow_put (netdev=0x561dacf91840, match=<optimized out>, actions=<optimized out>, actions_len=<optimized out>,
>>> ufid=<optimized out>, info=<optimized out>, stats=<optimized out>) at ../lib/netdev-offload-tc.c:2096
>>> 0x0000561dac117541 in netdev_flow_put (stats=<optimized out>, info=0x7fb65b7ba780, ufid=<optimized out>, act_len=<optimized out>, actions=<optimized out>,
>>> match=0x7fb65b7ba980, netdev=0x561dacf91840) at ../lib/netdev-offload.c:257
>>> parse_flow_put (put=0x7fb65b7bcc50, dpif=0x561dad0ad550) at ../lib/dpif-netlink.c:2297
>>> try_send_to_netdev (op=0x7fb65b7bcc48, dpif=0x561dad0ad550) at ../lib/dpif-netlink.c:2384
>>>
>>> Reported-At: https://launchpad.net/bugs/2018500
>>> Fixes: 5c039ddc64ff ("netdev-linux: Add functions to manipulate tc police action")
>>> Fixes: e7f6ba220e10 ("lib/tc: add ingress ratelimiting support for tc-offload")
>>> Fixes: f98e418fbdb6 ("tc: Add tc flower functions")
>>> Fixes: c1c9c9c4b636 ("Implement QoS framework.")
>>> Signed-off-by: Frode Nordahl <frode.nordahl@canonical.com>
>>> ---
>>>  lib/netdev-linux.c | 32 +++++++++++++++++++++---------
>>>  lib/tc.c           | 49 ++++++++++++++++++++++++++++++++--------------
>>>  2 files changed, 57 insertions(+), 24 deletions(-)
>>>
>>> diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
>>> index 36620199e..1efc837ac 100644
>>> --- a/lib/netdev-linux.c
>>> +++ b/lib/netdev-linux.c
>>> @@ -2714,8 +2714,15 @@ tc_add_matchall_policer(struct netdev *netdev, uint32_t kbits_rate,
>>>
>>>      err = tc_transact(&request, &reply);
>>>      if (!err) {
>>> -        struct tcmsg *tc =
>>> -            ofpbuf_at_assert(reply, NLMSG_HDRLEN, sizeof *tc);
>>> +        struct ofpbuf b = ofpbuf_const_initializer(reply->data, reply->size);
>>> +        struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
>>> +        struct tcmsg *tc = ofpbuf_try_pull(&b, sizeof *tc);
>>> +
>>> +        if (!nlmsg || !tc) {
>>> +            VLOG_ERR_RL(&rl, "Failed to add match all policer, "
>>> +                        "malformed reply");
>>
>> We will leak a 'reply' here if this condition ever happens.
>> But I'm still not sure what is the point of having and parsing
>> a reply for this request at all.  If it didn't fail, then we
>> can assume it succeeded?
> 
> Oh shoot, what can I say, iteration fatigue, all on me of course, will fix.
> 
>> We can't assume that cls_flower is working correctly, it has
>> issues.  Even more on older kernels.  That's why we request
>> to echo everythig back and comparing that kernel configured
>> what we asked.  I hope, we can trust at least some of the
>> interfaces and don't need to re-check everything.
>>
>> Are there any known issues with the matchall classifier?
>> If not, I'd suggest we just don't ask for reply.  An error
>> code should be enough.
> 
> As for if we can trust the kernel infrastructure here, I really can't say.
> I'd prefer to replicate the existing behavior tbh, if you look at the end
> of the function, the existing code even asserts that the reply is long
> enough before deleting it:
> https://github.com/openvswitch/ovs/blob/64cdc290ef441bc3b4c2cddc230311ba58bc31b3/lib/netdev-linux.c#L2718

For the sake of keeping it simple, let's keep the current logic, OK.
We can try to figure out why this check was added later.

Best regards, Ilya Maximets.
diff mbox series

Patch

diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
index 36620199e..1efc837ac 100644
--- a/lib/netdev-linux.c
+++ b/lib/netdev-linux.c
@@ -2714,8 +2714,15 @@  tc_add_matchall_policer(struct netdev *netdev, uint32_t kbits_rate,
 
     err = tc_transact(&request, &reply);
     if (!err) {
-        struct tcmsg *tc =
-            ofpbuf_at_assert(reply, NLMSG_HDRLEN, sizeof *tc);
+        struct ofpbuf b = ofpbuf_const_initializer(reply->data, reply->size);
+        struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
+        struct tcmsg *tc = ofpbuf_try_pull(&b, sizeof *tc);
+
+        if (!nlmsg || !tc) {
+            VLOG_ERR_RL(&rl, "Failed to add match all policer, "
+                        "malformed reply");
+            return EPROTO;
+        }
         ofpbuf_delete(reply);
     }
 
@@ -5744,26 +5751,27 @@  static int
 tc_update_policer_action_stats(struct ofpbuf *msg,
                                struct ofputil_meter_stats *stats)
 {
+    struct ofpbuf b = ofpbuf_const_initializer(msg->data, msg->size);
+    struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
+    struct tcamsg *tca = ofpbuf_try_pull(&b, sizeof *tca);
     struct ovs_flow_stats stats_dropped;
     struct ovs_flow_stats stats_hw;
     struct ovs_flow_stats stats_sw;
     const struct nlattr *act;
     struct nlattr *prio;
-    struct tcamsg *tca;
     int error = 0;
 
     if (!stats) {
         goto exit;
     }
 
-    if (NLMSG_HDRLEN + sizeof *tca > msg->size) {
+    if (!nlmsg || !tca) {
         VLOG_ERR_RL(&rl, "Failed to get action stats, size error");
         error = EPROTO;
         goto exit;
     }
 
-    tca = ofpbuf_at_assert(msg, NLMSG_HDRLEN, sizeof *tca);
-    act = nl_attr_find(msg, NLMSG_HDRLEN + sizeof *tca, TCA_ACT_TAB);
+    act = nl_attr_find(&b, 0, TCA_ACT_TAB);
     if (!act) {
         VLOG_ERR_RL(&rl, "Failed to get action stats, can't find attribute");
         error = EPROTO;
@@ -6028,20 +6036,26 @@  static int
 tc_parse_class(const struct ofpbuf *msg, unsigned int *handlep,
                struct nlattr **options, struct netdev_queue_stats *stats)
 {
+    struct ofpbuf b = ofpbuf_const_initializer(msg->data, msg->size);
+    struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
+    struct tcmsg *tc = ofpbuf_try_pull(&b, sizeof *tc);
     static const struct nl_policy tca_policy[] = {
         [TCA_OPTIONS] = { .type = NL_A_NESTED, .optional = false },
         [TCA_STATS2] = { .type = NL_A_NESTED, .optional = false },
     };
     struct nlattr *ta[ARRAY_SIZE(tca_policy)];
 
-    if (!nl_policy_parse(msg, NLMSG_HDRLEN + sizeof(struct tcmsg),
-                         tca_policy, ta, ARRAY_SIZE(ta))) {
+    if (!nlmsg || !tc) {
+        VLOG_ERR_RL(&rl, "failed to parse class message, malformed reply");
+        goto error;
+    }
+
+    if (!nl_policy_parse(&b, 0, tca_policy, ta, ARRAY_SIZE(ta))) {
         VLOG_WARN_RL(&rl, "failed to parse class message");
         goto error;
     }
 
     if (handlep) {
-        struct tcmsg *tc = ofpbuf_at_assert(msg, NLMSG_HDRLEN, sizeof *tc);
         *handlep = tc->tcm_handle;
     }
 
diff --git a/lib/tc.c b/lib/tc.c
index 5c32c6f97..07975f9db 100644
--- a/lib/tc.c
+++ b/lib/tc.c
@@ -36,6 +36,7 @@ 
 #include <unistd.h>
 
 #include "byte-order.h"
+#include "coverage.h"
 #include "netlink-socket.h"
 #include "netlink.h"
 #include "openvswitch/ofpbuf.h"
@@ -67,6 +68,8 @@ 
 
 VLOG_DEFINE_THIS_MODULE(tc);
 
+COVERAGE_DEFINE(tc_netlink_malformed_reply);
+
 static struct vlog_rate_limit error_rl = VLOG_RATE_LIMIT_INIT(60, 5);
 
 static enum tc_offload_policy tc_policy = TC_POLICY_NONE;
@@ -2190,18 +2193,19 @@  int
 parse_netlink_to_tc_flower(struct ofpbuf *reply, struct tcf_id *id,
                            struct tc_flower *flower, bool terse)
 {
-    struct tcmsg *tc;
+    struct ofpbuf b = ofpbuf_const_initializer(reply->data, reply->size);
+    struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
+    struct tcmsg *tc = ofpbuf_try_pull(&b, sizeof *tc);
     struct nlattr *ta[ARRAY_SIZE(tca_policy)];
     const char *kind;
 
-    if (NLMSG_HDRLEN + sizeof *tc > reply->size) {
+    if (!nlmsg || !tc) {
+        COVERAGE_INC(tc_netlink_malformed_reply);
         return EPROTO;
     }
 
     memset(flower, 0, sizeof *flower);
 
-    tc = ofpbuf_at_assert(reply, NLMSG_HDRLEN, sizeof *tc);
-
     flower->key.eth_type = (OVS_FORCE ovs_be16) tc_get_minor(tc->tcm_info);
     flower->mask.eth_type = OVS_BE16_MAX;
     id->prio = tc_get_major(tc->tcm_info);
@@ -2215,8 +2219,7 @@  parse_netlink_to_tc_flower(struct ofpbuf *reply, struct tcf_id *id,
         return EAGAIN;
     }
 
-    if (!nl_policy_parse(reply, NLMSG_HDRLEN + sizeof *tc,
-                         tca_policy, ta, ARRAY_SIZE(ta))) {
+    if (!nl_policy_parse(&b, 0, tca_policy, ta, ARRAY_SIZE(ta))) {
         VLOG_ERR_RL(&error_rl, "failed to parse tca policy");
         return EPROTO;
     }
@@ -2237,13 +2240,17 @@  parse_netlink_to_tc_flower(struct ofpbuf *reply, struct tcf_id *id,
 int
 parse_netlink_to_tc_chain(struct ofpbuf *reply, uint32_t *chain)
 {
+    struct ofpbuf b = ofpbuf_const_initializer(reply->data, reply->size);
+    struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
+    struct tcmsg *tc = ofpbuf_try_pull(&b, sizeof *tc);
     struct nlattr *ta[ARRAY_SIZE(tca_chain_policy)];
-    struct tcmsg *tc;
 
-    tc = ofpbuf_at_assert(reply, NLMSG_HDRLEN, sizeof *tc);
+    if (!nlmsg || !tc) {
+        COVERAGE_INC(tc_netlink_malformed_reply);
+        return EPROTO;
+    }
 
-    if (!nl_policy_parse(reply, NLMSG_HDRLEN + sizeof *tc,
-                         tca_chain_policy, ta, ARRAY_SIZE(ta))) {
+    if (!nl_policy_parse(&b, 0, tca_chain_policy, ta, ARRAY_SIZE(ta))) {
         VLOG_ERR_RL(&error_rl, "failed to parse tca chain policy");
         return EINVAL;
     }
@@ -2307,21 +2314,27 @@  int
 parse_netlink_to_tc_policer(struct ofpbuf *reply, uint32_t police_idx[])
 {
     static struct nl_policy actions_orders_policy[TCA_ACT_MAX_PRIO] = {};
+    struct ofpbuf b = ofpbuf_const_initializer(reply->data, reply->size);
     struct nlattr *actions_orders[ARRAY_SIZE(actions_orders_policy)];
+    struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
     const int max_size = ARRAY_SIZE(actions_orders_policy);
+    struct tcamsg *tca = ofpbuf_try_pull(&b, sizeof *tca);
     const struct nlattr *actions;
     struct tc_flower flower;
-    struct tcamsg *tca;
     int i, cnt = 0;
     int err;
 
+    if (!nlmsg || !tca) {
+        COVERAGE_INC(tc_netlink_malformed_reply);
+        return EPROTO;
+    }
+
     for (i = 0; i < max_size; i++) {
         actions_orders_policy[i].type = NL_A_NESTED;
         actions_orders_policy[i].optional = true;
     }
 
-    tca = ofpbuf_at_assert(reply, NLMSG_HDRLEN, sizeof *tca);
-    actions = nl_attr_find(reply, NLMSG_HDRLEN + sizeof *tca, TCA_ACT_TAB);
+    actions = nl_attr_find(&b, 0, TCA_ACT_TAB);
     if (!actions || !nl_parse_nested(actions, actions_orders_policy,
                                      actions_orders, max_size)) {
         VLOG_ERR_RL(&error_rl,
@@ -3823,8 +3836,14 @@  tc_replace_flower(struct tcf_id *id, struct tc_flower *flower)
 
     error = tc_transact(&request, &reply);
     if (!error) {
-        struct tcmsg *tc =
-            ofpbuf_at_assert(reply, NLMSG_HDRLEN, sizeof *tc);
+        struct ofpbuf b = ofpbuf_const_initializer(reply->data, reply->size);
+        struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
+        struct tcmsg *tc = ofpbuf_try_pull(&b, sizeof *tc);
+
+        if (!nlmsg || !tc) {
+            COVERAGE_INC(tc_netlink_malformed_reply);
+            return EPROTO;
+        }
 
         id->prio = tc_get_major(tc->tcm_info);
         id->handle = tc->tcm_handle;