@@ -845,6 +845,7 @@ enum tc_setup_type {
TC_SETUP_QDISC_ETF,
TC_SETUP_ROOT_QDISC,
TC_SETUP_QDISC_GRED,
+ TC_SETUP_QDISC_TAPRIO,
};
/* These structures hold the attributes of bpf state that are being passed
@@ -161,4 +161,22 @@ struct tc_etf_qopt_offload {
s32 queue;
};
+struct tc_taprio_sched_entry {
+ u8 command; /* TC_TAPRIO_CMD_* */
+
+ /* The gate_mask on the offloading side refers to HW queues */
+ u32 gate_mask;
+ u32 interval;
+};
+
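+/* Passed to ndo_setup_tc() with TC_SETUP_QDISC_TAPRIO. 'enable' is 1
+ * when installing the schedule in hardware and 0 when removing it;
+ * 'entries' is a flexible array holding 'num_entries' elements.
+ */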
+struct tc_taprio_qopt_offload {
+ u8 enable;
+ ktime_t base_time;
+ u64 cycle_time;
+ u64 cycle_time_extension;
+
+ size_t num_entries;
+ struct tc_taprio_sched_entry entries[];
+};
+
#endif
@@ -1158,6 +1158,9 @@ enum {
* [TCA_TAPRIO_ATTR_SCHED_ENTRY_INTERVAL]
*/
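+/* Values for TCA_TAPRIO_ATTR_OFFLOAD_FLAGS. FULL_OFFLOAD requests that
+ * the gate schedule be executed entirely by the NIC. The TXTIME flag is
+ * only defined here; presumably it is reserved for a txtime-assisted
+ * mode, and nothing in this patch acts on it yet.
+ */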
+#define TCA_TAPRIO_ATTR_OFFLOAD_FLAG_FULL_OFFLOAD 0x1
+#define TCA_TAPRIO_ATTR_OFFLOAD_FLAG_TXTIME_OFFLOAD 0x2
+
enum {
TCA_TAPRIO_ATTR_UNSPEC,
TCA_TAPRIO_ATTR_PRIOMAP, /* struct tc_mqprio_qopt */
@@ -1169,6 +1172,7 @@ enum {
TCA_TAPRIO_ATTR_ADMIN_SCHED, /* The admin sched, only used in dump */
TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME, /* s64 */
TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION, /* s64 */
+ TCA_TAPRIO_ATTR_OFFLOAD_FLAGS, /* u32 */
__TCA_TAPRIO_ATTR_MAX,
};
@@ -26,6 +26,9 @@
static DEFINE_SPINLOCK(taprio_list_lock);
#define TAPRIO_ALL_GATES_OPEN -1
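+/* U32_MAX doubles as the "no offload request" sentinel: taprio_change()
+ * initializes offload_flags to U32_MAX when the netlink attribute is
+ * absent, which is what VALID_OFFLOAD() tests for.
+ */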
+#define FULL_OFFLOAD_IS_ON(flags) ((flags) & TCA_TAPRIO_ATTR_OFFLOAD_FLAG_FULL_OFFLOAD)
+#define TXTIME_OFFLOAD_IS_ON(flags) ((flags) & TCA_TAPRIO_ATTR_OFFLOAD_FLAG_TXTIME_OFFLOAD)
+#define VALID_OFFLOAD(flags) ((flags) != U32_MAX)
struct sched_entry {
struct list_head list;
@@ -55,6 +58,8 @@ struct sched_gate_list {
struct taprio_sched {
struct Qdisc **qdiscs;
struct Qdisc *root;
+ struct tc_mqprio_qopt mqprio;
+ u32 offload_flags;
int clockid;
atomic64_t picos_per_byte; /* Using picoseconds because for 10Gbps+
* speeds it's sub-nanoseconds per byte
@@ -66,6 +71,8 @@ struct taprio_sched {
struct sched_gate_list __rcu *oper_sched;
struct sched_gate_list __rcu *admin_sched;
ktime_t (*get_time)(void);
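+ /* Switched between the _soft and _offload implementations,
+ * depending on whether the schedule is executed in software
+ * or by the NIC.
+ */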
+ struct sk_buff *(*dequeue)(struct Qdisc *sch);
+ struct sk_buff *(*peek)(struct Qdisc *sch);
struct hrtimer advance_timer;
struct list_head taprio_list;
};
@@ -143,7 +150,30 @@ static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch,
return qdisc_enqueue(skb, child, to_free);
}
-static struct sk_buff *taprio_peek(struct Qdisc *sch)
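+/* In full offload mode the NIC enforces the gates, so peek only needs
+ * to find the first available skb among the child qdiscs, without
+ * consulting the software gate state.
+ */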
+static struct sk_buff *taprio_peek_offload(struct Qdisc *sch)
+{
+ struct taprio_sched *q = qdisc_priv(sch);
+ struct net_device *dev = qdisc_dev(sch);
+ struct sk_buff *skb;
+ int i;
+
+ for (i = 0; i < dev->num_tx_queues; i++) {
+ struct Qdisc *child = q->qdiscs[i];
+
+ if (unlikely(!child))
+ continue;
+
+ skb = child->ops->peek(child);
+ if (!skb)
+ continue;
+
+ return skb;
+ }
+
+ return NULL;
+}
+
+static struct sk_buff *taprio_peek_soft(struct Qdisc *sch)
{
struct taprio_sched *q = qdisc_priv(sch);
struct net_device *dev = qdisc_dev(sch);
@@ -184,6 +214,13 @@ static struct sk_buff *taprio_peek(struct Qdisc *sch)
return NULL;
}
+static struct sk_buff *taprio_peek(struct Qdisc *sch)
+{
+ struct taprio_sched *q = qdisc_priv(sch);
+
+ return q->peek(sch);
+}
+
static inline int length_to_duration(struct taprio_sched *q, int len)
{
return div_u64(len * atomic64_read(&q->picos_per_byte), 1000);
@@ -196,7 +233,7 @@ static void taprio_set_budget(struct taprio_sched *q, struct sched_entry *entry)
atomic64_read(&q->picos_per_byte)));
}
-static struct sk_buff *taprio_dequeue(struct Qdisc *sch)
+static struct sk_buff *taprio_dequeue_soft(struct Qdisc *sch)
{
struct taprio_sched *q = qdisc_priv(sch);
struct net_device *dev = qdisc_dev(sch);
@@ -275,6 +312,40 @@ static struct sk_buff *taprio_dequeue(struct Qdisc *sch)
return skb;
}
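+/* Offload counterpart of taprio_dequeue_soft(): the hardware enforces
+ * the gates, so scan the TX queues in order, hand the first packet up
+ * and keep this qdisc's stats and queue length consistent.
+ */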
+static struct sk_buff *taprio_dequeue_offload(struct Qdisc *sch)
+{
+ struct taprio_sched *q = qdisc_priv(sch);
+ struct net_device *dev = qdisc_dev(sch);
+ struct sk_buff *skb;
+ int i;
+
+ for (i = 0; i < dev->num_tx_queues; i++) {
+ struct Qdisc *child = q->qdiscs[i];
+
+ if (unlikely(!child))
+ continue;
+
+ skb = child->ops->dequeue(child);
+ if (unlikely(!skb))
+ continue;
+
+ qdisc_bstats_update(sch, skb);
+ qdisc_qstats_backlog_dec(sch, skb);
+ sch->q.qlen--;
+
+ return skb;
+ }
+
+ return NULL;
+}
+
+static struct sk_buff *taprio_dequeue(struct Qdisc *sch)
+{
+ struct taprio_sched *q = qdisc_priv(sch);
+
+ return q->dequeue(sch);
+}
+
static bool should_restart_cycle(const struct sched_gate_list *oper,
const struct sched_entry *entry)
{
@@ -707,6 +778,165 @@ static int taprio_dev_notifier(struct notifier_block *nb, unsigned long event,
return NOTIFY_DONE;
}
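+/* Translate a bitmask of traffic classes into a bitmask of HW queues
+ * using the mqprio mapping. Illustrative (made-up) example: with
+ * num_tc = 2, offset = {0, 2} and count = {2, 2}, a tc_mask of 0x2
+ * (TC 1) selects queues 2-3, i.e. a queue_mask of 0xc.
+ */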
+static u32 tc_mask_to_queue_mask(const struct tc_mqprio_qopt *mqprio,
+ u32 tc_mask)
+{
+ u32 i, queue_mask = 0;
+
+ for (i = 0; i < mqprio->num_tc; i++) {
+ u32 offset, count;
+
+ if (!(tc_mask & BIT(i)))
+ continue;
+
+ offset = mqprio->offset[i];
+ count = mqprio->count[i];
+
+ queue_mask |= GENMASK(offset + count - 1, offset);
+ }
+
+ return queue_mask;
+}
+
+static void taprio_sched_to_offload(struct taprio_sched *q,
+ struct sched_gate_list *sched,
+ struct tc_taprio_qopt_offload *taprio)
+{
+ struct sched_entry *entry;
+ int i = 0;
+
+ taprio->base_time = sched->base_time;
+
+ list_for_each_entry(entry, &sched->entries, list) {
+ struct tc_taprio_sched_entry *e = &taprio->entries[i];
+
+ e->command = entry->command;
+ e->interval = entry->interval;
+
+ /* We do this transformation because the NIC
+ * has no knowledge of traffic classes, but it
+ * knows about queues.
+ */
+ e->gate_mask = tc_mask_to_queue_mask(&q->mqprio,
+ entry->gate_mask);
+ i++;
+ }
+
+ taprio->num_entries = i;
+}
+
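+/* Tell the driver to stop executing the schedule in hardware and put
+ * the software function pointers and timer callback back in place.
+ * This is a no-op if offload was never enabled.
+ */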
+static void taprio_disable_offload(struct net_device *dev,
+ struct taprio_sched *q)
+{
+ const struct net_device_ops *ops = dev->netdev_ops;
+ struct tc_taprio_qopt_offload taprio = { };
+ int err;
+
+ if (!q->offload_flags)
+ return;
+
+ if (!ops->ndo_setup_tc)
+ return;
+
+ taprio.enable = 0;
+
+ err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TAPRIO, &taprio);
+ if (err < 0)
+ return;
+
+ /* Make sure the function pointers are always left in a
+ * consistent state.
+ */
+ q->dequeue = taprio_dequeue_soft;
+ q->peek = taprio_peek_soft;
+
+ q->advance_timer.function = advance_sched;
+
+ q->offload_flags = 0;
+}
+
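+/* Timer callback used while offload is enabled: the NIC performs the
+ * actual admin -> oper switch at the admin base time, so the only job
+ * left in software is to rotate the schedule pointers to match.
+ */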
+static enum hrtimer_restart next_sched(struct hrtimer *timer)
+{
+ struct taprio_sched *q = container_of(timer, struct taprio_sched,
+ advance_timer);
+ struct sched_gate_list *oper, *admin;
+
+ spin_lock(&q->current_entry_lock);
+ oper = rcu_dereference_protected(q->oper_sched,
+ lockdep_is_held(&q->current_entry_lock));
+ admin = rcu_dereference_protected(q->admin_sched,
+ lockdep_is_held(&q->current_entry_lock));
+
+ rcu_assign_pointer(q->oper_sched, admin);
+ rcu_assign_pointer(q->admin_sched, NULL);
+
+ if (oper)
+ call_rcu(&oper->rcu, taprio_free_sched_cb);
+
+ spin_unlock(&q->current_entry_lock);
+
+ return HRTIMER_NORESTART;
+}
+
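+/* Build a tc_taprio_qopt_offload from 'sched' and hand it to the
+ * driver. Only full offload is accepted for now; on success, the
+ * dequeue/peek paths and the advance timer are switched to their
+ * offload variants.
+ */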
+static int taprio_enable_offload(struct net_device *dev,
+ struct tc_mqprio_qopt *mqprio,
+ struct taprio_sched *q,
+ struct sched_gate_list *sched,
+ struct netlink_ext_ack *extack,
+ u32 offload_flags)
+{
+ const struct net_device_ops *ops = dev->netdev_ops;
+ struct tc_taprio_qopt_offload *taprio;
+ size_t size;
+ int err = 0;
+
+ if (!FULL_OFFLOAD_IS_ON(offload_flags)) {
+ NL_SET_ERR_MSG(extack, "Offload mode is not supported");
+ return -EOPNOTSUPP;
+ }
+
+ if (!ops->ndo_setup_tc) {
+ NL_SET_ERR_MSG(extack, "Specified device does not support taprio offload");
+ return -EOPNOTSUPP;
+ }
+
+ size = struct_size(taprio, entries, sched->num_entries);
+
+ taprio = kzalloc(size, GFP_ATOMIC);
+ if (!taprio) {
+ NL_SET_ERR_MSG(extack, "Not enough memory for enabling offload mode");
+ return -ENOMEM;
+ }
+
+ taprio->enable = 1;
+ taprio_sched_to_offload(q, sched, taprio);
+
+ err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TAPRIO, taprio);
+ if (err < 0) {
+ NL_SET_ERR_MSG(extack, "Specified device failed to setup taprio hardware offload");
+ goto done;
+ }
+
+ q->dequeue = taprio_dequeue_offload;
+ q->peek = taprio_peek_offload;
+
+ /* This function will only serve to keep the pointers to the
+ * "oper" and "admin" schedules valid in relation to their
+ * base times, so that when dump() is called the user sees
+ * the right schedules.
+ */
+ q->advance_timer.function = next_sched;
+
+done:
+ kfree(taprio);
+
+ if (err == 0)
+ q->offload_flags = offload_flags;
+
+ return err;
+}
+
static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
struct netlink_ext_ack *extack)
{
@@ -715,6 +945,7 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
struct taprio_sched *q = qdisc_priv(sch);
struct net_device *dev = qdisc_dev(sch);
struct tc_mqprio_qopt *mqprio = NULL;
+ u32 offload_flags = U32_MAX;
int i, err, clockid;
unsigned long flags;
ktime_t start;
@@ -731,6 +962,9 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
if (err < 0)
return err;
+ if (tb[TCA_TAPRIO_ATTR_OFFLOAD_FLAGS])
+ offload_flags = nla_get_u32(tb[TCA_TAPRIO_ATTR_OFFLOAD_FLAGS]);
+
new_admin = kzalloc(sizeof(*new_admin), GFP_KERNEL);
if (!new_admin) {
NL_SET_ERR_MSG(extack, "Not enough memory for a new schedule");
@@ -749,6 +983,12 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
goto free_sched;
}
+ if (VALID_OFFLOAD(offload_flags) && (oper || admin)) {
+ NL_SET_ERR_MSG(extack, "Changing 'offload' of a running schedule is not supported");
+ err = -EOPNOTSUPP;
+ goto free_sched;
+ }
+
err = parse_taprio_schedule(tb, new_admin, extack);
if (err < 0)
goto free_sched;
@@ -802,6 +1042,8 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
for (i = 0; i < TC_BITMASK + 1; i++)
netdev_set_prio_tc_map(dev, i,
mqprio->prio_tc_map[i]);
+
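+ /* Keep a copy of the priomap so that taprio_sched_to_offload()
+ * can later translate traffic classes into queue masks.
+ */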
+ memcpy(&q->mqprio, mqprio, sizeof(q->mqprio));
}
switch (q->clockid) {
@@ -823,6 +1065,15 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
goto unlock;
}
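+ /* An offload_flags of zero explicitly disables offload; otherwise,
+ * (re)program the hardware if new flags were given or if offload
+ * is already active.
+ */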
+ if (!offload_flags) {
+ taprio_disable_offload(dev, q);
+ } else if (VALID_OFFLOAD(offload_flags) || q->offload_flags) {
+ err = taprio_enable_offload(dev, mqprio, q,
+ new_admin, extack, offload_flags);
+ if (err)
+ goto unlock;
+ }
+
err = taprio_get_start_time(sch, new_admin, &start);
if (err < 0) {
NL_SET_ERR_MSG(extack, "Internal error: failed get start time");
@@ -866,6 +1117,8 @@ static void taprio_destroy(struct Qdisc *sch)
hrtimer_cancel(&q->advance_timer);
+ taprio_disable_offload(dev, q);
+
if (q->qdiscs) {
for (i = 0; i < dev->num_tx_queues && q->qdiscs[i]; i++)
qdisc_put(q->qdiscs[i]);
@@ -895,6 +1148,9 @@ static int taprio_init(struct Qdisc *sch, struct nlattr *opt,
hrtimer_init(&q->advance_timer, CLOCK_TAI, HRTIMER_MODE_ABS);
q->advance_timer.function = advance_sched;
+ q->dequeue = taprio_dequeue_soft;
+ q->peek = taprio_peek_soft;
+
q->root = sch;
/* We only support static clockids. Use an invalid value as default
@@ -1080,6 +1336,9 @@ static int taprio_dump(struct Qdisc *sch, struct sk_buff *skb)
if (nla_put_s32(skb, TCA_TAPRIO_ATTR_SCHED_CLOCKID, q->clockid))
goto options_error;
+ if (nla_put_u32(skb, TCA_TAPRIO_ATTR_OFFLOAD_FLAGS, q->offload_flags))
+ goto options_error;
+
if (oper && dump_schedule(skb, oper))
goto options_error;