From patchwork Fri Apr 22 12:20:13 2016 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Michal Kazior X-Patchwork-Id: 613576 X-Patchwork-Delegate: davem@davemloft.net Return-Path: X-Original-To: patchwork-incoming@ozlabs.org Delivered-To: patchwork-incoming@ozlabs.org Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 3qrvmV5bzCz9t4h for ; Fri, 22 Apr 2016 22:18:50 +1000 (AEST) Authentication-Results: ozlabs.org; dkim=pass (1024-bit key; unprotected) header.d=tieto.com header.i=@tieto.com header.b=T8GFpHiv; dkim-atps=neutral Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1752240AbcDVMSt (ORCPT ); Fri, 22 Apr 2016 08:18:49 -0400 Received: from mail-lf0-f53.google.com ([209.85.215.53]:36725 "EHLO mail-lf0-f53.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751949AbcDVMSr (ORCPT ); Fri, 22 Apr 2016 08:18:47 -0400 Received: by mail-lf0-f53.google.com with SMTP id g184so78197683lfb.3 for ; Fri, 22 Apr 2016 05:18:46 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=tieto.com; s=google; h=from:to:cc:subject:date:message-id; bh=0Ee4VTgy1KH4O5XUEjVGAXvnQyZ8pZg7j9jutJ/SoCI=; b=T8GFpHivWf3yBrFfv0SR0uVIVQUxLPnulJ1wslhU2JVxHW5TvShHH4KaFga9XhGrxV xm7T3ae00ahc8QW5JXXNDqFxtJAeuQ7nAoa21r9HxeQ/BeDZkMPdAskVmHFpvwc8hBha P7WQJ9Dv7C+DAV5hNZ6acjoHAzb5i5Wr4h97Q= X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20130820; h=x-gm-message-state:from:to:cc:subject:date:message-id; bh=0Ee4VTgy1KH4O5XUEjVGAXvnQyZ8pZg7j9jutJ/SoCI=; b=fX4MN/w5YM6H5VxlbjVCzBLhwwvM0PlDCgx8lp29tSeneoXVrBNHtjuLi7VbcxxfiV LeDUwG4Dowy8LXjs0Ob6ufwKPFivobyDhm11DXFyUf0081oVU9lyEQWExwxKqC9l+DKP InLHXYYYDTUFKKNzOZrZ69X1Y1bncwKo7j3y+jyBwHM6MdzK8M2jCOlHDC4beXNHFjBL cfLu7DVhc16h+DAFVLggs/ZR16OuirCXF5SDaFGKTx6aONTuojj4OVjEbcVhKLMwQwOo WvfMYbL9lGZ5I2IBQVCpEwswsUkmmi7n+S98na1sPvE63n8NLRI6oFohTl8ni9fWb8P4 QX2A== X-Gm-Message-State: AOPr4FWyNE4kNEgrrTDbyL6YgZ2pBtggLhiQYjk1RWZaJfalSd8ARVhdv/8nxCpDk6X1qryEtXa4ghFxMJzq1YgxZT8Y0/U/5c0aM6PLNWbqvYhPxkRjnAsDphXTigY0djHoueuUO8JW98O4TZL6xtCCu6ylJr80w6bc0x6chLHQnwhAn+TFF1qA1NDL X-Received: by 10.25.90.73 with SMTP id o70mr8737676lfb.80.1461327525956; Fri, 22 Apr 2016 05:18:45 -0700 (PDT) Received: from localhost.localdomain ([91.198.246.10]) by smtp.gmail.com with ESMTPSA id zi6sm1495924lbb.5.2016.04.22.05.18.44 (version=TLS1_2 cipher=ECDHE-RSA-AES128-SHA bits=128/128); Fri, 22 Apr 2016 05:18:44 -0700 (PDT) From: Michal Kazior To: netdev@vger.kernel.org Cc: davem@davemloft.net, johannes@sipsolutions.net, Michal Kazior Subject: [PATCH] fq: add fair queuing framework Date: Fri, 22 Apr 2016 14:20:13 +0200 Message-Id: <1461327613-22902-1-git-send-email-michal.kazior@tieto.com> X-Mailer: git-send-email 2.1.4 X-DomainID: tieto.com Sender: netdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org This works on the same implementation principle as codel*.h, i.e. there's a generic header with structures and macros and a implementation header carrying function definitions to include in given, e.g. driver or module. The fairness logic comes from net/sched/sch_fq_codel.c but is generalized so it is more flexible and easier to re-use. Signed-off-by: Michal Kazior --- This will be used by mac80211. For more background please see: https://www.spinics.net/lists/linux-wireless/msg149976.html include/net/fq.h | 95 ++++++++++++++++++ include/net/fq_impl.h | 269 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 364 insertions(+) create mode 100644 include/net/fq.h create mode 100644 include/net/fq_impl.h diff --git a/include/net/fq.h b/include/net/fq.h new file mode 100644 index 000000000000..268b49049c37 --- /dev/null +++ b/include/net/fq.h @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2016 Qualcomm Atheros, Inc + * + * GPL v2 + * + * Based on net/sched/sch_fq_codel.c + */ +#ifndef __NET_SCHED_FQ_H +#define __NET_SCHED_FQ_H + +struct fq_tin; + +/** + * struct fq_flow - per traffic flow queue + * + * @tin: owner of this flow. Used to manage collisions, i.e. when a packet + * hashes to an index which points to a flow that is already owned by a + * different tin the packet is destined to. In such case the implementer + * must provide a fallback flow + * @flowchain: can be linked to fq_tin's new_flows or old_flows. Used for DRR++ + * (deficit round robin) based round robin queuing similar to the one + * found in net/sched/sch_fq_codel.c + * @backlogchain: can be linked to other fq_flow and fq. Used to keep track of + * fat flows and efficient head-dropping if packet limit is reached + * @queue: sk_buff queue to hold packets + * @backlog: number of bytes pending in the queue. The number of packets can be + * found in @queue.qlen + * @deficit: used for DRR++ + */ +struct fq_flow { + struct fq_tin *tin; + struct list_head flowchain; + struct list_head backlogchain; + struct sk_buff_head queue; + u32 backlog; + int deficit; +}; + +/** + * struct fq_tin - a logical container of fq_flows + * + * Used to group fq_flows into a logical aggregate. DRR++ scheme is used to + * pull interleaved packets out of the associated flows. + * + * @new_flows: linked list of fq_flow + * @old_flows: linked list of fq_flow + */ +struct fq_tin { + struct list_head new_flows; + struct list_head old_flows; + u32 backlog_bytes; + u32 backlog_packets; + u32 overlimit; + u32 collisions; + u32 flows; + u32 tx_bytes; + u32 tx_packets; +}; + +/** + * struct fq - main container for fair queuing purposes + * + * @backlogs: linked to fq_flows. Used to maintain fat flows for efficient + * head-dropping when @backlog reaches @limit + * @limit: max number of packets that can be queued across all flows + * @backlog: number of packets queued across all flows + */ +struct fq { + struct fq_flow *flows; + struct list_head backlogs; + spinlock_t lock; + u32 flows_cnt; + u32 perturbation; + u32 limit; + u32 quantum; + u32 backlog; + u32 overlimit; + u32 collisions; +}; + +typedef struct sk_buff *fq_tin_dequeue_t(struct fq *, + struct fq_tin *, + struct fq_flow *flow); + +typedef void fq_skb_free_t(struct fq *, + struct fq_tin *, + struct fq_flow *, + struct sk_buff *); + +typedef struct fq_flow *fq_flow_get_default_t(struct fq *, + struct fq_tin *, + int idx, + struct sk_buff *); + +#endif diff --git a/include/net/fq_impl.h b/include/net/fq_impl.h new file mode 100644 index 000000000000..02eab7c51adb --- /dev/null +++ b/include/net/fq_impl.h @@ -0,0 +1,269 @@ +/* + * Copyright (c) 2016 Qualcomm Atheros, Inc + * + * GPL v2 + * + * Based on net/sched/sch_fq_codel.c + */ +#ifndef __NET_SCHED_FQ_IMPL_H +#define __NET_SCHED_FQ_IMPL_H + +#include + +/* functions that are embedded into includer */ + +static struct sk_buff *fq_flow_dequeue(struct fq *fq, + struct fq_flow *flow) +{ + struct fq_tin *tin = flow->tin; + struct fq_flow *i; + struct sk_buff *skb; + + lockdep_assert_held(&fq->lock); + + skb = __skb_dequeue(&flow->queue); + if (!skb) + return NULL; + + tin->backlog_bytes -= skb->len; + tin->backlog_packets--; + flow->backlog -= skb->len; + fq->backlog--; + + if (flow->backlog == 0) { + list_del_init(&flow->backlogchain); + } else { + i = flow; + + list_for_each_entry_continue(i, &fq->backlogs, backlogchain) + if (i->backlog < flow->backlog) + break; + + list_move_tail(&flow->backlogchain, + &i->backlogchain); + } + + return skb; +} + +static struct sk_buff *fq_tin_dequeue(struct fq *fq, + struct fq_tin *tin, + fq_tin_dequeue_t dequeue_func) +{ + struct fq_flow *flow; + struct list_head *head; + struct sk_buff *skb; + + lockdep_assert_held(&fq->lock); + +begin: + head = &tin->new_flows; + if (list_empty(head)) { + head = &tin->old_flows; + if (list_empty(head)) + return NULL; + } + + flow = list_first_entry(head, struct fq_flow, flowchain); + + if (flow->deficit <= 0) { + flow->deficit += fq->quantum; + list_move_tail(&flow->flowchain, + &tin->old_flows); + goto begin; + } + + skb = dequeue_func(fq, tin, flow); + if (!skb) { + /* force a pass through old_flows to prevent starvation */ + if ((head == &tin->new_flows) && + !list_empty(&tin->old_flows)) { + list_move_tail(&flow->flowchain, &tin->old_flows); + } else { + list_del_init(&flow->flowchain); + flow->tin = NULL; + } + goto begin; + } + + flow->deficit -= skb->len; + tin->tx_bytes += skb->len; + tin->tx_packets++; + + return skb; +} + +static struct fq_flow *fq_flow_classify(struct fq *fq, + struct fq_tin *tin, + struct sk_buff *skb, + fq_flow_get_default_t get_default_func) +{ + struct fq_flow *flow; + u32 hash; + u32 idx; + + lockdep_assert_held(&fq->lock); + + hash = skb_get_hash_perturb(skb, fq->perturbation); + idx = reciprocal_scale(hash, fq->flows_cnt); + flow = &fq->flows[idx]; + + if (flow->tin && flow->tin != tin) { + flow = get_default_func(fq, tin, idx, skb); + tin->collisions++; + fq->collisions++; + } + + if (!flow->tin) + tin->flows++; + + return flow; +} + +static void fq_tin_enqueue(struct fq *fq, + struct fq_tin *tin, + struct sk_buff *skb, + fq_skb_free_t free_func, + fq_flow_get_default_t get_default_func) +{ + struct fq_flow *flow; + struct fq_flow *i; + + lockdep_assert_held(&fq->lock); + + flow = fq_flow_classify(fq, tin, skb, get_default_func); + + flow->tin = tin; + flow->backlog += skb->len; + tin->backlog_bytes += skb->len; + tin->backlog_packets++; + fq->backlog++; + + if (list_empty(&flow->backlogchain)) + list_add_tail(&flow->backlogchain, &fq->backlogs); + + i = flow; + list_for_each_entry_continue_reverse(i, &fq->backlogs, + backlogchain) + if (i->backlog > flow->backlog) + break; + + list_move(&flow->backlogchain, &i->backlogchain); + + if (list_empty(&flow->flowchain)) { + flow->deficit = fq->quantum; + list_add_tail(&flow->flowchain, + &tin->new_flows); + } + + __skb_queue_tail(&flow->queue, skb); + + if (fq->backlog > fq->limit) { + flow = list_first_entry_or_null(&fq->backlogs, + struct fq_flow, + backlogchain); + if (!flow) + return; + + skb = fq_flow_dequeue(fq, flow); + if (!skb) + return; + + free_func(fq, flow->tin, flow, skb); + + flow->tin->overlimit++; + fq->overlimit++; + } +} + +static void fq_flow_reset(struct fq *fq, + struct fq_flow *flow, + fq_skb_free_t free_func) +{ + struct sk_buff *skb; + + while ((skb = fq_flow_dequeue(fq, flow))) + free_func(fq, flow->tin, flow, skb); + + if (!list_empty(&flow->flowchain)) + list_del_init(&flow->flowchain); + + if (!list_empty(&flow->backlogchain)) + list_del_init(&flow->backlogchain); + + flow->tin = NULL; + + WARN_ON_ONCE(flow->backlog); +} + +static void fq_tin_reset(struct fq *fq, + struct fq_tin *tin, + fq_skb_free_t free_func) +{ + struct list_head *head; + struct fq_flow *flow; + + for (;;) { + head = &tin->new_flows; + if (list_empty(head)) { + head = &tin->old_flows; + if (list_empty(head)) + break; + } + + flow = list_first_entry(head, struct fq_flow, flowchain); + fq_flow_reset(fq, flow, free_func); + } + + WARN_ON_ONCE(tin->backlog_bytes); + WARN_ON_ONCE(tin->backlog_packets); +} + +static void fq_flow_init(struct fq_flow *flow) +{ + INIT_LIST_HEAD(&flow->flowchain); + INIT_LIST_HEAD(&flow->backlogchain); + __skb_queue_head_init(&flow->queue); +} + +static void fq_tin_init(struct fq_tin *tin) +{ + INIT_LIST_HEAD(&tin->new_flows); + INIT_LIST_HEAD(&tin->old_flows); +} + +static int fq_init(struct fq *fq, int flows_cnt) +{ + int i; + + memset(fq, 0, sizeof(fq[0])); + INIT_LIST_HEAD(&fq->backlogs); + spin_lock_init(&fq->lock); + fq->flows_cnt = max_t(u32, flows_cnt, 1); + fq->perturbation = prandom_u32(); + fq->quantum = 300; + fq->limit = 8192; + + fq->flows = kcalloc(fq->flows_cnt, sizeof(fq->flows[0]), GFP_KERNEL); + if (!fq->flows) + return -ENOMEM; + + for (i = 0; i < fq->flows_cnt; i++) + fq_flow_init(&fq->flows[i]); + + return 0; +} + +static void fq_reset(struct fq *fq, + fq_skb_free_t free_func) +{ + int i; + + for (i = 0; i < fq->flows_cnt; i++) + fq_flow_reset(fq, &fq->flows[i], free_func); + + kfree(fq->flows); + fq->flows = NULL; +} + +#endif