@@ -69,6 +69,10 @@
#define SO_RXQ_OVFL 40
+#define SO_EPOOL_QLEN 41
+#define SO_EPOOL_SIZE 42
+#define SO_EPOOL_MODE 43
+
/* O_NONBLOCK clashes with the bits used for socket types. Therefore we
* have to define SOCK_NONBLOCK to a different value here.
*/
@@ -62,4 +62,7 @@
#define SO_RXQ_OVFL 40
+#define SO_EPOOL_QLEN 41
+#define SO_EPOOL_SIZE 42
+#define SO_EPOOL_MODE 43
#endif /* _ASM_SOCKET_H */
@@ -62,4 +62,7 @@
#define SO_RXQ_OVFL 40
+#define SO_EPOOL_QLEN 41
+#define SO_EPOOL_SIZE 42
+#define SO_EPOOL_MODE 43
#endif /* __ASM_AVR32_SOCKET_H */
@@ -64,6 +64,7 @@
#define SO_RXQ_OVFL 40
+#define SO_EPOOL_QLEN 41
+#define SO_EPOOL_SIZE 42
+#define SO_EPOOL_MODE 43
#endif /* _ASM_SOCKET_H */
-
-
@@ -62,5 +62,7 @@
#define SO_RXQ_OVFL 40
+#define SO_EPOOL_QLEN 41
+#define SO_EPOOL_SIZE 42
+#define SO_EPOOL_MODE 43
#endif /* _ASM_SOCKET_H */
-
@@ -62,4 +62,7 @@
#define SO_RXQ_OVFL 40
+#define SO_EPOOL_QLEN 41
+#define SO_EPOOL_SIZE 42
+#define SO_EPOOL_MODE 43
#endif /* _ASM_SOCKET_H */
@@ -71,4 +71,7 @@
#define SO_RXQ_OVFL 40
+#define SO_EPOOL_QLEN 41
+#define SO_EPOOL_SIZE 42
+#define SO_EPOOL_MODE 43
#endif /* _ASM_IA64_SOCKET_H */
@@ -62,4 +62,7 @@
#define SO_RXQ_OVFL 40
+#define SO_EPOOL_QLEN 41
+#define SO_EPOOL_SIZE 42
+#define SO_EPOOL_MODE 43
#endif /* _ASM_M32R_SOCKET_H */
@@ -62,4 +62,7 @@
#define SO_RXQ_OVFL 40
+#define SO_EPOOL_QLEN 41
+#define SO_EPOOL_SIZE 42
+#define SO_EPOOL_MODE 43
#endif /* _ASM_SOCKET_H */
@@ -82,6 +82,9 @@ To add: #define SO_REUSEPORT 0x0200 /* Allow local address and port reuse. */
#define SO_RXQ_OVFL 40
+#define SO_EPOOL_QLEN 41
+#define SO_EPOOL_SIZE 42
+#define SO_EPOOL_MODE 43
#ifdef __KERNEL__
/** sock_type - Socket types
@@ -62,4 +62,7 @@
#define SO_RXQ_OVFL 40
+#define SO_EPOOL_QLEN 41
+#define SO_EPOOL_SIZE 42
+#define SO_EPOOL_MODE 43
#endif /* _ASM_SOCKET_H */
@@ -61,6 +61,9 @@
#define SO_RXQ_OVFL 0x4021
+#define SO_EPOOL_QLEN 0x4022
+#define SO_EPOOL_SIZE 0x4023
+#define SO_EPOOL_MODE 0x4024
/* O_NONBLOCK clashes with the bits used for socket types. Therefore we
* have to define SOCK_NONBLOCK to a different value here.
*/
@@ -69,4 +69,7 @@
#define SO_RXQ_OVFL 40
+#define SO_EPOOL_QLEN 41
+#define SO_EPOOL_SIZE 42
+#define SO_EPOOL_MODE 43
#endif /* _ASM_POWERPC_SOCKET_H */
@@ -70,4 +70,7 @@
#define SO_RXQ_OVFL 40
+#define SO_EPOOL_QLEN 41
+#define SO_EPOOL_SIZE 42
+#define SO_EPOOL_MODE 43
#endif /* _ASM_SOCKET_H */
@@ -58,6 +58,9 @@
#define SO_RXQ_OVFL 0x0024
+#define SO_EPOOL_QLEN 0x0025
+#define SO_EPOOL_SIZE 0x0026
+#define SO_EPOOL_MODE 0x0027
/* Security levels - as per NRL IPv6 - don't actually do anything */
#define SO_SECURITY_AUTHENTICATION 0x5001
#define SO_SECURITY_ENCRYPTION_TRANSPORT 0x5002
@@ -73,4 +73,7 @@
#define SO_RXQ_OVFL 40
+#define SO_EPOOL_QLEN 41
+#define SO_EPOOL_SIZE 42
+#define SO_EPOOL_MODE 43
#endif /* _XTENSA_SOCKET_H */
@@ -64,4 +64,8 @@
#define SO_DOMAIN 39
#define SO_RXQ_OVFL 40
+
+#define SO_EPOOL_QLEN 41 /* emergency skb pool: max number of queued skbs */
+#define SO_EPOOL_SIZE 42 /* emergency skb pool: per-skb buffer size (read-only) */
+#define SO_EPOOL_MODE 43 /* enable/disable emergency-pool tx for a socket */
#endif /* __ASM_GENERIC_SOCKET_H */
@@ -1095,6 +1095,28 @@ struct net_device {
};
#define to_net_dev(d) container_of(d, struct net_device, dev)
+/**
+ * dev_put - release reference to device
+ * @dev: network device
+ *
+ * Release reference to device to allow it to be freed. (Moved up so the net_recycle_* helpers below can call it.)
+ */
+static inline void dev_put(struct net_device *dev)
+{
+	atomic_dec(&dev->refcnt);
+}
+
+/**
+ * dev_hold - get reference to device
+ * @dev: network device
+ *
+ * Hold reference to device to keep it from being freed. (Moved up alongside dev_put().)
+ */
+static inline void dev_hold(struct net_device *dev)
+{
+	atomic_inc(&dev->refcnt);
+}
+
static inline void net_recycle_init(struct net_device *dev, u32 qlen, u32 size)
{
dev->rx_rec_skbs_max = qlen;
@@ -1118,9 +1140,13 @@ static inline void net_recycle_cleanup(struct net_device *dev)
static inline void net_recycle_add(struct net_device *dev, struct sk_buff *skb)
{
+ if (skb->emerg_dev) {
+ dev_put(skb->emerg_dev); /* drop the ref taken when the skb left the pool */
+ skb->emerg_dev = NULL;
+ }
if (skb_queue_len(&dev->rx_recycle) < dev->rx_rec_skbs_max &&
skb_recycle_check(skb, dev->rx_rec_skb_size))
- __skb_queue_head(&dev->rx_recycle, skb);
+ skb_queue_head(&dev->rx_recycle, skb); /* locked variant: pool is now also filled/drained from process context */
else
dev_kfree_skb_any(skb);
}
@@ -1129,7 +1155,7 @@ static inline struct sk_buff *net_recycle_get(struct net_device *dev)
{
struct sk_buff *skb;
- skb = __skb_dequeue(&dev->rx_recycle);
+ skb = skb_dequeue(&dev->rx_recycle); /* locked to match skb_queue_head() in net_recycle_add() */
if (skb)
return skb;
return netdev_alloc_skb(dev, dev->rx_rec_skb_size);
@@ -1783,28 +1809,6 @@ extern int netdev_budget;
/* Called by rtnetlink.c:rtnl_unlock() */
extern void netdev_run_todo(void);
-/**
- * dev_put - release reference to device
- * @dev: network device
- *
- * Release reference to device to allow it to be freed.
- */
-static inline void dev_put(struct net_device *dev)
-{
- atomic_dec(&dev->refcnt);
-}
-
-/**
- * dev_hold - get reference to device
- * @dev: network device
- *
- * Hold reference to device to keep it from being freed.
- */
-static inline void dev_hold(struct net_device *dev)
-{
- atomic_inc(&dev->refcnt);
-}
-
/* Carrier loss detection, dial on demand. The functions netif_carrier_on
* and _off may be called from IRQ context, but it is caller
* who is responsible for serialization of these calls.
@@ -319,6 +319,7 @@ struct sk_buff {
struct sock *sk;
struct net_device *dev;
+ struct net_device *emerg_dev; /* non-NULL while skb is out of a device emergency pool; holds a dev ref */
/*
* This is the control buffer. It is free to use for every
@@ -314,6 +314,8 @@ struct sock {
#endif
__u32 sk_mark;
u32 sk_classid;
+ u32 emerg_en; /* SO_EPOOL_MODE: emergency-pool tx allocation enabled for this socket */
+ /* XXX 4 bytes hole on 64 bit */
void (*sk_state_change)(struct sock *sk);
void (*sk_data_ready)(struct sock *sk, int bytes);
void (*sk_write_space)(struct sock *sk);
@@ -425,6 +425,13 @@ static void skb_release_all(struct sk_buff *skb)
void __kfree_skb(struct sk_buff *skb)
{
+ struct net_device *ndev = skb->emerg_dev; /* set only for emergency-pool skbs */
+
+ if (ndev) {
+ net_recycle_add(ndev, skb); /* recycle or free; drops the dev ref — NOTE(review): head state presumably released by skb_recycle_check(), verify */
+ return;
+ }
+
skb_release_all(skb);
kfree_skbmem(skb);
}
@@ -563,6 +570,7 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb)
{
#define C(x) n->x = skb->x
+ n->emerg_dev = NULL; /* a clone must never return to the pool or double-put the dev */
n->next = n->prev = NULL;
n->sk = NULL;
__copy_skb_header(n, skb);
@@ -472,6 +472,75 @@ static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool)
sock_reset_flag(sk, bit);
}
+/* SO_EPOOL_QLEN setsockopt: resize the bound device's emergency skb pool. */
+static int sock_epool_set_qlen(struct sock *sk, int val)
+{
+	struct net *net = sock_net(sk);
+	struct net_device *dev;
+
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+	if (val < 0)
+		return -EINVAL;	/* a negative val would become a huge u32 limit */
+
+	if (!sk->sk_bound_dev_if)
+		return -ENODEV;
+	dev = dev_get_by_index(net, sk->sk_bound_dev_if);
+	if (!dev)
+		return -ENODEV;
+
+	net_recycle_qlen(dev, val);
+	dev_put(dev);	/* release the ref taken by dev_get_by_index() */
+	return 0;
+}
+
+/* SO_EPOOL_MODE setsockopt: enable emergency tx; pre-fills the device pool. */
+static int sock_epool_set_mode(struct sock *sk, int val)
+{
+	int ret;
+	struct net *net = sock_net(sk);
+	struct net_device *dev;
+
+	if (!val) {
+		sk->emerg_en = 0;	/* disabling needs no privilege */
+		return 0;
+	}
+	if (sk->emerg_en && val)
+		return -EBUSY;
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+	if (!sk->sk_bound_dev_if)
+		return -ENODEV;
+	dev = dev_get_by_index(net, sk->sk_bound_dev_if);
+	if (!dev)
+		return -ENODEV;
+	ret = -ENODEV;
+	if (!dev->rx_rec_skb_size)	/* pool was never initialized */
+		goto out;
+
+	do {
+		struct sk_buff *skb;
+
+		if (skb_queue_len(&dev->rx_recycle) >= dev->rx_rec_skbs_max) {
+			ret = 0;	/* pool is full */
+			break;
+		}
+
+		skb = __netdev_alloc_skb(dev, dev->rx_rec_skb_size, GFP_KERNEL);
+		if (!skb) {
+			ret = -ENOMEM;
+			break;
+		}
+		net_recycle_add(dev, skb);
+	} while (1);
+
+	if (!ret)
+		sk->emerg_en = 1;
+out:
+	dev_put(dev);
+	return ret;
+}
+
/*
* This is meant for all protocols to use and covers goings on
* at the socket level. Everything here is generic.
@@ -740,6 +805,15 @@ set_rcvbuf:
else
sock_reset_flag(sk, SOCK_RXQ_OVFL);
break;
+ case SO_EPOOL_QLEN:
+ ret = sock_epool_set_qlen(sk, val);
+ break;
+ case SO_EPOOL_SIZE:
+ ret = -EINVAL; /* skb size is fixed at pool init; read-only via getsockopt */
+ break;
+ case SO_EPOOL_MODE:
+ ret = sock_epool_set_mode(sk, valbool);
+ break;
default:
ret = -ENOPROTOOPT;
break;
@@ -961,6 +1035,35 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
v.val = !!sock_flag(sk, SOCK_RXQ_OVFL);
break;
+ case SO_EPOOL_QLEN:
+ {
+ struct net *net = sock_net(sk);
+ struct net_device *dev;
+
+ if (!sk->sk_bound_dev_if)
+ return -ENODEV;
+ dev = dev_get_by_index(net, sk->sk_bound_dev_if);
+ if (!dev)
+ return -ENODEV;
+ v.val = dev->rx_rec_skbs_max;
+ break;
+ }
+ case SO_EPOOL_SIZE:
+ {
+ struct net *net = sock_net(sk);
+ struct net_device *dev;
+
+ if (!sk->sk_bound_dev_if)
+ return -ENODEV;
+ dev = dev_get_by_index(net, sk->sk_bound_dev_if);
+ if (!dev)
+ return -ENODEV;
+ v.val = dev->rx_rec_skb_size;
+ break;
+ }
+ case SO_EPOOL_MODE:
+ v.val = sk->emerg_en;
+ break;
default:
return -ENOPROTOOPT;
}
@@ -1459,6 +1562,37 @@ static long sock_wait_for_wmem(struct sock *sk, long timeo)
return timeo;
}
+static struct sk_buff *alloc_emerg_skb(struct sock *sk, unsigned int skb_len)
+{
+	struct net *net = sock_net(sk);
+	struct net_device *dev;
+	int err;
+	struct sk_buff *skb;
+
+	err = -ENODEV;
+	if (!sk->sk_bound_dev_if)
+		return ERR_PTR(err);
+	dev = dev_get_by_index(net, sk->sk_bound_dev_if);
+	if (!dev)
+		return ERR_PTR(err);
+	err = -EINVAL;
+	if (dev->rx_rec_skb_size < skb_len) { /* pool buffers too small for this request */
+		dev_put(dev);
+		return ERR_PTR(err);
+	}
+	skb = skb_dequeue(&dev->rx_recycle); /* NULL when the pool is empty */
+	if (!skb) {
+		dev_put(dev);
+		err = -ENOBUFS;
+		return ERR_PTR(err);
+	}
+	/*
+	 * dev will be put once the skb is back from
+	 * its journey.
+	 */
+	skb->emerg_dev = dev;
+	return skb;
+}
/*
* Generic send/receive buffer handlers
@@ -1488,6 +1622,14 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
goto failure;
if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
+ if (sk->emerg_en) {
+ skb = alloc_emerg_skb(sk, header_len + data_len); /* NOTE(review): returns a linear pool skb sized for header+data; confirm the paged data_len path is skipped for it */
+ if (IS_ERR(skb)) {
+ err = PTR_ERR(skb); /* NOTE(review): no fallback to normal alloc when the pool is empty — intended? */
+ goto failure;
+ }
+ break;
+ }
skb = alloc_skb(header_len, gfp_mask);
if (skb) {
int npages;