diff mbox

[net-next,2/2] net:add socket option for low latency polling

Message ID 20130611142438.17879.6389.stgit@ladj378.jer.intel.com
State Changes Requested, archived
Delegated to: David Miller
Headers show

Commit Message

Eliezer Tamir June 11, 2013, 2:24 p.m. UTC
adds a socket option for low latency polling.
This allows overriding the global sysctl value with a per-socket one.

Signed-off-by: Eliezer Tamir <eliezer.tamir@linux.intel.com>
---

 arch/alpha/include/uapi/asm/socket.h   |    2 ++
 arch/avr32/include/uapi/asm/socket.h   |    2 ++
 arch/cris/include/uapi/asm/socket.h    |    2 ++
 arch/frv/include/uapi/asm/socket.h     |    2 ++
 arch/h8300/include/uapi/asm/socket.h   |    2 ++
 arch/ia64/include/uapi/asm/socket.h    |    2 ++
 arch/m32r/include/uapi/asm/socket.h    |    2 ++
 arch/mips/include/uapi/asm/socket.h    |    2 ++
 arch/mn10300/include/uapi/asm/socket.h |    2 ++
 arch/parisc/include/uapi/asm/socket.h  |    2 ++
 arch/powerpc/include/uapi/asm/socket.h |    2 ++
 arch/s390/include/uapi/asm/socket.h    |    2 ++
 arch/sparc/include/uapi/asm/socket.h   |    2 ++
 arch/xtensa/include/uapi/asm/socket.h  |    2 ++
 include/net/ll_poll.h                  |   10 +++++-----
 include/net/sock.h                     |    2 ++
 include/uapi/asm-generic/socket.h      |    2 ++
 net/core/sock.c                        |   26 ++++++++++++++++++++++++++
 18 files changed, 63 insertions(+), 5 deletions(-)


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Eric Dumazet June 11, 2013, 2:45 p.m. UTC | #1
On Tue, 2013-06-11 at 17:24 +0300, Eliezer Tamir wrote:
> adds a socket option for low latency polling.
> This allows overriding the global sysctl value with a per-socket one.
> 
> Signed-off-by: Eliezer Tamir <eliezer.tamir@linux.intel.com>
> ---
> 
>  
> -static inline cycles_t ll_end_time(void)
> +static inline cycles_t ll_end_time(struct sock *sk)
>  {
> -	return TSC_MHZ * ACCESS_ONCE(sysctl_net_ll_poll) + get_cycles();
> +	return TSC_MHZ * ACCESS_ONCE(sk->sk_ll_usec) + get_cycles();
>  }

Hmm, sk_ll_usec is an unsigned int.

(cycles_t)TSC_MHZ * ACCESS_ONCE(sk->sk_ll_usec) + get_cycles();

>  
> +#ifdef CONFIG_NET_LL_RX_POLL
> +	case SO_LL:
> +		if (!capable(CAP_NET_ADMIN))
> +			ret = -EACCES;
> +		else {
> +			unsigned long ulval;

Why an "unsigned long" ?


> +
> +			if (!copy_from_user(&ulval, optval, sizeof(ulval))
> +							&& ulval >= 0)
> +				sk->sk_ll_usec = ulval;
> +
> +			else
> +				ret = -EINVAL;

Just use
	sk->sk_ll_usec = val;

> +
> +		}
> +		break;
> +#endif
>  	default:
>  		ret = -ENOPROTOOPT;
>  		break;
> @@ -944,6 +961,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
>  
>  	union {
>  		int val;
> +		unsigned long ulval;

Why an unsigned long ? 32bit are more than enough.

>  		struct linger ling;
>  		struct timeval tm;
>  	} v;
> @@ -1170,6 +1188,13 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
>  		v.val = sock_flag(sk, SOCK_SELECT_ERR_QUEUE);
>  		break;
>  
> +#ifdef CONFIG_NET_LL_RX_POLL
> +	case SO_LL:
> +		len = sizeof(v.ulval);
> +		v.ulval = sk->sk_ll_usec;
> +		break;
> +#endif
> +



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eliezer Tamir June 11, 2013, 3:37 p.m. UTC | #2
On 11/06/2013 17:45, Eric Dumazet wrote:
> On Tue, 2013-06-11 at 17:24 +0300, Eliezer Tamir wrote:
>> adds a socket option for low latency polling.
>> This allows overriding the global sysctl value with a per-socket one.
>>
>> Signed-off-by: Eliezer Tamir <eliezer.tamir@linux.intel.com>
>> ---
>>
>>
>> -static inline cycles_t ll_end_time(void)
>> +static inline cycles_t ll_end_time(struct sock *sk)
>>   {
>> -	return TSC_MHZ * ACCESS_ONCE(sysctl_net_ll_poll) + get_cycles();
>> +	return TSC_MHZ * ACCESS_ONCE(sk->sk_ll_usec) + get_cycles();
>>   }
>
> Hmm, sk_ll_usec is an unsigned int.

We changed it to an unsigned long in v7, I guess that was gratuitous.
Re-reading your comments on v6 2/5 I realize a cast would have sufficed.
Should I send a patch to revert it to an unsigned int?

Thanks,
Eliezer

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric Dumazet June 11, 2013, 3:58 p.m. UTC | #3
On Tue, 2013-06-11 at 18:37 +0300, Eliezer Tamir wrote:
> On 11/06/2013 17:45, Eric Dumazet wrote:
> > On Tue, 2013-06-11 at 17:24 +0300, Eliezer Tamir wrote:
> >> adds a socket option for low latency polling.
> >> This allows overriding the global sysctl value with a per-socket one.
> >>
> >> Signed-off-by: Eliezer Tamir <eliezer.tamir@linux.intel.com>
> >> ---
> >>
> >>
> >> -static inline cycles_t ll_end_time(void)
> >> +static inline cycles_t ll_end_time(struct sock *sk)
> >>   {
> >> -	return TSC_MHZ * ACCESS_ONCE(sysctl_net_ll_poll) + get_cycles();
> >> +	return TSC_MHZ * ACCESS_ONCE(sk->sk_ll_usec) + get_cycles();
> >>   }
> >
> > Hmm, sk_ll_usec is an unsigned int.
> 
> We changed it to an unsigned long in v7, I guess that was gratuitous.
> Re-reading your comments on v6 2/5 I realize a cast would have sufficed.
> Should I send a patch to revert it to an unsigned int?

One sysctl as unsigned long was not a big deal so I was ok with your
change ;)

unsigned int for sk_ll_usec is enough, but using a 32x32->64bit multiply
is probably safer.




--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ben Hutchings June 11, 2013, 8:24 p.m. UTC | #4
On Tue, 2013-06-11 at 17:24 +0300, Eliezer Tamir wrote:
> adds a socket option for low latency polling.
> This allows overriding the global sysctl value with a per-socket one.
[...]
> --- a/net/core/sock.c
> +++ b/net/core/sock.c
> @@ -913,6 +913,23 @@ set_rcvbuf:
>  		sock_valbool_flag(sk, SOCK_SELECT_ERR_QUEUE, valbool);
>  		break;
>  
> +#ifdef CONFIG_NET_LL_RX_POLL
> +	case SO_LL:
> +		if (!capable(CAP_NET_ADMIN))
> +			ret = -EACCES;
[...]

Failed capability checks normally result in EPERM whereas EACCES usually
results from a file permission/ACL/label check.

Perhaps unprivileged users should be allowed to set a value as long as
it's less than or equal to the global value?

Ben.
Eliezer Tamir June 12, 2013, 6:39 a.m. UTC | #5
On 11/06/2013 23:24, Ben Hutchings wrote:
> On Tue, 2013-06-11 at 17:24 +0300, Eliezer Tamir wrote:
>> adds a socket option for low latency polling.
>> This allows overriding the global sysctl value with a per-socket one.
> [...]
>> --- a/net/core/sock.c
>> +++ b/net/core/sock.c
>> @@ -913,6 +913,23 @@ set_rcvbuf:
>>   		sock_valbool_flag(sk, SOCK_SELECT_ERR_QUEUE, valbool);
>>   		break;
>>
>> +#ifdef CONFIG_NET_LL_RX_POLL
>> +	case SO_LL:
>> +		if (!capable(CAP_NET_ADMIN))
>> +			ret = -EACCES;
> [...]
>
> Failed capability checks normally result in EPERM whereas EACCES usually
> results from a file permission/ACL/label check.

OK

> Perhaps unprivileged users should be allowed to set a value as long as
> it's less than or equal to the global value?

I thought of allowing to disable even if you are not privileged, but 
this sounds better.

Thanks!
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h
index eee6ea7..4885825 100644
--- a/arch/alpha/include/uapi/asm/socket.h
+++ b/arch/alpha/include/uapi/asm/socket.h
@@ -81,4 +81,6 @@ 
 
 #define SO_SELECT_ERR_QUEUE	45
 
+#define SO_LL			46
+
 #endif /* _UAPI_ASM_SOCKET_H */
diff --git a/arch/avr32/include/uapi/asm/socket.h b/arch/avr32/include/uapi/asm/socket.h
index 37401f5..79b6179 100644
--- a/arch/avr32/include/uapi/asm/socket.h
+++ b/arch/avr32/include/uapi/asm/socket.h
@@ -74,4 +74,6 @@ 
 
 #define SO_SELECT_ERR_QUEUE	45
 
+#define SO_LL			46
+
 #endif /* __ASM_AVR32_SOCKET_H */
diff --git a/arch/cris/include/uapi/asm/socket.h b/arch/cris/include/uapi/asm/socket.h
index ba409c9..47b1ec5 100644
--- a/arch/cris/include/uapi/asm/socket.h
+++ b/arch/cris/include/uapi/asm/socket.h
@@ -76,6 +76,8 @@ 
 
 #define SO_SELECT_ERR_QUEUE	45
 
+#define SO_LL			46
+
 #endif /* _ASM_SOCKET_H */
 
 
diff --git a/arch/frv/include/uapi/asm/socket.h b/arch/frv/include/uapi/asm/socket.h
index 31dbb5d..dbc0852 100644
--- a/arch/frv/include/uapi/asm/socket.h
+++ b/arch/frv/include/uapi/asm/socket.h
@@ -74,5 +74,7 @@ 
 
 #define SO_SELECT_ERR_QUEUE	45
 
+#define SO_LL			46
+
 #endif /* _ASM_SOCKET_H */
 
diff --git a/arch/h8300/include/uapi/asm/socket.h b/arch/h8300/include/uapi/asm/socket.h
index 5d1c6d0..a38d38a 100644
--- a/arch/h8300/include/uapi/asm/socket.h
+++ b/arch/h8300/include/uapi/asm/socket.h
@@ -74,4 +74,6 @@ 
 
 #define SO_SELECT_ERR_QUEUE	45
 
+#define SO_LL			46
+
 #endif /* _ASM_SOCKET_H */
diff --git a/arch/ia64/include/uapi/asm/socket.h b/arch/ia64/include/uapi/asm/socket.h
index 6b4329f..d3358b7 100644
--- a/arch/ia64/include/uapi/asm/socket.h
+++ b/arch/ia64/include/uapi/asm/socket.h
@@ -83,4 +83,6 @@ 
 
 #define SO_SELECT_ERR_QUEUE	45
 
+#define SO_LL			46
+
 #endif /* _ASM_IA64_SOCKET_H */
diff --git a/arch/m32r/include/uapi/asm/socket.h b/arch/m32r/include/uapi/asm/socket.h
index 2a3b59e..44aaf46 100644
--- a/arch/m32r/include/uapi/asm/socket.h
+++ b/arch/m32r/include/uapi/asm/socket.h
@@ -74,4 +74,6 @@ 
 
 #define SO_SELECT_ERR_QUEUE	45
 
+#define SO_LL			46
+
 #endif /* _ASM_M32R_SOCKET_H */
diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h
index 3b21150..6a07992 100644
--- a/arch/mips/include/uapi/asm/socket.h
+++ b/arch/mips/include/uapi/asm/socket.h
@@ -92,4 +92,6 @@ 
 
 #define SO_SELECT_ERR_QUEUE	45
 
+#define SO_LL			46
+
 #endif /* _UAPI_ASM_SOCKET_H */
diff --git a/arch/mn10300/include/uapi/asm/socket.h b/arch/mn10300/include/uapi/asm/socket.h
index b4ce844..db80fd3 100644
--- a/arch/mn10300/include/uapi/asm/socket.h
+++ b/arch/mn10300/include/uapi/asm/socket.h
@@ -74,4 +74,6 @@ 
 
 #define SO_SELECT_ERR_QUEUE	45
 
+#define SO_LL			46
+
 #endif /* _ASM_SOCKET_H */
diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h
index 70c512a..f866fff 100644
--- a/arch/parisc/include/uapi/asm/socket.h
+++ b/arch/parisc/include/uapi/asm/socket.h
@@ -73,6 +73,8 @@ 
 
 #define SO_SELECT_ERR_QUEUE	0x4026
 
+#define SO_LL			0x4027
+
 /* O_NONBLOCK clashes with the bits used for socket types.  Therefore we
  * have to define SOCK_NONBLOCK to a different value here.
  */
diff --git a/arch/powerpc/include/uapi/asm/socket.h b/arch/powerpc/include/uapi/asm/socket.h
index a36daf3..405fb09 100644
--- a/arch/powerpc/include/uapi/asm/socket.h
+++ b/arch/powerpc/include/uapi/asm/socket.h
@@ -81,4 +81,6 @@ 
 
 #define SO_SELECT_ERR_QUEUE	45
 
+#define SO_LL			46
+
 #endif	/* _ASM_POWERPC_SOCKET_H */
diff --git a/arch/s390/include/uapi/asm/socket.h b/arch/s390/include/uapi/asm/socket.h
index 2dacb306..0c5105fb 100644
--- a/arch/s390/include/uapi/asm/socket.h
+++ b/arch/s390/include/uapi/asm/socket.h
@@ -80,4 +80,6 @@ 
 
 #define SO_SELECT_ERR_QUEUE	45
 
+#define SO_LL			46
+
 #endif /* _ASM_SOCKET_H */
diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h
index 89f49b6..b46c3fa 100644
--- a/arch/sparc/include/uapi/asm/socket.h
+++ b/arch/sparc/include/uapi/asm/socket.h
@@ -70,6 +70,8 @@ 
 
 #define SO_SELECT_ERR_QUEUE	0x0029
 
+#define SO_LL			0x0030
+
 /* Security levels - as per NRL IPv6 - don't actually do anything */
 #define SO_SECURITY_AUTHENTICATION		0x5001
 #define SO_SECURITY_ENCRYPTION_TRANSPORT	0x5002
diff --git a/arch/xtensa/include/uapi/asm/socket.h b/arch/xtensa/include/uapi/asm/socket.h
index a8f44f5..b21ace4 100644
--- a/arch/xtensa/include/uapi/asm/socket.h
+++ b/arch/xtensa/include/uapi/asm/socket.h
@@ -85,4 +85,6 @@ 
 
 #define SO_SELECT_ERR_QUEUE	45
 
+#define SO_LL			46
+
 #endif	/* _XTENSA_SOCKET_H */
diff --git a/include/net/ll_poll.h b/include/net/ll_poll.h
index bc262f8..4dafd52 100644
--- a/include/net/ll_poll.h
+++ b/include/net/ll_poll.h
@@ -43,14 +43,14 @@  extern unsigned long sysctl_net_ll_poll __read_mostly;
 /* we don't mind a ~2.5% imprecision */
 #define TSC_MHZ (tsc_khz >> 10)
 
-static inline cycles_t ll_end_time(void)
+static inline cycles_t ll_end_time(struct sock *sk)
 {
-	return TSC_MHZ * ACCESS_ONCE(sysctl_net_ll_poll) + get_cycles();
+	return TSC_MHZ * ACCESS_ONCE(sk->sk_ll_usec) + get_cycles();
 }
 
 static inline bool sk_valid_ll(struct sock *sk)
 {
-	return sysctl_net_ll_poll && sk->sk_napi_id &&
+	return sk->sk_ll_usec && sk->sk_napi_id &&
 	       !need_resched() && !signal_pending(current);
 }
 
@@ -62,7 +62,7 @@  static inline bool can_poll_ll(cycles_t end_time)
 
 static inline bool sk_poll_ll(struct sock *sk, int nonblock)
 {
-	cycles_t end_time = ll_end_time();
+	cycles_t end_time = ll_end_time(sk);
 	const struct net_device_ops *ops;
 	struct napi_struct *napi;
 	int rc = false;
@@ -116,7 +116,7 @@  static inline void sk_mark_ll(struct sock *sk, struct sk_buff *skb)
 
 #else /* CONFIG_NET_LL_RX_POLL */
 
-static inline cycles_t ll_end_time(void)
+static inline cycles_t ll_end_time(struct sock *sk)
 {
 	return 0;
 }
diff --git a/include/net/sock.h b/include/net/sock.h
index ac8e181..21db792 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -230,6 +230,7 @@  struct cg_proto;
   *	@sk_wmem_queued: persistent queue size
   *	@sk_forward_alloc: space allocated forward
   *	@sk_napi_id: id of the last napi context to receive data for sk
+  *	@sk_ll_usec: usecs to busypoll when there is no data
   *	@sk_allocation: allocation mode
   *	@sk_sndbuf: size of send buffer in bytes
   *	@sk_flags: %SO_LINGER (l_onoff), %SO_BROADCAST, %SO_KEEPALIVE,
@@ -328,6 +329,7 @@  struct sock {
 #endif
 #ifdef CONFIG_NET_LL_RX_POLL
 	unsigned int		sk_napi_id;
+	unsigned int		sk_ll_usec;
 #endif
 	atomic_t		sk_drops;
 	int			sk_rcvbuf;
diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h
index c5d2e3a..ca3a20d 100644
--- a/include/uapi/asm-generic/socket.h
+++ b/include/uapi/asm-generic/socket.h
@@ -76,4 +76,6 @@ 
 
 #define SO_SELECT_ERR_QUEUE	45
 
+#define SO_LL			46
+
 #endif /* __ASM_GENERIC_SOCKET_H */
diff --git a/net/core/sock.c b/net/core/sock.c
index 788c0da..bddc962 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -913,6 +913,23 @@  set_rcvbuf:
 		sock_valbool_flag(sk, SOCK_SELECT_ERR_QUEUE, valbool);
 		break;
 
+#ifdef CONFIG_NET_LL_RX_POLL
+	case SO_LL:
+		if (!capable(CAP_NET_ADMIN))
+			ret = -EACCES;
+		else {
+			unsigned long ulval;
+
+			if (!copy_from_user(&ulval, optval, sizeof(ulval))
+							&& ulval >= 0)
+				sk->sk_ll_usec = ulval;
+
+			else
+				ret = -EINVAL;
+
+		}
+		break;
+#endif
 	default:
 		ret = -ENOPROTOOPT;
 		break;
@@ -944,6 +961,7 @@  int sock_getsockopt(struct socket *sock, int level, int optname,
 
 	union {
 		int val;
+		unsigned long ulval;
 		struct linger ling;
 		struct timeval tm;
 	} v;
@@ -1170,6 +1188,13 @@  int sock_getsockopt(struct socket *sock, int level, int optname,
 		v.val = sock_flag(sk, SOCK_SELECT_ERR_QUEUE);
 		break;
 
+#ifdef CONFIG_NET_LL_RX_POLL
+	case SO_LL:
+		len = sizeof(v.ulval);
+		v.ulval = sk->sk_ll_usec;
+		break;
+#endif
+
 	default:
 		return -ENOPROTOOPT;
 	}
@@ -2288,6 +2313,7 @@  void sock_init_data(struct socket *sock, struct sock *sk)
 
 #ifdef CONFIG_NET_LL_RX_POLL
 	sk->sk_napi_id		=	0;
+	sk->sk_ll_usec		=	sysctl_net_ll_poll;
 #endif
 
 	/*