diff mbox

[v2] unix: properly account for FDs passed over unix sockets

Message ID 56B0F574.5080105@stressinduktion.org
State RFC, archived
Delegated to: David Miller
Headers show

Commit Message

Hannes Frederic Sowa Feb. 2, 2016, 6:29 p.m. UTC
On 02.02.2016 18:34, David Herrmann wrote:
> Hi
>
> On Sun, Jan 10, 2016 at 7:54 AM, Willy Tarreau <w@1wt.eu> wrote:
>> It is possible for a process to allocate and accumulate far more FDs than
>> the process' limit by sending them over a unix socket then closing them
>> to keep the process' fd count low.
>>
>> This change addresses this problem by keeping track of the number of FDs
>> in flight per user and preventing non-privileged processes from having
>> more FDs in flight than their configured FD limit.
>
> This is not really what this patch does. This patch rather prevents
> processes from having more of their owned *files* in flight than their
> configured FD limit. See below for details..
>
>> Reported-by: socketpair@gmail.com
>> Reported-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
>> Mitigates: CVE-2013-4312 (Linux 2.0+)
>> Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
>> Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
>> Signed-off-by: Willy Tarreau <w@1wt.eu>
>> ---
>> v2: add reported-by, mitigates and acked-by.
>>
>> It would be nice if (if accepted) it would be backported to -stable as the
>> issue is currently exploitable.
>> ---
>>   include/linux/sched.h |  1 +
>>   net/unix/af_unix.c    | 24 ++++++++++++++++++++----
>>   net/unix/garbage.c    | 13 ++++++++-----
>>   3 files changed, 29 insertions(+), 9 deletions(-)
>>
>> diff --git a/include/linux/sched.h b/include/linux/sched.h
>> index edad7a4..fbf25f1 100644
>> --- a/include/linux/sched.h
>> +++ b/include/linux/sched.h
>> @@ -830,6 +830,7 @@ struct user_struct {
>>          unsigned long mq_bytes; /* How many bytes can be allocated to mqueue? */
>>   #endif
>>          unsigned long locked_shm; /* How many pages of mlocked shm ? */
>> +       unsigned long unix_inflight;    /* How many files in flight in unix sockets */
>>
>>   #ifdef CONFIG_KEYS
>>          struct key *uid_keyring;        /* UID specific keyring */
>> diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
>> index 45aebd9..d6d7b43 100644
>> --- a/net/unix/af_unix.c
>> +++ b/net/unix/af_unix.c
>> @@ -1499,6 +1499,21 @@ static void unix_destruct_scm(struct sk_buff *skb)
>>          sock_wfree(skb);
>>   }
>>
>> +/*
>> + * The "user->unix_inflight" variable is protected by the garbage
>> + * collection lock, and we just read it locklessly here. If you go
>> + * over the limit, there might be a tiny race in actually noticing
>> + * it across threads. Tough.
>
> This limit is checked once per transaction. IIRC, a transaction can
> carry 255 files. Thus, it is easy to exceed this limit without any
> race involved.
>
>> + */
>> +static inline bool too_many_unix_fds(struct task_struct *p)
>> +{
>> +       struct user_struct *user = current_user();
>> +
>> +       if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE)))
>> +               return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
>> +       return false;
>> +}
>> +
>>   #define MAX_RECURSION_LEVEL 4
>>
>>   static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
>> @@ -1507,6 +1522,9 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
>>          unsigned char max_level = 0;
>>          int unix_sock_count = 0;
>>
>> +       if (too_many_unix_fds(current))
>> +               return -ETOOMANYREFS;
>> +
>
> Ignoring the capabilities, this effectively resolves to:
>
>      if (current_cred()->user->unix_inflight > rlimit(RLIMIT_NOFILE))
>              return -ETOOMANYREFS;
>
> It limits the number of inflight FDs of the *current* user to its *own* limit.
>
> But..
>
>>          for (i = scm->fp->count - 1; i >= 0; i--) {
>>                  struct sock *sk = unix_get_socket(scm->fp->fp[i]);
>>
>> @@ -1528,10 +1546,8 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
>>          if (!UNIXCB(skb).fp)
>>                  return -ENOMEM;
>>
>> -       if (unix_sock_count) {
>> -               for (i = scm->fp->count - 1; i >= 0; i--)
>> -                       unix_inflight(scm->fp->fp[i]);
>> -       }
>> +       for (i = scm->fp->count - 1; i >= 0; i--)
>> +               unix_inflight(scm->fp->fp[i]);
>>          return max_level;
>>   }
>>
>> diff --git a/net/unix/garbage.c b/net/unix/garbage.c
>> index a73a226..8fcdc22 100644
>> --- a/net/unix/garbage.c
>> +++ b/net/unix/garbage.c
>> @@ -120,11 +120,11 @@ void unix_inflight(struct file *fp)
>>   {
>>          struct sock *s = unix_get_socket(fp);
>>
>> +       spin_lock(&unix_gc_lock);
>> +
>>          if (s) {
>>                  struct unix_sock *u = unix_sk(s);
>>
>> -               spin_lock(&unix_gc_lock);
>> -
>>                  if (atomic_long_inc_return(&u->inflight) == 1) {
>>                          BUG_ON(!list_empty(&u->link));
>>                          list_add_tail(&u->link, &gc_inflight_list);
>> @@ -132,25 +132,28 @@ void unix_inflight(struct file *fp)
>>                          BUG_ON(list_empty(&u->link));
>>                  }
>>                  unix_tot_inflight++;
>> -               spin_unlock(&unix_gc_lock);
>>          }
>> +       fp->f_cred->user->unix_inflight++;
>
> ..this modifies the limit of the owner of the file you send. As such,
> if you only send files that you don't own, you will continuously bump
> the limit of the file-owner, but never your own. As such, the
> protection above will never fire.
>
> Is this really what this patch is supposed to do? Shouldn't the loop
> in unix_attach_fds() rather check for this:
>
>      if (unlikely(fp->f_cred->user->unix_inflight > nofile &&
>              !file_ns_capable(fp, &init_user_ns, CAP_SYS_RESOURCE) &&
>              !file_ns_capable(fp, &init_user_ns, CAP_SYS_ADMIN)))
>              return -ETOOMANYREFS;
>
> (or keeping capable() rather than file_ns_capable(), depending on the
> wanted semantics)
>
> Furthermore, with this patch in place, a process better not pass any
> file-descriptors to an untrusted process. This might have been a
> stupid idea in the first place, but I would have expected the patch to
> mention this. Because with this patch in place, a receiver of a
> file-descriptor can bump the unix_inflight limit of the sender
> arbitrarily. Did anyone notify the dbus maintainers of this? They
> might wanna document this, if not already done (CC: smcv).
>
> Why doesn't this patch modify "unix_inflight" of the sender rather
> than the passed descriptors? It would require pinning the affected
> user in 'scm' contexts, but that is probably already the case, anyway.
> This way, the original ETOOMANYREFS check would be perfectly fine.
>
> Anyway, can someone provide a high-level description of what exactly
> this patch is supposed to do? Which operation should be limited, who
> should inflight FDs be accounted on, and which rlimit should be used
> on each operation? I'm having a hard time auditing existing
> user-space, given just the scarce description of this commit.

Yes, all your observations are true. I think we need to explicitly
need to refer to the sending socket while attaching the fds.

Good thing is that we don't switch skb->sk while appending the skb to
the receive queue. First quick prototype which is completely untested
(only compile-tested):

Comments

Linus Torvalds Feb. 2, 2016, 7:29 p.m. UTC | #1
On Tue, Feb 2, 2016 at 10:29 AM, Hannes Frederic Sowa
<hannes@stressinduktion.org> wrote:
>>
>> Anyway, can someone provide a high-level description of what exactly
>> this patch is supposed to do? Which operation should be limited, who
>> should inflight FDs be accounted on, and which rlimit should be used
>> on each operation? I'm having a hard time auditing existing
>> user-space, given just the scarce description of this commit.
>
> Yes, all your observations are true. I think we need to explicitly
> need to refer to the sending socket while attaching the fds.

I don't think that really helps. Maybe somebody passed a unix domain
socket around, and now we're crediting the wrong socket again.

So how about we actually add a "struct cred *" to the scm_cookie
itself, and we initialize it to "get_current_cred()". And then always
use that.

That way it's always the person who actually does the send (rather
than the opener of the socket _or_ the opener of the file that gets
passed around) that gets credited, and thanks to the cred pointer we
can then de-credit them properly.

Hmm?

                Linus
Hannes Frederic Sowa Feb. 2, 2016, 8:32 p.m. UTC | #2
On 02.02.2016 20:29, Linus Torvalds wrote:
> On Tue, Feb 2, 2016 at 10:29 AM, Hannes Frederic Sowa
> <hannes@stressinduktion.org> wrote:
>>>
>>> Anyway, can someone provide a high-level description of what exactly
>>> this patch is supposed to do? Which operation should be limited, who
>>> should inflight FDs be accounted on, and which rlimit should be used
>>> on each operation? I'm having a hard time auditing existing
>>> user-space, given just the scarce description of this commit.
>>
>> Yes, all your observations are true. I think we need to explicitly
>> need to refer to the sending socket while attaching the fds.
>
> I don't think that really helps. Maybe somebody passed a unix domain
> socket around, and now we're crediting the wrong socket again.

I was struggling a bit what you meant but I think you are referring to 
the following scenario:

process-1 opens up a unix socket and passes it to process-2 (this 
process has different credentials) via af-unix. Process-2 then sends 
multiple fds to another destination over this transferred unix-fd.

True, in this case we again account the fds to the wrong process, which 
is bad.

> So how about we actually add a "struct cred *" to the scm_cookie
> itself, and we initialize it to "get_current_cred()". And then always
> use that.

Unfortunately we never transfer a scm_cookie via the skbs but merely use 
it to initialize unix_skb_parms structure in skb->cb and destroy it 
afterwards.

But "struct pid *" in unix_skb_parms should be enough to get us to 
corresponding "struct cred *" so we can decrement the correct counter 
during skb destruction.

So:

We increment current task's unix_inflight and also check the current 
task's limit during attaching fds to skbs and decrement the inflight 
counter via "struct pid *". This looks like it should work.

> That way it's always the person who actually does the send (rather
> than the opener of the socket _or_ the opener of the file that gets
> passed around) that gets credited, and thanks to the cred pointer we
> can then de-credit them properly.

Exactly, I try to implement that. Thanks a lot!

Hannes
Willy Tarreau Feb. 2, 2016, 8:39 p.m. UTC | #3
On Tue, Feb 02, 2016 at 09:32:56PM +0100, Hannes Frederic Sowa wrote:
> But "struct pid *" in unix_skb_parms should be enough to get us to 
> corresponding "struct cred *" so we can decrement the correct counter 
> during skb destruction.
> 
> So:
> 
> We increment current task's unix_inflight and also check the current 
> task's limit during attaching fds to skbs and decrement the inflight 
> counter via "struct pid *". This looks like it should work.

I like it as well, the principle sounds sane.

> >That way it's always the person who actually does the send (rather
> >than the opener of the socket _or_ the opener of the file that gets
> >passed around) that gets credited, and thanks to the cred pointer we
> >can then de-credit them properly.
> 
> Exactly, I try to implement that. Thanks a lot!

Thanks to you Hannes, I appreciate that you work on it, it would take
much more time to me to dig into this.

Willy
Linus Torvalds Feb. 2, 2016, 8:44 p.m. UTC | #4
On Tue, Feb 2, 2016 at 12:32 PM, Hannes Frederic Sowa
<hannes@stressinduktion.org> wrote:
>
> Unfortunately we never transfer a scm_cookie via the skbs but merely use it
> to initialize unix_skb_parms structure in skb->cb and destroy it afterwards.

Ok, I obviously didn't check very closely.

> But "struct pid *" in unix_skb_parms should be enough to get us to
> corresponding "struct cred *" so we can decrement the correct counter during
> skb destruction.

Umm. I think the "struct cred" may change in between, can't it?

So I don't think you can later look up the cred based on the pid.

Could we add the cred pointer (or just the user pointer) to the unix_skb_parms?

Or maybe just add it to the "struct scm_fp_list"?

               Linus
Willy Tarreau Feb. 2, 2016, 8:49 p.m. UTC | #5
On Tue, Feb 02, 2016 at 12:44:54PM -0800, Linus Torvalds wrote:
> On Tue, Feb 2, 2016 at 12:32 PM, Hannes Frederic Sowa
> <hannes@stressinduktion.org> wrote:
> > But "struct pid *" in unix_skb_parms should be enough to get us to
> > corresponding "struct cred *" so we can decrement the correct counter during
> > skb destruction.
> 
> Umm. I think the "struct cred" may change in between, can't it?

You mean for example in case of setuid() or something like this ?

willy
Linus Torvalds Feb. 2, 2016, 8:53 p.m. UTC | #6
On Tue, Feb 2, 2016 at 12:49 PM, Willy Tarreau <w@1wt.eu> wrote:
> On Tue, Feb 02, 2016 at 12:44:54PM -0800, Linus Torvalds wrote:
>>
>> Umm. I think the "struct cred" may change in between, can't it?
>
> You mean for example in case of setuid() or something like this ?

Yeah. I'd be worried about looking up the creds or user structure
later, and possibly getting a different one.

I'd much rather look it up at attach time, and just carry an extra
pointer around. That seems to be an inherently safer model where
there's no worry about "what happens if the user does X in the
meantime".

                  Linus
Hannes Frederic Sowa Feb. 2, 2016, 8:56 p.m. UTC | #7
On 02.02.2016 21:44, Linus Torvalds wrote:
> On Tue, Feb 2, 2016 at 12:32 PM, Hannes Frederic Sowa
> <hannes@stressinduktion.org> wrote:
>>
>> Unfortunately we never transfer a scm_cookie via the skbs but merely use it
>> to initialize unix_skb_parms structure in skb->cb and destroy it afterwards.
>
> Ok, I obviously didn't check very closely.
>
>> But "struct pid *" in unix_skb_parms should be enough to get us to
>> corresponding "struct cred *" so we can decrement the correct counter during
>> skb destruction.
>
> Umm. I think the "struct cred" may change in between, can't it?

While reviewing the task_struct->cred/real_cred assignments, I noticed 
that, too. I already went the same way and added a "struct cred *" to 
unix_skb_parms.

> So I don't think you can later look up the cred based on the pid.

Yep, it also looked to dangerous to me.

> Could we add the cred pointer (or just the user pointer) to the unix_skb_parms?
>
> Or maybe just add it to the "struct scm_fp_list"?

scm_fp_list seems to be an even better place. I have a look, thanks!

Hannes
Willy Tarreau Feb. 2, 2016, 8:58 p.m. UTC | #8
On Tue, Feb 02, 2016 at 12:53:20PM -0800, Linus Torvalds wrote:
> On Tue, Feb 2, 2016 at 12:49 PM, Willy Tarreau <w@1wt.eu> wrote:
> > On Tue, Feb 02, 2016 at 12:44:54PM -0800, Linus Torvalds wrote:
> >>
> >> Umm. I think the "struct cred" may change in between, can't it?
> >
> > You mean for example in case of setuid() or something like this ?
> 
> Yeah. I'd be worried about looking up the creds or user structure
> later, and possibly getting a different one.
> 
> I'd much rather look it up at attach time, and just carry an extra
> pointer around. That seems to be an inherently safer model where
> there's no worry about "what happens if the user does X in the
> meantime".

Normally we can only change from root to non-root, and we don't apply
the limits to root, so if we have the ability to only store one bit
indicating "not tracked" or to simply nullify one pointer to avoid
counting in flight FDs for root, we don't take the risk to recredit
them to the target user after a change.

I just don't know if we can do that though :-/

Willy
David Laight Feb. 3, 2016, 12:19 p.m. UTC | #9
From: Linus Torvalds

> Sent: 02 February 2016 20:45

> On Tue, Feb 2, 2016 at 12:32 PM, Hannes Frederic Sowa

> <hannes@stressinduktion.org> wrote:

> >

> > Unfortunately we never transfer a scm_cookie via the skbs but merely use it

> > to initialize unix_skb_parms structure in skb->cb and destroy it afterwards.

> 

> Ok, I obviously didn't check very closely.

> 

> > But "struct pid *" in unix_skb_parms should be enough to get us to

> > corresponding "struct cred *" so we can decrement the correct counter during

> > skb destruction.

> 

> Umm. I think the "struct cred" may change in between, can't it?

> 

> So I don't think you can later look up the cred based on the pid.

> 

> Could we add the cred pointer (or just the user pointer) to the unix_skb_parms?

> 

> Or maybe just add it to the "struct scm_fp_list"?


Is that going to work if the sending process exits before the message is read?
You need a reference count against the structure than contains the count.
I think this is 'struct user' not 'struct cred'

	David
diff mbox

Patch

diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index 2a91a0561a4783..d7148ae04b02dd 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -6,8 +6,8 @@ 
  #include <linux/mutex.h>
  #include <net/sock.h>

-void unix_inflight(struct file *fp);
-void unix_notinflight(struct file *fp);
+void unix_inflight(struct sock *sk, struct file *fp);
+void unix_notinflight(struct sock *sk, struct file *fp);
  void unix_gc(void);
  void wait_for_unix_gc(void);
  struct sock *unix_get_socket(struct file *filp);
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 49d5093eb0553a..7bbb4762899924 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1496,7 +1496,7 @@  static void unix_detach_fds(struct scm_cookie 
*scm, struct sk_buff *skb)
  	UNIXCB(skb).fp = NULL;

  	for (i = scm->fp->count-1; i >= 0; i--)
-		unix_notinflight(scm->fp->fp[i]);
+		unix_notinflight(skb->sk, scm->fp->fp[i]);
  }

  static void unix_destruct_scm(struct sk_buff *skb)
@@ -1561,7 +1561,7 @@  static int unix_attach_fds(struct scm_cookie *scm, 
struct sk_buff *skb)
  		return -ENOMEM;

  	for (i = scm->fp->count - 1; i >= 0; i--)
-		unix_inflight(scm->fp->fp[i]);
+		unix_inflight(skb->sk, scm->fp->fp[i]);
  	return max_level;
  }

diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index 8fcdc2283af50c..874c42161717f0 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -116,7 +116,7 @@  struct sock *unix_get_socket(struct file *filp)
   * descriptor if it is for an AF_UNIX socket.
   */

-void unix_inflight(struct file *fp)
+void unix_inflight(struct sock *sk, struct file *fp)
  {
  	struct sock *s = unix_get_socket(fp);

@@ -133,11 +133,11 @@  void unix_inflight(struct file *fp)
  		}
  		unix_tot_inflight++;
  	}
-	fp->f_cred->user->unix_inflight++;
+	sk->sk_socket->file->f_cred->user->unix_inflight++;
  	spin_unlock(&unix_gc_lock);
  }

-void unix_notinflight(struct file *fp)
+void unix_notinflight(struct sock *sk, struct file *fp)
  {
  	struct sock *s = unix_get_socket(fp);

@@ -152,7 +152,7 @@  void unix_notinflight(struct file *fp)
  			list_del_init(&u->link);
  		unix_tot_inflight--;
  	}
-	fp->f_cred->user->unix_inflight--;
+	sk->sk_socket->file->f_cred->user->unix_inflight++;
  	spin_unlock(&unix_gc_lock);
  }