[SRU,Trusty,CVE-2017-18221] mlock: fix mlock count can not decrease in race condition

Message ID 20180420135400.16358-1-kleber.souza@canonical.com
State New
Series [SRU,Trusty,CVE-2017-18221] mlock: fix mlock count can not decrease in race condition

Commit Message

Kleber Sacilotto de Souza April 20, 2018, 1:54 p.m. UTC
From: Yisheng Xie <xieyisheng1@huawei.com>

CVE-2017-18221

Kefeng reported that when running the following test, the Mlocked count
in /proc/meminfo increases permanently:

 [1] testcase
 linux:~ # cat test_mlockal
 grep Mlocked /proc/meminfo
  for j in `seq 0 10`
  do
 	for i in `seq 4 15`
 	do
 		./p_mlockall >> log &
 	done
 	sleep 0.2
 done
 # wait some time to let the mlock counter decrease; 5s may not be enough
 sleep 5
 grep Mlocked /proc/meminfo

 linux:~ # cat p_mlockall.c
 #include <sys/mman.h>
 #include <stdlib.h>
 #include <stdio.h>

 #define SPACE_LEN	4096

 int main(int argc, char **argv)
 {
 	int ret;
 	void *adr = malloc(SPACE_LEN);
 	if (!adr)
 		return -1;

 	ret = mlockall(MCL_CURRENT | MCL_FUTURE);
 	printf("mlockall ret = %d\n", ret);

 	ret = munlockall();
 	printf("munlockall ret = %d\n", ret);

 	free(adr);
 	return 0;
 }

In __munlock_pagevec() we should decrement NR_MLOCK for each page where
we clear the PageMlocked flag. Commit 1ebb7cc6a583 ("mm: munlock: batch
NR_MLOCK zone state updates") introduced a bug where we don't decrement
NR_MLOCK for pages where we clear the flag but fail to isolate them from
the LRU list (e.g. when the pages are on some other CPU's percpu
pagevec). Since PageMlocked stays cleared, the NR_MLOCK accounting gets
permanently disrupted.

Fix it by counting the number of pages whose PageMlocked flag is cleared.
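
The following minimal userspace sketch is an assumed model of that loop
(not the kernel code itself); it contrasts the old formula, which derived
the delta from pagevec_count(&pvec_putback), with the fixed per-page
counting, for the racy case where a page's PageMlocked flag is cleared
but LRU isolation fails:

 /*
  * Illustrative model only: one page munlocks normally, one has its
  * "mlocked" flag cleared but fails LRU isolation (the racy case), and
  * one was already munlocked by someone else.
  */
 #include <stdbool.h>
 #include <stdio.h>

 struct fake_page {
 	bool mlocked;		/* stands in for PageMlocked */
 	bool isolation_fails;	/* page sits on another CPU's percpu pagevec */
 };

 int main(void)
 {
 	struct fake_page pages[] = {
 		{ true,  false },	/* cleared and isolated */
 		{ true,  true  },	/* cleared, isolation fails: the bug */
 		{ false, false },	/* flag already clear */
 	};
 	int nr = sizeof(pages) / sizeof(pages[0]);
 	int putback = 0;	/* pages that would go into pvec_putback */
 	int delta_fixed = -nr;	/* fixed scheme: start at -nr ... */

 	for (int i = 0; i < nr; i++) {
 		if (pages[i].mlocked) {
 			pages[i].mlocked = false;	/* TestClearPageMlocked() */
 			if (!pages[i].isolation_fails)
 				continue;		/* isolated from the LRU */
 			/* isolation failed: flag already cleared, page put back */
 		} else {
 			delta_fixed++;	/* ... skip pages that were not mlocked */
 		}
 		putback++;		/* pagevec_add(&pvec_putback, page) */
 	}

 	int delta_old = -nr + putback;	/* old formula */

 	printf("old   delta_munlocked = %d\n", delta_old);
 	printf("fixed delta_munlocked = %d\n", delta_fixed);
 	printf("NR_MLOCK stays %d page(s) too high with the old formula\n",
 	       delta_old - delta_fixed);
 	return 0;
 }

Tying the NR_MLOCK update to the point where the flag is actually cleared,
rather than inferring it from how many pages end up in pvec_putback, is
what keeps the counter correct in this race.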

Fixes: 1ebb7cc6a583 ("mm: munlock: batch NR_MLOCK zone state updates")
Link: http://lkml.kernel.org/r/1495678405-54569-1-git-send-email-xieyisheng1@huawei.com
Signed-off-by: Yisheng Xie <xieyisheng1@huawei.com>
Reported-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Tested-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Joern Engel <joern@logfs.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Michel Lespinasse <walken@google.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Xishi Qiu <qiuxishi@huawei.com>
Cc: zhongjiang <zhongjiang@huawei.com>
Cc: Hanjun Guo <guohanjun@huawei.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
(backported from commit 70feee0e1ef331b22cc51f383d532a0d043fbdcc)
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
---
 mm/mlock.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

Comments

Khalid Elmously April 20, 2018, 2:23 p.m. UTC | #1
On 2018-04-20 15:54:00, Kleber Souza wrote:
> From: Yisheng Xie <xieyisheng1@huawei.com>
> 
> CVE-2017-18221
> 
> [...]

Acked-by: Khalid Elmously <khalid.elmously@canonical.com>
Colin Ian King April 20, 2018, 3:16 p.m. UTC | #2
On 20/04/18 14:54, Kleber Sacilotto de Souza wrote:
> From: Yisheng Xie <xieyisheng1@huawei.com>
> 
> CVE-2017-18221
> 
> [...]
> 
Looks OK to me,

Acked-by: Colin Ian King <colin.king@canonical.com>
Stefan Bader April 23, 2018, 12:55 p.m. UTC | #3
On 20.04.2018 15:54, Kleber Sacilotto de Souza wrote:
> From: Yisheng Xie <xieyisheng1@huawei.com>
> 
> CVE-2017-18221
> 
> [...]
> ---

Applied to trusty/master-next

> [...]

Patch

diff --git a/mm/mlock.c b/mm/mlock.c
index 1b12dfad0794..a3569727baab 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -300,7 +300,7 @@  static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone)
 {
 	int i;
 	int nr = pagevec_count(pvec);
-	int delta_munlocked;
+	int delta_munlocked = -nr;
 	struct pagevec pvec_putback;
 	int pgrescued = 0;
 
@@ -330,6 +330,7 @@  static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone)
 			}
 
 		} else {
+			delta_munlocked++;
 skip_munlock:
 			/*
 			 * We won't be munlocking this page in the next phase
@@ -341,7 +342,6 @@  skip_munlock:
 			pvec->pages[i] = NULL;
 		}
 	}
-	delta_munlocked = -nr + pagevec_count(&pvec_putback);
 	__mod_zone_page_state(zone, NR_MLOCK, delta_munlocked);
 	spin_unlock_irq(&zone->lru_lock);