| Message ID | 20160815085129.GA3360@dhcp22.suse.cz |
|---|---|
| State | Not Applicable, archived |
| Headers | show |
On Monday 15 of August 2016, Michal Hocko wrote: > [Fixing up linux-mm] > > Ups I had a c&p error in the previous patch. Here is an updated patch. Going to apply this patch now and report again. In the meantime what I have is a while (true); do echo "XX date"; date; echo "XX SLAB"; cat /proc/slabinfo ; echo "XX VMSTAT"; cat /proc/vmstat ; echo "XX free"; free; echo "XX DMESG"; dmesg -T | tail -n 50; /bin/sleep 60;done 2>&1 | tee log loop gathering some data while few OOM conditions happened. I was doing "rm -rf copyX; cp -al original copyX" 10x in parallel. https://ixion.pld-linux.org/~arekm/p2/ext4/log-20160816.txt > --- > From 348e768ab1f885bb6dc3160158c17f043fd7f219 Mon Sep 17 00:00:00 2001 > From: Michal Hocko <mhocko@suse.com> > Date: Sun, 14 Aug 2016 12:23:13 +0200 > Subject: [PATCH] mm, oom: report compaction/migration stats for higher > order requests > > Both oom and the allocation failure reports are not providing any > information about the compaction/migration counters which might give us > a clue what went wrong and why we are OOM for the particular order - > e.g. the compaction fails constantly because it cannot isolate any pages > or that the migration fails. So far we have been asking for /proc/vmstat > content before and after the OOM which is rather clumsy, especially when > the OOM is not 100% reproducible. > > Extend show_mem() to understand a new filter (SHOW_COMPACTION_STATS) > which is enabled only for higher order paths. > > Signed-off-by: Michal Hocko <mhocko@suse.com> > --- > include/linux/mm.h | 1 + > lib/show_mem.c | 14 ++++++++++++++ > mm/oom_kill.c | 2 +- > mm/page_alloc.c | 2 ++ > 4 files changed, 18 insertions(+), 1 deletion(-) > > diff --git a/include/linux/mm.h b/include/linux/mm.h > index 7e44613c5078..b4859547acc4 100644 > --- a/include/linux/mm.h > +++ b/include/linux/mm.h > @@ -1146,6 +1146,7 @@ extern void pagefault_out_of_memory(void); > * various contexts. 
> */ > #define SHOW_MEM_FILTER_NODES (0x0001u) /* disallowed nodes */ > +#define SHOW_COMPACTION_STATS (0x0002u) > > extern void show_free_areas(unsigned int flags); > extern bool skip_free_areas_node(unsigned int flags, int nid); > diff --git a/lib/show_mem.c b/lib/show_mem.c > index 1feed6a2b12a..c0ac5bd2c121 100644 > --- a/lib/show_mem.c > +++ b/lib/show_mem.c > @@ -8,6 +8,7 @@ > #include <linux/mm.h> > #include <linux/quicklist.h> > #include <linux/cma.h> > +#include <linux/vm_event_item.h> > > void show_mem(unsigned int filter) > { > @@ -17,6 +18,19 @@ void show_mem(unsigned int filter) > printk("Mem-Info:\n"); > show_free_areas(filter); > > +#ifdef CONFIG_COMPACTION > + if (filter & SHOW_COMPACTION_STATS) { > + printk("compaction_stall:%lu compaction_fail:%lu " > + "compact_migrate_scanned:%lu compact_free_scanned:%lu " > + "compact_isolated:%lu " > + "pgmigrate_success:%lu pgmigrate_fail:%lu\n", > + global_page_state(COMPACTSTALL), global_page_state(COMPACTFAIL), > + global_page_state(COMPACTMIGRATE_SCANNED), > global_page_state(COMPACTFREE_SCANNED), + > global_page_state(COMPACTISOLATED), > + global_page_state(PGMIGRATE_SUCCESS), > global_page_state(PGMIGRATE_FAIL)); + } > +#endif > + > for_each_online_pgdat(pgdat) { > unsigned long flags; > int zoneid; > diff --git a/mm/oom_kill.c b/mm/oom_kill.c > index 463cdd22d4e0..5e7a09f4dbc9 100644 > --- a/mm/oom_kill.c > +++ b/mm/oom_kill.c > @@ -419,7 +419,7 @@ static void dump_header(struct oom_control *oc, struct > task_struct *p) if (oc->memcg) > mem_cgroup_print_oom_info(oc->memcg, p); > else > - show_mem(SHOW_MEM_FILTER_NODES); > + show_mem(SHOW_MEM_FILTER_NODES | (oc->order)?SHOW_COMPACTION_STATS:0); > if (sysctl_oom_dump_tasks) > dump_tasks(oc->memcg, oc->nodemask); > } > diff --git a/mm/page_alloc.c b/mm/page_alloc.c > index 9d46b65061be..adf0cb655827 100644 > --- a/mm/page_alloc.c > +++ b/mm/page_alloc.c > @@ -2999,6 +2999,8 @@ void warn_alloc_failed(gfp_t gfp_mask, unsigned int > order, const char *fmt, 
...) pr_warn("%s: page allocation failure: > order:%u, mode:%#x(%pGg)\n", current->comm, order, gfp_mask, &gfp_mask); > dump_stack(); > + if (order) > + filter |= SHOW_COMPACTION_STATS; > if (!should_suppress_show_mem()) > show_mem(filter); > }
On Tue 16-08-16 13:18:25, Arkadiusz Miskiewicz wrote: > On Monday 15 of August 2016, Michal Hocko wrote: > > [Fixing up linux-mm] > > > > Ups I had a c&p error in the previous patch. Here is an updated patch. > > > Going to apply this patch now and report again. I mean time what I have is a > > while (true); do echo "XX date"; date; echo "XX SLAB"; cat /proc/slabinfo ; > echo "XX VMSTAT"; cat /proc/vmstat ; echo "XX free"; free; echo "XX DMESG"; > dmesg -T | tail -n 50; /bin/sleep 60;done 2>&1 | tee log > > loop gathering some data while few OOM conditions happened. > > I was doing "rm -rf copyX; cp -al original copyX" 10x in parallel. > > https://ixion.pld-linux.org/~arekm/p2/ext4/log-20160816.txt David was right when assuming it would be the ext4 inode cache which consumes the large portion of the memory. /proc/slabinfo shows ext4_inode_cache consuming between 2.5 to 4.6G of memory. first value last-first pgmigrate_success 1861785 2157917 pgmigrate_fail 335344 1400384 compact_isolated 4106390 5777027 compact_migrate_scanned 113962774 446290647 compact_daemon_wake 17039 43981 compact_fail 645 1039 compact_free_scanned 381701557 793430119 compact_success 217 307 compact_stall 862 1346 which means that we have invoked compaction 1346 times and failed in 77% of cases. It is interesting to see that the migration wasn't all that unsuccessful. We managed to migrate 1.5x more pages than failed. It smells like the compaction just backs off. Could you try to test with patch from http://lkml.kernel.org/r/20160816031222.GC16913@js1304-P5Q-DELUXE please? Ideally on top of linux-next. You can add both the compaction counters patch in the oom report and high order atomic reserves patch on top. Thanks
On Wed 17-08-16 10:34:54, Arkadiusz Miśkiewicz wrote: [...] > With "[PATCH] mm, oom: report compaction/migration stats for higher order > requests" patch: > https://ixion.pld-linux.org/~arekm/p2/ext4/log-20160817.txt > > Didn't count much - all counters are 0 > compaction_stall:0 compaction_fail:0 compact_migrate_scanned:0 > compact_free_scanned:0 compact_isolated:0 pgmigrate_success:0 pgmigrate_fail:0 Dohh, COMPACTION counters are events and those are different than other counters we have. They only have per-cpu representation and so we would have to do + for_each_online_cpu(cpu) { + struct vm_event_state *this = &per_cpu(vm_event_states, cpu); + ret += this->event[item]; + } which is really nasty because, strictly speaking, we would have to do {get,put}_online_cpus around that loop and that uses locking and we do not want to possibly block in this path just because something is in the middle of the hotplug. So let's scratch that patch for now and sorry I haven't realized that earlier. > two processes were killed by OOM (rm and cp), the rest of rm/cp didn't finish > and I'm interrupting it to try that next patch: > > > Could you try to test with > > patch from > > http://lkml.kernel.org/r/20160816031222.GC16913@js1304-P5Q-DELUXE please? > > Ideally on top of linux-next. You can add both the compaction counters > > patch in the oom report and high order atomic reserves patch on top. > > Uhm, was going to use it on top of 4.7.[01] first. OK
On Tue 16-08-16 13:18:25, Arkadiusz Miskiewicz wrote: > On Monday 15 of August 2016, Michal Hocko wrote: > > [Fixing up linux-mm] > > > > Ups I had a c&p error in the previous patch. Here is an updated patch. > > > Going to apply this patch now and report again. I mean time what I have is a > > while (true); do echo "XX date"; date; echo "XX SLAB"; cat /proc/slabinfo ; > echo "XX VMSTAT"; cat /proc/vmstat ; echo "XX free"; free; echo "XX DMESG"; > dmesg -T | tail -n 50; /bin/sleep 60;done 2>&1 | tee log > > loop gathering some data while few OOM conditions happened. > > I was doing "rm -rf copyX; cp -al original copyX" 10x in parallel. > > https://ixion.pld-linux.org/~arekm/p2/ext4/log-20160816.txt Just one more debug idea to add on top of what Michal said: Can you enable mm_shrink_slab_start and mm_shrink_slab_end tracepoints (via /sys/kernel/debug/tracing/events/vmscan/mm_shrink_slab_{start,end}/enable) and gather output from /sys/kernel/debug/tracing/trace_pipe while the copy is running? Because your slab caches seem to contain a lot of dentries as well (even more than inodes in terms of numbers) so it may be that OOM is declared too early before slab shrinkers can actually catch up... Honza
On Wednesday 17 of August 2016, Michal Hocko wrote: > On Wed 17-08-16 10:34:54, Arkadiusz Miśkiewicz wrote: > [...] > > > With "[PATCH] mm, oom: report compaction/migration stats for higher order > > requests" patch: > > https://ixion.pld-linux.org/~arekm/p2/ext4/log-20160817.txt > > > > Didn't count much - all counters are 0 > > compaction_stall:0 compaction_fail:0 compact_migrate_scanned:0 > > compact_free_scanned:0 compact_isolated:0 pgmigrate_success:0 > > pgmigrate_fail:0 > > Dohh, COMPACTION counters are events and those are different than other > counters we have. They only have per-cpu representation and so we would > have to do > + for_each_online_cpu(cpu) { > + struct vm_event_state *this = &per_cpu(vm_event_states, > cpu); + ret += this->event[item]; > + } > > which is really nasty because, strictly speaking, we would have to do > {get,put}_online_cpus around that loop and that uses locking and we do > not want to possibly block in this path just because something is in the > middle of the hotplug. So let's scratch that patch for now and sorry I > haven't realized that earlier. > > > two processes were killed by OOM (rm and cp), the rest of rm/cp didn't > > finish > > > > and I'm interrupting it to try that next patch: > > > Could you try to test with > > > patch from > > > http://lkml.kernel.org/r/20160816031222.GC16913@js1304-P5Q-DELUXE > > > please? Ideally on top of linux-next. You can add both the compaction > > > counters patch in the oom report and high order atomic reserves patch > > > on top. > > > > Uhm, was going to use it on top of 4.7.[01] first. > > OK So with http://lkml.kernel.org/r/20160816031222.GC16913@js1304-P5Q-DELUXE OOM no longer happens (all 10x rm/cp processes finished). 
https://ixion.pld-linux.org/~arekm/p2/ext4/log-20160818.txt On Wednesday 17 of August 2016, Jan Kara wrote: > Just one more debug idea to add on top of what Michal said: Can you enable > mm_shrink_slab_start and mm_shrink_slab_end tracepoints (via > /sys/kernel/debug/tracing/events/vmscan/mm_shrink_slab_{start,end}/enable) > and gather output from /sys/kernel/debug/tracing/trace_pipe while the copy > is running? Here it is: https://ixion.pld-linux.org/~arekm/p2/ext4/log-trace_pipe-20160818.txt.gz
On 08/18/2016 08:49 PM, Arkadiusz Miskiewicz wrote: > On Wednesday 17 of August 2016, Michal Hocko wrote: >> On Wed 17-08-16 10:34:54, Arkadiusz Miśkiewicz wrote: >> [...] >> >>> With "[PATCH] mm, oom: report compaction/migration stats for higher order >>> requests" patch: >>> https://ixion.pld-linux.org/~arekm/p2/ext4/log-20160817.txt >>> >>> Didn't count much - all counters are 0 >>> compaction_stall:0 compaction_fail:0 compact_migrate_scanned:0 >>> compact_free_scanned:0 compact_isolated:0 pgmigrate_success:0 >>> pgmigrate_fail:0 >> >> Dohh, COMPACTION counters are events and those are different than other >> counters we have. They only have per-cpu representation and so we would >> have to do >> + for_each_online_cpu(cpu) { >> + struct vm_event_state *this = &per_cpu(vm_event_states, >> cpu); + ret += this->event[item]; >> + } >> >> which is really nasty because, strictly speaking, we would have to do >> {get,put}_online_cpus around that loop and that uses locking and we do >> not want to possibly block in this path just because something is in the >> middle of the hotplug. So let's scratch that patch for now and sorry I >> haven't realized that earlier. >> >>> two processes were killed by OOM (rm and cp), the rest of rm/cp didn't >>> finish >>> >>> and I'm interrupting it to try that next patch: >>>> Could you try to test with >>>> patch from >>>> http://lkml.kernel.org/r/20160816031222.GC16913@js1304-P5Q-DELUXE >>>> please? Ideally on top of linux-next. You can add both the compaction >>>> counters patch in the oom report and high order atomic reserves patch >>>> on top. >>> >>> Uhm, was going to use it on top of 4.7.[01] first. >> >> OK > > So with http://lkml.kernel.org/r/20160816031222.GC16913@js1304-P5Q-DELUXE > OOM no longer happens (all 10x rm/cp processes finished). Is it on top of 4.7 then? That's a bit different from the other reporter who needed both linux-next and this patch to avoid OOM. 
In any case the proper solution should restrict this disabled heuristic to highest compaction priority, which needs the patches from linux-next anyway. So can you please also try linux-next with the patch from http://marc.info/?l=linux-mm&m=147158805719821 ? Thanks! > https://ixion.pld-linux.org/~arekm/p2/ext4/log-20160818.txt > > On Wednesday 17 of August 2016, Jan Kara wrote: >> Just one more debug idea to add on top of what Michal said: Can you enable >> mm_shrink_slab_start and mm_shrink_slab_end tracepoints (via >> /sys/kernel/debug/tracing/events/vmscan/mm_shrink_slab_{start,end}/enable) >> and gather output from /sys/kernel/debug/tracing/trace_pipe while the copy >> is running? > > Here it is: > > https://ixion.pld-linux.org/~arekm/p2/ext4/log-trace_pipe-20160818.txt.gz > -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Friday 19 of August 2016, Vlastimil Babka wrote: > On 08/18/2016 08:49 PM, Arkadiusz Miskiewicz wrote: > > On Wednesday 17 of August 2016, Michal Hocko wrote: > >> On Wed 17-08-16 10:34:54, Arkadiusz Miśkiewicz wrote: > >> [...] > >> > >>> With "[PATCH] mm, oom: report compaction/migration stats for higher > >>> order requests" patch: > >>> https://ixion.pld-linux.org/~arekm/p2/ext4/log-20160817.txt > >>> > >>> Didn't count much - all counters are 0 > >>> compaction_stall:0 compaction_fail:0 compact_migrate_scanned:0 > >>> compact_free_scanned:0 compact_isolated:0 pgmigrate_success:0 > >>> pgmigrate_fail:0 > >> > >> Dohh, COMPACTION counters are events and those are different than other > >> counters we have. They only have per-cpu representation and so we would > >> have to do > >> + for_each_online_cpu(cpu) { > >> + struct vm_event_state *this = &per_cpu(vm_event_states, > >> cpu); + ret += this->event[item]; > >> + } > >> > >> which is really nasty because, strictly speaking, we would have to do > >> {get,put}_online_cpus around that loop and that uses locking and we do > >> not want to possibly block in this path just because something is in the > >> middle of the hotplug. So let's scratch that patch for now and sorry I > >> haven't realized that earlier. > >> > >>> two processes were killed by OOM (rm and cp), the rest of rm/cp didn't > >>> finish > >>> > >>> and I'm interrupting it to try that next patch: > >>>> Could you try to test with > >>>> patch from > >>>> http://lkml.kernel.org/r/20160816031222.GC16913@js1304-P5Q-DELUXE > >>>> please? Ideally on top of linux-next. You can add both the compaction > >>>> counters patch in the oom report and high order atomic reserves patch > >>>> on top. > >>> > >>> Uhm, was going to use it on top of 4.7.[01] first. > >> > >> OK > > > > So with > > http://lkml.kernel.org/r/20160816031222.GC16913@js1304-P5Q-DELUXE OOM no > > longer happens (all 10x rm/cp processes finished). > > Is it on top of 4.7 then? 
Yes, it was on top of 4.7.0. > That's a bit different from the other reporter > who needed both linux-next and this patch to avoid OOM. > In any case the proper solution should restrict this disabled heuristic > to highest compaction priority, which needs the patches from linux-next > anyway. > > So can you please also try linux-next with the patch from > http://marc.info/?l=linux-mm&m=147158805719821 ? https://ixion.pld-linux.org/~arekm/p2/ext4/log-20160819.txt https://ixion.pld-linux.org/~arekm/p2/ext4/log-trace_pipe-20160819.txt.gz rm/cp -al x10 succeeded without any OOM so the question is - which solution is "the one" for stable/4.7.x ? Thanks > > Thanks! > > > https://ixion.pld-linux.org/~arekm/p2/ext4/log-20160818.txt > > > > On Wednesday 17 of August 2016, Jan Kara wrote: > >> Just one more debug idea to add on top of what Michal said: Can you > >> enable mm_shrink_slab_start and mm_shrink_slab_end tracepoints (via > >> /sys/kernel/debug/tracing/events/vmscan/mm_shrink_slab_{start,end}/enabl > >> e) and gather output from /sys/kernel/debug/tracing/trace_pipe while the > >> copy is running? > > > > Here it is: > > > > https://ixion.pld-linux.org/~arekm/p2/ext4/log-trace_pipe-20160818.txt.gz
On Sun 21-08-16 23:19:50, Arkadiusz Miskiewicz wrote: > On Friday 19 of August 2016, Vlastimil Babka wrote: > > On 08/18/2016 08:49 PM, Arkadiusz Miskiewicz wrote: > > > On Wednesday 17 of August 2016, Michal Hocko wrote: > > >> On Wed 17-08-16 10:34:54, Arkadiusz Miśkiewicz wrote: > > >> [...] > > >> > > >>> With "[PATCH] mm, oom: report compaction/migration stats for higher > > >>> order requests" patch: > > >>> https://ixion.pld-linux.org/~arekm/p2/ext4/log-20160817.txt > > >>> > > >>> Didn't count much - all counters are 0 > > >>> compaction_stall:0 compaction_fail:0 compact_migrate_scanned:0 > > >>> compact_free_scanned:0 compact_isolated:0 pgmigrate_success:0 > > >>> pgmigrate_fail:0 > > >> > > >> Dohh, COMPACTION counters are events and those are different than other > > >> counters we have. They only have per-cpu representation and so we would > > >> have to do > > >> + for_each_online_cpu(cpu) { > > >> + struct vm_event_state *this = &per_cpu(vm_event_states, > > >> cpu); + ret += this->event[item]; > > >> + } > > >> > > >> which is really nasty because, strictly speaking, we would have to do > > >> {get,put}_online_cpus around that loop and that uses locking and we do > > >> not want to possibly block in this path just because something is in the > > >> middle of the hotplug. So let's scratch that patch for now and sorry I > > >> haven't realized that earlier. > > >> > > >>> two processes were killed by OOM (rm and cp), the rest of rm/cp didn't > > >>> finish > > >>> > > >>> and I'm interrupting it to try that next patch: > > >>>> Could you try to test with > > >>>> patch from > > >>>> http://lkml.kernel.org/r/20160816031222.GC16913@js1304-P5Q-DELUXE > > >>>> please? Ideally on top of linux-next. You can add both the compaction > > >>>> counters patch in the oom report and high order atomic reserves patch > > >>>> on top. > > >>> > > >>> Uhm, was going to use it on top of 4.7.[01] first. 
> > >> > > >> OK > > > > > > So with > > > http://lkml.kernel.org/r/20160816031222.GC16913@js1304-P5Q-DELUXE OOM no > > > longer happens (all 10x rm/cp processes finished). > > > > Is it on top of 4.7 then? > > Yes, it was on top of 4.7.0. > > > That's a bit different from the other reporter > > who needed both linux-next and this patch to avoid OOM. > > In any case the proper solution should restrict this disabled heuristic > > to highest compaction priority, which needs the patches from linux-next > > anyway. > > > > So can you please also try linux-next with the patch from > > http://marc.info/?l=linux-mm&m=147158805719821 ? > > https://ixion.pld-linux.org/~arekm/p2/ext4/log-20160819.txt > https://ixion.pld-linux.org/~arekm/p2/ext4/log-trace_pipe-20160819.txt.gz > > rm/cp -al x10 succeeded without any OOM > > so the question is - which solution is "the one" for stable/4.7.x ? I will send an email later today with other people reporting premature OOMs and will make sure you are on the CC list as well. Thanks for the testing!
diff --git a/include/linux/mm.h b/include/linux/mm.h index 7e44613c5078..b4859547acc4 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1146,6 +1146,7 @@ extern void pagefault_out_of_memory(void); * various contexts. */ #define SHOW_MEM_FILTER_NODES (0x0001u) /* disallowed nodes */ +#define SHOW_COMPACTION_STATS (0x0002u) extern void show_free_areas(unsigned int flags); extern bool skip_free_areas_node(unsigned int flags, int nid); diff --git a/lib/show_mem.c b/lib/show_mem.c index 1feed6a2b12a..c0ac5bd2c121 100644 --- a/lib/show_mem.c +++ b/lib/show_mem.c @@ -8,6 +8,7 @@ #include <linux/mm.h> #include <linux/quicklist.h> #include <linux/cma.h> +#include <linux/vm_event_item.h> void show_mem(unsigned int filter) { @@ -17,6 +18,19 @@ void show_mem(unsigned int filter) printk("Mem-Info:\n"); show_free_areas(filter); +#ifdef CONFIG_COMPACTION + if (filter & SHOW_COMPACTION_STATS) { + printk("compaction_stall:%lu compaction_fail:%lu " + "compact_migrate_scanned:%lu compact_free_scanned:%lu " + "compact_isolated:%lu " + "pgmigrate_success:%lu pgmigrate_fail:%lu\n", + global_page_state(COMPACTSTALL), global_page_state(COMPACTFAIL), + global_page_state(COMPACTMIGRATE_SCANNED), global_page_state(COMPACTFREE_SCANNED), + global_page_state(COMPACTISOLATED), + global_page_state(PGMIGRATE_SUCCESS), global_page_state(PGMIGRATE_FAIL)); + } +#endif + for_each_online_pgdat(pgdat) { unsigned long flags; int zoneid; diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 463cdd22d4e0..5e7a09f4dbc9 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -419,7 +419,7 @@ static void dump_header(struct oom_control *oc, struct task_struct *p) if (oc->memcg) mem_cgroup_print_oom_info(oc->memcg, p); else - show_mem(SHOW_MEM_FILTER_NODES); + show_mem(SHOW_MEM_FILTER_NODES | (oc->order)?SHOW_COMPACTION_STATS:0); if (sysctl_oom_dump_tasks) dump_tasks(oc->memcg, oc->nodemask); } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 9d46b65061be..adf0cb655827 100644 --- a/mm/page_alloc.c +++ 
b/mm/page_alloc.c @@ -2999,6 +2999,8 @@ void warn_alloc_failed(gfp_t gfp_mask, unsigned int order, const char *fmt, ...) pr_warn("%s: page allocation failure: order:%u, mode:%#x(%pGg)\n", current->comm, order, gfp_mask, &gfp_mask); dump_stack(); + if (order) + filter |= SHOW_COMPACTION_STATS; if (!should_suppress_show_mem()) show_mem(filter); }