diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index a2bbaef..06f8e38 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -35,6 +35,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, #endif PGINODESTEAL, SLABS_SCANNED, KSWAPD_INODESTEAL, KSWAPD_LOW_WMARK_HIT_QUICKLY, KSWAPD_HIGH_WMARK_HIT_QUICKLY, + KSWAPD_SKIP_CONGESTION_WAIT, PAGEOUTRUN, ALLOCSTALL, PGROTATED, #ifdef CONFIG_COMPACTION COMPACTBLOCKS, COMPACTPAGES, COMPACTPAGEFAILED, diff --git a/mm/vmstat.c b/mm/vmstat.c index 1de03bf..1034ebc 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -758,6 +758,7 @@ const char * const vmstat_text[] = { "kswapd_inodesteal", "kswapd_low_wmark_hit_quickly", "kswapd_high_wmark_hit_quickly", + "kswapd_skip_congestion_wait", "pageoutrun", "allocstall", diff --git a/mm/vmscan.c b/mm/vmscan.c index db61d7f..d6b0e39 100755 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2782,6 +2782,7 @@ loop_again: for (priority = DEF_PRIORITY; priority >= 0; priority--) { unsigned long lru_pages = 0; + int has_under_min_watermark_zone = 0; /* The swap token gets in the way of swapout... */ if (!priority) @@ -2923,7 +2924,17 @@ loop_again: continue; } - if (zone_balanced(zone, testorder, 0, end_zone)) { + if (!zone_balanced(zone, testorder, 0, end_zone)) { + all_zones_ok = 0; + /* + * We are still under min water mark. This + * means that we have a GFP_ATOMIC allocation + * failure risk. Hurry up! + */ + if (!zone_watermark_ok_safe(zone, order, + min_wmark_pages(zone), end_zone, 0)) + has_under_min_watermark_zone = 1; + } else { /* * If a zone reaches its high watermark, * consider it to be no longer congested. It's @@ -2940,6 +2951,17 @@ loop_again: if (all_zones_ok || (order && pgdat_balanced(pgdat, balanced, *classzone_idx))) break; /* kswapd: all done */ /* + * OK, kswapd is getting into trouble. Take a nap, then take + * another pass across the zones. + */ + if (total_scanned && (priority < DEF_PRIORITY - 2)) { + if (has_under_min_watermark_zone) + count_vm_event(KSWAPD_SKIP_CONGESTION_WAIT); + else + congestion_wait(BLK_RW_ASYNC, HZ/10); + } + + /* * We do this so kswapd doesn't build up large priorities for * example when it is freeing in parallel with allocators. It * matches the direct reclaim path behaviour in terms of impact