#include #include #include #include #include #include #include #include #include #include #include int nr_allocated; int preempt_count; struct kmem_cache { pthread_mutex_t lock; int size; int nr_objs; void *objs; void (*ctor)(void *); }; void *mempool_alloc(mempool_t *pool, int gfp_mask) { return pool->alloc(gfp_mask, pool->data); } void mempool_free(void *element, mempool_t *pool) { pool->free(element, pool->data); } mempool_t *mempool_create(int min_nr, mempool_alloc_t *alloc_fn, mempool_free_t *free_fn, void *pool_data) { mempool_t *ret = malloc(sizeof(*ret)); ret->alloc = alloc_fn; ret->free = free_fn; ret->data = pool_data; return ret; } void *kmem_cache_alloc(struct kmem_cache *cachep, int flags) { struct radix_tree_node *node; if (flags & __GFP_NOWARN) return NULL; pthread_mutex_lock(&cachep->lock); if (cachep->nr_objs) { cachep->nr_objs--; node = cachep->objs; cachep->objs = node->private_data; pthread_mutex_unlock(&cachep->lock); node->private_data = NULL; } else { pthread_mutex_unlock(&cachep->lock); node = malloc(cachep->size); if (cachep->ctor) cachep->ctor(node); } uatomic_inc(&nr_allocated); return node; } void kmem_cache_free(struct kmem_cache *cachep, void *objp) { assert(objp); uatomic_dec(&nr_allocated); pthread_mutex_lock(&cachep->lock); if (cachep->nr_objs > 10) { memset(objp, POISON_FREE, cachep->size); free(objp); } else { struct radix_tree_node *node = objp; cachep->nr_objs++; node->private_data = cachep->objs; cachep->objs = node; } pthread_mutex_unlock(&cachep->lock); } void *kmalloc(size_t size, gfp_t gfp) { void *ret = malloc(size); uatomic_inc(&nr_allocated); return ret; } void kfree(void *p) { if (!p) return; uatomic_dec(&nr_allocated); free(p); } struct kmem_cache * kmem_cache_create(const char *name, size_t size, size_t offset, unsigned long flags, void (*ctor)(void *)) { struct kmem_cache *ret = malloc(sizeof(*ret)); pthread_mutex_init(&ret->lock, NULL); ret->size = size; ret->nr_objs = 0; ret->objs = NULL; ret->ctor = ctor; return 
ret; } '/>
path: root/net/core
diff options
context:
space:
mode:
author Vlastimil Babka <vbabka@suse.cz> 2017-01-24 15:18:41 -0800
committer Linus Torvalds <torvalds@linux-foundation.org> 2017-01-24 16:26:14 -0800
commit e47483bca2cc59a4593b37a270b16ee42b1d9f08 (patch)
tree 4bc66bd4f2f87a30d231068468d29900685fb45f /net/core
parent 5ce9bfef1d27944c119a397a9d827bef795487ce (diff)
mm, page_alloc: fix premature OOM when racing with cpuset mems update
Ganapatrao Kulkarni reported that the LTP test cpuset01 in stress mode triggers OOM killer in few seconds, despite lots of free memory. The test attempts to repeatedly fault in memory in one process in a cpuset, while changing allowed nodes of the cpuset between 0 and 1 in another process. The problem comes from insufficient protection against cpuset changes, which can cause get_page_from_freelist() to consider all zones as non-eligible due to nodemask and/or current->mems_allowed. This was masked in the past by sufficient retries, but since commit 682a3385e773 ("mm, page_alloc: inline the fast path of the zonelist iterator") we fix the preferred_zoneref once, and don't iterate over the whole zonelist in further attempts, thus the only eligible zones might be placed in the zonelist before our starting point and we always miss them. A previous patch fixed this problem for current->mems_allowed. However, cpuset changes also update the task's mempolicy nodemask. The fix has two parts. We have to repeat the preferred_zoneref search when we detect cpuset update by way of seqcount, and we have to check the seqcount before considering OOM. [akpm@linux-foundation.org: fix typo in comment] Link: http://lkml.kernel.org/r/20170120103843.24587-5-vbabka@suse.cz Fixes: c33d6c06f60f ("mm, page_alloc: avoid looking up the first zone in a zonelist twice") Signed-off-by: Vlastimil Babka <vbabka@suse.cz> Reported-by: Ganapatrao Kulkarni <gpkulkarni@gmail.com> Acked-by: Mel Gorman <mgorman@techsingularity.net> Acked-by: Hillf Danton <hillf.zj@alibaba-inc.com> Cc: Michal Hocko <mhocko@suse.com> Cc: <stable@vger.kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'net/core')