#include "tests.h"
#include "machine.h"
#include "thread.h"
#include "map.h"
#include "debug.h"
/*
 * Exercise map_groups sharing between struct thread objects.
 *
 * Two process abstractions are set up on the host machine:
 *
 *   thread group (pid: 0, tids: 0, 1, 2, 3)
 *   other group  (pid: 4, tids: 4, 5)
 *
 * The checks assert that all threads of a group point at the same
 * struct map_groups and that the map_groups refcnt goes down by one
 * with each thread__put().
 *
 * @subtest: unused.
 *
 * Returns 0 when every check passes. A failed check is handled by the
 * TEST_ASSERT_* macros (see tests.h for what they do on failure).
 */
int test__thread_mg_share(int subtest __maybe_unused)
{
	struct machines machines;
	struct machine *host;

	/* first process: a leader plus three more threads */
	struct thread *grp_leader;
	struct thread *peer1, *peer2, *peer3;
	struct map_groups *grp_mg;

	/* second process */
	struct thread *lone, *lone_leader;
	struct map_groups *lone_mg;

	machines__init(&machines);
	host = &machines.host;

	/* pid 0, tids 0..3 */
	grp_leader = machine__findnew_thread(host, 0, 0);
	peer1 = machine__findnew_thread(host, 0, 1);
	peer2 = machine__findnew_thread(host, 0, 2);
	peer3 = machine__findnew_thread(host, 0, 3);

	/* pid 4: only tid 5 is requested here, its leader (tid 4) is not */
	lone = machine__findnew_thread(host, 4, 5);

	TEST_ASSERT_VAL("failed to create threads",
			grp_leader && peer1 && peer2 && peer3 && lone);

	/* one map_groups reference is expected per thread of the group */
	grp_mg = grp_leader->mg;
	TEST_ASSERT_EQUAL("wrong refcnt", atomic_read(&grp_mg->refcnt), 4);

	/* each non-leader thread must point at the leader's map_groups */
	TEST_ASSERT_VAL("map groups don't match", grp_mg == peer1->mg);
	TEST_ASSERT_VAL("map groups don't match", grp_mg == peer2->mg);
	TEST_ASSERT_VAL("map groups don't match", grp_mg == peer3->mg);

	/*
	 * The leader of the second process was never requested
	 * explicitly; it is expected to exist already as a result of
	 * the findnew call for tid 5, hence a plain lookup here.
	 */
	lone_leader = machine__find_thread(host, 4, 4);
	TEST_ASSERT_VAL("failed to find other leader", lone_leader);

	/*
	 * All lookups are done. Take every thread out of the machine
	 * before starting the refcount checks below.
	 */
	machine__remove_thread(host, grp_leader);
	machine__remove_thread(host, peer1);
	machine__remove_thread(host, peer2);
	machine__remove_thread(host, peer3);
	machine__remove_thread(host, lone);
	machine__remove_thread(host, lone_leader);

	/* second process: two threads, one shared map_groups */
	lone_mg = lone->mg;
	TEST_ASSERT_EQUAL("wrong refcnt", atomic_read(&lone_mg->refcnt), 2);

	TEST_ASSERT_VAL("map groups don't match", lone_mg == lone_leader->mg);

	/* drop the first group thread by thread, watching the refcnt */
	thread__put(grp_leader);
	TEST_ASSERT_EQUAL("wrong refcnt", atomic_read(&grp_mg->refcnt), 3);

	thread__put(peer1);
	TEST_ASSERT_EQUAL("wrong refcnt", atomic_read(&grp_mg->refcnt), 2);

	thread__put(peer2);
	TEST_ASSERT_EQUAL("wrong refcnt", atomic_read(&grp_mg->refcnt), 1);

	/* last put of the group: grp_mg is not read again after this */
	thread__put(peer3);

	/* drop the second group the same way */
	thread__put(lone_leader);
	TEST_ASSERT_EQUAL("wrong refcnt", atomic_read(&lone_mg->refcnt), 1);

	thread__put(lone);

	machines__exit(&machines);
	return 0;
}
3341'>root/include/net/neighbour.h
percpu-refcount: fix reference leak during percpu-atomic transition
percpu_ref_tryget() and percpu_ref_tryget_live() should return
"true" IFF they acquire a reference. But the return value from
atomic_long_inc_not_zero() is a long and may have high bits set,
e.g. PERCPU_COUNT_BIAS, while the return value of the tryget routines
is bool, so the reference may actually be acquired but the routines
return "false". This results in a reference leak, since the caller
assumes it does not need to do a corresponding percpu_ref_put().
This was seen when performing CPU hotplug during I/O, as hangs in
blk_mq_freeze_queue_wait where percpu_ref_kill (blk_mq_freeze_queue_start)
raced with percpu_ref_tryget (blk_mq_timeout_work).
Sample stack trace:
__switch_to+0x2c0/0x450
__schedule+0x2f8/0x970
schedule+0x48/0xc0
blk_mq_freeze_queue_wait+0x94/0x120
blk_mq_queue_reinit_work+0xb8/0x180
blk_mq_queue_reinit_prepare+0x84/0xa0
cpuhp_invoke_callback+0x17c/0x600
cpuhp_up_callbacks+0x58/0x150
_cpu_up+0xf0/0x1c0
do_cpu_up+0x120/0x150
cpu_subsys_online+0x64/0xe0
device_online+0xb4/0x120
online_store+0xb4/0xc0
dev_attr_store+0x68/0xa0
sysfs_kf_write+0x80/0xb0
kernfs_fop_write+0x17c/0x250
__vfs_write+0x6c/0x1e0
vfs_write+0xd0/0x270
SyS_write+0x6c/0x110
system_call+0x38/0xe0
Examination of the queue showed a single reference (no PERCPU_COUNT_BIAS,
and __PERCPU_REF_DEAD, __PERCPU_REF_ATOMIC set) and no requests.
However, the conditions at the time of the race were a count of
PERCPU_COUNT_BIAS + 0 with __PERCPU_REF_DEAD and __PERCPU_REF_ATOMIC set.
The fix is to make the tryget routines use an actual boolean internally instead
of the atomic long result truncated to an int.
Fixes: e625305b3907 percpu-refcount: make percpu_ref based on longs instead of ints
Link: https://bugzilla.kernel.org/show_bug.cgi?id=190751
Signed-off-by: Douglas Miller <dougmill@linux.vnet.ibm.com>
Reviewed-by: Jens Axboe <axboe@fb.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Fixes: e625305b3907 ("percpu-refcount: make percpu_ref based on longs instead of ints")
Cc: stable@vger.kernel.org # v3.18+