/* * Queued read/write locks * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * (C) Copyright 2013-2014 Hewlett-Packard Development Company, L.P. * * Authors: Waiman Long */ #include #include #include #include #include #include /* * This internal data structure is used for optimizing access to some of * the subfields within the atomic_t cnts. */ struct __qrwlock { union { atomic_t cnts; struct { #ifdef __LITTLE_ENDIAN u8 wmode; /* Writer mode */ u8 rcnts[3]; /* Reader counts */ #else u8 rcnts[3]; /* Reader counts */ u8 wmode; /* Writer mode */ #endif }; }; arch_spinlock_t lock; }; /** * rspin_until_writer_unlock - inc reader count & spin until writer is gone * @lock : Pointer to queue rwlock structure * @writer: Current queue rwlock writer status byte * * In interrupt context or at the head of the queue, the reader will just * increment the reader count & wait until the writer releases the lock. */ static __always_inline void rspin_until_writer_unlock(struct qrwlock *lock, u32 cnts) { while ((cnts & _QW_WMASK) == _QW_LOCKED) { cpu_relax(); cnts = atomic_read_acquire(&lock->cnts); } } /** * queued_read_lock_slowpath - acquire read lock of a queue rwlock * @lock: Pointer to queue rwlock structure * @cnts: Current qrwlock lock value */ void queued_read_lock_slowpath(struct qrwlock *lock, u32 cnts) { /* * Readers come here when they cannot get the lock without waiting */ if (unlikely(in_interrupt())) { /* * Readers in interrupt context will get the lock immediately * if the writer is just waiting (not holding the lock yet). * The rspin_until_writer_unlock() function returns immediately * in this case. Otherwise, they will spin (with ACQUIRE * semantics) until the lock is available without waiting in * the queue. */ rspin_until_writer_unlock(lock, cnts); return; } atomic_sub(_QR_BIAS, &lock->cnts); /* * Put the reader into the wait queue */ arch_spin_lock(&lock->wait_lock); /* * The ACQUIRE semantics of the following spinning code ensure * that accesses can't leak upwards out of our subsequent critical * section in the case that the lock is currently held for write. */ cnts = atomic_fetch_add_acquire(_QR_BIAS, &lock->cnts); rspin_until_writer_unlock(lock, cnts); /* * Signal the next one in queue to become queue head */ arch_spin_unlock(&lock->wait_lock); } EXPORT_SYMBOL(queued_read_lock_slowpath); /** * queued_write_lock_slowpath - acquire write lock of a queue rwlock * @lock : Pointer to queue rwlock structure */ void queued_write_lock_slowpath(struct qrwlock *lock) { u32 cnts; /* Put the writer into the wait queue */ arch_spin_lock(&lock->wait_lock); /* Try to acquire the lock directly if no reader is present */ if (!atomic_read(&lock->cnts) && (atomic_cmpxchg_acquire(&lock->cnts, 0, _QW_LOCKED) == 0)) goto unlock; /* * Set the waiting flag to notify readers that a writer is pending, * or wait for a previous writer to go away. */ for (;;) { struct __qrwlock *l = (struct __qrwlock *)lock; if (!READ_ONCE(l->wmode) && (cmpxchg_relaxed(&l->wmode, 0, _QW_WAITING) == 0)) break; cpu_relax(); } /* When no more readers, set the locked flag */ for (;;) { cnts = atomic_read(&lock->cnts); if ((cnts == _QW_WAITING) && (atomic_cmpxchg_acquire(&lock->cnts, _QW_WAITING, _QW_LOCKED) == _QW_WAITING)) break; cpu_relax(); } unlock: arch_spin_unlock(&lock->wait_lock); } EXPORT_SYMBOL(queued_write_lock_slowpath); d>mode:
authorDouglas Miller <dougmill@linux.vnet.ibm.com>2017-01-28 06:42:20 -0600
committerTejun Heo <tj@kernel.org>2017-01-28 07:49:42 -0500
commit966d2b04e070bc040319aaebfec09e0144dc3341 (patch)
tree4b96156e3d1dd4dfd6039b7c219c9dc4616da52d /net/ceph/messenger.c
parent1b1bc42c1692e9b62756323c675a44cb1a1f9dbd (diff)
percpu-refcount: fix reference leak during percpu-atomic transition
percpu_ref_tryget() and percpu_ref_tryget_live() should return "true" IFF they acquire a reference. But the return value from atomic_long_inc_not_zero() is a long and may have high bits set, e.g. PERCPU_COUNT_BIAS, and the return value of the tryget routines is bool so the reference may actually be acquired but the routines return "false" which results in a reference leak since the caller assumes it does not need to do a corresponding percpu_ref_put(). This was seen when performing CPU hotplug during I/O, as hangs in blk_mq_freeze_queue_wait where percpu_ref_kill (blk_mq_freeze_queue_start) raced with percpu_ref_tryget (blk_mq_timeout_work). Sample stack trace: __switch_to+0x2c0/0x450 __schedule+0x2f8/0x970 schedule+0x48/0xc0 blk_mq_freeze_queue_wait+0x94/0x120 blk_mq_queue_reinit_work+0xb8/0x180 blk_mq_queue_reinit_prepare+0x84/0xa0 cpuhp_invoke_callback+0x17c/0x600 cpuhp_up_callbacks+0x58/0x150 _cpu_up+0xf0/0x1c0 do_cpu_up+0x120/0x150 cpu_subsys_online+0x64/0xe0 device_online+0xb4/0x120 online_store+0xb4/0xc0 dev_attr_store+0x68/0xa0 sysfs_kf_write+0x80/0xb0 kernfs_fop_write+0x17c/0x250 __vfs_write+0x6c/0x1e0 vfs_write+0xd0/0x270 SyS_write+0x6c/0x110 system_call+0x38/0xe0 Examination of the queue showed a single reference (no PERCPU_COUNT_BIAS, and __PERCPU_REF_DEAD, __PERCPU_REF_ATOMIC set) and no requests. However, conditions at the time of the race are count of PERCPU_COUNT_BIAS + 0 and __PERCPU_REF_DEAD and __PERCPU_REF_ATOMIC set. The fix is to make the tryget routines use an actual boolean internally instead of the atomic long result truncated to a int. Fixes: e625305b3907 percpu-refcount: make percpu_ref based on longs instead of ints Link: https://bugzilla.kernel.org/show_bug.cgi?id=190751 Signed-off-by: Douglas Miller <dougmill@linux.vnet.ibm.com> Reviewed-by: Jens Axboe <axboe@fb.com> Signed-off-by: Tejun Heo <tj@kernel.org> Fixes: e625305b3907 ("percpu-refcount: make percpu_ref based on longs instead of ints") Cc: stable@vger.kernel.org # v3.18+
Diffstat (limited to 'net/ceph/messenger.c')