/* * Copyright (c) 2016 Trond Myklebust * * I/O and data path helper functionality. */ #include #include #include #include #include #include #include "internal.h" /* Call with exclusively locked inode->i_rwsem */ static void nfs_block_o_direct(struct nfs_inode *nfsi, struct inode *inode) { if (test_bit(NFS_INO_ODIRECT, &nfsi->flags)) { clear_bit(NFS_INO_ODIRECT, &nfsi->flags); inode_dio_wait(inode); } } /** * nfs_start_io_read - declare the file is being used for buffered reads * @inode - file inode * * Declare that a buffered read operation is about to start, and ensure * that we block all direct I/O. * On exit, the function ensures that the NFS_INO_ODIRECT flag is unset, * and holds a shared lock on inode->i_rwsem to ensure that the flag * cannot be changed. * In practice, this means that buffered read operations are allowed to * execute in parallel, thanks to the shared lock, whereas direct I/O * operations need to wait to grab an exclusive lock in order to set * NFS_INO_ODIRECT. * Note that buffered writes and truncates both take a write lock on * inode->i_rwsem, meaning that those are serialised w.r.t. the reads. */ void nfs_start_io_read(struct inode *inode) { struct nfs_inode *nfsi = NFS_I(inode); /* Be an optimist! */ down_read(&inode->i_rwsem); if (test_bit(NFS_INO_ODIRECT, &nfsi->flags) == 0) return; up_read(&inode->i_rwsem); /* Slow path.... */ down_write(&inode->i_rwsem); nfs_block_o_direct(nfsi, inode); downgrade_write(&inode->i_rwsem); } /** * nfs_end_io_read - declare that the buffered read operation is done * @inode - file inode * * Declare that a buffered read operation is done, and release the shared * lock on inode->i_rwsem. */ void nfs_end_io_read(struct inode *inode) { up_read(&inode->i_rwsem); } /** * nfs_start_io_write - declare the file is being used for buffered writes * @inode - file inode * * Declare that a buffered read operation is about to start, and ensure * that we block all direct I/O. 
*/ void nfs_start_io_write(struct inode *inode) { down_write(&inode->i_rwsem); nfs_block_o_direct(NFS_I(inode), inode); } /** * nfs_end_io_write - declare that the buffered write operation is done * @inode - file inode * * Declare that a buffered write operation is done, and release the * lock on inode->i_rwsem. */ void nfs_end_io_write(struct inode *inode) { up_write(&inode->i_rwsem); } /* Call with exclusively locked inode->i_rwsem */ static void nfs_block_buffered(struct nfs_inode *nfsi, struct inode *inode) { if (!test_bit(NFS_INO_ODIRECT, &nfsi->flags)) { set_bit(NFS_INO_ODIRECT, &nfsi->flags); nfs_wb_all(inode); } } /** * nfs_end_io_direct - declare the file is being used for direct i/o * @inode - file inode * * Declare that a direct I/O operation is about to start, and ensure * that we block all buffered I/O. * On exit, the function ensures that the NFS_INO_ODIRECT flag is set, * and holds a shared lock on inode->i_rwsem to ensure that the flag * cannot be changed. * In practice, this means that direct I/O operations are allowed to * execute in parallel, thanks to the shared lock, whereas buffered I/O * operations need to wait to grab an exclusive lock in order to clear * NFS_INO_ODIRECT. * Note that buffered writes and truncates both take a write lock on * inode->i_rwsem, meaning that those are serialised w.r.t. O_DIRECT. */ void nfs_start_io_direct(struct inode *inode) { struct nfs_inode *nfsi = NFS_I(inode); /* Be an optimist! */ down_read(&inode->i_rwsem); if (test_bit(NFS_INO_ODIRECT, &nfsi->flags) != 0) return; up_read(&inode->i_rwsem); /* Slow path.... */ down_write(&inode->i_rwsem); nfs_block_buffered(nfsi, inode); downgrade_write(&inode->i_rwsem); } /** * nfs_end_io_direct - declare that the direct i/o operation is done * @inode - file inode * * Declare that a direct I/O operation is done, and release the shared * lock on inode->i_rwsem. */ void nfs_end_io_direct(struct inode *inode) { up_read(&inode->i_rwsem); } 5'>15space:mode:
author Douglas Miller <dougmill@linux.vnet.ibm.com> 2017-01-28 06:42:20 -0600
committer Tejun Heo <tj@kernel.org> 2017-01-28 07:49:42 -0500
commit 966d2b04e070bc040319aaebfec09e0144dc3341 (patch)
tree 4b96156e3d1dd4dfd6039b7c219c9dc4616da52d /include/dt-bindings/clock/qcom,gcc-ipq4019.h
parent 1b1bc42c1692e9b62756323c675a44cb1a1f9dbd (diff)
percpu-refcount: fix reference leak during percpu-atomic transition
percpu_ref_tryget() and percpu_ref_tryget_live() should return "true" IFF they acquire a reference. But the return value from atomic_long_inc_not_zero() is a long and may have high bits set, e.g. PERCPU_COUNT_BIAS, and the return value of the tryget routines is bool so the reference may actually be acquired but the routines return "false" which results in a reference leak since the caller assumes it does not need to do a corresponding percpu_ref_put().

This was seen when performing CPU hotplug during I/O, as hangs in blk_mq_freeze_queue_wait where percpu_ref_kill (blk_mq_freeze_queue_start) raced with percpu_ref_tryget (blk_mq_timeout_work). Sample stack trace:

  __switch_to+0x2c0/0x450
  __schedule+0x2f8/0x970
  schedule+0x48/0xc0
  blk_mq_freeze_queue_wait+0x94/0x120
  blk_mq_queue_reinit_work+0xb8/0x180
  blk_mq_queue_reinit_prepare+0x84/0xa0
  cpuhp_invoke_callback+0x17c/0x600
  cpuhp_up_callbacks+0x58/0x150
  _cpu_up+0xf0/0x1c0
  do_cpu_up+0x120/0x150
  cpu_subsys_online+0x64/0xe0
  device_online+0xb4/0x120
  online_store+0xb4/0xc0
  dev_attr_store+0x68/0xa0
  sysfs_kf_write+0x80/0xb0
  kernfs_fop_write+0x17c/0x250
  __vfs_write+0x6c/0x1e0
  vfs_write+0xd0/0x270
  SyS_write+0x6c/0x110
  system_call+0x38/0xe0

Examination of the queue showed a single reference (no PERCPU_COUNT_BIAS, and __PERCPU_REF_DEAD, __PERCPU_REF_ATOMIC set) and no requests. However, conditions at the time of the race are count of PERCPU_COUNT_BIAS + 0 and __PERCPU_REF_DEAD and __PERCPU_REF_ATOMIC set.

The fix is to make the tryget routines use an actual boolean internally instead of the atomic long result truncated to an int.

Fixes: e625305b3907 percpu-refcount: make percpu_ref based on longs instead of ints
Link: https://bugzilla.kernel.org/show_bug.cgi?id=190751
Signed-off-by: Douglas Miller <dougmill@linux.vnet.ibm.com>
Reviewed-by: Jens Axboe <axboe@fb.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Fixes: e625305b3907 ("percpu-refcount: make percpu_ref based on longs instead of ints")
Cc: stable@vger.kernel.org # v3.18+
Diffstat (limited to 'include/dt-bindings/clock/qcom,gcc-ipq4019.h')