/* * Copyright (c) 2000,2002-2005 Silicon Graphics, Inc. * All Rights Reserved. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as * published by the Free Software Foundation. * * This program is distributed in the hope that it would be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef __XFS_BMAP_BTREE_H__ #define __XFS_BMAP_BTREE_H__ struct xfs_btree_cur; struct xfs_btree_block; struct xfs_mount; struct xfs_inode; struct xfs_trans; /* * Extent state and extent format macros. */ #define XFS_EXTFMT_INODE(x) \ (xfs_sb_version_hasextflgbit(&((x)->i_mount->m_sb)) ? \ XFS_EXTFMT_HASSTATE : XFS_EXTFMT_NOSTATE) #define ISUNWRITTEN(x) ((x)->br_state == XFS_EXT_UNWRITTEN) /* * Btree block header size depends on a superblock flag. */ #define XFS_BMBT_BLOCK_LEN(mp) \ (xfs_sb_version_hascrc(&((mp)->m_sb)) ? \ XFS_BTREE_LBLOCK_CRC_LEN : XFS_BTREE_LBLOCK_LEN) #define XFS_BMBT_REC_ADDR(mp, block, index) \ ((xfs_bmbt_rec_t *) \ ((char *)(block) + \ XFS_BMBT_BLOCK_LEN(mp) + \ ((index) - 1) * sizeof(xfs_bmbt_rec_t))) #define XFS_BMBT_KEY_ADDR(mp, block, index) \ ((xfs_bmbt_key_t *) \ ((char *)(block) + \ XFS_BMBT_BLOCK_LEN(mp) + \ ((index) - 1) * sizeof(xfs_bmbt_key_t))) #define XFS_BMBT_PTR_ADDR(mp, block, index, maxrecs) \ ((xfs_bmbt_ptr_t *) \ ((char *)(block) + \ XFS_BMBT_BLOCK_LEN(mp) + \ (maxrecs) * sizeof(xfs_bmbt_key_t) + \ ((index) - 1) * sizeof(xfs_bmbt_ptr_t))) #define XFS_BMDR_REC_ADDR(block, index) \ ((xfs_bmdr_rec_t *) \ ((char *)(block) + \ sizeof(struct xfs_bmdr_block) + \ ((index) - 1) * sizeof(xfs_bmdr_rec_t))) #define XFS_BMDR_KEY_ADDR(block, index) \ ((xfs_bmdr_key_t *) \ ((char *)(block) + \ sizeof(struct xfs_bmdr_block) + \ ((index) - 1) * sizeof(xfs_bmdr_key_t))) #define XFS_BMDR_PTR_ADDR(block, index, maxrecs) \ ((xfs_bmdr_ptr_t *) \ ((char *)(block) + \ sizeof(struct xfs_bmdr_block) + \ (maxrecs) * sizeof(xfs_bmdr_key_t) + \ ((index) - 1) * sizeof(xfs_bmdr_ptr_t))) /* * These are to be used when we know the size of the block and * we don't have a cursor. */ #define XFS_BMAP_BROOT_PTR_ADDR(mp, bb, i, sz) \ XFS_BMBT_PTR_ADDR(mp, bb, i, xfs_bmbt_maxrecs(mp, sz, 0)) #define XFS_BMAP_BROOT_SPACE_CALC(mp, nrecs) \ (int)(XFS_BMBT_BLOCK_LEN(mp) + \ ((nrecs) * (sizeof(xfs_bmbt_key_t) + sizeof(xfs_bmbt_ptr_t)))) #define XFS_BMAP_BROOT_SPACE(mp, bb) \ (XFS_BMAP_BROOT_SPACE_CALC(mp, be16_to_cpu((bb)->bb_numrecs))) #define XFS_BMDR_SPACE_CALC(nrecs) \ (int)(sizeof(xfs_bmdr_block_t) + \ ((nrecs) * (sizeof(xfs_bmbt_key_t) + sizeof(xfs_bmbt_ptr_t)))) #define XFS_BMAP_BMDR_SPACE(bb) \ (XFS_BMDR_SPACE_CALC(be16_to_cpu((bb)->bb_numrecs))) /* * Maximum number of bmap btree levels. */ #define XFS_BM_MAXLEVELS(mp,w) ((mp)->m_bm_maxlevels[(w)]) /* * Prototypes for xfs_bmap.c to call. */ extern void xfs_bmdr_to_bmbt(struct xfs_inode *, xfs_bmdr_block_t *, int, struct xfs_btree_block *, int); extern void xfs_bmbt_get_all(xfs_bmbt_rec_host_t *r, xfs_bmbt_irec_t *s); extern xfs_filblks_t xfs_bmbt_get_blockcount(xfs_bmbt_rec_host_t *r); extern xfs_fsblock_t xfs_bmbt_get_startblock(xfs_bmbt_rec_host_t *r); extern xfs_fileoff_t xfs_bmbt_get_startoff(xfs_bmbt_rec_host_t *r); extern xfs_exntst_t xfs_bmbt_get_state(xfs_bmbt_rec_host_t *r); extern xfs_filblks_t xfs_bmbt_disk_get_blockcount(xfs_bmbt_rec_t *r); extern xfs_fileoff_t xfs_bmbt_disk_get_startoff(xfs_bmbt_rec_t *r); extern void xfs_bmbt_set_all(xfs_bmbt_rec_host_t *r, xfs_bmbt_irec_t *s); extern void xfs_bmbt_set_allf(xfs_bmbt_rec_host_t *r, xfs_fileoff_t o, xfs_fsblock_t b, xfs_filblks_t c, xfs_exntst_t v); extern void xfs_bmbt_set_blockcount(xfs_bmbt_rec_host_t *r, xfs_filblks_t v); extern void xfs_bmbt_set_startblock(xfs_bmbt_rec_host_t *r, xfs_fsblock_t v); extern void xfs_bmbt_set_startoff(xfs_bmbt_rec_host_t *r, xfs_fileoff_t v); extern void xfs_bmbt_set_state(xfs_bmbt_rec_host_t *r, xfs_exntst_t v); extern void xfs_bmbt_disk_set_allf(xfs_bmbt_rec_t *r, xfs_fileoff_t o, xfs_fsblock_t b, xfs_filblks_t c, xfs_exntst_t v); extern void xfs_bmbt_to_bmdr(struct xfs_mount *, struct xfs_btree_block *, int, xfs_bmdr_block_t *, int); extern int xfs_bmbt_get_maxrecs(struct xfs_btree_cur *, int level); extern int xfs_bmdr_maxrecs(int blocklen, int leaf); extern int xfs_bmbt_maxrecs(struct xfs_mount *, int blocklen, int leaf); extern int xfs_bmbt_change_owner(struct xfs_trans *tp, struct xfs_inode *ip, int whichfork, xfs_ino_t new_owner, struct list_head *buffer_list); extern struct xfs_btree_cur *xfs_bmbt_init_cursor(struct xfs_mount *, struct xfs_trans *, struct xfs_inode *, int); #endif /* __XFS_BMAP_BTREE_H__ */ cgi/linux/net-next.git/patch/fs/xfs/libxfs/xfs_sb.c?id=966d2b04e070bc040319aaebfec09e0144dc3341'>patch) tree4b96156e3d1dd4dfd6039b7c219c9dc4616da52d /fs/xfs/libxfs/xfs_sb.c parent1b1bc42c1692e9b62756323c675a44cb1a1f9dbd (diff)
percpu-refcount: fix reference leak during percpu-atomic transition
percpu_ref_tryget() and percpu_ref_tryget_live() should return "true" IFF they acquire a reference. But the return value from atomic_long_inc_not_zero() is a long and may have high bits set, e.g. PERCPU_COUNT_BIAS, and the return value of the tryget routines is bool so the reference may actually be acquired but the routines return "false" which results in a reference leak since the caller assumes it does not need to do a corresponding percpu_ref_put(). This was seen when performing CPU hotplug during I/O, as hangs in blk_mq_freeze_queue_wait where percpu_ref_kill (blk_mq_freeze_queue_start) raced with percpu_ref_tryget (blk_mq_timeout_work). Sample stack trace: __switch_to+0x2c0/0x450 __schedule+0x2f8/0x970 schedule+0x48/0xc0 blk_mq_freeze_queue_wait+0x94/0x120 blk_mq_queue_reinit_work+0xb8/0x180 blk_mq_queue_reinit_prepare+0x84/0xa0 cpuhp_invoke_callback+0x17c/0x600 cpuhp_up_callbacks+0x58/0x150 _cpu_up+0xf0/0x1c0 do_cpu_up+0x120/0x150 cpu_subsys_online+0x64/0xe0 device_online+0xb4/0x120 online_store+0xb4/0xc0 dev_attr_store+0x68/0xa0 sysfs_kf_write+0x80/0xb0 kernfs_fop_write+0x17c/0x250 __vfs_write+0x6c/0x1e0 vfs_write+0xd0/0x270 SyS_write+0x6c/0x110 system_call+0x38/0xe0 Examination of the queue showed a single reference (no PERCPU_COUNT_BIAS, and __PERCPU_REF_DEAD, __PERCPU_REF_ATOMIC set) and no requests. However, conditions at the time of the race are count of PERCPU_COUNT_BIAS + 0 and __PERCPU_REF_DEAD and __PERCPU_REF_ATOMIC set. The fix is to make the tryget routines use an actual boolean internally instead of the atomic long result truncated to a int. Fixes: e625305b3907 percpu-refcount: make percpu_ref based on longs instead of ints Link: https://bugzilla.kernel.org/show_bug.cgi?id=190751 Signed-off-by: Douglas Miller <dougmill@linux.vnet.ibm.com> Reviewed-by: Jens Axboe <axboe@fb.com> Signed-off-by: Tejun Heo <tj@kernel.org> Fixes: e625305b3907 ("percpu-refcount: make percpu_ref based on longs instead of ints") Cc: stable@vger.kernel.org # v3.18+
Diffstat (limited to 'fs/xfs/libxfs/xfs_sb.c')