/* * Copyright (C) 2011 Fujitsu. All rights reserved. * Written by Miao Xie * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License v2 as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this program; if not, write to the * Free Software Foundation, Inc., 59 Temple Place - Suite 330, * Boston, MA 021110-1307, USA. */ #ifndef __DELAYED_TREE_OPERATION_H #define __DELAYED_TREE_OPERATION_H #include #include #include #include #include #include #include "ctree.h" /* types of the delayed item */ #define BTRFS_DELAYED_INSERTION_ITEM 1 #define BTRFS_DELAYED_DELETION_ITEM 2 struct btrfs_delayed_root { spinlock_t lock; struct list_head node_list; /* * Used for delayed nodes which is waiting to be dealt with by the * worker. If the delayed node is inserted into the work queue, we * drop it from this list. */ struct list_head prepare_list; atomic_t items; /* for delayed items */ atomic_t items_seq; /* for delayed items */ int nodes; /* for delayed nodes */ wait_queue_head_t wait; }; #define BTRFS_DELAYED_NODE_IN_LIST 0 #define BTRFS_DELAYED_NODE_INODE_DIRTY 1 #define BTRFS_DELAYED_NODE_DEL_IREF 2 struct btrfs_delayed_node { u64 inode_id; u64 bytes_reserved; struct btrfs_root *root; /* Used to add the node into the delayed root's node list. */ struct list_head n_list; /* * Used to add the node into the prepare list, the nodes in this list * is waiting to be dealt with by the async worker. 
*/ struct list_head p_list; struct rb_root ins_root; struct rb_root del_root; struct mutex mutex; struct btrfs_inode_item inode_item; atomic_t refs; u64 index_cnt; unsigned long flags; int count; }; struct btrfs_delayed_item { struct rb_node rb_node; struct btrfs_key key; struct list_head tree_list; /* used for batch insert/delete items */ struct list_head readdir_list; /* used for readdir items */ u64 bytes_reserved; struct btrfs_delayed_node *delayed_node; atomic_t refs; int ins_or_del; u32 data_len; char data[0]; }; static inline void btrfs_init_delayed_root( struct btrfs_delayed_root *delayed_root) { atomic_set(&delayed_root->items, 0); atomic_set(&delayed_root->items_seq, 0); delayed_root->nodes = 0; spin_lock_init(&delayed_root->lock); init_waitqueue_head(&delayed_root->wait); INIT_LIST_HEAD(&delayed_root->node_list); INIT_LIST_HEAD(&delayed_root->prepare_list); } int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, const char *name, int name_len, struct inode *dir, struct btrfs_disk_key *disk_key, u8 type, u64 index); int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, struct inode *dir, u64 index); int btrfs_inode_delayed_dir_index_count(struct inode *inode); int btrfs_run_delayed_items(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info); int btrfs_run_delayed_items_nr(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, int nr); void btrfs_balance_delayed_items(struct btrfs_fs_info *fs_info); int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans, struct inode *inode); /* Used for evicting the inode. 
*/ void btrfs_remove_delayed_node(struct inode *inode); void btrfs_kill_delayed_inode_items(struct inode *inode); int btrfs_commit_inode_delayed_inode(struct inode *inode); int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode); int btrfs_fill_inode(struct inode *inode, u32 *rdev); int btrfs_delayed_delete_inode_ref(struct inode *inode); /* Used for drop dead root */ void btrfs_kill_all_delayed_nodes(struct btrfs_root *root); /* Used for clean the transaction */ void btrfs_destroy_delayed_inodes(struct btrfs_fs_info *fs_info); /* Used for readdir() */ bool btrfs_readdir_get_delayed_items(struct inode *inode, struct list_head *ins_list, struct list_head *del_list); void btrfs_readdir_put_delayed_items(struct inode *inode, struct list_head *ins_list, struct list_head *del_list); int btrfs_should_delete_dir_index(struct list_head *del_list, u64 index); int btrfs_readdir_delayed_dir_index(struct dir_context *ctx, struct list_head *ins_list); /* for init */ int __init btrfs_delayed_inode_init(void); void btrfs_delayed_inode_exit(void); /* for debugging */ void btrfs_assert_delayed_root_empty(struct btrfs_fs_info *fs_info); #endif mill@linux.vnet.ibm.com>2017-01-28 06:42:20 -0600 committerTejun Heo <tj@kernel.org>2017-01-28 07:49:42 -0500 commit966d2b04e070bc040319aaebfec09e0144dc3341 (patch) tree4b96156e3d1dd4dfd6039b7c219c9dc4616da52d /tools/perf/scripts/python/sched-migration.py parent1b1bc42c1692e9b62756323c675a44cb1a1f9dbd (diff)
percpu-refcount: fix reference leak during percpu-atomic transition
percpu_ref_tryget() and percpu_ref_tryget_live() should return "true" IFF they acquire a reference. But the return value from atomic_long_inc_not_zero() is a long and may have high bits set, e.g. PERCPU_COUNT_BIAS, and the return value of the tryget routines is bool so the reference may actually be acquired but the routines return "false" which results in a reference leak since the caller assumes it does not need to do a corresponding percpu_ref_put().

This was seen when performing CPU hotplug during I/O, as hangs in blk_mq_freeze_queue_wait where percpu_ref_kill (blk_mq_freeze_queue_start) raced with percpu_ref_tryget (blk_mq_timeout_work).

Sample stack trace:

  __switch_to+0x2c0/0x450
  __schedule+0x2f8/0x970
  schedule+0x48/0xc0
  blk_mq_freeze_queue_wait+0x94/0x120
  blk_mq_queue_reinit_work+0xb8/0x180
  blk_mq_queue_reinit_prepare+0x84/0xa0
  cpuhp_invoke_callback+0x17c/0x600
  cpuhp_up_callbacks+0x58/0x150
  _cpu_up+0xf0/0x1c0
  do_cpu_up+0x120/0x150
  cpu_subsys_online+0x64/0xe0
  device_online+0xb4/0x120
  online_store+0xb4/0xc0
  dev_attr_store+0x68/0xa0
  sysfs_kf_write+0x80/0xb0
  kernfs_fop_write+0x17c/0x250
  __vfs_write+0x6c/0x1e0
  vfs_write+0xd0/0x270
  SyS_write+0x6c/0x110
  system_call+0x38/0xe0

Examination of the queue showed a single reference (no PERCPU_COUNT_BIAS, and __PERCPU_REF_DEAD, __PERCPU_REF_ATOMIC set) and no requests. However, conditions at the time of the race are count of PERCPU_COUNT_BIAS + 0 and __PERCPU_REF_DEAD and __PERCPU_REF_ATOMIC set.

The fix is to make the tryget routines use an actual boolean internally instead of the atomic long result truncated to an int.

Fixes: e625305b3907 percpu-refcount: make percpu_ref based on longs instead of ints
Link: https://bugzilla.kernel.org/show_bug.cgi?id=190751
Signed-off-by: Douglas Miller <dougmill@linux.vnet.ibm.com>
Reviewed-by: Jens Axboe <axboe@fb.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Fixes: e625305b3907 ("percpu-refcount: make percpu_ref based on longs instead of ints")
Cc: stable@vger.kernel.org # v3.18+
Diffstat (limited to 'tools/perf/scripts/python/sched-migration.py')