/* * Read-Copy Update definitions shared among RCU implementations. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * * Copyright IBM Corporation, 2011 * * Author: Paul E. McKenney */ #ifndef __LINUX_RCU_H #define __LINUX_RCU_H #include #ifdef CONFIG_RCU_TRACE #define RCU_TRACE(stmt) stmt #else /* #ifdef CONFIG_RCU_TRACE */ #define RCU_TRACE(stmt) #endif /* #else #ifdef CONFIG_RCU_TRACE */ /* * Process-level increment to ->dynticks_nesting field. This allows for * architectures that use half-interrupts and half-exceptions from * process context. * * DYNTICK_TASK_NEST_MASK defines a field of width DYNTICK_TASK_NEST_WIDTH * that counts the number of process-based reasons why RCU cannot * consider the corresponding CPU to be idle, and DYNTICK_TASK_NEST_VALUE * is the value used to increment or decrement this field. * * The rest of the bits could in principle be used to count interrupts, * but this would mean that a negative-one value in the interrupt * field could incorrectly zero out the DYNTICK_TASK_NEST_MASK field. * We therefore provide a two-bit guard field defined by DYNTICK_TASK_MASK * that is set to DYNTICK_TASK_FLAG upon initial exit from idle. * The DYNTICK_TASK_EXIT_IDLE value is thus the combined value used upon * initial exit from idle. */ #define DYNTICK_TASK_NEST_WIDTH 7 #define DYNTICK_TASK_NEST_VALUE ((LLONG_MAX >> DYNTICK_TASK_NEST_WIDTH) + 1) #define DYNTICK_TASK_NEST_MASK (LLONG_MAX - DYNTICK_TASK_NEST_VALUE + 1) #define DYNTICK_TASK_FLAG ((DYNTICK_TASK_NEST_VALUE / 8) * 2) #define DYNTICK_TASK_MASK ((DYNTICK_TASK_NEST_VALUE / 8) * 3) #define DYNTICK_TASK_EXIT_IDLE (DYNTICK_TASK_NEST_VALUE + \ DYNTICK_TASK_FLAG) /* * debug_rcu_head_queue()/debug_rcu_head_unqueue() are used internally * by call_rcu() and rcu callback execution, and are therefore not part of the * RCU API. Leaving in rcupdate.h because they are used by all RCU flavors. */ #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD # define STATE_RCU_HEAD_READY 0 # define STATE_RCU_HEAD_QUEUED 1 extern struct debug_obj_descr rcuhead_debug_descr; static inline int debug_rcu_head_queue(struct rcu_head *head) { int r1; r1 = debug_object_activate(head, &rcuhead_debug_descr); debug_object_active_state(head, &rcuhead_debug_descr, STATE_RCU_HEAD_READY, STATE_RCU_HEAD_QUEUED); return r1; } static inline void debug_rcu_head_unqueue(struct rcu_head *head) { debug_object_active_state(head, &rcuhead_debug_descr, STATE_RCU_HEAD_QUEUED, STATE_RCU_HEAD_READY); debug_object_deactivate(head, &rcuhead_debug_descr); } #else /* !CONFIG_DEBUG_OBJECTS_RCU_HEAD */ static inline int debug_rcu_head_queue(struct rcu_head *head) { return 0; } static inline void debug_rcu_head_unqueue(struct rcu_head *head) { } #endif /* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */ void kfree(const void *); /* * Reclaim the specified callback, either by invoking it (non-lazy case) * or freeing it directly (lazy case). Return true if lazy, false otherwise. */ static inline bool __rcu_reclaim(const char *rn, struct rcu_head *head) { unsigned long offset = (unsigned long)head->func; rcu_lock_acquire(&rcu_callback_map); if (__is_kfree_rcu_offset(offset)) { RCU_TRACE(trace_rcu_invoke_kfree_callback(rn, head, offset)); kfree((void *)head - offset); rcu_lock_release(&rcu_callback_map); return true; } else { RCU_TRACE(trace_rcu_invoke_callback(rn, head)); head->func(head); rcu_lock_release(&rcu_callback_map); return false; } } #ifdef CONFIG_RCU_STALL_COMMON extern int rcu_cpu_stall_suppress; int rcu_jiffies_till_stall_check(void); #endif /* #ifdef CONFIG_RCU_STALL_COMMON */ /* * Strings used in tracepoints need to be exported via the * tracing system such that tools like perf and trace-cmd can * translate the string address pointers to actual text. */ #define TPS(x) tracepoint_string(x) void rcu_early_boot_tests(void); /* * This function really isn't for public consumption, but RCU is special in * that context switches can allow the state machine to make progress. */ extern void resched_cpu(int cpu); #endif /* __LINUX_RCU_H */ commit966d2b04e070bc040319aaebfec09e0144dc3341 (patch) tree4b96156e3d1dd4dfd6039b7c219c9dc4616da52d /net/bridge/br_stp_bpdu.c parent1b1bc42c1692e9b62756323c675a44cb1a1f9dbd (diff)
percpu-refcount: fix reference leak during percpu-atomic transition
percpu_ref_tryget() and percpu_ref_tryget_live() should return "true" IFF they acquire a reference. But the return value from atomic_long_inc_not_zero() is a long and may have high bits set, e.g. PERCPU_COUNT_BIAS, and the return value of the tryget routines is bool so the reference may actually be acquired but the routines return "false" which results in a reference leak since the caller assumes it does not need to do a corresponding percpu_ref_put(). This was seen when performing CPU hotplug during I/O, as hangs in blk_mq_freeze_queue_wait where percpu_ref_kill (blk_mq_freeze_queue_start) raced with percpu_ref_tryget (blk_mq_timeout_work). Sample stack trace: __switch_to+0x2c0/0x450 __schedule+0x2f8/0x970 schedule+0x48/0xc0 blk_mq_freeze_queue_wait+0x94/0x120 blk_mq_queue_reinit_work+0xb8/0x180 blk_mq_queue_reinit_prepare+0x84/0xa0 cpuhp_invoke_callback+0x17c/0x600 cpuhp_up_callbacks+0x58/0x150 _cpu_up+0xf0/0x1c0 do_cpu_up+0x120/0x150 cpu_subsys_online+0x64/0xe0 device_online+0xb4/0x120 online_store+0xb4/0xc0 dev_attr_store+0x68/0xa0 sysfs_kf_write+0x80/0xb0 kernfs_fop_write+0x17c/0x250 __vfs_write+0x6c/0x1e0 vfs_write+0xd0/0x270 SyS_write+0x6c/0x110 system_call+0x38/0xe0 Examination of the queue showed a single reference (no PERCPU_COUNT_BIAS, and __PERCPU_REF_DEAD, __PERCPU_REF_ATOMIC set) and no requests. However, conditions at the time of the race are count of PERCPU_COUNT_BIAS + 0 and __PERCPU_REF_DEAD and __PERCPU_REF_ATOMIC set. The fix is to make the tryget routines use an actual boolean internally instead of the atomic long result truncated to a int. Fixes: e625305b3907 percpu-refcount: make percpu_ref based on longs instead of ints Link: https://bugzilla.kernel.org/show_bug.cgi?id=190751 Signed-off-by: Douglas Miller <dougmill@linux.vnet.ibm.com> Reviewed-by: Jens Axboe <axboe@fb.com> Signed-off-by: Tejun Heo <tj@kernel.org> Fixes: e625305b3907 ("percpu-refcount: make percpu_ref based on longs instead of ints") Cc: stable@vger.kernel.org # v3.18+
Diffstat (limited to 'net/bridge/br_stp_bpdu.c')