#!/bin/bash
#
# Exercises IP-in-IP style tunnels whose far end is driven by tc BPF programs.
#
# Topology built by config_device:
#   - three network namespaces (at_ns0, at_ns1, at_ns2), each holding one end
#     of a veth pair (veth0/veth1/veth2);
#   - the peer ends (veth0b/veth1b/veth2b) stay in the current namespace and
#     are enslaved to bridge br0.
#
# at_ns0 and at_ns1 get an ordinary tunnel device ($DEV_NS) with the SAME
# inner address; at_ns2 gets a tunnel device ($DEV) created with the
# `external` flag, on which attach_bpf installs egress/ingress BPF filters
# loaded from tcbpf2_kern.o.
#
# Requirements visible from the commands used: ip, tc, iperf, ping, pkill,
# a readable /sys/kernel/debug/tracing/trace_pipe, and tcbpf2_kern.o in the
# current directory. Creating namespaces/links normally needs root.
#
# Globals shared between functions:
#   DEV_NS - name of the standard tunnel device in at_ns0 / at_ns1
#   DEV    - name of the BPF-driven tunnel device in at_ns2

# Create the namespaces, veth pairs, underlay addresses and the bridge.
function config_device {
  ip netns add at_ns0
  ip netns add at_ns1
  ip netns add at_ns2

  ip link add veth0 type veth peer name veth0b
  ip link add veth1 type veth peer name veth1b
  ip link add veth2 type veth peer name veth2b

  ip link set veth0b up
  ip link set veth1b up
  ip link set veth2b up

  ip link set dev veth0b mtu 1500
  ip link set dev veth1b mtu 1500
  ip link set dev veth2b mtu 1500

  ip link set veth0 netns at_ns0
  ip link set veth1 netns at_ns1
  ip link set veth2 netns at_ns2

  # Underlay IPv4 + IPv6 addresses; `nodad` skips duplicate address detection.
  ip netns exec at_ns0 ip addr add 172.16.1.100/24 dev veth0
  ip netns exec at_ns0 ip addr add 2401:db00::1/64 dev veth0 nodad
  ip netns exec at_ns0 ip link set dev veth0 up

  ip netns exec at_ns1 ip addr add 172.16.1.101/24 dev veth1
  ip netns exec at_ns1 ip addr add 2401:db00::2/64 dev veth1 nodad
  ip netns exec at_ns1 ip link set dev veth1 up

  ip netns exec at_ns2 ip addr add 172.16.1.200/24 dev veth2
  ip netns exec at_ns2 ip addr add 2401:db00::3/64 dev veth2 nodad
  ip netns exec at_ns2 ip link set dev veth2 up

  # Bridge the three host-side veth ends together.
  ip link add br0 type bridge
  ip link set br0 up
  ip link set dev br0 mtu 1500
  ip link set veth0b master br0
  ip link set veth1b master br0
  ip link set veth2b master br0
}

# IPv4-in-IPv4: standard ipip tunnels in at_ns0/at_ns1, `external` ipip
# device in at_ns2. Reads globals DEV_NS and DEV.
function add_ipip_tunnel {
  ip netns exec at_ns0 \
    ip link add dev "$DEV_NS" type ipip local 172.16.1.100 remote 172.16.1.200
  ip netns exec at_ns0 ip link set dev "$DEV_NS" up
  ip netns exec at_ns0 ip addr add dev "$DEV_NS" 10.1.1.100/24

  ip netns exec at_ns1 \
    ip link add dev "$DEV_NS" type ipip local 172.16.1.101 remote 172.16.1.200
  ip netns exec at_ns1 ip link set dev "$DEV_NS" up
  # same inner IP address in at_ns0 and at_ns1
  ip netns exec at_ns1 ip addr add dev "$DEV_NS" 10.1.1.100/24

  ip netns exec at_ns2 ip link add dev "$DEV" type ipip external
  ip netns exec at_ns2 ip link set dev "$DEV" up
  ip netns exec at_ns2 ip addr add dev "$DEV" 10.1.1.200/24
}

# IPv4-in-IPv6: ip6tnl devices in mode ipip6 carrying IPv4 inner addresses.
# Reads globals DEV_NS and DEV.
function add_ipip6_tunnel {
  ip netns exec at_ns0 \
    ip link add dev "$DEV_NS" type ip6tnl mode ipip6 local 2401:db00::1/64 remote 2401:db00::3/64
  ip netns exec at_ns0 ip link set dev "$DEV_NS" up
  ip netns exec at_ns0 ip addr add dev "$DEV_NS" 10.1.1.100/24

  ip netns exec at_ns1 \
    ip link add dev "$DEV_NS" type ip6tnl mode ipip6 local 2401:db00::2/64 remote 2401:db00::3/64
  ip netns exec at_ns1 ip link set dev "$DEV_NS" up
  # same inner IP address in at_ns0 and at_ns1
  ip netns exec at_ns1 ip addr add dev "$DEV_NS" 10.1.1.100/24

  ip netns exec at_ns2 ip link add dev "$DEV" type ip6tnl mode ipip6 external
  ip netns exec at_ns2 ip link set dev "$DEV" up
  ip netns exec at_ns2 ip addr add dev "$DEV" 10.1.1.200/24
}

# IPv6-in-IPv6: ip6tnl devices in mode ip6ip6 carrying IPv6 inner addresses.
# Reads globals DEV_NS and DEV.
function add_ip6ip6_tunnel {
  ip netns exec at_ns0 \
    ip link add dev "$DEV_NS" type ip6tnl mode ip6ip6 local 2401:db00::1/64 remote 2401:db00::3/64
  ip netns exec at_ns0 ip link set dev "$DEV_NS" up
  ip netns exec at_ns0 ip addr add dev "$DEV_NS" 2601:646::1/64

  ip netns exec at_ns1 \
    ip link add dev "$DEV_NS" type ip6tnl mode ip6ip6 local 2401:db00::2/64 remote 2401:db00::3/64
  ip netns exec at_ns1 ip link set dev "$DEV_NS" up
  # same inner IP address in at_ns0 and at_ns1
  ip netns exec at_ns1 ip addr add dev "$DEV_NS" 2601:646::1/64

  ip netns exec at_ns2 ip link add dev "$DEV" type ip6tnl mode ip6ip6 external
  ip netns exec at_ns2 ip link set dev "$DEV" up
  ip netns exec at_ns2 ip addr add dev "$DEV" 2601:646::2/64
}

# Attach BPF classifiers from tcbpf2_kern.o to a device inside at_ns2.
# Arguments:
#   $1 - device name
#   $2 - ELF section of the program attached on egress
#   $3 - ELF section of the program attached on ingress
function attach_bpf {
  # Locals, so the caller's global DEV is not overwritten and no helper
  # variables leak into the global scope.
  local dev=$1
  local set_tunnel=$2
  local get_tunnel=$3

  ip netns exec at_ns2 tc qdisc add dev "$dev" clsact
  ip netns exec at_ns2 tc filter add dev "$dev" egress bpf da obj tcbpf2_kern.o sec "$set_tunnel"
  ip netns exec at_ns2 tc filter add dev "$dev" ingress bpf da obj tcbpf2_kern.o sec "$get_tunnel"
}

# IPv4 over IPv4 tunnel
function test_ipip {
  DEV_NS=ipip_std
  DEV=ipip_bpf
  config_device
  # Debug aid, disabled:
  # tcpdump -nei br0 &
  # NOTE(review): the captured source had the tcpdump and cat commands on one
  # collapsed line; the cat is assumed to be live - confirm against the
  # original file. cleanup runs `pkill cat` to stop it.
  cat /sys/kernel/debug/tracing/trace_pipe &

  add_ipip_tunnel
  attach_bpf "$DEV" ipip_set_tunnel ipip_get_tunnel

  ip netns exec at_ns0 ping -c 1 10.1.1.200
  ip netns exec at_ns2 ping -c 1 10.1.1.100
  ip netns exec at_ns0 iperf -sD -p 5200 > /dev/null
  ip netns exec at_ns1 iperf -sD -p 5201 > /dev/null
  sleep 0.2
  # tcp check _same_ IP over different tunnels
  ip netns exec at_ns2 iperf -c 10.1.1.100 -n 5k -p 5200
  ip netns exec at_ns2 iperf -c 10.1.1.100 -n 5k -p 5201
  cleanup
}

# IPv4 over IPv6 tunnel
function test_ipip6 {
  DEV_NS=ipip_std
  DEV=ipip_bpf
  config_device
  # Debug aid, disabled:
  # tcpdump -nei br0 &
  # NOTE(review): assumed live, see the matching note in test_ipip.
  cat /sys/kernel/debug/tracing/trace_pipe &

  add_ipip6_tunnel
  attach_bpf "$DEV" ipip6_set_tunnel ipip6_get_tunnel

  ip netns exec at_ns0 ping -c 1 10.1.1.200
  ip netns exec at_ns2 ping -c 1 10.1.1.100
  ip netns exec at_ns0 iperf -sD -p 5200 > /dev/null
  ip netns exec at_ns1 iperf -sD -p 5201 > /dev/null
  sleep 0.2
  # tcp check _same_ IP over different tunnels
  ip netns exec at_ns2 iperf -c 10.1.1.100 -n 5k -p 5200
  ip netns exec at_ns2 iperf -c 10.1.1.100 -n 5k -p 5201
  cleanup
}

# IPv6 over IPv6 tunnel
function test_ip6ip6 {
  DEV_NS=ipip_std
  DEV=ipip_bpf
  config_device
  # Debug aid, disabled:
  # tcpdump -nei br0 &
  # NOTE(review): assumed live, see the matching note in test_ipip.
  cat /sys/kernel/debug/tracing/trace_pipe &

  add_ip6ip6_tunnel
  attach_bpf "$DEV" ip6ip6_set_tunnel ip6ip6_get_tunnel

  ip netns exec at_ns0 ping -6 -c 1 2601:646::2
  ip netns exec at_ns2 ping -6 -c 1 2601:646::1
  ip netns exec at_ns0 iperf -6sD -p 5200 > /dev/null
  ip netns exec at_ns1 iperf -6sD -p 5201 > /dev/null
  sleep 0.2
  # tcp check _same_ IP over different tunnels
  ip netns exec at_ns2 iperf -6c 2601:646::1 -n 5k -p 5200
  ip netns exec at_ns2 iperf -6c 2601:646::1 -n 5k -p 5201
  cleanup
}

# Best-effort teardown of everything the tests may have created.
# Exit-on-error and tracing are switched off for the duration so that
# missing objects (e.g. on the very first run) do not abort the script;
# both are switched ON when the function returns, so every command that
# runs after the first cleanup call is traced and fatal on failure.
function cleanup {
  set +ex
  pkill iperf
  ip netns delete at_ns0
  ip netns delete at_ns1
  ip netns delete at_ns2
  ip link del veth0
  ip link del veth1
  ip link del veth2
  ip link del br0
  pkill tcpdump
  pkill cat
  set -ex
}

# Start from a clean slate (this also enables `set -ex` for the tests).
cleanup
echo "Testing IP tunnels..."
test_ipip
test_ipip6
test_ip6ip6
echo "*** PASS ***"
# NOTE(review): trailing non-script text found after the script in the
# captured page, kept here as a comment:
#   parent 1b1bc42c1692e9b62756323c675a44cb1a1f9dbd (diff)
percpu-refcount: fix reference leak during percpu-atomic transition
percpu_ref_tryget() and percpu_ref_tryget_live() should return "true" IFF they acquire a reference. But the return value from atomic_long_inc_not_zero() is a long and may have high bits set, e.g. PERCPU_COUNT_BIAS, and the return value of the tryget routines is bool so the reference may actually be acquired but the routines return "false" which results in a reference leak since the caller assumes it does not need to do a corresponding percpu_ref_put().

This was seen when performing CPU hotplug during I/O, as hangs in blk_mq_freeze_queue_wait where percpu_ref_kill (blk_mq_freeze_queue_start) raced with percpu_ref_tryget (blk_mq_timeout_work).

Sample stack trace:

    __switch_to+0x2c0/0x450
    __schedule+0x2f8/0x970
    schedule+0x48/0xc0
    blk_mq_freeze_queue_wait+0x94/0x120
    blk_mq_queue_reinit_work+0xb8/0x180
    blk_mq_queue_reinit_prepare+0x84/0xa0
    cpuhp_invoke_callback+0x17c/0x600
    cpuhp_up_callbacks+0x58/0x150
    _cpu_up+0xf0/0x1c0
    do_cpu_up+0x120/0x150
    cpu_subsys_online+0x64/0xe0
    device_online+0xb4/0x120
    online_store+0xb4/0xc0
    dev_attr_store+0x68/0xa0
    sysfs_kf_write+0x80/0xb0
    kernfs_fop_write+0x17c/0x250
    __vfs_write+0x6c/0x1e0
    vfs_write+0xd0/0x270
    SyS_write+0x6c/0x110
    system_call+0x38/0xe0

Examination of the queue showed a single reference (no PERCPU_COUNT_BIAS, and __PERCPU_REF_DEAD, __PERCPU_REF_ATOMIC set) and no requests. However, conditions at the time of the race are count of PERCPU_COUNT_BIAS + 0 and __PERCPU_REF_DEAD and __PERCPU_REF_ATOMIC set.

The fix is to make the tryget routines use an actual boolean internally instead of the atomic long result truncated to an int.

Fixes: e625305b3907 percpu-refcount: make percpu_ref based on longs instead of ints
Link: https://bugzilla.kernel.org/show_bug.cgi?id=190751
Signed-off-by: Douglas Miller <dougmill@linux.vnet.ibm.com>
Reviewed-by: Jens Axboe <axboe@fb.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Fixes: e625305b3907 ("percpu-refcount: make percpu_ref based on longs instead of ints")
Cc: stable@vger.kernel.org # v3.18+
Diffstat (limited to 'include/trace/events/sunrpc.h')