/* * netsniff-ng - the packet sniffing beast * Copyright 2009, 2010 Daniel Borkmann. * Subject to the GPL, version 2. */ #ifndef RING_H #define RING_H /* * "I love the smell of 10GbE in the morning. Smells like ... victory." * - W. Richard Stevens, "Secret Teachings of the UNIX Environment" */ #include #include #include #include #include #include #include #include #include "built_in.h" #include "die.h" #include "dev.h" #include "config.h" union tpacket_uhdr { struct tpacket_hdr *h1; struct tpacket2_hdr *h2; #ifdef HAVE_TPACKET3 struct tpacket3_hdr *h3; #endif void *raw; }; struct frame_map { struct tpacket2_hdr tp_h __aligned_tpacket; struct sockaddr_ll s_ll __align_tpacket(sizeof(struct tpacket2_hdr)); }; #ifdef HAVE_TPACKET3 struct block_desc { uint32_t version; uint32_t offset_to_priv; struct tpacket_hdr_v1 h1; }; #endif struct ring { struct iovec *frames; uint8_t *mm_space; size_t mm_len; struct sockaddr_ll s_ll; union { struct tpacket_req layout; #ifdef HAVE_TPACKET3 struct tpacket_req3 layout3; #endif uint8_t raw; }; }; static inline void next_rnd_slot(unsigned int *it, struct ring *ring) { *it = rand() % ring->layout.tp_frame_nr; } static inline unsigned int ring_size(char *ifname, unsigned int size) { if (size > 0) return size; /* * Device bitrate in bytes times two as ring size. * Fallback => ~ 64,00 MB * 10 MBit => ~ 2,38 MB * 54 MBit => ~ 12,88 MB * 100 MBit => ~ 23,84 MB * 300 MBit => ~ 71,52 MB * 1.000 MBit => ~ 238,42 MB * 10.000 MBit => ~ 2.384.18 MB */ size = device_bitrate(ifname); size = (size * 1000000) / 8; size = size * 2; if (size == 0) size = 1 << 26; return round_up_cacheline(size); } static inline unsigned int ring_frame_size(struct ring *ring) { return ring->layout.tp_frame_size; } static inline void ring_verify_layout(struct ring *ring) { bug_on(ring->layout.tp_block_size < ring->layout.tp_frame_size); bug_on((ring->layout.tp_block_size % ring->layout.tp_frame_size) != 0); bug_on((ring->layout.tp_block_size % getpagesize()) != 0); } static inline void tpacket_hdr_clone(struct tpacket2_hdr *thdrd, struct tpacket2_hdr *thdrs) { thdrd->tp_sec = thdrs->tp_sec; thdrd->tp_nsec = thdrs->tp_nsec; thdrd->tp_snaplen = thdrs->tp_snaplen; thdrd->tp_len = thdrs->tp_len; } static inline void prepare_polling(int sock, struct pollfd *pfd) { memset(pfd, 0, sizeof(*pfd)); pfd->fd = sock; pfd->revents = 0; pfd->events = POLLIN | POLLRDNORM | POLLERR; } static inline void __set_sockopt_tpacket(int sock, int val) { int ret = setsockopt(sock, SOL_PACKET, PACKET_VERSION, &val, sizeof(val)); if (ret) panic("Cannot set tpacketv2!\n"); } static inline void set_sockopt_tpacket_v2(int sock) { __set_sockopt_tpacket(sock, TPACKET_V2); } static inline void set_sockopt_tpacket_v3(int sock) { #ifdef HAVE_TPACKET3 __set_sockopt_tpacket(sock, TPACKET_V3); #endif } static inline int get_sockopt_tpacket(int sock) { int val, ret; socklen_t len = sizeof(val); ret = getsockopt(sock, SOL_PACKET, PACKET_VERSION, &val, &len); if (ret) panic("Cannot get tpacket version!\n"); return val; } extern void mmap_ring_generic(int sock, struct ring *ring); extern void alloc_ring_frames_generic(struct ring *ring, int num, size_t size); extern void bind_ring_generic(int sock, struct ring *ring, int ifindex, bool tx_only); #endif /* RING_H */ bel'>mode:
authorEric Dumazet <edumazet@google.com>2017-02-02 10:31:35 -0800
committerDavid S. Miller <davem@davemloft.net>2017-02-03 16:11:07 -0500
commit5fa8bbda38c668e56b0c6cdecced2eac2fe36dec (patch)
treead78a619ea5877e33ca1c380e83e16f721983c5b
parent3808d34838184fd29088d6b3a364ba2f1c018fb6 (diff)
net: use a work queue to defer net_disable_timestamp() work
Dmitry reported a warning [1] showing that we were calling net_disable_timestamp() -> static_key_slow_dec() from a non process context. Grabbing a mutex while holding a spinlock or rcu_read_lock() is not allowed. As Cong suggested, we now use a work queue. It is possible netstamp_clear() exits while netstamp_needed_deferred is not zero, but it is probably not worth trying to do better than that. netstamp_needed_deferred atomic tracks the exact number of deferred decrements. [1] [ INFO: suspicious RCU usage. ] 4.10.0-rc5+ #192 Not tainted ------------------------------- ./include/linux/rcupdate.h:561 Illegal context switch in RCU read-side critical section! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 0 2 locks held by syz-executor14/23111: #0: (sk_lock-AF_INET6){+.+.+.}, at: [<ffffffff83a35c35>] lock_sock include/net/sock.h:1454 [inline] #0: (sk_lock-AF_INET6){+.+.+.}, at: [<ffffffff83a35c35>] rawv6_sendmsg+0x1e65/0x3ec0 net/ipv6/raw.c:919 #1: (rcu_read_lock){......}, at: [<ffffffff83ae2678>] nf_hook include/linux/netfilter.h:201 [inline] #1: (rcu_read_lock){......}, at: [<ffffffff83ae2678>] __ip6_local_out+0x258/0x840 net/ipv6/output_core.c:160 stack backtrace: CPU: 2 PID: 23111 Comm: syz-executor14 Not tainted 4.10.0-rc5+ #192 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:15 [inline] dump_stack+0x2ee/0x3ef lib/dump_stack.c:51 lockdep_rcu_suspicious+0x139/0x180 kernel/locking/lockdep.c:4452 rcu_preempt_sleep_check include/linux/rcupdate.h:560 [inline] ___might_sleep+0x560/0x650 kernel/sched/core.c:7748 __might_sleep+0x95/0x1a0 kernel/sched/core.c:7739 mutex_lock_nested+0x24f/0x1730 kernel/locking/mutex.c:752 atomic_dec_and_mutex_lock+0x119/0x160 kernel/locking/mutex.c:1060 __static_key_slow_dec+0x7a/0x1e0 kernel/jump_label.c:149 static_key_slow_dec+0x51/0x90 kernel/jump_label.c:174 net_disable_timestamp+0x3b/0x50 net/core/dev.c:1728 sock_disable_timestamp+0x98/0xc0 net/core/sock.c:403 __sk_destruct+0x27d/0x6b0 net/core/sock.c:1441 sk_destruct+0x47/0x80 net/core/sock.c:1460 __sk_free+0x57/0x230 net/core/sock.c:1468 sock_wfree+0xae/0x120 net/core/sock.c:1645 skb_release_head_state+0xfc/0x200 net/core/skbuff.c:655 skb_release_all+0x15/0x60 net/core/skbuff.c:668 __kfree_skb+0x15/0x20 net/core/skbuff.c:684 kfree_skb+0x16e/0x4c0 net/core/skbuff.c:705 inet_frag_destroy+0x121/0x290 net/ipv4/inet_fragment.c:304 inet_frag_put include/net/inet_frag.h:133 [inline] nf_ct_frag6_gather+0x1106/0x3840 net/ipv6/netfilter/nf_conntrack_reasm.c:617 ipv6_defrag+0x1be/0x2b0 net/ipv6/netfilter/nf_defrag_ipv6_hooks.c:68 nf_hook_entry_hookfn include/linux/netfilter.h:102 [inline] nf_hook_slow+0xc3/0x290 net/netfilter/core.c:310 nf_hook include/linux/netfilter.h:212 [inline] __ip6_local_out+0x489/0x840 net/ipv6/output_core.c:160 ip6_local_out+0x2d/0x170 net/ipv6/output_core.c:170 ip6_send_skb+0xa1/0x340 net/ipv6/ip6_output.c:1722 ip6_push_pending_frames+0xb3/0xe0 net/ipv6/ip6_output.c:1742 rawv6_push_pending_frames net/ipv6/raw.c:613 [inline] rawv6_sendmsg+0x2d1a/0x3ec0 net/ipv6/raw.c:927 inet_sendmsg+0x164/0x5b0 net/ipv4/af_inet.c:744 sock_sendmsg_nosec net/socket.c:635 [inline] sock_sendmsg+0xca/0x110 net/socket.c:645 sock_write_iter+0x326/0x600 net/socket.c:848 do_iter_readv_writev+0x2e3/0x5b0 fs/read_write.c:695 do_readv_writev+0x42c/0x9b0 fs/read_write.c:872 vfs_writev+0x87/0xc0 fs/read_write.c:911 do_writev+0x110/0x2c0 fs/read_write.c:944 SYSC_writev fs/read_write.c:1017 [inline] SyS_writev+0x27/0x30 fs/read_write.c:1014 entry_SYSCALL_64_fastpath+0x1f/0xc2 RIP: 0033:0x445559 RSP: 002b:00007f6f46fceb58 EFLAGS: 00000292 ORIG_RAX: 0000000000000014 RAX: ffffffffffffffda RBX: 0000000000000005 RCX: 0000000000445559 RDX: 0000000000000001 RSI: 0000000020f1eff0 RDI: 0000000000000005 RBP: 00000000006e19c0 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000292 R12: 0000000000700000 R13: 0000000020f59000 R14: 0000000000000015 R15: 0000000000020400 BUG: sleeping function called from invalid context at kernel/locking/mutex.c:752 in_atomic(): 1, irqs_disabled(): 0, pid: 23111, name: syz-executor14 INFO: lockdep is turned off. CPU: 2 PID: 23111 Comm: syz-executor14 Not tainted 4.10.0-rc5+ #192 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:15 [inline] dump_stack+0x2ee/0x3ef lib/dump_stack.c:51 ___might_sleep+0x47e/0x650 kernel/sched/core.c:7780 __might_sleep+0x95/0x1a0 kernel/sched/core.c:7739 mutex_lock_nested+0x24f/0x1730 kernel/locking/mutex.c:752 atomic_dec_and_mutex_lock+0x119/0x160 kernel/locking/mutex.c:1060 __static_key_slow_dec+0x7a/0x1e0 kernel/jump_label.c:149 static_key_slow_dec+0x51/0x90 kernel/jump_label.c:174 net_disable_timestamp+0x3b/0x50 net/core/dev.c:1728 sock_disable_timestamp+0x98/0xc0 net/core/sock.c:403 __sk_destruct+0x27d/0x6b0 net/core/sock.c:1441 sk_destruct+0x47/0x80 net/core/sock.c:1460 __sk_free+0x57/0x230 net/core/sock.c:1468 sock_wfree+0xae/0x120 net/core/sock.c:1645 skb_release_head_state+0xfc/0x200 net/core/skbuff.c:655 skb_release_all+0x15/0x60 net/core/skbuff.c:668 __kfree_skb+0x15/0x20 net/core/skbuff.c:684 kfree_skb+0x16e/0x4c0 net/core/skbuff.c:705 inet_frag_destroy+0x121/0x290 net/ipv4/inet_fragment.c:304 inet_frag_put include/net/inet_frag.h:133 [inline] nf_ct_frag6_gather+0x1106/0x3840 net/ipv6/netfilter/nf_conntrack_reasm.c:617 ipv6_defrag+0x1be/0x2b0 net/ipv6/netfilter/nf_defrag_ipv6_hooks.c:68 nf_hook_entry_hookfn include/linux/netfilter.h:102 [inline] nf_hook_slow+0xc3/0x290 net/netfilter/core.c:310 nf_hook include/linux/netfilter.h:212 [inline] __ip6_local_out+0x489/0x840 net/ipv6/output_core.c:160 ip6_local_out+0x2d/0x170 net/ipv6/output_core.c:170 ip6_send_skb+0xa1/0x340 net/ipv6/ip6_output.c:1722 ip6_push_pending_frames+0xb3/0xe0 net/ipv6/ip6_output.c:1742 rawv6_push_pending_frames net/ipv6/raw.c:613 [inline] rawv6_sendmsg+0x2d1a/0x3ec0 net/ipv6/raw.c:927 inet_sendmsg+0x164/0x5b0 net/ipv4/af_inet.c:744 sock_sendmsg_nosec net/socket.c:635 [inline] sock_sendmsg+0xca/0x110 net/socket.c:645 sock_write_iter+0x326/0x600 net/socket.c:848 do_iter_readv_writev+0x2e3/0x5b0 fs/read_write.c:695 do_readv_writev+0x42c/0x9b0 fs/read_write.c:872 vfs_writev+0x87/0xc0 fs/read_write.c:911 do_writev+0x110/0x2c0 fs/read_write.c:944 SYSC_writev fs/read_write.c:1017 [inline] SyS_writev+0x27/0x30 fs/read_write.c:1014 entry_SYSCALL_64_fastpath+0x1f/0xc2 RIP: 0033:0x445559 Fixes: b90e5794c5bd ("net: dont call jump_label_dec from irq context") Suggested-by: Cong Wang <xiyou.wangcong@gmail.com> Reported-by: Dmitry Vyukov <dvyukov@google.com> Signed-off-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--net/core/dev.c31
1 files changed, 13 insertions, 18 deletions
diff --git a/net/core/dev.c b/net/core/dev.c