/* * sctp_probe - Observe the SCTP flow with kprobes. * * The idea for this came from Werner Almesberger's umlsim * Copyright (C) 2004, Stephen Hemminger * * Modified for SCTP from Stephen Hemminger's code * Copyright (C) 2010, Wei Yongjun * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include #include #include #include #include #include #include #include #include #include #include MODULE_SOFTDEP("pre: sctp"); MODULE_AUTHOR("Wei Yongjun "); MODULE_DESCRIPTION("SCTP snooper"); MODULE_LICENSE("GPL"); static int port __read_mostly = 0; MODULE_PARM_DESC(port, "Port to match (0=all)"); module_param(port, int, 0); static unsigned int fwmark __read_mostly = 0; MODULE_PARM_DESC(fwmark, "skb mark to match (0=no mark)"); module_param(fwmark, uint, 0); static int bufsize __read_mostly = 64 * 1024; MODULE_PARM_DESC(bufsize, "Log buffer size (default 64k)"); module_param(bufsize, int, 0); static int full __read_mostly = 1; MODULE_PARM_DESC(full, "Full log (1=every ack packet received, 0=only cwnd changes)"); module_param(full, int, 0); static const char procname[] = "sctpprobe"; static struct { struct kfifo fifo; spinlock_t lock; wait_queue_head_t wait; struct timespec64 tstart; } sctpw; static __printf(1, 2) void printl(const char *fmt, ...) { va_list args; int len; char tbuf[256]; va_start(args, fmt); len = vscnprintf(tbuf, sizeof(tbuf), fmt, args); va_end(args); kfifo_in_locked(&sctpw.fifo, tbuf, len, &sctpw.lock); wake_up(&sctpw.wait); } static int sctpprobe_open(struct inode *inode, struct file *file) { kfifo_reset(&sctpw.fifo); ktime_get_ts64(&sctpw.tstart); return 0; } static ssize_t sctpprobe_read(struct file *file, char __user *buf, size_t len, loff_t *ppos) { int error = 0, cnt = 0; unsigned char *tbuf; if (!buf) return -EINVAL; if (len == 0) return 0; tbuf = vmalloc(len); if (!tbuf) return -ENOMEM; error = wait_event_interruptible(sctpw.wait, kfifo_len(&sctpw.fifo) != 0); if (error) goto out_free; cnt = kfifo_out_locked(&sctpw.fifo, tbuf, len, &sctpw.lock); error = copy_to_user(buf, tbuf, cnt) ? -EFAULT : 0; out_free: vfree(tbuf); return error ? error : cnt; } static const struct file_operations sctpprobe_fops = { .owner = THIS_MODULE, .open = sctpprobe_open, .read = sctpprobe_read, .llseek = noop_llseek, }; static sctp_disposition_t jsctp_sf_eat_sack(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, sctp_cmd_seq_t *commands) { struct sctp_chunk *chunk = arg; struct sk_buff *skb = chunk->skb; struct sctp_transport *sp; static __u32 lcwnd = 0; struct timespec64 now; sp = asoc->peer.primary_path; if (((port == 0 && fwmark == 0) || asoc->peer.port == port || ep->base.bind_addr.port == port || (fwmark > 0 && skb->mark == fwmark)) && (full || sp->cwnd != lcwnd)) { lcwnd = sp->cwnd; ktime_get_ts64(&now); now = timespec64_sub(now, sctpw.tstart); printl("%lu.%06lu ", (unsigned long) now.tv_sec, (unsigned long) now.tv_nsec / NSEC_PER_USEC); printl("%p %5d %5d %5d %8d %5d ", asoc, ep->base.bind_addr.port, asoc->peer.port, asoc->pathmtu, asoc->peer.rwnd, asoc->unack_data); list_for_each_entry(sp, &asoc->peer.transport_addr_list, transports) { if (sp == asoc->peer.primary_path) printl("*"); printl("%pISc %2u %8u %8u %8u %8u %8u ", &sp->ipaddr, sp->state, sp->cwnd, sp->ssthresh, sp->flight_size, sp->partial_bytes_acked, sp->pathmtu); } printl("\n"); } jprobe_return(); return 0; } static struct jprobe sctp_recv_probe = { .kp = { .symbol_name = "sctp_sf_eat_sack_6_2", }, .entry = jsctp_sf_eat_sack, }; static __init int sctp_setup_jprobe(void) { int ret = register_jprobe(&sctp_recv_probe); if (ret) { if (request_module("sctp")) goto out; ret = register_jprobe(&sctp_recv_probe); } out: return ret; } static __init int sctpprobe_init(void) { int ret = -ENOMEM; /* Warning: if the function signature of sctp_sf_eat_sack_6_2, * has been changed, you also have to change the signature of * jsctp_sf_eat_sack, otherwise you end up right here! */ BUILD_BUG_ON(__same_type(sctp_sf_eat_sack_6_2, jsctp_sf_eat_sack) == 0); init_waitqueue_head(&sctpw.wait); spin_lock_init(&sctpw.lock); if (kfifo_alloc(&sctpw.fifo, bufsize, GFP_KERNEL)) return ret; if (!proc_create(procname, S_IRUSR, init_net.proc_net, &sctpprobe_fops)) goto free_kfifo; ret = sctp_setup_jprobe(); if (ret) goto remove_proc; pr_info("probe registered (port=%d/fwmark=%u) bufsize=%u\n", port, fwmark, bufsize); return 0; remove_proc: remove_proc_entry(procname, init_net.proc_net); free_kfifo: kfifo_free(&sctpw.fifo); return ret; } static __exit void sctpprobe_exit(void) { kfifo_free(&sctpw.fifo); remove_proc_entry(procname, init_net.proc_net); unregister_jprobe(&sctp_recv_probe); } module_init(sctpprobe_init); module_exit(sctpprobe_exit); er-highlight'> Pull networking fixes from David Miller: 1) Fix handling of interrupt status in stmmac driver. Just because we have masked the event from generating interrupts, doesn't mean the bit won't still be set in the interrupt status register. From Alexey Brodkin. 2) Fix DMA API debugging splats in gianfar driver, from Arseny Solokha. 3) Fix off-by-one error in __ip6_append_data(), from Vlad Yasevich. 4) cls_flow does not match on icmpv6 codes properly, from Simon Horman. 5) Initial MAC address can be set incorrectly in some scenerios, from Ivan Vecera. 6) Packet header pointer arithmetic fix in ip6_tnl_parse_tlv_end_lim(), from Dan Carpenter. 7) Fix divide by zero in __tcp_select_window(), from Eric Dumazet. 8) Fix crash in iwlwifi when unregistering thermal zone, from Jens Axboe. 9) Check for DMA mapping errors in starfire driver, from Alexey Khoroshilov. * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net: (31 commits) tcp: fix 0 divide in __tcp_select_window() ipv6: pointer math error in ip6_tnl_parse_tlv_enc_lim() net: fix ndo_features_check/ndo_fix_features comment ordering net/sched: matchall: Fix configuration race be2net: fix initial MAC setting ipv6: fix flow labels when the traffic class is non-0 net: thunderx: avoid dereferencing xcv when NULL net/sched: cls_flower: Correct matching on ICMPv6 code ipv6: Paritially checksum full MTU frames net/mlx4_core: Avoid command timeouts during VF driver device shutdown gianfar: synchronize DMA API usage by free_skb_rx_queue w/ gfar_new_page net: ethtool: add support for 2500BaseT and 5000BaseT link modes can: bcm: fix hrtimer/tasklet termination in bcm op removal net: adaptec: starfire: add checks for dma mapping errors net: phy: micrel: KSZ8795 do not set SUPPORTED_[Asym_]Pause can: Fix kernel panic at security_sock_rcv_skb net: macb: Fix 64 bit addressing support for GEM stmmac: Discard masked flags in interrupt status register net/mlx5e: Check ets capability before ets query FW command net/mlx5e: Fix update of hash function/key via ethtool ... 2017-02-01net/sched: act_psample: Remove unnecessary ASSERT_RTNLYotam Gigi1-1/+0 The ASSERT_RTNL is not necessary in the init function, as it does not touch any rtnl protected structures, as opposed to the mirred action which does have to hold a net device. Reported-by: Cong Wang <xiyou.wangcong@gmail.com> Reviewed-by: Jiri Pirko <jiri@mellanox.com> Signed-off-by: Yotam Gigi <yotamg@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net> 2017-02-01net/sched: act_sample: Fix error path in initYotam Gigi1-1/+4 Fix error path of in sample init, by releasing the tc hash in case of failure in psample_group creation. Fixes: 5c5670fae430 ("net/sched: Introduce sample tc action") Reported-by: Cong Wang <xiyou.wangcong@gmail.com> Reviewed-by: Jiri Pirko <jiri@mellanox.com> Signed-off-by: Yotam Gigi <yotamg@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net> 2017-02-01tcp: fix 0 divide in __tcp_select_window()Eric Dumazet1-2/+4 syszkaller fuzzer was able to trigger a divide by zero, when TCP window scaling is not enabled. SO_RCVBUF can be used not only to increase sk_rcvbuf, also to decrease it below current receive buffers utilization. If mss is negative or 0, just return a zero TCP window. Signed-off-by: Eric Dumazet <edumazet@google.com> Reported-by: Dmitry Vyukov <dvyukov@google.com> Acked-by: Neal Cardwell <ncardwell@google.com> Signed-off-by: David S. Miller <davem@davemloft.net> 2017-02-01ipv6: pointer math error in ip6_tnl_parse_tlv_enc_lim()Dan Carpenter1-1/+1 Casting is a high precedence operation but "off" and "i" are in terms of bytes so we need to have some parenthesis here. Fixes: fbfa743a9d2a ("ipv6: fix ip6_tnl_parse_tlv_enc_lim()") Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com> Acked-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net> 2017-02-01net: ipv6: add NLM_F_APPEND in notifications when applicableDavid Ahern1-0/+3 IPv6 does not set the NLM_F_APPEND flag in notifications to signal that a NEWROUTE is an append versus a new route or a replaced one. Add the flag if the request has it. Signed-off-by: David Ahern <dsa@cumulusnetworks.com> Signed-off-by: David S. Miller <davem@davemloft.net> 2017-02-01net: reduce skb_warn_bad_offload() noiseEric Dumazet1-3/+9 Dmitry reported warnings occurring in __skb_gso_segment() [1] All SKB_GSO_DODGY producers can allow user space to feed packets that trigger the current check. We could prevent them from doing so, rejecting packets, but this might add regressions to existing programs. It turns out our SKB_GSO_DODGY handlers properly set up checksum information that is needed anyway when packets needs to be segmented. By checking again skb_needs_check() after skb_mac_gso_segment(), we should remove these pesky warnings, at a very minor cost. With help from Willem de Bruijn [1] WARNING: CPU: 1 PID: 6768 at net/core/dev.c:2439 skb_warn_bad_offload+0x2af/0x390 net/core/dev.c:2434 lo: caps=(0x000000a2803b7c69, 0x0000000000000000) len=138 data_len=0 gso_size=15883 gso_type=4 ip_summed=0 Kernel panic - not syncing: panic_on_warn set ... CPU: 1 PID: 6768 Comm: syz-executor1 Not tainted 4.9.0 #5 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 ffff8801c063ecd8 ffffffff82346bdf ffffffff00000001 1ffff100380c7d2e ffffed00380c7d26 0000000041b58ab3 ffffffff84b37e38 ffffffff823468f1 ffffffff84820740 ffffffff84f289c0 dffffc0000000000 ffff8801c063ee20 Call Trace: [<ffffffff82346bdf>] __dump_stack lib/dump_stack.c:15 [inline] [<ffffffff82346bdf>] dump_stack+0x2ee/0x3ef lib/dump_stack.c:51 [<ffffffff81827e34>] panic+0x1fb/0x412 kernel/panic.c:179 [<ffffffff8141f704>] __warn+0x1c4/0x1e0 kernel/panic.c:542 [<ffffffff8141f7e5>] warn_slowpath_fmt+0xc5/0x100 kernel/panic.c:565 [<ffffffff8356cbaf>] skb_warn_bad_offload+0x2af/0x390 net/core/dev.c:2434 [<ffffffff83585cd2>] __skb_gso_segment+0x482/0x780 net/core/dev.c:2706 [<ffffffff83586f19>] skb_gso_segment include/linux/netdevice.h:3985 [inline] [<ffffffff83586f19>] validate_xmit_skb+0x5c9/0xc20 net/core/dev.c:2969 [<ffffffff835892bb>] __dev_queue_xmit+0xe6b/0x1e70 net/core/dev.c:3383 [<ffffffff8358a2d7>] dev_queue_xmit+0x17/0x20 net/core/dev.c:3424 [<ffffffff83ad161d>] packet_snd net/packet/af_packet.c:2930 [inline] [<ffffffff83ad161d>] packet_sendmsg+0x32ed/0x4d30 net/packet/af_packet.c:2955 [<ffffffff834f0aaa>] sock_sendmsg_nosec net/socket.c:621 [inline] [<ffffffff834f0aaa>] sock_sendmsg+0xca/0x110 net/socket.c:631 [<ffffffff834f329a>] ___sys_sendmsg+0x8fa/0x9f0 net/socket.c:1954 [<ffffffff834f5e58>] __sys_sendmsg+0x138/0x300 net/socket.c:1988 [<ffffffff834f604d>] SYSC_sendmsg net/socket.c:1999 [inline] [<ffffffff834f604d>] SyS_sendmsg+0x2d/0x50 net/socket.c:1995 [<ffffffff84371941>] entry_SYSCALL_64_fastpath+0x1f/0xc2 Signed-off-by: Eric Dumazet <edumazet@google.com> Reported-by: Dmitry Vyukov <dvyukov@google.com> Cc: Willem de Bruijn <willemb@google.com> Signed-off-by: David S. Miller <davem@davemloft.net> 2017-02-01net/sched: matchall: Fix configuration raceYotam Gigi1-82/+45 In the current version, the matchall internal state is split into two structs: cls_matchall_head and cls_matchall_filter. This makes little sense, as matchall instance supports only one filter, and there is no situation where one exists and the other does not. In addition, that led to some races when filter was deleted while packet was processed. Unify that two structs into one, thus simplifying the process of matchall creation and deletion. As a result, the new, delete and get callbacks have a dummy implementation where all the work is done in destroy and change callbacks, as was done in cls_cgroup. Fixes: bf3994d2ed31 ("net/sched: introduce Match-all classifier") Reported-by: Daniel Borkmann <daniel@iogearbox.net> Signed-off-by: Yotam Gigi <yotamg@mellanox.com> Acked-by: Jiri Pirko <jiri@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net> 2017-02-01rtnetlink: Handle IFLA_MASTER parameter when processing rtnl_newlinkTheuns Verwoerd1-1/+6 Allow a master interface to be specified as one of the parameters when creating a new interface via rtnl_newlink. Previously this would require invoking interface creation, waiting for it to complete, and then separately binding that new interface to a master. In particular, this is used when creating a macvlan child interface for VRRP in a VRF configuration, allowing the interface creator to specify directly what master interface should be inherited by the child, without having to deal with asynchronous complications and potential race conditions. Signed-off-by: Theuns Verwoerd <theuns.verwoerd@alliedtelesis.co.nz> Acked-by: David Ahern <dsa@cumulusnetworks.com> Signed-off-by: David S. Miller <davem@davemloft.net>