/* * mm/fadvise.c * * Copyright (C) 2002, Linus Torvalds * * 11Jan2003 Andrew Morton * Initial version. */ #include #include #include #include #include #include #include #include #include #include #include #include /* * POSIX_FADV_WILLNEED could set PG_Referenced, and POSIX_FADV_NOREUSE could * deactivate the pages and clear PG_Referenced. */ SYSCALL_DEFINE4(fadvise64_64, int, fd, loff_t, offset, loff_t, len, int, advice) { struct fd f = fdget(fd); struct inode *inode; struct address_space *mapping; struct backing_dev_info *bdi; loff_t endbyte; /* inclusive */ pgoff_t start_index; pgoff_t end_index; unsigned long nrpages; int ret = 0; if (!f.file) return -EBADF; inode = file_inode(f.file); if (S_ISFIFO(inode->i_mode)) { ret = -ESPIPE; goto out; } mapping = f.file->f_mapping; if (!mapping || len < 0) { ret = -EINVAL; goto out; } if (IS_DAX(inode)) { switch (advice) { case POSIX_FADV_NORMAL: case POSIX_FADV_RANDOM: case POSIX_FADV_SEQUENTIAL: case POSIX_FADV_WILLNEED: case POSIX_FADV_NOREUSE: case POSIX_FADV_DONTNEED: /* no bad return value, but ignore advice */ break; default: ret = -EINVAL; } goto out; } /* Careful about overflows. Len == 0 means "as much as possible" */ endbyte = offset + len; if (!len || endbyte < len) endbyte = -1; else endbyte--; /* inclusive */ bdi = inode_to_bdi(mapping->host); switch (advice) { case POSIX_FADV_NORMAL: f.file->f_ra.ra_pages = bdi->ra_pages; spin_lock(&f.file->f_lock); f.file->f_mode &= ~FMODE_RANDOM; spin_unlock(&f.file->f_lock); break; case POSIX_FADV_RANDOM: spin_lock(&f.file->f_lock); f.file->f_mode |= FMODE_RANDOM; spin_unlock(&f.file->f_lock); break; case POSIX_FADV_SEQUENTIAL: f.file->f_ra.ra_pages = bdi->ra_pages * 2; spin_lock(&f.file->f_lock); f.file->f_mode &= ~FMODE_RANDOM; spin_unlock(&f.file->f_lock); break; case POSIX_FADV_WILLNEED: /* First and last PARTIAL page! */ start_index = offset >> PAGE_SHIFT; end_index = endbyte >> PAGE_SHIFT; /* Careful about overflow on the "+1" */ nrpages = end_index - start_index + 1; if (!nrpages) nrpages = ~0UL; /* * Ignore return value because fadvise() shall return * success even if filesystem can't retrieve a hint, */ force_page_cache_readahead(mapping, f.file, start_index, nrpages); break; case POSIX_FADV_NOREUSE: break; case POSIX_FADV_DONTNEED: if (!inode_write_congested(mapping->host)) __filemap_fdatawrite_range(mapping, offset, endbyte, WB_SYNC_NONE); /* * First and last FULL page! Partial pages are deliberately * preserved on the expectation that it is better to preserve * needed memory than to discard unneeded memory. */ start_index = (offset+(PAGE_SIZE-1)) >> PAGE_SHIFT; end_index = (endbyte >> PAGE_SHIFT); if ((endbyte & ~PAGE_MASK) != ~PAGE_MASK) { /* First page is tricky as 0 - 1 = -1, but pgoff_t * is unsigned, so the end_index >= start_index * check below would be true and we'll discard the whole * file cache which is not what was asked. */ if (end_index == 0) break; end_index--; } if (end_index >= start_index) { unsigned long count; /* * It's common to FADV_DONTNEED right after * the read or write that instantiates the * pages, in which case there will be some * sitting on the local LRU cache. Try to * avoid the expensive remote drain and the * second cache tree walk below by flushing * them out right away. */ lru_add_drain(); count = invalidate_mapping_pages(mapping, start_index, end_index); /* * If fewer pages were invalidated than expected then * it is possible that some of the pages were on * a per-cpu pagevec for a remote CPU. Drain all * pagevecs and try again. */ if (count < (end_index - start_index + 1)) { lru_add_drain_all(); invalidate_mapping_pages(mapping, start_index, end_index); } } break; default: ret = -EINVAL; } out: fdput(f); return ret; } #ifdef __ARCH_WANT_SYS_FADVISE64 SYSCALL_DEFINE4(fadvise64, int, fd, loff_t, offset, size_t, len, int, advice) { return sys_fadvise64_64(fd, offset, len, advice); } #endif error path of in sample init, by releasing the tc hash in case of failure in psample_group creation. Fixes: 5c5670fae430 ("net/sched: Introduce sample tc action") Reported-by: Cong Wang <xiyou.wangcong@gmail.com> Reviewed-by: Jiri Pirko <jiri@mellanox.com> Signed-off-by: Yotam Gigi <yotamg@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net> 2017-02-01net: ipv6: add NLM_F_APPEND in notifications when applicableDavid Ahern1-0/+3 IPv6 does not set the NLM_F_APPEND flag in notifications to signal that a NEWROUTE is an append versus a new route or a replaced one. Add the flag if the request has it. Signed-off-by: David Ahern <dsa@cumulusnetworks.com> Signed-off-by: David S. Miller <davem@davemloft.net> 2017-02-01net: reduce skb_warn_bad_offload() noiseEric Dumazet1-3/+9 Dmitry reported warnings occurring in __skb_gso_segment() [1] All SKB_GSO_DODGY producers can allow user space to feed packets that trigger the current check. We could prevent them from doing so, rejecting packets, but this might add regressions to existing programs. It turns out our SKB_GSO_DODGY handlers properly set up checksum information that is needed anyway when packets needs to be segmented. By checking again skb_needs_check() after skb_mac_gso_segment(), we should remove these pesky warnings, at a very minor cost. With help from Willem de Bruijn [1] WARNING: CPU: 1 PID: 6768 at net/core/dev.c:2439 skb_warn_bad_offload+0x2af/0x390 net/core/dev.c:2434 lo: caps=(0x000000a2803b7c69, 0x0000000000000000) len=138 data_len=0 gso_size=15883 gso_type=4 ip_summed=0 Kernel panic - not syncing: panic_on_warn set ... CPU: 1 PID: 6768 Comm: syz-executor1 Not tainted 4.9.0 #5 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 ffff8801c063ecd8 ffffffff82346bdf ffffffff00000001 1ffff100380c7d2e ffffed00380c7d26 0000000041b58ab3 ffffffff84b37e38 ffffffff823468f1 ffffffff84820740 ffffffff84f289c0 dffffc0000000000 ffff8801c063ee20 Call Trace: [<ffffffff82346bdf>] __dump_stack lib/dump_stack.c:15 [inline] [<ffffffff82346bdf>] dump_stack+0x2ee/0x3ef lib/dump_stack.c:51 [<ffffffff81827e34>] panic+0x1fb/0x412 kernel/panic.c:179 [<ffffffff8141f704>] __warn+0x1c4/0x1e0 kernel/panic.c:542 [<ffffffff8141f7e5>] warn_slowpath_fmt+0xc5/0x100 kernel/panic.c:565 [<ffffffff8356cbaf>] skb_warn_bad_offload+0x2af/0x390 net/core/dev.c:2434 [<ffffffff83585cd2>] __skb_gso_segment+0x482/0x780 net/core/dev.c:2706 [<ffffffff83586f19>] skb_gso_segment include/linux/netdevice.h:3985 [inline] [<ffffffff83586f19>] validate_xmit_skb+0x5c9/0xc20 net/core/dev.c:2969 [<ffffffff835892bb>] __dev_queue_xmit+0xe6b/0x1e70 net/core/dev.c:3383 [<ffffffff8358a2d7>] dev_queue_xmit+0x17/0x20 net/core/dev.c:3424 [<ffffffff83ad161d>] packet_snd net/packet/af_packet.c:2930 [inline] [<ffffffff83ad161d>] packet_sendmsg+0x32ed/0x4d30 net/packet/af_packet.c:2955 [<ffffffff834f0aaa>] sock_sendmsg_nosec net/socket.c:621 [inline] [<ffffffff834f0aaa>] sock_sendmsg+0xca/0x110 net/socket.c:631 [<ffffffff834f329a>] ___sys_sendmsg+0x8fa/0x9f0 net/socket.c:1954 [<ffffffff834f5e58>] __sys_sendmsg+0x138/0x300 net/socket.c:1988 [<ffffffff834f604d>] SYSC_sendmsg net/socket.c:1999 [inline] [<ffffffff834f604d>] SyS_sendmsg+0x2d/0x50 net/socket.c:1995 [<ffffffff84371941>] entry_SYSCALL_64_fastpath+0x1f/0xc2 Signed-off-by: Eric Dumazet <edumazet@google.com> Reported-by: Dmitry Vyukov <dvyukov@google.com> Cc: Willem de Bruijn <willemb@google.com> Signed-off-by: David S. Miller <davem@davemloft.net> 2017-02-01rtnetlink: Handle IFLA_MASTER parameter when processing rtnl_newlinkTheuns Verwoerd1-1/+6 Allow a master interface to be specified as one of the parameters when creating a new interface via rtnl_newlink. Previously this would require invoking interface creation, waiting for it to complete, and then separately binding that new interface to a master. In particular, this is used when creating a macvlan child interface for VRRP in a VRF configuration, allowing the interface creator to specify directly what master interface should be inherited by the child, without having to deal with asynchronous complications and potential race conditions. Signed-off-by: Theuns Verwoerd <theuns.verwoerd@alliedtelesis.co.nz> Acked-by: David Ahern <dsa@cumulusnetworks.com> Signed-off-by: David S. Miller <davem@davemloft.net>