/*
 * mm/fadvise.c
 *
 * Copyright (C) 2002, Linus Torvalds
 *
 * 11Jan2003	Andrew Morton
 *		Initial version.
 */

#include <linux/kernel.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/backing-dev.h>
#include <linux/pagevec.h>
#include <linux/fadvise.h>
#include <linux/writeback.h>
#include <linux/syscalls.h>
#include <linux/swap.h>

#include <asm/unistd.h>

/*
 * POSIX_FADV_WILLNEED could set PG_Referenced, and POSIX_FADV_NOREUSE could
 * deactivate the pages and clear PG_Referenced.
 */
SYSCALL_DEFINE4(fadvise64_64, int, fd, loff_t, offset, loff_t, len, int, advice)
{
	struct fd f = fdget(fd);
	struct inode *inode;
	struct address_space *mapping;
	struct backing_dev_info *bdi;
	loff_t endbyte;			/* inclusive */
	pgoff_t start_index;
	pgoff_t end_index;
	unsigned long nrpages;
	int ret = 0;

	if (!f.file)
		return -EBADF;

	inode = file_inode(f.file);
	if (S_ISFIFO(inode->i_mode)) {
		ret = -ESPIPE;
		goto out;
	}

	mapping = f.file->f_mapping;
	if (!mapping || len < 0) {
		ret = -EINVAL;
		goto out;
	}

	if (IS_DAX(inode)) {
		switch (advice) {
		case POSIX_FADV_NORMAL:
		case POSIX_FADV_RANDOM:
		case POSIX_FADV_SEQUENTIAL:
		case POSIX_FADV_WILLNEED:
		case POSIX_FADV_NOREUSE:
		case POSIX_FADV_DONTNEED:
			/* no bad return value, but ignore advice */
			break;
		default:
			ret = -EINVAL;
		}
		goto out;
	}

	/* Careful about overflows. Len == 0 means "as much as possible" */
	endbyte = offset + len;
	if (!len || endbyte < len)
		endbyte = -1;
	else
		endbyte--;		/* inclusive */

	bdi = inode_to_bdi(mapping->host);

	switch (advice) {
	case POSIX_FADV_NORMAL:
		f.file->f_ra.ra_pages = bdi->ra_pages;
		spin_lock(&f.file->f_lock);
		f.file->f_mode &= ~FMODE_RANDOM;
		spin_unlock(&f.file->f_lock);
		break;
	case POSIX_FADV_RANDOM:
		spin_lock(&f.file->f_lock);
		f.file->f_mode |= FMODE_RANDOM;
		spin_unlock(&f.file->f_lock);
		break;
	case POSIX_FADV_SEQUENTIAL:
		f.file->f_ra.ra_pages = bdi->ra_pages * 2;
		spin_lock(&f.file->f_lock);
		f.file->f_mode &= ~FMODE_RANDOM;
		spin_unlock(&f.file->f_lock);
		break;
	case POSIX_FADV_WILLNEED:
		/* First and last PARTIAL page! */
		start_index = offset >> PAGE_SHIFT;
		end_index = endbyte >> PAGE_SHIFT;

		/* Careful about overflow on the "+1" */
		nrpages = end_index - start_index + 1;
		if (!nrpages)
			nrpages = ~0UL;

		/*
		 * Ignore the return value because fadvise() shall return
		 * success even if the filesystem can't retrieve a hint.
		 */
		force_page_cache_readahead(mapping, f.file, start_index,
					   nrpages);
		break;
	case POSIX_FADV_NOREUSE:
		break;
	case POSIX_FADV_DONTNEED:
		if (!inode_write_congested(mapping->host))
			__filemap_fdatawrite_range(mapping, offset, endbyte,
						   WB_SYNC_NONE);

		/*
		 * First and last FULL page! Partial pages are deliberately
		 * preserved on the expectation that it is better to preserve
		 * needed memory than to discard unneeded memory.
		 */
		start_index = (offset+(PAGE_SIZE-1)) >> PAGE_SHIFT;
		end_index = (endbyte >> PAGE_SHIFT);
		if ((endbyte & ~PAGE_MASK) != ~PAGE_MASK) {
			/* First page is tricky as 0 - 1 = -1, but pgoff_t
			 * is unsigned, so the end_index >= start_index
			 * check below would be true and we'll discard the
			 * whole file cache which is not what was asked.
			 */
			if (end_index == 0)
				break;

			end_index--;
		}

		if (end_index >= start_index) {
			unsigned long count;

			/*
			 * It's common to FADV_DONTNEED right after
			 * the read or write that instantiates the
			 * pages, in which case there will be some
			 * sitting on the local LRU cache. Try to
			 * avoid the expensive remote drain and the
			 * second cache tree walk below by flushing
			 * them out right away.
			 */
			lru_add_drain();

			count = invalidate_mapping_pages(mapping,
						start_index, end_index);

			/*
			 * If fewer pages were invalidated than expected then
			 * it is possible that some of the pages were on
			 * a per-cpu pagevec for a remote CPU. Drain all
			 * pagevecs and try again.
			 */
			if (count < (end_index - start_index + 1)) {
				lru_add_drain_all();
				invalidate_mapping_pages(mapping, start_index,
							 end_index);
			}
		}
		break;
	default:
		ret = -EINVAL;
	}
out:
	fdput(f);
	return ret;
}

#ifdef __ARCH_WANT_SYS_FADVISE64

SYSCALL_DEFINE4(fadvise64, int, fd, loff_t, offset, size_t, len, int, advice)
{
	return sys_fadvise64_64(fd, offset, len, advice);
}

#endif
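For reference, userspace reaches the syscall above through posix_fadvise(3).
A minimal sketch of driving the POSIX_FADV_DONTNEED branch from userspace
follows; the file name is illustrative, not taken from the source:

/* Ask the kernel to drop cached pages for a file we are done with.
 * posix_fadvise(3) wraps the fadvise64_64 syscall implemented above.
 * "data.log" is an illustrative path. */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fd = open("data.log", O_RDONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* len == 0 means "to end of file", matching the kernel's
	 * "as much as possible" comment. */
	int err = posix_fadvise(fd, 0, 0, POSIX_FADV_DONTNEED);

	if (err)
		fprintf(stderr, "posix_fadvise: %d\n", err);
	close(fd);
	return 0;
}

Note that posix_fadvise() returns the error number directly rather than
setting errno, and, as the DONTNEED branch above shows, partial pages at the
edges of the range are preserved, so page-aligned offsets are needed if the
whole range really must be dropped.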
commit 29ba6e7400a317725bdfb86a725d1824447dbcd7
Merge: b08d46b01e995dd7b653b22d35bd1d958d6ee9b4 51ce8bd4d17a761e1a90a34a1b5c9b762cce7553
Author: David S. Miller <davem@davemloft.net>
Date:   2017-02-07 13:07:56 -0500

Merge branch 'replace-dst_confirm'
Julian Anastasov says:

====================
net: dst_confirm replacement

This patchset addresses the problem of neighbour confirmation where
received replies from one nexthop can cause confirmation of a different
nexthop when using the same dst. Thanks to YueHaibing
<yuehaibing@huawei.com> for tracking the dst->pending_confirm problem.

Sockets can obtain a cached output route. Such routes can be to a known
nexthop (rt_gateway=IP) or can be used simultaneously for different
nexthop IPs by different subnet prefixes (nh->nh_scope = RT_SCOPE_HOST,
rt_gateway=0). On closer inspection, there are more problems:

- dst_confirm() sets the flag on dst and not on dst->path; as a result,
  the indication is lost when XFRM is used

- DNAT can change the nexthop, so the nexthop actually used is not the
  one confirmed

So, the solution below avoids using dst->pending_confirm.

The current dst_confirm() usage is as follows:

Protocols confirming dst on received packets:
- TCP (1 dst per socket)
- SCTP (1 dst per transport)
- CXGB*

Protocols supporting sendmsg with MSG_CONFIRM [ | MSG_PROBE ] to
confirm the neighbour:
- UDP IPv4/IPv6
- ICMPv4 PING
- RAW IPv4/IPv6
- L2TP/IPv6

MSG_CONFIRM for other purposes (fix not needed):
- CAN

Sending without locking the socket:
- UDP (when no cork)
- RAW (when hdrincl=1)

Redirects from old to new GW:
- rt6_do_redirect

The patchset includes the following changes:

1. sock: add sk_dst_pending_confirm flag
   - used only by TCP with patch 4 to remember the received indication
     in sk->sk_dst_pending_confirm

2. net: add dst_pending_confirm flag to skbuff
   - skb->dst_pending_confirm will be used by all protocols in the
     following patches, via skb_{set,get}_dst_pending_confirm

3. sctp: add dst_pending_confirm flag
   - SCTP uses per-transport dsts and cannot use
     sk->sk_dst_pending_confirm like TCP

4. tcp: replace dst_confirm with sk_dst_confirm

5. net: add confirm_neigh method to dst_ops
   - IPv4 and IPv6 provision for slow neigh lookups for MSG_PROBE
     users. I decided to use a neigh lookup only for this case because
     on MSG_PROBE the skb may pass the MTU checks but it does not reach
     the neigh confirmation code. This patch will be used from patch 6.
   - xfrm_confirm_neigh: we use the last tunnel address, if present.
     When there are only transports, the original dest address is used.

6. net: use dst_confirm_neigh for UDP, RAW, ICMP, L2TP
   - dst_confirm conversion for UDP, RAW, ICMP and L2TP/IPv6
   - these protocols use MSG_CONFIRM propagated by ip*_append_data to
     skb->dst_pending_confirm. sk->sk_dst_pending_confirm is not used
     because some sending paths do not lock the socket. For MSG_PROBE
     we use the slow lookup (dst_confirm_neigh).
   - there are also 2 cases that need the slow lookup:
     __ip6_rt_update_pmtu and rt6_do_redirect. I hope
     &ipv6_hdr(skb)->saddr is the correct nexthop address to use here.

7. net: pending_confirm is not used anymore
   - I failed to understand the CXGB* code; I see dst_confirm() calls
     but I'm not sure dst_neigh_output() was called. For now I just
     removed the dst->pending_confirm flag and left all the
     dst_confirm() calls there. Any better idea?
   - Now maybe the old function neigh_output() should be restored
     instead of dst_neigh_output?
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
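As a concrete illustration of the MSG_CONFIRM path this patchset reworks,
a UDP sender can assert peer reachability as sketched below (a minimal
sketch; the address, port, and payload are made-up values):

/* Send a datagram with MSG_CONFIRM, telling the kernel the peer is
 * known to be alive (e.g. an application-level reply just arrived).
 * With this patchset the flag is propagated by ip*_append_data into
 * skb->dst_pending_confirm rather than dst->pending_confirm.
 * 192.0.2.1 and port 9 are illustrative values. */
#include <arpa/inet.h>
#include <netinet/in.h>
#include <string.h>
#include <sys/socket.h>

int confirm_peer(int fd)
{
	struct sockaddr_in dst;
	const char payload[] = "ping";

	memset(&dst, 0, sizeof(dst));
	dst.sin_family = AF_INET;
	dst.sin_port = htons(9);	/* discard port, illustrative */
	inet_pton(AF_INET, "192.0.2.1", &dst.sin_addr);

	return sendto(fd, payload, sizeof(payload), MSG_CONFIRM,
		      (struct sockaddr *)&dst, sizeof(dst));
}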
Diffstat (limited to 'net/ipv4/route.c'):

 net/ipv4/route.c | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+), 0 deletions(-)

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
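The diff body itself is not included here. Purely as a hypothetical sketch
(not the actual patch contents), a confirm_neigh dst_ops method for IPv4 in
the spirit of patch 5 could look roughly like the following, confirming the
neighbour entry for the nexthop the route really uses; __ipv4_confirm_neigh
is assumed here to be a helper that confirms the ARP entry for a given
device and address:

/* HYPOTHETICAL sketch of an IPv4 confirm_neigh method; NOT the
 * omitted diff. Confirm rt_gateway when the route has a gateway,
 * otherwise fall back to the destination address itself. */
static void ipv4_confirm_neigh(const struct dst_entry *dst, const void *daddr)
{
	const struct rtable *rt = container_of(dst, struct rtable, dst);
	const __be32 *pkey = daddr;

	/* Routes with a gateway always resolve the gateway's neigh. */
	if (rt->rt_gateway)
		pkey = (const __be32 *)&rt->rt_gateway;
	else if (!daddr)
		return;

	__ipv4_confirm_neigh(dst->dev, *pkey);	/* assumed helper */
}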