/*
 * Copyright (c) 2006 Oracle.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */
#include <linux/highmem.h>
#include <linux/gfp.h>
#include <linux/cpu.h>
#include <linux/export.h>

#include "rds.h"

struct rds_page_remainder {
	struct page	*r_page;
	unsigned long	r_offset;
};

static DEFINE_PER_CPU_SHARED_ALIGNED(struct rds_page_remainder,
				     rds_page_remainders);

/*
 * returns 0 on success or -errno on failure.
 *
 * We don't have to worry about flush_dcache_page() as this only works
 * with private pages.  If, say, we were to do directed receive to pinned
 * user pages we'd have to worry more about cache coherence.  (Though
 * the flush_dcache_page() in get_user_pages() would probably be enough).
 */
int rds_page_copy_user(struct page *page, unsigned long offset,
		       void __user *ptr, unsigned long bytes,
		       int to_user)
{
	unsigned long ret;
	void *addr;

	addr = kmap(page);
	if (to_user) {
		rds_stats_add(s_copy_to_user, bytes);
		ret = copy_to_user(ptr, addr + offset, bytes);
	} else {
		rds_stats_add(s_copy_from_user, bytes);
		ret = copy_from_user(addr + offset, ptr, bytes);
	}
	kunmap(page);

	return ret ? -EFAULT : 0;
}
EXPORT_SYMBOL_GPL(rds_page_copy_user);
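/*
 * Example: a minimal sketch of how a receive path might copy one
 * received fragment out to a user buffer with rds_page_copy_user().
 * The rds_example_* name and frag_* parameters are hypothetical and
 * for illustration only, hence the #if 0.
 */
#if 0
static int rds_example_frag_to_user(struct page *frag_page,
				    unsigned long frag_off,
				    void __user *ubuf,
				    unsigned long frag_len)
{
	/* to_user == 1 selects the copy_to_user() direction */
	return rds_page_copy_user(frag_page, frag_off, ubuf, frag_len, 1);
}
#endif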
/**
 * rds_page_remainder_alloc - build up regions of a message.
 *
 * @scat: Scatter list for message
 * @bytes: the number of bytes needed.
 * @gfp: the waiting behaviour of the allocation
 *
 * @gfp is always ORed with __GFP_HIGHMEM.  Callers must be prepared to
 * kmap the pages, etc.
 *
 * If @bytes is at least a full page then this just returns a page from
 * alloc_page().
 *
 * If @bytes is a partial page then this stores the unused region of the
 * page in a per-cpu structure.  Future partial-page allocations may be
 * satisfied from that cached region.  This lets us waste less memory on
 * small allocations with minimal complexity.  It works because the transmit
 * path passes read-only page regions down to devices.  They hold a page
 * reference until they are done with the region.
 */
int rds_page_remainder_alloc(struct scatterlist *scat, unsigned long bytes,
			     gfp_t gfp)
{
	struct rds_page_remainder *rem;
	unsigned long flags;
	struct page *page;
	int ret;

	gfp |= __GFP_HIGHMEM;

	/* jump straight to allocation if we're trying for a whole page */
	if (bytes >= PAGE_SIZE) {
		page = alloc_page(gfp);
		if (!page) {
			ret = -ENOMEM;
		} else {
			sg_set_page(scat, page, PAGE_SIZE, 0);
			ret = 0;
		}
		goto out;
	}

	rem = &per_cpu(rds_page_remainders, get_cpu());
	local_irq_save(flags);

	while (1) {
		/* avoid a tiny region getting stuck by tossing it */
		if (rem->r_page && bytes > (PAGE_SIZE - rem->r_offset)) {
			rds_stats_inc(s_page_remainder_miss);
			__free_page(rem->r_page);
			rem->r_page = NULL;
		}

		/* hand out a fragment from the cached page */
		if (rem->r_page && bytes <= (PAGE_SIZE - rem->r_offset)) {
			sg_set_page(scat, rem->r_page, bytes, rem->r_offset);
			get_page(sg_page(scat));

			if (rem->r_offset != 0)
				rds_stats_inc(s_page_remainder_hit);

			/* keep fragments 8-byte aligned within the page */
			rem->r_offset += ALIGN(bytes, 8);
			if (rem->r_offset >= PAGE_SIZE) {
				__free_page(rem->r_page);
				rem->r_page = NULL;
			}
			ret = 0;
			break;
		}

		/* alloc if there is nothing for us to use */
		local_irq_restore(flags);
		put_cpu();

		page = alloc_page(gfp);

		rem = &per_cpu(rds_page_remainders, get_cpu());
		local_irq_save(flags);

		if (!page) {
			ret = -ENOMEM;
			break;
		}

		/* did someone race to fill the remainder before us? */
		if (rem->r_page) {
			__free_page(page);
			continue;
		}

		/* otherwise install our page and loop around to hand out
		 * a fragment from it */
		rem->r_page = page;
		rem->r_offset = 0;
	}

	local_irq_restore(flags);
	put_cpu();
out:
	rdsdebug("bytes %lu ret %d %p %u %u\n", bytes, ret,
		 ret ? NULL : sg_page(scat), ret ? 0 : scat->offset,
		 ret ? 0 : scat->length);

	return ret;
}
EXPORT_SYMBOL_GPL(rds_page_remainder_alloc);

void rds_page_exit(void)
{
	unsigned int cpu;

	for_each_possible_cpu(cpu) {
		struct rds_page_remainder *rem;

		rem = &per_cpu(rds_page_remainders, cpu);
		rdsdebug("cpu %u\n", cpu);

		if (rem->r_page)
			__free_page(rem->r_page);
		rem->r_page = NULL;
	}
}
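/*
 * Example: a minimal sketch of filling a scatterlist one fragment at a
 * time with rds_page_remainder_alloc().  Sub-page fragments allocated
 * back to back on the same CPU can share a single page through the
 * per-cpu remainder.  The rds_example_* name and its parameters are
 * hypothetical and for illustration only, hence the #if 0.
 */
#if 0
static int rds_example_fill_sg(struct scatterlist *sg, unsigned int nents,
			       unsigned long total, gfp_t gfp)
{
	unsigned long chunk;
	unsigned int i;
	int ret;

	sg_init_table(sg, nents);
	for (i = 0; i < nents && total; i++) {
		/* never ask for more than a page per entry */
		chunk = min_t(unsigned long, total, PAGE_SIZE);
		ret = rds_page_remainder_alloc(&sg[i], chunk, gfp);
		if (ret)
			return ret;	/* caller frees pages already set */
		total -= chunk;
	}
	return total ? -EMSGSIZE : 0;
}
#endif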