/*
 * NET		Generic infrastructure for Network protocols.
 *
 *		Definitions for request_sock
 *
 * Authors:	Arnaldo Carvalho de Melo <acme@conectiva.com.br>
 *
 * 		From code originally in include/net/tcp.h
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */
#ifndef _REQUEST_SOCK_H
#define _REQUEST_SOCK_H

#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/types.h>
#include <linux/bug.h>

#include <net/sock.h>

struct request_sock;
struct sk_buff;
struct dst_entry;
struct proto;

struct request_sock_ops {
	int		family;
	int		obj_size;
	struct kmem_cache	*slab;
	char		*slab_name;
	int		(*rtx_syn_ack)(const struct sock *sk,
				       struct request_sock *req);
	void		(*send_ack)(const struct sock *sk, struct sk_buff *skb,
				    struct request_sock *req);
	void		(*send_reset)(const struct sock *sk,
				      struct sk_buff *skb);
	void		(*destructor)(struct request_sock *req);
	void		(*syn_ack_timeout)(const struct request_sock *req);
};

int inet_rtx_syn_ack(const struct sock *parent, struct request_sock *req);

/* struct request_sock - mini sock to represent a connection request
 */
struct request_sock {
	struct sock_common		__req_common;
#define rsk_refcnt			__req_common.skc_refcnt
#define rsk_hash			__req_common.skc_hash
#define rsk_listener			__req_common.skc_listener
#define rsk_window_clamp		__req_common.skc_window_clamp
#define rsk_rcv_wnd			__req_common.skc_rcv_wnd

	struct request_sock		*dl_next;
	u16				mss;
	u8				num_retrans; /* number of retransmits */
	u8				cookie_ts:1; /* syncookie: encode tcpopts in timestamp */
	u8				num_timeout:7; /* number of timeouts */
	u32				ts_recent;
	struct timer_list		rsk_timer;
	const struct request_sock_ops	*rsk_ops;
	struct sock			*sk;
	u32				*saved_syn;
	u32				secid;
	u32				peer_secid;
};

static inline struct request_sock *inet_reqsk(const struct sock *sk)
{
	return (struct request_sock *)sk;
}

static inline struct sock *req_to_sk(struct request_sock *req)
{
	return (struct sock *)req;
}

static inline struct request_sock *
reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener,
	    bool attach_listener)
{
	struct request_sock *req;

	req = kmem_cache_alloc(ops->slab, GFP_ATOMIC | __GFP_NOWARN);
	if (!req)
		return NULL;
	req->rsk_listener = NULL;
	if (attach_listener) {
		if (unlikely(!atomic_inc_not_zero(&sk_listener->sk_refcnt))) {
			kmem_cache_free(ops->slab, req);
			return NULL;
		}
		req->rsk_listener = sk_listener;
	}
	req->rsk_ops = ops;
	req_to_sk(req)->sk_prot = sk_listener->sk_prot;
	sk_node_init(&req_to_sk(req)->sk_node);
	sk_tx_queue_clear(req_to_sk(req));
	req->saved_syn = NULL;
	atomic_set(&req->rsk_refcnt, 0);

	return req;
}

static inline void reqsk_free(struct request_sock *req)
{
	/* temporary debugging */
	WARN_ON_ONCE(atomic_read(&req->rsk_refcnt) != 0);

	req->rsk_ops->destructor(req);
	if (req->rsk_listener)
		sock_put(req->rsk_listener);
	kfree(req->saved_syn);
	kmem_cache_free(req->rsk_ops->slab, req);
}

static inline void reqsk_put(struct request_sock *req)
{
	if (atomic_dec_and_test(&req->rsk_refcnt))
		reqsk_free(req);
}

/*
 * For a TCP Fast Open listener -
 *	lock - protects the access to all the reqsk, which is co-owned by
 *		the listener and the child socket.
 *	qlen - pending TFO requests (still in TCP_SYN_RECV).
 *	max_qlen - max TFO reqs allowed before TFO is disabled.
 *
 *	XXX (TFO) - ideally these fields can be made as part of "listen_sock"
 *	structure above. But there is some implementation difficulty due to
 *	listen_sock being part of request_sock_queue hence will be freed when
 *	a listener is stopped. But TFO related fields may continue to be
 *	accessed even after a listener is closed, until its sk_refcnt drops
 *	to 0 implying no more outstanding TFO reqs. One solution is to keep
 *	listen_opt around until	sk_refcnt drops to 0. But there is some other
 *	complexity that needs to be resolved. E.g., a listener can be disabled
 *	temporarily through shutdown()->tcp_disconnect(), and re-enabled later.
 */
struct fastopen_queue {
	struct request_sock	*rskq_rst_head; /* Keep track of past TFO */
	struct request_sock	*rskq_rst_tail; /* requests that caused RST.
						 * This is part of the defense
						 * against spoofing attack.
						 */
	spinlock_t	lock;
	int		qlen;		/* # of pending (TCP_SYN_RECV) reqs */
	int		max_qlen;	/* != 0 iff TFO is currently enabled */
};

/** struct request_sock_queue - queue of request_socks
 *
 * @rskq_accept_head - FIFO head of established children
 * @rskq_accept_tail - FIFO tail of established children
 * @rskq_defer_accept - User waits for some data after accept()
 *
 */
struct request_sock_queue {
	spinlock_t		rskq_lock;
	u8			rskq_defer_accept;

	u32			synflood_warned;
	atomic_t		qlen;
	atomic_t		young;

	struct request_sock	*rskq_accept_head;
	struct request_sock	*rskq_accept_tail;
	struct fastopen_queue	fastopenq;  /* Check max_qlen != 0 to determine
					     * if TFO is enabled.
					     */
};

void reqsk_queue_alloc(struct request_sock_queue *queue);

void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req,
			   bool reset);

static inline bool reqsk_queue_empty(const struct request_sock_queue *queue)
{
	return queue->rskq_accept_head == NULL;
}

static inline struct request_sock *reqsk_queue_remove(struct request_sock_queue *queue,
						      struct sock *parent)
{
	struct request_sock *req;

	spin_lock_bh(&queue->rskq_lock);
	req = queue->rskq_accept_head;
	if (req) {
		sk_acceptq_removed(parent);
		queue->rskq_accept_head = req->dl_next;
		if (queue->rskq_accept_head == NULL)
			queue->rskq_accept_tail = NULL;
	}
	spin_unlock_bh(&queue->rskq_lock);
	return req;
}

static inline void reqsk_queue_removed(struct request_sock_queue *queue,
				       const struct request_sock *req)
{
	if (req->num_timeout == 0)
		atomic_dec(&queue->young);
	atomic_dec(&queue->qlen);
}

static inline void reqsk_queue_added(struct request_sock_queue *queue)
{
	atomic_inc(&queue->young);
	atomic_inc(&queue->qlen);
}

static inline int reqsk_queue_len(const struct request_sock_queue *queue)
{
	return atomic_read(&queue->qlen);
}

static inline int reqsk_queue_len_young(const struct request_sock_queue *queue)
{
	return atomic_read(&queue->young);
}

#endif /* _REQUEST_SOCK_H */
'deletions'>-0</span>/<span class='insertions'>+12</span></td></tr>
<tr class='nohover-highlight'><td/><td colspan='5' class='logmsg'>
New nested netlink attribute to associate tunnel info per vlan.
This is used by bridge driver to send tunnel metadata to
bridge ports in vlan tunnel mode. This patch also adds new per
port flag IFLA_BRPORT_VLAN_TUNNEL to enable vlan tunnel mode.
off by default.

One example use for this is a vxlan bridging gateway or vtep
which maps vlans to vn-segments (or vnis). User can configure
per-vlan tunnel information which the bridge driver can use
to bridge vlan into the corresponding vn-segment.

Signed-off-by: Roopa Prabhu &lt;roopa@cumulusnetworks.com&gt;
Signed-off-by: David S. Miller &lt;davem@davemloft.net&gt;


</td></tr>
<tr class='logheader'><td><span title='2017-02-03 15:21:21 -0500'>2017-02-03</span></td><td class='logsubject'><a href='/cgit.cgi/linux/net-next.git/commit/include/uapi?id=3ad7a4b141ebd6091494913672d7166d5c2764e4'>vxlan: support fdb and learning in COLLECT_METADATA mode</a></td><td>Roopa Prabhu</td><td>1</td><td><span class='deletions'>-0</span>/<span class='insertions'>+1</span></td></tr>
<tr class='nohover-highlight'><td/><td colspan='5' class='logmsg'>
Vxlan COLLECT_METADATA mode today solves the per-vni netdev
scalability problem in l3 networks. It expects all forwarding
information to be present in dst_metadata. This patch series
enhances collect metadata mode to include the case where only
vni is present in dst_metadata, and the vxlan driver can then use
the rest of the forwarding information datbase to make forwarding
decisions. There is no change to default COLLECT_METADATA
behaviour. These changes only apply to COLLECT_METADATA when
used with the bridging use-case with a special dst_metadata
tunnel info flag (eg: where vxlan device is part of a bridge).
For all this to work, the vxlan driver will need to now support a
single fdb table hashed by mac + vni. This series essentially makes
this happen.

use-case and workflow:
vxlan collect metadata device participates in bridging vlan
to vn-segments. Bridge driver above the vxlan device,
sends the vni corresponding to the vlan in the dst_metadata.
vxlan driver will lookup forwarding database with (mac + vni)
for the required remote destination information to forward the
packet.

Changes introduced by this patch:
    - allow learning and forwarding database state in vxlan netdev in
      COLLECT_METADATA mode. Current behaviour is not changed
      by default. tunnel info flag IP_TUNNEL_INFO_BRIDGE is used
      to support the new bridge friendly mode.
    - A single fdb table hashed by (mac, vni) to allow fdb entries with
      multiple vnis in the same fdb table
    - rx path already has the vni
    - tx path expects a vni in the packet with dst_metadata
    - prior to this series, fdb remote_dsts carried remote vni and
      the vxlan device carrying the fdb table represented the
      source vni. With the vxlan device now representing multiple vnis,
      this patch adds a src vni attribute to the fdb entry. The remote
      vni already uses NDA_VNI attribute. This patch introduces
      NDA_SRC_VNI netlink attribute to represent the src vni in a multi
      vni fdb table.

iproute2 example (patched and pruned iproute2 output to just show
relevant fdb entries):
example shows same host mac learnt on two vni's.

before (netdev per vni):
$bridge fdb show | grep "00:02:00:00:00:03"
00:02:00:00:00:03 dev vxlan1001 dst 12.0.0.8 self
00:02:00:00:00:03 dev vxlan1000 dst 12.0.0.8 self

after this patch with collect metadata in bridged mode (single netdev):
$bridge fdb show | grep "00:02:00:00:00:03"
00:02:00:00:00:03 dev vxlan0 src_vni 1001 dst 12.0.0.8 self
00:02:00:00:00:03 dev vxlan0 src_vni 1000 dst 12.0.0.8 self

Signed-off-by: Roopa Prabhu &lt;roopa@cumulusnetworks.com&gt;
Signed-off-by: David S. Miller &lt;davem@davemloft.net&gt;


</td></tr>
<tr class='logheader'><td><span title='2017-02-03 15:16:46 -0500'>2017-02-03</span></td><td class='logsubject'><a href='/cgit.cgi/linux/net-next.git/commit/include/uapi?id=295a6e06d21e1f469c9f38b00125a13b60ad4e7c'>net/sched: act_ife: Change to use ife module</a></td><td>Yotam Gigi</td><td>1</td><td><span class='deletions'>-9</span>/<span class='insertions'>+1</span></td></tr>
<tr class='nohover-highlight'><td/><td colspan='5' class='logmsg'>
Use the encode/decode functionality from the ife module instead of using
implementation inside the act_ife.

Reviewed-by: Jiri Pirko &lt;jiri@mellanox.com&gt;
Signed-off-by: Yotam Gigi &lt;yotamg@mellanox.com&gt;
Signed-off-by: Jamal Hadi Salim &lt;jhs@mojatatu.com&gt;
Signed-off-by: Roman Mashak &lt;mrv@mojatatu.com&gt;
Signed-off-by: David S. Miller &lt;davem@davemloft.net&gt;


</td></tr>
<tr class='logheader'><td><span title='2017-02-03 15:16:45 -0500'>2017-02-03</span></td><td class='logsubject'><a href='/cgit.cgi/linux/net-next.git/commit/include/uapi?id=1ce8460496c05379c66edc178c3c55ca4e953044'>net: Introduce ife encapsulation module</a></td><td>Yotam Gigi</td><td>2</td><td><span class='deletions'>-0</span>/<span class='insertions'>+19</span></td></tr>
<tr class='nohover-highlight'><td/><td colspan='5' class='logmsg'>
This module is responsible for the ife encapsulation protocol
encode/decode logics. That module can:
 - ife_encode: encode skb and reserve space for the ife meta header
 - ife_decode: decode skb and extract the meta header size
 - ife_tlv_meta_encode - encodes one tlv entry into the reserved ife
   header space.
 - ife_tlv_meta_decode - decodes one tlv entry from the packet
 - ife_tlv_meta_next - advance to the next tlv

Reviewed-by: Jiri Pirko &lt;jiri@mellanox.com&gt;
Signed-off-by: Yotam Gigi &lt;yotamg@mellanox.com&gt;
Signed-off-by: Jamal Hadi Salim &lt;jhs@mojatatu.com&gt;
Signed-off-by: Roman Mashak &lt;mrv@mojatatu.com&gt;
Signed-off-by: David S. Miller &lt;davem@davemloft.net&gt;


</td></tr>
<tr class='logheader'><td><span title='2017-02-02 23:34:19 -0500'>2017-02-02</span></td><td class='logsubject'><a href='/cgit.cgi/linux/net-next.git/commit/include/uapi?id=8fe809a992639b2013c0d8da2ba55cdea28a959a'>net: add LINUX_MIB_PFMEMALLOCDROP counter</a></td><td>Eric Dumazet</td><td>1</td><td><span class='deletions'>-0</span>/<span class='insertions'>+1</span></td></tr>
<tr class='nohover-highlight'><td/><td colspan='5' class='logmsg'>
Debugging issues caused by pfmemalloc is often tedious.

Add a new SNMP counter to more easily diagnose these problems.

Signed-off-by: Eric Dumazet &lt;edumazet@google.com&gt;
Cc: Josef Bacik &lt;jbacik@fb.com&gt;
Acked-by: Josef Bacik &lt;jbacik@fb.com&gt;
Signed-off-by: David S. Miller &lt;davem@davemloft.net&gt;


</td></tr>
<tr class='logheader'><td><span title='2017-02-02 21:58:02 -0500'>2017-02-02</span></td><td class='logsubject'><a href='/cgit.cgi/linux/net-next.git/commit/include/uapi?id=ba94f3088b792b16ea576a256a6030feddc87f24'>unix: add ioctl to open a unix socket file with O_PATH</a></td><td>Andrey Vagin</td><td>1</td><td><span class='deletions'>-0</span>/<span class='insertions'>+2</span></td></tr>
<tr class='nohover-highlight'><td/><td colspan='5' class='logmsg'>
This ioctl opens a file to which a socket is bound and
returns a file descriptor. The caller has to have CAP_NET_ADMIN
in the socket network namespace.

Currently it is impossible to get a path and a mount point
for a socket file. socket_diag reports address, device ID and inode
number for unix sockets. An address can contain a relative path or
a file may be moved somewhere. And these properties say nothing about
a mount namespace and a mount point of a socket file.

With the introduced ioctl, we can get a path by reading
/proc/self/fd/X and get mnt_id from /proc/self/fdinfo/X.

In CRIU we are going to use this ioctl to dump and restore unix socket.

Here is an example how it can be used:

$ strace -e socket,bind,ioctl ./test /tmp/test_sock
socket(AF_UNIX, SOCK_STREAM, 0)         = 3
bind(3, {sa_family=AF_UNIX, sun_path="test_sock"}, 11) = 0
ioctl(3, SIOCUNIXFILE, 0)           = 4
^Z

$ ss -a | grep test_sock
u_str  LISTEN     0      1      test_sock 17798                 * 0

$ ls -l /proc/760/fd/{3,4}
lrwx------ 1 root root 64 Feb  1 09:41 3 -&gt; 'socket:[17798]'
l--------- 1 root root 64 Feb  1 09:41 4 -&gt; /tmp/test_sock

$ cat /proc/760/fdinfo/4
pos:	0
flags:	012000000
mnt_id:	40

$ cat /proc/self/mountinfo | grep "^40\s"
40 19 0:37 / /tmp rw shared:23 - tmpfs tmpfs rw

Signed-off-by: Andrei Vagin &lt;avagin@openvz.org&gt;
Signed-off-by: David S. Miller &lt;davem@davemloft.net&gt;


</td></tr>
<tr class='logheader'><td><span title='2017-02-02 16:54:00 -0500'>2017-02-02</span></td><td class='logsubject'><a href='/cgit.cgi/linux/net-next.git/commit/include/uapi?id=e2160156bf2a7d5018e99a9993fbcdda0abac09b'>Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net</a></td><td>David S. Miller</td><td>1</td><td><span class='deletions'>-1</span>/<span class='insertions'>+3</span></td></tr>
<tr class='nohover-highlight'><td/><td colspan='5' class='logmsg'>
All merge conflicts were simple overlapping changes.

Signed-off-by: David S. Miller &lt;davem@davemloft.net&gt;


</td></tr>