/*
 * gw.c - CAN frame Gateway/Router/Bridge with netlink interface
 *
 * Copyright (c) 2011 Volkswagen Group Electronic Research
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of Volkswagen nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * Alternatively, provided that this notice is retained in full, this
 * software may be distributed under the terms of the GNU General
 * Public License ("GPL") version 2, in which case the provisions of the
 * GPL apply INSTEAD OF those given above.
 *
 * The provided data structures and external interfaces from this code
 * are not restricted to be used by modules with a GPL compatible license.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 *
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/rcupdate.h>
#include <linux/rculist.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/skbuff.h>
#include <linux/can.h>
#include <linux/can/core.h>
#include <linux/can/skb.h>
#include <linux/can/gw.h>
#include <net/rtnetlink.h>
#include <net/net_namespace.h>
#include <net/sock.h>

#define CAN_GW_VERSION "20130117"
#define CAN_GW_NAME "can-gw"

MODULE_DESCRIPTION("PF_CAN netlink gateway");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("Oliver Hartkopp <oliver.hartkopp@volkswagen.de>");
MODULE_ALIAS(CAN_GW_NAME);

#define CGW_MIN_HOPS 1
#define CGW_MAX_HOPS 6
#define CGW_DEFAULT_HOPS 1

static unsigned int max_hops __read_mostly = CGW_DEFAULT_HOPS;
module_param(max_hops, uint, S_IRUGO);
MODULE_PARM_DESC(max_hops,
		 "maximum " CAN_GW_NAME " routing hops for CAN frames "
		 "(valid values: " __stringify(CGW_MIN_HOPS) "-"
		 __stringify(CGW_MAX_HOPS) " hops, "
		 "default: " __stringify(CGW_DEFAULT_HOPS) ")");

static HLIST_HEAD(cgw_list);
static struct notifier_block notifier;

static struct kmem_cache *cgw_cache __read_mostly;

/* structure that contains the (on-the-fly) CAN frame modifications */
struct cf_mod {
	struct {
		struct can_frame and;
		struct can_frame or;
		struct can_frame xor;
		struct can_frame set;
	} modframe;
	struct {
		u8 and;
		u8 or;
		u8 xor;
		u8 set;
	} modtype;
	void (*modfunc[MAX_MODFUNCTIONS])(struct can_frame *cf,
					  struct cf_mod *mod);

	/* CAN frame checksum calculation after CAN frame modifications */
	struct {
		struct cgw_csum_xor xor;
		struct cgw_csum_crc8 crc8;
	} csum;
	struct {
		void (*xor)(struct can_frame *cf, struct cgw_csum_xor *xor);
		void (*crc8)(struct can_frame *cf, struct cgw_csum_crc8 *crc8);
	} csumfunc;
	u32 uid;
};


/*
 * So far we just support CAN -> CAN routing and frame modifications.
 *
 * The internal can_can_gw structure contains data and attributes for
 * a CAN -> CAN gateway job.
 */
struct can_can_gw {
	struct can_filter filter;
	int src_idx;
	int dst_idx;
};

/* list entry for CAN gateways jobs */
struct cgw_job {
	struct hlist_node list;
	struct rcu_head rcu;
	u32 handled_frames;
	u32 dropped_frames;
	u32 deleted_frames;
	struct cf_mod mod;
	union {
		/* CAN frame data source */
		struct net_device *dev;
	} src;
	union {
		/* CAN frame data destination */
		struct net_device *dev;
	} dst;
	union {
		struct can_can_gw ccgw;
		/* tbc */
	};
	u8 gwtype;
	u8 limit_hops;
	u16 flags;
};

/* modification functions that are invoked in the hot path in can_can_gw_rcv */

#define MODFUNC(func, op) static void func(struct can_frame *cf, \
					   struct cf_mod *mod) { op ; }

MODFUNC(mod_and_id, cf->can_id &= mod->modframe.and.can_id)
MODFUNC(mod_and_dlc, cf->can_dlc &= mod->modframe.and.can_dlc)
MODFUNC(mod_and_data, *(u64 *)cf->data &= *(u64 *)mod->modframe.and.data)
MODFUNC(mod_or_id, cf->can_id |= mod->modframe.or.can_id)
MODFUNC(mod_or_dlc, cf->can_dlc |= mod->modframe.or.can_dlc)
MODFUNC(mod_or_data, *(u64 *)cf->data |= *(u64 *)mod->modframe.or.data)
MODFUNC(mod_xor_id, cf->can_id ^= mod->modframe.xor.can_id)
MODFUNC(mod_xor_dlc, cf->can_dlc ^= mod->modframe.xor.can_dlc)
MODFUNC(mod_xor_data, *(u64 *)cf->data ^= *(u64 *)mod->modframe.xor.data)
MODFUNC(mod_set_id, cf->can_id = mod->modframe.set.can_id)
MODFUNC(mod_set_dlc, cf->can_dlc = mod->modframe.set.can_dlc)
MODFUNC(mod_set_data, *(u64 *)cf->data = *(u64 *)mod->modframe.set.data)

static inline void canframecpy(struct can_frame *dst, struct can_frame *src)
{
	/*
	 * Copy the struct members separately to ensure that no uninitialized
	 * data are copied in the 3 bytes hole of the struct. This is needed
	 * to make easy compares of the data in the struct cf_mod.
	 */

	dst->can_id = src->can_id;
	dst->can_dlc = src->can_dlc;
	*(u64 *)dst->data = *(u64 *)src->data;
}

static int cgw_chk_csum_parms(s8 fr, s8 to, s8 re)
{
	/*
	 * absolute dlc values 0 .. 7 => 0 .. 7, e.g. data [0]
	 * relative to received dlc -1 .. -8 :
	 * e.g. for received dlc = 8
	 * -1 => index = 7 (data[7])
	 * -3 => index = 5 (data[5])
	 * -8 => index = 0 (data[0])
	 */

	if (fr > -9 && fr < 8 &&
	    to > -9 && to < 8 &&
	    re > -9 && re < 8)
		return 0;
	else
		return -EINVAL;
}

static inline int calc_idx(int idx, int rx_dlc)
{
	if (idx < 0)
		return rx_dlc + idx;
	else
		return idx;
}

static void cgw_csum_xor_rel(struct can_frame *cf, struct cgw_csum_xor *xor)
{
	int from = calc_idx(xor->from_idx, cf->can_dlc);
	int to = calc_idx(xor->to_idx, cf->can_dlc);
	int res = calc_idx(xor->result_idx, cf->can_dlc);
	u8 val = xor->init_xor_val;
	int i;

	if (from < 0 || to < 0 || res < 0)
		return;

	if (from <= to) {
		for (i = from; i <= to; i++)
			val ^= cf->data[i];
	} else {
		for (i = from; i >= to; i--)
			val ^= cf->data[i];
	}

	cf->data[res] = val;
}

static void cgw_csum_xor_pos(struct can_frame *cf, struct cgw_csum_xor *xor)
{
	u8 val = xor->init_xor_val;
	int i;

	for (i = xor->from_idx; i <= xor->to_idx; i++)
		val ^= cf->data[i];

	cf->data[xor->result_idx] = val;
}

static void cgw_csum_xor_neg(struct can_frame *cf, struct cgw_csum_xor *xor)
{
	u8 val = xor->init_xor_val;
	int i;

	for (i = xor->from_idx; i >= xor->to_idx; i--)
		val ^= cf->data[i];

	cf->data[xor->result_idx] = val;
}

static void cgw_csum_crc8_rel(struct can_frame *cf, struct cgw_csum_crc8 *crc8)
{
	int from = calc_idx(crc8->from_idx, cf->can_dlc);
	int to = calc_idx(crc8->to_idx, cf->can_dlc);
	int res = calc_idx(crc8->result_idx, cf->can_dlc);
	u8 crc = crc8->init_crc_val;
	int i;

	if (from < 0 || to < 0 || res < 0)
		return;

	if (from <= to) {
		for (i = crc8->from_idx; i <= crc8->to_idx; i++)
			crc = crc8->crctab[crc^cf->data[i]];
	} else {
		for (i = crc8->from_idx; i >= crc8->to_idx; i--)
			crc = crc8->crctab[crc^cf->data[i]];
	}

	switch (crc8->profile) {

	case CGW_CRC8PRF_1U8:
		crc = crc8->crctab[crc^crc8->profile_data[0]];
		break;

	case  CGW_CRC8PRF_16U8:
		crc = crc8->crctab[crc^crc8->profile_data[cf->data[1] & 0xF]];
		break;

	case CGW_CRC8PRF_SFFID_XOR:
		crc = crc8->crctab[crc^(cf->can_id & 0xFF)^
				   (cf->can_id >> 8 & 0xFF)];
		break;

	}

	cf->data[crc8->result_idx] = crc^crc8->final_xor_val;
}

static void cgw_csum_crc8_pos(struct can_frame *cf, struct cgw_csum_crc8 *crc8)
{
	u8 crc = crc8->init_crc_val;
	int i;

	for (i = crc8->from_idx; i <= crc8->to_idx; i++)
		crc = crc8->crctab[crc^cf->data[i]];

	switch (crc8->profile) {

	case CGW_CRC8PRF_1U8:
		crc = crc8->crctab[crc^crc8->profile_data[0]];
		break;

	case  CGW_CRC8PRF_16U8:
		crc = crc8->crctab[crc^crc8->profile_data[cf->data[1] & 0xF]];
		break;

	case CGW_CRC8PRF_SFFID_XOR:
		crc = crc8->crctab[crc^(cf->can_id & 0xFF)^
				   (cf->can_id >> 8 & 0xFF)];
		break;
	}

	cf->data[crc8->result_idx] = crc^crc8->final_xor_val;
}

static void cgw_csum_crc8_neg(struct can_frame *cf, struct cgw_csum_crc8 *crc8)
{
	u8 crc = crc8->init_crc_val;
	int i;

	for (i = crc8->from_idx; i >= crc8->to_idx; i--)
		crc = crc8->crctab[crc^cf->data[i]];

	switch (crc8->profile) {

	case CGW_CRC8PRF_1U8:
		crc = crc8->crctab[crc^crc8->profile_data[0]];
		break;

	case  CGW_CRC8PRF_16U8:
		crc = crc8->crctab[crc^crc8->profile_data[cf->data[1] & 0xF]];
		break;

	case CGW_CRC8PRF_SFFID_XOR:
		crc = crc8->crctab[crc^(cf->can_id & 0xFF)^
				   (cf->can_id >> 8 & 0xFF)];
		break;
	}

	cf->data[crc8->result_idx] = crc^crc8->final_xor_val;
}

/* the receive & process & send function */
static void can_can_gw_rcv(struct sk_buff *skb, void *data)
{
	struct cgw_job *gwj = (struct cgw_job *)data;
	struct can_frame *cf;
	struct sk_buff *nskb;
	int modidx = 0;

	/*
	 * Do not handle CAN frames routed more than 'max_hops' times.
	 * In general we should never catch this delimiter which is intended
	 * to cover a misconfiguration protection (e.g. circular CAN routes).
	 *
	 * The Controller Area Network controllers only accept CAN frames with
	 * correct CRCs - which are not visible in the controller registers.
	 * According to skbuff.h documentation the csum_start element for IP
	 * checksums is undefined/unused when ip_summed == CHECKSUM_UNNECESSARY.
	 * Only CAN skbs can be processed here which already have this property.
	 */

#define cgw_hops(skb) ((skb)->csum_start)

	BUG_ON(skb->ip_summed != CHECKSUM_UNNECESSARY);

	if (cgw_hops(skb) >= max_hops) {
		/* indicate deleted frames due to misconfiguration */
		gwj->deleted_frames++;
		return;
	}

	if (!(gwj->dst.dev->flags & IFF_UP)) {
		gwj->dropped_frames++;
		return;
	}

	/* is sending the skb back to the incoming interface not allowed? */
	if (!(gwj->flags & CGW_FLAGS_CAN_IIF_TX_OK) &&
	    can_skb_prv(skb)->ifindex == gwj->dst.dev->ifindex)
		return;

	/*
	 * clone the given skb, which has not been done in can_rcv()
	 *
	 * When there is at least one modification function activated,
	 * we need to copy the skb as we want to modify skb->data.
	 */
	if (gwj->mod.modfunc[0])
		nskb = skb_copy(skb, GFP_ATOMIC);
	else
		nskb = skb_clone(skb, GFP_ATOMIC);

	if (!nskb) {
		gwj->dropped_frames++;
		return;
	}

	/* put the incremented hop counter in the cloned skb */
	cgw_hops(nskb) = cgw_hops(skb) + 1;

	/* first processing of this CAN frame -> adjust to private hop limit */
	if (gwj->limit_hops && cgw_hops(nskb) == 1)
		cgw_hops(nskb) = max_hops - gwj->limit_hops + 1;

	nskb->dev = gwj->dst.dev;

	/* pointer to modifiable CAN frame */
	cf = (struct can_frame *)nskb->data;

	/* perform preprocessed modification functions if there are any */
	while (modidx < MAX_MODFUNCTIONS && gwj->mod.modfunc[modidx])
		(*gwj->mod.modfunc[modidx++])(cf, &gwj->mod);

	/* check for checksum updates when the CAN frame has been modified */
	if (modidx) {
		if (gwj->mod.csumfunc.crc8)
			(*gwj->mod.csumfunc.crc8)(cf, &gwj->mod.csum.crc8);

		if (gwj->mod.csumfunc.xor)
			(*gwj->mod.csumfunc.xor)(cf, &gwj->mod.csum.xor);
	}

	/* clear the skb timestamp if not configured the other way */
	if (!(gwj->flags & CGW_FLAGS_CAN_SRC_TSTAMP))
		nskb->tstamp = 0;

	/* send to netdevice */
	if (can_send(nskb, gwj->flags & CGW_FLAGS_CAN_ECHO))
		gwj->dropped_frames++;
	else
		gwj->handled_frames++;
}

static inline int cgw_register_filter(struct cgw_job *gwj)
{
	return can_rx_register(gwj->src.dev, gwj->ccgw.filter.can_id,
			       gwj->ccgw.filter.can_mask, can_can_gw_rcv,
			       gwj, "gw", NULL);
}

static inline void cgw_unregister_filter(struct cgw_job *gwj)
{
	can_rx_unregister(gwj->src.dev, gwj->ccgw.filter.can_id,
			  gwj->ccgw.filter.can_mask, can_can_gw_rcv, gwj);
}

static int cgw_notifier(struct notifier_block *nb,
			unsigned long msg, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);

	if (!net_eq(dev_net(dev), &init_net))
		return NOTIFY_DONE;
	if (dev->type != ARPHRD_CAN)
		return NOTIFY_DONE;

	if (msg == NETDEV_UNREGISTER) {

		struct cgw_job *gwj = NULL;
		struct hlist_node *nx;

		ASSERT_RTNL();

		hlist_for_each_entry_safe(gwj, nx, &cgw_list, list) {

			if (gwj->src.dev == dev || gwj->dst.dev == dev) {
				hlist_del(&gwj->list);
				cgw_unregister_filter(gwj);
				kmem_cache_free(cgw_cache, gwj);
			}
		}
	}

	return NOTIFY_DONE;
}

static int cgw_put_job(struct sk_buff *skb, struct cgw_job *gwj, int type,
		       u32 pid, u32 seq, int flags)
{
	struct cgw_frame_mod mb;
	struct rtcanmsg *rtcan;
	struct nlmsghdr *nlh;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtcan), flags);
	if (!nlh)
		return -EMSGSIZE;

	rtcan = nlmsg_data(nlh);
	rtcan->can_family = AF_CAN;
	rtcan->gwtype = gwj->gwtype;
	rtcan->flags = gwj->flags;

	/* add statistics if available */

	if (gwj->handled_frames) {
		if (nla_put_u32(skb, CGW_HANDLED, gwj->handled_frames) < 0)
			goto cancel;
	}

	if (gwj->dropped_frames) {
		if (nla_put_u32(skb, CGW_DROPPED, gwj->dropped_frames) < 0)
			goto cancel;
	}

	if (gwj->deleted_frames) {
		if (nla_put_u32(skb, CGW_DELETED, gwj->deleted_frames) < 0)
			goto cancel;
	}

	/* check non default settings of attributes */

	if (gwj->limit_hops) {
		if (nla_put_u8(skb, CGW_LIM_HOPS, gwj->limit_hops) < 0)
			goto cancel;
	}

	if (gwj->mod.modtype.and) {
		memcpy(&mb.cf, &gwj->mod.modframe.and, sizeof(mb.cf));
		mb.modtype = gwj->mod.modtype.and;
		if (nla_put(skb, CGW_MOD_AND, sizeof(mb), &mb) < 0)
			goto cancel;
	}

	if (gwj->mod.modtype.or) {
		memcpy(&mb.cf, &gwj->mod.modframe.or, sizeof(mb.cf));
		mb.modtype = gwj->mod.modtype.or;
		if (nla_put(skb, CGW_MOD_OR, sizeof(mb), &mb) < 0)
			goto cancel;
	}

	if (gwj->mod.modtype.xor) {
		memcpy(&mb.cf, &gwj->mod.modframe.xor, sizeof(mb.cf));
		mb.modtype = gwj->mod.modtype.xor;
		if (nla_put(skb, CGW_MOD_XOR, sizeof(mb), &mb) < 0)
			goto cancel;
	}

	if (gwj->mod.modtype.set) {
		memcpy(&mb.cf, &gwj->mod.modframe.set, sizeof(mb.cf));
		mb.modtype = gwj->mod.modtype.set;
		if (nla_put(skb, CGW_MOD_SET, sizeof(mb), &mb) < 0)
			goto cancel;
	}

	if (gwj->mod.uid) {
		if (nla_put_u32(skb, CGW_MOD_UID, gwj->mod.uid) < 0)
			goto cancel;
	}

	if (gwj->mod.csumfunc.crc8) {
		if (nla_put(skb, CGW_CS_CRC8, CGW_CS_CRC8_LEN,
			    &gwj->mod.csum.crc8) < 0)
			goto cancel;
	}

	if (gwj->mod.csumfunc.xor) {
		if (nla_put(skb, CGW_CS_XOR, CGW_CS_XOR_LEN,
			    &gwj->mod.csum.xor) < 0)
			goto cancel;
	}

	if (gwj->gwtype == CGW_TYPE_CAN_CAN) {

		if (gwj->ccgw.filter.can_id || gwj->ccgw.filter.can_mask) {
			if (nla_put(skb, CGW_FILTER, sizeof(struct can_filter),
				    &gwj->ccgw.filter) < 0)
				goto cancel;
		}

		if (nla_put_u32(skb, CGW_SRC_IF, gwj->ccgw.src_idx) < 0)
			goto cancel;

		if (nla_put_u32(skb, CGW_DST_IF, gwj->ccgw.dst_idx) < 0)
			goto cancel;
	}

	nlmsg_end(skb, nlh);
	return 0;

cancel:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

/* Dump information about all CAN gateway jobs, in response to RTM_GETROUTE */
static int cgw_dump_jobs(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct cgw_job *gwj = NULL;
	int idx = 0;
	int s_idx = cb->args[0];

	rcu_read_lock();
	hlist_for_each_entry_rcu(gwj, &cgw_list, list) {
		if (idx < s_idx)
			goto cont;

		if (cgw_put_job(skb, gwj, RTM_NEWROUTE, NETLINK_CB(cb->skb).portid,
		    cb->nlh->nlmsg_seq, NLM_F_MULTI) < 0)
			break;
cont:
		idx++;
	}
	rcu_read_unlock();

	cb->args[0] = idx;

	return skb->len;
}

static const struct nla_policy cgw_policy[CGW_MAX+1] = {
	[CGW_MOD_AND]	= { .len = sizeof(struct cgw_frame_mod) },
	[CGW_MOD_OR]	= { .len = sizeof(struct cgw_frame_mod) },
	[CGW_MOD_XOR]	= { .len = sizeof(struct cgw_frame_mod) },
	[CGW_MOD_SET]	= { .len = sizeof(struct cgw_frame_mod) },
	[CGW_CS_XOR]	= { .len = sizeof(struct cgw_csum_xor) },
	[CGW_CS_CRC8]	= { .len = sizeof(struct cgw_csum_crc8) },
	[CGW_SRC_IF]	= { .type = NLA_U32 },
	[CGW_DST_IF]	= { .type = NLA_U32 },
	[CGW_FILTER]	= { .len = sizeof(struct can_filter) },
	[CGW_LIM_HOPS]	= { .type = NLA_U8 },
	[CGW_MOD_UID]	= { .type = NLA_U32 },
};

/* check for common and gwtype specific attributes */
static int cgw_parse_attr(struct nlmsghdr *nlh, struct cf_mod *mod,
			  u8 gwtype, void *gwtypeattr, u8 *limhops)
{
	struct nlattr *tb[CGW_MAX+1];
	struct cgw_frame_mod mb;
	int modidx = 0;
	int err = 0;

	/* initialize modification & checksum data space */
	memset(mod, 0, sizeof(*mod));

	err = nlmsg_parse(nlh, sizeof(struct rtcanmsg), tb, CGW_MAX,
			  cgw_policy);
	if (err < 0)
		return err;

	if (tb[CGW_LIM_HOPS]) {
		*limhops = nla_get_u8(tb[CGW_LIM_HOPS]);

		if (*limhops < 1 || *limhops > max_hops)
			return -EINVAL;
	}

	/* check for AND/OR/XOR/SET modifications */

	if (tb[CGW_MOD_AND]) {
		nla_memcpy(&mb, tb[CGW_MOD_AND], CGW_MODATTR_LEN);

		canframecpy(&mod->modframe.and, &mb.cf);
		mod->modtype.and = mb.modtype;

		if (mb.modtype & CGW_MOD_ID)
			mod->modfunc[modidx++] = mod_and_id;

		if (mb.modtype & CGW_MOD_DLC)
			mod->modfunc[modidx++] = mod_and_dlc;

		if (mb.modtype & CGW_MOD_DATA)
			mod->modfunc[modidx++] = mod_and_data;
	}

	if (tb[CGW_MOD_OR]) {
		nla_memcpy(&mb, tb[CGW_MOD_OR], CGW_MODATTR_LEN);

		canframecpy(&mod->modframe.or, &mb.cf);
		mod->modtype.or = mb.modtype;

		if (mb.modtype & CGW_MOD_ID)
			mod->modfunc[modidx++] = mod_or_id;

		if (mb.modtype & CGW_MOD_DLC)
			mod->modfunc[modidx++] = mod_or_dlc;

		if (mb.modtype & CGW_MOD_DATA)
			mod->modfunc[modidx++] = mod_or_data;
	}

	if (tb[CGW_MOD_XOR]) {
		nla_memcpy(&mb, tb[CGW_MOD_XOR], CGW_MODATTR_LEN);

		canframecpy(&mod->modframe.xor, &mb.cf);
		mod->modtype.xor = mb.modtype;

		if (mb.modtype & CGW_MOD_ID)
			mod->modfunc[modidx++] = mod_xor_id;

		if (mb.modtype & CGW_MOD_DLC)
			mod->modfunc[modidx++] = mod_xor_dlc;

		if (mb.modtype & CGW_MOD_DATA)
			mod->modfunc[modidx++] = mod_xor_data;
	}

	if (tb[CGW_MOD_SET]) {
		nla_memcpy(&mb, tb[CGW_MOD_SET], CGW_MODATTR_LEN);

		canframecpy(&mod->modframe.set, &mb.cf);
		mod->modtype.set = mb.modtype;

		if (mb.modtype & CGW_MOD_ID)
			mod->modfunc[modidx++] = mod_set_id;

		if (mb.modtype & CGW_MOD_DLC)
			mod->modfunc[modidx++] = mod_set_dlc;

		if (mb.modtype & CGW_MOD_DATA)
			mod->modfunc[modidx++] = mod_set_data;
	}

	/* check for checksum operations after CAN frame modifications */
	if (modidx) {

		if (tb[CGW_CS_CRC8]) {
			struct cgw_csum_crc8 *c = nla_data(tb[CGW_CS_CRC8]);

			err = cgw_chk_csum_parms(c->from_idx, c->to_idx,
						 c->result_idx);
			if (err)
				return err;

			nla_memcpy(&mod->csum.crc8, tb[CGW_CS_CRC8],
				   CGW_CS_CRC8_LEN);

			/*
			 * select dedicated processing function to reduce
			 * runtime operations in receive hot path.
			 */
			if (c->from_idx < 0 || c->to_idx < 0 ||
			    c->result_idx < 0)
				mod->csumfunc.crc8 = cgw_csum_crc8_rel;
			else if (c->from_idx <= c->to_idx)
				mod->csumfunc.crc8 = cgw_csum_crc8_pos;
			else
				mod->csumfunc.crc8 = cgw_csum_crc8_neg;
		}

		if (tb[CGW_CS_XOR]) {
			struct cgw_csum_xor *c = nla_data(tb[CGW_CS_XOR]);

			err = cgw_chk_csum_parms(c->from_idx, c->to_idx,
						 c->result_idx);
			if (err)
				return err;

			nla_memcpy(&mod->csum.xor, tb[CGW_CS_XOR],
				   CGW_CS_XOR_LEN);

			/*
			 * select dedicated processing function to reduce
			 * runtime operations in receive hot path.
			 */
			if (c->from_idx < 0 || c->to_idx < 0 ||
			    c->result_idx < 0)
				mod->csumfunc.xor = cgw_csum_xor_rel;
			else if (c->from_idx <= c->to_idx)
				mod->csumfunc.xor = cgw_csum_xor_pos;
			else
				mod->csumfunc.xor = cgw_csum_xor_neg;
		}

		if (tb[CGW_MOD_UID]) {
			nla_memcpy(&mod->uid, tb[CGW_MOD_UID], sizeof(u32));
		}
	}

	if (gwtype == CGW_TYPE_CAN_CAN) {

		/* check CGW_TYPE_CAN_CAN specific attributes */

		struct can_can_gw *ccgw = (struct can_can_gw *)gwtypeattr;
		memset(ccgw, 0, sizeof(*ccgw));

		/* check for can_filter in attributes */
		if (tb[CGW_FILTER])
			nla_memcpy(&ccgw->filter, tb[CGW_FILTER],
				   sizeof(struct can_filter));

		err = -ENODEV;

		/* specifying two interfaces is mandatory */
		if (!tb[CGW_SRC_IF] || !tb[CGW_DST_IF])
			return err;

		ccgw->src_idx = nla_get_u32(tb[CGW_SRC_IF]);
		ccgw->dst_idx = nla_get_u32(tb[CGW_DST_IF]);

		/* both indices set to 0 for flushing all routing entries */
		if (!ccgw->src_idx && !ccgw->dst_idx)
			return 0;

		/* only one index set to 0 is an error */
		if (!ccgw->src_idx || !ccgw->dst_idx)
			return err;
	}

	/* add the checks for other gwtypes here */

	return 0;
}

static int cgw_create_job(struct sk_buff *skb,  struct nlmsghdr *nlh)
{
	struct rtcanmsg *r;
	struct cgw_job *gwj;
	struct cf_mod mod;
	struct can_can_gw ccgw;
	u8 limhops = 0;
	int err = 0;

	if (!netlink_capable(skb, CAP_NET_ADMIN))
		return -EPERM;

	if (nlmsg_len(nlh) < sizeof(*r))
		return -EINVAL;

	r = nlmsg_data(nlh);
	if (r->can_family != AF_CAN)
		return -EPFNOSUPPORT;

	/* so far we only support CAN -> CAN routings */
	if (r->gwtype != CGW_TYPE_CAN_CAN)
		return -EINVAL;

	err = cgw_parse_attr(nlh, &mod, CGW_TYPE_CAN_CAN, &ccgw, &limhops);
	if (err < 0)
		return err;

	if (mod.uid) {

		ASSERT_RTNL();

		/* check for updating an existing job with identical uid */
		hlist_for_each_entry(gwj, &cgw_list, list) {

			if (gwj->mod.uid != mod.uid)
				continue;

			/* interfaces & filters must be identical */
			if (memcmp(&gwj->ccgw, &ccgw, sizeof(ccgw)))
				return -EINVAL;

			/* update modifications with disabled softirq & quit */
			local_bh_disable();
			memcpy(&gwj->mod, &mod, sizeof(mod));
			local_bh_enable();
			return 0;
		}
	}

	/* ifindex == 0 is not allowed for job creation */
	if (!ccgw.src_idx || !ccgw.dst_idx)
		return -ENODEV;

	gwj = kmem_cache_alloc(cgw_cache, GFP_KERNEL);
	if (!gwj)
		return -ENOMEM;

	gwj->handled_frames = 0;
	gwj->dropped_frames = 0;
	gwj->deleted_frames = 0;
	gwj->flags = r->flags;
	gwj->gwtype = r->gwtype;
	gwj->limit_hops = limhops;

	/* insert already parsed information */
	memcpy(&gwj->mod, &mod, sizeof(mod));
	memcpy(&gwj->ccgw, &ccgw, sizeof(ccgw));

	err = -ENODEV;

	gwj->src.dev = __dev_get_by_index(&init_net, gwj->ccgw.src_idx);

	if (!gwj->src.dev)
		goto out;

	if (gwj->src.dev->type != ARPHRD_CAN)
		goto out;

	gwj->dst.dev = __dev_get_by_index(&init_net, gwj->ccgw.dst_idx);

	if (!gwj->dst.dev)
		goto out;

	if (gwj->dst.dev->type != ARPHRD_CAN)
		goto out;

	ASSERT_RTNL();

	err = cgw_register_filter(gwj);
	if (!err)
		hlist_add_head_rcu(&gwj->list, &cgw_list);
out:
	if (err)
		kmem_cache_free(cgw_cache, gwj);

	return err;
}

static void cgw_remove_all_jobs(void)
{
	struct cgw_job *gwj = NULL;
	struct hlist_node *nx;

	ASSERT_RTNL();

	hlist_for_each_entry_safe(gwj, nx, &cgw_list, list) {
		hlist_del(&gwj->list);
		cgw_unregister_filter(gwj);
		kmem_cache_free(cgw_cache, gwj);
	}
}

static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	struct cgw_job *gwj = NULL;
	struct hlist_node *nx;
	struct rtcanmsg *r;
	struct cf_mod mod;
	struct can_can_gw ccgw;
	u8 limhops = 0;
	int err = 0;

	if (!netlink_capable(skb, CAP_NET_ADMIN))
		return -EPERM;

	if (nlmsg_len(nlh) < sizeof(*r))
		return -EINVAL;

	r = nlmsg_data(nlh);
	if (r->can_family != AF_CAN)
		return -EPFNOSUPPORT;

	/* so far we only support CAN -> CAN routings */
	if (r->gwtype != CGW_TYPE_CAN_CAN)
		return -EINVAL;

	err = cgw_parse_attr(nlh, &mod, CGW_TYPE_CAN_CAN, &ccgw, &limhops);
	if (err < 0)
		return err;

	/* two interface indices both set to 0 => remove all entries */
	if (!ccgw.src_idx && !ccgw.dst_idx) {
		cgw_remove_all_jobs();
		return 0;
	}

	err = -EINVAL;

	ASSERT_RTNL();

	/* remove only the first matching entry */
	hlist_for_each_entry_safe(gwj, nx, &cgw_list, list) {

		if (gwj->flags != r->flags)
			continue;

		if (gwj->limit_hops != limhops)
			continue;

		/* we have a match when uid is enabled and identical */
		if (gwj->mod.uid || mod.uid) {
			if (gwj->mod.uid != mod.uid)
				continue;
		} else {
			/* no uid => check for identical modifications */
			if (memcmp(&gwj->mod, &mod, sizeof(mod)))
				continue;
		}

		/* if (r->gwtype == CGW_TYPE_CAN_CAN) - is made sure here */
		if (memcmp(&gwj->ccgw, &ccgw, sizeof(ccgw)))
			continue;

		hlist_del(&gwj->list);
		cgw_unregister_filter(gwj);
		kmem_cache_free(cgw_cache, gwj);
		err = 0;
		break;
	}

	return err;
}

static __init int cgw_module_init(void)
{
	/* sanitize given module parameter */
	max_hops = clamp_t(unsigned int, max_hops, CGW_MIN_HOPS, CGW_MAX_HOPS);

	pr_info("can: netlink gateway (rev " CAN_GW_VERSION ") max_hops=%d\n",
		max_hops);

	cgw_cache = kmem_cache_create("can_gw", sizeof(struct cgw_job),
				      0, 0, NULL);

	if (!cgw_cache)
		return -ENOMEM;

	/* set notifier */
	notifier.notifier_call = cgw_notifier;
	register_netdevice_notifier(&notifier);

	if (__rtnl_register(PF_CAN, RTM_GETROUTE, NULL, cgw_dump_jobs, NULL)) {
		unregister_netdevice_notifier(&notifier);
		kmem_cache_destroy(cgw_cache);
		return -ENOBUFS;
	}

	/* Only the first call to __rtnl_register can fail */
	__rtnl_register(PF_CAN, RTM_NEWROUTE, cgw_create_job, NULL, NULL);
	__rtnl_register(PF_CAN, RTM_DELROUTE, cgw_remove_job, NULL, NULL);

	return 0;
}

static __exit void cgw_module_exit(void)
{
	rtnl_unregister_all(PF_CAN);

	unregister_netdevice_notifier(&notifier);

	rtnl_lock();
	cgw_remove_all_jobs();
	rtnl_unlock();

	rcu_barrier(); /* Wait for completion of call_rcu()'s */

	kmem_cache_destroy(cgw_cache);
}

module_init(cgw_module_init);
module_exit(cgw_module_exit);
lt;davem@davemloft.net&gt;


</td></tr>
<tr class='logheader'><td><span title='2017-02-04 19:58:14 -0500'>2017-02-04</span></td><td class='logsubject'><a href='/cgit.cgi/linux/net-next.git/commit/include?id=0ae8133586ad1c9be894411aaf8b17bb58c8efe5'>net: ipv6: Allow shorthand delete of all nexthops in multipath route</a></td><td>David Ahern</td><td>1</td><td><span class='deletions'>-1</span>/<span class='insertions'>+3</span></td></tr>
<tr class='nohover-highlight'><td/><td colspan='5' class='logmsg'>
IPv4 allows multipath routes to be deleted using just the prefix and
length. For example:
    $ ip ro ls vrf red
    unreachable default metric 8192
    1.1.1.0/24
        nexthop via 10.100.1.254  dev eth1 weight 1
        nexthop via 10.11.200.2  dev eth11.200 weight 1
    10.11.200.0/24 dev eth11.200 proto kernel scope link src 10.11.200.3
    10.100.1.0/24 dev eth1 proto kernel scope link src 10.100.1.3

    $ ip ro del 1.1.1.0/24 vrf red

    $ ip ro ls vrf red
    unreachable default metric 8192
    10.11.200.0/24 dev eth11.200 proto kernel scope link src 10.11.200.3
    10.100.1.0/24 dev eth1 proto kernel scope link src 10.100.1.3

The same notation does not work with IPv6 because of how multipath routes
are implemented for IPv6. For IPv6 only the first nexthop of a multipath
route is deleted if the request contains only a prefix and length. This
leads to unnecessary complexity in userspace dealing with IPv6 multipath
routes.

This patch allows all nexthops to be deleted without specifying each one
in the delete request. Internally, this is done by walking the sibling
list of the route matching the specifications given (prefix, length,
metric, protocol, etc).

    $  ip -6 ro ls vrf red
    2001:db8:1::/120 dev eth1 proto kernel metric 256  pref medium
    2001:db8:2::/120 dev eth2 proto kernel metric 256  pref medium
    2001:db8:200::/120 via 2001:db8:1::2 dev eth1 metric 1024  pref medium
    2001:db8:200::/120 via 2001:db8:2::2 dev eth2 metric 1024  pref medium
    ...

    $ ip -6 ro del vrf red 2001:db8:200::/120

    $ ip -6 ro ls vrf red
    2001:db8:1::/120 dev eth1 proto kernel metric 256  pref medium
    2001:db8:2::/120 dev eth2 proto kernel metric 256  pref medium
    ...

Because IPv6 allows individual nexthops to be deleted without deleting
the entire route, the ip6_route_multipath_del and non-multipath code
path (ip6_route_del) have to be discriminated so that all nexthops are
only deleted for the latter case. This is done by making the existing
fc_type in fib6_config a u16 and then adding a new u16 field with
fc_delete_all_nh as the first bit.

Suggested-by: Dinesh Dutt &lt;ddutt@cumulusnetworks.com&gt;
Signed-off-by: David Ahern &lt;dsa@cumulusnetworks.com&gt;
Signed-off-by: David S. Miller &lt;davem@davemloft.net&gt;


</td></tr>
<tr class='logheader'><td><span title='2017-02-03 17:28:29 -0500'>2017-02-03</span></td><td class='logsubject'><a href='/cgit.cgi/linux/net-next.git/commit/include?id=79e7fff47b7bb4124ef970a13eac4fdeddd1fc25'>net: remove support for per driver ndo_busy_poll()</a></td><td>Eric Dumazet</td><td>2</td><td><span class='deletions'>-5</span>/<span class='insertions'>+0</span></td></tr>
<tr class='nohover-highlight'><td/><td colspan='5' class='logmsg'>
We added generic support for busy polling in NAPI layer in linux-4.5

No network driver uses ndo_busy_poll() anymore, we can get rid
of the pointer in struct net_device_ops, and its use in sk_busy_loop()

Saves NETIF_F_BUSY_POLL features bit.

Signed-off-by: Eric Dumazet &lt;edumazet@google.com&gt;
Signed-off-by: David S. Miller &lt;davem@davemloft.net&gt;


</td></tr>
<tr class='logheader'><td><span title='2017-02-03 16:58:20 -0500'>2017-02-03</span></td><td class='logsubject'><a href='/cgit.cgi/linux/net-next.git/commit/include?id=52e01b84a244473074fc0612c169e2e043d58b01'>Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next</a></td><td>David S. Miller</td><td>11</td><td><span class='deletions'>-40</span>/<span class='insertions'>+60</span></td></tr>
<tr class='nohover-highlight'><td/><td colspan='5' class='logmsg'>
Pablo Neira Ayuso says:

====================
Netfilter updates for net-next

The following patchset contains Netfilter updates for your net-next
tree, they are:

1) Stash ctinfo 3-bit field into pointer to nf_conntrack object from
   sk_buff so we only access one single cacheline in the conntrack
   hotpath. Patchset from Florian Westphal.

2) Don't leak pointer to internal structures when exporting x_tables
   ruleset back to userspace, from Willem DeBruijn. This includes new
   helper functions to copy data to userspace such as xt_data_to_user()
   as well as conversions of our ip_tables, ip6_tables and arp_tables
   clients to use it. Not surprinsingly, ebtables requires an ad-hoc
   update. There is also a new field in x_tables extensions to indicate
   the amount of bytes that we copy to userspace.

3) Add nf_log_all_netns sysctl: This new knob allows you to enable
   logging via nf_log infrastructure for all existing netnamespaces.
   Given the effort to provide pernet syslog has been discontinued,
   let's provide a way to restore logging using netfilter kernel logging
   facilities in trusted environments. Patch from Michal Kubecek.

4) Validate SCTP checksum from conntrack helper, from Davide Caratti.

5) Merge UDPlite conntrack and NAT helpers into UDP, this was mostly
   a copy&amp;paste from the original helper, from Florian Westphal.

6) Reset netfilter state when duplicating packets, also from Florian.

7) Remove unnecessary check for broadcast in IPv6 in pkttype match and
   nft_meta, from Liping Zhang.

8) Add missing code to deal with loopback packets from nft_meta when
   used by the netdev family, also from Liping.

9) Several cleanups on nf_tables, one to remove unnecessary check from
   the netlink control plane path to add table, set and stateful objects
   and code consolidation when unregister chain hooks, from Gao Feng.

10) Fix harmless reference counter underflow in IPVS that, however,
    results in problems with the introduction of the new refcount_t
    type, from David Windsor.

11) Enable LIBCRC32C from nf_ct_sctp instead of nf_nat_sctp,
    from Davide Caratti.

12) Missing documentation on nf_tables uapi header, from Liping Zhang.

13) Use rb_entry() helper in xt_connlimit, from Geliang Tang.
====================

Signed-off-by: David S. Miller &lt;davem@davemloft.net&gt;


</td></tr>
<tr class='logheader'><td><span title='2017-02-03 16:35:43 -0500'>2017-02-03</span></td><td class='logsubject'><a href='/cgit.cgi/linux/net-next.git/commit/include?id=69ca05ce9dec2cc95070df7f1f10ea6c9c12d237'>sched: cls_flower: expose priority to offloading netdevice</a></td><td>Jiri Pirko</td><td>1</td><td><span class='deletions'>-0</span>/<span class='insertions'>+1</span></td></tr>
<tr class='nohover-highlight'><td/><td colspan='5' class='logmsg'>
The driver that offloads flower rules needs to know with which priority
user inserted the rules. So add this information into offload struct.

Signed-off-by: Jiri Pirko &lt;jiri@mellanox.com&gt;
Acked-by: Ido Schimmel &lt;idosch@mellanox.com&gt;
Signed-off-by: David S. Miller &lt;davem@davemloft.net&gt;


</td></tr>
<tr class='logheader'><td><span title='2017-02-03 16:35:42 -0500'>2017-02-03</span></td><td class='logsubject'><a href='/cgit.cgi/linux/net-next.git/commit/include?id=44091d29f2075972aede47ef17e1e70db3d51190'>lib: Introduce priority array area manager</a></td><td>Jiri Pirko</td><td>1</td><td><span class='deletions'>-0</span>/<span class='insertions'>+76</span></td></tr>
<tr class='nohover-highlight'><td/><td colspan='5' class='logmsg'>
This introduces a infrastructure for management of linear priority
areas. Priority order in an array matters, however order of items inside
a priority group does not matter.

As an initial implementation, L-sort algorithm is used. It is quite
trivial. More advanced algorithm called P-sort will be introduced as a
follow-up. The infrastructure is prepared for other algos.

Alongside this, a testing module is introduced as well.

Signed-off-by: Jiri Pirko &lt;jiri@mellanox.com&gt;
Signed-off-by: David S. Miller &lt;davem@davemloft.net&gt;


</td></tr>
<tr class='logheader'><td><span title='2017-02-03 16:35:42 -0500'>2017-02-03</span></td><td class='logsubject'><a href='/cgit.cgi/linux/net-next.git/commit/include?id=b862815c3ee7b49ec20a9ab25da55a5f0bcbb95e'>list: introduce list_for_each_entry_from_reverse helper</a></td><td>Jiri Pirko</td><td>1</td><td><span class='deletions'>-0</span>/<span class='insertions'>+13</span></td></tr>
<tr class='nohover-highlight'><td/><td colspan='5' class='logmsg'>
Similar to list_for_each_entry_continue and its reverse variant
list_for_each_entry_continue_reverse, introduce reverse helper for
list_for_each_entry_from.

Signed-off-by: Jiri Pirko &lt;jiri@mellanox.com&gt;
Acked-by: Ido Schimmel &lt;idosch@mellanox.com&gt;
Signed-off-by: David S. Miller &lt;davem@davemloft.net&gt;


</td></tr>
<tr class='logheader'><td><span title='2017-02-03 15:50:18 -0500'>2017-02-03</span></td><td class='logsubject'><a href='/cgit.cgi/linux/net-next.git/commit/include?id=3898fac1f488c76e0eef5b5267b4ba8112a82ac4'>trace: rename trace_print_hex_seq arg and add kdoc</a></td><td>Daniel Borkmann</td><td>2</td><td><span class='deletions'>-3</span>/<span class='insertions'>+3</span></td></tr>
<tr class='nohover-highlight'><td/><td colspan='5' class='logmsg'>
Steven suggested to improve trace_print_hex_seq() a bit after commit
2acae0d5b0f7 ("trace: add variant without spacing in trace_print_hex_seq")
in two ways: i) by adding a kdoc comment for the helper function
itself and ii) by renaming 'spacing' argument into 'concatenate'
to better denote that we don't add spaces between each hex bytes.

Suggested-by: Steven Rostedt &lt;rostedt@goodmis.org&gt;
Signed-off-by: Daniel Borkmann &lt;daniel@iogearbox.net&gt;
Signed-off-by: David S. Miller &lt;davem@davemloft.net&gt;


</td></tr>
<tr class='logheader'><td><span title='2017-02-03 15:21:21 -0500'>2017-02-03</span></td><td class='logsubject'><a href='/cgit.cgi/linux/net-next.git/commit/include?id=b3c7ef0adadc5768e0baa786213c6bd1ce521a77'>bridge: uapi: add per vlan tunnel info</a></td><td>Roopa Prabhu</td><td>3</td><td><span class='deletions'>-0</span>/<span class='insertions'>+13</span></td></tr>
<tr class='nohover-highlight'><td/><td colspan='5' class='logmsg'>
New nested netlink attribute to associate tunnel info per vlan.
This is used by bridge driver to send tunnel metadata to
bridge ports in vlan tunnel mode. This patch also adds new per
port flag IFLA_BRPORT_VLAN_TUNNEL to enable vlan tunnel mode.
off by default.

One example use for this is a vxlan bridging gateway or vtep
which maps vlans to vn-segments (or vnis). User can configure
per-vlan tunnel information which the bridge driver can use
to bridge vlan into the corresponding vn-segment.

Signed-off-by: Roopa Prabhu &lt;roopa@cumulusnetworks.com&gt;
Signed-off-by: David S. Miller &lt;davem@davemloft.net&gt;


</td></tr>
<tr class='logheader'><td><span title='2017-02-03 15:21:21 -0500'>2017-02-03</span></td><td class='logsubject'><a href='/cgit.cgi/linux/net-next.git/commit/include?id=3ad7a4b141ebd6091494913672d7166d5c2764e4'>vxlan: support fdb and learning in COLLECT_METADATA mode</a></td><td>Roopa Prabhu</td><td>1</td><td><span class='deletions'>-0</span>/<span class='insertions'>+1</span></td></tr>
<tr class='nohover-highlight'><td/><td colspan='5' class='logmsg'>
Vxlan COLLECT_METADATA mode today solves the per-vni netdev
scalability problem in l3 networks. It expects all forwarding
information to be present in dst_metadata. This patch series
enhances collect metadata mode to include the case where only
vni is present in dst_metadata, and the vxlan driver can then use
the rest of the forwarding information datbase to make forwarding
decisions. There is no change to default COLLECT_METADATA
behaviour. These changes only apply to COLLECT_METADATA when
used with the bridging use-case with a special dst_metadata
tunnel info flag (eg: where vxlan device is part of a bridge).
For all this to work, the vxlan driver will need to now support a
single fdb table hashed by mac + vni. This series essentially makes
this happen.

use-case and workflow:
vxlan collect metadata device participates in bridging vlan
to vn-segments. Bridge driver above the vxlan device,
sends the vni corresponding to the vlan in the dst_metadata.
vxlan driver will lookup forwarding database with (mac + vni)
for the required remote destination information to forward the
packet.

Changes introduced by this patch:
    - allow learning and forwarding database state in vxlan netdev in
      COLLECT_METADATA mode. Current behaviour is not changed
      by default. tunnel info flag IP_TUNNEL_INFO_BRIDGE is used
      to support the new bridge friendly mode.
    - A single fdb table hashed by (mac, vni) to allow fdb entries with
      multiple vnis in the same fdb table
    - rx path already has the vni
    - tx path expects a vni in the packet with dst_metadata
    - prior to this series, fdb remote_dsts carried remote vni and
      the vxlan device carrying the fdb table represented the
      source vni. With the vxlan device now representing multiple vnis,
      this patch adds a src vni attribute to the fdb entry. The remote
      vni already uses NDA_VNI attribute. This patch introduces
      NDA_SRC_VNI netlink attribute to represent the src vni in a multi
      vni fdb table.

iproute2 example (patched and pruned iproute2 output to just show
relevant fdb entries):
example shows same host mac learnt on two vni's.

before (netdev per vni):
$bridge fdb show | grep "00:02:00:00:00:03"
00:02:00:00:00:03 dev vxlan1001 dst 12.0.0.8 self
00:02:00:00:00:03 dev vxlan1000 dst 12.0.0.8 self

after this patch with collect metadata in bridged mode (single netdev):
$bridge fdb show | grep "00:02:00:00:00:03"
00:02:00:00:00:03 dev vxlan0 src_vni 1001 dst 12.0.0.8 self
00:02:00:00:00:03 dev vxlan0 src_vni 1000 dst 12.0.0.8 self

Signed-off-by: Roopa Prabhu &lt;roopa@cumulusnetworks.com&gt;
Signed-off-by: David S. Miller &lt;davem@davemloft.net&gt;


</td></tr>
<tr class='logheader'><td><span title='2017-02-03 15:21:21 -0500'>2017-02-03</span></td><td class='logsubject'><a href='/cgit.cgi/linux/net-next.git/commit/include?id=f35581d64e55fc65753a62957b3b98127d560d07'>ip_tunnels: new IP_TUNNEL_INFO_BRIDGE flag for ip_tunnel_info mode</a></td><td>Roopa Prabhu</td><td>1</td><td><span class='deletions'>-0</span>/<span class='insertions'>+1</span></td></tr>
<tr class='nohover-highlight'><td/><td colspan='5' class='logmsg'>
New ip_tunnel_info flag to represent bridged tunnel metadata.
Used by bridge driver later in the series to pass per vlan dst
metadata to bridge ports.

Signed-off-by: Roopa Prabhu &lt;roopa@cumulusnetworks.com&gt;
Signed-off-by: David S. Miller &lt;davem@davemloft.net&gt;


</td></tr>
<tr class='logheader'><td><span title='2017-02-03 15:16:46 -0500'>2017-02-03</span></td><td class='logsubject'><a href='/cgit.cgi/linux/net-next.git/commit/include?id=295a6e06d21e1f469c9f38b00125a13b60ad4e7c'>net/sched: act_ife: Change to use ife module</a></td><td>Yotam Gigi</td><td>2</td><td><span class='deletions'>-10</span>/<span class='insertions'>+1</span></td></tr>
<tr class='nohover-highlight'><td/><td colspan='5' class='logmsg'>
Use the encode/decode functionality from the ife module instead of using
implementation inside the act_ife.

Reviewed-by: Jiri Pirko &lt;jiri@mellanox.com&gt;
Signed-off-by: Yotam Gigi &lt;yotamg@mellanox.com&gt;
Signed-off-by: Jamal Hadi Salim &lt;jhs@mojatatu.com&gt;
Signed-off-by: Roman Mashak &lt;mrv@mojatatu.com&gt;
Signed-off-by: David S. Miller &lt;davem@davemloft.net&gt;


</td></tr>
<tr class='logheader'><td><span title='2017-02-03 15:16:45 -0500'>2017-02-03</span></td><td class='logsubject'><a href='/cgit.cgi/linux/net-next.git/commit/include?id=1ce8460496c05379c66edc178c3c55ca4e953044'>net: Introduce ife encapsulation module</a></td><td>Yotam Gigi</td><td>3</td><td><span class='deletions'>-0</span>/<span class='insertions'>+70</span></td></tr>
<tr class='nohover-highlight'><td/><td colspan='5' class='logmsg'>
This module is responsible for the ife encapsulation protocol
encode/decode logics. That module can:
 - ife_encode: encode skb and reserve space for the ife meta header
 - ife_decode: decode skb and extract the meta header size
 - ife_tlv_meta_encode - encodes one tlv entry into the reserved ife
   header space.
 - ife_tlv_meta_decode - decodes one tlv entry from the packet
 - ife_tlv_meta_next - advance to the next tlv

Reviewed-by: Jiri Pirko &lt;jiri@mellanox.com&gt;
Signed-off-by: Yotam Gigi &lt;yotamg@mellanox.com&gt;
Signed-off-by: Jamal Hadi Salim &lt;jhs@mojatatu.com&gt;
Signed-off-by: Roman Mashak &lt;mrv@mojatatu.com&gt;
Signed-off-by: David S. Miller &lt;davem@davemloft.net&gt;


</td></tr>
<tr class='logheader'><td><span title='2017-02-03 15:16:45 -0500'>2017-02-03</span></td><td class='logsubject'><a href='/cgit.cgi/linux/net-next.git/commit/include?id=1d5e7c859e81a66674d194c346119d154d31e9dc'>net/sched: act_ife: Unexport ife_tlv_meta_encode</a></td><td>Yotam Gigi</td><td>1</td><td><span class='deletions'>-2</span>/<span class='insertions'>+0</span></td></tr>
<tr class='nohover-highlight'><td/><td colspan='5' class='logmsg'>
As the function ife_tlv_meta_encode is not used by any other module,
unexport it and make it static for the act_ife module.

Signed-off-by: Yotam Gigi &lt;yotamg@mellanox.com&gt;
Signed-off-by: Jamal Hadi Salim &lt;jhs@mojatatu.com&gt;
Signed-off-by: Roman Mashak &lt;mrv@mojatatu.com&gt;
Signed-off-by: David S. Miller &lt;davem@davemloft.net&gt;


</td></tr>
<tr class='logheader'><td><span title='2017-02-03 11:19:34 -0500'>2017-02-03</span></td><td class='logsubject'><a href='/cgit.cgi/linux/net-next.git/commit/include?id=3541f9e8bdebce02458882b66b638d7302c1f616'>tcp: add tcp_mss_clamp() helper</a></td><td>Eric Dumazet</td><td>1</td><td><span class='deletions'>-0</span>/<span class='insertions'>+9</span></td></tr>
<tr class='nohover-highlight'><td/><td colspan='5' class='logmsg'>
Small cleanup factorizing code doing the TCP_MAXSEG clamping.

Signed-off-by: Eric Dumazet &lt;edumazet@google.com&gt;
Signed-off-by: David S. Miller &lt;davem@davemloft.net&gt;


</td></tr>
<tr class='logheader'><td><span title='2017-02-02 23:34:19 -0500'>2017-02-02</span></td><td class='logsubject'><a href='/cgit.cgi/linux/net-next.git/commit/include?id=8fe809a992639b2013c0d8da2ba55cdea28a959a'>net: add LINUX_MIB_PFMEMALLOCDROP counter</a></td><td>Eric Dumazet</td><td>1</td><td><span class='deletions'>-0</span>/<span class='insertions'>+1</span></td></tr>
<tr class='nohover-highlight'><td/><td colspan='5' class='logmsg'>
Debugging issues caused by pfmemalloc is often tedious.

Add a new SNMP counter to more easily diagnose these problems.

Signed-off-by: Eric Dumazet &lt;edumazet@google.com&gt;
Cc: Josef Bacik &lt;jbacik@fb.com&gt;
Acked-by: Josef Bacik &lt;jbacik@fb.com&gt;
Signed-off-by: David S. Miller &lt;davem@davemloft.net&gt;


</td></tr>
<tr class='logheader'><td><span title='2017-02-02 22:05:28 -0500'>2017-02-02</span></td><td class='logsubject'><a href='/cgit.cgi/linux/net-next.git/commit/include?id=60f06fde4cff6f6134decbf09199b5e047bc3355'>net: phy: marvell: Add support for 88e1545 PHY</a></td><td>Andrew Lunn</td><td>1</td><td><span class='deletions'>-0</span>/<span class='insertions'>+1</span></td></tr>
<tr class='nohover-highlight'><td/><td colspan='5' class='logmsg'>
The 88e1545 PHYs are discrete Marvell PHYs, found in a quad package on
the zii-devel-b board. Add support for it to the Marvell PHY driver.

Signed-off-by: Andrew Lunn &lt;andrew@lunn.ch&gt;
Reviewed-by: Florian Fainelli &lt;f.fainelli@gmail.com&gt;
Signed-off-by: David S. Miller &lt;davem@davemloft.net&gt;


</td></tr>
<tr class='logheader'><td><span title='2017-02-02 21:58:02 -0500'>2017-02-02</span></td><td class='logsubject'><a href='/cgit.cgi/linux/net-next.git/commit/include?id=ba94f3088b792b16ea576a256a6030feddc87f24'>unix: add ioctl to open a unix socket file with O_PATH</a></td><td>Andrey Vagin</td><td>1</td><td><span class='deletions'>-0</span>/<span class='insertions'>+2</span></td></tr>
<tr class='nohover-highlight'><td/><td colspan='5' class='logmsg'>
This ioctl opens a file to which a socket is bound and
returns a file descriptor. The caller has to have CAP_NET_ADMIN
in the socket network namespace.

Currently it is impossible to get a path and a mount point
for a socket file. socket_diag reports address, device ID and inode
number for unix sockets. An address can contain a relative path or
a file may be moved somewhere. And these properties say nothing about
a mount namespace and a mount point of a socket file.

With the introduced ioctl, we can get a path by reading
/proc/self/fd/X and get mnt_id from /proc/self/fdinfo/X.

In CRIU we are going to use this ioctl to dump and restore unix socket.

Here is an example how it can be used:

$ strace -e socket,bind,ioctl ./test /tmp/test_sock
socket(AF_UNIX, SOCK_STREAM, 0)         = 3
bind(3, {sa_family=AF_UNIX, sun_path="test_sock"}, 11) = 0
ioctl(3, SIOCUNIXFILE, 0)           = 4
^Z

$ ss -a | grep test_sock
u_str  LISTEN     0      1      test_sock 17798                 * 0

$ ls -l /proc/760/fd/{3,4}
lrwx------ 1 root root 64 Feb  1 09:41 3 -&gt; 'socket:[17798]'
l--------- 1 root root 64 Feb  1 09:41 4 -&gt; /tmp/test_sock

$ cat /proc/760/fdinfo/4
pos:	0
flags:	012000000
mnt_id:	40

$ cat /proc/self/mountinfo | grep "^40\s"
40 19 0:37 / /tmp rw shared:23 - tmpfs tmpfs rw

Signed-off-by: Andrei Vagin &lt;avagin@openvz.org&gt;
Signed-off-by: David S. Miller &lt;davem@davemloft.net&gt;


</td></tr>
<tr class='logheader'><td><span title='2017-02-02 21:50:51 -0500'>2017-02-02</span></td><td class='logsubject'><a href='/cgit.cgi/linux/net-next.git/commit/include?id=e4cf8a38fc0d257b4070066e46a678f4777fecaa'>net: phy: Marvell: Add mv88e6390 internal PHY</a></td><td>Andrew Lunn</td><td>1</td><td><span class='deletions'>-0</span>/<span class='insertions'>+6</span></td></tr>
<tr class='nohover-highlight'><td/><td colspan='5' class='logmsg'>
The mv88e6390 Ethernet switch has internal PHYs. These PHYs don't have
an model ID in the ID2 register. So the MDIO driver in the switch
intercepts reads to this register, and returns the switch family ID.
Extend the Marvell PHY driver by including this ID, and treat the PHY
as a 88E1540.

Signed-off-by: Andrew Lunn &lt;andrew@lunn.ch&gt;
Reviewed-by: Florian Fainelli &lt;f.fainelli@gmail.com&gt;
Signed-off-by: David S. Miller &lt;davem@davemloft.net&gt;


</td></tr>
<tr class='logheader'><td><span title='2017-02-02 16:54:00 -0500'>2017-02-02</span></td><td class='logsubject'><a href='/cgit.cgi/linux/net-next.git/commit/include?id=e2160156bf2a7d5018e99a9993fbcdda0abac09b'>Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net</a></td><td>David S. Miller</td><td>9</td><td><span class='deletions'>-30</span>/<span class='insertions'>+40</span></td></tr>
<tr class='nohover-highlight'><td/><td colspan='5' class='logmsg'>
All merge conflicts were simple overlapping changes.

Signed-off-by: David S. Miller &lt;davem@davemloft.net&gt;


</td></tr>
<tr class='logheader'><td><span title='2017-02-02 14:31:58 +0100'>2017-02-02</span></td><td class='logsubject'><a href='/cgit.cgi/linux/net-next.git/commit/include?id=2851940ffee313e0ff12540a8e11a8c54dea9c65'>netfilter: allow logging from non-init namespaces</a></td><td>Michal Kubeček</td><td>1</td><td><span class='deletions'>-0</span>/<span class='insertions'>+3</span></td></tr>
<tr class='nohover-highlight'><td/><td colspan='5' class='logmsg'>
Commit 69b34fb996b2 ("netfilter: xt_LOG: add net namespace support for
xt_LOG") disabled logging packets using the LOG target from non-init
namespaces. The motivation was to prevent containers from flooding
kernel log of the host. The plan was to keep it that way until syslog
namespace implementation allows containers to log in a safe way.

However, the work on syslog namespace seems to have hit a dead end
somewhere in 2013 and there are users who want to use xt_LOG in all
network namespaces. This patch allows to do so by setting

  /proc/sys/net/netfilter/nf_log_all_netns

to a nonzero value. This sysctl is only accessible from init_net so that
one cannot switch the behaviour from inside a container.

Signed-off-by: Michal Kubecek &lt;mkubecek@suse.cz&gt;
Signed-off-by: Pablo Neira Ayuso &lt;pablo@netfilter.org&gt;


</td></tr>
<tr class='logheader'><td><span title='2017-02-02 14:31:57 +0100'>2017-02-02</span></td><td class='logsubject'><a href='/cgit.cgi/linux/net-next.git/commit/include?id=90c1aff702d449a1a248c4829d51c0bc677f968e'>ipvs: free ip_vs_dest structs when refcnt=0</a></td><td>David Windsor</td><td>1</td><td><span class='deletions'>-1</span>/<span class='insertions'>+1</span></td></tr>
<tr class='nohover-highlight'><td/><td colspan='5' class='logmsg'>
Currently, the ip_vs_dest cache frees ip_vs_dest objects when their
reference count becomes &lt; 0.  Aside from not being semantically sound,
this is problematic for the new type refcount_t, which will be introduced
shortly in a separate patch. refcount_t is the new kernel type for
holding reference counts, and provides overflow protection and a
constrained interface relative to atomic_t (the type currently being
used for kernel reference counts).

Per Julian Anastasov: "The problem is that dest_trash currently holds
deleted dests (unlinked from RCU lists) with refcnt=0."  Changing
dest_trash to hold dest with refcnt=1 will allow us to free ip_vs_dest
structs when their refcnt=0, in ip_vs_dest_put_and_free().

Signed-off-by: David Windsor &lt;dwindsor@gmail.com&gt;
Signed-off-by: Julian Anastasov &lt;ja@ssi.bg&gt;
Signed-off-by: Simon Horman &lt;horms@verge.net.au&gt;
Signed-off-by: Pablo Neira Ayuso &lt;pablo@netfilter.org&gt;


</td></tr>
<tr class='logheader'><td><span title='2017-02-02 14:31:56 +0100'>2017-02-02</span></td><td class='logsubject'><a href='/cgit.cgi/linux/net-next.git/commit/include?id=a9e419dc7be6997409dca6d1b9daf3cc7046902f'>netfilter: merge ctinfo into nfct pointer storage area</a></td><td>Florian Westphal</td><td>2</td><td><span class='deletions'>-17</span>/<span class='insertions'>+15</span></td></tr>
<tr class='nohover-highlight'><td/><td colspan='5' class='logmsg'>
After this change conntrack operations (lookup, creation, matching from
ruleset) only access one instead of two sk_buff cache lines.

This works for normal conntracks because those are allocated from a slab
that guarantees hw cacheline or 8byte alignment (whatever is larger)
so the 3 bits needed for ctinfo won't overlap with nf_conn addresses.

Template allocation now does manual address alignment (see previous change)
on arches that don't have sufficent kmalloc min alignment.

Some spots intentionally use skb-&gt;_nfct instead of skb_nfct() helpers,
this is to avoid undoing the skb_nfct() use when we remove untracked
conntrack object in the future.

Signed-off-by: Florian Westphal &lt;fw@strlen.de&gt;
Signed-off-by: Pablo Neira Ayuso &lt;pablo@netfilter.org&gt;


</td></tr>
<tr class='logheader'><td><span title='2017-02-02 14:31:55 +0100'>2017-02-02</span></td><td class='logsubject'><a href='/cgit.cgi/linux/net-next.git/commit/include?id=303223092081963513494b4377fa1ac9e362ed4b'>netfilter: guarantee 8 byte minalign for template addresses</a></td><td>Florian Westphal</td><td>1</td><td><span class='deletions'>-0</span>/<span class='insertions'>+2</span></td></tr>
<tr class='nohover-highlight'><td/><td colspan='5' class='logmsg'>
The next change will merge skb-&gt;nfct pointer and skb-&gt;nfctinfo
status bits into single skb-&gt;_nfct (unsigned long) area.

For this to work nf_conn addresses must always be aligned at least on
an 8 byte boundary since we will need the lower 3bits to store nfctinfo.

Conntrack templates are allocated via kmalloc.
kbuild test robot reported
BUILD_BUG_ON failed: NFCT_INFOMASK &gt;= ARCH_KMALLOC_MINALIGN
on v1 of this patchset, so not all platforms meet this requirement.

Do manual alignment if needed,  the alignment offset is stored in the
nf_conn entry protocol area. This works because templates are not
handed off to L4 protocol trackers.

Reported-by: kbuild test robot &lt;fengguang.wu@intel.com&gt;
Signed-off-by: Florian Westphal &lt;fw@strlen.de&gt;
Signed-off-by: Pablo Neira Ayuso &lt;pablo@netfilter.org&gt;


</td></tr>
<tr class='logheader'><td><span title='2017-02-02 14:31:54 +0100'>2017-02-02</span></td><td class='logsubject'><a href='/cgit.cgi/linux/net-next.git/commit/include?id=c74454fadd5ea6fc866ffe2c417a0dba56b2bf1c'>netfilter: add and use nf_ct_set helper</a></td><td>Florian Westphal</td><td>2</td><td><span class='deletions'>-2</span>/<span class='insertions'>+9</span></td></tr>
<tr class='nohover-highlight'><td/><td colspan='5' class='logmsg'>
Add a helper to assign a nf_conn entry and the ctinfo bits to an sk_buff.
This avoids changing code in followup patch that merges skb-&gt;nfct and
skb-&gt;nfctinfo into skb-&gt;_nfct.

Signed-off-by: Florian Westphal &lt;fw@strlen.de&gt;
Signed-off-by: Pablo Neira Ayuso &lt;pablo@netfilter.org&gt;


</td></tr>
<tr class='logheader'><td><span title='2017-02-02 14:31:53 +0100'>2017-02-02</span></td><td class='logsubject'><a href='/cgit.cgi/linux/net-next.git/commit/include?id=cb9c68363efb6d1f950ec55fb06e031ee70db5fc'>skbuff: add and use skb_nfct helper</a></td><td>Florian Westphal</td><td>2</td><td><span class='deletions'>-4</span>/<span class='insertions'>+11</span></td></tr>
<tr class='nohover-highlight'><td/><td colspan='5' class='logmsg'>
Followup patch renames skb-&gt;nfct and changes its type so add a helper to
avoid intrusive rename change later.

Signed-off-by: Florian Westphal &lt;fw@strlen.de&gt;
Signed-off-by: Pablo Neira Ayuso &lt;pablo@netfilter.org&gt;


</td></tr>
<tr class='logheader'><td><span title='2017-02-02 14:31:52 +0100'>2017-02-02</span></td><td class='logsubject'><a href='/cgit.cgi/linux/net-next.git/commit/include?id=97a6ad13decc16c5adbf181283932daba7e17faf'>netfilter: reduce direct skb-&gt;nfct usage</a></td><td>Florian Westphal</td><td>1</td><td><span class='deletions'>-3</span>/<span class='insertions'>+6</span></td></tr>
<tr class='nohover-highlight'><td/><td colspan='5' class='logmsg'>
Next patch makes direct skb-&gt;nfct access illegal, reduce noise
in next patch by using accessors we already have.

Signed-off-by: Florian Westphal &lt;fw@strlen.de&gt;
Signed-off-by: Pablo Neira Ayuso &lt;pablo@netfilter.org&gt;


</td></tr>
<tr class='logheader'><td><span title='2017-02-02 14:31:51 +0100'>2017-02-02</span></td><td class='logsubject'><a href='/cgit.cgi/linux/net-next.git/commit/include?id=11df4b760f11ca7528c62b1c4b870735d1c62116'>netfilter: conntrack: no need to pass ctinfo to error handler</a></td><td>Florian Westphal</td><td>1</td><td><span class='deletions'>-1</span>/<span class='insertions'>+1</span></td></tr>
<tr class='nohover-highlight'><td/><td colspan='5' class='logmsg'>
It is never accessed for reading and the only places that write to it
are the icmp(6) handlers, which also set skb-&gt;nfct (and skb-&gt;nfctinfo).

The conntrack core specifically checks for attached skb-&gt;nfct after
-&gt;error() invocation and returns early in this case.

Signed-off-by: Florian Westphal &lt;fw@strlen.de&gt;
Signed-off-by: Pablo Neira Ayuso &lt;pablo@netfilter.org&gt;


</td></tr>
</table><ul class='pager'>