#include "bat_algo.h"
#include "bridge_loop_avoidance.h"
#include "gateway_client.h"
#include "hard-interface.h"
#include "originator.h"
#include "packet.h"
#include "soft-interface.h"
#include "tp_meter.h"
#include "translation-table.h"
struct genl_family batadv_netlink_family;
/* multicast groups */
enum batadv_netlink_multicast_groups {
BATADV_NL_MCGRP_TPMETER,
};
static const struct genl_multicast_group batadv_netlink_mcgrps[] = {
[BATADV_NL_MCGRP_TPMETER] = { .name = BATADV_NL_MCAST_GROUP_TPMETER },
};
static const struct nla_policy batadv_netlink_policy[NUM_BATADV_ATTR] = {
[BATADV_ATTR_VERSION] = { .type = NLA_STRING },
[BATADV_ATTR_ALGO_NAME] = { .type = NLA_STRING },
[BATADV_ATTR_MESH_IFINDEX] = { .type = NLA_U32 },
[BATADV_ATTR_MESH_IFNAME] = { .type = NLA_STRING },
[BATADV_ATTR_MESH_ADDRESS] = { .len = ETH_ALEN },
[BATADV_ATTR_HARD_IFINDEX] = { .type = NLA_U32 },
[BATADV_ATTR_HARD_IFNAME] = { .type = NLA_STRING },
[BATADV_ATTR_HARD_ADDRESS] = { .len = ETH_ALEN },
[BATADV_ATTR_ORIG_ADDRESS] = { .len = ETH_ALEN },
[BATADV_ATTR_TPMETER_RESULT] = { .type = NLA_U8 },
[BATADV_ATTR_TPMETER_TEST_TIME] = { .type = NLA_U32 },
[BATADV_ATTR_TPMETER_BYTES] = { .type = NLA_U64 },
[BATADV_ATTR_TPMETER_COOKIE] = { .type = NLA_U32 },
[BATADV_ATTR_ACTIVE] = { .type = NLA_FLAG },
[BATADV_ATTR_TT_ADDRESS] = { .len = ETH_ALEN },
[BATADV_ATTR_TT_TTVN] = { .type = NLA_U8 },
[BATADV_ATTR_TT_LAST_TTVN] = { .type = NLA_U8 },
[BATADV_ATTR_TT_CRC32] = { .type = NLA_U32 },
[BATADV_ATTR_TT_VID] = { .type = NLA_U16 },
[BATADV_ATTR_TT_FLAGS] = { .type = NLA_U32 },
[BATADV_ATTR_FLAG_BEST] = { .type = NLA_FLAG },
[BATADV_ATTR_LAST_SEEN_MSECS] = { .type = NLA_U32 },
[BATADV_ATTR_NEIGH_ADDRESS] = { .len = ETH_ALEN },
[BATADV_ATTR_TQ] = { .type = NLA_U8 },
[BATADV_ATTR_THROUGHPUT] = { .type = NLA_U32 },
[BATADV_ATTR_BANDWIDTH_UP] = { .type = NLA_U32 },
[BATADV_ATTR_BANDWIDTH_DOWN] = { .type = NLA_U32 },
[BATADV_ATTR_ROUTER] = { .len = ETH_ALEN },
[BATADV_ATTR_BLA_OWN] = { .type = NLA_FLAG },
[BATADV_ATTR_BLA_ADDRESS] = { .len = ETH_ALEN },
[BATADV_ATTR_BLA_VID] = { .type = NLA_U16 },
[BATADV_ATTR_BLA_BACKBONE] = { .len = ETH_ALEN },
[BATADV_ATTR_BLA_CRC] = { .type = NLA_U16 },
};
/**
* batadv_netlink_get_ifindex - Extract an interface index from a message
* @nlh: Message header
* @attrtype: Attribute which holds an interface index
*
* Return: interface index, or 0.
*/
int
batadv_netlink_get_ifindex(const struct nlmsghdr *nlh, int attrtype)
{
struct nlattr *attr = nlmsg_find_attr(nlh, GENL_HDRLEN, attrtype);
return attr ? nla_get_u32(attr) : 0;
}
/**
* batadv_netlink_mesh_info_put - fill in generic information about mesh
* interface
* @msg: netlink message to be sent back
* @soft_iface: interface for which the data should be taken
*
* Return: 0 on success, < 0 on error
*/
static int
batadv_netlink_mesh_info_put(struct sk_buff *msg, struct net_device *soft_iface)
{
struct batadv_priv *bat_priv = netdev_priv(soft_iface);
struct batadv_hard_iface *primary_if = NULL;
struct net_device *hard_iface;
int ret = -ENOBUFS;
if (nla_put_string(msg, BATADV_ATTR_VERSION, BATADV_SOURCE_VERSION) ||
nla_put_string(msg, BATADV_ATTR_ALGO_NAME,
bat_priv->algo_ops->name) ||
nla_put_u32(msg, BATADV_ATTR_MESH_IFINDEX, soft_iface->ifindex) ||
nla_put_string(msg, BATADV_ATTR_MESH_IFNAME, soft_iface->name) ||
nla_put(msg, BATADV_ATTR_MESH_ADDRESS, ETH_ALEN,
soft_iface->dev_addr) ||
nla_put_u8(msg, BATADV_ATTR_TT_TTVN,
(u8)atomic_read(&bat_priv->tt.vn)))
goto out;
#ifdef CONFIG_BATMAN_ADV_BLA
if (nla_put_u16(msg, BATADV_ATTR_BLA_CRC,
ntohs(bat_priv->bla.claim_dest.group)))
goto out;
#endif
primary_if = batadv_primary_if_get_selected(bat_priv);
if (primary_if && primary_if->if_status == BATADV_IF_ACTIVE) {
hard_iface = primary_if->net_dev;
if (nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX,
hard_iface->ifindex) ||
nla_put_string(msg, BATADV_ATTR_HARD_IFNAME,
hard_iface->name) ||
nla_put(msg, BATADV_ATTR_HARD_ADDRESS, ETH_ALEN,
hard_iface->dev_addr))
goto out;
}
ret = 0;
out:
if (primary_if)
batadv_hardif_put(primary_if);
return ret;
}
/**
* batadv_netlink_get_mesh_info - handle incoming BATADV_CMD_GET_MESH_INFO
* netlink request
* @skb: received netlink message
* @info: receiver information
*
* Return: 0 on success, < 0 on error
*/
static int
batadv_netlink_get_mesh_info(struct sk_buff *skb, struct genl_info *info)
{
struct net *net = genl_info_net(info);
struct net_device *soft_iface;
struct sk_buff *msg = NULL;
void *msg_head;
int ifindex;
int ret;
if (!info->attrs[BATADV_ATTR_MESH_IFINDEX])
return -EINVAL;
ifindex = nla_get_u32(info->attrs[BATADV_ATTR_MESH_IFINDEX]);
if (!ifindex)
return -EINVAL;
soft_iface = dev_get_by_index(net, ifindex);
if (!soft_iface || !batadv_softif_is_valid(soft_iface)) {
ret = -ENODEV;
goto out;
}
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg) {
ret = -ENOMEM;
goto out;
}
msg_head = genlmsg_put(msg, info->snd_portid, info->snd_seq,
&batadv_netlink_family, 0,
BATADV_CMD_GET_MESH_INFO);
if (!msg_head) {
ret = -ENOBUFS;
goto out;
}
ret = batadv_netlink_mesh_info_put(msg, soft_iface);
out:
if (soft_iface)
dev_put(soft_iface);
if (ret) {
if (msg)
nlmsg_free(msg);
return ret;
}
genlmsg_end(msg, msg_head);
return genlmsg_reply(msg, info);
}
/**
* batadv_netlink_tp_meter_put - Fill information of started tp_meter session
* @msg: netlink message to be sent back
* @cookie: tp meter session cookie
*
* Return: 0 on success, < 0 on error
*/
static int
batadv_netlink_tp_meter_put(struct sk_buff *msg, u32 cookie)
{
if (nla_put_u32(msg, BATADV_ATTR_TPMETER_COOKIE, cookie))
return -ENOBUFS;
return 0;
}
/**
* batadv_netlink_tpmeter_notify - send tp_meter result via netlink to client
* @bat_priv: the bat priv with all the soft interface information
* @dst: destination of tp_meter session
* @result: reason for tp meter session stop
* @test_time: total time ot the tp_meter session
* @total_bytes: bytes acked to the receiver
* @cookie: cookie of tp_meter session
*
* Return: 0 on success, < 0 on error
*/
int batadv_netlink_tpmeter_notify(struct batadv_priv *bat_priv, const u8 *dst,
u8 result, u32 test_time, u64 total_bytes,
u32 cookie)
{
struct sk_buff *msg;
void *hdr;
int ret;
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg)
return -ENOMEM;
hdr = genlmsg_put(msg, 0, 0, &batadv_netlink_family, 0,
BATADV_CMD_TP_METER);
if (!hdr) {
ret = -ENOBUFS;
goto err_genlmsg;
}
if (nla_put_u32(msg, BATADV_ATTR_TPMETER_COOKIE, cookie))
goto nla_put_failure;
if (nla_put_u32(msg, BATADV_ATTR_TPMETER_TEST_TIME, test_time))
goto nla_put_failure;
if (nla_put_u64_64bit(msg, BATADV_ATTR_TPMETER_BYTES, total_bytes,
BATADV_ATTR_PAD))
goto nla_put_failure;
if (nla_put_u8(msg, BATADV_ATTR_TPMETER_RESULT, result))
goto nla_put_failure;
if (nla_put(msg, BATADV_ATTR_ORIG_ADDRESS, ETH_ALEN, dst))
goto nla_put_failure;
genlmsg_end(msg, hdr);
genlmsg_multicast_netns(&batadv_netlink_family,
dev_net(bat_priv->soft_iface), msg, 0,
BATADV_NL_MCGRP_TPMETER, GFP_KERNEL);
return 0;
nla_put_failure:
genlmsg_cancel(msg, hdr);
ret = -EMSGSIZE;
err_genlmsg:
nlmsg_free(msg);
return ret;
}
/**
* batadv_netlink_tp_meter_start - Start a new tp_meter session
* @skb: received netlink message
* @info: receiver information
*
* Return: 0 on success, < 0 on error
*/
static int
batadv_netlink_tp_meter_start(struct sk_buff *skb, struct genl_info *info)
{
struct net *net = genl_info_net(info);
struct net_device *soft_iface;
struct batadv_priv *bat_priv;
struct sk_buff *msg = NULL;
u32 test_length;
void *msg_head;
int ifindex;
u32 cookie;
u8 *dst;
int ret;
if (!info->attrs[BATADV_ATTR_MESH_IFINDEX])
return -EINVAL;
if (!info->attrs[BATADV_ATTR_ORIG_ADDRESS])
return -EINVAL;
if (!info->attrs[BATADV_ATTR_TPMETER_TEST_TIME])
return -EINVAL;
ifindex = nla_get_u32(info->attrs[BATADV_ATTR_MESH_IFINDEX]);
if (!ifindex)
return -EINVAL;
dst = nla_data(info->attrs[BATADV_ATTR_ORIG_ADDRESS]);
test_length = nla_get_u32(info->attrs[BATADV_ATTR_TPMETER_TEST_TIME]);
soft_iface = dev_get_by_index(net, ifindex);
if (!soft_iface || !batadv_softif_is_valid(soft_iface)) {
ret = -ENODEV;
goto out;
}
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg) {
ret = -ENOMEM;
goto out;
}
msg_head = genlmsg_put(msg, info->snd_portid, info->snd_seq,
&batadv_netlink_family, 0,
BATADV_CMD_TP_METER);
if (!msg_head) {
ret = -ENOBUFS;
goto out;
}
bat_priv = netdev_priv(soft_iface);
batadv_tp_start(bat_priv, dst, test_length, &cookie);
ret = batadv_netlink_tp_meter_put(msg, cookie);
out:
if (soft_iface)
dev_put(soft_iface);
if (ret) {
if (msg)
nlmsg_free(msg);
return ret;
}
genlmsg_end(msg, msg_head);
return genlmsg_reply(msg, info);
}
/**
* batadv_netlink_tp_meter_start - Cancel a running tp_meter session
* @skb: received netlink message
* @info: receiver information
*
* Return: 0 on success, < 0 on error
*/
static int
batadv_netlink_tp_meter_cancel(struct sk_buff *skb, struct genl_info *info)
{
struct net *net = genl_info_net(info);
struct net_device *soft_iface;
struct batadv_priv *bat_priv;
int ifindex;
u8 *dst;
int ret = 0;
if (!info->attrs[BATADV_ATTR_MESH_IFINDEX])
return -EINVAL;
if (!info->attrs[BATADV_ATTR_ORIG_ADDRESS])
return -EINVAL;
ifindex = nla_get_u32(info->attrs[BATADV_ATTR_MESH_IFINDEX]);
if (!ifindex)
return -EINVAL;
dst = nla_data(info->attrs[BATADV_ATTR_ORIG_ADDRESS]);
soft_iface = dev_get_by_index(net, ifindex);
if (!soft_iface || !batadv_softif_is_valid(soft_iface)) {
ret = -ENODEV;
goto out;
}
bat_priv = netdev_priv(soft_iface);
batadv_tp_stop(bat_priv, dst, BATADV_TP_REASON_CANCEL);
out:
if (soft_iface)
dev_put(soft_iface);
return ret;
}
/**
* batadv_netlink_dump_hardif_entry - Dump one hard interface into a message
* @msg: Netlink message to dump into
* @portid: Port making netlink request
* @seq: Sequence number of netlink message
* @hard_iface: Hard interface to dump
*
* Return: error code, or 0 on success
*/
static int
batadv_netlink_dump_hardif_entry(struct sk_buff *msg, u32 portid, u32 seq,
struct batadv_hard_iface *hard_iface)
{
struct net_device *net_dev = hard_iface->net_dev;
void *hdr;
hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, NLM_F_MULTI,
BATADV_CMD_GET_HARDIFS);
if (!hdr)
return -EMSGSIZE;
if (nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX,
net_dev->ifindex) ||
nla_put_string(msg, BATADV_ATTR_HARD_IFNAME,
net_dev->name) ||
nla_put(msg, BATADV_ATTR_HARD_ADDRESS, ETH_ALEN,
net_dev->dev_addr))
goto nla_put_failure;
if (hard_iface->if_status == BATADV_IF_ACTIVE) {
if (nla_put_flag(msg, BATADV_ATTR_ACTIVE))
goto nla_put_failure;
}
genlmsg_end(msg, hdr);
return 0;
nla_put_failure:
genlmsg_cancel(msg, hdr);
return -EMSGSIZE;
}
/**
* batadv_netlink_dump_hardifs - Dump all hard interface into a messages
* @msg: Netlink message to dump into
* @cb: Parameters from query
*
* Return: error code, or length of reply message on success
*/
static int
batadv_netlink_dump_hardifs(struct sk_buff *msg, struct netlink_callback *cb)
{
struct net *net = sock_net(cb->skb->sk);
struct net_device *soft_iface;
struct batadv_hard_iface *hard_iface;
int ifindex;
int portid = NETLINK_CB(cb->skb).portid;
int seq = cb->nlh->nlmsg_seq;
int skip = cb->args[0];
int i = 0;
ifindex = batadv_netlink_get_ifindex(cb->nlh,
BATADV_ATTR_MESH_IFINDEX);
if (!ifindex)
return -EINVAL;
soft_iface = dev_get_by_index(net, ifindex);
if (!soft_iface)
return -ENODEV;
if (!batadv_softif_is_valid(soft_iface)) {
dev_put(soft_iface);
return -ENODEV;
}
rcu_read_lock();
list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) {
if (hard_iface->soft_iface != soft_iface)
continue;
if (i++ < skip)
continue;
if (batadv_netlink_dump_hardif_entry(msg, portid, seq,
hard_iface)) {
i--;
break;
}
}
rcu_read_unlock();
dev_put(soft_iface);
cb->args[0] = i;
return msg->len;
}
static const struct genl_ops batadv_netlink_ops[] = {
{
.cmd = BATADV_CMD_GET_MESH_INFO,
.flags = GENL_ADMIN_PERM,
.policy = batadv_netlink_policy,
.doit = batadv_netlink_get_mesh_info,
},
{
.cmd = BATADV_CMD_TP_METER,
.flags = GENL_ADMIN_PERM,
.policy = batadv_netlink_policy,
.doit = batadv_netlink_tp_meter_start,
},
{
.cmd = BATADV_CMD_TP_METER_CANCEL,
.flags = GENL_ADMIN_PERM,
.policy = batadv_netlink_policy,
.doit = batadv_netlink_tp_meter_cancel,
},
{
.cmd = BATADV_CMD_GET_ROUTING_ALGOS,
.flags = GENL_ADMIN_PERM,
.policy = batadv_netlink_policy,
.dumpit = batadv_algo_dump,
},
{
.cmd = BATADV_CMD_GET_HARDIFS,
.flags = GENL_ADMIN_PERM,
.policy = batadv_netlink_policy,
.dumpit = batadv_netlink_dump_hardifs,
},
{
.cmd = BATADV_CMD_GET_TRANSTABLE_LOCAL,
.flags = GENL_ADMIN_PERM,
.policy = batadv_netlink_policy,
.dumpit = batadv_tt_local_dump,
},
{
.cmd = BATADV_CMD_GET_TRANSTABLE_GLOBAL,
.flags = GENL_ADMIN_PERM,
.policy = batadv_netlink_policy,
.dumpit = batadv_tt_global_dump,
},
{
.cmd = BATADV_CMD_GET_ORIGINATORS,
.flags = GENL_ADMIN_PERM,
.policy = batadv_netlink_policy,
.dumpit = batadv_orig_dump,
},
{
.cmd = BATADV_CMD_GET_NEIGHBORS,
.flags = GENL_ADMIN_PERM,
.policy = batadv_netlink_policy,
.dumpit = batadv_hardif_neigh_dump,
},
{
.cmd = BATADV_CMD_GET_GATEWAYS,
.flags = GENL_ADMIN_PERM,
.policy = batadv_netlink_policy,
.dumpit = batadv_gw_dump,
},
{
.cmd = BATADV_CMD_GET_BLA_CLAIM,
.flags = GENL_ADMIN_PERM,
.policy = batadv_netlink_policy,
.dumpit = batadv_bla_claim_dump,
},
{
.cmd = BATADV_CMD_GET_BLA_BACKBONE,
.flags = GENL_ADMIN_PERM,
.policy = batadv_netlink_policy,
.dumpit = batadv_bla_backbone_dump,
},
};
struct genl_family batadv_netlink_family __ro_after_init = {
.hdrsize = 0,
.name = BATADV_NL_NAME,
.version = 1,
.maxattr = BATADV_ATTR_MAX,
.netnsok = true,
.module = THIS_MODULE,
.ops = batadv_netlink_ops,
.n_ops = ARRAY_SIZE(batadv_netlink_ops),
.mcgrps = batadv_netlink_mcgrps,
.n_mcgrps = ARRAY_SIZE(batadv_netlink_mcgrps),
};
/**
* batadv_netlink_register - register batadv genl netlink family
*/
void __init batadv_netlink_register(void)
{
int ret;
ret = genl_register_family(&batadv_netlink_family);
if (ret)
pr_warn("unable to register netlink family");
}
/**
* batadv_netlink_unregister - unregister batadv genl netlink family
*/
void batadv_netlink_unregister(void)
{
genl_unregister_family(&batadv_netlink_family);
}
-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
|
The FIB notification chain currently uses the NLM_F_{REPLACE,APPEND}
flags to signal routes being replaced or appended.
Instead of using netlink flags for in-kernel notifications we can simply
introduce two new events in the FIB notification chain. This has the
added advantage of making the API cleaner, thereby making it clear that
these events should be supported by listeners of the notification chain.
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
CC: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
|
|
When a FIB alias is replaced following NLM_F_REPLACE, the ENTRY_ADD
notification is sent after the reference on the previous FIB info was
dropped. This is problematic as potential listeners might need to access
it in their notification blocks.
Solve this by sending the notification prior to the deletion of the
replaced FIB alias. This is consistent with ENTRY_DEL notifications.
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
CC: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
|
|
When a FIB alias is removed, a notification is sent using the type
passed from user space - can be RTN_UNSPEC - instead of the actual type
of the removed alias. This is problematic for listeners of the FIB
notification chain, as several FIB aliases can exist with matching
parameters, but the type.
Solve this by passing the actual type of the removed FIB alias.
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
CC: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
|
|
In case the MAIN table is flushed and its trie is shared with the LOCAL
table, then we might be flushing FIB aliases belonging to the latter.
This can lead to FIB_ENTRY_DEL notifications sent with the wrong table
ID.
The above doesn't affect current listeners, as the table ID is ignored
during entry deletion, but this will change later in the patchset.
When flushing a particular table, skip any aliases belonging to a
different one.
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
CC: Alexander Duyck <alexander.h.duyck@intel.com>
CC: Patrick McHardy <kaber@trash.net>
Reviewed-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
|
|
struct sw_flow_key has two 16-bit holes. Move the most matched
conntrack match fields there. In some typical cases this reduces the
size of the key that needs to be hashed into half and into one cache
line.
Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Joe Stringer <joe@ovn.org>
Acked-by: Pravin B Shelar <pshelar@ovn.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
|
|
Stateful network admission policy may allow connections to one
direction and reject connections initiated in the other direction.
After policy change it is possible that for a new connection an
overlapping conntrack entry already exists, where the original
direction of the existing connection is opposed to the new
connection's initial packet.
Most importantly, conntrack state relating to the current packet gets
the "reply" designation based on whether the original direction tuple
or the reply direction tuple matched. If this "directionality" is
wrong w.r.t. to the stateful network admission policy it may happen
that packets in neither direction are correctly admitted.
This patch adds a new "force commit" option to the OVS conntrack
action that checks the original direction of an existing conntrack
entry. If that direction is opposed to the current packet, the
existing conntrack entry is deleted and a new one is subsequently
created in the correct direction.
Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Joe Stringer <joe@ovn.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
|
|
Add the fields of the conntrack original direction 5-tuple to struct
sw_flow_key. The new fields are initially marked as non-existent, and
are populated whenever a conntrack action is executed and either finds
or generates a conntrack entry. This means that these fields exist
for all packets that were not rejected by conntrack as untrackable.
The original tuple fields in the sw_flow_key are filled from the
original direction tuple of the conntrack entry relating to the
current packet, or from the original direction tuple of the master
conntrack entry, if the current conntrack entry has a master.
Generally, expected connections of connections having an assigned
helper (e.g., FTP), have a master conntrack entry.
The main purpose of the new conntrack original tuple fields is to
allow matching on them for policy decision purposes, with the premise
that the admissibility of tracked connections reply packets (as well
as original direction packets), and both direction packets of any
related connections may be based on ACL rules applying to the master
connection's original direction 5-tuple. This also makes it easier to
make policy decisions when the actual packet headers might have been
transformed by NAT, as the original direction 5-tuple represents the
packet headers before any such transformation.
When using the original direction 5-tuple the admissibility of return
and/or related packets need not be based on the mere existence of a
conntrack entry, allowing separation of admission policy from the
established conntrack state. While existence of a conntrack entry is
required for admission of the return or related packets, policy
changes can render connections that were initially admitted to be
rejected or dropped afterwards. If the admission of the return and
related packets was based on mere conntrack state (e.g., connection
being in an established state), a policy change that would make the
connection rejected or dropped would need to find and delete all
conntrack entries affected by such a change. When using the original
direction 5-tuple matching the affected conntrack entries can be
allowed to time out instead, as the established state of the
connection would not need to be the basis for packet admission any
more.
It should be noted that the directionality of related connections may
be the same or different than that of the master connection, and
neither the original direction 5-tuple nor the conntrack state bits
carry this information. If needed, the directionality of the master
connection can be stored in master's conntrack mark or labels, which
are automatically inherited by the expected related connections.
The fact that neither ARP nor ND packets are trackable by conntrack
allows mutual exclusion between ARP/ND and the new conntrack original
tuple fields. Hence, the IP addresses are overlaid in union with ARP
and ND fields. This allows the sw_flow_key to not grow much due to
this patch, but it also means that we must be careful to never use the
new key fields with ARP or ND packets. ARP is easy to distinguish and
keep mutually exclusive based on the ethernet type, but ND being an
ICMPv6 protocol requires a bit more attention.
Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Joe Stringer <joe@ovn.org>
Acked-by: Pravin B Shelar <pshelar@ovn.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
|
|
We avoid calling into nf_conntrack_in() for expected connections, as
that would remove the expectation that we want to stick around until
we are ready to commit the connection. Instead, we do a lookup in the
expectation table directly. However, after a successful expectation
lookup we have set the flow key label field from the master
connection, whereas nf_conntrack_in() does not do this. This leads to
master's labels being inherited after an expectation lookup, but those
labels not being inherited after the corresponding conntrack action
with a commit flag.
This patch resolves the problem by changing the commit code path to
also inherit the master's labels to the expected connection.
Resolving this conflict in favor of inheriting the labels allows more
information be passed from the master connection to related
connections, which would otherwise be much harder if the 32 bits in
the connmark are not enough. Labels can still be set explicitly, so
this change only affects the default values of the labels in presense
of a master connection.
Fixes: 7f8a436eaa2c ("openvswitch: Add conntrack action")
Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Joe Stringer <joe@ovn.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
|
|
Refactoring conntrack labels initialization makes changes in later
patches easier to review.
Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Joe Stringer <joe@ovn.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
|
|
Since 23014011ba42 ("netfilter: conntrack: support a fixed size of 128
distinct labels"), the size of conntrack labels extension has fixed to
128 bits, so we do not need to check for labels sizes shorter than 128
at run-time. This patch simplifies labels length logic accordingly,
but allows the conntrack labels size to be increased in the future
without breaking the build. In the event of conntrack labels
increasing in size OVS would still be able to deal with the 128 first
label bits.
Suggested-by: Joe Stringer <joe@ovn.org>
Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Joe Stringer <joe@ovn.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
|
|
Make the array of labels in struct ovs_key_ct_label an union, adding a
u32 array of the same byte size as the existing u8 array. It is
faster to loop through the labels 32 bits at the time, which is also
the alignment of netlink attributes.
Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Joe Stringer <joe@ovn.org>
Acked-by: Pravin B Shelar <pshelar@ovn.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
|
|
Receiving change events before the 'new' event for the connection has
been received can be confusing. Avoid triggering change events for
setting conntrack mark or labels before the conntrack entry has been
confirmed.
Fixes: 182e3042e15d ("openvswitch: Allow matching on conntrack mark")
Fixes: c2ac66735870 ("openvswitch: Allow matching on conntrack label")
Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Joe Stringer <joe@ovn.org>
Acked-by: Pravin B Shelar <pshelar@ovn.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
|
|
The conntrack lookup for existing connections fails to invert the
packet 5-tuple for NATted packets, and therefore fails to find the
existing conntrack entry. Conntrack only stores 5-tuples for incoming
packets, and there are various situations where a lookup on a packet
that has already been transformed by NAT needs to be made. Looking up
an existing conntrack entry upon executing packet received from the
userspace is one of them.
This patch fixes ovs_ct_find_existing() to invert the packet 5-tuple
for the conntrack lookup whenever the packet has already been
transformed by conntrack from its input form as evidenced by one of
the NAT flags being set in the conntrack state metadata.
Fixes: 05752523e565 ("openvswitch: Interface with NAT.")
Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Joe Stringer <joe@ovn.org>
Acked-by: Pravin B Shelar <pshelar@ovn.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
|
|
Fix comments referring to skb 'nfct' and 'nfctinfo' fields now that
they are combined into '_nfct'.
Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Joe Stringer <joe@ovn.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
|
|
While adding switch.o to the list of DSA object files, we essentially
duplicated the previous obj-y line and just added switch.o, remove the
duplicate.
Fixes: f515f192ab4f ("net: dsa: add switch notifier")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Vivien Didelot <vivien.didelot@savoirfairelinux.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
|
|
Request Parameter
This patch is to implement Sender-Side Procedures for the Add
Outgoing and Incoming Streams Request Parameter described in
rfc6525 section 5.1.5-5.1.6.
It is also to add sockopt SCTP_ADD_STREAMS in rfc6525 section
6.3.4 for users.
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
|
|
request chunk
This patch is to define Add Incoming/Outgoing Streams Request
Parameter described in rfc6525 section 4.5 and 4.6. They can
be in one same chunk trunk as rfc6525 section 3.1-7 describes,
so make them in one function.
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
|
|
This patch is to implement Sender-Side Procedures for the SSN/TSN
Reset Request Parameter descibed in rfc6525 section 5.1.4.
It is also to add sockopt SCTP_RESET_ASSOC in rfc6525 section 6.3.3
for users.
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
|
|
This patch is to define SSN/TSN Reset Request Parameter described
in rfc6525 section 4.3.
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
|
|
Now when sending stream reset request, it closes the streams to
block further xmit of data until this request is completed, then
calls sctp_send_reconf to send the chunk.
But if sctp_send_reconf returns err, and it doesn't recover the
streams' states back, which means the request chunk would not be
queued and sent, so the asoc will get stuck, streams are closed
and no packet is even queued.
This patch is to fix it by recovering the streams' states when
it fails to send the request, it is also to fix a return value.
Fixes: 7f9d68ac944e ("sctp: implement sender-side procedures for SSN Reset Request Parameter")
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
|
|
When a multipath route is hit the kernel doesn't consider nexthops that
are DEAD or LINKDOWN when IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN is set.
Devices that offload multipath routes need to be made aware of nexthop
status changes. Otherwise, the device will keep forwarding packets to
non-functional nexthops.
Add the FIB_EVENT_NH_{ADD,DEL} events to the fib notification chain,
which notify capable devices when they should add or delete a nexthop
from their tables.
Cc: Roopa Prabhu <roopa@cumulusnetworks.com>
Cc: David Ahern <dsa@cumulusnetworks.com>
Cc: Andy Gospodarek <andy@greyhouse.net>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Reviewed-by: Andy Gospodarek <gospo@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
|
|
This fixes a bug and cleans up tunnelid range size
calculation code by using consistent variable names
and checks in size calculation and fill functions.
tested for a few cases of vlan-vni range mappings:
(output from patched iproute2):
$bridge vlan showtunnel
port vid tunid
vxlan0 100-105 1000-1005
200 2000
210 2100
211-213 2100-2102
214 2104
216-217 2108-2109
219 2119
Fixes: efa5356b0d97 ("bridge: per vlan dst_metadata netlink support")
Reported-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
|
|
We have many gro cells users, so lets move the code to avoid
duplication.
This creates a CONFIG_GRO_CELLS option.
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
|