/* * NetLabel Unlabeled Support * * This file defines functions for dealing with unlabeled packets for the * NetLabel system. The NetLabel system manages static and dynamic label * mappings for network protocols such as CIPSO and RIPSO. * * Author: Paul Moore * */ /* * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See * the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, see . * */ #ifndef _NETLABEL_UNLABELED_H #define _NETLABEL_UNLABELED_H #include /* * The following NetLabel payloads are supported by the Unlabeled subsystem. * * o STATICADD * This message is sent from an application to add a new static label for * incoming unlabeled connections. * * Required attributes: * * NLBL_UNLABEL_A_IFACE * NLBL_UNLABEL_A_SECCTX * * If IPv4 is specified the following attributes are required: * * NLBL_UNLABEL_A_IPV4ADDR * NLBL_UNLABEL_A_IPV4MASK * * If IPv6 is specified the following attributes are required: * * NLBL_UNLABEL_A_IPV6ADDR * NLBL_UNLABEL_A_IPV6MASK * * o STATICREMOVE * This message is sent from an application to remove an existing static * label for incoming unlabeled connections. * * Required attributes: * * NLBL_UNLABEL_A_IFACE * * If IPv4 is specified the following attributes are required: * * NLBL_UNLABEL_A_IPV4ADDR * NLBL_UNLABEL_A_IPV4MASK * * If IPv6 is specified the following attributes are required: * * NLBL_UNLABEL_A_IPV6ADDR * NLBL_UNLABEL_A_IPV6MASK * * o STATICLIST * This message can be sent either from an application or by the kernel in * response to an application generated STATICLIST message. When sent by an * application there is no payload and the NLM_F_DUMP flag should be set. * The kernel should response with a series of the following messages. * * Required attributes: * * NLBL_UNLABEL_A_IFACE * NLBL_UNLABEL_A_SECCTX * * If IPv4 is specified the following attributes are required: * * NLBL_UNLABEL_A_IPV4ADDR * NLBL_UNLABEL_A_IPV4MASK * * If IPv6 is specified the following attributes are required: * * NLBL_UNLABEL_A_IPV6ADDR * NLBL_UNLABEL_A_IPV6MASK * * o STATICADDDEF * This message is sent from an application to set the default static * label for incoming unlabeled connections. * * Required attribute: * * NLBL_UNLABEL_A_SECCTX * * If IPv4 is specified the following attributes are required: * * NLBL_UNLABEL_A_IPV4ADDR * NLBL_UNLABEL_A_IPV4MASK * * If IPv6 is specified the following attributes are required: * * NLBL_UNLABEL_A_IPV6ADDR * NLBL_UNLABEL_A_IPV6MASK * * o STATICREMOVEDEF * This message is sent from an application to remove the existing default * static label for incoming unlabeled connections. * * If IPv4 is specified the following attributes are required: * * NLBL_UNLABEL_A_IPV4ADDR * NLBL_UNLABEL_A_IPV4MASK * * If IPv6 is specified the following attributes are required: * * NLBL_UNLABEL_A_IPV6ADDR * NLBL_UNLABEL_A_IPV6MASK * * o STATICLISTDEF * This message can be sent either from an application or by the kernel in * response to an application generated STATICLISTDEF message. When sent by * an application there is no payload and the NLM_F_DUMP flag should be set. * The kernel should response with the following message. * * Required attribute: * * NLBL_UNLABEL_A_SECCTX * * If IPv4 is specified the following attributes are required: * * NLBL_UNLABEL_A_IPV4ADDR * NLBL_UNLABEL_A_IPV4MASK * * If IPv6 is specified the following attributes are required: * * NLBL_UNLABEL_A_IPV6ADDR * NLBL_UNLABEL_A_IPV6MASK * * o ACCEPT * This message is sent from an application to specify if the kernel should * allow unlabled packets to pass if they do not match any of the static * mappings defined in the unlabeled module. * * Required attributes: * * NLBL_UNLABEL_A_ACPTFLG * * o LIST * This message can be sent either from an application or by the kernel in * response to an application generated LIST message. When sent by an * application there is no payload. The kernel should respond to a LIST * message with a LIST message on success. * * Required attributes: * * NLBL_UNLABEL_A_ACPTFLG * */ /* NetLabel Unlabeled commands */ enum { NLBL_UNLABEL_C_UNSPEC, NLBL_UNLABEL_C_ACCEPT, NLBL_UNLABEL_C_LIST, NLBL_UNLABEL_C_STATICADD, NLBL_UNLABEL_C_STATICREMOVE, NLBL_UNLABEL_C_STATICLIST, NLBL_UNLABEL_C_STATICADDDEF, NLBL_UNLABEL_C_STATICREMOVEDEF, NLBL_UNLABEL_C_STATICLISTDEF, __NLBL_UNLABEL_C_MAX, }; /* NetLabel Unlabeled attributes */ enum { NLBL_UNLABEL_A_UNSPEC, NLBL_UNLABEL_A_ACPTFLG, /* (NLA_U8) * if true then unlabeled packets are allowed to pass, else unlabeled * packets are rejected */ NLBL_UNLABEL_A_IPV6ADDR, /* (NLA_BINARY, struct in6_addr) * an IPv6 address */ NLBL_UNLABEL_A_IPV6MASK, /* (NLA_BINARY, struct in6_addr) * an IPv6 address mask */ NLBL_UNLABEL_A_IPV4ADDR, /* (NLA_BINARY, struct in_addr) * an IPv4 address */ NLBL_UNLABEL_A_IPV4MASK, /* (NLA_BINARY, struct in_addr) * and IPv4 address mask */ NLBL_UNLABEL_A_IFACE, /* (NLA_NULL_STRING) * network interface */ NLBL_UNLABEL_A_SECCTX, /* (NLA_BINARY) * a LSM specific security context */ __NLBL_UNLABEL_A_MAX, }; #define NLBL_UNLABEL_A_MAX (__NLBL_UNLABEL_A_MAX - 1) /* NetLabel protocol functions */ int netlbl_unlabel_genl_init(void); /* Unlabeled connection hash table size */ /* XXX - currently this number is an uneducated guess */ #define NETLBL_UNLHSH_BITSIZE 7 /* General Unlabeled init function */ int netlbl_unlabel_init(u32 size); /* Static/Fallback label management functions */ int netlbl_unlhsh_add(struct net *net, const char *dev_name, const void *addr, const void *mask, u32 addr_len, u32 secid, struct netlbl_audit *audit_info); int netlbl_unlhsh_remove(struct net *net, const char *dev_name, const void *addr, const void *mask, u32 addr_len, struct netlbl_audit *audit_info); /* Process Unlabeled incoming network packets */ int netlbl_unlabel_getattr(const struct sk_buff *skb, u16 family, struct netlbl_lsm_secattr *secattr); /* Set the default configuration to allow Unlabeled packets */ int netlbl_unlabel_defconf(void); #endif -0/+10 A slave device will now notify the switch fabric once its port is bridged or unbridged, instead of calling directly its switch operations. This code allows propagating cross-chip bridging events in the fabric. Signed-off-by: Vivien Didelot <vivien.didelot@savoirfairelinux.com> Signed-off-by: David S. Miller <davem@davemloft.net> 2017-02-06net: dsa: add switch notifierVivien Didelot1-0/+7 Add a notifier block per DSA switch, registered against a notifier head in the switch fabric they belong to. This infrastructure will allow to propagate fabric-wide events such as port bridging, VLAN configuration, etc. If a DSA switch driver cares about cross-chip configuration, such events can be caught. Signed-off-by: Vivien Didelot <vivien.didelot@savoirfairelinux.com> Signed-off-by: David S. Miller <davem@davemloft.net> 2017-02-04net: ipv6: Change notifications for multipath add to RTA_MULTIPATHDavid Ahern1-0/+1 Change ip6_route_multipath_add to send one notifciation with the full route encoded with RTA_MULTIPATH instead of a series of individual routes. This is done by adding a skip_notify flag to the nl_info struct. The flag is used to skip sending of the notification in the fib code that actually inserts the route. Once the full route has been added, a notification is generated with all nexthops. ip6_route_multipath_add handles 3 use cases: new routes, route replace, and route append. The multipath notification generated needs to be consistent with the order of the nexthops and it should be consistent with the order in a FIB dump which means the route with the first nexthop needs to be used as the route reference. For the first 2 cases (new and replace), a reference to the route used to send the notification is obtained by saving the first route added. For the append case, the last route added is used to loop back to its first sibling route which is the first nexthop in the multipath route. Signed-off-by: David Ahern <dsa@cumulusnetworks.com> Signed-off-by: David S. Miller <davem@davemloft.net> 2017-02-04net: ipv6: Allow shorthand delete of all nexthops in multipath routeDavid Ahern1-1/+3 IPv4 allows multipath routes to be deleted using just the prefix and length. For example: $ ip ro ls vrf red unreachable default metric 8192 1.1.1.0/24 nexthop via 10.100.1.254 dev eth1 weight 1 nexthop via 10.11.200.2 dev eth11.200 weight 1 10.11.200.0/24 dev eth11.200 proto kernel scope link src 10.11.200.3 10.100.1.0/24 dev eth1 proto kernel scope link src 10.100.1.3 $ ip ro del 1.1.1.0/24 vrf red $ ip ro ls vrf red unreachable default metric 8192 10.11.200.0/24 dev eth11.200 proto kernel scope link src 10.11.200.3 10.100.1.0/24 dev eth1 proto kernel scope link src 10.100.1.3 The same notation does not work with IPv6 because of how multipath routes are implemented for IPv6. For IPv6 only the first nexthop of a multipath route is deleted if the request contains only a prefix and length. This leads to unnecessary complexity in userspace dealing with IPv6 multipath routes. This patch allows all nexthops to be deleted without specifying each one in the delete request. Internally, this is done by walking the sibling list of the route matching the specifications given (prefix, length, metric, protocol, etc). $ ip -6 ro ls vrf red 2001:db8:1::/120 dev eth1 proto kernel metric 256 pref medium 2001:db8:2::/120 dev eth2 proto kernel metric 256 pref medium 2001:db8:200::/120 via 2001:db8:1::2 dev eth1 metric 1024 pref medium 2001:db8:200::/120 via 2001:db8:2::2 dev eth2 metric 1024 pref medium ... $ ip -6 ro del vrf red 2001:db8:200::/120 $ ip -6 ro ls vrf red 2001:db8:1::/120 dev eth1 proto kernel metric 256 pref medium 2001:db8:2::/120 dev eth2 proto kernel metric 256 pref medium ... Because IPv6 allows individual nexthops to be deleted without deleting the entire route, the ip6_route_multipath_del and non-multipath code path (ip6_route_del) have to be discriminated so that all nexthops are only deleted for the latter case. This is done by making the existing fc_type in fib6_config a u16 and then adding a new u16 field with fc_delete_all_nh as the first bit. Suggested-by: Dinesh Dutt <ddutt@cumulusnetworks.com> Signed-off-by: David Ahern <dsa@cumulusnetworks.com> Signed-off-by: David S. Miller <davem@davemloft.net> 2017-02-03Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-nextDavid S. Miller8-26/+28 Pablo Neira Ayuso says: ==================== Netfilter updates for net-next The following patchset contains Netfilter updates for your net-next tree, they are: 1) Stash ctinfo 3-bit field into pointer to nf_conntrack object from sk_buff so we only access one single cacheline in the conntrack hotpath. Patchset from Florian Westphal. 2) Don't leak pointer to internal structures when exporting x_tables ruleset back to userspace, from Willem DeBruijn. This includes new helper functions to copy data to userspace such as xt_data_to_user() as well as conversions of our ip_tables, ip6_tables and arp_tables clients to use it. Not surprinsingly, ebtables requires an ad-hoc update. There is also a new field in x_tables extensions to indicate the amount of bytes that we copy to userspace. 3) Add nf_log_all_netns sysctl: This new knob allows you to enable logging via nf_log infrastructure for all existing netnamespaces. Given the effort to provide pernet syslog has been discontinued, let's provide a way to restore logging using netfilter kernel logging facilities in trusted environments. Patch from Michal Kubecek. 4) Validate SCTP checksum from conntrack helper, from Davide Caratti. 5) Merge UDPlite conntrack and NAT helpers into UDP, this was mostly a copy&paste from the original helper, from Florian Westphal. 6) Reset netfilter state when duplicating packets, also from Florian. 7) Remove unnecessary check for broadcast in IPv6 in pkttype match and nft_meta, from Liping Zhang. 8) Add missing code to deal with loopback packets from nft_meta when used by the netdev family, also from Liping. 9) Several cleanups on nf_tables, one to remove unnecessary check from the netlink control plane path to add table, set and stateful objects and code consolidation when unregister chain hooks, from Gao Feng. 10) Fix harmless reference counter underflow in IPVS that, however, results in problems with the introduction of the new refcount_t type, from David Windsor. 11) Enable LIBCRC32C from nf_ct_sctp instead of nf_nat_sctp, from Davide Caratti. 12) Missing documentation on nf_tables uapi header, from Liping Zhang. 13) Use rb_entry() helper in xt_connlimit, from Geliang Tang. ==================== Signed-off-by: David S. Miller <davem@davemloft.net> 2017-02-03sched: cls_flower: expose priority to offloading netdeviceJiri Pirko1-0/+1 The driver that offloads flower rules needs to know with which priority user inserted the rules. So add this information into offload struct. Signed-off-by: Jiri Pirko <jiri@mellanox.com> Acked-by: Ido Schimmel <idosch@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net> 2017-02-03ip_tunnels: new IP_TUNNEL_INFO_BRIDGE flag for ip_tunnel_info modeRoopa Prabhu1-0/+1 New ip_tunnel_info flag to represent bridged tunnel metadata. Used by bridge driver later in the series to pass per vlan dst metadata to bridge ports. Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com> Signed-off-by: David S. Miller <davem@davemloft.net> 2017-02-03net/sched: act_ife: Change to use ife moduleYotam Gigi1-1/+0 Use the encode/decode functionality from the ife module instead of using implementation inside the act_ife. Reviewed-by: Jiri Pirko <jiri@mellanox.com> Signed-off-by: Yotam Gigi <yotamg@mellanox.com> Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com> Signed-off-by: Roman Mashak <mrv@mojatatu.com> Signed-off-by: David S. Miller <davem@davemloft.net> 2017-02-03net: Introduce ife encapsulation moduleYotam Gigi1-0/+51 This module is responsible for the ife encapsulation protocol encode/decode logics. That module can: - ife_encode: encode skb and reserve space for the ife meta header - ife_decode: decode skb and extract the meta header size - ife_tlv_meta_encode - encodes one tlv entry into the reserved ife header space. - ife_tlv_meta_decode - decodes one tlv entry from the packet - ife_tlv_meta_next - advance to the next tlv Reviewed-by: Jiri Pirko <jiri@mellanox.com> Signed-off-by: Yotam Gigi <yotamg@mellanox.com> Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com> Signed-off-by: Roman Mashak <mrv@mojatatu.com> Signed-off-by: David S. Miller <davem@davemloft.net> 2017-02-03net/sched: act_ife: Unexport ife_tlv_meta_encodeYotam Gigi1-2/+0 As the function ife_tlv_meta_encode is not used by any other module, unexport it and make it static for the act_ife module. Signed-off-by: Yotam Gigi <yotamg@mellanox.com> Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com> Signed-off-by: Roman Mashak <mrv@mojatatu.com> Signed-off-by: David S. Miller <davem@davemloft.net> 2017-02-02Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/netDavid S. Miller1-0/+5 All merge conflicts were simple overlapping changes. Signed-off-by: David S. Miller <davem@davemloft.net> 2017-02-02netfilter: allow logging from non-init namespacesMichal Kubeček1-0/+3 Commit 69b34fb996b2 ("netfilter: xt_LOG: add net namespace support for xt_LOG") disabled logging packets using the LOG target from non-init namespaces. The motivation was to prevent containers from flooding kernel log of the host. The plan was to keep it that way until syslog namespace implementation allows containers to log in a safe way. However, the work on syslog namespace seems to have hit a dead end somewhere in 2013 and there are users who want to use xt_LOG in all network namespaces. This patch allows to do so by setting /proc/sys/net/netfilter/nf_log_all_netns to a nonzero value. This sysctl is only accessible from init_net so that one cannot switch the behaviour from inside a container. Signed-off-by: Michal Kubecek <mkubecek@suse.cz> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> 2017-02-02ipvs: free ip_vs_dest structs when refcnt=0David Windsor1-1/+1 Currently, the ip_vs_dest cache frees ip_vs_dest objects when their reference count becomes < 0. Aside from not being semantically sound, this is problematic for the new type refcount_t, which will be introduced shortly in a separate patch. refcount_t is the new kernel type for holding reference counts, and provides overflow protection and a constrained interface relative to atomic_t (the type currently being used for kernel reference counts). Per Julian Anastasov: "The problem is that dest_trash currently holds deleted dests (unlinked from RCU lists) with refcnt=0." Changing dest_trash to hold dest with refcnt=1 will allow us to free ip_vs_dest structs when their refcnt=0, in ip_vs_dest_put_and_free(). Signed-off-by: David Windsor <dwindsor@gmail.com> Signed-off-by: Julian Anastasov <ja@ssi.bg> Signed-off-by: Simon Horman <horms@verge.net.au> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> 2017-02-02netfilter: merge ctinfo into nfct pointer storage areaFlorian Westphal1-5/+6 After this change conntrack operations (lookup, creation, matching from ruleset) only access one instead of two sk_buff cache lines. This works for normal conntracks because those are allocated from a slab that guarantees hw cacheline or 8byte alignment (whatever is larger) so the 3 bits needed for ctinfo won't overlap with nf_conn addresses. Template allocation now does manual address alignment (see previous change) on arches that don't have sufficent kmalloc min alignment. Some spots intentionally use skb->_nfct instead of skb_nfct() helpers, this is to avoid undoing the skb_nfct() use when we remove untracked conntrack object in the future. Signed-off-by: Florian Westphal <fw@strlen.de> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> 2017-02-02netfilter: guarantee 8 byte minalign for template addressesFlorian Westphal1-0/+2 The next change will merge skb->nfct pointer and skb->nfctinfo status bits into single skb->_nfct (unsigned long) area. For this to work nf_conn addresses must always be aligned at least on an 8 byte boundary since we will need the lower 3bits to store nfctinfo. Conntrack templates are allocated via kmalloc. kbuild test robot reported BUILD_BUG_ON failed: NFCT_INFOMASK >= ARCH_KMALLOC_MINALIGN on v1 of this patchset, so not all platforms meet this requirement. Do manual alignment if needed, the alignment offset is stored in the nf_conn entry protocol area. This works because templates are not handed off to L4 protocol trackers. Reported-by: kbuild test robot <fengguang.wu@intel.com> Signed-off-by: Florian Westphal <fw@strlen.de> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> 2017-02-02netfilter: add and use nf_ct_set helperFlorian Westphal2-2/+9 Add a helper to assign a nf_conn entry and the ctinfo bits to an sk_buff. This avoids changing code in followup patch that merges skb->nfct and skb->nfctinfo into skb->_nfct. Signed-off-by: Florian Westphal <fw@strlen.de> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> 2017-02-02skbuff: add and use skb_nfct helperFlorian Westphal1-1/+1 Followup patch renames skb->nfct and changes its type so add a helper to avoid intrusive rename change later. Signed-off-by: Florian Westphal <fw@strlen.de> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> 2017-02-02netfilter: reduce direct skb->nfct usageFlorian Westphal1-3/+6 Next patch makes direct skb->nfct access illegal, reduce noise in next patch by using accessors we already have. Signed-off-by: Florian Westphal <fw@strlen.de> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> 2017-02-02netfilter: conntrack: no need to pass ctinfo to error handlerFlorian Westphal1-1/+1 It is never accessed for reading and the only places that write to it are the icmp(6) handlers, which also set skb->nfct (and skb->nfctinfo). The conntrack core specifically checks for attached skb->nfct after ->error() invocation and returns early in this case. Signed-off-by: Florian Westphal <fw@strlen.de> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>