#ifndef _NET_DN_DEV_H
#define _NET_DN_DEV_H
struct dn_dev;
/*
 * One DECnet address attached to a device.
 *
 * Entries form a singly linked list through ifa_next; the head of the
 * list is dn_dev.ifa_list, and dn_dev_islocal() walks it using
 * rcu_dereference() on each link.
 */
struct dn_ifaddr {
struct dn_ifaddr __rcu *ifa_next; /* next entry; NULL terminates the list */
struct dn_dev *ifa_dev; /* NOTE(review): presumably the dn_dev owning this address - confirm */
__le16 ifa_local; /* address that dn_dev_islocal() compares against */
__le16 ifa_address; /* NOTE(review): relation to ifa_local not visible here - confirm */
__u32 ifa_flags;
__u8 ifa_scope;
char ifa_label[IFNAMSIZ];
struct rcu_head rcu; /* NOTE(review): presumably used for RCU-deferred freeing - confirm */
};
/*
 * Device state codes (DN_DEV_S_*).
 * NOTE(review): presumably the values held in dn_dev_parms.state - confirm
 * against the users of that field.
 */
#define DN_DEV_S_RU 0 /* Run - working normally */
#define DN_DEV_S_CR 1 /* Circuit Rejected */
#define DN_DEV_S_DS 2 /* Data Link Start */
#define DN_DEV_S_RI 3 /* Routing Layer Initialize */
#define DN_DEV_S_RV 4 /* Routing Layer Verify */
#define DN_DEV_S_RC 5 /* Routing Layer Complete */
#define DN_DEV_S_OF 6 /* Off */
#define DN_DEV_S_HA 7 /* Halt */
/*
* The dn_dev_parms structure contains the set of parameters
* for each device (hence inclusion in the dn_dev structure)
* and an array is used to store the default types of supported
* device (in dn_dev.c).
*
* The type field matches the ARPHRD_ constants and is used in
* searching the list for supported devices when new devices
* come up.
*
* The mode field is used to find out if a device is broadcast,
* multipoint, or point-to-point. Please note that DECnet thinks
* about devices differently from the rest of the kernel,
* so the normal IFF_xxx flags are invalid here. For devices
* which can be any combination of the previously mentioned
* attributes, you can set this on a per device basis by
* installing an up() routine.
*
* The device state field defines the initial state in which the
* device will come up. In the dn_dev structure, it is the actual
* state.
*
* Things have changed here. I've killed timer1 since it's a user space
* issue for a user space routing daemon to sort out. The kernel does
* not need to be bothered with it.
*
* Timers:
* t2 - Rate limit timer, min time between routing and hello messages
* t3 - Hello timer, send hello messages when it expires
*
* Callbacks:
* up() - Called to initialize device, return value can veto use of
* device with DECnet.
* down() - Called to turn device off when it goes down
* timer3() - Called once for each ifaddr when timer 3 goes off
*
* sysctl - Hook for sysctl things
*
*/
/*
 * Per-device DECnet parameters. A copy is embedded in every struct dn_dev
 * (dn_dev.parms); see the descriptive comment preceding this structure
 * for the meaning of the type, mode and state fields, the timers and the
 * callbacks.
 */
struct dn_dev_parms {
int type; /* ARPHRD_xxx */
int mode; /* Broadcast, Unicast, Multipoint */
/* Values for the mode field; each is a distinct bit (1, 2, 4). */
#define DN_DEV_BCAST 1
#define DN_DEV_UCAST 2
#define DN_DEV_MPOINT 4
int state; /* Initial state */
int forwarding; /* 0=EndNode, 1=L1Router, 2=L2Router */
unsigned long t2; /* Default value of t2 */
unsigned long t3; /* Default value of t3 */
int priority; /* Priority to be a router */
char *name; /* Name for sysctl */
int (*up)(struct net_device *); /* initialize device; return value can veto DECnet use */
void (*down)(struct net_device *); /* turn device off when it goes down */
void (*timer3)(struct net_device *, struct dn_ifaddr *ifa); /* called once per ifaddr when timer 3 goes off */
void *sysctl; /* hook for sysctl things */
};
/*
 * DECnet state kept for one network device. dn_dev_islocal() obtains it
 * from the net_device through dev->dn_ptr (under RCU).
 */
struct dn_dev {
struct dn_ifaddr __rcu *ifa_list; /* head of the dn_ifaddr list walked by dn_dev_islocal() */
struct net_device *dev; /* NOTE(review): presumably back-pointer to the owning net_device - confirm */
struct dn_dev_parms parms; /* this device's parameters; parms.state is the actual state */
char use_long; /* NOTE(review): presumably selects the long packet format - confirm */
struct timer_list timer;
unsigned long t3; /* NOTE(review): presumably the running t3 (hello) timer value - confirm */
struct neigh_parms *neigh_parms;
__u8 addr[ETH_ALEN];
struct neighbour *router; /* Default router on circuit */
struct neighbour *peer; /* Peer on pointopoint links */
unsigned long uptime; /* Time device went up in jiffies */
};
/*
 * Packed header carrying 16-bit little-endian destination and source
 * node fields. Field order and widths define the layout; do not reorder.
 * NOTE(review): presumably the DECnet short-format routing header -
 * confirm against the protocol specification.
 */
struct dn_short_packet {
__u8 msgflg;
__le16 dstnode;
__le16 srcnode;
__u8 forward;
} __packed;
/*
 * Packed header made up solely of single-byte fields and byte arrays.
 * Field order and widths define the layout; do not reorder.
 * NOTE(review): presumably the DECnet long-format routing header, with
 * d_* describing the destination and s_* the source - confirm against the
 * protocol specification.
 */
struct dn_long_packet {
__u8 msgflg;
__u8 d_area;
__u8 d_subarea;
__u8 d_id[6];
__u8 s_area;
__u8 s_subarea;
__u8 s_id[6];
__u8 nl2;
__u8 visit_ct;
__u8 s_class;
__u8 pt;
} __packed;
/*------------------------- DRP - Routing messages ---------------------*/
/*
 * Packed layout of an endnode hello message (one of the DRP routing
 * messages). Field order and widths define the layout; do not reorder.
 * NOTE(review): individual field semantics are not visible in this
 * header - consult the DECnet routing specification before changing them.
 */
struct endnode_hello_message {
__u8 msgflg;
__u8 tiver[3];
__u8 id[6];
__u8 iinfo;
__le16 blksize;
__u8 area;
__u8 seed[8];
__u8 neighbor[6];
__le16 timer;
__u8 mpd;
__u8 datalen;
__u8 data[2];
} __packed;
/*
 * Packed layout of a router-node hello message. The first five fields
 * (msgflg, tiver, id, iinfo, blksize) are declared identically to those of
 * struct endnode_hello_message. Field order and widths define the layout;
 * do not reorder.
 * NOTE(review): individual field semantics are not visible in this
 * header - consult the DECnet routing specification before changing them.
 */
struct rtnode_hello_message {
__u8 msgflg;
__u8 tiver[3];
__u8 id[6];
__u8 iinfo;
__le16 blksize;
__u8 priority;
__u8 area;
__le16 timer;
__u8 mpd;
} __packed;
/*
 * DECnet device-layer entry points. Only the declarations are visible in
 * this header.
 * NOTE(review): the definitions presumably live in dn_dev.c (the file the
 * dn_dev_parms comment refers to) - confirm.
 */
void dn_dev_init(void);
void dn_dev_cleanup(void);
int dn_dev_ioctl(unsigned int cmd, void __user *arg);
void dn_dev_devices_off(void);
void dn_dev_devices_on(void);
void dn_dev_init_pkt(struct sk_buff *skb);
void dn_dev_veri_pkt(struct sk_buff *skb);
void dn_dev_hello(struct sk_buff *skb);
/* Parameter names added for consistency with dn_dev_set_default() below. */
void dn_dev_up(struct net_device *dev);
void dn_dev_down(struct net_device *dev);
int dn_dev_set_default(struct net_device *dev, int force);
struct net_device *dn_dev_get_default(void);
int dn_dev_bind_default(__le16 *addr);
int register_dnaddr_notifier(struct notifier_block *nb);
int unregister_dnaddr_notifier(struct notifier_block *nb);
/*
 * dn_dev_islocal - test whether a DECnet address is configured on a device
 * @dev:  device whose dn_ptr is inspected
 * @addr: address to look for (little-endian 16-bit)
 *
 * Returns 1 if @addr equals the ifa_local of any entry on the device's
 * ifa_list, 0 otherwise. If the device has no dn_ptr, a KERN_DEBUG message
 * is printed and 0 is returned. The lookup and list walk run between
 * rcu_read_lock() and rcu_read_unlock().
 */
static inline int dn_dev_islocal(struct net_device *dev, __le16 addr)
{
	struct dn_dev *dn_db;
	struct dn_ifaddr *entry;
	int found = 0;

	rcu_read_lock();

	dn_db = rcu_dereference(dev->dn_ptr);
	if (!dn_db) {
		printk(KERN_DEBUG "dn_dev_islocal: Called for non DECnet device\n");
	} else {
		entry = rcu_dereference(dn_db->ifa_list);
		while (entry) {
			/* Both sides are __le16, so no byte swapping is needed. */
			if (entry->ifa_local == addr) {
				found = 1;
				break;
			}
			entry = rcu_dereference(entry->ifa_next);
		}
	}

	rcu_read_unlock();
	return found;
}
#endif /* _NET_DN_DEV_H */
ed
path, until we have a generic notifier based solution.
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
|
In preparation for using this function in net/dsa/dsa2.c, rename the function
to make its scope DSA specific, and export it.
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
|
|
A slave device will now notify the switch fabric once its port is
bridged or unbridged, instead of calling directly its switch operations.
This code allows propagating cross-chip bridging events in the fabric.
Signed-off-by: Vivien Didelot <vivien.didelot@savoirfairelinux.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
|
|
Add a notifier block per DSA switch, registered against a notifier head
in the switch fabric they belong to.
This infrastructure will allow propagating fabric-wide events such as
port bridging, VLAN configuration, etc. If a DSA switch driver cares
about cross-chip configuration, such events can be caught.
Signed-off-by: Vivien Didelot <vivien.didelot@savoirfairelinux.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
|
|
Change ip6_route_multipath_add to send one notification with the full
route encoded with RTA_MULTIPATH instead of a series of individual routes.
This is done by adding a skip_notify flag to the nl_info struct. The
flag is used to skip sending of the notification in the fib code that
actually inserts the route. Once the full route has been added, a
notification is generated with all nexthops.
ip6_route_multipath_add handles 3 use cases: new routes, route replace,
and route append. The multipath notification generated needs to be
consistent with the order of the nexthops and it should be consistent
with the order in a FIB dump which means the route with the first nexthop
needs to be used as the route reference. For the first 2 cases (new and
replace), a reference to the route used to send the notification is
obtained by saving the first route added. For the append case, the last
route added is used to loop back to its first sibling route which is
the first nexthop in the multipath route.
Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
|
|
IPv4 allows multipath routes to be deleted using just the prefix and
length. For example:
$ ip ro ls vrf red
unreachable default metric 8192
1.1.1.0/24
nexthop via 10.100.1.254 dev eth1 weight 1
nexthop via 10.11.200.2 dev eth11.200 weight 1
10.11.200.0/24 dev eth11.200 proto kernel scope link src 10.11.200.3
10.100.1.0/24 dev eth1 proto kernel scope link src 10.100.1.3
$ ip ro del 1.1.1.0/24 vrf red
$ ip ro ls vrf red
unreachable default metric 8192
10.11.200.0/24 dev eth11.200 proto kernel scope link src 10.11.200.3
10.100.1.0/24 dev eth1 proto kernel scope link src 10.100.1.3
The same notation does not work with IPv6 because of how multipath routes
are implemented for IPv6. For IPv6 only the first nexthop of a multipath
route is deleted if the request contains only a prefix and length. This
leads to unnecessary complexity in userspace dealing with IPv6 multipath
routes.
This patch allows all nexthops to be deleted without specifying each one
in the delete request. Internally, this is done by walking the sibling
list of the route matching the specifications given (prefix, length,
metric, protocol, etc).
$ ip -6 ro ls vrf red
2001:db8:1::/120 dev eth1 proto kernel metric 256 pref medium
2001:db8:2::/120 dev eth2 proto kernel metric 256 pref medium
2001:db8:200::/120 via 2001:db8:1::2 dev eth1 metric 1024 pref medium
2001:db8:200::/120 via 2001:db8:2::2 dev eth2 metric 1024 pref medium
...
$ ip -6 ro del vrf red 2001:db8:200::/120
$ ip -6 ro ls vrf red
2001:db8:1::/120 dev eth1 proto kernel metric 256 pref medium
2001:db8:2::/120 dev eth2 proto kernel metric 256 pref medium
...
Because IPv6 allows individual nexthops to be deleted without deleting
the entire route, the ip6_route_multipath_del and non-multipath code
path (ip6_route_del) have to be discriminated so that all nexthops are
only deleted for the latter case. This is done by making the existing
fc_type in fib6_config a u16 and then adding a new u16 field with
fc_delete_all_nh as the first bit.
Suggested-by: Dinesh Dutt <ddutt@cumulusnetworks.com>
Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
|
|
Pablo Neira Ayuso says:
====================
Netfilter updates for net-next
The following patchset contains Netfilter updates for your net-next
tree, they are:
1) Stash ctinfo 3-bit field into pointer to nf_conntrack object from
sk_buff so we only access one single cacheline in the conntrack
hotpath. Patchset from Florian Westphal.
2) Don't leak pointer to internal structures when exporting x_tables
ruleset back to userspace, from Willem DeBruijn. This includes new
helper functions to copy data to userspace such as xt_data_to_user()
as well as conversions of our ip_tables, ip6_tables and arp_tables
clients to use it. Not surprisingly, ebtables requires an ad-hoc
update. There is also a new field in x_tables extensions to indicate
the amount of bytes that we copy to userspace.
3) Add nf_log_all_netns sysctl: This new knob allows you to enable
logging via nf_log infrastructure for all existing netnamespaces.
Given the effort to provide pernet syslog has been discontinued,
let's provide a way to restore logging using netfilter kernel logging
facilities in trusted environments. Patch from Michal Kubecek.
4) Validate SCTP checksum from conntrack helper, from Davide Caratti.
5) Merge UDPlite conntrack and NAT helpers into UDP, this was mostly
a copy&paste from the original helper, from Florian Westphal.
6) Reset netfilter state when duplicating packets, also from Florian.
7) Remove unnecessary check for broadcast in IPv6 in pkttype match and
nft_meta, from Liping Zhang.
8) Add missing code to deal with loopback packets from nft_meta when
used by the netdev family, also from Liping.
9) Several cleanups on nf_tables, one to remove unnecessary check from
the netlink control plane path to add table, set and stateful objects
and code consolidation when unregister chain hooks, from Gao Feng.
10) Fix harmless reference counter underflow in IPVS that, however,
results in problems with the introduction of the new refcount_t
type, from David Windsor.
11) Enable LIBCRC32C from nf_ct_sctp instead of nf_nat_sctp,
from Davide Caratti.
12) Missing documentation on nf_tables uapi header, from Liping Zhang.
13) Use rb_entry() helper in xt_connlimit, from Geliang Tang.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
|
|
The driver that offloads flower rules needs to know with which priority
user inserted the rules. So add this information into offload struct.
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Acked-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
|
|
New ip_tunnel_info flag to represent bridged tunnel metadata.
Used by bridge driver later in the series to pass per vlan dst
metadata to bridge ports.
Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
|
|
Use the encode/decode functionality from the ife module instead of using
implementation inside the act_ife.
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Yotam Gigi <yotamg@mellanox.com>
Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: Roman Mashak <mrv@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
|
|
This module is responsible for the ife encapsulation protocol
encode/decode logics. That module can:
- ife_encode: encode skb and reserve space for the ife meta header
- ife_decode: decode skb and extract the meta header size
- ife_tlv_meta_encode - encodes one tlv entry into the reserved ife
header space.
- ife_tlv_meta_decode - decodes one tlv entry from the packet
- ife_tlv_meta_next - advance to the next tlv
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Yotam Gigi <yotamg@mellanox.com>
Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: Roman Mashak <mrv@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
|
|
As the function ife_tlv_meta_encode is not used by any other module,
unexport it and make it static for the act_ife module.
Signed-off-by: Yotam Gigi <yotamg@mellanox.com>
Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: Roman Mashak <mrv@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
|
|
All merge conflicts were simple overlapping changes.
Signed-off-by: David S. Miller <davem@davemloft.net>
|
|
Commit 69b34fb996b2 ("netfilter: xt_LOG: add net namespace support for
xt_LOG") disabled logging packets using the LOG target from non-init
namespaces. The motivation was to prevent containers from flooding
kernel log of the host. The plan was to keep it that way until syslog
namespace implementation allows containers to log in a safe way.
However, the work on syslog namespace seems to have hit a dead end
somewhere in 2013 and there are users who want to use xt_LOG in all
network namespaces. This patch allows doing so by setting
/proc/sys/net/netfilter/nf_log_all_netns
to a nonzero value. This sysctl is only accessible from init_net so that
one cannot switch the behaviour from inside a container.
Signed-off-by: Michal Kubecek <mkubecek@suse.cz>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
|
Currently, the ip_vs_dest cache frees ip_vs_dest objects when their
reference count becomes < 0. Aside from not being semantically sound,
this is problematic for the new type refcount_t, which will be introduced
shortly in a separate patch. refcount_t is the new kernel type for
holding reference counts, and provides overflow protection and a
constrained interface relative to atomic_t (the type currently being
used for kernel reference counts).
Per Julian Anastasov: "The problem is that dest_trash currently holds
deleted dests (unlinked from RCU lists) with refcnt=0." Changing
dest_trash to hold dest with refcnt=1 will allow us to free ip_vs_dest
structs when their refcnt=0, in ip_vs_dest_put_and_free().
Signed-off-by: David Windsor <dwindsor@gmail.com>
Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
|
After this change conntrack operations (lookup, creation, matching from
ruleset) only access one instead of two sk_buff cache lines.
This works for normal conntracks because those are allocated from a slab
that guarantees hw cacheline or 8byte alignment (whatever is larger)
so the 3 bits needed for ctinfo won't overlap with nf_conn addresses.
Template allocation now does manual address alignment (see previous change)
on arches that don't have sufficient kmalloc min alignment.
Some spots intentionally use skb->_nfct instead of skb_nfct() helpers,
this is to avoid undoing the skb_nfct() use when we remove untracked
conntrack object in the future.
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
|
The next change will merge skb->nfct pointer and skb->nfctinfo
status bits into single skb->_nfct (unsigned long) area.
For this to work nf_conn addresses must always be aligned at least on
an 8 byte boundary since we will need the lower 3bits to store nfctinfo.
Conntrack templates are allocated via kmalloc.
kbuild test robot reported
BUILD_BUG_ON failed: NFCT_INFOMASK >= ARCH_KMALLOC_MINALIGN
on v1 of this patchset, so not all platforms meet this requirement.
Do manual alignment if needed, the alignment offset is stored in the
nf_conn entry protocol area. This works because templates are not
handed off to L4 protocol trackers.
Reported-by: kbuild test robot <fengguang.wu@intel.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
|
Add a helper to assign a nf_conn entry and the ctinfo bits to an sk_buff.
This avoids changing code in followup patch that merges skb->nfct and
skb->nfctinfo into skb->_nfct.
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
|
Followup patch renames skb->nfct and changes its type so add a helper to
avoid intrusive rename change later.
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
|
Next patch makes direct skb->nfct access illegal, reduce noise
in next patch by using accessors we already have.
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
|
It is never accessed for reading and the only places that write to it
are the icmp(6) handlers, which also set skb->nfct (and skb->nfctinfo).
The conntrack core specifically checks for attached skb->nfct after
->error() invocation and returns early in this case.
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|