static inline unsigned int calc_padlen(unsigned int len, unsigned int n)
{
return (n - len + 16) & 0x7;
}
static inline void *mip6_padn(__u8 *data, __u8 padlen)
{
if (!data)
return NULL;
if (padlen == 1) {
data[0] = IPV6_TLV_PAD1;
} else if (padlen > 1) {
data[0] = IPV6_TLV_PADN;
data[1] = padlen - 2;
if (padlen > 2)
memset(data+2, 0, data[1]);
}
return data + padlen;
}
static inline void mip6_param_prob(struct sk_buff *skb, u8 code, int pos)
{
icmpv6_send(skb, ICMPV6_PARAMPROB, code, pos);
}
static int mip6_mh_len(int type)
{
int len = 0;
switch (type) {
case IP6_MH_TYPE_BRR:
len = 0;
break;
case IP6_MH_TYPE_HOTI:
case IP6_MH_TYPE_COTI:
case IP6_MH_TYPE_BU:
case IP6_MH_TYPE_BACK:
len = 1;
break;
case IP6_MH_TYPE_HOT:
case IP6_MH_TYPE_COT:
case IP6_MH_TYPE_BERROR:
len = 2;
break;
}
return len;
}
static int mip6_mh_filter(struct sock *sk, struct sk_buff *skb)
{
struct ip6_mh _hdr;
const struct ip6_mh *mh;
mh = skb_header_pointer(skb, skb_transport_offset(skb),
sizeof(_hdr), &_hdr);
if (!mh)
return -1;
if (((mh->ip6mh_hdrlen + 1) << 3) > skb->len)
return -1;
if (mh->ip6mh_hdrlen < mip6_mh_len(mh->ip6mh_type)) {
net_dbg_ratelimited("mip6: MH message too short: %d vs >=%d\n",
mh->ip6mh_hdrlen,
mip6_mh_len(mh->ip6mh_type));
mip6_param_prob(skb, 0, offsetof(struct ip6_mh, ip6mh_hdrlen) +
skb_network_header_len(skb));
return -1;
}
if (mh->ip6mh_proto != IPPROTO_NONE) {
net_dbg_ratelimited("mip6: MH invalid payload proto = %d\n",
mh->ip6mh_proto);
mip6_param_prob(skb, 0, offsetof(struct ip6_mh, ip6mh_proto) +
skb_network_header_len(skb));
return -1;
}
return 0;
}
struct mip6_report_rate_limiter {
spinlock_t lock;
ktime_t stamp;
int iif;
struct in6_addr src;
struct in6_addr dst;
};
static struct mip6_report_rate_limiter mip6_report_rl = {
.lock = __SPIN_LOCK_UNLOCKED(mip6_report_rl.lock)
};
static int mip6_destopt_input(struct xfrm_state *x, struct sk_buff *skb)
{
const struct ipv6hdr *iph = ipv6_hdr(skb);
struct ipv6_destopt_hdr *destopt = (struct ipv6_destopt_hdr *)skb->data;
int err = destopt->nexthdr;
spin_lock(&x->lock);
if (!ipv6_addr_equal(&iph->saddr, (struct in6_addr *)x->coaddr) &&
!ipv6_addr_any((struct in6_addr *)x->coaddr))
err = -ENOENT;
spin_unlock(&x->lock);
return err;
}
/* Destination Option Header is inserted.
* IP Header's src address is replaced with Home Address Option in
* Destination Option Header.
*/
static int mip6_destopt_output(struct xfrm_state *x, struct sk_buff *skb)
{
struct ipv6hdr *iph;
struct ipv6_destopt_hdr *dstopt;
struct ipv6_destopt_hao *hao;
u8 nexthdr;
int len;
skb_push(skb, -skb_network_offset(skb));
iph = ipv6_hdr(skb);
nexthdr = *skb_mac_header(skb);
*skb_mac_header(skb) = IPPROTO_DSTOPTS;
dstopt = (struct ipv6_destopt_hdr *)skb_transport_header(skb);
dstopt->nexthdr = nexthdr;
hao = mip6_padn((char *)(dstopt + 1),
calc_padlen(sizeof(*dstopt), 6));
hao->type = IPV6_TLV_HAO;
BUILD_BUG_ON(sizeof(*hao) != 18);
hao->length = sizeof(*hao) - 2;
len = ((char *)hao - (char *)dstopt) + sizeof(*hao);
memcpy(&hao->addr, &iph->saddr, sizeof(hao->addr));
spin_lock_bh(&x->lock);
memcpy(&iph->saddr, x->coaddr, sizeof(iph->saddr));
spin_unlock_bh(&x->lock);
WARN_ON(len != x->props.header_len);
dstopt->hdrlen = (x->props.header_len >> 3) - 1;
return 0;
}
static inline int mip6_report_rl_allow(ktime_t stamp,
const struct in6_addr *dst,
const struct in6_addr *src, int iif)
{
int allow = 0;
spin_lock_bh(&mip6_report_rl.lock);
if (mip6_report_rl.stamp != stamp ||
mip6_report_rl.iif != iif ||
!ipv6_addr_equal(&mip6_report_rl.src, src) ||
!ipv6_addr_equal(&mip6_report_rl.dst, dst)) {
mip6_report_rl.stamp = stamp;
mip6_report_rl.iif = iif;
mip6_report_rl.src = *src;
mip6_report_rl.dst = *dst;
allow = 1;
}
spin_unlock_bh(&mip6_report_rl.lock);
return allow;
}
static int mip6_destopt_reject(struct xfrm_state *x, struct sk_buff *skb,
const struct flowi *fl)
{
struct net *net = xs_net(x);
struct inet6_skb_parm *opt = (struct inet6_skb_parm *)skb->cb;
const struct flowi6 *fl6 = &fl->u.ip6;
struct ipv6_destopt_hao *hao = NULL;
struct xfrm_selector sel;
int offset;
ktime_t stamp;
int err = 0;
if (unlikely(fl6->flowi6_proto == IPPROTO_MH &&
fl6->fl6_mh_type <= IP6_MH_TYPE_MAX))
goto out;
if (likely(opt->dsthao)) {
offset = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
if (likely(offset >= 0))
hao = (struct ipv6_destopt_hao *)
(skb_network_header(skb) + offset);
}
stamp = skb_get_ktime(skb);
if (!mip6_report_rl_allow(stamp, &ipv6_hdr(skb)->daddr,
hao ? &hao->addr : &ipv6_hdr(skb)->saddr,
opt->iif))
goto out;
memset(&sel, 0, sizeof(sel));
memcpy(&sel.daddr, (xfrm_address_t *)&ipv6_hdr(skb)->daddr,
sizeof(sel.daddr));
sel.prefixlen_d = 128;
memcpy(&sel.saddr, (xfrm_address_t *)&ipv6_hdr(skb)->saddr,
sizeof(sel.saddr));
sel.prefixlen_s = 128;
sel.family = AF_INET6;
sel.proto = fl6->flowi6_proto;
sel.dport = xfrm_flowi_dport(fl, &fl6->uli);
if (sel.dport)
sel.dport_mask = htons(~0);
sel.sport = xfrm_flowi_sport(fl, &fl6->uli);
if (sel.sport)
sel.sport_mask = htons(~0);
sel.ifindex = fl6->flowi6_oif;
err = km_report(net, IPPROTO_DSTOPTS, &sel,
(hao ? (xfrm_address_t *)&hao->addr : NULL));
out:
return err;
}
static int mip6_destopt_offset(struct xfrm_state *x, struct sk_buff *skb,
u8 **nexthdr)
{
u16 offset = sizeof(struct ipv6hdr);
struct ipv6_opt_hdr *exthdr =
(struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
const unsigned char *nh = skb_network_header(skb);
unsigned int packet_len = skb_tail_pointer(skb) -
skb_network_header(skb);
int found_rhdr = 0;
*nexthdr = &ipv6_hdr(skb)->nexthdr;
while (offset + 1 <= packet_len) {
switch (**nexthdr) {
case NEXTHDR_HOP:
break;
case NEXTHDR_ROUTING:
found_rhdr = 1;
break;
case NEXTHDR_DEST:
/*
* HAO MUST NOT appear more than once.
* XXX: It is better to try to find by the end of
* XXX: packet if HAO exists.
*/
if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0) {
net_dbg_ratelimited("mip6: hao exists already, override\n");
return offset;
}
if (found_rhdr)
return offset;
break;
default:
return offset;
}
offset += ipv6_optlen(exthdr);
*nexthdr = &exthdr->nexthdr;
exthdr = (struct ipv6_opt_hdr *)(nh + offset);
}
return offset;
}
static int mip6_destopt_init_state(struct xfrm_state *x)
{
if (x->id.spi) {
pr_info("%s: spi is not 0: %u\n", __func__, x->id.spi);
return -EINVAL;
}
if (x->props.mode != XFRM_MODE_ROUTEOPTIMIZATION) {
pr_info("%s: state's mode is not %u: %u\n",
__func__, XFRM_MODE_ROUTEOPTIMIZATION, x->props.mode);
return -EINVAL;
}
x->props.header_len = sizeof(struct ipv6_destopt_hdr) +
calc_padlen(sizeof(struct ipv6_destopt_hdr), 6) +
sizeof(struct ipv6_destopt_hao);
WARN_ON(x->props.header_len != 24);
return 0;
}
/*
* Do nothing about destroying since it has no specific operation for
* destination options header unlike IPsec protocols.
*/
static void mip6_destopt_destroy(struct xfrm_state *x)
{
}
static const struct xfrm_type mip6_destopt_type = {
.description = "MIP6DESTOPT",
.owner = THIS_MODULE,
.proto = IPPROTO_DSTOPTS,
.flags = XFRM_TYPE_NON_FRAGMENT | XFRM_TYPE_LOCAL_COADDR,
.init_state = mip6_destopt_init_state,
.destructor = mip6_destopt_destroy,
.input = mip6_destopt_input,
.output = mip6_destopt_output,
.reject = mip6_destopt_reject,
.hdr_offset = mip6_destopt_offset,
};
static int mip6_rthdr_input(struct xfrm_state *x, struct sk_buff *skb)
{
const struct ipv6hdr *iph = ipv6_hdr(skb);
struct rt2_hdr *rt2 = (struct rt2_hdr *)skb->data;
int err = rt2->rt_hdr.nexthdr;
spin_lock(&x->lock);
if (!ipv6_addr_equal(&iph->daddr, (struct in6_addr *)x->coaddr) &&
!ipv6_addr_any((struct in6_addr *)x->coaddr))
err = -ENOENT;
spin_unlock(&x->lock);
return err;
}
/* Routing Header type 2 is inserted.
* IP Header's dst address is replaced with Routing Header's Home Address.
*/
static int mip6_rthdr_output(struct xfrm_state *x, struct sk_buff *skb)
{
struct ipv6hdr *iph;
struct rt2_hdr *rt2;
u8 nexthdr;
skb_push(skb, -skb_network_offset(skb));
iph = ipv6_hdr(skb);
nexthdr = *skb_mac_header(skb);
*skb_mac_header(skb) = IPPROTO_ROUTING;
rt2 = (struct rt2_hdr *)skb_transport_header(skb);
rt2->rt_hdr.nexthdr = nexthdr;
rt2->rt_hdr.hdrlen = (x->props.header_len >> 3) - 1;
rt2->rt_hdr.type = IPV6_SRCRT_TYPE_2;
rt2->rt_hdr.segments_left = 1;
memset(&rt2->reserved, 0, sizeof(rt2->reserved));
WARN_ON(rt2->rt_hdr.hdrlen != 2);
memcpy(&rt2->addr, &iph->daddr, sizeof(rt2->addr));
spin_lock_bh(&x->lock);
memcpy(&iph->daddr, x->coaddr, sizeof(iph->daddr));
spin_unlock_bh(&x->lock);
return 0;
}
static int mip6_rthdr_offset(struct xfrm_state *x, struct sk_buff *skb,
u8 **nexthdr)
{
u16 offset = sizeof(struct ipv6hdr);
struct ipv6_opt_hdr *exthdr =
(struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
const unsigned char *nh = skb_network_header(skb);
unsigned int packet_len = skb_tail_pointer(skb) -
skb_network_header(skb);
int found_rhdr = 0;
*nexthdr = &ipv6_hdr(skb)->nexthdr;
while (offset + 1 <= packet_len) {
switch (**nexthdr) {
case NEXTHDR_HOP:
break;
case NEXTHDR_ROUTING:
if (offset + 3 <= packet_len) {
struct ipv6_rt_hdr *rt;
rt = (struct ipv6_rt_hdr *)(nh + offset);
if (rt->type != 0)
return offset;
}
found_rhdr = 1;
break;
case NEXTHDR_DEST:
if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
return offset;
if (found_rhdr)
return offset;
break;
default:
return offset;
}
offset += ipv6_optlen(exthdr);
*nexthdr = &exthdr->nexthdr;
exthdr = (struct ipv6_opt_hdr *)(nh + offset);
}
return offset;
}
static int mip6_rthdr_init_state(struct xfrm_state *x)
{
if (x->id.spi) {
pr_info("%s: spi is not 0: %u\n", __func__, x->id.spi);
return -EINVAL;
}
if (x->props.mode != XFRM_MODE_ROUTEOPTIMIZATION) {
pr_info("%s: state's mode is not %u: %u\n",
__func__, XFRM_MODE_ROUTEOPTIMIZATION, x->props.mode);
return -EINVAL;
}
x->props.header_len = sizeof(struct rt2_hdr);
return 0;
}
/*
* Do nothing about destroying since it has no specific operation for routing
* header type 2 unlike IPsec protocols.
*/
static void mip6_rthdr_destroy(struct xfrm_state *x)
{
}
static const struct xfrm_type mip6_rthdr_type = {
.description = "MIP6RT",
.owner = THIS_MODULE,
.proto = IPPROTO_ROUTING,
.flags = XFRM_TYPE_NON_FRAGMENT | XFRM_TYPE_REMOTE_COADDR,
.init_state = mip6_rthdr_init_state,
.destructor = mip6_rthdr_destroy,
.input = mip6_rthdr_input,
.output = mip6_rthdr_output,
.hdr_offset = mip6_rthdr_offset,
};
static int __init mip6_init(void)
{
pr_info("Mobile IPv6\n");
if (xfrm_register_type(&mip6_destopt_type, AF_INET6) < 0) {
pr_info("%s: can't add xfrm type(destopt)\n", __func__);
goto mip6_destopt_xfrm_fail;
}
if (xfrm_register_type(&mip6_rthdr_type, AF_INET6) < 0) {
pr_info("%s: can't add xfrm type(rthdr)\n", __func__);
goto mip6_rthdr_xfrm_fail;
}
if (rawv6_mh_filter_register(mip6_mh_filter) < 0) {
pr_info("%s: can't add rawv6 mh filter\n", __func__);
goto mip6_rawv6_mh_fail;
}
return 0;
mip6_rawv6_mh_fail:
xfrm_unregister_type(&mip6_rthdr_type, AF_INET6);
mip6_rthdr_xfrm_fail:
xfrm_unregister_type(&mip6_destopt_type, AF_INET6);
mip6_destopt_xfrm_fail:
return -EAGAIN;
}
static void __exit mip6_fini(void)
{
if (rawv6_mh_filter_unregister(mip6_mh_filter) < 0)
pr_info("%s: can't remove rawv6 mh filter\n", __func__);
if (xfrm_unregister_type(&mip6_rthdr_type, AF_INET6) < 0)
pr_info("%s: can't remove xfrm type(rthdr)\n", __func__);
if (xfrm_unregister_type(&mip6_destopt_type, AF_INET6) < 0)
pr_info("%s: can't remove xfrm type(destopt)\n", __func__);
}
module_init(mip6_init);
module_exit(mip6_fini);
MODULE_LICENSE("GPL");
MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_DSTOPTS);
MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_ROUTING);
te when duplicating packets, also from Florian.
7) Remove unnecessary check for broadcast in IPv6 in pkttype match and
nft_meta, from Liping Zhang.
8) Add missing code to deal with loopback packets from nft_meta when
used by the netdev family, also from Liping.
9) Several cleanups on nf_tables, one to remove unnecessary check from
the netlink control plane path to add table, set and stateful objects
and code consolidation when unregister chain hooks, from Gao Feng.
10) Fix harmless reference counter underflow in IPVS that, however,
results in problems with the introduction of the new refcount_t
type, from David Windsor.
11) Enable LIBCRC32C from nf_ct_sctp instead of nf_nat_sctp,
from Davide Caratti.
12) Missing documentation on nf_tables uapi header, from Liping Zhang.
13) Use rb_entry() helper in xt_connlimit, from Geliang Tang.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
|
IPv6 stack does not set the protocol for local routes, so those routes show
up with proto "none":
$ ip -6 ro ls table local
local ::1 dev lo proto none metric 0 pref medium
local 2100:3:: dev lo proto none metric 0 pref medium
local 2100:3::4 dev lo proto none metric 0 pref medium
local fe80:: dev lo proto none metric 0 pref medium
...
Set rt6i_protocol to RTPROT_KERNEL for consistency with IPv4. Now routes
show up with proto "kernel":
$ ip -6 ro ls table local
local ::1 dev lo proto kernel metric 0 pref medium
local 2100:3:: dev lo proto kernel metric 0 pref medium
local 2100:3::4 dev lo proto kernel metric 0 pref medium
local fe80:: dev lo proto kernel metric 0 pref medium
...
Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
|
|
Small cleanup factorizing code doing the TCP_MAXSEG clamping.
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
|
|
All merge conflicts were simple overlapping changes.
Signed-off-by: David S. Miller <davem@davemloft.net>
|
|
Commit 69b34fb996b2 ("netfilter: xt_LOG: add net namespace support for
xt_LOG") disabled logging packets using the LOG target from non-init
namespaces. The motivation was to prevent containers from flooding
kernel log of the host. The plan was to keep it that way until syslog
namespace implementation allows containers to log in a safe way.
However, the work on syslog namespace seems to have hit a dead end
somewhere in 2013 and there are users who want to use xt_LOG in all
network namespaces. This patch allows to do so by setting
/proc/sys/net/netfilter/nf_log_all_netns
to a nonzero value. This sysctl is only accessible from init_net so that
one cannot switch the behaviour from inside a container.
Signed-off-by: Michal Kubecek <mkubecek@suse.cz>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
|
After this change conntrack operations (lookup, creation, matching from
ruleset) only access one instead of two sk_buff cache lines.
This works for normal conntracks because those are allocated from a slab
that guarantees hw cacheline or 8byte alignment (whatever is larger)
so the 3 bits needed for ctinfo won't overlap with nf_conn addresses.
Template allocation now does manual address alignment (see previous change)
on arches that don't have sufficent kmalloc min alignment.
Some spots intentionally use skb->_nfct instead of skb_nfct() helpers,
this is to avoid undoing the skb_nfct() use when we remove untracked
conntrack object in the future.
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
|
Add a helper to assign a nf_conn entry and the ctinfo bits to an sk_buff.
This avoids changing code in followup patch that merges skb->nfct and
skb->nfctinfo into skb->_nfct.
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
|
Followup patch renames skb->nfct and changes its type so add a helper to
avoid intrusive rename change later.
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
|
We should also toss nf_bridge_info, if any -- packet is leaving via
ip_local_out, also, this skb isn't bridged -- it is a locally generated
copy. Also this avoids the need to touch this later when skb->nfct is
replaced with 'unsigned long _nfct' in followup patch.
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
|
It is never accessed for reading and the only places that write to it
are the icmp(6) handlers, which also set skb->nfct (and skb->nfctinfo).
The conntrack core specifically checks for attached skb->nfct after
->error() invocation and returns early in this case.
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|