/* Copyright (c) 2015 PLUMgrid, http://plumgrid.com
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 */
#include <uapi/linux/bpf.h>
#include "bpf_helpers.h"
#include <uapi/linux/in.h>
#include <uapi/linux/if.h>
#include <uapi/linux/if_ether.h>
#include <uapi/linux/ip.h>
#include <uapi/linux/ipv6.h>
#include <uapi/linux/if_tunnel.h>
#include <uapi/linux/mpls.h>

#define IP_MF		0x2000
#define IP_OFFSET	0x1FFF

#define PROG(F) SEC("socket/"__stringify(F)) int bpf_func_##F

struct bpf_map_def SEC("maps") jmp_table = {
	.type = BPF_MAP_TYPE_PROG_ARRAY,
	.key_size = sizeof(u32),
	.value_size = sizeof(u32),
	.max_entries = 8,
};

#define PARSE_VLAN 1
#define PARSE_MPLS 2
#define PARSE_IP 3
#define PARSE_IPV6 4

/* Protocol dispatch routine.
 * It tail-calls the next BPF program depending on the eth proto.
 * Note, we could have used:
 * bpf_tail_call(skb, &jmp_table, proto);
 * but it would need a large prog_array.
 */
static inline void parse_eth_proto(struct __sk_buff *skb, u32 proto)
{
	switch (proto) {
	case ETH_P_8021Q:
	case ETH_P_8021AD:
		bpf_tail_call(skb, &jmp_table, PARSE_VLAN);
		break;
	case ETH_P_MPLS_UC:
	case ETH_P_MPLS_MC:
		bpf_tail_call(skb, &jmp_table, PARSE_MPLS);
		break;
	case ETH_P_IP:
		bpf_tail_call(skb, &jmp_table, PARSE_IP);
		break;
	case ETH_P_IPV6:
		bpf_tail_call(skb, &jmp_table, PARSE_IPV6);
		break;
	}
}

struct vlan_hdr {
	__be16 h_vlan_TCI;
	__be16 h_vlan_encapsulated_proto;
};

struct bpf_flow_keys {
	__be32 src;
	__be32 dst;
	union {
		__be32 ports;
		__be16 port16[2];
	};
	__u32 ip_proto;
};

static inline int ip_is_fragment(struct __sk_buff *ctx, __u64 nhoff)
{
	return load_half(ctx, nhoff + offsetof(struct iphdr, frag_off))
		& (IP_MF | IP_OFFSET);
}

static inline __u32 ipv6_addr_hash(struct __sk_buff *ctx, __u64 off)
{
	__u64 w0 = load_word(ctx, off);
	__u64 w1 = load_word(ctx, off + 4);
	__u64 w2 = load_word(ctx, off + 8);
	__u64 w3 = load_word(ctx, off + 12);

	return (__u32)(w0 ^ w1 ^ w2 ^ w3);
}

struct globals {
	struct bpf_flow_keys flow;
};

struct bpf_map_def SEC("maps") percpu_map = {
	.type = BPF_MAP_TYPE_ARRAY,
	.key_size = sizeof(__u32),
	.value_size = sizeof(struct globals),
	.max_entries = 32,
};

/* user poor man's per_cpu until native support is ready */
static struct globals *this_cpu_globals(void)
{
	u32 key = bpf_get_smp_processor_id();

	return bpf_map_lookup_elem(&percpu_map, &key);
}

/* some simple stats for user space consumption */
struct pair {
	__u64 packets;
	__u64 bytes;
};

struct bpf_map_def SEC("maps") hash_map = {
	.type = BPF_MAP_TYPE_HASH,
	.key_size = sizeof(struct bpf_flow_keys),
	.value_size = sizeof(struct pair),
	.max_entries = 1024,
};

static void update_stats(struct __sk_buff *skb, struct globals *g)
{
	struct bpf_flow_keys key = g->flow;
	struct pair *value;

	value = bpf_map_lookup_elem(&hash_map, &key);
	if (value) {
		__sync_fetch_and_add(&value->packets, 1);
		__sync_fetch_and_add(&value->bytes, skb->len);
	} else {
		struct pair val = {1, skb->len};

		bpf_map_update_elem(&hash_map, &key, &val, BPF_ANY);
	}
}

static __always_inline void parse_ip_proto(struct __sk_buff *skb,
					   struct globals *g, __u32 ip_proto)
{
	__u32 nhoff = skb->cb[0];
	int poff;

	switch (ip_proto) {
	case IPPROTO_GRE: {
		struct gre_hdr {
			__be16 flags;
			__be16 proto;
		};

		__u32 gre_flags = load_half(skb,
					    nhoff + offsetof(struct gre_hdr, flags));
		__u32 gre_proto = load_half(skb,
					    nhoff + offsetof(struct gre_hdr, proto));

		if (gre_flags & (GRE_VERSION|GRE_ROUTING))
			break;

		nhoff += 4; /* step over the GRE base header */
		if (gre_flags & GRE_CSUM)
			nhoff += 4;
		if (gre_flags & GRE_KEY)
			nhoff += 4;
		if (gre_flags & GRE_SEQ)
			nhoff += 4;

		skb->cb[0] = nhoff;
		parse_eth_proto(skb, gre_proto);
		break;
	}
	case IPPROTO_IPIP:
		parse_eth_proto(skb, ETH_P_IP);
		break;
	case IPPROTO_IPV6:
		parse_eth_proto(skb, ETH_P_IPV6);
		break;
	case IPPROTO_TCP:
	case IPPROTO_UDP:
		g->flow.ports = load_word(skb, nhoff);
		/* fall through: record the protocol and update the stats */
	case IPPROTO_ICMP:
		g->flow.ip_proto = ip_proto;
		update_stats(skb, g);
		break;
	default:
		break;
	}
}

PROG(PARSE_IP)(struct __sk_buff *skb)
{
	struct globals *g = this_cpu_globals();
	__u32 nhoff, verlen, ip_proto;

	if (!g)
		return 0;

	nhoff = skb->cb[0];

	if (unlikely(ip_is_fragment(skb, nhoff)))
		return 0;

	ip_proto = load_byte(skb, nhoff + offsetof(struct iphdr, protocol));

	if (ip_proto != IPPROTO_GRE) {
		g->flow.src = load_word(skb, nhoff + offsetof(struct iphdr, saddr));
		g->flow.dst = load_word(skb, nhoff + offsetof(struct iphdr, daddr));
	}

	verlen = load_byte(skb, nhoff + 0/*offsetof(struct iphdr, ihl)*/);
	nhoff += (verlen & 0xF) << 2;

	skb->cb[0] = nhoff;
	parse_ip_proto(skb, g, ip_proto);
	return 0;
}

PROG(PARSE_IPV6)(struct __sk_buff *skb)
{
	struct globals *g = this_cpu_globals();
	__u32 nhoff, ip_proto;

	if (!g)
		return 0;

	nhoff = skb->cb[0];

	ip_proto = load_byte(skb,
			     nhoff + offsetof(struct ipv6hdr, nexthdr));
	g->flow.src = ipv6_addr_hash(skb,
				     nhoff + offsetof(struct ipv6hdr, saddr));
	g->flow.dst = ipv6_addr_hash(skb,
				     nhoff + offsetof(struct ipv6hdr, daddr));
	nhoff += sizeof(struct ipv6hdr);

	skb->cb[0] = nhoff;
	parse_ip_proto(skb, g, ip_proto);
	return 0;
}

PROG(PARSE_VLAN)(struct __sk_buff *skb)
{
	__u32 nhoff, proto;

	nhoff = skb->cb[0];

	proto = load_half(skb, nhoff + offsetof(struct vlan_hdr,
						h_vlan_encapsulated_proto));
	nhoff += sizeof(struct vlan_hdr);
	skb->cb[0] = nhoff;

	parse_eth_proto(skb, proto);

	return 0;
}

PROG(PARSE_MPLS)(struct __sk_buff *skb)
{
	__u32 nhoff, label;

	nhoff = skb->cb[0];

	label = load_word(skb, nhoff);
	nhoff += sizeof(struct mpls_label);
	skb->cb[0] = nhoff;

	if (label & MPLS_LS_S_MASK) {
		/* bottom of stack: peek at the IP version nibble of the payload */
		__u8 verlen = load_byte(skb, nhoff);

		if ((verlen & 0xF0) == 0x40)
			parse_eth_proto(skb, ETH_P_IP);
		else
			parse_eth_proto(skb, ETH_P_IPV6);
	} else {
		parse_eth_proto(skb, ETH_P_MPLS_UC);
	}

	return 0;
}

SEC("socket/0")
int main_prog(struct __sk_buff *skb)
{
	__u32 nhoff = ETH_HLEN;
	__u32 proto = load_half(skb, 12);

	skb->cb[0] = nhoff;
	parse_eth_proto(skb, proto);
	return 0;
}

char _license[] SEC("license") = "GPL";
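/*
 * Illustrative user-space loader (not part of the original sample file).
 * It shows how the jmp_table prog_array above gets populated and how the
 * "socket/0" dispatcher is attached to a packet socket with SO_ATTACH_BPF.
 * This is a minimal sketch using libbpf; the in-tree sample ships its own
 * loader (samples/bpf/sockex3_user.c).  Assumptions: the compiled object is
 * named sockex3_kern.o, and the libbpf version in use still accepts the
 * legacy bpf_map_def map style and the "socket/<n>" section names.
 * Build it as a separate user-space program, e.g. a hypothetical
 * sockex3_loader.c; it is not part of the BPF object above.
 */
#include <stdio.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <sys/socket.h>
#include <linux/if_ether.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>

#ifndef SO_ATTACH_BPF
#define SO_ATTACH_BPF 50	/* asm-generic value; normally comes from sys/socket.h */
#endif

int main(void)
{
	/* program names follow the PROG() macro above: bpf_func_<PARSE_*> */
	const char *names[] = { NULL, "bpf_func_PARSE_VLAN",
				"bpf_func_PARSE_MPLS", "bpf_func_PARSE_IP",
				"bpf_func_PARSE_IPV6" };
	struct bpf_object *obj;
	struct bpf_program *prog;
	int jmp_fd, main_fd, sock, key, fd;

	obj = bpf_object__open_file("sockex3_kern.o", NULL);
	if (libbpf_get_error(obj) || bpf_object__load(obj)) {
		fprintf(stderr, "failed to open/load sockex3_kern.o\n");
		return 1;
	}

	jmp_fd = bpf_map__fd(bpf_object__find_map_by_name(obj, "jmp_table"));

	/* fill the prog_array so the tail calls in parse_eth_proto() resolve */
	for (key = 1; key <= 4; key++) {	/* PARSE_VLAN .. PARSE_IPV6 */
		prog = bpf_object__find_program_by_name(obj, names[key]);
		if (!prog)
			return 1;
		fd = bpf_program__fd(prog);
		if (bpf_map_update_elem(jmp_fd, &key, &fd, BPF_ANY))
			return 1;
	}

	/* attach the "socket/0" dispatcher to a raw packet socket */
	prog = bpf_object__find_program_by_name(obj, "main_prog");
	main_fd = bpf_program__fd(prog);
	sock = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
	if (sock < 0 ||
	    setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, &main_fd, sizeof(main_fd)))
		return 1;

	/* from here, hash_map can be read periodically to dump per-flow stats */
	for (;;)
		sleep(1);
	return 0;
}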